Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
7c1f1d88
Commit
7c1f1d88
authored
Feb 28, 2013
by
yao
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix bug #2787
parent
9ec5333a
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
116 additions
and
105 deletions
+116
-105
nonfree_surf.cl
modules/ocl/src/kernels/nonfree_surf.cl
+48
-29
surf.cpp
modules/ocl/src/surf.cpp
+68
-76
No files found.
modules/ocl/src/kernels/nonfree_surf.cl
View file @
7c1f1d88
...
...
@@ -78,7 +78,7 @@ uchar read_imgTex(IMAGE_INT8 img, sampler_t sam, float2 coord, int rows, int col
//
dynamically
change
the
precision
used
for
floating
type
#
if
defined
(
__ATI__
)
|
| defined (__NVIDIA__)
#
if
defined
DOUBLE_SUPPORT
#
define
F
double
#
else
#
define
F
float
...
...
@@ -299,7 +299,7 @@ __kernel
__global
const
float
*
det,
__global
const
float
*
trace,
__global
int4
*
maxPosBuffer,
volatile __global
unsigned
int* maxCounter,
volatile
__global
int*
maxCounter,
int
counter_offset,
int
det_step,
//
the
step
of
det
in
bytes
int
trace_step,
//
the
step
of
trace
in
bytes
...
...
@@ -408,7 +408,7 @@ __kernel
if
(
condmax
)
{
unsigned
int ind = atomic_inc(maxCounter);
int
ind
=
atomic_inc
(
maxCounter
)
;
if
(
ind
<
c_max_candidates
)
{
...
...
@@ -427,7 +427,7 @@ __kernel
__global
float
*
det,
__global
float
*
trace,
__global
int4
*
maxPosBuffer,
volatile __global
unsigned
int* maxCounter,
volatile
__global
int*
maxCounter,
int
counter_offset,
int
det_step,
//
the
step
of
det
in
bytes
int
trace_step,
//
the
step
of
trace
in
bytes
...
...
@@ -525,7 +525,7 @@ __kernel
if
(
condmax
)
{
unsigned
int ind = atomic_inc(maxCounter);
int
ind
=
atomic_inc
(
maxCounter
)
;
if
(
ind
<
c_max_candidates
)
{
...
...
@@ -585,7 +585,7 @@ __kernel
__global
const
float
*
det,
__global
const
int4
*
maxPosBuffer,
__global
float
*
keypoints,
volatile __global
unsigned
int * featureCounter,
volatile
__global
int
*
featureCounter,
int
det_step,
int
keypoints_step,
int
c_img_rows,
...
...
@@ -684,7 +684,7 @@ __kernel
if
((
c_img_rows
+
1
)
>=
grad_wav_size
&&
(
c_img_cols
+
1
)
>=
grad_wav_size
)
{
//
Get
a
new
feature
index.
unsigned
int ind = atomic_inc(featureCounter);
int
ind
=
atomic_inc
(
featureCounter
)
;
if
(
ind
<
c_max_features
)
{
...
...
@@ -737,19 +737,19 @@ __constant float c_aptW[ORI_SAMPLES] = {0.001455130288377404f, 0.001707611023448
__constant
float
c_NX[2][5]
=
{{0,
0
,
2
,
4
,
-1},
{2,
0
,
4
,
4
,
1}}
;
__constant
float
c_NY[2][5]
=
{{0,
0
,
4
,
2
,
1},
{0,
2
,
4
,
4
,
-1}}
;
void reduce_32_sum(volatile __local float * data,
float
partial_reduction, int tid)
void
reduce_32_sum
(
volatile
__local
float
*
data,
volatile
float*
partial_reduction,
int
tid
)
{
#define op(A, B) (A)+(B)
data[tid] = partial_reduction;
#
define
op
(
A,
B
)
(
*
A
)
+
(
B
)
data[tid]
=
*
partial_reduction
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
16
)
{
data[tid] = partial_reduction = op(partial_reduction, data[tid + 16]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 8 ]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 4 ]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 2 ]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 1 ]);
data[tid]
=
*
partial_reduction
=
op
(
partial_reduction,
data[tid
+
16]
)
;
data[tid]
=
*
partial_reduction
=
op
(
partial_reduction,
data[tid
+
8
]
)
;
data[tid]
=
*
partial_reduction
=
op
(
partial_reduction,
data[tid
+
4
]
)
;
data[tid]
=
*
partial_reduction
=
op
(
partial_reduction,
data[tid
+
2
]
)
;
data[tid]
=
*
partial_reduction
=
op
(
partial_reduction,
data[tid
+
1
]
)
;
}
#
undef
op
}
...
...
@@ -831,7 +831,7 @@ __kernel
{
const int dir = (i * 4 + get_local_id(1)) * ORI_SEARCH_INC;
float sumx = 0.0f, sumy = 0.0f;
volatile
float sumx = 0.0f, sumy = 0.0f;
int d = abs(convert_int_rte(s_angle[get_local_id(0)]) - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
...
...
@@ -856,8 +856,8 @@ __kernel
sumx += s_X[get_local_id(0) + 96];
sumy += s_Y[get_local_id(0) + 96];
}
reduce_32_sum(s_sumx + get_local_id(1) * 32, sumx, get_local_id(0));
reduce_32_sum(s_sumy + get_local_id(1) * 32, sumy, get_local_id(0));
reduce_32_sum(s_sumx + get_local_id(1) * 32,
&
sumx, get_local_id(0));
reduce_32_sum(s_sumy + get_local_id(1) * 32,
&
sumy, get_local_id(0));
const float temp_mod = sumx * sumx + sumy * sumy;
if (temp_mod > best_mod)
...
...
@@ -892,14 +892,32 @@ __kernel
kp_dir += 2.0f * CV_PI_F;
kp_dir *= 180.0f / CV_PI_F;
kp_dir = 360.0f - kp_dir;
if (fabs(kp_dir - 360.f) < FLT_EPSILON)
kp_dir = 0.f;
//
kp_dir = 360.0f - kp_dir;
//
if (fabs(kp_dir - 360.f) < FLT_EPSILON)
//
kp_dir = 0.f;
featureDir[get_group_id(0)] = kp_dir;
}
}
__kernel
void icvSetUpright(
__global float * keypoints,
int keypoints_step,
int nFeatures
)
{
keypoints_step /= sizeof(*keypoints);
__global float* featureDir = keypoints + ANGLE_ROW * keypoints_step;
if(get_global_id(0) <= nFeatures)
{
featureDir[get_global_id(0)] = 90.0f;
}
}
#undef ORI_SEARCH_INC
#undef ORI_WIN
#undef ORI_SAMPLES
...
...
@@ -993,10 +1011,7 @@ void calc_dx_dy(
const float centerX = featureX[get_group_id(0)];
const float centerY = featureY[get_group_id(0)];
const float size = featureSize[get_group_id(0)];
float descriptor_dir = 360.0f - featureDir[get_group_id(0)];
if (fabs(descriptor_dir - 360.f) < FLT_EPSILON)
descriptor_dir = 0.f;
descriptor_dir *= (float)(CV_PI_F / 180.0f);
float descriptor_dir = featureDir[get_group_id(0)] * (float)(CV_PI_F / 180.0f);
/* The sampling intervals and wavelet sized for selecting an orientation
and building the keypoint descriptor are defined relative to 's' */
...
...
@@ -1125,11 +1140,15 @@ __kernel
{
sdxabs[tid] = fabs(sdx[tid]); // |dx| array
sdyabs[tid] = fabs(sdy[tid]); // |dy| array
//barrier(CLK_LOCAL_MEM_FENCE);
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 25)
{
reduce_sum25(sdx, sdy, sdxabs, sdyabs, tid);
//barrier(CLK_LOCAL_MEM_FENCE);
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 25)
{
volatile __global float* descriptors_block = descriptors + descriptors_step * get_group_id(0) + (get_group_id(1) << 2);
// write dx, dy, |dx|, |dy|
...
...
modules/ocl/src/surf.cpp
View file @
7c1f1d88
...
...
@@ -57,6 +57,21 @@ namespace cv
{
///////////////////////////OpenCL kernel strings///////////////////////////
extern
const
char
*
nonfree_surf
;
const
char
*
noImage2dOption
=
"-D DISABLE_IMAGE2D"
;
void
openCLExecuteKernelSURF
(
Context
*
clCxt
,
const
char
**
source
,
string
kernelName
,
size_t
globalThreads
[
3
],
size_t
localThreads
[
3
],
vector
<
pair
<
size_t
,
const
void
*>
>
&
args
,
int
channels
,
int
depth
)
{
if
(
support_image2d
())
{
openCLExecuteKernel
(
clCxt
,
source
,
kernelName
,
globalThreads
,
localThreads
,
args
,
channels
,
depth
);
}
else
{
openCLExecuteKernel
(
clCxt
,
source
,
kernelName
,
globalThreads
,
localThreads
,
args
,
channels
,
depth
,
noImage2dOption
);
}
}
}
}
...
...
@@ -80,10 +95,6 @@ static inline int calcSize(int octave, int layer)
return
(
HAAR_SIZE0
+
HAAR_SIZE_INC
*
layer
)
<<
octave
;
}
namespace
{
const
char
*
noImage2dOption
=
"-D DISABLE_IMAGE2D"
;
}
class
SURF_OCL_Invoker
{
...
...
@@ -100,15 +111,16 @@ public:
void
icvFindMaximaInLayer_gpu
(
const
oclMat
&
det
,
const
oclMat
&
trace
,
oclMat
&
maxPosBuffer
,
oclMat
&
maxCounter
,
int
counterOffset
,
int
octave
,
bool
use_mask
,
int
nLayers
,
int
layer_rows
,
int
layer_cols
);
void
icvInterpolateKeypoint_gpu
(
const
oclMat
&
det
,
const
oclMat
&
maxPosBuffer
,
unsigned
int
maxCounter
,
void
icvInterpolateKeypoint_gpu
(
const
oclMat
&
det
,
const
oclMat
&
maxPosBuffer
,
int
maxCounter
,
oclMat
&
keypoints
,
oclMat
&
counters
,
int
octave
,
int
layer_rows
,
int
maxFeatures
);
void
icvCalcOrientation_gpu
(
const
oclMat
&
keypoints
,
int
nFeatures
);
void
icvSetUpright_gpu
(
const
oclMat
&
keypoints
,
int
nFeatures
);
void
compute_descriptors_gpu
(
const
oclMat
&
descriptors
,
const
oclMat
&
keypoints
,
int
nFeatures
);
// end of kernel callers declarations
SURF_OCL_Invoker
(
SURF_OCL
&
surf
,
const
oclMat
&
img
,
const
oclMat
&
mask
)
:
surf_
(
surf
),
img_cols
(
img
.
cols
),
img_rows
(
img
.
rows
),
...
...
@@ -182,8 +194,8 @@ public:
icvFindMaximaInLayer_gpu
(
surf_
.
det
,
surf_
.
trace
,
surf_
.
maxPosBuffer
,
counters
,
1
+
octave
,
octave
,
use_mask
,
surf_
.
nOctaveLayers
,
layer_rows
,
layer_cols
);
unsigned
int
maxCounter
=
Mat
(
counters
).
at
<
unsigned
int
>
(
1
+
octave
);
maxCounter
=
std
::
min
(
maxCounter
,
static_cast
<
unsigned
int
>
(
maxCandidates
));
int
maxCounter
=
((
Mat
)
counters
).
at
<
int
>
(
1
+
octave
);
maxCounter
=
std
::
min
(
maxCounter
,
static_cast
<
int
>
(
maxCandidates
));
if
(
maxCounter
>
0
)
{
...
...
@@ -191,15 +203,29 @@ public:
keypoints
,
counters
,
octave
,
layer_rows
,
maxFeatures
);
}
}
unsigned
int
featureCounter
=
Mat
(
counters
).
at
<
unsigned
int
>
(
0
);
featureCounter
=
std
::
min
(
featureCounter
,
static_cast
<
unsigned
int
>
(
maxFeatures
));
int
featureCounter
=
Mat
(
counters
).
at
<
int
>
(
0
);
featureCounter
=
std
::
min
(
featureCounter
,
static_cast
<
int
>
(
maxFeatures
));
keypoints
.
cols
=
featureCounter
;
if
(
surf_
.
upright
)
keypoints
.
row
(
SURF_OCL
::
ANGLE_ROW
).
setTo
(
Scalar
::
all
(
90.0
));
{
//keypoints.row(SURF_OCL::ANGLE_ROW).setTo(Scalar::all(90.0));
setUpright
(
keypoints
);
}
else
{
findOrientation
(
keypoints
);
}
}
void
setUpright
(
oclMat
&
keypoints
)
{
const
int
nFeatures
=
keypoints
.
cols
;
if
(
nFeatures
>
0
)
{
icvSetUpright_gpu
(
keypoints
,
keypoints
.
cols
);
}
}
void
findOrientation
(
oclMat
&
keypoints
)
...
...
@@ -484,14 +510,7 @@ void SURF_OCL_Invoker::icvCalcLayerDetAndTrace_gpu(oclMat &det, oclMat &trace, i
divUp
(
max_samples_i
,
localThreads
[
1
])
*
localThreads
[
1
]
*
(
nOctaveLayers
+
2
),
1
};
if
(
support_image2d
())
{
openCLExecuteKernel
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
else
{
openCLExecuteKernel
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
noImage2dOption
);
}
openCLExecuteKernelSURF
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
void
SURF_OCL_Invoker
::
icvFindMaximaInLayer_gpu
(
const
oclMat
&
det
,
const
oclMat
&
trace
,
oclMat
&
maxPosBuffer
,
oclMat
&
maxCounter
,
int
counterOffset
,
...
...
@@ -537,17 +556,10 @@ void SURF_OCL_Invoker::icvFindMaximaInLayer_gpu(const oclMat &det, const oclMat
1
};
if
(
support_image2d
())
{
openCLExecuteKernel
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
else
{
openCLExecuteKernel
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
noImage2dOption
);
}
openCLExecuteKernelSURF
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
void
SURF_OCL_Invoker
::
icvInterpolateKeypoint_gpu
(
const
oclMat
&
det
,
const
oclMat
&
maxPosBuffer
,
unsigned
int
maxCounter
,
void
SURF_OCL_Invoker
::
icvInterpolateKeypoint_gpu
(
const
oclMat
&
det
,
const
oclMat
&
maxPosBuffer
,
int
maxCounter
,
oclMat
&
keypoints
,
oclMat
&
counters
,
int
octave
,
int
layer_rows
,
int
maxFeatures
)
{
Context
*
clCxt
=
det
.
clCxt
;
...
...
@@ -569,14 +581,7 @@ void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat &det, const oclMa
size_t
localThreads
[
3
]
=
{
3
,
3
,
3
};
size_t
globalThreads
[
3
]
=
{
maxCounter
*
localThreads
[
0
],
localThreads
[
1
],
1
};
if
(
support_image2d
())
{
openCLExecuteKernel
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
else
{
openCLExecuteKernel
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
noImage2dOption
);
}
openCLExecuteKernelSURF
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
void
SURF_OCL_Invoker
::
icvCalcOrientation_gpu
(
const
oclMat
&
keypoints
,
int
nFeatures
)
...
...
@@ -603,16 +608,27 @@ void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat &keypoints, int nFeat
size_t
localThreads
[
3
]
=
{
32
,
4
,
1
};
size_t
globalThreads
[
3
]
=
{
nFeatures
*
localThreads
[
0
],
localThreads
[
1
],
1
};
if
(
support_image2d
())
{
openCLExecuteKernel
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
else
{
openCLExecuteKernel
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
noImage2dOption
);
}
openCLExecuteKernelSURF
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
void
SURF_OCL_Invoker
::
icvSetUpright_gpu
(
const
oclMat
&
keypoints
,
int
nFeatures
)
{
Context
*
clCxt
=
counters
.
clCxt
;
string
kernelName
=
"icvSetUpright"
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
keypoints
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
keypoints
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
nFeatures
));
size_t
localThreads
[
3
]
=
{
256
,
1
,
1
};
size_t
globalThreads
[
3
]
=
{
nFeatures
,
1
,
1
};
openCLExecuteKernelSURF
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
void
SURF_OCL_Invoker
::
compute_descriptors_gpu
(
const
oclMat
&
descriptors
,
const
oclMat
&
keypoints
,
int
nFeatures
)
{
// compute unnormalized descriptors, then normalize them - odd indexing since grid must be 2D
...
...
@@ -648,14 +664,8 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
_img
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
_img
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
_img
.
step
));
if
(
support_image2d
())
{
openCLExecuteKernel
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
else
{
openCLExecuteKernel
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
noImage2dOption
);
}
openCLExecuteKernelSURF
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
kernelName
=
"normalize_descriptors64"
;
...
...
@@ -668,14 +678,8 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
args
.
clear
();
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
descriptors
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
descriptors
.
step
));
if
(
support_image2d
())
{
openCLExecuteKernel
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
else
{
openCLExecuteKernel
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
noImage2dOption
);
}
openCLExecuteKernelSURF
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
else
{
...
...
@@ -703,14 +707,8 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
_img
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
_img
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
_img
.
step
));
if
(
support_image2d
())
{
openCLExecuteKernel
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
else
{
openCLExecuteKernel
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
noImage2dOption
);
}
openCLExecuteKernelSURF
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
kernelName
=
"normalize_descriptors128"
;
...
...
@@ -723,14 +721,8 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
args
.
clear
();
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
descriptors
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
descriptors
.
step
));
if
(
support_image2d
())
{
openCLExecuteKernel
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
else
{
openCLExecuteKernel
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
noImage2dOption
);
}
openCLExecuteKernelSURF
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment