Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
7c1f1d88
Commit
7c1f1d88
authored
Feb 28, 2013
by
yao
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
fix bug #2787
parent
9ec5333a
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
48 additions
and
29 deletions
+48
-29
nonfree_surf.cl
modules/ocl/src/kernels/nonfree_surf.cl
+48
-29
surf.cpp
modules/ocl/src/surf.cpp
+0
-0
No files found.
modules/ocl/src/kernels/nonfree_surf.cl
View file @
7c1f1d88
...
...
@@ -78,7 +78,7 @@ uchar read_imgTex(IMAGE_INT8 img, sampler_t sam, float2 coord, int rows, int col
//
dynamically
change
the
precision
used
for
floating
type
#
if
defined
(
__ATI__
)
|
| defined (__NVIDIA__)
#
if
defined
DOUBLE_SUPPORT
#
define
F
double
#
else
#
define
F
float
...
...
@@ -299,7 +299,7 @@ __kernel
__global
const
float
*
det,
__global
const
float
*
trace,
__global
int4
*
maxPosBuffer,
volatile __global
unsigned
int* maxCounter,
volatile
__global
int*
maxCounter,
int
counter_offset,
int
det_step,
//
the
step
of
det
in
bytes
int
trace_step,
//
the
step
of
trace
in
bytes
...
...
@@ -408,7 +408,7 @@ __kernel
if
(
condmax
)
{
unsigned
int ind = atomic_inc(maxCounter);
int
ind
=
atomic_inc
(
maxCounter
)
;
if
(
ind
<
c_max_candidates
)
{
...
...
@@ -427,7 +427,7 @@ __kernel
__global
float
*
det,
__global
float
*
trace,
__global
int4
*
maxPosBuffer,
volatile __global
unsigned
int* maxCounter,
volatile
__global
int*
maxCounter,
int
counter_offset,
int
det_step,
//
the
step
of
det
in
bytes
int
trace_step,
//
the
step
of
trace
in
bytes
...
...
@@ -525,7 +525,7 @@ __kernel
if
(
condmax
)
{
unsigned
int ind = atomic_inc(maxCounter);
int
ind
=
atomic_inc
(
maxCounter
)
;
if
(
ind
<
c_max_candidates
)
{
...
...
@@ -585,7 +585,7 @@ __kernel
__global
const
float
*
det,
__global
const
int4
*
maxPosBuffer,
__global
float
*
keypoints,
volatile __global
unsigned
int * featureCounter,
volatile
__global
int
*
featureCounter,
int
det_step,
int
keypoints_step,
int
c_img_rows,
...
...
@@ -684,7 +684,7 @@ __kernel
if
((
c_img_rows
+
1
)
>=
grad_wav_size
&&
(
c_img_cols
+
1
)
>=
grad_wav_size
)
{
//
Get
a
new
feature
index.
unsigned
int ind = atomic_inc(featureCounter);
int
ind
=
atomic_inc
(
featureCounter
)
;
if
(
ind
<
c_max_features
)
{
...
...
@@ -737,19 +737,19 @@ __constant float c_aptW[ORI_SAMPLES] = {0.001455130288377404f, 0.001707611023448
__constant
float
c_NX[2][5]
=
{{0,
0
,
2
,
4
,
-1},
{2,
0
,
4
,
4
,
1}}
;
__constant
float
c_NY[2][5]
=
{{0,
0
,
4
,
2
,
1},
{0,
2
,
4
,
4
,
-1}}
;
void reduce_32_sum(volatile __local float * data,
float
partial_reduction, int tid)
void
reduce_32_sum
(
volatile
__local
float
*
data,
volatile
float*
partial_reduction,
int
tid
)
{
#define op(A, B) (A)+(B)
data[tid] = partial_reduction;
#
define
op
(
A,
B
)
(
*
A
)
+
(
B
)
data[tid]
=
*
partial_reduction
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
16
)
{
data[tid] = partial_reduction = op(partial_reduction, data[tid + 16]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 8 ]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 4 ]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 2 ]);
data[tid] = partial_reduction = op(partial_reduction, data[tid + 1 ]);
data[tid]
=
*
partial_reduction
=
op
(
partial_reduction,
data[tid
+
16]
)
;
data[tid]
=
*
partial_reduction
=
op
(
partial_reduction,
data[tid
+
8
]
)
;
data[tid]
=
*
partial_reduction
=
op
(
partial_reduction,
data[tid
+
4
]
)
;
data[tid]
=
*
partial_reduction
=
op
(
partial_reduction,
data[tid
+
2
]
)
;
data[tid]
=
*
partial_reduction
=
op
(
partial_reduction,
data[tid
+
1
]
)
;
}
#
undef
op
}
...
...
@@ -831,7 +831,7 @@ __kernel
{
const int dir = (i * 4 + get_local_id(1)) * ORI_SEARCH_INC;
float sumx = 0.0f, sumy = 0.0f;
volatile
float sumx = 0.0f, sumy = 0.0f;
int d = abs(convert_int_rte(s_angle[get_local_id(0)]) - dir);
if (d < ORI_WIN / 2 || d > 360 - ORI_WIN / 2)
{
...
...
@@ -856,8 +856,8 @@ __kernel
sumx += s_X[get_local_id(0) + 96];
sumy += s_Y[get_local_id(0) + 96];
}
reduce_32_sum(s_sumx + get_local_id(1) * 32, sumx, get_local_id(0));
reduce_32_sum(s_sumy + get_local_id(1) * 32, sumy, get_local_id(0));
reduce_32_sum(s_sumx + get_local_id(1) * 32,
&
sumx, get_local_id(0));
reduce_32_sum(s_sumy + get_local_id(1) * 32,
&
sumy, get_local_id(0));
const float temp_mod = sumx * sumx + sumy * sumy;
if (temp_mod > best_mod)
...
...
@@ -892,14 +892,32 @@ __kernel
kp_dir += 2.0f * CV_PI_F;
kp_dir *= 180.0f / CV_PI_F;
kp_dir = 360.0f - kp_dir;
if (fabs(kp_dir - 360.f) < FLT_EPSILON)
kp_dir = 0.f;
//
kp_dir = 360.0f - kp_dir;
//
if (fabs(kp_dir - 360.f) < FLT_EPSILON)
//
kp_dir = 0.f;
featureDir[get_group_id(0)] = kp_dir;
}
}
__kernel
void icvSetUpright(
__global float * keypoints,
int keypoints_step,
int nFeatures
)
{
keypoints_step /= sizeof(*keypoints);
__global float* featureDir = keypoints + ANGLE_ROW * keypoints_step;
if(get_global_id(0) <= nFeatures)
{
featureDir[get_global_id(0)] = 90.0f;
}
}
#undef ORI_SEARCH_INC
#undef ORI_WIN
#undef ORI_SAMPLES
...
...
@@ -993,10 +1011,7 @@ void calc_dx_dy(
const float centerX = featureX[get_group_id(0)];
const float centerY = featureY[get_group_id(0)];
const float size = featureSize[get_group_id(0)];
float descriptor_dir = 360.0f - featureDir[get_group_id(0)];
if (fabs(descriptor_dir - 360.f) < FLT_EPSILON)
descriptor_dir = 0.f;
descriptor_dir *= (float)(CV_PI_F / 180.0f);
float descriptor_dir = featureDir[get_group_id(0)] * (float)(CV_PI_F / 180.0f);
/* The sampling intervals and wavelet sized for selecting an orientation
and building the keypoint descriptor are defined relative to 's' */
...
...
@@ -1125,11 +1140,15 @@ __kernel
{
sdxabs[tid] = fabs(sdx[tid]); // |dx| array
sdyabs[tid] = fabs(sdy[tid]); // |dy| array
//barrier(CLK_LOCAL_MEM_FENCE);
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 25)
{
reduce_sum25(sdx, sdy, sdxabs, sdyabs, tid);
//barrier(CLK_LOCAL_MEM_FENCE);
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 25)
{
volatile __global float* descriptors_block = descriptors + descriptors_step * get_group_id(0) + (get_group_id(1) << 2);
// write dx, dy, |dx|, |dy|
...
...
modules/ocl/src/surf.cpp
View file @
7c1f1d88
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment