Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
f7b40cdc
Commit
f7b40cdc
authored
Mar 26, 2013
by
peng xiao
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add a macro to call additional barrier function on the fly
parent
55c9a7c8
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
180 additions
and
55 deletions
+180
-55
surf.cl
modules/nonfree/src/opencl/surf.cl
+175
-51
surf.ocl.cpp
modules/nonfree/src/surf.ocl.cpp
+1
-1
test_surf.ocl.cpp
modules/nonfree/test/test_surf.ocl.cpp
+4
-3
No files found.
modules/nonfree/src/opencl/surf.cl
View file @
f7b40cdc
...
@@ -747,21 +747,42 @@ void reduce_32_sum(volatile __local float * data, volatile float* partial_reduc
...
@@ -747,21 +747,42 @@ void reduce_32_sum(volatile __local float * data, volatile float* partial_reduc
#
define
op
(
A,
B
)
(
*A
)
+
(
B
)
#
define
op
(
A,
B
)
(
*A
)
+
(
B
)
data[tid]
=
*partial_reduction
;
data[tid]
=
*partial_reduction
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
#
ifndef
WAVE_SIZE
#
define
WAVE_SIZE
1
#
endif
if
(
tid
<
16
)
if
(
tid
<
16
)
{
data[tid]
=
*partial_reduction
=
op
(
partial_reduction,
data[tid
+
16]
)
;
data[tid]
=
*partial_reduction
=
op
(
partial_reduction,
data[tid
+
16]
)
;
#
if
WAVE_SIZE
<
16
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
8
)
if
(
tid
<
8
)
{
#
endif
data[tid]
=
*partial_reduction
=
op
(
partial_reduction,
data[tid
+
8
]
)
;
data[tid]
=
*partial_reduction
=
op
(
partial_reduction,
data[tid
+
8
]
)
;
#
if
WAVE_SIZE
<
8
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
4
)
if
(
tid
<
4
)
{
#
endif
data[tid]
=
*partial_reduction
=
op
(
partial_reduction,
data[tid
+
4
]
)
;
data[tid]
=
*partial_reduction
=
op
(
partial_reduction,
data[tid
+
4
]
)
;
#
if
WAVE_SIZE
<
4
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
2
)
if
(
tid
<
2
)
{
#
endif
data[tid]
=
*partial_reduction
=
op
(
partial_reduction,
data[tid
+
2
]
)
;
data[tid]
=
*partial_reduction
=
op
(
partial_reduction,
data[tid
+
2
]
)
;
#
if
WAVE_SIZE
<
2
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
1
)
if
(
tid
<
1
)
{
#
endif
data[tid]
=
*partial_reduction
=
op
(
partial_reduction,
data[tid
+
1
]
)
;
data[tid]
=
*partial_reduction
=
op
(
partial_reduction,
data[tid
+
1
]
)
;
}
#
undef
WAVE_SIZE
#
undef
op
#
undef
op
}
}
...
@@ -1087,44 +1108,67 @@ void reduce_sum25(
...
@@ -1087,44 +1108,67 @@ void reduce_sum25(
int tid
int tid
)
)
{
{
#ifndef WAVE_SIZE
#define WAVE_SIZE 1
#endif
// first step is to reduce from 25 to 16
// first step is to reduce from 25 to 16
if (tid < 9)
// use 9 threads
if (tid < 9)
{
{
sdata1[tid] += sdata1[tid + 16];
sdata1[tid] += sdata1[tid + 16];
sdata2[tid] += sdata2[tid + 16];
sdata2[tid] += sdata2[tid + 16];
sdata3[tid] += sdata3[tid + 16];
sdata3[tid] += sdata3[tid + 16];
sdata4[tid] += sdata4[tid + 16];
sdata4[tid] += sdata4[tid + 16];
#if WAVE_SIZE < 16
}
}
barrier(CLK_LOCAL_MEM_FENCE);
// sum (reduce) from 16 to 1 (unrolled - aligned to a half-warp)
if (tid < 8)
if (tid < 8)
{
{
#endif
sdata1[tid] += sdata1[tid + 8];
sdata1[tid] += sdata1[tid + 8];
sdata1[tid] += sdata1[tid + 4];
sdata1[tid] += sdata1[tid + 2];
sdata1[tid] += sdata1[tid + 1];
sdata2[tid] += sdata2[tid + 8];
sdata2[tid] += sdata2[tid + 8];
sdata2[tid] += sdata2[tid + 4];
sdata2[tid] += sdata2[tid + 2];
sdata2[tid] += sdata2[tid + 1];
sdata3[tid] += sdata3[tid + 8];
sdata3[tid] += sdata3[tid + 8];
sdata3[tid] += sdata3[tid + 4];
sdata3[tid] += sdata3[tid + 2];
sdata3[tid] += sdata3[tid + 1];
sdata4[tid] += sdata4[tid + 8];
sdata4[tid] += sdata4[tid + 8];
#if WAVE_SIZE < 8
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 4)
{
#endif
sdata1[tid] += sdata1[tid + 4];
sdata2[tid] += sdata2[tid + 4];
sdata3[tid] += sdata3[tid + 4];
sdata4[tid] += sdata4[tid + 4];
sdata4[tid] += sdata4[tid + 4];
#if WAVE_SIZE < 4
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 2)
{
#endif
sdata1[tid] += sdata1[tid + 2];
sdata2[tid] += sdata2[tid + 2];
sdata3[tid] += sdata3[tid + 2];
sdata4[tid] += sdata4[tid + 2];
sdata4[tid] += sdata4[tid + 2];
#if WAVE_SIZE < 2
}
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 1)
{
#endif
sdata1[tid] += sdata1[tid + 1];
sdata2[tid] += sdata2[tid + 1];
sdata3[tid] += sdata3[tid + 1];
sdata4[tid] += sdata4[tid + 1];
sdata4[tid] += sdata4[tid + 1];
}
}
#undef WAVE_SIZE
}
}
__kernel
__kernel
void compute_descriptors64(
void compute_descriptors64(
IMAGE_INT8 imgTex,
IMAGE_INT8 imgTex,
volatile
__global float * descriptors,
__global float * descriptors,
__global const float * keypoints,
__global const float * keypoints,
int descriptors_step,
int descriptors_step,
int keypoints_step,
int keypoints_step,
...
@@ -1158,14 +1202,13 @@ __kernel
...
@@ -1158,14 +1202,13 @@ __kernel
sdyabs[tid] = fabs(sdy[tid]); // |dy| array
sdyabs[tid] = fabs(sdy[tid]); // |dy| array
}
}
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 25)
{
reduce_sum25(sdx, sdy, sdxabs, sdyabs, tid);
reduce_sum25(sdx, sdy, sdxabs, sdyabs, tid);
}
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 25)
if (tid < 25)
{
{
volatile
__global float* descriptors_block = descriptors + descriptors_step * get_group_id(0) + (get_group_id(1) << 2);
__global float* descriptors_block = descriptors + descriptors_step * get_group_id(0) + (get_group_id(1) << 2);
// write dx, dy, |dx|, |dy|
// write dx, dy, |dx|, |dy|
if (tid == 0)
if (tid == 0)
...
@@ -1180,7 +1223,7 @@ __kernel
...
@@ -1180,7 +1223,7 @@ __kernel
__kernel
__kernel
void compute_descriptors128(
void compute_descriptors128(
IMAGE_INT8 imgTex,
IMAGE_INT8 imgTex,
__global
volatile
float * descriptors,
__global float * descriptors,
__global float * keypoints,
__global float * keypoints,
int descriptors_step,
int descriptors_step,
int keypoints_step,
int keypoints_step,
...
@@ -1229,13 +1272,15 @@ __kernel
...
@@ -1229,13 +1272,15 @@ __kernel
sd2[tid] = sdx[tid];
sd2[tid] = sdx[tid];
sdabs2[tid] = fabs(sdx[tid]);
sdabs2[tid] = fabs(sdx[tid]);
}
}
//barrier(CLK_LOCAL_MEM_FENCE);
}
barrier(CLK_LOCAL_MEM_FENCE);
reduce_sum25(sd1, sd2, sdabs1, sdabs2, tid);
reduce_sum25(sd1, sd2, sdabs1, sdabs2, tid);
//barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
volatile __global float* descriptors_block = descriptors + descriptors_step * get_group_id(0) + (get_group_id(1) << 3);
__global float* descriptors_block = descriptors + descriptors_step * get_group_id(0) + (get_group_id(1) << 3);
if (tid < 25)
{
// write dx (dy >= 0), |dx| (dy >= 0), dx (dy < 0), |dx| (dy < 0)
// write dx (dy >= 0), |dx| (dy >= 0), dx (dy < 0), |dx| (dy < 0)
if (tid == 0)
if (tid == 0)
{
{
...
@@ -1259,11 +1304,14 @@ __kernel
...
@@ -1259,11 +1304,14 @@ __kernel
sd2[tid] = sdy[tid];
sd2[tid] = sdy[tid];
sdabs2[tid] = fabs(sdy[tid]);
sdabs2[tid] = fabs(sdy[tid]);
}
}
//barrier(CLK_LOCAL_MEM_FENCE);
}
barrier(CLK_LOCAL_MEM_FENCE);
reduce_sum25(sd1, sd2, sdabs1, sdabs2, tid);
reduce_sum25(sd1, sd2, sdabs1, sdabs2, tid);
//
barrier(CLK_LOCAL_MEM_FENCE);
barrier(CLK_LOCAL_MEM_FENCE);
if (tid < 25)
{
// write dy (dx >= 0), |dy| (dx >= 0), dy (dx < 0), |dy
|
(
dx
<
0
)
// write dy (dx >= 0), |dy| (dx >= 0), dy (dx < 0), |dy
|
(
dx
<
0
)
if
(
tid
==
0
)
if
(
tid
==
0
)
{
{
...
@@ -1274,6 +1322,103 @@ __kernel
...
@@ -1274,6 +1322,103 @@ __kernel
}
}
}
}
}
}
void
reduce_sum128
(
volatile
__local
float*
smem,
int
tid
)
{
#
ifndef
WAVE_SIZE
#
define
WAVE_SIZE
1
#
endif
if
(
tid
<
64
)
{
smem[tid]
+=
smem[tid
+
64]
;
#
if
WAVE_SIZE
<
64
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
32
)
{
#
endif
smem[tid]
+=
smem[tid
+
32]
;
#
if
WAVE_SIZE
<
32
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
16
)
{
#
endif
smem[tid]
+=
smem[tid
+
16]
;
#
if
WAVE_SIZE
<
16
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
8
)
{
#
endif
smem[tid]
+=
smem[tid
+
8]
;
#
if
WAVE_SIZE
<
8
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
4
)
{
#
endif
smem[tid]
+=
smem[tid
+
4]
;
#
if
WAVE_SIZE
<
4
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
2
)
{
#
endif
smem[tid]
+=
smem[tid
+
2]
;
#
if
WAVE_SIZE
<
2
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
1
)
{
#
endif
smem[tid]
+=
smem[tid
+
1]
;
}
}
void
reduce_sum64
(
volatile
__local
float*
smem,
int
tid
)
{
#
ifndef
WAVE_SIZE
#
define
WAVE_SIZE
1
#
endif
if
(
tid
<
32
)
{
smem[tid]
+=
smem[tid
+
32]
;
#
if
WAVE_SIZE
<
32
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
16
)
{
#
endif
smem[tid]
+=
smem[tid
+
16]
;
#
if
WAVE_SIZE
<
16
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
8
)
{
#
endif
smem[tid]
+=
smem[tid
+
8]
;
#
if
WAVE_SIZE
<
8
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
4
)
{
#
endif
smem[tid]
+=
smem[tid
+
4]
;
#
if
WAVE_SIZE
<
4
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
2
)
{
#
endif
smem[tid]
+=
smem[tid
+
2]
;
#
if
WAVE_SIZE
<
2
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
1
)
{
#
endif
smem[tid]
+=
smem[tid
+
1]
;
}
}
__kernel
__kernel
void
normalize_descriptors128
(
__global
float
*
descriptors,
int
descriptors_step
)
void
normalize_descriptors128
(
__global
float
*
descriptors,
int
descriptors_step
)
...
@@ -1288,22 +1433,10 @@ __kernel
...
@@ -1288,22 +1433,10 @@ __kernel
sqDesc[get_local_id
(
0
)
]
=
lookup
*
lookup
;
sqDesc[get_local_id
(
0
)
]
=
lookup
*
lookup
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
get_local_id
(
0
)
<
64
)
reduce_sum128
(
sqDesc,
get_local_id
(
0
))
;
sqDesc[get_local_id
(
0
)
]
+=
sqDesc[get_local_id
(
0
)
+
64]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
reduction
to
get
total
if
(
get_local_id
(
0
)
<
32
)
{
volatile
__local
float*
smem
=
sqDesc
;
smem[get_local_id
(
0
)
]
+=
smem[get_local_id
(
0
)
+
32]
;
smem[get_local_id
(
0
)
]
+=
smem[get_local_id
(
0
)
+
16]
;
smem[get_local_id
(
0
)
]
+=
smem[get_local_id
(
0
)
+
8]
;
smem[get_local_id
(
0
)
]
+=
smem[get_local_id
(
0
)
+
4]
;
smem[get_local_id
(
0
)
]
+=
smem[get_local_id
(
0
)
+
2]
;
smem[get_local_id
(
0
)
]
+=
smem[get_local_id
(
0
)
+
1]
;
}
//
compute
length
(
square
root
)
//
compute
length
(
square
root
)
volatile
__local
float
len
;
volatile
__local
float
len
;
...
@@ -1329,18 +1462,9 @@ __kernel
...
@@ -1329,18 +1462,9 @@ __kernel
sqDesc[get_local_id
(
0
)
]
=
lookup
*
lookup
;
sqDesc[get_local_id
(
0
)
]
=
lookup
*
lookup
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
//
reduction
to
get
total
if
(
get_local_id
(
0
)
<
32
)
reduce_sum64
(
sqDesc,
get_local_id
(
0
))
;
{
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
volatile
__local
float*
smem
=
sqDesc
;
smem[get_local_id
(
0
)
]
+=
smem[get_local_id
(
0
)
+
32]
;
smem[get_local_id
(
0
)
]
+=
smem[get_local_id
(
0
)
+
16]
;
smem[get_local_id
(
0
)
]
+=
smem[get_local_id
(
0
)
+
8]
;
smem[get_local_id
(
0
)
]
+=
smem[get_local_id
(
0
)
+
4]
;
smem[get_local_id
(
0
)
]
+=
smem[get_local_id
(
0
)
+
2]
;
smem[get_local_id
(
0
)
]
+=
smem[get_local_id
(
0
)
+
1]
;
}
//
compute
length
(
square
root
)
//
compute
length
(
square
root
)
volatile
__local
float
len
;
volatile
__local
float
len
;
...
...
modules/nonfree/src/surf.ocl.cpp
View file @
f7b40cdc
...
@@ -75,7 +75,7 @@ namespace cv
...
@@ -75,7 +75,7 @@ namespace cv
}
}
static
inline
in
t
divUp
(
size_t
total
,
size_t
grain
)
static
inline
size_
t
divUp
(
size_t
total
,
size_t
grain
)
{
{
return
(
total
+
grain
-
1
)
/
grain
;
return
(
total
+
grain
-
1
)
/
grain
;
}
}
...
...
modules/nonfree/test/test_surf.ocl.cpp
View file @
f7b40cdc
...
@@ -144,9 +144,10 @@ PARAM_TEST_CASE(SURF, HessianThreshold, Octaves, OctaveLayers, Extended, Upright
...
@@ -144,9 +144,10 @@ PARAM_TEST_CASE(SURF, HessianThreshold, Octaves, OctaveLayers, Extended, Upright
}
}
};
};
TEST_P
(
SURF
,
D
ISABLED_D
etector
)
TEST_P
(
SURF
,
Detector
)
{
{
cv
::
Mat
image
=
cv
::
imread
(
string
(
cvtest
::
TS
::
ptr
()
->
get_data_path
())
+
"shared/fruits.png"
,
cv
::
IMREAD_GRAYSCALE
);
// the data path should be opencv/samples
cv
::
Mat
image
=
cv
::
imread
(
string
(
cvtest
::
TS
::
ptr
()
->
get_data_path
())
+
"c/fruits.jpg"
,
cv
::
IMREAD_GRAYSCALE
);
ASSERT_FALSE
(
image
.
empty
());
ASSERT_FALSE
(
image
.
empty
());
cv
::
ocl
::
SURF_OCL
surf
;
cv
::
ocl
::
SURF_OCL
surf
;
...
@@ -179,7 +180,7 @@ TEST_P(SURF, DISABLED_Detector)
...
@@ -179,7 +180,7 @@ TEST_P(SURF, DISABLED_Detector)
TEST_P
(
SURF
,
DISABLED_Descriptor
)
TEST_P
(
SURF
,
DISABLED_Descriptor
)
{
{
cv
::
Mat
image
=
cv
::
imread
(
string
(
cvtest
::
TS
::
ptr
()
->
get_data_path
())
+
"
shared/fruits.pn
g"
,
cv
::
IMREAD_GRAYSCALE
);
cv
::
Mat
image
=
cv
::
imread
(
string
(
cvtest
::
TS
::
ptr
()
->
get_data_path
())
+
"
c/fruits.jp
g"
,
cv
::
IMREAD_GRAYSCALE
);
ASSERT_FALSE
(
image
.
empty
());
ASSERT_FALSE
(
image
.
empty
());
cv
::
ocl
::
SURF_OCL
surf
;
cv
::
ocl
::
SURF_OCL
surf
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment