Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
b836e3c6
Commit
b836e3c6
authored
Feb 27, 2013
by
Andrey Kamaev
Committed by
OpenCV Buildbot
Feb 27, 2013
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #556 from bitwangyaoyao:2.4_SURF
parents
f03c04c1
9ec5333a
Show whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
290 additions
and
163 deletions
+290
-163
nonfree_surf.cl
modules/ocl/src/kernels/nonfree_surf.cl
+123
-61
mcwutil.cpp
modules/ocl/src/mcwutil.cpp
+30
-0
mcwutil.hpp
modules/ocl/src/mcwutil.hpp
+4
-0
pyrlk.cpp
modules/ocl/src/pyrlk.cpp
+13
-14
surf.cpp
modules/ocl/src/surf.cpp
+120
-88
No files found.
modules/ocl/src/kernels/nonfree_surf.cl
View file @
b836e3c6
...
...
@@ -43,10 +43,39 @@
//
//M*/
#
pragma
OPENCL
EXTENSION
cl_amd_printf
:
enable
#
pragma
OPENCL
EXTENSION
cl_khr_global_int32_base_atomics
:
enable
#
pragma
OPENCL
EXTENSION
cl_khr_local_int32_base_atomics
:
enable
//
specialized
for
non-image2d_t
supported
platform,
intel
HD4000,
for
example
#
ifdef
DISABLE_IMAGE2D
#
define
IMAGE_INT32
__global
uint
*
#
define
IMAGE_INT8
__global
uchar
*
#
else
#
define
IMAGE_INT32
image2d_t
#
define
IMAGE_INT8
image2d_t
#
endif
uint
read_sumTex
(
IMAGE_INT32
img,
sampler_t
sam,
int2
coord,
int
rows,
int
cols,
int
elemPerRow
)
{
#
ifdef
DISABLE_IMAGE2D
int
x
=
clamp
(
coord.x,
0
,
cols
)
;
int
y
=
clamp
(
coord.y,
0
,
rows
)
;
return
img[elemPerRow
*
y
+
x]
;
#
else
return
read_imageui
(
img,
sam,
coord
)
.
x
;
#
endif
}
uchar
read_imgTex
(
IMAGE_INT8
img,
sampler_t
sam,
float2
coord,
int
rows,
int
cols,
int
elemPerRow
)
{
#
ifdef
DISABLE_IMAGE2D
int
x
=
clamp
(
convert_int_rte
(
coord.x
)
,
0
,
cols
-
1
)
;
int
y
=
clamp
(
convert_int_rte
(
coord.y
)
,
0
,
rows
-
1
)
;
return
img[elemPerRow
*
y
+
x]
;
#
else
return
(
uchar
)
read_imageui
(
img,
sam,
coord
)
.
x
;
#
endif
}
//
dynamically
change
the
precision
used
for
floating
type
#
if
defined
(
__ATI__
)
|
| defined (__NVIDIA__)
...
...
@@ -58,14 +87,24 @@
// Image read mode
__constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE |
CLK_ADDRESS_CLAMP_TO_EDGE
| CLK_FILTER_NEAREST;
#ifndef FLT_EPSILON
#define FLT_EPSILON (1e-15)
#
define CV_PI_F 3.14159265
f
#
endi
f
#ifndef CV_PI_F
#define CV_PI_F 3.14159265f
#endif
// Use integral image to calculate haar wavelets.
// N = 2
// for simple haar paatern
float icvCalcHaarPatternSum_2(image2d_t sumTex, __constant float src[2][5], int oldSize, int newSize, int y, int x)
float icvCalcHaarPatternSum_2(
IMAGE_INT32 sumTex,
__constant float src[2][5],
int oldSize,
int newSize,
int y, int x,
int rows, int cols, int elemPerRow)
{
float ratio = (float)newSize / oldSize;
...
...
@@ -81,11 +120,10 @@ float icvCalcHaarPatternSum_2(image2d_t sumTex, __constant float src[2][5], int
int dy2 = convert_int_rte(ratio * src[k][3]);
F t = 0;
t += read_imageui(sumTex, sampler, (int2)(x + dx1, y + dy1)).x;
t -= read_imageui(sumTex, sampler, (int2)(x + dx1, y + dy2)).x;
t -= read_imageui(sumTex, sampler, (int2)(x + dx2, y + dy1)).x;
t += read_imageui(sumTex, sampler, (int2)(x + dx2, y + dy2)).x;
t += read_sumTex( sumTex, sampler, (int2)(x + dx1, y + dy1), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1, y + dy2), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2, y + dy1), rows, cols, elemPerRow );
t += read_sumTex( sumTex, sampler, (int2)(x + dx2, y + dy2), rows, cols, elemPerRow );
d += t * src[k][4] / ((dx2 - dx1) * (dy2 - dy1));
}
...
...
@@ -93,7 +131,13 @@ float icvCalcHaarPatternSum_2(image2d_t sumTex, __constant float src[2][5], int
}
// N = 3
float icvCalcHaarPatternSum_3(image2d_t sumTex, __constant float src[3][5], int oldSize, int newSize, int y, int x)
float icvCalcHaarPatternSum_3(
IMAGE_INT32 sumTex,
__constant float src[2][5],
int oldSize,
int newSize,
int y, int x,
int rows, int cols, int elemPerRow)
{
float ratio = (float)newSize / oldSize;
...
...
@@ -109,11 +153,10 @@ float icvCalcHaarPatternSum_3(image2d_t sumTex, __constant float src[3][5], int
int dy2 = convert_int_rte(ratio * src[k][3]);
F t = 0;
t += read_imageui(sumTex, sampler, (int2)(x + dx1, y + dy1)).x;
t -= read_imageui(sumTex, sampler, (int2)(x + dx1, y + dy2)).x;
t -= read_imageui(sumTex, sampler, (int2)(x + dx2, y + dy1)).x;
t += read_imageui(sumTex, sampler, (int2)(x + dx2, y + dy2)).x;
t += read_sumTex( sumTex, sampler, (int2)(x + dx1, y + dy1), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1, y + dy2), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2, y + dy1), rows, cols, elemPerRow );
t += read_sumTex( sumTex, sampler, (int2)(x + dx2, y + dy2), rows, cols, elemPerRow );
d += t * src[k][4] / ((dx2 - dx1) * (dy2 - dy1));
}
...
...
@@ -121,7 +164,13 @@ float icvCalcHaarPatternSum_3(image2d_t sumTex, __constant float src[3][5], int
}
// N = 4
float icvCalcHaarPatternSum_4(image2d_t sumTex, __constant float src[4][5], int oldSize, int newSize, int y, int x)
float icvCalcHaarPatternSum_4(
IMAGE_INT32 sumTex,
__constant float src[2][5],
int oldSize,
int newSize,
int y, int x,
int rows, int cols, int elemPerRow)
{
float ratio = (float)newSize / oldSize;
...
...
@@ -137,11 +186,10 @@ float icvCalcHaarPatternSum_4(image2d_t sumTex, __constant float src[4][5], int
int dy2 = convert_int_rte(ratio * src[k][3]);
F t = 0;
t += read_imageui(sumTex, sampler, (int2)(x + dx1, y + dy1)).x;
t -= read_imageui(sumTex, sampler, (int2)(x + dx1, y + dy2)).x;
t -= read_imageui(sumTex, sampler, (int2)(x + dx2, y + dy1)).x;
t += read_imageui(sumTex, sampler, (int2)(x + dx2, y + dy2)).x;
t += read_sumTex( sumTex, sampler, (int2)(x + dx1, y + dy1), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx1, y + dy2), rows, cols, elemPerRow );
t -= read_sumTex( sumTex, sampler, (int2)(x + dx2, y + dy1), rows, cols, elemPerRow );
t += read_sumTex( sumTex, sampler, (int2)(x + dx2, y + dy2), rows, cols, elemPerRow );
d += t * src[k][4] / ((dx2 - dx1) * (dy2 - dy1));
}
...
...
@@ -172,7 +220,7 @@ __inline int calcSize(int octave, int layer)
//calculate targeted layer per-pixel determinant and trace with an integral image
__kernel void icvCalcLayerDetAndTrace(
image2d_t
sumTex, // input integral image
IMAGE_INT32
sumTex, // input integral image
__global float * det, // output Determinant
__global float * trace, // output trace
int det_step, // the step of det in bytes
...
...
@@ -181,11 +229,13 @@ __kernel void icvCalcLayerDetAndTrace(
int c_img_cols,
int c_nOctaveLayers,
int c_octave,
int c_layer_rows
int c_layer_rows,
int sumTex_step
)
{
det_step /= sizeof(*det);
trace_step /= sizeof(*trace);
sumTex_step/= sizeof(uint);
// Determine the indices
const int gridDim_y = get_num_groups(1) / (c_nOctaveLayers + 2);
const int blockIdx_y = get_group_id(1) % gridDim_y;
...
...
@@ -205,9 +255,9 @@ __kernel void icvCalcLayerDetAndTrace(
if (size <= c_img_rows && size <= c_img_cols && i < samples_i && j < samples_j)
{
const float dx = icvCalcHaarPatternSum_3(sumTex, c_DX , 9, size, i << c_octave, j << c_octave);
const float dy = icvCalcHaarPatternSum_3(sumTex, c_DY , 9, size, i << c_octave, j << c_octave);
const float dxy = icvCalcHaarPatternSum_4(sumTex, c_DXY, 9, size, i << c_octave, j << c_octave);
const float dx = icvCalcHaarPatternSum_3(sumTex, c_DX , 9, size, i << c_octave, j << c_octave
, c_img_rows, c_img_cols, sumTex_step
);
const float dy = icvCalcHaarPatternSum_3(sumTex, c_DY , 9, size, i << c_octave, j << c_octave
, c_img_rows, c_img_cols, sumTex_step
);
const float dxy = icvCalcHaarPatternSum_4(sumTex, c_DXY, 9, size, i << c_octave, j << c_octave
, c_img_rows, c_img_cols, sumTex_step
);
det [j + margin + det_step * (layer * c_layer_rows + i + margin)] = dx * dy - 0.81f * dxy * dxy;
trace[j + margin + trace_step * (layer * c_layer_rows + i + margin)] = dx + dy;
...
...
@@ -220,7 +270,7 @@ __kernel void icvCalcLayerDetAndTrace(
__constant float c_DM[5] = {0, 0, 9, 9, 1};
bool within_check(
image2d_t maskSumTex, int sum_i, int sum_j, int size
)
bool within_check(
IMAGE_INT32 maskSumTex, int sum_i, int sum_j, int size, int rows, int cols, int step
)
{
float ratio = (float)size / 9.0f;
...
...
@@ -233,10 +283,10 @@ bool within_check(image2d_t maskSumTex, int sum_i, int sum_j, int size)
float t = 0;
t += read_
imageui(maskSumTex, sampler, (int2)(sum_j + dx1, sum_i + dy1)).x
;
t -= read_
imageui(maskSumTex, sampler, (int2)(sum_j + dx1, sum_i + dy2)).x
;
t -= read_
imageui(maskSumTex, sampler, (int2)(sum_j + dx2, sum_i + dy1)).x
;
t += read_
imageui(maskSumTex, sampler, (int2)(sum_j + dx2, sum_i + dy2)).x
;
t += read_
sumTex(maskSumTex, sampler, (int2)(sum_j + dx1, sum_i + dy1), rows, cols, step)
;
t -= read_
sumTex(maskSumTex, sampler, (int2)(sum_j + dx1, sum_i + dy2), rows, cols, step)
;
t -= read_
sumTex(maskSumTex, sampler, (int2)(sum_j + dx2, sum_i + dy1), rows, cols, step)
;
t += read_
sumTex(maskSumTex, sampler, (int2)(sum_j + dx2, sum_i + dy2), rows, cols, step)
;
d += t * c_DM[4] / ((dx2 - dx1) * (dy2 - dy1));
...
...
@@ -261,7 +311,8 @@ __kernel
int c_layer_cols,
int c_max_candidates,
float c_hessianThreshold,
image2d_t maskSumTex
IMAGE_INT32 maskSumTex,
int mask_step
)
{
volatile __local float N9[768]; // threads.x * threads.y * 3
...
...
@@ -269,6 +320,7 @@ __kernel
det_step /= sizeof(*det);
trace_step /= sizeof(*trace);
maxCounter += counter_offset;
mask_step /= sizeof(uint);
// Determine the indices
const int gridDim_y = get_num_groups(1) / c_nOctaveLayers;
...
...
@@ -321,7 +373,7 @@ __kernel
const int sum_i = (i - ((size >> 1) >> c_octave)) << c_octave;
const int sum_j = (j - ((size >> 1) >> c_octave)) << c_octave;
if (within_check(maskSumTex, sum_i, sum_j, size))
if (within_check(maskSumTex, sum_i, sum_j, size
, c_img_rows, c_img_cols, mask_step
))
{
// Check to see if we have a max (in its 26 neighbours)
const bool condmax = val0 > N9[localLin - 1 - get_local_size(0) - zoff]
...
...
@@ -704,14 +756,16 @@ void reduce_32_sum(volatile __local float * data, float partial_reduction, int
__kernel
void icvCalcOrientation(
image2d_t
sumTex,
IMAGE_INT32
sumTex,
__global float * keypoints,
int keypoints_step,
int c_img_rows,
int c_img_cols
int c_img_cols,
int sum_step
)
{
keypoints_step /= sizeof(*keypoints);
sum_step /= sizeof(uint);
__global float* featureX = keypoints + X_ROW * keypoints_step;
__global float* featureY = keypoints + Y_ROW * keypoints_step;
__global float* featureSize = keypoints + SIZE_ROW * keypoints_step;
...
...
@@ -754,8 +808,8 @@ __kernel
if (y >= 0 && y < (c_img_rows + 1) - grad_wav_size &&
x >= 0 && x < (c_img_cols + 1) - grad_wav_size)
{
X = c_aptW[tid] * icvCalcHaarPatternSum_2(sumTex, c_NX, 4, grad_wav_size, y, x);
Y = c_aptW[tid] * icvCalcHaarPatternSum_2(sumTex, c_NY, 4, grad_wav_size, y, x);
X = c_aptW[tid] * icvCalcHaarPatternSum_2(sumTex, c_NX, 4, grad_wav_size, y, x
, c_img_rows, c_img_cols, sum_step
);
Y = c_aptW[tid] * icvCalcHaarPatternSum_2(sumTex, c_NY, 4, grad_wav_size, y, x
, c_img_rows, c_img_cols, sum_step
);
angle = atan2(Y, X);
...
...
@@ -881,20 +935,20 @@ __constant float c_DW[PATCH_SZ * PATCH_SZ] =
// utility for linear filter
inline uchar readerGet(
image2d_t src,
IMAGE_INT8 src,
const float centerX, const float centerY, const float win_offset, const float cos_dir, const float sin_dir,
int i, int j
int i, int j
, int rows, int cols, int elemPerRow
)
{
float pixel_x = centerX + (win_offset + j) * cos_dir + (win_offset + i) * sin_dir;
float pixel_y = centerY - (win_offset + j) * sin_dir + (win_offset + i) * cos_dir;
return
(uchar)read_imageui(src, sampler, (float2)(pixel_x, pixel_y)).x
;
return
read_imgTex(src, sampler, (float2)(pixel_x, pixel_y), rows, cols, elemPerRow)
;
}
inline float linearFilter(
image2d_t src,
IMAGE_INT8 src,
const float centerX, const float centerY, const float win_offset, const float cos_dir, const float sin_dir,
float y, float x
float y, float x
, int rows, int cols, int elemPerRow
)
{
x -= 0.5f;
...
...
@@ -907,30 +961,33 @@ inline float linearFilter(
const int x2 = x1 + 1;
const int y2 = y1 + 1;
uchar src_reg = readerGet(src, centerX, centerY, win_offset, cos_dir, sin_dir, y1, x1);
uchar src_reg = readerGet(src, centerX, centerY, win_offset, cos_dir, sin_dir, y1, x1
, rows, cols, elemPerRow
);
out = out + src_reg * ((x2 - x) * (y2 - y));
src_reg = readerGet(src, centerX, centerY, win_offset, cos_dir, sin_dir, y1, x2);
src_reg = readerGet(src, centerX, centerY, win_offset, cos_dir, sin_dir, y1, x2
, rows, cols, elemPerRow
);
out = out + src_reg * ((x - x1) * (y2 - y));
src_reg = readerGet(src, centerX, centerY, win_offset, cos_dir, sin_dir, y2, x1);
src_reg = readerGet(src, centerX, centerY, win_offset, cos_dir, sin_dir, y2, x1
, rows, cols, elemPerRow
);
out = out + src_reg * ((x2 - x) * (y - y1));
src_reg = readerGet(src, centerX, centerY, win_offset, cos_dir, sin_dir, y2, x2);
src_reg = readerGet(src, centerX, centerY, win_offset, cos_dir, sin_dir, y2, x2
, rows, cols, elemPerRow
);
out = out + src_reg * ((x - x1) * (y - y1));
return out;
}
void calc_dx_dy(
image2d_t
imgTex,
IMAGE_INT8
imgTex,
volatile __local float s_dx_bin[25],
volatile __local float s_dy_bin[25],
volatile __local float s_PATCH[6][6],
__global const float* featureX,
__global const float* featureY,
__global const float* featureSize,
__global const float* featureDir
__global const float* featureDir,
int rows,
int cols,
int elemPerRow
)
{
const float centerX = featureX[get_group_id(0)];
...
...
@@ -965,7 +1022,7 @@ void calc_dx_dy(
const float icoo = ((float)yIndex / (PATCH_SZ + 1)) * win_size;
const float jcoo = ((float)xIndex / (PATCH_SZ + 1)) * win_size;
s_PATCH[get_local_id(1)][get_local_id(0)] = linearFilter(imgTex, centerX, centerY, win_offset, cos_dir, sin_dir, icoo, jcoo);
s_PATCH[get_local_id(1)][get_local_id(0)] = linearFilter(imgTex, centerX, centerY, win_offset, cos_dir, sin_dir, icoo, jcoo
, rows, cols, elemPerRow
);
barrier(CLK_LOCAL_MEM_FENCE);
...
...
@@ -1035,16 +1092,18 @@ void reduce_sum25(
__kernel
void compute_descriptors64(
image2d_t
imgTex,
IMAGE_INT8
imgTex,
volatile __global float * descriptors,
__global const float * keypoints,
int descriptors_step,
int keypoints_step
int keypoints_step,
int rows,
int cols,
int img_step
)
{
descriptors_step /= sizeof(float);
keypoints_step /= sizeof(float);
__global const float * featureX = keypoints + X_ROW * keypoints_step;
__global const float * featureY = keypoints + Y_ROW * keypoints_step;
__global const float * featureSize = keypoints + SIZE_ROW * keypoints_step;
...
...
@@ -1057,7 +1116,7 @@ __kernel
volatile __local float sdyabs[25];
volatile __local float s_PATCH[6][6];
calc_dx_dy(imgTex, sdx, sdy, s_PATCH, featureX, featureY, featureSize, featureDir);
calc_dx_dy(imgTex, sdx, sdy, s_PATCH, featureX, featureY, featureSize, featureDir
, rows, cols, img_step
);
barrier(CLK_LOCAL_MEM_FENCE);
const int tid = get_local_id(1) * get_local_size(0) + get_local_id(0);
...
...
@@ -1066,10 +1125,10 @@ __kernel
{
sdxabs[tid] = fabs(sdx[tid]); // |dx| array
sdyabs[tid] = fabs(sdy[tid]); // |dy| array
barrier(CLK_LOCAL_MEM_FENCE);
//
barrier(CLK_LOCAL_MEM_FENCE);
reduce_sum25(sdx, sdy, sdxabs, sdyabs, tid);
barrier(CLK_LOCAL_MEM_FENCE);
//
barrier(CLK_LOCAL_MEM_FENCE);
volatile __global float* descriptors_block = descriptors + descriptors_step * get_group_id(0) + (get_group_id(1) << 2);
...
...
@@ -1085,11 +1144,14 @@ __kernel
}
__kernel
void compute_descriptors128(
image2d_t
imgTex,
IMAGE_INT8
imgTex,
__global volatile float * descriptors,
__global float * keypoints,
int descriptors_step,
int keypoints_step
int keypoints_step,
int rows,
int cols,
int img_step
)
{
descriptors_step /= sizeof(*descriptors);
...
...
@@ -1111,7 +1173,7 @@ __kernel
volatile __local float sdabs2[25];
volatile __local float s_PATCH[6][6];
calc_dx_dy(imgTex, sdx, sdy, s_PATCH, featureX, featureY, featureSize, featureDir);
calc_dx_dy(imgTex, sdx, sdy, s_PATCH, featureX, featureY, featureSize, featureDir
, rows, cols, img_step
);
barrier(CLK_LOCAL_MEM_FENCE);
const int tid = get_local_id(1) * get_local_size(0) + get_local_id(0);
...
...
@@ -1132,10 +1194,10 @@ __kernel
sd2[tid] = sdx[tid];
sdabs2[tid] = fabs(sdx[tid]);
}
barrier(CLK_LOCAL_MEM_FENCE);
//
barrier(CLK_LOCAL_MEM_FENCE);
reduce_sum25(sd1, sd2, sdabs1, sdabs2, tid);
barrier(CLK_LOCAL_MEM_FENCE);
//
barrier(CLK_LOCAL_MEM_FENCE);
volatile __global float* descriptors_block = descriptors + descriptors_step * get_group_id(0) + (get_group_id(1) << 3);
...
...
@@ -1162,10 +1224,10 @@ __kernel
sd2[tid] = sdy[tid];
sdabs2[tid] = fabs(sdy[tid]);
}
barrier(CLK_LOCAL_MEM_FENCE);
//
barrier(CLK_LOCAL_MEM_FENCE);
reduce_sum25(sd1, sd2, sdabs1, sdabs2, tid);
barrier(CLK_LOCAL_MEM_FENCE);
//
barrier(CLK_LOCAL_MEM_FENCE);
// write dy (dx >= 0), |dy| (dx >= 0), dy (dx < 0), |dy
|
(
dx
<
0
)
if
(
tid
==
0
)
...
...
modules/ocl/src/mcwutil.cpp
View file @
b836e3c6
...
...
@@ -221,6 +221,36 @@ namespace cv
{
openCLFree
(
texture
);
}
bool
support_image2d
(
Context
*
clCxt
)
{
static
const
char
*
_kernel_string
=
"__kernel void test_func(image2d_t img) {}"
;
static
bool
_isTested
=
false
;
static
bool
_support
=
false
;
if
(
_isTested
)
{
return
_support
;
}
try
{
cv
::
ocl
::
openCLGetKernelFromSource
(
clCxt
,
&
_kernel_string
,
"test_func"
);
_support
=
true
;
}
catch
(
const
cv
::
Exception
&
e
)
{
if
(
e
.
code
==
-
217
)
{
_support
=
false
;
}
else
{
// throw e once again
throw
e
;
}
}
_isTested
=
true
;
return
_support
;
}
}
//namespace ocl
}
//namespace cv
...
...
modules/ocl/src/mcwutil.hpp
View file @
b836e3c6
...
...
@@ -70,6 +70,10 @@ namespace cv
// 2. for faster clamping, there is no buffer padding for the constructed texture
cl_mem
bindTexture
(
const
oclMat
&
mat
);
void
releaseTexture
(
cl_mem
&
texture
);
// returns whether the current context supports image2d_t format or not
bool
support_image2d
(
Context
*
clCxt
=
Context
::
getContext
());
}
//namespace ocl
}
//namespace cv
...
...
modules/ocl/src/pyrlk.cpp
View file @
b836e3c6
...
...
@@ -574,8 +574,9 @@ static void lkSparse_run(oclMat &I, oclMat &J,
Context
*
clCxt
=
I
.
clCxt
;
int
elemCntPerRow
=
I
.
step
/
I
.
elemSize
();
string
kernelName
=
"lkSparse"
;
size_t
localThreads
[
3
]
=
{
8
,
8
,
1
};
size_t
globalThreads
[
3
]
=
{
8
*
ptcount
,
8
,
1
};
bool
isImageSupported
=
support_image2d
();
size_t
localThreads
[
3
]
=
{
8
,
isImageSupported
?
8
:
32
,
1
};
size_t
globalThreads
[
3
]
=
{
8
*
ptcount
,
isImageSupported
?
8
:
32
,
1
};
int
cn
=
I
.
oclchannels
();
char
calcErr
;
if
(
level
==
0
)
...
...
@@ -588,8 +589,9 @@ static void lkSparse_run(oclMat &I, oclMat &J,
}
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
cl_mem
ITex
=
bindTexture
(
I
);
cl_mem
JTex
=
bindTexture
(
J
);
cl_mem
ITex
=
isImageSupported
?
bindTexture
(
I
)
:
(
cl_mem
)
I
.
data
;
cl_mem
JTex
=
isImageSupported
?
bindTexture
(
J
)
:
(
cl_mem
)
J
.
data
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
ITex
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
JTex
));
...
...
@@ -602,6 +604,8 @@ static void lkSparse_run(oclMat &I, oclMat &J,
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
level
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
I
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
I
.
cols
));
if
(
!
isImageSupported
)
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
elemCntPerRow
)
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
patch
.
x
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
patch
.
y
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
cn
));
...
...
@@ -610,19 +614,14 @@ static void lkSparse_run(oclMat &I, oclMat &J,
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
iters
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_char
),
(
void
*
)
&
calcErr
));
try
if
(
isImageSupported
)
{
openCLExecuteKernel2
(
clCxt
,
&
pyrlk
,
kernelName
,
globalThreads
,
localThreads
,
args
,
I
.
oclchannels
(),
I
.
depth
(),
CLFLUSH
);
}
catch
(
Exception
&
)
{
printf
(
"Warning: The image2d_t is not supported by the device. Using alternative method!
\n
"
);
releaseTexture
(
ITex
);
releaseTexture
(
JTex
);
ITex
=
(
cl_mem
)
I
.
data
;
JTex
=
(
cl_mem
)
J
.
data
;
localThreads
[
1
]
=
globalThreads
[
1
]
=
32
;
args
.
insert
(
args
.
begin
()
+
11
,
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
elemCntPerRow
)
);
}
else
{
openCLExecuteKernel2
(
clCxt
,
&
pyrlk_no_image
,
kernelName
,
globalThreads
,
localThreads
,
args
,
I
.
oclchannels
(),
I
.
depth
(),
CLFLUSH
);
}
}
...
...
@@ -724,7 +723,7 @@ static void lkDense_run(oclMat &I, oclMat &J, oclMat &u, oclMat &v,
oclMat
&
prevU
,
oclMat
&
prevV
,
oclMat
*
err
,
Size
winSize
,
int
iters
)
{
Context
*
clCxt
=
I
.
clCxt
;
bool
isImageSupported
=
clCxt
->
impl
->
devName
.
find
(
"Intel(R) HD Graphics"
)
==
string
::
npos
;
bool
isImageSupported
=
support_image2d
()
;
int
elemCntPerRow
=
I
.
step
/
I
.
elemSize
();
string
kernelName
=
"lkDense"
;
...
...
modules/ocl/src/surf.cpp
View file @
b836e3c6
/*M///////////////////////////////////////////////////////////////////////////////////////
/*M///////////////////////////////////////////////////////////////////////////////////////
//
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
...
...
@@ -44,6 +44,7 @@
//M*/
#include <iomanip>
#include "precomp.hpp"
#include "mcwutil.hpp"
//#include "opencv2/highgui/highgui.hpp"
using
namespace
cv
;
...
...
@@ -71,7 +72,7 @@ static inline int calcSize(int octave, int layer)
/* Wavelet size increment between layers. This should be an even number,
such that the wavelet sizes in an octave are either all even or all odd.
This ensures that when looking for the neighbo
u
rs of a sample, the layers
This ensures that when looking for the neighbors of a sample, the layers
above and below are aligned correctly. */
const
int
HAAR_SIZE_INC
=
6
;
...
...
@@ -79,6 +80,11 @@ static inline int calcSize(int octave, int layer)
return
(
HAAR_SIZE0
+
HAAR_SIZE_INC
*
layer
)
<<
octave
;
}
namespace
{
const
char
*
noImage2dOption
=
"-D DISABLE_IMAGE2D"
;
}
class
SURF_OCL_Invoker
{
public
:
...
...
@@ -88,7 +94,7 @@ public:
//void loadGlobalConstants(int maxCandidates, int maxFeatures, int img_rows, int img_cols, int nOctaveLayers, float hessianThreshold);
//void loadOctaveConstants(int octave, int layer_rows, int layer_cols);
// kernel callers decl
e
arations
// kernel callers declarations
void
icvCalcLayerDetAndTrace_gpu
(
oclMat
&
det
,
oclMat
&
trace
,
int
octave
,
int
nOctaveLayers
,
int
layer_rows
);
void
icvFindMaximaInLayer_gpu
(
const
oclMat
&
det
,
const
oclMat
&
trace
,
oclMat
&
maxPosBuffer
,
oclMat
&
maxCounter
,
int
counterOffset
,
...
...
@@ -100,14 +106,14 @@ public:
void
icvCalcOrientation_gpu
(
const
oclMat
&
keypoints
,
int
nFeatures
);
void
compute_descriptors_gpu
(
const
oclMat
&
descriptors
,
const
oclMat
&
keypoints
,
int
nFeatures
);
// end of kernel callers decl
e
arations
// end of kernel callers declarations
SURF_OCL_Invoker
(
SURF_OCL
&
surf
,
const
oclMat
&
img
,
const
oclMat
&
mask
)
:
surf_
(
surf
),
img_cols
(
img
.
cols
),
img_rows
(
img
.
rows
),
use_mask
(
!
mask
.
empty
()),
imgTex
(
NULL
),
sumTex
(
NULL
),
maskSumTex
(
NULL
)
use_mask
(
!
mask
.
empty
()),
counters
(
oclMat
()),
imgTex
(
NULL
),
sumTex
(
NULL
),
maskSumTex
(
NULL
)
,
_img
(
img
)
{
CV_Assert
(
!
img
.
empty
()
&&
img
.
type
()
==
CV_8UC1
);
CV_Assert
(
mask
.
empty
()
||
(
mask
.
size
()
==
img
.
size
()
&&
mask
.
type
()
==
CV_8UC1
));
...
...
@@ -131,12 +137,13 @@ public:
counters
.
create
(
1
,
surf_
.
nOctaves
+
1
,
CV_32SC1
);
counters
.
setTo
(
Scalar
::
all
(
0
));
//loadGlobalConstants(maxCandidates, maxFeatures, img_rows, img_cols, surf_.nOctaveLayers, static_cast<float>(surf_.hessianThreshold));
integral
(
img
,
surf_
.
sum
);
if
(
support_image2d
())
{
bindImgTex
(
img
,
imgTex
);
integral
(
img
,
surf_
.
sum
);
// the two argumented integral version is incorrect
bindImgTex
(
surf_
.
sum
,
sumTex
);
}
maskSumTex
=
0
;
if
(
use_mask
)
...
...
@@ -155,7 +162,7 @@ public:
void
detectKeypoints
(
oclMat
&
keypoints
)
{
// create image pyramid buffers
// different layers have same sized buffers, but they are sampled from
gaussi
n kernel.
// different layers have same sized buffers, but they are sampled from
Gaussia
n kernel.
ensureSizeIsEnough
(
img_rows
*
(
surf_
.
nOctaveLayers
+
2
),
img_cols
,
CV_32FC1
,
surf_
.
det
);
ensureSizeIsEnough
(
img_rows
*
(
surf_
.
nOctaveLayers
+
2
),
img_cols
,
CV_32FC1
,
surf_
.
trace
);
...
...
@@ -222,7 +229,6 @@ public:
openCLFree
(
sumTex
);
if
(
maskSumTex
)
openCLFree
(
maskSumTex
);
additioalParamBuffer
.
release
();
}
private
:
...
...
@@ -242,7 +248,7 @@ private:
cl_mem
sumTex
;
cl_mem
maskSumTex
;
oclMat
additioalParamBuffer
;
const
oclMat
_img
;
// make a copy for non-image2d_t supported platform
SURF_OCL_Invoker
&
operator
=
(
const
SURF_OCL_Invoker
&
right
)
{
...
...
@@ -362,11 +368,6 @@ void cv::ocl::SURF_OCL::operator()(const oclMat &img, const oclMat &mask, oclMat
{
if
(
!
img
.
empty
())
{
if
(
img
.
clCxt
->
impl
->
devName
.
find
(
"Intel(R) HD Graphics"
)
!=
string
::
npos
)
{
cout
<<
" Intel HD GPU device unsupported "
<<
endl
;
return
;
}
SURF_OCL_Invoker
surf
(
*
this
,
img
,
mask
);
surf
.
detectKeypoints
(
keypoints
);
...
...
@@ -378,11 +379,6 @@ void cv::ocl::SURF_OCL::operator()(const oclMat &img, const oclMat &mask, oclMat
{
if
(
!
img
.
empty
())
{
if
(
img
.
clCxt
->
impl
->
devName
.
find
(
"Intel(R) HD Graphics"
)
!=
string
::
npos
)
{
cout
<<
" Intel HD GPU device unsupported "
<<
endl
;
return
;
}
SURF_OCL_Invoker
surf
(
*
this
,
img
,
mask
);
if
(
!
useProvidedKeypoints
)
...
...
@@ -443,74 +439,11 @@ void cv::ocl::SURF_OCL::releaseMemory()
// bind source buffer to image oject.
void
SURF_OCL_Invoker
::
bindImgTex
(
const
oclMat
&
img
,
cl_mem
&
texture
)
{
cl_image_format
format
;
int
err
;
int
depth
=
img
.
depth
();
int
channels
=
img
.
channels
();
switch
(
depth
)
{
case
CV_8U
:
format
.
image_channel_data_type
=
CL_UNSIGNED_INT8
;
break
;
case
CV_32S
:
format
.
image_channel_data_type
=
CL_UNSIGNED_INT32
;
break
;
case
CV_32F
:
format
.
image_channel_data_type
=
CL_FLOAT
;
break
;
default
:
throw
std
::
exception
();
break
;
}
switch
(
channels
)
{
case
1
:
format
.
image_channel_order
=
CL_R
;
break
;
case
3
:
format
.
image_channel_order
=
CL_RGB
;
break
;
case
4
:
format
.
image_channel_order
=
CL_RGBA
;
break
;
default
:
throw
std
::
exception
();
break
;
}
if
(
texture
)
{
openCLFree
(
texture
);
}
#ifdef CL_VERSION_1_2
cl_image_desc
desc
;
desc
.
image_type
=
CL_MEM_OBJECT_IMAGE2D
;
desc
.
image_width
=
img
.
step
/
img
.
elemSize
();
desc
.
image_height
=
img
.
rows
;
desc
.
image_depth
=
0
;
desc
.
image_array_size
=
1
;
desc
.
image_row_pitch
=
0
;
desc
.
image_slice_pitch
=
0
;
desc
.
buffer
=
NULL
;
desc
.
num_mip_levels
=
0
;
desc
.
num_samples
=
0
;
texture
=
clCreateImage
(
Context
::
getContext
()
->
impl
->
clContext
,
CL_MEM_READ_WRITE
,
&
format
,
&
desc
,
NULL
,
&
err
);
#else
texture
=
clCreateImage2D
(
Context
::
getContext
()
->
impl
->
clContext
,
CL_MEM_READ_WRITE
,
&
format
,
img
.
step
/
img
.
elemSize
(),
img
.
rows
,
0
,
NULL
,
&
err
);
#endif
size_t
origin
[]
=
{
0
,
0
,
0
};
size_t
region
[]
=
{
img
.
step
/
img
.
elemSize
(),
img
.
rows
,
1
};
clEnqueueCopyBufferToImage
(
img
.
clCxt
->
impl
->
clCmdQueue
,
(
cl_mem
)
img
.
data
,
texture
,
0
,
origin
,
region
,
0
,
NULL
,
0
);
openCLSafeCall
(
err
);
texture
=
bindTexture
(
img
);
}
////////////////////////////
...
...
@@ -525,7 +458,14 @@ void SURF_OCL_Invoker::icvCalcLayerDetAndTrace_gpu(oclMat &det, oclMat &trace, i
string
kernelName
=
"icvCalcLayerDetAndTrace"
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
if
(
sumTex
)
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
sumTex
));
}
else
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
surf_
.
sum
.
data
));
// if image2d is not supported
}
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
det
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
trace
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
det
.
step
));
...
...
@@ -535,6 +475,7 @@ void SURF_OCL_Invoker::icvCalcLayerDetAndTrace_gpu(oclMat &det, oclMat &trace, i
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
nOctaveLayers
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
octave
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
c_layer_rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
surf_
.
sum
.
step
));
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
size_t
globalThreads
[
3
]
=
...
...
@@ -543,7 +484,14 @@ void SURF_OCL_Invoker::icvCalcLayerDetAndTrace_gpu(oclMat &det, oclMat &trace, i
divUp
(
max_samples_i
,
localThreads
[
1
])
*
localThreads
[
1
]
*
(
nOctaveLayers
+
2
),
1
};
if
(
support_image2d
())
{
openCLExecuteKernel
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
else
{
openCLExecuteKernel
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
noImage2dOption
);
}
}
void
SURF_OCL_Invoker
::
icvFindMaximaInLayer_gpu
(
const
oclMat
&
det
,
const
oclMat
&
trace
,
oclMat
&
maxPosBuffer
,
oclMat
&
maxCounter
,
int
counterOffset
,
...
...
@@ -572,17 +520,31 @@ void SURF_OCL_Invoker::icvFindMaximaInLayer_gpu(const oclMat &det, const oclMat
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
surf_
.
hessianThreshold
));
if
(
use_mask
)
{
if
(
maskSumTex
)
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
maskSumTex
));
}
else
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
surf_
.
maskSum
.
data
));
}
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
surf_
.
maskSum
.
step
));
}
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
size_t
globalThreads
[
3
]
=
{
divUp
(
layer_cols
-
2
*
min_margin
,
localThreads
[
0
]
-
2
)
*
localThreads
[
0
],
divUp
(
layer_rows
-
2
*
min_margin
,
localThreads
[
1
]
-
2
)
*
nLayers
*
localThreads
[
1
],
1
};
if
(
support_image2d
())
{
openCLExecuteKernel
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
else
{
openCLExecuteKernel
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
noImage2dOption
);
}
}
void
SURF_OCL_Invoker
::
icvInterpolateKeypoint_gpu
(
const
oclMat
&
det
,
const
oclMat
&
maxPosBuffer
,
unsigned
int
maxCounter
,
...
...
@@ -607,7 +569,14 @@ void SURF_OCL_Invoker::icvInterpolateKeypoint_gpu(const oclMat &det, const oclMa
size_t
localThreads
[
3
]
=
{
3
,
3
,
3
};
size_t
globalThreads
[
3
]
=
{
maxCounter
*
localThreads
[
0
],
localThreads
[
1
],
1
};
if
(
support_image2d
())
{
openCLExecuteKernel
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
else
{
openCLExecuteKernel
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
noImage2dOption
);
}
}
void
SURF_OCL_Invoker
::
icvCalcOrientation_gpu
(
const
oclMat
&
keypoints
,
int
nFeatures
)
...
...
@@ -617,16 +586,31 @@ void SURF_OCL_Invoker::icvCalcOrientation_gpu(const oclMat &keypoints, int nFeat
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
if
(
sumTex
)
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
sumTex
));
}
else
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
surf_
.
sum
.
data
));
// if image2d is not supported
}
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
keypoints
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
keypoints
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
img_rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
img_cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
surf_
.
sum
.
step
));
size_t
localThreads
[
3
]
=
{
32
,
4
,
1
};
size_t
globalThreads
[
3
]
=
{
nFeatures
*
localThreads
[
0
],
localThreads
[
1
],
1
};
if
(
support_image2d
())
{
openCLExecuteKernel
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
else
{
openCLExecuteKernel
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
noImage2dOption
);
}
}
void
SURF_OCL_Invoker
::
compute_descriptors_gpu
(
const
oclMat
&
descriptors
,
const
oclMat
&
keypoints
,
int
nFeatures
)
...
...
@@ -649,12 +633,29 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
globalThreads
[
1
]
=
16
*
localThreads
[
1
];
args
.
clear
();
if
(
imgTex
)
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
imgTex
));
}
else
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
_img
.
data
));
}
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
descriptors
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
keypoints
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
descriptors
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
keypoints
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
_img
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
_img
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
_img
.
step
));
if
(
support_image2d
())
{
openCLExecuteKernel
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
else
{
openCLExecuteKernel
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
noImage2dOption
);
}
kernelName
=
"normalize_descriptors64"
;
...
...
@@ -667,9 +668,16 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
args
.
clear
();
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
descriptors
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
descriptors
.
step
));
if
(
support_image2d
())
{
openCLExecuteKernel
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
else
{
openCLExecuteKernel
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
noImage2dOption
);
}
}
else
{
kernelName
=
"compute_descriptors128"
;
...
...
@@ -680,12 +688,29 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
globalThreads
[
1
]
=
16
*
localThreads
[
1
];
args
.
clear
();
if
(
imgTex
)
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
imgTex
));
}
else
{
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
_img
.
data
));
}
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
descriptors
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
keypoints
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
descriptors
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
keypoints
.
step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
_img
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
_img
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
_img
.
step
));
if
(
support_image2d
())
{
openCLExecuteKernel
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
else
{
openCLExecuteKernel
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
noImage2dOption
);
}
kernelName
=
"normalize_descriptors128"
;
...
...
@@ -698,7 +723,14 @@ void SURF_OCL_Invoker::compute_descriptors_gpu(const oclMat &descriptors, const
args
.
clear
();
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
descriptors
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
descriptors
.
step
));
if
(
support_image2d
())
{
openCLExecuteKernel
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
else
{
openCLExecuteKernel
(
clCxt
,
&
nonfree_surf
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
noImage2dOption
);
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment