Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
c18d1ee2
Commit
c18d1ee2
authored
Feb 04, 2014
by
Vadim Pisarevsky
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
SURF kind of works (let's see if the tests pass)
parent
8d5e9522
Hide whitespace changes
Inline
Side-by-side
Showing
5 changed files
with
207 additions
and
464 deletions
+207
-464
ocl.hpp
modules/core/include/opencv2/core/ocl.hpp
+1
-1
surf.cl
modules/nonfree/src/opencl/surf.cl
+140
-335
surf.cpp
modules/nonfree/src/surf.cpp
+1
-1
surf.hpp
modules/nonfree/src/surf.hpp
+6
-10
surf.ocl.cpp
modules/nonfree/src/surf.ocl.cpp
+59
-117
No files found.
modules/core/include/opencv2/core/ocl.hpp
View file @
c18d1ee2
...
...
@@ -585,7 +585,7 @@ class CV_EXPORTS Image2D
{
public
:
Image2D
();
Image2D
(
const
UMat
&
src
);
explicit
Image2D
(
const
UMat
&
src
);
~
Image2D
();
void
*
ptr
()
const
;
...
...
modules/nonfree/src/opencl/surf.cl
View file @
c18d1ee2
...
...
@@ -52,35 +52,52 @@
#
define
ORI_LOCAL_SIZE
(
360
/
ORI_SEARCH_INC
)
//
specialized
for
non-image2d_t
supported
platform,
intel
HD4000,
for
example
#
if
def
DISABL
E_IMAGE2D
#
define
IMAGE_INT32
__global
uint
*
#
define
IMAGE_INT8
__global
uchar
*
#
else
#
define
IMAGE_INT32
image2d_t
#
define
IMAGE_INT8
image2d_t
#
endif
#
if
ndef
HAV
E_IMAGE2D
__inline
uint
read_sumTex_
(
__global
uint*
sumTex,
int
sum_step,
int
img_rows,
int
img_cols,
int2
coord
)
{
int
x
=
clamp
(
coord.x,
0
,
img_cols
)
;
int
y
=
clamp
(
coord.y,
0
,
img_rows
)
;
return
sumTex[sum_step
*
y
+
x]
;
}
uint
read_sumTex
(
IMAGE_INT32
img,
sampler_t
sam,
int2
coord,
int
rows,
int
cols,
int
elemPerRow
)
__inline
uchar
read_imgTex_
(
__global
uchar*
imgTex,
int
img_step,
int
img_rows,
int
img_cols,
float2
coord
)
{
#
ifdef
DISABLE_IMAGE2D
int
x
=
clamp
(
coord.x,
0
,
cols
)
;
int
y
=
clamp
(
coord.y,
0
,
rows
)
;
return
img[elemPerRow
*
y
+
x]
;
int
x
=
clamp
(
convert_int_rte
(
coord.x
)
,
0
,
img_cols-1
)
;
int
y
=
clamp
(
convert_int_rte
(
coord.y
)
,
0
,
img_rows-1
)
;
return
imgTex[img_step
*
y
+
x]
;
}
#
define
read_sumTex
(
coord
)
read_sumTex_
(
sumTex,
sum_step,
img_rows,
img_cols,
coord
)
#
define
read_imgTex
(
coord
)
read_imgTex_
(
imgTex,
img_step,
img_rows,
img_cols,
coord
)
#
define
__PARAM_sumTex__
__global
uint*
sumTex,
int
sum_step,
int
sum_offset
#
define
__PARAM_imgTex__
__global
uchar*
imgTex,
int
img_step,
int
img_offset
#
define
__PASS_sumTex__
sumTex,
sum_step,
sum_offset
#
define
__PASS_imgTex__
imgTex,
img_step,
img_offset
#
else
return
read_imageui
(
img,
sam,
coord
)
.
x
;
#
endif
__inline
uint
read_sumTex_
(
image2d_t
sumTex,
sampler_t
sam,
int2
coord
)
{
return
read_imageui
(
sumTex,
sam,
coord
)
.
x
;
}
uchar
read_imgTex
(
IMAGE_INT8
img,
sampler_t
sam,
float2
coord,
int
rows,
int
cols,
int
elemPerRow
)
__inline
uchar
read_imgTex_
(
image2d_t
imgTex,
sampler_t
sam,
float2
coord
)
{
#
ifdef
DISABLE_IMAGE2D
int
x
=
clamp
(
round
(
coord.x
)
,
0
,
cols
-
1
)
;
int
y
=
clamp
(
round
(
coord.y
)
,
0
,
rows
-
1
)
;
return
img[elemPerRow
*
y
+
x]
;
#
else
return
(
uchar
)
read_imageui
(
img,
sam,
coord
)
.
x
;
#
endif
return
(
uchar
)
read_imageui
(
imgTex,
sam,
coord
)
.
x
;
}
#
define
read_sumTex
(
coord
)
read_sumTex_
(
sumTex,
sampler,
coord
)
#
define
read_imgTex
(
coord
)
read_imgTex_
(
imgTex,
sampler,
coord
)
#
define
__PARAM_sumTex__
image2d_t
sumTex
#
define
__PARAM_imgTex__
image2d_t
imgTex
#
define
__PASS_sumTex__
sumTex
#
define
__PASS_imgTex__
imgTex
#
endif
//
dynamically
change
the
precision
used
for
floating
type
#
if
defined
(
DOUBLE_SUPPORT
)
...
...
@@ -95,7 +112,7 @@ uchar read_imgTex(IMAGE_INT8 img, sampler_t sam, float2 coord, int rows, int col
#
endif
//
Image
read
mode
__constant
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_CLAMP_TO_EDGE |
CLK_FILTER_NEAREST
;
__constant
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_CLAMP_TO_EDGE |
CLK_FILTER_NEAREST
;
#
ifndef
FLT_EPSILON
#
define
FLT_EPSILON
(
1e-15
)
...
...
@@ -105,45 +122,6 @@ __constant sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAM
#
define
CV_PI_F
3.14159265f
#
endif
//
Use
integral
image
to
calculate
haar
wavelets.
//
N
=
2
//
for
simple
haar
paatern
float
icvCalcHaarPatternSum_2
(
IMAGE_INT32
sumTex,
__constant
float2
*src,
int
oldSize,
int
newSize,
int
y,
int
x,
int
rows,
int
cols,
int
elemPerRow
)
{
float
ratio
=
(
float
)
newSize
/
oldSize
;
F
d
=
0
;
int2
dx1
=
convert_int2
(
round
(
ratio
*
src[0]
))
;
int2
dy1
=
convert_int2
(
round
(
ratio
*
src[1]
))
;
int2
dx2
=
convert_int2
(
round
(
ratio
*
src[2]
))
;
int2
dy2
=
convert_int2
(
round
(
ratio
*
src[3]
))
;
F
t
=
0
;
t
+=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
dx1.x,
y
+
dy1.x
)
,
rows,
cols,
elemPerRow
)
;
t
-=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
dx1.x,
y
+
dy2.x
)
,
rows,
cols,
elemPerRow
)
;
t
-=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
dx2.x,
y
+
dy1.x
)
,
rows,
cols,
elemPerRow
)
;
t
+=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
dx2.x,
y
+
dy2.x
)
,
rows,
cols,
elemPerRow
)
;
d
+=
t
*
src[4].x
/
((
dx2.x
-
dx1.x
)
*
(
dy2.x
-
dy1.x
))
;
t
=
0
;
t
+=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
dx1.y,
y
+
dy1.y
)
,
rows,
cols,
elemPerRow
)
;
t
-=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
dx1.y,
y
+
dy2.y
)
,
rows,
cols,
elemPerRow
)
;
t
-=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
dx2.y,
y
+
dy1.y
)
,
rows,
cols,
elemPerRow
)
;
t
+=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
dx2.y,
y
+
dy2.y
)
,
rows,
cols,
elemPerRow
)
;
d
+=
t
*
src[4].y
/
((
dx2.y
-
dx1.y
)
*
(
dy2.y
-
dy1.y
))
;
return
(
float
)
d
;
}
////////////////////////////////////////////////////////////////////////
//
Hessian
...
...
@@ -182,22 +160,20 @@ F calcAxisAlignedDerivative(
//calculate
targeted
layer
per-pixel
determinant
and
trace
with
an
integral
image
__kernel
void
SURF_calcLayerDetAndTrace
(
IMAGE_INT32
sumTex,
//
input
integral
image
__global
float
*
det,
//
output
Determinant
__PARAM_sumTex__,
//
input
integral
image
int
img_rows,
int
img_cols,
int
c_nOctaveLayers,
int
c_octave,
int
c_layer_rows,
__global
float
*
det,
//
output
determinant
int
det_step,
int
det_offset,
__global
float
*
trace,
//
output
trace
int
det_step,
//
the
step
of
det
in
bytes
int
trace_step,
//
the
step
of
trace
in
bytes
int
c_img_rows,
int
c_img_cols,
int
c_nOctaveLayers,
int
c_octave,
int
c_layer_rows,
int
sumTex_step
)
int
trace_step,
int
trace_offset
)
{
det_step
/=
sizeof
(
*det
)
;
trace_step
/=
sizeof
(
*trace
)
;
sumTex_step/=
sizeof
(
uint
)
;
#
ifndef
HAVE_IMAGE2D
sum_step/=
sizeof
(
uint
)
;
#
endif
//
Determine
the
indices
const
int
gridDim_y
=
get_num_groups
(
1
)
/
(
c_nOctaveLayers
+
2
)
;
const
int
blockIdx_y
=
get_group_id
(
1
)
%
gridDim_y
;
...
...
@@ -209,13 +185,13 @@ __kernel void SURF_calcLayerDetAndTrace(
const
int
size
=
calcSize
(
c_octave,
layer
)
;
const
int
samples_i
=
1
+
((
c_
img_rows
-
size
)
>>
c_octave
)
;
const
int
samples_j
=
1
+
((
c_
img_cols
-
size
)
>>
c_octave
)
;
const
int
samples_i
=
1
+
((
img_rows
-
size
)
>>
c_octave
)
;
const
int
samples_j
=
1
+
((
img_cols
-
size
)
>>
c_octave
)
;
//
Ignore
pixels
where
some
of
the
kernel
is
outside
the
image
const
int
margin
=
(
size
>>
1
)
>>
c_octave
;
if
(
size
<=
c_img_rows
&&
size
<=
c_
img_cols
&&
i
<
samples_i
&&
j
<
samples_j
)
if
(
size
<=
img_rows
&&
size
<=
img_cols
&&
i
<
samples_i
&&
j
<
samples_j
)
{
int
x
=
j
<<
c_octave
;
int
y
=
i
<<
c_octave
;
...
...
@@ -239,14 +215,14 @@ __kernel void SURF_calcLayerDetAndTrace(
{
//
Some
of
the
pixels
needed
to
compute
the
derivative
are
//
repeated,
so
we
only
don
't
duplicate
the
fetch
here.
int
t02
=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x,
y
+
r2
)
,
c_img_rows,
c_img_cols,
sumTex_step
)
;
int
t07
=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x,
y
+
r7
)
,
c_img_rows,
c_img_cols,
sumTex_step
)
;
int
t32
=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
r3,
y
+
r2
)
,
c_img_rows,
c_img_cols,
sumTex_step
)
;
int
t37
=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
r3,
y
+
r7
)
,
c_img_rows,
c_img_cols,
sumTex_step
)
;
int
t62
=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
r6,
y
+
r2
)
,
c_img_rows,
c_img_cols,
sumTex_step
)
;
int
t67
=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
r6,
y
+
r7
)
,
c_img_rows,
c_img_cols,
sumTex_step
)
;
int
t92
=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
r9,
y
+
r2
)
,
c_img_rows,
c_img_cols,
sumTex_step
)
;
int
t97
=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
r9,
y
+
r7
)
,
c_img_rows,
c_img_cols,
sumTex_step
)
;
int
t02
=
read_sumTex
(
(
int2
)(
x,
y
+
r2
)
)
;
int
t07
=
read_sumTex
(
(
int2
)(
x,
y
+
r7
)
)
;
int
t32
=
read_sumTex
(
(
int2
)(
x
+
r3,
y
+
r2
)
)
;
int
t37
=
read_sumTex
(
(
int2
)(
x
+
r3,
y
+
r7
)
)
;
int
t62
=
read_sumTex
(
(
int2
)(
x
+
r6,
y
+
r2
)
)
;
int
t67
=
read_sumTex
(
(
int2
)(
x
+
r6,
y
+
r7
)
)
;
int
t92
=
read_sumTex
(
(
int2
)(
x
+
r9,
y
+
r2
)
)
;
int
t97
=
read_sumTex
(
(
int2
)(
x
+
r9,
y
+
r7
)
)
;
d
=
calcAxisAlignedDerivative
(
t02,
t07,
t32,
t37,
(
r3
)
*
(
r7
-
r2
)
,
t62,
t67,
t92,
t97,
(
r9
-
r6
)
*
(
r7
-
r2
)
,
...
...
@@ -259,14 +235,14 @@ __kernel void SURF_calcLayerDetAndTrace(
{
//
Some
of
the
pixels
needed
to
compute
the
derivative
are
//
repeated,
so
we
only
don
't
duplicate
the
fetch
here.
int
t20
=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
r2,
y
)
,
c_img_rows,
c_img_cols,
sumTex_step
)
;
int
t23
=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
r2,
y
+
r3
)
,
c_img_rows,
c_img_cols,
sumTex_step
)
;
int
t70
=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
r7,
y
)
,
c_img_rows,
c_img_cols,
sumTex_step
)
;
int
t73
=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
r7,
y
+
r3
)
,
c_img_rows,
c_img_cols,
sumTex_step
)
;
int
t26
=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
r2,
y
+
r6
)
,
c_img_rows,
c_img_cols,
sumTex_step
)
;
int
t76
=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
r7,
y
+
r6
)
,
c_img_rows,
c_img_cols,
sumTex_step
)
;
int
t29
=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
r2,
y
+
r9
)
,
c_img_rows,
c_img_cols,
sumTex_step
)
;
int
t79
=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
r7,
y
+
r9
)
,
c_img_rows,
c_img_cols,
sumTex_step
)
;
int
t20
=
read_sumTex
(
(
int2
)(
x
+
r2,
y
)
)
;
int
t23
=
read_sumTex
(
(
int2
)(
x
+
r2,
y
+
r3
)
)
;
int
t70
=
read_sumTex
(
(
int2
)(
x
+
r7,
y
)
)
;
int
t73
=
read_sumTex
(
(
int2
)(
x
+
r7,
y
+
r3
)
)
;
int
t26
=
read_sumTex
(
(
int2
)(
x
+
r2,
y
+
r6
)
)
;
int
t76
=
read_sumTex
(
(
int2
)(
x
+
r7,
y
+
r6
)
)
;
int
t29
=
read_sumTex
(
(
int2
)(
x
+
r2,
y
+
r9
)
)
;
int
t79
=
read_sumTex
(
(
int2
)(
x
+
r7,
y
+
r9
)
)
;
d
=
calcAxisAlignedDerivative
(
t20,
t23,
t70,
t73,
(
r7
-
r2
)
*
(
r3
)
,
t26,
t29,
t76,
t79,
(
r7
-
r2
)
*
(
r9
-
r6
)
,
...
...
@@ -280,31 +256,31 @@ __kernel void SURF_calcLayerDetAndTrace(
//
There
's
no
saving
us
here,
we
just
have
to
get
all
of
the
pixels
in
//
separate
fetches
F
t
=
0
;
t
+=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
r1,
y
+
r1
)
,
c_img_rows,
c_img_cols,
sumTex_step
)
;
t
-=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
r1,
y
+
r4
)
,
c_img_rows,
c_img_cols,
sumTex_step
)
;
t
-=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
r4,
y
+
r1
)
,
c_img_rows,
c_img_cols,
sumTex_step
)
;
t
+=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
r4,
y
+
r4
)
,
c_img_rows,
c_img_cols,
sumTex_step
)
;
t
+=
read_sumTex
(
(
int2
)(
x
+
r1,
y
+
r1
)
)
;
t
-=
read_sumTex
(
(
int2
)(
x
+
r1,
y
+
r4
)
)
;
t
-=
read_sumTex
(
(
int2
)(
x
+
r4,
y
+
r1
)
)
;
t
+=
read_sumTex
(
(
int2
)(
x
+
r4,
y
+
r4
)
)
;
d
+=
t
/
((
r4
-
r1
)
*
(
r4
-
r1
))
;
t
=
0
;
t
+=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
r5,
y
+
r1
)
,
c_img_rows,
c_img_cols,
sumTex_step
)
;
t
-=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
r5,
y
+
r4
)
,
c_img_rows,
c_img_cols,
sumTex_step
)
;
t
-=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
r8,
y
+
r1
)
,
c_img_rows,
c_img_cols,
sumTex_step
)
;
t
+=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
r8,
y
+
r4
)
,
c_img_rows,
c_img_cols,
sumTex_step
)
;
t
+=
read_sumTex
(
(
int2
)(
x
+
r5,
y
+
r1
)
)
;
t
-=
read_sumTex
(
(
int2
)(
x
+
r5,
y
+
r4
)
)
;
t
-=
read_sumTex
(
(
int2
)(
x
+
r8,
y
+
r1
)
)
;
t
+=
read_sumTex
(
(
int2
)(
x
+
r8,
y
+
r4
)
)
;
d
-=
t
/
((
r8
-
r5
)
*
(
r4
-
r1
))
;
t
=
0
;
t
+=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
r1,
y
+
r5
)
,
c_img_rows,
c_img_cols,
sumTex_step
)
;
t
-=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
r1,
y
+
r8
)
,
c_img_rows,
c_img_cols,
sumTex_step
)
;
t
-=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
r4,
y
+
r5
)
,
c_img_rows,
c_img_cols,
sumTex_step
)
;
t
+=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
r4,
y
+
r8
)
,
c_img_rows,
c_img_cols,
sumTex_step
)
;
t
+=
read_sumTex
(
(
int2
)(
x
+
r1,
y
+
r5
)
)
;
t
-=
read_sumTex
(
(
int2
)(
x
+
r1,
y
+
r8
)
)
;
t
-=
read_sumTex
(
(
int2
)(
x
+
r4,
y
+
r5
)
)
;
t
+=
read_sumTex
(
(
int2
)(
x
+
r4,
y
+
r8
)
)
;
d
-=
t
/
((
r4
-
r1
)
*
(
r8
-
r5
))
;
t
=
0
;
t
+=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
r5,
y
+
r5
)
,
c_img_rows,
c_img_cols,
sumTex_step
)
;
t
-=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
r5,
y
+
r8
)
,
c_img_rows,
c_img_cols,
sumTex_step
)
;
t
-=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
r8,
y
+
r5
)
,
c_img_rows,
c_img_cols,
sumTex_step
)
;
t
+=
read_sumTex
(
sumTex,
sampler,
(
int2
)(
x
+
r8,
y
+
r8
)
,
c_img_rows,
c_img_cols,
sumTex_step
)
;
t
+=
read_sumTex
(
(
int2
)(
x
+
r5,
y
+
r5
)
)
;
t
-=
read_sumTex
(
(
int2
)(
x
+
r5,
y
+
r8
)
)
;
t
-=
read_sumTex
(
(
int2
)(
x
+
r8,
y
+
r5
)
)
;
t
+=
read_sumTex
(
(
int2
)(
x
+
r8,
y
+
r8
)
)
;
d
+=
t
/
((
r8
-
r5
)
*
(
r8
-
r5
))
;
}
const
float
dxy
=
(
float
)
d
;
...
...
@@ -317,171 +293,17 @@ __kernel void SURF_calcLayerDetAndTrace(
////////////////////////////////////////////////////////////////////////
//
NONMAX
__constant
float
c_DM[5]
=
{0,
0
,
9
,
9
,
1}
;
bool
within_check
(
IMAGE_INT32
maskSumTex,
int
sum_i,
int
sum_j,
int
size,
int
rows,
int
cols,
int
step
)
{
float
ratio
=
(
float
)
size
/
9.0f
;
float
d
=
0
;
int
dx1
=
round
(
ratio
*
c_DM[0]
)
;
int
dy1
=
round
(
ratio
*
c_DM[1]
)
;
int
dx2
=
round
(
ratio
*
c_DM[2]
)
;
int
dy2
=
round
(
ratio
*
c_DM[3]
)
;
float
t
=
0
;
t
+=
read_sumTex
(
maskSumTex,
sampler,
(
int2
)(
sum_j
+
dx1,
sum_i
+
dy1
)
,
rows,
cols,
step
)
;
t
-=
read_sumTex
(
maskSumTex,
sampler,
(
int2
)(
sum_j
+
dx1,
sum_i
+
dy2
)
,
rows,
cols,
step
)
;
t
-=
read_sumTex
(
maskSumTex,
sampler,
(
int2
)(
sum_j
+
dx2,
sum_i
+
dy1
)
,
rows,
cols,
step
)
;
t
+=
read_sumTex
(
maskSumTex,
sampler,
(
int2
)(
sum_j
+
dx2,
sum_i
+
dy2
)
,
rows,
cols,
step
)
;
d
+=
t
*
c_DM[4]
/
((
dx2
-
dx1
)
*
(
dy2
-
dy1
))
;
return
(
d
>=
0.5f
)
;
}
//
Non-maximal
suppression
to
further
filtering
the
candidates
from
previous
step
__kernel
void
SURF_findMaximaInLayerWithMask
(
__global
const
float
*
det,
__global
const
float
*
trace,
__global
int4
*
maxPosBuffer,
volatile
__global
int*
maxCounter,
int
counter_offset,
int
det_step,
//
the
step
of
det
in
bytes
int
trace_step,
//
the
step
of
trace
in
bytes
int
c_img_rows,
int
c_img_cols,
int
c_nOctaveLayers,
int
c_octave,
int
c_layer_rows,
int
c_layer_cols,
int
c_max_candidates,
float
c_hessianThreshold,
IMAGE_INT32
maskSumTex,
int
mask_step
)
{
volatile
__local
float
N9[768]
; // threads.x * threads.y * 3
det_step
/=
sizeof
(
*det
)
;
trace_step
/=
sizeof
(
*trace
)
;
maxCounter
+=
counter_offset
;
mask_step
/=
sizeof
(
uint
)
;
//
Determine
the
indices
const
int
gridDim_y
=
get_num_groups
(
1
)
/
c_nOctaveLayers
;
const
int
blockIdx_y
=
get_group_id
(
1
)
%
gridDim_y
;
const
int
blockIdx_z
=
get_group_id
(
1
)
/
gridDim_y
;
const
int
layer
=
blockIdx_z
+
1
;
const
int
size
=
calcSize
(
c_octave,
layer
)
;
//
Ignore
pixels
without
a
3x3x3
neighbourhood
in
the
layer
above
const
int
margin
=
((
calcSize
(
c_octave,
layer
+
1
)
>>
1
)
>>
c_octave
)
+
1
;
const
int
j
=
get_local_id
(
0
)
+
get_group_id
(
0
)
*
(
get_local_size
(
0
)
-
2
)
+
margin
-
1
;
const
int
i
=
get_local_id
(
1
)
+
blockIdx_y
*
(
get_local_size
(
1
)
-
2
)
+
margin
-
1
;
//
Is
this
thread
within
the
hessian
buffer?
const
int
zoff
=
get_local_size
(
0
)
*
get_local_size
(
1
)
;
const
int
localLin
=
get_local_id
(
0
)
+
get_local_id
(
1
)
*
get_local_size
(
0
)
+
zoff
;
N9[localLin
-
zoff]
=
det[det_step
*
(
c_layer_rows
*
(
layer
-
1
)
+
min
(
max
(
i,
0
)
,
c_img_rows
-
1
))
//
y
+
min
(
max
(
j,
0
)
,
c_img_cols
-
1
)
]
; // x
N9[localLin
]
=
det[det_step
*
(
c_layer_rows
*
(
layer
)
+
min
(
max
(
i,
0
)
,
c_img_rows
-
1
))
//
y
+
min
(
max
(
j,
0
)
,
c_img_cols
-
1
)
]
; // x
N9[localLin
+
zoff]
=
det[det_step
*
(
c_layer_rows
*
(
layer
+
1
)
+
min
(
max
(
i,
0
)
,
c_img_rows
-
1
))
//
y
+
min
(
max
(
j,
0
)
,
c_img_cols
-
1
)
]
; // x
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
i
<
c_layer_rows
-
margin
&&
j
<
c_layer_cols
-
margin
&&
get_local_id
(
0
)
>
0
&&
get_local_id
(
0
)
<
get_local_size
(
0
)
-
1
&&
get_local_id
(
1
)
>
0
&&
get_local_id
(
1
)
<
get_local_size
(
1
)
-
1
//
these
are
unnecessary
conditions
ported
from
CUDA
)
{
float
val0
=
N9[localLin]
;
if
(
val0
>
c_hessianThreshold
)
{
//
Coordinates
for
the
start
of
the
wavelet
in
the
sum
image.
There
//
is
some
integer
division
involved,
so
don
't
try
to
simplify
this
//
(
cancel
out
sampleStep
)
without
checking
the
result
is
the
same
const
int
sum_i
=
(
i
-
((
size
>>
1
)
>>
c_octave
))
<<
c_octave
;
const
int
sum_j
=
(
j
-
((
size
>>
1
)
>>
c_octave
))
<<
c_octave
;
if
(
within_check
(
maskSumTex,
sum_i,
sum_j,
size,
c_img_rows,
c_img_cols,
mask_step
))
{
//
Check
to
see
if
we
have
a
max
(
in
its
26
neighbours
)
const
bool
condmax
=
val0
>
N9[localLin
-
1
-
get_local_size
(
0
)
-
zoff]
&&
val0
>
N9[localLin
-
get_local_size
(
0
)
-
zoff]
&&
val0
>
N9[localLin
+
1
-
get_local_size
(
0
)
-
zoff]
&&
val0
>
N9[localLin
-
1
-
zoff]
&&
val0
>
N9[localLin
-
zoff]
&&
val0
>
N9[localLin
+
1
-
zoff]
&&
val0
>
N9[localLin
-
1
+
get_local_size
(
0
)
-
zoff]
&&
val0
>
N9[localLin
+
get_local_size
(
0
)
-
zoff]
&&
val0
>
N9[localLin
+
1
+
get_local_size
(
0
)
-
zoff]
&&
val0
>
N9[localLin
-
1
-
get_local_size
(
0
)
]
&&
val0
>
N9[localLin
-
get_local_size
(
0
)
]
&&
val0
>
N9[localLin
+
1
-
get_local_size
(
0
)
]
&&
val0
>
N9[localLin
-
1
]
&&
val0
>
N9[localLin
+
1
]
&&
val0
>
N9[localLin
-
1
+
get_local_size
(
0
)
]
&&
val0
>
N9[localLin
+
get_local_size
(
0
)
]
&&
val0
>
N9[localLin
+
1
+
get_local_size
(
0
)
]
&&
val0
>
N9[localLin
-
1
-
get_local_size
(
0
)
+
zoff]
&&
val0
>
N9[localLin
-
get_local_size
(
0
)
+
zoff]
&&
val0
>
N9[localLin
+
1
-
get_local_size
(
0
)
+
zoff]
&&
val0
>
N9[localLin
-
1
+
zoff]
&&
val0
>
N9[localLin
+
zoff]
&&
val0
>
N9[localLin
+
1
+
zoff]
&&
val0
>
N9[localLin
-
1
+
get_local_size
(
0
)
+
zoff]
&&
val0
>
N9[localLin
+
get_local_size
(
0
)
+
zoff]
&&
val0
>
N9[localLin
+
1
+
get_local_size
(
0
)
+
zoff]
;
if
(
condmax
)
{
int
ind
=
atomic_inc
(
maxCounter
)
;
if
(
ind
<
c_max_candidates
)
{
const
int
laplacian
=
(
int
)
copysign
(
1.0f,
trace[trace_step*
(
layer
*
c_layer_rows
+
i
)
+
j]
)
;
maxPosBuffer[ind]
=
(
int4
)(
j,
i,
layer,
laplacian
)
;
}
}
}
}
}
}
__kernel
void
SURF_findMaximaInLayer
(
__global
float
*
det,
int
det_step,
int
det_offset,
__global
float
*
trace,
int
trace_step,
int
trace_offset,
__global
int4
*
maxPosBuffer,
volatile
__global
int*
maxCounter,
int
counter_offset,
int
det_step,
//
the
step
of
det
in
bytes
int
trace_step,
//
the
step
of
trace
in
bytes
int
c_img_rows,
int
c_img_cols,
int
img_rows,
int
img_cols,
int
c_nOctaveLayers,
int
c_octave,
int
c_layer_rows,
...
...
@@ -515,8 +337,8 @@ void SURF_findMaximaInLayer(
const
int
zoff
=
get_local_size
(
0
)
*
get_local_size
(
1
)
;
const
int
localLin
=
get_local_id
(
0
)
+
get_local_id
(
1
)
*
get_local_size
(
0
)
+
zoff
;
int
l_x
=
min
(
max
(
j,
0
)
,
c_
img_cols
-
1
)
;
int
l_y
=
c_layer_rows
*
layer
+
min
(
max
(
i,
0
)
,
c_
img_rows
-
1
)
;
int
l_x
=
min
(
max
(
j,
0
)
,
img_cols
-
1
)
;
int
l_y
=
c_layer_rows
*
layer
+
min
(
max
(
i,
0
)
,
img_rows
-
1
)
;
N9[localLin
-
zoff]
=
det[det_step
*
(
l_y
-
c_layer_rows
)
+
l_x]
;
...
...
@@ -596,7 +418,7 @@ inline bool solve3x3_float(const float4 *A, const float *b, float *x)
if
(
det
!=
0
)
{
F
invdet
=
1.0
/
det
;
F
invdet
=
1.0
f
/
det
;
x[0]
=
invdet
*
(
b[0]
*
(
A[1].y
*
A[2].z
-
A[1].z
*
A[2].y
)
-
...
...
@@ -632,13 +454,13 @@ inline bool solve3x3_float(const float4 *A, const float *b, float *x)
__kernel
void
SURF_interpolateKeypoint
(
__global
const
float
*
det,
int
det_step,
int
det_offset,
__global
const
int4
*
maxPosBuffer,
__global
float
*
keypoints,
volatile
__global
int
*
featureCounter,
int
det_step,
int
keypoints_step,
int
c_img_rows,
int
c_img_cols,
int
keypoints_step,
int
keypoints_offset,
volatile
__global
int*
featureCounter,
int
img_rows,
int
img_cols,
int
c_octave,
int
c_layer_rows,
int
c_max_features
...
...
@@ -730,7 +552,7 @@ void SURF_interpolateKeypoint(
const
int
grad_wav_size
=
2
*
round
(
2.0f
*
s
)
;
//
check
when
grad_wav_size
is
too
big
if
((
c_img_rows
+
1
)
>=
grad_wav_size
&&
(
c_
img_cols
+
1
)
>=
grad_wav_size
)
if
((
img_rows
+
1
)
>=
grad_wav_size
&&
(
img_cols
+
1
)
>=
grad_wav_size
)
{
//
Get
a
new
feature
index.
int
ind
=
atomic_inc
(
featureCounter
)
;
...
...
@@ -836,22 +658,18 @@ void reduce_32_sum(volatile __local float * data, volatile float* partial_reduc
__kernel
void
SURF_calcOrientation
(
IMAGE_INT32
sumTex,
__global
float
*
keypoints,
int
keypoints_step,
int
c_img_rows,
int
c_img_cols,
int
sum_step
)
__PARAM_sumTex__,
int
img_rows,
int
img_cols,
__global
float
*
keypoints,
int
keypoints_step,
int
keypoints_offset
)
{
keypoints_step
/=
sizeof
(
*keypoints
)
;
#
ifndef
HAVE_IMAGE2D
sum_step
/=
sizeof
(
uint
)
;
#
endif
__global
float*
featureX
=
keypoints
+
X_ROW
*
keypoints_step
;
__global
float*
featureY
=
keypoints
+
Y_ROW
*
keypoints_step
;
__global
float*
featureSize
=
keypoints
+
SIZE_ROW
*
keypoints_step
;
__global
float*
featureDir
=
keypoints
+
ANGLE_ROW
*
keypoints_step
;
__local
float
s_X[ORI_SAMPLES]
;
__local
float
s_Y[ORI_SAMPLES]
;
__local
float
s_angle[ORI_SAMPLES]
;
...
...
@@ -866,7 +684,6 @@ void SURF_calcOrientation(
and
building
the
keypoint
descriptor
are
defined
relative
to
's
'
*/
const
float
s
=
featureSize[get_group_id
(
0
)
]
*
1.2f
/
9.0f
;
/*
To
find
the
dominant
orientation,
the
gradients
in
x
and
y
are
sampled
in
a
circle
of
radius
6s
using
wavelets
of
size
4s.
We
ensure
the
gradient
wavelet
size
is
even
to
ensure
the
...
...
@@ -874,7 +691,7 @@ void SURF_calcOrientation(
const
int
grad_wav_size
=
2
*
round
(
2.0f
*
s
)
;
//
check
when
grad_wav_size
is
too
big
if
((
c_img_rows
+
1
)
<
grad_wav_size
|
| (c_
img_cols + 1) < grad_wav_size)
if
((
img_rows
+
1
)
<
grad_wav_size
|
| (
img_cols + 1) < grad_wav_size)
return;
// Calc X, Y, angle and store it to shared memory
...
...
@@ -886,8 +703,8 @@ void SURF_calcOrientation(
float ratio = (float)grad_wav_size / 4;
int r2 = round(ratio * 2.0);
int r4 = round(ratio * 4.0);
int r2 = round(ratio * 2.0
f
);
int r4 = round(ratio * 4.0
f
);
for (int i = tid; i < ORI_SAMPLES; i += ORI_LOCAL_SIZE )
{
float X = 0.0f, Y = 0.0f, angle = 0.0f;
...
...
@@ -895,21 +712,20 @@ void SURF_calcOrientation(
const int x = round(featureX[get_group_id(0)] + c_aptX[i] * s - margin);
const int y = round(featureY[get_group_id(0)] + c_aptY[i] * s - margin);
if (y >= 0 && y < (
c_
img_rows + 1) - grad_wav_size &&
x >= 0 && x < (
c_
img_cols + 1) - grad_wav_size)
if (y >= 0 && y < (img_rows + 1) - grad_wav_size &&
x >= 0 && x < (img_cols + 1) - grad_wav_size)
{
float apt = c_aptW[i];
// Compute the haar sum without fetching duplicate pixels.
float t00 = read_sumTex(
sumTex, sampler, (int2)(x, y), c_img_rows, c_img_cols, sum_step
);
float t02 = read_sumTex(
sumTex, sampler, (int2)(x, y + r2), c_img_rows, c_img_cols, sum_step
);
float t04 = read_sumTex(
sumTex, sampler, (int2)(x, y + r4), c_img_rows, c_img_cols, sum_step
);
float t20 = read_sumTex(
sumTex, sampler, (int2)(x + r2, y), c_img_rows, c_img_cols, sum_step
);
float t24 = read_sumTex(
sumTex, sampler, (int2)(x + r2, y + r4), c_img_rows, c_img_cols, sum_step
);
float t40 = read_sumTex(
sumTex, sampler, (int2)(x + r4, y), c_img_rows, c_img_cols, sum_step
);
float t42 = read_sumTex(
sumTex, sampler, (int2)(x + r4, y + r2), c_img_rows, c_img_cols, sum_step
);
float t44 = read_sumTex(
sumTex, sampler, (int2)(x + r4, y + r4), c_img_rows, c_img_cols, sum_step
);
float t00 = read_sumTex(
(int2)(x, y)
);
float t02 = read_sumTex(
(int2)(x, y + r2)
);
float t04 = read_sumTex(
(int2)(x, y + r4)
);
float t20 = read_sumTex(
(int2)(x + r2, y)
);
float t24 = read_sumTex(
(int2)(x + r2, y + r4)
);
float t40 = read_sumTex(
(int2)(x + r4, y)
);
float t42 = read_sumTex(
(int2)(x + r4, y + r2)
);
float t44 = read_sumTex(
(int2)(x + r4, y + r4)
);
F t = t00 - t04 - t20 + t24;
X -= t / ((r2) * (r4));
...
...
@@ -1001,7 +817,7 @@ void SURF_calcOrientation(
}
__kernel
void SURF_setUp
r
ight(
void SURF_setUp
R
ight(
__global float * keypoints,
int keypoints_step, int keypoints_offset,
int rows, int cols )
...
...
@@ -1050,22 +866,14 @@ __constant float c_DW[PATCH_SZ * PATCH_SZ] =
};
// utility for linear filter
inline uchar readerGet(
IMAGE_INT8 src,
const float centerX, const float centerY, const float win_offset, const float cos_dir, const float sin_dir,
int i, int j, int rows, int cols, int elemPerRow
)
{
float pixel_x = centerX + (win_offset + j) * cos_dir + (win_offset + i) * sin_dir;
float pixel_y = centerY - (win_offset + j) * sin_dir + (win_offset + i) * cos_dir;
return read_imgTex(src, sampler, (float2)(pixel_x, pixel_y), rows, cols, elemPerRow);
}
#define readerGet(centerX, centerY, win_offset, cos_dir, sin_dir, i, j) \
read_imgTex((float2)(centerX + (win_offset + j) * cos_dir + (win_offset + i) * sin_dir, \
centerY - (win_offset + j) * sin_dir + (win_offset + i) * cos_dir))
inline float linearFilter(
IMAGE_INT8 src,
const float centerX, const float centerY, const float win_offset, const float cos_dir, const float sin_dir,
float y, float x, int rows, int cols, int elemPerRow
)
__PARAM_imgTex__, int img_rows, int img_cols,
float centerX, float centerY, float win_offset,
float cos_dir, float sin_dir, float y, float x )
{
x -= 0.5f;
y -= 0.5f;
...
...
@@ -1077,34 +885,31 @@ inline float linearFilter(
const int x2 = x1 + 1;
const int y2 = y1 + 1;
uchar src_reg = readerGet(
src, centerX, centerY, win_offset, cos_dir, sin_dir, y1, x1, rows, cols, elemPerRow
);
uchar src_reg = readerGet(
centerX, centerY, win_offset, cos_dir, sin_dir, y1, x1
);
out = out + src_reg * ((x2 - x) * (y2 - y));
src_reg = readerGet(
src, centerX, centerY, win_offset, cos_dir, sin_dir, y1, x2, rows, cols, elemPerRow
);
src_reg = readerGet(
centerX, centerY, win_offset, cos_dir, sin_dir, y1, x2
);
out = out + src_reg * ((x - x1) * (y2 - y));
src_reg = readerGet(
src, centerX, centerY, win_offset, cos_dir, sin_dir, y2, x1, rows, cols, elemPerRow
);
src_reg = readerGet(
centerX, centerY, win_offset, cos_dir, sin_dir, y2, x1
);
out = out + src_reg * ((x2 - x) * (y - y1));
src_reg = readerGet(
src, centerX, centerY, win_offset, cos_dir, sin_dir, y2, x2, rows, cols, elemPerRow
);
src_reg = readerGet(
centerX, centerY, win_offset, cos_dir, sin_dir, y2, x2
);
out = out + src_reg * ((x - x1) * (y - y1));
return out;
}
void calc_dx_dy(
IMAGE_INT8 imgTex,
__PARAM_imgTex__,
int img_rows, int img_cols,
volatile __local float *s_dx_bin,
volatile __local float *s_dy_bin,
volatile __local float *s_PATCH,
__global const float* featureX,
__global const float* featureY,
__global const float* featureSize,
__global const float* featureDir,
int rows,
int cols,
int elemPerRow
)
__global const float* featureDir )
{
const float centerX = featureX[get_group_id(0)];
const float centerY = featureY[get_group_id(0)];
...
...
@@ -1141,7 +946,9 @@ void calc_dx_dy(
const float icoo = ((float)yIndex / (PATCH_SZ + 1)) * win_size;
const float jcoo = ((float)xIndex / (PATCH_SZ + 1)) * win_size;
s_PATCH[get_local_id(1) * 6 + get_local_id(0)] = linearFilter(imgTex, centerX, centerY, win_offset, cos_dir, sin_dir, icoo, jcoo, rows, cols, elemPerRow);
s_PATCH[get_local_id(1) * 6 + get_local_id(0)] =
linearFilter(__PASS_imgTex__, img_rows, img_cols, centerX, centerY,
win_offset, cos_dir, sin_dir, icoo, jcoo);
barrier(CLK_LOCAL_MEM_FENCE);
...
...
@@ -1232,9 +1039,8 @@ void reduce_sum25(
__kernel
void SURF_computeDescriptors64(
IMAGE_INT8 imgTex,
int img_step, int img_offset,
int rows, int cols,
__PARAM_imgTex__,
int img_rows, int img_cols,
__global const float* keypoints,
int keypoints_step, int keypoints_offset,
__global float * descriptors,
...
...
@@ -1254,7 +1060,7 @@ void SURF_computeDescriptors64(
volatile __local float sdyabs[25];
volatile __local float s_PATCH[6*6];
calc_dx_dy(
imgTex, sdx, sdy, s_PATCH, featureX, featureY, featureSize, featureDir, rows, cols, img_step
);
calc_dx_dy(
__PASS_imgTex__, img_rows, img_cols, sdx, sdy, s_PATCH, featureX, featureY, featureSize, featureDir
);
barrier(CLK_LOCAL_MEM_FENCE);
const int tid = get_local_id(1) * get_local_size(0) + get_local_id(0);
...
...
@@ -1286,9 +1092,8 @@ void SURF_computeDescriptors64(
__kernel
void SURF_computeDescriptors128(
IMAGE_INT8 imgTex,
int img_step, int img_offset,
int rows, int cols,
__PARAM_imgTex__,
int img_rows, int img_cols,
__global const float* keypoints,
int keypoints_step, int keypoints_offset,
__global float* descriptors,
...
...
@@ -1313,7 +1118,7 @@ void SURF_computeDescriptors128(
volatile __local float sdabs2[25];
volatile __local float s_PATCH[6*6];
calc_dx_dy(
imgTex, sdx, sdy, s_PATCH, featureX, featureY, featureSize, featureDir, rows, cols, img_step
);
calc_dx_dy(
__PASS_imgTex__, img_rows, img_cols, sdx, sdy, s_PATCH, featureX, featureY, featureSize, featureDir
);
barrier(CLK_LOCAL_MEM_FENCE);
const int tid = get_local_id(1) * get_local_size(0) + get_local_id(0);
...
...
@@ -1486,7 +1291,7 @@ void reduce_sum64(volatile __local float* smem, int tid)
}
__kernel
void
SURF_normalizeDescriptors128
(
__global
float
*
descriptors,
int
descriptors_step
)
void
SURF_normalizeDescriptors128
(
__global
float
*
descriptors,
int
descriptors_step
,
int
descriptors_offset
)
{
descriptors_step
/=
sizeof
(
*descriptors
)
;
//
no
need
for
thread
ID
...
...
@@ -1514,7 +1319,7 @@ void SURF_normalizeDescriptors128(__global float * descriptors, int descriptors_
}
__kernel
void
SURF_normalizeDescriptors64
(
__global
float
*
descriptors,
int
descriptors_step
)
void
SURF_normalizeDescriptors64
(
__global
float
*
descriptors,
int
descriptors_step
,
int
descriptors_offset
)
{
descriptors_step
/=
sizeof
(
*descriptors
)
;
//
no
need
for
thread
ID
...
...
modules/nonfree/src/surf.cpp
View file @
c18d1ee2
...
...
@@ -902,7 +902,7 @@ void SURF::operator()(InputArray _img, InputArray _mask,
bool
doDescriptors
=
_descriptors
.
needed
();
CV_Assert
(
!
_img
.
empty
()
&&
CV_MAT_DEPTH
(
imgtype
)
==
CV_8U
&&
(
imgcn
==
1
||
imgcn
==
3
||
imgcn
==
4
));
CV_Assert
(
_descriptors
.
needed
()
&&
!
useProvidedKeypoints
);
CV_Assert
(
_descriptors
.
needed
()
||
!
useProvidedKeypoints
);
if
(
ocl
::
useOpenCL
()
)
{
...
...
modules/nonfree/src/surf.hpp
View file @
c18d1ee2
...
...
@@ -54,14 +54,11 @@ protected:
bool
setImage
(
InputArray
img
,
InputArray
mask
);
// kernel callers declarations
bool
calcLayerDetAndTrace
(
UMat
&
det
,
UMat
&
trace
,
int
octave
,
int
layer_rows
);
bool
calcLayerDetAndTrace
(
int
octave
,
int
layer_rows
);
bool
findMaximaInLayer
(
const
UMat
&
det
,
const
UMat
&
trace
,
UMat
&
maxPosBuffer
,
UMat
&
maxCounter
,
int
counterOffset
,
int
octave
,
int
layer_rows
,
int
layer_cols
);
bool
findMaximaInLayer
(
int
counterOffset
,
int
octave
,
int
layer_rows
,
int
layer_cols
);
bool
interpolateKeypoint
(
const
UMat
&
det
,
const
UMat
&
maxPosBuffer
,
int
maxCounter
,
UMat
&
keypoints
,
UMat
&
counters
,
int
octave
,
int
layer_rows
,
int
maxFeatures
);
bool
interpolateKeypoint
(
int
maxCounter
,
UMat
&
keypoints
,
int
octave
,
int
layer_rows
,
int
maxFeatures
);
bool
calcOrientation
(
UMat
&
keypoints
);
...
...
@@ -75,7 +72,7 @@ protected:
int
refcount
;
//! max keypoints = min(keypointsRatio * img.size().area(), 65535)
UMat
sum
,
mask1
,
maskSum
,
intBuffer
;
UMat
sum
,
intBuffer
;
UMat
det
,
trace
;
UMat
maxPosBuffer
;
...
...
@@ -87,12 +84,11 @@ protected:
UMat
img
,
counters
;
// texture buffers
ocl
::
Image2D
imgTex
,
sumTex
,
maskSumTex
;
ocl
::
Image2D
imgTex
,
sumTex
;
bool
haveImageSupport
;
String
kerOpts
;
int
status
;
ocl
::
Kernel
kerCalcDetTrace
,
kerFindMaxima
,
kerFindMaximaMask
,
kerInterp
;
ocl
::
Kernel
kerUpRight
,
kerOri
,
kerCalcDesc64
,
kerCalcDesc128
,
kerNormDesc64
,
kerNormDesc128
;
};
/*
...
...
modules/nonfree/src/surf.ocl.cpp
View file @
c18d1ee2
...
...
@@ -54,20 +54,6 @@ namespace cv
enum
{
ORI_SEARCH_INC
=
5
,
ORI_LOCAL_SIZE
=
(
360
/
ORI_SEARCH_INC
)
};
/*static void openCLExecuteKernelSURF(Context2 *clCxt, const ProgramEntry* source, String kernelName, size_t globalThreads[3],
size_t localThreads[3], std::vector< std::pair<size_t, const void *> > &args, int channels, int depth)
{
std::stringstream optsStr;
optsStr << "-D ORI_LOCAL_SIZE=" << ORI_LOCAL_SIZE << " ";
optsStr << "-D ORI_SEARCH_INC=" << ORI_SEARCH_INC << " ";
cl_kernel kernel;
kernel = openCLGetKernelFromSource(clCxt, source, kernelName, optsStr.str().c_str());
size_t wave_size = queryWaveFrontSize(kernel);
CV_Assert(clReleaseKernel(kernel) == CL_SUCCESS);
optsStr << "-D WAVE_SIZE=" << wave_size;
openCLExecuteKernel(clCxt, source, kernelName, globalThreads, localThreads, args, channels, depth, optsStr.str().c_str());
}*/
static
inline
int
calcSize
(
int
octave
,
int
layer
)
{
/* Wavelet size at first layer of first octave. */
...
...
@@ -100,22 +86,11 @@ bool SURF_OCL::init(const SURF* p)
if
(
ocl
::
haveOpenCL
())
{
const
ocl
::
Device
&
dev
=
ocl
::
Device
::
getDefault
();
if
(
dev
.
type
()
==
ocl
::
Device
::
TYPE_CPU
)
if
(
dev
.
type
()
==
ocl
::
Device
::
TYPE_CPU
||
dev
.
doubleFPConfig
()
==
0
)
return
false
;
haveImageSupport
=
dev
.
imageSupport
();
String
opts
=
haveImageSupport
?
"-D DISABLE_IMAGE2D"
:
""
;
if
(
kerCalcDetTrace
.
create
(
"SURF_calcLayerDetAndTrace"
,
ocl
::
nonfree
::
surf_oclsrc
,
opts
)
&&
kerFindMaxima
.
create
(
"SURF_findMaximaInLayer"
,
ocl
::
nonfree
::
surf_oclsrc
,
opts
)
&&
kerFindMaximaMask
.
create
(
"SURF_findMaximaInLayerWithMask"
,
ocl
::
nonfree
::
surf_oclsrc
,
opts
)
&&
kerInterp
.
create
(
"SURF_interpolateKeypoint"
,
ocl
::
nonfree
::
surf_oclsrc
,
opts
)
&&
kerUpRight
.
create
(
"SURF_setUpRight"
,
ocl
::
nonfree
::
surf_oclsrc
,
opts
)
&&
kerOri
.
create
(
"SURF_calcOrientation"
,
ocl
::
nonfree
::
surf_oclsrc
,
opts
)
&&
kerCalcDesc64
.
create
(
"SURF_computeDescriptors64"
,
ocl
::
nonfree
::
surf_oclsrc
,
opts
)
&&
kerCalcDesc128
.
create
(
"SURF_computeDescriptors128"
,
ocl
::
nonfree
::
surf_oclsrc
,
opts
)
&&
kerNormDesc64
.
create
(
"SURF_normalizeDescriptors64"
,
ocl
::
nonfree
::
surf_oclsrc
,
opts
)
&&
kerNormDesc128
.
create
(
"SURF_normalizeDescriptors128"
,
ocl
::
nonfree
::
surf_oclsrc
,
opts
))
status
=
1
;
haveImageSupport
=
false
;
//dev.imageSupport();
kerOpts
=
haveImageSupport
?
"-D HAVE_IMAGE2D -D DOUBLE_SUPPORT"
:
""
;
status
=
1
;
}
}
return
status
>
0
;
...
...
@@ -126,8 +101,10 @@ bool SURF_OCL::setImage(InputArray _img, InputArray _mask)
{
if
(
status
<=
0
)
return
false
;
CV_Assert
(
!
_img
.
empty
()
&&
_img
.
type
()
==
CV_8UC1
);
CV_Assert
(
_mask
.
empty
()
||
(
_mask
.
size
()
==
_img
.
size
()
&&
_mask
.
type
()
==
CV_8UC1
));
if
(
!
_mask
.
empty
())
return
false
;
int
imgtype
=
_img
.
type
();
CV_Assert
(
!
_img
.
empty
());
CV_Assert
(
params
&&
params
->
nOctaves
>
0
&&
params
->
nOctaveLayers
>
0
);
int
min_size
=
calcSize
(
params
->
nOctaves
-
1
,
0
);
...
...
@@ -151,10 +128,12 @@ bool SURF_OCL::setImage(InputArray _img, InputArray _mask)
counters
.
setTo
(
Scalar
::
all
(
0
));
img
.
release
();
if
(
_img
.
isUMat
())
if
(
_img
.
isUMat
()
&&
imgtype
==
CV_8UC1
)
img
=
_img
.
getUMat
();
else
else
if
(
imgtype
==
CV_8UC1
)
_img
.
copyTo
(
img
);
else
cvtColor
(
_img
,
img
,
COLOR_BGR2GRAY
);
integral
(
img
,
sum
);
...
...
@@ -164,12 +143,6 @@ bool SURF_OCL::setImage(InputArray _img, InputArray _mask)
sumTex
=
ocl
::
Image2D
(
sum
);
}
maskSumTex
=
ocl
::
Image2D
();
if
(
!
_mask
.
empty
())
{
CV_Error
(
Error
::
StsBadFunc
,
"Masked SURF detector is not implemented yet"
);
}
return
true
;
}
...
...
@@ -191,11 +164,10 @@ bool SURF_OCL::detectKeypoints(UMat &keypoints)
const
int
layer_rows
=
img_rows
>>
octave
;
const
int
layer_cols
=
img_cols
>>
octave
;
if
(
!
calcLayerDetAndTrace
(
det
,
trace
,
octave
,
layer_rows
))
if
(
!
calcLayerDetAndTrace
(
octave
,
layer_rows
))
return
false
;
if
(
!
findMaximaInLayer
(
det
,
trace
,
maxPosBuffer
,
counters
,
1
+
octave
,
octave
,
layer_rows
,
layer_cols
))
if
(
!
findMaximaInLayer
(
1
+
octave
,
octave
,
layer_rows
,
layer_cols
))
return
false
;
cpuCounters
=
counters
.
getMat
(
ACCESS_READ
);
...
...
@@ -205,8 +177,7 @@ bool SURF_OCL::detectKeypoints(UMat &keypoints)
if
(
maxCounter
>
0
)
{
if
(
!
interpolateKeypoint
(
det
,
maxPosBuffer
,
maxCounter
,
keypoints
,
counters
,
octave
,
layer_rows
,
maxFeatures
))
if
(
!
interpolateKeypoint
(
maxCounter
,
keypoints
,
octave
,
layer_rows
,
maxFeatures
))
return
false
;
}
}
...
...
@@ -216,7 +187,7 @@ bool SURF_OCL::detectKeypoints(UMat &keypoints)
featureCounter
=
std
::
min
(
featureCounter
,
maxFeatures
);
cpuCounters
.
release
();
keypoints
=
UMat
(
keypoints
,
Rect
(
0
,
0
,
featureCounter
,
1
));
keypoints
=
UMat
(
keypoints
,
Rect
(
0
,
0
,
featureCounter
,
keypoints
.
rows
));
if
(
params
->
upright
)
return
setUpRight
(
keypoints
);
...
...
@@ -232,7 +203,8 @@ bool SURF_OCL::setUpRight(UMat &keypoints)
return
true
;
size_t
globalThreads
[
3
]
=
{
nFeatures
,
1
};
return
kerUpRight
.
args
(
ocl
::
KernelArg
::
ReadWrite
(
keypoints
)).
run
(
2
,
globalThreads
,
0
,
false
);
ocl
::
Kernel
kerUpRight
(
"SURF_setUpRight"
,
ocl
::
nonfree
::
surf_oclsrc
,
kerOpts
);
return
kerUpRight
.
args
(
ocl
::
KernelArg
::
ReadWrite
(
keypoints
)).
run
(
2
,
globalThreads
,
0
,
true
);
}
bool
SURF_OCL
::
computeDescriptors
(
const
UMat
&
keypoints
,
OutputArray
_descriptors
)
...
...
@@ -255,14 +227,14 @@ bool SURF_OCL::computeDescriptors(const UMat &keypoints, OutputArray _descriptor
if
(
descriptorSize
==
64
)
{
kerCalcDesc
=
kerCalcDesc64
;
kerNormDesc
=
kerNormDesc64
;
kerCalcDesc
.
create
(
"SURF_computeDescriptors64"
,
ocl
::
nonfree
::
surf_oclsrc
,
kerOpts
)
;
kerNormDesc
.
create
(
"SURF_normalizeDescriptors64"
,
ocl
::
nonfree
::
surf_oclsrc
,
kerOpts
)
;
}
else
{
CV_Assert
(
descriptorSize
==
128
);
kerCalcDesc
=
kerCalcDesc128
;
kerNormDesc
=
kerNormDesc128
;
kerCalcDesc
.
create
(
"SURF_computeDescriptors128"
,
ocl
::
nonfree
::
surf_oclsrc
,
kerOpts
)
;
kerNormDesc
.
create
(
"SURF_normalizeDescriptors128"
,
ocl
::
nonfree
::
surf_oclsrc
,
kerOpts
)
;
}
size_t
localThreads
[]
=
{
6
,
6
};
...
...
@@ -271,17 +243,19 @@ bool SURF_OCL::computeDescriptors(const UMat &keypoints, OutputArray _descriptor
if
(
haveImageSupport
)
{
kerCalcDesc
.
args
(
imgTex
,
img_rows
,
img_cols
,
ocl
::
KernelArg
::
ReadOnlyNoSize
(
keypoints
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
descriptors
));
}
else
{
kerCalcDesc
.
args
(
ocl
::
KernelArg
::
ReadOnly
(
img
),
kerCalcDesc
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
img
),
img_rows
,
img_cols
,
ocl
::
KernelArg
::
ReadOnlyNoSize
(
keypoints
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
descriptors
));
}
if
(
!
kerCalcDesc
.
run
(
2
,
globalThreads
,
localThreads
,
fals
e
))
if
(
!
kerCalcDesc
.
run
(
2
,
globalThreads
,
localThreads
,
tru
e
))
return
false
;
size_t
localThreads_n
[]
=
{
descriptorSize
,
1
};
...
...
@@ -290,7 +264,7 @@ bool SURF_OCL::computeDescriptors(const UMat &keypoints, OutputArray _descriptor
globalThreads
[
0
]
=
nFeatures
*
localThreads
[
0
];
globalThreads
[
1
]
=
localThreads
[
1
];
bool
ok
=
kerNormDesc
.
args
(
ocl
::
KernelArg
::
ReadWriteNoSize
(
descriptors
)).
run
(
2
,
globalThreads_n
,
localThreads_n
,
fals
e
);
run
(
2
,
globalThreads_n
,
localThreads_n
,
tru
e
);
if
(
ok
&&
!
_descriptors
.
isUMat
())
descriptors
.
copyTo
(
_descriptors
);
return
ok
;
...
...
@@ -364,19 +338,19 @@ void SURF_OCL::downloadKeypoints(const UMat &keypointsGPU, std::vector<KeyPoint>
}
}
bool
SURF_OCL
::
detect
(
InputArray
img
,
InputArray
mask
,
UMat
&
keypoints
)
bool
SURF_OCL
::
detect
(
InputArray
_img
,
InputArray
_
mask
,
UMat
&
keypoints
)
{
if
(
!
setImage
(
img
,
mask
)
)
if
(
!
setImage
(
_img
,
_
mask
)
)
return
false
;
return
detectKeypoints
(
keypoints
);
}
bool
SURF_OCL
::
detectAndCompute
(
InputArray
img
,
InputArray
mask
,
UMat
&
keypoints
,
bool
SURF_OCL
::
detectAndCompute
(
InputArray
_img
,
InputArray
_
mask
,
UMat
&
keypoints
,
OutputArray
_descriptors
,
bool
useProvidedKeypoints
)
{
if
(
!
setImage
(
img
,
mask
)
)
if
(
!
setImage
(
_img
,
_
mask
)
)
return
false
;
if
(
!
useProvidedKeypoints
&&
!
detectKeypoints
(
keypoints
)
)
...
...
@@ -389,22 +363,20 @@ inline int divUp(int a, int b) { return (a + b-1)/b; }
////////////////////////////
// kernel caller definitions
bool
SURF_OCL
::
calcLayerDetAndTrace
(
UMat
&
det
,
UMat
&
trace
,
int
octave
,
int
c_layer_rows
)
bool
SURF_OCL
::
calcLayerDetAndTrace
(
int
octave
,
int
c_layer_rows
)
{
int
nOctaveLayers
=
params
->
nOctaveLayers
;
const
int
min_size
=
calcSize
(
octave
,
0
);
const
int
max_samples_i
=
1
+
((
img_rows
-
min_size
)
>>
octave
);
const
int
max_samples_j
=
1
+
((
img_cols
-
min_size
)
>>
octave
);
String
kernelName
=
"SURF_calcLayerDetAndTrace"
;
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
args
;
size_t
localThreads
[
3
]
=
{
16
,
16
};
size_t
globalThreads
[
3
]
=
size_t
localThreads
[]
=
{
16
,
16
};
size_t
globalThreads
[]
=
{
divUp
(
max_samples_j
,
localThreads
[
0
])
*
localThreads
[
0
],
divUp
(
max_samples_i
,
localThreads
[
1
])
*
localThreads
[
1
]
*
(
nOctaveLayers
+
2
)
};
ocl
::
Kernel
kerCalcDetTrace
(
"SURF_calcLayerDetAndTrace"
,
ocl
::
nonfree
::
surf_oclsrc
,
kerOpts
);
if
(
haveImageSupport
)
{
kerCalcDetTrace
.
args
(
sumTex
,
...
...
@@ -421,56 +393,15 @@ bool SURF_OCL::calcLayerDetAndTrace(UMat &det, UMat &trace, int octave, int c_la
ocl
::
KernelArg
::
WriteOnlyNoSize
(
det
),
ocl
::
KernelArg
::
WriteOnlyNoSize
(
trace
));
}
return
kerCalcDetTrace
.
run
(
2
,
globalThreads
,
localThreads
,
fals
e
);
return
kerCalcDetTrace
.
run
(
2
,
globalThreads
,
localThreads
,
tru
e
);
}
bool
SURF_OCL
::
findMaximaInLayer
(
const
UMat
&
det
,
const
UMat
&
trace
,
UMat
&
maxPosBuffer
,
UMat
&
maxCounter
,
int
counterOffset
,
int
octave
,
bool
SURF_OCL
::
findMaximaInLayer
(
int
counterOffset
,
int
octave
,
int
layer_rows
,
int
layer_cols
)
{
const
int
min_margin
=
((
calcSize
(
octave
,
2
)
>>
1
)
>>
octave
)
+
1
;
bool
haveMask
=
!
maskSum
.
empty
()
||
(
maskSumTex
.
ptr
()
!=
0
);
int
nOctaveLayers
=
params
->
nOctaveLayers
;
ocl
::
Kernel
ker
;
if
(
haveMask
)
{
if
(
haveImageSupport
)
ker
=
kerFindMaximaMask
.
args
(
maskSumTex
,
ocl
::
KernelArg
::
ReadOnlyNoSize
(
det
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
trace
),
ocl
::
KernelArg
::
PtrReadWrite
(
maxPosBuffer
),
ocl
::
KernelArg
::
PtrReadWrite
(
maxCounter
),
counterOffset
,
img_rows
,
img_cols
,
octave
,
nOctaveLayers
,
layer_rows
,
layer_cols
,
maxCandidates
,
(
float
)
params
->
hessianThreshold
);
else
ker
=
kerFindMaximaMask
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
maskSum
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
det
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
trace
),
ocl
::
KernelArg
::
PtrReadWrite
(
maxPosBuffer
),
ocl
::
KernelArg
::
PtrReadWrite
(
maxCounter
),
counterOffset
,
img_rows
,
img_cols
,
octave
,
nOctaveLayers
,
layer_rows
,
layer_cols
,
maxCandidates
,
(
float
)
params
->
hessianThreshold
);
}
else
{
ker
=
kerFindMaxima
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
det
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
trace
),
ocl
::
KernelArg
::
PtrReadWrite
(
maxPosBuffer
),
ocl
::
KernelArg
::
PtrReadWrite
(
maxCounter
),
counterOffset
,
img_rows
,
img_cols
,
octave
,
nOctaveLayers
,
layer_rows
,
layer_cols
,
maxCandidates
,
(
float
)
params
->
hessianThreshold
);
}
size_t
localThreads
[
3
]
=
{
16
,
16
};
size_t
globalThreads
[
3
]
=
{
...
...
@@ -478,21 +409,31 @@ bool SURF_OCL::findMaximaInLayer(const UMat &det, const UMat &trace,
divUp
(
layer_rows
-
2
*
min_margin
,
localThreads
[
1
]
-
2
)
*
nOctaveLayers
*
localThreads
[
1
]
};
return
ker
.
run
(
2
,
globalThreads
,
localThreads
,
false
);
ocl
::
Kernel
kerFindMaxima
(
"SURF_findMaximaInLayer"
,
ocl
::
nonfree
::
surf_oclsrc
,
kerOpts
);
return
kerFindMaxima
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
det
),
ocl
::
KernelArg
::
ReadOnlyNoSize
(
trace
),
ocl
::
KernelArg
::
PtrReadWrite
(
maxPosBuffer
),
ocl
::
KernelArg
::
PtrReadWrite
(
counters
),
counterOffset
,
img_rows
,
img_cols
,
octave
,
nOctaveLayers
,
layer_rows
,
layer_cols
,
maxCandidates
,
(
float
)
params
->
hessianThreshold
).
run
(
2
,
globalThreads
,
localThreads
,
true
);
}
bool
SURF_OCL
::
interpolateKeypoint
(
const
UMat
&
det
,
const
UMat
&
maxPosBuffer
,
int
maxCounter
,
UMat
&
keypoints
,
UMat
&
counters_
,
int
octave
,
int
layer_rows
,
int
max_features
)
bool
SURF_OCL
::
interpolateKeypoint
(
int
maxCounter
,
UMat
&
keypoints
,
int
octave
,
int
layer_rows
,
int
max_features
)
{
size_t
localThreads
[
3
]
=
{
3
,
3
,
3
};
size_t
globalThreads
[
3
]
=
{
maxCounter
*
localThreads
[
0
],
localThreads
[
1
],
3
};
ocl
::
Kernel
kerInterp
(
"SURF_interpolateKeypoint"
,
ocl
::
nonfree
::
surf_oclsrc
,
kerOpts
);
return
kerInterp
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
det
),
ocl
::
KernelArg
::
PtrReadOnly
(
maxPosBuffer
),
ocl
::
KernelArg
::
ReadWriteNoSize
(
keypoints
),
ocl
::
KernelArg
::
PtrReadWrite
(
counters
_
),
ocl
::
KernelArg
::
PtrReadWrite
(
counters
),
img_rows
,
img_cols
,
octave
,
layer_rows
,
max_features
).
run
(
3
,
globalThreads
,
localThreads
,
fals
e
);
run
(
3
,
globalThreads
,
localThreads
,
tru
e
);
}
bool
SURF_OCL
::
calcOrientation
(
UMat
&
keypoints
)
...
...
@@ -500,18 +441,19 @@ bool SURF_OCL::calcOrientation(UMat &keypoints)
int
nFeatures
=
keypoints
.
cols
;
if
(
nFeatures
==
0
)
return
true
;
ocl
::
Kernel
kerOri
(
"SURF_calcOrientation"
,
ocl
::
nonfree
::
surf_oclsrc
,
kerOpts
);
if
(
haveImageSupport
)
kerOri
.
args
(
sumTex
,
ocl
::
KernelArg
::
ReadWriteNoSize
(
keypoints
),
img_rows
,
img_cols
);
kerOri
.
args
(
sumTex
,
img_rows
,
img_cols
,
ocl
::
KernelArg
::
ReadWriteNoSize
(
keypoints
));
else
kerOri
.
args
(
ocl
::
KernelArg
::
ReadOnlyNoSize
(
sum
),
ocl
::
KernelArg
::
ReadWriteNoSize
(
keypoints
)
,
img_rows
,
img_cols
);
img_rows
,
img_cols
,
ocl
::
KernelArg
::
ReadWriteNoSize
(
keypoints
)
);
size_t
localThreads
[
3
]
=
{
ORI_LOCAL_SIZE
,
1
};
size_t
globalThreads
[
3
]
=
{
nFeatures
*
localThreads
[
0
],
1
};
return
kerOri
.
run
(
2
,
globalThreads
,
localThreads
,
fals
e
);
return
kerOri
.
run
(
2
,
globalThreads
,
localThreads
,
tru
e
);
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment