Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
a63576e7
Commit
a63576e7
authored
Dec 17, 2013
by
krodyush
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
HOST side optimization for GFFT
parent
5d5527d0
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
50 additions
and
153 deletions
+50
-153
ocl.hpp
modules/ocl/include/opencv2/ocl/ocl.hpp
+2
-0
gftt.cpp
modules/ocl/src/gftt.cpp
+0
-0
imgproc_gftt.cl
modules/ocl/src/opencl/imgproc_gftt.cl
+48
-153
No files found.
modules/ocl/include/opencv2/ocl/ocl.hpp
View file @
a63576e7
...
...
@@ -1381,8 +1381,10 @@ namespace cv
oclMat
Dx_
;
oclMat
Dy_
;
oclMat
eig_
;
oclMat
eig_minmax_
;
oclMat
minMaxbuf_
;
oclMat
tmpCorners_
;
oclMat
counter_
;
};
inline
GoodFeaturesToTrackDetector_OCL
::
GoodFeaturesToTrackDetector_OCL
(
int
maxCorners_
,
double
qualityLevel_
,
double
minDistance_
,
...
...
modules/ocl/src/gftt.cpp
View file @
a63576e7
This diff is collapsed.
Click to expand it.
modules/ocl/src/opencl/imgproc_gftt.cl
View file @
a63576e7
...
...
@@ -46,33 +46,26 @@
#
ifndef
WITH_MASK
#
define
WITH_MASK
0
#
endif
__constant
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_CLAMP_TO_EDGE |
CLK_FILTER_NEAREST
;
inline
float
ELEM_INT2
(
image2d_t
_eig,
int
_x,
int
_y
)
{
return
read_imagef
(
_eig,
sampler,
(
int2
)(
_x,
_y
))
.
x
;
}
inline
float
ELEM_FLT2
(
image2d_t
_eig,
float2
pt
)
{
return
read_imagef
(
_eig,
sampler,
pt
)
.
x
;
}
//macro
to
read
eigenvalue
matrix
#
define
GET_SRC_32F
(
_x,
_y
)
((
__global
const
float*
)(
eig
+
(
_y
)
*eig_pitch
))
[_x]
__kernel
void
findCorners
(
image2d_t
eig,
__global
const
char
*
mask,
__global
float2
*
corners,
const
int
mask_strip,//
in
pixels
const
float
threshold,
const
int
rows,
const
int
cols,
const
int
max_count,
__global
int
*
g_counter
__global
const
char*
eig,
const
int
eig_pitch,
__global
const
char*
mask,
__global
float2*
corners,
const
int
mask_strip,//
in
pixels
__global
const
float*
pMinMax,
const
float
qualityLevel,
const
int
rows,
const
int
cols,
const
int
max_count,
__global
int*
g_counter
)
{
float
threshold
=
qualityLevel*pMinMax[1]
;
const
int
j
=
get_global_id
(
0
)
;
const
int
i
=
get_global_id
(
1
)
;
...
...
@@ -82,39 +75,42 @@ __kernel
#
endif
)
{
const
float
val
=
ELEM_INT2
(
eig,
j,
i
)
;
const
float
val
=
GET_SRC_32F
(
j,
i
)
;
if
(
val
>
threshold
)
{
float
maxVal
=
val
;
maxVal
=
fmax
(
GET_SRC_32F
(
j
-
1
,
i
-
1
)
,
maxVal
)
;
maxVal
=
fmax
(
GET_SRC_32F
(
j
,
i
-
1
)
,
maxVal
)
;
maxVal
=
fmax
(
GET_SRC_32F
(
j
+
1
,
i
-
1
)
,
maxVal
)
;
maxVal
=
fmax
(
ELEM_INT2
(
eig,
j
-
1
,
i
-
1
)
,
maxVal
)
;
maxVal
=
fmax
(
ELEM_INT2
(
eig,
j
,
i
-
1
)
,
maxVal
)
;
maxVal
=
fmax
(
ELEM_INT2
(
eig,
j
+
1
,
i
-
1
)
,
maxVal
)
;
maxVal
=
fmax
(
ELEM_INT2
(
eig,
j
-
1
,
i
)
,
maxVal
)
;
maxVal
=
fmax
(
ELEM_INT2
(
eig,
j
+
1
,
i
)
,
maxVal
)
;
maxVal
=
fmax
(
GET_SRC_32F
(
j
-
1
,
i
)
,
maxVal
)
;
maxVal
=
fmax
(
GET_SRC_32F
(
j
+
1
,
i
)
,
maxVal
)
;
maxVal
=
fmax
(
ELEM_INT2
(
eig,
j
-
1
,
i
+
1
)
,
maxVal
)
;
maxVal
=
fmax
(
ELEM_INT2
(
eig,
j
,
i
+
1
)
,
maxVal
)
;
maxVal
=
fmax
(
ELEM_INT2
(
eig,
j
+
1
,
i
+
1
)
,
maxVal
)
;
maxVal
=
fmax
(
GET_SRC_32F
(
j
-
1
,
i
+
1
)
,
maxVal
)
;
maxVal
=
fmax
(
GET_SRC_32F
(
j
,
i
+
1
)
,
maxVal
)
;
maxVal
=
fmax
(
GET_SRC_32F
(
j
+
1
,
i
+
1
)
,
maxVal
)
;
if
(
val
==
maxVal
)
{
const
int
ind
=
atomic_inc
(
g_counter
)
;
if
(
ind
<
max_count
)
corners[ind]
=
(
float2
)(
j,
i
)
;
{//
pack
and
store
eigenvalue
and
its
coordinates
corners[ind].x
=
val
;
corners[ind].y
=
as_float
(
j
|
(
i<<16
))
;
}
}
}
}
}
#
undef
GET_SRC_32F
//bitonic
sort
__kernel
void
sortCorners_bitonicSort
(
image2d_t
eig,
__global
float2
*
corners,
const
int
count,
const
int
stage,
...
...
@@ -140,8 +136,8 @@ __kernel
const
float2
leftPt
=
corners[leftId]
;
const
float2
rightPt
=
corners[rightId]
;
const
float
leftVal
=
ELEM_FLT2
(
eig,
leftPt
)
;
const
float
rightVal
=
ELEM_FLT2
(
eig,
rightPt
)
;
const
float
leftVal
=
leftPt.x
;
const
float
rightVal
=
rightPt.x
;
const
bool
compareResult
=
leftVal
>
rightVal
;
...
...
@@ -152,124 +148,22 @@ __kernel
corners[rightId]
=
sortOrder
?
greater
:
lesser
;
}
//selection
sort
for
gftt
//kernel
is
ported
from
Bolt
library:
//https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/sort_kernels.cl
//
Local
sort
will
firstly
sort
elements
of
each
workgroup
using
selection
sort
//
its
performance
is
O
(
n
)
__kernel
void
sortCorners_selectionSortLocal
(
image2d_t
eig,
__global
float2
*
corners,
const
int
count,
__local
float2
*
scratch
)
//
this
is
simple
short
serial
kernel
that
makes
some
short
reduction
and
initialization
work
//
it
makes
HOST
like
work
to
avoid
additional
sync
with
HOST
to
do
this
short
work
//
data
-
input/output
float2.
//
input
data
are
several
(
min,max
)
pairs
//
output
data
is
one
reduced
(
min,max
)
pair
//
g_counter
-
counter
that
has
to
be
initialized
by
0
for
next
findCorners
call.
__kernel
void
arithm_op_minMax_final
(
__global
float
*
data,
int
groupnum,__global
int
*
g_counter
)
{
int
i
=
get_local_id
(
0
)
; // index in workgroup
int
numOfGroups
=
get_num_groups
(
0
)
; // number of workgroups
int
groupID
=
get_group_id
(
0
)
;
int
wg
=
get_local_size
(
0
)
; // workgroup size = block size
int
n
; // number of elements to be processed for this work group
int
offset
=
groupID
*
wg
;
int
same
=
0
;
corners
+=
offset
;
n
=
(
groupID
==
(
numOfGroups-1
))
?
(
count
-
wg*
(
numOfGroups-1
))
:
wg
;
float2
pt1,
pt2
;
pt1
=
corners[min
(
i,
n
)
]
;
scratch[i]
=
pt1
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
i
>=
n
)
g_counter[0]
=
0
;
float
minVal
=
data[0]
;
float
maxVal
=
data[groupnum]
;
for
(
int
i=1
;i<groupnum;++i)
{
return
;
minVal
=
min
(
minVal,data[i]
)
;
maxVal
=
max
(
maxVal,data[i+groupnum]
)
;
}
float
val1
=
ELEM_FLT2
(
eig,
pt1
)
;
float
val2
;
int
pos
=
0
;
for
(
int
j=0
;j<n;++j)
{
pt2
=
scratch[j]
;
val2
=
ELEM_FLT2
(
eig,
pt2
)
;
if
(
val2
>
val1
)
pos++
;//calculate the rank of this element in this work group
else
{
if
(
val1
>
val2
)
continue
;
else
{
//
val1
and
val2
are
same
same++
;
}
}
}
for
(
int
j=0
; j< same; j++)
corners[pos
+
j]
=
pt1
;
}
__kernel
void
sortCorners_selectionSortFinal
(
image2d_t
eig,
__global
float2
*
corners,
const
int
count
)
{
const
int
i
=
get_local_id
(
0
)
; // index in workgroup
const
int
numOfGroups
=
get_num_groups
(
0
)
; // number of workgroups
const
int
groupID
=
get_group_id
(
0
)
;
const
int
wg
=
get_local_size
(
0
)
; // workgroup size = block size
int
pos
=
0
,
same
=
0
;
const
int
offset
=
get_group_id
(
0
)
*
wg
;
const
int
remainder
=
count
-
wg*
(
numOfGroups-1
)
;
if
((
offset
+
i
)
>=
count
)
return
;
float2
pt1,
pt2
;
pt1
=
corners[groupID*wg
+
i]
;
float
val1
=
ELEM_FLT2
(
eig,
pt1
)
;
float
val2
;
for
(
int
j=0
; j<numOfGroups-1; j++ )
{
for
(
int
k=0
; k<wg; k++)
{
pt2
=
corners[j*wg
+
k]
;
val2
=
ELEM_FLT2
(
eig,
pt2
)
;
if
(
val1
>
val2
)
break
;
else
{
//Increment
only
if
the
value
is
not
the
same.
if
(
val2
>
val1
)
pos++
;
else
same++
;
}
}
}
for
(
int
k=0
; k<remainder; k++)
{
pt2
=
corners[
(
numOfGroups-1
)
*wg
+
k]
;
val2
=
ELEM_FLT2
(
eig,
pt2
)
;
if
(
val1
>
val2
)
break
;
else
{
//Don
't
increment
if
the
value
is
the
same.
//Two
elements
are
same
if
(
*userComp
)(
jData,
iData
)
and
(
*userComp
)(
iData,
jData
)
are
both
false
if
(
val2
>
val1
)
pos++
;
else
same++
;
}
}
for
(
int
j=0
; j< same; j++)
corners[pos
+
j]
=
pt1
;
}
data[0]
=
minVal
;
data[1]
=
maxVal
;
}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment