Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
0cea828a
Commit
0cea828a
authored
Dec 19, 2013
by
Andrey Pavlenko
Committed by
OpenCV Buildbot
Dec 19, 2013
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #2006 from krodyush:pullreq/2.4-opt-131120-gfft
parents
bc741ab2
ea0c9b7f
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
221 additions
and
302 deletions
+221
-302
ocl.hpp
modules/ocl/include/opencv2/ocl/ocl.hpp
+2
-0
gftt.cpp
modules/ocl/src/gftt.cpp
+176
-154
imgproc_gftt.cl
modules/ocl/src/opencl/imgproc_gftt.cl
+43
-148
No files found.
modules/ocl/include/opencv2/ocl/ocl.hpp
View file @
0cea828a
...
...
@@ -1380,8 +1380,10 @@ namespace cv
oclMat
Dx_
;
oclMat
Dy_
;
oclMat
eig_
;
oclMat
eig_minmax_
;
oclMat
minMaxbuf_
;
oclMat
tmpCorners_
;
oclMat
counter_
;
};
inline
GoodFeaturesToTrackDetector_OCL
::
GoodFeaturesToTrackDetector_OCL
(
int
maxCorners_
,
double
qualityLevel_
,
double
minDistance_
,
...
...
modules/ocl/src/gftt.cpp
View file @
0cea828a
...
...
@@ -48,63 +48,35 @@
using
namespace
cv
;
using
namespace
cv
::
ocl
;
// currently sort procedure on the host is more efficient
static
bool
use_cpu_sorter
=
true
;
namespace
// compact structure for corners
struct
DefCorner
{
enum
SortMethod
float
eig
;
//eigenvalue of corner
short
x
;
//x coordinate of corner point
short
y
;
//y coordinate of corner point
}
;
// compare procedure for corner
//it is used for sort on the host side
struct
DefCornerCompare
{
CPU_STL
,
BITONIC
,
SELECTION
};
const
int
GROUP_SIZE
=
256
;
template
<
SortMethod
method
>
struct
Sorter
{
//typedef EigType;
};
//TODO(pengx): optimize GPU sorter's performance thus CPU sorter is removed.
template
<>
struct
Sorter
<
CPU_STL
>
{
typedef
oclMat
EigType
;
static
cv
::
Mutex
cs
;
static
Mat
mat_eig
;
//prototype
static
int
clfloat2Gt
(
cl_float2
pt1
,
cl_float2
pt2
)
{
float
v1
=
mat_eig
.
at
<
float
>
(
cvRound
(
pt1
.
s
[
1
]),
cvRound
(
pt1
.
s
[
0
]));
float
v2
=
mat_eig
.
at
<
float
>
(
cvRound
(
pt2
.
s
[
1
]),
cvRound
(
pt2
.
s
[
0
]));
return
v1
>
v2
;
}
static
void
sortCorners_caller
(
const
EigType
&
eig_tex
,
oclMat
&
corners
,
const
int
count
)
bool
operator
()(
const
DefCorner
a
,
const
DefCorner
b
)
const
{
cv
::
AutoLock
lock
(
cs
);
//temporarily use STL's sort function
Mat
mat_corners
=
corners
;
mat_eig
=
eig_tex
;
std
::
sort
(
mat_corners
.
begin
<
cl_float2
>
(),
mat_corners
.
begin
<
cl_float2
>
()
+
count
,
clfloat2Gt
);
corners
=
mat_corners
;
return
a
.
eig
>
b
.
eig
;
}
};
cv
::
Mutex
Sorter
<
CPU_STL
>::
cs
;
cv
::
Mat
Sorter
<
CPU_STL
>::
mat_eig
;
template
<>
st
ruct
Sorter
<
BITONIC
>
// sort corner point using opencl bitonicosrt implementation
st
atic
void
sortCorners_caller
(
oclMat
&
corners
,
const
int
count
)
{
typedef
TextureCL
EigType
;
static
void
sortCorners_caller
(
const
EigType
&
eig_tex
,
oclMat
&
corners
,
const
int
count
)
{
Context
*
cxt
=
Context
::
getContext
();
size_t
globalThreads
[
3
]
=
{
count
/
2
,
1
,
1
};
size_t
localThreads
[
3
]
=
{
GROUP_SIZE
,
1
,
1
};
int
GS
=
count
/
2
;
int
LS
=
min
(
255
,
GS
);
size_t
globalThreads
[
3
]
=
{
GS
,
1
,
1
};
size_t
localThreads
[
3
]
=
{
LS
,
1
,
1
};
// 2^numStages should be equal to count or the output is invalid
int
numStages
=
0
;
...
...
@@ -112,90 +84,106 @@ struct Sorter<BITONIC>
{
++
numStages
;
}
const
int
argc
=
5
;
const
int
argc
=
4
;
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
args
(
argc
);
std
::
string
kernelname
=
"sortCorners_bitonicSort"
;
args
[
0
]
=
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
eig_tex
);
args
[
1
]
=
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
corners
.
data
);
args
[
2
]
=
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
count
);
args
[
0
]
=
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
corners
.
data
);
args
[
1
]
=
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
count
);
for
(
int
stage
=
0
;
stage
<
numStages
;
++
stage
)
{
args
[
3
]
=
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
stage
);
args
[
2
]
=
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
stage
);
for
(
int
passOfStage
=
0
;
passOfStage
<
stage
+
1
;
++
passOfStage
)
{
args
[
4
]
=
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
passOfStage
);
args
[
3
]
=
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
passOfStage
);
openCLExecuteKernel
(
cxt
,
&
imgproc_gftt
,
kernelname
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
}
}
};
template
<>
struct
Sorter
<
SELECTION
>
{
typedef
TextureCL
EigType
;
static
void
sortCorners_caller
(
const
EigType
&
eig_tex
,
oclMat
&
corners
,
const
int
count
)
{
Context
*
cxt
=
Context
::
getContext
();
size_t
globalThreads
[
3
]
=
{
count
,
1
,
1
};
size_t
localThreads
[
3
]
=
{
GROUP_SIZE
,
1
,
1
};
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
args
;
//local
std
::
string
kernelname
=
"sortCorners_selectionSortLocal"
;
int
lds_size
=
GROUP_SIZE
*
sizeof
(
cl_float2
);
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
eig_tex
)
);
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
corners
.
data
)
);
args
.
push_back
(
std
::
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
count
)
);
args
.
push_back
(
std
::
make_pair
(
lds_size
,
(
void
*
)
NULL
)
);
openCLExecuteKernel
(
cxt
,
&
imgproc_gftt
,
kernelname
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
//final
kernelname
=
"sortCorners_selectionSortFinal"
;
args
.
pop_back
();
openCLExecuteKernel
(
cxt
,
&
imgproc_gftt
,
kernelname
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
};
}
int
findCorners_caller
(
const
TextureCL
&
eig
,
const
float
threshold
,
// find corners on matrix and put it into array
static
void
findCorners_caller
(
const
oclMat
&
eig_mat
,
//input matrix worth eigenvalues
oclMat
&
eigMinMax
,
//input with min and max values of eigenvalues
const
float
qualityLevel
,
const
oclMat
&
mask
,
oclMat
&
corners
,
const
int
max_count
)
oclMat
&
corners
,
//output array with detected corners
oclMat
&
counter
)
//output value with number of detected corners, have to be 0 before call
{
string
opt
;
std
::
vector
<
int
>
k
;
Context
*
cxt
=
Context
::
getContext
();
std
::
vector
<
std
::
pair
<
size_t
,
const
void
*>
>
args
;
std
::
string
kernelname
=
"findCorners"
;
const
int
mask_strip
=
mask
.
step
/
mask
.
elemSize1
();
oclMat
g_counter
(
1
,
1
,
CV_32SC1
);
g_counter
.
setTo
(
0
);
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
(
eig_mat
.
data
)));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
eig
));
int
src_pitch
=
(
int
)
eig_mat
.
step
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
src_pitch
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
mask
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
corners
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
mask_strip
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
threshold
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
eig
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
eig
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
max_count
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
g_counter
.
data
));
size_t
globalThreads
[
3
]
=
{
eig
.
cols
,
eig
.
rows
,
1
};
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
eigMinMax
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
qualityLevel
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
eig_mat
.
rows
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
eig_mat
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
corners
.
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
counter
.
data
));
size_t
globalThreads
[
3
]
=
{
eig_mat
.
cols
,
eig_mat
.
rows
,
1
};
size_t
localThreads
[
3
]
=
{
16
,
16
,
1
};
if
(
!
mask
.
empty
())
opt
+=
" -D WITH_MASK=1"
;
const
char
*
opt
=
mask
.
empty
()
?
""
:
"-D WITH_MASK"
;
openCLExecuteKernel
(
cxt
,
&
imgproc_gftt
,
kernelname
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
opt
);
return
std
::
min
(
Mat
(
g_counter
).
at
<
int
>
(
0
),
max_count
);
openCLExecuteKernel
(
cxt
,
&
imgproc_gftt
,
"findCorners"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
opt
.
c_str
());
}
static
void
minMaxEig_caller
(
const
oclMat
&
src
,
oclMat
&
dst
,
oclMat
&
tozero
)
{
size_t
groupnum
=
src
.
clCxt
->
getDeviceInfo
().
maxComputeUnits
;
CV_Assert
(
groupnum
!=
0
);
int
dbsize
=
groupnum
*
2
*
src
.
elemSize
();
ensureSizeIsEnough
(
1
,
dbsize
,
CV_8UC1
,
dst
);
cl_mem
dst_data
=
reinterpret_cast
<
cl_mem
>
(
dst
.
data
);
int
all_cols
=
src
.
step
/
src
.
elemSize
();
int
pre_cols
=
(
src
.
offset
%
src
.
step
)
/
src
.
elemSize
();
int
sec_cols
=
all_cols
-
(
src
.
offset
%
src
.
step
+
src
.
cols
*
src
.
elemSize
()
-
1
)
/
src
.
elemSize
()
-
1
;
int
invalid_cols
=
pre_cols
+
sec_cols
;
int
cols
=
all_cols
-
invalid_cols
,
elemnum
=
cols
*
src
.
rows
;
int
offset
=
src
.
offset
/
src
.
elemSize
();
{
// first parallel pass
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
src
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
invalid_cols
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
offset
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
elemnum
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
groupnum
));
size_t
globalThreads
[
3
]
=
{
groupnum
*
256
,
1
,
1
};
size_t
localThreads
[
3
]
=
{
256
,
1
,
1
};
openCLExecuteKernel
(
src
.
clCxt
,
&
arithm_minMax
,
"arithm_op_minMax"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
"-D T=float -D DEPTH_5"
);
}
{
// run final "serial" kernel to find accumulate results from threads and reset corner counter
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
dst_data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
)
,
(
void
*
)
&
groupnum
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
)
,
(
void
*
)
&
tozero
.
data
));
size_t
globalThreads
[
3
]
=
{
1
,
1
,
1
};
size_t
localThreads
[
3
]
=
{
1
,
1
,
1
};
openCLExecuteKernel
(
src
.
clCxt
,
&
imgproc_gftt
,
"arithm_op_minMax_final"
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
}
}
}
//unnamed namespace
void
cv
::
ocl
::
GoodFeaturesToTrackDetector_OCL
::
operator
()(
const
oclMat
&
image
,
oclMat
&
corners
,
const
oclMat
&
mask
)
{
...
...
@@ -205,67 +193,99 @@ void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image,
ensureSizeIsEnough
(
image
.
size
(),
CV_32F
,
eig_
);
if
(
useHarrisDetector
)
corner
MinEigenVal
_dxdy
(
image
,
eig_
,
Dx_
,
Dy_
,
blockSize
,
3
,
harrisK
);
corner
Harris
_dxdy
(
image
,
eig_
,
Dx_
,
Dy_
,
blockSize
,
3
,
harrisK
);
else
cornerMinEigenVal_dxdy
(
image
,
eig_
,
Dx_
,
Dy_
,
blockSize
,
3
);
double
maxVal
=
0
;
minMax
(
eig_
,
NULL
,
&
maxVal
);
ensureSizeIsEnough
(
1
,
1
,
CV_32SC1
,
counter_
);
// find max eigenvalue and reset detected counters
minMaxEig_caller
(
eig_
,
eig_minmax_
,
counter_
);
// allocate buffer for kernels
int
corner_array_size
=
std
::
max
(
1024
,
static_cast
<
int
>
(
image
.
size
().
area
()
*
0.05
));
ensureSizeIsEnough
(
1
,
std
::
max
(
1000
,
static_cast
<
int
>
(
image
.
size
().
area
()
*
0.05
)),
CV_32FC2
,
tmpCorners_
);
if
(
!
use_cpu_sorter
)
{
// round to 2^n
unsigned
int
n
=
1
;
for
(
n
=
1
;
n
<
(
unsigned
int
)
corner_array_size
;
n
<<=
1
);
corner_array_size
=
(
int
)
n
;
Ptr
<
TextureCL
>
eig_tex
=
bindTexturePtr
(
eig_
);
int
total
=
findCorners_caller
(
*
eig_tex
,
static_cast
<
float
>
(
maxVal
*
qualityLevel
),
ensureSizeIsEnough
(
1
,
corner_array_size
,
CV_32FC2
,
tmpCorners_
);
// set to 0 to be able use bitonic sort on whole 2^n array
tmpCorners_
.
setTo
(
0
);
}
else
{
ensureSizeIsEnough
(
1
,
corner_array_size
,
CV_32FC2
,
tmpCorners_
);
}
int
total
=
tmpCorners_
.
cols
;
// by default the number of corner is full array
vector
<
DefCorner
>
tmp
(
tmpCorners_
.
cols
);
// input buffer with corner for HOST part of algorithm
//find points with high eigenvalue and put it into the output array
findCorners_caller
(
eig_
,
eig_minmax_
,
static_cast
<
float
>
(
qualityLevel
),
mask
,
tmpCorners_
,
tmpCorners_
.
cols
);
counter_
);
if
(
!
use_cpu_sorter
)
{
// sort detected corners on deivce side
sortCorners_caller
(
tmpCorners_
,
corner_array_size
);
}
else
{
// send non-blocking request to read real non-zero number of corners to sort it on the HOST side
openCLVerifyCall
(
clEnqueueReadBuffer
(
getClCommandQueue
(
counter_
.
clCxt
),
(
cl_mem
)
counter_
.
data
,
CL_FALSE
,
0
,
sizeof
(
int
),
&
total
,
0
,
NULL
,
NULL
));
}
//blocking read whole corners array (sorted or not sorted)
openCLReadBuffer
(
tmpCorners_
.
clCxt
,(
cl_mem
)
tmpCorners_
.
data
,
&
tmp
[
0
],
tmpCorners_
.
cols
*
sizeof
(
DefCorner
));
if
(
total
==
0
)
{
{
// check for trivial case
corners
.
release
();
return
;
}
if
(
use_cpu_sorter
)
{
Sorter
<
CPU_STL
>::
sortCorners_caller
(
eig_
,
tmpCorners_
,
total
);
}
else
{
//if total is power of 2
if
(((
total
-
1
)
&
(
total
))
==
0
)
{
Sorter
<
BITONIC
>::
sortCorners_caller
(
*
eig_tex
,
tmpCorners_
,
total
);
}
else
{
Sorter
<
SELECTION
>::
sortCorners_caller
(
*
eig_tex
,
tmpCorners_
,
total
);
}
{
// sort detected corners on cpu side.
tmp
.
resize
(
total
);
cv
::
sort
(
tmp
,
DefCornerCompare
());
}
//estimate maximal size of final output array
int
total_max
=
maxCorners
>
0
?
std
::
min
(
maxCorners
,
total
)
:
total
;
int
D2
=
(
int
)
ceil
(
minDistance
*
minDistance
);
// allocate output buffer
vector
<
Point2f
>
tmp2
;
tmp2
.
reserve
(
total_max
);
if
(
minDistance
<
1
)
{
// we have not distance restriction. then just copy with conversion maximal allowed points into output array
for
(
int
i
=
0
;
i
<
total_max
&&
tmp
[
i
].
eig
>
0.0
f
;
++
i
)
{
Rect
roi_range
(
0
,
0
,
maxCorners
>
0
?
std
::
min
(
maxCorners
,
total
)
:
total
,
1
);
tmpCorners_
(
roi_range
).
copyTo
(
corners
);
tmp2
.
push_back
(
Point2f
(
tmp
[
i
].
x
,
tmp
[
i
].
y
)
);
}
}
else
{
vector
<
Point2f
>
tmp
(
total
);
downloadPoints
(
tmpCorners_
,
tmp
);
vector
<
Point2f
>
tmp2
;
tmp2
.
reserve
(
total
);
{
// we have distance restriction. then start coping to output array from the first element and check distance for each next one
const
int
cell_size
=
cvRound
(
minDistance
);
const
int
grid_width
=
(
image
.
cols
+
cell_size
-
1
)
/
cell_size
;
const
int
grid_height
=
(
image
.
rows
+
cell_size
-
1
)
/
cell_size
;
std
::
vector
<
std
::
vector
<
Point2
f
>
>
grid
(
grid_width
*
grid_height
);
std
::
vector
<
std
::
vector
<
Point2
i
>
>
grid
(
grid_width
*
grid_height
);
for
(
int
i
=
0
;
i
<
total
;
++
i
)
for
(
int
i
=
0
;
i
<
total
;
++
i
)
{
Point2f
p
=
tmp
[
i
];
DefCorner
p
=
tmp
[
i
];
if
(
p
.
eig
<=
0.0
f
)
break
;
// condition to stop that is needed for GPU bitonic sort usage.
bool
good
=
true
;
...
...
@@ -287,40 +307,42 @@ void cv::ocl::GoodFeaturesToTrackDetector_OCL::operator ()(const oclMat& image,
{
for
(
int
xx
=
x1
;
xx
<=
x2
;
xx
++
)
{
vector
<
Point2f
>&
m
=
grid
[
yy
*
grid_width
+
xx
];
if
(
!
m
.
empty
())
{
vector
<
Point2i
>&
m
=
grid
[
yy
*
grid_width
+
xx
];
if
(
m
.
empty
())
continue
;
for
(
size_t
j
=
0
;
j
<
m
.
size
();
j
++
)
{
floa
t
dx
=
p
.
x
-
m
[
j
].
x
;
floa
t
dy
=
p
.
y
-
m
[
j
].
y
;
in
t
dx
=
p
.
x
-
m
[
j
].
x
;
in
t
dy
=
p
.
y
-
m
[
j
].
y
;
if
(
dx
*
dx
+
dy
*
dy
<
minDistance
*
minDistance
)
if
(
dx
*
dx
+
dy
*
dy
<
D2
)
{
good
=
false
;
goto
break_out
;
}
goto
break_out_
;
}
}
}
}
break_out
:
break_out
_
:
if
(
good
)
{
grid
[
y_cell
*
grid_width
+
x_cell
].
push_back
(
p
);
grid
[
y_cell
*
grid_width
+
x_cell
].
push_back
(
Point2i
(
p
.
x
,
p
.
y
)
);
tmp2
.
push_back
(
p
);
tmp2
.
push_back
(
Point2f
(
p
.
x
,
p
.
y
)
);
if
(
maxCorners
>
0
&&
tmp2
.
size
()
==
static_cast
<
size_t
>
(
maxCorners
))
break
;
}
}
corners
.
upload
(
Mat
(
1
,
static_cast
<
int
>
(
tmp2
.
size
()),
CV_32FC2
,
&
tmp2
[
0
]));
}
int
final_size
=
static_cast
<
int
>
(
tmp2
.
size
());
if
(
final_size
>
0
)
corners
.
upload
(
Mat
(
1
,
final_size
,
CV_32FC2
,
&
tmp2
[
0
]));
else
corners
.
release
();
}
void
cv
::
ocl
::
GoodFeaturesToTrackDetector_OCL
::
downloadPoints
(
const
oclMat
&
points
,
vector
<
Point2f
>
&
points_v
)
{
...
...
modules/ocl/src/opencl/imgproc_gftt.cl
View file @
0cea828a
...
...
@@ -46,33 +46,26 @@
#
ifndef
WITH_MASK
#
define
WITH_MASK
0
#
endif
__constant
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_CLAMP_TO_EDGE |
CLK_FILTER_NEAREST
;
inline
float
ELEM_INT2
(
image2d_t
_eig,
int
_x,
int
_y
)
{
return
read_imagef
(
_eig,
sampler,
(
int2
)(
_x,
_y
))
.
x
;
}
inline
float
ELEM_FLT2
(
image2d_t
_eig,
float2
pt
)
{
return
read_imagef
(
_eig,
sampler,
pt
)
.
x
;
}
//macro
to
read
eigenvalue
matrix
#
define
GET_SRC_32F
(
_x,
_y
)
((
__global
const
float*
)(
eig
+
(
_y
)
*eig_pitch
))
[_x]
__kernel
void
findCorners
(
image2d_t
eig,
__global
const
char
*
mask,
__global
float2
*
corners,
__global
const
char*
eig,
const
int
eig_pitch,
__global
const
char*
mask,
__global
float2*
corners,
const
int
mask_strip,//
in
pixels
const
float
threshold,
__global
const
float*
pMinMax,
const
float
qualityLevel,
const
int
rows,
const
int
cols,
const
int
max_count,
__global
int
*
g_counter
__global
int
*
g_counter
)
{
float
threshold
=
qualityLevel*pMinMax[1]
;
const
int
j
=
get_global_id
(
0
)
;
const
int
i
=
get_global_id
(
1
)
;
...
...
@@ -82,39 +75,42 @@ __kernel
#
endif
)
{
const
float
val
=
ELEM_INT2
(
eig,
j,
i
)
;
const
float
val
=
GET_SRC_32F
(
j,
i
)
;
if
(
val
>
threshold
)
{
float
maxVal
=
val
;
maxVal
=
fmax
(
GET_SRC_32F
(
j
-
1
,
i
-
1
)
,
maxVal
)
;
maxVal
=
fmax
(
GET_SRC_32F
(
j
,
i
-
1
)
,
maxVal
)
;
maxVal
=
fmax
(
GET_SRC_32F
(
j
+
1
,
i
-
1
)
,
maxVal
)
;
maxVal
=
fmax
(
ELEM_INT2
(
eig,
j
-
1
,
i
-
1
)
,
maxVal
)
;
maxVal
=
fmax
(
ELEM_INT2
(
eig,
j
,
i
-
1
)
,
maxVal
)
;
maxVal
=
fmax
(
ELEM_INT2
(
eig,
j
+
1
,
i
-
1
)
,
maxVal
)
;
maxVal
=
fmax
(
ELEM_INT2
(
eig,
j
-
1
,
i
)
,
maxVal
)
;
maxVal
=
fmax
(
ELEM_INT2
(
eig,
j
+
1
,
i
)
,
maxVal
)
;
maxVal
=
fmax
(
GET_SRC_32F
(
j
-
1
,
i
)
,
maxVal
)
;
maxVal
=
fmax
(
GET_SRC_32F
(
j
+
1
,
i
)
,
maxVal
)
;
maxVal
=
fmax
(
ELEM_INT2
(
eig,
j
-
1
,
i
+
1
)
,
maxVal
)
;
maxVal
=
fmax
(
ELEM_INT2
(
eig,
j
,
i
+
1
)
,
maxVal
)
;
maxVal
=
fmax
(
ELEM_INT2
(
eig,
j
+
1
,
i
+
1
)
,
maxVal
)
;
maxVal
=
fmax
(
GET_SRC_32F
(
j
-
1
,
i
+
1
)
,
maxVal
)
;
maxVal
=
fmax
(
GET_SRC_32F
(
j
,
i
+
1
)
,
maxVal
)
;
maxVal
=
fmax
(
GET_SRC_32F
(
j
+
1
,
i
+
1
)
,
maxVal
)
;
if
(
val
==
maxVal
)
{
const
int
ind
=
atomic_inc
(
g_counter
)
;
if
(
ind
<
max_count
)
corners[ind]
=
(
float2
)(
j,
i
)
;
{//
pack
and
store
eigenvalue
and
its
coordinates
corners[ind].x
=
val
;
corners[ind].y
=
as_float
(
j
|
(
i<<16
))
;
}
}
}
}
}
#
undef
GET_SRC_32F
//bitonic
sort
__kernel
void
sortCorners_bitonicSort
(
image2d_t
eig,
__global
float2
*
corners,
const
int
count,
const
int
stage,
...
...
@@ -140,8 +136,8 @@ __kernel
const
float2
leftPt
=
corners[leftId]
;
const
float2
rightPt
=
corners[rightId]
;
const
float
leftVal
=
ELEM_FLT2
(
eig,
leftPt
)
;
const
float
rightVal
=
ELEM_FLT2
(
eig,
rightPt
)
;
const
float
leftVal
=
leftPt.x
;
const
float
rightVal
=
rightPt.x
;
const
bool
compareResult
=
leftVal
>
rightVal
;
...
...
@@ -152,124 +148,22 @@ __kernel
corners[rightId]
=
sortOrder
?
greater
:
lesser
;
}
//selection
sort
for
gfft
//kernel
is
ported
from
Bolt
library:
//https://github.com/HSA-Libraries/Bolt/blob/master/include/bolt/cl/sort_kernels.cl
//
Local
sort
will
firstly
sort
elements
of
each
workgroup
using
selection
sort
//
its
performance
is
O
(
n
)
__kernel
void
sortCorners_selectionSortLocal
(
image2d_t
eig,
__global
float2
*
corners,
const
int
count,
__local
float2
*
scratch
)
//
this
is
simple
short
serial
kernel
that
makes
some
short
reduction
and
initialization
work
//
it
makes
HOST
like
work
to
avoid
additional
sync
with
HOST
to
do
this
short
work
//
data
-
input/output
float2.
//
input
data
are
sevral
(
min,max
)
pairs
//
output
data
is
one
reduced
(
min,max
)
pair
//
g_counter
-
counter
that
have
to
be
initialized
by
0
for
next
findCorner
call.
__kernel
void
arithm_op_minMax_final
(
__global
float
*
data,
int
groupnum,__global
int
*
g_counter
)
{
int
i
=
get_local_id
(
0
)
; // index in workgroup
int
numOfGroups
=
get_num_groups
(
0
)
; // index in workgroup
int
groupID
=
get_group_id
(
0
)
;
int
wg
=
get_local_size
(
0
)
; // workgroup size = block size
int
n
; // number of elements to be processed for this work group
int
offset
=
groupID
*
wg
;
int
same
=
0
;
corners
+=
offset
;
n
=
(
groupID
==
(
numOfGroups-1
))
?
(
count
-
wg*
(
numOfGroups-1
))
:
wg
;
float2
pt1,
pt2
;
pt1
=
corners[min
(
i,
n
)
]
;
scratch[i]
=
pt1
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
i
>=
n
)
{
return
;
}
float
val1
=
ELEM_FLT2
(
eig,
pt1
)
;
float
val2
;
int
pos
=
0
;
for
(
int
j=0
;j<n;++j)
{
pt2
=
scratch[j]
;
val2
=
ELEM_FLT2
(
eig,
pt2
)
;
if
(
val2
>
val1
)
pos++
;//calculate the rank of this element in this work group
else
g_counter[0]
=
0
;
float
minVal
=
data[0]
;
float
maxVal
=
data[groupnum]
;
for
(
int
i=1
;i<groupnum;++i)
{
if
(
val1
>
val2
)
continue
;
else
{
//
val1
and
val2
are
same
same++
;
}
}
}
for
(
int
j=0
; j< same; j++)
corners[pos
+
j]
=
pt1
;
}
__kernel
void
sortCorners_selectionSortFinal
(
image2d_t
eig,
__global
float2
*
corners,
const
int
count
)
{
const
int
i
=
get_local_id
(
0
)
; // index in workgroup
const
int
numOfGroups
=
get_num_groups
(
0
)
; // index in workgroup
const
int
groupID
=
get_group_id
(
0
)
;
const
int
wg
=
get_local_size
(
0
)
; // workgroup size = block size
int
pos
=
0
,
same
=
0
;
const
int
offset
=
get_group_id
(
0
)
*
wg
;
const
int
remainder
=
count
-
wg*
(
numOfGroups-1
)
;
if
((
offset
+
i
)
>=
count
)
return
;
float2
pt1,
pt2
;
pt1
=
corners[groupID*wg
+
i]
;
float
val1
=
ELEM_FLT2
(
eig,
pt1
)
;
float
val2
;
for
(
int
j=0
; j<numOfGroups-1; j++ )
{
for
(
int
k=0
; k<wg; k++)
{
pt2
=
corners[j*wg
+
k]
;
val2
=
ELEM_FLT2
(
eig,
pt2
)
;
if
(
val1
>
val2
)
break
;
else
{
//Increment
only
if
the
value
is
not
the
same.
if
(
val2
>
val1
)
pos++
;
else
same++
;
}
}
}
for
(
int
k=0
; k<remainder; k++)
{
pt2
=
corners[
(
numOfGroups-1
)
*wg
+
k]
;
val2
=
ELEM_FLT2
(
eig,
pt2
)
;
if
(
val1
>
val2
)
break
;
else
{
//Don
't
increment
if
the
value
is
the
same.
//Two
elements
are
same
if
(
*userComp
)(
jData,
iData
)
and
(
*userComp
)(
iData,
jData
)
are
both
false
if
(
val2
>
val1
)
pos++
;
else
same++
;
}
minVal
=
min
(
minVal,data[i]
)
;
maxVal
=
max
(
maxVal,data[i+groupnum]
)
;
}
for
(
int
j=0
; j< same; j++)
corners[pos
+
j]
=
pt1
;
data[0]
=
minVal
;
data[1]
=
maxVal
;
}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment