Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
bc78e87a
Commit
bc78e87a
authored
Jul 30, 2013
by
Andrey Pavlenko
Committed by
OpenCV Buildbot
Jul 30, 2013
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #1123 from bitwangyaoyao:2.4_fix
parents
26a3cabb
270b2c79
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
77 additions
and
371 deletions
+77
-371
perf_opticalflow.cpp
modules/ocl/perf/perf_opticalflow.cpp
+2
-2
hog.cpp
modules/ocl/src/hog.cpp
+28
-108
objdetect_hog.cl
modules/ocl/src/opencl/objdetect_hog.cl
+1
-94
pyrlk.cl
modules/ocl/src/opencl/pyrlk.cl
+46
-167
No files found.
modules/ocl/perf/perf_opticalflow.cpp
View file @
bc78e87a
...
@@ -48,8 +48,8 @@
...
@@ -48,8 +48,8 @@
///////////// PyrLKOpticalFlow ////////////////////////
///////////// PyrLKOpticalFlow ////////////////////////
PERFTEST
(
PyrLKOpticalFlow
)
PERFTEST
(
PyrLKOpticalFlow
)
{
{
std
::
string
images1
[]
=
{
"rubberwhale1.png"
,
"
basketball1.pn
g"
};
std
::
string
images1
[]
=
{
"rubberwhale1.png"
,
"
aloeL.jp
g"
};
std
::
string
images2
[]
=
{
"rubberwhale2.png"
,
"
basketball2.pn
g"
};
std
::
string
images2
[]
=
{
"rubberwhale2.png"
,
"
aloeR.jp
g"
};
for
(
size_t
i
=
0
;
i
<
sizeof
(
images1
)
/
sizeof
(
std
::
string
);
i
++
)
for
(
size_t
i
=
0
;
i
<
sizeof
(
images1
)
/
sizeof
(
std
::
string
);
i
++
)
{
{
...
...
modules/ocl/src/hog.cpp
View file @
bc78e87a
...
@@ -56,98 +56,6 @@ using namespace std;
...
@@ -56,98 +56,6 @@ using namespace std;
static
oclMat
gauss_w_lut
;
static
oclMat
gauss_w_lut
;
static
bool
hog_device_cpu
;
static
bool
hog_device_cpu
;
/* pre-compute gaussian and interp_weight lookup tables if sigma is 4.0f */
static
const
float
gaussian_interp_lut
[]
=
{
/* gaussian lut */
0.01831564
f
,
0.02926831
f
,
0.04393693
f
,
0.06196101
f
,
0.08208500
f
,
0.10215643
f
,
0.11943297
f
,
0.13117145
f
,
0.13533528
f
,
0.13117145
f
,
0.11943297
f
,
0.10215643
f
,
0.08208500
f
,
0.06196101
f
,
0.04393693
f
,
0.02926831
f
,
0.02926831
f
,
0.04677062
f
,
0.07021102
f
,
0.09901341
f
,
0.13117145
f
,
0.16324551
f
,
0.19085334
f
,
0.20961139
f
,
0.21626517
f
,
0.20961139
f
,
0.19085334
f
,
0.16324551
f
,
0.13117145
f
,
0.09901341
f
,
0.07021102
f
,
0.04677062
f
,
0.04393693
f
,
0.07021102
f
,
0.10539922
f
,
0.14863673
f
,
0.19691168
f
,
0.24506053
f
,
0.28650481
f
,
0.31466395
f
,
0.32465246
f
,
0.31466395
f
,
0.28650481
f
,
0.24506053
f
,
0.19691168
f
,
0.14863673
f
,
0.10539922
f
,
0.07021102
f
,
0.06196101
f
,
0.09901341
f
,
0.14863673
f
,
0.20961139
f
,
0.27768996
f
,
0.34559074
f
,
0.40403652
f
,
0.44374731
f
,
0.45783335
f
,
0.44374731
f
,
0.40403652
f
,
0.34559074
f
,
0.27768996
f
,
0.20961139
f
,
0.14863673
f
,
0.09901341
f
,
0.08208500
f
,
0.13117145
f
,
0.19691168
f
,
0.27768996
f
,
0.36787945
f
,
0.45783335
f
,
0.53526145
f
,
0.58786964
f
,
0.60653067
f
,
0.58786964
f
,
0.53526145
f
,
0.45783335
f
,
0.36787945
f
,
0.27768996
f
,
0.19691168
f
,
0.13117145
f
,
0.10215643
f
,
0.16324551
f
,
0.24506053
f
,
0.34559074
f
,
0.45783335
f
,
0.56978285
f
,
0.66614360
f
,
0.73161560
f
,
0.75483960
f
,
0.73161560
f
,
0.66614360
f
,
0.56978285
f
,
0.45783335
f
,
0.34559074
f
,
0.24506053
f
,
0.16324551
f
,
0.11943297
f
,
0.19085334
f
,
0.28650481
f
,
0.40403652
f
,
0.53526145
f
,
0.66614360
f
,
0.77880079
f
,
0.85534531
f
,
0.88249689
f
,
0.85534531
f
,
0.77880079
f
,
0.66614360
f
,
0.53526145
f
,
0.40403652
f
,
0.28650481
f
,
0.19085334
f
,
0.13117145
f
,
0.20961139
f
,
0.31466395
f
,
0.44374731
f
,
0.58786964
f
,
0.73161560
f
,
0.85534531
f
,
0.93941307
f
,
0.96923321
f
,
0.93941307
f
,
0.85534531
f
,
0.73161560
f
,
0.58786964
f
,
0.44374731
f
,
0.31466395
f
,
0.20961139
f
,
0.13533528
f
,
0.21626517
f
,
0.32465246
f
,
0.45783335
f
,
0.60653067
f
,
0.75483960
f
,
0.88249689
f
,
0.96923321
f
,
1.00000000
f
,
0.96923321
f
,
0.88249689
f
,
0.75483960
f
,
0.60653067
f
,
0.45783335
f
,
0.32465246
f
,
0.21626517
f
,
0.13117145
f
,
0.20961139
f
,
0.31466395
f
,
0.44374731
f
,
0.58786964
f
,
0.73161560
f
,
0.85534531
f
,
0.93941307
f
,
0.96923321
f
,
0.93941307
f
,
0.85534531
f
,
0.73161560
f
,
0.58786964
f
,
0.44374731
f
,
0.31466395
f
,
0.20961139
f
,
0.11943297
f
,
0.19085334
f
,
0.28650481
f
,
0.40403652
f
,
0.53526145
f
,
0.66614360
f
,
0.77880079
f
,
0.85534531
f
,
0.88249689
f
,
0.85534531
f
,
0.77880079
f
,
0.66614360
f
,
0.53526145
f
,
0.40403652
f
,
0.28650481
f
,
0.19085334
f
,
0.10215643
f
,
0.16324551
f
,
0.24506053
f
,
0.34559074
f
,
0.45783335
f
,
0.56978285
f
,
0.66614360
f
,
0.73161560
f
,
0.75483960
f
,
0.73161560
f
,
0.66614360
f
,
0.56978285
f
,
0.45783335
f
,
0.34559074
f
,
0.24506053
f
,
0.16324551
f
,
0.08208500
f
,
0.13117145
f
,
0.19691168
f
,
0.27768996
f
,
0.36787945
f
,
0.45783335
f
,
0.53526145
f
,
0.58786964
f
,
0.60653067
f
,
0.58786964
f
,
0.53526145
f
,
0.45783335
f
,
0.36787945
f
,
0.27768996
f
,
0.19691168
f
,
0.13117145
f
,
0.06196101
f
,
0.09901341
f
,
0.14863673
f
,
0.20961139
f
,
0.27768996
f
,
0.34559074
f
,
0.40403652
f
,
0.44374731
f
,
0.45783335
f
,
0.44374731
f
,
0.40403652
f
,
0.34559074
f
,
0.27768996
f
,
0.20961139
f
,
0.14863673
f
,
0.09901341
f
,
0.04393693
f
,
0.07021102
f
,
0.10539922
f
,
0.14863673
f
,
0.19691168
f
,
0.24506053
f
,
0.28650481
f
,
0.31466395
f
,
0.32465246
f
,
0.31466395
f
,
0.28650481
f
,
0.24506053
f
,
0.19691168
f
,
0.14863673
f
,
0.10539922
f
,
0.07021102
f
,
0.02926831
f
,
0.04677062
f
,
0.07021102
f
,
0.09901341
f
,
0.13117145
f
,
0.16324551
f
,
0.19085334
f
,
0.20961139
f
,
0.21626517
f
,
0.20961139
f
,
0.19085334
f
,
0.16324551
f
,
0.13117145
f
,
0.09901341
f
,
0.07021102
f
,
0.04677062
f
,
/* interp_weight lut */
0.00390625
f
,
0.01171875
f
,
0.01953125
f
,
0.02734375
f
,
0.03515625
f
,
0.04296875
f
,
0.05078125
f
,
0.05859375
f
,
0.05859375
f
,
0.05078125
f
,
0.04296875
f
,
0.03515625
f
,
0.02734375
f
,
0.01953125
f
,
0.01171875
f
,
0.00390625
f
,
0.01171875
f
,
0.03515625
f
,
0.05859375
f
,
0.08203125
f
,
0.10546875
f
,
0.12890625
f
,
0.15234375
f
,
0.17578125
f
,
0.17578125
f
,
0.15234375
f
,
0.12890625
f
,
0.10546875
f
,
0.08203125
f
,
0.05859375
f
,
0.03515625
f
,
0.01171875
f
,
0.01953125
f
,
0.05859375
f
,
0.09765625
f
,
0.13671875
f
,
0.17578125
f
,
0.21484375
f
,
0.25390625
f
,
0.29296875
f
,
0.29296875
f
,
0.25390625
f
,
0.21484375
f
,
0.17578125
f
,
0.13671875
f
,
0.09765625
f
,
0.05859375
f
,
0.01953125
f
,
0.02734375
f
,
0.08203125
f
,
0.13671875
f
,
0.19140625
f
,
0.24609375
f
,
0.30078125
f
,
0.35546875
f
,
0.41015625
f
,
0.41015625
f
,
0.35546875
f
,
0.30078125
f
,
0.24609375
f
,
0.19140625
f
,
0.13671875
f
,
0.08203125
f
,
0.02734375
f
,
0.03515625
f
,
0.10546875
f
,
0.17578125
f
,
0.24609375
f
,
0.31640625
f
,
0.38671875
f
,
0.45703125
f
,
0.52734375
f
,
0.52734375
f
,
0.45703125
f
,
0.38671875
f
,
0.31640625
f
,
0.24609375
f
,
0.17578125
f
,
0.10546875
f
,
0.03515625
f
,
0.04296875
f
,
0.12890625
f
,
0.21484375
f
,
0.30078125
f
,
0.38671875
f
,
0.47265625
f
,
0.55859375
f
,
0.64453125
f
,
0.64453125
f
,
0.55859375
f
,
0.47265625
f
,
0.38671875
f
,
0.30078125
f
,
0.21484375
f
,
0.12890625
f
,
0.04296875
f
,
0.05078125
f
,
0.15234375
f
,
0.25390625
f
,
0.35546875
f
,
0.45703125
f
,
0.55859375
f
,
0.66015625
f
,
0.76171875
f
,
0.76171875
f
,
0.66015625
f
,
0.55859375
f
,
0.45703125
f
,
0.35546875
f
,
0.25390625
f
,
0.15234375
f
,
0.05078125
f
,
0.05859375
f
,
0.17578125
f
,
0.29296875
f
,
0.41015625
f
,
0.52734375
f
,
0.64453125
f
,
0.76171875
f
,
0.87890625
f
,
0.87890625
f
,
0.76171875
f
,
0.64453125
f
,
0.52734375
f
,
0.41015625
f
,
0.29296875
f
,
0.17578125
f
,
0.05859375
f
,
0.05859375
f
,
0.17578125
f
,
0.29296875
f
,
0.41015625
f
,
0.52734375
f
,
0.64453125
f
,
0.76171875
f
,
0.87890625
f
,
0.87890625
f
,
0.76171875
f
,
0.64453125
f
,
0.52734375
f
,
0.41015625
f
,
0.29296875
f
,
0.17578125
f
,
0.05859375
f
,
0.05078125
f
,
0.15234375
f
,
0.25390625
f
,
0.35546875
f
,
0.45703125
f
,
0.55859375
f
,
0.66015625
f
,
0.76171875
f
,
0.76171875
f
,
0.66015625
f
,
0.55859375
f
,
0.45703125
f
,
0.35546875
f
,
0.25390625
f
,
0.15234375
f
,
0.05078125
f
,
0.04296875
f
,
0.12890625
f
,
0.21484375
f
,
0.30078125
f
,
0.38671875
f
,
0.47265625
f
,
0.55859375
f
,
0.64453125
f
,
0.64453125
f
,
0.55859375
f
,
0.47265625
f
,
0.38671875
f
,
0.30078125
f
,
0.21484375
f
,
0.12890625
f
,
0.04296875
f
,
0.03515625
f
,
0.10546875
f
,
0.17578125
f
,
0.24609375
f
,
0.31640625
f
,
0.38671875
f
,
0.45703125
f
,
0.52734375
f
,
0.52734375
f
,
0.45703125
f
,
0.38671875
f
,
0.31640625
f
,
0.24609375
f
,
0.17578125
f
,
0.10546875
f
,
0.03515625
f
,
0.02734375
f
,
0.08203125
f
,
0.13671875
f
,
0.19140625
f
,
0.24609375
f
,
0.30078125
f
,
0.35546875
f
,
0.41015625
f
,
0.41015625
f
,
0.35546875
f
,
0.30078125
f
,
0.24609375
f
,
0.19140625
f
,
0.13671875
f
,
0.08203125
f
,
0.02734375
f
,
0.01953125
f
,
0.05859375
f
,
0.09765625
f
,
0.13671875
f
,
0.17578125
f
,
0.21484375
f
,
0.25390625
f
,
0.29296875
f
,
0.29296875
f
,
0.25390625
f
,
0.21484375
f
,
0.17578125
f
,
0.13671875
f
,
0.09765625
f
,
0.05859375
f
,
0.01953125
f
,
0.01171875
f
,
0.03515625
f
,
0.05859375
f
,
0.08203125
f
,
0.10546875
f
,
0.12890625
f
,
0.15234375
f
,
0.17578125
f
,
0.17578125
f
,
0.15234375
f
,
0.12890625
f
,
0.10546875
f
,
0.08203125
f
,
0.05859375
f
,
0.03515625
f
,
0.01171875
f
,
0.00390625
f
,
0.01171875
f
,
0.01953125
f
,
0.02734375
f
,
0.03515625
f
,
0.04296875
f
,
0.05078125
f
,
0.05859375
f
,
0.05859375
f
,
0.05078125
f
,
0.04296875
f
,
0.03515625
f
,
0.02734375
f
,
0.01953125
f
,
0.01171875
f
,
0.00390625
f
};
namespace
cv
namespace
cv
{
{
...
@@ -180,7 +88,7 @@ namespace cv
...
@@ -180,7 +88,7 @@ namespace cv
int
nblocks_win_x
,
int
nblocks_win_y
);
int
nblocks_win_x
,
int
nblocks_win_y
);
void
compute_hists
(
int
nbins
,
int
block_stride_x
,
int
blovck_stride_y
,
void
compute_hists
(
int
nbins
,
int
block_stride_x
,
int
blovck_stride_y
,
int
height
,
int
width
,
float
sigma
,
const
cv
::
ocl
::
oclMat
&
grad
,
int
height
,
int
width
,
const
cv
::
ocl
::
oclMat
&
grad
,
const
cv
::
ocl
::
oclMat
&
qangle
,
const
cv
::
ocl
::
oclMat
&
qangle
,
const
cv
::
ocl
::
oclMat
&
gauss_w_lut
,
cv
::
ocl
::
oclMat
&
block_hists
);
const
cv
::
ocl
::
oclMat
&
gauss_w_lut
,
cv
::
ocl
::
oclMat
&
block_hists
);
...
@@ -328,10 +236,18 @@ void cv::ocl::HOGDescriptor::init_buffer(const oclMat &img, Size win_stride)
...
@@ -328,10 +236,18 @@ void cv::ocl::HOGDescriptor::init_buffer(const oclMat &img, Size win_stride)
Size
wins_per_img
=
numPartsWithin
(
img
.
size
(),
win_size
,
win_stride
);
Size
wins_per_img
=
numPartsWithin
(
img
.
size
(),
win_size
,
win_stride
);
labels
.
create
(
1
,
wins_per_img
.
area
(),
CV_8U
);
labels
.
create
(
1
,
wins_per_img
.
area
(),
CV_8U
);
vector
<
float
>
v_lut
=
vector
<
float
>
(
gaussian_interp_lut
,
gaussian_interp_lut
+
float
sigma
=
getWinSigma
();
sizeof
(
gaussian_interp_lut
)
/
sizeof
(
gaussian_interp_lut
[
0
]));
float
scale
=
1.
f
/
(
2.
f
*
sigma
*
sigma
);
Mat
m_lut
(
v_lut
);
Mat
gaussian_lut
(
1
,
512
,
CV_32FC1
);
gauss_w_lut
.
upload
(
m_lut
.
reshape
(
1
,
1
));
int
idx
=
0
;
for
(
int
i
=-
8
;
i
<
8
;
i
++
)
for
(
int
j
=-
8
;
j
<
8
;
j
++
)
gaussian_lut
.
at
<
float
>
(
idx
++
)
=
std
::
exp
(
-
(
j
*
j
+
i
*
i
)
*
scale
);
for
(
int
i
=-
8
;
i
<
8
;
i
++
)
for
(
int
j
=-
8
;
j
<
8
;
j
++
)
gaussian_lut
.
at
<
float
>
(
idx
++
)
=
(
8.
f
-
fabs
(
j
+
0.5
f
))
*
(
8.
f
-
fabs
(
i
+
0.5
f
))
/
64.
f
;
gauss_w_lut
.
upload
(
gaussian_lut
);
}
}
void
cv
::
ocl
::
HOGDescriptor
::
computeGradient
(
const
oclMat
&
img
,
oclMat
&
grad
,
oclMat
&
qangle
)
void
cv
::
ocl
::
HOGDescriptor
::
computeGradient
(
const
oclMat
&
img
,
oclMat
&
grad
,
oclMat
&
qangle
)
...
@@ -358,7 +274,7 @@ void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat &img)
...
@@ -358,7 +274,7 @@ void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat &img)
computeGradient
(
img
,
this
->
grad
,
this
->
qangle
);
computeGradient
(
img
,
this
->
grad
,
this
->
qangle
);
hog
::
compute_hists
(
nbins
,
block_stride
.
width
,
block_stride
.
height
,
effect_size
.
height
,
hog
::
compute_hists
(
nbins
,
block_stride
.
width
,
block_stride
.
height
,
effect_size
.
height
,
effect_size
.
width
,
(
float
)
getWinSigma
(),
grad
,
qangle
,
gauss_w_lut
,
block_hists
);
effect_size
.
width
,
grad
,
qangle
,
gauss_w_lut
,
block_hists
);
hog
::
normalize_hists
(
nbins
,
block_stride
.
width
,
block_stride
.
height
,
effect_size
.
height
,
hog
::
normalize_hists
(
nbins
,
block_stride
.
width
,
block_stride
.
height
,
effect_size
.
height
,
effect_size
.
width
,
block_hists
,
(
float
)
threshold_L2hys
);
effect_size
.
width
,
block_hists
,
(
float
)
threshold_L2hys
);
...
@@ -1708,7 +1624,7 @@ void cv::ocl::device::hog::set_up_constants(int nbins,
...
@@ -1708,7 +1624,7 @@ void cv::ocl::device::hog::set_up_constants(int nbins,
void
cv
::
ocl
::
device
::
hog
::
compute_hists
(
int
nbins
,
void
cv
::
ocl
::
device
::
hog
::
compute_hists
(
int
nbins
,
int
block_stride_x
,
int
block_stride_y
,
int
block_stride_x
,
int
block_stride_y
,
int
height
,
int
width
,
float
sigma
,
int
height
,
int
width
,
const
cv
::
ocl
::
oclMat
&
grad
,
const
cv
::
ocl
::
oclMat
&
grad
,
const
cv
::
ocl
::
oclMat
&
qangle
,
const
cv
::
ocl
::
oclMat
&
qangle
,
const
cv
::
ocl
::
oclMat
&
gauss_w_lut
,
const
cv
::
ocl
::
oclMat
&
gauss_w_lut
,
...
@@ -1716,8 +1632,7 @@ void cv::ocl::device::hog::compute_hists(int nbins,
...
@@ -1716,8 +1632,7 @@ void cv::ocl::device::hog::compute_hists(int nbins,
{
{
Context
*
clCxt
=
Context
::
getContext
();
Context
*
clCxt
=
Context
::
getContext
();
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
string
kernelName
=
(
sigma
==
4.0
f
)
?
"compute_hists_lut_kernel"
:
string
kernelName
=
"compute_hists_lut_kernel"
;
"compute_hists_kernel"
;
int
img_block_width
=
(
width
-
CELLS_PER_BLOCK_X
*
CELL_WIDTH
+
block_stride_x
)
int
img_block_width
=
(
width
-
CELLS_PER_BLOCK_X
*
CELL_WIDTH
+
block_stride_x
)
/
block_stride_x
;
/
block_stride_x
;
...
@@ -1728,9 +1643,6 @@ void cv::ocl::device::hog::compute_hists(int nbins,
...
@@ -1728,9 +1643,6 @@ void cv::ocl::device::hog::compute_hists(int nbins,
int
grad_quadstep
=
grad
.
step
>>
2
;
int
grad_quadstep
=
grad
.
step
>>
2
;
int
qangle_step
=
qangle
.
step
;
int
qangle_step
=
qangle
.
step
;
// Precompute gaussian spatial window parameter
float
scale
=
1.
f
/
(
2.
f
*
sigma
*
sigma
);
int
blocks_in_group
=
4
;
int
blocks_in_group
=
4
;
size_t
localThreads
[
3
]
=
{
blocks_in_group
*
24
,
2
,
1
};
size_t
localThreads
[
3
]
=
{
blocks_in_group
*
24
,
2
,
1
};
size_t
globalThreads
[
3
]
=
{
size_t
globalThreads
[
3
]
=
{
...
@@ -1751,15 +1663,23 @@ void cv::ocl::device::hog::compute_hists(int nbins,
...
@@ -1751,15 +1663,23 @@ void cv::ocl::device::hog::compute_hists(int nbins,
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
qangle_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
qangle_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
grad
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
grad
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
qangle
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
qangle
.
data
));
if
(
kernelName
.
compare
(
"compute_hists_lut_kernel"
)
==
0
)
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
gauss_w_lut
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
gauss_w_lut
.
data
));
else
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
scale
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
block_hists
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
block_hists
.
data
));
args
.
push_back
(
make_pair
(
smem
,
(
void
*
)
NULL
));
args
.
push_back
(
make_pair
(
smem
,
(
void
*
)
NULL
));
if
(
hog_device_cpu
)
{
openCLExecuteKernel
(
clCxt
,
&
objdetect_hog
,
kernelName
,
globalThreads
,
openCLExecuteKernel
(
clCxt
,
&
objdetect_hog
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
);
localThreads
,
args
,
-
1
,
-
1
,
"-D CPU"
);
}
else
{
cl_kernel
kernel
=
openCLGetKernelFromSource
(
clCxt
,
&
objdetect_hog
,
kernelName
);
int
wave_size
=
queryDeviceInfo
<
WAVEFRONT_SIZE
,
int
>
(
kernel
);
char
opt
[
32
]
=
{
0
};
sprintf
(
opt
,
"-D WAVE_SIZE=%d"
,
wave_size
);
openCLExecuteKernel
(
clCxt
,
&
objdetect_hog
,
kernelName
,
globalThreads
,
localThreads
,
args
,
-
1
,
-
1
,
opt
);
}
}
}
void
cv
::
ocl
::
device
::
hog
::
normalize_hists
(
int
nbins
,
void
cv
::
ocl
::
device
::
hog
::
normalize_hists
(
int
nbins
,
...
...
modules/ocl/src/opencl/objdetect_hog.cl
View file @
bc78e87a
...
@@ -53,7 +53,7 @@
...
@@ -53,7 +53,7 @@
//----------------------------------------------------------------------------
//----------------------------------------------------------------------------
//
Histogram
computation
//
Histogram
computation
//
12
threads
for
a
cell,
12x4
threads
per
block
//
12
threads
for
a
cell,
12x4
threads
per
block
//
Use
pre-computed
gaussian
and
interp_weight
lookup
tables
if
sigma
is
4.0f
//
Use
pre-computed
gaussian
and
interp_weight
lookup
tables
__kernel
void
compute_hists_lut_kernel
(
__kernel
void
compute_hists_lut_kernel
(
const
int
cblock_stride_x,
const
int
cblock_stride_y,
const
int
cblock_stride_x,
const
int
cblock_stride_y,
const
int
cnbins,
const
int
cblock_hist_size,
const
int
img_block_width,
const
int
cnbins,
const
int
cblock_hist_size,
const
int
img_block_width,
...
@@ -146,99 +146,6 @@ __kernel void compute_hists_lut_kernel(
...
@@ -146,99 +146,6 @@ __kernel void compute_hists_lut_kernel(
}
}
}
}
//----------------------------------------------------------------------------
//
Histogram
computation
//
12
threads
for
a
cell,
12x4
threads
per
block
__kernel
void
compute_hists_kernel
(
const
int
cblock_stride_x,
const
int
cblock_stride_y,
const
int
cnbins,
const
int
cblock_hist_size,
const
int
img_block_width,
const
int
blocks_in_group,
const
int
blocks_total,
const
int
grad_quadstep,
const
int
qangle_step,
__global
const
float*
grad,
__global
const
uchar*
qangle,
const
float
scale,
__global
float*
block_hists,
__local
float*
smem
)
{
const
int
lx
=
get_local_id
(
0
)
;
const
int
lp
=
lx
/
24
; /* local group id */
const
int
gid
=
get_group_id
(
0
)
*
blocks_in_group
+
lp
;/* global group id */
const
int
gidY
=
gid
/
img_block_width
;
const
int
gidX
=
gid
-
gidY
*
img_block_width
;
const
int
lidX
=
lx
-
lp
*
24
;
const
int
lidY
=
get_local_id
(
1
)
;
const
int
cell_x
=
lidX
/
12
;
const
int
cell_y
=
lidY
;
const
int
cell_thread_x
=
lidX
-
cell_x
*
12
;
__local
float*
hists
=
smem
+
lp
*
cnbins
*
(
CELLS_PER_BLOCK_X
*
CELLS_PER_BLOCK_Y
*
12
+
CELLS_PER_BLOCK_X
*
CELLS_PER_BLOCK_Y
)
;
__local
float*
final_hist
=
hists
+
cnbins
*
(
CELLS_PER_BLOCK_X
*
CELLS_PER_BLOCK_Y
*
12
)
;
const
int
offset_x
=
gidX
*
cblock_stride_x
+
(
cell_x
<<
2
)
+
cell_thread_x
;
const
int
offset_y
=
gidY
*
cblock_stride_y
+
(
cell_y
<<
2
)
;
__global
const
float*
grad_ptr
=
(
gid
<
blocks_total
)
?
grad
+
offset_y
*
grad_quadstep
+
(
offset_x
<<
1
)
:
grad
;
__global
const
uchar*
qangle_ptr
=
(
gid
<
blocks_total
)
?
qangle
+
offset_y
*
qangle_step
+
(
offset_x
<<
1
)
:
qangle
;
__local
float*
hist
=
hists
+
12
*
(
cell_y
*
CELLS_PER_BLOCK_Y
+
cell_x
)
+
cell_thread_x
;
for
(
int
bin_id
=
0
; bin_id < cnbins; ++bin_id)
hist[bin_id
*
48]
=
0.f
;
const
int
dist_x
=
-4
+
cell_thread_x
-
4
*
cell_x
;
const
int
dist_center_x
=
dist_x
-
4
*
(
1
-
2
*
cell_x
)
;
const
int
dist_y_begin
=
-4
-
4
*
lidY
;
for
(
int
dist_y
=
dist_y_begin
; dist_y < dist_y_begin + 12; ++dist_y)
{
float2
vote
=
(
float2
)
(
grad_ptr[0],
grad_ptr[1]
)
;
uchar2
bin
=
(
uchar2
)
(
qangle_ptr[0],
qangle_ptr[1]
)
;
grad_ptr
+=
grad_quadstep
;
qangle_ptr
+=
qangle_step
;
int
dist_center_y
=
dist_y
-
4
*
(
1
-
2
*
cell_y
)
;
float
gaussian
=
exp
(
-
(
dist_center_y
*
dist_center_y
+
dist_center_x
*
dist_center_x
)
*
scale
)
;
float
interp_weight
=
(
8.f
-
fabs
(
dist_y
+
0.5f
))
*
(
8.f
-
fabs
(
dist_x
+
0.5f
))
/
64.f
;
hist[bin.x
*
48]
+=
gaussian
*
interp_weight
*
vote.x
;
hist[bin.y
*
48]
+=
gaussian
*
interp_weight
*
vote.y
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
volatile
__local
float*
hist_
=
hist
;
for
(
int
bin_id
=
0
; bin_id < cnbins; ++bin_id, hist_ += 48)
{
if
(
cell_thread_x
<
6
)
hist_[0]
+=
hist_[6]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
cell_thread_x
<
3
)
hist_[0]
+=
hist_[3]
;
#
ifdef
CPU
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
#
endif
if
(
cell_thread_x
==
0
)
final_hist[
(
cell_x
*
2
+
cell_y
)
*
cnbins
+
bin_id]
=
hist_[0]
+
hist_[1]
+
hist_[2]
;
}
#
ifdef
CPU
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
#
endif
int
tid
=
(
cell_y
*
CELLS_PER_BLOCK_Y
+
cell_x
)
*
12
+
cell_thread_x
;
if
((
tid
<
cblock_hist_size
)
&&
(
gid
<
blocks_total
))
{
__global
float*
block_hist
=
block_hists
+
(
gidY
*
img_block_width
+
gidX
)
*
cblock_hist_size
;
block_hist[tid]
=
final_hist[tid]
;
}
}
//-------------------------------------------------------------
//-------------------------------------------------------------
//
Normalization
of
histograms
via
L2Hys_norm
//
Normalization
of
histograms
via
L2Hys_norm
//
optimized
for
the
case
of
9
bins
//
optimized
for
the
case
of
9
bins
...
...
modules/ocl/src/opencl/pyrlk.cl
View file @
bc78e87a
...
@@ -17,6 +17,7 @@
...
@@ -17,6 +17,7 @@
//
@Authors
//
@Authors
//
Dachuan
Zhao,
dachuan@multicorewareinc.com
//
Dachuan
Zhao,
dachuan@multicorewareinc.com
//
Yao
Wang,
bitwangyaoyao@gmail.com
//
Yao
Wang,
bitwangyaoyao@gmail.com
//
Xiaopeng
Fu,
fuxiaopeng2222@163.com
//
//
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
Redistribution
and
use
in
source
and
binary
forms,
with
or
without
modification,
//
are
permitted
provided
that
the
following
conditions
are
met:
//
are
permitted
provided
that
the
following
conditions
are
met:
...
@@ -47,6 +48,7 @@
...
@@ -47,6 +48,7 @@
//#pragma
OPENCL
EXTENSION
cl_amd_printf
:
enable
//#pragma
OPENCL
EXTENSION
cl_amd_printf
:
enable
#
define
BUFFER
64
#
define
BUFFER
64
#
define
BUFFER2
BUFFER>>1
#
ifndef
WAVE_SIZE
#
ifndef
WAVE_SIZE
#
define
WAVE_SIZE
1
#
define
WAVE_SIZE
1
#
endif
#
endif
...
@@ -58,53 +60,16 @@ void reduce3(float val1, float val2, float val3, __local float* smem1, __local
...
@@ -58,53 +60,16 @@ void reduce3(float val1, float val2, float val3, __local float* smem1, __local
smem3[tid]
=
val3
;
smem3[tid]
=
val3
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
32
)
for
(
int
i
=
BUFFER2
; i > 0; i >>= 1)
{
smem1[tid]
+=
smem1[tid
+
32]
;
smem2[tid]
+=
smem2[tid
+
32]
;
smem3[tid]
+=
smem3[tid
+
32]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
16
)
{
smem1[tid]
+=
smem1[tid
+
16]
;
smem2[tid]
+=
smem2[tid
+
16]
;
smem3[tid]
+=
smem3[tid
+
16]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
8
)
{
smem1[tid]
+=
smem1[tid
+
8]
;
smem2[tid]
+=
smem2[tid
+
8]
;
smem3[tid]
+=
smem3[tid
+
8]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
4
)
{
{
smem1[tid]
+=
smem1[tid
+
4]
;
if
(
tid
<
i
)
smem2[tid]
+=
smem2[tid
+
4]
;
smem3[tid]
+=
smem3[tid
+
4]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
2
)
{
{
smem1[tid]
+=
smem1[tid
+
2
]
;
smem1[tid]
+=
smem1[tid
+
i
]
;
smem2[tid]
+=
smem2[tid
+
2
]
;
smem2[tid]
+=
smem2[tid
+
i
]
;
smem3[tid]
+=
smem3[tid
+
2
]
;
smem3[tid]
+=
smem3[tid
+
i
]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
1
)
{
smem1[BUFFER]
=
smem1[tid]
+
smem1[tid
+
1]
;
smem2[BUFFER]
=
smem2[tid]
+
smem2[tid
+
1]
;
smem3[BUFFER]
=
smem3[tid]
+
smem3[tid
+
1]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
void
reduce2
(
float
val1,
float
val2,
volatile
__local
float*
smem1,
volatile
__local
float*
smem2,
int
tid
)
void
reduce2
(
float
val1,
float
val2,
volatile
__local
float*
smem1,
volatile
__local
float*
smem2,
int
tid
)
...
@@ -113,47 +78,15 @@ void reduce2(float val1, float val2, volatile __local float* smem1, volatile __l
...
@@ -113,47 +78,15 @@ void reduce2(float val1, float val2, volatile __local float* smem1, volatile __l
smem2[tid]
=
val2
;
smem2[tid]
=
val2
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
32
)
for
(
int
i
=
BUFFER2
; i > 0; i >>= 1)
{
smem1[tid]
+=
smem1[tid
+
32]
;
smem2[tid]
+=
smem2[tid
+
32]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
16
)
{
smem1[tid]
+=
smem1[tid
+
16]
;
smem2[tid]
+=
smem2[tid
+
16]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
8
)
{
smem1[tid]
+=
smem1[tid
+
8]
;
smem2[tid]
+=
smem2[tid
+
8]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
4
)
{
{
smem1[tid]
+=
smem1[tid
+
4]
;
if
(
tid
<
i
)
smem2[tid]
+=
smem2[tid
+
4]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
2
)
{
{
smem1[tid]
+=
smem1[tid
+
2
]
;
smem1[tid]
+=
smem1[tid
+
i
]
;
smem2[tid]
+=
smem2[tid
+
2
]
;
smem2[tid]
+=
smem2[tid
+
i
]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
1
)
{
smem1[BUFFER]
=
smem1[tid]
+
smem1[tid
+
1]
;
smem2[BUFFER]
=
smem2[tid]
+
smem2[tid
+
1]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
void
reduce1
(
float
val1,
volatile
__local
float*
smem1,
int
tid
)
void
reduce1
(
float
val1,
volatile
__local
float*
smem1,
int
tid
)
...
@@ -161,45 +94,18 @@ void reduce1(float val1, volatile __local float* smem1, int tid)
...
@@ -161,45 +94,18 @@ void reduce1(float val1, volatile __local float* smem1, int tid)
smem1[tid]
=
val1
;
smem1[tid]
=
val1
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
32
)
for
(
int
i
=
BUFFER2
; i > 0; i >>= 1)
{
smem1[tid]
+=
smem1[tid
+
32]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
16
)
{
smem1[tid]
+=
smem1[tid
+
16]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
8
)
{
{
smem1[tid]
+=
smem1[tid
+
8]
;
if
(
tid
<
i
)
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
4
)
{
{
smem1[tid]
+=
smem1[tid
+
4
]
;
smem1[tid]
+=
smem1[tid
+
i
]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
2
)
{
smem1[tid]
+=
smem1[tid
+
2]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
1
)
{
smem1[BUFFER]
=
smem1[tid]
+
smem1[tid
+
1]
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
#
else
#
else
void
reduce3
(
float
val1,
float
val2,
float
val3,
void
reduce3
(
float
val1,
float
val2,
float
val3,
__local
volatile
float*
smem1,
__local
volatile
float*
smem2,
__local
volatile
float*
smem3,
int
tid
)
__local
volatile
float*
smem1,
__local
volatile
float*
smem2,
__local
volatile
float*
smem3,
int
tid
)
{
{
smem1[tid]
=
val1
;
smem1[tid]
=
val1
;
smem2[tid]
=
val2
;
smem2[tid]
=
val2
;
...
@@ -212,15 +118,19 @@ __local volatile float* smem1, __local volatile float* smem2, __local volatile f
...
@@ -212,15 +118,19 @@ __local volatile float* smem1, __local volatile float* smem2, __local volatile f
smem2[tid]
+=
smem2[tid
+
32]
;
smem2[tid]
+=
smem2[tid
+
32]
;
smem3[tid]
+=
smem3[tid
+
32]
;
smem3[tid]
+=
smem3[tid
+
32]
;
#
if
WAVE_SIZE
<
32
#
if
WAVE_SIZE
<
32
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
tid
<
16
)
{
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
16
)
{
#
endif
#
endif
smem1[tid]
+=
smem1[tid
+
16]
;
smem1[tid]
+=
smem1[tid
+
16]
;
smem2[tid]
+=
smem2[tid
+
16]
;
smem2[tid]
+=
smem2[tid
+
16]
;
smem3[tid]
+=
smem3[tid
+
16]
;
smem3[tid]
+=
smem3[tid
+
16]
;
#
if
WAVE_SIZE
<16
#
if
WAVE_SIZE
<16
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
tid
<
8
)
{
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
8
)
{
#
endif
#
endif
smem1[tid]
+=
smem1[tid
+
8]
;
smem1[tid]
+=
smem1[tid
+
8]
;
smem2[tid]
+=
smem2[tid
+
8]
;
smem2[tid]
+=
smem2[tid
+
8]
;
...
@@ -238,6 +148,7 @@ __local volatile float* smem1, __local volatile float* smem2, __local volatile f
...
@@ -238,6 +148,7 @@ __local volatile float* smem1, __local volatile float* smem2, __local volatile f
smem2[tid]
+=
smem2[tid
+
1]
;
smem2[tid]
+=
smem2[tid
+
1]
;
smem3[tid]
+=
smem3[tid
+
1]
;
smem3[tid]
+=
smem3[tid
+
1]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
void
reduce2
(
float
val1,
float
val2,
__local
volatile
float*
smem1,
__local
volatile
float*
smem2,
int
tid
)
void
reduce2
(
float
val1,
float
val2,
__local
volatile
float*
smem1,
__local
volatile
float*
smem2,
int
tid
)
...
@@ -251,14 +162,18 @@ void reduce2(float val1, float val2, __local volatile float* smem1, __local vola
...
@@ -251,14 +162,18 @@ void reduce2(float val1, float val2, __local volatile float* smem1, __local vola
smem1[tid]
+=
smem1[tid
+
32]
;
smem1[tid]
+=
smem1[tid
+
32]
;
smem2[tid]
+=
smem2[tid
+
32]
;
smem2[tid]
+=
smem2[tid
+
32]
;
#
if
WAVE_SIZE
<
32
#
if
WAVE_SIZE
<
32
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
tid
<
16
)
{
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
16
)
{
#
endif
#
endif
smem1[tid]
+=
smem1[tid
+
16]
;
smem1[tid]
+=
smem1[tid
+
16]
;
smem2[tid]
+=
smem2[tid
+
16]
;
smem2[tid]
+=
smem2[tid
+
16]
;
#
if
WAVE_SIZE
<16
#
if
WAVE_SIZE
<16
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
tid
<
8
)
{
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
8
)
{
#
endif
#
endif
smem1[tid]
+=
smem1[tid
+
8]
;
smem1[tid]
+=
smem1[tid
+
8]
;
smem2[tid]
+=
smem2[tid
+
8]
;
smem2[tid]
+=
smem2[tid
+
8]
;
...
@@ -272,6 +187,7 @@ void reduce2(float val1, float val2, __local volatile float* smem1, __local vola
...
@@ -272,6 +187,7 @@ void reduce2(float val1, float val2, __local volatile float* smem1, __local vola
smem1[tid]
+=
smem1[tid
+
1]
;
smem1[tid]
+=
smem1[tid
+
1]
;
smem2[tid]
+=
smem2[tid
+
1]
;
smem2[tid]
+=
smem2[tid
+
1]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
void
reduce1
(
float
val1,
__local
volatile
float*
smem1,
int
tid
)
void
reduce1
(
float
val1,
__local
volatile
float*
smem1,
int
tid
)
...
@@ -283,19 +199,24 @@ void reduce1(float val1, __local volatile float* smem1, int tid)
...
@@ -283,19 +199,24 @@ void reduce1(float val1, __local volatile float* smem1, int tid)
{
{
smem1[tid]
+=
smem1[tid
+
32]
;
smem1[tid]
+=
smem1[tid
+
32]
;
#
if
WAVE_SIZE
<
32
#
if
WAVE_SIZE
<
32
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
tid
<
16
)
{
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
16
)
{
#
endif
#
endif
smem1[tid]
+=
smem1[tid
+
16]
;
smem1[tid]
+=
smem1[tid
+
16]
;
#
if
WAVE_SIZE
<16
#
if
WAVE_SIZE
<16
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
if
(
tid
<
8
)
{
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
tid
<
8
)
{
#
endif
#
endif
smem1[tid]
+=
smem1[tid
+
8]
;
smem1[tid]
+=
smem1[tid
+
8]
;
smem1[tid]
+=
smem1[tid
+
4]
;
smem1[tid]
+=
smem1[tid
+
4]
;
smem1[tid]
+=
smem1[tid
+
2]
;
smem1[tid]
+=
smem1[tid
+
2]
;
smem1[tid]
+=
smem1[tid
+
1]
;
smem1[tid]
+=
smem1[tid
+
1]
;
}
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
}
}
#
endif
#
endif
...
@@ -388,15 +309,9 @@ __kernel void lkSparse_C1_D5(image2d_t I, image2d_t J,
...
@@ -388,15 +309,9 @@ __kernel void lkSparse_C1_D5(image2d_t I, image2d_t J,
__global
const
float2*
prevPts,
int
prevPtsStep,
__global
float2*
nextPts,
int
nextPtsStep,
__global
uchar*
status,
__global
float*
err,
__global
const
float2*
prevPts,
int
prevPtsStep,
__global
float2*
nextPts,
int
nextPtsStep,
__global
uchar*
status,
__global
float*
err,
const
int
level,
const
int
rows,
const
int
cols,
int
PATCH_X,
int
PATCH_Y,
int
cn,
int
c_winSize_x,
int
c_winSize_y,
int
c_iters,
char
calcErr
)
const
int
level,
const
int
rows,
const
int
cols,
int
PATCH_X,
int
PATCH_Y,
int
cn,
int
c_winSize_x,
int
c_winSize_y,
int
c_iters,
char
calcErr
)
{
{
#
ifdef
CPU
__local
float
smem1[BUFFER+1]
;
__local
float
smem2[BUFFER+1]
;
__local
float
smem3[BUFFER+1]
;
#
else
__local
float
smem1[BUFFER]
;
__local
float
smem1[BUFFER]
;
__local
float
smem2[BUFFER]
;
__local
float
smem2[BUFFER]
;
__local
float
smem3[BUFFER]
;
__local
float
smem3[BUFFER]
;
#
endif
unsigned
int
xid=get_local_id
(
0
)
;
unsigned
int
xid=get_local_id
(
0
)
;
unsigned
int
yid=get_local_id
(
1
)
;
unsigned
int
yid=get_local_id
(
1
)
;
...
@@ -492,17 +407,11 @@ __kernel void lkSparse_C1_D5(image2d_t I, image2d_t J,
...
@@ -492,17 +407,11 @@ __kernel void lkSparse_C1_D5(image2d_t I, image2d_t J,
}
}
reduce3(A11, A12, A22, smem1, smem2, smem3, tid);
reduce3(A11, A12, A22, smem1, smem2, smem3, tid);
barrier(CLK_LOCAL_MEM_FENCE);
#ifdef CPU
A11 = smem1[BUFFER];
A12 = smem2[BUFFER];
A22 = smem3[BUFFER];
#else
A11 = smem1[0];
A11 = smem1[0];
A12 = smem2[0];
A12 = smem2[0];
A22 = smem3[0];
A22 = smem3[0];
#endif
barrier(CLK_LOCAL_MEM_FENCE);
float D = A11 * A22 - A12 * A12;
float D = A11 * A22 - A12 * A12;
...
@@ -592,15 +501,10 @@ __kernel void lkSparse_C1_D5(image2d_t I, image2d_t J,
...
@@ -592,15 +501,10 @@ __kernel void lkSparse_C1_D5(image2d_t I, image2d_t J,
}
}
reduce2(b1, b2, smem1, smem2, tid);
reduce2(b1, b2, smem1, smem2, tid);
barrier(CLK_LOCAL_MEM_FENCE);
#ifdef CPU
b1 = smem1[BUFFER];
b2 = smem2[BUFFER];
#else
b1 = smem1[0];
b1 = smem1[0];
b2 = smem2[0];
b2 = smem2[0];
#endif
barrier(CLK_LOCAL_MEM_FENCE);
float2 delta;
float2 delta;
delta.x = A12 * b2 - A22 * b1;
delta.x = A12 * b2 - A22 * b1;
...
@@ -675,11 +579,7 @@ __kernel void lkSparse_C1_D5(image2d_t I, image2d_t J,
...
@@ -675,11 +579,7 @@ __kernel void lkSparse_C1_D5(image2d_t I, image2d_t J,
nextPts[gid] = prevPt;
nextPts[gid] = prevPt;
if (calcErr)
if (calcErr)
#ifdef CPU
err[gid] = smem1[BUFFER] / (float)(c_winSize_x * c_winSize_y);
#else
err[gid] = smem1[0] / (float)(c_winSize_x * c_winSize_y);
err[gid] = smem1[0] / (float)(c_winSize_x * c_winSize_y);
#endif
}
}
}
}
...
@@ -688,15 +588,9 @@ __kernel void lkSparse_C4_D5(image2d_t I, image2d_t J,
...
@@ -688,15 +588,9 @@ __kernel void lkSparse_C4_D5(image2d_t I, image2d_t J,
__global const float2* prevPts, int prevPtsStep, __global float2* nextPts, int nextPtsStep, __global uchar* status, __global float* err,
__global const float2* prevPts, int prevPtsStep, __global float2* nextPts, int nextPtsStep, __global uchar* status, __global float* err,
const int level, const int rows, const int cols, int PATCH_X, int PATCH_Y, int cn, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr)
const int level, const int rows, const int cols, int PATCH_X, int PATCH_Y, int cn, int c_winSize_x, int c_winSize_y, int c_iters, char calcErr)
{
{
#ifdef CPU
__local float smem1[BUFFER+1];
__local float smem2[BUFFER+1];
__local float smem3[BUFFER+1];
#else
__local float smem1[BUFFER];
__local float smem1[BUFFER];
__local float smem2[BUFFER];
__local float smem2[BUFFER];
__local float smem3[BUFFER];
__local float smem3[BUFFER];
#endif
unsigned int xid=get_local_id(0);
unsigned int xid=get_local_id(0);
unsigned int yid=get_local_id(1);
unsigned int yid=get_local_id(1);
...
@@ -795,17 +689,11 @@ __kernel void lkSparse_C4_D5(image2d_t I, image2d_t J,
...
@@ -795,17 +689,11 @@ __kernel void lkSparse_C4_D5(image2d_t I, image2d_t J,
}
}
reduce3(A11, A12, A22, smem1, smem2, smem3, tid);
reduce3(A11, A12, A22, smem1, smem2, smem3, tid);
barrier(CLK_LOCAL_MEM_FENCE);
#ifdef CPU
A11 = smem1[BUFFER];
A12 = smem2[BUFFER];
A22 = smem3[BUFFER];
#else
A11 = smem1[0];
A11 = smem1[0];
A12 = smem2[0];
A12 = smem2[0];
A22 = smem3[0];
A22 = smem3[0];
#endif
barrier(CLK_LOCAL_MEM_FENCE);
float D = A11 * A22 - A12 * A12;
float D = A11 * A22 - A12 * A12;
...
@@ -895,15 +783,10 @@ __kernel void lkSparse_C4_D5(image2d_t I, image2d_t J,
...
@@ -895,15 +783,10 @@ __kernel void lkSparse_C4_D5(image2d_t I, image2d_t J,
}
}
reduce2(b1, b2, smem1, smem2, tid);
reduce2(b1, b2, smem1, smem2, tid);
barrier(CLK_LOCAL_MEM_FENCE);
#ifdef CPU
b1 = smem1[BUFFER];
b2 = smem2[BUFFER];
#else
b1 = smem1[0];
b1 = smem1[0];
b2 = smem2[0];
b2 = smem2[0];
#endif
barrier(CLK_LOCAL_MEM_FENCE);
float2 delta;
float2 delta;
delta.x = A12 * b2 - A22 * b1;
delta.x = A12 * b2 - A22 * b1;
...
@@ -977,11 +860,7 @@ __kernel void lkSparse_C4_D5(image2d_t I, image2d_t J,
...
@@ -977,11 +860,7 @@ __kernel void lkSparse_C4_D5(image2d_t I, image2d_t J,
nextPts[gid] = nextPt;
nextPts[gid] = nextPt;
if (calcErr)
if (calcErr)
#ifdef CPU
err[gid] = smem1[BUFFER] / (float)(3 * c_winSize_x * c_winSize_y);
#else
err[gid] = smem1[0] / (float)(3 * c_winSize_x * c_winSize_y);
err[gid] = smem1[0] / (float)(3 * c_winSize_x * c_winSize_y);
#endif
}
}
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment