Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
270b2c79
Commit
270b2c79
authored
Jul 26, 2013
by
yao
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Generate the Gaussian/interpolation lookup table (LUT) at runtime instead of hard-coding it
parent
f1060ac5
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
18 additions
and
203 deletions
+18
-203
hog.cpp
modules/ocl/src/hog.cpp
+17
-109
objdetect_hog.cl
modules/ocl/src/opencl/objdetect_hog.cl
+1
-94
No files found.
modules/ocl/src/hog.cpp
View file @
270b2c79
...
...
@@ -56,98 +56,6 @@ using namespace std;
static
oclMat
gauss_w_lut
;
static
bool
hog_device_cpu
;
/*
 * Pre-computed lookup tables for the HOG histogram kernel, valid when the
 * Gaussian window sigma is 4.0f.
 *
 * Layout: 512 floats = two consecutive 16x16 tables, each stored row by row.
 *   [0   .. 255]  Gaussian weights:      exp(-(dx*dx + dy*dy) / (2 * sigma * sigma))
 *   [256 .. 511]  interpolation weights: (8 - |dx + 0.5|) * (8 - |dy + 0.5|) / 64
 * where dx and dy each run over -8 .. 7 (row index = offset + 8).
 */
static const float gaussian_interp_lut[] =
{
    /* gaussian lut */
    /* row  0 (offset -8) */
    0.01831564f, 0.02926831f, 0.04393693f, 0.06196101f, 0.08208500f, 0.10215643f, 0.11943297f, 0.13117145f,
    0.13533528f, 0.13117145f, 0.11943297f, 0.10215643f, 0.08208500f, 0.06196101f, 0.04393693f, 0.02926831f,
    /* row  1 (offset -7) */
    0.02926831f, 0.04677062f, 0.07021102f, 0.09901341f, 0.13117145f, 0.16324551f, 0.19085334f, 0.20961139f,
    0.21626517f, 0.20961139f, 0.19085334f, 0.16324551f, 0.13117145f, 0.09901341f, 0.07021102f, 0.04677062f,
    /* row  2 (offset -6) */
    0.04393693f, 0.07021102f, 0.10539922f, 0.14863673f, 0.19691168f, 0.24506053f, 0.28650481f, 0.31466395f,
    0.32465246f, 0.31466395f, 0.28650481f, 0.24506053f, 0.19691168f, 0.14863673f, 0.10539922f, 0.07021102f,
    /* row  3 (offset -5) */
    0.06196101f, 0.09901341f, 0.14863673f, 0.20961139f, 0.27768996f, 0.34559074f, 0.40403652f, 0.44374731f,
    0.45783335f, 0.44374731f, 0.40403652f, 0.34559074f, 0.27768996f, 0.20961139f, 0.14863673f, 0.09901341f,
    /* row  4 (offset -4) */
    0.08208500f, 0.13117145f, 0.19691168f, 0.27768996f, 0.36787945f, 0.45783335f, 0.53526145f, 0.58786964f,
    0.60653067f, 0.58786964f, 0.53526145f, 0.45783335f, 0.36787945f, 0.27768996f, 0.19691168f, 0.13117145f,
    /* row  5 (offset -3) */
    0.10215643f, 0.16324551f, 0.24506053f, 0.34559074f, 0.45783335f, 0.56978285f, 0.66614360f, 0.73161560f,
    0.75483960f, 0.73161560f, 0.66614360f, 0.56978285f, 0.45783335f, 0.34559074f, 0.24506053f, 0.16324551f,
    /* row  6 (offset -2) */
    0.11943297f, 0.19085334f, 0.28650481f, 0.40403652f, 0.53526145f, 0.66614360f, 0.77880079f, 0.85534531f,
    0.88249689f, 0.85534531f, 0.77880079f, 0.66614360f, 0.53526145f, 0.40403652f, 0.28650481f, 0.19085334f,
    /* row  7 (offset -1) */
    0.13117145f, 0.20961139f, 0.31466395f, 0.44374731f, 0.58786964f, 0.73161560f, 0.85534531f, 0.93941307f,
    0.96923321f, 0.93941307f, 0.85534531f, 0.73161560f, 0.58786964f, 0.44374731f, 0.31466395f, 0.20961139f,
    /* row  8 (offset  0) */
    0.13533528f, 0.21626517f, 0.32465246f, 0.45783335f, 0.60653067f, 0.75483960f, 0.88249689f, 0.96923321f,
    1.00000000f, 0.96923321f, 0.88249689f, 0.75483960f, 0.60653067f, 0.45783335f, 0.32465246f, 0.21626517f,
    /* row  9 (offset +1) */
    0.13117145f, 0.20961139f, 0.31466395f, 0.44374731f, 0.58786964f, 0.73161560f, 0.85534531f, 0.93941307f,
    0.96923321f, 0.93941307f, 0.85534531f, 0.73161560f, 0.58786964f, 0.44374731f, 0.31466395f, 0.20961139f,
    /* row 10 (offset +2) */
    0.11943297f, 0.19085334f, 0.28650481f, 0.40403652f, 0.53526145f, 0.66614360f, 0.77880079f, 0.85534531f,
    0.88249689f, 0.85534531f, 0.77880079f, 0.66614360f, 0.53526145f, 0.40403652f, 0.28650481f, 0.19085334f,
    /* row 11 (offset +3) */
    0.10215643f, 0.16324551f, 0.24506053f, 0.34559074f, 0.45783335f, 0.56978285f, 0.66614360f, 0.73161560f,
    0.75483960f, 0.73161560f, 0.66614360f, 0.56978285f, 0.45783335f, 0.34559074f, 0.24506053f, 0.16324551f,
    /* row 12 (offset +4) */
    0.08208500f, 0.13117145f, 0.19691168f, 0.27768996f, 0.36787945f, 0.45783335f, 0.53526145f, 0.58786964f,
    0.60653067f, 0.58786964f, 0.53526145f, 0.45783335f, 0.36787945f, 0.27768996f, 0.19691168f, 0.13117145f,
    /* row 13 (offset +5) */
    0.06196101f, 0.09901341f, 0.14863673f, 0.20961139f, 0.27768996f, 0.34559074f, 0.40403652f, 0.44374731f,
    0.45783335f, 0.44374731f, 0.40403652f, 0.34559074f, 0.27768996f, 0.20961139f, 0.14863673f, 0.09901341f,
    /* row 14 (offset +6) */
    0.04393693f, 0.07021102f, 0.10539922f, 0.14863673f, 0.19691168f, 0.24506053f, 0.28650481f, 0.31466395f,
    0.32465246f, 0.31466395f, 0.28650481f, 0.24506053f, 0.19691168f, 0.14863673f, 0.10539922f, 0.07021102f,
    /* row 15 (offset +7) */
    0.02926831f, 0.04677062f, 0.07021102f, 0.09901341f, 0.13117145f, 0.16324551f, 0.19085334f, 0.20961139f,
    0.21626517f, 0.20961139f, 0.19085334f, 0.16324551f, 0.13117145f, 0.09901341f, 0.07021102f, 0.04677062f,

    /* interp_weight lut */
    /* row  0 (offset -8) */
    0.00390625f, 0.01171875f, 0.01953125f, 0.02734375f, 0.03515625f, 0.04296875f, 0.05078125f, 0.05859375f,
    0.05859375f, 0.05078125f, 0.04296875f, 0.03515625f, 0.02734375f, 0.01953125f, 0.01171875f, 0.00390625f,
    /* row  1 (offset -7) */
    0.01171875f, 0.03515625f, 0.05859375f, 0.08203125f, 0.10546875f, 0.12890625f, 0.15234375f, 0.17578125f,
    0.17578125f, 0.15234375f, 0.12890625f, 0.10546875f, 0.08203125f, 0.05859375f, 0.03515625f, 0.01171875f,
    /* row  2 (offset -6) */
    0.01953125f, 0.05859375f, 0.09765625f, 0.13671875f, 0.17578125f, 0.21484375f, 0.25390625f, 0.29296875f,
    0.29296875f, 0.25390625f, 0.21484375f, 0.17578125f, 0.13671875f, 0.09765625f, 0.05859375f, 0.01953125f,
    /* row  3 (offset -5) */
    0.02734375f, 0.08203125f, 0.13671875f, 0.19140625f, 0.24609375f, 0.30078125f, 0.35546875f, 0.41015625f,
    0.41015625f, 0.35546875f, 0.30078125f, 0.24609375f, 0.19140625f, 0.13671875f, 0.08203125f, 0.02734375f,
    /* row  4 (offset -4) */
    0.03515625f, 0.10546875f, 0.17578125f, 0.24609375f, 0.31640625f, 0.38671875f, 0.45703125f, 0.52734375f,
    0.52734375f, 0.45703125f, 0.38671875f, 0.31640625f, 0.24609375f, 0.17578125f, 0.10546875f, 0.03515625f,
    /* row  5 (offset -3) */
    0.04296875f, 0.12890625f, 0.21484375f, 0.30078125f, 0.38671875f, 0.47265625f, 0.55859375f, 0.64453125f,
    0.64453125f, 0.55859375f, 0.47265625f, 0.38671875f, 0.30078125f, 0.21484375f, 0.12890625f, 0.04296875f,
    /* row  6 (offset -2) */
    0.05078125f, 0.15234375f, 0.25390625f, 0.35546875f, 0.45703125f, 0.55859375f, 0.66015625f, 0.76171875f,
    0.76171875f, 0.66015625f, 0.55859375f, 0.45703125f, 0.35546875f, 0.25390625f, 0.15234375f, 0.05078125f,
    /* row  7 (offset -1) */
    0.05859375f, 0.17578125f, 0.29296875f, 0.41015625f, 0.52734375f, 0.64453125f, 0.76171875f, 0.87890625f,
    0.87890625f, 0.76171875f, 0.64453125f, 0.52734375f, 0.41015625f, 0.29296875f, 0.17578125f, 0.05859375f,
    /* row  8 (offset  0) */
    0.05859375f, 0.17578125f, 0.29296875f, 0.41015625f, 0.52734375f, 0.64453125f, 0.76171875f, 0.87890625f,
    0.87890625f, 0.76171875f, 0.64453125f, 0.52734375f, 0.41015625f, 0.29296875f, 0.17578125f, 0.05859375f,
    /* row  9 (offset +1) */
    0.05078125f, 0.15234375f, 0.25390625f, 0.35546875f, 0.45703125f, 0.55859375f, 0.66015625f, 0.76171875f,
    0.76171875f, 0.66015625f, 0.55859375f, 0.45703125f, 0.35546875f, 0.25390625f, 0.15234375f, 0.05078125f,
    /* row 10 (offset +2) */
    0.04296875f, 0.12890625f, 0.21484375f, 0.30078125f, 0.38671875f, 0.47265625f, 0.55859375f, 0.64453125f,
    0.64453125f, 0.55859375f, 0.47265625f, 0.38671875f, 0.30078125f, 0.21484375f, 0.12890625f, 0.04296875f,
    /* row 11 (offset +3) */
    0.03515625f, 0.10546875f, 0.17578125f, 0.24609375f, 0.31640625f, 0.38671875f, 0.45703125f, 0.52734375f,
    0.52734375f, 0.45703125f, 0.38671875f, 0.31640625f, 0.24609375f, 0.17578125f, 0.10546875f, 0.03515625f,
    /* row 12 (offset +4) */
    0.02734375f, 0.08203125f, 0.13671875f, 0.19140625f, 0.24609375f, 0.30078125f, 0.35546875f, 0.41015625f,
    0.41015625f, 0.35546875f, 0.30078125f, 0.24609375f, 0.19140625f, 0.13671875f, 0.08203125f, 0.02734375f,
    /* row 13 (offset +5) */
    0.01953125f, 0.05859375f, 0.09765625f, 0.13671875f, 0.17578125f, 0.21484375f, 0.25390625f, 0.29296875f,
    0.29296875f, 0.25390625f, 0.21484375f, 0.17578125f, 0.13671875f, 0.09765625f, 0.05859375f, 0.01953125f,
    /* row 14 (offset +6) */
    0.01171875f, 0.03515625f, 0.05859375f, 0.08203125f, 0.10546875f, 0.12890625f, 0.15234375f, 0.17578125f,
    0.17578125f, 0.15234375f, 0.12890625f, 0.10546875f, 0.08203125f, 0.05859375f, 0.03515625f, 0.01171875f,
    /* row 15 (offset +7) */
    0.00390625f, 0.01171875f, 0.01953125f, 0.02734375f, 0.03515625f, 0.04296875f, 0.05078125f, 0.05859375f,
    0.05859375f, 0.05078125f, 0.04296875f, 0.03515625f, 0.02734375f, 0.01953125f, 0.01171875f, 0.00390625f
};
namespace
cv
{
...
...
@@ -180,7 +88,7 @@ namespace cv
int
nblocks_win_x
,
int
nblocks_win_y
);
void
compute_hists
(
int
nbins
,
int
block_stride_x
,
int
blovck_stride_y
,
int
height
,
int
width
,
float
sigma
,
const
cv
::
ocl
::
oclMat
&
grad
,
int
height
,
int
width
,
const
cv
::
ocl
::
oclMat
&
grad
,
const
cv
::
ocl
::
oclMat
&
qangle
,
const
cv
::
ocl
::
oclMat
&
gauss_w_lut
,
cv
::
ocl
::
oclMat
&
block_hists
);
...
...
@@ -328,10 +236,18 @@ void cv::ocl::HOGDescriptor::init_buffer(const oclMat &img, Size win_stride)
Size
wins_per_img
=
numPartsWithin
(
img
.
size
(),
win_size
,
win_stride
);
labels
.
create
(
1
,
wins_per_img
.
area
(),
CV_8U
);
vector
<
float
>
v_lut
=
vector
<
float
>
(
gaussian_interp_lut
,
gaussian_interp_lut
+
sizeof
(
gaussian_interp_lut
)
/
sizeof
(
gaussian_interp_lut
[
0
]));
Mat
m_lut
(
v_lut
);
gauss_w_lut
.
upload
(
m_lut
.
reshape
(
1
,
1
));
float
sigma
=
getWinSigma
();
float
scale
=
1.
f
/
(
2.
f
*
sigma
*
sigma
);
Mat
gaussian_lut
(
1
,
512
,
CV_32FC1
);
int
idx
=
0
;
for
(
int
i
=-
8
;
i
<
8
;
i
++
)
for
(
int
j
=-
8
;
j
<
8
;
j
++
)
gaussian_lut
.
at
<
float
>
(
idx
++
)
=
std
::
exp
(
-
(
j
*
j
+
i
*
i
)
*
scale
);
for
(
int
i
=-
8
;
i
<
8
;
i
++
)
for
(
int
j
=-
8
;
j
<
8
;
j
++
)
gaussian_lut
.
at
<
float
>
(
idx
++
)
=
(
8.
f
-
fabs
(
j
+
0.5
f
))
*
(
8.
f
-
fabs
(
i
+
0.5
f
))
/
64.
f
;
gauss_w_lut
.
upload
(
gaussian_lut
);
}
void
cv
::
ocl
::
HOGDescriptor
::
computeGradient
(
const
oclMat
&
img
,
oclMat
&
grad
,
oclMat
&
qangle
)
...
...
@@ -358,7 +274,7 @@ void cv::ocl::HOGDescriptor::computeBlockHistograms(const oclMat &img)
computeGradient
(
img
,
this
->
grad
,
this
->
qangle
);
hog
::
compute_hists
(
nbins
,
block_stride
.
width
,
block_stride
.
height
,
effect_size
.
height
,
effect_size
.
width
,
(
float
)
getWinSigma
(),
grad
,
qangle
,
gauss_w_lut
,
block_hists
);
effect_size
.
width
,
grad
,
qangle
,
gauss_w_lut
,
block_hists
);
hog
::
normalize_hists
(
nbins
,
block_stride
.
width
,
block_stride
.
height
,
effect_size
.
height
,
effect_size
.
width
,
block_hists
,
(
float
)
threshold_L2hys
);
...
...
@@ -1708,7 +1624,7 @@ void cv::ocl::device::hog::set_up_constants(int nbins,
void
cv
::
ocl
::
device
::
hog
::
compute_hists
(
int
nbins
,
int
block_stride_x
,
int
block_stride_y
,
int
height
,
int
width
,
float
sigma
,
int
height
,
int
width
,
const
cv
::
ocl
::
oclMat
&
grad
,
const
cv
::
ocl
::
oclMat
&
qangle
,
const
cv
::
ocl
::
oclMat
&
gauss_w_lut
,
...
...
@@ -1716,8 +1632,7 @@ void cv::ocl::device::hog::compute_hists(int nbins,
{
Context
*
clCxt
=
Context
::
getContext
();
vector
<
pair
<
size_t
,
const
void
*>
>
args
;
string
kernelName
=
(
sigma
==
4.0
f
)
?
"compute_hists_lut_kernel"
:
"compute_hists_kernel"
;
string
kernelName
=
"compute_hists_lut_kernel"
;
int
img_block_width
=
(
width
-
CELLS_PER_BLOCK_X
*
CELL_WIDTH
+
block_stride_x
)
/
block_stride_x
;
...
...
@@ -1728,9 +1643,6 @@ void cv::ocl::device::hog::compute_hists(int nbins,
int
grad_quadstep
=
grad
.
step
>>
2
;
int
qangle_step
=
qangle
.
step
;
// Precompute gaussian spatial window parameter
float
scale
=
1.
f
/
(
2.
f
*
sigma
*
sigma
);
int
blocks_in_group
=
4
;
size_t
localThreads
[
3
]
=
{
blocks_in_group
*
24
,
2
,
1
};
size_t
globalThreads
[
3
]
=
{
...
...
@@ -1751,14 +1663,10 @@ void cv::ocl::device::hog::compute_hists(int nbins,
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
qangle_step
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
grad
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
qangle
.
data
));
if
(
kernelName
.
compare
(
"compute_hists_lut_kernel"
)
==
0
)
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
gauss_w_lut
.
data
));
else
args
.
push_back
(
make_pair
(
sizeof
(
cl_float
),
(
void
*
)
&
scale
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
gauss_w_lut
.
data
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_mem
),
(
void
*
)
&
block_hists
.
data
));
args
.
push_back
(
make_pair
(
smem
,
(
void
*
)
NULL
));
if
(
hog_device_cpu
)
{
openCLExecuteKernel
(
clCxt
,
&
objdetect_hog
,
kernelName
,
globalThreads
,
...
...
modules/ocl/src/opencl/objdetect_hog.cl
View file @
270b2c79
...
...
@@ -53,7 +53,7 @@
//----------------------------------------------------------------------------
//
Histogram
computation
//
12
threads
for
a
cell,
12x4
threads
per
block
//
Use
pre-computed
gaussian
and
interp_weight
lookup
tables
if
sigma
is
4.0f
//
Use
pre-computed
gaussian
and
interp_weight
lookup
tables
__kernel
void
compute_hists_lut_kernel
(
const
int
cblock_stride_x,
const
int
cblock_stride_y,
const
int
cnbins,
const
int
cblock_hist_size,
const
int
img_block_width,
...
...
@@ -146,99 +146,6 @@ __kernel void compute_hists_lut_kernel(
}
}
//----------------------------------------------------------------------------
//
Histogram
computation
//
12
threads
for
a
cell,
12x4
threads
per
block
__kernel
void
compute_hists_kernel
(
const
int
cblock_stride_x,
const
int
cblock_stride_y,
const
int
cnbins,
const
int
cblock_hist_size,
const
int
img_block_width,
const
int
blocks_in_group,
const
int
blocks_total,
const
int
grad_quadstep,
const
int
qangle_step,
__global
const
float*
grad,
__global
const
uchar*
qangle,
const
float
scale,
__global
float*
block_hists,
__local
float*
smem
)
{
const
int
lx
=
get_local_id
(
0
)
;
const
int
lp
=
lx
/
24
; /* local group id */
const
int
gid
=
get_group_id
(
0
)
*
blocks_in_group
+
lp
;/* global group id */
const
int
gidY
=
gid
/
img_block_width
;
const
int
gidX
=
gid
-
gidY
*
img_block_width
;
const
int
lidX
=
lx
-
lp
*
24
;
const
int
lidY
=
get_local_id
(
1
)
;
const
int
cell_x
=
lidX
/
12
;
const
int
cell_y
=
lidY
;
const
int
cell_thread_x
=
lidX
-
cell_x
*
12
;
__local
float*
hists
=
smem
+
lp
*
cnbins
*
(
CELLS_PER_BLOCK_X
*
CELLS_PER_BLOCK_Y
*
12
+
CELLS_PER_BLOCK_X
*
CELLS_PER_BLOCK_Y
)
;
__local
float*
final_hist
=
hists
+
cnbins
*
(
CELLS_PER_BLOCK_X
*
CELLS_PER_BLOCK_Y
*
12
)
;
const
int
offset_x
=
gidX
*
cblock_stride_x
+
(
cell_x
<<
2
)
+
cell_thread_x
;
const
int
offset_y
=
gidY
*
cblock_stride_y
+
(
cell_y
<<
2
)
;
__global
const
float*
grad_ptr
=
(
gid
<
blocks_total
)
?
grad
+
offset_y
*
grad_quadstep
+
(
offset_x
<<
1
)
:
grad
;
__global
const
uchar*
qangle_ptr
=
(
gid
<
blocks_total
)
?
qangle
+
offset_y
*
qangle_step
+
(
offset_x
<<
1
)
:
qangle
;
__local
float*
hist
=
hists
+
12
*
(
cell_y
*
CELLS_PER_BLOCK_Y
+
cell_x
)
+
cell_thread_x
;
for
(
int
bin_id
=
0
; bin_id < cnbins; ++bin_id)
hist[bin_id
*
48]
=
0.f
;
const
int
dist_x
=
-4
+
cell_thread_x
-
4
*
cell_x
;
const
int
dist_center_x
=
dist_x
-
4
*
(
1
-
2
*
cell_x
)
;
const
int
dist_y_begin
=
-4
-
4
*
lidY
;
for
(
int
dist_y
=
dist_y_begin
; dist_y < dist_y_begin + 12; ++dist_y)
{
float2
vote
=
(
float2
)
(
grad_ptr[0],
grad_ptr[1]
)
;
uchar2
bin
=
(
uchar2
)
(
qangle_ptr[0],
qangle_ptr[1]
)
;
grad_ptr
+=
grad_quadstep
;
qangle_ptr
+=
qangle_step
;
int
dist_center_y
=
dist_y
-
4
*
(
1
-
2
*
cell_y
)
;
float
gaussian
=
exp
(
-
(
dist_center_y
*
dist_center_y
+
dist_center_x
*
dist_center_x
)
*
scale
)
;
float
interp_weight
=
(
8.f
-
fabs
(
dist_y
+
0.5f
))
*
(
8.f
-
fabs
(
dist_x
+
0.5f
))
/
64.f
;
hist[bin.x
*
48]
+=
gaussian
*
interp_weight
*
vote.x
;
hist[bin.y
*
48]
+=
gaussian
*
interp_weight
*
vote.y
;
}
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
volatile
__local
float*
hist_
=
hist
;
for
(
int
bin_id
=
0
; bin_id < cnbins; ++bin_id, hist_ += 48)
{
if
(
cell_thread_x
<
6
)
hist_[0]
+=
hist_[6]
;
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
if
(
cell_thread_x
<
3
)
hist_[0]
+=
hist_[3]
;
#
ifdef
CPU
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
#
endif
if
(
cell_thread_x
==
0
)
final_hist[
(
cell_x
*
2
+
cell_y
)
*
cnbins
+
bin_id]
=
hist_[0]
+
hist_[1]
+
hist_[2]
;
}
#
ifdef
CPU
barrier
(
CLK_LOCAL_MEM_FENCE
)
;
#
endif
int
tid
=
(
cell_y
*
CELLS_PER_BLOCK_Y
+
cell_x
)
*
12
+
cell_thread_x
;
if
((
tid
<
cblock_hist_size
)
&&
(
gid
<
blocks_total
))
{
__global
float*
block_hist
=
block_hists
+
(
gidY
*
img_block_width
+
gidX
)
*
cblock_hist_size
;
block_hist[tid]
=
final_hist[tid]
;
}
}
//-------------------------------------------------------------
//
Normalization
of
histograms
via
L2Hys_norm
//
optimized
for
the
case
of
9
bins
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment