Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
270b2c79
Commit
270b2c79
authored
Jul 26, 2013
by
yao
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
generating the lut table instead of hard coding one
parent
f1060ac5
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
1 addition
and
94 deletions
+1
-94
hog.cpp
modules/ocl/src/hog.cpp
+0
-0
objdetect_hog.cl
modules/ocl/src/opencl/objdetect_hog.cl
+1
-94
No files found.
modules/ocl/src/hog.cpp
View file @
270b2c79
This diff is collapsed.
Click to expand it.
modules/ocl/src/opencl/objdetect_hog.cl
View file @
270b2c79
...
...
@@ -53,7 +53,7 @@
//----------------------------------------------------------------------------
//
Histogram
computation
//
12
threads
for
a
cell,
12x4
threads
per
block
//
Use
pre-computed
gaussian
and
interp_weight
lookup
tables
if
sigma
is
4.0f
//
Use
pre-computed
gaussian
and
interp_weight
lookup
tables
__kernel
void
compute_hists_lut_kernel
(
const
int
cblock_stride_x,
const
int
cblock_stride_y,
const
int
cnbins,
const
int
cblock_hist_size,
const
int
img_block_width,
...
...
@@ -146,99 +146,6 @@ __kernel void compute_hists_lut_kernel(
}
}
//----------------------------------------------------------------------------
//
Histogram
computation
//
12
threads
for
a
cell,
12x4
threads
per
block
/*
 * Accumulates one block histogram per group of 24 consecutive work-items in
 * dimension 0 (times the work-items in dimension 1) and writes it to
 * block_hists.
 *
 * Work distribution, as derived from the index arithmetic below:
 *   - lx / 24 selects the block inside the work-group,
 *   - (lx % 24) / 12 selects the cell column, get_local_id(1) the cell row,
 *   - the 12 work-items of a cell each walk 12 rows of one column and keep a
 *     private partial sum per bin in local memory; the partial sums are then
 *     reduced to one value per (cell, bin).
 *
 * Parameters:
 *   cblock_stride_x, cblock_stride_y  step between neighbouring blocks, in
 *                     columns / rows of grad and qangle
 *   cnbins            number of histogram bins per cell
 *   cblock_hist_size  number of floats stored per block in block_hists
 *   img_block_width   number of blocks per row; used to split the linear
 *                     block index into (gidX, gidY)
 *   blocks_in_group   number of blocks handled by one work-group
 *   blocks_total      number of valid blocks; work-items whose block index is
 *                     not below it read from the start of grad/qangle and
 *                     write nothing
 *   grad_quadstep     row stride of grad, in floats
 *   qangle_step       row stride of qangle, in elements
 *   grad              two float votes per column (read at index 2 * x)
 *   qangle            two bin indices per column (read at index 2 * x)
 *   scale             factor applied to the squared distance from the block
 *                     centre inside exp()
 *   block_hists       output, cblock_hist_size floats per block
 *   smem              local scratch; each block uses
 *                     cnbins * (CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * 12 +
 *                               CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y) floats
 *
 * NOTE(review): the literal strides 24, 12 and 48 and the "cell_x * 2" index
 * look like they assume CELLS_PER_BLOCK_X == CELLS_PER_BLOCK_Y == 2 and
 * get_local_size(1) == 2 - confirm against the host-side launch code.
 */
__kernel void compute_hists_kernel(
    const int cblock_stride_x, const int cblock_stride_y,
    const int cnbins, const int cblock_hist_size, const int img_block_width,
    const int blocks_in_group, const int blocks_total,
    const int grad_quadstep, const int qangle_step,
    __global const float* grad, __global const uchar* qangle,
    const float scale, __global float* block_hists, __local float* smem)
{
    const int lx = get_local_id(0);
    const int lp = lx / 24; /* index of the block inside this work-group */
    const int gid = get_group_id(0) * blocks_in_group + lp; /* global block index */
    const int gidY = gid / img_block_width;
    const int gidX = gid - gidY * img_block_width;

    const int lidX = lx - lp * 24;
    const int lidY = get_local_id(1);

    const int cell_x = lidX / 12;
    const int cell_y = lidY;
    const int cell_thread_x = lidX - cell_x * 12;

    /* Per-block scratch: the partial sums first, the reduced histogram after them. */
    __local float* hists = smem + lp * cnbins * (CELLS_PER_BLOCK_X *
        CELLS_PER_BLOCK_Y * 12 + CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y);
    __local float* final_hist = hists + cnbins *
        (CELLS_PER_BLOCK_X * CELLS_PER_BLOCK_Y * 12);

    const int offset_x = gidX * cblock_stride_x + (cell_x << 2) + cell_thread_x;
    const int offset_y = gidY * cblock_stride_y + (cell_y << 2);

    /* Work-items of surplus blocks still run the loops (so that they reach
       every barrier) but read from the start of the input buffers. */
    __global const float* grad_ptr = (gid < blocks_total) ?
        grad + offset_y * grad_quadstep + (offset_x << 1) : grad;
    __global const uchar* qangle_ptr = (gid < blocks_total) ?
        qangle + offset_y * qangle_step + (offset_x << 1) : qangle;

    /* One slot per work-item and bin; consecutive bins are 48 floats apart. */
    __local float* hist = hists + 12 * (cell_y * CELLS_PER_BLOCK_Y + cell_x) +
        cell_thread_x;
    for (int bin_id = 0; bin_id < cnbins; ++bin_id)
        hist[bin_id * 48] = 0.f;

    const int dist_x = -4 + cell_thread_x - 4 * cell_x;
    const int dist_center_x = dist_x - 4 * (1 - 2 * cell_x);

    const int dist_y_begin = -4 - 4 * lidY;
    for (int dist_y = dist_y_begin; dist_y < dist_y_begin + 12; ++dist_y)
    {
        float2 vote = (float2) (grad_ptr[0], grad_ptr[1]);
        uchar2 bin = (uchar2) (qangle_ptr[0], qangle_ptr[1]);

        grad_ptr += grad_quadstep;
        qangle_ptr += qangle_step;

        int dist_center_y = dist_y - 4 * (1 - 2 * cell_y);

        float gaussian = exp(-(dist_center_y * dist_center_y + dist_center_x *
            dist_center_x) * scale);
        float interp_weight = (8.f - fabs(dist_y + 0.5f)) *
            (8.f - fabs(dist_x + 0.5f)) / 64.f;

        hist[bin.x * 48] += gaussian * interp_weight * vote.x;
        hist[bin.y * 48] += gaussian * interp_weight * vote.y;
    }
    barrier(CLK_LOCAL_MEM_FENCE);

    /* Reduce the 12 partial sums of each cell: 12 -> 6 -> 3 -> 1. */
    volatile __local float* hist_ = hist;
    for (int bin_id = 0; bin_id < cnbins; ++bin_id, hist_ += 48)
    {
        if (cell_thread_x < 6)
            hist_[0] += hist_[6];
        barrier(CLK_LOCAL_MEM_FENCE);
        if (cell_thread_x < 3)
            hist_[0] += hist_[3];
        /* Work-item 0 reads the slots of work-items 1 and 2 next. Local-memory
           writes of other work-items are only guaranteed to be visible after
           a work-group barrier, so this barrier must not depend on the
           device type. Every work-item runs cnbins iterations, so the
           barrier is reached uniformly. */
        barrier(CLK_LOCAL_MEM_FENCE);
        if (cell_thread_x == 0)
            final_hist[(cell_x * 2 + cell_y) * cnbins + bin_id] =
                hist_[0] + hist_[1] + hist_[2];
    }
    /* final_hist[tid] below was written by a different work-item (possibly
       one with another get_local_id(1)), so synchronise on every device. */
    barrier(CLK_LOCAL_MEM_FENCE);

    int tid = (cell_y * CELLS_PER_BLOCK_Y + cell_x) * 12 + cell_thread_x;
    if ((tid < cblock_hist_size) && (gid < blocks_total))
    {
        __global float* block_hist = block_hists +
            (gidY * img_block_width + gidX) * cblock_hist_size;
        block_hist[tid] = final_hist[tid];
    }
}
//-------------------------------------------------------------
//
Normalization
of
histograms
via
L2Hys_norm
//
optimized
for
the
case
of
9
bins
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment