Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
22a3cf0f
Commit
22a3cf0f
authored
Dec 19, 2013
by
Andrey Pavlenko
Committed by
OpenCV Buildbot
Dec 19, 2013
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #2008 from krodyush:pullreq/2.4-opt-131209-hog
parents
0cea828a
f3ee1c3d
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
33 additions
and
9 deletions
+33
-9
hog.cpp
modules/ocl/src/hog.cpp
+20
-4
objdetect_hog.cl
modules/ocl/src/opencl/objdetect_hog.cl
+13
-5
No files found.
modules/ocl/src/hog.cpp
View file @
22a3cf0f
...
...
@@ -76,6 +76,11 @@ namespace cv
int
cdescr_width
;
int
cdescr_height
;
// A shift value and type that allows qangle to be different
// sizes on different hardware
int
qangle_step_shift
;
int
qangle_type
;
void
set_up_constants
(
int
nbins
,
int
block_stride_x
,
int
block_stride_y
,
int
nblocks_win_x
,
int
nblocks_win_y
);
...
...
@@ -153,6 +158,7 @@ cv::ocl::HOGDescriptor::HOGDescriptor(Size win_size_, Size block_size_, Size blo
hog_device_cpu
=
true
;
else
hog_device_cpu
=
false
;
}
size_t
cv
::
ocl
::
HOGDescriptor
::
getDescriptorSize
()
const
...
...
@@ -213,7 +219,7 @@ void cv::ocl::HOGDescriptor::init_buffer(const oclMat &img, Size win_stride)
effect_size
=
img
.
size
();
grad
.
create
(
img
.
size
(),
CV_32FC2
);
qangle
.
create
(
img
.
size
(),
CV_8UC2
);
qangle
.
create
(
img
.
size
(),
hog
::
qangle_type
);
const
size_t
block_hist_size
=
getBlockHistogramSize
();
const
Size
blocks_per_img
=
numPartsWithin
(
img
.
size
(),
block_size
,
block_stride
);
...
...
@@ -1607,6 +1613,16 @@ void cv::ocl::device::hog::set_up_constants(int nbins,
int
descr_size
=
descr_width
*
nblocks_win_y
;
cdescr_size
=
descr_size
;
qangle_type
=
CV_8UC2
;
qangle_step_shift
=
0
;
// Some Intel devices have low single-byte access performance,
// so we change the datatype here.
if
(
Context
::
getContext
()
->
supportsFeature
(
FEATURE_CL_INTEL_DEVICE
))
{
qangle_type
=
CV_32SC2
;
qangle_step_shift
=
2
;
}
}
void
cv
::
ocl
::
device
::
hog
::
compute_hists
(
int
nbins
,
...
...
@@ -1628,7 +1644,7 @@ void cv::ocl::device::hog::compute_hists(int nbins,
int
blocks_total
=
img_block_width
*
img_block_height
;
int
grad_quadstep
=
grad
.
step
>>
2
;
int
qangle_step
=
qangle
.
step
;
int
qangle_step
=
qangle
.
step
>>
qangle_step_shift
;
int
blocks_in_group
=
4
;
size_t
localThreads
[
3
]
=
{
blocks_in_group
*
24
,
2
,
1
};
...
...
@@ -1892,7 +1908,7 @@ void cv::ocl::device::hog::compute_gradients_8UC1(int height, int width,
char
correctGamma
=
(
correct_gamma
)
?
1
:
0
;
int
img_step
=
img
.
step
;
int
grad_quadstep
=
grad
.
step
>>
3
;
int
qangle_step
=
qangle
.
step
>>
1
;
int
qangle_step
=
qangle
.
step
>>
(
1
+
qangle_step_shift
)
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
height
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
width
));
...
...
@@ -1927,7 +1943,7 @@ void cv::ocl::device::hog::compute_gradients_8UC4(int height, int width,
char
correctGamma
=
(
correct_gamma
)
?
1
:
0
;
int
img_step
=
img
.
step
>>
2
;
int
grad_quadstep
=
grad
.
step
>>
3
;
int
qangle_step
=
qangle
.
step
>>
1
;
int
qangle_step
=
qangle
.
step
>>
(
1
+
qangle_step_shift
)
;
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
height
));
args
.
push_back
(
make_pair
(
sizeof
(
cl_int
),
(
void
*
)
&
width
));
...
...
modules/ocl/src/opencl/objdetect_hog.cl
View file @
22a3cf0f
...
...
@@ -50,6 +50,14 @@
#
define
NTHREADS
256
#
define
CV_PI_F
3.1415926535897932384626433832795f
#
ifdef
INTEL_DEVICE
#
define
QANGLE_TYPE
int
#
define
QANGLE_TYPE2
int2
#
else
#
define
QANGLE_TYPE
uchar
#
define
QANGLE_TYPE2
uchar2
#
endif
//----------------------------------------------------------------------------
//
Histogram
computation
//
12
threads
for
a
cell,
12x4
threads
per
block
...
...
@@ -59,7 +67,7 @@ __kernel void compute_hists_lut_kernel(
const
int
cnbins,
const
int
cblock_hist_size,
const
int
img_block_width,
const
int
blocks_in_group,
const
int
blocks_total,
const
int
grad_quadstep,
const
int
qangle_step,
__global
const
float*
grad,
__global
const
uchar
*
qangle,
__global
const
float*
grad,
__global
const
QANGLE_TYPE
*
qangle,
__global
const
float*
gauss_w_lut,
__global
float*
block_hists,
__local
float*
smem
)
{
...
...
@@ -86,7 +94,7 @@ __kernel void compute_hists_lut_kernel(
__global
const
float*
grad_ptr
=
(
gid
<
blocks_total
)
?
grad
+
offset_y
*
grad_quadstep
+
(
offset_x
<<
1
)
:
grad
;
__global
const
uchar
*
qangle_ptr
=
(
gid
<
blocks_total
)
?
__global
const
QANGLE_TYPE
*
qangle_ptr
=
(
gid
<
blocks_total
)
?
qangle
+
offset_y
*
qangle_step
+
(
offset_x
<<
1
)
:
qangle
;
__local
float*
hist
=
hists
+
12
*
(
cell_y
*
CELLS_PER_BLOCK_Y
+
cell_x
)
+
...
...
@@ -101,7 +109,7 @@ __kernel void compute_hists_lut_kernel(
for
(
int
dist_y
=
dist_y_begin
; dist_y < dist_y_begin + 12; ++dist_y)
{
float2
vote
=
(
float2
)
(
grad_ptr[0],
grad_ptr[1]
)
;
uchar2
bin
=
(
uchar
2
)
(
qangle_ptr[0],
qangle_ptr[1]
)
;
QANGLE_TYPE2
bin
=
(
QANGLE_TYPE
2
)
(
qangle_ptr[0],
qangle_ptr[1]
)
;
grad_ptr
+=
grad_quadstep
;
qangle_ptr
+=
qangle_step
;
...
...
@@ -558,7 +566,7 @@ __kernel void extract_descrs_by_cols_kernel(
__kernel
void
compute_gradients_8UC4_kernel
(
const
int
height,
const
int
width,
const
int
img_step,
const
int
grad_quadstep,
const
int
qangle_step,
const
__global
uchar4
*
img,
__global
float
*
grad,
__global
uchar
*
qangle,
const
__global
uchar4
*
img,
__global
float
*
grad,
__global
QANGLE_TYPE
*
qangle,
const
float
angle_scale,
const
char
correct_gamma,
const
int
cnbins
)
{
const
int
x
=
get_global_id
(
0
)
;
...
...
@@ -660,7 +668,7 @@ __kernel void compute_gradients_8UC4_kernel(
__kernel
void
compute_gradients_8UC1_kernel
(
const
int
height,
const
int
width,
const
int
img_step,
const
int
grad_quadstep,
const
int
qangle_step,
__global
const
uchar
*
img,
__global
float
*
grad,
__global
uchar
*
qangle,
__global
const
uchar
*
img,
__global
float
*
grad,
__global
QANGLE_TYPE
*
qangle,
const
float
angle_scale,
const
char
correct_gamma,
const
int
cnbins
)
{
const
int
x
=
get_global_id
(
0
)
;
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment