Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
4709b9d2
Unverified
Commit
4709b9d2
authored
Mar 25, 2017
by
Claudio
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add cuda::streams to by_rows and 8UC1 functions
Fix #8177
parent
dd3655f6
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
78 additions
and
38 deletions
+78
-38
hog.cu
modules/cudaobjdetect/src/cuda/hog.cu
+25
-16
hog.cpp
modules/cudaobjdetect/src/hog.cpp
+53
-22
No files found.
modules/cudaobjdetect/src/cuda/hog.cu
View file @
4709b9d2
...
@@ -518,8 +518,10 @@ namespace cv { namespace cuda { namespace device
...
@@ -518,8 +518,10 @@ namespace cv { namespace cuda { namespace device
template <int nthreads>
template <int nthreads>
__global__ void extract_descrs_by_rows_kernel(const int img_block_width, const int win_block_stride_x, const int win_block_stride_y,
__global__ void extract_descrs_by_rows_kernel(const int img_block_width,
const float* block_hists, PtrStepf descriptors)
const int win_block_stride_x, const int win_block_stride_y,
const float* block_hists,
PtrStepf descriptors)
{
{
// Get left top corner of the window in src
// Get left top corner of the window in src
const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width +
const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width +
...
@@ -538,8 +540,14 @@ namespace cv { namespace cuda { namespace device
...
@@ -538,8 +540,14 @@ namespace cv { namespace cuda { namespace device
}
}
void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, int win_stride_y, int win_stride_x,
void extract_descrs_by_rows(int win_height, int win_width,
int height, int width, float* block_hists, int cell_size_x, int ncells_block_x, PtrStepSzf descriptors)
int block_stride_y, int block_stride_x,
int win_stride_y, int win_stride_x,
int height, int width,
float* block_hists, int cell_size_x,
int ncells_block_x,
PtrStepSzf descriptors,
const cudaStream_t& stream)
{
{
const int nthreads = 256;
const int nthreads = 256;
...
@@ -551,17 +559,16 @@ namespace cv { namespace cuda { namespace device
...
@@ -551,17 +559,16 @@ namespace cv { namespace cuda { namespace device
dim3 grid(img_win_width, img_win_height);
dim3 grid(img_win_width, img_win_height);
int img_block_width = (width - ncells_block_x * cell_size_x + block_stride_x) / block_stride_x;
int img_block_width = (width - ncells_block_x * cell_size_x + block_stride_x) / block_stride_x;
extract_descrs_by_rows_kernel<nthreads><<<grid, threads>>>(
extract_descrs_by_rows_kernel<nthreads><<<grid, threads, 0, stream>>>(img_block_width, win_block_stride_x, win_block_stride_y, block_hists, descriptors);
img_block_width, win_block_stride_x, win_block_stride_y, block_hists, descriptors);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cuda
DeviceSynchronize
() );
cudaSafeCall( cuda
GetLastError
() );
}
}
template <int nthreads>
template <int nthreads>
__global__ void extract_descrs_by_cols_kernel(const int img_block_width, const int win_block_stride_x,
__global__ void extract_descrs_by_cols_kernel(const int img_block_width,
const int win_block_stride_y, const float* block_hists,
const int win_block_stride_x, const int win_block_stride_y,
const float* block_hists,
PtrStepf descriptors)
PtrStepf descriptors)
{
{
// Get left top corner of the window in src
// Get left top corner of the window in src
...
@@ -792,8 +799,12 @@ namespace cv { namespace cuda { namespace device
...
@@ -792,8 +799,12 @@ namespace cv { namespace cuda { namespace device
}
}
void compute_gradients_8UC1(int nbins, int height, int width, const PtrStepSzb& img,
void compute_gradients_8UC1(int nbins,
float angle_scale, PtrStepSzf grad, PtrStepSzb qangle, bool correct_gamma)
int height, int width, const PtrStepSzb& img,
float angle_scale,
PtrStepSzf grad, PtrStepSzb qangle,
bool correct_gamma,
const cudaStream_t& stream)
{
{
(void)nbins;
(void)nbins;
const int nthreads = 256;
const int nthreads = 256;
...
@@ -802,13 +813,11 @@ namespace cv { namespace cuda { namespace device
...
@@ -802,13 +813,11 @@ namespace cv { namespace cuda { namespace device
dim3 gdim(divUp(width, bdim.x), divUp(height, bdim.y));
dim3 gdim(divUp(width, bdim.x), divUp(height, bdim.y));
if (correct_gamma)
if (correct_gamma)
compute_gradients_8UC1_kernel<nthreads, 1><<<gdim, bdim>>>(height, width, img, angle_scale, grad, qangle);
compute_gradients_8UC1_kernel<nthreads, 1><<<gdim, bdim
, 0, stream
>>>(height, width, img, angle_scale, grad, qangle);
else
else
compute_gradients_8UC1_kernel<nthreads, 0><<<gdim, bdim>>>(height, width, img, angle_scale, grad, qangle);
compute_gradients_8UC1_kernel<nthreads, 0><<<gdim, bdim
, 0, stream
>>>(height, width, img, angle_scale, grad, qangle);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
}
...
...
modules/cudaobjdetect/src/hog.cpp
View file @
4709b9d2
...
@@ -64,19 +64,29 @@ namespace cv { namespace cuda { namespace device
...
@@ -64,19 +64,29 @@ namespace cv { namespace cuda { namespace device
{
{
namespace
hog
namespace
hog
{
{
void
set_up_constants
(
int
nbins
,
int
block_stride_x
,
int
block_stride_y
,
void
set_up_constants
(
int
nbins
,
int
block_stride_x
,
int
block_stride_y
,
int
nblocks_win_x
,
int
nblocks_win_y
,
int
nblocks_win_x
,
int
nblocks_win_y
,
int
ncells_block_x
,
int
ncells_block_y
,
int
ncells_block_x
,
int
ncells_block_y
,
const
cudaStream_t
&
stream
);
const
cudaStream_t
&
stream
);
void
compute_hists
(
int
nbins
,
int
block_stride_x
,
int
block_stride_y
,
void
compute_hists
(
int
nbins
,
int
height
,
int
width
,
const
PtrStepSzf
&
grad
,
int
block_stride_x
,
int
block_stride_y
,
const
PtrStepSzb
&
qangle
,
float
sigma
,
float
*
block_hists
,
int
height
,
int
width
,
int
cell_size_x
,
int
cell_size_y
,
int
ncells_block_x
,
int
ncells_block_y
,
const
PtrStepSzf
&
grad
,
const
PtrStepSzb
&
qangle
,
float
sigma
,
float
*
block_hists
,
int
cell_size_x
,
int
cell_size_y
,
int
ncells_block_x
,
int
ncells_block_y
,
const
cudaStream_t
&
stream
);
const
cudaStream_t
&
stream
);
void
normalize_hists
(
int
nbins
,
int
block_stride_x
,
int
block_stride_y
,
void
normalize_hists
(
int
nbins
,
int
height
,
int
width
,
float
*
block_hists
,
float
threshold
,
int
cell_size_x
,
int
cell_size_y
,
int
ncells_block_x
,
int
ncells_block_y
,
int
block_stride_x
,
int
block_stride_y
,
int
height
,
int
width
,
float
*
block_hists
,
float
threshold
,
int
cell_size_x
,
int
cell_size_y
,
int
ncells_block_x
,
int
ncells_block_y
,
const
cudaStream_t
&
stream
);
const
cudaStream_t
&
stream
);
void
classify_hists
(
int
win_height
,
int
win_width
,
int
block_stride_y
,
void
classify_hists
(
int
win_height
,
int
win_width
,
int
block_stride_y
,
...
@@ -85,21 +95,37 @@ namespace cv { namespace cuda { namespace device
...
@@ -85,21 +95,37 @@ namespace cv { namespace cuda { namespace device
float
threshold
,
int
cell_size_x
,
int
ncells_block_x
,
unsigned
char
*
labels
);
float
threshold
,
int
cell_size_x
,
int
ncells_block_x
,
unsigned
char
*
labels
);
void
compute_confidence_hists
(
int
win_height
,
int
win_width
,
int
block_stride_y
,
int
block_stride_x
,
void
compute_confidence_hists
(
int
win_height
,
int
win_width
,
int
block_stride_y
,
int
block_stride_x
,
int
win_stride_y
,
int
win_stride_x
,
int
height
,
int
width
,
float
*
block_hists
,
int
win_stride_y
,
int
win_stride_x
,
int
height
,
int
width
,
float
*
block_hists
,
float
*
coefs
,
float
free_coef
,
float
threshold
,
int
cell_size_x
,
int
ncells_block_x
,
float
*
confidences
);
float
*
coefs
,
float
free_coef
,
float
threshold
,
int
cell_size_x
,
int
ncells_block_x
,
float
*
confidences
);
void
extract_descrs_by_rows
(
int
win_height
,
int
win_width
,
int
block_stride_y
,
int
block_stride_x
,
void
extract_descrs_by_rows
(
int
win_height
,
int
win_width
,
int
win_stride_y
,
int
win_stride_x
,
int
height
,
int
width
,
float
*
block_hists
,
int
cell_size_x
,
int
ncells_block_x
,
int
block_stride_y
,
int
block_stride_x
,
cv
::
cuda
::
PtrStepSzf
descriptors
);
int
win_stride_y
,
int
win_stride_x
,
void
extract_descrs_by_cols
(
int
win_height
,
int
win_width
,
int
block_stride_y
,
int
block_stride_x
,
int
height
,
int
width
,
int
win_stride_y
,
int
win_stride_x
,
int
height
,
int
width
,
float
*
block_hists
,
int
cell_size_x
,
int
ncells_block_x
,
float
*
block_hists
,
int
cell_size_x
,
int
ncells_block_x
,
cv
::
cuda
::
PtrStepSzf
descriptors
,
const
cudaStream_t
&
stream
);
void
extract_descrs_by_cols
(
int
win_height
,
int
win_width
,
int
block_stride_y
,
int
block_stride_x
,
int
win_stride_y
,
int
win_stride_x
,
int
height
,
int
width
,
float
*
block_hists
,
int
cell_size_x
,
int
ncells_block_x
,
cv
::
cuda
::
PtrStepSzf
descriptors
,
cv
::
cuda
::
PtrStepSzf
descriptors
,
const
cudaStream_t
&
stream
);
const
cudaStream_t
&
stream
);
void
compute_gradients_8UC1
(
int
nbins
,
int
height
,
int
width
,
const
cv
::
cuda
::
PtrStepSzb
&
img
,
void
compute_gradients_8UC1
(
int
nbins
,
float
angle_scale
,
cv
::
cuda
::
PtrStepSzf
grad
,
cv
::
cuda
::
PtrStepSzb
qangle
,
bool
correct_gamma
);
int
height
,
int
width
,
const
cv
::
cuda
::
PtrStepSzb
&
img
,
void
compute_gradients_8UC4
(
int
nbins
,
int
height
,
int
width
,
const
cv
::
cuda
::
PtrStepSzb
&
img
,
float
angle_scale
,
float
angle_scale
,
cv
::
cuda
::
PtrStepSzf
grad
,
cv
::
cuda
::
PtrStepSzb
qangle
,
bool
correct_gamma
,
cv
::
cuda
::
PtrStepSzf
grad
,
cv
::
cuda
::
PtrStepSzb
qangle
,
bool
correct_gamma
,
const
cudaStream_t
&
stream
);
void
compute_gradients_8UC4
(
int
nbins
,
int
height
,
int
width
,
const
cv
::
cuda
::
PtrStepSzb
&
img
,
float
angle_scale
,
cv
::
cuda
::
PtrStepSzf
grad
,
cv
::
cuda
::
PtrStepSzb
qangle
,
bool
correct_gamma
,
const
cudaStream_t
&
stream
);
const
cudaStream_t
&
stream
);
void
resize_8UC1
(
const
cv
::
cuda
::
PtrStepSzb
&
src
,
cv
::
cuda
::
PtrStepSzb
dst
);
void
resize_8UC1
(
const
cv
::
cuda
::
PtrStepSzb
&
src
,
cv
::
cuda
::
PtrStepSzb
dst
);
...
@@ -483,7 +509,8 @@ namespace
...
@@ -483,7 +509,8 @@ namespace
img
.
rows
,
img
.
cols
,
img
.
rows
,
img
.
cols
,
block_hists
.
ptr
<
float
>
(),
block_hists
.
ptr
<
float
>
(),
cell_size_
.
width
,
cells_per_block_
.
width
,
cell_size_
.
width
,
cells_per_block_
.
width
,
descriptors
);
descriptors
,
StreamAccessor
::
getStream
(
stream
));
break
;
break
;
case
DESCR_FORMAT_COL_BY_COL
:
case
DESCR_FORMAT_COL_BY_COL
:
hog
::
extract_descrs_by_cols
(
win_size_
.
height
,
win_size_
.
width
,
hog
::
extract_descrs_by_cols
(
win_size_
.
height
,
win_size_
.
width
,
...
@@ -524,8 +551,12 @@ namespace
...
@@ -524,8 +551,12 @@ namespace
switch
(
img
.
type
())
switch
(
img
.
type
())
{
{
case
CV_8UC1
:
case
CV_8UC1
:
hog
::
compute_gradients_8UC1
(
nbins_
,
img
.
rows
,
img
.
cols
,
img
,
hog
::
compute_gradients_8UC1
(
nbins_
,
angleScale
,
grad
,
qangle
,
gamma_correction_
);
img
.
rows
,
img
.
cols
,
img
,
angleScale
,
grad
,
qangle
,
gamma_correction_
,
StreamAccessor
::
getStream
(
stream
));
break
;
break
;
case
CV_8UC4
:
case
CV_8UC4
:
hog
::
compute_gradients_8UC4
(
nbins_
,
hog
::
compute_gradients_8UC4
(
nbins_
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment