Unverified Commit 4709b9d2 authored by Claudio's avatar Claudio

Add cuda::streams to by_rows and 8UC1 functions

Fix #8177
parent dd3655f6
...@@ -518,8 +518,10 @@ namespace cv { namespace cuda { namespace device ...@@ -518,8 +518,10 @@ namespace cv { namespace cuda { namespace device
template <int nthreads> template <int nthreads>
__global__ void extract_descrs_by_rows_kernel(const int img_block_width, const int win_block_stride_x, const int win_block_stride_y, __global__ void extract_descrs_by_rows_kernel(const int img_block_width,
const float* block_hists, PtrStepf descriptors) const int win_block_stride_x, const int win_block_stride_y,
const float* block_hists,
PtrStepf descriptors)
{ {
// Get left top corner of the window in src // Get left top corner of the window in src
const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width + const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width +
...@@ -538,8 +540,14 @@ namespace cv { namespace cuda { namespace device ...@@ -538,8 +540,14 @@ namespace cv { namespace cuda { namespace device
} }
void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, int win_stride_y, int win_stride_x, void extract_descrs_by_rows(int win_height, int win_width,
int height, int width, float* block_hists, int cell_size_x, int ncells_block_x, PtrStepSzf descriptors) int block_stride_y, int block_stride_x,
int win_stride_y, int win_stride_x,
int height, int width,
float* block_hists, int cell_size_x,
int ncells_block_x,
PtrStepSzf descriptors,
const cudaStream_t& stream)
{ {
const int nthreads = 256; const int nthreads = 256;
...@@ -551,17 +559,16 @@ namespace cv { namespace cuda { namespace device ...@@ -551,17 +559,16 @@ namespace cv { namespace cuda { namespace device
dim3 grid(img_win_width, img_win_height); dim3 grid(img_win_width, img_win_height);
int img_block_width = (width - ncells_block_x * cell_size_x + block_stride_x) / block_stride_x; int img_block_width = (width - ncells_block_x * cell_size_x + block_stride_x) / block_stride_x;
extract_descrs_by_rows_kernel<nthreads><<<grid, threads>>>( extract_descrs_by_rows_kernel<nthreads><<<grid, threads, 0, stream>>>(img_block_width, win_block_stride_x, win_block_stride_y, block_hists, descriptors);
img_block_width, win_block_stride_x, win_block_stride_y, block_hists, descriptors);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaGetLastError() );
} }
template <int nthreads> template <int nthreads>
__global__ void extract_descrs_by_cols_kernel(const int img_block_width, const int win_block_stride_x, __global__ void extract_descrs_by_cols_kernel(const int img_block_width,
const int win_block_stride_y, const float* block_hists, const int win_block_stride_x, const int win_block_stride_y,
const float* block_hists,
PtrStepf descriptors) PtrStepf descriptors)
{ {
// Get left top corner of the window in src // Get left top corner of the window in src
...@@ -792,8 +799,12 @@ namespace cv { namespace cuda { namespace device ...@@ -792,8 +799,12 @@ namespace cv { namespace cuda { namespace device
} }
void compute_gradients_8UC1(int nbins, int height, int width, const PtrStepSzb& img, void compute_gradients_8UC1(int nbins,
float angle_scale, PtrStepSzf grad, PtrStepSzb qangle, bool correct_gamma) int height, int width, const PtrStepSzb& img,
float angle_scale,
PtrStepSzf grad, PtrStepSzb qangle,
bool correct_gamma,
const cudaStream_t& stream)
{ {
(void)nbins; (void)nbins;
const int nthreads = 256; const int nthreads = 256;
...@@ -802,13 +813,11 @@ namespace cv { namespace cuda { namespace device ...@@ -802,13 +813,11 @@ namespace cv { namespace cuda { namespace device
dim3 gdim(divUp(width, bdim.x), divUp(height, bdim.y)); dim3 gdim(divUp(width, bdim.x), divUp(height, bdim.y));
if (correct_gamma) if (correct_gamma)
compute_gradients_8UC1_kernel<nthreads, 1><<<gdim, bdim>>>(height, width, img, angle_scale, grad, qangle); compute_gradients_8UC1_kernel<nthreads, 1><<<gdim, bdim, 0, stream>>>(height, width, img, angle_scale, grad, qangle);
else else
compute_gradients_8UC1_kernel<nthreads, 0><<<gdim, bdim>>>(height, width, img, angle_scale, grad, qangle); compute_gradients_8UC1_kernel<nthreads, 0><<<gdim, bdim, 0, stream>>>(height, width, img, angle_scale, grad, qangle);
cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
} }
......
...@@ -64,19 +64,29 @@ namespace cv { namespace cuda { namespace device ...@@ -64,19 +64,29 @@ namespace cv { namespace cuda { namespace device
{ {
namespace hog namespace hog
{ {
void set_up_constants(int nbins, int block_stride_x, int block_stride_y, void set_up_constants(int nbins,
int block_stride_x, int block_stride_y,
int nblocks_win_x, int nblocks_win_y, int nblocks_win_x, int nblocks_win_y,
int ncells_block_x, int ncells_block_y, int ncells_block_x, int ncells_block_y,
const cudaStream_t& stream); const cudaStream_t& stream);
void compute_hists(int nbins, int block_stride_x, int block_stride_y, void compute_hists(int nbins,
int height, int width, const PtrStepSzf& grad, int block_stride_x, int block_stride_y,
const PtrStepSzb& qangle, float sigma, float* block_hists, int height, int width,
int cell_size_x, int cell_size_y, int ncells_block_x, int ncells_block_y, const PtrStepSzf& grad, const PtrStepSzb& qangle,
float sigma,
float* block_hists,
int cell_size_x, int cell_size_y,
int ncells_block_x, int ncells_block_y,
const cudaStream_t& stream); const cudaStream_t& stream);
void normalize_hists(int nbins, int block_stride_x, int block_stride_y, void normalize_hists(int nbins,
int height, int width, float* block_hists, float threshold, int cell_size_x, int cell_size_y, int ncells_block_x, int ncells_block_y, int block_stride_x, int block_stride_y,
int height, int width,
float* block_hists,
float threshold,
int cell_size_x, int cell_size_y,
int ncells_block_x, int ncells_block_y,
const cudaStream_t& stream); const cudaStream_t& stream);
void classify_hists(int win_height, int win_width, int block_stride_y, void classify_hists(int win_height, int win_width, int block_stride_y,
...@@ -85,21 +95,37 @@ namespace cv { namespace cuda { namespace device ...@@ -85,21 +95,37 @@ namespace cv { namespace cuda { namespace device
float threshold, int cell_size_x, int ncells_block_x, unsigned char* labels); float threshold, int cell_size_x, int ncells_block_x, unsigned char* labels);
void compute_confidence_hists(int win_height, int win_width, int block_stride_y, int block_stride_x, void compute_confidence_hists(int win_height, int win_width, int block_stride_y, int block_stride_x,
int win_stride_y, int win_stride_x, int height, int width, float* block_hists, int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
float* coefs, float free_coef, float threshold, int cell_size_x, int ncells_block_x, float *confidences); float* coefs, float free_coef, float threshold, int cell_size_x, int ncells_block_x, float *confidences);
void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, void extract_descrs_by_rows(int win_height, int win_width,
int win_stride_y, int win_stride_x, int height, int width, float* block_hists, int cell_size_x, int ncells_block_x, int block_stride_y, int block_stride_x,
cv::cuda::PtrStepSzf descriptors); int win_stride_y, int win_stride_x,
void extract_descrs_by_cols(int win_height, int win_width, int block_stride_y, int block_stride_x, int height, int width,
int win_stride_y, int win_stride_x, int height, int width, float* block_hists, int cell_size_x, int ncells_block_x, float* block_hists,
int cell_size_x, int ncells_block_x,
cv::cuda::PtrStepSzf descriptors,
const cudaStream_t& stream);
void extract_descrs_by_cols(int win_height, int win_width,
int block_stride_y, int block_stride_x,
int win_stride_y, int win_stride_x,
int height, int width,
float* block_hists,
int cell_size_x, int ncells_block_x,
cv::cuda::PtrStepSzf descriptors, cv::cuda::PtrStepSzf descriptors,
const cudaStream_t& stream); const cudaStream_t& stream);
void compute_gradients_8UC1(int nbins, int height, int width, const cv::cuda::PtrStepSzb& img, void compute_gradients_8UC1(int nbins,
float angle_scale, cv::cuda::PtrStepSzf grad, cv::cuda::PtrStepSzb qangle, bool correct_gamma); int height, int width, const cv::cuda::PtrStepSzb& img,
void compute_gradients_8UC4(int nbins, int height, int width, const cv::cuda::PtrStepSzb& img, float angle_scale,
float angle_scale, cv::cuda::PtrStepSzf grad, cv::cuda::PtrStepSzb qangle, bool correct_gamma, cv::cuda::PtrStepSzf grad, cv::cuda::PtrStepSzb qangle,
bool correct_gamma,
const cudaStream_t& stream);
void compute_gradients_8UC4(int nbins,
int height, int width, const cv::cuda::PtrStepSzb& img,
float angle_scale,
cv::cuda::PtrStepSzf grad, cv::cuda::PtrStepSzb qangle,
bool correct_gamma,
const cudaStream_t& stream); const cudaStream_t& stream);
void resize_8UC1(const cv::cuda::PtrStepSzb& src, cv::cuda::PtrStepSzb dst); void resize_8UC1(const cv::cuda::PtrStepSzb& src, cv::cuda::PtrStepSzb dst);
...@@ -483,7 +509,8 @@ namespace ...@@ -483,7 +509,8 @@ namespace
img.rows, img.cols, img.rows, img.cols,
block_hists.ptr<float>(), block_hists.ptr<float>(),
cell_size_.width, cells_per_block_.width, cell_size_.width, cells_per_block_.width,
descriptors); descriptors,
StreamAccessor::getStream(stream));
break; break;
case DESCR_FORMAT_COL_BY_COL: case DESCR_FORMAT_COL_BY_COL:
hog::extract_descrs_by_cols(win_size_.height, win_size_.width, hog::extract_descrs_by_cols(win_size_.height, win_size_.width,
...@@ -524,8 +551,12 @@ namespace ...@@ -524,8 +551,12 @@ namespace
switch (img.type()) switch (img.type())
{ {
case CV_8UC1: case CV_8UC1:
hog::compute_gradients_8UC1(nbins_, img.rows, img.cols, img, hog::compute_gradients_8UC1(nbins_,
angleScale, grad, qangle, gamma_correction_); img.rows, img.cols, img,
angleScale,
grad, qangle,
gamma_correction_,
StreamAccessor::getStream(stream));
break; break;
case CV_8UC4: case CV_8UC4:
hog::compute_gradients_8UC4(nbins_, hog::compute_gradients_8UC4(nbins_,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment