Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
5f56b276
Commit
5f56b276
authored
Mar 31, 2011
by
Anatoly Baksheev
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added ROI support for HOG_GPU
parent
400dbb13
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
49 additions
and
75 deletions
+49
-75
hog.cu
modules/gpu/src/cuda/hog.cu
+49
-75
No files found.
modules/gpu/src/cuda/hog.cu
View file @
5f56b276
...
...
@@ -50,10 +50,6 @@
#endif
#endif
#ifndef div_up
#define div_up(n, grain) (((n) + (grain) - 1) / (grain))
#endif
// Other values are not supported
#define CELL_WIDTH 8
#define CELL_HEIGHT 8
...
...
@@ -208,7 +204,7 @@ void compute_hists(int nbins, int block_stride_x, int block_stride_y,
int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) /
block_stride_y;
dim3 grid(div
_u
p(img_block_width, nblocks), img_block_height);
dim3 grid(div
U
p(img_block_width, nblocks), img_block_height);
dim3 threads(32, 2, nblocks);
cudaSafeCall(cudaFuncSetCacheConfig(compute_hists_kernel_many_blocks<nblocks>,
...
...
@@ -311,7 +307,7 @@ void normalize_hists(int nbins, int block_stride_x, int block_stride_y,
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
int img_block_height = (height - CELLS_PER_BLOCK_Y * CELL_HEIGHT + block_stride_y) / block_stride_y;
dim3 grid(div
_u
p(img_block_width, nblocks), img_block_height);
dim3 grid(div
U
p(img_block_width, nblocks), img_block_height);
if (nthreads == 32)
normalize_hists_kernel_many_blocks<32, nblocks><<<grid, threads>>>(block_hist_size, img_block_width, block_hists, threshold);
...
...
@@ -395,9 +391,7 @@ __global__ void classify_hists_kernel_many_blocks(const int img_win_width, const
}
if (threadIdx.x == 0)
labels[blockIdx.y * img_win_width + blockIdx.x * blockDim.z + win_x]
= (product + free_coef >= threshold);
labels[blockIdx.y * img_win_width + blockIdx.x * blockDim.z + win_x] = (product + free_coef >= threshold);
}
...
...
@@ -414,13 +408,11 @@ void classify_hists(int win_height, int win_width, int block_stride_y, int block
int img_win_height = (height - win_height + win_stride_y) / win_stride_y;
dim3 threads(nthreads, 1, nblocks);
dim3 grid(div
_u
p(img_win_width, nblocks), img_win_height);
dim3 grid(div
U
p(img_win_width, nblocks), img_win_height);
cudaSafeCall(cudaFuncSetCacheConfig(classify_hists_kernel_many_blocks<nthreads, nblocks>,
cudaFuncCachePreferL1));
cudaSafeCall(cudaFuncSetCacheConfig(classify_hists_kernel_many_blocks<nthreads, nblocks>, cudaFuncCachePreferL1));
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
block_stride_x;
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
classify_hists_kernel_many_blocks<nthreads, nblocks><<<grid, threads>>>(
img_win_width, img_block_width, win_block_stride_x, win_block_stride_y,
block_hists, coefs, free_coef, threshold, labels);
...
...
@@ -434,9 +426,8 @@ void classify_hists(int win_height, int win_width, int block_stride_y, int block
template <int nthreads>
__global__ void extract_descrs_by_rows_kernel(const int img_block_width, const int win_block_stride_x,
const int win_block_stride_y, const float* block_hists,
PtrElemStepf descriptors)
__global__ void extract_descrs_by_rows_kernel(const int img_block_width, const int win_block_stride_x, const int win_block_stride_y,
const float* block_hists, PtrElemStepf descriptors)
{
// Get left top corner of the window in src
const float* hist = block_hists + (blockIdx.y * win_block_stride_y * img_block_width +
...
...
@@ -455,9 +446,8 @@ __global__ void extract_descrs_by_rows_kernel(const int img_block_width, const i
}
void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x,
int win_stride_y, int win_stride_x, int height, int width, float* block_hists,
DevMem2Df descriptors)
void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, int block_stride_x, int win_stride_y, int win_stride_x,
int height, int width, float* block_hists, DevMem2Df descriptors)
{
const int nthreads = 256;
...
...
@@ -468,8 +458,7 @@ void extract_descrs_by_rows(int win_height, int win_width, int block_stride_y, i
dim3 threads(nthreads, 1);
dim3 grid(img_win_width, img_win_height);
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) /
block_stride_x;
int img_block_width = (width - CELLS_PER_BLOCK_X * CELL_WIDTH + block_stride_x) / block_stride_x;
extract_descrs_by_rows_kernel<nthreads><<<grid, threads>>>(
img_block_width, win_block_stride_x, win_block_stride_y, block_hists, descriptors);
cudaSafeCall( cudaGetLastError() );
...
...
@@ -640,21 +629,17 @@ void compute_gradients_8UC4(int nbins, int height, int width, const DevMem2D& im
const int nthreads = 256;
dim3 bdim(nthreads, 1);
dim3 gdim(div
_up(width, bdim.x), div_u
p(height, bdim.y));
dim3 gdim(div
Up(width, bdim.x), divU
p(height, bdim.y));
if (correct_gamma)
compute_gradients_8UC4_kernel<nthreads, 1><<<gdim, bdim>>>(
height, width, img, angle_scale, grad, qangle);
compute_gradients_8UC4_kernel<nthreads, 1><<<gdim, bdim>>>(height, width, img, angle_scale, grad, qangle);
else
compute_gradients_8UC4_kernel<nthreads, 0><<<gdim, bdim>>>(
height, width, img, angle_scale, grad, qangle);
compute_gradients_8UC4_kernel<nthreads, 0><<<gdim, bdim>>>(height, width, img, angle_scale, grad, qangle);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall(cudaThreadSynchronize());
}
template <int nthreads, int correct_gamma>
__global__ void compute_gradients_8UC1_kernel(int height, int width, const PtrElemStep img,
float angle_scale, PtrElemStepf grad, PtrElemStep qangle)
...
...
@@ -715,17 +700,14 @@ void compute_gradients_8UC1(int nbins, int height, int width, const DevMem2D& im
const int nthreads = 256;
dim3 bdim(nthreads, 1);
dim3 gdim(div
_up(width, bdim.x), div_u
p(height, bdim.y));
dim3 gdim(div
Up(width, bdim.x), divU
p(height, bdim.y));
if (correct_gamma)
compute_gradients_8UC1_kernel<nthreads, 1><<<gdim, bdim>>>(
height, width, img, angle_scale, grad, qangle);
compute_gradients_8UC1_kernel<nthreads, 1><<<gdim, bdim>>>(height, width, img, angle_scale, grad, qangle);
else
compute_gradients_8UC1_kernel<nthreads, 0><<<gdim, bdim>>>(
height, width, img, angle_scale, grad, qangle);
compute_gradients_8UC1_kernel<nthreads, 0><<<gdim, bdim>>>(height, width, img, angle_scale, grad, qangle);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall(cudaThreadSynchronize());
}
...
...
@@ -735,67 +717,58 @@ void compute_gradients_8UC1(int nbins, int height, int width, const DevMem2D& im
// Resize
texture<uchar4, 2, cudaReadModeNormalizedFloat> resize8UC4_tex;
texture<unsigned char, 2, cudaReadModeNormalizedFloat> resize8UC1_tex;
texture<uchar, 2, cudaReadModeNormalizedFloat> resize8UC1_tex;
extern "C" __global__ void resize_8UC4_kernel(float sx, float sy, DevMem2D dst
)
__global__ void resize_for_hog_kernel(float sx, float sy, DevMem2D_<uchar> dst, int colOfs
)
{
unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < dst.cols && y < dst.rows)
{
float4 val = tex2D(resize8UC4_tex, x * sx, y * sy);
((uchar4*)dst.ptr(y))[x] = make_uchar4(val.x * 255, val.y * 255, val.z * 255, val.w * 255);
}
((unsigned char*)dst.ptr(y))[x] = tex2D(resize8UC1_tex, x * sx + colOfs, y * sy) * 255;
}
void resize_8UC4(const DevMem2D& src, DevMem2D dst)
{
cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
cudaBindTexture2D(0, resize8UC4_tex, src.data, desc, src.cols, src.rows, src.step);
resize8UC4_tex.filterMode = cudaFilterModeLinear;
dim3 threads(32, 8);
dim3 grid(div_up(dst.cols, threads.x), div_up(dst.rows, threads.y));
float sx = (float)src.cols / dst.cols;
float sy = (float)src.rows / dst.rows;
resize_8UC4_kernel<<<grid, threads>>>(sx, sy, dst);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall(cudaThreadSynchronize());
cudaSafeCall(cudaUnbindTexture(resize8UC4_tex));
}
extern "C" __global__ void resize_8UC1_kernel(float sx, float sy, DevMem2D dst)
__global__ void resize_for_hog_kernel(float sx, float sy, DevMem2D_<uchar4> dst, int colOfs)
{
unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
if (x < dst.cols && y < dst.rows)
((unsigned char*)dst.ptr(y))[x] = tex2D(resize8UC1_tex, x * sx, y * sy) * 255;
{
float4 val = tex2D(resize8UC4_tex, x * sx + colOfs, y * sy);
dst.ptr(y)[x] = make_uchar4(val.x * 255, val.y * 255, val.z * 255, val.w * 255);
}
}
void resize_8UC1(const DevMem2D& src, DevMem2D dst
)
template<class T, class TEX>
static void resize_for_hog(const DevMem2D& src, DevMem2D dst, TEX& tex
)
{
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
cudaBindTexture2D(0, resize8UC1_tex, src.data, desc, src.cols, src.rows, src.step);
resize8UC1_tex.filterMode = cudaFilterModeLinear;
tex.filterMode = cudaFilterModeLinear;
size_t texOfs = 0;
int colOfs = 0;
cudaChannelFormatDesc desc = cudaCreateChannelDesc<T>();
cudaSafeCall( cudaBindTexture2D(&texOfs, tex, src.data, desc, src.cols, src.rows, src.step) );
if (texOfs != 0)
{
colOfs = static_cast<int>( texOfs/sizeof(T) );
cudaSafeCall( cudaUnbindTexture(tex) );
cudaSafeCall( cudaBindTexture2D(&texOfs, tex, src.data, desc, src.cols, src.rows, src.step) );
}
dim3 threads(32, 8);
dim3 grid(div
_up(dst.cols, threads.x), div_u
p(dst.rows, threads.y));
float sx =
(float)src.cols
/ dst.cols;
float sy =
(float)src.rows
/ dst.rows;
resize_
8UC1_kernel<<<grid, threads>>>(sx, sy, dst
);
dim3 grid(div
Up(dst.cols, threads.x), divU
p(dst.rows, threads.y));
float sx =
static_cast<float>(src.cols)
/ dst.cols;
float sy =
static_cast<float>(src.rows)
/ dst.rows;
resize_
for_hog_kernel<<<grid, threads>>>(sx, sy, (DevMem2D_<T>)dst, colOfs
);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall(cudaThreadSynchronize());
cudaSafeCall(cudaUnbindTexture(resize8UC1_tex));
}
void resize_8UC1(const DevMem2D& src, DevMem2D dst) { resize_for_hog<uchar> (src, dst, resize8UC1_tex); }
void resize_8UC4(const DevMem2D& src, DevMem2D dst) { resize_for_hog<uchar4>(src, dst, resize8UC4_tex); }
}}}
\ No newline at end of file
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment