Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
08b4e780
Commit
08b4e780
authored
Sep 21, 2012
by
marina.kolpakova
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
add shrinking kernel
parent
1cf7a46f
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
126 additions
and
7 deletions
+126
-7
isf-sc.cu
modules/gpu/src/cuda/isf-sc.cu
+125
-7
icf.hpp
modules/gpu/src/icf.hpp
+1
-0
No files found.
modules/gpu/src/cuda/isf-sc.cu
View file @
08b4e780
...
@@ -41,25 +41,136 @@
...
@@ -41,25 +41,136 @@
//M*/
//M*/
#include <icf.hpp>
#include <icf.hpp>
#include <opencv2/gpu/device/saturate_cast.hpp>
namespace cv { namespace gpu {
namespace cv { namespace gpu {
namespace device {
namespace device {
enum {
HOG_BINS = 6,
HOG_LUV_BINS = 10,
WIDTH = 640,
HEIGHT = 480,
GREY_OFFSET = HEIGHT * HOG_LUV_BINS
};
/* Returns the nearest upper power of two, works only for
the typical GPU thread count (pert block) values */
int power_2up(unsigned int n)
{
if (n < 1) return 1;
else if (n < 2) return 2;
else if (n < 4) return 4;
else if (n < 8) return 8;
else if (n < 16) return 16;
else if (n < 32) return 32;
else if (n < 64) return 64;
else if (n < 128) return 128;
else if (n < 256) return 256;
else if (n < 512) return 512;
else if (n < 1024) return 1024;
return -1; // Input is too big
}
__device__ __forceinline__ uchar grey(const uchar3 rgb)
{
return saturate_cast<uchar>(rgb.x * 0.114f + rgb.y * 0.587f + rgb.z * 0.299f);
}
__device__ __forceinline__ void luv(const uchar3 rgb, uchar& l, uchar& u, uchar& v)
{
}
__global__ void rgb2grayluv(const uchar3* __restrict__ rgb, uchar* __restrict__ hog,
__global__ void rgb2grayluv(const uchar3* __restrict__ rgb, uchar* __restrict__ hog,
const int rgbPitch, const int hogPitch)
const int rgbPitch, const int hogPitch)
{
{
const int y = blockIdx.y * blockDim.y + threadIdx.y;
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const uchar3 color = rgb[rgbPitch * y + x];
uchar l, u, v;
luv(color, l, u, v);
hog[hogPitch * y + x] = l;
hog[hogPitch * (y + HEIGHT) + x] = u;
hog[hogPitch * (y + 2 * HEIGHT) + x] = v;
hog[hogPitch * (y + 3 * HEIGHT) + x] = grey(color);
}
__device__ __forceinline__
int qangle(const float &y, const float &x)
{
int bin = 0;
// const float2 &bin_vector_zero = const_angle_bins_vectors[0];
// float max_dot_product = fabs(x*bin_vector_zero.x + y*bin_vector_zero.y);
// // let us hope this gets unrolled
// #pragma unroll
// for(int i=1; i < num_angles_bin; i+=1)
// {
// const float2 &bin_vector_i = const_angle_bins_vectors[i];
// //const float2 bin_vector_i = const_angle_bins_vectors[i];
// //const float2 &bin_vector_i = angle_bins_vectors[i];
// const float dot_product = fabs(x*bin_vector_i.x + y*bin_vector_i.y);
// if(dot_product > max_dot_product)
// {
// max_dot_product = dot_product;
// index = i;
// }
// }
return bin;
}
}
__global__ void gray2hog(const uchar* __restrict__ gray, uchar* __restrict__ hog,
// texture<uchar, 2, cudaReadModeElementType> tgray;
const int pitch
)
__global__ void gray2hog(const uchar* __restrict__ gray, uchar* __restrict__ hog, const int pitch, const float norm
)
{
{
const int y = blockIdx.y * blockDim.y + threadIdx.y;
const int x = blockIdx.x * blockDim.x + threadIdx.x;
// derivative
float dx = gray[y * pitch + x + 1];
dx -= gray[y * pitch + x - 1];
float dy = gray[(y + 1) * pitch + x];
dy -= gray[(y -1) * pitch + x - 1];
// mag and angle
const uchar mag = saturate_cast<uchar>(sqrtf(dy * dy + dx * dx) * norm);
const int bin = qangle(dx, dy);
}
template <int FACTOR>
__device__ __forceinline__ uchar shrink(const uchar* ptr, const int pitch, const int y, const int x)
{
int out = 0;
#pragma unroll
for(int dy = 0; dy < FACTOR; ++dy)
#pragma unroll
for(int dx = 0; dx < FACTOR; ++dx)
{
out += ptr[dy * pitch + dx];
}
return saturate_cast<uchar>(out / FACTOR);
}
}
template<int FACTOR>
__global__ void decimate(const uchar* __restrict__ hogluv, uchar* __restrict__ shrank,
__global__ void decimate(const uchar* __restrict__ hogluv, uchar* __restrict__ shrank,
const int inPitch, const int outPitch )
const int inPitch, const int outPitch )
{
{
const int y = blockIdx.y * blockDim.y + threadIdx.y;
const int x = blockIdx.x * blockDim.x + threadIdx.x;
const uchar* ptr = hogluv + (FACTOR * y) * inPitch + (FACTOR * x);
shrank[ y * outPitch + x]= shrink<FACTOR>(ptr, inPitch, y, x);
}
}
__global__ void intRow(const uchar* __restrict__ hogluv, ushort* __restrict__ sum,
__global__ void intRow(const uchar* __restrict__ hogluv, ushort* __restrict__ sum,
...
@@ -89,6 +200,11 @@ void __device icf::Cascade::detectAt() const
...
@@ -89,6 +200,11 @@ void __device icf::Cascade::detectAt() const
void icf::Cascade::detect(const cv::gpu::PtrStepSzb& hogluv, cudaStream_t stream) const
void icf::Cascade::detect(const cv::gpu::PtrStepSzb& hogluv, cudaStream_t stream) const
{
{
// detection kernel
// detection kernel
dim3 block(32, 8, 1);
dim3 grid(32 * ChannelStorage::FRAME_WIDTH / 32, ChannelStorage::FRAME_HEIGHT / 8, 64);
device::detect<<<grid, block, 0, stream>>>(*this, hogluv, hogluv.step / sizeof(ushort));
if (!stream)
cudaSafeCall( cudaDeviceSynchronize() );
}
}
...
@@ -99,12 +215,13 @@ void icf::ChannelStorage::frame(const cv::gpu::PtrStepSz<uchar3>& rgb, cudaStrea
...
@@ -99,12 +215,13 @@ void icf::ChannelStorage::frame(const cv::gpu::PtrStepSz<uchar3>& rgb, cudaStrea
dim3 grid(FRAME_WIDTH / 32, FRAME_HEIGHT / 8);
dim3 grid(FRAME_WIDTH / 32, FRAME_HEIGHT / 8);
uchar * channels = (uchar*)dmem.ptr(FRAME_HEIGHT * HOG_BINS);
uchar * channels = (uchar*)dmem.ptr(FRAME_HEIGHT * HOG_BINS);
device::rgb2grayluv<<<grid, block, 0, stream>>>((uchar3*)rgb.ptr(), channels, rgb.step, dmem.step);
device::rgb2grayluv<<<grid, block, 0, stream>>>((uchar3*)rgb.ptr(), channels,
rgb.step / sizeof(uchar3), dmem.step);
cudaSafeCall( cudaGetLastError());
cudaSafeCall( cudaGetLastError());
// hog calculation kernel
// hog calculation kernel
channels = (uchar*)dmem.ptr(FRAME_HEIGHT * HOG_LUV_BINS);
channels = (uchar*)dmem.ptr(FRAME_HEIGHT * HOG_LUV_BINS);
device::gray2hog<<<grid, block, 0, stream>>>(channels, (uchar*)dmem.ptr(), dmem.step);
device::gray2hog<<<grid, block, 0, stream>>>(channels, (uchar*)dmem.ptr(), dmem.step
, magnitudeScaling
);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
const int shrWidth = FRAME_WIDTH / shrinkage;
const int shrWidth = FRAME_WIDTH / shrinkage;
...
@@ -112,19 +229,20 @@ void icf::ChannelStorage::frame(const cv::gpu::PtrStepSz<uchar3>& rgb, cudaStrea
...
@@ -112,19 +229,20 @@ void icf::ChannelStorage::frame(const cv::gpu::PtrStepSz<uchar3>& rgb, cudaStrea
// decimate kernel
// decimate kernel
grid = dim3(shrWidth / 32, shrHeight / 8);
grid = dim3(shrWidth / 32, shrHeight / 8);
device::decimate<<<grid, block, 0, stream>>>((uchar*)dmem.ptr(), (uchar*)shrunk.ptr(), dmem.step, shrunk.step);
device::decimate<
4><
<<grid, block, 0, stream>>>((uchar*)dmem.ptr(), (uchar*)shrunk.ptr(), dmem.step, shrunk.step);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
// integrate rows
// integrate rows
block = dim3(shrWidth, 1);
block = dim3(shrWidth, 1);
grid = dim3(shrHeight * HOG_LUV_BINS, 1);
grid = dim3(shrHeight * HOG_LUV_BINS, 1);
device::intRow<<<grid, block, 0, stream>>>((uchar*)shrunk.ptr(), (ushort*)hogluv.ptr(), shrunk.step, hogluv.step);
device::intRow<<<grid, block, 0, stream>>>((uchar*)shrunk.ptr(), (ushort*)hogluv.ptr(),
shrunk.step, hogluv.step / sizeof(ushort));
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
// integrate cols
// integrate cols
block = dim3(128, 1);
block = dim3(128, 1);
grid = dim3(shrWidth * HOG_LUV_BINS, 1);
grid = dim3(shrWidth * HOG_LUV_BINS, 1);
device::intCol<<<grid, block, 0, stream>>>((ushort*)hogluv.ptr(), hogluv.step);
device::intCol<<<grid, block, 0, stream>>>((ushort*)hogluv.ptr(), hogluv.step
/ hogluv.step / sizeof(ushort)
);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaGetLastError() );
}
}
...
...
modules/gpu/src/icf.hpp
View file @
08b4e780
...
@@ -105,6 +105,7 @@ struct ChannelStorage
...
@@ -105,6 +105,7 @@ struct ChannelStorage
};
};
int
shrinkage
;
int
shrinkage
;
static
const
float
magnitudeScaling
=
1.
f
;
// / sqrt(2);
};
};
struct
__align__
(
16
)
Octave
struct
__align__
(
16
)
Octave
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment