Commit 5d15e4ea authored by marina.kolpakova's avatar marina.kolpakova

CUDA kernels interface

parent 1a52a322
...@@ -42,13 +42,90 @@ ...@@ -42,13 +42,90 @@
#include <icf.hpp> #include <icf.hpp>
void icf::Cascade::detect(const cv::gpu::PtrStepSzb& hogluv) const namespace cv { namespace gpu {
namespace device {
__global__ void rgb2grayluv(const uchar3* __restrict__ rgb, uchar* __restrict__ hog,
const int rgbPitch, const int hogPitch)
{
}
__global__ void gray2hog(const uchar* __restrict__ gray, uchar* __restrict__ hog,
const int pitch)
{
}
__global__ void decimate(const uchar* __restrict__ hogluv, uchar* __restrict__ shrank,
const int inPitch, const int outPitch )
{
}
__global__ void intRow(const uchar* __restrict__ hogluv, ushort* __restrict__ sum,
const int inPitch, const int outPitch)
{
}
__global__ void intCol(ushort* __restrict__ sum, const int pitch)
{
}
__global__ void detect(const cv::gpu::icf::Cascade cascade, const uchar* __restrict__ hogluv, const int pitch)
{
cascade.detectAt();
}
}
void __device icf::Cascade::detectAt() const
{
}
void icf::Cascade::detect(const cv::gpu::PtrStepSzb& hogluv, cudaStream_t stream) const
{ {
// detection kernel // detection kernel
} }
void icf::ChannelStorage::frame(const cv::gpu::PtrStepSz<uchar4>& image) void icf::ChannelStorage::frame(const cv::gpu::PtrStepSz<uchar3>& rgb, cudaStream_t stream)
{ {
// color convertin kernel // color convertin kernel
dim3 block(32, 8);
dim3 grid(FRAME_WIDTH / 32, FRAME_HEIGHT / 8);
uchar * channels = (uchar*)dmem.ptr(FRAME_HEIGHT * HOG_BINS);
device::rgb2grayluv<<<grid, block, 0, stream>>>((uchar3*)rgb.ptr(), channels, rgb.step, dmem.step);
cudaSafeCall( cudaGetLastError());
// hog calculation kernel // hog calculation kernel
channels = (uchar*)dmem.ptr(FRAME_HEIGHT * HOG_LUV_BINS);
device::gray2hog<<<grid, block, 0, stream>>>(channels, (uchar*)dmem.ptr(), dmem.step);
cudaSafeCall( cudaGetLastError() );
const int shrWidth = FRAME_WIDTH / shrinkage;
const int shrHeight = FRAME_HEIGHT / shrinkage;
// decimate kernel
grid = dim3(shrWidth / 32, shrHeight / 8);
device::decimate<<<grid, block, 0, stream>>>((uchar*)dmem.ptr(), (uchar*)shrunk.ptr(), dmem.step, shrunk.step);
cudaSafeCall( cudaGetLastError() );
// integrate rows
block = dim3(shrWidth, 1);
grid = dim3(shrHeight * HOG_LUV_BINS, 1);
device::intRow<<<grid, block, 0, stream>>>((uchar*)shrunk.ptr(), (ushort*)hogluv.ptr(), shrunk.step, hogluv.step);
cudaSafeCall( cudaGetLastError() );
// integrate cols
block = dim3(128, 1);
grid = dim3(shrWidth * HOG_LUV_BINS, 1);
device::intCol<<<grid, block, 0, stream>>>((ushort*)hogluv.ptr(), hogluv.step);
cudaSafeCall( cudaGetLastError() );
} }
}}
\ No newline at end of file
...@@ -46,17 +46,19 @@ ...@@ -46,17 +46,19 @@
#define __OPENCV_ICF_HPP__ #define __OPENCV_ICF_HPP__
#if defined __CUDACC__ #if defined __CUDACC__
# define __hd__ __host__ __device__ __forceinline__ # define __device __device__ __forceinline__
#else #else
# define __hd__ # define __device
#endif #endif
namespace icf { namespace cv { namespace gpu { namespace icf {
using cv::gpu::PtrStepSzb; using cv::gpu::PtrStepSzb;
using cv::gpu::PtrStepSzf; using cv::gpu::PtrStepSzf;
typedef unsigned char uchar;
struct Cascade struct Cascade
{ {
Cascade() {} Cascade() {}
...@@ -64,7 +66,8 @@ struct Cascade ...@@ -64,7 +66,8 @@ struct Cascade
const cv::gpu::PtrStepSzf& lvs, const cv::gpu::PtrStepSzb& fts, const cv::gpu::PtrStepSzb& lls) const cv::gpu::PtrStepSzf& lvs, const cv::gpu::PtrStepSzb& fts, const cv::gpu::PtrStepSzb& lls)
: octaves(octs), stages(sts), nodes(nds), leaves(lvs), features(fts), levels(lls) {} : octaves(octs), stages(sts), nodes(nds), leaves(lvs), features(fts), levels(lls) {}
void detect(const cv::gpu::PtrStepSzb& hogluv) const; void detect(const cv::gpu::PtrStepSzb& hogluv, cudaStream_t stream) const;
void __device detectAt() const;
PtrStepSzb octaves; PtrStepSzb octaves;
PtrStepSzf stages; PtrStepSzf stages;
...@@ -83,12 +86,24 @@ struct ChannelStorage ...@@ -83,12 +86,24 @@ struct ChannelStorage
const cv::gpu::PtrStepSzb& itg, const int s) const cv::gpu::PtrStepSzb& itg, const int s)
: dmem (buff), shrunk(shr), hogluv(itg), shrinkage(s) {} : dmem (buff), shrunk(shr), hogluv(itg), shrinkage(s) {}
void frame(const cv::gpu::PtrStepSz<uchar4>& image); void frame(const cv::gpu::PtrStepSz<uchar3>& rgb, cudaStream_t stream);
PtrStepSzb dmem; PtrStepSzb dmem;
PtrStepSzb shrunk; PtrStepSzb shrunk;
PtrStepSzb hogluv; PtrStepSzb hogluv;
enum
{
FRAME_WIDTH = 640,
FRAME_HEIGHT = 480,
TOTAL_SCALES = 55,
CLASSIFIERS = 5,
ORIG_OBJECT_WIDTH = 64,
ORIG_OBJECT_HEIGHT = 128,
HOG_BINS = 6,
HOG_LUV_BINS = 10
};
int shrinkage; int shrinkage;
}; };
...@@ -143,6 +158,6 @@ struct __align__(8) Level //is actually 24 bytes ...@@ -143,6 +158,6 @@ struct __align__(8) Level //is actually 24 bytes
objSize.y = round(oct.size.y * relScale); objSize.y = round(oct.size.y * relScale);
} }
}; };
} }}}
#endif #endif
\ No newline at end of file
...@@ -100,9 +100,9 @@ struct cv::gpu::SoftCascade::Filds ...@@ -100,9 +100,9 @@ struct cv::gpu::SoftCascade::Filds
}; };
bool fill(const FileNode &root, const float mins, const float maxs); bool fill(const FileNode &root, const float mins, const float maxs);
void detect() const void detect(cudaStream_t stream) const
{ {
cascade.detect(hogluv); cascade.detect(hogluv, stream);
} }
private: private:
...@@ -394,18 +394,20 @@ bool cv::gpu::SoftCascade::load( const string& filename, const float minScale, c ...@@ -394,18 +394,20 @@ bool cv::gpu::SoftCascade::load( const string& filename, const float minScale, c
} }
void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& image, const GpuMat& /*rois*/, void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& image, const GpuMat& /*rois*/,
GpuMat& /*objects*/, const int /*rejectfactor*/, Stream /*stream*/) GpuMat& /*objects*/, const int /*rejectfactor*/, Stream s)
{ {
// only color images are supperted // only color images are supperted
CV_Assert(image.type() == CV_8UC4); CV_Assert(image.type() == CV_8UC3);
// only this window size allowed // only this window size allowed
CV_Assert(image.cols == 640 && image.rows == 480); CV_Assert(image.cols == 640 && image.rows == 480);
Filds& flds = *filds; Filds& flds = *filds;
flds.storage.frame(image); cudaStream_t stream = StreamAccessor::getStream(s);
flds.detect();
flds.storage.frame(image, stream);
flds.detect(stream);
} }
#endif #endif
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment