Commit aa92be34 authored by marina.kolpakova's avatar marina.kolpakova

GK107 Policy

parent 580d8173
This diff is collapsed.
...@@ -72,9 +72,9 @@ struct __align__(16) Octave ...@@ -72,9 +72,9 @@ struct __align__(16) Octave
struct __align__(8) Level //is actually 24 bytes struct __align__(8) Level //is actually 24 bytes
{ {
int octave; int octave;
int step;
float relScale; float relScale;
float shrScale; // used for marking detection
float scaling[2]; // calculated according to Dollal paper float scaling[2]; // calculated according to Dollal paper
// for 640x480 we can not get overflow // for 640x480 we can not get overflow
...@@ -115,31 +115,41 @@ struct __align__(16) Detection ...@@ -115,31 +115,41 @@ struct __align__(16) Detection
: x(_x), y(_y), w(_w), h(_h), confidence(c), kind(0) {}; : x(_x), y(_y), w(_w), h(_h), confidence(c), kind(0) {};
}; };
struct CascadePolicy struct GK107PolicyX4
{ {
enum {STA_X = 32, STA_Y = 8}; enum {WARP = 32, STA_X = WARP, STA_Y = 8, SHRINKAGE = 4};
static const dim3 block()
{
return dim3(GK107PolicyX4::STA_X, GK107PolicyX4::STA_Y);
}
}; };
template<typename Policy> template<typename Policy>
struct CascadeInvoker struct CascadeInvoker
{ {
CascadeInvoker(): levels(0), octaves(0), stages(0), nodes(0), leaves(0) {} CascadeInvoker(): levels(0), stages(0), nodes(0), leaves(0), scales(0) {}
CascadeInvoker(const PtrStepSzb& _levels, const PtrStepSzb& _octaves, const PtrStepSzf& _stages, CascadeInvoker(const PtrStepSzb& _levels, const PtrStepSzb& _octaves, const PtrStepSzf& _stages,
const PtrStepSzb& _nodes, const PtrStepSzf& _leaves) const PtrStepSzb& _nodes, const PtrStepSzf& _leaves)
: levels((const Level*)_levels.ptr()), octaves((const Octave*)_octaves.ptr()), stages((const float*)_stages.ptr()), : levels((const Level*)_levels.ptr()),
nodes((const Node*)_nodes.ptr()), leaves((const float*)_leaves.ptr()) stages((const float*)_stages.ptr()),
nodes((const Node*)_nodes.ptr()), leaves((const float*)_leaves.ptr()),
scales(_levels.cols / sizeof(Level))
{} {}
const Level* levels; const Level* levels;
const Octave* octaves;
const float* stages; const float* stages;
const Node* nodes; const Node* nodes;
const float* leaves; const float* leaves;
int scales;
void operator()(const PtrStepSzb& roi, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects, void operator()(const PtrStepSzb& roi, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects,
PtrStepSzi counter, const int downscales, const int csale = -1, const cudaStream_t& stream = 0) const; PtrStepSzi counter, const int downscales, const int csale = -1, const cudaStream_t& stream = 0) const;
template<bool isUp>
__device void detect(Detection* objects, const uint ndetections, uint* ctr, const int downscales) const;
}; };
} }
......
...@@ -63,7 +63,7 @@ void cv::gpu::SCascade::read(const FileNode& fn) { Algorithm::read(fn); } ...@@ -63,7 +63,7 @@ void cv::gpu::SCascade::read(const FileNode& fn) { Algorithm::read(fn); }
#include <icf.hpp> #include <icf.hpp>
cv::gpu::device::icf::Level::Level(int idx, const Octave& oct, const float scale, const int w, const int h) cv::gpu::device::icf::Level::Level(int idx, const Octave& oct, const float scale, const int w, const int h)
: octave(idx), relScale(scale / oct.scale), shrScale (relScale / (float)oct.shrinkage) : octave(idx), step(oct.stages), relScale(scale / oct.scale)
{ {
workRect.x = round(w / (float)oct.shrinkage); workRect.x = round(w / (float)oct.shrinkage);
workRect.y = round(h / (float)oct.shrinkage); workRect.y = round(h / (float)oct.shrinkage);
...@@ -100,7 +100,7 @@ namespace imgproc { ...@@ -100,7 +100,7 @@ namespace imgproc {
struct cv::gpu::SCascade::Fields struct cv::gpu::SCascade::Fields
{ {
static Fields* parseCascade(const FileNode &root, const float mins, const float maxs) static Fields* parseCascade(const FileNode &root, const float mins, const float maxs, const int totals)
{ {
static const char *const SC_STAGE_TYPE = "stageType"; static const char *const SC_STAGE_TYPE = "stageType";
static const char *const SC_BOOST = "BOOST"; static const char *const SC_BOOST = "BOOST";
...@@ -119,11 +119,8 @@ struct cv::gpu::SCascade::Fields ...@@ -119,11 +119,8 @@ struct cv::gpu::SCascade::Fields
static const char *const SC_ORIG_W = "width"; static const char *const SC_ORIG_W = "width";
static const char *const SC_ORIG_H = "height"; static const char *const SC_ORIG_H = "height";
int origWidth = (int)root[SC_ORIG_W]; int origWidth = (int)root[SC_ORIG_W];
CV_Assert(origWidth == ORIG_OBJECT_WIDTH);
int origHeight = (int)root[SC_ORIG_H]; int origHeight = (int)root[SC_ORIG_H];
CV_Assert(origHeight == ORIG_OBJECT_HEIGHT);
static const char *const SC_OCTAVES = "octaves"; static const char *const SC_OCTAVES = "octaves";
static const char *const SC_STAGES = "stages"; static const char *const SC_STAGES = "stages";
...@@ -142,7 +139,6 @@ struct cv::gpu::SCascade::Fields ...@@ -142,7 +139,6 @@ struct cv::gpu::SCascade::Fields
static const char * const SC_F_CHANNEL = "channel"; static const char * const SC_F_CHANNEL = "channel";
static const char * const SC_F_RECT = "rect"; static const char * const SC_F_RECT = "rect";
FileNode fn = root[SC_OCTAVES]; FileNode fn = root[SC_OCTAVES];
if (fn.empty()) return false; if (fn.empty()) return false;
...@@ -167,8 +163,8 @@ struct cv::gpu::SCascade::Fields ...@@ -167,8 +163,8 @@ struct cv::gpu::SCascade::Fields
ushort nstages = saturate_cast<ushort>((int)fns[SC_OCT_STAGES]); ushort nstages = saturate_cast<ushort>((int)fns[SC_OCT_STAGES]);
ushort2 size; ushort2 size;
size.x = cvRound(ORIG_OBJECT_WIDTH * scale); size.x = cvRound(origWidth * scale);
size.y = cvRound(ORIG_OBJECT_HEIGHT * scale); size.y = cvRound(origHeight * scale);
shrinkage = saturate_cast<ushort>((int)fns[SC_OCT_SHRINKAGE]); shrinkage = saturate_cast<ushort>((int)fns[SC_OCT_SHRINKAGE]);
Octave octave(octIndex, nstages, shrinkage, size, scale); Octave octave(octIndex, nstages, shrinkage, size, scale);
...@@ -245,11 +241,11 @@ struct cv::gpu::SCascade::Fields ...@@ -245,11 +241,11 @@ struct cv::gpu::SCascade::Fields
CV_Assert(!hleaves.empty()); CV_Assert(!hleaves.empty());
std::vector<Level> vlevels; std::vector<Level> vlevels;
float logFactor = (::log(maxs) - ::log(mins)) / (TOTAL_SCALES -1); float logFactor = (::log(maxs) - ::log(mins)) / (totals -1);
float scale = mins; float scale = mins;
int downscales = 0; int downscales = 0;
for (int sc = 0; sc < TOTAL_SCALES; ++sc) for (int sc = 0; sc < totals; ++sc)
{ {
int width = ::std::max(0.0f, FRAME_WIDTH - (origWidth * scale)); int width = ::std::max(0.0f, FRAME_WIDTH - (origWidth * scale));
int height = ::std::max(0.0f, FRAME_HEIGHT - (origHeight * scale)); int height = ::std::max(0.0f, FRAME_HEIGHT - (origHeight * scale));
...@@ -302,7 +298,7 @@ struct cv::gpu::SCascade::Fields ...@@ -302,7 +298,7 @@ struct cv::gpu::SCascade::Fields
leaves.upload(hleaves); leaves.upload(hleaves);
levels.upload(hlevels); levels.upload(hlevels);
invoker = device::icf::CascadeInvoker<device::icf::CascadePolicy>(levels, octaves, stages, nodes, leaves); invoker = device::icf::CascadeInvoker<device::icf::GK107PolicyX4>(levels, octaves, stages, nodes, leaves);
} }
...@@ -456,16 +452,13 @@ public: ...@@ -456,16 +452,13 @@ public:
GpuMat sobelBuf; GpuMat sobelBuf;
device::icf::CascadeInvoker<device::icf::CascadePolicy> invoker; device::icf::CascadeInvoker<device::icf::GK107PolicyX4> invoker;
enum { BOOST = 0 }; enum { BOOST = 0 };
enum enum
{ {
FRAME_WIDTH = 640, FRAME_WIDTH = 640,
FRAME_HEIGHT = 480, FRAME_HEIGHT = 480,
TOTAL_SCALES = 55,
ORIG_OBJECT_WIDTH = 64,
ORIG_OBJECT_HEIGHT = 128,
HOG_BINS = 6, HOG_BINS = 6,
LUV_BINS = 3, LUV_BINS = 3,
HOG_LUV_BINS = 10 HOG_LUV_BINS = 10
...@@ -480,21 +473,19 @@ cv::gpu::SCascade::~SCascade() { delete fields; } ...@@ -480,21 +473,19 @@ cv::gpu::SCascade::~SCascade() { delete fields; }
bool cv::gpu::SCascade::load(const FileNode& fn) bool cv::gpu::SCascade::load(const FileNode& fn)
{ {
if (fields) delete fields; if (fields) delete fields;
fields = Fields::parseCascade(fn, minScale, maxScale); fields = Fields::parseCascade(fn, minScale, maxScale, scales);
return fields != 0; return fields != 0;
} }
void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _objects, Stream& s) const void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _objects, Stream& s) const
{ {
CV_Assert(fields);
const GpuMat colored = image.getGpuMat(); const GpuMat colored = image.getGpuMat();
// only color images are supperted // only color images are supperted
CV_Assert(colored.type() == CV_8UC3 || colored.type() == CV_32SC1); CV_Assert(colored.type() == CV_8UC3 || colored.type() == CV_32SC1);
GpuMat rois = _rois.getGpuMat(), objects = _objects.getGpuMat(); GpuMat rois = _rois.getGpuMat(), objects = _objects.getGpuMat();
// we guess user knows about shrincage
// CV_Assert((rois.size().width == getRoiSize().height) && (rois.type() == CV_8UC1));
Fields& flds = *fields; Fields& flds = *fields;
if (colored.type() == CV_8UC3) if (colored.type() == CV_8UC3)
...@@ -518,15 +509,13 @@ void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _ ...@@ -518,15 +509,13 @@ void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _
void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _objects, const int level, Stream& s) const void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _objects, const int level, Stream& s) const
{ {
CV_Assert(fields);
const GpuMat colored = image.getGpuMat(); const GpuMat colored = image.getGpuMat();
// only color images are supperted // only color images are supperted
CV_Assert(colored.type() == CV_8UC3 || colored.type() == CV_32SC1); CV_Assert(colored.type() == CV_8UC3 || colored.type() == CV_32SC1);
// we guess user knows about shrincage
// CV_Assert((rois.size().width == getRoiSize().height) && (rois.type() == CV_8UC1));
Fields& flds = *fields; Fields& flds = *fields;
if (colored.type() == CV_8UC3) if (colored.type() == CV_8UC3)
{ {
// only this window size allowed // only this window size allowed
...@@ -549,6 +538,8 @@ void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _ ...@@ -549,6 +538,8 @@ void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _
void cv::gpu::SCascade::genRoi(InputArray _roi, OutputArray _mask, Stream& stream) const void cv::gpu::SCascade::genRoi(InputArray _roi, OutputArray _mask, Stream& stream) const
{ {
CV_Assert(fields);
const GpuMat roi = _roi.getGpuMat(); const GpuMat roi = _roi.getGpuMat();
_mask.create( roi.cols / 4, roi.rows / 4, roi.type() ); _mask.create( roi.cols / 4, roi.rows / 4, roi.type() );
GpuMat mask = _mask.getGpuMat(); GpuMat mask = _mask.getGpuMat();
......
...@@ -172,7 +172,7 @@ GPU_TEST_P(SCascadeTestRoi, detect, ...@@ -172,7 +172,7 @@ GPU_TEST_P(SCascadeTestRoi, detect,
sub.setTo(1); sub.setTo(1);
cv::rectangle(result, r, cv::Scalar(0, 0, 255, 255), 1); cv::rectangle(result, r, cv::Scalar(0, 0, 255, 255), 1);
} }
objectBoxes.setTo(0);
cascade.genRoi(rois, trois); cascade.genRoi(rois, trois);
cascade.detect(colored, trois, objectBoxes); cascade.detect(colored, trois, objectBoxes);
...@@ -222,7 +222,7 @@ GPU_TEST_P(SCascadeTestLevel, detect, ...@@ -222,7 +222,7 @@ GPU_TEST_P(SCascadeTestLevel, detect,
cv::gpu::GpuMat trois; cv::gpu::GpuMat trois;
cascade.genRoi(rois, trois); cascade.genRoi(rois, trois);
objectBoxes.setTo(0);
int level = GET_PARAM(3); int level = GET_PARAM(3);
cascade.detect(colored, trois, objectBoxes, level); cascade.detect(colored, trois, objectBoxes, level);
...@@ -281,7 +281,7 @@ GPU_TEST_P(SCascadeTestAll, detect, ...@@ -281,7 +281,7 @@ GPU_TEST_P(SCascadeTestAll, detect,
cv::gpu::GpuMat trois; cv::gpu::GpuMat trois;
cascade.genRoi(rois, trois); cascade.genRoi(rois, trois);
objectBoxes.setTo(0);
cascade.detect(colored, trois, objectBoxes); cascade.detect(colored, trois, objectBoxes);
typedef cv::gpu::SCascade::Detection Detection; typedef cv::gpu::SCascade::Detection Detection;
...@@ -321,7 +321,7 @@ GPU_TEST_P(SCascadeTestAll, detectOnIntegral, ...@@ -321,7 +321,7 @@ GPU_TEST_P(SCascadeTestAll, detectOnIntegral,
cv::gpu::GpuMat trois; cv::gpu::GpuMat trois;
cascade.genRoi(rois, trois); cascade.genRoi(rois, trois);
objectBoxes.setTo(0);
cascade.detect(hogluv, trois, objectBoxes); cascade.detect(hogluv, trois, objectBoxes);
typedef cv::gpu::SCascade::Detection Detection; typedef cv::gpu::SCascade::Detection Detection;
...@@ -357,7 +357,7 @@ GPU_TEST_P(SCascadeTestAll, detectStream, ...@@ -357,7 +357,7 @@ GPU_TEST_P(SCascadeTestAll, detectStream,
cv::gpu::GpuMat trois; cv::gpu::GpuMat trois;
cascade.genRoi(rois, trois, s); cascade.genRoi(rois, trois, s);
objectBoxes.setTo(0);
cascade.detect(colored, trois, objectBoxes, s); cascade.detect(colored, trois, objectBoxes, s);
cudaDeviceSynchronize(); cudaDeviceSynchronize();
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment