Commit b52fea7f authored by marina.kolpakova's avatar marina.kolpakova

update soft cascade interface: - add class Detection in interface, - split…

update soft cascade interface:  - add class Detection to the interface,  - split sync- and async- versions,  - add support for detecting at a specific scale.
parent 612a2585
...@@ -1537,6 +1537,18 @@ public: ...@@ -1537,6 +1537,18 @@ public:
class CV_EXPORTS SoftCascade class CV_EXPORTS SoftCascade
{ {
public: public:
struct CV_EXPORTS Detection
{
// Detected rectangle in image coordinates: (x, y) is the top-left
// corner, (w, h) the width and height. Stored as ushort to keep the
// struct compact for device-side writes.
ushort x;
ushort y;
ushort w;
ushort h;
// Detector confidence score for this rectangle; higher is more confident.
float confidence;
// Object category; currently only PEDESTRIAN is defined.
int kind;
enum {PEDESTRIAN = 0};
};
//! An empty cascade will be created. //! An empty cascade will be created.
SoftCascade(); SoftCascade();
...@@ -1559,9 +1571,19 @@ public: ...@@ -1559,9 +1571,19 @@ public:
//! Param rois is a mask //! Param rois is a mask
//! Param objects 4-channel matrix thet contain detected rectangles //! Param objects 4-channel matrix thet contain detected rectangles
//! Param rejectfactor used for final object box computing //! Param rejectfactor used for final object box computing
//! Param stream
virtual void detectMultiScale(const GpuMat& image, const GpuMat& rois, GpuMat& objects, virtual void detectMultiScale(const GpuMat& image, const GpuMat& rois, GpuMat& objects,
int rejectfactor = 1, Stream stream = Stream::Null()); int rejectfactor = 1, int specificScale = -1);
//! detect specific objects in the input frame for all scales computed from minScale and maxScale values.
//! asynchronous version.
//! Param image is input frame for detector. Cascade will be applied to it.
//! Param rois is a mask
//! Param objects 4-channel matrix that contains detected rectangles
//! Param rejectfactor used for final object box computing
//! Param ndet retrieves number of detections
//! Param stream wrapper for CUDA stream
virtual void detectMultiScale(const GpuMat& image, const GpuMat& rois, GpuMat& objects,
int rejectfactor, GpuMat& ndet, Stream stream);
private: private:
struct Filds; struct Filds;
......
...@@ -105,7 +105,7 @@ namespace icf { ...@@ -105,7 +105,7 @@ namespace icf {
float sarea = (scaledRect.z - scaledRect.x) * (scaledRect.w - scaledRect.y); float sarea = (scaledRect.z - scaledRect.x) * (scaledRect.w - scaledRect.y);
const float expected_new_area = farea * relScale * relScale; const float expected_new_area = farea * relScale * relScale;
float approx = sarea / expected_new_area; float approx = __fdividef(sarea, expected_new_area);
dprintf("%d: new rect: %d box %d %d %d %d rel areas %f %f\n",threadIdx.x, (node.threshold >> 28), dprintf("%d: new rect: %d box %d %d %d %d rel areas %f %f\n",threadIdx.x, (node.threshold >> 28),
scaledRect.x, scaledRect.y, scaledRect.z, scaledRect.w, farea * relScale * relScale, sarea); scaledRect.x, scaledRect.y, scaledRect.z, scaledRect.w, farea * relScale * relScale, sarea);
...@@ -198,12 +198,13 @@ namespace icf { ...@@ -198,12 +198,13 @@ namespace icf {
// } // }
__global__ void test_kernel_warp(const Level* levels, const Octave* octaves, const float* stages, __global__ void test_kernel_warp(const Level* levels, const Octave* octaves, const float* stages,
const Node* nodes, const float* leaves, Detection* objects, const uint ndetections, uint* ctr) const Node* nodes, const float* leaves, Detection* objects, const uint ndetections, uint* ctr,
const int downscales)
{ {
const int y = blockIdx.y * blockDim.y + threadIdx.y; const int y = blockIdx.y * blockDim.y + threadIdx.y;
const int x = blockIdx.x; const int x = blockIdx.x;
Level level = levels[blockIdx.z]; Level level = levels[downscales + blockIdx.z];
if(x >= level.workRect.x || y >= level.workRect.y) return; if(x >= level.workRect.x || y >= level.workRect.y) return;
...@@ -236,7 +237,7 @@ namespace icf { ...@@ -236,7 +237,7 @@ namespace icf {
dprintf("%d: decided: %d (%d >= %f) %d %f\n\n" ,threadIdx.x, next, sum, threshold, lShift, impact); dprintf("%d: decided: %d (%d >= %f) %d %f\n\n" ,threadIdx.x, next, sum, threshold, lShift, impact);
dprintf("%d: extracted stage: %f\n",threadIdx.x, stages[(st + threadIdx.x)]); dprintf("%d: extracted stage: %f\n",threadIdx.x, stages[(st + threadIdx.x)]);
dprintf("%d: computed score: %f\n",threadIdx.x, impact); dprintf("%d: computed score: %f\n",threadIdx.x, impact);
#pragma unroll
// scan on shuffl functions // scan on shuffl functions
for (int i = 1; i < 32; i *= 2) for (int i = 1; i < 32; i *= 2)
{ {
...@@ -263,13 +264,13 @@ namespace icf { ...@@ -263,13 +264,13 @@ namespace icf {
void detect(const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages, void detect(const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages,
const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv, const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv,
PtrStepSz<uchar4> objects, PtrStepSzi counter) PtrStepSz<uchar4> objects, PtrStepSzi counter, const int downscales)
{ {
int fw = 160; int fw = 160;
int fh = 120; int fh = 120;
dim3 block(32, 8); dim3 block(32, 8);
dim3 grid(fw, fh / 8, 47); dim3 grid(fw, fh / 8, downscales);
const Level* l = (const Level*)levels.ptr(); const Level* l = (const Level*)levels.ptr();
const Octave* oct = ((const Octave*)octaves.ptr()); const Octave* oct = ((const Octave*)octaves.ptr());
...@@ -283,8 +284,38 @@ namespace icf { ...@@ -283,8 +284,38 @@ namespace icf {
cudaChannelFormatDesc desc = cudaCreateChannelDesc<int>(); cudaChannelFormatDesc desc = cudaCreateChannelDesc<int>();
cudaSafeCall( cudaBindTexture2D(0, thogluv, hogluv.data, desc, hogluv.cols, hogluv.rows, hogluv.step)); cudaSafeCall( cudaBindTexture2D(0, thogluv, hogluv.data, desc, hogluv.cols, hogluv.rows, hogluv.step));
test_kernel_warp<<<grid, block>>>(l, oct, st, nd, lf, det, max_det, ctr); test_kernel_warp<<<grid, block>>>(l, oct, st, nd, lf, det, max_det, ctr, 0);
cudaSafeCall( cudaGetLastError());
grid = dim3(fw, fh / 8, 47 - downscales);
test_kernel_warp<<<grid, block>>>(l, oct, st, nd, lf, det, max_det, ctr, downscales);
cudaSafeCall( cudaGetLastError());
cudaSafeCall( cudaDeviceSynchronize());
}
// Run the soft-cascade kernel for exactly one scale level.
// 'scale' is forwarded to the kernel as the level offset, so with a
// single z-slice (grid.z == 1) the kernel evaluates levels[scale].
void detectAtScale(const int scale, const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages,
    const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects,
    PtrStepSzi counter)
{
    // Fixed working-frame dimensions of the detector (160 x 120 cells).
    const int fw = 160;
    const int fh = 120;

    // 32x8 threads per block; one block row covers 8 image rows.
    // A single z-slice restricts the launch to the requested level.
    dim3 block(32, 8);
    dim3 grid(fw, fh / 8, 1);

    // Reinterpret the raw GPU buffers as the typed arrays the kernel expects.
    const Level*  lv  = (const Level*)levels.ptr();
    const Octave* oc  = (const Octave*)octaves.ptr();
    const float*  stg = (const float*)stages.ptr();
    const Node*   nds = (const Node*)nodes.ptr();
    const float*  lvs = (const float*)leaves.ptr();

    uint* counterPtr = (uint*)counter.ptr();
    Detection* dets  = (Detection*)objects.ptr();

    // Capacity of the output buffer, measured in Detection records.
    const uint maxDetections = objects.cols / sizeof(Detection);

    // Bind the integral-channel (hogluv) data as a 2D int texture.
    cudaChannelFormatDesc desc = cudaCreateChannelDesc<int>();
    cudaSafeCall( cudaBindTexture2D(0, thogluv, hogluv.data, desc, hogluv.cols, hogluv.rows, hogluv.step));

    test_kernel_warp<<<grid, block>>>(lv, oc, stg, nds, lvs, dets, maxDetections, counterPtr, scale);
    cudaSafeCall( cudaGetLastError());
    cudaSafeCall( cudaDeviceSynchronize());
}
......
...@@ -49,7 +49,11 @@ cv::gpu::SoftCascade::SoftCascade() : filds(0) { throw_nogpu(); } ...@@ -49,7 +49,11 @@ cv::gpu::SoftCascade::SoftCascade() : filds(0) { throw_nogpu(); }
cv::gpu::SoftCascade::SoftCascade( const string&, const float, const float) : filds(0) { throw_nogpu(); } cv::gpu::SoftCascade::SoftCascade( const string&, const float, const float) : filds(0) { throw_nogpu(); }
cv::gpu::SoftCascade::~SoftCascade() { throw_nogpu(); } cv::gpu::SoftCascade::~SoftCascade() { throw_nogpu(); }
bool cv::gpu::SoftCascade::load( const string&, const float, const float) { throw_nogpu(); return false; } bool cv::gpu::SoftCascade::load( const string&, const float, const float) { throw_nogpu(); return false; }
void cv::gpu::SoftCascade::detectMultiScale(const GpuMat&, const GpuMat&, GpuMat&, const int, Stream) { throw_nogpu();} void cv::gpu::SoftCascade::detectMultiScale(const GpuMat&, const GpuMat&, GpuMat&, const int, int) { throw_nogpu();}
// Asynchronous detectMultiScale overload for builds without CUDA support:
// unconditionally throws (throw_nogpu), performing no detection.
void cv::gpu::SoftCascade::detectMultiScale(const GpuMat&, const GpuMat&, GpuMat&, int, GpuMat&, Stream)
{
throw_nogpu();
}
#else #else
...@@ -60,6 +64,9 @@ namespace icf { ...@@ -60,6 +64,9 @@ namespace icf {
void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle, void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle,
const int fw, const int fh, const int bins); const int fw, const int fh, const int bins);
void detect(const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages, void detect(const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages,
const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects,
PtrStepSzi counter, const int downscales);
void detectAtScale(const int scale, const PtrStepSzb& levels, const PtrStepSzb& octaves, const PtrStepSzf& stages,
const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects, const PtrStepSzb& nodes, const PtrStepSzf& leaves, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects,
PtrStepSzi counter); PtrStepSzi counter);
} }
...@@ -86,6 +93,8 @@ struct cv::gpu::SoftCascade::Filds ...@@ -86,6 +93,8 @@ struct cv::gpu::SoftCascade::Filds
int origObjWidth; int origObjWidth;
int origObjHeight; int origObjHeight;
int downscales;
GpuMat octaves; GpuMat octaves;
GpuMat stages; GpuMat stages;
GpuMat nodes; GpuMat nodes;
...@@ -120,7 +129,6 @@ struct cv::gpu::SoftCascade::Filds ...@@ -120,7 +129,6 @@ struct cv::gpu::SoftCascade::Filds
FRAME_WIDTH = 640, FRAME_WIDTH = 640,
FRAME_HEIGHT = 480, FRAME_HEIGHT = 480,
TOTAL_SCALES = 55, TOTAL_SCALES = 55,
// CLASSIFIERS = 5,
ORIG_OBJECT_WIDTH = 64, ORIG_OBJECT_WIDTH = 64,
ORIG_OBJECT_HEIGHT = 128, ORIG_OBJECT_HEIGHT = 128,
HOG_BINS = 6, HOG_BINS = 6,
...@@ -132,7 +140,14 @@ struct cv::gpu::SoftCascade::Filds ...@@ -132,7 +140,14 @@ struct cv::gpu::SoftCascade::Filds
void detect(cv::gpu::GpuMat objects, cudaStream_t stream) const void detect(cv::gpu::GpuMat objects, cudaStream_t stream) const
{ {
cudaMemset(detCounter.data, 0, detCounter.step * detCounter.rows * sizeof(int)); cudaMemset(detCounter.data, 0, detCounter.step * detCounter.rows * sizeof(int));
device::icf::detect(levels, octaves, stages, nodes, leaves, hogluv, objects , detCounter); device::icf::detect(levels, octaves, stages, nodes, leaves, hogluv, objects , detCounter, downscales);
}
// Run detection for a single scale level.
// Param scale   index of the level to evaluate (forwarded to the kernel
//               as the level offset).
// Param objects device buffer the kernel writes Detection records into.
// Param stream  currently unused; the launch runs on the default stream.
void detectAtScale(int scale, cv::gpu::GpuMat objects, cudaStream_t stream) const
{
    // Zero the device-side detection counter before launching, and check
    // the API result (file convention is cudaSafeCall on every CUDA call).
    // NOTE(review): GpuMat::step is already in bytes, so step * rows spans
    // the whole buffer; the previous "* sizeof(int)" factor over-counted
    // by 4x and could write past the allocation.
    cudaSafeCall( cudaMemset(detCounter.data, 0, detCounter.step * detCounter.rows) );
    device::icf::detectAtScale(scale, levels, octaves, stages, nodes, leaves, hogluv, objects,
        detCounter);
}
private: private:
...@@ -160,7 +175,7 @@ private: ...@@ -160,7 +175,7 @@ private:
} }
}; };
inline bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float mins, const float maxs) bool cv::gpu::SoftCascade::Filds::fill(const FileNode &root, const float mins, const float maxs)
{ {
using namespace device::icf; using namespace device::icf;
minScale = mins; minScale = mins;
...@@ -351,6 +366,7 @@ inline void cv::gpu::SoftCascade::Filds::calcLevels(const std::vector<device::ic ...@@ -351,6 +366,7 @@ inline void cv::gpu::SoftCascade::Filds::calcLevels(const std::vector<device::ic
float logFactor = (::log(maxScale) - ::log(minScale)) / (nscales -1); float logFactor = (::log(maxScale) - ::log(minScale)) / (nscales -1);
float scale = minScale; float scale = minScale;
downscales = 0;
for (int sc = 0; sc < nscales; ++sc) for (int sc = 0; sc < nscales; ++sc)
{ {
int width = ::std::max(0.0f, frameW - (origObjWidth * scale)); int width = ::std::max(0.0f, frameW - (origObjWidth * scale));
...@@ -366,7 +382,10 @@ inline void cv::gpu::SoftCascade::Filds::calcLevels(const std::vector<device::ic ...@@ -366,7 +382,10 @@ inline void cv::gpu::SoftCascade::Filds::calcLevels(const std::vector<device::ic
if (!width || !height) if (!width || !height)
break; break;
else else
{
vlevels.push_back(level); vlevels.push_back(level);
if (octs[fit].scale < 1) ++downscales;
}
if (::fabs(scale - maxScale) < FLT_EPSILON) break; if (::fabs(scale - maxScale) < FLT_EPSILON) break;
scale = ::std::min(maxScale, ::expf(::log(scale) + logFactor)); scale = ::std::min(maxScale, ::expf(::log(scale) + logFactor));
...@@ -424,8 +443,11 @@ namespace { ...@@ -424,8 +443,11 @@ namespace {
return s; return s;
} }
} }
//================================== synchronous version ============================================================//
void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat& /*rois*/, void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat& /*rois*/,
GpuMat& objects, const int /*rejectfactor*/, Stream s) GpuMat& objects, const int /*rejectfactor*/, int specificScale)
{ {
// only color images are supperted // only color images are supperted
CV_Assert(colored.type() == CV_8UC3); CV_Assert(colored.type() == CV_8UC3);
...@@ -513,11 +535,21 @@ void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat& ...@@ -513,11 +535,21 @@ void cv::gpu::SoftCascade::detectMultiScale(const GpuMat& colored, const GpuMat&
} }
#endif #endif
cudaStream_t stream = StreamAccessor::getStream(s); if (specificScale == -1)
flds.detect(objects, stream); flds.detect(objects, 0);
else
flds.detectAtScale(specificScale, objects, 0);
cv::Mat out(flds.detCounter);
int ndetections = *(out.data);
// cv::Mat out(flds.detCounter); objects = GpuMat(objects, cv::Rect(0, 0, ndetections * sizeof(Detection), 1));
// std::cout << out << std::endl;
} }
// Asynchronous version of detectMultiScale: not implemented yet — the body
// is empty, so calling it currently performs no detection and writes nothing
// to the output arguments.
// TODO(review): implement using the supplied Stream (see the commented-out
// stream accessor below).
void cv::gpu::SoftCascade::detectMultiScale(const GpuMat&, const GpuMat&, GpuMat&, int, GpuMat&, Stream)
{
// cudaStream_t stream = StreamAccessor::getStream(s);
}
#endif #endif
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment