Commit 60c0e41b authored by marina.kolpakova's avatar marina.kolpakova

integrate NMS (Dollar's criteria)

parent d2e88e1d
...@@ -88,19 +88,23 @@ namespace icf { ...@@ -88,19 +88,23 @@ namespace icf {
return (w < 0 || h < 0)? 0.f : (float)(w * h); return (w < 0 || h < 0)? 0.f : (float)(w * h);
} }
__global__ void overlap(const uint* n, const Detection* detections, uchar* overlaps) texture<uint4, cudaTextureType2D, cudaReadModeElementType> tdetections;
__global__ void overlap(const uint* n, uchar* overlaps)
{ {
const int idx = threadIdx.x; const int idx = threadIdx.x;
const int total = *n; const int total = *n;
for (int i = idx; i < total; i += 192) for (int i = idx + 1; i < total; i += 192)
{ {
const Detection& a = detections[i]; const uint4 _a = tex2D(tdetections, i, 0);
const Detection& a = *((Detection*)(&_a));
bool excluded = false; bool excluded = false;
for (int j = i + 1; j < total; ++j) for (int j = i + 1; j < total; ++j)
{ {
const Detection& b = detections[j]; const uint4 _b = tex2D(tdetections, j, 0);
const Detection& b = *((Detection*)(&_b));
float ovl = overlapArea(a, b) / ::min(a.w * a.h, b.w * b.h); float ovl = overlapArea(a, b) / ::min(a.w * a.h, b.w * b.h);
if (ovl > 0.65f) if (ovl > 0.65f)
...@@ -115,7 +119,7 @@ namespace icf { ...@@ -115,7 +119,7 @@ namespace icf {
} }
} }
__global__ void collect(const uint* n, const Detection* detections, uchar* overlaps) __global__ void collect(const uint* n, uchar* overlaps, uint* ctr, uint4* suppressed)
{ {
const int idx = threadIdx.x; const int idx = threadIdx.x;
const int total = *n; const int total = *n;
...@@ -124,19 +128,24 @@ namespace icf { ...@@ -124,19 +128,24 @@ namespace icf {
{ {
if (!overlaps[i]) if (!overlaps[i])
{ {
const Detection& det = detections[i]; int oidx = atomicInc(ctr, 50);
// printf("%d: %d %d %d %d %f\n", i, det.x, det.y, det.w, det.h, det.confidence ); suppressed[oidx] = tex2D(tdetections, i + 1, 0);
} }
} }
} }
void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections) void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections, PtrStepSzb suppressed)
{ {
int block = 192; int block = 192;
int grid = 1; int grid = 1;
overlap<<<grid, block>>>((uint*)ndetections.ptr(0), (Detection*)objects.ptr(0), (uchar*)overlaps.ptr(0)); cudaChannelFormatDesc desc = cudaCreateChannelDesc<uint4>();
collect<<<grid, block>>>((uint*)ndetections.ptr(0), (Detection*)objects.ptr(0), (uchar*)overlaps.ptr(0)); size_t offset;
cudaSafeCall( cudaBindTexture2D(&offset, tdetections, objects.data, desc, objects.cols / sizeof(uint4), objects.rows, objects.step));
overlap<<<grid, block>>>((uint*)ndetections.ptr(0), (uchar*)overlaps.ptr(0));
collect<<<grid, block>>>((uint*)ndetections.ptr(0), (uchar*)overlaps.ptr(0), (uint*)suppressed.ptr(0), ((uint4*)suppressed.ptr(0)) + 1);
// if (!stream) // if (!stream)
{ {
cudaSafeCall( cudaGetLastError()); cudaSafeCall( cudaGetLastError());
......
...@@ -86,7 +86,7 @@ namespace icf { ...@@ -86,7 +86,7 @@ namespace icf {
void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle, void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle,
const int fw, const int fh, const int bins, cudaStream_t stream); const int fw, const int fh, const int bins, cudaStream_t stream);
void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections); void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections, PtrStepSzb suppressed);
} }
namespace imgproc { namespace imgproc {
...@@ -312,6 +312,7 @@ struct cv::gpu::SCascade::Fields ...@@ -312,6 +312,7 @@ struct cv::gpu::SCascade::Fields
hogluv.setTo(cv::Scalar::all(0)); hogluv.setTo(cv::Scalar::all(0));
overlaps.create(1, 5000, CV_8UC1); overlaps.create(1, 5000, CV_8UC1);
suppressed.create(1, sizeof(Detection) * 51, CV_8UC1);
return true; return true;
} }
...@@ -447,7 +448,9 @@ public: ...@@ -447,7 +448,9 @@ public:
{ {
ensureSizeIsEnough(objects.rows, objects.cols, CV_8UC1, overlaps); ensureSizeIsEnough(objects.rows, objects.cols, CV_8UC1, overlaps);
overlaps.setTo(0); overlaps.setTo(0);
device::icf::suppress(objects, overlaps, ndetections); suppressed.setTo(0);
device::icf::suppress(objects, overlaps, ndetections, suppressed);
// std::cout << cv::Mat(overlaps) << std::endl; // std::cout << cv::Mat(overlaps) << std::endl;
} }
...@@ -484,6 +487,9 @@ public: ...@@ -484,6 +487,9 @@ public:
// used for area overlap computing during suppression // used for area overlap computing during suppression
GpuMat overlaps; GpuMat overlaps;
// used for suppression
GpuMat suppressed;
// Cascade from xml // Cascade from xml
GpuMat octaves; GpuMat octaves;
GpuMat stages; GpuMat stages;
...@@ -525,7 +531,6 @@ bool cv::gpu::SCascade::load(const FileNode& fn) ...@@ -525,7 +531,6 @@ bool cv::gpu::SCascade::load(const FileNode& fn)
void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _objects, Stream& s) const void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _objects, Stream& s) const
{ {
CV_Assert(fields); CV_Assert(fields);
const GpuMat colored = image.getGpuMat(); const GpuMat colored = image.getGpuMat();
// only color images are supported // only color images are supported
...@@ -545,6 +550,7 @@ void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _ ...@@ -545,6 +550,7 @@ void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _
colored.copyTo(flds.hogluv); colored.copyTo(flds.hogluv);
} }
GpuMat spr(objects, cv::Rect(0, 0, flds.suppressed.cols, flds.suppressed.rows));
GpuMat tmp = GpuMat(objects, cv::Rect(0, 0, sizeof(Detection), 1)); GpuMat tmp = GpuMat(objects, cv::Rect(0, 0, sizeof(Detection), 1));
objects = GpuMat(objects, cv::Rect( sizeof(Detection), 0, objects.cols - sizeof(Detection), 1)); objects = GpuMat(objects, cv::Rect( sizeof(Detection), 0, objects.cols - sizeof(Detection), 1));
...@@ -552,8 +558,11 @@ void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _ ...@@ -552,8 +558,11 @@ void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _
flds.detect(rois, tmp, objects, stream); flds.detect(rois, tmp, objects, stream);
// if (rejCriteria != NO_REJECT) if (rejCriteria != NO_REJECT)
flds.suppress(tmp, objects); {
flds.suppress(tmp, objects);
flds.suppressed.copyTo(spr);
}
} }
void cv::gpu::SCascade::genRoi(InputArray _roi, OutputArray _mask, Stream& stream) const void cv::gpu::SCascade::genRoi(InputArray _roi, OutputArray _mask, Stream& stream) const
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment