Commit bd3179bd authored by marina.kolpakova's avatar marina.kolpakova

fix CUDA support for streams for NMS; refactor tests

parent 60c0e41b
...@@ -134,7 +134,8 @@ namespace icf { ...@@ -134,7 +134,8 @@ namespace icf {
} }
} }
void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections, PtrStepSzb suppressed) void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections,
PtrStepSzb suppressed, cudaStream_t stream)
{ {
int block = 192; int block = 192;
int grid = 1; int grid = 1;
...@@ -146,7 +147,7 @@ namespace icf { ...@@ -146,7 +147,7 @@ namespace icf {
overlap<<<grid, block>>>((uint*)ndetections.ptr(0), (uchar*)overlaps.ptr(0)); overlap<<<grid, block>>>((uint*)ndetections.ptr(0), (uchar*)overlaps.ptr(0));
collect<<<grid, block>>>((uint*)ndetections.ptr(0), (uchar*)overlaps.ptr(0), (uint*)suppressed.ptr(0), ((uint4*)suppressed.ptr(0)) + 1); collect<<<grid, block>>>((uint*)ndetections.ptr(0), (uchar*)overlaps.ptr(0), (uint*)suppressed.ptr(0), ((uint4*)suppressed.ptr(0)) + 1);
// if (!stream) if (!stream)
{ {
cudaSafeCall( cudaGetLastError()); cudaSafeCall( cudaGetLastError());
cudaSafeCall( cudaDeviceSynchronize()); cudaSafeCall( cudaDeviceSynchronize());
...@@ -330,15 +331,15 @@ __global__ void soft_cascade(const CascadeInvoker<Policy> invoker, Detection* ob ...@@ -330,15 +331,15 @@ __global__ void soft_cascade(const CascadeInvoker<Policy> invoker, Detection* ob
template<typename Policy> template<typename Policy>
void CascadeInvoker<Policy>::operator()(const PtrStepSzb& roi, const PtrStepSzi& hogluv, void CascadeInvoker<Policy>::operator()(const PtrStepSzb& roi, const PtrStepSzi& hogluv,
PtrStepSz<uchar4> objects, PtrStepSzi counter, const int downscales, const cudaStream_t& stream) const PtrStepSz<uchar4> objects, const int downscales, const cudaStream_t& stream) const
{ {
int fw = roi.rows; int fw = roi.rows;
int fh = roi.cols; int fh = roi.cols;
dim3 grid(fw, fh / Policy::STA_Y, downscales); dim3 grid(fw, fh / Policy::STA_Y, downscales);
uint* ctr = (uint*)(counter.ptr(0)); uint* ctr = (uint*)(objects.ptr(0));
Detection* det = (Detection*)objects.ptr(); Detection* det = ((Detection*)objects.ptr(0)) + 1;
uint max_det = objects.cols / sizeof(Detection); uint max_det = objects.cols / sizeof(Detection);
cudaChannelFormatDesc desc = cudaCreateChannelDesc<int>(); cudaChannelFormatDesc desc = cudaCreateChannelDesc<int>();
...@@ -363,7 +364,7 @@ void CascadeInvoker<Policy>::operator()(const PtrStepSzb& roi, const PtrStepSzi& ...@@ -363,7 +364,7 @@ void CascadeInvoker<Policy>::operator()(const PtrStepSzb& roi, const PtrStepSzi&
} }
template void CascadeInvoker<GK107PolicyX4>::operator()(const PtrStepSzb& roi, const PtrStepSzi& hogluv, template void CascadeInvoker<GK107PolicyX4>::operator()(const PtrStepSzb& roi, const PtrStepSzi& hogluv,
PtrStepSz<uchar4> objects, PtrStepSzi counter, const int downscales, const cudaStream_t& stream) const; PtrStepSz<uchar4> objects, const int downscales, const cudaStream_t& stream) const;
} }
}}} }}}
\ No newline at end of file
...@@ -147,7 +147,7 @@ struct CascadeInvoker ...@@ -147,7 +147,7 @@ struct CascadeInvoker
int scales; int scales;
void operator()(const PtrStepSzb& roi, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects, void operator()(const PtrStepSzb& roi, const PtrStepSzi& hogluv, PtrStepSz<uchar4> objects,
PtrStepSzi counter, const int downscales, const cudaStream_t& stream = 0) const; const int downscales, const cudaStream_t& stream = 0) const;
template<bool isUp> template<bool isUp>
__device void detect(Detection* objects, const uint ndetections, uint* ctr, const int downscales) const; __device void detect(Detection* objects, const uint ndetections, uint* ctr, const int downscales) const;
......
...@@ -86,7 +86,8 @@ namespace icf { ...@@ -86,7 +86,8 @@ namespace icf {
void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle, void fillBins(cv::gpu::PtrStepSzb hogluv, const cv::gpu::PtrStepSzf& nangle,
const int fw, const int fh, const int bins, cudaStream_t stream); const int fw, const int fh, const int bins, cudaStream_t stream);
void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections, PtrStepSzb suppressed); void suppress(const PtrStepSzb& objects, PtrStepSzb overlaps, PtrStepSzi ndetections,
PtrStepSzb suppressed, cudaStream_t stream);
} }
namespace imgproc { namespace imgproc {
...@@ -328,13 +329,20 @@ struct cv::gpu::SCascade::Fields ...@@ -328,13 +329,20 @@ struct cv::gpu::SCascade::Fields
leaves.upload(hleaves); leaves.upload(hleaves);
} }
void detect(const cv::gpu::GpuMat& roi, const cv::gpu::GpuMat& count, cv::gpu::GpuMat& objects, const cudaStream_t& stream) const void detect(const cv::gpu::GpuMat& roi, cv::gpu::GpuMat& objects, Stream& s) const
{ {
cudaMemset(count.data, 0, sizeof(Detection)); if (s)
s.enqueueMemSet(objects, 0);
else
cudaMemset(objects.data, 0, sizeof(Detection));
cudaSafeCall( cudaGetLastError()); cudaSafeCall( cudaGetLastError());
device::icf::CascadeInvoker<device::icf::GK107PolicyX4> invoker device::icf::CascadeInvoker<device::icf::GK107PolicyX4> invoker
= device::icf::CascadeInvoker<device::icf::GK107PolicyX4>(levels, stages, nodes, leaves); = device::icf::CascadeInvoker<device::icf::GK107PolicyX4>(levels, stages, nodes, leaves);
invoker(roi, hogluv, objects, count, downscales, stream);
cudaStream_t stream = StreamAccessor::getStream(s);
invoker(roi, hogluv, objects, downscales, stream);
} }
void preprocess(const cv::gpu::GpuMat& colored, Stream& s) void preprocess(const cv::gpu::GpuMat& colored, Stream& s)
...@@ -356,6 +364,26 @@ struct cv::gpu::SCascade::Fields ...@@ -356,6 +364,26 @@ struct cv::gpu::SCascade::Fields
integrate(fh, fw, s); integrate(fh, fw, s);
} }
void suppress(GpuMat& objects, Stream& s)
{
GpuMat ndetections = GpuMat(objects, cv::Rect(0, 0, sizeof(Detection), 1));
ensureSizeIsEnough(objects.rows, objects.cols, CV_8UC1, overlaps);
if (s)
{
s.enqueueMemSet(overlaps, 0);
s.enqueueMemSet(suppressed, 0);
}
else
{
overlaps.setTo(0);
suppressed.setTo(0);
}
cudaStream_t stream = StreamAccessor::getStream(s);
device::icf::suppress(objects, overlaps, ndetections, suppressed, stream);
}
private: private:
typedef std::vector<device::icf::Octave>::const_iterator octIt_t; typedef std::vector<device::icf::Octave>::const_iterator octIt_t;
...@@ -442,17 +470,7 @@ private: ...@@ -442,17 +470,7 @@ private:
} }
} }
#include <iostream>
public: public:
void suppress(GpuMat& ndetections, GpuMat& objects)
{
ensureSizeIsEnough(objects.rows, objects.cols, CV_8UC1, overlaps);
overlaps.setTo(0);
suppressed.setTo(0);
device::icf::suppress(objects, overlaps, ndetections, suppressed);
// std::cout << cv::Mat(overlaps) << std::endl;
}
// scales range // scales range
float minScale; float minScale;
...@@ -547,20 +565,18 @@ void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _ ...@@ -547,20 +565,18 @@ void cv::gpu::SCascade::detect(InputArray image, InputArray _rois, OutputArray _
} }
else else
{ {
colored.copyTo(flds.hogluv); if (s)
s.enqueueCopy(colored, flds.hogluv);
else
colored.copyTo(flds.hogluv);
} }
GpuMat spr(objects, cv::Rect(0, 0, flds.suppressed.cols, flds.suppressed.rows)); flds.detect(rois, objects, s);
GpuMat tmp = GpuMat(objects, cv::Rect(0, 0, sizeof(Detection), 1));
objects = GpuMat(objects, cv::Rect( sizeof(Detection), 0, objects.cols - sizeof(Detection), 1));
cudaStream_t stream = StreamAccessor::getStream(s);
flds.detect(rois, tmp, objects, stream);
if (rejCriteria != NO_REJECT) if (rejCriteria != NO_REJECT)
{ {
flds.suppress(tmp, objects); GpuMat spr(objects, cv::Rect(0, 0, flds.suppressed.cols, flds.suppressed.rows));
flds.suppress(objects, s);
flds.suppressed.copyTo(spr); flds.suppressed.copyTo(spr);
} }
} }
......
...@@ -47,7 +47,7 @@ ...@@ -47,7 +47,7 @@
using cv::gpu::GpuMat; using cv::gpu::GpuMat;
// show detection results on input image with cv::imshow // show detection results on input image with cv::imshow
#define SHOW_DETECTIONS // #define SHOW_DETECTIONS
#if defined SHOW_DETECTIONS #if defined SHOW_DETECTIONS
# define SHOW(res) \ # define SHOW(res) \
...@@ -99,21 +99,9 @@ namespace { ...@@ -99,21 +99,9 @@ namespace {
return std::string(s); return std::string(s);
} }
static std::string getImageName(int level)
{
time_t rawtime;
struct tm * timeinfo;
char buffer [80];
time ( &rawtime );
timeinfo = localtime ( &rawtime );
strftime (buffer,80,"%Y-%m-%d--%H-%M-%S",timeinfo);
return "gpu_rec_level_" + itoa(level)+ "_" + std::string(buffer) + ".png";
}
static void print(std::ostream &out, const Detection& d) static void print(std::ostream &out, const Detection& d)
{ {
#if defined SHOW_DETECTIONS
out << "\x1b[32m[ detection]\x1b[0m (" out << "\x1b[32m[ detection]\x1b[0m ("
<< std::setw(4) << d.x << std::setw(4) << d.x
<< " " << " "
...@@ -125,11 +113,32 @@ namespace { ...@@ -125,11 +113,32 @@ namespace {
<< ") " << ") "
<< std::setw(12) << d.confidence << std::setw(12) << d.confidence
<< std::endl; << std::endl;
#else
(void)out; (void)d;
#endif
} }
static void printTotal(std::ostream &out, int detbytes) static void printTotal(std::ostream &out, int detbytes)
{ {
#if defined SHOW_DETECTIONS
out << "\x1b[32m[ ]\x1b[0m Total detections " << (detbytes / sizeof(Detection)) << std::endl; out << "\x1b[32m[ ]\x1b[0m Total detections " << (detbytes / sizeof(Detection)) << std::endl;
#else
(void)out; (void)detbytes;
#endif
}
#if defined SHOW_DETECTIONS
static std::string getImageName(int level)
{
time_t rawtime;
struct tm * timeinfo;
char buffer [80];
time ( &rawtime );
timeinfo = localtime ( &rawtime );
strftime (buffer,80,"%Y-%m-%d--%H-%M-%S",timeinfo);
return "gpu_rec_level_" + itoa(level)+ "_" + std::string(buffer) + ".png";
} }
static void writeResult(const cv::Mat& result, const int level) static void writeResult(const cv::Mat& result, const int level)
...@@ -138,6 +147,7 @@ namespace { ...@@ -138,6 +147,7 @@ namespace {
cv::imwrite(path, result); cv::imwrite(path, result);
std::cout << "\x1b[32m" << "[ ]" << std::endl << "[ stored in]"<< "\x1b[0m" << path << std::endl; std::cout << "\x1b[32m" << "[ ]" << std::endl << "[ stored in]"<< "\x1b[0m" << path << std::endl;
} }
#endif
} }
typedef ::testing::TestWithParam<std::tr1::tuple<cv::gpu::DeviceInfo, std::string, std::string, int> > SCascadeTestRoi; typedef ::testing::TestWithParam<std::tr1::tuple<cv::gpu::DeviceInfo, std::string, std::string, int> > SCascadeTestRoi;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment