Commit 7a29d96c authored by Alexey Spizhevoy

added buffered version of gpu::integral function and updated performance test…

Added a buffered version of the gpu::integral function and updated the performance test (it is still too slow).
parent 1748f65f
...@@ -650,6 +650,9 @@ namespace cv ...@@ -650,6 +650,9 @@ namespace cv
//! supports only CV_8UC1 source type //! supports only CV_8UC1 source type
CV_EXPORTS void integral(const GpuMat& src, GpuMat& sum); CV_EXPORTS void integral(const GpuMat& src, GpuMat& sum);
//! buffered version of integral: the temporary working buffer is supplied by the caller
//! instead of being created inside the function on every call
//! supports only CV_8UC1 source type
CV_EXPORTS void integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer);
//! computes the integral image and integral for the squared image //! computes the integral image and integral for the squared image
//! sum will have CV_32S type, sqsum - CV32F type //! sum will have CV_32S type, sqsum - CV32F type
//! supports only CV_8UC1 source type //! supports only CV_8UC1 source type
......
...@@ -61,6 +61,7 @@ void cv::gpu::warpAffine(const GpuMat&, GpuMat&, const Mat&, Size, int) { throw_ ...@@ -61,6 +61,7 @@ void cv::gpu::warpAffine(const GpuMat&, GpuMat&, const Mat&, Size, int) { throw_
void cv::gpu::warpPerspective(const GpuMat&, GpuMat&, const Mat&, Size, int) { throw_nogpu(); } void cv::gpu::warpPerspective(const GpuMat&, GpuMat&, const Mat&, Size, int) { throw_nogpu(); }
void cv::gpu::rotate(const GpuMat&, GpuMat&, Size, double, double, double, int) { throw_nogpu(); } void cv::gpu::rotate(const GpuMat&, GpuMat&, Size, double, double, double, int) { throw_nogpu(); }
void cv::gpu::integral(const GpuMat&, GpuMat&) { throw_nogpu(); } void cv::gpu::integral(const GpuMat&, GpuMat&) { throw_nogpu(); }
// Stub definition: ignores all three arguments and calls throw_nogpu().
// NOTE(review): presumably the fallback compiled when GPU support is unavailable - confirm
// against the preprocessor guard around this stub list.
void cv::gpu::integralBuffered(const GpuMat&, GpuMat&, GpuMat&)
{
    throw_nogpu();
}
void cv::gpu::integral(const GpuMat&, GpuMat&, GpuMat&) { throw_nogpu(); } void cv::gpu::integral(const GpuMat&, GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::sqrIntegral(const GpuMat&, GpuMat&) { throw_nogpu(); } void cv::gpu::sqrIntegral(const GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::columnSum(const GpuMat&, GpuMat&) { throw_nogpu(); } void cv::gpu::columnSum(const GpuMat&, GpuMat&) { throw_nogpu(); }
...@@ -545,6 +546,12 @@ void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, d ...@@ -545,6 +546,12 @@ void cv::gpu::rotate(const GpuMat& src, GpuMat& dst, Size dsize, double angle, d
// integral // integral
// Unbuffered form of the integral image computation: creates a local working buffer and
// forwards to integralBuffered. The buffer is local to this call, so nothing is reused
// between calls; callers that invoke this repeatedly can call integralBuffered directly
// with their own buffer.
void cv::gpu::integral(const GpuMat& src, GpuMat& sum) void cv::gpu::integral(const GpuMat& src, GpuMat& sum)
{
// Default-constructed here; integralBuffered is the one that sizes it.
GpuMat buffer;
integralBuffered(src, sum, buffer);
}
void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer)
{ {
CV_Assert(src.type() == CV_8UC1); CV_Assert(src.type() == CV_8UC1);
...@@ -555,10 +562,8 @@ void cv::gpu::integral(const GpuMat& src, GpuMat& sum) ...@@ -555,10 +562,8 @@ void cv::gpu::integral(const GpuMat& src, GpuMat& sum)
roiSize.height = src.rows; roiSize.height = src.rows;
NppSt32u bufSize; NppSt32u bufSize;
nppSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize) ); nppSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize) );
ensureSizeIsEnough(1, bufSize, CV_8UC1, buffer);
GpuMat buffer(1, bufSize, CV_8UC1);
nppSafeCall( nppiStIntegral_8u32u_C1R(const_cast<NppSt8u*>(src.ptr<NppSt8u>()), src.step, nppSafeCall( nppiStIntegral_8u32u_C1R(const_cast<NppSt8u*>(src.ptr<NppSt8u>()), src.step,
sum.ptr<NppSt32u>(), sum.step, roiSize, buffer.ptr<NppSt8u>(), bufSize) ); sum.ptr<NppSt32u>(), sum.step, roiSize, buffer.ptr<NppSt8u>(), bufSize) );
......
...@@ -170,24 +170,26 @@ TEST(cornerHarris) ...@@ -170,24 +170,26 @@ TEST(cornerHarris)
TEST(integral) TEST(integral)
{ {
Mat src, sum; Mat src, sum;
gpu::GpuMat d_src, d_sum; gpu::GpuMat d_src, d_sum, d_buf;
for (int size = 1000; size <= 8000; size *= 2) int size = 4000;
gen(src, size, size, CV_8U, 0, 256);
sum.create(size + 1, size + 1, CV_32S);
d_src = src;
d_sum.create(size + 1, size + 1, CV_32S);
for (int i = 0; i < 5; ++i)
{ {
SUBTEST << "size " << size << ", 8U"; SUBTEST << "size " << size << ", 8U";
gen(src, size, size, CV_8U, 0, 256);
sum.create(size + 1, size + 1, CV_32S);
CPU_ON; CPU_ON;
integral(src, sum); integral(src, sum);
CPU_OFF; CPU_OFF;
d_src = src;
d_sum.create(size + 1, size + 1, CV_32S);
GPU_ON; GPU_ON;
gpu::integral(d_src, d_sum); gpu::integralBuffered(d_src, d_sum, d_buf);
GPU_OFF; GPU_OFF;
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment