Commit 8c1f9baf authored by Vladislav Vinogradov

fixed gpu::integral for Kepler

parent c3f277b7
...@@ -551,13 +551,13 @@ void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, S ...@@ -551,13 +551,13 @@ void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, S
if (info.supports(WARP_SHUFFLE_FUNCTIONS)) if (info.supports(WARP_SHUFFLE_FUNCTIONS))
{ {
GpuMat src16; GpuMat srcAlligned;
if (src.cols % 16 == 0) if (src.cols % 16 == 0 && src.rows % 8 == 0)
src16 = src; srcAlligned = src;
else else
{ {
ensureSizeIsEnough(src.rows, ((src.cols + 15) / 16) * 16, src.type(), buffer); ensureSizeIsEnough(((src.rows + 7) / 8) * 8, ((src.cols + 15) / 16) * 16, src.type(), buffer);
GpuMat inner = buffer(Rect(0, 0, src.cols, src.rows)); GpuMat inner = buffer(Rect(0, 0, src.cols, src.rows));
...@@ -572,21 +572,21 @@ void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, S ...@@ -572,21 +572,21 @@ void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, S
src.copyTo(inner); src.copyTo(inner);
} }
src16 = buffer; srcAlligned = buffer;
} }
sum.create(src16.rows + 1, src16.cols + 1, CV_32SC1); sum.create(srcAlligned.rows + 1, srcAlligned.cols + 1, CV_32SC1);
if (s) if (s)
s.enqueueMemSet(sum, Scalar::all(0)); s.enqueueMemSet(sum, Scalar::all(0));
else else
sum.setTo(Scalar::all(0)); sum.setTo(Scalar::all(0));
GpuMat inner = sum(Rect(1, 1, src16.cols, src16.rows)); GpuMat inner = sum(Rect(1, 1, srcAlligned.cols, srcAlligned.rows));
cv::gpu::device::imgproc::shfl_integral_gpu(src16, inner, stream); cv::gpu::device::imgproc::shfl_integral_gpu(srcAlligned, inner, stream);
if (src16.cols != src.cols) if (srcAlligned.data != src.data)
sum = sum(Rect(0, 0, src.cols + 1, src.rows + 1)); sum = sum(Rect(0, 0, src.cols + 1, src.rows + 1));
} }
else else
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment