Commit 7839dbd2 authored by Vladislav Vinogradov's avatar Vladislav Vinogradov

used new device layer for cv::gpu::integral

parent 224f18b0
......@@ -248,60 +248,3 @@ PERF_TEST_P(Sz_KernelSz_Ccorr, Convolve,
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// Integral
PERF_TEST_P(Sz, Integral,
CUDA_TYPICAL_MAT_SIZES)
{
const cv::Size size = GetParam();
cv::Mat src(size, CV_8UC1);
declare.in(src, WARMUP_RNG);
if (PERF_RUN_CUDA())
{
const cv::cuda::GpuMat d_src(src);
cv::cuda::GpuMat dst;
cv::cuda::GpuMat d_buf;
TEST_CYCLE() cv::cuda::integral(d_src, dst, d_buf);
CUDA_SANITY_CHECK(dst);
}
else
{
cv::Mat dst;
TEST_CYCLE() cv::integral(src, dst);
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// IntegralSqr
PERF_TEST_P(Sz, IntegralSqr,
CUDA_TYPICAL_MAT_SIZES)
{
const cv::Size size = GetParam();
cv::Mat src(size, CV_8UC1);
declare.in(src, WARMUP_RNG);
if (PERF_RUN_CUDA())
{
const cv::cuda::GpuMat d_src(src);
cv::cuda::GpuMat dst, buf;
TEST_CYCLE() cv::cuda::sqrIntegral(d_src, dst, buf);
CUDA_SANITY_CHECK(dst);
}
else
{
FAIL_NO_CPU();
}
}
......@@ -465,3 +465,60 @@ PERF_TEST_P(Sz, MeanStdDev,
SANITY_CHECK(cpu_stddev);
}
}
//////////////////////////////////////////////////////////////////////
// Integral
PERF_TEST_P(Sz, Integral,
CUDA_TYPICAL_MAT_SIZES)
{
const cv::Size size = GetParam();
cv::Mat src(size, CV_8UC1);
declare.in(src, WARMUP_RNG);
if (PERF_RUN_CUDA())
{
const cv::cuda::GpuMat d_src(src);
cv::cuda::GpuMat dst;
cv::cuda::GpuMat d_buf;
TEST_CYCLE() cv::cuda::integral(d_src, dst, d_buf);
CUDA_SANITY_CHECK(dst);
}
else
{
cv::Mat dst;
TEST_CYCLE() cv::integral(src, dst);
CPU_SANITY_CHECK(dst);
}
}
//////////////////////////////////////////////////////////////////////
// IntegralSqr
PERF_TEST_P(Sz, IntegralSqr,
CUDA_TYPICAL_MAT_SIZES)
{
const cv::Size size = GetParam();
cv::Mat src(size, CV_8UC1);
declare.in(src, WARMUP_RNG);
if (PERF_RUN_CUDA())
{
const cv::cuda::GpuMat d_src(src);
cv::cuda::GpuMat dst, buf;
TEST_CYCLE() cv::cuda::sqrIntegral(d_src, dst, buf);
CUDA_SANITY_CHECK(dst);
}
else
{
FAIL_NO_CPU();
}
}
This diff is collapsed.
......@@ -294,116 +294,4 @@ void cv::cuda::normalize(InputArray _src, OutputArray dst, double a, double b, i
}
}
////////////////////////////////////////////////////////////////////////
// integral
namespace cv { namespace cuda { namespace device
{
namespace imgproc
{
void shfl_integral_gpu(const PtrStepSzb& img, PtrStepSz<unsigned int> integral, cudaStream_t stream);
}
}}}
void cv::cuda::integral(InputArray _src, OutputArray _dst, GpuMat& buffer, Stream& _stream)
{
GpuMat src = _src.getGpuMat();
CV_Assert( src.type() == CV_8UC1 );
cudaStream_t stream = StreamAccessor::getStream(_stream);
cv::Size whole;
cv::Point offset;
src.locateROI(whole, offset);
if (deviceSupports(WARP_SHUFFLE_FUNCTIONS) && src.cols <= 2048
&& offset.x % 16 == 0 && ((src.cols + 63) / 64) * 64 <= (static_cast<int>(src.step) - offset.x))
{
ensureSizeIsEnough(((src.rows + 7) / 8) * 8, ((src.cols + 63) / 64) * 64, CV_32SC1, buffer);
cv::cuda::device::imgproc::shfl_integral_gpu(src, buffer, stream);
_dst.create(src.rows + 1, src.cols + 1, CV_32SC1);
GpuMat dst = _dst.getGpuMat();
dst.setTo(Scalar::all(0), _stream);
GpuMat inner = dst(Rect(1, 1, src.cols, src.rows));
GpuMat res = buffer(Rect(0, 0, src.cols, src.rows));
res.copyTo(inner, _stream);
}
else
{
#ifndef HAVE_OPENCV_CUDALEGACY
throw_no_cuda();
#else
_dst.create(src.rows + 1, src.cols + 1, CV_32SC1);
GpuMat dst = _dst.getGpuMat();
NcvSize32u roiSize;
roiSize.width = src.cols;
roiSize.height = src.rows;
cudaDeviceProp prop;
cudaSafeCall( cudaGetDeviceProperties(&prop, cv::cuda::getDevice()) );
Ncv32u bufSize;
ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
ensureSizeIsEnough(1, bufSize, CV_8UC1, buffer);
NppStStreamHandler h(stream);
ncvSafeCall( nppiStIntegral_8u32u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>()), static_cast<int>(src.step),
dst.ptr<Ncv32u>(), static_cast<int>(dst.step), roiSize, buffer.ptr<Ncv8u>(), bufSize, prop) );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
#endif
}
}
//////////////////////////////////////////////////////////////////////////////
// sqrIntegral
void cv::cuda::sqrIntegral(InputArray _src, OutputArray _dst, GpuMat& buf, Stream& _stream)
{
#ifndef HAVE_OPENCV_CUDALEGACY
(void) _src;
(void) _dst;
(void) _stream;
throw_no_cuda();
#else
GpuMat src = _src.getGpuMat();
CV_Assert( src.type() == CV_8U );
NcvSize32u roiSize;
roiSize.width = src.cols;
roiSize.height = src.rows;
cudaDeviceProp prop;
cudaSafeCall( cudaGetDeviceProperties(&prop, cv::cuda::getDevice()) );
Ncv32u bufSize;
ncvSafeCall(nppiStSqrIntegralGetSize_8u64u(roiSize, &bufSize, prop));
ensureSizeIsEnough(1, bufSize, CV_8U, buf);
cudaStream_t stream = StreamAccessor::getStream(_stream);
NppStStreamHandler h(stream);
_dst.create(src.rows + 1, src.cols + 1, CV_64F);
GpuMat dst = _dst.getGpuMat();
ncvSafeCall(nppiStSqrIntegral_8u64u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>(0)), static_cast<int>(src.step),
dst.ptr<Ncv64u>(0), static_cast<int>(dst.step), roiSize, buf.ptr<Ncv8u>(0), bufSize, prop));
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
#endif
}
#endif
......@@ -125,43 +125,6 @@ INSTANTIATE_TEST_CASE_P(CUDA_Arithm, GEMM, testing::Combine(
ALL_GEMM_FLAGS,
WHOLE_SUBMAT));
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Integral
PARAM_TEST_CASE(Integral, cv::cuda::DeviceInfo, cv::Size, UseRoi)
{
cv::cuda::DeviceInfo devInfo;
cv::Size size;
bool useRoi;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
size = GET_PARAM(1);
useRoi = GET_PARAM(2);
cv::cuda::setDevice(devInfo.deviceID());
}
};
CUDA_TEST_P(Integral, Accuracy)
{
cv::Mat src = randomMat(size, CV_8UC1);
cv::cuda::GpuMat dst = createMat(cv::Size(src.cols + 1, src.rows + 1), CV_32SC1, useRoi);
cv::cuda::integral(loadMat(src, useRoi), dst);
cv::Mat dst_gold;
cv::integral(src, dst_gold, CV_32S);
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
INSTANTIATE_TEST_CASE_P(CUDA_Arithm, Integral, testing::Combine(
ALL_DEVICES,
DIFFERENT_SIZES,
WHOLE_SUBMAT));
////////////////////////////////////////////////////////////////////////////
// MulSpectrums
......
......@@ -816,4 +816,78 @@ INSTANTIATE_TEST_CASE_P(CUDA_Arithm, MeanStdDev, testing::Combine(
DIFFERENT_SIZES,
WHOLE_SUBMAT));
///////////////////////////////////////////////////////////////////////////////////////////////////////
// Integral
PARAM_TEST_CASE(Integral, cv::cuda::DeviceInfo, cv::Size, UseRoi)
{
cv::cuda::DeviceInfo devInfo;
cv::Size size;
bool useRoi;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
size = GET_PARAM(1);
useRoi = GET_PARAM(2);
cv::cuda::setDevice(devInfo.deviceID());
}
};
CUDA_TEST_P(Integral, Accuracy)
{
cv::Mat src = randomMat(size, CV_8UC1);
cv::cuda::GpuMat dst = createMat(cv::Size(src.cols + 1, src.rows + 1), CV_32SC1, useRoi);
cv::cuda::integral(loadMat(src, useRoi), dst);
cv::Mat dst_gold;
cv::integral(src, dst_gold, CV_32S);
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
INSTANTIATE_TEST_CASE_P(CUDA_Arithm, Integral, testing::Combine(
ALL_DEVICES,
DIFFERENT_SIZES,
WHOLE_SUBMAT));
///////////////////////////////////////////////////////////////////////////////////////////////////////
// IntegralSqr
PARAM_TEST_CASE(IntegralSqr, cv::cuda::DeviceInfo, cv::Size, UseRoi)
{
cv::cuda::DeviceInfo devInfo;
cv::Size size;
bool useRoi;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
size = GET_PARAM(1);
useRoi = GET_PARAM(2);
cv::cuda::setDevice(devInfo.deviceID());
}
};
CUDA_TEST_P(IntegralSqr, Accuracy)
{
cv::Mat src = randomMat(size, CV_8UC1);
cv::cuda::GpuMat dst = createMat(cv::Size(src.cols + 1, src.rows + 1), CV_64FC1, useRoi);
cv::cuda::sqrIntegral(loadMat(src, useRoi), dst);
cv::Mat dst_gold, temp;
cv::integral(src, temp, dst_gold);
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
INSTANTIATE_TEST_CASE_P(CUDA_Arithm, IntegralSqr, testing::Combine(
ALL_DEVICES,
DIFFERENT_SIZES,
WHOLE_SUBMAT));
#endif // HAVE_CUDA
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment