used new device layer for cv::gpu::integral

7839dbd2 · Vladislav Vinogradov · 224f18b0 · 7839dbd2 · 7839dbd2 · 7839dbd2
Commit 7839dbd2 authored Aug 27, 2013 by Vladislav Vinogradov
6 changed files
--- a/modules/cudaarithm/perf/perf_arithm.cpp
+++ b/modules/cudaarithm/perf/perf_arithm.cpp
@@ -248,60 +248,3 @@ PERF_TEST_P(Sz_KernelSz_Ccorr, Convolve,
        CPU_SANITY_CHECK(dst);
    }
 }
-
-//////////////////////////////////////////////////////////////////////
-// Integral
-
-PERF_TEST_P(Sz, Integral,  // perf case "Integral", parameterized by CUDA_TYPICAL_MAT_SIZES
-            CUDA_TYPICAL_MAT_SIZES)
-{
-    const cv::Size size = GetParam();  // matrix size for this instantiation
-
-    cv::Mat src(size, CV_8UC1);  // 8-bit single-channel host input
-    declare.in(src, WARMUP_RNG);  // presumably registers src as an input filled from the RNG -- confirm against the perf framework
-
-    if (PERF_RUN_CUDA())  // CUDA path: measure cv::cuda::integral
-    {
-        const cv::cuda::GpuMat d_src(src);  // GpuMat built from the host matrix
-        cv::cuda::GpuMat dst;
-        cv::cuda::GpuMat d_buf;  // scratch buffer passed to integral() on every call
-
-        TEST_CYCLE() cv::cuda::integral(d_src, dst, d_buf);  // statement placed under the TEST_CYCLE() measurement macro
-
-        CUDA_SANITY_CHECK(dst);
-    }
-    else  // CPU path: measure cv::integral on the same host input
-    {
-        cv::Mat dst;
-
-        TEST_CYCLE() cv::integral(src, dst);
-
-        CPU_SANITY_CHECK(dst);
-    }
-}
-
-//////////////////////////////////////////////////////////////////////
-// IntegralSqr
-
-PERF_TEST_P(Sz, IntegralSqr,  // perf case "IntegralSqr", parameterized by CUDA_TYPICAL_MAT_SIZES
-            CUDA_TYPICAL_MAT_SIZES)
-{
-    const cv::Size size = GetParam();  // matrix size for this instantiation
-
-    cv::Mat src(size, CV_8UC1);  // 8-bit single-channel host input
-    declare.in(src, WARMUP_RNG);  // presumably registers src as an input filled from the RNG -- confirm against the perf framework
-
-    if (PERF_RUN_CUDA())  // CUDA path: measure cv::cuda::sqrIntegral
-    {
-        const cv::cuda::GpuMat d_src(src);  // GpuMat built from the host matrix
-        cv::cuda::GpuMat dst, buf;  // buf is the scratch buffer passed to sqrIntegral()
-
-        TEST_CYCLE() cv::cuda::sqrIntegral(d_src, dst, buf);  // statement placed under the TEST_CYCLE() measurement macro
-
-        CUDA_SANITY_CHECK(dst);
-    }
-    else  // no CPU measurement is made for this case
-    {
-        FAIL_NO_CPU();
-    }
-}
--- a/modules/cudaarithm/perf/perf_reductions.cpp
+++ b/modules/cudaarithm/perf/perf_reductions.cpp
@@ -465,3 +465,60 @@ PERF_TEST_P(Sz, MeanStdDev,
        SANITY_CHECK(cpu_stddev);
    }
 }
+
+//////////////////////////////////////////////////////////////////////
+// Integral
+
+PERF_TEST_P(Sz, Integral,  // perf case "Integral", parameterized by CUDA_TYPICAL_MAT_SIZES
+            CUDA_TYPICAL_MAT_SIZES)
+{
+    const cv::Size size = GetParam();  // matrix size for this instantiation
+
+    cv::Mat src(size, CV_8UC1);  // 8-bit single-channel host input
+    declare.in(src, WARMUP_RNG);  // presumably registers src as an input filled from the RNG -- confirm against the perf framework
+
+    if (PERF_RUN_CUDA())  // CUDA path: measure cv::cuda::integral
+    {
+        const cv::cuda::GpuMat d_src(src);  // GpuMat built from the host matrix
+        cv::cuda::GpuMat dst;
+        cv::cuda::GpuMat d_buf;  // scratch buffer passed to integral() on every call
+
+        TEST_CYCLE() cv::cuda::integral(d_src, dst, d_buf);  // statement placed under the TEST_CYCLE() measurement macro
+
+        CUDA_SANITY_CHECK(dst);
+    }
+    else  // CPU path: measure cv::integral on the same host input
+    {
+        cv::Mat dst;
+
+        TEST_CYCLE() cv::integral(src, dst);
+
+        CPU_SANITY_CHECK(dst);
+    }
+}
+
+//////////////////////////////////////////////////////////////////////
+// IntegralSqr
+
+PERF_TEST_P(Sz, IntegralSqr,  // perf case "IntegralSqr", parameterized by CUDA_TYPICAL_MAT_SIZES
+            CUDA_TYPICAL_MAT_SIZES)
+{
+    const cv::Size size = GetParam();  // matrix size for this instantiation
+
+    cv::Mat src(size, CV_8UC1);  // 8-bit single-channel host input
+    declare.in(src, WARMUP_RNG);  // presumably registers src as an input filled from the RNG -- confirm against the perf framework
+
+    if (PERF_RUN_CUDA())  // CUDA path: measure cv::cuda::sqrIntegral
+    {
+        const cv::cuda::GpuMat d_src(src);  // GpuMat built from the host matrix
+        cv::cuda::GpuMat dst, buf;  // buf is the scratch buffer passed to sqrIntegral()
+
+        TEST_CYCLE() cv::cuda::sqrIntegral(d_src, dst, buf);  // statement placed under the TEST_CYCLE() measurement macro
+
+        CUDA_SANITY_CHECK(dst);
+    }
+    else  // no CPU measurement is made for this case
+    {
+        FAIL_NO_CPU();
+    }
+}
--- a/modules/cudaarithm/src/cuda/integral.cu
+++ b/modules/cudaarithm/src/cuda/integral.cu
--- a/modules/cudaarithm/src/reductions.cpp
+++ b/modules/cudaarithm/src/reductions.cpp
@@ -294,116 +294,4 @@ void cv::cuda::normalize(InputArray _src, OutputArray dst, double a, double b, i
    }
 }

-////////////////////////////////////////////////////////////////////////
-// integral
-
-namespace cv { namespace cuda { namespace device  // forward declaration of the routine used by cv::cuda::integral's shuffle path
-{
-    namespace imgproc
-    {
-        void shfl_integral_gpu(const PtrStepSzb& img, PtrStepSz<unsigned int> integral, cudaStream_t stream);  // declared only; the definition is not visible in this section
-    }
-}}}
-
-void cv::cuda::integral(InputArray _src, OutputArray _dst, GpuMat& buffer, Stream& _stream)  // integral image of a CV_8UC1 GpuMat; _dst becomes (rows+1) x (cols+1) CV_32SC1, buffer is caller-provided scratch
-{
-    GpuMat src = _src.getGpuMat();
-
-    CV_Assert( src.type() == CV_8UC1 );  // only 8-bit single-channel input is accepted
-
-    cudaStream_t stream = StreamAccessor::getStream(_stream);  // raw CUDA stream behind the Stream wrapper
-
-    cv::Size whole;
-    cv::Point offset;
-    src.locateROI(whole, offset);  // only offset.x is used below; whole is not read again
-
-    if (deviceSupports(WARP_SHUFFLE_FUNCTIONS) && src.cols <= 2048  // shuffle path requires warp-shuffle support and at most 2048 columns,
-        && offset.x % 16 == 0 && ((src.cols + 63) / 64) * 64 <= (static_cast<int>(src.step) - offset.x))  // an ROI x-offset that is a multiple of 16, and cols rounded up to a multiple of 64 not exceeding src.step - offset.x
-    {
-        ensureSizeIsEnough(((src.rows + 7) / 8) * 8, ((src.cols + 63) / 64) * 64, CV_32SC1, buffer);  // rows rounded up to a multiple of 8, cols to a multiple of 64
-
-        cv::cuda::device::imgproc::shfl_integral_gpu(src, buffer, stream);  // result is written to buffer, not directly to dst
-
-        _dst.create(src.rows + 1, src.cols + 1, CV_32SC1);
-        GpuMat dst = _dst.getGpuMat();
-
-        dst.setTo(Scalar::all(0), _stream);  // zero the whole output, including the extra first row and column
-
-        GpuMat inner = dst(Rect(1, 1, src.cols, src.rows));  // dst without its first row and first column
-        GpuMat res = buffer(Rect(0, 0, src.cols, src.rows));  // top-left src-sized region of the (padded) buffer
-
-        res.copyTo(inner, _stream);
-    }
-    else  // fallback path: NCV/NPP staging implementation, available only with the cudalegacy module
-    {
-    #ifndef HAVE_OPENCV_CUDALEGACY
-        throw_no_cuda();  // no fallback is compiled in without HAVE_OPENCV_CUDALEGACY
-    #else
-        _dst.create(src.rows + 1, src.cols + 1, CV_32SC1);
-        GpuMat dst = _dst.getGpuMat();
-
-        NcvSize32u roiSize;
-        roiSize.width = src.cols;
-        roiSize.height = src.rows;
-
-        cudaDeviceProp prop;
-        cudaSafeCall( cudaGetDeviceProperties(&prop, cv::cuda::getDevice()) );  // properties of the current device are passed to both NPP staging calls below
-
-        Ncv32u bufSize;
-        ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );  // query the scratch size needed for this ROI
-        ensureSizeIsEnough(1, bufSize, CV_8UC1, buffer);  // buffer is reused as a single row of bufSize bytes
-
-        NppStStreamHandler h(stream);  // presumably scopes the NPP staging stream to `stream` for the call below -- confirm against NppStStreamHandler
-
-        ncvSafeCall( nppiStIntegral_8u32u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>()), static_cast<int>(src.step),  // const_cast because the routine takes a non-const source pointer
-            dst.ptr<Ncv32u>(), static_cast<int>(dst.step), roiSize, buffer.ptr<Ncv8u>(), bufSize, prop) );  // CV_32SC1 dst is accessed through an Ncv32u pointer
-
-        if (stream == 0)  // on the default stream, block until the device has finished
-            cudaSafeCall( cudaDeviceSynchronize() );
-    #endif
-    }
-}
-
-//////////////////////////////////////////////////////////////////////////////
-// sqrIntegral
-
-void cv::cuda::sqrIntegral(InputArray _src, OutputArray _dst, GpuMat& buf, Stream& _stream)  // squared integral image of a CV_8U GpuMat via nppiStSqrIntegral_8u64u_C1R; _dst becomes (rows+1) x (cols+1) CV_64F, buf is caller-provided scratch
-{
-#ifndef HAVE_OPENCV_CUDALEGACY
-    (void) _src;
-    (void) _dst;
-    (void) _stream;  // NOTE(review): buf is not cast to void here, unlike the other parameters
-    throw_no_cuda();  // no implementation is compiled in without HAVE_OPENCV_CUDALEGACY
-#else
-    GpuMat src = _src.getGpuMat();
-
-    CV_Assert( src.type() == CV_8U );  // only 8-bit single-channel input is accepted
-
-    NcvSize32u roiSize;
-    roiSize.width = src.cols;
-    roiSize.height = src.rows;
-
-    cudaDeviceProp prop;
-    cudaSafeCall( cudaGetDeviceProperties(&prop, cv::cuda::getDevice()) );  // properties of the current device are passed to both NPP staging calls below
-
-    Ncv32u bufSize;
-    ncvSafeCall(nppiStSqrIntegralGetSize_8u64u(roiSize, &bufSize, prop));  // query the scratch size needed for this ROI
-
-    ensureSizeIsEnough(1, bufSize, CV_8U, buf);  // buf is reused as a single row of bufSize bytes
-
-    cudaStream_t stream = StreamAccessor::getStream(_stream);  // raw CUDA stream behind the Stream wrapper
-
-    NppStStreamHandler h(stream);  // presumably scopes the NPP staging stream to `stream` for the call below -- confirm against NppStStreamHandler
-
-    _dst.create(src.rows + 1, src.cols + 1, CV_64F);
-    GpuMat dst = _dst.getGpuMat();
-
-    ncvSafeCall(nppiStSqrIntegral_8u64u_C1R(const_cast<Ncv8u*>(src.ptr<Ncv8u>(0)), static_cast<int>(src.step),  // const_cast because the routine takes a non-const source pointer
-            dst.ptr<Ncv64u>(0), static_cast<int>(dst.step), roiSize, buf.ptr<Ncv8u>(0), bufSize, prop));  // NOTE(review): dst is created as CV_64F but written through an Ncv64u pointer -- confirm how callers are meant to interpret the elements
-
-    if (stream == 0)  // on the default stream, block until the device has finished
-        cudaSafeCall( cudaDeviceSynchronize() );
-#endif
-}
-
 #endif
--- a/modules/cudaarithm/test/test_arithm.cpp
+++ b/modules/cudaarithm/test/test_arithm.cpp
@@ -125,43 +125,6 @@ INSTANTIATE_TEST_CASE_P(CUDA_Arithm, GEMM, testing::Combine(
    ALL_GEMM_FLAGS,
    WHOLE_SUBMAT));

-///////////////////////////////////////////////////////////////////////////////////////////////////////
-// Integral
-
-PARAM_TEST_CASE(Integral, cv::cuda::DeviceInfo, cv::Size, UseRoi)  // fixture parameterized by device, matrix size and ROI flag
-{
-    cv::cuda::DeviceInfo devInfo;
-    cv::Size size;
-    bool useRoi;
-
-    virtual void SetUp()
-    {
-        devInfo = GET_PARAM(0);  // parameter indices follow the type list in PARAM_TEST_CASE
-        size = GET_PARAM(1);
-        useRoi = GET_PARAM(2);
-
-        cv::cuda::setDevice(devInfo.deviceID());  // select the parameterized device for this test
-    }
-};
-
-CUDA_TEST_P(Integral, Accuracy)  // checks cv::cuda::integral against cv::integral on a random CV_8UC1 matrix
-{
-    cv::Mat src = randomMat(size, CV_8UC1);
-
-    cv::cuda::GpuMat dst = createMat(cv::Size(src.cols + 1, src.rows + 1), CV_32SC1, useRoi);  // output is one row and one column larger than src
-    cv::cuda::integral(loadMat(src, useRoi), dst);
-
-    cv::Mat dst_gold;
-    cv::integral(src, dst_gold, CV_32S);  // CPU reference with CV_32S requested as the sum depth
-
-    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);  // tolerance argument is 0.0
-}
-
-INSTANTIATE_TEST_CASE_P(CUDA_Arithm, Integral, testing::Combine(  // instantiate over the combination of the three parameter sets below
-    ALL_DEVICES,
-    DIFFERENT_SIZES,
-    WHOLE_SUBMAT));
-
 ////////////////////////////////////////////////////////////////////////////
 // MulSpectrums


--- a/modules/cudaarithm/test/test_reductions.cpp
+++ b/modules/cudaarithm/test/test_reductions.cpp
@@ -816,4 +816,78 @@ INSTANTIATE_TEST_CASE_P(CUDA_Arithm, MeanStdDev, testing::Combine(
    DIFFERENT_SIZES,
    WHOLE_SUBMAT));

+///////////////////////////////////////////////////////////////////////////////////////////////////////
+// Integral
+
+PARAM_TEST_CASE(Integral, cv::cuda::DeviceInfo, cv::Size, UseRoi)  // fixture parameterized by device, matrix size and ROI flag
+{
+    cv::cuda::DeviceInfo devInfo;
+    cv::Size size;
+    bool useRoi;
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);  // parameter indices follow the type list in PARAM_TEST_CASE
+        size = GET_PARAM(1);
+        useRoi = GET_PARAM(2);
+
+        cv::cuda::setDevice(devInfo.deviceID());  // select the parameterized device for this test
+    }
+};
+
+CUDA_TEST_P(Integral, Accuracy)  // checks cv::cuda::integral against cv::integral on a random CV_8UC1 matrix
+{
+    cv::Mat src = randomMat(size, CV_8UC1);
+
+    cv::cuda::GpuMat dst = createMat(cv::Size(src.cols + 1, src.rows + 1), CV_32SC1, useRoi);  // output is one row and one column larger than src
+    cv::cuda::integral(loadMat(src, useRoi), dst);
+
+    cv::Mat dst_gold;
+    cv::integral(src, dst_gold, CV_32S);  // CPU reference with CV_32S requested as the sum depth
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);  // tolerance argument is 0.0
+}
+
+INSTANTIATE_TEST_CASE_P(CUDA_Arithm, Integral, testing::Combine(  // instantiate over the combination of the three parameter sets below
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    WHOLE_SUBMAT));
+
+///////////////////////////////////////////////////////////////////////////////////////////////////////
+// IntegralSqr
+
+PARAM_TEST_CASE(IntegralSqr, cv::cuda::DeviceInfo, cv::Size, UseRoi)  // fixture parameterized by device, matrix size and ROI flag
+{
+    cv::cuda::DeviceInfo devInfo;
+    cv::Size size;
+    bool useRoi;
+
+    virtual void SetUp()
+    {
+        devInfo = GET_PARAM(0);  // parameter indices follow the type list in PARAM_TEST_CASE
+        size = GET_PARAM(1);
+        useRoi = GET_PARAM(2);
+
+        cv::cuda::setDevice(devInfo.deviceID());  // select the parameterized device for this test
+    }
+};
+
+CUDA_TEST_P(IntegralSqr, Accuracy)  // checks cv::cuda::sqrIntegral against the squared-sum output of cv::integral
+{
+    cv::Mat src = randomMat(size, CV_8UC1);
+
+    cv::cuda::GpuMat dst = createMat(cv::Size(src.cols + 1, src.rows + 1), CV_64FC1, useRoi);  // output is one row and one column larger than src, CV_64FC1
+    cv::cuda::sqrIntegral(loadMat(src, useRoi), dst);
+
+    cv::Mat dst_gold, temp;
+    cv::integral(src, temp, dst_gold);  // three-array form: temp takes the second output and is not read again; dst_gold takes the third (the squared sums per cv::integral's documentation)
+
+    EXPECT_MAT_NEAR(dst_gold, dst, 0.0);  // tolerance argument is 0.0
+}
+
+INSTANTIATE_TEST_CASE_P(CUDA_Arithm, IntegralSqr, testing::Combine(  // instantiate over the combination of the three parameter sets below
+    ALL_DEVICES,
+    DIFFERENT_SIZES,
+    WHOLE_SUBMAT));
+
 #endif // HAVE_CUDA