switched to new device layer in min/max

c7a3a7d4 · Vladislav Vinogradov · b11cccaa · c7a3a7d4 · c7a3a7d4
Commit c7a3a7d4 authored Jul 30, 2013 by Vladislav Vinogradov
Expand all Hide whitespace changes
Inline Side-by-side

Showing with 2 additions and 151 deletions

minmax_mat.cu modules/cudaarithm/src/cuda/minmax_mat.cu +0 -0

element_operations.cpp modules/cudaarithm/src/element_operations.cpp +2 -151

No files found.
--- a/modules/cudaarithm/src/cuda/minmax_mat.cu
+++ b/modules/cudaarithm/src/cuda/minmax_mat.cu
--- a/modules/cudaarithm/src/element_operations.cpp
+++ b/modules/cudaarithm/src/element_operations.cpp
@@ -435,158 +435,9 @@ namespace
    };
 }

-namespace arithm
-{
-    void minMat_v4(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
-    void minMat_v2(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
-    template <typename T> void minMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template <typename T> void minScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
-
-    void maxMat_v4(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
-    void maxMat_v2(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
-    template <typename T> void maxMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    template <typename T> void maxScalar(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
-}
-
-void minMaxMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& _stream, int op)
-{
-    using namespace arithm;
-
-    typedef void (*func_t)(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
-    static const func_t funcs[2][7] =
-    {
-        {
-            minMat<unsigned char>,
-            minMat<signed char>,
-            minMat<unsigned short>,
-            minMat<short>,
-            minMat<int>,
-            minMat<float>,
-            minMat<double>
-        },
-        {
-            maxMat<unsigned char>,
-            maxMat<signed char>,
-            maxMat<unsigned short>,
-            maxMat<short>,
-            maxMat<int>,
-            maxMat<float>,
-            maxMat<double>
-        }
-    };
+void minMaxMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& stream, int op);

-    typedef void (*opt_func_t)(PtrStepSz<unsigned int> src1, PtrStepSz<unsigned int> src2, PtrStepSz<unsigned int> dst, cudaStream_t stream);
-    static const opt_func_t funcs_v4[2] =
-    {
-        minMat_v4, maxMat_v4
-    };
-    static const opt_func_t funcs_v2[2] =
-    {
-        minMat_v2, maxMat_v2
-    };
-
-    const int depth = src1.depth();
-    const int cn = src1.channels();
-
-    CV_Assert( depth <= CV_64F );
-
-    cudaStream_t stream = StreamAccessor::getStream(_stream);
-
-    PtrStepSzb src1_(src1.rows, src1.cols * cn, src1.data, src1.step);
-    PtrStepSzb src2_(src1.rows, src1.cols * cn, src2.data, src2.step);
-    PtrStepSzb dst_(src1.rows, src1.cols * cn, dst.data, dst.step);
-
-    if (depth == CV_8U || depth == CV_16U)
-    {
-        const intptr_t src1ptr = reinterpret_cast<intptr_t>(src1_.data);
-        const intptr_t src2ptr = reinterpret_cast<intptr_t>(src2_.data);
-        const intptr_t dstptr = reinterpret_cast<intptr_t>(dst_.data);
-
-        const bool isAllAligned = (src1ptr & 31) == 0 && (src2ptr & 31) == 0 && (dstptr & 31) == 0;
-
-        if (isAllAligned)
-        {
-            if (depth == CV_8U && (src1_.cols & 3) == 0)
-            {
-                const int vcols = src1_.cols >> 2;
-
-                funcs_v4[op](PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step),
-                             PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step),
-                             PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step),
-                             stream);
-
-                return;
-            }
-            else if (depth == CV_16U && (src1_.cols & 1) == 0)
-            {
-                const int vcols = src1_.cols >> 1;
-
-                funcs_v2[op](PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src1_.data, src1_.step),
-                             PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) src2_.data, src2_.step),
-                             PtrStepSz<unsigned int>(src1_.rows, vcols, (unsigned int*) dst_.data, dst_.step),
-                             stream);
-
-                return;
-            }
-        }
-    }
-
-    const func_t func = funcs[op][depth];
-
-    if (!func)
-        CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
-
-    func(src1_, src2_, dst_, stream);
-}
-
-namespace
-{
-    template <typename T> double castScalar(double val)
-    {
-        return saturate_cast<T>(val);
-    }
-}
-
-void minMaxScalar(const GpuMat& src, Scalar val, bool, GpuMat& dst, const GpuMat&, double, Stream& stream, int op)
-{
-    using namespace arithm;
-
-    typedef void (*func_t)(PtrStepSzb src1, double src2, PtrStepSzb dst, cudaStream_t stream);
-    static const func_t funcs[2][7] =
-    {
-        {
-            minScalar<unsigned char>,
-            minScalar<signed char>,
-            minScalar<unsigned short>,
-            minScalar<short>,
-            minScalar<int>,
-            minScalar<float>,
-            minScalar<double>
-        },
-        {
-            maxScalar<unsigned char>,
-            maxScalar<signed char>,
-            maxScalar<unsigned short>,
-            maxScalar<short>,
-            maxScalar<int>,
-            maxScalar<float>,
-            maxScalar<double>
-        }
-    };
-
-    typedef double (*cast_func_t)(double sc);
-    static const cast_func_t cast_func[] =
-    {
-        castScalar<unsigned char>, castScalar<signed char>, castScalar<unsigned short>, castScalar<short>, castScalar<int>, castScalar<float>, castScalar<double>
-    };
-
-    const int depth = src.depth();
-
-    CV_Assert( depth <= CV_64F );
-    CV_Assert( src.channels() == 1 );
-
-    funcs[op][depth](src, cast_func[depth](val[0]), dst, StreamAccessor::getStream(stream));
-}
+void minMaxScalar(const GpuMat& src, cv::Scalar value, bool, GpuMat& dst, const GpuMat&, double, Stream& stream, int op);

 void cv::cuda::min(InputArray src1, InputArray src2, OutputArray dst, Stream& stream)
 {