Added gpu::LUT support for the CV_8UC3 type, added gpu::cvtColor support for the BGR2BGR5x5 conversions, and made a minor fix in the tests.

e1e5047b · Vladislav Vinogradov · 1b8c0000 · e1e5047b · e1e5047b · e1e5047b
Commit e1e5047b authored Sep 27, 2010 by Vladislav Vinogradov
6 changed files
--- a/modules/gpu/src/arithm.cpp
+++ b/modules/gpu/src/arithm.cpp
@@ -266,13 +266,13 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
    sz.height = src1.rows;

    int funcIdx = normType >> 1;
-    Scalar retVal;
+    double retVal;

    nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr<Npp8u>(), src1.step, 
        src2.ptr<Npp8u>(), src2.step, 
-        sz, retVal.val) );
+        sz, &retVal) );

-    return retVal[0];
+    return retVal;
 }

 ////////////////////////////////////////////////////////////////////////
@@ -307,10 +307,7 @@ void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode)

 Scalar cv::gpu::sum(const GpuMat& src)
 {
-    CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
-    
-    
-    
+    CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);    

    NppiSize sz;
    sz.width  = src.cols;
@@ -324,7 +321,7 @@ Scalar cv::gpu::sum(const GpuMat& src)
        GpuMat buf(1, bufsz, CV_32S);

        Scalar res;
-         nppSafeCall( nppiSum_8u_C1R(src.ptr<Npp8u>(), src.step, sz, buf.ptr<Npp32s>(), res.val) );
+        nppSafeCall( nppiSum_8u_C1R(src.ptr<Npp8u>(), src.step, sz, buf.ptr<Npp32s>(), res.val) );
        return res;
    }
    else
@@ -336,8 +333,6 @@ Scalar cv::gpu::sum(const GpuMat& src)
        nppSafeCall( nppiSum_8u_C4R(src.ptr<Npp8u>(), src.step, sz, buf.ptr<Npp32s>(), res.val) );
        return res;
    }
-
-    
 }

 ////////////////////////////////////////////////////////////////////////
@@ -371,28 +366,54 @@ void cv::gpu::LUT(const GpuMat& src, const Mat& lut, GpuMat& dst)
    {
    public:
        Npp32s pLevels[256];
+        const Npp32s* pLevels3[3];
+        int nValues3[3];

        LevelsInit()
-        {            
+        {
+            nValues3[0] = nValues3[1] = nValues3[2] = 256;
            for (int i = 0; i < 256; ++i)
                pLevels[i] = i;
+            pLevels3[0] = pLevels3[1] = pLevels3[2] = pLevels;
        }
    };
    static LevelsInit lvls;

    int cn = src.channels();

-    CV_Assert(src.type() == CV_8UC1);
-    CV_Assert(lut.depth() == CV_32SC1 && lut.rows * lut.cols == 256 && lut.isContinuous());
+    CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC3);
+    CV_Assert(lut.depth() == CV_8U && (lut.channels() == 1 || lut.channels() == cn) && lut.rows * lut.cols == 256 && lut.isContinuous());

-    dst.create(src.size(), src.type());
+    dst.create(src.size(), CV_MAKETYPE(lut.depth(), cn));

    NppiSize sz;
    sz.height = src.rows;
    sz.width = src.cols;
+    
+    Mat nppLut;
+    lut.convertTo(nppLut, CV_32S);

-    nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, 
-        lut.ptr<Npp32s>(), lvls.pLevels, 256) );
+    if (src.type() == CV_8UC1)
+    {
+        nppSafeCall( nppiLUT_Linear_8u_C1R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, 
+            nppLut.ptr<Npp32s>(), lvls.pLevels, 256) );
+    }
+    else
+    {
+        Mat nppLut3[3];
+        const Npp32s* pValues3[3];
+        if (nppLut.channels() == 1)
+            pValues3[0] = pValues3[1] = pValues3[2] = nppLut.ptr<Npp32s>();
+        else
+        {
+            cv::split(nppLut, nppLut3);
+            pValues3[0] = nppLut3[0].ptr<Npp32s>();
+            pValues3[1] = nppLut3[1].ptr<Npp32s>(); 
+            pValues3[2] = nppLut3[2].ptr<Npp32s>();
+        }
+        nppSafeCall( nppiLUT_Linear_8u_C3R(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, 
+            pValues3, lvls.pLevels3, lvls.nValues3) );
+    }
 }

 #endif /* !defined (HAVE_CUDA) */
\ No newline at end of file
--- a/modules/gpu/src/cuda/color.cu
+++ b/modules/gpu/src/cuda/color.cu
--- a/modules/gpu/src/imgproc_gpu.cpp
+++ b/modules/gpu/src/imgproc_gpu.cpp
@@ -81,13 +81,16 @@ namespace cv { namespace gpu
        void reprojectImageTo3D_gpu(const DevMem2D& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream);
        void reprojectImageTo3D_gpu(const DevMem2D_<short>& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream);

-        void swapChannels_gpu(const DevMem2D& src, const DevMem2D& dst, int cn, const int* coeffs, cudaStream_t stream);
-        void swapChannels_gpu(const DevMem2D_<ushort>& src, const DevMem2D_<ushort>& dst, int cn, const int* coeffs, cudaStream_t stream);
-        void swapChannels_gpu(const DevMem2Df& src, const DevMem2Df& dst, int cn, const int* coeffs, cudaStream_t stream);
+        void swapChannels_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int cn, const int* coeffs, cudaStream_t stream);
+        void swapChannels_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int cn, const int* coeffs, cudaStream_t stream);
+        void swapChannels_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int cn, const int* coeffs, cudaStream_t stream);

-        void RGB2RGB_gpu(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, cudaStream_t stream);
-        void RGB2RGB_gpu(const DevMem2D_<ushort>& src, int srccn, const DevMem2D_<ushort>& dst, int dstcn, int bidx, cudaStream_t stream);
-        void RGB2RGB_gpu(const DevMem2Df& src, int srccn, const DevMem2Df& dst, int dstcn, int bidx, cudaStream_t stream);
+        void RGB2RGB_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, cudaStream_t stream);
+        void RGB2RGB_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, cudaStream_t stream);
+        void RGB2RGB_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, cudaStream_t stream);
+
+        void RGB5x52RGB_gpu(const DevMem2D& src, int green_bits, const DevMem2D& dst, int dstcn, int bidx, cudaStream_t stream);
+        void RGB2RGB5x5_gpu(const DevMem2D& src, int srccn, const DevMem2D& dst, int green_bits, int bidx, cudaStream_t stream);

        void Gray2RGB_gpu(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream);
        void Gray2RGB_gpu(const DevMem2D_<ushort>& src, const DevMem2D_<ushort>& dst, int dstcn, cudaStream_t stream);
@@ -245,38 +248,36 @@ namespace
                
                out.create(sz, CV_MAKETYPE(depth, dcn));
                if( depth == CV_8U )
-                    improc::RGB2RGB_gpu((DevMem2D)src, scn, (DevMem2D)out, dcn, bidx, stream);
+                    improc::RGB2RGB_gpu_8u(src, scn, out, dcn, bidx, stream);
                else if( depth == CV_16U )
-                    improc::RGB2RGB_gpu((DevMem2D_<unsigned short>)src, scn, (DevMem2D_<unsigned short>)out, dcn, bidx, stream);
+                    improc::RGB2RGB_gpu_16u(src, scn, out, dcn, bidx, stream);
                else
-                    improc::RGB2RGB_gpu((DevMem2Df)src, scn, (DevMem2Df)out, dcn, bidx, stream);
+                    improc::RGB2RGB_gpu_32f(src, scn, out, dcn, bidx, stream);
                break;
                
-            //case CV_BGR2BGR565: case CV_BGR2BGR555: case CV_RGB2BGR565: case CV_RGB2BGR555:
-            //case CV_BGRA2BGR565: case CV_BGRA2BGR555: case CV_RGBA2BGR565: case CV_RGBA2BGR555:
-            //    CV_Assert( (scn == 3 || scn == 4) && depth == CV_8U );
-            //    dst.create(sz, CV_8UC2);
-            //
-            //    CvtColorLoop(src, dst, RGB2RGB5x5(scn,
-            //              code == CV_BGR2BGR565 || code == CV_BGR2BGR555 ||
-            //              code == CV_BGRA2BGR565 || code == CV_BGRA2BGR555 ? 0 : 2,
-            //              code == CV_BGR2BGR565 || code == CV_RGB2BGR565 ||
-            //              code == CV_BGRA2BGR565 || code == CV_RGBA2BGR565 ? 6 : 5 // green bits
-            //                                      ));
-            //    break;
+            case CV_BGR2BGR565: case CV_BGR2BGR555: case CV_RGB2BGR565: case CV_RGB2BGR555:
+            case CV_BGRA2BGR565: case CV_BGRA2BGR555: case CV_RGBA2BGR565: case CV_RGBA2BGR555:
+                CV_Assert( (scn == 3 || scn == 4) && depth == CV_8U );
+                out.create(sz, CV_8UC2);
+
+                improc::RGB2RGB5x5_gpu(src, scn, out, code == CV_BGR2BGR565 || code == CV_RGB2BGR565 ||
+                          code == CV_BGRA2BGR565 || code == CV_RGBA2BGR565 ? 6 : 5,
+                          code == CV_BGR2BGR565 || code == CV_BGR2BGR555 ||
+                          code == CV_BGRA2BGR565 || code == CV_BGRA2BGR555 ? 0 : 2,
+                          stream);
+                break;
            
            //case CV_BGR5652BGR: case CV_BGR5552BGR: case CV_BGR5652RGB: case CV_BGR5552RGB:
            //case CV_BGR5652BGRA: case CV_BGR5552BGRA: case CV_BGR5652RGBA: case CV_BGR5552RGBA:
            //    if(dcn <= 0) dcn = 3;
            //    CV_Assert( (dcn == 3 || dcn == 4) && scn == 2 && depth == CV_8U );
-            //    dst.create(sz, CV_MAKETYPE(depth, dcn));
-            //    
-            //    CvtColorLoop(src, dst, RGB5x52RGB(dcn,
-            //              code == CV_BGR5652BGR || code == CV_BGR5552BGR ||
-            //              code == CV_BGR5652BGRA || code == CV_BGR5552BGRA ? 0 : 2, // blue idx
-            //              code == CV_BGR5652BGR || code == CV_BGR5652RGB ||
-            //              code == CV_BGR5652BGRA || code == CV_BGR5652RGBA ? 6 : 5 // green bits
-            //              ));
+            //    out.create(sz, CV_MAKETYPE(depth, dcn));
+
+            //    improc::RGB5x52RGB_gpu(src, code == CV_BGR5652BGR || code == CV_BGR5652RGB ||
+            //              code == CV_BGR5652BGRA || code == CV_BGR5652RGBA ? 6 : 5, out, dcn,
+            //              code == CV_BGR5652BGR || code == CV_BGR5552BGR ||
+            //              code == CV_BGR5652BGRA || code == CV_BGR5552BGRA ? 0 : 2,
+            //              stream);
            //    break;
                        
            case CV_BGR2GRAY: case CV_BGRA2GRAY: case CV_RGB2GRAY: case CV_RGBA2GRAY:
@@ -329,7 +330,7 @@ namespace
                nppSafeCall( nppiRGBToYCbCr_8u_C3R(src.ptr<Npp8u>(), src.step, out.ptr<Npp8u>(), out.step, nppsz) );
                {
                    static int coeffs[] = {0, 2, 1};
-                    improc::swapChannels_gpu((DevMem2D)out, (DevMem2D)out, 3, coeffs, 0);
+                    improc::swapChannels_gpu_8u(out, out, 3, coeffs, 0);
                }
                break;

@@ -341,7 +342,7 @@ namespace
                {
                    static int coeffs[] = {0, 2, 1};
                    GpuMat src1(src.size(), src.type());
-                    improc::swapChannels_gpu((DevMem2D)src, (DevMem2D)src1, 3, coeffs, 0);
+                    improc::swapChannels_gpu_8u(src, src1, 3, coeffs, 0);
                    nppSafeCall( nppiYCbCrToRGB_8u_C3R(src1.ptr<Npp8u>(), src1.step, out.ptr<Npp8u>(), out.step, nppsz) );   
                }             
                break;

--- a/tests/gpu/src/arithm.cpp
+++ b/tests/gpu/src/arithm.cpp
--- a/tests/gpu/src/gputest_main.cpp
+++ b/tests/gpu/src/gputest_main.cpp
@@ -46,6 +46,18 @@ CvTS test_system;
 const char* blacklist[] =
 {    
    "GPU-NppImageSum",
+    "GPU-MatOperatorAsyncCall",
+    //"GPU-NppErode",
+    //"GPU-NppDilate",
+    //"GPU-NppMorphologyEx",
+    //"GPU-NppImageDivide",
+    //"GPU-NppImageMeanStdDev",
+    //"GPU-NppImageMinNax",
+    //"GPU-NppImageResize",
+    //"GPU-NppImageWarpAffine",
+    //"GPU-NppImageWarpPerspective",
+    //"GPU-NppImageIntegral",
+    //"GPU-NppImageBlur",
    0
 };


--- a/tests/gpu/src/imgproc_gpu.cpp
+++ b/tests/gpu/src/imgproc_gpu.cpp