From 5285722c1c2c1d15bbfc132f3c202449aaea6b49 Mon Sep 17 00:00:00 2001
From: Vladislav Vinogradov <no@email>
Date: Mon, 27 Sep 2010 14:10:19 +0000
Subject: [PATCH] added gpu::magnitude for complex source vector (two channels
 float). added gpu::cvtColor for BGR5x5 <-> BGR and BGR5x5 <-> Gray.

---
 modules/gpu/include/opencv2/gpu/gpu.hpp |   2 +
 modules/gpu/src/arithm.cpp              |  22 +-
 modules/gpu/src/cuda/color.cu           | 272 +++++++++++++-----------
 modules/gpu/src/imgproc_gpu.cpp         |  53 +++--
 tests/gpu/src/gputest_main.cpp          |   6 +-
 tests/gpu/src/imgproc_gpu.cpp           |  12 +-
 6 files changed, 202 insertions(+), 165 deletions(-)

diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp
index 51abde787c..314a8c2281 100644
--- a/modules/gpu/include/opencv2/gpu/gpu.hpp
+++ b/modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -452,6 +452,8 @@ namespace cv
 
         //! computes magnitude (magnitude(i)) of each (x(i), y(i)) vector
         CV_EXPORTS void magnitude(const GpuMat& x, const GpuMat& y, GpuMat& magnitude);
+        //! computes magnitude (magnitude(i)) of complex (x(i).re, x(i).im) vector
+        CV_EXPORTS void magnitude(const GpuMat& x, GpuMat& magnitude);
 
         ////////////////////////////// Image processing //////////////////////////////
 
diff --git a/modules/gpu/src/arithm.cpp b/modules/gpu/src/arithm.cpp
index 7f999122de..00e08a142a 100644
--- a/modules/gpu/src/arithm.cpp
+++ b/modules/gpu/src/arithm.cpp
@@ -70,6 +70,7 @@ void cv::gpu::LUT(const GpuMat&, const Mat&, GpuMat&) { throw_nogpu(); }
 void cv::gpu::exp(const GpuMat&, GpuMat&) { throw_nogpu(); }
 void cv::gpu::log(const GpuMat&, GpuMat&) { throw_nogpu(); }
 void cv::gpu::magnitude(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
+void cv::gpu::magnitude(const GpuMat&, GpuMat&) { throw_nogpu(); }
 
 #else /* !defined (HAVE_CUDA) */
 
@@ -530,6 +531,19 @@ void cv::gpu::log(const GpuMat& src, GpuMat& dst)
 ////////////////////////////////////////////////////////////////////////
 // magnitude
 
+void cv::gpu::magnitude(const GpuMat& src, GpuMat& dst)
+{
+    CV_Assert(src.type() == CV_32FC2);
+
+    dst.create(src.size(), CV_32FC1);
+
+    NppiSize sz;
+    sz.width = src.cols;
+    sz.height = src.rows;
+
+    nppSafeCall( nppiMagnitude_32fc32f_C1R(src.ptr<Npp32fc>(), src.step, dst.ptr<Npp32f>(), dst.step, sz) );
+}
+
 void cv::gpu::magnitude(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
 {
     CV_DbgAssert(src1.type() == src2.type() && src1.size() == src2.size());
@@ -539,13 +553,7 @@ void cv::gpu::magnitude(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
     GpuMat srcs[] = {src1, src2};
     cv::gpu::merge(srcs, 2, src);
 
-    dst.create(src1.size(), src1.type());
-
-    NppiSize sz;
-    sz.width = src.cols;
-    sz.height = src.rows;
-
-    nppSafeCall( nppiMagnitude_32fc32f_C1R(src.ptr<Npp32fc>(), src.step, dst.ptr<Npp32f>(), dst.step, sz) );
+    cv::gpu::magnitude(src, dst);
 }
 
 #endif /* !defined (HAVE_CUDA) */
\ No newline at end of file
diff --git a/modules/gpu/src/cuda/color.cu b/modules/gpu/src/cuda/color.cu
index 418518ef12..dadb9598a9 100644
--- a/modules/gpu/src/cuda/color.cu
+++ b/modules/gpu/src/cuda/color.cu
@@ -312,41 +312,6 @@ namespace imgproc
         }
     }
 
-    /*struct RGB5x52RGB
-    {
-        typedef uchar channel_type;
-
-        RGB5x52RGB(int _dstcn, int _blueIdx, int _greenBits)
-		    : dstcn(_dstcn), blueIdx(_blueIdx), greenBits(_greenBits) {}
-
-        void operator()(const uchar* src, uchar* dst, int n) const
-        {
-            int dcn = dstcn, bidx = blueIdx;
-            if( greenBits == 6 )
-                for( int i = 0; i < n; i++, dst += dcn )
-                {
-                    unsigned t = ((const unsigned short*)src)[i];
-                    dst[bidx] = (uchar)(t << 3);
-                    dst[1] = (uchar)((t >> 3) & ~3);
-                    dst[bidx ^ 2] = (uchar)((t >> 8) & ~7);
-                    if( dcn == 4 )
-                        dst[3] = 255;
-                }
-            else
-                for( int i = 0; i < n; i++, dst += dcn )
-                {
-                    unsigned t = ((const unsigned short*)src)[i];
-                    dst[bidx] = (uchar)(t << 3);
-                    dst[1] = (uchar)((t >> 2) & ~7);
-                    dst[bidx ^ 2] = (uchar)((t >> 7) & ~7);
-                    if( dcn == 4 )
-                        dst[3] = t & 0x8000 ? 255 : 0;
-                }
-        }
-
-        int dstcn, blueIdx, greenBits;
-    };*/
-
     template <int SRCCN, int GREEN_BITS> struct RGB2RGB5x5Converter {};
 
     template<int SRCCN> struct RGB2RGB5x5Converter<SRCCN, 6> 
@@ -415,7 +380,7 @@ namespace cv { namespace gpu { namespace improc
             {RGB5x52RGB_caller<6, 3>, RGB5x52RGB_caller<6, 4>}
         };
 
-        RGB5x52RGB_callers[green_bits - 5][dstcn - 5](src, dst, bidx, stream);
+        RGB5x52RGB_callers[green_bits - 5][dstcn - 3](src, dst, bidx, stream);
     }
 
     template <int SRCCN, int GREEN_BITS>
@@ -471,28 +436,37 @@ namespace imgproc
         }
     }
 
-    //struct Gray2RGB5x5
-    //{
-    //    typedef uchar channel_type;
-    //
-    //    Gray2RGB5x5(int _greenBits) : greenBits(_greenBits) {}
-    //    void operator()(const uchar* src, uchar* dst, int n) const
-    //    {
-    //        if( greenBits == 6 )
-    //            for( int i = 0; i < n; i++ )
-    //            {
-    //                int t = src[i];
-    //                ((unsigned short*)dst)[i] = (unsigned short)((t >> 3)|((t & ~3) << 3)|((t & ~7) << 8));
-    //            }
-    //        else
-    //            for( int i = 0; i < n; i++ )
-    //            {
-    //                int t = src[i] >> 3;
-    //                ((unsigned short*)dst)[i] = (unsigned short)(t|(t << 5)|(t << 10));
-    //            }
-    //    }
-    //    int greenBits;
-    //};
+    template <int GREEN_BITS> struct Gray2RGB5x5Converter {};
+
+    template<> struct Gray2RGB5x5Converter<6> 
+    {
+        static __device__ unsigned short cvt(unsigned int t)
+        {
+            return (unsigned short)((t >> 3) | ((t & ~3) << 3) | ((t & ~7) << 8));
+        }
+    };
+    template<> struct Gray2RGB5x5Converter<5> 
+    {
+        static __device__ unsigned short cvt(unsigned int t)
+        {
+            t >>= 3;
+            return (unsigned short)(t | (t << 5) | (t << 10));
+        }
+    };   
+
+    template<int GREEN_BITS>
+    __global__ void Gray2RGB5x5(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols)
+    {
+        const int x = blockDim.x * blockIdx.x + threadIdx.x;
+        const int y = blockDim.y * blockIdx.y + threadIdx.y;
+
+        if (y < rows && x < cols)
+        {
+            unsigned int src = src_[y * src_step + x];
+
+            *(unsigned short*)(dst_ + y * dst_step + (x << 1)) = Gray2RGB5x5Converter<GREEN_BITS>::cvt(src);
+        }
+    }
 }
 
 namespace cv { namespace gpu { namespace improc
@@ -536,60 +510,86 @@ namespace cv { namespace gpu { namespace improc
 
         Gray2RGB_callers[dstcn - 3](src, dst, stream);
     }
+
+    template <int GREEN_BITS>
+    void Gray2RGB5x5_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)
+    {
+        dim3 threads(32, 8, 1);
+        dim3 grid(1, 1, 1);
+
+        grid.x = divUp(src.cols, threads.x);
+        grid.y = divUp(src.rows, threads.y);
+
+        imgproc::Gray2RGB5x5<GREEN_BITS><<<grid, threads, 0, stream>>>(src.ptr, src.step, 
+            dst.ptr, dst.step, src.rows, src.cols);
+
+        if (stream == 0)
+            cudaSafeCall( cudaThreadSynchronize() );
+    }
+
+    void Gray2RGB5x5_gpu(const DevMem2D& src, const DevMem2D& dst, int green_bits, cudaStream_t stream)
+    {
+        typedef void (*Gray2RGB5x5_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
+        static const Gray2RGB5x5_caller_t Gray2RGB5x5_callers[2] = 
+        {
+            Gray2RGB5x5_caller<5>, Gray2RGB5x5_caller<6>
+        };
+
+        Gray2RGB5x5_callers[green_bits - 5](src, dst, stream);
+    }
 }}}
 
 ///////////////////////////////// Color to Grayscale ////////////////////////////////
 
 namespace imgproc
 {
-    //#undef R2Y
-    //#undef G2Y
-    //#undef B2Y
-    //
-    //enum
-    //{
-    //    yuv_shift = 14,
-    //    xyz_shift = 12,
-    //    R2Y = 4899,
-    //    G2Y = 9617,
-    //    B2Y = 1868,
-    //    BLOCK_SIZE = 256
-    //};
-
-    //struct RGB5x52Gray
-    //{
-    //    typedef uchar channel_type;
-    //
-    //    RGB5x52Gray(int _greenBits) : greenBits(_greenBits) {}
-    //    void operator()(const uchar* src, uchar* dst, int n) const
-    //    {
-    //        if( greenBits == 6 )
-    //            for( int i = 0; i < n; i++ )
-    //            {
-    //                int t = ((unsigned short*)src)[i];
-    //                dst[i] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +
-    //                                           ((t >> 3) & 0xfc)*G2Y +
-    //                                           ((t >> 8) & 0xf8)*R2Y, yuv_shift);
-    //            }
-    //        else
-    //            for( int i = 0; i < n; i++ )
-    //            {
-    //                int t = ((unsigned short*)src)[i];
-    //                dst[i] = (uchar)CV_DESCALE(((t << 3) & 0xf8)*B2Y +
-    //                                           ((t >> 2) & 0xf8)*G2Y +
-    //                                           ((t >> 7) & 0xf8)*R2Y, yuv_shift);
-    //            }
-    //    }
-    //    int greenBits;
-    //};
+    #undef R2Y
+    #undef G2Y
+    #undef B2Y
+    
+    enum
+    {
+        yuv_shift  = 14,
+        xyz_shift  = 12,
+        R2Y        = 4899,
+        G2Y        = 9617,
+        B2Y        = 1868,
+        BLOCK_SIZE = 256
+    };
 
-    __global__ void RGB2Gray_3(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx)
+    template <int GREEN_BITS> struct RGB5x52GrayConverter {};
+
+    template<> struct RGB5x52GrayConverter<6> 
+    {
+        static __device__ unsigned char cvt(unsigned int t)
+        {
+            return (unsigned char)CV_DESCALE(((t << 3) & 0xf8)*B2Y + ((t >> 3) & 0xfc)*G2Y + ((t >> 8) & 0xf8)*R2Y, yuv_shift);
+        }
+    };
+    template<> struct RGB5x52GrayConverter<5> 
+    {
+        static __device__ unsigned char cvt(unsigned int t)
+        {
+            return (unsigned char)CV_DESCALE(((t << 3) & 0xf8)*B2Y + ((t >> 2) & 0xf8)*G2Y + ((t >> 7) & 0xf8)*R2Y, yuv_shift);
+        }
+    };   
+
+    template<int GREEN_BITS>
+    __global__ void RGB5x52Gray(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols)
     {
-        const int cr = 4899;
-        const int cg = 9617;
-        const int cb = 1868;
-        const int yuv_shift = 14;
+        const int x = blockDim.x * blockIdx.x + threadIdx.x;
+        const int y = blockDim.y * blockIdx.y + threadIdx.y;
+
+        if (y < rows && x < cols)
+        {
+            unsigned int src = *(unsigned short*)(src_ + y * src_step + (x << 1));
 
+            dst_[y * dst_step + x] = RGB5x52GrayConverter<GREEN_BITS>::cvt(src);
+        }
+    }
+
+    __global__ void RGB2Gray_3(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx)
+    {
 		const int x = (blockDim.x * blockIdx.x + threadIdx.x) << 2;
 		const int y = blockDim.y * blockIdx.y + threadIdx.y;
 
@@ -600,19 +600,19 @@ namespace imgproc
             uchar t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
 
             uchar4 dst;
-            dst.x = (uchar)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift);
+            dst.x = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);
 
             src += 3;
             t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
-            dst.y = (uchar)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift);
+            dst.y = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);
 
             src += 3;
             t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
-            dst.z = (uchar)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift);
+            dst.z = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);
 
             src += 3;
             t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
-            dst.w = (uchar)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift);
+            dst.w = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);
 
             *(uchar4*)(dst_ + y * dst_step + x) = dst;
         }
@@ -620,11 +620,6 @@ namespace imgproc
 
     __global__ void RGB2Gray_3(const unsigned short* src_, size_t src_step, unsigned short* dst_, size_t dst_step, int rows, int cols, int bidx)
     {
-        const int cr = 4899;
-        const int cg = 9617;
-        const int cb = 1868;
-        const int yuv_shift = 14;
-
 		const int x = (blockDim.x * blockIdx.x + threadIdx.x) << 1;
 		const int y = blockDim.y * blockIdx.y + threadIdx.y;
 
@@ -635,11 +630,11 @@ namespace imgproc
             unsigned short t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
 
             ushort2 dst;
-            dst.x = (unsigned short)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift);
+            dst.x = (unsigned short)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);
 
             src += 3;
             t0 = src[bidx], t1 = src[1], t2 = src[bidx ^ 2];
-            dst.y = (unsigned short)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift);
+            dst.y = (unsigned short)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);
 
             *(ushort2*)(dst_ + y * dst_step + x) = dst;
         }
@@ -665,11 +660,6 @@ namespace imgproc
 
     __global__ void RGB2Gray_4(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx)
     {
-        const int cr = 4899;
-        const int cg = 9617;
-        const int cb = 1868;
-        const int yuv_shift = 14;
-
 		const int x = (blockDim.x * blockIdx.x + threadIdx.x) << 2;
 		const int y = blockDim.y * blockIdx.y + threadIdx.y;
 
@@ -680,19 +670,19 @@ namespace imgproc
             uchar t0 = ((uchar*)(&src))[bidx], t1 = src.y, t2 = ((uchar*)(&src))[bidx ^ 2];
 
             uchar4 dst;
-            dst.x = (uchar)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift);
+            dst.x = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);
 
             src = *(uchar4*)(src_ + y * src_step + (x << 2) + 4);
             t0 = ((uchar*)(&src))[bidx], t1 = src.y, t2 = ((uchar*)(&src))[bidx ^ 2];
-            dst.y = (uchar)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift);
+            dst.y = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);
 
             src = *(uchar4*)(src_ + y * src_step + (x << 2) + 8);
             t0 = ((uchar*)(&src))[bidx], t1 = src.y, t2 = ((uchar*)(&src))[bidx ^ 2];
-            dst.z = (uchar)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift);
+            dst.z = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);
 
             src = *(uchar4*)(src_ + y * src_step + (x << 2) + 12);
             t0 = ((uchar*)(&src))[bidx], t1 = src.y, t2 = ((uchar*)(&src))[bidx ^ 2];
-            dst.w = (uchar)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift);
+            dst.w = (uchar)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);
 
             *(uchar4*)(dst_ + y * dst_step + x) = dst;
         }
@@ -700,11 +690,6 @@ namespace imgproc
 
     __global__ void RGB2Gray_4(const unsigned short* src_, size_t src_step, unsigned short* dst_, size_t dst_step, int rows, int cols, int bidx)
     {
-        const int cr = 4899;
-        const int cg = 9617;
-        const int cb = 1868;
-        const int yuv_shift = 14;
-
 		const int x = (blockDim.x * blockIdx.x + threadIdx.x) << 1;
 		const int y = blockDim.y * blockIdx.y + threadIdx.y;
 
@@ -715,11 +700,11 @@ namespace imgproc
             unsigned short t0 = ((unsigned short*)(&src))[bidx], t1 = src.y, t2 = ((unsigned short*)(&src))[bidx ^ 2];
 
             ushort2 dst;
-            dst.x = (unsigned short)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift);
+            dst.x = (unsigned short)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);
 
             src = *(ushort4*)(src_ + y * src_step + (x << 2) + 4);
             t0 = ((unsigned short*)(&src))[bidx], t1 = src.y, t2 = ((unsigned short*)(&src))[bidx ^ 2];
-            dst.y = (unsigned short)CV_DESCALE((unsigned)(t0 * cb + t1 * cg + t2 * cr), yuv_shift);
+            dst.y = (unsigned short)CV_DESCALE((unsigned)(t0 * B2Y + t1 * G2Y + t2 * R2Y), yuv_shift);
 
             *(ushort2*)(dst_ + y * dst_step + x) = dst;
         }
@@ -820,6 +805,33 @@ namespace cv { namespace gpu { namespace improc
         if (stream == 0)
             cudaSafeCall( cudaThreadSynchronize() );
     }
+
+    template <int GREEN_BITS>
+    void RGB5x52Gray_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)
+    {
+        dim3 threads(32, 8, 1);
+        dim3 grid(1, 1, 1);
+
+        grid.x = divUp(src.cols, threads.x);
+        grid.y = divUp(src.rows, threads.y);
+
+        imgproc::RGB5x52Gray<GREEN_BITS><<<grid, threads, 0, stream>>>(src.ptr, src.step, 
+            dst.ptr, dst.step, src.rows, src.cols);
+
+        if (stream == 0)
+            cudaSafeCall( cudaThreadSynchronize() );
+    }
+
+    void RGB5x52Gray_gpu(const DevMem2D& src, int green_bits, const DevMem2D& dst, cudaStream_t stream)
+    {
+        typedef void (*RGB5x52Gray_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
+        static const RGB5x52Gray_caller_t RGB5x52Gray_callers[2] = 
+        {
+            RGB5x52Gray_caller<5>, RGB5x52Gray_caller<6>
+        };
+
+        RGB5x52Gray_callers[green_bits - 5](src, dst, stream);
+    }
 }}}
 
 ///////////////////////////////////// RGB <-> YCrCb //////////////////////////////////////
diff --git a/modules/gpu/src/imgproc_gpu.cpp b/modules/gpu/src/imgproc_gpu.cpp
index 750044af34..d7c47dae74 100644
--- a/modules/gpu/src/imgproc_gpu.cpp
+++ b/modules/gpu/src/imgproc_gpu.cpp
@@ -95,10 +95,12 @@ namespace cv { namespace gpu
         void Gray2RGB_gpu(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream);
         void Gray2RGB_gpu(const DevMem2D_<ushort>& src, const DevMem2D_<ushort>& dst, int dstcn, cudaStream_t stream);
         void Gray2RGB_gpu(const DevMem2Df& src, const DevMem2Df& dst, int dstcn, cudaStream_t stream);
+        void Gray2RGB5x5_gpu(const DevMem2D& src, const DevMem2D& dst, int green_bits, cudaStream_t stream);
 
         void RGB2Gray_gpu(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream);
         void RGB2Gray_gpu(const DevMem2D_<ushort>& src, int srccn, const DevMem2D_<ushort>& dst, int bidx, cudaStream_t stream);
         void RGB2Gray_gpu(const DevMem2Df& src, int srccn, const DevMem2Df& dst, int bidx, cudaStream_t stream);
+        void RGB5x52Gray_gpu(const DevMem2D& src, int green_bits, const DevMem2D& dst, cudaStream_t stream);
     }
 }}
 
@@ -267,18 +269,20 @@ namespace
                           stream);
                 break;
             
-            //case CV_BGR5652BGR: case CV_BGR5552BGR: case CV_BGR5652RGB: case CV_BGR5552RGB:
-            //case CV_BGR5652BGRA: case CV_BGR5552BGRA: case CV_BGR5652RGBA: case CV_BGR5552RGBA:
-            //    if(dcn <= 0) dcn = 3;
-            //    CV_Assert( (dcn == 3 || dcn == 4) && scn == 2 && depth == CV_8U );
-            //    out.create(sz, CV_MAKETYPE(depth, dcn));
-
-            //    improc::RGB5x52RGB_gpu(src, code == CV_BGR2BGR565 || code == CV_RGB2BGR565 ||
-            //              code == CV_BGRA2BGR565 || code == CV_RGBA2BGR565 ? 6 : 5, out, dcn,
-            //              code == CV_BGR2BGR565 || code == CV_BGR2BGR555 ||
-            //              code == CV_BGRA2BGR565 || code == CV_BGRA2BGR555 ? 0 : 2,
-            //              stream);
-            //    break;
+            case CV_BGR5652BGR: case CV_BGR5552BGR: case CV_BGR5652RGB: case CV_BGR5552RGB:
+            case CV_BGR5652BGRA: case CV_BGR5552BGRA: case CV_BGR5652RGBA: case CV_BGR5552RGBA:
+                if(dcn <= 0) dcn = 3;
+                CV_Assert( (dcn == 3 || dcn == 4) && scn == 2 && depth == CV_8U );
+                out.create(sz, CV_MAKETYPE(depth, dcn));
+
+                improc::RGB5x52RGB_gpu(src, 
+                          code == CV_BGR5652BGR || code == CV_BGR5652RGB ||
+                          code == CV_BGR5652BGRA || code == CV_BGR5652RGBA ? 6 : 5, 
+                          out, dcn,
+                          code == CV_BGR5652BGR || code == CV_BGR5552BGR ||
+                          code == CV_BGR5652BGRA || code == CV_BGR5552BGRA ? 0 : 2,
+                          stream);
+                break;
                         
             case CV_BGR2GRAY: case CV_BGRA2GRAY: case CV_RGB2GRAY: case CV_RGBA2GRAY:
                 CV_Assert(scn == 3 || scn == 4);
@@ -294,11 +298,13 @@ namespace
                     improc::RGB2Gray_gpu((DevMem2Df)src, scn, (DevMem2Df)out, bidx, stream);
                 break;
             
-            //case CV_BGR5652GRAY: case CV_BGR5552GRAY:
-            //    CV_Assert( scn == 2 && depth == CV_8U );
-            //    dst.create(sz, CV_8UC1);
-            //    CvtColorLoop(src, dst, RGB5x52Gray(code == CV_BGR5652GRAY ? 6 : 5));
-            //    break;
+            case CV_BGR5652GRAY: case CV_BGR5552GRAY:
+                CV_Assert( scn == 2 && depth == CV_8U );
+
+                out.create(sz, CV_8UC1);
+
+                improc::RGB5x52Gray_gpu(src, code == CV_BGR5652GRAY ? 6 : 5, out, stream);
+                break;
             
             case CV_GRAY2BGR: case CV_GRAY2BGRA:
                 if (dcn <= 0) 
@@ -315,12 +321,13 @@ namespace
                     improc::Gray2RGB_gpu((DevMem2Df)src, (DevMem2Df)out, dcn, stream);
                 break;
                 
-            //case CV_GRAY2BGR565: case CV_GRAY2BGR555:
-            //    CV_Assert( scn == 1 && depth == CV_8U );
-            //    dst.create(sz, CV_8UC2);
-            //    
-            //    CvtColorLoop(src, dst, Gray2RGB5x5(code == CV_GRAY2BGR565 ? 6 : 5));
-            //    break;
+            case CV_GRAY2BGR565: case CV_GRAY2BGR555:
+                CV_Assert( scn == 1 && depth == CV_8U );
+
+                out.create(sz, CV_8UC2);
+                
+                improc::Gray2RGB5x5_gpu(src, out, code == CV_GRAY2BGR565 ? 6 : 5, stream);
+                break;
                 
             case CV_RGB2YCrCb:
                 CV_Assert(scn == 3 && depth == CV_8U);
diff --git a/tests/gpu/src/gputest_main.cpp b/tests/gpu/src/gputest_main.cpp
index d4b9b3f5eb..463b2b7509 100644
--- a/tests/gpu/src/gputest_main.cpp
+++ b/tests/gpu/src/gputest_main.cpp
@@ -47,9 +47,9 @@ const char* blacklist[] =
 {    
     "GPU-NppImageSum",              // crash
     "GPU-MatOperatorAsyncCall",     // crash
-    //"GPU-NppErode",                 // npp func returns error code (CUDA_KERNEL_LAUNCH_ERROR or TEXTURE_BIND_ERROR)
-    //"GPU-NppDilate",                // npp func returns error code (CUDA_KERNEL_LAUNCH_ERROR or TEXTURE_BIND_ERROR)
-    //"GPU-NppMorphologyEx",          // npp func returns error code (CUDA_KERNEL_LAUNCH_ERROR or TEXTURE_BIND_ERROR)
+    "GPU-NppErode",                 // npp func returns error code (CUDA_KERNEL_LAUNCH_ERROR or TEXTURE_BIND_ERROR)
+    "GPU-NppDilate",                // npp func returns error code (CUDA_KERNEL_LAUNCH_ERROR or TEXTURE_BIND_ERROR)
+    "GPU-NppMorphologyEx",          // npp func returns error code (CUDA_KERNEL_LAUNCH_ERROR or TEXTURE_BIND_ERROR)
     //"GPU-NppImageDivide",           // different round mode
     //"GPU-NppImageMeanStdDev",       // different precision
     //"GPU-NppImageMinNax",           // npp bug
diff --git a/tests/gpu/src/imgproc_gpu.cpp b/tests/gpu/src/imgproc_gpu.cpp
index 0f8a6aed2e..beff852163 100644
--- a/tests/gpu/src/imgproc_gpu.cpp
+++ b/tests/gpu/src/imgproc_gpu.cpp
@@ -498,8 +498,16 @@ void CV_GpuCvtColorTest::run( int )
     try
     {
         //run tests
-        int codes[]             = { CV_BGR2RGB,  /* CV_RGB2YCrCb,   CV_YCrCb2RGB,*/   CV_RGB2RGBA,   CV_RGBA2BGRA,   CV_BGRA2GRAY,   CV_GRAY2RGB,   CV_RGB2BGR555/*,   CV_BGR5552BGR/*, CV_BGR2BGR565, CV_BGR5652RGB*/};
-        const char* codes_str[] = {"CV_BGR2RGB", /*"CV_RGB2YCrCb", "CV_YCrCb2RGB",*/ "CV_RGB2RGBA", "CV_RGBA2BGRA", "CV_BGRA2GRAY", "CV_GRAY2RGB", "CV_RGB2BGR555"/*, "CV_BGR5552BGR"/*, "CV_BGR2BGR565", "CV_BGR5652RGB"*/};
+        int codes[] = { CV_BGR2RGB, CV_RGB2BGRA, CV_BGRA2RGB,
+                        CV_RGB2BGR555, CV_BGR5552BGR, CV_BGR2BGR565, CV_BGR5652RGB, 
+                        /* CV_RGB2YCrCb, CV_YCrCb2RGB,*/  
+                        CV_RGB2GRAY, CV_GRAY2BGRA, CV_BGRA2GRAY,
+                        CV_GRAY2BGR555, CV_BGR5552GRAY, CV_GRAY2BGR565, CV_BGR5652GRAY};
+        const char* codes_str[] = { "CV_BGR2RGB", "CV_RGB2BGRA", "CV_BGRA2RGB",
+                                    "CV_RGB2BGR555", "CV_BGR5552BGR", "CV_BGR2BGR565", "CV_BGR5652RGB", 
+                                    /* "CV_RGB2YCrCb", "CV_YCrCb2RGB",*/  
+                                    "CV_RGB2GRAY", "CV_GRAY2BGRA", "CV_BGRA2GRAY",
+                                    "CV_GRAY2BGR555", "CV_BGR5552GRAY", "CV_GRAY2BGR565", "CV_BGR5652GRAY"};
         int codes_num = sizeof(codes) / sizeof(int);
 
         for (int i = 0; i < codes_num; ++i)
-- 
2.18.0