Commit 3e840cb7 authored by Vladislav Vinogradov's avatar Vladislav Vinogradov

added min/max filters to gpu module.

added supports of 4-channels image to gpu::minMax and gpu::cvtColor for RGB <-> YCrCB, RGB <-> YUV and RGB <-> XYZ color conversion.
parent 1cf405d8
...@@ -411,7 +411,7 @@ namespace cv ...@@ -411,7 +411,7 @@ namespace cv
CV_EXPORTS Scalar sum(const GpuMat& m); CV_EXPORTS Scalar sum(const GpuMat& m);
//! finds global minimum and maximum array elements and returns their values //! finds global minimum and maximum array elements and returns their values
//! supports only CV_8UC1 type //! supports CV_8UC1 and CV_8UC4 type
//! disabled until fix npp bug //! disabled until fix npp bug
CV_EXPORTS void minMax(const GpuMat& src, double* minVal, double* maxVal = 0); CV_EXPORTS void minMax(const GpuMat& src, double* minVal, double* maxVal = 0);
...@@ -649,6 +649,12 @@ namespace cv ...@@ -649,6 +649,12 @@ namespace cv
//! returns the Gaussian filter engine //! returns the Gaussian filter engine
CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0); CV_EXPORTS Ptr<FilterEngine_GPU> createGaussianFilter_GPU(int type, Size ksize, double sigma1, double sigma2 = 0);
//! returns maximum filter
CV_EXPORTS Ptr<BaseFilter_GPU> getMaxFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1,-1));
//! returns minimum filter
CV_EXPORTS Ptr<BaseFilter_GPU> getMinFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor = Point(-1,-1));
//! smooths the image using the normalized box filter //! smooths the image using the normalized box filter
//! supports CV_8UC1, CV_8UC4 types //! supports CV_8UC1, CV_8UC4 types
CV_EXPORTS void boxFilter(const GpuMat& src, GpuMat& dst, int ddepth, Size ksize, Point anchor = Point(-1,-1)); CV_EXPORTS void boxFilter(const GpuMat& src, GpuMat& dst, int ddepth, Size ksize, Point anchor = Point(-1,-1));
......
...@@ -419,11 +419,10 @@ Scalar cv::gpu::sum(const GpuMat& src) ...@@ -419,11 +419,10 @@ Scalar cv::gpu::sum(const GpuMat& src)
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// minMax // minMax
void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal) namespace
{ {
CV_Assert(!"disabled until fix npp bug"); void minMax_c1(const GpuMat& src, double* minVal, double* maxVal)
CV_Assert(src.type() == CV_8UC1); {
NppiSize sz; NppiSize sz;
sz.width = src.cols; sz.width = src.cols;
sz.height = src.rows; sz.height = src.rows;
...@@ -437,6 +436,38 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal) ...@@ -437,6 +436,38 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal)
if (maxVal) if (maxVal)
*maxVal = max_res; *maxVal = max_res;
}
void minMax_c4(const GpuMat& src, double* minVal, double* maxVal)
{
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
Npp8u* cuMin = nppsMalloc_8u(4);
Npp8u* cuMax = nppsMalloc_8u(4);
nppSafeCall( nppiMinMax_8u_C4R(src.ptr<Npp8u>(), src.step, sz, cuMin, cuMax) );
if (minVal)
cudaMemcpy(minVal, cuMin, 4 * sizeof(Npp8u), cudaMemcpyDeviceToHost);
if (maxVal)
cudaMemcpy(maxVal, cuMax, 4 * sizeof(Npp8u), cudaMemcpyDeviceToHost);
nppsFree(cuMin);
nppsFree(cuMax);
}
}
void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal)
{
typedef void (*minMax_t)(const GpuMat& src, double* minVal, double* maxVal);
static const minMax_t minMax_callers[] = {0, minMax_c1, 0, 0, minMax_c4};
CV_Assert(!"disabled until fix npp bug");
CV_Assert(src.type() == CV_8UC1 || src.type() == CV_8UC4);
minMax_callers[src.channels()](src, minVal, maxVal);
} }
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
......
...@@ -625,9 +625,8 @@ namespace imgproc ...@@ -625,9 +625,8 @@ namespace imgproc
template <typename T> struct RGB2YCrCbConverter template <typename T> struct RGB2YCrCbConverter
{ {
typedef typename TypeVec<T, 3>::vec_t dst_t; template <typename D>
static __device__ void cvt(const T* src, D& dst, int bidx)
static __device__ void cvt(const T* src, dst_t& dst, int bidx)
{ {
const int delta = ColorChannel<T>::half() * (1 << yuv_shift); const int delta = ColorChannel<T>::half() * (1 << yuv_shift);
...@@ -642,9 +641,8 @@ namespace imgproc ...@@ -642,9 +641,8 @@ namespace imgproc
}; };
template<> struct RGB2YCrCbConverter<float> template<> struct RGB2YCrCbConverter<float>
{ {
typedef typename TypeVec<float, 3>::vec_t dst_t; template <typename D>
static __device__ void cvt(const float* src, D& dst, int bidx)
static __device__ void cvt(const float* src, dst_t& dst, int bidx)
{ {
dst.x = src[0] * cYCrCbCoeffs_f[0] + src[1] * cYCrCbCoeffs_f[1] + src[2] * cYCrCbCoeffs_f[2]; dst.x = src[0] * cYCrCbCoeffs_f[0] + src[1] * cYCrCbCoeffs_f[1] + src[2] * cYCrCbCoeffs_f[2];
dst.y = (src[bidx^2] - dst.x) * cYCrCbCoeffs_f[3] + ColorChannel<float>::half(); dst.y = (src[bidx^2] - dst.x) * cYCrCbCoeffs_f[3] + ColorChannel<float>::half();
...@@ -652,11 +650,11 @@ namespace imgproc ...@@ -652,11 +650,11 @@ namespace imgproc
} }
}; };
template <int SRCCN, typename T> template <int SRCCN, int DSTCN, typename T>
__global__ void RGB2YCrCb(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx) __global__ void RGB2YCrCb(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx)
{ {
typedef typename TypeVec<T, SRCCN>::vec_t src_t; typedef typename TypeVec<T, SRCCN>::vec_t src_t;
typedef typename TypeVec<T, 3>::vec_t dst_t; typedef typename TypeVec<T, DSTCN>::vec_t dst_t;
const int x = blockDim.x * blockIdx.x + threadIdx.x; const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y; const int y = blockDim.y * blockIdx.y + threadIdx.y;
...@@ -668,30 +666,28 @@ namespace imgproc ...@@ -668,30 +666,28 @@ namespace imgproc
RGB2YCrCbConverter<T>::cvt(((const T*)(&src)), dst, bidx); RGB2YCrCbConverter<T>::cvt(((const T*)(&src)), dst, bidx);
*(dst_t*)(dst_ + y * dst_step + x * 3 * sizeof(T)) = dst; *(dst_t*)(dst_ + y * dst_step + x * DSTCN * sizeof(T)) = dst;
} }
} }
template <typename T> struct YCrCb2RGBConvertor template <typename D> struct YCrCb2RGBConvertor
{ {
typedef typename TypeVec<T, 3>::vec_t src_t; template <typename T>
static __device__ void cvt(const T& src, D* dst, int bidx)
static __device__ void cvt(const src_t& src, T* dst, int bidx)
{ {
const int b = src.x + CV_DESCALE((src.z - ColorChannel<T>::half()) * cYCrCbCoeffs_i[3], yuv_shift); const int b = src.x + CV_DESCALE((src.z - ColorChannel<D>::half()) * cYCrCbCoeffs_i[3], yuv_shift);
const int g = src.x + CV_DESCALE((src.z - ColorChannel<T>::half()) * cYCrCbCoeffs_i[2] + (src.y - ColorChannel<T>::half()) * cYCrCbCoeffs_i[1], yuv_shift); const int g = src.x + CV_DESCALE((src.z - ColorChannel<D>::half()) * cYCrCbCoeffs_i[2] + (src.y - ColorChannel<D>::half()) * cYCrCbCoeffs_i[1], yuv_shift);
const int r = src.x + CV_DESCALE((src.y - ColorChannel<T>::half()) * cYCrCbCoeffs_i[0], yuv_shift); const int r = src.x + CV_DESCALE((src.y - ColorChannel<D>::half()) * cYCrCbCoeffs_i[0], yuv_shift);
dst[bidx] = saturate_cast<T>(b); dst[bidx] = saturate_cast<D>(b);
dst[1] = saturate_cast<T>(g); dst[1] = saturate_cast<D>(g);
dst[bidx^2] = saturate_cast<T>(r); dst[bidx^2] = saturate_cast<D>(r);
} }
}; };
template <> struct YCrCb2RGBConvertor<float> template <> struct YCrCb2RGBConvertor<float>
{ {
typedef typename TypeVec<float, 3>::vec_t src_t; template <typename T>
static __device__ void cvt(const T& src, float* dst, int bidx)
static __device__ void cvt(const src_t& src, float* dst, int bidx)
{ {
dst[bidx] = src.x + (src.z - ColorChannel<float>::half()) * cYCrCbCoeffs_f[3]; dst[bidx] = src.x + (src.z - ColorChannel<float>::half()) * cYCrCbCoeffs_f[3];
dst[1] = src.x + (src.z - ColorChannel<float>::half()) * cYCrCbCoeffs_f[2] + (src.y - ColorChannel<float>::half()) * cYCrCbCoeffs_f[1]; dst[1] = src.x + (src.z - ColorChannel<float>::half()) * cYCrCbCoeffs_f[2] + (src.y - ColorChannel<float>::half()) * cYCrCbCoeffs_f[1];
...@@ -699,10 +695,10 @@ namespace imgproc ...@@ -699,10 +695,10 @@ namespace imgproc
} }
}; };
template <int DSTCN, typename T> template <int SRCCN, int DSTCN, typename T>
__global__ void YCrCb2RGB(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx) __global__ void YCrCb2RGB(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols, int bidx)
{ {
typedef typename TypeVec<T, 3>::vec_t src_t; typedef typename TypeVec<T, SRCCN>::vec_t src_t;
typedef typename TypeVec<T, DSTCN>::vec_t dst_t; typedef typename TypeVec<T, DSTCN>::vec_t dst_t;
const int x = blockDim.x * blockIdx.x + threadIdx.x; const int x = blockDim.x * blockIdx.x + threadIdx.x;
...@@ -710,7 +706,7 @@ namespace imgproc ...@@ -710,7 +706,7 @@ namespace imgproc
if (y < rows && x < cols) if (y < rows && x < cols)
{ {
src_t src = *(const src_t*)(src_ + y * src_step + x * 3 * sizeof(T)); src_t src = *(const src_t*)(src_ + y * src_step + x * SRCCN * sizeof(T));
dst_t dst; dst_t dst;
YCrCb2RGBConvertor<T>::cvt(src, ((T*)(&dst)), bidx); YCrCb2RGBConvertor<T>::cvt(src, ((T*)(&dst)), bidx);
...@@ -723,7 +719,7 @@ namespace imgproc ...@@ -723,7 +719,7 @@ namespace imgproc
namespace cv { namespace gpu { namespace improc namespace cv { namespace gpu { namespace improc
{ {
template <typename T, int SRCCN> template <typename T, int SRCCN, int DSTCN>
void RGB2YCrCb_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream) void RGB2YCrCb_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)
{ {
dim3 threads(32, 8, 1); dim3 threads(32, 8, 1);
...@@ -732,53 +728,56 @@ namespace cv { namespace gpu { namespace improc ...@@ -732,53 +728,56 @@ namespace cv { namespace gpu { namespace improc
grid.x = divUp(src.cols, threads.x); grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y); grid.y = divUp(src.rows, threads.y);
imgproc::RGB2YCrCb<SRCCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, imgproc::RGB2YCrCb<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
dst.ptr, dst.step, src.rows, src.cols, bidx); dst.ptr, dst.step, src.rows, src.cols, bidx);
if (stream == 0) if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() ); cudaSafeCall( cudaThreadSynchronize() );
} }
void RGB2YCrCb_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, const int* coeffs, cudaStream_t stream) void RGB2YCrCb_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream)
{ {
typedef void (*RGB2YCrCb_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream); typedef void (*RGB2YCrCb_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);
static const RGB2YCrCb_caller_t RGB2YCrCb_callers[2] = static const RGB2YCrCb_caller_t RGB2YCrCb_callers[2][2] =
{ {
RGB2YCrCb_caller<uchar, 3>, RGB2YCrCb_caller<uchar, 4> {RGB2YCrCb_caller<uchar, 3, 3>, RGB2YCrCb_caller<uchar, 3, 4>},
{RGB2YCrCb_caller<uchar, 4, 3>, RGB2YCrCb_caller<uchar, 4, 4>}
}; };
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_i, coeffs, 5 * sizeof(int)) ); cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_i, coeffs, 5 * sizeof(int)) );
RGB2YCrCb_callers[srccn-3](src, dst, bidx, stream); RGB2YCrCb_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
} }
void RGB2YCrCb_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, const int* coeffs, cudaStream_t stream) void RGB2YCrCb_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream)
{ {
typedef void (*RGB2YCrCb_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream); typedef void (*RGB2YCrCb_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);
static const RGB2YCrCb_caller_t RGB2YCrCb_callers[2] = static const RGB2YCrCb_caller_t RGB2YCrCb_callers[2][2] =
{ {
RGB2YCrCb_caller<unsigned short, 3>, RGB2YCrCb_caller<unsigned short, 4> {RGB2YCrCb_caller<unsigned short, 3, 3>, RGB2YCrCb_caller<unsigned short, 3, 4>},
{RGB2YCrCb_caller<unsigned short, 4, 3>, RGB2YCrCb_caller<unsigned short, 4, 4>}
}; };
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_i, coeffs, 5 * sizeof(int)) ); cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_i, coeffs, 5 * sizeof(int)) );
RGB2YCrCb_callers[srccn-3](src, dst, bidx, stream); RGB2YCrCb_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
} }
void RGB2YCrCb_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, const float* coeffs, cudaStream_t stream) void RGB2YCrCb_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, const float* coeffs, cudaStream_t stream)
{ {
typedef void (*RGB2YCrCb_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream); typedef void (*RGB2YCrCb_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);
static const RGB2YCrCb_caller_t RGB2YCrCb_callers[2] = static const RGB2YCrCb_caller_t RGB2YCrCb_callers[2][2] =
{ {
RGB2YCrCb_caller<float, 3>, RGB2YCrCb_caller<float, 4> {RGB2YCrCb_caller<float, 3, 3>, RGB2YCrCb_caller<float, 3, 4>},
{RGB2YCrCb_caller<float, 4, 3>, RGB2YCrCb_caller<float, 4, 4>}
}; };
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_f, coeffs, 5 * sizeof(float)) ); cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_f, coeffs, 5 * sizeof(float)) );
RGB2YCrCb_callers[srccn-3](src, dst, bidx, stream); RGB2YCrCb_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
} }
template <typename T, int DSTCN> template <typename T, int SRCCN, int DSTCN>
void YCrCb2RGB_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream) void YCrCb2RGB_caller(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream)
{ {
dim3 threads(32, 8, 1); dim3 threads(32, 8, 1);
...@@ -787,50 +786,53 @@ namespace cv { namespace gpu { namespace improc ...@@ -787,50 +786,53 @@ namespace cv { namespace gpu { namespace improc
grid.x = divUp(src.cols, threads.x); grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y); grid.y = divUp(src.rows, threads.y);
imgproc::YCrCb2RGB<DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, imgproc::YCrCb2RGB<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
dst.ptr, dst.step, src.rows, src.cols, bidx); dst.ptr, dst.step, src.rows, src.cols, bidx);
if (stream == 0) if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() ); cudaSafeCall( cudaThreadSynchronize() );
} }
void YCrCb2RGB_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream) void YCrCb2RGB_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream)
{ {
typedef void (*YCrCb2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream); typedef void (*YCrCb2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);
static const YCrCb2RGB_caller_t YCrCb2RGB_callers[2] = static const YCrCb2RGB_caller_t YCrCb2RGB_callers[2][2] =
{ {
YCrCb2RGB_caller<uchar, 3>, YCrCb2RGB_caller<uchar, 4> {YCrCb2RGB_caller<uchar, 3, 3>, YCrCb2RGB_caller<uchar, 3, 4>},
{YCrCb2RGB_caller<uchar, 4, 3>, YCrCb2RGB_caller<uchar, 4, 4>}
}; };
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_i, coeffs, 4 * sizeof(int)) ); cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_i, coeffs, 4 * sizeof(int)) );
YCrCb2RGB_callers[dstcn-3](src, dst, bidx, stream); YCrCb2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
} }
void YCrCb2RGB_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream) void YCrCb2RGB_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream)
{ {
typedef void (*YCrCb2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream); typedef void (*YCrCb2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);
static const YCrCb2RGB_caller_t YCrCb2RGB_callers[2] = static const YCrCb2RGB_caller_t YCrCb2RGB_callers[2][2] =
{ {
YCrCb2RGB_caller<unsigned short, 3>, YCrCb2RGB_caller<unsigned short, 4> {YCrCb2RGB_caller<unsigned short, 3, 3>, YCrCb2RGB_caller<unsigned short, 3, 4>},
{YCrCb2RGB_caller<unsigned short, 4, 3>, YCrCb2RGB_caller<unsigned short, 4, 4>}
}; };
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_i, coeffs, 4 * sizeof(int)) ); cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_i, coeffs, 4 * sizeof(int)) );
YCrCb2RGB_callers[dstcn-3](src, dst, bidx, stream); YCrCb2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
} }
void YCrCb2RGB_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int dstcn, int bidx, const float* coeffs, cudaStream_t stream) void YCrCb2RGB_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, const float* coeffs, cudaStream_t stream)
{ {
typedef void (*YCrCb2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream); typedef void (*YCrCb2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, int bidx, cudaStream_t stream);
static const YCrCb2RGB_caller_t YCrCb2RGB_callers[2] = static const YCrCb2RGB_caller_t YCrCb2RGB_callers[2][2] =
{ {
YCrCb2RGB_caller<float, 3>, YCrCb2RGB_caller<float, 4> {YCrCb2RGB_caller<float, 3, 3>, YCrCb2RGB_caller<float, 3, 4>},
{YCrCb2RGB_caller<float, 4, 3>, YCrCb2RGB_caller<float, 4, 4>}
}; };
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_f, coeffs, 4 * sizeof(float)) ); cudaSafeCall( cudaMemcpyToSymbol(imgproc::cYCrCbCoeffs_f, coeffs, 4 * sizeof(float)) );
YCrCb2RGB_callers[dstcn-3](src, dst, bidx, stream); YCrCb2RGB_callers[srccn-3][dstcn-3](src, dst, bidx, stream);
} }
}}} }}}
...@@ -843,38 +845,30 @@ namespace imgproc ...@@ -843,38 +845,30 @@ namespace imgproc
template <typename T> struct RGB2XYZConvertor template <typename T> struct RGB2XYZConvertor
{ {
typedef typename TypeVec<T, 3>::vec_t dst_t; template <typename D>
static __device__ dst_t cvt(const T* src) static __device__ void cvt(const T* src, D& dst)
{ {
dst_t dst;
dst.x = saturate_cast<T>(CV_DESCALE(src[0] * cXYZ_D65i[0] + src[1] * cXYZ_D65i[1] + src[2] * cXYZ_D65i[2], xyz_shift)); dst.x = saturate_cast<T>(CV_DESCALE(src[0] * cXYZ_D65i[0] + src[1] * cXYZ_D65i[1] + src[2] * cXYZ_D65i[2], xyz_shift));
dst.y = saturate_cast<T>(CV_DESCALE(src[0] * cXYZ_D65i[3] + src[1] * cXYZ_D65i[4] + src[2] * cXYZ_D65i[5], xyz_shift)); dst.y = saturate_cast<T>(CV_DESCALE(src[0] * cXYZ_D65i[3] + src[1] * cXYZ_D65i[4] + src[2] * cXYZ_D65i[5], xyz_shift));
dst.z = saturate_cast<T>(CV_DESCALE(src[0] * cXYZ_D65i[6] + src[1] * cXYZ_D65i[7] + src[2] * cXYZ_D65i[8], xyz_shift)); dst.z = saturate_cast<T>(CV_DESCALE(src[0] * cXYZ_D65i[6] + src[1] * cXYZ_D65i[7] + src[2] * cXYZ_D65i[8], xyz_shift));
return dst;
} }
}; };
template <> struct RGB2XYZConvertor<float> template <> struct RGB2XYZConvertor<float>
{ {
typedef typename TypeVec<float, 3>::vec_t dst_t; template <typename D>
static __device__ dst_t cvt(const float* src) static __device__ void cvt(const float* src, D& dst)
{ {
dst_t dst;
dst.x = src[0] * cXYZ_D65f[0] + src[1] * cXYZ_D65f[1] + src[2] * cXYZ_D65f[2]; dst.x = src[0] * cXYZ_D65f[0] + src[1] * cXYZ_D65f[1] + src[2] * cXYZ_D65f[2];
dst.y = src[0] * cXYZ_D65f[3] + src[1] * cXYZ_D65f[4] + src[2] * cXYZ_D65f[5]; dst.y = src[0] * cXYZ_D65f[3] + src[1] * cXYZ_D65f[4] + src[2] * cXYZ_D65f[5];
dst.z = src[0] * cXYZ_D65f[6] + src[1] * cXYZ_D65f[7] + src[2] * cXYZ_D65f[8]; dst.z = src[0] * cXYZ_D65f[6] + src[1] * cXYZ_D65f[7] + src[2] * cXYZ_D65f[8];
return dst;
} }
}; };
template <int SRCCN, typename T> template <int SRCCN, int DSTCN, typename T>
__global__ void RGB2XYZ(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols) __global__ void RGB2XYZ(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols)
{ {
typedef typename TypeVec<T, SRCCN>::vec_t src_t; typedef typename TypeVec<T, SRCCN>::vec_t src_t;
typedef typename TypeVec<T, 3>::vec_t dst_t; typedef typename TypeVec<T, DSTCN>::vec_t dst_t;
const int x = blockDim.x * blockIdx.x + threadIdx.x; const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y; const int y = blockDim.y * blockIdx.y + threadIdx.y;
...@@ -883,24 +877,27 @@ namespace imgproc ...@@ -883,24 +877,27 @@ namespace imgproc
{ {
src_t src = *(const src_t*)(src_ + y * src_step + x * SRCCN * sizeof(T)); src_t src = *(const src_t*)(src_ + y * src_step + x * SRCCN * sizeof(T));
*(dst_t*)(dst_ + y * dst_step + x * 3 * sizeof(T)) = RGB2XYZConvertor<T>::cvt((const T*)(&src)); dst_t dst;
RGB2XYZConvertor<T>::cvt((const T*)(&src), dst);
*(dst_t*)(dst_ + y * dst_step + x * DSTCN * sizeof(T)) = dst;
} }
} }
template <typename T> struct XYZ2RGBConvertor template <typename D> struct XYZ2RGBConvertor
{ {
typedef typename TypeVec<T, 3>::vec_t src_t; template <typename T>
static __device__ void cvt(const src_t& src, T* dst) static __device__ void cvt(const T& src, D* dst)
{ {
dst[0] = saturate_cast<T>(CV_DESCALE(src.x * cXYZ_D65i[0] + src.y * cXYZ_D65i[1] + src.z * cXYZ_D65i[2], xyz_shift)); dst[0] = saturate_cast<D>(CV_DESCALE(src.x * cXYZ_D65i[0] + src.y * cXYZ_D65i[1] + src.z * cXYZ_D65i[2], xyz_shift));
dst[1] = saturate_cast<T>(CV_DESCALE(src.x * cXYZ_D65i[3] + src.y * cXYZ_D65i[4] + src.z * cXYZ_D65i[5], xyz_shift)); dst[1] = saturate_cast<D>(CV_DESCALE(src.x * cXYZ_D65i[3] + src.y * cXYZ_D65i[4] + src.z * cXYZ_D65i[5], xyz_shift));
dst[2] = saturate_cast<T>(CV_DESCALE(src.x * cXYZ_D65i[6] + src.y * cXYZ_D65i[7] + src.z * cXYZ_D65i[8], xyz_shift)); dst[2] = saturate_cast<D>(CV_DESCALE(src.x * cXYZ_D65i[6] + src.y * cXYZ_D65i[7] + src.z * cXYZ_D65i[8], xyz_shift));
} }
}; };
template <> struct XYZ2RGBConvertor<float> template <> struct XYZ2RGBConvertor<float>
{ {
typedef typename TypeVec<float, 3>::vec_t src_t; template <typename T>
static __device__ void cvt(const src_t& src, float* dst) static __device__ void cvt(const T& src, float* dst)
{ {
dst[0] = src.x * cXYZ_D65f[0] + src.y * cXYZ_D65f[1] + src.z * cXYZ_D65f[2]; dst[0] = src.x * cXYZ_D65f[0] + src.y * cXYZ_D65f[1] + src.z * cXYZ_D65f[2];
dst[1] = src.x * cXYZ_D65f[3] + src.y * cXYZ_D65f[4] + src.z * cXYZ_D65f[5]; dst[1] = src.x * cXYZ_D65f[3] + src.y * cXYZ_D65f[4] + src.z * cXYZ_D65f[5];
...@@ -908,10 +905,10 @@ namespace imgproc ...@@ -908,10 +905,10 @@ namespace imgproc
} }
}; };
template <int DSTCN, typename T> template <int SRCCN, int DSTCN, typename T>
__global__ void XYZ2RGB(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols) __global__ void XYZ2RGB(const uchar* src_, size_t src_step, uchar* dst_, size_t dst_step, int rows, int cols)
{ {
typedef typename TypeVec<T, 3>::vec_t src_t; typedef typename TypeVec<T, SRCCN>::vec_t src_t;
typedef typename TypeVec<T, DSTCN>::vec_t dst_t; typedef typename TypeVec<T, DSTCN>::vec_t dst_t;
const int x = blockDim.x * blockIdx.x + threadIdx.x; const int x = blockDim.x * blockIdx.x + threadIdx.x;
...@@ -919,7 +916,7 @@ namespace imgproc ...@@ -919,7 +916,7 @@ namespace imgproc
if (y < rows && x < cols) if (y < rows && x < cols)
{ {
src_t src = *(const src_t*)(src_ + y * src_step + x * 3 * sizeof(T)); src_t src = *(const src_t*)(src_ + y * src_step + x * SRCCN * sizeof(T));
dst_t dst; dst_t dst;
XYZ2RGBConvertor<T>::cvt(src, (T*)(&dst)); XYZ2RGBConvertor<T>::cvt(src, (T*)(&dst));
...@@ -932,7 +929,7 @@ namespace imgproc ...@@ -932,7 +929,7 @@ namespace imgproc
namespace cv { namespace gpu { namespace improc namespace cv { namespace gpu { namespace improc
{ {
template <typename T, int SRCCN> template <typename T, int SRCCN, int DSTCN>
void RGB2XYZ_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream) void RGB2XYZ_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)
{ {
dim3 threads(32, 8, 1); dim3 threads(32, 8, 1);
...@@ -941,44 +938,56 @@ namespace cv { namespace gpu { namespace improc ...@@ -941,44 +938,56 @@ namespace cv { namespace gpu { namespace improc
grid.x = divUp(src.cols, threads.x); grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y); grid.y = divUp(src.rows, threads.y);
imgproc::RGB2XYZ<SRCCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, imgproc::RGB2XYZ<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
dst.ptr, dst.step, src.rows, src.cols); dst.ptr, dst.step, src.rows, src.cols);
if (stream == 0) if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() ); cudaSafeCall( cudaThreadSynchronize() );
} }
void RGB2XYZ_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, const int* coeffs, cudaStream_t stream) void RGB2XYZ_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream)
{ {
typedef void (*RGB2XYZ_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); typedef void (*RGB2XYZ_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
static const RGB2XYZ_caller_t RGB2XYZ_callers[] = {RGB2XYZ_caller<uchar, 3>, RGB2XYZ_caller<uchar, 4>}; static const RGB2XYZ_caller_t RGB2XYZ_callers[2][2] =
{
{RGB2XYZ_caller<uchar, 3, 3>, RGB2XYZ_caller<uchar, 3, 4>},
{RGB2XYZ_caller<uchar, 4, 3>, RGB2XYZ_caller<uchar, 4, 4>}
};
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) ); cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) );
RGB2XYZ_callers[srccn-3](src, dst, stream); RGB2XYZ_callers[srccn-3][dstcn-3](src, dst, stream);
} }
void RGB2XYZ_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, const int* coeffs, cudaStream_t stream) void RGB2XYZ_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream)
{ {
typedef void (*RGB2XYZ_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); typedef void (*RGB2XYZ_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
static const RGB2XYZ_caller_t RGB2XYZ_callers[] = {RGB2XYZ_caller<unsigned short, 3>, RGB2XYZ_caller<unsigned short, 4>}; static const RGB2XYZ_caller_t RGB2XYZ_callers[2][2] =
{
{RGB2XYZ_caller<unsigned short, 3, 3>, RGB2XYZ_caller<unsigned short, 3, 4>},
{RGB2XYZ_caller<unsigned short, 4, 3>, RGB2XYZ_caller<unsigned short, 4, 4>}
};
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) ); cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) );
RGB2XYZ_callers[srccn-3](src, dst, stream); RGB2XYZ_callers[srccn-3][dstcn-3](src, dst, stream);
} }
void RGB2XYZ_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, const float* coeffs, cudaStream_t stream) void RGB2XYZ_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, const float* coeffs, cudaStream_t stream)
{ {
typedef void (*RGB2XYZ_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); typedef void (*RGB2XYZ_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
static const RGB2XYZ_caller_t RGB2XYZ_callers[] = {RGB2XYZ_caller<float, 3>, RGB2XYZ_caller<float, 4>}; static const RGB2XYZ_caller_t RGB2XYZ_callers[2][2] =
{
{RGB2XYZ_caller<float, 3, 3>, RGB2XYZ_caller<float, 3, 4>},
{RGB2XYZ_caller<float, 4, 3>, RGB2XYZ_caller<float, 4, 4>}
};
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65f, coeffs, 9 * sizeof(float)) ); cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65f, coeffs, 9 * sizeof(float)) );
RGB2XYZ_callers[srccn-3](src, dst, stream); RGB2XYZ_callers[srccn-3][dstcn-3](src, dst, stream);
} }
template <typename T, int DSTCN> template <typename T, int SRCCN, int DSTCN>
void XYZ2RGB_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream) void XYZ2RGB_caller(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream)
{ {
dim3 threads(32, 8, 1); dim3 threads(32, 8, 1);
...@@ -987,41 +996,53 @@ namespace cv { namespace gpu { namespace improc ...@@ -987,41 +996,53 @@ namespace cv { namespace gpu { namespace improc
grid.x = divUp(src.cols, threads.x); grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y); grid.y = divUp(src.rows, threads.y);
imgproc::XYZ2RGB<DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step, imgproc::XYZ2RGB<SRCCN, DSTCN, T><<<grid, threads, 0, stream>>>(src.ptr, src.step,
dst.ptr, dst.step, src.rows, src.cols); dst.ptr, dst.step, src.rows, src.cols);
if (stream == 0) if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() ); cudaSafeCall( cudaThreadSynchronize() );
} }
void XYZ2RGB_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream) void XYZ2RGB_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream)
{ {
typedef void (*XYZ2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); typedef void (*XYZ2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
static const XYZ2RGB_caller_t XYZ2RGB_callers[] = {XYZ2RGB_caller<uchar, 3>, XYZ2RGB_caller<uchar, 4>}; static const XYZ2RGB_caller_t XYZ2RGB_callers[2][2] =
{
{XYZ2RGB_caller<uchar, 3, 3>, XYZ2RGB_caller<uchar, 3, 4>},
{XYZ2RGB_caller<uchar, 4, 3>, XYZ2RGB_caller<uchar, 4, 4>}
};
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) ); cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) );
XYZ2RGB_callers[dstcn-3](src, dst, stream); XYZ2RGB_callers[srccn-3][dstcn-3](src, dst, stream);
} }
void XYZ2RGB_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream) void XYZ2RGB_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream)
{ {
typedef void (*XYZ2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); typedef void (*XYZ2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
static const XYZ2RGB_caller_t XYZ2RGB_callers[] = {XYZ2RGB_caller<unsigned short, 3>, XYZ2RGB_caller<unsigned short, 4>}; static const XYZ2RGB_caller_t XYZ2RGB_callers[2][2] =
{
{XYZ2RGB_caller<unsigned short, 3, 3>, XYZ2RGB_caller<unsigned short, 3, 4>},
{XYZ2RGB_caller<unsigned short, 4, 3>, XYZ2RGB_caller<unsigned short, 4, 4>}
};
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) ); cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65i, coeffs, 9 * sizeof(int)) );
XYZ2RGB_callers[dstcn-3](src, dst, stream); XYZ2RGB_callers[srccn-3][dstcn-3](src, dst, stream);
} }
void XYZ2RGB_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int dstcn, const float* coeffs, cudaStream_t stream) void XYZ2RGB_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, const float* coeffs, cudaStream_t stream)
{ {
typedef void (*XYZ2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); typedef void (*XYZ2RGB_caller_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
static const XYZ2RGB_caller_t XYZ2RGB_callers[] = {XYZ2RGB_caller<float, 3>, XYZ2RGB_caller<float, 4>}; static const XYZ2RGB_caller_t XYZ2RGB_callers[2][2] =
{
{XYZ2RGB_caller<float, 3, 3>, XYZ2RGB_caller<float, 3, 4>},
{XYZ2RGB_caller<float, 4, 3>, XYZ2RGB_caller<float, 4, 4>}
};
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65f, coeffs, 9 * sizeof(float)) ); cudaSafeCall( cudaMemcpyToSymbol(imgproc::cXYZ_D65f, coeffs, 9 * sizeof(float)) );
XYZ2RGB_callers[dstcn-3](src, dst, stream); XYZ2RGB_callers[srccn-3][dstcn-3](src, dst, stream);
} }
}}} }}}
......
...@@ -63,6 +63,9 @@ Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int, int, const Gpu ...@@ -63,6 +63,9 @@ Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int, int, const Gpu
Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int, int, const Mat&, const Mat&, const Point&) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); } Ptr<FilterEngine_GPU> cv::gpu::createSeparableLinearFilter_GPU(int, int, const Mat&, const Mat&, const Point&) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
Ptr<FilterEngine_GPU> cv::gpu::createDerivFilter_GPU(int, int, int, int, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); } Ptr<FilterEngine_GPU> cv::gpu::createDerivFilter_GPU(int, int, int, int, int) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
Ptr<FilterEngine_GPU> cv::gpu::createGaussianFilter_GPU(int, Size, double, double) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); } Ptr<FilterEngine_GPU> cv::gpu::createGaussianFilter_GPU(int, Size, double, double) { throw_nogpu(); return Ptr<FilterEngine_GPU>(0); }
Ptr<BaseFilter_GPU> cv::gpu::getMaxFilter_GPU(int, int, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }
Ptr<BaseFilter_GPU> cv::gpu::getMinFilter_GPU(int, int, const Size&, Point) { throw_nogpu(); return Ptr<BaseFilter_GPU>(0); }
void cv::gpu::boxFilter(const GpuMat&, GpuMat&, int, Size, Point) { throw_nogpu(); } void cv::gpu::boxFilter(const GpuMat&, GpuMat&, int, Size, Point) { throw_nogpu(); }
void cv::gpu::erode( const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_nogpu(); } void cv::gpu::erode( const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_nogpu(); }
void cv::gpu::dilate( const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_nogpu(); } void cv::gpu::dilate( const GpuMat&, GpuMat&, const Mat&, Point, int) { throw_nogpu(); }
...@@ -105,20 +108,20 @@ namespace ...@@ -105,20 +108,20 @@ namespace
int scale = nDivisor && (kernel.depth() == CV_32F || kernel.depth() == CV_64F) ? 256 : 1; int scale = nDivisor && (kernel.depth() == CV_32F || kernel.depth() == CV_64F) ? 256 : 1;
if (nDivisor) *nDivisor = scale; if (nDivisor) *nDivisor = scale;
Mat cont_krnl = (kernel.isContinuous() ? kernel : kernel.clone()).reshape(1, 1); Mat temp(kernel.size(), type);
Mat temp; kernel.convertTo(temp, type, scale);
cont_krnl.convertTo(temp, type, scale); Mat cont_krnl = temp.reshape(1, 1);
if (reverse) if (reverse)
{ {
int count = temp.cols >> 1; int count = cont_krnl.cols >> 1;
for (int i = 0; i < count; ++i) for (int i = 0; i < count; ++i)
{ {
std::swap(temp.at<int>(0, i), temp.at<int>(0, temp.cols - 1 - i)); std::swap(cont_krnl.at<int>(0, i), cont_krnl.at<int>(0, cont_krnl.cols - 1 - i));
} }
} }
gpu_krnl.upload(temp); gpu_krnl.upload(cont_krnl);
} }
} }
...@@ -785,4 +788,58 @@ void cv::gpu::GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double si ...@@ -785,4 +788,58 @@ void cv::gpu::GaussianBlur(const GpuMat& src, GpuMat& dst, Size ksize, double si
f->apply(src, dst); f->apply(src, dst);
} }
////////////////////////////////////////////////////////////////////////////////////////////////////
// Image Rank Filter
namespace
{
typedef NppStatus (*nppFilterRank_t)(const Npp8u * pSrc, Npp32s nSrcStep, Npp8u * pDst, Npp32s nDstStep, NppiSize oSizeROI,
NppiSize oMaskSize, NppiPoint oAnchor);
class NPPRankFilter : public BaseFilter_GPU
{
public:
NPPRankFilter(const Size& ksize_, const Point& anchor_, nppFilterRank_t func_) : BaseFilter_GPU(ksize_, anchor_), func(func_) {}
virtual void operator()(const GpuMat& src, GpuMat& dst)
{
NppiSize sz;
sz.width = src.cols;
sz.height = src.rows;
NppiSize oKernelSize;
oKernelSize.height = ksize.height;
oKernelSize.width = ksize.width;
NppiPoint oAnchor;
oAnchor.x = anchor.x;
oAnchor.y = anchor.y;
nppSafeCall( func(src.ptr<Npp8u>(), src.step, dst.ptr<Npp8u>(), dst.step, sz, oKernelSize, oAnchor) );
}
nppFilterRank_t func;
};
}
Ptr<BaseFilter_GPU> cv::gpu::getMaxFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor)
{
static const nppFilterRank_t nppFilterRank_callers[] = {0, nppiFilterMax_8u_C1R, 0, 0, nppiFilterMax_8u_C4R};
CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4) && dstType == srcType);
normalizeAnchor(anchor, ksize);
return Ptr<BaseFilter_GPU>(new NPPRankFilter(ksize, anchor, nppFilterRank_callers[CV_MAT_CN(srcType)]));
}
Ptr<BaseFilter_GPU> cv::gpu::getMinFilter_GPU(int srcType, int dstType, const Size& ksize, Point anchor)
{
static const nppFilterRank_t nppFilterRank_callers[] = {0, nppiFilterMin_8u_C1R, 0, 0, nppiFilterMin_8u_C4R};
CV_Assert((srcType == CV_8UC1 || srcType == CV_8UC4) && dstType == srcType);
normalizeAnchor(anchor, ksize);
return Ptr<BaseFilter_GPU>(new NPPRankFilter(ksize, anchor, nppFilterRank_callers[CV_MAT_CN(srcType)]));
}
#endif #endif
...@@ -98,21 +98,21 @@ namespace cv { namespace gpu ...@@ -98,21 +98,21 @@ namespace cv { namespace gpu
void RGB2Gray_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream); void RGB2Gray_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream);
void RGB5x52Gray_gpu(const DevMem2D& src, int green_bits, const DevMem2D& dst, cudaStream_t stream); void RGB5x52Gray_gpu(const DevMem2D& src, int green_bits, const DevMem2D& dst, cudaStream_t stream);
void RGB2YCrCb_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, const int* coeffs, cudaStream_t stream); void RGB2YCrCb_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream);
void RGB2YCrCb_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, const int* coeffs, cudaStream_t stream); void RGB2YCrCb_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream);
void RGB2YCrCb_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, const float* coeffs, cudaStream_t stream); void RGB2YCrCb_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, const float* coeffs, cudaStream_t stream);
void YCrCb2RGB_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream); void YCrCb2RGB_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream);
void YCrCb2RGB_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream); void YCrCb2RGB_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, const int* coeffs, cudaStream_t stream);
void YCrCb2RGB_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int dstcn, int bidx, const float* coeffs, cudaStream_t stream); void YCrCb2RGB_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, const float* coeffs, cudaStream_t stream);
void RGB2XYZ_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, const int* coeffs, cudaStream_t stream); void RGB2XYZ_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream);
void RGB2XYZ_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, const int* coeffs, cudaStream_t stream); void RGB2XYZ_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream);
void RGB2XYZ_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, const float* coeffs, cudaStream_t stream); void RGB2XYZ_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, const float* coeffs, cudaStream_t stream);
void XYZ2RGB_gpu_8u(const DevMem2D& src, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream); void XYZ2RGB_gpu_8u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream);
void XYZ2RGB_gpu_16u(const DevMem2D& src, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream); void XYZ2RGB_gpu_16u(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, const int* coeffs, cudaStream_t stream);
void XYZ2RGB_gpu_32f(const DevMem2D& src, const DevMem2D& dst, int dstcn, const float* coeffs, cudaStream_t stream); void XYZ2RGB_gpu_32f(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, const float* coeffs, cudaStream_t stream);
} }
}} }}
...@@ -361,7 +361,8 @@ namespace ...@@ -361,7 +361,8 @@ namespace
case CV_BGR2YCrCb: case CV_RGB2YCrCb: case CV_BGR2YCrCb: case CV_RGB2YCrCb:
case CV_BGR2YUV: case CV_RGB2YUV: case CV_BGR2YUV: case CV_RGB2YUV:
{ {
CV_Assert( scn == 3 || scn == 4 ); if(dcn <= 0) dcn = 3;
CV_Assert( (scn == 3 || scn == 4) && (dcn == 3 || dcn == 4) );
bidx = code == CV_BGR2YCrCb || code == CV_RGB2YUV ? 0 : 2; bidx = code == CV_BGR2YCrCb || code == CV_RGB2YUV ? 0 : 2;
...@@ -382,14 +383,14 @@ namespace ...@@ -382,14 +383,14 @@ namespace
std::swap(coeffs_i[0], coeffs_i[2]); std::swap(coeffs_i[0], coeffs_i[2]);
} }
out.create(sz, CV_MAKETYPE(depth, 3)); out.create(sz, CV_MAKETYPE(depth, dcn));
if( depth == CV_8U ) if( depth == CV_8U )
improc::RGB2YCrCb_gpu_8u(src, scn, out, bidx, coeffs_i, stream); improc::RGB2YCrCb_gpu_8u(src, scn, out, dcn, bidx, coeffs_i, stream);
else if( depth == CV_16U ) else if( depth == CV_16U )
improc::RGB2YCrCb_gpu_16u(src, scn, out, bidx, coeffs_i, stream); improc::RGB2YCrCb_gpu_16u(src, scn, out, dcn, bidx, coeffs_i, stream);
else else
improc::RGB2YCrCb_gpu_32f(src, scn, out, bidx, coeffs_f, stream); improc::RGB2YCrCb_gpu_32f(src, scn, out, dcn, bidx, coeffs_f, stream);
} }
break; break;
...@@ -398,7 +399,7 @@ namespace ...@@ -398,7 +399,7 @@ namespace
{ {
if (dcn <= 0) dcn = 3; if (dcn <= 0) dcn = 3;
CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) ); CV_Assert( (scn == 3 || scn == 4) && (dcn == 3 || dcn == 4) );
bidx = code == CV_YCrCb2BGR || code == CV_YUV2RGB ? 0 : 2; bidx = code == CV_YCrCb2BGR || code == CV_YUV2RGB ? 0 : 2;
...@@ -414,17 +415,18 @@ namespace ...@@ -414,17 +415,18 @@ namespace
out.create(sz, CV_MAKETYPE(depth, dcn)); out.create(sz, CV_MAKETYPE(depth, dcn));
if( depth == CV_8U ) if( depth == CV_8U )
improc::YCrCb2RGB_gpu_8u(src, out, dcn, bidx, coeffs_i, stream); improc::YCrCb2RGB_gpu_8u(src, scn, out, dcn, bidx, coeffs_i, stream);
else if( depth == CV_16U ) else if( depth == CV_16U )
improc::YCrCb2RGB_gpu_16u(src, out, dcn, bidx, coeffs_i, stream); improc::YCrCb2RGB_gpu_16u(src, scn, out, dcn, bidx, coeffs_i, stream);
else else
improc::YCrCb2RGB_gpu_32f(src, out, dcn, bidx, coeffs_f, stream); improc::YCrCb2RGB_gpu_32f(src, scn, out, dcn, bidx, coeffs_f, stream);
} }
break; break;
case CV_BGR2XYZ: case CV_RGB2XYZ: case CV_BGR2XYZ: case CV_RGB2XYZ:
{ {
CV_Assert( scn == 3 || scn == 4 ); if(dcn <= 0) dcn = 3;
CV_Assert( (scn == 3 || scn == 4) && (dcn == 3 || dcn == 4) );
bidx = code == CV_BGR2XYZ ? 0 : 2; bidx = code == CV_BGR2XYZ ? 0 : 2;
...@@ -457,21 +459,21 @@ namespace ...@@ -457,21 +459,21 @@ namespace
std::swap(coeffs_i[6], coeffs_i[8]); std::swap(coeffs_i[6], coeffs_i[8]);
} }
out.create(sz, CV_MAKETYPE(depth, 3)); out.create(sz, CV_MAKETYPE(depth, dcn));
if( depth == CV_8U ) if( depth == CV_8U )
improc::RGB2XYZ_gpu_8u(src, scn, out, coeffs_i, stream); improc::RGB2XYZ_gpu_8u(src, scn, out, dcn, coeffs_i, stream);
else if( depth == CV_16U ) else if( depth == CV_16U )
improc::RGB2XYZ_gpu_16u(src, scn, out, coeffs_i, stream); improc::RGB2XYZ_gpu_16u(src, scn, out, dcn, coeffs_i, stream);
else else
improc::RGB2XYZ_gpu_32f(src, scn, out, coeffs_f, stream); improc::RGB2XYZ_gpu_32f(src, scn, out, dcn, coeffs_f, stream);
} }
break; break;
case CV_XYZ2BGR: case CV_XYZ2RGB: case CV_XYZ2BGR: case CV_XYZ2RGB:
{ {
if (dcn <= 0) dcn = 3; if (dcn <= 0) dcn = 3;
CV_Assert( scn == 3 && (dcn == 3 || dcn == 4) ); CV_Assert( (scn == 3 || scn == 4) && (dcn == 3 || dcn == 4) );
bidx = code == CV_XYZ2BGR ? 0 : 2; bidx = code == CV_XYZ2BGR ? 0 : 2;
static const float XYZ2sRGB_D65f[] = static const float XYZ2sRGB_D65f[] =
...@@ -506,11 +508,11 @@ namespace ...@@ -506,11 +508,11 @@ namespace
out.create(sz, CV_MAKETYPE(depth, dcn)); out.create(sz, CV_MAKETYPE(depth, dcn));
if( depth == CV_8U ) if( depth == CV_8U )
improc::XYZ2RGB_gpu_8u(src, out, dcn, coeffs_i, stream); improc::XYZ2RGB_gpu_8u(src, scn, out, dcn, coeffs_i, stream);
else if( depth == CV_16U ) else if( depth == CV_16U )
improc::XYZ2RGB_gpu_16u(src, out, dcn, coeffs_i, stream); improc::XYZ2RGB_gpu_16u(src, scn, out, dcn, coeffs_i, stream);
else else
improc::XYZ2RGB_gpu_32f(src, out, dcn, coeffs_f, stream); improc::XYZ2RGB_gpu_32f(src, scn, out, dcn, coeffs_f, stream);
} }
break; break;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment