Commit b08f6082 authored by Vladislav Vinogradov

added linear filters for int and float source types to the gpu module.

refactored gpu module.
parent ea040ce7
......@@ -50,7 +50,7 @@ namespace cv
// Simple lightweight structure that encapsulates an image pointer on the device, its pitch and its sizes.
// It is intended to be passed to nvcc-compiled code, since GpuMat depends on headers that nvcc can't compile.
template<typename T = unsigned char>
template <typename T>
struct DevMem2D_
{
typedef T elem_t;
......@@ -60,16 +60,21 @@ namespace cv
int rows;
T* ptr;
size_t step;
size_t elem_step;
DevMem2D_() : cols(0), rows(0), ptr(0), step(0) {}
DevMem2D_() : cols(0), rows(0), ptr(0), step(0), elem_step(0) {}
DevMem2D_(int rows_, int cols_, T *ptr_, size_t step_)
: cols(cols_), rows(rows_), ptr(ptr_), step(step_) {}
: cols(cols_), rows(rows_), ptr(ptr_), step(step_), elem_step(step_ / sizeof(T)) {}
template <typename U>
explicit DevMem2D_(const DevMem2D_<U>& d)
: cols(d.cols), rows(d.rows), ptr((T*)d.ptr), step(d.step), elem_step(d.step / sizeof(T)) {}
size_t elemSize() const { return elem_size; }
};
typedef DevMem2D_<> DevMem2D;
typedef DevMem2D_<unsigned char> DevMem2D;
typedef DevMem2D_<float> DevMem2Df;
typedef DevMem2D_<int> DevMem2Di;
}
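The new elem_step field stores the pitch in elements rather than bytes, so typed kernels can index rows without dividing by sizeof(T). A minimal hypothetical sketch (the kernel name and scaling are illustrative only):

__global__ void scale(cv::gpu::DevMem2Df img, float s)
{
    const int x = blockIdx.x * blockDim.x + threadIdx.x;
    const int y = blockIdx.y * blockDim.y + threadIdx.y;
    if (x < img.cols && y < img.rows)
        img.ptr[y * img.elem_step + x] *= s;   // elem_step == step / sizeof(float)
}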
......
......@@ -636,7 +636,7 @@ namespace cv
//! returns the separable filter engine with the specified filters
CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableFilter_GPU(const Ptr<BaseRowFilter_GPU>& rowFilter,
const Ptr<BaseColumnFilter_GPU>& columnFilter, bool rowFilterFirst = true);
const Ptr<BaseColumnFilter_GPU>& columnFilter);
//! returns horizontal 1D box filter
//! supports only CV_8UC1 source type and CV_32FC1 sum type
......@@ -658,7 +658,7 @@ namespace cv
//! only MORPH_ERODE and MORPH_DILATE are supported
//! supports CV_8UC1 and CV_8UC4 types
//! kernel must have CV_8UC1 type, one row, and cols == ksize.width * ksize.height
CV_EXPORTS Ptr<BaseFilter_GPU> getMorphologyFilter_GPU(int op, int type, const GpuMat& kernel, const Size& ksize,
CV_EXPORTS Ptr<BaseFilter_GPU> getMorphologyFilter_GPU(int op, int type, const Mat& kernel, const Size& ksize,
Point anchor=Point(-1,-1));
//! returns morphological filter engine. Only MORPH_ERODE and MORPH_DILATE are supported.
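A hedged host-side sketch of the revised getMorphologyFilter_GPU call, with the kernel now passed as a host cv::Mat (one row, ksize.width * ksize.height columns); the kernel values are assumptions:

cv::Size ksize(3, 3);
cv::Mat kernel = cv::Mat::ones(1, ksize.width * ksize.height, CV_8UC1);   // 3x3 structuring element
cv::Ptr<cv::gpu::BaseFilter_GPU> dilate =
    cv::gpu::getMorphologyFilter_GPU(cv::MORPH_DILATE, CV_8UC1, kernel, ksize);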
......@@ -667,25 +667,24 @@ namespace cv
//! returns 2D filter with the specified kernel
//! supports CV_8UC1 and CV_8UC4 types
//! kernel must have CV_8UC1 type, one row, and cols == ksize.width * ksize.height
CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const GpuMat& kernel, const Size& ksize,
Point anchor = Point(-1, -1), int nDivisor = 1);
CV_EXPORTS Ptr<BaseFilter_GPU> getLinearFilter_GPU(int srcType, int dstType, const Mat& kernel, const Size& ksize,
Point anchor = Point(-1, -1));
//! returns the non-separable linear filter engine
CV_EXPORTS Ptr<FilterEngine_GPU> createLinearFilter_GPU(int srcType, int dstType, const Mat& kernel,
const Point& anchor = Point(-1,-1));
//! returns the primitive row filter with the specified kernel
CV_EXPORTS Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const GpuMat& rowKernel,
int anchor = -1, int nDivisor = 1);
CV_EXPORTS Ptr<BaseRowFilter_GPU> getLinearRowFilter_GPU(int srcType, int bufType, const Mat& rowKernel,
int anchor = -1);
//! returns the primitive column filter with the specified kernel
CV_EXPORTS Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const GpuMat& columnKernel,
int anchor = -1, int nDivisor = 1);
CV_EXPORTS Ptr<BaseColumnFilter_GPU> getLinearColumnFilter_GPU(int bufType, int dstType, const Mat& columnKernel,
int anchor = -1);
//! returns the separable linear filter engine
CV_EXPORTS Ptr<FilterEngine_GPU> createSeparableLinearFilter_GPU(int srcType, int dstType, const Mat& rowKernel,
const Mat& columnKernel, const Point& anchor = Point(-1,-1), bool rowFilterFirst = true);
const Mat& columnKernel, const Point& anchor = Point(-1,-1));
//! returns filter engine for the generalized Sobel operator
CV_EXPORTS Ptr<FilterEngine_GPU> createDerivFilter_GPU(int srcType, int dstType, int dx, int dy, int ksize);
......@@ -720,7 +719,7 @@ namespace cv
//! applies separable 2D linear filter to the image
CV_EXPORTS void sepFilter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& kernelX, const Mat& kernelY,
Point anchor = Point(-1,-1), bool rowFilterFirst = true);
Point anchor = Point(-1,-1));
//! applies generalized Sobel operator to the image
CV_EXPORTS void Sobel(const GpuMat& src, GpuMat& dst, int ddepth, int dx, int dy, int ksize = 3, double scale = 1);
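For illustration, a minimal sketch of the simplified sepFilter2D signature (the rowFilterFirst flag is gone; the input matrix and kernel size are hypothetical):

cv::Mat k = cv::getGaussianKernel(7, -1, CV_32F);   // 7-tap kernel, sigma derived from size
cv::gpu::GpuMat src(host32f), dst;                  // host32f: an assumed CV_32FC1 cv::Mat
cv::gpu::sepFilter2D(src, dst, CV_32F, k, k);       // anchor defaults to the kernel center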
......
......@@ -316,9 +316,9 @@ void cv::gpu::absdiff(const GpuMat& src, const Scalar& s, GpuMat& dst)
////////////////////////////////////////////////////////////////////////
// compare
namespace cv { namespace gpu { namespace matrix_operations
namespace cv { namespace gpu { namespace mathfunc
{
void compare_ne_8u(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst);
void compare_ne_8uc4(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst);
void compare_ne_32f(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst);
}}}
......@@ -346,7 +346,7 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
}
else
{
matrix_operations::compare_ne_8u(src1, src2, dst);
mathfunc::compare_ne_8uc4(src1, src2, dst);
}
}
else
......@@ -359,7 +359,7 @@ void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int c
}
else
{
matrix_operations::compare_ne_32f(src1, src2, dst);
mathfunc::compare_ne_32f(src1, src2, dst);
}
}
}
......
......@@ -54,20 +54,18 @@ using namespace cv::gpu;
#define SHRT_MAX 32767
#endif
template <typename T>
struct TypeLimits {};
template <>
struct TypeLimits<short>
{
static __device__ short max() {return SHRT_MAX;}
};
template <>
struct TypeLimits<float>
{
static __device__ float max() {return FLT_MAX;}
};
namespace csbp_krnls
{
template <typename T> struct TypeLimits;
template <> struct TypeLimits<short>
{
static __device__ short max() {return SHRT_MAX;}
};
template <> struct TypeLimits<float>
{
static __device__ float max() {return FLT_MAX;}
};
}
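The traits are consumed from device code; a minimal hypothetical sketch of seeding a running minimum with the type's maximum value (the helper is not part of the commit):

template <typename T>
__device__ T minCost(const T* costs, int n)
{
    T best = csbp_krnls::TypeLimits<T>::max();
    for (int i = 0; i < n; ++i)
        best = costs[i] < best ? costs[i] : best;
    return best;
}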
///////////////////////////////////////////////////////////////
/////////////////////// load constants ////////////////////////
......
......@@ -58,19 +58,8 @@ namespace cv
static inline int divUp(int a, int b) { return (a % b == 0) ? a/b : a/b + 1; }
namespace matrix_operations
{
extern "C" void copy_to_with_mask(const DevMem2D& src, DevMem2D dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);
extern "C" void set_to_without_mask (DevMem2D dst, int depth, const double *scalar, int channels, const cudaStream_t & stream = 0);
extern "C" void set_to_with_mask (DevMem2D dst, int depth, const double *scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);
extern "C" void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t & stream = 0);
}
template<class T>
inline void uploadConstant(const char* name, const T& value) { cudaSafeCall( cudaMemcpyToSymbol(name, &value, sizeof(T)) ); }
static inline void uploadConstant(const char* name, const T& value) { cudaSafeCall( cudaMemcpyToSymbol(name, &value, sizeof(T)) ); }
}
}
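A hedged usage sketch for the now-static uploadConstant helper; the symbol name is hypothetical, and string-based symbol lookup in cudaMemcpyToSymbol was legal in CUDA toolkits of this era:

// assumes a __constant__ float cScale symbol declared in the compiled .cu file
float scale = 0.5f;
cv::gpu::uploadConstant("cScale", scale);   // copies sizeof(float) bytes to the symbol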
......
......@@ -43,6 +43,7 @@
#include "opencv2/gpu/devmem2d.hpp"
#include "saturate_cast.hpp"
#include "safe_call.hpp"
#include "cuda_shared.hpp"
using namespace cv::gpu;
......@@ -50,6 +51,227 @@ using namespace cv::gpu;
#define FLT_MAX 3.402823466e+38F
#endif
/////////////////////////////////////////////////////////////////////////////////////////////////
// Linear filters
#define MAX_KERNEL_SIZE 16
namespace filter_krnls
{
__constant__ float cLinearKernel[MAX_KERNEL_SIZE];
}
namespace cv { namespace gpu { namespace filters
{
void loadLinearKernel(const float kernel[], int ksize)
{
cudaSafeCall( cudaMemcpyToSymbol(filter_krnls::cLinearKernel, kernel, ksize * sizeof(float)) );
}
}}}
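For illustration, loading a normalized 5-tap box kernel into the constant buffer (ksize must stay within MAX_KERNEL_SIZE; the kernel values are assumptions):

const float boxKernel[5] = { 0.2f, 0.2f, 0.2f, 0.2f, 0.2f };
cv::gpu::filters::loadLinearKernel(boxKernel, 5);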
namespace filter_krnls
{
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int KERNEL_SIZE, typename T, typename D>
__global__ void linearRowFilter(const T* src, size_t src_step, D* dst, size_t dst_step, int anchor, int width, int height)
{
__shared__ T smem[BLOCK_DIM_Y * BLOCK_DIM_X * 3]; // per thread-row cache: [left halo | central block | right halo]
const int blockStartX = blockDim.x * blockIdx.x;
const int blockStartY = blockDim.y * blockIdx.y;
const int threadX = blockStartX + threadIdx.x;
const int prevThreadX = threadX - blockDim.x;
const int nextThreadX = threadX + blockDim.x;
const int threadY = blockStartY + threadIdx.y;
T* sDataRow = smem + threadIdx.y * blockDim.x * 3;
if (threadY < height)
{
const T* rowSrc = src + threadY * src_step;
// Stage the central block plus one block of halo on each side; out-of-range pixels read as 0.
sDataRow[threadIdx.x + blockDim.x] = threadX < width ? rowSrc[threadX] : 0;
sDataRow[threadIdx.x] = prevThreadX >= 0 ? rowSrc[prevThreadX] : 0;
sDataRow[(blockDim.x << 1) + threadIdx.x] = nextThreadX < width ? rowSrc[nextThreadX] : 0;
__syncthreads();
if (threadX < width)
{
float sum = 0;
// Shift to the first kernel tap for this output pixel; anchor offsets into the left halo.
sDataRow += threadIdx.x + blockDim.x - anchor;
#pragma unroll
for(int i = 0; i < KERNEL_SIZE; ++i)
sum += cLinearKernel[i] * sDataRow[i];
dst[threadY * dst_step + threadX] = saturate_cast<D>(sum);
}
}
}
}
namespace cv { namespace gpu { namespace filters
{
template <int KERNEL_SIZE, typename T, typename D>
void linearRowFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor)
{
const int BLOCK_DIM_X = 16;
const int BLOCK_DIM_Y = 16;
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
dim3 blocks(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));
filter_krnls::linearRowFilter<BLOCK_DIM_X, BLOCK_DIM_Y, KERNEL_SIZE><<<blocks, threads>>>(src.ptr, src.elem_step,
dst.ptr, dst.elem_step, anchor, src.cols, src.rows);
cudaSafeCall( cudaThreadSynchronize() );
}
template <typename T, typename D>
inline void linearRowFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor);
static const caller_t callers[] =
{linearRowFilter_caller<0 , T, D>, linearRowFilter_caller<1 , T, D>,
linearRowFilter_caller<2 , T, D>, linearRowFilter_caller<3 , T, D>,
linearRowFilter_caller<4 , T, D>, linearRowFilter_caller<5 , T, D>,
linearRowFilter_caller<6 , T, D>, linearRowFilter_caller<7 , T, D>,
linearRowFilter_caller<8 , T, D>, linearRowFilter_caller<9 , T, D>,
linearRowFilter_caller<10, T, D>, linearRowFilter_caller<11, T, D>,
linearRowFilter_caller<12, T, D>, linearRowFilter_caller<13, T, D>,
linearRowFilter_caller<14, T, D>, linearRowFilter_caller<15, T, D>};
loadLinearKernel(kernel, ksize);
callers[ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor);
}
void linearRowFilter_gpu_32s32s(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearRowFilter_gpu<int, int>(src, dst, kernel, ksize, anchor);
}
void linearRowFilter_gpu_32s32f(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearRowFilter_gpu<int, float>(src, dst, kernel, ksize, anchor);
}
void linearRowFilter_gpu_32f32s(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearRowFilter_gpu<float, int>(src, dst, kernel, ksize, anchor);
}
void linearRowFilter_gpu_32f32f(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearRowFilter_gpu<float, float>(src, dst, kernel, ksize, anchor);
}
}}}
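A hedged sketch of how the typed wrappers above would be driven from host code; the function, buffers, and kernel values are hypothetical:

// src and dst are assumed pitched CV_32FC1 device buffers (DevMem2D views) of equal size.
void smoothRows(const cv::gpu::DevMem2D& src, const cv::gpu::DevMem2D& dst)
{
    const float k[3] = { 0.25f, 0.5f, 0.25f };                       // 3-tap binomial kernel (assumed)
    cv::gpu::filters::linearRowFilter_gpu_32f32f(src, dst, k, 3, 1); // anchor = 1 (center tap)
}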
namespace filter_krnls
{
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int KERNEL_SIZE, typename T, typename D>
__global__ void linearColumnFilter(const T* src, size_t src_step, D* dst, size_t dst_step, int anchor, int width, int height)
{
__shared__ T smem[BLOCK_DIM_Y * BLOCK_DIM_X * 3]; // per thread-column cache: [top halo | central block | bottom halo]
const int blockStartX = blockDim.x * blockIdx.x;
const int blockStartY = blockDim.y * blockIdx.y;
const int threadX = blockStartX + threadIdx.x;
const int threadY = blockStartY + threadIdx.y;
const int prevThreadY = threadY - blockDim.y;
const int nextThreadY = threadY + blockDim.y;
const int smem_step = blockDim.x;
T* sDataColumn = smem + threadIdx.x;
if (threadX < width)
{
const T* colSrc = src + threadX;
// Stage the central block plus one block of vertical halo above and below; out-of-range pixels read as 0.
sDataColumn[(threadIdx.y + blockDim.y) * smem_step] = threadY < height ? colSrc[threadY * src_step] : 0;
sDataColumn[threadIdx.y * smem_step] = prevThreadY >= 0 ? colSrc[prevThreadY * src_step] : 0;
sDataColumn[(threadIdx.y + (blockDim.y << 1)) * smem_step] = nextThreadY < height ? colSrc[nextThreadY * src_step] : 0;
__syncthreads();
if (threadY < height)
{
float sum = 0;
// Shift to the first kernel tap for this output pixel; anchor offsets into the top halo.
sDataColumn += (threadIdx.y + blockDim.y - anchor) * smem_step;
#pragma unroll
for(int i = 0; i < KERNEL_SIZE; ++i)
sum += cLinearKernel[i] * sDataColumn[i * smem_step];
dst[threadY * dst_step + threadX] = saturate_cast<D>(sum);
}
}
}
}
namespace cv { namespace gpu { namespace filters
{
template <int KERNEL_SIZE, typename T, typename D>
void linearColumnFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor)
{
const int BLOCK_DIM_X = 16;
const int BLOCK_DIM_Y = 16;
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
dim3 blocks(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));
filter_krnls::linearColumnFilter<BLOCK_DIM_X, BLOCK_DIM_Y, KERNEL_SIZE><<<blocks, threads>>>(src.ptr, src.elem_step,
dst.ptr, dst.elem_step, anchor, src.cols, src.rows);
cudaSafeCall( cudaThreadSynchronize() );
}
template <typename T, typename D>
inline void linearColumnFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor);
static const caller_t callers[] =
{linearColumnFilter_caller<0 , T, D>, linearColumnFilter_caller<1 , T, D>,
linearColumnFilter_caller<2 , T, D>, linearColumnFilter_caller<3 , T, D>,
linearColumnFilter_caller<4 , T, D>, linearColumnFilter_caller<5 , T, D>,
linearColumnFilter_caller<6 , T, D>, linearColumnFilter_caller<7 , T, D>,
linearColumnFilter_caller<8 , T, D>, linearColumnFilter_caller<9 , T, D>,
linearColumnFilter_caller<10, T, D>, linearColumnFilter_caller<11, T, D>,
linearColumnFilter_caller<12, T, D>, linearColumnFilter_caller<13, T, D>,
linearColumnFilter_caller<14, T, D>, linearColumnFilter_caller<15, T, D>};
loadLinearKernel(kernel, ksize);
callers[ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor);
}
void linearColumnFilter_gpu_32s32s(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearColumnFilter_gpu<int, int>(src, dst, kernel, ksize, anchor);
}
void linearColumnFilter_gpu_32s32f(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearColumnFilter_gpu<int, float>(src, dst, kernel, ksize, anchor);
}
void linearColumnFilter_gpu_32f32s(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearColumnFilter_gpu<float, int>(src, dst, kernel, ksize, anchor);
}
void linearColumnFilter_gpu_32f32f(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearColumnFilter_gpu<float, float>(src, dst, kernel, ksize, anchor);
}
}}}
/////////////////////////////////////////////////////////////////////////////////////////////////
// Bilateral filters
namespace bf_krnls
{
__constant__ float* ctable_color;
......
......@@ -45,7 +45,7 @@
using namespace cv::gpu;
/////////////////////////////////// Remap ///////////////////////////////////////////////
namespace imgproc
namespace imgproc_krnls
{
texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex_remap;
......@@ -123,7 +123,7 @@ namespace imgproc
}
}
namespace cv { namespace gpu { namespace improc
namespace cv { namespace gpu { namespace imgproc
{
void remap_gpu_1c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst)
{
......@@ -132,15 +132,15 @@ namespace cv { namespace gpu { namespace improc
grid.x = divUp(dst.cols, threads.x);
grid.y = divUp(dst.rows, threads.y);
imgproc::tex_remap.filterMode = cudaFilterModeLinear;
imgproc::tex_remap.addressMode[0] = imgproc::tex_remap.addressMode[1] = cudaAddressModeWrap;
imgproc_krnls::tex_remap.filterMode = cudaFilterModeLinear;
imgproc_krnls::tex_remap.addressMode[0] = imgproc_krnls::tex_remap.addressMode[1] = cudaAddressModeWrap;
cudaChannelFormatDesc desc = cudaCreateChannelDesc<unsigned char>();
cudaSafeCall( cudaBindTexture2D(0, imgproc::tex_remap, src.ptr, desc, src.cols, src.rows, src.step) );
cudaSafeCall( cudaBindTexture2D(0, imgproc_krnls::tex_remap, src.ptr, desc, src.cols, src.rows, src.step) );
imgproc::remap_1c<<<grid, threads>>>(xmap.ptr, ymap.ptr, xmap.step, dst.ptr, dst.step, dst.cols, dst.rows);
imgproc_krnls::remap_1c<<<grid, threads>>>(xmap.ptr, ymap.ptr, xmap.step, dst.ptr, dst.step, dst.cols, dst.rows);
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaUnbindTexture(imgproc::tex_remap) );
cudaSafeCall( cudaUnbindTexture(imgproc_krnls::tex_remap) );
}
void remap_gpu_3c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst)
......@@ -150,7 +150,7 @@ namespace cv { namespace gpu { namespace improc
grid.x = divUp(dst.cols, threads.x);
grid.y = divUp(dst.rows, threads.y);
imgproc::remap_3c<<<grid, threads>>>(src.ptr, src.step, xmap.ptr, ymap.ptr, xmap.step, dst.ptr, dst.step, dst.cols, dst.rows);
imgproc_krnls::remap_3c<<<grid, threads>>>(src.ptr, src.step, xmap.ptr, ymap.ptr, xmap.step, dst.ptr, dst.step, dst.cols, dst.rows);
cudaSafeCall( cudaThreadSynchronize() );
}
......@@ -159,7 +159,7 @@ namespace cv { namespace gpu { namespace improc
/////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////
namespace imgproc
namespace imgproc_krnls
{
texture<uchar4, 2> tex_meanshift;
......@@ -254,7 +254,7 @@ namespace imgproc
}
}
namespace cv { namespace gpu { namespace improc
namespace cv { namespace gpu { namespace imgproc
{
extern "C" void meanShiftFiltering_gpu(const DevMem2D& src, DevMem2D dst, int sp, int sr, int maxIter, float eps)
{
......@@ -264,11 +264,11 @@ namespace cv { namespace gpu { namespace improc
grid.y = divUp(src.rows, threads.y);
cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
cudaSafeCall( cudaBindTexture2D( 0, imgproc::tex_meanshift, src.ptr, desc, src.cols, src.rows, src.step ) );
cudaSafeCall( cudaBindTexture2D( 0, imgproc_krnls::tex_meanshift, src.ptr, desc, src.cols, src.rows, src.step ) );
imgproc::meanshift_kernel<<< grid, threads >>>( dst.ptr, dst.step, dst.cols, dst.rows, sp, sr, maxIter, eps );
imgproc_krnls::meanshift_kernel<<< grid, threads >>>( dst.ptr, dst.step, dst.cols, dst.rows, sp, sr, maxIter, eps );
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaUnbindTexture( imgproc::tex_meanshift ) );
cudaSafeCall( cudaUnbindTexture( imgproc_krnls::tex_meanshift ) );
}
extern "C" void meanShiftProc_gpu(const DevMem2D& src, DevMem2D dstr, DevMem2D dstsp, int sp, int sr, int maxIter, float eps)
{
......@@ -278,17 +278,17 @@ namespace cv { namespace gpu { namespace improc
grid.y = divUp(src.rows, threads.y);
cudaChannelFormatDesc desc = cudaCreateChannelDesc<uchar4>();
cudaSafeCall( cudaBindTexture2D( 0, imgproc::tex_meanshift, src.ptr, desc, src.cols, src.rows, src.step ) );
cudaSafeCall( cudaBindTexture2D( 0, imgproc_krnls::tex_meanshift, src.ptr, desc, src.cols, src.rows, src.step ) );
imgproc::meanshiftproc_kernel<<< grid, threads >>>( dstr.ptr, dstr.step, dstsp.ptr, dstsp.step, dstr.cols, dstr.rows, sp, sr, maxIter, eps );
imgproc_krnls::meanshiftproc_kernel<<< grid, threads >>>( dstr.ptr, dstr.step, dstsp.ptr, dstsp.step, dstr.cols, dstr.rows, sp, sr, maxIter, eps );
cudaSafeCall( cudaThreadSynchronize() );
cudaSafeCall( cudaUnbindTexture( imgproc::tex_meanshift ) );
cudaSafeCall( cudaUnbindTexture( imgproc_krnls::tex_meanshift ) );
}
}}}
/////////////////////////////////// drawColorDisp ///////////////////////////////////////////////
namespace imgproc
namespace imgproc_krnls
{
template <typename T>
__device__ unsigned int cvtPixel(T d, int ndisp, float S = 1, float V = 1)
......@@ -391,7 +391,7 @@ namespace imgproc
}
}
namespace cv { namespace gpu { namespace improc
namespace cv { namespace gpu { namespace imgproc
{
void drawColorDisp_gpu(const DevMem2D& src, const DevMem2D& dst, int ndisp, const cudaStream_t& stream)
{
......@@ -400,7 +400,7 @@ namespace cv { namespace gpu { namespace improc
grid.x = divUp(src.cols, threads.x << 2);
grid.y = divUp(src.rows, threads.y);
imgproc::drawColorDisp<<<grid, threads, 0, stream>>>(src.ptr, src.step, dst.ptr, dst.step, src.cols, src.rows, ndisp);
imgproc_krnls::drawColorDisp<<<grid, threads, 0, stream>>>(src.ptr, src.step, dst.ptr, dst.step, src.cols, src.rows, ndisp);
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
......@@ -413,7 +413,7 @@ namespace cv { namespace gpu { namespace improc
grid.x = divUp(src.cols, threads.x << 1);
grid.y = divUp(src.rows, threads.y);
imgproc::drawColorDisp<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(short), dst.ptr, dst.step, src.cols, src.rows, ndisp);
imgproc_krnls::drawColorDisp<<<grid, threads, 0, stream>>>(src.ptr, src.step / sizeof(short), dst.ptr, dst.step, src.cols, src.rows, ndisp);
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
......@@ -422,7 +422,7 @@ namespace cv { namespace gpu { namespace improc
/////////////////////////////////// reprojectImageTo3D ///////////////////////////////////////////////
namespace imgproc
namespace imgproc_krnls
{
__constant__ float cq[16];
......@@ -457,7 +457,7 @@ namespace imgproc
}
}
namespace cv { namespace gpu { namespace improc
namespace cv { namespace gpu { namespace imgproc
{
template <typename T>
inline void reprojectImageTo3D_caller(const DevMem2D_<T>& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream)
......@@ -467,9 +467,9 @@ namespace cv { namespace gpu { namespace improc
grid.x = divUp(disp.cols, threads.x);
grid.y = divUp(disp.rows, threads.y);
cudaSafeCall( cudaMemcpyToSymbol(imgproc::cq, q, 16 * sizeof(float)) );
cudaSafeCall( cudaMemcpyToSymbol(imgproc_krnls::cq, q, 16 * sizeof(float)) );
imgproc::reprojectImageTo3D<<<grid, threads, 0, stream>>>(disp.ptr, disp.step / sizeof(T), xyzw.ptr, xyzw.step / sizeof(float), disp.rows, disp.cols);
imgproc_krnls::reprojectImageTo3D<<<grid, threads, 0, stream>>>(disp.ptr, disp.step / sizeof(T), xyzw.ptr, xyzw.step / sizeof(float), disp.rows, disp.cols);
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
......
......@@ -41,6 +41,9 @@
//M*/
#include "cuda_shared.hpp"
#include "saturate_cast.hpp"
#include "transform.hpp"
#include "vecmath.hpp"
using namespace cv::gpu;
......@@ -48,6 +51,9 @@ using namespace cv::gpu;
#define CV_PI 3.1415926535897932384626433832795f
#endif
//////////////////////////////////////////////////////////////////////////////////////
// Cart <-> Polar
namespace mathfunc_krnls
{
struct Nothing
......@@ -143,8 +149,8 @@ namespace cv { namespace gpu { namespace mathfunc
const float scale = angleInDegrees ? (float)(180.0f / CV_PI) : 1.f;
mathfunc_krnls::cartToPolar<Mag, Angle><<<grid, threads, 0, stream>>>(
x.ptr, x.step / sizeof(float), y.ptr, y.step / sizeof(float),
mag.ptr, mag.step / sizeof(float), angle.ptr, angle.step / sizeof(float), scale, x.cols, x.rows);
x.ptr, x.elem_step, y.ptr, y.elem_step,
mag.ptr, mag.elem_step, angle.ptr, angle.elem_step, scale, x.cols, x.rows);
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
......@@ -191,8 +197,8 @@ namespace cv { namespace gpu { namespace mathfunc
const float scale = angleInDegrees ? (float)(CV_PI / 180.0f) : 1.0f;
mathfunc_krnls::polarToCart<Mag><<<grid, threads, 0, stream>>>(mag.ptr, mag.step / sizeof(float),
angle.ptr, angle.step / sizeof(float), scale, x.ptr, x.step / sizeof(float), y.ptr, y.step / sizeof(float), mag.cols, mag.rows);
mathfunc_krnls::polarToCart<Mag><<<grid, threads, 0, stream>>>(mag.ptr, mag.elem_step,
angle.ptr, angle.elem_step, scale, x.ptr, x.elem_step, y.ptr, y.elem_step, mag.cols, mag.rows);
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
......@@ -210,3 +216,37 @@ namespace cv { namespace gpu { namespace mathfunc
callers[mag.ptr == 0](mag, angle, x, y, angleInDegrees, stream);
}
}}}
//////////////////////////////////////////////////////////////////////////////////////
// Compare
namespace mathfunc_krnls
{
template <typename T1, typename T2>
struct NotEqual
{
__device__ uchar operator()(const T1& src1, const T2& src2, int, int)
{
return static_cast<uchar>(static_cast<int>(src1 != src2) * 255);
}
};
}
namespace cv { namespace gpu { namespace mathfunc
{
template <typename T1, typename T2>
inline void compare_ne(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst)
{
mathfunc_krnls::NotEqual<T1, T2> op;
transform(static_cast< DevMem2D_<T1> >(src1), static_cast< DevMem2D_<T2> >(src2), dst, op, 0);
}
void compare_ne_8uc4(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst)
{
compare_ne<uint, uint>(src1, src2, dst);
}
void compare_ne_32f(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst)
{
compare_ne<float, float>(src1, src2, dst);
}
}}}
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_TRANSFORM_HPP__
#define __OPENCV_GPU_TRANSFORM_HPP__
#include "cuda_shared.hpp"
#include "saturate_cast.hpp"
#include "vecmath.hpp"
namespace cv { namespace gpu { namespace algo_krnls
{
template <typename T, typename D, typename UnOp>
static __global__ void transform(const T* src, size_t src_step,
D* dst, size_t dst_step, int width, int height, UnOp op)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < width && y < height)
{
T src_data = src[y * src_step + x];
dst[y * dst_step + x] = op(src_data, x, y);
}
}
template <typename T1, typename T2, typename D, typename BinOp>
static __global__ void transform(const T1* src1, size_t src1_step, const T2* src2, size_t src2_step,
D* dst, size_t dst_step, int width, int height, BinOp op)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < width && y < height)
{
T1 src1_data = src1[y * src1_step + x];
T2 src2_data = src2[y * src2_step + x];
dst[y * dst_step + x] = op(src1_data, src2_data, x, y);
}
}
}}}
namespace cv
{
namespace gpu
{
template <typename T, typename D, typename UnOp>
static void transform(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, UnOp op, cudaStream_t stream)
{
dim3 threads(16, 16, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(src.cols, threads.x);
grid.y = divUp(src.rows, threads.y);
algo_krnls::transform<<<grid, threads, 0, stream>>>(src.ptr, src.elem_step,
dst.ptr, dst.elem_step, src.cols, src.rows, op);
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
}
template <typename T1, typename T2, typename D, typename BinOp>
static void transform(const DevMem2D_<T1>& src1, const DevMem2D_<T2>& src2, const DevMem2D_<D>& dst, BinOp op, cudaStream_t stream)
{
dim3 threads(16, 16, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(src1.cols, threads.x);
grid.y = divUp(src1.rows, threads.y);
algo_krnls::transform<<<grid, threads, 0, stream>>>(src1.ptr, src1.elem_step,
src2.ptr, src2.elem_step, dst.ptr, dst.elem_step, src1.cols, src1.rows, op);
if (stream == 0)
cudaSafeCall( cudaThreadSynchronize() );
}
}
}
#endif // __OPENCV_GPU_TRANSFORM_HPP__
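For example, a minimal sketch of plugging a custom unary functor into the new transform helper; the functor and wrapper function are hypothetical:

// Per-pixel functor: scale each float pixel; transform passes (value, x, y).
struct Scale
{
    float s;
    __device__ float operator()(float v, int, int) { return v * s; }
};

void scaleImage(const cv::gpu::DevMem2Df& src, const cv::gpu::DevMem2Df& dst, float s)
{
    Scale op;
    op.s = s;
    cv::gpu::transform(src, dst, op, 0);   // stream 0: synchronizes before returning
}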
#ifndef __OPENCV_GPU_VECMATH_HPP__
#define __OPENCV_GPU_VECMATH_HPP__
#include "cuda_shared.hpp"
namespace cv
{
namespace gpu
{
template<typename T, int N> struct TypeVec;
template<typename T> struct TypeVec<T, 1> { typedef T vec_t; };
template<> struct TypeVec<unsigned char, 2> { typedef uchar2 vec_t; };
template<> struct TypeVec<uchar2, 2> { typedef uchar2 vec_t; };
template<> struct TypeVec<unsigned char, 3> { typedef uchar3 vec_t; };
template<> struct TypeVec<uchar3, 3> { typedef uchar3 vec_t; };
template<> struct TypeVec<unsigned char, 4> { typedef uchar4 vec_t; };
template<> struct TypeVec<uchar4, 4> { typedef uchar4 vec_t; };
template<> struct TypeVec<char, 2> { typedef char2 vec_t; };
template<> struct TypeVec<char2, 2> { typedef char2 vec_t; };
template<> struct TypeVec<char, 3> { typedef char3 vec_t; };
template<> struct TypeVec<char3, 3> { typedef char3 vec_t; };
template<> struct TypeVec<char, 4> { typedef char4 vec_t; };
template<> struct TypeVec<char4, 4> { typedef char4 vec_t; };
template<> struct TypeVec<unsigned short, 2> { typedef ushort2 vec_t; };
template<> struct TypeVec<ushort2, 2> { typedef ushort2 vec_t; };
template<> struct TypeVec<unsigned short, 3> { typedef ushort3 vec_t; };
template<> struct TypeVec<ushort3, 3> { typedef ushort3 vec_t; };
template<> struct TypeVec<unsigned short, 4> { typedef ushort4 vec_t; };
template<> struct TypeVec<ushort4, 4> { typedef ushort4 vec_t; };
template<> struct TypeVec<short, 2> { typedef short2 vec_t; };
template<> struct TypeVec<short2, 2> { typedef short2 vec_t; };
template<> struct TypeVec<short, 3> { typedef short3 vec_t; };
template<> struct TypeVec<short3, 3> { typedef short3 vec_t; };
template<> struct TypeVec<short, 4> { typedef short4 vec_t; };
template<> struct TypeVec<short4, 4> { typedef short4 vec_t; };
template<> struct TypeVec<unsigned int, 2> { typedef uint2 vec_t; };
template<> struct TypeVec<uint2, 2> { typedef uint2 vec_t; };
template<> struct TypeVec<unsigned int, 3> { typedef uint3 vec_t; };
template<> struct TypeVec<uint3, 3> { typedef uint3 vec_t; };
template<> struct TypeVec<unsigned int, 4> { typedef uint4 vec_t; };
template<> struct TypeVec<uint4, 4> { typedef uint4 vec_t; };
template<> struct TypeVec<int, 2> { typedef int2 vec_t; };
template<> struct TypeVec<int2, 2> { typedef int2 vec_t; };
template<> struct TypeVec<int, 3> { typedef int3 vec_t; };
template<> struct TypeVec<int3, 3> { typedef int3 vec_t; };
template<> struct TypeVec<int, 4> { typedef int4 vec_t; };
template<> struct TypeVec<int4, 4> { typedef int4 vec_t; };
template<> struct TypeVec<float, 2> { typedef float2 vec_t; };
template<> struct TypeVec<float2, 2> { typedef float2 vec_t; };
template<> struct TypeVec<float, 3> { typedef float3 vec_t; };
template<> struct TypeVec<float3, 3> { typedef float3 vec_t; };
template<> struct TypeVec<float, 4> { typedef float4 vec_t; };
template<> struct TypeVec<float4, 4> { typedef float4 vec_t; };
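Illustrative compile-time use of the trait; the typedef names are hypothetical:

typedef cv::gpu::TypeVec<unsigned char, 4>::vec_t pixel8uc4_t;  // uchar4
typedef cv::gpu::TypeVec<float3, 3>::vec_t pixel32fc3_t;        // float3 (the trait is idempotent on vector types)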
static __device__ uchar4 operator+(const uchar4& a, const uchar4& b)
{
return make_uchar4(a.x + b.x, a.y + b.y, a.z + b.z, a.w + b.w);
}
static __device__ uchar4 operator-(const uchar4& a, const uchar4& b)
{
return make_uchar4(a.x - b.x, a.y - b.y, a.z - b.z, a.w - b.w);
}
static __device__ uchar4 operator*(const uchar4& a, const uchar4& b)
{
return make_uchar4(a.x * b.x, a.y * b.y, a.z * b.z, a.w * b.w);
}
static __device__ uchar4 operator/(const uchar4& a, const uchar4& b)
{
return make_uchar4(a.x / b.x, a.y / b.y, a.z / b.z, a.w / b.w);
}
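Note that these component-wise operators wrap modulo 256 (integer promotion, then narrowing through make_uchar4). A saturating variant is sketched below as an assumption, relying on saturate_cast<uchar> from saturate_cast.hpp; it is not part of this commit:

static __device__ uchar4 addSaturate(const uchar4& a, const uchar4& b)
{
    return make_uchar4(saturate_cast<uchar>(a.x + b.x), saturate_cast<uchar>(a.y + b.y),
                       saturate_cast<uchar>(a.z + b.z), saturate_cast<uchar>(a.w + b.w));
}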
template <typename T>
static __device__ uchar4 operator*(const uchar4& a, T s)
{
return make_uchar4(a.x * s, a.y * s, a.z * s, a.w * s);
}
template <typename T>
static __device__ uchar4 operator*(T s, const uchar4& a)
{
return a * s;
}
}
}
#endif // __OPENCV_GPU_VECMATH_HPP__
\ No newline at end of file
......@@ -69,6 +69,22 @@ void cv::gpu::Stream::enqueueConvert(const GpuMat& /*src*/, GpuMat& /*dst*/, int
#include "opencv2/gpu/stream_accessor.hpp"
namespace cv
{
namespace gpu
{
namespace matrix_operations
{
void copy_to_with_mask(const DevMem2D& src, DevMem2D dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);
void set_to_without_mask (DevMem2D dst, int depth, const double *scalar, int channels, const cudaStream_t & stream = 0);
void set_to_with_mask (DevMem2D dst, int depth, const double *scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);
void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t & stream = 0);
}
}
}
struct Stream::Impl
{
cudaStream_t stream;
......
......@@ -75,7 +75,7 @@ void cv::gpu::histRange(const GpuMat&, GpuMat*, const GpuMat*) { throw_nogpu();
namespace cv { namespace gpu
{
namespace improc
namespace imgproc
{
void remap_gpu_1c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst);
void remap_gpu_3c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst);
......@@ -142,7 +142,7 @@ namespace cv { namespace gpu
void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap)
{
typedef void (*remap_gpu_t)(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst);
static const remap_gpu_t callers[] = {improc::remap_gpu_1c, 0, improc::remap_gpu_3c};
static const remap_gpu_t callers[] = {imgproc::remap_gpu_1c, 0, imgproc::remap_gpu_3c};
CV_Assert((src.type() == CV_8U || src.type() == CV_8UC3) && xmap.type() == CV_32F && ymap.type() == CV_32F);
......@@ -180,7 +180,7 @@ void cv::gpu::meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr,
eps = 1.f;
eps = (float)std::max(criteria.epsilon, 0.0);
improc::meanShiftFiltering_gpu(src, dst, sp, sr, maxIter, eps);
imgproc::meanShiftFiltering_gpu(src, dst, sp, sr, maxIter, eps);
}
////////////////////////////////////////////////////////////////////////
......@@ -207,7 +207,7 @@ void cv::gpu::meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int
eps = 1.f;
eps = (float)std::max(criteria.epsilon, 0.0);
improc::meanShiftProc_gpu(src, dstr, dstsp, sp, sr, maxIter, eps);
imgproc::meanShiftProc_gpu(src, dstr, dstsp, sp, sr, maxIter, eps);
}
////////////////////////////////////////////////////////////////////////
......@@ -223,7 +223,7 @@ namespace
out = dst;
out.create(src.size(), CV_8UC4);
improc::drawColorDisp_gpu((DevMem2D_<T>)src, out, ndisp, stream);
imgproc::drawColorDisp_gpu((DevMem2D_<T>)src, out, ndisp, stream);
dst = out;
}
......@@ -256,7 +256,7 @@ namespace
void reprojectImageTo3D_caller(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, const cudaStream_t& stream)
{
xyzw.create(disp.rows, disp.cols, CV_32FC4);
improc::reprojectImageTo3D_gpu((DevMem2D_<T>)disp, xyzw, Q.ptr<float>(), stream);
imgproc::reprojectImageTo3D_gpu((DevMem2D_<T>)disp, xyzw, Q.ptr<float>(), stream);
}
typedef void (*reprojectImageTo3D_caller_t)(const GpuMat& disp, GpuMat& xyzw, const Mat& Q, const cudaStream_t& stream);
......@@ -313,7 +313,7 @@ namespace
case CV_RGBA2BGR: case CV_RGB2BGR: case CV_BGRA2RGBA:
{
typedef void (*func_t)(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx, cudaStream_t stream);
static const func_t funcs[] = {improc::RGB2RGB_gpu_8u, 0, improc::RGB2RGB_gpu_16u, 0, 0, improc::RGB2RGB_gpu_32f};
static const func_t funcs[] = {imgproc::RGB2RGB_gpu_8u, 0, imgproc::RGB2RGB_gpu_16u, 0, 0, imgproc::RGB2RGB_gpu_32f};
CV_Assert(scn == 3 || scn == 4);
......@@ -338,7 +338,7 @@ namespace
dst.create(sz, CV_8UC2);
improc::RGB2RGB5x5_gpu(src, scn, dst, green_bits, bidx, stream);
imgproc::RGB2RGB5x5_gpu(src, scn, dst, green_bits, bidx, stream);
break;
}
......@@ -356,14 +356,14 @@ namespace
dst.create(sz, CV_MAKETYPE(depth, dcn));
improc::RGB5x52RGB_gpu(src, green_bits, dst, dcn, bidx, stream);
imgproc::RGB5x52RGB_gpu(src, green_bits, dst, dcn, bidx, stream);
break;
}
case CV_BGR2GRAY: case CV_BGRA2GRAY: case CV_RGB2GRAY: case CV_RGBA2GRAY:
{
typedef void (*func_t)(const DevMem2D& src, int srccn, const DevMem2D& dst, int bidx, cudaStream_t stream);
static const func_t funcs[] = {improc::RGB2Gray_gpu_8u, 0, improc::RGB2Gray_gpu_16u, 0, 0, improc::RGB2Gray_gpu_32f};
static const func_t funcs[] = {imgproc::RGB2Gray_gpu_8u, 0, imgproc::RGB2Gray_gpu_16u, 0, 0, imgproc::RGB2Gray_gpu_32f};
CV_Assert(scn == 3 || scn == 4);
......@@ -383,14 +383,14 @@ namespace
dst.create(sz, CV_8UC1);
improc::RGB5x52Gray_gpu(src, green_bits, dst, stream);
imgproc::RGB5x52Gray_gpu(src, green_bits, dst, stream);
break;
}
case CV_GRAY2BGR: case CV_GRAY2BGRA:
{
typedef void (*func_t)(const DevMem2D& src, const DevMem2D& dst, int dstcn, cudaStream_t stream);
static const func_t funcs[] = {improc::Gray2RGB_gpu_8u, 0, improc::Gray2RGB_gpu_16u, 0, 0, improc::Gray2RGB_gpu_32f};
static const func_t funcs[] = {imgproc::Gray2RGB_gpu_8u, 0, imgproc::Gray2RGB_gpu_16u, 0, 0, imgproc::Gray2RGB_gpu_32f};
if (dcn <= 0) dcn = 3;
......@@ -410,7 +410,7 @@ namespace
dst.create(sz, CV_8UC2);
improc::Gray2RGB5x5_gpu(src, dst, green_bits, stream);
imgproc::Gray2RGB5x5_gpu(src, dst, green_bits, stream);
break;
}
......@@ -419,7 +419,7 @@ namespace
{
typedef void (*func_t)(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx,
const void* coeffs, cudaStream_t stream);
static const func_t funcs[] = {improc::RGB2YCrCb_gpu_8u, 0, improc::RGB2YCrCb_gpu_16u, 0, 0, improc::RGB2YCrCb_gpu_32f};
static const func_t funcs[] = {imgproc::RGB2YCrCb_gpu_8u, 0, imgproc::RGB2YCrCb_gpu_16u, 0, 0, imgproc::RGB2YCrCb_gpu_32f};
if (dcn <= 0) dcn = 3;
CV_Assert((scn == 3 || scn == 4) && (dcn == 3 || dcn == 4));
......@@ -456,7 +456,7 @@ namespace
{
typedef void (*func_t)(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx,
const void* coeffs, cudaStream_t stream);
static const func_t funcs[] = {improc::YCrCb2RGB_gpu_8u, 0, improc::YCrCb2RGB_gpu_16u, 0, 0, improc::YCrCb2RGB_gpu_32f};
static const func_t funcs[] = {imgproc::YCrCb2RGB_gpu_8u, 0, imgproc::YCrCb2RGB_gpu_16u, 0, 0, imgproc::YCrCb2RGB_gpu_32f};
if (dcn <= 0) dcn = 3;
......@@ -485,7 +485,7 @@ namespace
{
typedef void (*func_t)(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn,
const void* coeffs, cudaStream_t stream);
static const func_t funcs[] = {improc::RGB2XYZ_gpu_8u, 0, improc::RGB2XYZ_gpu_16u, 0, 0, improc::RGB2XYZ_gpu_32f};
static const func_t funcs[] = {imgproc::RGB2XYZ_gpu_8u, 0, imgproc::RGB2XYZ_gpu_16u, 0, 0, imgproc::RGB2XYZ_gpu_32f};
if (dcn <= 0) dcn = 3;
......@@ -534,7 +534,7 @@ namespace
{
typedef void (*func_t)(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn,
const void* coeffs, cudaStream_t stream);
static const func_t funcs[] = {improc::XYZ2RGB_gpu_8u, 0, improc::XYZ2RGB_gpu_16u, 0, 0, improc::XYZ2RGB_gpu_32f};
static const func_t funcs[] = {imgproc::XYZ2RGB_gpu_8u, 0, imgproc::XYZ2RGB_gpu_16u, 0, 0, imgproc::XYZ2RGB_gpu_32f};
if (dcn <= 0) dcn = 3;
......@@ -584,8 +584,8 @@ namespace
{
typedef void (*func_t)(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx,
int hrange, cudaStream_t stream);
static const func_t funcs_hsv[] = {improc::RGB2HSV_gpu_8u, 0, 0, 0, 0, improc::RGB2HSV_gpu_32f};
static const func_t funcs_hls[] = {improc::RGB2HLS_gpu_8u, 0, 0, 0, 0, improc::RGB2HLS_gpu_32f};
static const func_t funcs_hsv[] = {imgproc::RGB2HSV_gpu_8u, 0, 0, 0, 0, imgproc::RGB2HSV_gpu_32f};
static const func_t funcs_hls[] = {imgproc::RGB2HLS_gpu_8u, 0, 0, 0, 0, imgproc::RGB2HLS_gpu_32f};
if (dcn <= 0) dcn = 3;
......@@ -610,8 +610,8 @@ namespace
{
typedef void (*func_t)(const DevMem2D& src, int srccn, const DevMem2D& dst, int dstcn, int bidx,
int hrange, cudaStream_t stream);
static const func_t funcs_hsv[] = {improc::HSV2RGB_gpu_8u, 0, 0, 0, 0, improc::HSV2RGB_gpu_32f};
static const func_t funcs_hls[] = {improc::HLS2RGB_gpu_8u, 0, 0, 0, 0, improc::HLS2RGB_gpu_32f};
static const func_t funcs_hsv[] = {imgproc::HSV2RGB_gpu_8u, 0, 0, 0, 0, imgproc::HSV2RGB_gpu_32f};
static const func_t funcs_hls[] = {imgproc::HLS2RGB_gpu_8u, 0, 0, 0, 0, imgproc::HLS2RGB_gpu_32f};
if (dcn <= 0) dcn = 3;
......
......@@ -77,6 +77,22 @@ namespace cv
#else /* !defined (HAVE_CUDA) */
namespace cv
{
namespace gpu
{
namespace matrix_operations
{
void copy_to_with_mask(const DevMem2D& src, DevMem2D dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);
void set_to_without_mask (DevMem2D dst, int depth, const double *scalar, int channels, const cudaStream_t & stream = 0);
void set_to_with_mask (DevMem2D dst, int depth, const double *scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);
void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, int channels, double alpha, double beta, const cudaStream_t & stream = 0);
}
}
}
void cv::gpu::GpuMat::upload(const Mat& m)
{
CV_DbgAssert(!m.empty());
......
......@@ -53,7 +53,6 @@ const char* blacklist[] =
//"GPU-NppImageMeanStdDev", // different precision
//"GPU-NppImageExp", // different precision
//"GPU-NppImageLog", // different precision
//"GPU-NppImageMagnitude", // different precision
"GPU-NppImageCanny", // NPP_TEXTURE_BIND_ERROR
//"GPU-NppImageResize", // different precision
......@@ -61,8 +60,8 @@ const char* blacklist[] =
//"GPU-NppImageWarpPerspective", // different precision
//"GPU-NppImageIntegral", // different precision
//"GPU-NppImageSobel", // ???
//"GPU-NppImageScharr", // ???
//"GPU-NppImageSobel", // sign error
//"GPU-NppImageScharr", // sign error
//"GPU-NppImageGaussianBlur", // different precision
0
};
......