Commit 47d68f69 authored by Vladislav Vinogradov

implemented gpu::remap for all types

parent 78542854
...@@ -66,6 +66,9 @@ namespace cv ...@@ -66,6 +66,9 @@ namespace cv
template <typename T> struct DevMem2D_ template <typename T> struct DevMem2D_
{ {
typedef T elem_type;
typedef int index_type;
int cols; int cols;
int rows; int rows;
T* data; T* data;
...@@ -79,8 +82,7 @@ namespace cv ...@@ -79,8 +82,7 @@ namespace cv
template <typename U> template <typename U>
explicit DevMem2D_(const DevMem2D_<U>& d) explicit DevMem2D_(const DevMem2D_<U>& d)
: cols(d.cols), rows(d.rows), data((T*)d.data), step(d.step) {} : cols(d.cols), rows(d.rows), data((T*)d.data), step(d.step) {}
typedef T elem_type;
enum { elem_size = sizeof(elem_type) }; enum { elem_size = sizeof(elem_type) };
__CV_GPU_HOST_DEVICE__ size_t elemSize() const { return elem_size; } __CV_GPU_HOST_DEVICE__ size_t elemSize() const { return elem_size; }
...@@ -89,6 +91,9 @@ namespace cv ...@@ -89,6 +91,9 @@ namespace cv
__CV_GPU_HOST_DEVICE__ operator T*() const { return data; } __CV_GPU_HOST_DEVICE__ operator T*() const { return data; }
__CV_GPU_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; }
__CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
#if defined(__DEVCLASES_ADD_THRUST_BEGIN_END__) #if defined(__DEVCLASES_ADD_THRUST_BEGIN_END__)
thrust::device_ptr<T> begin() const { return thrust::device_ptr<T>(data); } thrust::device_ptr<T> begin() const { return thrust::device_ptr<T>(data); }
thrust::device_ptr<T> end() const { return thrust::device_ptr<T>(data) + cols * rows; } thrust::device_ptr<T> end() const { return thrust::device_ptr<T>(data) + cols * rows; }
...@@ -97,19 +102,24 @@ namespace cv ...@@ -97,19 +102,24 @@ namespace cv
template<typename T> struct PtrStep_ template<typename T> struct PtrStep_
{ {
typedef T elem_type;
typedef int index_type;
T* data; T* data;
size_t step; size_t step;
PtrStep_() : data(0), step(0) {} PtrStep_() : data(0), step(0) {}
PtrStep_(const DevMem2D_<T>& mem) : data(mem.data), step(mem.step) {} PtrStep_(const DevMem2D_<T>& mem) : data(mem.data), step(mem.step) {}
typedef T elem_type;
enum { elem_size = sizeof(elem_type) }; enum { elem_size = sizeof(elem_type) };
__CV_GPU_HOST_DEVICE__ size_t elemSize() const { return elem_size; } __CV_GPU_HOST_DEVICE__ size_t elemSize() const { return elem_size; }
__CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return (T*)( (char*)data + y * step); } __CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return (T*)( (char*)data + y * step); }
__CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)data + y * step); } __CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)data + y * step); }
__CV_GPU_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; }
__CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
#if defined(__DEVCLASES_ADD_THRUST_BEGIN_END__) #if defined(__DEVCLASES_ADD_THRUST_BEGIN_END__)
thrust::device_ptr<T> begin() const { return thrust::device_ptr<T>(data); } thrust::device_ptr<T> begin() const { return thrust::device_ptr<T>(data); }
#endif #endif
...@@ -124,7 +134,10 @@ namespace cv ...@@ -124,7 +134,10 @@ namespace cv
PtrStep_<T>::step /= PtrStep_<T>::elem_size; PtrStep_<T>::step /= PtrStep_<T>::elem_size;
} }
__CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return PtrStep_<T>::data + y * PtrStep_<T>::step; } __CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return PtrStep_<T>::data + y * PtrStep_<T>::step; }
__CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return PtrStep_<T>::data + y * PtrStep_<T>::step; } __CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return PtrStep_<T>::data + y * PtrStep_<T>::step; }
__CV_GPU_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; }
__CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
}; };
typedef DevMem2D_<unsigned char> DevMem2D; typedef DevMem2D_<unsigned char> DevMem2D;
......
...@@ -596,8 +596,9 @@ namespace cv ...@@ -596,8 +596,9 @@ namespace cv
////////////////////////////// Image processing ////////////////////////////// ////////////////////////////// Image processing //////////////////////////////
//! DST[x,y] = SRC[xmap[x,y],ymap[x,y]] with bilinear interpolation. //! DST[x,y] = SRC[xmap[x,y],ymap[x,y]] with bilinear interpolation.
//! supports CV_8UC1, CV_8UC3 source types and CV_32FC1 map type //! supports CV_32FC1 map type
CV_EXPORTS void remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap); CV_EXPORTS void remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap,
int interpolation, int borderMode = BORDER_CONSTANT, const Scalar& borderValue = Scalar());
//! Does mean shift filtering on GPU. //! Does mean shift filtering on GPU.
CV_EXPORTS void meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr, CV_EXPORTS void meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr,
...@@ -761,10 +762,10 @@ namespace cv ...@@ -761,10 +762,10 @@ namespace cv
CV_EXPORTS void upsample(const GpuMat& src, GpuMat &dst, Stream& stream = Stream::Null()); CV_EXPORTS void upsample(const GpuMat& src, GpuMat &dst, Stream& stream = Stream::Null());
//! smoothes the source image and downsamples it //! smoothes the source image and downsamples it
CV_EXPORTS void pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()); CV_EXPORTS void pyrDown(const GpuMat& src, GpuMat& dst, int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null());
//! upsamples the source image and then smoothes it //! upsamples the source image and then smoothes it
CV_EXPORTS void pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null()); CV_EXPORTS void pyrUp(const GpuMat& src, GpuMat& dst, int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null());
//! performs linear blending of two images //! performs linear blending of two images
//! to avoid accuracy errors sum of weights shouldn't be very close to zero //! to avoid accuracy errors sum of weights shouldn't be very close to zero
......
...@@ -242,9 +242,9 @@ namespace filter_krnls ...@@ -242,9 +242,9 @@ namespace filter_krnls
{ {
const T* srcCol = src.ptr() + x; const T* srcCol = src.ptr() + x;
sDataColumn[ threadIdx.y * BLOCK_DIM_X] = b.at_low(y - BLOCK_DIM_Y, srcCol); sDataColumn[ threadIdx.y * BLOCK_DIM_X] = b.at_low(y - BLOCK_DIM_Y, srcCol, src.step);
sDataColumn[(threadIdx.y + BLOCK_DIM_Y) * BLOCK_DIM_X] = b.at_high(y, srcCol); sDataColumn[(threadIdx.y + BLOCK_DIM_Y) * BLOCK_DIM_X] = b.at_high(y, srcCol, src.step);
sDataColumn[(threadIdx.y + BLOCK_DIM_Y * 2) * BLOCK_DIM_X] = b.at_high(y + BLOCK_DIM_Y, srcCol); sDataColumn[(threadIdx.y + BLOCK_DIM_Y * 2) * BLOCK_DIM_X] = b.at_high(y + BLOCK_DIM_Y, srcCol, src.step);
__syncthreads(); __syncthreads();
...@@ -273,7 +273,7 @@ namespace cv { namespace gpu { namespace filters ...@@ -273,7 +273,7 @@ namespace cv { namespace gpu { namespace filters
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y); dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
dim3 grid(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y)); dim3 grid(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));
B<T> b(src.rows, src.step); B<T> b(src.rows);
if (!b.is_range_safe(-BLOCK_DIM_Y, (grid.y + 1) * BLOCK_DIM_Y - 1)) if (!b.is_range_safe(-BLOCK_DIM_Y, (grid.y + 1) * BLOCK_DIM_Y - 1))
{ {
......
...@@ -44,6 +44,8 @@ ...@@ -44,6 +44,8 @@
#include "opencv2/gpu/device/border_interpolate.hpp" #include "opencv2/gpu/device/border_interpolate.hpp"
#include "opencv2/gpu/device/vec_traits.hpp" #include "opencv2/gpu/device/vec_traits.hpp"
#include "opencv2/gpu/device/vec_math.hpp" #include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/utility.hpp"
using namespace cv::gpu; using namespace cv::gpu;
using namespace cv::gpu::device; using namespace cv::gpu::device;
...@@ -51,109 +53,119 @@ using namespace cv::gpu::device; ...@@ -51,109 +53,119 @@ using namespace cv::gpu::device;
/////////////////////////////////// Remap /////////////////////////////////////////////// /////////////////////////////////// Remap ///////////////////////////////////////////////
namespace cv { namespace gpu { namespace imgproc namespace cv { namespace gpu { namespace imgproc
{ {
texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex_remap(0, cudaFilterModeLinear, cudaAddressModeWrap); // cudaAddressModeClamp == BrdReplicate
/*texture<uchar, cudaTextureType2D, cudaReadModeNormalizedFloat> tex_remap_uchar_LinearFilter(0, cudaFilterModeLinear, cudaAddressModeClamp);
__global__ void remap_1c(const float* mapx, const float* mapy, size_t map_step, uchar* out, size_t out_step, int width, int height) __global__ void remap_uchar_LinearFilter(const PtrStepf mapx, const PtrStepf mapy, DevMem2D dst)
{ {
int x = blockDim.x * blockIdx.x + threadIdx.x; const int x = blockDim.x * blockIdx.x + threadIdx.x;
int y = blockDim.y * blockIdx.y + threadIdx.y; const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < width && y < height)
{
int idx = y * (map_step >> 2) + x; /* map_step >> 2 <=> map_step / sizeof(float)*/
float xcoo = mapx[idx]; if (x < dst.cols && y < dst.rows)
float ycoo = mapy[idx]; {
const float xcoo = mapx.ptr(y)[x];
const float ycoo = mapy.ptr(y)[x];
out[y * out_step + x] = (unsigned char)(255.f * tex2D(tex_remap, xcoo, ycoo)); dst.ptr(y)[x] = 255.0f * tex2D(tex_remap_uchar_LinearFilter, xcoo, ycoo);
} }
} }*/
__global__ void remap_3c(const uchar* src, size_t src_step, const float* mapx, const float* mapy, template <typename Ptr2D, typename T> __global__ void remap(const Ptr2D src, const PtrStepf mapx, const PtrStepf mapy, DevMem2D_<T> dst)
size_t map_step, uchar* dst, size_t dst_step, int width, int height) {
{
const int x = blockDim.x * blockIdx.x + threadIdx.x; const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y; const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < width && y < height) if (x < dst.cols && y < dst.rows)
{ {
const int idx = y * (map_step >> 2) + x; /* map_step >> 2 <=> map_step / sizeof(float)*/ const float xcoo = mapx.ptr(y)[x];
const float ycoo = mapy.ptr(y)[x];
const float xcoo = mapx[idx];
const float ycoo = mapy[idx];
uchar3 out = make_uchar3(0, 0, 0);
if (xcoo >= 0 && xcoo < width - 1 && ycoo >= 0 && ycoo < height - 1) dst.ptr(y)[x] = saturate_cast<T>(src(ycoo, xcoo));
{
const int x1 = __float2int_rd(xcoo);
const int y1 = __float2int_rd(ycoo);
const int x2 = x1 + 1;
const int y2 = y1 + 1;
uchar src_reg = *(src + y1 * src_step + 3 * x1);
out.x += src_reg * (x2 - xcoo) * (y2 - ycoo);
src_reg = *(src + y1 * src_step + 3 * x1 + 1);
out.y += src_reg * (x2 - xcoo) * (y2 - ycoo);
src_reg = *(src + y1 * src_step + 3 * x1 + 2);
out.z += src_reg * (x2 - xcoo) * (y2 - ycoo);
src_reg = *(src + y1 * src_step + 3 * x2);
out.x += src_reg * (xcoo - x1) * (y2 - ycoo);
src_reg = *(src + y1 * src_step + 3 * x2 + 1);
out.y += src_reg * (xcoo - x1) * (y2 - ycoo);
src_reg = *(src + y1 * src_step + 3 * x2 + 2);
out.z += src_reg * (xcoo - x1) * (y2 - ycoo);
src_reg = *(src + y2 * src_step + 3 * x1);
out.x += src_reg * (x2 - xcoo) * (ycoo - y1);
src_reg = *(src + y2 * src_step + 3 * x1 + 1);
out.y += src_reg * (x2 - xcoo) * (ycoo - y1);
src_reg = *(src + y2 * src_step + 3 * x1 + 2);
out.z += src_reg * (x2 - xcoo) * (ycoo - y1);
src_reg = *(src + y2 * src_step + 3 * x2);
out.x += src_reg * (xcoo - x1) * (ycoo - y1);
src_reg = *(src + y2 * src_step + 3 * x2 + 1);
out.y += src_reg * (xcoo - x1) * (ycoo - y1);
src_reg = *(src + y2 * src_step + 3 * x2 + 2);
out.z += src_reg * (xcoo - x1) * (ycoo - y1);
}
/**(uchar3*)(dst + y * dst_step + 3 * x) = out;*/
*(dst + y * dst_step + 3 * x) = out.x;
*(dst + y * dst_step + 3 * x + 1) = out.y;
*(dst + y * dst_step + 3 * x + 2) = out.z;
} }
} }
void remap_gpu_1c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst) template <template <typename> class Filter, template <typename> class B, typename T>
void remap_caller(const DevMem2D_<T>& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_<T>& dst, T borderValue)
{ {
dim3 threads(16, 16, 1); dim3 block(32, 8);
dim3 grid(1, 1, 1); dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
grid.x = divUp(dst.cols, threads.x);
grid.y = divUp(dst.rows, threads.y);
TextureBinder tex(&tex_remap, src); B<T> brd(src.rows, src.cols, borderValue);
BorderReader< PtrStep_<T>, B<T> > brd_src(src, brd);
Filter< BorderReader< PtrStep_<T>, B<T> > > filter_src(brd_src);
remap_1c<<<grid, threads>>>(xmap.data, ymap.data, xmap.step, dst.data, dst.step, dst.cols, dst.rows); remap<<<grid, block>>>(filter_src, mapx, mapy, dst);
cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
void remap_gpu_3c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst) #define OPENCV_GPU_IMPLEMENT_REMAP_TEX(type, filter) \
template <> void remap_caller<filter, BrdReplicate>(const DevMem2D_<type>& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_<type>& dst, type) \
{ \
const dim3 block(16, 16); \
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
TextureBinder tex(&tex_remap_ ## type ## _ ## filter ## , src); \
remap_ ## type ## _ ## filter ## <<<grid, block>>>(mapx, mapy, dst); \
cudaSafeCall( cudaGetLastError() ); \
cudaSafeCall( cudaDeviceSynchronize() ); \
}
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar, LinearFilter)
#undef OPENCV_GPU_IMPLEMENT_REMAP_TEX
template <typename T> void remap_gpu(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst,
int interpolation, int borderMode, const double borderValue[4])
{ {
dim3 threads(32, 8, 1); typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D_<T>& dst, T borderValue);
dim3 grid(1, 1, 1);
grid.x = divUp(dst.cols, threads.x);
grid.y = divUp(dst.rows, threads.y);
remap_3c<<<grid, threads>>>(src.data, src.step, xmap.data, ymap.data, xmap.step, dst.data, dst.step, dst.cols, dst.rows); static const caller_t callers[2][3] =
cudaSafeCall( cudaGetLastError() ); {
{ remap_caller<PointFilter, BrdReflect101>, remap_caller<PointFilter, BrdReplicate>, remap_caller<PointFilter, BrdConstant> },
{ remap_caller<LinearFilter, BrdReflect101>, remap_caller<LinearFilter, BrdReplicate>, remap_caller<LinearFilter, BrdConstant> }
};
cudaSafeCall( cudaDeviceSynchronize() ); typename VecTraits<T>::elem_type brd[] = {(typename VecTraits<T>::elem_type)borderValue[0], (typename VecTraits<T>::elem_type)borderValue[1], (typename VecTraits<T>::elem_type)borderValue[2], (typename VecTraits<T>::elem_type)borderValue[3]};
callers[interpolation][borderMode](static_cast< DevMem2D_<T> >(src), xmap, ymap, static_cast< DevMem2D_<T> >(dst), VecTraits<T>::make(brd));
} }
template void remap_gpu<uchar >(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<uchar2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<uchar3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<uchar4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<schar>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<char2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<char3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<char4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<ushort >(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<ushort2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<ushort3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<ushort4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<short >(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<short2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<short3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<short4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<uint >(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<uint2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<uint3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<uint4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<int >(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<int2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<int3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<int4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<float >(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<float2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<float3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<float4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
/////////////////////////////////// MeanShiftfiltering /////////////////////////////////////////////// /////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////
texture<uchar4, 2> tex_meanshift; texture<uchar4, 2> tex_meanshift;
...@@ -541,9 +553,9 @@ namespace cv { namespace gpu { namespace imgproc ...@@ -541,9 +553,9 @@ namespace cv { namespace gpu { namespace imgproc
} }
} }
template <typename B> template <typename BR, typename BC>
__global__ void cornerHarris_kernel(const int cols, const int rows, const int block_size, const float k, __global__ void cornerHarris_kernel(const int cols, const int rows, const int block_size, const float k,
PtrStep dst, B border_row, B border_col) PtrStep dst, BR border_row, BC border_col)
{ {
const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x; const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y; const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
...@@ -561,10 +573,10 @@ namespace cv { namespace gpu { namespace imgproc ...@@ -561,10 +573,10 @@ namespace cv { namespace gpu { namespace imgproc
for (int i = ibegin; i < iend; ++i) for (int i = ibegin; i < iend; ++i)
{ {
int y = border_col.idx(i); int y = border_col.idx_row(i);
for (int j = jbegin; j < jend; ++j) for (int j = jbegin; j < jend; ++j)
{ {
int x = border_row.idx(j); int x = border_row.idx_col(j);
float dx = tex2D(harrisDxTex, x, y); float dx = tex2D(harrisDxTex, x, y);
float dy = tex2D(harrisDyTex, x, y); float dy = tex2D(harrisDyTex, x, y);
a += dx * dx; a += dx * dx;
...@@ -596,7 +608,7 @@ namespace cv { namespace gpu { namespace imgproc ...@@ -596,7 +608,7 @@ namespace cv { namespace gpu { namespace imgproc
{ {
case BORDER_REFLECT101_GPU: case BORDER_REFLECT101_GPU:
cornerHarris_kernel<<<grid, threads>>>( cornerHarris_kernel<<<grid, threads>>>(
cols, rows, block_size, k, dst, BrdReflect101(cols), BrdReflect101(rows)); cols, rows, block_size, k, dst, BrdRowReflect101<void>(cols), BrdColReflect101<void>(rows));
break; break;
case BORDER_REPLICATE_GPU: case BORDER_REPLICATE_GPU:
harrisDxTex.addressMode[0] = cudaAddressModeClamp; harrisDxTex.addressMode[0] = cudaAddressModeClamp;
...@@ -656,9 +668,9 @@ namespace cv { namespace gpu { namespace imgproc ...@@ -656,9 +668,9 @@ namespace cv { namespace gpu { namespace imgproc
} }
template <typename B> template <typename BR, typename BC>
__global__ void cornerMinEigenVal_kernel(const int cols, const int rows, const int block_size, __global__ void cornerMinEigenVal_kernel(const int cols, const int rows, const int block_size,
PtrStep dst, B border_row, B border_col) PtrStep dst, BR border_row, BC border_col)
{ {
const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x; const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y; const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
...@@ -676,10 +688,10 @@ namespace cv { namespace gpu { namespace imgproc ...@@ -676,10 +688,10 @@ namespace cv { namespace gpu { namespace imgproc
for (int i = ibegin; i < iend; ++i) for (int i = ibegin; i < iend; ++i)
{ {
int y = border_col.idx(i); int y = border_col.idx_row(i);
for (int j = jbegin; j < jend; ++j) for (int j = jbegin; j < jend; ++j)
{ {
int x = border_row.idx(j); int x = border_row.idx_col(j);
float dx = tex2D(minEigenValDxTex, x, y); float dx = tex2D(minEigenValDxTex, x, y);
float dy = tex2D(minEigenValDyTex, x, y); float dy = tex2D(minEigenValDyTex, x, y);
a += dx * dx; a += dx * dx;
...@@ -713,7 +725,7 @@ namespace cv { namespace gpu { namespace imgproc ...@@ -713,7 +725,7 @@ namespace cv { namespace gpu { namespace imgproc
{ {
case BORDER_REFLECT101_GPU: case BORDER_REFLECT101_GPU:
cornerMinEigenVal_kernel<<<grid, threads>>>( cornerMinEigenVal_kernel<<<grid, threads>>>(
cols, rows, block_size, dst, BrdReflect101(cols), BrdReflect101(rows)); cols, rows, block_size, dst, BrdRowReflect101<void>(cols), BrdColReflect101<void>(rows));
break; break;
case BORDER_REPLICATE_GPU: case BORDER_REPLICATE_GPU:
minEigenValDxTex.addressMode[0] = cudaAddressModeClamp; minEigenValDxTex.addressMode[0] = cudaAddressModeClamp;
...@@ -981,7 +993,7 @@ namespace cv { namespace gpu { namespace imgproc ...@@ -981,7 +993,7 @@ namespace cv { namespace gpu { namespace imgproc
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// pyrDown // pyrDown
template <typename T> __global__ void pyrDown(const PtrStep_<T> src, PtrStep_<T> dst, const BrdReflect101 rowBrd, const BrdReflect101 colBrd, int dst_cols) template <typename T, typename B> __global__ void pyrDown(const PtrStep_<T> src, PtrStep_<T> dst, const B b, int dst_cols)
{ {
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type value_type; typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type value_type;
...@@ -996,11 +1008,11 @@ namespace cv { namespace gpu { namespace imgproc ...@@ -996,11 +1008,11 @@ namespace cv { namespace gpu { namespace imgproc
sum = VecTraits<value_type>::all(0); sum = VecTraits<value_type>::all(0);
sum = sum + 0.0625f * src.ptr(colBrd.idx(src_y - 2))[rowBrd.idx(x)]; sum = sum + 0.0625f * b.at(src_y - 2, x, src.data, src.step);
sum = sum + 0.25f * src.ptr(colBrd.idx(src_y - 1))[rowBrd.idx(x)]; sum = sum + 0.25f * b.at(src_y - 1, x, src.data, src.step);
sum = sum + 0.375f * src.ptr(colBrd.idx(src_y ))[rowBrd.idx(x)]; sum = sum + 0.375f * b.at(src_y , x, src.data, src.step);
sum = sum + 0.25f * src.ptr(colBrd.idx(src_y + 1))[rowBrd.idx(x)]; sum = sum + 0.25f * b.at(src_y + 1, x, src.data, src.step);
sum = sum + 0.0625f * src.ptr(colBrd.idx(src_y + 2))[rowBrd.idx(x)]; sum = sum + 0.0625f * b.at(src_y + 2, x, src.data, src.step);
smem[2 + threadIdx.x] = sum; smem[2 + threadIdx.x] = sum;
...@@ -1010,11 +1022,11 @@ namespace cv { namespace gpu { namespace imgproc ...@@ -1010,11 +1022,11 @@ namespace cv { namespace gpu { namespace imgproc
sum = VecTraits<value_type>::all(0); sum = VecTraits<value_type>::all(0);
sum = sum + 0.0625f * src.ptr(colBrd.idx(src_y - 2))[rowBrd.idx(left_x)]; sum = sum + 0.0625f * b.at(src_y - 2, left_x, src.data, src.step);
sum = sum + 0.25f * src.ptr(colBrd.idx(src_y - 1))[rowBrd.idx(left_x)]; sum = sum + 0.25f * b.at(src_y - 1, left_x, src.data, src.step);
sum = sum + 0.375f * src.ptr(colBrd.idx(src_y ))[rowBrd.idx(left_x)]; sum = sum + 0.375f * b.at(src_y , left_x, src.data, src.step);
sum = sum + 0.25f * src.ptr(colBrd.idx(src_y + 1))[rowBrd.idx(left_x)]; sum = sum + 0.25f * b.at(src_y + 1, left_x, src.data, src.step);
sum = sum + 0.0625f * src.ptr(colBrd.idx(src_y + 2))[rowBrd.idx(left_x)]; sum = sum + 0.0625f * b.at(src_y + 2, left_x, src.data, src.step);
smem[threadIdx.x] = sum; smem[threadIdx.x] = sum;
} }
...@@ -1025,11 +1037,11 @@ namespace cv { namespace gpu { namespace imgproc ...@@ -1025,11 +1037,11 @@ namespace cv { namespace gpu { namespace imgproc
sum = VecTraits<value_type>::all(0); sum = VecTraits<value_type>::all(0);
sum = sum + 0.0625f * src.ptr(colBrd.idx(src_y - 2))[rowBrd.idx(right_x)]; sum = sum + 0.0625f * b.at(src_y - 2, right_x, src.data, src.step);
sum = sum + 0.25f * src.ptr(colBrd.idx(src_y - 1))[rowBrd.idx(right_x)]; sum = sum + 0.25f * b.at(src_y - 1, right_x, src.data, src.step);
sum = sum + 0.375f * src.ptr(colBrd.idx(src_y ))[rowBrd.idx(right_x)]; sum = sum + 0.375f * b.at(src_y , right_x, src.data, src.step);
sum = sum + 0.25f * src.ptr(colBrd.idx(src_y + 1))[rowBrd.idx(right_x)]; sum = sum + 0.25f * b.at(src_y + 1, right_x, src.data, src.step);
sum = sum + 0.0625f * src.ptr(colBrd.idx(src_y + 2))[rowBrd.idx(right_x)]; sum = sum + 0.0625f * b.at(src_y + 2, right_x, src.data, src.step);
smem[4 + threadIdx.x] = sum; smem[4 + threadIdx.x] = sum;
} }
...@@ -1055,58 +1067,68 @@ namespace cv { namespace gpu { namespace imgproc ...@@ -1055,58 +1067,68 @@ namespace cv { namespace gpu { namespace imgproc
} }
} }
template <typename T, int cn> void pyrDown_gpu(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream) template <typename T, template <typename> class B> void pyrDown_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, cudaStream_t stream)
{ {
const dim3 block(256); const dim3 block(256);
const dim3 grid(divUp(src.cols, block.x), dst.rows); const dim3 grid(divUp(src.cols, block.x), dst.rows);
BrdReflect101 rowBrd(src.cols); B<T> b(src.rows, src.cols);
BrdReflect101 colBrd(src.rows);
pyrDown<typename TypeVec<T, cn>::vec_type><<<grid, block, 0, stream>>>( pyrDown<T><<<grid, block, 0, stream>>>(src, dst, b, dst.cols);
static_cast< DevMem2D_<typename TypeVec<T, cn>::vec_type> >(src),
static_cast< DevMem2D_<typename TypeVec<T, cn>::vec_type> >(dst),
rowBrd, colBrd, dst.cols);
cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
if (stream == 0) if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
template void pyrDown_gpu<uchar, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); template <typename T, int cn> void pyrDown_gpu(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream)
template void pyrDown_gpu<uchar, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); {
template void pyrDown_gpu<uchar, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); typedef typename TypeVec<T, cn>::vec_type type;
template void pyrDown_gpu<uchar, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
typedef void (*caller_t)(const DevMem2D_<type>& src, const DevMem2D_<type>& dst, cudaStream_t stream);
static const caller_t callers[] =
{
pyrDown_caller<type, BrdReflect101>, pyrDown_caller<type, BrdReplicate>, pyrDown_caller<type, BrdConstant>
};
callers[borderType](static_cast< DevMem2D_<type> >(src), static_cast< DevMem2D_<type> >(dst), stream);
}
// Explicit instantiations of pyrDown_gpu<T, cn> for T in {uchar, schar, ushort, short, int, float}
// and cn = 1..4. This chunk is a side-by-side diff rendering: on lines that show two declarations,
// the left one is the old signature (src, dst, stream) and the right one is the new signature,
// which adds an `int borderType` parameter before the stream.
template void pyrDown_gpu<uchar, 1>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<uchar, 2>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<uchar, 3>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<uchar, 4>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<schar, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); template void pyrDown_gpu<schar, 1>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<schar, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); template void pyrDown_gpu<schar, 2>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<schar, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); template void pyrDown_gpu<schar, 3>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<schar, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); template void pyrDown_gpu<schar, 4>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<ushort, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); template void pyrDown_gpu<ushort, 1>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<ushort, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); template void pyrDown_gpu<ushort, 2>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<ushort, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); template void pyrDown_gpu<ushort, 3>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<ushort, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); template void pyrDown_gpu<ushort, 4>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<short, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); template void pyrDown_gpu<short, 1>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<short, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); template void pyrDown_gpu<short, 2>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<short, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); template void pyrDown_gpu<short, 3>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<short, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); template void pyrDown_gpu<short, 4>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<int, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); template void pyrDown_gpu<int, 1>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<int, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); template void pyrDown_gpu<int, 2>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<int, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); template void pyrDown_gpu<int, 3>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<int, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); template void pyrDown_gpu<int, 4>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<float, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); template void pyrDown_gpu<float, 1>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<float, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); template void pyrDown_gpu<float, 2>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<float, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); template void pyrDown_gpu<float, 3>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<float, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); template void pyrDown_gpu<float, 4>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// pyrUp // pyrUp
template <typename T> __global__ void pyrUp(const PtrStep_<T> src, DevMem2D_<T> dst, const BrdReflect101 rowBrd, const BrdReflect101 colBrd) template <typename T, typename B> __global__ void pyrUp(const PtrStep_<T> src, DevMem2D_<T> dst, const B b)
{ {
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type value_type; typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type value_type;
...@@ -1119,7 +1141,7 @@ namespace cv { namespace gpu { namespace imgproc ...@@ -1119,7 +1141,7 @@ namespace cv { namespace gpu { namespace imgproc
value_type sum; value_type sum;
if (threadIdx.x < 10 && threadIdx.y < 10) if (threadIdx.x < 10 && threadIdx.y < 10)
smem1[threadIdx.y][threadIdx.x] = src.ptr(colBrd.idx(blockIdx.y * blockDim.y / 2 + threadIdx.y - 1))[rowBrd.idx(blockIdx.x * blockDim.x / 2 + threadIdx.x - 1)]; smem1[threadIdx.y][threadIdx.x] = b.at(blockIdx.y * blockDim.y / 2 + threadIdx.y - 1, blockIdx.x * blockDim.x / 2 + threadIdx.x - 1, src.data, src.step);
__syncthreads(); __syncthreads();
...@@ -1175,53 +1197,63 @@ namespace cv { namespace gpu { namespace imgproc ...@@ -1175,53 +1197,63 @@ namespace cv { namespace gpu { namespace imgproc
dst.ptr(y)[x] = saturate_cast<T>(4.0f * sum); dst.ptr(y)[x] = saturate_cast<T>(4.0f * sum);
} }
// Host-side launcher for the pyrUp kernel (side-by-side diff: old text on the left, new on the right).
//   Left  (old pyrUp_gpu<T, cn>): builds two BrdReflect101 objects, rowBrd(src.cols) and
//          colBrd(src.rows), casts src/dst to DevMem2D_ of the TypeVec<T, cn> vector type and
//          passes both border objects to the kernel.
//   Right (new pyrUp_caller<T, B>): takes already-typed DevMem2D_<T> arguments, builds a single
//          border object B<T>(src.rows, src.cols) and passes it to the kernel.
// Both versions use a 16x16 thread block, size the grid with divUp over dst.cols/dst.rows,
// check cudaGetLastError() after the launch, and call cudaDeviceSynchronize() only when
// stream == 0.
template <typename T, int cn> void pyrUp_gpu(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream) template <typename T, template <typename> class B> void pyrUp_caller(const DevMem2D_<T>& src, const DevMem2D_<T>& dst, cudaStream_t stream)
{ {
const dim3 block(16, 16); const dim3 block(16, 16);
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
BrdReflect101 rowBrd(src.cols); B<T> b(src.rows, src.cols);
BrdReflect101 colBrd(src.rows);
pyrUp<typename TypeVec<T, cn>::vec_type><<<grid, block, 0, stream>>>( pyrUp<T><<<grid, block, 0, stream>>>(src, dst, b);
static_cast< DevMem2D_<typename TypeVec<T, cn>::vec_type> >(src),
static_cast< DevMem2D_<typename TypeVec<T, cn>::vec_type> >(dst),
rowBrd, colBrd);
cudaSafeCall( cudaGetLastError() ); cudaSafeCall( cudaGetLastError() );
if (stream == 0) if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() ); cudaSafeCall( cudaDeviceSynchronize() );
} }
// Right column: new pyrUp_gpu<T, cn>(src, dst, borderType, stream). It defines `type` as the
// TypeVec<T, cn> vector type, keeps a static table of pyrUp_caller instantiations in the order
// BrdReflect101, BrdReplicate, BrdConstant, and calls callers[borderType] with src/dst cast to
// DevMem2D_<type>.
// NOTE(review): borderType is used directly as the table index with no range check in this
// function — presumably the host wrapper validates/converts it first; confirm.
// Left column: explicit instantiations of the old three-argument pyrUp_gpu signature.
template void pyrUp_gpu<uchar, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); template <typename T, int cn> void pyrUp_gpu(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream)
template void pyrUp_gpu<uchar, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); {
template void pyrUp_gpu<uchar, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); typedef typename TypeVec<T, cn>::vec_type type;
template void pyrUp_gpu<uchar, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
typedef void (*caller_t)(const DevMem2D_<type>& src, const DevMem2D_<type>& dst, cudaStream_t stream);
template void pyrUp_gpu<schar, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrUp_gpu<schar, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); static const caller_t callers[] =
template void pyrUp_gpu<schar, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); {
template void pyrUp_gpu<schar, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); pyrUp_caller<type, BrdReflect101>, pyrUp_caller<type, BrdReplicate>, pyrUp_caller<type, BrdConstant>
};
template void pyrUp_gpu<ushort, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrUp_gpu<ushort, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); callers[borderType](static_cast< DevMem2D_<type> >(src), static_cast< DevMem2D_<type> >(dst), stream);
template void pyrUp_gpu<ushort, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); }
// Explicit instantiations of pyrUp_gpu<T, cn>.
//   Right column (new signature with `int borderType`): uchar, schar, ushort, short, int and
//   float, each for cn = 1..4.
//   Left column (old signature without borderType): the remaining part of the old list,
//   from <ushort, 4> through <float, 4>.
template void pyrUp_gpu<ushort, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrUp_gpu<uchar, 1>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<short, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); template void pyrUp_gpu<uchar, 2>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<short, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); template void pyrUp_gpu<uchar, 3>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<short, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); template void pyrUp_gpu<uchar, 4>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<short, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrUp_gpu<schar, 1>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<int, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); template void pyrUp_gpu<schar, 2>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<int, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); template void pyrUp_gpu<schar, 3>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<int, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); template void pyrUp_gpu<schar, 4>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<int, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrUp_gpu<ushort, 1>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<float, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); template void pyrUp_gpu<ushort, 2>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<float, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); template void pyrUp_gpu<ushort, 3>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<float, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); template void pyrUp_gpu<ushort, 4>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<float, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrUp_gpu<short, 1>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<short, 2>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<short, 3>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<short, 4>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<int, 1>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<int, 2>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<int, 3>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<int, 4>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<float, 1>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<float, 2>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<float, 3>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<float, 4>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////
// buildWarpMaps // buildWarpMaps
......
...@@ -675,31 +675,29 @@ namespace cv { namespace gpu { namespace surf ...@@ -675,31 +675,29 @@ namespace cv { namespace gpu { namespace surf
3.695352233989979e-006f, 8.444558261544444e-006f, 1.760426494001877e-005f, 3.34794785885606e-005f, 5.808438800158911e-005f, 9.193058212986216e-005f, 0.0001327334757661447f, 0.0001748319627949968f, 0.0002100782439811155f, 0.0002302826324012131f, 0.0002302826324012131f, 0.0002100782439811155f, 0.0001748319627949968f, 0.0001327334757661447f, 9.193058212986216e-005f, 5.808438800158911e-005f, 3.34794785885606e-005f, 1.760426494001877e-005f, 8.444558261544444e-006f, 3.695352233989979e-006f 3.695352233989979e-006f, 8.444558261544444e-006f, 1.760426494001877e-005f, 3.34794785885606e-005f, 5.808438800158911e-005f, 9.193058212986216e-005f, 0.0001327334757661447f, 0.0001748319627949968f, 0.0002100782439811155f, 0.0002302826324012131f, 0.0002302826324012131f, 0.0002100782439811155f, 0.0001748319627949968f, 0.0001327334757661447f, 9.193058212986216e-005f, 5.808438800158911e-005f, 3.34794785885606e-005f, 1.760426494001877e-005f, 8.444558261544444e-006f, 3.695352233989979e-006f
}; };
// SURF descriptor window sampling (side-by-side diff: removed code on the left, added on the right).
//   Left  (removed): calcWin(i, j, ...) computes pixel_x/pixel_y from the centre, win_offset and
//          the cos/sin terms and returns tex2D(imgTex, pixel_x, pixel_y). calcPATCH scales
//          (i1, j1) by win_size / (PATCH_SZ + 1), rounds down with __float2int_rd, and combines
//          the four neighbouring calcWin samples weighted by their fractional distances before
//          saturate_cast to unsigned char.
//   Right (added): WinReader stores centerX, centerY, win_offset, cos_dir and sin_dir; its
//          operator()(i, j) uses the same pixel_x/pixel_y formula as calcWin and returns the
//          tex2D(imgTex, ...) sample. It exposes elem_type = uchar.
__device__ __forceinline__ unsigned char calcWin(int i, int j, float centerX, float centerY, float win_offset, float cos_dir, float sin_dir) struct WinReader
{ {
float pixel_x = centerX + (win_offset + j) * cos_dir + (win_offset + i) * sin_dir; typedef uchar elem_type;
float pixel_y = centerY - (win_offset + j) * sin_dir + (win_offset + i) * cos_dir;
return tex2D(imgTex, pixel_x, pixel_y); __device__ __forceinline__ WinReader(float centerX_, float centerY_, float win_offset_, float cos_dir_, float sin_dir_) :
} centerX(centerX_), centerY(centerY_), win_offset(win_offset_), cos_dir(cos_dir_), sin_dir(sin_dir_)
{
__device__ unsigned char calcPATCH(int i1, int j1, float centerX, float centerY, float win_offset, float cos_dir, float sin_dir, int win_size) }
{
/* Scale the window to size PATCH_SZ so each pixel's size is s. This
makes calculating the gradients with wavelets of size 2s easy */
const float icoo = ((float)i1 / (PATCH_SZ + 1)) * win_size;
const float jcoo = ((float)j1 / (PATCH_SZ + 1)) * win_size;
const int i = __float2int_rd(icoo); __device__ __forceinline__ uchar operator ()(int i, int j) const
const int j = __float2int_rd(jcoo); {
float pixel_x = centerX + (win_offset + j) * cos_dir + (win_offset + i) * sin_dir;
float pixel_y = centerY - (win_offset + j) * sin_dir + (win_offset + i) * cos_dir;
float res = calcWin(i, j, centerX, centerY, win_offset, cos_dir, sin_dir) * (i + 1 - icoo) * (j + 1 - jcoo); return tex2D(imgTex, pixel_x, pixel_y);
res += calcWin(i + 1, j, centerX, centerY, win_offset, cos_dir, sin_dir) * (icoo - i) * (j + 1 - jcoo); }
res += calcWin(i + 1, j + 1, centerX, centerY, win_offset, cos_dir, sin_dir) * (icoo - i) * (jcoo - j);
res += calcWin(i, j + 1, centerX, centerY, win_offset, cos_dir, sin_dir) * (i + 1 - icoo) * (jcoo - j);
return saturate_cast<unsigned char>(res); float centerX;
} float centerY;
float win_offset;
float cos_dir;
float sin_dir;
};
__device__ void calc_dx_dy(float s_dx_bin[25], float s_dy_bin[25], __device__ void calc_dx_dy(float s_dx_bin[25], float s_dy_bin[25],
const float* featureX, const float* featureY, const float* featureSize, const float* featureDir) const float* featureX, const float* featureY, const float* featureSize, const float* featureDir)
...@@ -732,7 +730,13 @@ namespace cv { namespace gpu { namespace surf ...@@ -732,7 +730,13 @@ namespace cv { namespace gpu { namespace surf
const int xIndex = xBlock * 5 + threadIdx.x; const int xIndex = xBlock * 5 + threadIdx.x;
const int yIndex = yBlock * 5 + threadIdx.y; const int yIndex = yBlock * 5 + threadIdx.y;
s_PATCH[threadIdx.y][threadIdx.x] = calcPATCH(yIndex, xIndex, centerX, centerY, win_offset, cos_dir, sin_dir, win_size); const float icoo = ((float)yIndex / (PATCH_SZ + 1)) * win_size;
const float jcoo = ((float)xIndex / (PATCH_SZ + 1)) * win_size;
LinearFilter<WinReader> filter(WinReader(centerX, centerY, win_offset, cos_dir, sin_dir));
s_PATCH[threadIdx.y][threadIdx.x] = filter(icoo, jcoo);
__syncthreads(); __syncthreads();
if (threadIdx.x < 5 && threadIdx.y < 5) if (threadIdx.x < 5 && threadIdx.y < 5)
......
...@@ -885,7 +885,7 @@ void cv::gpu::GpuMat::release() ...@@ -885,7 +885,7 @@ void cv::gpu::GpuMat::release()
if( refcount && CV_XADD(refcount, -1) == 1 ) if( refcount && CV_XADD(refcount, -1) == 1 )
{ {
fastFree(refcount); fastFree(refcount);
cudaSafeCall( cudaFree(datastart) ); cudaFree(datastart);
} }
data = datastart = dataend = 0; data = datastart = dataend = 0;
step = rows = cols = 0; step = rows = cols = 0;
......
...@@ -47,7 +47,7 @@ using namespace cv::gpu; ...@@ -47,7 +47,7 @@ using namespace cv::gpu;
#if !defined (HAVE_CUDA) #if !defined (HAVE_CUDA)
// Stubs under the `#if !defined (HAVE_CUDA)` branch shown just above: each body only calls
// throw_nogpu(). The remap line shows the old four-argument stub on the left and the new stub,
// with the extra (int, int, const Scalar&) parameters, on the right; the other stubs are the
// same in both columns.
void cv::gpu::remap(const GpuMat&, GpuMat&, const GpuMat&, const GpuMat&){ throw_nogpu(); } void cv::gpu::remap(const GpuMat&, GpuMat&, const GpuMat&, const GpuMat&, int, int, const Scalar&){ throw_nogpu(); }
void cv::gpu::meanShiftFiltering(const GpuMat&, GpuMat&, int, int, TermCriteria) { throw_nogpu(); } void cv::gpu::meanShiftFiltering(const GpuMat&, GpuMat&, int, int, TermCriteria) { throw_nogpu(); }
void cv::gpu::meanShiftProc(const GpuMat&, GpuMat&, GpuMat&, int, int, TermCriteria) { throw_nogpu(); } void cv::gpu::meanShiftProc(const GpuMat&, GpuMat&, GpuMat&, int, int, TermCriteria) { throw_nogpu(); }
void cv::gpu::drawColorDisp(const GpuMat&, GpuMat&, int, Stream&) { throw_nogpu(); } void cv::gpu::drawColorDisp(const GpuMat&, GpuMat&, int, Stream&) { throw_nogpu(); }
...@@ -92,8 +92,8 @@ void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool) { throw_nogp ...@@ -92,8 +92,8 @@ void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool) { throw_nogp
// More stub definitions whose bodies only call throw_nogpu(); presumably still inside the
// !HAVE_CUDA branch (the matching #else appears further down in this rendering) — confirm.
// Only the pyrDown and pyrUp lines differ between columns: the new stubs (right) take an extra
// `int` parameter before the Stream&.
void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool, ConvolveBuf&) { throw_nogpu(); } void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool, ConvolveBuf&) { throw_nogpu(); }
void cv::gpu::downsample(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); } void cv::gpu::downsample(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::upsample(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); } void cv::gpu::upsample(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); }
void cv::gpu::pyrDown(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); } void cv::gpu::pyrDown(const GpuMat&, GpuMat&, int, Stream&) { throw_nogpu(); }
void cv::gpu::pyrUp(const GpuMat&, GpuMat&, Stream&) { throw_nogpu(); } void cv::gpu::pyrUp(const GpuMat&, GpuMat&, int, Stream&) { throw_nogpu(); }
void cv::gpu::Canny(const GpuMat&, GpuMat&, double, double, int, bool) { throw_nogpu(); } void cv::gpu::Canny(const GpuMat&, GpuMat&, double, double, int, bool) { throw_nogpu(); }
void cv::gpu::Canny(const GpuMat&, CannyBuf&, GpuMat&, double, double, int, bool) { throw_nogpu(); } void cv::gpu::Canny(const GpuMat&, CannyBuf&, GpuMat&, double, double, int, bool) { throw_nogpu(); }
void cv::gpu::Canny(const GpuMat&, const GpuMat&, GpuMat&, double, double, bool) { throw_nogpu(); } void cv::gpu::Canny(const GpuMat&, const GpuMat&, GpuMat&, double, double, bool) { throw_nogpu(); }
...@@ -104,39 +104,52 @@ void cv::gpu::CannyBuf::release() { throw_nogpu(); } ...@@ -104,39 +104,52 @@ void cv::gpu::CannyBuf::release() { throw_nogpu(); }
#else /* !defined (HAVE_CUDA) */ #else /* !defined (HAVE_CUDA) */
////////////////////////////////////////////////////////////////////////
// remap
// cv::gpu::remap and its device-side declarations (side-by-side diff: old on the left, new on the right).
//   Right (new): forward-declares imgproc::remap_gpu<T>(src, xmap, ymap, dst, interpolation,
//          borderMode, borderValue[4]) and implements
//          cv::gpu::remap(src, dst, xmap, ymap, interpolation, borderMode, borderValue).
//          Dispatch goes through a 6x4 table indexed as callers[src.depth()][src.channels() - 1].
//          Assertions before dispatch: depth <= CV_32F and channels <= 4; xmap and ymap are
//          CV_32F with equal sizes; interpolation is INTER_NEAREST or INTER_LINEAR; borderMode
//          is BORDER_REFLECT101, BORDER_REPLICATE or BORDER_CONSTANT and
//          tryConvertToGpuBorderType(borderMode, gpuBorderType) succeeds.
//          dst is created with xmap.size() and src.type(); borderValue.val is passed through.
//   Left  (old): declarations of remap_gpu_1c / remap_gpu_3c and of the meanShift*,
//          drawColorDisp_gpu and reprojectImageTo3D_gpu entry points, followed by the old
//          four-argument remap that asserted src.type() is CV_8U or CV_8UC3.
// NOTE(review): gpuBorderType is declared uninitialised and only assigned inside
// CV_Assert(tryConvertToGpuBorderType(...)); if CV_Assert can ever be compiled out, the value
// passed to the caller would be indeterminate — confirm CV_Assert is always evaluated.
// NOTE(review): the new caller_t typedef line ends with a doubled semicolon (";;").
namespace cv { namespace gpu { namespace imgproc namespace cv { namespace gpu { namespace imgproc
{ {
void remap_gpu_1c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst); template <typename T> void remap_gpu(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst,
void remap_gpu_3c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst); int interpolation, int borderMode, const double borderValue[4]);
}}}
extern "C" void meanShiftFiltering_gpu(const DevMem2D& src, DevMem2D dst, int sp, int sr, int maxIter, float eps);
extern "C" void meanShiftProc_gpu(const DevMem2D& src, DevMem2D dstr, DevMem2D dstsp, int sp, int sr, int maxIter, float eps);
void drawColorDisp_gpu(const DevMem2D& src, const DevMem2D& dst, int ndisp, const cudaStream_t& stream); void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap, int interpolation, int borderMode, const Scalar& borderValue)
void drawColorDisp_gpu(const DevMem2D_<short>& src, const DevMem2D& dst, int ndisp, const cudaStream_t& stream); {
using namespace cv::gpu::imgproc;
void reprojectImageTo3D_gpu(const DevMem2D& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream); typedef void (*caller_t)(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);;
void reprojectImageTo3D_gpu(const DevMem2D_<short>& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream); static const caller_t callers[6][4] =
}}} {
{remap_gpu<uchar>, remap_gpu<uchar2>, remap_gpu<uchar3>, remap_gpu<uchar4>},
{remap_gpu<schar>, remap_gpu<char2>, remap_gpu<char3>, remap_gpu<char4>},
{remap_gpu<ushort>, remap_gpu<ushort2>, remap_gpu<ushort3>, remap_gpu<ushort4>},
{remap_gpu<short>, remap_gpu<short2>, remap_gpu<short3>, remap_gpu<short4>},
{remap_gpu<int>, remap_gpu<int2>, remap_gpu<int3>, remap_gpu<int4>},
{remap_gpu<float>, remap_gpu<float2>, remap_gpu<float3>, remap_gpu<float4>}
};
//////////////////////////////////////////////////////////////////////// CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
// remap CV_Assert(xmap.type() == CV_32F && ymap.type() == CV_32F && xmap.size() == ymap.size());
void cv::gpu::remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap) CV_Assert(interpolation == INTER_NEAREST || interpolation == INTER_LINEAR);
{
typedef void (*remap_gpu_t)(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst);
static const remap_gpu_t callers[] = {imgproc::remap_gpu_1c, 0, imgproc::remap_gpu_3c};
CV_Assert((src.type() == CV_8U || src.type() == CV_8UC3) && xmap.type() == CV_32F && ymap.type() == CV_32F); CV_Assert(borderMode == BORDER_REFLECT101 || borderMode == BORDER_REPLICATE || borderMode == BORDER_CONSTANT);
int gpuBorderType;
CV_Assert(tryConvertToGpuBorderType(borderMode, gpuBorderType));
dst.create(xmap.size(), src.type()); dst.create(xmap.size(), src.type());
callers[src.channels() - 1](src, xmap, ymap, dst); callers[src.depth()][src.channels() - 1](src, xmap, ymap, dst, interpolation, gpuBorderType, borderValue.val);
} }
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// meanShiftFiltering_GPU // meanShiftFiltering_GPU
// Added by this change (right column only): declaration of meanShiftFiltering_gpu
// (marked extern "C") inside cv::gpu::imgproc, placed directly above cv::gpu::meanShiftFiltering.
namespace cv { namespace gpu { namespace imgproc
{
extern "C" void meanShiftFiltering_gpu(const DevMem2D& src, DevMem2D dst, int sp, int sr, int maxIter, float eps);
}}}
void cv::gpu::meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr, TermCriteria criteria) void cv::gpu::meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr, TermCriteria criteria)
{ {
if( src.empty() ) if( src.empty() )
...@@ -163,6 +176,11 @@ void cv::gpu::meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr, ...@@ -163,6 +176,11 @@ void cv::gpu::meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr,
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// meanShiftProc_GPU // meanShiftProc_GPU
// Added by this change (right column only): declaration of meanShiftProc_gpu
// (marked extern "C") inside cv::gpu::imgproc, placed directly above cv::gpu::meanShiftProc.
namespace cv { namespace gpu { namespace imgproc
{
extern "C" void meanShiftProc_gpu(const DevMem2D& src, DevMem2D dstr, DevMem2D dstsp, int sp, int sr, int maxIter, float eps);
}}}
void cv::gpu::meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int sp, int sr, TermCriteria criteria) void cv::gpu::meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int sp, int sr, TermCriteria criteria)
{ {
if( src.empty() ) if( src.empty() )
...@@ -190,6 +208,12 @@ void cv::gpu::meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int ...@@ -190,6 +208,12 @@ void cv::gpu::meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// drawColorDisp // drawColorDisp
// Added by this change (right column only): declarations of the two drawColorDisp_gpu
// overloads inside cv::gpu::imgproc — one taking a DevMem2D source, one a DevMem2D_<short> source.
namespace cv { namespace gpu { namespace imgproc
{
void drawColorDisp_gpu(const DevMem2D& src, const DevMem2D& dst, int ndisp, const cudaStream_t& stream);
void drawColorDisp_gpu(const DevMem2D_<short>& src, const DevMem2D& dst, int ndisp, const cudaStream_t& stream);
}}}
namespace namespace
{ {
template <typename T> template <typename T>
...@@ -215,6 +239,12 @@ void cv::gpu::drawColorDisp(const GpuMat& src, GpuMat& dst, int ndisp, Stream& s ...@@ -215,6 +239,12 @@ void cv::gpu::drawColorDisp(const GpuMat& src, GpuMat& dst, int ndisp, Stream& s
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
// reprojectImageTo3D // reprojectImageTo3D
// Added by this change (right column only): declarations of the two reprojectImageTo3D_gpu
// overloads inside cv::gpu::imgproc — one taking a DevMem2D disparity, one a DevMem2D_<short>
// disparity; both take a `const float* q` and a stream.
namespace cv { namespace gpu { namespace imgproc
{
void reprojectImageTo3D_gpu(const DevMem2D& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream);
void reprojectImageTo3D_gpu(const DevMem2D_<short>& disp, const DevMem2Df& xyzw, const float* q, const cudaStream_t& stream);
}}}
namespace namespace
{ {
template <typename T> template <typename T>
...@@ -1596,14 +1626,14 @@ void cv::gpu::upsample(const GpuMat& src, GpuMat& dst, Stream& stream) ...@@ -1596,14 +1626,14 @@ void cv::gpu::upsample(const GpuMat& src, GpuMat& dst, Stream& stream)
// Forward declaration of imgproc::pyrDown_gpu<T, cn> used by the host wrapper below.
// Left: old signature (src, dst, stream). Right: new signature with `int borderType` added
// before the stream.
namespace cv { namespace gpu { namespace imgproc namespace cv { namespace gpu { namespace imgproc
{ {
template <typename T, int cn> void pyrDown_gpu(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); template <typename T, int cn> void pyrDown_gpu(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
}}} }}}
void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream) void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, int borderType, Stream& stream)
{ {
using namespace cv::gpu::imgproc; using namespace cv::gpu::imgproc;
typedef void (*func_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); typedef void (*func_t)(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
static const func_t funcs[6][4] = static const func_t funcs[6][4] =
{ {
...@@ -1617,9 +1647,13 @@ void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream) ...@@ -1617,9 +1647,13 @@ void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream)
CV_Assert(src.depth() <= CV_32F && src.channels() <= 4); CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
CV_Assert(borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT);
int gpuBorderType;
CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));
dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type()); dst.create((src.rows + 1) / 2, (src.cols + 1) / 2, src.type());
funcs[src.depth()][src.channels() - 1](src, dst, StreamAccessor::getStream(stream)); funcs[src.depth()][src.channels() - 1](src, dst, gpuBorderType, StreamAccessor::getStream(stream));
} }
...@@ -1628,14 +1662,14 @@ void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream) ...@@ -1628,14 +1662,14 @@ void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream)
// Forward declaration of imgproc::pyrUp_gpu<T, cn> used by the host wrapper below.
// Left: old signature (src, dst, stream). Right: new signature with `int borderType` added
// before the stream.
namespace cv { namespace gpu { namespace imgproc namespace cv { namespace gpu { namespace imgproc
{ {
template <typename T, int cn> void pyrUp_gpu(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); template <typename T, int cn> void pyrUp_gpu(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
}}} }}}
void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream) void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, int borderType, Stream& stream)
{ {
using namespace cv::gpu::imgproc; using namespace cv::gpu::imgproc;
typedef void (*func_t)(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream); typedef void (*func_t)(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
static const func_t funcs[6][4] = static const func_t funcs[6][4] =
{ {
...@@ -1649,9 +1683,13 @@ void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream) ...@@ -1649,9 +1683,13 @@ void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream)
CV_Assert(src.depth() <= CV_32F && src.channels() <= 4); CV_Assert(src.depth() <= CV_32F && src.channels() <= 4);
CV_Assert(borderType == BORDER_REFLECT101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT);
int gpuBorderType;
CV_Assert(tryConvertToGpuBorderType(borderType, gpuBorderType));
dst.create(src.rows*2, src.cols*2, src.type()); dst.create(src.rows*2, src.cols*2, src.type());
funcs[src.depth()][src.channels() - 1](src, dst, StreamAccessor::getStream(stream)); funcs[src.depth()][src.channels() - 1](src, dst, gpuBorderType, StreamAccessor::getStream(stream));
} }
......
...@@ -48,82 +48,131 @@ ...@@ -48,82 +48,131 @@
namespace cv { namespace gpu { namespace device namespace cv { namespace gpu { namespace device
{ {
// Border helpers (side-by-side diff: old on the left, new on the right).
//   Left  (old BrdReflect101): stores last = len - 1. idx_low(i) = abs(i),
//          idx_high(i) = last - abs(last - i), idx(i) = idx_low(idx_high(i)).
//          is_range_safe(mini, maxi) is true when -last <= mini and maxi <= 2 * last.
//   Right (new BrdRowConstant<D>): stores width and a fill value val, which defaults to
//          VecTraits<D>::all(0). at_low checks only x >= 0, at_high checks only x < width, and
//          at checks both; in range they return saturate_cast<D>(data[x]), otherwise val.
//          is_range_safe always returns true.
struct BrdReflect101 //////////////////////////////////////////////////////////////
// BrdConstant
template <typename D> struct BrdRowConstant
{ {
explicit __host__ __device__ __forceinline__ BrdReflect101(int len): last(len - 1) {} typedef D result_type;
explicit __host__ __device__ __forceinline__ BrdRowConstant(int width_, const D& val_ = VecTraits<D>::all(0)) : width(width_), val(val_) {}
__device__ __forceinline__ int idx_low(int i) const template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
{ {
return abs(i); return x >= 0 ? saturate_cast<D>(data[x]) : val;
} }
__device__ __forceinline__ int idx_high(int i) const template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
{ {
return last - abs(last - i); return x < width ? saturate_cast<D>(data[x]) : val;
} }
__device__ __forceinline__ int idx(int i) const template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
{ {
return idx_low(idx_high(i)); return (x >= 0 && x < width) ? saturate_cast<D>(data[x]) : val;
} }
__host__ __device__ __forceinline__ bool is_range_safe(int mini, int maxi) const __host__ __device__ __forceinline__ bool is_range_safe(int mini, int maxi) const
{ {
return -last <= mini && maxi <= 2 * last; return true;
} }
const int last; const int width;
const D val;
}; };
// Side-by-side diff: old on the left, new on the right.
//   Left  (old BrdRowReflect101<D> : BrdReflect101): at_low / at_high read
//          data[idx_low(i)] / data[idx_high(i)] and saturate_cast the result to D.
//   Right (new BrdColConstant<D>): stores height and a fill value val, which defaults to
//          VecTraits<D>::all(0). Rows are addressed by byte offset: the element is read as
//          *(const T*)((const char*)data + y * step). at_low checks only y >= 0, at_high checks
//          only y < height, and at checks both; out of range they return val.
//          is_range_safe always returns true.
template <typename D> struct BrdRowReflect101 : BrdReflect101 template <typename D> struct BrdColConstant
{ {
explicit __host__ __device__ __forceinline__ BrdRowReflect101(int len): BrdReflect101(len) {} typedef D result_type;
template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const explicit __host__ __device__ __forceinline__ BrdColConstant(int height_, const D& val_ = VecTraits<D>::all(0)) : height(height_), val(val_) {}
template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
{ {
return saturate_cast<D>(data[idx_low(i)]); return y >= 0 ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;
} }
template <typename T> __device__ __forceinline__ D at_high(int i, const T* data) const template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
{ {
return saturate_cast<D>(data[idx_high(i)]); return y < height ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;
} }
template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
{
return (y >= 0 && y < height) ? saturate_cast<D>(*(const T*)((const char*)data + y * step)) : val;
}
__host__ __device__ __forceinline__ bool is_range_safe(int mini, int maxi) const
{
return true;
}
const int height;
const D val;
}; };
// Side-by-side diff: old on the left, new on the right.
//   Left  (old BrdColReflect101<D> : BrdReflect101): stores the row step; at_low / at_high read
//          the element at byte offset idx_low(i) * step / idx_high(i) * step.
//          NOTE(review): the old code reads through a `const D*` cast although data is
//          `const T*`; the new Brd*Constant code casts to `const T*` instead.
//   Right (new BrdConstant<D>): 2-D constant border that stores height, width and a fill value
//          val, which defaults to VecTraits<D>::all(0). at(y, x, data, step) returns the element
//          at row y (byte offset y * step) and column x when 0 <= x < width and 0 <= y < height,
//          otherwise val. The Ptr2D overload applies the same bounds check but reads through
//          src(y, x), using Ptr2D::index_type for the coordinates.
template <typename D> struct BrdColReflect101 : BrdReflect101 template <typename D> struct BrdConstant
{ {
__host__ __device__ __forceinline__ BrdColReflect101(int len, size_t step): BrdReflect101(len), step(step) {} typedef D result_type;
template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const __host__ __device__ __forceinline__ BrdConstant(int height_, int width_, const D& val_ = VecTraits<D>::all(0)) :
height(height_), width(width_), val(val_)
{ {
return saturate_cast<D>(*(const D*)((const char*)data + idx_low(i)*step));
} }
template <typename T> __device__ __forceinline__ D at_high(int i, const T* data) const template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
{ {
return saturate_cast<D>(*(const D*)((const char*)data + idx_high(i)*step)); return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(((const T*)((const uchar*)data + y * step))[x]) : val;
} }
const size_t step; template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
{
return (x >= 0 && x < width && y >= 0 && y < height) ? saturate_cast<D>(src(y, x)) : val;
}
const int height;
const int width;
const D val;
}; };
struct BrdReplicate //////////////////////////////////////////////////////////////
// BrdReplicate
template <typename D> struct BrdRowReplicate
{ {
explicit __host__ __device__ __forceinline__ BrdReplicate(int len): last(len - 1) {} typedef D result_type;
explicit __host__ __device__ __forceinline__ BrdRowReplicate(int width) : last_col(width - 1) {}
template <typename U> __host__ __device__ __forceinline__ BrdRowReplicate(int width, U) : last_col(width - 1) {}
__device__ __forceinline__ int idx_col_low(int x) const
{
return ::max(x, 0);
}
__device__ __forceinline__ int idx_col_high(int x) const
{
return ::min(x, last_col);
}
__device__ __forceinline__ int idx_col(int x) const
{
return idx_col_low(idx_col_high(x));
}
__device__ __forceinline__ int idx_low(int i) const template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
{ {
return ::max(i, 0); return saturate_cast<D>(data[idx_col_low(x)]);
} }
__device__ __forceinline__ int idx_high(int i) const template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
{ {
return ::min(i, last); return saturate_cast<D>(data[idx_col_high(x)]);
} }
__device__ __forceinline__ int idx(int i) const template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
{ {
return idx_low(idx_high(i)); return saturate_cast<D>(data[idx_col(x)]);
} }
bool is_range_safe(int mini, int maxi) const bool is_range_safe(int mini, int maxi) const
...@@ -131,103 +180,328 @@ namespace cv { namespace gpu { namespace device ...@@ -131,103 +180,328 @@ namespace cv { namespace gpu { namespace device
return true; return true;
} }
const int last; const int last_col;
}; };
template <typename D> struct BrdRowReplicate : BrdReplicate template <typename D> struct BrdColReplicate
{ {
explicit __host__ __device__ __forceinline__ BrdRowReplicate(int len): BrdReplicate(len) {} typedef D result_type;
template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const explicit __host__ __device__ __forceinline__ BrdColReplicate(int height) : last_row(height - 1) {}
template <typename U> __host__ __device__ __forceinline__ BrdColReplicate(int height, U) : last_row(height - 1) {}
__device__ __forceinline__ int idx_row_low(int y) const
{ {
return saturate_cast<D>(data[idx_low(i)]); return ::max(y, 0);
} }
template <typename T> __device__ __forceinline__ D at_high(int i, const T* data) const __device__ __forceinline__ int idx_row_high(int y) const
{ {
return saturate_cast<D>(data[idx_high(i)]); return ::min(y, last_row);
}
__device__ __forceinline__ int idx_row(int y) const
{
return idx_row_low(idx_row_high(y));
}
template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const T*)((const char*)data + idx_row_low(y) * step));
}
template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const T*)((const char*)data + idx_row_high(y) * step));
} }
};
template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const T*)((const char*)data + idx_row(y) * step));
}
template <typename D> struct BrdColReplicate : BrdReplicate bool is_range_safe(int mini, int maxi) const
{
return true;
}
const int last_row;
};
template <typename D> struct BrdReplicate
{ {
__host__ __device__ __forceinline__ BrdColReplicate(int len, size_t step): BrdReplicate(len), step(step) {} typedef D result_type;
__host__ __device__ __forceinline__ BrdReplicate(int height, int width) :
last_row(height - 1), last_col(width - 1)
{
}
template <typename U>
__host__ __device__ __forceinline__ BrdReplicate(int height, int width, U) :
last_row(height - 1), last_col(width - 1)
{
}
__device__ __forceinline__ int idx_row_low(int y) const
{
return ::max(y, 0);
}
__device__ __forceinline__ float idx_row_low(float y) const
{
return ::fmax(y, 0.0f);
}
__device__ __forceinline__ int idx_row_high(int y) const
{
return ::min(y, last_row);
}
__device__ __forceinline__ float idx_row_high(float y) const
{
return ::fmin(y, last_row);
}
template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const __device__ __forceinline__ int idx_row(int y) const
{
return idx_row_low(idx_row_high(y));
}
__device__ __forceinline__ float idx_row(float y) const
{
return idx_row_low(idx_row_high(y));
}
__device__ __forceinline__ int idx_col_low(int x) const
{
return ::max(x, 0);
}
// Clamps a fractional column coordinate from below so it never goes left of
// column 0. The zero is written as a float literal, the same way
// idx_row_low(float) writes it, so ::fmax is called with two float arguments
// instead of a float/int mix.
__device__ __forceinline__ float idx_col_low(float x) const
{
    return ::fmax(x, 0.0f);
}
__device__ __forceinline__ int idx_col_high(int x) const
{
return ::min(x, last_col);
}
__device__ __forceinline__ float idx_col_high(float x) const
{
return ::fmin(x, last_col);
}
__device__ __forceinline__ int idx_col(int x) const
{
return idx_col_low(idx_col_high(x));
}
__device__ __forceinline__ float idx_col(float x) const
{ {
return saturate_cast<D>(*(const D*)((const char*)data + idx_low(i)*step)); return idx_col_low(idx_col_high(x));
} }
template <typename T> __device__ __forceinline__ D at_high(int i, const T* data) const template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
{ {
return saturate_cast<D>(*(const D*)((const char*)data + idx_high(i)*step)); return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
} }
const size_t step; template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
{
return saturate_cast<D>(src(idx_row(y), idx_col(x)));
}
const int last_row;
const int last_col;
}; };
template <typename D> struct BrdRowConstant //////////////////////////////////////////////////////////////
// BrdReflect101
template <typename D> struct BrdRowReflect101
{ {
explicit __host__ __device__ __forceinline__ BrdRowConstant(int len_, const D& val_ = VecTraits<D>::all(0)): len(len_), val(val_) {} typedef D result_type;
explicit __host__ __device__ __forceinline__ BrdRowReflect101(int width) : last_col(width - 1) {}
template <typename U> __host__ __device__ __forceinline__ BrdRowReflect101(int width, U) : last_col(width - 1) {}
template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const __device__ __forceinline__ int idx_col_low(int x) const
{ {
return i >= 0 ? saturate_cast<D>(data[i]) : val; return ::abs(x);
} }
template <typename T> __device__ __forceinline__ D at_high(int i, const T* data) const __device__ __forceinline__ int idx_col_high(int x) const
{ {
return i < len ? saturate_cast<D>(data[i]) : val; return last_col - ::abs(last_col - x);
}
__device__ __forceinline__ int idx_col(int x) const
{
return idx_col_low(idx_col_high(x));
}
template <typename T> __device__ __forceinline__ D at_low(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col_low(x)]);
}
template <typename T> __device__ __forceinline__ D at_high(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col_high(x)]);
}
template <typename T> __device__ __forceinline__ D at(int x, const T* data) const
{
return saturate_cast<D>(data[idx_col(x)]);
} }
__host__ __device__ __forceinline__ bool is_range_safe(int mini, int maxi) const __host__ __device__ __forceinline__ bool is_range_safe(int mini, int maxi) const
{ {
return true; return -last_col <= mini && maxi <= 2 * last_col;
} }
const int len; const int last_col;
const D val;
}; };
template <typename D> struct BrdColConstant template <typename D> struct BrdColReflect101
{ {
__host__ __device__ __forceinline__ BrdColConstant(int len_, size_t step_, const D& val_ = VecTraits<D>::all(0)): len(len_), step(step_), val(val_) {} typedef D result_type;
template <typename T> __device__ __forceinline__ D at_low(int i, const T* data) const explicit __host__ __device__ __forceinline__ BrdColReflect101(int height) : last_row(height - 1) {}
template <typename U> __host__ __device__ __forceinline__ BrdColReflect101(int height, U) : last_row(height - 1) {}
__device__ __forceinline__ int idx_row_low(int y) const
{
return ::abs(y);
}
__device__ __forceinline__ int idx_row_high(int y) const
{ {
return i >= 0 ? saturate_cast<D>(*(const D*)((const char*)data + i*step)) : val; return last_row - ::abs(last_row - y);
} }
template <typename T> __device__ __forceinline__ D at_high(int i, const T* data) const __device__ __forceinline__ int idx_row(int y) const
{ {
return i < len ? saturate_cast<D>(*(const D*)((const char*)data + i*step)) : val; return idx_row_low(idx_row_high(y));
}
// Reads one element of a column at row `y`, mapping rows above the image back
// inside with idx_row_low (|y|). `data` is the address read for row 0 and
// `step` is the row pitch in bytes (it is added to a char pointer).
// The addressed element is loaded as the source type T and only then converted
// to the result type D; loading it as D would fetch the wrong bytes whenever
// T and D differ. BrdColReplicate::at_low loads through T in the same way.
template <typename T> __device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
{
    return saturate_cast<D>(*(const T*)((const char*)data + idx_row_low(y) * step));
}
// Reads one element of a column at row `y`, mapping rows below the image back
// inside with idx_row_high (last_row - |last_row - y|). `data` is the address
// read for row 0 and `step` is the row pitch in bytes (it is added to a char
// pointer).
// The addressed element is loaded as the source type T and only then converted
// to the result type D; loading it as D would fetch the wrong bytes whenever
// T and D differ. BrdColReplicate::at_high loads through T in the same way.
template <typename T> __device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
{
    return saturate_cast<D>(*(const T*)((const char*)data + idx_row_high(y) * step));
}
template <typename T> __device__ __forceinline__ D at(int y, const T* data, size_t step) const
{
return saturate_cast<D>(*(const D*)((const char*)data + idx_row(y) * step));
} }
__host__ __device__ __forceinline__ bool is_range_safe(int mini, int maxi) const __host__ __device__ __forceinline__ bool is_range_safe(int mini, int maxi) const
{ {
return true; return -last_row <= mini && maxi <= 2 * last_row;
} }
const int len; const int last_row;
const size_t step;
const D val;
}; };
template <typename OutT> struct BrdConstant template <typename D> struct BrdReflect101
{ {
__host__ __device__ __forceinline__ BrdConstant(int w, int h, const OutT &val = VecTraits<OutT>::all(0)) : w(w), h(h), val(val) {} typedef D result_type;
__host__ __device__ __forceinline__ BrdReflect101(int height, int width) :
last_row(height - 1), last_col(width - 1)
{
}
template <typename U>
__host__ __device__ __forceinline__ BrdReflect101(int height, int width, U) :
last_row(height - 1), last_col(width - 1)
{
}
__device__ __forceinline__ int idx_row_low(int y) const
{
return ::abs(y);
}
__device__ __forceinline__ float idx_row_low(float y) const
{
return ::fabs(y);
}
__device__ __forceinline__ int idx_row_high(int y) const
{
return last_row - ::abs(last_row - y);
}
__device__ __forceinline__ float idx_row_high(float y) const
{
return last_row - ::fabs(last_row - y);
}
__device__ __forceinline__ int idx_row(int y) const
{
return idx_row_low(idx_row_high(y));
}
__device__ __forceinline__ float idx_row(float y) const
{
return idx_row_low(idx_row_high(y));
}
__device__ __forceinline__ int idx_col_low(int x) const
{
return ::abs(x);
}
__device__ __forceinline__ float idx_col_low(float x) const
{
return ::fabs(x);
}
__device__ __forceinline__ int idx_col_high(int x) const
{
return last_col - ::abs(last_col - x);
}
__device__ __forceinline__ float idx_col_high(float x) const
{
return last_col - ::fabs(last_col - x);
}
__device__ __forceinline__ int idx_col(int x) const
{
return idx_col_low(idx_col_high(x));
}
__device__ __forceinline__ float idx_col(float x) const
{
return idx_col_low(idx_col_high(x));
}
template <typename T> __device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
{
return saturate_cast<D>(((const T*)((const char*)data + idx_row(y) * step))[idx_col(x)]);
}
template <typename Ptr2D> __device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
{
return saturate_cast<D>(src(idx_row(y), idx_col(x)));
}
const int last_row;
const int last_col;
};
//////////////////////////////////////////////////////////////
// BorderReader
template <typename Ptr2D, typename B> struct BorderReader
{
typedef typename B::result_type elem_type;
typedef typename Ptr2D::index_type index_type;
__host__ __device__ __forceinline__ BorderReader(const Ptr2D& ptr_, const B& b_) : ptr(ptr_), b(b_) {}
__device__ __forceinline__ OutT at(int x, int y, const uchar* data, int step) const __device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const
{ {
if (x >= 0 && x <= w - 1 && y >= 0 && y <= h - 1) return b.at(y, x, ptr);
return ((const OutT*)(data + y * step))[x];
return val;
} }
const int w; const Ptr2D ptr;
const int h; const B b;
OutT val;
}; };
}}} }}}
......
...@@ -309,7 +309,6 @@ namespace cv { namespace gpu { namespace device ...@@ -309,7 +309,6 @@ namespace cv { namespace gpu { namespace device
U vec1Vals[MAX_LEN / THREAD_DIM]; U vec1Vals[MAX_LEN / THREAD_DIM];
}; };
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Solve linear system // Solve linear system
...@@ -364,6 +363,60 @@ namespace cv { namespace gpu { namespace device ...@@ -364,6 +363,60 @@ namespace cv { namespace gpu { namespace device
return false; return false;
} }
///////////////////////////////////////////////////////////////////////////////
// Filters
// Nearest-neighbour sampler: wraps a 2D source and lets it be addressed with
// fractional (y, x) coordinates by rounding each coordinate to an integer.
template <typename Ptr2D> struct PointFilter
{
    typedef typename Ptr2D::elem_type elem_type;
    typedef float index_type;

    // Stores a copy of the wrapped source.
    explicit __host__ __device__ __forceinline__ PointFilter(const Ptr2D& src_) : src(src_) {}

    // Returns the source element at the rounded (y, x) position.
    __device__ __forceinline__ elem_type operator ()(float y, float x) const
    {
        const int row = __float2int_rn(y);
        const int col = __float2int_rn(x);
        return src(row, col);
    }

    const Ptr2D src;
};
// Bilinear sampler: wraps a 2D source and lets it be addressed with fractional
// (y, x) coordinates by blending the four surrounding integer positions.
template <typename Ptr2D> struct LinearFilter
{
    typedef typename Ptr2D::elem_type elem_type;
    typedef float index_type;

    // Stores a copy of the wrapped source.
    explicit __host__ __device__ __forceinline__ LinearFilter(const Ptr2D& src_) : src(src_) {}

    // Returns the weighted sum of the four neighbours of (y, x), accumulated in
    // a float vector with the same channel count as elem_type and converted
    // back with saturate_cast.
    __device__ __forceinline__ elem_type operator ()(float y, float x) const
    {
        typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;

        // Integer cell around the sample point: (top, left) is the rounded-down
        // coordinate, (bottom, right) is one step further.
        const int left = __float2int_rd(x);
        const int top = __float2int_rd(y);
        const int right = left + 1;
        const int bottom = top + 1;

        // Each corner is weighted by its distance products to the opposite corner.
        const float w_top_left = (right - x) * (bottom - y);
        const float w_top_right = (x - left) * (bottom - y);
        const float w_bottom_left = (right - x) * (y - top);
        const float w_bottom_right = (x - left) * (y - top);

        const elem_type top_left = src(top, left);
        const elem_type top_right = src(top, right);
        const elem_type bottom_left = src(bottom, left);
        const elem_type bottom_right = src(bottom, right);

        // Accumulate in the same corner order: TL, TR, BL, BR.
        work_type acc = VecTraits<work_type>::all(0);
        acc = acc + top_left * w_top_left;
        acc = acc + top_right * w_top_right;
        acc = acc + bottom_left * w_bottom_left;
        acc = acc + bottom_right * w_bottom_right;

        return saturate_cast<elem_type>(acc);
    }

    const Ptr2D src;
};
}}} }}}
#endif // __OPENCV_GPU_UTILITY_HPP__ #endif // __OPENCV_GPU_UTILITY_HPP__
...@@ -166,6 +166,7 @@ namespace cv { namespace gpu { namespace device ...@@ -166,6 +166,7 @@ namespace cv { namespace gpu { namespace device
enum {cn=1}; \ enum {cn=1}; \
static __device__ __host__ __forceinline__ type all(type v) {return v;} \ static __device__ __host__ __forceinline__ type all(type v) {return v;} \
static __device__ __host__ __forceinline__ type make(type x) {return x;} \ static __device__ __host__ __forceinline__ type make(type x) {return x;} \
static __device__ __host__ __forceinline__ type make(const type* v) {return *v;} \
}; \ }; \
template<> struct VecTraits<type ## 1> \ template<> struct VecTraits<type ## 1> \
{ \ { \
...@@ -173,6 +174,7 @@ namespace cv { namespace gpu { namespace device ...@@ -173,6 +174,7 @@ namespace cv { namespace gpu { namespace device
enum {cn=1}; \ enum {cn=1}; \
static __device__ __host__ __forceinline__ type ## 1 all(type v) {return make_ ## type ## 1(v);} \ static __device__ __host__ __forceinline__ type ## 1 all(type v) {return make_ ## type ## 1(v);} \
static __device__ __host__ __forceinline__ type ## 1 make(type x) {return make_ ## type ## 1(x);} \ static __device__ __host__ __forceinline__ type ## 1 make(type x) {return make_ ## type ## 1(x);} \
static __device__ __host__ __forceinline__ type ## 1 make(const type* v) {return make_ ## type ## 1(*v);} \
}; \ }; \
template<> struct VecTraits<type ## 2> \ template<> struct VecTraits<type ## 2> \
{ \ { \
...@@ -180,6 +182,7 @@ namespace cv { namespace gpu { namespace device ...@@ -180,6 +182,7 @@ namespace cv { namespace gpu { namespace device
enum {cn=2}; \ enum {cn=2}; \
static __device__ __host__ __forceinline__ type ## 2 all(type v) {return make_ ## type ## 2(v, v);} \ static __device__ __host__ __forceinline__ type ## 2 all(type v) {return make_ ## type ## 2(v, v);} \
static __device__ __host__ __forceinline__ type ## 2 make(type x, type y) {return make_ ## type ## 2(x, y);} \ static __device__ __host__ __forceinline__ type ## 2 make(type x, type y) {return make_ ## type ## 2(x, y);} \
static __device__ __host__ __forceinline__ type ## 2 make(const type* v) {return make_ ## type ## 2(v[0], v[1]);} \
}; \ }; \
template<> struct VecTraits<type ## 3> \ template<> struct VecTraits<type ## 3> \
{ \ { \
...@@ -187,6 +190,7 @@ namespace cv { namespace gpu { namespace device ...@@ -187,6 +190,7 @@ namespace cv { namespace gpu { namespace device
enum {cn=3}; \ enum {cn=3}; \
static __device__ __host__ __forceinline__ type ## 3 all(type v) {return make_ ## type ## 3(v, v, v);} \ static __device__ __host__ __forceinline__ type ## 3 all(type v) {return make_ ## type ## 3(v, v, v);} \
static __device__ __host__ __forceinline__ type ## 3 make(type x, type y, type z) {return make_ ## type ## 3(x, y, z);} \ static __device__ __host__ __forceinline__ type ## 3 make(type x, type y, type z) {return make_ ## type ## 3(x, y, z);} \
static __device__ __host__ __forceinline__ type ## 3 make(const type* v) {return make_ ## type ## 3(v[0], v[1], v[2]);} \
}; \ }; \
template<> struct VecTraits<type ## 4> \ template<> struct VecTraits<type ## 4> \
{ \ { \
...@@ -194,6 +198,7 @@ namespace cv { namespace gpu { namespace device ...@@ -194,6 +198,7 @@ namespace cv { namespace gpu { namespace device
enum {cn=4}; \ enum {cn=4}; \
static __device__ __host__ __forceinline__ type ## 4 all(type v) {return make_ ## type ## 4(v, v, v, v);} \ static __device__ __host__ __forceinline__ type ## 4 all(type v) {return make_ ## type ## 4(v, v, v, v);} \
static __device__ __host__ __forceinline__ type ## 4 make(type x, type y, type z, type w) {return make_ ## type ## 4(x, y, z, w);} \ static __device__ __host__ __forceinline__ type ## 4 make(type x, type y, type z, type w) {return make_ ## type ## 4(x, y, z, w);} \
static __device__ __host__ __forceinline__ type ## 4 make(const type* v) {return make_ ## type ## 4(v[0], v[1], v[2], v[3]);} \
}; \ }; \
template<> struct VecTraits<type ## 8> \ template<> struct VecTraits<type ## 8> \
{ \ { \
...@@ -201,10 +206,10 @@ namespace cv { namespace gpu { namespace device ...@@ -201,10 +206,10 @@ namespace cv { namespace gpu { namespace device
enum {cn=8}; \ enum {cn=8}; \
static __device__ __host__ __forceinline__ type ## 8 all(type v) {return make_ ## type ## 8(v, v, v, v, v, v, v, v);} \ static __device__ __host__ __forceinline__ type ## 8 all(type v) {return make_ ## type ## 8(v, v, v, v, v, v, v, v);} \
static __device__ __host__ __forceinline__ type ## 8 make(type a0, type a1, type a2, type a3, type a4, type a5, type a6, type a7) {return make_ ## type ## 8(a0, a1, a2, a3, a4, a5, a6, a7);} \ static __device__ __host__ __forceinline__ type ## 8 make(type a0, type a1, type a2, type a3, type a4, type a5, type a6, type a7) {return make_ ## type ## 8(a0, a1, a2, a3, a4, a5, a6, a7);} \
static __device__ __host__ __forceinline__ type ## 8 make(const type* v) {return make_ ## type ## 8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);} \
}; };
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(uchar) OPENCV_GPU_IMPLEMENT_VEC_TRAITS(uchar)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(char)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(ushort) OPENCV_GPU_IMPLEMENT_VEC_TRAITS(ushort)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(short) OPENCV_GPU_IMPLEMENT_VEC_TRAITS(short)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(int) OPENCV_GPU_IMPLEMENT_VEC_TRAITS(int)
...@@ -214,12 +219,61 @@ namespace cv { namespace gpu { namespace device ...@@ -214,12 +219,61 @@ namespace cv { namespace gpu { namespace device
#undef OPENCV_GPU_IMPLEMENT_VEC_TRAITS #undef OPENCV_GPU_IMPLEMENT_VEC_TRAITS
// Traits for plain `char`, described as a one-channel element.
template<> struct VecTraits<char>
{
    typedef char elem_type;
    enum { cn = 1 };

    // A value "filled" with v is v itself.
    static __device__ __host__ __forceinline__ char all(char v) { return v; }
    // Builds the value from its single component.
    static __device__ __host__ __forceinline__ char make(char x) { return x; }
    // Builds the value from the first element that x points to.
    static __device__ __host__ __forceinline__ char make(const char* x) { return x[0]; }
};
template<> struct VecTraits<schar> template<> struct VecTraits<schar>
{ {
typedef schar elem_type; typedef schar elem_type;
enum {cn=1}; enum {cn=1};
static __device__ __host__ __forceinline__ schar all(schar v) {return v;} static __device__ __host__ __forceinline__ schar all(schar v) {return v;}
static __device__ __host__ __forceinline__ schar make(schar x) {return x;} static __device__ __host__ __forceinline__ schar make(schar x) {return x;}
static __device__ __host__ __forceinline__ schar make(const schar* x) {return *x;}
};
// Traits for the one-component vector type char1, whose component type is schar.
template<> struct VecTraits<char1>
{
    typedef schar elem_type;
    enum { cn = 1 };

    // char1 whose only component is v.
    static __device__ __host__ __forceinline__ char1 all(schar v) { return make(v); }
    // char1 built from its component.
    static __device__ __host__ __forceinline__ char1 make(schar x) { return make_char1(x); }
    // char1 built from the first element that v points to.
    static __device__ __host__ __forceinline__ char1 make(const schar* v) { return make(v[0]); }
};
// Traits for the two-component vector type char2, whose component type is schar.
template<> struct VecTraits<char2>
{
    typedef schar elem_type;
    enum { cn = 2 };

    // char2 with both components set to v.
    static __device__ __host__ __forceinline__ char2 all(schar v) { return make(v, v); }
    // char2 built from individual components.
    static __device__ __host__ __forceinline__ char2 make(schar x, schar y) { return make_char2(x, y); }
    // char2 built from the first two elements that v points to.
    static __device__ __host__ __forceinline__ char2 make(const schar* v) { return make(v[0], v[1]); }
};
// Traits for the three-component vector type char3, whose component type is schar.
template<> struct VecTraits<char3>
{
    typedef schar elem_type;
    enum { cn = 3 };

    // char3 with every component set to v.
    static __device__ __host__ __forceinline__ char3 all(schar v) { return make(v, v, v); }
    // char3 built from individual components.
    static __device__ __host__ __forceinline__ char3 make(schar x, schar y, schar z) { return make_char3(x, y, z); }
    // char3 built from the first three elements that v points to.
    static __device__ __host__ __forceinline__ char3 make(const schar* v) { return make(v[0], v[1], v[2]); }
};
// Traits for the four-component vector type char4, whose component type is schar.
template<> struct VecTraits<char4>
{
    typedef schar elem_type;
    enum { cn = 4 };

    // char4 with every component set to v.
    static __device__ __host__ __forceinline__ char4 all(schar v) { return make(v, v, v, v); }
    // char4 built from individual components.
    static __device__ __host__ __forceinline__ char4 make(schar x, schar y, schar z, schar w) { return make_char4(x, y, z, w); }
    // char4 built from the first four elements that v points to.
    static __device__ __host__ __forceinline__ char4 make(const schar* v) { return make(v[0], v[1], v[2], v[3]); }
};
template<> struct VecTraits<char8>
{
typedef schar elem_type;
enum {cn=8};
static __device__ __host__ __forceinline__ char8 all(schar v) {return make_char8(v, v, v, v, v, v, v, v);}
static __device__ __host__ __forceinline__ char8 make(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7) {return make_char8(a0, a1, a2, a3, a4, a5, a6, a7);}
static __device__ __host__ __forceinline__ char8 make(const schar* v) {return make_char8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);}
}; };
}}} }}}
......
...@@ -181,15 +181,18 @@ INSTANTIATE_TEST_CASE_P(ImgProc, Resize, testing::Combine( ...@@ -181,15 +181,18 @@ INSTANTIATE_TEST_CASE_P(ImgProc, Resize, testing::Combine(
/////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////////
// remap // remap
struct Remap : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> > struct Remap : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int, int> >
{ {
cv::gpu::DeviceInfo devInfo; cv::gpu::DeviceInfo devInfo;
int type; int type;
int interpolation;
int borderType;
cv::Size size; cv::Size size;
cv::Mat src; cv::Mat src;
cv::Mat xmap; cv::Mat xmap;
cv::Mat ymap; cv::Mat ymap;
cv::Scalar borderValue;
cv::Mat dst_gold; cv::Mat dst_gold;
...@@ -197,43 +200,83 @@ struct Remap : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> ...@@ -197,43 +200,83 @@ struct Remap : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int>
{ {
devInfo = std::tr1::get<0>(GetParam()); devInfo = std::tr1::get<0>(GetParam());
type = std::tr1::get<1>(GetParam()); type = std::tr1::get<1>(GetParam());
interpolation = std::tr1::get<2>(GetParam());
borderType = std::tr1::get<3>(GetParam());
cv::gpu::setDevice(devInfo.deviceID()); cv::gpu::setDevice(devInfo.deviceID());
cv::RNG& rng = cvtest::TS::ptr()->get_rng(); cv::RNG& rng = cvtest::TS::ptr()->get_rng();
size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150)); size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
src = cvtest::randomMat(rng, size, type, 0.0, 127.0, false); src = cvtest::randomMat(rng, size, type, 0.0, 256.0, false);
xmap = cvtest::randomMat(rng, size, CV_32FC1, 0.0, src.cols - 1, false);
ymap = cvtest::randomMat(rng, size, CV_32FC1, 0.0, src.rows - 1, false); xmap.create(size, CV_32FC1);
ymap.create(size, CV_32FC1);
for (int y = 0; y < src.rows; ++y)
{
float* xmap_row = xmap.ptr<float>(y);
float* ymap_row = ymap.ptr<float>(y);
for (int x = 0; x < src.cols; ++x)
{
xmap_row[x] = src.cols - 1 - x;
ymap_row[x] = src.rows - 1 - y;
}
}
borderValue[0] = rng.uniform(0.0, 256.0);
borderValue[1] = rng.uniform(0.0, 256.0);
borderValue[2] = rng.uniform(0.0, 256.0);
borderValue[3] = rng.uniform(0.0, 256.0);
cv::remap(src, dst_gold, xmap, ymap, cv::INTER_LINEAR, cv::BORDER_WRAP); cv::remap(src, dst_gold, xmap, ymap, interpolation, borderType, borderValue);
} }
}; };
TEST_P(Remap, Accuracy) TEST_P(Remap, Accuracy)
{ {
static const char* interpolations_str[] = {"INTER_NEAREST", "INTER_LINEAR", "INTER_CUBIC"};
static const char* borderTypes_str[] = {"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101"};
const char* interpolationStr = interpolations_str[interpolation];
const char* borderTypeStr = borderTypes_str[borderType];
PRINT_PARAM(devInfo); PRINT_PARAM(devInfo);
PRINT_TYPE(type); PRINT_TYPE(type);
PRINT_PARAM(interpolationStr);
PRINT_PARAM(borderTypeStr);
PRINT_PARAM(size); PRINT_PARAM(size);
PRINT_PARAM(borderValue);
cv::Mat dst; cv::Mat dst;
ASSERT_NO_THROW( ASSERT_NO_THROW(
cv::gpu::GpuMat gpuRes; cv::gpu::GpuMat gpuRes;
cv::gpu::remap(cv::gpu::GpuMat(src), gpuRes, cv::gpu::GpuMat(xmap), cv::gpu::GpuMat(ymap)); cv::gpu::remap(cv::gpu::GpuMat(src), gpuRes, cv::gpu::GpuMat(xmap), cv::gpu::GpuMat(ymap), interpolation, borderType, borderValue);
gpuRes.download(dst); gpuRes.download(dst);
); );
EXPECT_MAT_SIMILAR(dst_gold, dst, 0.5); EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);
} }
INSTANTIATE_TEST_CASE_P(ImgProc, Remap, testing::Combine( INSTANTIATE_TEST_CASE_P
testing::ValuesIn(devices()), (
testing::Values(CV_8UC1, CV_8UC3))); ImgProc, Remap, testing::Combine
(
testing::ValuesIn(devices()),
testing::Values
(
CV_8UC1, CV_8UC3, CV_8UC4,
CV_32FC1, CV_32FC3, CV_32FC4
),
testing::Values(cv::INTER_NEAREST, cv::INTER_LINEAR),
testing::Values(cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT)
)
);
/////////////////////////////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////////////////////////////
// copyMakeBorder // copyMakeBorder
......
...@@ -79,9 +79,9 @@ TEST(remap) ...@@ -79,9 +79,9 @@ TEST(remap)
Mat src, dst, xmap, ymap; Mat src, dst, xmap, ymap;
gpu::GpuMat d_src, d_dst, d_xmap, d_ymap; gpu::GpuMat d_src, d_dst, d_xmap, d_ymap;
for (int size = 1000; size <= 8000; size *= 2) for (int size = 1000; size <= 4000; size *= 2)
{ {
SUBTEST << "src " << size << " and 8U, 32F maps"; SUBTEST << "src " << size << ", 8UC1";
gen(src, size, size, CV_8UC1, 0, 256); gen(src, size, size, CV_8UC1, 0, 256);
...@@ -101,7 +101,112 @@ TEST(remap) ...@@ -101,7 +101,112 @@ TEST(remap)
dst.create(xmap.size(), src.type()); dst.create(xmap.size(), src.type());
CPU_ON; CPU_ON;
remap(src, dst, xmap, ymap, INTER_LINEAR); remap(src, dst, xmap, ymap, INTER_LINEAR, BORDER_REPLICATE);
CPU_OFF;
d_src = src;
d_xmap = xmap;
d_ymap = ymap;
d_dst.create(d_xmap.size(), d_src.type());
GPU_ON;
gpu::remap(d_src, d_dst, d_xmap, d_ymap, INTER_LINEAR, BORDER_REPLICATE);
GPU_OFF;
}
for (int size = 1000; size <= 4000; size *= 2)
{
SUBTEST << "src " << size << ", 8UC3";
gen(src, size, size, CV_8UC3, 0, 256);
xmap.create(size, size, CV_32F);
ymap.create(size, size, CV_32F);
for (int i = 0; i < size; ++i)
{
float* xmap_row = xmap.ptr<float>(i);
float* ymap_row = ymap.ptr<float>(i);
for (int j = 0; j < size; ++j)
{
xmap_row[j] = (j - size * 0.5f) * 0.75f + size * 0.5f;
ymap_row[j] = (i - size * 0.5f) * 0.75f + size * 0.5f;
}
}
dst.create(xmap.size(), src.type());
CPU_ON;
remap(src, dst, xmap, ymap, INTER_LINEAR, BORDER_REPLICATE);
CPU_OFF;
d_src = src;
d_xmap = xmap;
d_ymap = ymap;
d_dst.create(d_xmap.size(), d_src.type());
GPU_ON;
gpu::remap(d_src, d_dst, d_xmap, d_ymap, INTER_LINEAR, BORDER_REPLICATE);
GPU_OFF;
}
for (int size = 1000; size <= 4000; size *= 2)
{
SUBTEST << "src " << size << ", 8UC4";
gen(src, size, size, CV_8UC4, 0, 256);
xmap.create(size, size, CV_32F);
ymap.create(size, size, CV_32F);
for (int i = 0; i < size; ++i)
{
float* xmap_row = xmap.ptr<float>(i);
float* ymap_row = ymap.ptr<float>(i);
for (int j = 0; j < size; ++j)
{
xmap_row[j] = (j - size * 0.5f) * 0.75f + size * 0.5f;
ymap_row[j] = (i - size * 0.5f) * 0.75f + size * 0.5f;
}
}
dst.create(xmap.size(), src.type());
CPU_ON;
remap(src, dst, xmap, ymap, INTER_LINEAR, BORDER_REPLICATE);
CPU_OFF;
d_src = src;
d_xmap = xmap;
d_ymap = ymap;
d_dst.create(d_xmap.size(), d_src.type());
GPU_ON;
gpu::remap(d_src, d_dst, d_xmap, d_ymap, INTER_LINEAR, BORDER_REPLICATE);
GPU_OFF;
}
for (int size = 1000; size <= 4000; size *= 2)
{
SUBTEST << "src " << size << ", 16SC3";
gen(src, size, size, CV_16SC3, 0, 256);
xmap.create(size, size, CV_32F);
ymap.create(size, size, CV_32F);
for (int i = 0; i < size; ++i)
{
float* xmap_row = xmap.ptr<float>(i);
float* ymap_row = ymap.ptr<float>(i);
for (int j = 0; j < size; ++j)
{
xmap_row[j] = (j - size * 0.5f) * 0.75f + size * 0.5f;
ymap_row[j] = (i - size * 0.5f) * 0.75f + size * 0.5f;
}
}
dst.create(xmap.size(), src.type());
CPU_ON;
remap(src, dst, xmap, ymap, INTER_LINEAR, BORDER_REPLICATE);
CPU_OFF; CPU_OFF;
d_src = src; d_src = src;
...@@ -110,7 +215,7 @@ TEST(remap) ...@@ -110,7 +215,7 @@ TEST(remap)
d_dst.create(d_xmap.size(), d_src.type()); d_dst.create(d_xmap.size(), d_src.type());
GPU_ON; GPU_ON;
gpu::remap(d_src, d_dst, d_xmap, d_ymap); gpu::remap(d_src, d_dst, d_xmap, d_ymap, INTER_LINEAR, BORDER_REPLICATE);
GPU_OFF; GPU_OFF;
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment