Commit 47d68f69 authored by Vladislav Vinogradov

implemented gpu::remap for all types

parent 78542854
@@ -66,6 +66,9 @@ namespace cv
template <typename T> struct DevMem2D_
{
typedef T elem_type;
typedef int index_type;
int cols;
int rows;
T* data;
@@ -80,7 +83,6 @@ namespace cv
explicit DevMem2D_(const DevMem2D_<U>& d)
: cols(d.cols), rows(d.rows), data((T*)d.data), step(d.step) {}
typedef T elem_type;
enum { elem_size = sizeof(elem_type) };
__CV_GPU_HOST_DEVICE__ size_t elemSize() const { return elem_size; }
@@ -89,6 +91,9 @@ namespace cv
__CV_GPU_HOST_DEVICE__ operator T*() const { return data; }
__CV_GPU_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; }
__CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
#if defined(__DEVCLASES_ADD_THRUST_BEGIN_END__)
thrust::device_ptr<T> begin() const { return thrust::device_ptr<T>(data); }
thrust::device_ptr<T> end() const { return thrust::device_ptr<T>(data) + cols * rows; }
@@ -97,19 +102,24 @@ namespace cv
template<typename T> struct PtrStep_
{
typedef T elem_type;
typedef int index_type;
T* data;
size_t step;
PtrStep_() : data(0), step(0) {}
PtrStep_(const DevMem2D_<T>& mem) : data(mem.data), step(mem.step) {}
typedef T elem_type;
enum { elem_size = sizeof(elem_type) };
__CV_GPU_HOST_DEVICE__ size_t elemSize() const { return elem_size; }
__CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return (T*)( (char*)data + y * step); }
__CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return (const T*)( (const char*)data + y * step); }
__CV_GPU_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; }
__CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
#if defined(__DEVCLASES_ADD_THRUST_BEGIN_END__)
thrust::device_ptr<T> begin() const { return thrust::device_ptr<T>(data); }
#endif
@@ -125,6 +135,9 @@ namespace cv
}
__CV_GPU_HOST_DEVICE__ T* ptr(int y = 0) { return PtrStep_<T>::data + y * PtrStep_<T>::step; }
__CV_GPU_HOST_DEVICE__ const T* ptr(int y = 0) const { return PtrStep_<T>::data + y * PtrStep_<T>::step; }
__CV_GPU_HOST_DEVICE__ T& operator ()(int y, int x) { return ptr(y)[x]; }
__CV_GPU_HOST_DEVICE__ const T& operator ()(int y, int x) const { return ptr(y)[x]; }
};
typedef DevMem2D_<unsigned char> DevMem2D;
......
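The elem_type and index_type typedefs added above give DevMem2D_ and PtrStep_ a uniform 2-D accessor interface; the generic PointFilter and LinearFilter introduced later in this commit are templated over exactly that contract. A minimal device-side sketch of the contract (the helper readAt is hypothetical, for illustration only):

// Any Ptr2D exposing elem_type, index_type and operator()(y, x) satisfies
// the filter templates; readAt is a hypothetical illustration.
template <typename Ptr2D>
__device__ typename Ptr2D::elem_type readAt(const Ptr2D& src, int y, int x)
{
    return src(y, x); // resolves to ptr(y)[x] with row-step arithmetic
}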
@@ -596,8 +596,9 @@ namespace cv
////////////////////////////// Image processing //////////////////////////////
//! DST[x,y] = SRC[xmap[x,y],ymap[x,y]] with bilinear interpolation.
//! supports CV_8UC1, CV_8UC3 source types and CV_32FC1 map type
CV_EXPORTS void remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap);
//! supports CV_32FC1 map type
CV_EXPORTS void remap(const GpuMat& src, GpuMat& dst, const GpuMat& xmap, const GpuMat& ymap,
int interpolation, int borderMode = BORDER_CONSTANT, const Scalar& borderValue = Scalar());
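A hedged host-side usage sketch of this extended signature (given a host image cv::Mat src; the flip maps are illustrative, mirroring those built in the accuracy test below):

// Sketch: horizontal flip through gpu::remap with the new border arguments.
cv::Mat xmap(src.size(), CV_32FC1), ymap(src.size(), CV_32FC1);
for (int y = 0; y < src.rows; ++y)
    for (int x = 0; x < src.cols; ++x)
    {
        xmap.at<float>(y, x) = (float)(src.cols - 1 - x); // mirror columns
        ymap.at<float>(y, x) = (float)y;                  // keep rows
    }
cv::gpu::GpuMat d_src(src), d_dst;
cv::gpu::remap(d_src, d_dst, cv::gpu::GpuMat(xmap), cv::gpu::GpuMat(ymap),
               cv::INTER_LINEAR, cv::BORDER_REPLICATE, cv::Scalar());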
//! Does mean shift filtering on GPU.
CV_EXPORTS void meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr,
@@ -761,10 +762,10 @@ namespace cv
CV_EXPORTS void upsample(const GpuMat& src, GpuMat &dst, Stream& stream = Stream::Null());
//! smoothes the source image and downsamples it
CV_EXPORTS void pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
CV_EXPORTS void pyrDown(const GpuMat& src, GpuMat& dst, int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null());
//! upsamples the source image and then smoothes it
CV_EXPORTS void pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream = Stream::Null());
CV_EXPORTS void pyrUp(const GpuMat& src, GpuMat& dst, int borderType = BORDER_DEFAULT, Stream& stream = Stream::Null());
//! performs linear blending of two images
//! to avoid accuracy errors the sum of weights shouldn't be very close to zero
......
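The added borderType parameter is the only interface change to the pyramid functions above; a brief usage sketch (given a host image cv::Mat src):

cv::gpu::GpuMat d_src(src), d_down, d_up;
cv::gpu::pyrDown(d_src, d_down, cv::BORDER_DEFAULT); // smooth, then halve
cv::gpu::pyrUp(d_down, d_up, cv::BORDER_DEFAULT);    // double, then smooth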
@@ -242,9 +242,9 @@ namespace filter_krnls
{
const T* srcCol = src.ptr() + x;
sDataColumn[ threadIdx.y * BLOCK_DIM_X] = b.at_low(y - BLOCK_DIM_Y, srcCol);
sDataColumn[(threadIdx.y + BLOCK_DIM_Y) * BLOCK_DIM_X] = b.at_high(y, srcCol);
sDataColumn[(threadIdx.y + BLOCK_DIM_Y * 2) * BLOCK_DIM_X] = b.at_high(y + BLOCK_DIM_Y, srcCol);
sDataColumn[ threadIdx.y * BLOCK_DIM_X] = b.at_low(y - BLOCK_DIM_Y, srcCol, src.step);
sDataColumn[(threadIdx.y + BLOCK_DIM_Y) * BLOCK_DIM_X] = b.at_high(y, srcCol, src.step);
sDataColumn[(threadIdx.y + BLOCK_DIM_Y * 2) * BLOCK_DIM_X] = b.at_high(y + BLOCK_DIM_Y, srcCol, src.step);
__syncthreads();
@@ -273,7 +273,7 @@ namespace cv { namespace gpu { namespace filters
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
dim3 grid(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));
B<T> b(src.rows, src.step);
B<T> b(src.rows);
if (!b.is_range_safe(-BLOCK_DIM_Y, (grid.y + 1) * BLOCK_DIM_Y - 1))
{
......
This diff is collapsed.
@@ -675,32 +675,30 @@ namespace cv { namespace gpu { namespace surf
3.695352233989979e-006f, 8.444558261544444e-006f, 1.760426494001877e-005f, 3.34794785885606e-005f, 5.808438800158911e-005f, 9.193058212986216e-005f, 0.0001327334757661447f, 0.0001748319627949968f, 0.0002100782439811155f, 0.0002302826324012131f, 0.0002302826324012131f, 0.0002100782439811155f, 0.0001748319627949968f, 0.0001327334757661447f, 9.193058212986216e-005f, 5.808438800158911e-005f, 3.34794785885606e-005f, 1.760426494001877e-005f, 8.444558261544444e-006f, 3.695352233989979e-006f
};
__device__ __forceinline__ unsigned char calcWin(int i, int j, float centerX, float centerY, float win_offset, float cos_dir, float sin_dir)
struct WinReader
{
float pixel_x = centerX + (win_offset + j) * cos_dir + (win_offset + i) * sin_dir;
float pixel_y = centerY - (win_offset + j) * sin_dir + (win_offset + i) * cos_dir;
typedef uchar elem_type;
return tex2D(imgTex, pixel_x, pixel_y);
__device__ __forceinline__ WinReader(float centerX_, float centerY_, float win_offset_, float cos_dir_, float sin_dir_) :
centerX(centerX_), centerY(centerY_), win_offset(win_offset_), cos_dir(cos_dir_), sin_dir(sin_dir_)
{
}
__device__ unsigned char calcPATCH(int i1, int j1, float centerX, float centerY, float win_offset, float cos_dir, float sin_dir, int win_size)
__device__ __forceinline__ uchar operator ()(int i, int j) const
{
/* Scale the window to size PATCH_SZ so each pixel's size is s. This
makes calculating the gradients with wavelets of size 2s easy */
const float icoo = ((float)i1 / (PATCH_SZ + 1)) * win_size;
const float jcoo = ((float)j1 / (PATCH_SZ + 1)) * win_size;
const int i = __float2int_rd(icoo);
const int j = __float2int_rd(jcoo);
float res = calcWin(i, j, centerX, centerY, win_offset, cos_dir, sin_dir) * (i + 1 - icoo) * (j + 1 - jcoo);
res += calcWin(i + 1, j, centerX, centerY, win_offset, cos_dir, sin_dir) * (icoo - i) * (j + 1 - jcoo);
res += calcWin(i + 1, j + 1, centerX, centerY, win_offset, cos_dir, sin_dir) * (icoo - i) * (jcoo - j);
res += calcWin(i, j + 1, centerX, centerY, win_offset, cos_dir, sin_dir) * (i + 1 - icoo) * (jcoo - j);
float pixel_x = centerX + (win_offset + j) * cos_dir + (win_offset + i) * sin_dir;
float pixel_y = centerY - (win_offset + j) * sin_dir + (win_offset + i) * cos_dir;
return saturate_cast<unsigned char>(res);
return tex2D(imgTex, pixel_x, pixel_y);
}
float centerX;
float centerY;
float win_offset;
float cos_dir;
float sin_dir;
};
__device__ void calc_dx_dy(float s_dx_bin[25], float s_dy_bin[25],
const float* featureX, const float* featureY, const float* featureSize, const float* featureDir)
{
@@ -732,7 +730,13 @@ namespace cv { namespace gpu { namespace surf
const int xIndex = xBlock * 5 + threadIdx.x;
const int yIndex = yBlock * 5 + threadIdx.y;
s_PATCH[threadIdx.y][threadIdx.x] = calcPATCH(yIndex, xIndex, centerX, centerY, win_offset, cos_dir, sin_dir, win_size);
const float icoo = ((float)yIndex / (PATCH_SZ + 1)) * win_size;
const float jcoo = ((float)xIndex / (PATCH_SZ + 1)) * win_size;
LinearFilter<WinReader> filter(WinReader(centerX, centerY, win_offset, cos_dir, sin_dir));
s_PATCH[threadIdx.y][threadIdx.x] = filter(icoo, jcoo);
__syncthreads();
if (threadIdx.x < 5 && threadIdx.y < 5)
......
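Net effect of the SURF kernel hunks above: the hand-coded bilinear blend in calcPATCH is replaced by the generic LinearFilter over a WinReader source, so the descriptor now shares the interpolation code path introduced for remap. Condensed, inside the kernel where centerX, centerY, win_offset, cos_dir, sin_dir, icoo and jcoo are in scope:

// WinReader maps patch coordinates to rotated texture coordinates;
// LinearFilter supplies the bilinear weighting calcPATCH used to hand-code.
LinearFilter<WinReader> filter(WinReader(centerX, centerY, win_offset, cos_dir, sin_dir));
uchar sample = filter(icoo, jcoo); // blends four tex2D fetches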
@@ -885,7 +885,7 @@ void cv::gpu::GpuMat::release()
if( refcount && CV_XADD(refcount, -1) == 1 )
{
fastFree(refcount);
cudaSafeCall( cudaFree(datastart) );
cudaFree(datastart);
}
data = datastart = dataend = 0;
step = rows = cols = 0;
......
This diff is collapsed.
@@ -310,7 +310,6 @@ namespace cv { namespace gpu { namespace device
U vec1Vals[MAX_LEN / THREAD_DIM];
};
///////////////////////////////////////////////////////////////////////////////
// Solve linear system
@@ -364,6 +363,60 @@ namespace cv { namespace gpu { namespace device
return false;
}
///////////////////////////////////////////////////////////////////////////////
// Filters
template <typename Ptr2D> struct PointFilter
{
typedef typename Ptr2D::elem_type elem_type;
typedef float index_type;
explicit __host__ __device__ __forceinline__ PointFilter(const Ptr2D& src_) : src(src_) {}
__device__ __forceinline__ elem_type operator ()(float y, float x) const
{
return src(__float2int_rn(y), __float2int_rn(x));
}
const Ptr2D src;
};
template <typename Ptr2D> struct LinearFilter
{
typedef typename Ptr2D::elem_type elem_type;
typedef float index_type;
explicit __host__ __device__ __forceinline__ LinearFilter(const Ptr2D& src_) : src(src_) {}
__device__ __forceinline__ elem_type operator ()(float y, float x) const
{
typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;
work_type out = VecTraits<work_type>::all(0);
const int x1 = __float2int_rd(x);
const int y1 = __float2int_rd(y);
const int x2 = x1 + 1;
const int y2 = y1 + 1;
elem_type src_reg = src(y1, x1);
out = out + src_reg * ((x2 - x) * (y2 - y));
src_reg = src(y1, x2);
out = out + src_reg * ((x - x1) * (y2 - y));
src_reg = src(y2, x1);
out = out + src_reg * ((x2 - x) * (y - y1));
src_reg = src(y2, x2);
out = out + src_reg * ((x - x1) * (y - y1));
return saturate_cast<elem_type>(out);
}
const Ptr2D src;
};
}}}
#endif // __OPENCV_GPU_UTILITY_HPP__
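LinearFilter above applies the standard bilinear weights, which sum to one because (x2 - x) + (x - x1) = 1 and likewise in y. A host-side sketch of the same arithmetic for one float channel (hypothetical helper, assuming all four taps are in-bounds as the callers' border wrappers guarantee):

#include <cmath>

float bilinearSample(const float* img, int width, float y, float x)
{
    const int x1 = (int)std::floor(x), y1 = (int)std::floor(y);
    const int x2 = x1 + 1, y2 = y1 + 1;
    return img[y1 * width + x1] * (x2 - x) * (y2 - y)   // top-left
         + img[y1 * width + x2] * (x - x1) * (y2 - y)   // top-right
         + img[y2 * width + x1] * (x2 - x) * (y - y1)   // bottom-left
         + img[y2 * width + x2] * (x - x1) * (y - y1);  // bottom-right
}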
@@ -166,6 +166,7 @@ namespace cv { namespace gpu { namespace device
enum {cn=1}; \
static __device__ __host__ __forceinline__ type all(type v) {return v;} \
static __device__ __host__ __forceinline__ type make(type x) {return x;} \
static __device__ __host__ __forceinline__ type make(const type* v) {return *v;} \
}; \
template<> struct VecTraits<type ## 1> \
{ \
@@ -173,6 +174,7 @@ namespace cv { namespace gpu { namespace device
enum {cn=1}; \
static __device__ __host__ __forceinline__ type ## 1 all(type v) {return make_ ## type ## 1(v);} \
static __device__ __host__ __forceinline__ type ## 1 make(type x) {return make_ ## type ## 1(x);} \
static __device__ __host__ __forceinline__ type ## 1 make(const type* v) {return make_ ## type ## 1(*v);} \
}; \
template<> struct VecTraits<type ## 2> \
{ \
@@ -180,6 +182,7 @@ namespace cv { namespace gpu { namespace device
enum {cn=2}; \
static __device__ __host__ __forceinline__ type ## 2 all(type v) {return make_ ## type ## 2(v, v);} \
static __device__ __host__ __forceinline__ type ## 2 make(type x, type y) {return make_ ## type ## 2(x, y);} \
static __device__ __host__ __forceinline__ type ## 2 make(const type* v) {return make_ ## type ## 2(v[0], v[1]);} \
}; \
template<> struct VecTraits<type ## 3> \
{ \
@@ -187,6 +190,7 @@ namespace cv { namespace gpu { namespace device
enum {cn=3}; \
static __device__ __host__ __forceinline__ type ## 3 all(type v) {return make_ ## type ## 3(v, v, v);} \
static __device__ __host__ __forceinline__ type ## 3 make(type x, type y, type z) {return make_ ## type ## 3(x, y, z);} \
static __device__ __host__ __forceinline__ type ## 3 make(const type* v) {return make_ ## type ## 3(v[0], v[1], v[2]);} \
}; \
template<> struct VecTraits<type ## 4> \
{ \
@@ -194,6 +198,7 @@ namespace cv { namespace gpu { namespace device
enum {cn=4}; \
static __device__ __host__ __forceinline__ type ## 4 all(type v) {return make_ ## type ## 4(v, v, v, v);} \
static __device__ __host__ __forceinline__ type ## 4 make(type x, type y, type z, type w) {return make_ ## type ## 4(x, y, z, w);} \
static __device__ __host__ __forceinline__ type ## 4 make(const type* v) {return make_ ## type ## 4(v[0], v[1], v[2], v[3]);} \
}; \
template<> struct VecTraits<type ## 8> \
{ \
@@ -201,10 +206,10 @@ namespace cv { namespace gpu { namespace device
enum {cn=8}; \
static __device__ __host__ __forceinline__ type ## 8 all(type v) {return make_ ## type ## 8(v, v, v, v, v, v, v, v);} \
static __device__ __host__ __forceinline__ type ## 8 make(type a0, type a1, type a2, type a3, type a4, type a5, type a6, type a7) {return make_ ## type ## 8(a0, a1, a2, a3, a4, a5, a6, a7);} \
static __device__ __host__ __forceinline__ type ## 8 make(const type* v) {return make_ ## type ## 8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);} \
};
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(uchar)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(char)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(ushort)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(short)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS(int)
@@ -214,12 +219,61 @@ namespace cv { namespace gpu { namespace device
#undef OPENCV_GPU_IMPLEMENT_VEC_TRAITS
template<> struct VecTraits<char>
{
typedef char elem_type;
enum {cn=1};
static __device__ __host__ __forceinline__ char all(char v) {return v;}
static __device__ __host__ __forceinline__ char make(char x) {return x;}
static __device__ __host__ __forceinline__ char make(const char* x) {return *x;}
};
template<> struct VecTraits<schar>
{
typedef schar elem_type;
enum {cn=1};
static __device__ __host__ __forceinline__ schar all(schar v) {return v;}
static __device__ __host__ __forceinline__ schar make(schar x) {return x;}
static __device__ __host__ __forceinline__ schar make(const schar* x) {return *x;}
};
template<> struct VecTraits<char1>
{
typedef schar elem_type;
enum {cn=1};
static __device__ __host__ __forceinline__ char1 all(schar v) {return make_char1(v);}
static __device__ __host__ __forceinline__ char1 make(schar x) {return make_char1(x);}
static __device__ __host__ __forceinline__ char1 make(const schar* v) {return make_char1(v[0]);}
};
template<> struct VecTraits<char2>
{
typedef schar elem_type;
enum {cn=2};
static __device__ __host__ __forceinline__ char2 all(schar v) {return make_char2(v, v);}
static __device__ __host__ __forceinline__ char2 make(schar x, schar y) {return make_char2(x, y);}
static __device__ __host__ __forceinline__ char2 make(const schar* v) {return make_char2(v[0], v[1]);}
};
template<> struct VecTraits<char3>
{
typedef schar elem_type;
enum {cn=3};
static __device__ __host__ __forceinline__ char3 all(schar v) {return make_char3(v, v, v);}
static __device__ __host__ __forceinline__ char3 make(schar x, schar y, schar z) {return make_char3(x, y, z);}
static __device__ __host__ __forceinline__ char3 make(const schar* v) {return make_char3(v[0], v[1], v[2]);}
};
template<> struct VecTraits<char4>
{
typedef schar elem_type;
enum {cn=4};
static __device__ __host__ __forceinline__ char4 all(schar v) {return make_char4(v, v, v, v);}
static __device__ __host__ __forceinline__ char4 make(schar x, schar y, schar z, schar w) {return make_char4(x, y, z, w);}
static __device__ __host__ __forceinline__ char4 make(const schar* v) {return make_char4(v[0], v[1], v[2], v[3]);}
};
template<> struct VecTraits<char8>
{
typedef schar elem_type;
enum {cn=8};
static __device__ __host__ __forceinline__ char8 all(schar v) {return make_char8(v, v, v, v, v, v, v, v);}
static __device__ __host__ __forceinline__ char8 make(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7) {return make_char8(a0, a1, a2, a3, a4, a5, a6, a7);}
static __device__ __host__ __forceinline__ char8 make(const schar* v) {return make_char8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);}
};
}}}
......
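The make(const type*) overloads added throughout the VecTraits specializations above load a cn-element vector from consecutive memory, presumably so border and filter code can read multi-channel pixels through raw element pointers. A hedged example:

const uchar px[3] = {10, 20, 30};
// Pointer overload added by this commit: reads cn consecutive elements.
uchar3 v = cv::gpu::device::VecTraits<uchar3>::make(px); // {10, 20, 30}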
@@ -181,15 +181,18 @@ INSTANTIATE_TEST_CASE_P(ImgProc, Resize, testing::Combine(
///////////////////////////////////////////////////////////////////////////////////////////////////////
// remap
struct Remap : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int> >
struct Remap : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int, int, int> >
{
cv::gpu::DeviceInfo devInfo;
int type;
int interpolation;
int borderType;
cv::Size size;
cv::Mat src;
cv::Mat xmap;
cv::Mat ymap;
cv::Scalar borderValue;
cv::Mat dst_gold;
@@ -197,43 +200,83 @@ struct Remap : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int>
{
devInfo = std::tr1::get<0>(GetParam());
type = std::tr1::get<1>(GetParam());
interpolation = std::tr1::get<2>(GetParam());
borderType = std::tr1::get<3>(GetParam());
cv::gpu::setDevice(devInfo.deviceID());
cv::RNG& rng = cvtest::TS::ptr()->get_rng();
size = cv::Size(rng.uniform(20, 150), rng.uniform(20, 150));
size = cv::Size(rng.uniform(100, 200), rng.uniform(100, 200));
src = cvtest::randomMat(rng, size, type, 0.0, 127.0, false);
xmap = cvtest::randomMat(rng, size, CV_32FC1, 0.0, src.cols - 1, false);
ymap = cvtest::randomMat(rng, size, CV_32FC1, 0.0, src.rows - 1, false);
src = cvtest::randomMat(rng, size, type, 0.0, 256.0, false);
xmap.create(size, CV_32FC1);
ymap.create(size, CV_32FC1);
for (int y = 0; y < src.rows; ++y)
{
float* xmap_row = xmap.ptr<float>(y);
float* ymap_row = ymap.ptr<float>(y);
for (int x = 0; x < src.cols; ++x)
{
xmap_row[x] = src.cols - 1 - x;
ymap_row[x] = src.rows - 1 - y;
}
}
borderValue[0] = rng.uniform(0.0, 256.0);
borderValue[1] = rng.uniform(0.0, 256.0);
borderValue[2] = rng.uniform(0.0, 256.0);
borderValue[3] = rng.uniform(0.0, 256.0);
cv::remap(src, dst_gold, xmap, ymap, cv::INTER_LINEAR, cv::BORDER_WRAP);
cv::remap(src, dst_gold, xmap, ymap, interpolation, borderType, borderValue);
}
};
TEST_P(Remap, Accuracy)
{
static const char* interpolations_str[] = {"INTER_NEAREST", "INTER_LINEAR", "INTER_CUBIC"};
static const char* borderTypes_str[] = {"BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", "BORDER_REFLECT_101"};
const char* interpolationStr = interpolations_str[interpolation];
const char* borderTypeStr = borderTypes_str[borderType];
PRINT_PARAM(devInfo);
PRINT_TYPE(type);
PRINT_PARAM(interpolationStr);
PRINT_PARAM(borderTypeStr);
PRINT_PARAM(size);
PRINT_PARAM(borderValue);
cv::Mat dst;
ASSERT_NO_THROW(
cv::gpu::GpuMat gpuRes;
cv::gpu::remap(cv::gpu::GpuMat(src), gpuRes, cv::gpu::GpuMat(xmap), cv::gpu::GpuMat(ymap));
cv::gpu::remap(cv::gpu::GpuMat(src), gpuRes, cv::gpu::GpuMat(xmap), cv::gpu::GpuMat(ymap), interpolation, borderType, borderValue);
gpuRes.download(dst);
);
EXPECT_MAT_SIMILAR(dst_gold, dst, 0.5);
EXPECT_MAT_NEAR(dst_gold, dst, 1e-5);
}
INSTANTIATE_TEST_CASE_P(ImgProc, Remap, testing::Combine(
INSTANTIATE_TEST_CASE_P
(
ImgProc, Remap, testing::Combine
(
testing::ValuesIn(devices()),
testing::Values(CV_8UC1, CV_8UC3)));
testing::Values
(
CV_8UC1, CV_8UC3, CV_8UC4,
CV_32FC1, CV_32FC3, CV_32FC4
),
testing::Values(cv::INTER_NEAREST, cv::INTER_LINEAR),
testing::Values(cv::BORDER_REFLECT101, cv::BORDER_REPLICATE, cv::BORDER_CONSTANT)
)
);
///////////////////////////////////////////////////////////////////////////////////////////////////////
// copyMakeBorder
......
@@ -79,9 +79,9 @@ TEST(remap)
Mat src, dst, xmap, ymap;
gpu::GpuMat d_src, d_dst, d_xmap, d_ymap;
for (int size = 1000; size <= 8000; size *= 2)
for (int size = 1000; size <= 4000; size *= 2)
{
SUBTEST << "src " << size << " and 8U, 32F maps";
SUBTEST << "src " << size << ", 8UC1";
gen(src, size, size, CV_8UC1, 0, 256);
@@ -101,7 +101,112 @@ TEST(remap)
dst.create(xmap.size(), src.type());
CPU_ON;
remap(src, dst, xmap, ymap, INTER_LINEAR);
remap(src, dst, xmap, ymap, INTER_LINEAR, BORDER_REPLICATE);
CPU_OFF;
d_src = src;
d_xmap = xmap;
d_ymap = ymap;
d_dst.create(d_xmap.size(), d_src.type());
GPU_ON;
gpu::remap(d_src, d_dst, d_xmap, d_ymap, INTER_LINEAR, BORDER_REPLICATE);
GPU_OFF;
}
for (int size = 1000; size <= 4000; size *= 2)
{
SUBTEST << "src " << size << ", 8UC3";
gen(src, size, size, CV_8UC3, 0, 256);
xmap.create(size, size, CV_32F);
ymap.create(size, size, CV_32F);
for (int i = 0; i < size; ++i)
{
float* xmap_row = xmap.ptr<float>(i);
float* ymap_row = ymap.ptr<float>(i);
for (int j = 0; j < size; ++j)
{
xmap_row[j] = (j - size * 0.5f) * 0.75f + size * 0.5f;
ymap_row[j] = (i - size * 0.5f) * 0.75f + size * 0.5f;
}
}
dst.create(xmap.size(), src.type());
CPU_ON;
remap(src, dst, xmap, ymap, INTER_LINEAR, BORDER_REPLICATE);
CPU_OFF;
d_src = src;
d_xmap = xmap;
d_ymap = ymap;
d_dst.create(d_xmap.size(), d_src.type());
GPU_ON;
gpu::remap(d_src, d_dst, d_xmap, d_ymap, INTER_LINEAR, BORDER_REPLICATE);
GPU_OFF;
}
for (int size = 1000; size <= 4000; size *= 2)
{
SUBTEST << "src " << size << ", 8UC4";
gen(src, size, size, CV_8UC4, 0, 256);
xmap.create(size, size, CV_32F);
ymap.create(size, size, CV_32F);
for (int i = 0; i < size; ++i)
{
float* xmap_row = xmap.ptr<float>(i);
float* ymap_row = ymap.ptr<float>(i);
for (int j = 0; j < size; ++j)
{
xmap_row[j] = (j - size * 0.5f) * 0.75f + size * 0.5f;
ymap_row[j] = (i - size * 0.5f) * 0.75f + size * 0.5f;
}
}
dst.create(xmap.size(), src.type());
CPU_ON;
remap(src, dst, xmap, ymap, INTER_LINEAR, BORDER_REPLICATE);
CPU_OFF;
d_src = src;
d_xmap = xmap;
d_ymap = ymap;
d_dst.create(d_xmap.size(), d_src.type());
GPU_ON;
gpu::remap(d_src, d_dst, d_xmap, d_ymap, INTER_LINEAR, BORDER_REPLICATE);
GPU_OFF;
}
for (int size = 1000; size <= 4000; size *= 2)
{
SUBTEST << "src " << size << ", 16SC3";
gen(src, size, size, CV_16SC3, 0, 256);
xmap.create(size, size, CV_32F);
ymap.create(size, size, CV_32F);
for (int i = 0; i < size; ++i)
{
float* xmap_row = xmap.ptr<float>(i);
float* ymap_row = ymap.ptr<float>(i);
for (int j = 0; j < size; ++j)
{
xmap_row[j] = (j - size * 0.5f) * 0.75f + size * 0.5f;
ymap_row[j] = (i - size * 0.5f) * 0.75f + size * 0.5f;
}
}
dst.create(xmap.size(), src.type());
CPU_ON;
remap(src, dst, xmap, ymap, INTER_LINEAR, BORDER_REPLICATE);
CPU_OFF;
d_src = src;
@@ -110,7 +215,7 @@ TEST(remap)
d_dst.create(d_xmap.size(), d_src.type());
GPU_ON;
gpu::remap(d_src, d_dst, d_xmap, d_ymap);
gpu::remap(d_src, d_dst, d_xmap, d_ymap, INTER_LINEAR, BORDER_REPLICATE);
GPU_OFF;
}
}
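The map loops above all compute xmap(i, j) = 0.75 * (j - size/2) + size/2, and likewise in y, i.e. a 0.75x zoom about the image centre, so every sample stays in-bounds and the subtests differ only in source type. A quick arithmetic check for size = 1000:

// j = 0   maps to 0.75 * (0 - 500)   + 500 = 125
// j = 999 maps to 0.75 * (999 - 500) + 500 = 874.25, inside [0, 999]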
......