Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
21c4753f
Commit
21c4753f
authored
Aug 23, 2013
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
used global memory access for up-scaling
parent
d1f6a23a
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
380 additions
and
323 deletions
+380
-323
resize.cu
modules/gpu/src/cuda/resize.cu
+370
-307
resize.cpp
modules/gpu/src/resize.cpp
+10
-16
No files found.
modules/gpu/src/cuda/resize.cu
View file @
21c4753f
...
...
@@ -52,368 +52,431 @@
namespace cv { namespace gpu { namespace device
{
namespace imgproc
// kernels
template <typename T> __global__ void resize_nearest(const PtrStep<T> src, PtrStepSz<T> dst, const float fy, const float fx)
{
template <typename T> __global__ void resize_nearest(const PtrStep<T> src, const float fx, const float fy, PtrStepSz<T> dst)
{
const int dst_x = blockDim.x * blockIdx.x + threadIdx.x;
const int dst_y = blockDim.y * blockIdx.y + threadIdx.y;
const int dst_x = blockDim.x * blockIdx.x + threadIdx.x;
const int dst_y = blockDim.y * blockIdx.y + threadIdx.y;
if (dst_x < dst.cols && dst_y < dst.rows)
{
const float src_x = dst_x * fx;
const float src_y = dst_y * fy;
if (dst_x < dst.cols && dst_y < dst.rows)
{
const float src_x = dst_x * fx;
const float src_y = dst_y * fy;
dst(dst_y, dst_x) = src(__float2int_rz(src_y), __float2int_rz(src_x));
}
dst(dst_y, dst_x) = src(__float2int_rz(src_y), __float2int_rz(src_x));
}
}
template <typename T> __global__ void resize_linear(const PtrStepSz<T> src, const float fx, const float fy, PtrStepSz<T> dst
)
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
template <typename T> __global__ void resize_linear(const PtrStepSz<T> src, PtrStepSz<T> dst, const float fy, const float fx
)
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
const int dst_x = blockDim.x * blockIdx.x + threadIdx.x;
const int dst_y = blockDim.y * blockIdx.y + threadIdx.y;
const int dst_x = blockDim.x * blockIdx.x + threadIdx.x;
const int dst_y = blockDim.y * blockIdx.y + threadIdx.y;
if (dst_x < dst.cols && dst_y < dst.rows)
{
const float src_x = dst_x * fx;
const float src_y = dst_y * fy;
if (dst_x < dst.cols && dst_y < dst.rows)
{
const float src_x = dst_x * fx;
const float src_y = dst_y * fy;
work_type out = VecTraits<work_type>::all(0);
work_type out = VecTraits<work_type>::all(0);
const int x1 = __float2int_rd(src_x);
const int y1 = __float2int_rd(src_y);
const int x2 = x1 + 1;
const int y2 = y1 + 1;
const int x2_read = ::min(x2, src.cols - 1);
const int y2_read = ::min(y2, src.rows - 1);
const int x1 = __float2int_rd(src_x);
const int y1 = __float2int_rd(src_y);
const int x2 = x1 + 1;
const int y2 = y1 + 1;
const int x2_read = ::min(x2, src.cols - 1);
const int y2_read = ::min(y2, src.rows - 1);
T src_reg = src(y1, x1);
out = out + src_reg * ((x2 - src_x) * (y2 - src_y));
T src_reg = src(y1, x1);
out = out + src_reg * ((x2 - src_x) * (y2 - src_y));
src_reg = src(y1, x2_read);
out = out + src_reg * ((src_x - x1) * (y2 - src_y));
src_reg = src(y1, x2_read);
out = out + src_reg * ((src_x - x1) * (y2 - src_y));
src_reg = src(y2_read, x1);
out = out + src_reg * ((x2 - src_x) * (src_y - y1));
src_reg = src(y2_read, x1);
out = out + src_reg * ((x2 - src_x) * (src_y - y1));
src_reg = src(y2_read, x2_read);
out = out + src_reg * ((src_x - x1) * (src_y - y1));
src_reg = src(y2_read, x2_read);
out = out + src_reg * ((src_x - x1) * (src_y - y1));
dst(dst_y, dst_x) = saturate_cast<T>(out);
}
dst(dst_y, dst_x) = saturate_cast<T>(out);
}
}
template <class Ptr2D, typename T> __global__ void resize(const Ptr2D src, const float fx, const float fy, PtrStepSz<T> dst
)
{
const int dst_x = blockDim.x * blockIdx.x + threadIdx.x;
const int dst_y = blockDim.y * blockIdx.y + threadIdx.y;
template <class Ptr2D, typename T> __global__ void resize(const Ptr2D src, PtrStepSz<T> dst, const float fy, const float fx
)
{
const int dst_x = blockDim.x * blockIdx.x + threadIdx.x;
const int dst_y = blockDim.y * blockIdx.y + threadIdx.y;
if (dst_x < dst.cols && dst_y < dst.rows)
{
const float src_x = dst_x * fx;
const float src_y = dst_y * fy;
if (dst_x < dst.cols && dst_y < dst.rows)
{
const float src_x = dst_x * fx;
const float src_y = dst_y * fy;
dst(dst_y, dst_x) = src(src_y, src_x);
}
dst(dst_y, dst_x) = src(src_y, src_x);
}
}
template <template <typename> class Filter, typename T> struct ResizeDispatcherStream
template <typename Ptr2D, typename T> __global__ void resize_area(const Ptr2D src, PtrStepSz<T> dst)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < dst.cols && y < dst.rows)
{
static void call(PtrStepSz<T> src, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
{
const dim3 block(32, 8);
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
dst(y, x) = src(y, x);
}
}
BrdReplicate<T> brd(src.rows, src.cols);
BorderReader< PtrStep<T>, BrdReplicate<T> > brdSrc(src, brd);
Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc);
// textures
resize<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
cudaSafeCall( cudaGetLastError() );
}
template <typename T> struct TextureAccessor;
#define OPENCV_GPU_IMPLEMENT_RESIZE_TEX(type) \
texture<type, cudaTextureType2D, cudaReadModeElementType> tex_resize_##type (0, cudaFilterModePoint, cudaAddressModeClamp); \
template <> struct TextureAccessor<type> \
{ \
typedef type elem_type; \
typedef int index_type; \
int xoff; \
int yoff; \
__device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
{ \
return tex2D(tex_resize_##type, x + xoff, y + yoff); \
} \
__host__ static void bind(const PtrStepSz<type>& mat) \
{ \
bindTexture(&tex_resize_##type, mat); \
} \
};
template <typename T> struct ResizeDispatcherStream<PointFilter, T>
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar4)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort4)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short4)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float4)
#undef OPENCV_GPU_IMPLEMENT_RESIZE_TEX
template <typename T>
TextureAccessor<T> texAccessor(const PtrStepSz<T>& mat, int yoff, int xoff)
{
TextureAccessor<T>::bind(mat);
TextureAccessor<T> t;
t.xoff = xoff;
t.yoff = yoff;
return t;
}
// callers for nearest interpolation
template <typename T>
void call_resize_nearest_glob(const PtrStepSz<T>& src, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
{
const dim3 block(32, 8);
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
resize_nearest<<<grid, block, 0, stream>>>(src, dst, fy, fx);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <typename T>
void call_resize_nearest_tex(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx)
{
const dim3 block(32, 8);
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
resize<<<grid, block>>>(texAccessor(srcWhole, yoff, xoff), dst, fy, fx);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
// callers for linear interpolation
template <typename T>
void call_resize_linear_glob(const PtrStepSz<T>& src, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
{
const dim3 block(32, 8);
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
resize_linear<<<grid, block>>>(src, dst, fy, fx);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <typename T>
void call_resize_linear_tex(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx)
{
const dim3 block(32, 8);
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
if (srcWhole.data == src.data)
{
static void call(PtrStepSz<T> src, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
{
const dim3 block(32, 8);
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
TextureAccessor<T> texSrc = texAccessor(src, 0, 0);
LinearFilter< TextureAccessor<T> > filteredSrc(texSrc);
resize_nearest<<<grid, block, 0, stream>>>(src, fx, fy, dst);
cudaSafeCall( cudaGetLastError() );
}
};
template <typename T> struct ResizeDispatcherStream<LinearFilter, T>
resize<<<grid, block>>>(filteredSrc, dst, fy, fx);
}
else
{
static void call(PtrStepSz<T> src, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
{
const dim3 block(32, 8);
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
TextureAccessor<T> texSrc = texAccessor(srcWhole, yoff, xoff);
resize_linear<<<grid, block, 0, stream>>>(src, fx, fy, dst);
cudaSafeCall( cudaGetLastError() );
}
};
BrdReplicate<T> brd(src.rows, src.cols);
BorderReader<TextureAccessor<T>, BrdReplicate<T> > brdSrc(texSrc, brd);
LinearFilter< BorderReader<TextureAccessor<T>, BrdReplicate<T> > > filteredSrc(brdSrc);
template <template <typename> class Filter, typename T> struct ResizeDispatcherNonStream
{
static void call(PtrStepSz<T> src, PtrStepSz<T>, int, int, float fx, float fy, PtrStepSz<T> dst)
{
const dim3 block(32, 8);
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
resize<<<grid, block>>>(filteredSrc, dst, fy, fx);
}
BrdReplicate<T> brd(src.rows, src.cols);
BorderReader< PtrStep<T>, BrdReplicate<T> > brdSrc(src, brd);
Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc);
cudaSafeCall( cudaGetLastError() );
resize<<<grid, block>>>(filteredSrc, fx, fy, dst
);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize()
);
}
cudaSafeCall( cudaDeviceSynchronize() );
}
};
template <typename T> struct ResizeDispatcherNonStream<PointFilter, T>
{
static void call(PtrStepSz<T> src, PtrStepSz<T>, int, int, float fx, float fy, PtrStepSz<T> dst)
{
const dim3 block(32, 8);
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
// callers for cubic interpolation
resize_nearest<<<grid, block>>>(src, fx, fy, dst);
cudaSafeCall( cudaGetLastError() );
template <typename T>
void call_resize_cubic_glob(const PtrStepSz<T>& src, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
{
const dim3 block(32, 8);
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
cudaSafeCall( cudaDeviceSynchronize() );
}
};
template <typename T> struct ResizeDispatcherNonStream<LinearFilter, T>
BrdReplicate<T> brd(src.rows, src.cols);
BorderReader< PtrStep<T>, BrdReplicate<T> > brdSrc(src, brd);
CubicFilter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc);
resize<<<grid, block, 0, stream>>>(filteredSrc, dst, fy, fx);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template <typename T>
void call_resize_cubic_tex(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx)
{
const dim3 block(32, 8);
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
if (srcWhole.data == src.data)
{
static void call(PtrStepSz<T> src, PtrStepSz<T>, int, int, float fx, float fy, PtrStepSz<T> dst)
{
const dim3 block(32, 8);
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
TextureAccessor<T> texSrc = texAccessor(src, 0, 0);
CubicFilter< TextureAccessor<T> > filteredSrc(texSrc);
resize_linear<<<grid, block>>>(src, fx, fy, dst);
cudaSafeCall( cudaGetLastError() );
resize<<<grid, block>>>(filteredSrc, dst, fy, fx);
}
else
{
TextureAccessor<T> texSrc = texAccessor(srcWhole, yoff, xoff);
cudaSafeCall( cudaDeviceSynchronize()
);
}
}
;
BrdReplicate<T> brd(src.rows, src.cols
);
BorderReader<TextureAccessor<T>, BrdReplicate<T> > brdSrc(texSrc, brd);
CubicFilter< BorderReader<TextureAccessor<T>, BrdReplicate<T> > > filteredSrc(brdSrc)
;
#define OPENCV_GPU_IMPLEMENT_RESIZE_TEX(type) \
texture< type , cudaTextureType2D> tex_resize_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
struct tex_resize_ ## type ## _reader \
{ \
typedef type elem_type; \
typedef int index_type; \
int xoff; \
int yoff; \
__device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
{ \
return tex2D(tex_resize_ ## type, x + xoff, y + yoff); \
} \
}; \
template <template <typename> class Filter> struct ResizeDispatcherNonStream<Filter, type > \
{ \
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz< type > dst) \
{ \
const dim3 block(32, 8); \
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
if (srcWhole.data == src.data) \
{ \
bindTexture(&tex_resize_ ## type, src); \
tex_resize_ ## type ## _reader texSrc; \
texSrc.xoff = 0; \
texSrc.yoff = 0; \
Filter<tex_resize_ ## type ## _reader> filteredSrc(texSrc); \
resize<<<grid, block>>>(filteredSrc, fx, fy, dst); \
} \
else \
{ \
bindTexture(&tex_resize_ ## type, srcWhole); \
tex_resize_ ## type ## _reader texSrc; \
texSrc.xoff = xoff; \
texSrc.yoff = yoff; \
BrdReplicate< type > brd(src.rows, src.cols); \
BorderReader<tex_resize_ ## type ## _reader, BrdReplicate< type > > brdSrc(texSrc, brd); \
Filter< BorderReader<tex_resize_ ## type ## _reader, BrdReplicate< type > > > filteredSrc(brdSrc); \
resize<<<grid, block>>>(filteredSrc, fx, fy, dst); \
} \
cudaSafeCall( cudaGetLastError() ); \
cudaSafeCall( cudaDeviceSynchronize() ); \
} \
}; \
template <> struct ResizeDispatcherNonStream<PointFilter, type > \
{ \
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz< type > dst) \
{ \
const dim3 block(32, 8); \
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
bindTexture(&tex_resize_ ## type, srcWhole); \
tex_resize_ ## type ## _reader texSrc; \
texSrc.xoff = xoff; \
texSrc.yoff = yoff; \
resize<<<grid, block>>>(texSrc, fx, fy, dst); \
cudaSafeCall( cudaGetLastError() ); \
cudaSafeCall( cudaDeviceSynchronize() ); \
} \
}; \
template <> struct ResizeDispatcherNonStream<LinearFilter, type > \
{ \
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz< type > dst) \
{ \
const dim3 block(32, 8); \
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
if (srcWhole.data == src.data) \
{ \
bindTexture(&tex_resize_ ## type, src); \
tex_resize_ ## type ## _reader texSrc; \
texSrc.xoff = 0; \
texSrc.yoff = 0; \
LinearFilter<tex_resize_ ## type ## _reader> filteredSrc(texSrc); \
resize<<<grid, block>>>(filteredSrc, fx, fy, dst); \
} \
else \
{ \
bindTexture(&tex_resize_ ## type, srcWhole); \
tex_resize_ ## type ## _reader texSrc; \
texSrc.xoff = xoff; \
texSrc.yoff = yoff; \
BrdReplicate< type > brd(src.rows, src.cols); \
BorderReader<tex_resize_ ## type ## _reader, BrdReplicate< type > > brdSrc(texSrc, brd); \
LinearFilter< BorderReader<tex_resize_ ## type ## _reader, BrdReplicate< type > > > filteredSrc(brdSrc); \
resize<<<grid, block>>>(filteredSrc, fx, fy, dst); \
} \
cudaSafeCall( cudaGetLastError() ); \
cudaSafeCall( cudaDeviceSynchronize() ); \
} \
};
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar4)
//OPENCV_GPU_IMPLEMENT_RESIZE_TEX(schar)
//OPENCV_GPU_IMPLEMENT_RESIZE_TEX(char4)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort4)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short4)
//OPENCV_GPU_IMPLEMENT_RESIZE_TEX(int)
//OPENCV_GPU_IMPLEMENT_RESIZE_TEX(int4)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float4)
#undef OPENCV_GPU_IMPLEMENT_RESIZE_TEX
template <template <typename> class Filter, typename T> struct ResizeDispatcher
resize<<<grid, block>>>(filteredSrc, dst, fy, fx);
}
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
// ResizeNearestDispatcher
template <typename T> struct ResizeNearestDispatcher
{
static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
{
call_resize_nearest_glob(src, dst, fy, fx, stream);
}
};
template <typename T> struct SelectImplForNearest
{
static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
{
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
if (stream)
call_resize_nearest_glob(src, dst, fy, fx, stream);
else
{
if (
stream == 0
)
ResizeDispatcherNonStream<Filter, T>::call(src, srcWhole, xoff, yoff, fx, fy, dst
);
if (
fx > 1 || fy > 1
)
call_resize_nearest_glob(src, dst, fy, fx, 0
);
else
ResizeDispatcherStream<Filter, T>::call(src, fx, fy, dst, stream
);
call_resize_nearest_tex(src, srcWhole, yoff, xoff, dst, fy, fx
);
}
};
}
};
template <typename Ptr2D, typename T> __global__ void resize_area(const Ptr2D src, PtrStepSz<T> dst)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
template <> struct ResizeNearestDispatcher<uchar> : SelectImplForNearest<uchar> {};
template <> struct ResizeNearestDispatcher<uchar4> : SelectImplForNearest<uchar4> {};
if (x < dst.cols && y < dst.rows)
{
dst(y, x) = src(y, x);
}
template <> struct ResizeNearestDispatcher<ushort> : SelectImplForNearest<ushort> {};
template <> struct ResizeNearestDispatcher<ushort4> : SelectImplForNearest<ushort4> {};
template <> struct ResizeNearestDispatcher<short> : SelectImplForNearest<short> {};
template <> struct ResizeNearestDispatcher<short4> : SelectImplForNearest<short4> {};
template <> struct ResizeNearestDispatcher<float> : SelectImplForNearest<float> {};
template <> struct ResizeNearestDispatcher<float4> : SelectImplForNearest<float4> {};
// ResizeLinearDispatcher
template <typename T> struct ResizeLinearDispatcher
{
static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
{
call_resize_linear_glob(src, dst, fy, fx, stream);
}
};
template <typename T> struct ResizeAreaDispatcher
template <typename T> struct SelectImplForLinear
{
static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
{
static void call(PtrStepSz<T> src, PtrStepSz<T>, int, int, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
if (stream)
call_resize_linear_glob(src, dst, fy, fx, stream);
else
{
const int iscale_x = (int) round(fx);
const int iscale_y = (int) round(fy);
const dim3 block(32, 8);
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
if (std::abs(fx - iscale_x) < FLT_MIN && std::abs(fy - iscale_y) < FLT_MIN)
{
BrdConstant<T> brd(src.rows, src.cols);
BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
IntegerAreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);
resize_area<<<grid, block, 0, stream>>>(filteredSrc, dst);
}
if (fx > 1 || fy > 1)
call_resize_linear_glob(src, dst, fy, fx, 0);
else
{
BrdConstant<T> brd(src.rows, src.cols);
BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
AreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);
resize_area<<<grid, block, 0, stream>>>(filteredSrc, dst);
}
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
call_resize_linear_tex(src, srcWhole, yoff, xoff, dst, fy, fx);
}
};
}
};
template <> struct ResizeLinearDispatcher<uchar> : SelectImplForLinear<uchar> {};
template <> struct ResizeLinearDispatcher<uchar4> : SelectImplForLinear<uchar4> {};
template <> struct ResizeLinearDispatcher<ushort> : SelectImplForLinear<ushort> {};
template <> struct ResizeLinearDispatcher<ushort4> : SelectImplForLinear<ushort4> {};
template <typename T> void resize_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy,
PtrStepSzb dst, int interpolation, cudaStream_t stream)
template <> struct ResizeLinearDispatcher<short> : SelectImplForLinear<short> {};
template <> struct ResizeLinearDispatcher<short4> : SelectImplForLinear<short4> {};
template <> struct ResizeLinearDispatcher<float> : SelectImplForLinear<float> {};
template <> struct ResizeLinearDispatcher<float4> : SelectImplForLinear<float4> {};
// ResizeCubicDispatcher
template <typename T> struct ResizeCubicDispatcher
{
static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
{
call_resize_cubic_glob(src, dst, fy, fx, stream);
}
};
template <typename T> struct SelectImplForCubic
{
static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
{
if (stream)
call_resize_cubic_glob(src, dst, fy, fx, stream);
else
call_resize_cubic_tex(src, srcWhole, yoff, xoff, dst, fy, fx);
}
};
template <> struct ResizeCubicDispatcher<uchar> : SelectImplForCubic<uchar> {};
template <> struct ResizeCubicDispatcher<uchar4> : SelectImplForCubic<uchar4> {};
template <> struct ResizeCubicDispatcher<ushort> : SelectImplForCubic<ushort> {};
template <> struct ResizeCubicDispatcher<ushort4> : SelectImplForCubic<ushort4> {};
template <> struct ResizeCubicDispatcher<short> : SelectImplForCubic<short> {};
template <> struct ResizeCubicDispatcher<short4> : SelectImplForCubic<short4> {};
template <> struct ResizeCubicDispatcher<float> : SelectImplForCubic<float> {};
template <> struct ResizeCubicDispatcher<float4> : SelectImplForCubic<float4> {};
// ResizeAreaDispatcher
template <typename T> struct ResizeAreaDispatcher
{
static void call(const PtrStepSz<T>& src, const PtrStepSz<T>&, int, int, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
{
typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream);
const int iscale_x = (int) round(fx);
const int iscale_y = (int) round(fy);
const dim3 block(32, 8);
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
static const caller_t callers[4] =
if (std::abs(fx - iscale_x) < FLT_MIN && std::abs(fy - iscale_y) < FLT_MIN)
{
ResizeDispatcher<PointFilter, T>::call,
ResizeDispatcher<LinearFilter, T>::call,
ResizeDispatcher<CubicFilter, T>::call,
ResizeAreaDispatcher<T>::call
};
// change to linear if area interpolation upscaling
if (interpolation == 3 && (fx <= 1.f || fy <= 1.f))
interpolation = 1;
callers[interpolation](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), xoff, yoff, fx, fy,
static_cast< PtrStepSz<T> >(dst), stream);
BrdConstant<T> brd(src.rows, src.cols);
BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
IntegerAreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);
resize_area<<<grid, block, 0, stream>>>(filteredSrc, dst);
}
else
{
BrdConstant<T> brd(src.rows, src.cols);
BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
AreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);
resize_area<<<grid, block, 0, stream>>>(filteredSrc, dst);
}
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
// resize
template <typename T> void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream)
{
typedef void (*func_t)(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream);
static const func_t funcs[4] =
{
ResizeNearestDispatcher<T>::call,
ResizeLinearDispatcher<T>::call,
ResizeCubicDispatcher<T>::call,
ResizeAreaDispatcher<T>::call
};
// change to linear if area interpolation upscaling
if (interpolation == 3 && (fx <= 1.f || fy <= 1.f))
interpolation = 1;
funcs[interpolation](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), yoff, xoff, static_cast< PtrStepSz<T> >(dst), fy, fx, stream);
}
template void resize<uchar >(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
template void resize<uchar3>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
template void resize<uchar4>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
template void resize<ushort >(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
template void resize<ushort3>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
template void resize<ushort4>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
template void resize<short >(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
template void resize<short3>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
template void resize<short4>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
template void resize_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
template void resize_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
template void resize_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
template void resize_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
template void resize_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
template void resize_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
template void resize_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
template void resize_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
template void resize_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
template void resize_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
template void resize_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
template void resize_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
} // namespace imgproc
}}} // namespace cv { namespace gpu { namespace device
template void resize<float >(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
template void resize<float3>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
template void resize<float4>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
}}}
#endif /* CUDA_DISABLER */
modules/gpu/src/resize.cpp
View file @
21c4753f
...
...
@@ -50,27 +50,21 @@ void cv::gpu::resize(const GpuMat&, GpuMat&, Size, double, double, int, Stream&)
namespace
cv
{
namespace
gpu
{
namespace
device
{
namespace
imgproc
{
template
<
typename
T
>
void
resize_gpu
(
PtrStepSzb
src
,
PtrStepSzb
srcWhole
,
int
xoff
,
int
yoff
,
float
fx
,
float
fy
,
PtrStepSzb
dst
,
int
interpolation
,
cudaStream_t
stream
);
}
template
<
typename
T
>
void
resize
(
const
PtrStepSzb
&
src
,
const
PtrStepSzb
&
srcWhole
,
int
yoff
,
int
xoff
,
const
PtrStepSzb
&
dst
,
float
fy
,
float
fx
,
int
interpolation
,
cudaStream_t
stream
);
}}}
void
cv
::
gpu
::
resize
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
Size
dsize
,
double
fx
,
double
fy
,
int
interpolation
,
Stream
&
stream
)
{
using
namespace
::
cv
::
gpu
::
device
::
imgproc
;
typedef
void
(
*
func_t
)(
PtrStepSzb
src
,
PtrStepSzb
srcWhole
,
int
xoff
,
int
yoff
,
float
fx
,
float
fy
,
PtrStepSzb
dst
,
int
interpolation
,
cudaStream_t
stream
);
typedef
void
(
*
func_t
)(
const
PtrStepSzb
&
src
,
const
PtrStepSzb
&
srcWhole
,
int
yoff
,
int
xoff
,
const
PtrStepSzb
&
dst
,
float
fy
,
float
fx
,
int
interpolation
,
cudaStream_t
stream
);
static
const
func_t
funcs
[
6
][
4
]
=
{
{
resize_gpu
<
uchar
>
,
0
/*resize_gpu<uchar2>*/
,
resize_gpu
<
uchar3
>
,
resize_gpu
<
uchar4
>
},
{
0
/*
resize_gpu<schar>*/
,
0
/*resize_gpu<char2>*/
,
0
/*resize_gpu<char3>*/
,
0
/*resize_gpu
<char4>*/
},
{
resize_gpu
<
ushort
>
,
0
/*resize_gpu<ushort2>*/
,
resize_gpu
<
ushort3
>
,
resize_gpu
<
ushort4
>
},
{
resize_gpu
<
short
>
,
0
/*resize_gpu<short2>*/
,
resize_gpu
<
short3
>
,
resize_gpu
<
short4
>
},
{
0
/*
resize_gpu<int>*/
,
0
/*resize_gpu<int2>*/
,
0
/*resize_gpu<int3>*/
,
0
/*resize_gpu
<int4>*/
},
{
resize_gpu
<
float
>
,
0
/*resize_gpu<float2>*/
,
resize_gpu
<
float3
>
,
resize_gpu
<
float4
>
}
{
device
::
resize
<
uchar
>
,
0
/*device::resize<uchar2>*/
,
device
::
resize
<
uchar3
>
,
device
::
resize
<
uchar4
>
},
{
0
/*
device::resize<schar>*/
,
0
/*device::resize<char2>*/
,
0
/*device::resize<char3>*/
,
0
/*device::resize
<char4>*/
},
{
device
::
resize
<
ushort
>
,
0
/*device::resize<ushort2>*/
,
device
::
resize
<
ushort3
>
,
device
::
resize
<
ushort4
>
},
{
device
::
resize
<
short
>
,
0
/*device::resize<short2>*/
,
device
::
resize
<
short3
>
,
device
::
resize
<
short4
>
},
{
0
/*
device::resize<int>*/
,
0
/*device::resize<int2>*/
,
0
/*device::resize<int3>*/
,
0
/*device::resize
<int4>*/
},
{
device
::
resize
<
float
>
,
0
/*device::resize<float2>*/
,
device
::
resize
<
float3
>
,
device
::
resize
<
float4
>
}
};
CV_Assert
(
src
.
depth
()
<=
CV_32F
&&
src
.
channels
()
<=
4
);
...
...
@@ -108,7 +102,7 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, doub
src
.
locateROI
(
wholeSize
,
ofs
);
PtrStepSzb
wholeSrc
(
wholeSize
.
height
,
wholeSize
.
width
,
src
.
datastart
,
src
.
step
);
func
(
src
,
wholeSrc
,
ofs
.
x
,
ofs
.
y
,
static_cast
<
float
>
(
1.0
/
fx
),
static_cast
<
float
>
(
1.0
/
fy
),
dst
,
interpolation
,
StreamAccessor
::
getStream
(
stream
));
func
(
src
,
wholeSrc
,
ofs
.
y
,
ofs
.
x
,
dst
,
static_cast
<
float
>
(
1.0
/
fy
),
static_cast
<
float
>
(
1.0
/
fx
)
,
interpolation
,
StreamAccessor
::
getStream
(
stream
));
}
#endif // HAVE_CUDA
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment