Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
21c4753f
Commit
21c4753f
authored
Aug 23, 2013
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
used global memory access for up-scaling
parent
d1f6a23a
Show whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
265 additions
and
208 deletions
+265
-208
resize.cu
modules/gpu/src/cuda/resize.cu
+256
-193
resize.cpp
modules/gpu/src/resize.cpp
+9
-15
No files found.
modules/gpu/src/cuda/resize.cu
View file @
21c4753f
...
...
@@ -52,9 +52,9 @@
namespace cv { namespace gpu { namespace device
{
namespace imgproc
{
template <typename T> __global__ void resize_nearest(const PtrStep<T> src, const float fx, const float fy, PtrStepSz<T> dst
)
// kernels
template <typename T> __global__ void resize_nearest(const PtrStep<T> src, PtrStepSz<T> dst, const float fy, const float fx
)
{
const int dst_x = blockDim.x * blockIdx.x + threadIdx.x;
const int dst_y = blockDim.y * blockIdx.y + threadIdx.y;
...
...
@@ -68,7 +68,7 @@ namespace cv { namespace gpu { namespace device
}
}
template <typename T> __global__ void resize_linear(const PtrStepSz<T> src, const float fx, const float fy, PtrStepSz<T> dst
)
template <typename T> __global__ void resize_linear(const PtrStepSz<T> src, PtrStepSz<T> dst, const float fy, const float fx
)
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type work_type;
...
...
@@ -105,7 +105,7 @@ namespace cv { namespace gpu { namespace device
}
}
template <class Ptr2D, typename T> __global__ void resize(const Ptr2D src, const float fx, const float fy, PtrStepSz<T> dst
)
template <class Ptr2D, typename T> __global__ void resize(const Ptr2D src, PtrStepSz<T> dst, const float fy, const float fx
)
{
const int dst_x = blockDim.x * blockIdx.x + threadIdx.x;
const int dst_y = blockDim.y * blockIdx.y + threadIdx.y;
...
...
@@ -119,222 +119,298 @@ namespace cv { namespace gpu { namespace device
}
}
template <template <typename> class Filter, typename T> struct ResizeDispatcherStream
template <typename Ptr2D, typename T> __global__ void resize_area(const Ptr2D src, PtrStepSz<T> dst)
{
static void call(PtrStepSz<T> src, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < dst.cols && y < dst.rows)
{
const dim3 block(32, 8);
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
dst(y, x) = src(y, x);
}
}
BrdReplicate<T> brd(src.rows, src.cols);
BorderReader< PtrStep<T>, BrdReplicate<T> > brdSrc(src, brd);
Filter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc);
// textures
resize<<<grid, block, 0, stream>>>(filteredSrc, fx, fy, dst);
cudaSafeCall( cudaGetLastError() );
}
template <typename T> struct TextureAccessor;
#define OPENCV_GPU_IMPLEMENT_RESIZE_TEX(type) \
texture<type, cudaTextureType2D, cudaReadModeElementType> tex_resize_##type (0, cudaFilterModePoint, cudaAddressModeClamp); \
template <> struct TextureAccessor<type> \
{ \
typedef type elem_type; \
typedef int index_type; \
int xoff; \
int yoff; \
__device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
{ \
return tex2D(tex_resize_##type, x + xoff, y + yoff); \
} \
__host__ static void bind(const PtrStepSz<type>& mat) \
{ \
bindTexture(&tex_resize_##type, mat); \
} \
};
template <typename T> struct ResizeDispatcherStream<PointFilter, T>
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar4)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort4)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short4)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float4)
#undef OPENCV_GPU_IMPLEMENT_RESIZE_TEX
template <typename T>
TextureAccessor<T> texAccessor(const PtrStepSz<T>& mat, int yoff, int xoff)
{
static void call(PtrStepSz<T> src, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
TextureAccessor<T>::bind(mat);
TextureAccessor<T> t;
t.xoff = xoff;
t.yoff = yoff;
return t;
}
// callers for nearest interpolation
// Nearest-neighbour resize that reads the source through the PtrStepSz
// passed to the kernel (no texture binding).
//
// src, dst : source and destination images.
// fy, fx   : forwarded unchanged to the resize_nearest kernel.
// stream   : stream the kernel is enqueued on. When it is the default
//            stream (0) the call blocks until the device is idle; for any
//            other stream the call returns right after the launch check.
template <typename T>
void call_resize_nearest_glob(const PtrStepSz<T>& src, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
{
    // One thread per destination pixel; divUp presumably rounds the grid up
    // so partially filled tiles are still covered (TODO confirm in divUp).
    const dim3 block(32, 8);
    const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

    // Argument order follows the kernel signature: (src, dst, fy, fx).
    resize_nearest<<<grid, block, 0, stream>>>(src, dst, fy, fx);
    cudaSafeCall( cudaGetLastError() );

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}
};
template <typename T> struct ResizeDispatcherStream<LinearFilter, T>
{
static void call(PtrStepSz<T> src, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream)
// Nearest-neighbour resize that samples the source through a texture.
//
// srcWhole   : image that gets bound to the texture by texAccessor.
// yoff, xoff : offsets handed to texAccessor together with srcWhole.
// dst        : destination image; its size determines the launch grid.
// fy, fx     : forwarded unchanged to the resize kernel.
// src        : not used by this function; it is part of the signature
//              shared with the other *_tex callers.
//
// There is no stream parameter: the kernel is launched without an explicit
// stream and the function always waits for the device before returning.
template <typename T>
void call_resize_nearest_tex(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx)
{
    const dim3 block(32, 8);
    const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

    resize<<<grid, block>>>(texAccessor(srcWhole, yoff, xoff), dst, fy, fx);
    cudaSafeCall( cudaGetLastError() );

    cudaSafeCall( cudaDeviceSynchronize() );
}
};
template <template <typename> class Filter, typename T> struct ResizeDispatcherNonStream
// callers for linear interpolation
// Bilinear resize that reads the source through the PtrStepSz passed to the
// kernel (no texture binding).
//
// src, dst : source and destination images.
// fy, fx   : forwarded unchanged to the resize_linear kernel.
// stream   : stream the kernel is enqueued on. When it is the default
//            stream (0) the call blocks until the device is idle; for any
//            other stream the call returns right after the launch check.
template <typename T>
void call_resize_linear_glob(const PtrStepSz<T>& src, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
{
    const dim3 block(32, 8);
    const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

    // The launch must name `stream` explicitly. Without it the kernel runs
    // on the default stream while the synchronisation below is skipped for
    // non-zero streams, so the caller's stream ordering is not honoured.
    resize_linear<<<grid, block, 0, stream>>>(src, dst, fy, fx);
    cudaSafeCall( cudaGetLastError() );

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}
// Bilinear resize that samples the source through a texture.
//
// src        : the image being resized; its rows/cols bound the border
//              handling when it is a sub-region of srcWhole.
// srcWhole   : image whose data pointer is compared with src's to decide
//              whether src is used as-is or as an offset view.
// yoff, xoff : offsets passed to texAccessor in the offset-view case.
// dst        : destination image; its size determines the launch grid.
// fy, fx     : forwarded unchanged to the resize kernel.
//
// The kernel is launched without an explicit stream and the function always
// waits for the device before returning.
template <typename T>
void call_resize_linear_tex(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx)
{
    const dim3 block(32, 8);
    const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

    if (srcWhole.data == src.data)
    {
        // src starts at the same address as srcWhole: bind src itself with
        // zero offsets and filter the texture reads directly.
        TextureAccessor<T> texSrc = texAccessor(src, 0, 0);
        LinearFilter< TextureAccessor<T> > filteredSrc(texSrc);

        resize<<<grid, block>>>(filteredSrc, dst, fy, fx);
    }
    else
    {
        // src is an offset view: bind the whole image, shift reads by
        // (yoff, xoff) and wrap them in a BrdReplicate reader sized to src.
        TextureAccessor<T> texSrc = texAccessor(srcWhole, yoff, xoff);

        BrdReplicate<T> brd(src.rows, src.cols);
        BorderReader<TextureAccessor<T>, BrdReplicate<T> > brdSrc(texSrc, brd);
        LinearFilter< BorderReader<TextureAccessor<T>, BrdReplicate<T> > > filteredSrc(brdSrc);

        resize<<<grid, block>>>(filteredSrc, dst, fy, fx);
    }

    cudaSafeCall( cudaGetLastError() );

    cudaSafeCall( cudaDeviceSynchronize() );
}
};
template <typename T> struct ResizeDispatcherNonStream<PointFilter, T>
{
static void call(PtrStepSz<T> src, PtrStepSz<T>, int, int, float fx, float fy, PtrStepSz<T> dst)
// callers for cubic interpolation
// Bicubic resize that reads the source through a BorderReader over the
// PtrStep view of `src` (no texture binding).
//
// src, dst : source and destination images.
// fy, fx   : forwarded unchanged to the generic resize kernel.
// stream   : stream the kernel is enqueued on. When it is the default
//            stream (0) the call blocks until the device is idle; for any
//            other stream the call returns right after the launch check.
template <typename T>
void call_resize_cubic_glob(const PtrStepSz<T>& src, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
{
    const dim3 block(32, 8);
    const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

    // Reader stack: BrdReplicate sized to src -> BorderReader -> CubicFilter.
    BrdReplicate<T> brd(src.rows, src.cols);
    BorderReader< PtrStep<T>, BrdReplicate<T> > brdSrc(src, brd);
    CubicFilter< BorderReader< PtrStep<T>, BrdReplicate<T> > > filteredSrc(brdSrc);

    resize<<<grid, block, 0, stream>>>(filteredSrc, dst, fy, fx);
    cudaSafeCall( cudaGetLastError() );

    if (stream == 0)
        cudaSafeCall( cudaDeviceSynchronize() );
}
};
template <typename T> struct ResizeDispatcherNonStream<LinearFilter, T>
{
static void call(PtrStepSz<T> src, PtrStepSz<T>, int, int, float fx, float fy, PtrStepSz<T> dst)
// Bicubic resize that samples the source through a texture.
//
// src        : the image being resized; its rows/cols bound the border
//              handling when it is a sub-region of srcWhole.
// srcWhole   : image whose data pointer is compared with src's to decide
//              whether src is used as-is or as an offset view.
// yoff, xoff : offsets passed to texAccessor in the offset-view case.
// dst        : destination image; its size determines the launch grid.
// fy, fx     : forwarded unchanged to the resize kernel.
//
// The kernel is launched without an explicit stream and the function always
// waits for the device before returning.
template <typename T>
void call_resize_cubic_tex(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx)
{
    const dim3 block(32, 8);
    const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));

    if (srcWhole.data == src.data)
    {
        // src starts at the same address as srcWhole: bind src itself with
        // zero offsets and filter the texture reads directly.
        TextureAccessor<T> texSrc = texAccessor(src, 0, 0);
        CubicFilter< TextureAccessor<T> > filteredSrc(texSrc);

        resize<<<grid, block>>>(filteredSrc, dst, fy, fx);
    }
    else
    {
        // src is an offset view: bind the whole image, shift reads by
        // (yoff, xoff) and wrap them in a BrdReplicate reader sized to src.
        TextureAccessor<T> texSrc = texAccessor(srcWhole, yoff, xoff);

        BrdReplicate<T> brd(src.rows, src.cols);
        BorderReader<TextureAccessor<T>, BrdReplicate<T> > brdSrc(texSrc, brd);
        CubicFilter< BorderReader<TextureAccessor<T>, BrdReplicate<T> > > filteredSrc(brdSrc);

        resize<<<grid, block>>>(filteredSrc, dst, fy, fx);
    }

    cudaSafeCall( cudaGetLastError() );

    cudaSafeCall( cudaDeviceSynchronize() );
}
// ResizeNearestDispatcher
// Primary template of the nearest-neighbour dispatcher: always forwards to
// call_resize_nearest_glob. Specialisations for individual element types are
// declared separately in this file and derive from SelectImplForNearest.
template <typename T> struct ResizeNearestDispatcher
{
// srcWhole, yoff and xoff are accepted so every dispatcher shares one
// signature; this primary template does not use them.
static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
{
call_resize_nearest_glob(src, dst, fy, fx, stream);
}
};
#define OPENCV_GPU_IMPLEMENT_RESIZE_TEX(type) \
texture< type , cudaTextureType2D> tex_resize_ ## type (0, cudaFilterModePoint, cudaAddressModeClamp); \
struct tex_resize_ ## type ## _reader \
{ \
typedef type elem_type; \
typedef int index_type; \
int xoff; \
int yoff; \
__device__ __forceinline__ elem_type operator ()(index_type y, index_type x) const \
{ \
return tex2D(tex_resize_ ## type, x + xoff, y + yoff); \
} \
}; \
template <template <typename> class Filter> struct ResizeDispatcherNonStream<Filter, type > \
{ \
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz< type > dst) \
{ \
const dim3 block(32, 8); \
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
if (srcWhole.data == src.data) \
{ \
bindTexture(&tex_resize_ ## type, src); \
tex_resize_ ## type ## _reader texSrc; \
texSrc.xoff = 0; \
texSrc.yoff = 0; \
Filter<tex_resize_ ## type ## _reader> filteredSrc(texSrc); \
resize<<<grid, block>>>(filteredSrc, fx, fy, dst); \
} \
else \
{ \
bindTexture(&tex_resize_ ## type, srcWhole); \
tex_resize_ ## type ## _reader texSrc; \
texSrc.xoff = xoff; \
texSrc.yoff = yoff; \
BrdReplicate< type > brd(src.rows, src.cols); \
BorderReader<tex_resize_ ## type ## _reader, BrdReplicate< type > > brdSrc(texSrc, brd); \
Filter< BorderReader<tex_resize_ ## type ## _reader, BrdReplicate< type > > > filteredSrc(brdSrc); \
resize<<<grid, block>>>(filteredSrc, fx, fy, dst); \
} \
cudaSafeCall( cudaGetLastError() ); \
cudaSafeCall( cudaDeviceSynchronize() ); \
} \
}; \
template <> struct ResizeDispatcherNonStream<PointFilter, type > \
{ \
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz< type > dst) \
{ \
const dim3 block(32, 8); \
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
bindTexture(&tex_resize_ ## type, srcWhole); \
tex_resize_ ## type ## _reader texSrc; \
texSrc.xoff = xoff; \
texSrc.yoff = yoff; \
resize<<<grid, block>>>(texSrc, fx, fy, dst); \
cudaSafeCall( cudaGetLastError() ); \
cudaSafeCall( cudaDeviceSynchronize() ); \
} \
}; \
template <> struct ResizeDispatcherNonStream<LinearFilter, type > \
{ \
static void call(PtrStepSz< type > src, PtrStepSz< type > srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz< type > dst) \
{ \
const dim3 block(32, 8); \
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
if (srcWhole.data == src.data) \
{ \
bindTexture(&tex_resize_ ## type, src); \
tex_resize_ ## type ## _reader texSrc; \
texSrc.xoff = 0; \
texSrc.yoff = 0; \
LinearFilter<tex_resize_ ## type ## _reader> filteredSrc(texSrc); \
resize<<<grid, block>>>(filteredSrc, fx, fy, dst); \
} \
else \
{ \
bindTexture(&tex_resize_ ## type, srcWhole); \
tex_resize_ ## type ## _reader texSrc; \
texSrc.xoff = xoff; \
texSrc.yoff = yoff; \
BrdReplicate< type > brd(src.rows, src.cols); \
BorderReader<tex_resize_ ## type ## _reader, BrdReplicate< type > > brdSrc(texSrc, brd); \
LinearFilter< BorderReader<tex_resize_ ## type ## _reader, BrdReplicate< type > > > filteredSrc(brdSrc); \
resize<<<grid, block>>>(filteredSrc, fx, fy, dst); \
} \
cudaSafeCall( cudaGetLastError() ); \
cudaSafeCall( cudaDeviceSynchronize() ); \
} \
// Chooses between the texture-based and the direct-read nearest-neighbour
// callers. Used as the base class of the per-type ResizeNearestDispatcher
// specialisations.
template <typename T>
struct SelectImplForNearest
{
    static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
    {
        // The texture caller takes no stream, so it is only eligible on the
        // default stream, and only when neither factor is greater than 1.
        const bool onDefaultStream = !stream;
        const bool anyFactorAboveOne = fx > 1 || fy > 1;

        if (onDefaultStream && !anyFactorAboveOne)
        {
            call_resize_nearest_tex(src, srcWhole, yoff, xoff, dst, fy, fx);
            return;
        }

        // Either a user stream was supplied or a factor exceeds 1; in the
        // latter case `stream` is the default stream (0) here.
        call_resize_nearest_glob(src, dst, fy, fx, stream);
    }
};
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(uchar4)
template <> struct ResizeNearestDispatcher<uchar> : SelectImplForNearest<uchar> {};
template <> struct ResizeNearestDispatcher<uchar4> : SelectImplForNearest<uchar4> {};
//OPENCV_GPU_IMPLEMENT_RESIZE_TEX(schar)
//OPENCV_GPU_IMPLEMENT_RESIZE_TEX(char4)
template <> struct ResizeNearestDispatcher<ushort> : SelectImplForNearest<ushort> {};
template <> struct ResizeNearestDispatcher<ushort4> : SelectImplForNearest<ushort4> {};
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(ushort4)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(short4)
template <> struct ResizeNearestDispatcher<short> : SelectImplForNearest<short> {};
template <> struct ResizeNearestDispatcher<short4> : SelectImplForNearest<short4> {};
//OPENCV_GPU_IMPLEMENT_RESIZE_TEX(int)
//OPENCV_GPU_IMPLEMENT_RESIZE_TEX(int4)
template <> struct ResizeNearestDispatcher<float> : SelectImplForNearest<float> {};
template <> struct ResizeNearestDispatcher<float4> : SelectImplForNearest<float4> {};
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float)
OPENCV_GPU_IMPLEMENT_RESIZE_TEX(float4)
// ResizeLinearDispatcher
#undef OPENCV_GPU_IMPLEMENT_RESIZE_TEX
// Primary template of the bilinear dispatcher: always forwards to
// call_resize_linear_glob. Specialisations for individual element types are
// declared separately in this file and derive from SelectImplForLinear.
template <typename T> struct ResizeLinearDispatcher
{
// srcWhole, yoff and xoff are accepted so every dispatcher shares one
// signature; this primary template does not use them.
static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
{
call_resize_linear_glob(src, dst, fy, fx, stream);
}
};
template <template <typename> class Filter, typename T> struct ResizeDispatche
r
template <typename T> struct SelectImplForLinea
r
{
static void call(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz<T> dst
, cudaStream_t stream)
static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx
, cudaStream_t stream)
{
if (stream == 0
)
ResizeDispatcherNonStream<Filter, T>::call(src, srcWhole, xoff, yoff, fx, fy, dst
);
if (stream
)
call_resize_linear_glob(src, dst, fy, fx, stream
);
else
ResizeDispatcherStream<Filter, T>::call(src, fx, fy, dst, stream);
{
if (fx > 1 || fy > 1)
call_resize_linear_glob(src, dst, fy, fx, 0);
else
call_resize_linear_tex(src, srcWhole, yoff, xoff, dst, fy, fx);
}
}
};
template <typename Ptr2D, typename T> __global__ void resize_area(const Ptr2D src, PtrStepSz<T> dst)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
template <> struct ResizeLinearDispatcher<uchar> : SelectImplForLinear<uchar> {};
template <> struct ResizeLinearDispatcher<uchar4> : SelectImplForLinear<uchar4> {};
if (x < dst.cols && y < dst.rows)
template <> struct ResizeLinearDispatcher<ushort> : SelectImplForLinear<ushort> {};
template <> struct ResizeLinearDispatcher<ushort4> : SelectImplForLinear<ushort4> {};
template <> struct ResizeLinearDispatcher<short> : SelectImplForLinear<short> {};
template <> struct ResizeLinearDispatcher<short4> : SelectImplForLinear<short4> {};
template <> struct ResizeLinearDispatcher<float> : SelectImplForLinear<float> {};
template <> struct ResizeLinearDispatcher<float4> : SelectImplForLinear<float4> {};
// ResizeCubicDispatcher
template <typename T> struct ResizeCubicDispatcher
{
dst(y, x) = src(y, x);
static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
{
call_resize_cubic_glob(src, dst, fy, fx, stream);
}
};
// Chooses between the texture-based and the direct-read bicubic callers.
// Used as the base class of the per-type ResizeCubicDispatcher
// specialisations.
template <typename T>
struct SelectImplForCubic
{
    static void call(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream)
    {
        // The texture caller takes no stream, so it is used only when the
        // caller asked for the default stream.
        if (!stream)
        {
            call_resize_cubic_tex(src, srcWhole, yoff, xoff, dst, fy, fx);
            return;
        }

        call_resize_cubic_glob(src, dst, fy, fx, stream);
    }
};
template <> struct ResizeCubicDispatcher<uchar> : SelectImplForCubic<uchar> {};
template <> struct ResizeCubicDispatcher<uchar4> : SelectImplForCubic<uchar4> {};
template <> struct ResizeCubicDispatcher<ushort> : SelectImplForCubic<ushort> {};
template <> struct ResizeCubicDispatcher<ushort4> : SelectImplForCubic<ushort4> {};
template <> struct ResizeCubicDispatcher<short> : SelectImplForCubic<short> {};
template <> struct ResizeCubicDispatcher<short4> : SelectImplForCubic<short4> {};
template <> struct ResizeCubicDispatcher<float> : SelectImplForCubic<float> {};
template <> struct ResizeCubicDispatcher<float4> : SelectImplForCubic<float4> {};
// ResizeAreaDispatcher
template <typename T> struct ResizeAreaDispatcher
{
static void call(PtrStepSz<T> src, PtrStepSz<T>, int, int, float fx, float fy, PtrStepSz<T> dst
, cudaStream_t stream)
static void call(const PtrStepSz<T>& src, const PtrStepSz<T>&, int, int, const PtrStepSz<T>& dst, float fy, float fx
, cudaStream_t stream)
{
const int iscale_x = (int) round(fx);
const int iscale_y = (int) round(fy);
...
...
@@ -347,6 +423,7 @@ namespace cv { namespace gpu { namespace device
BrdConstant<T> brd(src.rows, src.cols);
BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
IntegerAreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);
resize_area<<<grid, block, 0, stream>>>(filteredSrc, dst);
}
else
...
...
@@ -354,25 +431,27 @@ namespace cv { namespace gpu { namespace device
BrdConstant<T> brd(src.rows, src.cols);
BorderReader< PtrStep<T>, BrdConstant<T> > brdSrc(src, brd);
AreaFilter< BorderReader< PtrStep<T>, BrdConstant<T> > > filteredSrc(brdSrc, fx, fy);
resize_area<<<grid, block, 0, stream>>>(filteredSrc, dst);
}
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
};
template <typename T> void resize_gpu(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy,
PtrStepSzb dst, int interpolation, cudaStream_t stream)
{
typedef void (*caller_t)(PtrStepSz<T> src, PtrStepSz<T> srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSz<T> dst, cudaStream_t stream);
// resize
static const caller_t callers[4] =
template <typename T> void resize(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream)
{
typedef void (*func_t)(const PtrStepSz<T>& src, const PtrStepSz<T>& srcWhole, int yoff, int xoff, const PtrStepSz<T>& dst, float fy, float fx, cudaStream_t stream);
static const func_t funcs[4] =
{
ResizeDispatcher<PointFilter,
T>::call,
ResizeDispatcher<LinearFilter,
T>::call,
ResizeDispatcher<CubicFilter,
T>::call,
ResizeNearestDispatcher<
T>::call,
ResizeLinearDispatcher<
T>::call,
ResizeCubicDispatcher<
T>::call,
ResizeAreaDispatcher<T>::call
};
...
...
@@ -380,40 +459,24 @@ namespace cv { namespace gpu { namespace device
if (interpolation == 3 && (fx <= 1.f || fy <= 1.f))
interpolation = 1;
callers[interpolation](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), xoff, yoff, fx, fy,
static_cast< PtrStepSz<T> >(dst), stream);
funcs[interpolation](static_cast< PtrStepSz<T> >(src), static_cast< PtrStepSz<T> >(srcWhole), yoff, xoff, static_cast< PtrStepSz<T> >(dst), fy, fx, stream);
}
template void resize_gpu<uchar >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<uchar2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
template void resize_gpu<uchar3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
template void resize_gpu<uchar4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<schar>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<char2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<char3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<char4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
template void resize_gpu<ushort >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<ushort2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
template void resize_gpu<ushort3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
template void resize_gpu<ushort4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
template void resize_gpu<short >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<short2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
template void resize_gpu<short3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
template void resize_gpu<short4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<int >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<int2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<int3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<int4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
template void resize_gpu<float >(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
//template void resize_gpu<float2>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
template void resize_gpu<float3>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
template void resize_gpu<float4>(PtrStepSzb src, PtrStepSzb srcWhole, int xoff, int yoff, float fx, float fy, PtrStepSzb dst, int interpolation, cudaStream_t stream);
} // namespace imgproc
}}} // namespace cv { namespace gpu { namespace device
template void resize<uchar >(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
template void resize<uchar3>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
template void resize<uchar4>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
template void resize<ushort >(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
template void resize<ushort3>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
template void resize<ushort4>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
template void resize<short >(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
template void resize<short3>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
template void resize<short4>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
template void resize<float >(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
template void resize<float3>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
template void resize<float4>(const PtrStepSzb& src, const PtrStepSzb& srcWhole, int yoff, int xoff, const PtrStepSzb& dst, float fy, float fx, int interpolation, cudaStream_t stream);
}}}
#endif /* CUDA_DISABLER */
modules/gpu/src/resize.cpp
View file @
21c4753f
...
...
@@ -50,27 +50,21 @@ void cv::gpu::resize(const GpuMat&, GpuMat&, Size, double, double, int, Stream&)
namespace
cv
{
namespace
gpu
{
namespace
device
{
namespace
imgproc
{
template
<
typename
T
>
void
resize_gpu
(
PtrStepSzb
src
,
PtrStepSzb
srcWhole
,
int
xoff
,
int
yoff
,
float
fx
,
float
fy
,
PtrStepSzb
dst
,
int
interpolation
,
cudaStream_t
stream
);
}
void
resize
(
const
PtrStepSzb
&
src
,
const
PtrStepSzb
&
srcWhole
,
int
yoff
,
int
xoff
,
const
PtrStepSzb
&
dst
,
float
fy
,
float
fx
,
int
interpolation
,
cudaStream_t
stream
);
}}}
void
cv
::
gpu
::
resize
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
Size
dsize
,
double
fx
,
double
fy
,
int
interpolation
,
Stream
&
stream
)
{
using
namespace
::
cv
::
gpu
::
device
::
imgproc
;
typedef
void
(
*
func_t
)(
PtrStepSzb
src
,
PtrStepSzb
srcWhole
,
int
xoff
,
int
yoff
,
float
fx
,
float
fy
,
PtrStepSzb
dst
,
int
interpolation
,
cudaStream_t
stream
);
typedef
void
(
*
func_t
)(
const
PtrStepSzb
&
src
,
const
PtrStepSzb
&
srcWhole
,
int
yoff
,
int
xoff
,
const
PtrStepSzb
&
dst
,
float
fy
,
float
fx
,
int
interpolation
,
cudaStream_t
stream
);
static
const
func_t
funcs
[
6
][
4
]
=
{
{
resize_gpu
<
uchar
>
,
0
/*resize_gpu<uchar2>*/
,
resize_gpu
<
uchar3
>
,
resize_gpu
<
uchar4
>
},
{
0
/*
resize_gpu<schar>*/
,
0
/*resize_gpu<char2>*/
,
0
/*resize_gpu<char3>*/
,
0
/*resize_gpu
<char4>*/
},
{
resize_gpu
<
ushort
>
,
0
/*resize_gpu<ushort2>*/
,
resize_gpu
<
ushort3
>
,
resize_gpu
<
ushort4
>
},
{
resize_gpu
<
short
>
,
0
/*resize_gpu<short2>*/
,
resize_gpu
<
short3
>
,
resize_gpu
<
short4
>
},
{
0
/*
resize_gpu<int>*/
,
0
/*resize_gpu<int2>*/
,
0
/*resize_gpu<int3>*/
,
0
/*resize_gpu
<int4>*/
},
{
resize_gpu
<
float
>
,
0
/*resize_gpu<float2>*/
,
resize_gpu
<
float3
>
,
resize_gpu
<
float4
>
}
{
device
::
resize
<
uchar
>
,
0
/*device::resize<uchar2>*/
,
device
::
resize
<
uchar3
>
,
device
::
resize
<
uchar4
>
},
{
0
/*
device::resize<schar>*/
,
0
/*device::resize<char2>*/
,
0
/*device::resize<char3>*/
,
0
/*device::resize
<char4>*/
},
{
device
::
resize
<
ushort
>
,
0
/*device::resize<ushort2>*/
,
device
::
resize
<
ushort3
>
,
device
::
resize
<
ushort4
>
},
{
device
::
resize
<
short
>
,
0
/*device::resize<short2>*/
,
device
::
resize
<
short3
>
,
device
::
resize
<
short4
>
},
{
0
/*
device::resize<int>*/
,
0
/*device::resize<int2>*/
,
0
/*device::resize<int3>*/
,
0
/*device::resize
<int4>*/
},
{
device
::
resize
<
float
>
,
0
/*device::resize<float2>*/
,
device
::
resize
<
float3
>
,
device
::
resize
<
float4
>
}
};
CV_Assert
(
src
.
depth
()
<=
CV_32F
&&
src
.
channels
()
<=
4
);
...
...
@@ -108,7 +102,7 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, doub
src
.
locateROI
(
wholeSize
,
ofs
);
PtrStepSzb
wholeSrc
(
wholeSize
.
height
,
wholeSize
.
width
,
src
.
datastart
,
src
.
step
);
func
(
src
,
wholeSrc
,
ofs
.
x
,
ofs
.
y
,
static_cast
<
float
>
(
1.0
/
fx
),
static_cast
<
float
>
(
1.0
/
fy
),
dst
,
interpolation
,
StreamAccessor
::
getStream
(
stream
));
func
(
src
,
wholeSrc
,
ofs
.
y
,
ofs
.
x
,
dst
,
static_cast
<
float
>
(
1.0
/
fy
),
static_cast
<
float
>
(
1.0
/
fx
)
,
interpolation
,
StreamAccessor
::
getStream
(
stream
));
}
#endif // HAVE_CUDA
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment