Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
47d68f69
Commit
47d68f69
authored
Aug 31, 2011
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
implemented gpu::remap for all types
parent
78542854
Hide whitespace changes
Inline
Side-by-side
Showing
12 changed files
with
943 additions
and
326 deletions
+943
-326
devmem2d.hpp
modules/gpu/include/opencv2/gpu/devmem2d.hpp
+17
-4
gpu.hpp
modules/gpu/include/opencv2/gpu/gpu.hpp
+5
-4
filters.cu
modules/gpu/src/cuda/filters.cu
+4
-4
imgproc.cu
modules/gpu/src/cuda/imgproc.cu
+204
-172
surf.cu
modules/gpu/src/cuda/surf.cu
+25
-21
gpumat.cpp
modules/gpu/src/gpumat.cpp
+1
-1
imgproc.cpp
modules/gpu/src/imgproc.cpp
+67
-29
border_interpolate.hpp
modules/gpu/src/opencv2/gpu/device/border_interpolate.hpp
+348
-74
utility.hpp
modules/gpu/src/opencv2/gpu/device/utility.hpp
+54
-1
vec_traits.hpp
modules/gpu/src/opencv2/gpu/device/vec_traits.hpp
+55
-1
test_imgproc.cpp
modules/gpu/test/test_imgproc.cpp
+54
-11
tests.cpp
samples/gpu/performance/tests.cpp
+109
-4
No files found.
modules/gpu/include/opencv2/gpu/devmem2d.hpp
View file @
47d68f69
...
...
@@ -66,6 +66,9 @@ namespace cv
template
<
typename
T
>
struct
DevMem2D_
{
typedef
T
elem_type
;
typedef
int
index_type
;
int
cols
;
int
rows
;
T
*
data
;
...
...
@@ -79,8 +82,7 @@ namespace cv
template
<
typename
U
>
explicit
DevMem2D_
(
const
DevMem2D_
<
U
>&
d
)
:
cols
(
d
.
cols
),
rows
(
d
.
rows
),
data
((
T
*
)
d
.
data
),
step
(
d
.
step
)
{}
typedef
T
elem_type
;
enum
{
elem_size
=
sizeof
(
elem_type
)
};
__CV_GPU_HOST_DEVICE__
size_t
elemSize
()
const
{
return
elem_size
;
}
...
...
@@ -89,6 +91,9 @@ namespace cv
__CV_GPU_HOST_DEVICE__
operator
T
*
()
const
{
return
data
;
}
__CV_GPU_HOST_DEVICE__
T
&
operator
()(
int
y
,
int
x
)
{
return
ptr
(
y
)[
x
];
}
__CV_GPU_HOST_DEVICE__
const
T
&
operator
()(
int
y
,
int
x
)
const
{
return
ptr
(
y
)[
x
];
}
#if defined(__DEVCLASES_ADD_THRUST_BEGIN_END__)
thrust
::
device_ptr
<
T
>
begin
()
const
{
return
thrust
::
device_ptr
<
T
>
(
data
);
}
thrust
::
device_ptr
<
T
>
end
()
const
{
return
thrust
::
device_ptr
<
T
>
(
data
)
+
cols
*
rows
;
}
...
...
@@ -97,19 +102,24 @@ namespace cv
template
<
typename
T
>
struct
PtrStep_
{
typedef
T
elem_type
;
typedef
int
index_type
;
T
*
data
;
size_t
step
;
PtrStep_
()
:
data
(
0
),
step
(
0
)
{}
PtrStep_
(
const
DevMem2D_
<
T
>&
mem
)
:
data
(
mem
.
data
),
step
(
mem
.
step
)
{}
typedef
T
elem_type
;
enum
{
elem_size
=
sizeof
(
elem_type
)
};
__CV_GPU_HOST_DEVICE__
size_t
elemSize
()
const
{
return
elem_size
;
}
__CV_GPU_HOST_DEVICE__
T
*
ptr
(
int
y
=
0
)
{
return
(
T
*
)(
(
char
*
)
data
+
y
*
step
);
}
__CV_GPU_HOST_DEVICE__
const
T
*
ptr
(
int
y
=
0
)
const
{
return
(
const
T
*
)(
(
const
char
*
)
data
+
y
*
step
);
}
__CV_GPU_HOST_DEVICE__
T
&
operator
()(
int
y
,
int
x
)
{
return
ptr
(
y
)[
x
];
}
__CV_GPU_HOST_DEVICE__
const
T
&
operator
()(
int
y
,
int
x
)
const
{
return
ptr
(
y
)[
x
];
}
#if defined(__DEVCLASES_ADD_THRUST_BEGIN_END__)
thrust
::
device_ptr
<
T
>
begin
()
const
{
return
thrust
::
device_ptr
<
T
>
(
data
);
}
#endif
...
...
@@ -124,7 +134,10 @@ namespace cv
PtrStep_
<
T
>::
step
/=
PtrStep_
<
T
>::
elem_size
;
}
__CV_GPU_HOST_DEVICE__
T
*
ptr
(
int
y
=
0
)
{
return
PtrStep_
<
T
>::
data
+
y
*
PtrStep_
<
T
>::
step
;
}
__CV_GPU_HOST_DEVICE__
const
T
*
ptr
(
int
y
=
0
)
const
{
return
PtrStep_
<
T
>::
data
+
y
*
PtrStep_
<
T
>::
step
;
}
__CV_GPU_HOST_DEVICE__
const
T
*
ptr
(
int
y
=
0
)
const
{
return
PtrStep_
<
T
>::
data
+
y
*
PtrStep_
<
T
>::
step
;
}
__CV_GPU_HOST_DEVICE__
T
&
operator
()(
int
y
,
int
x
)
{
return
ptr
(
y
)[
x
];
}
__CV_GPU_HOST_DEVICE__
const
T
&
operator
()(
int
y
,
int
x
)
const
{
return
ptr
(
y
)[
x
];
}
};
typedef
DevMem2D_
<
unsigned
char
>
DevMem2D
;
...
...
modules/gpu/include/opencv2/gpu/gpu.hpp
View file @
47d68f69
...
...
@@ -596,8 +596,9 @@ namespace cv
////////////////////////////// Image processing //////////////////////////////
//! DST[x,y] = SRC[xmap[x,y],ymap[x,y]] with bilinear interpolation.
//! supports CV_8UC1, CV_8UC3 source types and CV_32FC1 map type
CV_EXPORTS
void
remap
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
const
GpuMat
&
xmap
,
const
GpuMat
&
ymap
);
//! supports CV_32FC1 map type
CV_EXPORTS
void
remap
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
const
GpuMat
&
xmap
,
const
GpuMat
&
ymap
,
int
interpolation
,
int
borderMode
=
BORDER_CONSTANT
,
const
Scalar
&
borderValue
=
Scalar
());
//! Does mean shift filtering on GPU.
CV_EXPORTS
void
meanShiftFiltering
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
int
sp
,
int
sr
,
...
...
@@ -761,10 +762,10 @@ namespace cv
CV_EXPORTS
void
upsample
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
Stream
&
stream
=
Stream
::
Null
());
//! smooths the source image and downsamples it
CV_EXPORTS
void
pyrDown
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
Stream
&
stream
=
Stream
::
Null
());
CV_EXPORTS
void
pyrDown
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
int
borderType
=
BORDER_DEFAULT
,
Stream
&
stream
=
Stream
::
Null
());
//! upsamples the source image and then smooths it
CV_EXPORTS
void
pyrUp
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
Stream
&
stream
=
Stream
::
Null
());
CV_EXPORTS
void
pyrUp
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
int
borderType
=
BORDER_DEFAULT
,
Stream
&
stream
=
Stream
::
Null
());
//! performs linear blending of two images
//! to avoid accuracy errors, the sum of weights shouldn't be very close to zero
...
...
modules/gpu/src/cuda/filters.cu
View file @
47d68f69
...
...
@@ -242,9 +242,9 @@ namespace filter_krnls
{
const T* srcCol = src.ptr() + x;
sDataColumn[ threadIdx.y * BLOCK_DIM_X] = b.at_low(y - BLOCK_DIM_Y, srcCol);
sDataColumn[(threadIdx.y + BLOCK_DIM_Y) * BLOCK_DIM_X] = b.at_high(y, srcCol);
sDataColumn[(threadIdx.y + BLOCK_DIM_Y * 2) * BLOCK_DIM_X] = b.at_high(y + BLOCK_DIM_Y, srcCol);
sDataColumn[ threadIdx.y * BLOCK_DIM_X] = b.at_low(y - BLOCK_DIM_Y, srcCol
, src.step
);
sDataColumn[(threadIdx.y + BLOCK_DIM_Y) * BLOCK_DIM_X] = b.at_high(y, srcCol
, src.step
);
sDataColumn[(threadIdx.y + BLOCK_DIM_Y * 2) * BLOCK_DIM_X] = b.at_high(y + BLOCK_DIM_Y, srcCol
, src.step
);
__syncthreads();
...
...
@@ -273,7 +273,7 @@ namespace cv { namespace gpu { namespace filters
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
dim3 grid(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));
B<T> b(src.rows
, src.step
);
B<T> b(src.rows);
if (!b.is_range_safe(-BLOCK_DIM_Y, (grid.y + 1) * BLOCK_DIM_Y - 1))
{
...
...
modules/gpu/src/cuda/imgproc.cu
View file @
47d68f69
...
...
@@ -44,6 +44,8 @@
#include "opencv2/gpu/device/border_interpolate.hpp"
#include "opencv2/gpu/device/vec_traits.hpp"
#include "opencv2/gpu/device/vec_math.hpp"
#include "opencv2/gpu/device/saturate_cast.hpp"
#include "opencv2/gpu/device/utility.hpp"
using namespace cv::gpu;
using namespace cv::gpu::device;
...
...
@@ -51,109 +53,119 @@ using namespace cv::gpu::device;
/////////////////////////////////// Remap ///////////////////////////////////////////////
namespace cv { namespace gpu { namespace imgproc
{
texture<unsigned char, 2, cudaReadModeNormalizedFloat> tex_remap(0, cudaFilterModeLinear, cudaAddressModeWrap);
// cudaAddressModeClamp == BrdReplicate
/*texture<uchar, cudaTextureType2D, cudaReadModeNormalizedFloat> tex_remap_uchar_LinearFilter(0, cudaFilterModeLinear, cudaAddressModeClamp);
__global__ void remap_
1c(const float* mapx, const float* mapy, size_t map_step, uchar* out, size_t out_step, int width, int heigh
t)
__global__ void remap_
uchar_LinearFilter(const PtrStepf mapx, const PtrStepf mapy, DevMem2D ds
t)
{
int x = blockDim.x * blockIdx.x + threadIdx.x;
int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < width && y < height)
{
int idx = y * (map_step >> 2) + x; /* map_step >> 2 <=> map_step / sizeof(float)*/
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
float xcoo = mapx[idx];
float ycoo = mapy[idx];
if (x < dst.cols && y < dst.rows)
{
const float xcoo = mapx.ptr(y)[x];
const float ycoo = mapy.ptr(y)[x];
out[y * out_step + x] = (unsigned char)(255.f * tex2D(tex_remap, xcoo, ycoo)
);
dst.ptr(y)[x] = 255.0f * tex2D(tex_remap_uchar_LinearFilter, xcoo, ycoo
);
}
}
}
*/
__global__ void remap_3c(const uchar* src, size_t src_step, const float* mapx, const float* mapy,
size_t map_step, uchar* dst, size_t dst_step, int width, int height)
{
template <typename Ptr2D, typename T> __global__ void remap(const Ptr2D src, const PtrStepf mapx, const PtrStepf mapy, DevMem2D_<T> dst)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x <
width && y < height
)
if (x <
dst.cols && y < dst.rows
)
{
const int idx = y * (map_step >> 2) + x; /* map_step >> 2 <=> map_step / sizeof(float)*/
const float xcoo = mapx[idx];
const float ycoo = mapy[idx];
uchar3 out = make_uchar3(0, 0, 0);
const float xcoo = mapx.ptr(y)[x];
const float ycoo = mapy.ptr(y)[x];
if (xcoo >= 0 && xcoo < width - 1 && ycoo >= 0 && ycoo < height - 1)
{
const int x1 = __float2int_rd(xcoo);
const int y1 = __float2int_rd(ycoo);
const int x2 = x1 + 1;
const int y2 = y1 + 1;
uchar src_reg = *(src + y1 * src_step + 3 * x1);
out.x += src_reg * (x2 - xcoo) * (y2 - ycoo);
src_reg = *(src + y1 * src_step + 3 * x1 + 1);
out.y += src_reg * (x2 - xcoo) * (y2 - ycoo);
src_reg = *(src + y1 * src_step + 3 * x1 + 2);
out.z += src_reg * (x2 - xcoo) * (y2 - ycoo);
src_reg = *(src + y1 * src_step + 3 * x2);
out.x += src_reg * (xcoo - x1) * (y2 - ycoo);
src_reg = *(src + y1 * src_step + 3 * x2 + 1);
out.y += src_reg * (xcoo - x1) * (y2 - ycoo);
src_reg = *(src + y1 * src_step + 3 * x2 + 2);
out.z += src_reg * (xcoo - x1) * (y2 - ycoo);
src_reg = *(src + y2 * src_step + 3 * x1);
out.x += src_reg * (x2 - xcoo) * (ycoo - y1);
src_reg = *(src + y2 * src_step + 3 * x1 + 1);
out.y += src_reg * (x2 - xcoo) * (ycoo - y1);
src_reg = *(src + y2 * src_step + 3 * x1 + 2);
out.z += src_reg * (x2 - xcoo) * (ycoo - y1);
src_reg = *(src + y2 * src_step + 3 * x2);
out.x += src_reg * (xcoo - x1) * (ycoo - y1);
src_reg = *(src + y2 * src_step + 3 * x2 + 1);
out.y += src_reg * (xcoo - x1) * (ycoo - y1);
src_reg = *(src + y2 * src_step + 3 * x2 + 2);
out.z += src_reg * (xcoo - x1) * (ycoo - y1);
}
/**(uchar3*)(dst + y * dst_step + 3 * x) = out;*/
*(dst + y * dst_step + 3 * x) = out.x;
*(dst + y * dst_step + 3 * x + 1) = out.y;
*(dst + y * dst_step + 3 * x + 2) = out.z;
dst.ptr(y)[x] = saturate_cast<T>(src(ycoo, xcoo));
}
}
void remap_gpu_1c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst)
template <template <typename> class Filter, template <typename> class B, typename T>
void remap_caller(const DevMem2D_<T>& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_<T>& dst, T borderValue)
{
dim3 threads(16, 16, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(dst.cols, threads.x);
grid.y = divUp(dst.rows, threads.y);
dim3 block(32, 8);
dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
TextureBinder tex(&tex_remap, src);
B<T> brd(src.rows, src.cols, borderValue);
BorderReader< PtrStep_<T>, B<T> > brd_src(src, brd);
Filter< BorderReader< PtrStep_<T>, B<T> > > filter_src(brd_src);
remap
_1c<<<grid, threads>>>(xmap.data, ymap.data, xmap.step, dst.data, dst.step, dst.cols, dst.rows
);
remap
<<<grid, block>>>(filter_src, mapx, mapy, dst
);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
}
void remap_gpu_3c(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, DevMem2D dst)
#define OPENCV_GPU_IMPLEMENT_REMAP_TEX(type, filter) \
template <> void remap_caller<filter, BrdReplicate>(const DevMem2D_<type>& src, const DevMem2Df& mapx, const DevMem2Df& mapy, const DevMem2D_<type>& dst, type) \
{ \
const dim3 block(16, 16); \
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y)); \
TextureBinder tex(&tex_remap_ ## type ## _ ## filter ## , src); \
remap_ ## type ## _ ## filter ## <<<grid, block>>>(mapx, mapy, dst); \
cudaSafeCall( cudaGetLastError() ); \
cudaSafeCall( cudaDeviceSynchronize() ); \
}
//OPENCV_GPU_IMPLEMENT_REMAP_TEX(uchar, LinearFilter)
#undef OPENCV_GPU_IMPLEMENT_REMAP_TEX
template <typename T> void remap_gpu(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst,
int interpolation, int borderMode, const double borderValue[4])
{
dim3 threads(32, 8, 1);
dim3 grid(1, 1, 1);
grid.x = divUp(dst.cols, threads.x);
grid.y = divUp(dst.rows, threads.y);
typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D_<T>& dst, T borderValue);
remap_3c<<<grid, threads>>>(src.data, src.step, xmap.data, ymap.data, xmap.step, dst.data, dst.step, dst.cols, dst.rows);
cudaSafeCall( cudaGetLastError() );
static const caller_t callers[2][3] =
{
{ remap_caller<PointFilter, BrdReflect101>, remap_caller<PointFilter, BrdReplicate>, remap_caller<PointFilter, BrdConstant> },
{ remap_caller<LinearFilter, BrdReflect101>, remap_caller<LinearFilter, BrdReplicate>, remap_caller<LinearFilter, BrdConstant> }
};
cudaSafeCall( cudaDeviceSynchronize() );
typename VecTraits<T>::elem_type brd[] = {(typename VecTraits<T>::elem_type)borderValue[0], (typename VecTraits<T>::elem_type)borderValue[1], (typename VecTraits<T>::elem_type)borderValue[2], (typename VecTraits<T>::elem_type)borderValue[3]};
callers[interpolation][borderMode](static_cast< DevMem2D_<T> >(src), xmap, ymap, static_cast< DevMem2D_<T> >(dst), VecTraits<T>::make(brd));
}
template void remap_gpu<uchar >(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<uchar2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<uchar3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<uchar4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<schar>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<char2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<char3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<char4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<ushort >(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<ushort2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<ushort3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<ushort4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<short >(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<short2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<short3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<short4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<uint >(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<uint2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<uint3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<uint4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<int >(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<int2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<int3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<int4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<float >(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<float2>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<float3>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
template void remap_gpu<float4>(const DevMem2D& src, const DevMem2Df& xmap, const DevMem2Df& ymap, const DevMem2D& dst, int interpolation, int borderMode, const double borderValue[4]);
/////////////////////////////////// MeanShiftfiltering ///////////////////////////////////////////////
texture<uchar4, 2> tex_meanshift;
...
...
@@ -541,9 +553,9 @@ namespace cv { namespace gpu { namespace imgproc
}
}
template <typename B>
template <typename B
R, typename BC
>
__global__ void cornerHarris_kernel(const int cols, const int rows, const int block_size, const float k,
PtrStep dst, B
border_row, B
border_col)
PtrStep dst, B
R border_row, BC
border_col)
{
const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
...
...
@@ -561,10 +573,10 @@ namespace cv { namespace gpu { namespace imgproc
for (int i = ibegin; i < iend; ++i)
{
int y = border_col.idx(i);
int y = border_col.idx
_row
(i);
for (int j = jbegin; j < jend; ++j)
{
int x = border_row.idx(j);
int x = border_row.idx
_col
(j);
float dx = tex2D(harrisDxTex, x, y);
float dy = tex2D(harrisDyTex, x, y);
a += dx * dx;
...
...
@@ -596,7 +608,7 @@ namespace cv { namespace gpu { namespace imgproc
{
case BORDER_REFLECT101_GPU:
cornerHarris_kernel<<<grid, threads>>>(
cols, rows, block_size, k, dst, BrdR
eflect101(cols), BrdReflect101
(rows));
cols, rows, block_size, k, dst, BrdR
owReflect101<void>(cols), BrdColReflect101<void>
(rows));
break;
case BORDER_REPLICATE_GPU:
harrisDxTex.addressMode[0] = cudaAddressModeClamp;
...
...
@@ -656,9 +668,9 @@ namespace cv { namespace gpu { namespace imgproc
}
template <typename B>
template <typename B
R, typename BC
>
__global__ void cornerMinEigenVal_kernel(const int cols, const int rows, const int block_size,
PtrStep dst, B
border_row, B
border_col)
PtrStep dst, B
R border_row, BC
border_col)
{
const unsigned int x = blockIdx.x * blockDim.x + threadIdx.x;
const unsigned int y = blockIdx.y * blockDim.y + threadIdx.y;
...
...
@@ -676,10 +688,10 @@ namespace cv { namespace gpu { namespace imgproc
for (int i = ibegin; i < iend; ++i)
{
int y = border_col.idx(i);
int y = border_col.idx
_row
(i);
for (int j = jbegin; j < jend; ++j)
{
int x = border_row.idx(j);
int x = border_row.idx
_col
(j);
float dx = tex2D(minEigenValDxTex, x, y);
float dy = tex2D(minEigenValDyTex, x, y);
a += dx * dx;
...
...
@@ -713,7 +725,7 @@ namespace cv { namespace gpu { namespace imgproc
{
case BORDER_REFLECT101_GPU:
cornerMinEigenVal_kernel<<<grid, threads>>>(
cols, rows, block_size, dst, BrdR
eflect101(cols), BrdReflect101
(rows));
cols, rows, block_size, dst, BrdR
owReflect101<void>(cols), BrdColReflect101<void>
(rows));
break;
case BORDER_REPLICATE_GPU:
minEigenValDxTex.addressMode[0] = cudaAddressModeClamp;
...
...
@@ -981,7 +993,7 @@ namespace cv { namespace gpu { namespace imgproc
//////////////////////////////////////////////////////////////////////////
// pyrDown
template <typename T
> __global__ void pyrDown(const PtrStep_<T> src, PtrStep_<T> dst, const BrdReflect101 rowBrd, const BrdReflect101 colBrd
, int dst_cols)
template <typename T
, typename B> __global__ void pyrDown(const PtrStep_<T> src, PtrStep_<T> dst, const B b
, int dst_cols)
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type value_type;
...
...
@@ -996,11 +1008,11 @@ namespace cv { namespace gpu { namespace imgproc
sum = VecTraits<value_type>::all(0);
sum = sum + 0.0625f *
src.ptr(colBrd.idx(src_y - 2))[rowBrd.idx(x)]
;
sum = sum + 0.25f *
src.ptr(colBrd.idx(src_y - 1))[rowBrd.idx(x)]
;
sum = sum + 0.375f *
src.ptr(colBrd.idx(src_y ))[rowBrd.idx(x)]
;
sum = sum + 0.25f *
src.ptr(colBrd.idx(src_y + 1))[rowBrd.idx(x)]
;
sum = sum + 0.0625f *
src.ptr(colBrd.idx(src_y + 2))[rowBrd.idx(x)]
;
sum = sum + 0.0625f *
b.at(src_y - 2, x, src.data, src.step)
;
sum = sum + 0.25f *
b.at(src_y - 1, x, src.data, src.step)
;
sum = sum + 0.375f *
b.at(src_y , x, src.data, src.step)
;
sum = sum + 0.25f *
b.at(src_y + 1, x, src.data, src.step)
;
sum = sum + 0.0625f *
b.at(src_y + 2, x, src.data, src.step)
;
smem[2 + threadIdx.x] = sum;
...
...
@@ -1010,11 +1022,11 @@ namespace cv { namespace gpu { namespace imgproc
sum = VecTraits<value_type>::all(0);
sum = sum + 0.0625f *
src.ptr(colBrd.idx(src_y - 2))[rowBrd.idx(left_x)]
;
sum = sum + 0.25f *
src.ptr(colBrd.idx(src_y - 1))[rowBrd.idx(left_x)]
;
sum = sum + 0.375f *
src.ptr(colBrd.idx(src_y ))[rowBrd.idx(left_x)]
;
sum = sum + 0.25f *
src.ptr(colBrd.idx(src_y + 1))[rowBrd.idx(left_x)]
;
sum = sum + 0.0625f *
src.ptr(colBrd.idx(src_y + 2))[rowBrd.idx(left_x)]
;
sum = sum + 0.0625f *
b.at(src_y - 2, left_x, src.data, src.step)
;
sum = sum + 0.25f *
b.at(src_y - 1, left_x, src.data, src.step)
;
sum = sum + 0.375f *
b.at(src_y , left_x, src.data, src.step)
;
sum = sum + 0.25f *
b.at(src_y + 1, left_x, src.data, src.step)
;
sum = sum + 0.0625f *
b.at(src_y + 2, left_x, src.data, src.step)
;
smem[threadIdx.x] = sum;
}
...
...
@@ -1025,11 +1037,11 @@ namespace cv { namespace gpu { namespace imgproc
sum = VecTraits<value_type>::all(0);
sum = sum + 0.0625f *
src.ptr(colBrd.idx(src_y - 2))[rowBrd.idx(right_x)]
;
sum = sum + 0.25f *
src.ptr(colBrd.idx(src_y - 1))[rowBrd.idx(right_x)]
;
sum = sum + 0.375f *
src.ptr(colBrd.idx(src_y ))[rowBrd.idx(right_x)]
;
sum = sum + 0.25f *
src.ptr(colBrd.idx(src_y + 1))[rowBrd.idx(right_x)]
;
sum = sum + 0.0625f *
src.ptr(colBrd.idx(src_y + 2))[rowBrd.idx(right_x)]
;
sum = sum + 0.0625f *
b.at(src_y - 2, right_x, src.data, src.step)
;
sum = sum + 0.25f *
b.at(src_y - 1, right_x, src.data, src.step)
;
sum = sum + 0.375f *
b.at(src_y , right_x, src.data, src.step)
;
sum = sum + 0.25f *
b.at(src_y + 1, right_x, src.data, src.step)
;
sum = sum + 0.0625f *
b.at(src_y + 2, right_x, src.data, src.step)
;
smem[4 + threadIdx.x] = sum;
}
...
...
@@ -1055,58 +1067,68 @@ namespace cv { namespace gpu { namespace imgproc
}
}
template <typename T,
int cn> void pyrDown_gpu(const DevMem2D& src, const DevMem2D
& dst, cudaStream_t stream)
template <typename T,
template <typename> class B> void pyrDown_caller(const DevMem2D_<T>& src, const DevMem2D_<T>
& dst, cudaStream_t stream)
{
const dim3 block(256);
const dim3 grid(divUp(src.cols, block.x), dst.rows);
BrdReflect101 rowBrd(src.cols);
BrdReflect101 colBrd(src.rows);
B<T> b(src.rows, src.cols);
pyrDown<typename TypeVec<T, cn>::vec_type><<<grid, block, 0, stream>>>(
static_cast< DevMem2D_<typename TypeVec<T, cn>::vec_type> >(src),
static_cast< DevMem2D_<typename TypeVec<T, cn>::vec_type> >(dst),
rowBrd, colBrd, dst.cols);
pyrDown<T><<<grid, block, 0, stream>>>(src, dst, b, dst.cols);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template void pyrDown_gpu<uchar, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrDown_gpu<uchar, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrDown_gpu<uchar, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrDown_gpu<uchar, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template <typename T, int cn> void pyrDown_gpu(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream)
{
typedef typename TypeVec<T, cn>::vec_type type;
typedef void (*caller_t)(const DevMem2D_<type>& src, const DevMem2D_<type>& dst, cudaStream_t stream);
static const caller_t callers[] =
{
pyrDown_caller<type, BrdReflect101>, pyrDown_caller<type, BrdReplicate>, pyrDown_caller<type, BrdConstant>
};
callers[borderType](static_cast< DevMem2D_<type> >(src), static_cast< DevMem2D_<type> >(dst), stream);
}
template void pyrDown_gpu<uchar, 1>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<uchar, 2>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<uchar, 3>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<uchar, 4>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrDown_gpu<schar, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrDown_gpu<schar, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrDown_gpu<schar, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrDown_gpu<schar, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrDown_gpu<schar, 1>(const DevMem2D& src, const DevMem2D& dst,
int borderType,
cudaStream_t stream);
template void pyrDown_gpu<schar, 2>(const DevMem2D& src, const DevMem2D& dst,
int borderType,
cudaStream_t stream);
template void pyrDown_gpu<schar, 3>(const DevMem2D& src, const DevMem2D& dst,
int borderType,
cudaStream_t stream);
template void pyrDown_gpu<schar, 4>(const DevMem2D& src, const DevMem2D& dst,
int borderType,
cudaStream_t stream);
template void pyrDown_gpu<ushort, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrDown_gpu<ushort, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrDown_gpu<ushort, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrDown_gpu<ushort, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrDown_gpu<ushort, 1>(const DevMem2D& src, const DevMem2D& dst,
int borderType,
cudaStream_t stream);
template void pyrDown_gpu<ushort, 2>(const DevMem2D& src, const DevMem2D& dst,
int borderType,
cudaStream_t stream);
template void pyrDown_gpu<ushort, 3>(const DevMem2D& src, const DevMem2D& dst,
int borderType,
cudaStream_t stream);
template void pyrDown_gpu<ushort, 4>(const DevMem2D& src, const DevMem2D& dst,
int borderType,
cudaStream_t stream);
template void pyrDown_gpu<short, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrDown_gpu<short, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrDown_gpu<short, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrDown_gpu<short, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrDown_gpu<short, 1>(const DevMem2D& src, const DevMem2D& dst,
int borderType,
cudaStream_t stream);
template void pyrDown_gpu<short, 2>(const DevMem2D& src, const DevMem2D& dst,
int borderType,
cudaStream_t stream);
template void pyrDown_gpu<short, 3>(const DevMem2D& src, const DevMem2D& dst,
int borderType,
cudaStream_t stream);
template void pyrDown_gpu<short, 4>(const DevMem2D& src, const DevMem2D& dst,
int borderType,
cudaStream_t stream);
template void pyrDown_gpu<int, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrDown_gpu<int, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrDown_gpu<int, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrDown_gpu<int, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrDown_gpu<int, 1>(const DevMem2D& src, const DevMem2D& dst,
int borderType,
cudaStream_t stream);
template void pyrDown_gpu<int, 2>(const DevMem2D& src, const DevMem2D& dst,
int borderType,
cudaStream_t stream);
template void pyrDown_gpu<int, 3>(const DevMem2D& src, const DevMem2D& dst,
int borderType,
cudaStream_t stream);
template void pyrDown_gpu<int, 4>(const DevMem2D& src, const DevMem2D& dst,
int borderType,
cudaStream_t stream);
template void pyrDown_gpu<float, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrDown_gpu<float, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrDown_gpu<float, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrDown_gpu<float, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrDown_gpu<float, 1>(const DevMem2D& src, const DevMem2D& dst,
int borderType,
cudaStream_t stream);
template void pyrDown_gpu<float, 2>(const DevMem2D& src, const DevMem2D& dst,
int borderType,
cudaStream_t stream);
template void pyrDown_gpu<float, 3>(const DevMem2D& src, const DevMem2D& dst,
int borderType,
cudaStream_t stream);
template void pyrDown_gpu<float, 4>(const DevMem2D& src, const DevMem2D& dst,
int borderType,
cudaStream_t stream);
//////////////////////////////////////////////////////////////////////////
// pyrUp
template <typename T
> __global__ void pyrUp(const PtrStep_<T> src, DevMem2D_<T> dst, const BrdReflect101 rowBrd, const BrdReflect101 colBrd
)
template <typename T
, typename B> __global__ void pyrUp(const PtrStep_<T> src, DevMem2D_<T> dst, const B b
)
{
typedef typename TypeVec<float, VecTraits<T>::cn>::vec_type value_type;
...
...
@@ -1119,7 +1141,7 @@ namespace cv { namespace gpu { namespace imgproc
value_type sum;
if (threadIdx.x < 10 && threadIdx.y < 10)
smem1[threadIdx.y][threadIdx.x] =
src.ptr(colBrd.idx(blockIdx.y * blockDim.y / 2 + threadIdx.y - 1))[rowBrd.idx(blockIdx.x * blockDim.x / 2 + threadIdx.x - 1)]
;
smem1[threadIdx.y][threadIdx.x] =
b.at(blockIdx.y * blockDim.y / 2 + threadIdx.y - 1, blockIdx.x * blockDim.x / 2 + threadIdx.x - 1, src.data, src.step)
;
__syncthreads();
...
...
@@ -1175,53 +1197,63 @@ namespace cv { namespace gpu { namespace imgproc
dst.ptr(y)[x] = saturate_cast<T>(4.0f * sum);
}
template <typename T,
int cn> void pyrUp_gpu(const DevMem2D& src, const DevMem2D
& dst, cudaStream_t stream)
template <typename T,
template <typename> class B> void pyrUp_caller(const DevMem2D_<T>& src, const DevMem2D_<T>
& dst, cudaStream_t stream)
{
const dim3 block(16, 16);
const dim3 grid(divUp(dst.cols, block.x), divUp(dst.rows, block.y));
BrdReflect101 rowBrd(src.cols);
BrdReflect101 colBrd(src.rows);
B<T> b(src.rows, src.cols);
pyrUp<typename TypeVec<T, cn>::vec_type><<<grid, block, 0, stream>>>(
static_cast< DevMem2D_<typename TypeVec<T, cn>::vec_type> >(src),
static_cast< DevMem2D_<typename TypeVec<T, cn>::vec_type> >(dst),
rowBrd, colBrd);
pyrUp<T><<<grid, block, 0, stream>>>(src, dst, b);
cudaSafeCall( cudaGetLastError() );
if (stream == 0)
cudaSafeCall( cudaDeviceSynchronize() );
}
template void pyrUp_gpu<uchar, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrUp_gpu<uchar, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrUp_gpu<uchar, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrUp_gpu<uchar, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrUp_gpu<schar, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrUp_gpu<schar, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrUp_gpu<schar, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrUp_gpu<schar, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrUp_gpu<ushort, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrUp_gpu<ushort, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrUp_gpu<ushort, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrUp_gpu<ushort, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrUp_gpu<short, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrUp_gpu<short, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrUp_gpu<short, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrUp_gpu<short, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrUp_gpu<int, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrUp_gpu<int, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrUp_gpu<int, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrUp_gpu<int, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrUp_gpu<float, 1>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrUp_gpu<float, 2>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrUp_gpu<float, 3>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template void pyrUp_gpu<float, 4>(const DevMem2D& src, const DevMem2D& dst, cudaStream_t stream);
template <typename T, int cn> void pyrUp_gpu(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream)
{
typedef typename TypeVec<T, cn>::vec_type type;
typedef void (*caller_t)(const DevMem2D_<type>& src, const DevMem2D_<type>& dst, cudaStream_t stream);
static const caller_t callers[] =
{
pyrUp_caller<type, BrdReflect101>, pyrUp_caller<type, BrdReplicate>, pyrUp_caller<type, BrdConstant>
};
callers[borderType](static_cast< DevMem2D_<type> >(src), static_cast< DevMem2D_<type> >(dst), stream);
}
template void pyrUp_gpu<uchar, 1>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<uchar, 2>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<uchar, 3>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<uchar, 4>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<schar, 1>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<schar, 2>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<schar, 3>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<schar, 4>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<ushort, 1>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<ushort, 2>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<ushort, 3>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<ushort, 4>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<short, 1>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<short, 2>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<short, 3>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<short, 4>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<int, 1>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<int, 2>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<int, 3>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<int, 4>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<float, 1>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<float, 2>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<float, 3>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
template void pyrUp_gpu<float, 4>(const DevMem2D& src, const DevMem2D& dst, int borderType, cudaStream_t stream);
//////////////////////////////////////////////////////////////////////////
// buildWarpMaps
...
...
modules/gpu/src/cuda/surf.cu
View file @
47d68f69
...
...
@@ -675,31 +675,29 @@ namespace cv { namespace gpu { namespace surf
3.695352233989979e-006f, 8.444558261544444e-006f, 1.760426494001877e-005f, 3.34794785885606e-005f, 5.808438800158911e-005f, 9.193058212986216e-005f, 0.0001327334757661447f, 0.0001748319627949968f, 0.0002100782439811155f, 0.0002302826324012131f, 0.0002302826324012131f, 0.0002100782439811155f, 0.0001748319627949968f, 0.0001327334757661447f, 9.193058212986216e-005f, 5.808438800158911e-005f, 3.34794785885606e-005f, 1.760426494001877e-005f, 8.444558261544444e-006f, 3.695352233989979e-006f
};
__device__ __forceinline__ unsigned char calcWin(int i, int j, float centerX, float centerY, float win_offset, float cos_dir, float sin_dir)
struct WinReader
{
float pixel_x = centerX + (win_offset + j) * cos_dir + (win_offset + i) * sin_dir;
float pixel_y = centerY - (win_offset + j) * sin_dir + (win_offset + i) * cos_dir;
typedef uchar elem_type;
return tex2D(imgTex, pixel_x, pixel_y);
}
__device__ unsigned char calcPATCH(int i1, int j1, float centerX, float centerY, float win_offset, float cos_dir, float sin_dir, int win_size)
{
/* Scale the window to size PATCH_SZ so each pixel's size is s. This
makes calculating the gradients with wavelets of size 2s easy */
const float icoo = ((float)i1 / (PATCH_SZ + 1)) * win_size;
const float jcoo = ((float)j1 / (PATCH_SZ + 1)) * win_size;
__device__ __forceinline__ WinReader(float centerX_, float centerY_, float win_offset_, float cos_dir_, float sin_dir_) :
centerX(centerX_), centerY(centerY_), win_offset(win_offset_), cos_dir(cos_dir_), sin_dir(sin_dir_)
{
}
const int i = __float2int_rd(icoo);
const int j = __float2int_rd(jcoo);
__device__ __forceinline__ uchar operator ()(int i, int j) const
{
float pixel_x = centerX + (win_offset + j) * cos_dir + (win_offset + i) * sin_dir;
float pixel_y = centerY - (win_offset + j) * sin_dir + (win_offset + i) * cos_dir;
float res = calcWin(i, j, centerX, centerY, win_offset, cos_dir, sin_dir) * (i + 1 - icoo) * (j + 1 - jcoo);
res += calcWin(i + 1, j, centerX, centerY, win_offset, cos_dir, sin_dir) * (icoo - i) * (j + 1 - jcoo);
res += calcWin(i + 1, j + 1, centerX, centerY, win_offset, cos_dir, sin_dir) * (icoo - i) * (jcoo - j);
res += calcWin(i, j + 1, centerX, centerY, win_offset, cos_dir, sin_dir) * (i + 1 - icoo) * (jcoo - j);
return tex2D(imgTex, pixel_x, pixel_y);
}
return saturate_cast<unsigned char>(res);
}
float centerX;
float centerY;
float win_offset;
float cos_dir;
float sin_dir;
};
__device__ void calc_dx_dy(float s_dx_bin[25], float s_dy_bin[25],
const float* featureX, const float* featureY, const float* featureSize, const float* featureDir)
...
...
@@ -732,7 +730,13 @@ namespace cv { namespace gpu { namespace surf
const int xIndex = xBlock * 5 + threadIdx.x;
const int yIndex = yBlock * 5 + threadIdx.y;
s_PATCH[threadIdx.y][threadIdx.x] = calcPATCH(yIndex, xIndex, centerX, centerY, win_offset, cos_dir, sin_dir, win_size);
const float icoo = ((float)yIndex / (PATCH_SZ + 1)) * win_size;
const float jcoo = ((float)xIndex / (PATCH_SZ + 1)) * win_size;
LinearFilter<WinReader> filter(WinReader(centerX, centerY, win_offset, cos_dir, sin_dir));
s_PATCH[threadIdx.y][threadIdx.x] = filter(icoo, jcoo);
__syncthreads();
if (threadIdx.x < 5 && threadIdx.y < 5)
...
...
modules/gpu/src/gpumat.cpp
View file @
47d68f69
...
...
@@ -885,7 +885,7 @@ void cv::gpu::GpuMat::release()
if
(
refcount
&&
CV_XADD
(
refcount
,
-
1
)
==
1
)
{
fastFree
(
refcount
);
cuda
SafeCall
(
cudaFree
(
datastart
)
);
cuda
Free
(
datastart
);
}
data
=
datastart
=
dataend
=
0
;
step
=
rows
=
cols
=
0
;
...
...
modules/gpu/src/imgproc.cpp
View file @
47d68f69
...
...
@@ -47,7 +47,7 @@ using namespace cv::gpu;
#if !defined (HAVE_CUDA)
void
cv
::
gpu
::
remap
(
const
GpuMat
&
,
GpuMat
&
,
const
GpuMat
&
,
const
GpuMat
&
){
throw_nogpu
();
}
void
cv
::
gpu
::
remap
(
const
GpuMat
&
,
GpuMat
&
,
const
GpuMat
&
,
const
GpuMat
&
,
int
,
int
,
const
Scalar
&
){
throw_nogpu
();
}
void
cv
::
gpu
::
meanShiftFiltering
(
const
GpuMat
&
,
GpuMat
&
,
int
,
int
,
TermCriteria
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
meanShiftProc
(
const
GpuMat
&
,
GpuMat
&
,
GpuMat
&
,
int
,
int
,
TermCriteria
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
drawColorDisp
(
const
GpuMat
&
,
GpuMat
&
,
int
,
Stream
&
)
{
throw_nogpu
();
}
...
...
@@ -92,8 +92,8 @@ void cv::gpu::convolve(const GpuMat&, const GpuMat&, GpuMat&, bool) { throw_nogp
void
cv
::
gpu
::
convolve
(
const
GpuMat
&
,
const
GpuMat
&
,
GpuMat
&
,
bool
,
ConvolveBuf
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
downsample
(
const
GpuMat
&
,
GpuMat
&
,
Stream
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
upsample
(
const
GpuMat
&
,
GpuMat
&
,
Stream
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
pyrDown
(
const
GpuMat
&
,
GpuMat
&
,
Stream
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
pyrUp
(
const
GpuMat
&
,
GpuMat
&
,
Stream
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
pyrDown
(
const
GpuMat
&
,
GpuMat
&
,
int
,
Stream
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
pyrUp
(
const
GpuMat
&
,
GpuMat
&
,
int
,
Stream
&
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
Canny
(
const
GpuMat
&
,
GpuMat
&
,
double
,
double
,
int
,
bool
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
Canny
(
const
GpuMat
&
,
CannyBuf
&
,
GpuMat
&
,
double
,
double
,
int
,
bool
)
{
throw_nogpu
();
}
void
cv
::
gpu
::
Canny
(
const
GpuMat
&
,
const
GpuMat
&
,
GpuMat
&
,
double
,
double
,
bool
)
{
throw_nogpu
();
}
...
...
@@ -104,39 +104,52 @@ void cv::gpu::CannyBuf::release() { throw_nogpu(); }
#else
/* !defined (HAVE_CUDA) */
////////////////////////////////////////////////////////////////////////
// remap
namespace
cv
{
namespace
gpu
{
namespace
imgproc
{
void
remap_gpu_1c
(
const
DevMem2D
&
src
,
const
DevMem2Df
&
xmap
,
const
DevMem2Df
&
ymap
,
DevMem2D
dst
);
void
remap_gpu_3c
(
const
DevMem2D
&
src
,
const
DevMem2Df
&
xmap
,
const
DevMem2Df
&
ymap
,
DevMem2D
dst
);
extern
"C"
void
meanShiftFiltering_gpu
(
const
DevMem2D
&
src
,
DevMem2D
dst
,
int
sp
,
int
sr
,
int
maxIter
,
float
eps
);
extern
"C"
void
meanShiftProc_gpu
(
const
DevMem2D
&
src
,
DevMem2D
dstr
,
DevMem2D
dstsp
,
int
sp
,
int
sr
,
int
maxIter
,
float
eps
);
template
<
typename
T
>
void
remap_gpu
(
const
DevMem2D
&
src
,
const
DevMem2Df
&
xmap
,
const
DevMem2Df
&
ymap
,
const
DevMem2D
&
dst
,
int
interpolation
,
int
borderMode
,
const
double
borderValue
[
4
]);
}}}
void
drawColorDisp_gpu
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
int
ndisp
,
const
cudaStream_t
&
stream
);
void
drawColorDisp_gpu
(
const
DevMem2D_
<
short
>&
src
,
const
DevMem2D
&
dst
,
int
ndisp
,
const
cudaStream_t
&
stream
);
void
cv
::
gpu
::
remap
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
const
GpuMat
&
xmap
,
const
GpuMat
&
ymap
,
int
interpolation
,
int
borderMode
,
const
Scalar
&
borderValue
)
{
using
namespace
cv
::
gpu
::
imgproc
;
void
reprojectImageTo3D_gpu
(
const
DevMem2D
&
disp
,
const
DevMem2Df
&
xyzw
,
const
float
*
q
,
const
cudaStream_t
&
stream
);
void
reprojectImageTo3D_gpu
(
const
DevMem2D_
<
short
>&
disp
,
const
DevMem2Df
&
xyzw
,
const
float
*
q
,
const
cudaStream_t
&
stream
);
}}}
typedef
void
(
*
caller_t
)(
const
DevMem2D
&
src
,
const
DevMem2Df
&
xmap
,
const
DevMem2Df
&
ymap
,
const
DevMem2D
&
dst
,
int
interpolation
,
int
borderMode
,
const
double
borderValue
[
4
]);;
static
const
caller_t
callers
[
6
][
4
]
=
{
{
remap_gpu
<
uchar
>
,
remap_gpu
<
uchar2
>
,
remap_gpu
<
uchar3
>
,
remap_gpu
<
uchar4
>
},
{
remap_gpu
<
schar
>
,
remap_gpu
<
char2
>
,
remap_gpu
<
char3
>
,
remap_gpu
<
char4
>
},
{
remap_gpu
<
ushort
>
,
remap_gpu
<
ushort2
>
,
remap_gpu
<
ushort3
>
,
remap_gpu
<
ushort4
>
},
{
remap_gpu
<
short
>
,
remap_gpu
<
short2
>
,
remap_gpu
<
short3
>
,
remap_gpu
<
short4
>
},
{
remap_gpu
<
int
>
,
remap_gpu
<
int2
>
,
remap_gpu
<
int3
>
,
remap_gpu
<
int4
>
},
{
remap_gpu
<
float
>
,
remap_gpu
<
float2
>
,
remap_gpu
<
float3
>
,
remap_gpu
<
float4
>
}
};
////////////////////////////////////////////////////////////////////////
// remap
CV_Assert
(
src
.
depth
()
<=
CV_32F
&&
src
.
channels
()
<=
4
);
CV_Assert
(
xmap
.
type
()
==
CV_32F
&&
ymap
.
type
()
==
CV_32F
&&
xmap
.
size
()
==
ymap
.
size
());
void
cv
::
gpu
::
remap
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
const
GpuMat
&
xmap
,
const
GpuMat
&
ymap
)
{
typedef
void
(
*
remap_gpu_t
)(
const
DevMem2D
&
src
,
const
DevMem2Df
&
xmap
,
const
DevMem2Df
&
ymap
,
DevMem2D
dst
);
static
const
remap_gpu_t
callers
[]
=
{
imgproc
::
remap_gpu_1c
,
0
,
imgproc
::
remap_gpu_3c
};
CV_Assert
(
interpolation
==
INTER_NEAREST
||
interpolation
==
INTER_LINEAR
);
CV_Assert
((
src
.
type
()
==
CV_8U
||
src
.
type
()
==
CV_8UC3
)
&&
xmap
.
type
()
==
CV_32F
&&
ymap
.
type
()
==
CV_32F
);
CV_Assert
(
borderMode
==
BORDER_REFLECT101
||
borderMode
==
BORDER_REPLICATE
||
borderMode
==
BORDER_CONSTANT
);
int
gpuBorderType
;
CV_Assert
(
tryConvertToGpuBorderType
(
borderMode
,
gpuBorderType
));
dst
.
create
(
xmap
.
size
(),
src
.
type
());
callers
[
src
.
channels
()
-
1
](
src
,
xmap
,
ymap
,
dst
);
callers
[
src
.
depth
()][
src
.
channels
()
-
1
](
src
,
xmap
,
ymap
,
dst
,
interpolation
,
gpuBorderType
,
borderValue
.
val
);
}
////////////////////////////////////////////////////////////////////////
// meanShiftFiltering_GPU
namespace
cv
{
namespace
gpu
{
namespace
imgproc
{
extern
"C"
void
meanShiftFiltering_gpu
(
const
DevMem2D
&
src
,
DevMem2D
dst
,
int
sp
,
int
sr
,
int
maxIter
,
float
eps
);
}}}
void
cv
::
gpu
::
meanShiftFiltering
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
int
sp
,
int
sr
,
TermCriteria
criteria
)
{
if
(
src
.
empty
()
)
...
...
@@ -163,6 +176,11 @@ void cv::gpu::meanShiftFiltering(const GpuMat& src, GpuMat& dst, int sp, int sr,
////////////////////////////////////////////////////////////////////////
// meanShiftProc_GPU
namespace
cv
{
namespace
gpu
{
namespace
imgproc
{
extern
"C"
void
meanShiftProc_gpu
(
const
DevMem2D
&
src
,
DevMem2D
dstr
,
DevMem2D
dstsp
,
int
sp
,
int
sr
,
int
maxIter
,
float
eps
);
}}}
void
cv
::
gpu
::
meanShiftProc
(
const
GpuMat
&
src
,
GpuMat
&
dstr
,
GpuMat
&
dstsp
,
int
sp
,
int
sr
,
TermCriteria
criteria
)
{
if
(
src
.
empty
()
)
...
...
@@ -190,6 +208,12 @@ void cv::gpu::meanShiftProc(const GpuMat& src, GpuMat& dstr, GpuMat& dstsp, int
////////////////////////////////////////////////////////////////////////
// drawColorDisp
namespace
cv
{
namespace
gpu
{
namespace
imgproc
{
void
drawColorDisp_gpu
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
int
ndisp
,
const
cudaStream_t
&
stream
);
void
drawColorDisp_gpu
(
const
DevMem2D_
<
short
>&
src
,
const
DevMem2D
&
dst
,
int
ndisp
,
const
cudaStream_t
&
stream
);
}}}
namespace
{
template
<
typename
T
>
...
...
@@ -215,6 +239,12 @@ void cv::gpu::drawColorDisp(const GpuMat& src, GpuMat& dst, int ndisp, Stream& s
////////////////////////////////////////////////////////////////////////
// reprojectImageTo3D
namespace
cv
{
namespace
gpu
{
namespace
imgproc
{
void
reprojectImageTo3D_gpu
(
const
DevMem2D
&
disp
,
const
DevMem2Df
&
xyzw
,
const
float
*
q
,
const
cudaStream_t
&
stream
);
void
reprojectImageTo3D_gpu
(
const
DevMem2D_
<
short
>&
disp
,
const
DevMem2Df
&
xyzw
,
const
float
*
q
,
const
cudaStream_t
&
stream
);
}}}
namespace
{
template
<
typename
T
>
...
...
@@ -1596,14 +1626,14 @@ void cv::gpu::upsample(const GpuMat& src, GpuMat& dst, Stream& stream)
namespace
cv
{
namespace
gpu
{
namespace
imgproc
{
template
<
typename
T
,
int
cn
>
void
pyrDown_gpu
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
cudaStream_t
stream
);
template
<
typename
T
,
int
cn
>
void
pyrDown_gpu
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
int
borderType
,
cudaStream_t
stream
);
}}}
void
cv
::
gpu
::
pyrDown
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
Stream
&
stream
)
void
cv
::
gpu
::
pyrDown
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
int
borderType
,
Stream
&
stream
)
{
using
namespace
cv
::
gpu
::
imgproc
;
typedef
void
(
*
func_t
)(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
cudaStream_t
stream
);
typedef
void
(
*
func_t
)(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
int
borderType
,
cudaStream_t
stream
);
static
const
func_t
funcs
[
6
][
4
]
=
{
...
...
@@ -1617,9 +1647,13 @@ void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream)
CV_Assert
(
src
.
depth
()
<=
CV_32F
&&
src
.
channels
()
<=
4
);
CV_Assert
(
borderType
==
BORDER_REFLECT101
||
borderType
==
BORDER_REPLICATE
||
borderType
==
BORDER_CONSTANT
);
int
gpuBorderType
;
CV_Assert
(
tryConvertToGpuBorderType
(
borderType
,
gpuBorderType
));
dst
.
create
((
src
.
rows
+
1
)
/
2
,
(
src
.
cols
+
1
)
/
2
,
src
.
type
());
funcs
[
src
.
depth
()][
src
.
channels
()
-
1
](
src
,
dst
,
StreamAccessor
::
getStream
(
stream
));
funcs
[
src
.
depth
()][
src
.
channels
()
-
1
](
src
,
dst
,
gpuBorderType
,
StreamAccessor
::
getStream
(
stream
));
}
...
...
@@ -1628,14 +1662,14 @@ void cv::gpu::pyrDown(const GpuMat& src, GpuMat& dst, Stream& stream)
namespace
cv
{
namespace
gpu
{
namespace
imgproc
{
template
<
typename
T
,
int
cn
>
void
pyrUp_gpu
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
cudaStream_t
stream
);
template
<
typename
T
,
int
cn
>
void
pyrUp_gpu
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
int
borderType
,
cudaStream_t
stream
);
}}}
void
cv
::
gpu
::
pyrUp
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
Stream
&
stream
)
void
cv
::
gpu
::
pyrUp
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
int
borderType
,
Stream
&
stream
)
{
using
namespace
cv
::
gpu
::
imgproc
;
typedef
void
(
*
func_t
)(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
cudaStream_t
stream
);
typedef
void
(
*
func_t
)(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
int
borderType
,
cudaStream_t
stream
);
static
const
func_t
funcs
[
6
][
4
]
=
{
...
...
@@ -1649,9 +1683,13 @@ void cv::gpu::pyrUp(const GpuMat& src, GpuMat& dst, Stream& stream)
CV_Assert
(
src
.
depth
()
<=
CV_32F
&&
src
.
channels
()
<=
4
);
CV_Assert
(
borderType
==
BORDER_REFLECT101
||
borderType
==
BORDER_REPLICATE
||
borderType
==
BORDER_CONSTANT
);
int
gpuBorderType
;
CV_Assert
(
tryConvertToGpuBorderType
(
borderType
,
gpuBorderType
));
dst
.
create
(
src
.
rows
*
2
,
src
.
cols
*
2
,
src
.
type
());
funcs
[
src
.
depth
()][
src
.
channels
()
-
1
](
src
,
dst
,
StreamAccessor
::
getStream
(
stream
));
funcs
[
src
.
depth
()][
src
.
channels
()
-
1
](
src
,
dst
,
gpuBorderType
,
StreamAccessor
::
getStream
(
stream
));
}
...
...
modules/gpu/src/opencv2/gpu/device/border_interpolate.hpp
View file @
47d68f69
...
...
@@ -48,82 +48,131 @@
namespace
cv
{
namespace
gpu
{
namespace
device
{
struct
BrdReflect101
//////////////////////////////////////////////////////////////
// BrdConstant
template
<
typename
D
>
struct
BrdRowConstant
{
explicit
__host__
__device__
__forceinline__
BrdReflect101
(
int
len
)
:
last
(
len
-
1
)
{}
typedef
D
result_type
;
explicit
__host__
__device__
__forceinline__
BrdRowConstant
(
int
width_
,
const
D
&
val_
=
VecTraits
<
D
>::
all
(
0
))
:
width
(
width_
),
val
(
val_
)
{}
__device__
__forceinline__
int
idx_low
(
int
i
)
const
template
<
typename
T
>
__device__
__forceinline__
D
at_low
(
int
x
,
const
T
*
data
)
const
{
return
abs
(
i
)
;
return
x
>=
0
?
saturate_cast
<
D
>
(
data
[
x
])
:
val
;
}
__device__
__forceinline__
int
idx_high
(
int
i
)
const
template
<
typename
T
>
__device__
__forceinline__
D
at_high
(
int
x
,
const
T
*
data
)
const
{
return
last
-
abs
(
last
-
i
)
;
return
x
<
width
?
saturate_cast
<
D
>
(
data
[
x
])
:
val
;
}
__device__
__forceinline__
int
idx
(
int
i
)
const
template
<
typename
T
>
__device__
__forceinline__
D
at
(
int
x
,
const
T
*
data
)
const
{
return
idx_low
(
idx_high
(
i
))
;
return
(
x
>=
0
&&
x
<
width
)
?
saturate_cast
<
D
>
(
data
[
x
])
:
val
;
}
__host__
__device__
__forceinline__
bool
is_range_safe
(
int
mini
,
int
maxi
)
const
{
return
-
last
<=
mini
&&
maxi
<=
2
*
last
;
return
true
;
}
const
int
last
;
const
int
width
;
const
D
val
;
};
template
<
typename
D
>
struct
Brd
RowReflect101
:
BrdReflect101
template
<
typename
D
>
struct
Brd
ColConstant
{
explicit
__host__
__device__
__forceinline__
BrdRowReflect101
(
int
len
)
:
BrdReflect101
(
len
)
{}
typedef
D
result_type
;
template
<
typename
T
>
__device__
__forceinline__
D
at_low
(
int
i
,
const
T
*
data
)
const
explicit
__host__
__device__
__forceinline__
BrdColConstant
(
int
height_
,
const
D
&
val_
=
VecTraits
<
D
>::
all
(
0
))
:
height
(
height_
),
val
(
val_
)
{}
template
<
typename
T
>
__device__
__forceinline__
D
at_low
(
int
y
,
const
T
*
data
,
size_t
step
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_low
(
i
)])
;
return
y
>=
0
?
saturate_cast
<
D
>
(
*
(
const
T
*
)((
const
char
*
)
data
+
y
*
step
))
:
val
;
}
template
<
typename
T
>
__device__
__forceinline__
D
at_high
(
int
i
,
const
T
*
data
)
const
template
<
typename
T
>
__device__
__forceinline__
D
at_high
(
int
y
,
const
T
*
data
,
size_t
step
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_high
(
i
)])
;
return
y
<
height
?
saturate_cast
<
D
>
(
*
(
const
T
*
)((
const
char
*
)
data
+
y
*
step
))
:
val
;
}
template
<
typename
T
>
__device__
__forceinline__
D
at
(
int
y
,
const
T
*
data
,
size_t
step
)
const
{
return
(
y
>=
0
&&
y
<
height
)
?
saturate_cast
<
D
>
(
*
(
const
T
*
)((
const
char
*
)
data
+
y
*
step
))
:
val
;
}
__host__
__device__
__forceinline__
bool
is_range_safe
(
int
mini
,
int
maxi
)
const
{
return
true
;
}
const
int
height
;
const
D
val
;
};
template
<
typename
D
>
struct
BrdCo
lReflect101
:
BrdReflect101
template
<
typename
D
>
struct
BrdCo
nstant
{
__host__
__device__
__forceinline__
BrdColReflect101
(
int
len
,
size_t
step
)
:
BrdReflect101
(
len
),
step
(
step
)
{}
typedef
D
result_type
;
template
<
typename
T
>
__device__
__forceinline__
D
at_low
(
int
i
,
const
T
*
data
)
const
__host__
__device__
__forceinline__
BrdConstant
(
int
height_
,
int
width_
,
const
D
&
val_
=
VecTraits
<
D
>::
all
(
0
))
:
height
(
height_
),
width
(
width_
),
val
(
val_
)
{
return
saturate_cast
<
D
>
(
*
(
const
D
*
)((
const
char
*
)
data
+
idx_low
(
i
)
*
step
));
}
template
<
typename
T
>
__device__
__forceinline__
D
at
_high
(
int
i
,
const
T
*
data
)
const
template
<
typename
T
>
__device__
__forceinline__
D
at
(
int
y
,
int
x
,
const
T
*
data
,
size_t
step
)
const
{
return
saturate_cast
<
D
>
(
*
(
const
D
*
)((
const
char
*
)
data
+
idx_high
(
i
)
*
step
))
;
return
(
x
>=
0
&&
x
<
width
&&
y
>=
0
&&
y
<
height
)
?
saturate_cast
<
D
>
(((
const
T
*
)((
const
uchar
*
)
data
+
y
*
step
))[
x
])
:
val
;
}
const
size_t
step
;
template
<
typename
Ptr2D
>
__device__
__forceinline__
D
at
(
typename
Ptr2D
::
index_type
y
,
typename
Ptr2D
::
index_type
x
,
const
Ptr2D
&
src
)
const
{
return
(
x
>=
0
&&
x
<
width
&&
y
>=
0
&&
y
<
height
)
?
saturate_cast
<
D
>
(
src
(
y
,
x
))
:
val
;
}
const
int
height
;
const
int
width
;
const
D
val
;
};
struct
BrdReplicate
//////////////////////////////////////////////////////////////
// BrdReplicate
template
<
typename
D
>
struct
BrdRowReplicate
{
explicit
__host__
__device__
__forceinline__
BrdReplicate
(
int
len
)
:
last
(
len
-
1
)
{}
typedef
D
result_type
;
explicit
__host__
__device__
__forceinline__
BrdRowReplicate
(
int
width
)
:
last_col
(
width
-
1
)
{}
template
<
typename
U
>
__host__
__device__
__forceinline__
BrdRowReplicate
(
int
width
,
U
)
:
last_col
(
width
-
1
)
{}
__device__
__forceinline__
int
idx_col_low
(
int
x
)
const
{
return
::
max
(
x
,
0
);
}
__device__
__forceinline__
int
idx_col_high
(
int
x
)
const
{
return
::
min
(
x
,
last_col
);
}
__device__
__forceinline__
int
idx_col
(
int
x
)
const
{
return
idx_col_low
(
idx_col_high
(
x
));
}
__device__
__forceinline__
int
idx_low
(
int
i
)
const
template
<
typename
T
>
__device__
__forceinline__
D
at_low
(
int
x
,
const
T
*
data
)
const
{
return
::
max
(
i
,
0
);
return
saturate_cast
<
D
>
(
data
[
idx_col_low
(
x
)]
);
}
__device__
__forceinline__
int
idx_high
(
int
i
)
const
template
<
typename
T
>
__device__
__forceinline__
D
at_high
(
int
x
,
const
T
*
data
)
const
{
return
::
min
(
i
,
last
);
return
saturate_cast
<
D
>
(
data
[
idx_col_high
(
x
)]
);
}
__device__
__forceinline__
int
idx
(
int
i
)
const
template
<
typename
T
>
__device__
__forceinline__
D
at
(
int
x
,
const
T
*
data
)
const
{
return
idx_low
(
idx_high
(
i
)
);
return
saturate_cast
<
D
>
(
data
[
idx_col
(
x
)]
);
}
bool
is_range_safe
(
int
mini
,
int
maxi
)
const
...
...
@@ -131,103 +180,328 @@ namespace cv { namespace gpu { namespace device
return
true
;
}
const
int
last
;
const
int
last
_col
;
};
template
<
typename
D
>
struct
Brd
RowReplicate
:
Brd
Replicate
template
<
typename
D
>
struct
Brd
Col
Replicate
{
explicit
__host__
__device__
__forceinline__
BrdRowReplicate
(
int
len
)
:
BrdReplicate
(
len
)
{}
typedef
D
result_type
;
template
<
typename
T
>
__device__
__forceinline__
D
at_low
(
int
i
,
const
T
*
data
)
const
explicit
__host__
__device__
__forceinline__
BrdColReplicate
(
int
height
)
:
last_row
(
height
-
1
)
{}
template
<
typename
U
>
__host__
__device__
__forceinline__
BrdColReplicate
(
int
height
,
U
)
:
last_row
(
height
-
1
)
{}
__device__
__forceinline__
int
idx_row_low
(
int
y
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_low
(
i
)]
);
return
::
max
(
y
,
0
);
}
template
<
typename
T
>
__device__
__forceinline__
D
at_high
(
int
i
,
const
T
*
data
)
const
__device__
__forceinline__
int
idx_row_high
(
int
y
)
const
{
return
saturate_cast
<
D
>
(
data
[
idx_high
(
i
)]);
return
::
min
(
y
,
last_row
);
}
__device__
__forceinline__
int
idx_row
(
int
y
)
const
{
return
idx_row_low
(
idx_row_high
(
y
));
}
template
<
typename
T
>
__device__
__forceinline__
D
at_low
(
int
y
,
const
T
*
data
,
size_t
step
)
const
{
return
saturate_cast
<
D
>
(
*
(
const
T
*
)((
const
char
*
)
data
+
idx_row_low
(
y
)
*
step
));
}
template
<
typename
T
>
__device__
__forceinline__
D
at_high
(
int
y
,
const
T
*
data
,
size_t
step
)
const
{
return
saturate_cast
<
D
>
(
*
(
const
T
*
)((
const
char
*
)
data
+
idx_row_high
(
y
)
*
step
));
}
};
template
<
typename
T
>
__device__
__forceinline__
D
at
(
int
y
,
const
T
*
data
,
size_t
step
)
const
{
return
saturate_cast
<
D
>
(
*
(
const
T
*
)((
const
char
*
)
data
+
idx_row
(
y
)
*
step
));
}
template
<
typename
D
>
struct
BrdColReplicate
:
BrdReplicate
bool
is_range_safe
(
int
mini
,
int
maxi
)
const
{
return
true
;
}
const
int
last_row
;
};
template
<
typename
D
>
struct
BrdReplicate
{
__host__
__device__
__forceinline__
BrdColReplicate
(
int
len
,
size_t
step
)
:
BrdReplicate
(
len
),
step
(
step
)
{}
typedef
D
result_type
;
__host__
__device__
__forceinline__
BrdReplicate
(
int
height
,
int
width
)
:
last_row
(
height
-
1
),
last_col
(
width
-
1
)
{
}
template
<
typename
U
>
__host__
__device__
__forceinline__
BrdReplicate
(
int
height
,
int
width
,
U
)
:
last_row
(
height
-
1
),
last_col
(
width
-
1
)
{
}
__device__
__forceinline__
int
idx_row_low
(
int
y
)
const
{
return
::
max
(
y
,
0
);
}
__device__
__forceinline__
float
idx_row_low
(
float
y
)
const
{
return
::
fmax
(
y
,
0.0
f
);
}
__device__
__forceinline__
int
idx_row_high
(
int
y
)
const
{
return
::
min
(
y
,
last_row
);
}
__device__
__forceinline__
float
idx_row_high
(
float
y
)
const
{
return
::
fmin
(
y
,
last_row
);
}
template
<
typename
T
>
__device__
__forceinline__
D
at_low
(
int
i
,
const
T
*
data
)
const
__device__
__forceinline__
int
idx_row
(
int
y
)
const
{
return
idx_row_low
(
idx_row_high
(
y
));
}
__device__
__forceinline__
float
idx_row
(
float
y
)
const
{
return
idx_row_low
(
idx_row_high
(
y
));
}
__device__
__forceinline__
int
idx_col_low
(
int
x
)
const
{
return
::
max
(
x
,
0
);
}
__device__
__forceinline__
float
idx_col_low
(
float
x
)
const
{
return
::
fmax
(
x
,
0
);
}
__device__
__forceinline__
int
idx_col_high
(
int
x
)
const
{
return
::
min
(
x
,
last_col
);
}
__device__
__forceinline__
float
idx_col_high
(
float
x
)
const
{
return
::
fmin
(
x
,
last_col
);
}
__device__
__forceinline__
int
idx_col
(
int
x
)
const
{
return
idx_col_low
(
idx_col_high
(
x
));
}
__device__
__forceinline__
float
idx_col
(
float
x
)
const
{
return
saturate_cast
<
D
>
(
*
(
const
D
*
)((
const
char
*
)
data
+
idx_low
(
i
)
*
step
));
return
idx_col_low
(
idx_col_high
(
x
));
}
template
<
typename
T
>
__device__
__forceinline__
D
at
_high
(
int
i
,
const
T
*
data
)
const
template
<
typename
T
>
__device__
__forceinline__
D
at
(
int
y
,
int
x
,
const
T
*
data
,
size_t
step
)
const
{
return
saturate_cast
<
D
>
(
*
(
const
D
*
)((
const
char
*
)
data
+
idx_high
(
i
)
*
step
)
);
return
saturate_cast
<
D
>
(
((
const
T
*
)((
const
char
*
)
data
+
idx_row
(
y
)
*
step
))[
idx_col
(
x
)]
);
}
const
size_t
step
;
template
<
typename
Ptr2D
>
__device__
__forceinline__
D
at
(
typename
Ptr2D
::
index_type
y
,
typename
Ptr2D
::
index_type
x
,
const
Ptr2D
&
src
)
const
{
return
saturate_cast
<
D
>
(
src
(
idx_row
(
y
),
idx_col
(
x
)));
}
const
int
last_row
;
const
int
last_col
;
};
template
<
typename
D
>
struct
BrdRowConstant
//////////////////////////////////////////////////////////////
// BrdReflect101
template
<
typename
D
>
struct
BrdRowReflect101
{
explicit
__host__
__device__
__forceinline__
BrdRowConstant
(
int
len_
,
const
D
&
val_
=
VecTraits
<
D
>::
all
(
0
))
:
len
(
len_
),
val
(
val_
)
{}
typedef
D
result_type
;
// Builds the row border handler from the row width; only the index of the last
// valid column (width - 1) is stored.
explicit __host__ __device__ __forceinline__ BrdRowReflect101(int width)
    : last_col(width - 1)
{
}
// Two-argument constructor: the second argument (of any type U) is accepted and
// ignored, so only last_col = width - 1 is initialised.
template <typename U>
__host__ __device__ __forceinline__ BrdRowReflect101(int width, U)
    : last_col(width - 1)
{
}
template
<
typename
T
>
__device__
__forceinline__
D
at_low
(
int
i
,
const
T
*
data
)
const
__device__
__forceinline__
int
idx_col_low
(
int
x
)
const
{
return
i
>=
0
?
saturate_cast
<
D
>
(
data
[
i
])
:
val
;
return
::
abs
(
x
)
;
}
template
<
typename
T
>
__device__
__forceinline__
D
at_high
(
int
i
,
const
T
*
data
)
const
__device__
__forceinline__
int
idx_col_high
(
int
x
)
const
{
return
i
<
len
?
saturate_cast
<
D
>
(
data
[
i
])
:
val
;
return
last_col
-
::
abs
(
last_col
-
x
);
}
// Full column mapping: idx_col_high is applied first and its result is then
// passed through idx_col_low.
__device__ __forceinline__ int idx_col(int x) const
{
    const int folded_high = idx_col_high(x);
    return idx_col_low(folded_high);
}
// Reads data[idx_col_low(x)] and converts the element to D with saturate_cast.
// Only the low-side mapping is applied to x.
template <typename T>
__device__ __forceinline__ D at_low(int x, const T* data) const
{
    const int col = idx_col_low(x);
    return saturate_cast<D>(data[col]);
}
// Reads data[idx_col_high(x)] and converts the element to D with saturate_cast.
// Only the high-side mapping is applied to x.
template <typename T>
__device__ __forceinline__ D at_high(int x, const T* data) const
{
    const int col = idx_col_high(x);
    return saturate_cast<D>(data[col]);
}
// Reads data[idx_col(x)], i.e. with both the high- and low-side mappings applied
// to x, and converts the element to D with saturate_cast.
template <typename T>
__device__ __forceinline__ D at(int x, const T* data) const
{
    const int col = idx_col(x);
    return saturate_cast<D>(data[col]);
}
__host__
__device__
__forceinline__
bool
is_range_safe
(
int
mini
,
int
maxi
)
const
{
return
true
;
return
-
last_col
<=
mini
&&
maxi
<=
2
*
last_col
;
}
const
int
len
;
const
D
val
;
const
int
last_col
;
};
template
<
typename
D
>
struct
BrdCol
Constant
template
<
typename
D
>
struct
BrdCol
Reflect101
{
__host__
__device__
__forceinline__
BrdColConstant
(
int
len_
,
size_t
step_
,
const
D
&
val_
=
VecTraits
<
D
>::
all
(
0
))
:
len
(
len_
),
step
(
step_
),
val
(
val_
)
{}
typedef
D
result_type
;
template
<
typename
T
>
__device__
__forceinline__
D
at_low
(
int
i
,
const
T
*
data
)
const
// Builds the column border handler from the column height; only the index of the
// last valid row (height - 1) is stored.
explicit __host__ __device__ __forceinline__ BrdColReflect101(int height)
    : last_row(height - 1)
{
}
// Two-argument constructor: the second argument (of any type U) is accepted and
// ignored, so only last_row = height - 1 is initialised.
template <typename U>
__host__ __device__ __forceinline__ BrdColReflect101(int height, U)
    : last_row(height - 1)
{
}
// Low-side mapping of a row index: a negative y is mirrored to its absolute value.
__device__ __forceinline__ int idx_row_low(int y) const
{
    const int mirrored = ::abs(y);
    return mirrored;
}
__device__
__forceinline__
int
idx_row_high
(
int
y
)
const
{
return
i
>=
0
?
saturate_cast
<
D
>
(
*
(
const
D
*
)((
const
char
*
)
data
+
i
*
step
))
:
val
;
return
last_row
-
::
abs
(
last_row
-
y
)
;
}
template
<
typename
T
>
__device__
__forceinline__
D
at_high
(
int
i
,
const
T
*
data
)
const
__device__
__forceinline__
int
idx_row
(
int
y
)
const
{
return
i
<
len
?
saturate_cast
<
D
>
(
*
(
const
D
*
)((
const
char
*
)
data
+
i
*
step
))
:
val
;
return
idx_row_low
(
idx_row_high
(
y
));
}
// Reads one element of a column and converts it to D.
//   y    : row index; only the low-side mapping (idx_row_low) is applied
//   data : pointer to the column's element in row 0
//   step : distance between consecutive rows, in bytes
// The addressed bytes are interpreted as T, the pointer's own element type, and only
// then converted with saturate_cast. Casting the address to const D* instead would
// reinterpret the memory and misread it whenever T and D differ; for T == D the
// result is unchanged.
template <typename T>
__device__ __forceinline__ D at_low(int y, const T* data, size_t step) const
{
    const char* row_bytes = (const char*)data + idx_row_low(y) * step;
    return saturate_cast<D>(*(const T*)row_bytes);
}
// Reads one element of a column and converts it to D.
//   y    : row index; only the high-side mapping (idx_row_high) is applied
//   data : pointer to the column's element in row 0
//   step : distance between consecutive rows, in bytes
// The addressed bytes are interpreted as T, the pointer's own element type, and only
// then converted with saturate_cast. Casting the address to const D* instead would
// reinterpret the memory and misread it whenever T and D differ; for T == D the
// result is unchanged.
template <typename T>
__device__ __forceinline__ D at_high(int y, const T* data, size_t step) const
{
    const char* row_bytes = (const char*)data + idx_row_high(y) * step;
    return saturate_cast<D>(*(const T*)row_bytes);
}
// Reads one element of a column and converts it to D.
//   y    : row index; both mappings are applied via idx_row
//   data : pointer to the column's element in row 0
//   step : distance between consecutive rows, in bytes
// The addressed bytes are interpreted as T, the pointer's own element type, and only
// then converted with saturate_cast. Casting the address to const D* instead would
// reinterpret the memory and misread it whenever T and D differ; for T == D the
// result is unchanged.
template <typename T>
__device__ __forceinline__ D at(int y, const T* data, size_t step) const
{
    const char* row_bytes = (const char*)data + idx_row(y) * step;
    return saturate_cast<D>(*(const T*)row_bytes);
}
__host__
__device__
__forceinline__
bool
is_range_safe
(
int
mini
,
int
maxi
)
const
{
return
true
;
return
-
last_row
<=
mini
&&
maxi
<=
2
*
last_row
;
}
const
int
len
;
const
size_t
step
;
const
D
val
;
const
int
last_row
;
};
template
<
typename
OutT
>
struct
BrdConstant
template
<
typename
D
>
struct
BrdReflect101
{
__host__
__device__
__forceinline__
BrdConstant
(
int
w
,
int
h
,
const
OutT
&
val
=
VecTraits
<
OutT
>::
all
(
0
))
:
w
(
w
),
h
(
h
),
val
(
val
)
{}
typedef
D
result_type
;
// Builds the 2-D border handler from the image dimensions; stores the indices of
// the last valid row (height - 1) and last valid column (width - 1).
__host__ __device__ __forceinline__ BrdReflect101(int height, int width)
    : last_row(height - 1),
      last_col(width - 1)
{
}
// Three-argument constructor: the trailing argument (of any type U) is accepted and
// ignored; last_row and last_col are initialised exactly as in the two-argument form.
template <typename U>
__host__ __device__ __forceinline__ BrdReflect101(int height, int width, U)
    : last_row(height - 1),
      last_col(width - 1)
{
}
// Low-side mapping of an integer row index: a negative y is mirrored to |y|.
// The mirrored value is additionally folded modulo the row count (last_row + 1) so
// that the result is always in [0, last_row]; without the fold, |y| > last_row
// (coordinates far outside the image, or a one-row image) produced an out-of-range
// row. For |y| <= last_row the result equals plain |y|.
// Assumes last_row >= 0 (non-empty image); otherwise the modulus would be zero.
__device__ __forceinline__ int idx_row_low(int y) const
{
    return ::abs(y) % (last_row + 1);
}
// Float overload of the low-side row mapping: returns the absolute value of y
// as computed by ::fabs.
__device__ __forceinline__ float idx_row_low(float y) const
{
    return ::fabs(
        y);
}
// High-side mapping of an integer row index: y is mirrored about last_row
// (last_row - |last_row - y|).
// The mirrored value is made non-negative and folded modulo the row count
// (last_row + 1) so that the result is always in [0, last_row]; without this, a y
// beyond 2 * last_row (or any y when the image has one row) mirrored to a negative
// value whose magnitude could exceed last_row. For y in [0, 2 * last_row] the result
// is unchanged.
// Assumes last_row >= 0 (non-empty image); otherwise the modulus would be zero.
__device__ __forceinline__ int idx_row_high(int y) const
{
    return ::abs(last_row - ::abs(last_row - y)) % (last_row + 1);
}
// Float overload of the high-side row mapping: y is mirrored about last_row,
// i.e. last_row minus the absolute distance between last_row and y.
__device__ __forceinline__ float idx_row_high(float y) const
{
    return last_row
         - ::fabs(last_row - y);
}
// Full row mapping for an integer index: the high-side mirror is applied first and
// its result is then passed through the low-side mirror.
__device__ __forceinline__ int idx_row(int y) const
{
    const int mirrored_high = idx_row_high(y);
    return idx_row_low(mirrored_high);
}
// Full row mapping for a float coordinate: high-side mirror first, then low-side
// mirror (both resolve to their float overloads).
__device__ __forceinline__ float idx_row(float y) const
{
    const float mirrored_high = idx_row_high(y);
    return idx_row_low(mirrored_high);
}
// Low-side mapping of an integer column index: a negative x is mirrored to |x|.
// The mirrored value is additionally folded modulo the column count (last_col + 1)
// so that the result is always in [0, last_col]; without the fold, |x| > last_col
// (coordinates far outside the image, or a one-column image) produced an
// out-of-range column. For |x| <= last_col the result equals plain |x|.
// Assumes last_col >= 0 (non-empty image); otherwise the modulus would be zero.
__device__ __forceinline__ int idx_col_low(int x) const
{
    return ::abs(x) % (last_col + 1);
}
// Float overload of the low-side column mapping: returns the absolute value of x
// as computed by ::fabs.
__device__ __forceinline__ float idx_col_low(float x) const
{
    return ::fabs(
        x);
}
// High-side mapping of an integer column index: x is mirrored about last_col
// (last_col - |last_col - x|).
// The mirrored value is made non-negative and folded modulo the column count
// (last_col + 1) so that the result is always in [0, last_col]; without this, an x
// beyond 2 * last_col (or any x when the image has one column) mirrored to a
// negative value whose magnitude could exceed last_col. For x in [0, 2 * last_col]
// the result is unchanged.
// Assumes last_col >= 0 (non-empty image); otherwise the modulus would be zero.
__device__ __forceinline__ int idx_col_high(int x) const
{
    return ::abs(last_col - ::abs(last_col - x)) % (last_col + 1);
}
// Float overload of the high-side column mapping: x is mirrored about last_col,
// i.e. last_col minus the absolute distance between last_col and x.
__device__ __forceinline__ float idx_col_high(float x) const
{
    return last_col
         - ::fabs(last_col - x);
}
// Full column mapping for an integer index: the high-side mirror is applied first
// and its result is then passed through the low-side mirror.
__device__ __forceinline__ int idx_col(int x) const
{
    const int mirrored_high = idx_col_high(x);
    return idx_col_low(mirrored_high);
}
// Full column mapping for a float coordinate: high-side mirror first, then low-side
// mirror (both resolve to their float overloads).
__device__ __forceinline__ float idx_col(float x) const
{
    const float mirrored_high = idx_col_high(x);
    return idx_col_low(mirrored_high);
}
// Border-aware read from a pitched 2-D buffer.
//   y, x : requested row and column (remapped with idx_row / idx_col)
//   data : pointer to the first element of row 0
//   step : distance between consecutive rows, in bytes
// The remapped row is located by byte offset, the remapped column by element index
// within that row; the element is converted to D with saturate_cast.
template <typename T>
__device__ __forceinline__ D at(int y, int x, const T* data, size_t step) const
{
    const T* row = (const T*)((const char*)data + idx_row(y) * step);
    return saturate_cast<D>(row[idx_col(x)]);
}
// Border-aware read through a callable 2-D source.
//   y, x : coordinates in the source's own index_type
//   src  : object invoked as src(row, col)
// Both coordinates are remapped with idx_row / idx_col before src is invoked, and
// the fetched value is converted to D with saturate_cast.
template <typename Ptr2D>
__device__ __forceinline__ D at(typename Ptr2D::index_type y, typename Ptr2D::index_type x, const Ptr2D& src) const
{
    return saturate_cast<D>(
        src(idx_row(y),
            idx_col(x)));
}
const
int
last_row
;
const
int
last_col
;
};
//////////////////////////////////////////////////////////////
// BorderReader
template
<
typename
Ptr2D
,
typename
B
>
struct
BorderReader
{
typedef
typename
B
::
result_type
elem_type
;
typedef
typename
Ptr2D
::
index_type
index_type
;
__host__
__device__
__forceinline__
BorderReader
(
const
Ptr2D
&
ptr_
,
const
B
&
b_
)
:
ptr
(
ptr_
),
b
(
b_
)
{}
__device__
__forceinline__
OutT
at
(
int
x
,
int
y
,
const
uchar
*
data
,
int
step
)
const
__device__
__forceinline__
elem_type
operator
()(
index_type
y
,
index_type
x
)
const
{
if
(
x
>=
0
&&
x
<=
w
-
1
&&
y
>=
0
&&
y
<=
h
-
1
)
return
((
const
OutT
*
)(
data
+
y
*
step
))[
x
];
return
val
;
return
b
.
at
(
y
,
x
,
ptr
);
}
const
int
w
;
const
int
h
;
OutT
val
;
const
Ptr2D
ptr
;
const
B
b
;
};
}}}
...
...
modules/gpu/src/opencv2/gpu/device/utility.hpp
View file @
47d68f69
...
...
@@ -309,7 +309,6 @@ namespace cv { namespace gpu { namespace device
U
vec1Vals
[
MAX_LEN
/
THREAD_DIM
];
};
///////////////////////////////////////////////////////////////////////////////
// Solve linear system
...
...
@@ -364,6 +363,60 @@ namespace cv { namespace gpu { namespace device
return
false
;
}
///////////////////////////////////////////////////////////////////////////////
// Filters
// Nearest-point sampler: wraps a 2-D source that is addressed with integer
// coordinates and exposes it through float coordinates.
template <typename Ptr2D> struct PointFilter
{
    typedef typename Ptr2D::elem_type elem_type;
    typedef float index_type;

    // Keeps a copy of the wrapped source.
    explicit __host__ __device__ __forceinline__ PointFilter(const Ptr2D& src_) : src(src_) {}

    // Converts each coordinate to int with __float2int_rn (round-to-nearest
    // intrinsic) and returns the source element at that position.
    __device__ __forceinline__ elem_type operator ()(float y, float x) const
    {
        const int row = __float2int_rn(y);
        const int col = __float2int_rn(x);
        return src(row, col);
    }

    const Ptr2D src;
};
// Bilinear sampler: wraps a 2-D source that is addressed with integer coordinates
// and exposes it through float coordinates, blending the four surrounding elements.
template <typename Ptr2D> struct LinearFilter
{
    typedef typename Ptr2D::elem_type elem_type;
    typedef float index_type;

    // Keeps a copy of the wrapped source.
    explicit __host__ __device__ __forceinline__ LinearFilter(const Ptr2D& src_) : src(src_) {}

    // Returns the weighted sum of the elements at (y1, x1), (y1, x2), (y2, x1) and
    // (y2, x2), where (y1, x1) is (y, x) converted with __float2int_rd (round-down
    // intrinsic) and x2 = x1 + 1, y2 = y1 + 1. Each element is weighted by the
    // product of its horizontal and vertical distances from the opposite corner.
    // The sum is accumulated in a float vector with the same channel count as
    // elem_type and converted back with saturate_cast.
    __device__ __forceinline__ elem_type operator ()(float y, float x) const
    {
        typedef typename TypeVec<float, VecTraits<elem_type>::cn>::vec_type work_type;

        const int x1 = __float2int_rd(x);
        const int y1 = __float2int_rd(y);
        const int x2 = x1 + 1;
        const int y2 = y1 + 1;

        work_type acc = VecTraits<work_type>::all(0);

        // Top-left neighbour.
        const elem_type top_left = src(y1, x1);
        acc = acc + top_left * ((x2 - x) * (y2 - y));

        // Top-right neighbour.
        const elem_type top_right = src(y1, x2);
        acc = acc + top_right * ((x - x1) * (y2 - y));

        // Bottom-left neighbour.
        const elem_type bottom_left = src(y2, x1);
        acc = acc + bottom_left * ((x2 - x) * (y - y1));

        // Bottom-right neighbour.
        const elem_type bottom_right = src(y2, x2);
        acc = acc + bottom_right * ((x - x1) * (y - y1));

        return saturate_cast<elem_type>(acc);
    }

    const Ptr2D src;
};
}}}
#endif // __OPENCV_GPU_UTILITY_HPP__
modules/gpu/src/opencv2/gpu/device/vec_traits.hpp
View file @
47d68f69
...
...
@@ -166,6 +166,7 @@ namespace cv { namespace gpu { namespace device
enum
{
cn
=
1
};
\
static
__device__
__host__
__forceinline__
type
all
(
type
v
)
{
return
v
;}
\
static
__device__
__host__
__forceinline__
type
make
(
type
x
)
{
return
x
;}
\
static
__device__
__host__
__forceinline__
type
make
(
const
type
*
v
)
{
return
*
v
;}
\
};
\
template
<>
struct
VecTraits
<
type
##
1
>
\
{
\
...
...
@@ -173,6 +174,7 @@ namespace cv { namespace gpu { namespace device
enum
{
cn
=
1
};
\
static
__device__
__host__
__forceinline__
type
##
1
all
(
type
v
)
{
return
make_
##
type
##
1
(
v
);}
\
static
__device__
__host__
__forceinline__
type
##
1
make
(
type
x
)
{
return
make_
##
type
##
1
(
x
);}
\
static
__device__
__host__
__forceinline__
type
##
1
make
(
const
type
*
v
)
{
return
make_
##
type
##
1
(
*
v
);}
\
};
\
template
<>
struct
VecTraits
<
type
##
2
>
\
{
\
...
...
@@ -180,6 +182,7 @@ namespace cv { namespace gpu { namespace device
enum
{
cn
=
2
};
\
static
__device__
__host__
__forceinline__
type
##
2
all
(
type
v
)
{
return
make_
##
type
##
2
(
v
,
v
);}
\
static
__device__
__host__
__forceinline__
type
##
2
make
(
type
x
,
type
y
)
{
return
make_
##
type
##
2
(
x
,
y
);}
\
static
__device__
__host__
__forceinline__
type
##
2
make
(
const
type
*
v
)
{
return
make_
##
type
##
2
(
v
[
0
],
v
[
1
]);}
\
};
\
template
<>
struct
VecTraits
<
type
##
3
>
\
{
\
...
...
@@ -187,6 +190,7 @@ namespace cv { namespace gpu { namespace device
enum
{
cn
=
3
};
\
static
__device__
__host__
__forceinline__
type
##
3
all
(
type
v
)
{
return
make_
##
type
##
3
(
v
,
v
,
v
);}
\
static
__device__
__host__
__forceinline__
type
##
3
make
(
type
x
,
type
y
,
type
z
)
{
return
make_
##
type
##
3
(
x
,
y
,
z
);}
\
static
__device__
__host__
__forceinline__
type
##
3
make
(
const
type
*
v
)
{
return
make_
##
type
##
3
(
v
[
0
],
v
[
1
],
v
[
2
]);}
\
};
\
template
<>
struct
VecTraits
<
type
##
4
>
\
{
\
...
...
@@ -194,6 +198,7 @@ namespace cv { namespace gpu { namespace device
enum
{
cn
=
4
};
\
static
__device__
__host__
__forceinline__
type
##
4
all
(
type
v
)
{
return
make_
##
type
##
4
(
v
,
v
,
v
,
v
);}
\
static
__device__
__host__
__forceinline__
type
##
4
make
(
type
x
,
type
y
,
type
z
,
type
w
)
{
return
make_
##
type
##
4
(
x
,
y
,
z
,
w
);}
\
static
__device__
__host__
__forceinline__
type
##
4
make
(
const
type
*
v
)
{
return
make_
##
type
##
4
(
v
[
0
],
v
[
1
],
v
[
2
],
v
[
3
]);}
\
};
\
template
<>
struct
VecTraits
<
type
##
8
>
\
{
\
...
...
@@ -201,10 +206,10 @@ namespace cv { namespace gpu { namespace device
enum
{
cn
=
8
};
\
static
__device__
__host__
__forceinline__
type
##
8
all
(
type
v
)
{
return
make_
##
type
##
8
(
v
,
v
,
v
,
v
,
v
,
v
,
v
,
v
);}
\
static
__device__
__host__
__forceinline__
type
##
8
make
(
type
a0
,
type
a1
,
type
a2
,
type
a3
,
type
a4
,
type
a5
,
type
a6
,
type
a7
)
{
return
make_
##
type
##
8
(
a0
,
a1
,
a2
,
a3
,
a4
,
a5
,
a6
,
a7
);}
\
static
__device__
__host__
__forceinline__
type
##
8
make
(
const
type
*
v
)
{
return
make_
##
type
##
8
(
v
[
0
],
v
[
1
],
v
[
2
],
v
[
3
],
v
[
4
],
v
[
5
],
v
[
6
],
v
[
7
]);}
\
};
OPENCV_GPU_IMPLEMENT_VEC_TRAITS
(
uchar
)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS
(
char
)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS
(
ushort
)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS
(
short
)
OPENCV_GPU_IMPLEMENT_VEC_TRAITS
(
int
)
...
...
@@ -214,12 +219,61 @@ namespace cv { namespace gpu { namespace device
#undef OPENCV_GPU_IMPLEMENT_VEC_TRAITS
// Traits for plain `char` treated as a one-channel "vector".
template<> struct VecTraits<char>
{
    typedef char elem_type;
    enum { cn = 1 };

    // Broadcast of a scalar: for one channel this is the value itself.
    static __device__ __host__ __forceinline__ char all(char v) { return v; }
    // Construction from a single component.
    static __device__ __host__ __forceinline__ char make(char x) { return x; }
    // Construction from an array: takes the first element.
    static __device__ __host__ __forceinline__ char make(const char* x) { return x[0]; }
};
// Traits for `schar` treated as a one-channel "vector".
template<> struct VecTraits<schar>
{
    typedef schar elem_type;
    enum { cn = 1 };

    // Broadcast of a scalar: for one channel this is the value itself.
    static __device__ __host__ __forceinline__ schar all(schar v) { return v; }
    // Construction from a single component.
    static __device__ __host__ __forceinline__ schar make(schar x) { return x; }
    // Construction from an array: takes the first element.
    static __device__ __host__ __forceinline__ schar make(const schar* x) { return x[0]; }
};
// Traits for the one-channel vector type char1 (component type schar).
template<> struct VecTraits<char1>
{
    typedef schar elem_type;
    enum { cn = 1 };

    // Vector with its component set to v.
    static __device__ __host__ __forceinline__ char1 all(schar v) { return make(v); }
    // Vector built from an explicit component.
    static __device__ __host__ __forceinline__ char1 make(schar x) { return make_char1(x); }
    // Vector built from the first element of an array.
    static __device__ __host__ __forceinline__ char1 make(const schar* v) { return make_char1(*v); }
};
// Traits for the two-channel vector type char2 (component type schar).
template<> struct VecTraits<char2>
{
    typedef schar elem_type;
    enum { cn = 2 };

    // Vector with every component set to v.
    static __device__ __host__ __forceinline__ char2 all(schar v) { return make(v, v); }
    // Vector built from explicit components.
    static __device__ __host__ __forceinline__ char2 make(schar x, schar y) { return make_char2(x, y); }
    // Vector built from the first two elements of an array.
    static __device__ __host__ __forceinline__ char2 make(const schar* v) { return make_char2(v[0], v[1]); }
};
// Traits for the three-channel vector type char3 (component type schar).
template<> struct VecTraits<char3>
{
    typedef schar elem_type;
    enum { cn = 3 };

    // Vector with every component set to v.
    static __device__ __host__ __forceinline__ char3 all(schar v) { return make(v, v, v); }
    // Vector built from explicit components.
    static __device__ __host__ __forceinline__ char3 make(schar x, schar y, schar z) { return make_char3(x, y, z); }
    // Vector built from the first three elements of an array.
    static __device__ __host__ __forceinline__ char3 make(const schar* v) { return make_char3(v[0], v[1], v[2]); }
};
// Traits for the four-channel vector type char4 (component type schar).
template<> struct VecTraits<char4>
{
    typedef schar elem_type;
    enum { cn = 4 };

    // Vector with every component set to v.
    static __device__ __host__ __forceinline__ char4 all(schar v) { return make(v, v, v, v); }
    // Vector built from explicit components.
    static __device__ __host__ __forceinline__ char4 make(schar x, schar y, schar z, schar w) { return make_char4(x, y, z, w); }
    // Vector built from the first four elements of an array.
    static __device__ __host__ __forceinline__ char4 make(const schar* v) { return make_char4(v[0], v[1], v[2], v[3]); }
};
// Traits for the eight-channel vector type char8 (component type schar).
template<> struct VecTraits<char8>
{
    typedef schar elem_type;
    enum { cn = 8 };

    // Vector with every component set to v.
    static __device__ __host__ __forceinline__ char8 all(schar v)
    {
        return make(v, v, v, v, v, v, v, v);
    }
    // Vector built from explicit components.
    static __device__ __host__ __forceinline__ char8 make(schar a0, schar a1, schar a2, schar a3, schar a4, schar a5, schar a6, schar a7)
    {
        return make_char8(a0, a1, a2, a3, a4, a5, a6, a7);
    }
    // Vector built from the first eight elements of an array.
    static __device__ __host__ __forceinline__ char8 make(const schar* v)
    {
        return make_char8(v[0], v[1], v[2], v[3], v[4], v[5], v[6], v[7]);
    }
};
}}}
...
...
modules/gpu/test/test_imgproc.cpp
View file @
47d68f69
...
...
@@ -181,15 +181,18 @@ INSTANTIATE_TEST_CASE_P(ImgProc, Resize, testing::Combine(
///////////////////////////////////////////////////////////////////////////////////////////////////////
// remap
struct
Remap
:
testing
::
TestWithParam
<
std
::
tr1
::
tuple
<
cv
::
gpu
::
DeviceInfo
,
int
>
>
struct
Remap
:
testing
::
TestWithParam
<
std
::
tr1
::
tuple
<
cv
::
gpu
::
DeviceInfo
,
int
,
int
,
int
>
>
{
cv
::
gpu
::
DeviceInfo
devInfo
;
int
type
;
int
interpolation
;
int
borderType
;
cv
::
Size
size
;
cv
::
Mat
src
;
cv
::
Mat
xmap
;
cv
::
Mat
ymap
;
cv
::
Scalar
borderValue
;
cv
::
Mat
dst_gold
;
...
...
@@ -197,43 +200,83 @@ struct Remap : testing::TestWithParam< std::tr1::tuple<cv::gpu::DeviceInfo, int>
{
devInfo
=
std
::
tr1
::
get
<
0
>
(
GetParam
());
type
=
std
::
tr1
::
get
<
1
>
(
GetParam
());
interpolation
=
std
::
tr1
::
get
<
2
>
(
GetParam
());
borderType
=
std
::
tr1
::
get
<
3
>
(
GetParam
());
cv
::
gpu
::
setDevice
(
devInfo
.
deviceID
());
cv
::
RNG
&
rng
=
cvtest
::
TS
::
ptr
()
->
get_rng
();
size
=
cv
::
Size
(
rng
.
uniform
(
20
,
150
),
rng
.
uniform
(
20
,
15
0
));
size
=
cv
::
Size
(
rng
.
uniform
(
100
,
200
),
rng
.
uniform
(
100
,
20
0
));
src
=
cvtest
::
randomMat
(
rng
,
size
,
type
,
0.0
,
127.0
,
false
);
xmap
=
cvtest
::
randomMat
(
rng
,
size
,
CV_32FC1
,
0.0
,
src
.
cols
-
1
,
false
);
ymap
=
cvtest
::
randomMat
(
rng
,
size
,
CV_32FC1
,
0.0
,
src
.
rows
-
1
,
false
);
src
=
cvtest
::
randomMat
(
rng
,
size
,
type
,
0.0
,
256.0
,
false
);
xmap
.
create
(
size
,
CV_32FC1
);
ymap
.
create
(
size
,
CV_32FC1
);
for
(
int
y
=
0
;
y
<
src
.
rows
;
++
y
)
{
float
*
xmap_row
=
xmap
.
ptr
<
float
>
(
y
);
float
*
ymap_row
=
ymap
.
ptr
<
float
>
(
y
);
for
(
int
x
=
0
;
x
<
src
.
cols
;
++
x
)
{
xmap_row
[
x
]
=
src
.
cols
-
1
-
x
;
ymap_row
[
x
]
=
src
.
rows
-
1
-
y
;
}
}
borderValue
[
0
]
=
rng
.
uniform
(
0.0
,
256.0
);
borderValue
[
1
]
=
rng
.
uniform
(
0.0
,
256.0
);
borderValue
[
2
]
=
rng
.
uniform
(
0.0
,
256.0
);
borderValue
[
3
]
=
rng
.
uniform
(
0.0
,
256.0
);
cv
::
remap
(
src
,
dst_gold
,
xmap
,
ymap
,
cv
::
INTER_LINEAR
,
cv
::
BORDER_WRAP
);
cv
::
remap
(
src
,
dst_gold
,
xmap
,
ymap
,
interpolation
,
borderType
,
borderValue
);
}
};
TEST_P
(
Remap
,
Accuracy
)
{
static
const
char
*
interpolations_str
[]
=
{
"INTER_NEAREST"
,
"INTER_LINEAR"
,
"INTER_CUBIC"
};
static
const
char
*
borderTypes_str
[]
=
{
"BORDER_CONSTANT"
,
"BORDER_REPLICATE"
,
"BORDER_REFLECT"
,
"BORDER_WRAP"
,
"BORDER_REFLECT_101"
};
const
char
*
interpolationStr
=
interpolations_str
[
interpolation
];
const
char
*
borderTypeStr
=
borderTypes_str
[
borderType
];
PRINT_PARAM
(
devInfo
);
PRINT_TYPE
(
type
);
PRINT_PARAM
(
interpolationStr
);
PRINT_PARAM
(
borderTypeStr
);
PRINT_PARAM
(
size
);
PRINT_PARAM
(
borderValue
);
cv
::
Mat
dst
;
ASSERT_NO_THROW
(
cv
::
gpu
::
GpuMat
gpuRes
;
cv
::
gpu
::
remap
(
cv
::
gpu
::
GpuMat
(
src
),
gpuRes
,
cv
::
gpu
::
GpuMat
(
xmap
),
cv
::
gpu
::
GpuMat
(
ymap
));
cv
::
gpu
::
remap
(
cv
::
gpu
::
GpuMat
(
src
),
gpuRes
,
cv
::
gpu
::
GpuMat
(
xmap
),
cv
::
gpu
::
GpuMat
(
ymap
)
,
interpolation
,
borderType
,
borderValue
);
gpuRes
.
download
(
dst
);
);
EXPECT_MAT_
SIMILAR
(
dst_gold
,
dst
,
0.
5
);
EXPECT_MAT_
NEAR
(
dst_gold
,
dst
,
1e-
5
);
}
INSTANTIATE_TEST_CASE_P
(
ImgProc
,
Remap
,
testing
::
Combine
(
testing
::
ValuesIn
(
devices
()),
testing
::
Values
(
CV_8UC1
,
CV_8UC3
)));
INSTANTIATE_TEST_CASE_P
(
ImgProc
,
Remap
,
testing
::
Combine
(
testing
::
ValuesIn
(
devices
()),
testing
::
Values
(
CV_8UC1
,
CV_8UC3
,
CV_8UC4
,
CV_32FC1
,
CV_32FC3
,
CV_32FC4
),
testing
::
Values
(
cv
::
INTER_NEAREST
,
cv
::
INTER_LINEAR
),
testing
::
Values
(
cv
::
BORDER_REFLECT101
,
cv
::
BORDER_REPLICATE
,
cv
::
BORDER_CONSTANT
)
)
);
///////////////////////////////////////////////////////////////////////////////////////////////////////
// copyMakeBorder
...
...
samples/gpu/performance/tests.cpp
View file @
47d68f69
...
...
@@ -79,9 +79,9 @@ TEST(remap)
Mat
src
,
dst
,
xmap
,
ymap
;
gpu
::
GpuMat
d_src
,
d_dst
,
d_xmap
,
d_ymap
;
for
(
int
size
=
1000
;
size
<=
8
000
;
size
*=
2
)
for
(
int
size
=
1000
;
size
<=
4
000
;
size
*=
2
)
{
SUBTEST
<<
"src "
<<
size
<<
"
and 8U, 32F maps
"
;
SUBTEST
<<
"src "
<<
size
<<
"
, 8UC1
"
;
gen
(
src
,
size
,
size
,
CV_8UC1
,
0
,
256
);
...
...
@@ -101,7 +101,112 @@ TEST(remap)
dst
.
create
(
xmap
.
size
(),
src
.
type
());
CPU_ON
;
remap
(
src
,
dst
,
xmap
,
ymap
,
INTER_LINEAR
);
remap
(
src
,
dst
,
xmap
,
ymap
,
INTER_LINEAR
,
BORDER_REPLICATE
);
CPU_OFF
;
d_src
=
src
;
d_xmap
=
xmap
;
d_ymap
=
ymap
;
d_dst
.
create
(
d_xmap
.
size
(),
d_src
.
type
());
GPU_ON
;
gpu
::
remap
(
d_src
,
d_dst
,
d_xmap
,
d_ymap
,
INTER_LINEAR
,
BORDER_REPLICATE
);
GPU_OFF
;
}
for
(
int
size
=
1000
;
size
<=
4000
;
size
*=
2
)
{
SUBTEST
<<
"src "
<<
size
<<
", 8UC3"
;
gen
(
src
,
size
,
size
,
CV_8UC3
,
0
,
256
);
xmap
.
create
(
size
,
size
,
CV_32F
);
ymap
.
create
(
size
,
size
,
CV_32F
);
for
(
int
i
=
0
;
i
<
size
;
++
i
)
{
float
*
xmap_row
=
xmap
.
ptr
<
float
>
(
i
);
float
*
ymap_row
=
ymap
.
ptr
<
float
>
(
i
);
for
(
int
j
=
0
;
j
<
size
;
++
j
)
{
xmap_row
[
j
]
=
(
j
-
size
*
0.5
f
)
*
0.75
f
+
size
*
0.5
f
;
ymap_row
[
j
]
=
(
i
-
size
*
0.5
f
)
*
0.75
f
+
size
*
0.5
f
;
}
}
dst
.
create
(
xmap
.
size
(),
src
.
type
());
CPU_ON
;
remap
(
src
,
dst
,
xmap
,
ymap
,
INTER_LINEAR
,
BORDER_REPLICATE
);
CPU_OFF
;
d_src
=
src
;
d_xmap
=
xmap
;
d_ymap
=
ymap
;
d_dst
.
create
(
d_xmap
.
size
(),
d_src
.
type
());
GPU_ON
;
gpu
::
remap
(
d_src
,
d_dst
,
d_xmap
,
d_ymap
,
INTER_LINEAR
,
BORDER_REPLICATE
);
GPU_OFF
;
}
for
(
int
size
=
1000
;
size
<=
4000
;
size
*=
2
)
{
SUBTEST
<<
"src "
<<
size
<<
", 8UC4"
;
gen
(
src
,
size
,
size
,
CV_8UC4
,
0
,
256
);
xmap
.
create
(
size
,
size
,
CV_32F
);
ymap
.
create
(
size
,
size
,
CV_32F
);
for
(
int
i
=
0
;
i
<
size
;
++
i
)
{
float
*
xmap_row
=
xmap
.
ptr
<
float
>
(
i
);
float
*
ymap_row
=
ymap
.
ptr
<
float
>
(
i
);
for
(
int
j
=
0
;
j
<
size
;
++
j
)
{
xmap_row
[
j
]
=
(
j
-
size
*
0.5
f
)
*
0.75
f
+
size
*
0.5
f
;
ymap_row
[
j
]
=
(
i
-
size
*
0.5
f
)
*
0.75
f
+
size
*
0.5
f
;
}
}
dst
.
create
(
xmap
.
size
(),
src
.
type
());
CPU_ON
;
remap
(
src
,
dst
,
xmap
,
ymap
,
INTER_LINEAR
,
BORDER_REPLICATE
);
CPU_OFF
;
d_src
=
src
;
d_xmap
=
xmap
;
d_ymap
=
ymap
;
d_dst
.
create
(
d_xmap
.
size
(),
d_src
.
type
());
GPU_ON
;
gpu
::
remap
(
d_src
,
d_dst
,
d_xmap
,
d_ymap
,
INTER_LINEAR
,
BORDER_REPLICATE
);
GPU_OFF
;
}
for
(
int
size
=
1000
;
size
<=
4000
;
size
*=
2
)
{
SUBTEST
<<
"src "
<<
size
<<
", 16SC3"
;
gen
(
src
,
size
,
size
,
CV_16SC3
,
0
,
256
);
xmap
.
create
(
size
,
size
,
CV_32F
);
ymap
.
create
(
size
,
size
,
CV_32F
);
for
(
int
i
=
0
;
i
<
size
;
++
i
)
{
float
*
xmap_row
=
xmap
.
ptr
<
float
>
(
i
);
float
*
ymap_row
=
ymap
.
ptr
<
float
>
(
i
);
for
(
int
j
=
0
;
j
<
size
;
++
j
)
{
xmap_row
[
j
]
=
(
j
-
size
*
0.5
f
)
*
0.75
f
+
size
*
0.5
f
;
ymap_row
[
j
]
=
(
i
-
size
*
0.5
f
)
*
0.75
f
+
size
*
0.5
f
;
}
}
dst
.
create
(
xmap
.
size
(),
src
.
type
());
CPU_ON
;
remap
(
src
,
dst
,
xmap
,
ymap
,
INTER_LINEAR
,
BORDER_REPLICATE
);
CPU_OFF
;
d_src
=
src
;
...
...
@@ -110,7 +215,7 @@ TEST(remap)
d_dst
.
create
(
d_xmap
.
size
(),
d_src
.
type
());
GPU_ON
;
gpu
::
remap
(
d_src
,
d_dst
,
d_xmap
,
d_ymap
);
gpu
::
remap
(
d_src
,
d_dst
,
d_xmap
,
d_ymap
,
INTER_LINEAR
,
BORDER_REPLICATE
);
GPU_OFF
;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment