Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
7d0feef7
Commit
7d0feef7
authored
Oct 20, 2010
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added vecmath to gpu module.
parent
0c3803a6
Hide whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
1015 additions
and
207 deletions
+1015
-207
filters.cu
modules/gpu/src/cuda/filters.cu
+117
-50
saturate_cast.hpp
modules/gpu/src/cuda/saturate_cast.hpp
+0
-86
vecmath.hpp
modules/gpu/src/cuda/vecmath.hpp
+824
-21
cudastream.cpp
modules/gpu/src/cudastream.cpp
+1
-1
filtering.cpp
modules/gpu/src/filtering.cpp
+62
-47
imgproc_gpu.cpp
modules/gpu/src/imgproc_gpu.cpp
+1
-1
filters.cpp
tests/gpu/src/filters.cpp
+10
-1
No files found.
modules/gpu/src/cuda/filters.cu
View file @
7d0feef7
...
...
@@ -44,6 +44,7 @@
#include "saturate_cast.hpp"
#include "safe_call.hpp"
#include "cuda_shared.hpp"
#include "vecmath.hpp"
using namespace cv::gpu;
...
...
@@ -71,7 +72,7 @@ namespace cv { namespace gpu { namespace filters
namespace filter_krnls
{
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int KERNEL_SIZE, typename T, typename D>
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int KERNEL_SIZE,
int CN,
typename T, typename D>
__global__ void linearRowFilter(const T* src, size_t src_step, D* dst, size_t dst_step, int anchor, int width, int height)
{
__shared__ T smem[BLOCK_DIM_Y * BLOCK_DIM_X * 3];
...
...
@@ -91,23 +92,24 @@ namespace filter_krnls
{
const T* rowSrc = src + threadY * src_step;
sDataRow[threadIdx.x + blockDim.x] = threadX < width ? rowSrc[threadX] :
0
;
sDataRow[threadIdx.x + blockDim.x] = threadX < width ? rowSrc[threadX] :
VecTraits<T>::all(0)
;
sDataRow[threadIdx.x] = prevThreadX >= 0 ? rowSrc[prevThreadX] :
0
;
sDataRow[threadIdx.x] = prevThreadX >= 0 ? rowSrc[prevThreadX] :
VecTraits<T>::all(0)
;
sDataRow[(blockDim.x << 1) + threadIdx.x] = nextThreadX < width ? rowSrc[nextThreadX] :
0
;
sDataRow[(blockDim.x << 1) + threadIdx.x] = nextThreadX < width ? rowSrc[nextThreadX] :
VecTraits<T>::all(0)
;
__syncthreads();
if (threadX < width)
{
float sum = 0;
typedef typename TypeVec<float, CN>::vec_t sum_t;
sum_t sum = VecTraits<sum_t>::all(0);
sDataRow += threadIdx.x + blockDim.x - anchor;
#pragma unroll
for(int i = 0; i < KERNEL_SIZE; ++i)
sum
+= cLinearKernel[i] * sDataRow
[i];
sum
= sum + sDataRow[i] * cLinearKernel
[i];
dst[threadY * dst_step + threadX] = saturate_cast<D>(sum);
}
...
...
@@ -117,7 +119,7 @@ namespace filter_krnls
namespace cv { namespace gpu { namespace filters
{
template <int KERNEL_SIZE, typename T, typename D>
template <int KERNEL_SIZE,
int CN,
typename T, typename D>
void linearRowFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor)
{
const int BLOCK_DIM_X = 16;
...
...
@@ -126,51 +128,83 @@ namespace cv { namespace gpu { namespace filters
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
dim3 blocks(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));
filter_krnls::linearRowFilter<BLOCK_DIM_X, BLOCK_DIM_Y, KERNEL_SIZE><<<blocks, threads>>>(src.ptr, src.elem_step,
filter_krnls::linearRowFilter<BLOCK_DIM_X, BLOCK_DIM_Y, KERNEL_SIZE
, CN
><<<blocks, threads>>>(src.ptr, src.elem_step,
dst.ptr, dst.elem_step, anchor, src.cols, src.rows);
cudaSafeCall( cudaThreadSynchronize() );
}
template <typename T, typename D>
template <
int CN,
typename T, typename D>
inline void linearRowFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor);
static const caller_t callers[] =
{linearRowFilter_caller<0 ,
T, D>, linearRowFilter_caller<1
, T, D>,
linearRowFilter_caller<2 ,
T, D>, linearRowFilter_caller<3
, T, D>,
linearRowFilter_caller<4 ,
T, D>, linearRowFilter_caller<5
, T, D>,
linearRowFilter_caller<6 ,
T, D>, linearRowFilter_caller<7
, T, D>,
linearRowFilter_caller<8 ,
T, D>, linearRowFilter_caller<9
, T, D>,
linearRowFilter_caller<10,
T, D>, linearRowFilter_caller<11
, T, D>,
linearRowFilter_caller<12,
T, D>, linearRowFilter_caller<13
, T, D>,
linearRowFilter_caller<14,
T, D>, linearRowFilter_caller<15
, T, D>};
{linearRowFilter_caller<0 ,
CN, T, D>, linearRowFilter_caller<1 , CN
, T, D>,
linearRowFilter_caller<2 ,
CN, T, D>, linearRowFilter_caller<3 , CN
, T, D>,
linearRowFilter_caller<4 ,
CN, T, D>, linearRowFilter_caller<5 , CN
, T, D>,
linearRowFilter_caller<6 ,
CN, T, D>, linearRowFilter_caller<7 , CN
, T, D>,
linearRowFilter_caller<8 ,
CN, T, D>, linearRowFilter_caller<9 , CN
, T, D>,
linearRowFilter_caller<10,
CN, T, D>, linearRowFilter_caller<11, CN
, T, D>,
linearRowFilter_caller<12,
CN, T, D>, linearRowFilter_caller<13, CN
, T, D>,
linearRowFilter_caller<14,
CN, T, D>, linearRowFilter_caller<15, CN
, T, D>};
loadLinearKernel(kernel, ksize);
callers[ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor);
}
void linearRowFilter_gpu_
32s32s
(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
void linearRowFilter_gpu_
8u_8u_c4
(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearRowFilter_gpu<
int, int
>(src, dst, kernel, ksize, anchor);
linearRowFilter_gpu<
4, uchar4, uchar4
>(src, dst, kernel, ksize, anchor);
}
void linearRowFilter_gpu_
32s32f
(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
void linearRowFilter_gpu_
8u_8s_c4
(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearRowFilter_gpu<
int, float
>(src, dst, kernel, ksize, anchor);
linearRowFilter_gpu<
4, uchar4, char4
>(src, dst, kernel, ksize, anchor);
}
void linearRowFilter_gpu_
32f32s
(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
void linearRowFilter_gpu_
8s_8u_c4
(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearRowFilter_gpu<
float, int
>(src, dst, kernel, ksize, anchor);
linearRowFilter_gpu<
4, char4, uchar4
>(src, dst, kernel, ksize, anchor);
}
void linearRowFilter_gpu_
32f32f
(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
void linearRowFilter_gpu_
8s_8s_c4
(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearRowFilter_gpu<float, float>(src, dst, kernel, ksize, anchor);
linearRowFilter_gpu<4, char4, char4>(src, dst, kernel, ksize, anchor);
}
void linearRowFilter_gpu_16u_16u_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearRowFilter_gpu<2, ushort2, ushort2>(src, dst, kernel, ksize, anchor);
}
void linearRowFilter_gpu_16u_16s_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearRowFilter_gpu<2, ushort2, short2>(src, dst, kernel, ksize, anchor);
}
void linearRowFilter_gpu_16s_16u_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearRowFilter_gpu<2, short2, ushort2>(src, dst, kernel, ksize, anchor);
}
void linearRowFilter_gpu_16s_16s_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearRowFilter_gpu<2, short2, short2>(src, dst, kernel, ksize, anchor);
}
void linearRowFilter_gpu_32s_32s_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearRowFilter_gpu<1, int, int>(src, dst, kernel, ksize, anchor);
}
void linearRowFilter_gpu_32s_32f_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearRowFilter_gpu<1, int, float>(src, dst, kernel, ksize, anchor);
}
void linearRowFilter_gpu_32f_32s_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearRowFilter_gpu<1, float, int>(src, dst, kernel, ksize, anchor);
}
void linearRowFilter_gpu_32f_32f_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearRowFilter_gpu<1 ,float, float>(src, dst, kernel, ksize, anchor);
}
}}}
namespace filter_krnls
{
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int KERNEL_SIZE, typename T, typename D>
template <int BLOCK_DIM_X, int BLOCK_DIM_Y, int KERNEL_SIZE,
int CN,
typename T, typename D>
__global__ void linearColumnFilter(const T* src, size_t src_step, D* dst, size_t dst_step, int anchor, int width, int height)
{
__shared__ T smem[BLOCK_DIM_Y * BLOCK_DIM_X * 3];
...
...
@@ -192,23 +226,24 @@ namespace filter_krnls
{
const T* colSrc = src + threadX;
sDataColumn[(threadIdx.y + blockDim.y) * smem_step] = threadY < height ? colSrc[threadY * src_step] :
0
;
sDataColumn[(threadIdx.y + blockDim.y) * smem_step] = threadY < height ? colSrc[threadY * src_step] :
VecTraits<T>::all(0)
;
sDataColumn[threadIdx.y * smem_step] = prevThreadY >= 0 ? colSrc[prevThreadY * src_step] :
0
;
sDataColumn[threadIdx.y * smem_step] = prevThreadY >= 0 ? colSrc[prevThreadY * src_step] :
VecTraits<T>::all(0)
;
sDataColumn[(threadIdx.y + (blockDim.y << 1)) * smem_step] = nextThreadY < height ? colSrc[nextThreadY * src_step] :
0
;
sDataColumn[(threadIdx.y + (blockDim.y << 1)) * smem_step] = nextThreadY < height ? colSrc[nextThreadY * src_step] :
VecTraits<T>::all(0)
;
__syncthreads();
if (threadY < height)
{
float sum = 0;
typedef typename TypeVec<float, CN>::vec_t sum_t;
sum_t sum = VecTraits<sum_t>::all(0);
sDataColumn += (threadIdx.y + blockDim.y - anchor)* smem_step;
#pragma unroll
for(int i = 0; i < KERNEL_SIZE; ++i)
sum
+= cLinearKernel[i] * sDataColumn[i * smem_step
];
sum
= sum + sDataColumn[i * smem_step] * cLinearKernel[i
];
dst[threadY * dst_step + threadX] = saturate_cast<D>(sum);
}
...
...
@@ -218,7 +253,7 @@ namespace filter_krnls
namespace cv { namespace gpu { namespace filters
{
template <int KERNEL_SIZE, typename T, typename D>
template <int KERNEL_SIZE,
int CN,
typename T, typename D>
void linearColumnFilter_caller(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor)
{
const int BLOCK_DIM_X = 16;
...
...
@@ -227,45 +262,77 @@ namespace cv { namespace gpu { namespace filters
dim3 threads(BLOCK_DIM_X, BLOCK_DIM_Y);
dim3 blocks(divUp(src.cols, BLOCK_DIM_X), divUp(src.rows, BLOCK_DIM_Y));
filter_krnls::linearColumnFilter<BLOCK_DIM_X, BLOCK_DIM_Y, KERNEL_SIZE><<<blocks, threads>>>(src.ptr, src.elem_step,
filter_krnls::linearColumnFilter<BLOCK_DIM_X, BLOCK_DIM_Y, KERNEL_SIZE
, CN
><<<blocks, threads>>>(src.ptr, src.elem_step,
dst.ptr, dst.elem_step, anchor, src.cols, src.rows);
cudaSafeCall( cudaThreadSynchronize() );
}
template <typename T, typename D>
template <
int CN,
typename T, typename D>
inline void linearColumnFilter_gpu(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
typedef void (*caller_t)(const DevMem2D_<T>& src, const DevMem2D_<D>& dst, int anchor);
static const caller_t callers[] =
{linearColumnFilter_caller<0 ,
T, D>, linearColumnFilter_caller<1
, T, D>,
linearColumnFilter_caller<2 ,
T, D>, linearColumnFilter_caller<3
, T, D>,
linearColumnFilter_caller<4 ,
T, D>, linearColumnFilter_caller<5
, T, D>,
linearColumnFilter_caller<6 ,
T, D>, linearColumnFilter_caller<7
, T, D>,
linearColumnFilter_caller<8 ,
T, D>, linearColumnFilter_caller<9
, T, D>,
linearColumnFilter_caller<10,
T, D>, linearColumnFilter_caller<11
, T, D>,
linearColumnFilter_caller<12,
T, D>, linearColumnFilter_caller<13
, T, D>,
linearColumnFilter_caller<14,
T, D>, linearColumnFilter_caller<15
, T, D>};
{linearColumnFilter_caller<0 ,
CN, T, D>, linearColumnFilter_caller<1 , CN
, T, D>,
linearColumnFilter_caller<2 ,
CN, T, D>, linearColumnFilter_caller<3 , CN
, T, D>,
linearColumnFilter_caller<4 ,
CN, T, D>, linearColumnFilter_caller<5 , CN
, T, D>,
linearColumnFilter_caller<6 ,
CN, T, D>, linearColumnFilter_caller<7 , CN
, T, D>,
linearColumnFilter_caller<8 ,
CN, T, D>, linearColumnFilter_caller<9 , CN
, T, D>,
linearColumnFilter_caller<10,
CN, T, D>, linearColumnFilter_caller<11, CN
, T, D>,
linearColumnFilter_caller<12,
CN, T, D>, linearColumnFilter_caller<13, CN
, T, D>,
linearColumnFilter_caller<14,
CN, T, D>, linearColumnFilter_caller<15, CN
, T, D>};
loadLinearKernel(kernel, ksize);
callers[ksize]((DevMem2D_<T>)src, (DevMem2D_<D>)dst, anchor);
}
void linearColumnFilter_gpu_32s32s(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
void linearColumnFilter_gpu_8u_8u_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearColumnFilter_gpu<4, uchar4, uchar4>(src, dst, kernel, ksize, anchor);
}
void linearColumnFilter_gpu_8u_8s_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearColumnFilter_gpu<4, uchar4, char4>(src, dst, kernel, ksize, anchor);
}
void linearColumnFilter_gpu_8s_8u_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearColumnFilter_gpu<4, char4, uchar4>(src, dst, kernel, ksize, anchor);
}
void linearColumnFilter_gpu_8s_8s_c4(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearColumnFilter_gpu<4, char4, char4>(src, dst, kernel, ksize, anchor);
}
void linearColumnFilter_gpu_16u_16u_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearColumnFilter_gpu<2, ushort2, ushort2>(src, dst, kernel, ksize, anchor);
}
void linearColumnFilter_gpu_16u_16s_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearColumnFilter_gpu<2, ushort2, short2>(src, dst, kernel, ksize, anchor);
}
void linearColumnFilter_gpu_16s_16u_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearColumnFilter_gpu<2, short2, ushort2>(src, dst, kernel, ksize, anchor);
}
void linearColumnFilter_gpu_16s_16s_c2(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearColumnFilter_gpu<2, short2, short2>(src, dst, kernel, ksize, anchor);
}
void linearColumnFilter_gpu_32s_32s_c1(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearColumnFilter_gpu<int, int>(src, dst, kernel, ksize, anchor);
linearColumnFilter_gpu<
1,
int, int>(src, dst, kernel, ksize, anchor);
}
void linearColumnFilter_gpu_32s
32f
(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
void linearColumnFilter_gpu_32s
_32f_c1
(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearColumnFilter_gpu<int, float>(src, dst, kernel, ksize, anchor);
linearColumnFilter_gpu<
1,
int, float>(src, dst, kernel, ksize, anchor);
}
void linearColumnFilter_gpu_32f
32s
(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
void linearColumnFilter_gpu_32f
_32s_c1
(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearColumnFilter_gpu<float, int>(src, dst, kernel, ksize, anchor);
linearColumnFilter_gpu<
1,
float, int>(src, dst, kernel, ksize, anchor);
}
void linearColumnFilter_gpu_32f
32f
(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
void linearColumnFilter_gpu_32f
_32f_c1
(const DevMem2D& src, const DevMem2D& dst, const float kernel[], int ksize, int anchor)
{
linearColumnFilter_gpu<float, float>(src, dst, kernel, ksize, anchor);
linearColumnFilter_gpu<
1,
float, float>(src, dst, kernel, ksize, anchor);
}
}}}
...
...
modules/gpu/src/cuda/saturate_cast.hpp
View file @
7d0feef7
...
...
@@ -163,92 +163,6 @@ namespace cv
return
saturate_cast
<
uint
>
((
float
)
v
);
#endif
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
uchar4
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
char4
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
ushort4
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
short4
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
uint4
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
int4
v
)
{
return
_Tp
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
float4
v
)
{
return
_Tp
(
v
);
}
template
<>
static
__device__
uchar4
saturate_cast
<
uchar4
>
(
char4
v
)
{
return
make_uchar4
(
saturate_cast
<
uchar
>
(
v
.
x
),
saturate_cast
<
uchar
>
(
v
.
y
),
saturate_cast
<
uchar
>
(
v
.
z
),
saturate_cast
<
uchar
>
(
v
.
w
));
}
template
<>
static
__device__
uchar4
saturate_cast
<
uchar4
>
(
ushort4
v
)
{
return
make_uchar4
(
saturate_cast
<
uchar
>
(
v
.
x
),
saturate_cast
<
uchar
>
(
v
.
y
),
saturate_cast
<
uchar
>
(
v
.
z
),
saturate_cast
<
uchar
>
(
v
.
w
));
}
template
<>
static
__device__
uchar4
saturate_cast
<
uchar4
>
(
short4
v
)
{
return
make_uchar4
(
saturate_cast
<
uchar
>
(
v
.
x
),
saturate_cast
<
uchar
>
(
v
.
y
),
saturate_cast
<
uchar
>
(
v
.
z
),
saturate_cast
<
uchar
>
(
v
.
w
));
}
template
<>
static
__device__
uchar4
saturate_cast
<
uchar4
>
(
uint4
v
)
{
return
make_uchar4
(
saturate_cast
<
uchar
>
(
v
.
x
),
saturate_cast
<
uchar
>
(
v
.
y
),
saturate_cast
<
uchar
>
(
v
.
z
),
saturate_cast
<
uchar
>
(
v
.
w
));
}
template
<>
static
__device__
uchar4
saturate_cast
<
uchar4
>
(
int4
v
)
{
return
make_uchar4
(
saturate_cast
<
uchar
>
(
v
.
x
),
saturate_cast
<
uchar
>
(
v
.
y
),
saturate_cast
<
uchar
>
(
v
.
z
),
saturate_cast
<
uchar
>
(
v
.
w
));
}
template
<>
static
__device__
uchar4
saturate_cast
<
uchar4
>
(
float4
v
)
{
return
make_uchar4
(
saturate_cast
<
uchar
>
(
v
.
x
),
saturate_cast
<
uchar
>
(
v
.
y
),
saturate_cast
<
uchar
>
(
v
.
z
),
saturate_cast
<
uchar
>
(
v
.
w
));
}
template
<>
static
__device__
char4
saturate_cast
<
char4
>
(
uchar4
v
)
{
return
make_char4
(
saturate_cast
<
char
>
(
v
.
x
),
saturate_cast
<
char
>
(
v
.
y
),
saturate_cast
<
char
>
(
v
.
z
),
saturate_cast
<
char
>
(
v
.
w
));
}
template
<>
static
__device__
char4
saturate_cast
<
char4
>
(
ushort4
v
)
{
return
make_char4
(
saturate_cast
<
char
>
(
v
.
x
),
saturate_cast
<
char
>
(
v
.
y
),
saturate_cast
<
char
>
(
v
.
z
),
saturate_cast
<
char
>
(
v
.
w
));
}
template
<>
static
__device__
char4
saturate_cast
<
char4
>
(
short4
v
)
{
return
make_char4
(
saturate_cast
<
char
>
(
v
.
x
),
saturate_cast
<
char
>
(
v
.
y
),
saturate_cast
<
char
>
(
v
.
z
),
saturate_cast
<
char
>
(
v
.
w
));
}
template
<>
static
__device__
char4
saturate_cast
<
char4
>
(
uint4
v
)
{
return
make_char4
(
saturate_cast
<
char
>
(
v
.
x
),
saturate_cast
<
char
>
(
v
.
y
),
saturate_cast
<
char
>
(
v
.
z
),
saturate_cast
<
char
>
(
v
.
w
));
}
template
<>
static
__device__
char4
saturate_cast
<
char4
>
(
int4
v
)
{
return
make_char4
(
saturate_cast
<
char
>
(
v
.
x
),
saturate_cast
<
char
>
(
v
.
y
),
saturate_cast
<
char
>
(
v
.
z
),
saturate_cast
<
char
>
(
v
.
w
));
}
template
<>
static
__device__
char4
saturate_cast
<
char4
>
(
float4
v
)
{
return
make_char4
(
saturate_cast
<
char
>
(
v
.
x
),
saturate_cast
<
char
>
(
v
.
y
),
saturate_cast
<
char
>
(
v
.
z
),
saturate_cast
<
char
>
(
v
.
w
));
}
template
<>
static
__device__
ushort4
saturate_cast
<
ushort4
>
(
uchar4
v
)
{
return
make_ushort4
(
v
.
x
,
v
.
y
,
v
.
z
,
v
.
w
);
}
template
<>
static
__device__
ushort4
saturate_cast
<
ushort4
>
(
char4
v
)
{
return
make_ushort4
(
saturate_cast
<
ushort
>
(
v
.
x
),
saturate_cast
<
ushort
>
(
v
.
y
),
saturate_cast
<
ushort
>
(
v
.
z
),
saturate_cast
<
ushort
>
(
v
.
w
));
}
template
<>
static
__device__
ushort4
saturate_cast
<
ushort4
>
(
short4
v
)
{
return
make_ushort4
(
saturate_cast
<
ushort
>
(
v
.
x
),
saturate_cast
<
ushort
>
(
v
.
y
),
saturate_cast
<
ushort
>
(
v
.
z
),
saturate_cast
<
ushort
>
(
v
.
w
));
}
template
<>
static
__device__
ushort4
saturate_cast
<
ushort4
>
(
uint4
v
)
{
return
make_ushort4
(
saturate_cast
<
ushort
>
(
v
.
x
),
saturate_cast
<
ushort
>
(
v
.
y
),
saturate_cast
<
ushort
>
(
v
.
z
),
saturate_cast
<
ushort
>
(
v
.
w
));
}
template
<>
static
__device__
ushort4
saturate_cast
<
ushort4
>
(
int4
v
)
{
return
make_ushort4
(
saturate_cast
<
ushort
>
(
v
.
x
),
saturate_cast
<
ushort
>
(
v
.
y
),
saturate_cast
<
ushort
>
(
v
.
z
),
saturate_cast
<
ushort
>
(
v
.
w
));
}
template
<>
static
__device__
ushort4
saturate_cast
<
ushort4
>
(
float4
v
)
{
return
make_ushort4
(
saturate_cast
<
ushort
>
(
v
.
x
),
saturate_cast
<
ushort
>
(
v
.
y
),
saturate_cast
<
ushort
>
(
v
.
z
),
saturate_cast
<
ushort
>
(
v
.
w
));
}
template
<>
static
__device__
short4
saturate_cast
<
short4
>
(
uchar4
v
)
{
return
make_short4
(
v
.
x
,
v
.
y
,
v
.
z
,
v
.
w
);
}
template
<>
static
__device__
short4
saturate_cast
<
short4
>
(
char4
v
)
{
return
make_short4
(
v
.
x
,
v
.
y
,
v
.
z
,
v
.
w
);
}
template
<>
static
__device__
short4
saturate_cast
<
short4
>
(
ushort4
v
)
{
return
make_short4
(
saturate_cast
<
short
>
(
v
.
x
),
saturate_cast
<
short
>
(
v
.
y
),
saturate_cast
<
short
>
(
v
.
z
),
saturate_cast
<
short
>
(
v
.
w
));
}
template
<>
static
__device__
short4
saturate_cast
<
short4
>
(
uint4
v
)
{
return
make_short4
(
saturate_cast
<
short
>
(
v
.
x
),
saturate_cast
<
short
>
(
v
.
y
),
saturate_cast
<
short
>
(
v
.
z
),
saturate_cast
<
short
>
(
v
.
w
));
}
template
<>
static
__device__
short4
saturate_cast
<
short4
>
(
int4
v
)
{
return
make_short4
(
saturate_cast
<
short
>
(
v
.
x
),
saturate_cast
<
short
>
(
v
.
y
),
saturate_cast
<
short
>
(
v
.
z
),
saturate_cast
<
short
>
(
v
.
w
));
}
template
<>
static
__device__
short4
saturate_cast
<
short4
>
(
float4
v
)
{
return
make_short4
(
saturate_cast
<
short
>
(
v
.
x
),
saturate_cast
<
short
>
(
v
.
y
),
saturate_cast
<
short
>
(
v
.
z
),
saturate_cast
<
short
>
(
v
.
w
));
}
template
<>
static
__device__
uint4
saturate_cast
<
uint4
>
(
uchar4
v
)
{
return
make_uint4
(
v
.
x
,
v
.
y
,
v
.
z
,
v
.
w
);
}
template
<>
static
__device__
uint4
saturate_cast
<
uint4
>
(
char4
v
)
{
return
make_uint4
(
saturate_cast
<
uint
>
(
v
.
x
),
saturate_cast
<
uint
>
(
v
.
y
),
saturate_cast
<
uint
>
(
v
.
z
),
saturate_cast
<
uint
>
(
v
.
w
));
}
template
<>
static
__device__
uint4
saturate_cast
<
uint4
>
(
ushort4
v
)
{
return
make_uint4
(
v
.
x
,
v
.
y
,
v
.
z
,
v
.
w
);
}
template
<>
static
__device__
uint4
saturate_cast
<
uint4
>
(
short4
v
)
{
return
make_uint4
(
saturate_cast
<
uint
>
(
v
.
x
),
saturate_cast
<
uint
>
(
v
.
y
),
saturate_cast
<
uint
>
(
v
.
z
),
saturate_cast
<
uint
>
(
v
.
w
));
}
template
<>
static
__device__
uint4
saturate_cast
<
uint4
>
(
int4
v
)
{
return
make_uint4
(
saturate_cast
<
uint
>
(
v
.
x
),
saturate_cast
<
uint
>
(
v
.
y
),
saturate_cast
<
uint
>
(
v
.
z
),
saturate_cast
<
uint
>
(
v
.
w
));
}
template
<>
static
__device__
uint4
saturate_cast
<
uint4
>
(
float4
v
)
{
return
make_uint4
(
saturate_cast
<
uint
>
(
v
.
x
),
saturate_cast
<
uint
>
(
v
.
y
),
saturate_cast
<
uint
>
(
v
.
z
),
saturate_cast
<
uint
>
(
v
.
w
));
}
template
<>
static
__device__
int4
saturate_cast
<
int4
>
(
uchar4
v
)
{
return
make_int4
(
v
.
x
,
v
.
y
,
v
.
z
,
v
.
w
);
}
template
<>
static
__device__
int4
saturate_cast
<
int4
>
(
char4
v
)
{
return
make_int4
(
v
.
x
,
v
.
y
,
v
.
z
,
v
.
w
);
}
template
<>
static
__device__
int4
saturate_cast
<
int4
>
(
ushort4
v
)
{
return
make_int4
(
v
.
x
,
v
.
y
,
v
.
z
,
v
.
w
);
}
template
<>
static
__device__
int4
saturate_cast
<
int4
>
(
short4
v
)
{
return
make_int4
(
v
.
x
,
v
.
y
,
v
.
z
,
v
.
w
);
}
template
<>
static
__device__
int4
saturate_cast
<
int4
>
(
uint4
v
)
{
return
make_int4
(
saturate_cast
<
int
>
(
v
.
x
),
saturate_cast
<
int
>
(
v
.
y
),
saturate_cast
<
int
>
(
v
.
z
),
saturate_cast
<
int
>
(
v
.
w
));
}
template
<>
static
__device__
int4
saturate_cast
<
int4
>
(
float4
v
)
{
return
make_int4
(
saturate_cast
<
int
>
(
v
.
x
),
saturate_cast
<
int
>
(
v
.
y
),
saturate_cast
<
int
>
(
v
.
z
),
saturate_cast
<
int
>
(
v
.
w
));
}
}
}
...
...
modules/gpu/src/cuda/vecmath.hpp
View file @
7d0feef7
...
...
@@ -44,81 +44,884 @@
#define __OPENCV_GPU_VECMATH_HPP__
#include "cuda_shared.hpp"
#include "saturate_cast.hpp"
namespace
cv
{
namespace
gpu
{
template
<
typename
T
,
int
N
>
struct
TypeVec
;
template
<
typename
T
>
struct
TypeVec
<
T
,
1
>
{
typedef
T
vec_t
;
};
template
<>
struct
TypeVec
<
unsigned
char
,
2
>
{
typedef
uchar2
vec_t
;
};
template
<>
struct
TypeVec
<
uchar
,
1
>
{
typedef
uchar
vec_t
;
};
template
<>
struct
TypeVec
<
uchar1
,
1
>
{
typedef
uchar1
vec_t
;
};
template
<>
struct
TypeVec
<
uchar
,
2
>
{
typedef
uchar2
vec_t
;
};
template
<>
struct
TypeVec
<
uchar2
,
2
>
{
typedef
uchar2
vec_t
;
};
template
<>
struct
TypeVec
<
u
nsigned
char
,
3
>
{
typedef
uchar3
vec_t
;
};
;
template
<>
struct
TypeVec
<
u
char
,
3
>
{
typedef
uchar3
vec_t
;
}
;
template
<>
struct
TypeVec
<
uchar3
,
3
>
{
typedef
uchar3
vec_t
;
};
template
<>
struct
TypeVec
<
u
nsigned
char
,
4
>
{
typedef
uchar4
vec_t
;
};
;
template
<>
struct
TypeVec
<
u
char
,
4
>
{
typedef
uchar4
vec_t
;
}
;
template
<>
struct
TypeVec
<
uchar4
,
4
>
{
typedef
uchar4
vec_t
;
};
template
<>
struct
TypeVec
<
char
,
1
>
{
typedef
char
vec_t
;
};
template
<>
struct
TypeVec
<
char1
,
1
>
{
typedef
char1
vec_t
;
};
template
<>
struct
TypeVec
<
char
,
2
>
{
typedef
char2
vec_t
;
};
template
<>
struct
TypeVec
<
char2
,
2
>
{
typedef
char2
vec_t
;
};
template
<>
struct
TypeVec
<
char
,
3
>
{
typedef
char3
vec_t
;
};
template
<>
struct
TypeVec
<
char3
,
3
>
{
typedef
char3
vec_t
;
};
template
<>
struct
TypeVec
<
char
,
4
>
{
typedef
char4
vec_t
;
};
template
<>
struct
TypeVec
<
char4
,
4
>
{
typedef
char4
vec_t
;
};
template
<>
struct
TypeVec
<
unsigned
short
,
2
>
{
typedef
ushort2
vec_t
;
};
template
<>
struct
TypeVec
<
ushort
,
1
>
{
typedef
ushort
vec_t
;
};
template
<>
struct
TypeVec
<
ushort1
,
1
>
{
typedef
ushort1
vec_t
;
};
template
<>
struct
TypeVec
<
ushort
,
2
>
{
typedef
ushort2
vec_t
;
};
template
<>
struct
TypeVec
<
ushort2
,
2
>
{
typedef
ushort2
vec_t
;
};
template
<>
struct
TypeVec
<
u
nsigned
short
,
3
>
{
typedef
ushort3
vec_t
;
};
template
<>
struct
TypeVec
<
ushort
,
3
>
{
typedef
ushort3
vec_t
;
};
template
<>
struct
TypeVec
<
ushort3
,
3
>
{
typedef
ushort3
vec_t
;
};
template
<>
struct
TypeVec
<
u
nsigned
short
,
4
>
{
typedef
ushort4
vec_t
;
};
template
<>
struct
TypeVec
<
ushort
,
4
>
{
typedef
ushort4
vec_t
;
};
template
<>
struct
TypeVec
<
ushort4
,
4
>
{
typedef
ushort4
vec_t
;
};
template
<>
struct
TypeVec
<
short
,
1
>
{
typedef
short
vec_t
;
};
template
<>
struct
TypeVec
<
short1
,
1
>
{
typedef
short1
vec_t
;
};
template
<>
struct
TypeVec
<
short
,
2
>
{
typedef
short2
vec_t
;
};
template
<>
struct
TypeVec
<
short2
,
2
>
{
typedef
short2
vec_t
;
};
template
<>
struct
TypeVec
<
short
,
3
>
{
typedef
short3
vec_t
;
};
template
<>
struct
TypeVec
<
short3
,
3
>
{
typedef
short3
vec_t
;
};
template
<>
struct
TypeVec
<
short
,
4
>
{
typedef
short4
vec_t
;
};
template
<>
struct
TypeVec
<
short4
,
4
>
{
typedef
short4
vec_t
;
};
template
<>
struct
TypeVec
<
unsigned
int
,
2
>
{
typedef
uint2
vec_t
;
};
template
<>
struct
TypeVec
<
uint
,
1
>
{
typedef
uint
vec_t
;
};
template
<>
struct
TypeVec
<
uint1
,
1
>
{
typedef
uint1
vec_t
;
};
template
<>
struct
TypeVec
<
uint
,
2
>
{
typedef
uint2
vec_t
;
};
template
<>
struct
TypeVec
<
uint2
,
2
>
{
typedef
uint2
vec_t
;
};
template
<>
struct
TypeVec
<
u
nsigned
int
,
3
>
{
typedef
uint3
vec_t
;
};
template
<>
struct
TypeVec
<
uint
,
3
>
{
typedef
uint3
vec_t
;
};
template
<>
struct
TypeVec
<
uint3
,
3
>
{
typedef
uint3
vec_t
;
};
template
<>
struct
TypeVec
<
u
nsigned
int
,
4
>
{
typedef
uint4
vec_t
;
};
template
<>
struct
TypeVec
<
uint
,
4
>
{
typedef
uint4
vec_t
;
};
template
<>
struct
TypeVec
<
uint4
,
4
>
{
typedef
uint4
vec_t
;
};
template
<>
struct
TypeVec
<
int
,
1
>
{
typedef
int
vec_t
;
};
template
<>
struct
TypeVec
<
int1
,
1
>
{
typedef
int1
vec_t
;
};
template
<>
struct
TypeVec
<
int
,
2
>
{
typedef
int2
vec_t
;
};
template
<>
struct
TypeVec
<
int2
,
2
>
{
typedef
int2
vec_t
;
};
template
<>
struct
TypeVec
<
int
,
3
>
{
typedef
int3
vec_t
;
};
template
<>
struct
TypeVec
<
int3
,
3
>
{
typedef
int3
vec_t
;
};
template
<>
struct
TypeVec
<
int
,
4
>
{
typedef
int4
vec_t
;
};
template
<>
struct
TypeVec
<
int4
,
4
>
{
typedef
int4
vec_t
;
};
template
<>
struct
TypeVec
<
float
,
1
>
{
typedef
float
vec_t
;
};
template
<>
struct
TypeVec
<
float1
,
1
>
{
typedef
float1
vec_t
;
};
template
<>
struct
TypeVec
<
float
,
2
>
{
typedef
float2
vec_t
;
};
template
<>
struct
TypeVec
<
float2
,
2
>
{
typedef
float2
vec_t
;
};
template
<>
struct
TypeVec
<
float
,
3
>
{
typedef
float3
vec_t
;
};
template
<>
struct
TypeVec
<
float3
,
3
>
{
typedef
float3
vec_t
;
};
template
<>
struct
TypeVec
<
float
,
4
>
{
typedef
float4
vec_t
;
};
template
<>
struct
TypeVec
<
float4
,
4
>
{
typedef
float4
vec_t
;
};
template
<>
struct
TypeVec
<
float4
,
4
>
{
typedef
float4
vec_t
;
};
template
<
typename
T
>
struct
VecTraits
;
template
<>
struct
VecTraits
<
uchar
>
{
typedef
uchar
elem_t
;
enum
{
cn
=
1
};
static
__device__
uchar
all
(
uchar
v
)
{
return
v
;}
};
template
<>
struct
VecTraits
<
uchar1
>
{
typedef
uchar
elem_t
;
enum
{
cn
=
1
};
static
__device__
uchar1
all
(
uchar
v
)
{
return
make_uchar1
(
v
);}
};
template
<>
struct
VecTraits
<
uchar2
>
{
typedef
uchar
elem_t
;
enum
{
cn
=
2
};
static
__device__
uchar2
all
(
uchar
v
)
{
return
make_uchar2
(
v
,
v
);}
};
template
<>
struct
VecTraits
<
uchar3
>
{
typedef
uchar
elem_t
;
enum
{
cn
=
3
};
static
__device__
uchar3
all
(
uchar
v
)
{
return
make_uchar3
(
v
,
v
,
v
);}
};
template
<>
struct
VecTraits
<
uchar4
>
{
typedef
uchar
elem_t
;
enum
{
cn
=
4
};
static
__device__
uchar4
all
(
uchar
v
)
{
return
make_uchar4
(
v
,
v
,
v
,
v
);}
};
template
<>
struct
VecTraits
<
char
>
{
typedef
char
elem_t
;
enum
{
cn
=
1
};
static
__device__
char
all
(
char
v
)
{
return
v
;}
};
template
<>
struct
VecTraits
<
char1
>
{
typedef
char
elem_t
;
enum
{
cn
=
1
};
static
__device__
char1
all
(
char
v
)
{
return
make_char1
(
v
);}
};
template
<>
struct
VecTraits
<
char2
>
{
typedef
char
elem_t
;
enum
{
cn
=
2
};
static
__device__
char2
all
(
char
v
)
{
return
make_char2
(
v
,
v
);}
};
template
<>
struct
VecTraits
<
char3
>
{
typedef
char
elem_t
;
enum
{
cn
=
3
};
static
__device__
char3
all
(
char
v
)
{
return
make_char3
(
v
,
v
,
v
);}
};
template
<>
struct
VecTraits
<
char4
>
{
typedef
char
elem_t
;
enum
{
cn
=
4
};
static
__device__
char4
all
(
char
v
)
{
return
make_char4
(
v
,
v
,
v
,
v
);}
};
template
<>
struct
VecTraits
<
ushort
>
{
typedef
ushort
elem_t
;
enum
{
cn
=
1
};
static
__device__
ushort
all
(
ushort
v
)
{
return
v
;}
};
template
<>
struct
VecTraits
<
ushort1
>
{
typedef
ushort
elem_t
;
enum
{
cn
=
1
};
static
__device__
ushort1
all
(
ushort
v
)
{
return
make_ushort1
(
v
);}
};
template
<>
struct
VecTraits
<
ushort2
>
{
typedef
ushort
elem_t
;
enum
{
cn
=
2
};
static
__device__
ushort2
all
(
ushort
v
)
{
return
make_ushort2
(
v
,
v
);}
};
template
<>
struct
VecTraits
<
ushort3
>
{
typedef
ushort
elem_t
;
enum
{
cn
=
3
};
static
__device__
ushort3
all
(
ushort
v
)
{
return
make_ushort3
(
v
,
v
,
v
);}
};
template
<>
struct
VecTraits
<
ushort4
>
{
typedef
ushort
elem_t
;
enum
{
cn
=
4
};
static
__device__
ushort4
all
(
ushort
v
)
{
return
make_ushort4
(
v
,
v
,
v
,
v
);}
};
template
<>
struct
VecTraits
<
short
>
{
typedef
short
elem_t
;
enum
{
cn
=
1
};
static
__device__
short
all
(
short
v
)
{
return
v
;}
};
template
<>
struct
VecTraits
<
short1
>
{
typedef
short
elem_t
;
enum
{
cn
=
1
};
static
__device__
short1
all
(
short
v
)
{
return
make_short1
(
v
);}
};
template
<>
struct
VecTraits
<
short2
>
{
typedef
short
elem_t
;
enum
{
cn
=
2
};
static
__device__
short2
all
(
short
v
)
{
return
make_short2
(
v
,
v
);}
};
template
<>
struct
VecTraits
<
short3
>
{
typedef
short
elem_t
;
enum
{
cn
=
3
};
static
__device__
short3
all
(
short
v
)
{
return
make_short3
(
v
,
v
,
v
);}
};
template
<>
struct
VecTraits
<
short4
>
{
typedef
short
elem_t
;
enum
{
cn
=
4
};
static
__device__
short4
all
(
short
v
)
{
return
make_short4
(
v
,
v
,
v
,
v
);}
};
template
<>
struct
VecTraits
<
uint
>
{
typedef
uint
elem_t
;
enum
{
cn
=
1
};
static
__device__
uint
all
(
uint
v
)
{
return
v
;}
};
template
<>
struct
VecTraits
<
uint1
>
{
typedef
uint
elem_t
;
enum
{
cn
=
1
};
static
__device__
uint1
all
(
uint
v
)
{
return
make_uint1
(
v
);}
};
template
<>
struct
VecTraits
<
uint2
>
{
typedef
uint
elem_t
;
enum
{
cn
=
2
};
static
__device__
uint2
all
(
uint
v
)
{
return
make_uint2
(
v
,
v
);}
};
template
<>
struct
VecTraits
<
uint3
>
{
typedef
uint
elem_t
;
enum
{
cn
=
3
};
static
__device__
uint3
all
(
uint
v
)
{
return
make_uint3
(
v
,
v
,
v
);}
};
template
<>
struct
VecTraits
<
uint4
>
{
typedef
uint
elem_t
;
enum
{
cn
=
4
};
static
__device__
uint4
all
(
uint
v
)
{
return
make_uint4
(
v
,
v
,
v
,
v
);}
};
template
<>
struct
VecTraits
<
int
>
{
typedef
int
elem_t
;
enum
{
cn
=
1
};
static
__device__
int
all
(
int
v
)
{
return
v
;}
};
template
<>
struct
VecTraits
<
int1
>
{
typedef
int
elem_t
;
enum
{
cn
=
1
};
static
__device__
int1
all
(
int
v
)
{
return
make_int1
(
v
);}
};
template
<>
struct
VecTraits
<
int2
>
{
typedef
int
elem_t
;
enum
{
cn
=
2
};
static
__device__
int2
all
(
int
v
)
{
return
make_int2
(
v
,
v
);}
};
template
<>
struct
VecTraits
<
int3
>
{
typedef
int
elem_t
;
enum
{
cn
=
3
};
static
__device__
int3
all
(
int
v
)
{
return
make_int3
(
v
,
v
,
v
);}
};
template
<>
struct
VecTraits
<
int4
>
{
typedef
int
elem_t
;
enum
{
cn
=
4
};
static
__device__
int4
all
(
int
v
)
{
return
make_int4
(
v
,
v
,
v
,
v
);}
};
template
<>
struct
VecTraits
<
float
>
{
typedef
float
elem_t
;
enum
{
cn
=
1
};
static
__device__
float
all
(
float
v
)
{
return
v
;}
};
template
<>
struct
VecTraits
<
float1
>
{
typedef
float
elem_t
;
enum
{
cn
=
1
};
static
__device__
float1
all
(
float
v
)
{
return
make_float1
(
v
);}
};
template
<>
struct
VecTraits
<
float2
>
{
typedef
float
elem_t
;
enum
{
cn
=
2
};
static
__device__
float2
all
(
float
v
)
{
return
make_float2
(
v
,
v
);}
};
template
<>
struct
VecTraits
<
float3
>
{
typedef
float
elem_t
;
enum
{
cn
=
3
};
static
__device__
float3
all
(
float
v
)
{
return
make_float3
(
v
,
v
,
v
);}
};
template
<>
struct
VecTraits
<
float4
>
{
typedef
float
elem_t
;
enum
{
cn
=
4
};
static
__device__
float4
all
(
float
v
)
{
return
make_float4
(
v
,
v
,
v
,
v
);}
};
template
<
int
cn
,
typename
VecD
>
struct
SatCast
;
template
<
typename
VecD
>
struct
SatCast
<
1
,
VecD
>
{
template
<
typename
VecS
>
__device__
VecD
operator
()(
const
VecS
&
v
)
{
VecD
res
;
res
.
x
=
saturate_cast
<
VecTraits
<
VecD
>::
elem_t
>
(
v
.
x
);
return
res
;
}
};
template
<
typename
VecD
>
struct
SatCast
<
2
,
VecD
>
{
template
<
typename
VecS
>
__device__
VecD
operator
()(
const
VecS
&
v
)
{
VecD
res
;
res
.
x
=
saturate_cast
<
VecTraits
<
VecD
>::
elem_t
>
(
v
.
x
);
res
.
y
=
saturate_cast
<
VecTraits
<
VecD
>::
elem_t
>
(
v
.
y
);
return
res
;
}
};
template
<
typename
VecD
>
struct
SatCast
<
3
,
VecD
>
{
template
<
typename
VecS
>
__device__
VecD
operator
()(
const
VecS
&
v
)
{
VecD
res
;
res
.
x
=
saturate_cast
<
VecTraits
<
VecD
>::
elem_t
>
(
v
.
x
);
res
.
y
=
saturate_cast
<
VecTraits
<
VecD
>::
elem_t
>
(
v
.
y
);
res
.
y
=
saturate_cast
<
VecTraits
<
VecD
>::
elem_t
>
(
v
.
z
);
return
res
;
}
};
template
<
typename
VecD
>
struct
SatCast
<
4
,
VecD
>
{
template
<
typename
VecS
>
__device__
VecD
operator
()(
const
VecS
&
v
)
{
VecD
res
;
res
.
x
=
saturate_cast
<
VecTraits
<
VecD
>::
elem_t
>
(
v
.
x
);
res
.
y
=
saturate_cast
<
VecTraits
<
VecD
>::
elem_t
>
(
v
.
y
);
res
.
y
=
saturate_cast
<
VecTraits
<
VecD
>::
elem_t
>
(
v
.
z
);
res
.
w
=
saturate_cast
<
VecTraits
<
VecD
>::
elem_t
>
(
v
.
w
);
return
res
;
}
};
template
<
typename
VecD
,
typename
VecS
>
static
__device__
VecD
saturate_cast_caller
(
const
VecS
&
v
)
{
SatCast
<
VecTraits
<
VecD
>::
cn
,
VecD
>
cast
;
return
cast
(
v
);
}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
uchar1
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
char1
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
ushort1
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
short1
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
uint1
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
int1
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
float1
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
uchar2
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
char2
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
ushort2
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
short2
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
uint2
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
int2
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
float2
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
uchar3
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
char3
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
ushort3
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
short3
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
uint3
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
int3
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
float3
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
uchar4
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
char4
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
ushort4
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
short4
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
uint4
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
int4
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
template
<
typename
_Tp
>
static
__device__
_Tp
saturate_cast
(
const
float4
&
v
)
{
return
saturate_cast_caller
<
_Tp
>
(
v
);}
static
__device__
uchar1
operator
+
(
const
uchar1
&
a
,
const
uchar1
&
b
)
{
return
make_uchar1
(
a
.
x
+
b
.
x
);
}
static
__device__
uchar1
operator
-
(
const
uchar1
&
a
,
const
uchar1
&
b
)
{
return
make_uchar1
(
a
.
x
-
b
.
x
);
}
static
__device__
uchar1
operator
*
(
const
uchar1
&
a
,
const
uchar1
&
b
)
{
return
make_uchar1
(
a
.
x
*
b
.
x
);
}
static
__device__
uchar1
operator
/
(
const
uchar1
&
a
,
const
uchar1
&
b
)
{
return
make_uchar1
(
a
.
x
/
b
.
x
);
}
static
__device__
float1
operator
*
(
const
uchar1
&
a
,
float
s
)
{
return
make_float1
(
a
.
x
*
s
);
}
static
__device__
uchar2
operator
+
(
const
uchar2
&
a
,
const
uchar2
&
b
)
{
return
make_uchar2
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
);
}
static
__device__
uchar2
operator
-
(
const
uchar2
&
a
,
const
uchar2
&
b
)
{
return
make_uchar2
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
);
}
static
__device__
uchar2
operator
*
(
const
uchar2
&
a
,
const
uchar2
&
b
)
{
return
make_uchar2
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
);
}
static
__device__
uchar2
operator
/
(
const
uchar2
&
a
,
const
uchar2
&
b
)
{
return
make_uchar2
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
);
}
static
__device__
float2
operator
*
(
const
uchar2
&
a
,
float
s
)
{
return
make_float2
(
a
.
x
*
s
,
a
.
y
*
s
);
}
static
__device__
uchar4
operator
+
(
const
uchar4
&
a
,
const
uchar4
&
b
)
static
__device__
uchar3
operator
+
(
const
uchar3
&
a
,
const
uchar3
&
b
)
{
return
make_uchar3
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
,
a
.
z
+
b
.
z
);
}
static
__device__
uchar3
operator
-
(
const
uchar3
&
a
,
const
uchar3
&
b
)
{
return
make_uchar3
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
);
}
static
__device__
uchar3
operator
*
(
const
uchar3
&
a
,
const
uchar3
&
b
)
{
return
make_uchar3
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
);
}
static
__device__
uchar3
operator
/
(
const
uchar3
&
a
,
const
uchar3
&
b
)
{
return
make_uchar3
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
,
a
.
z
/
b
.
z
);
}
static
__device__
float3
operator
*
(
const
uchar3
&
a
,
float
s
)
{
return
make_float3
(
a
.
x
*
s
,
a
.
y
*
s
,
a
.
z
*
s
);
}
static
__device__
uchar4
operator
+
(
const
uchar4
&
a
,
const
uchar4
&
b
)
{
return
make_uchar4
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
,
a
.
z
+
b
.
z
,
a
.
w
+
b
.
w
);
}
static
__device__
uchar4
operator
-
(
const
uchar4
&
a
,
const
uchar4
&
b
)
static
__device__
uchar4
operator
-
(
const
uchar4
&
a
,
const
uchar4
&
b
)
{
return
make_uchar4
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
,
a
.
w
-
b
.
w
);
}
static
__device__
uchar4
operator
*
(
const
uchar4
&
a
,
const
uchar4
&
b
)
static
__device__
uchar4
operator
*
(
const
uchar4
&
a
,
const
uchar4
&
b
)
{
return
make_uchar4
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
,
a
.
w
*
b
.
w
);
}
static
__device__
uchar4
operator
/
(
const
uchar4
&
a
,
const
uchar4
&
b
)
static
__device__
uchar4
operator
/
(
const
uchar4
&
a
,
const
uchar4
&
b
)
{
return
make_uchar4
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
,
a
.
z
/
b
.
z
,
a
.
w
/
b
.
w
);
}
template
<
typename
T
>
static
__device__
uchar4
operator
*
(
const
uchar4
&
a
,
T
s
)
static
__device__
float4
operator
*
(
const
uchar4
&
a
,
float
s
)
{
return
make_float4
(
a
.
x
*
s
,
a
.
y
*
s
,
a
.
z
*
s
,
a
.
w
*
s
);
}
static
__device__
char1
operator
+
(
const
char1
&
a
,
const
char1
&
b
)
{
return
make_char1
(
a
.
x
+
b
.
x
);
}
static
__device__
char1
operator
-
(
const
char1
&
a
,
const
char1
&
b
)
{
return
make_char1
(
a
.
x
-
b
.
x
);
}
static
__device__
char1
operator
*
(
const
char1
&
a
,
const
char1
&
b
)
{
return
make_char1
(
a
.
x
*
b
.
x
);
}
static
__device__
char1
operator
/
(
const
char1
&
a
,
const
char1
&
b
)
{
return
make_char1
(
a
.
x
/
b
.
x
);
}
static
__device__
float1
operator
*
(
const
char1
&
a
,
float
s
)
{
return
make_float1
(
a
.
x
*
s
);
}
static
__device__
char2
operator
+
(
const
char2
&
a
,
const
char2
&
b
)
{
return
make_char2
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
);
}
static
__device__
char2
operator
-
(
const
char2
&
a
,
const
char2
&
b
)
{
return
make_char2
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
);
}
static
__device__
char2
operator
*
(
const
char2
&
a
,
const
char2
&
b
)
{
return
make_char2
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
);
}
static
__device__
char2
operator
/
(
const
char2
&
a
,
const
char2
&
b
)
{
return
make_char2
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
);
}
static
__device__
float2
operator
*
(
const
char2
&
a
,
float
s
)
{
return
make_float2
(
a
.
x
*
s
,
a
.
y
*
s
);
}
static
__device__
char3
operator
+
(
const
char3
&
a
,
const
char3
&
b
)
{
return
make_char3
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
,
a
.
z
+
b
.
z
);
}
static
__device__
char3
operator
-
(
const
char3
&
a
,
const
char3
&
b
)
{
return
make_char3
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
);
}
static
__device__
char3
operator
*
(
const
char3
&
a
,
const
char3
&
b
)
{
return
make_char3
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
);
}
static
__device__
char3
operator
/
(
const
char3
&
a
,
const
char3
&
b
)
{
return
make_char3
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
,
a
.
z
/
b
.
z
);
}
static
__device__
float3
operator
*
(
const
char3
&
a
,
float
s
)
{
return
make_float3
(
a
.
x
*
s
,
a
.
y
*
s
,
a
.
z
*
s
);
}
static
__device__
char4
operator
+
(
const
char4
&
a
,
const
char4
&
b
)
{
return
make_char4
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
,
a
.
z
+
b
.
z
,
a
.
w
+
b
.
w
);
}
static
__device__
char4
operator
-
(
const
char4
&
a
,
const
char4
&
b
)
{
return
make_char4
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
,
a
.
w
-
b
.
w
);
}
static
__device__
char4
operator
*
(
const
char4
&
a
,
const
char4
&
b
)
{
return
make_char4
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
,
a
.
w
*
b
.
w
);
}
static
__device__
char4
operator
/
(
const
char4
&
a
,
const
char4
&
b
)
{
return
make_char4
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
,
a
.
z
/
b
.
z
,
a
.
w
/
b
.
w
);
}
static
__device__
float4
operator
*
(
const
char4
&
a
,
float
s
)
{
return
make_float4
(
a
.
x
*
s
,
a
.
y
*
s
,
a
.
z
*
s
,
a
.
w
*
s
);
}
static
__device__
ushort1
operator
+
(
const
ushort1
&
a
,
const
ushort1
&
b
)
{
return
make_ushort1
(
a
.
x
+
b
.
x
);
}
static
__device__
ushort1
operator
-
(
const
ushort1
&
a
,
const
ushort1
&
b
)
{
return
make_ushort1
(
a
.
x
-
b
.
x
);
}
static
__device__
ushort1
operator
*
(
const
ushort1
&
a
,
const
ushort1
&
b
)
{
return
make_ushort1
(
a
.
x
*
b
.
x
);
}
static
__device__
ushort1
operator
/
(
const
ushort1
&
a
,
const
ushort1
&
b
)
{
return
make_ushort1
(
a
.
x
/
b
.
x
);
}
static
__device__
float1
operator
*
(
const
ushort1
&
a
,
float
s
)
{
return
make_float1
(
a
.
x
*
s
);
}
static
__device__
ushort2
operator
+
(
const
ushort2
&
a
,
const
ushort2
&
b
)
{
return
make_ushort2
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
);
}
static
__device__
ushort2
operator
-
(
const
ushort2
&
a
,
const
ushort2
&
b
)
{
return
make_ushort2
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
);
}
static
__device__
ushort2
operator
*
(
const
ushort2
&
a
,
const
ushort2
&
b
)
{
return
make_ushort2
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
);
}
static
__device__
ushort2
operator
/
(
const
ushort2
&
a
,
const
ushort2
&
b
)
{
return
make_ushort2
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
);
}
static
__device__
float2
operator
*
(
const
ushort2
&
a
,
float
s
)
{
return
make_float2
(
a
.
x
*
s
,
a
.
y
*
s
);
}
static
__device__
ushort3
operator
+
(
const
ushort3
&
a
,
const
ushort3
&
b
)
{
return
make_ushort3
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
,
a
.
z
+
b
.
z
);
}
static
__device__
ushort3
operator
-
(
const
ushort3
&
a
,
const
ushort3
&
b
)
{
return
make_ushort3
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
);
}
static
__device__
ushort3
operator
*
(
const
ushort3
&
a
,
const
ushort3
&
b
)
{
return
make_ushort3
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
);
}
static
__device__
ushort3
operator
/
(
const
ushort3
&
a
,
const
ushort3
&
b
)
{
return
make_ushort3
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
,
a
.
z
/
b
.
z
);
}
static
__device__
float3
operator
*
(
const
ushort3
&
a
,
float
s
)
{
return
make_float3
(
a
.
x
*
s
,
a
.
y
*
s
,
a
.
z
*
s
);
}
static
__device__
ushort4
operator
+
(
const
ushort4
&
a
,
const
ushort4
&
b
)
{
return
make_ushort4
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
,
a
.
z
+
b
.
z
,
a
.
w
+
b
.
w
);
}
static
__device__
ushort4
operator
-
(
const
ushort4
&
a
,
const
ushort4
&
b
)
{
return
make_ushort4
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
,
a
.
w
-
b
.
w
);
}
static
__device__
ushort4
operator
*
(
const
ushort4
&
a
,
const
ushort4
&
b
)
{
return
make_ushort4
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
,
a
.
w
*
b
.
w
);
}
static
__device__
ushort4
operator
/
(
const
ushort4
&
a
,
const
ushort4
&
b
)
{
return
make_ushort4
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
,
a
.
z
/
b
.
z
,
a
.
w
/
b
.
w
);
}
static
__device__
float4
operator
*
(
const
ushort4
&
a
,
float
s
)
{
return
make_float4
(
a
.
x
*
s
,
a
.
y
*
s
,
a
.
z
*
s
,
a
.
w
*
s
);
}
static
__device__
short1
operator
+
(
const
short1
&
a
,
const
short1
&
b
)
{
return
make_short1
(
a
.
x
+
b
.
x
);
}
static
__device__
short1
operator
-
(
const
short1
&
a
,
const
short1
&
b
)
{
return
make_short1
(
a
.
x
-
b
.
x
);
}
static
__device__
short1
operator
*
(
const
short1
&
a
,
const
short1
&
b
)
{
return
make_short1
(
a
.
x
*
b
.
x
);
}
static
__device__
short1
operator
/
(
const
short1
&
a
,
const
short1
&
b
)
{
return
make_short1
(
a
.
x
/
b
.
x
);
}
static
__device__
float1
operator
*
(
const
short1
&
a
,
float
s
)
{
return
make_float1
(
a
.
x
*
s
);
}
static
__device__
short2
operator
+
(
const
short2
&
a
,
const
short2
&
b
)
{
return
make_short2
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
);
}
static
__device__
short2
operator
-
(
const
short2
&
a
,
const
short2
&
b
)
{
return
make_short2
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
);
}
static
__device__
short2
operator
*
(
const
short2
&
a
,
const
short2
&
b
)
{
return
make_short2
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
);
}
static
__device__
short2
operator
/
(
const
short2
&
a
,
const
short2
&
b
)
{
return
make_short2
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
);
}
static
__device__
float2
operator
*
(
const
short2
&
a
,
float
s
)
{
return
make_float2
(
a
.
x
*
s
,
a
.
y
*
s
);
}
static
__device__
short3
operator
+
(
const
short3
&
a
,
const
short3
&
b
)
{
return
make_short3
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
,
a
.
z
+
b
.
z
);
}
static
__device__
short3
operator
-
(
const
short3
&
a
,
const
short3
&
b
)
{
return
make_short3
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
);
}
static
__device__
short3
operator
*
(
const
short3
&
a
,
const
short3
&
b
)
{
return
make_short3
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
);
}
static
__device__
short3
operator
/
(
const
short3
&
a
,
const
short3
&
b
)
{
return
make_short3
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
,
a
.
z
/
b
.
z
);
}
static
__device__
float3
operator
*
(
const
short3
&
a
,
float
s
)
{
return
make_float3
(
a
.
x
*
s
,
a
.
y
*
s
,
a
.
z
*
s
);
}
static
__device__
short4
operator
+
(
const
short4
&
a
,
const
short4
&
b
)
{
return
make_short4
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
,
a
.
z
+
b
.
z
,
a
.
w
+
b
.
w
);
}
static
__device__
short4
operator
-
(
const
short4
&
a
,
const
short4
&
b
)
{
return
make_short4
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
,
a
.
w
-
b
.
w
);
}
static
__device__
short4
operator
*
(
const
short4
&
a
,
const
short4
&
b
)
{
return
make_short4
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
,
a
.
w
*
b
.
w
);
}
static
__device__
short4
operator
/
(
const
short4
&
a
,
const
short4
&
b
)
{
return
make_short4
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
,
a
.
z
/
b
.
z
,
a
.
w
/
b
.
w
);
}
static
__device__
float4
operator
*
(
const
short4
&
a
,
float
s
)
{
return
make_float4
(
a
.
x
*
s
,
a
.
y
*
s
,
a
.
z
*
s
,
a
.
w
*
s
);
}
static
__device__
int1
operator
+
(
const
int1
&
a
,
const
int1
&
b
)
{
return
make_int1
(
a
.
x
+
b
.
x
);
}
static
__device__
int1
operator
-
(
const
int1
&
a
,
const
int1
&
b
)
{
return
make_int1
(
a
.
x
-
b
.
x
);
}
static
__device__
int1
operator
*
(
const
int1
&
a
,
const
int1
&
b
)
{
return
make_int1
(
a
.
x
*
b
.
x
);
}
static
__device__
int1
operator
/
(
const
int1
&
a
,
const
int1
&
b
)
{
return
make_int1
(
a
.
x
/
b
.
x
);
}
static
__device__
float1
operator
*
(
const
int1
&
a
,
float
s
)
{
return
make_float1
(
a
.
x
*
s
);
}
static
__device__
int2
operator
+
(
const
int2
&
a
,
const
int2
&
b
)
{
return
make_int2
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
);
}
static
__device__
int2
operator
-
(
const
int2
&
a
,
const
int2
&
b
)
{
return
make_int2
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
);
}
static
__device__
int2
operator
*
(
const
int2
&
a
,
const
int2
&
b
)
{
return
make_int2
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
);
}
static
__device__
int2
operator
/
(
const
int2
&
a
,
const
int2
&
b
)
{
return
make_int2
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
);
}
static
__device__
float2
operator
*
(
const
int2
&
a
,
float
s
)
{
return
make_float2
(
a
.
x
*
s
,
a
.
y
*
s
);
}
static
__device__
int3
operator
+
(
const
int3
&
a
,
const
int3
&
b
)
{
return
make_int3
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
,
a
.
z
+
b
.
z
);
}
static
__device__
int3
operator
-
(
const
int3
&
a
,
const
int3
&
b
)
{
return
make_int3
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
);
}
static
__device__
int3
operator
*
(
const
int3
&
a
,
const
int3
&
b
)
{
return
make_int3
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
);
}
static
__device__
int3
operator
/
(
const
int3
&
a
,
const
int3
&
b
)
{
return
make_int3
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
,
a
.
z
/
b
.
z
);
}
static
__device__
float3
operator
*
(
const
int3
&
a
,
float
s
)
{
return
make_float3
(
a
.
x
*
s
,
a
.
y
*
s
,
a
.
z
*
s
);
}
static
__device__
int4
operator
+
(
const
int4
&
a
,
const
int4
&
b
)
{
return
make_int4
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
,
a
.
z
+
b
.
z
,
a
.
w
+
b
.
w
);
}
static
__device__
int4
operator
-
(
const
int4
&
a
,
const
int4
&
b
)
{
return
make_int4
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
,
a
.
w
-
b
.
w
);
}
static
__device__
int4
operator
*
(
const
int4
&
a
,
const
int4
&
b
)
{
return
make_int4
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
,
a
.
w
*
b
.
w
);
}
static
__device__
int4
operator
/
(
const
int4
&
a
,
const
int4
&
b
)
{
return
make_int4
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
,
a
.
z
/
b
.
z
,
a
.
w
/
b
.
w
);
}
static
__device__
float4
operator
*
(
const
int4
&
a
,
float
s
)
{
return
make_float4
(
a
.
x
*
s
,
a
.
y
*
s
,
a
.
z
*
s
,
a
.
w
*
s
);
}
static
__device__
float1
operator
+
(
const
float1
&
a
,
const
float1
&
b
)
{
return
make_float1
(
a
.
x
+
b
.
x
);
}
static
__device__
float1
operator
-
(
const
float1
&
a
,
const
float1
&
b
)
{
return
make_float1
(
a
.
x
-
b
.
x
);
}
static
__device__
float1
operator
*
(
const
float1
&
a
,
const
float1
&
b
)
{
return
make_float1
(
a
.
x
*
b
.
x
);
}
static
__device__
float1
operator
/
(
const
float1
&
a
,
const
float1
&
b
)
{
return
make_float1
(
a
.
x
/
b
.
x
);
}
static
__device__
float1
operator
*
(
const
float1
&
a
,
float
s
)
{
return
make_float1
(
a
.
x
*
s
);
}
static
__device__
float2
operator
+
(
const
float2
&
a
,
const
float2
&
b
)
{
return
make_float2
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
);
}
static
__device__
float2
operator
-
(
const
float2
&
a
,
const
float2
&
b
)
{
return
make_float2
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
);
}
static
__device__
float2
operator
*
(
const
float2
&
a
,
const
float2
&
b
)
{
return
make_float2
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
);
}
static
__device__
float2
operator
/
(
const
float2
&
a
,
const
float2
&
b
)
{
return
make_float2
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
);
}
static
__device__
float2
operator
*
(
const
float2
&
a
,
float
s
)
{
return
make_float2
(
a
.
x
*
s
,
a
.
y
*
s
);
}
static
__device__
float3
operator
+
(
const
float3
&
a
,
const
float3
&
b
)
{
return
make_float3
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
,
a
.
z
+
b
.
z
);
}
static
__device__
float3
operator
-
(
const
float3
&
a
,
const
float3
&
b
)
{
return
make_float3
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
);
}
static
__device__
float3
operator
*
(
const
float3
&
a
,
const
float3
&
b
)
{
return
make_float3
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
);
}
static
__device__
float3
operator
/
(
const
float3
&
a
,
const
float3
&
b
)
{
return
make_float3
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
,
a
.
z
/
b
.
z
);
}
static
__device__
float3
operator
*
(
const
float3
&
a
,
float
s
)
{
return
make_float3
(
a
.
x
*
s
,
a
.
y
*
s
,
a
.
z
*
s
);
}
static
__device__
float4
operator
+
(
const
float4
&
a
,
const
float4
&
b
)
{
return
make_float4
(
a
.
x
+
b
.
x
,
a
.
y
+
b
.
y
,
a
.
z
+
b
.
z
,
a
.
w
+
b
.
w
);
}
static
__device__
float4
operator
-
(
const
float4
&
a
,
const
float4
&
b
)
{
return
make_float4
(
a
.
x
-
b
.
x
,
a
.
y
-
b
.
y
,
a
.
z
-
b
.
z
,
a
.
w
-
b
.
w
);
}
static
__device__
float4
operator
*
(
const
float4
&
a
,
const
float4
&
b
)
{
return
make_float4
(
a
.
x
*
b
.
x
,
a
.
y
*
b
.
y
,
a
.
z
*
b
.
z
,
a
.
w
*
b
.
w
);
}
static
__device__
float4
operator
/
(
const
float4
&
a
,
const
float4
&
b
)
{
return
make_
uchar4
(
a
.
x
*
s
,
a
.
y
*
s
,
a
.
z
*
s
,
a
.
w
*
s
);
return
make_
float4
(
a
.
x
/
b
.
x
,
a
.
y
/
b
.
y
,
a
.
z
/
b
.
z
,
a
.
w
/
b
.
w
);
}
template
<
typename
T
>
static
__device__
uchar4
operator
*
(
T
s
,
const
uchar4
&
a
)
static
__device__
float4
operator
*
(
const
float4
&
a
,
float
s
)
{
return
a
*
s
;
return
make_float4
(
a
.
x
*
s
,
a
.
y
*
s
,
a
.
z
*
s
,
a
.
w
*
s
)
;
}
}
}
...
...
modules/gpu/src/cudastream.cpp
View file @
7d0feef7
...
...
@@ -163,7 +163,7 @@ void cv::gpu::Stream::waitForCompletion() { cudaSafeCall( cudaStreamSynchronize(
void
cv
::
gpu
::
Stream
::
enqueueDownload
(
const
GpuMat
&
src
,
Mat
&
dst
)
{
// if not -> allocation will be done, but after that dst will not point to page locked memory
CV_Assert
(
src
.
cols
==
dst
.
cols
&&
src
.
rows
==
dst
.
rows
&&
src
.
type
()
==
dst
.
type
()
)
CV_Assert
(
src
.
cols
==
dst
.
cols
&&
src
.
rows
==
dst
.
rows
&&
src
.
type
()
==
dst
.
type
()
)
;
devcopy
(
src
,
dst
,
impl
->
stream
,
cudaMemcpyDeviceToHost
);
}
void
cv
::
gpu
::
Stream
::
enqueueDownload
(
const
GpuMat
&
src
,
CudaMem
&
dst
)
{
devcopy
(
src
,
dst
,
impl
->
stream
,
cudaMemcpyDeviceToHost
);
}
...
...
modules/gpu/src/filtering.cpp
View file @
7d0feef7
...
...
@@ -577,15 +577,31 @@ void cv::gpu::filter2D(const GpuMat& src, GpuMat& dst, int ddepth, const Mat& ke
namespace
cv
{
namespace
gpu
{
namespace
filters
{
void
linearRowFilter_gpu_32s32s
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_32s32f
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_32f32s
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_32f32f
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_32s32s
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_32s32f
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_32f32s
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_32f32f
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_8u_8u_c4
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_8u_8s_c4
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_8s_8u_c4
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_8s_8s_c4
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_16u_16u_c2
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_16u_16s_c2
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_16s_16u_c2
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_16s_16s_c2
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_32s_32s_c1
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_32s_32f_c1
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_32f_32s_c1
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearRowFilter_gpu_32f_32f_c1
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_8u_8u_c4
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_8u_8s_c4
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_8s_8u_c4
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_8s_8s_c4
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_16u_16u_c2
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_16u_16s_c2
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_16s_16u_c2
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_16s_16s_c2
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_32s_32s_c1
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_32s_32f_c1
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_32f_32s_c1
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
void
linearColumnFilter_gpu_32f_32f_c1
(
const
DevMem2D
&
src
,
const
DevMem2D
&
dst
,
const
float
kernel
[],
int
ksize
,
int
anchor
);
}}}
namespace
...
...
@@ -637,15 +653,15 @@ Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType,
static
const
nppFilter1D_t
nppFilter1D_callers
[]
=
{
0
,
nppiFilterRow_8u_C1R
,
0
,
0
,
nppiFilterRow_8u_C4R
};
static
const
gpuFilter1D_t
gpuFilter1D_callers
[
6
][
6
]
=
{
{
0
,
0
,
0
,
0
,
0
,
0
},
{
0
,
0
,
0
,
0
,
0
,
0
},
{
0
,
0
,
0
,
0
,
0
,
0
},
{
0
,
0
,
0
,
0
,
0
,
0
},
{
0
,
0
,
0
,
0
,
linearRowFilter_gpu_32s
32s
,
linearRowFilter_gpu_32s32f
},
{
0
,
0
,
0
,
0
,
linearRowFilter_gpu_32f
32s
,
linearRowFilter_gpu_32f32f
}
{
linearRowFilter_gpu_8u_8u_c4
,
linearRowFilter_gpu_8u_8s_c4
,
0
,
0
,
0
,
0
},
{
linearRowFilter_gpu_8s_8u_c4
,
linearRowFilter_gpu_8s_8s_c4
,
0
,
0
,
0
,
0
},
{
0
,
0
,
linearRowFilter_gpu_16u_16u_c2
,
linearRowFilter_gpu_16u_16s_c2
,
0
,
0
},
{
0
,
0
,
linearRowFilter_gpu_16s_16u_c2
,
linearRowFilter_gpu_16s_16s_c2
,
0
,
0
},
{
0
,
0
,
0
,
0
,
linearRowFilter_gpu_32s
_32s_c1
,
linearRowFilter_gpu_32s_32f_c1
},
{
0
,
0
,
0
,
0
,
linearRowFilter_gpu_32f
_32s_c1
,
linearRowFilter_gpu_32f_32f_c1
}
};
if
((
srcType
==
CV_8UC1
||
srcType
==
CV_8UC4
)
&&
bufType
==
srcType
)
if
((
bufType
==
srcType
)
&&
(
srcType
==
CV_8UC1
||
srcType
==
CV_8UC4
)
)
{
GpuMat
gpu_row_krnl
;
int
nDivisor
;
...
...
@@ -657,21 +673,19 @@ Ptr<BaseRowFilter_GPU> cv::gpu::getLinearRowFilter_GPU(int srcType, int bufType,
return
Ptr
<
BaseRowFilter_GPU
>
(
new
NppLinearRowFilter
(
ksize
,
anchor
,
gpu_row_krnl
,
nDivisor
,
nppFilter1D_callers
[
CV_MAT_CN
(
srcType
)]));
}
else
if
((
srcType
==
CV_32SC1
||
srcType
==
CV_32FC1
)
&&
(
bufType
==
CV_32SC1
||
bufType
==
CV_32FC1
))
{
Mat
temp
(
rowKernel
.
size
(),
CV_32FC1
);
rowKernel
.
convertTo
(
temp
,
CV_32FC1
);
Mat
cont_krnl
=
temp
.
reshape
(
1
,
1
);
int
ksize
=
cont_krnl
.
cols
;
normalizeAnchor
(
anchor
,
ksize
);
CV_Assert
(
srcType
==
CV_8UC4
||
srcType
==
CV_8SC4
||
srcType
==
CV_16UC2
||
srcType
==
CV_16SC2
||
srcType
==
CV_32SC1
||
srcType
==
CV_32FC1
)
;
CV_Assert
(
bufType
==
CV_8UC4
||
bufType
==
CV_8SC4
||
bufType
==
CV_16UC2
||
bufType
==
CV_16SC2
||
bufType
==
CV_32SC1
||
bufType
==
CV_32FC1
);
return
Ptr
<
BaseRowFilter_GPU
>
(
new
GpuLinearRowFilter
(
ksize
,
anchor
,
cont_krnl
,
gpuFilter1D_callers
[
CV_MAT_DEPTH
(
srcType
)][
CV_MAT_DEPTH
(
bufType
)])
);
}
Mat
temp
(
rowKernel
.
size
(),
CV_32FC1
);
rowKernel
.
convertTo
(
temp
,
CV_32FC1
);
Mat
cont_krnl
=
temp
.
reshape
(
1
,
1
);
CV_Assert
(
!
"Unsupported types"
);
return
Ptr
<
BaseRowFilter_GPU
>
(
0
);
int
ksize
=
cont_krnl
.
cols
;
normalizeAnchor
(
anchor
,
ksize
);
return
Ptr
<
BaseRowFilter_GPU
>
(
new
GpuLinearRowFilter
(
ksize
,
anchor
,
cont_krnl
,
gpuFilter1D_callers
[
CV_MAT_DEPTH
(
srcType
)][
CV_MAT_DEPTH
(
bufType
)]));
}
namespace
...
...
@@ -718,15 +732,18 @@ Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int ds
static
const
nppFilter1D_t
nppFilter1D_callers
[]
=
{
0
,
nppiFilterColumn_8u_C1R
,
0
,
0
,
nppiFilterColumn_8u_C4R
};
static
const
gpuFilter1D_t
gpuFilter1D_callers
[
6
][
6
]
=
{
{
0
,
0
,
0
,
0
,
0
,
0
},
{
0
,
0
,
0
,
0
,
0
,
0
},
{
0
,
0
,
0
,
0
,
0
,
0
},
{
0
,
0
,
0
,
0
,
0
,
0
},
{
0
,
0
,
0
,
0
,
linearColumnFilter_gpu_32s
32s
,
linearColumnFilter_gpu_32s32f
},
{
0
,
0
,
0
,
0
,
linearColumnFilter_gpu_32f
32s
,
linearColumnFilter_gpu_32f32f
}
{
linearColumnFilter_gpu_8u_8u_c4
,
linearColumnFilter_gpu_8u_8s_c4
,
0
,
0
,
0
,
0
},
{
linearColumnFilter_gpu_8s_8u_c4
,
linearColumnFilter_gpu_8s_8s_c4
,
0
,
0
,
0
,
0
},
{
0
,
0
,
linearColumnFilter_gpu_16u_16u_c2
,
linearColumnFilter_gpu_16u_16s_c2
,
0
,
0
},
{
0
,
0
,
linearColumnFilter_gpu_16s_16u_c2
,
linearColumnFilter_gpu_16s_16s_c2
,
0
,
0
},
{
0
,
0
,
0
,
0
,
linearColumnFilter_gpu_32s
_32s_c1
,
linearColumnFilter_gpu_32s_32f_c1
},
{
0
,
0
,
0
,
0
,
linearColumnFilter_gpu_32f
_32s_c1
,
linearColumnFilter_gpu_32f_32f_c1
}
};
if
((
bufType
==
CV_8UC1
||
bufType
==
CV_8UC4
)
&&
dstType
==
bufType
)
double
kernelMin
;
minMaxLoc
(
columnKernel
,
&
kernelMin
);
if
((
bufType
==
dstType
)
&&
(
bufType
==
CV_8UC1
||
bufType
==
CV_8UC4
))
{
GpuMat
gpu_col_krnl
;
int
nDivisor
;
...
...
@@ -738,21 +755,19 @@ Ptr<BaseColumnFilter_GPU> cv::gpu::getLinearColumnFilter_GPU(int bufType, int ds
return
Ptr
<
BaseColumnFilter_GPU
>
(
new
NppLinearColumnFilter
(
ksize
,
anchor
,
gpu_col_krnl
,
nDivisor
,
nppFilter1D_callers
[
CV_MAT_CN
(
bufType
)]));
}
else
if
((
bufType
==
CV_32SC1
||
bufType
==
CV_32FC1
)
&&
(
dstType
==
CV_32SC1
||
dstType
==
CV_32FC1
))
{
Mat
temp
(
columnKernel
.
size
(),
CV_32FC1
);
columnKernel
.
convertTo
(
temp
,
CV_32FC1
);
Mat
cont_krnl
=
temp
.
reshape
(
1
,
1
);
int
ksize
=
cont_krnl
.
cols
;
normalizeAnchor
(
anchor
,
ksize
);
CV_Assert
(
dstType
==
CV_8UC4
||
dstType
==
CV_8SC4
||
dstType
==
CV_16UC2
||
dstType
==
CV_16SC2
||
dstType
==
CV_32SC1
||
dstType
==
CV_32FC1
)
;
CV_Assert
(
bufType
==
CV_8UC4
||
bufType
==
CV_8SC4
||
bufType
==
CV_16UC2
||
bufType
==
CV_16SC2
||
bufType
==
CV_32SC1
||
bufType
==
CV_32FC1
);
return
Ptr
<
BaseColumnFilter_GPU
>
(
new
GpuLinearColumnFilter
(
ksize
,
anchor
,
cont_krnl
,
gpuFilter1D_callers
[
CV_MAT_DEPTH
(
bufType
)][
CV_MAT_DEPTH
(
dstType
)]));
}
Mat
temp
(
columnKernel
.
size
(),
CV_32FC1
);
columnKernel
.
convertTo
(
temp
,
CV_32FC1
);
Mat
cont_krnl
=
temp
.
reshape
(
1
,
1
);
int
ksize
=
cont_krnl
.
cols
;
normalizeAnchor
(
anchor
,
ksize
);
CV_Assert
(
!
"Unsupported types"
);
return
Ptr
<
BaseColumnFilter_GPU
>
(
0
);
return
Ptr
<
BaseColumnFilter_GPU
>
(
new
GpuLinearColumnFilter
(
ksize
,
anchor
,
cont_krnl
,
gpuFilter1D_callers
[
CV_MAT_DEPTH
(
bufType
)][
CV_MAT_DEPTH
(
dstType
)])
);
}
Ptr
<
FilterEngine_GPU
>
cv
::
gpu
::
createSeparableLinearFilter_GPU
(
int
srcType
,
int
dstType
,
const
Mat
&
rowKernel
,
const
Mat
&
columnKernel
,
...
...
modules/gpu/src/imgproc_gpu.cpp
View file @
7d0feef7
...
...
@@ -652,7 +652,7 @@ void cv::gpu::cvtColor(const GpuMat& src, GpuMat& dst, int code, int dcn, const
double
cv
::
gpu
::
threshold
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
double
thresh
)
{
CV_Assert
(
src
.
type
()
==
CV_32FC1
)
CV_Assert
(
src
.
type
()
==
CV_32FC1
)
;
dst
.
create
(
src
.
size
(),
src
.
type
()
);
...
...
tests/gpu/src/filters.cpp
View file @
7d0feef7
...
...
@@ -166,6 +166,8 @@ struct CV_GpuNppImageSobelTest : public CV_GpuNppFilterTest
int
test
(
const
Mat
&
img
)
{
if
(
img
.
type
()
!=
CV_8UC1
)
return
CvTS
::
OK
;
int
ksizes
[]
=
{
3
,
5
,
7
};
int
ksizes_num
=
sizeof
(
ksizes
)
/
sizeof
(
int
);
...
...
@@ -181,8 +183,10 @@ struct CV_GpuNppImageSobelTest : public CV_GpuNppFilterTest
cv
::
Sobel
(
img
,
cpudst
,
-
1
,
dx
,
dy
,
ksizes
[
i
]);
GpuMat
gpu1
(
img
);
gpu1
.
convertTo
(
gpu1
,
CV_32S
);
GpuMat
gpudst
;
cv
::
gpu
::
Sobel
(
gpu1
,
gpudst
,
-
1
,
dx
,
dy
,
ksizes
[
i
]);
gpudst
.
convertTo
(
gpudst
,
CV_8U
);
if
(
CheckNorm
(
cpudst
,
gpudst
,
Size
(
ksizes
[
i
],
ksizes
[
i
]))
!=
CvTS
::
OK
)
test_res
=
CvTS
::
FAIL_GENERIC
;
...
...
@@ -200,15 +204,20 @@ struct CV_GpuNppImageScharrTest : public CV_GpuNppFilterTest
int
test
(
const
Mat
&
img
)
{
if
(
img
.
type
()
!=
CV_8UC1
)
return
CvTS
::
OK
;
int
dx
=
1
,
dy
=
0
;
Mat
cpudst
;
cv
::
Scharr
(
img
,
cpudst
,
-
1
,
dx
,
dy
);
GpuMat
gpu1
(
img
);
gpu1
.
convertTo
(
gpu1
,
CV_32S
);
GpuMat
gpudst
;
cv
::
gpu
::
Scharr
(
gpu1
,
gpudst
,
-
1
,
dx
,
dy
);
gpudst
.
convertTo
(
gpudst
,
CV_8U
);
return
CheckNorm
(
cpudst
,
gpudst
,
Size
(
3
,
3
));
}
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment