Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
ef9917ec
Commit
ef9917ec
authored
Jul 29, 2013
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
used new device layer for cv::gpu::compare
parent
43e81111
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
241 additions
and
425 deletions
+241
-425
cmp_mat.cu
modules/cudaarithm/src/cuda/cmp_mat.cu
+122
-109
cmp_scalar.cu
modules/cudaarithm/src/cuda/cmp_scalar.cu
+116
-175
element_operations.cpp
modules/cudaarithm/src/element_operations.cpp
+2
-140
vec_traits.hpp
modules/cudev/include/opencv2/cudev/util/vec_traits.hpp
+1
-1
No files found.
modules/cudaarithm/src/cuda/cmp_mat.cu
View file @
ef9917ec
...
...
@@ -40,30 +40,54 @@
//
//M*/
#i
f !defined CUDA_DISABLER
#i
nclude "opencv2/opencv_modules.hpp"
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/transform.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/simd_functions.hpp"
#ifndef HAVE_OPENCV_CUDEV
#
include "arithm_func_traits.hpp
"
#
error "opencv_cudev is required
"
using namespace cv::cuda;
using namespace cv::cuda::device;
#else
namespace arithm
#include "opencv2/cudev.hpp"
using namespace cv::cudev;
void cmpMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& stream, int cmpop);
namespace
{
template <class Op, typename T> struct CmpOp : binary_function<T, T, uchar>
{
__device__ __forceinline__ uchar operator()(T a, T b) const
{
Op op;
return -op(a, b);
}
};
template <typename ScalarDepth> struct TransformPolicy : DefaultTransformPolicy
{
};
template <> struct TransformPolicy<double> : DefaultTransformPolicy
{
enum {
shift = 1
};
};
template <template <typename> class Op, typename T>
void cmpMat_v1(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
{
CmpOp<Op<T>, T> op;
gridTransformBinary_< TransformPolicy<T> >(globPtr<T>(src1), globPtr<T>(src2), globPtr<uchar>(dst), op, stream);
}
struct VCmpEq4 : binary_function<uint, uint, uint>
{
__device__ __forceinline__ uint operator ()(uint a, uint b) const
{
return vcmpeq4(a, b);
}
__host__ __device__ __forceinline__ VCmpEq4() {}
__host__ __device__ __forceinline__ VCmpEq4(const VCmpEq4&) {}
};
struct VCmpNe4 : binary_function<uint, uint, uint>
{
...
...
@@ -71,9 +95,6 @@ namespace arithm
{
return vcmpne4(a, b);
}
__host__ __device__ __forceinline__ VCmpNe4() {}
__host__ __device__ __forceinline__ VCmpNe4(const VCmpNe4&) {}
};
struct VCmpLt4 : binary_function<uint, uint, uint>
{
...
...
@@ -81,9 +102,6 @@ namespace arithm
{
return vcmplt4(a, b);
}
__host__ __device__ __forceinline__ VCmpLt4() {}
__host__ __device__ __forceinline__ VCmpLt4(const VCmpLt4&) {}
};
struct VCmpLe4 : binary_function<uint, uint, uint>
{
...
...
@@ -91,116 +109,111 @@ namespace arithm
{
return vcmple4(a, b);
}
__host__ __device__ __forceinline__ VCmpLe4() {}
__host__ __device__ __forceinline__ VCmpLe4(const VCmpLe4&) {}
};
template <class Op, typename T>
struct Cmp : binary_function<T, T, uchar>
{
__device__ __forceinline__ uchar operator()(T a, T b) const
{
Op op;
return -op(a, b);
}
};
}
namespace cv { namespace cuda { namespace device
{
template <> struct TransformFunctorTraits< arithm::VCmpEq4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
{
};
template <> struct TransformFunctorTraits< arithm::VCmpNe4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
{
};
template <> struct TransformFunctorTraits< arithm::VCmpLt4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
{
};
template <> struct TransformFunctorTraits< arithm::VCmpLe4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
void cmpMatEq_v4(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
{
}
;
const int vcols = src1.cols >> 2
;
template <class Op, typename T> struct TransformFunctorTraits< arithm::Cmp<Op, T> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(uchar)>
{
};
}}}
GlobPtrSz<uint> src1_ = globPtr((uint*) src1.data, src1.step, src1.rows, vcols);
GlobPtrSz<uint> src2_ = globPtr((uint*) src2.data, src2.step, src1.rows, vcols);
GlobPtrSz<uint> dst_ = globPtr((uint*) dst.data, dst.step, src1.rows, vcols);
namespace arithm
{
void cmpMatEq_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
{
device::transform(src1, src2, dst, VCmpEq4(), WithOutMask(), stream);
gridTransformBinary(src1_, src2_, dst_, VCmpEq4(), stream);
}
void cmpMatNe_v4(
PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t
stream)
void cmpMatNe_v4(
const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream&
stream)
{
device::transform(src1, src2, dst, VCmpNe4(), WithOutMask(), stream);
const int vcols = src1.cols >> 2;
GlobPtrSz<uint> src1_ = globPtr((uint*) src1.data, src1.step, src1.rows, vcols);
GlobPtrSz<uint> src2_ = globPtr((uint*) src2.data, src2.step, src1.rows, vcols);
GlobPtrSz<uint> dst_ = globPtr((uint*) dst.data, dst.step, src1.rows, vcols);
gridTransformBinary(src1_, src2_, dst_, VCmpNe4(), stream);
}
void cmpMatLt_v4(
PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t
stream)
void cmpMatLt_v4(
const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream&
stream)
{
device::transform(src1, src2, dst, VCmpLt4(), WithOutMask(), stream);
const int vcols = src1.cols >> 2;
GlobPtrSz<uint> src1_ = globPtr((uint*) src1.data, src1.step, src1.rows, vcols);
GlobPtrSz<uint> src2_ = globPtr((uint*) src2.data, src2.step, src1.rows, vcols);
GlobPtrSz<uint> dst_ = globPtr((uint*) dst.data, dst.step, src1.rows, vcols);
gridTransformBinary(src1_, src2_, dst_, VCmpLt4(), stream);
}
void cmpMatLe_v4(
PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t
stream)
void cmpMatLe_v4(
const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream&
stream)
{
device::transform(src1, src2, dst, VCmpLe4(), WithOutMask(), stream);
const int vcols = src1.cols >> 2;
GlobPtrSz<uint> src1_ = globPtr((uint*) src1.data, src1.step, src1.rows, vcols);
GlobPtrSz<uint> src2_ = globPtr((uint*) src2.data, src2.step, src1.rows, vcols);
GlobPtrSz<uint> dst_ = globPtr((uint*) dst.data, dst.step, src1.rows, vcols);
gridTransformBinary(src1_, src2_, dst_, VCmpLe4(), stream);
}
}
template <template <typename> class Op, typename T>
void cmpMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
void cmpMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double, Stream& stream, int cmpop)
{
typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream);
static const func_t funcs[7][4] =
{
{cmpMat_v1<equal_to, uchar> , cmpMat_v1<not_equal_to, uchar> , cmpMat_v1<less, uchar> , cmpMat_v1<less_equal, uchar> },
{cmpMat_v1<equal_to, schar> , cmpMat_v1<not_equal_to, schar> , cmpMat_v1<less, schar> , cmpMat_v1<less_equal, schar> },
{cmpMat_v1<equal_to, ushort>, cmpMat_v1<not_equal_to, ushort>, cmpMat_v1<less, ushort>, cmpMat_v1<less_equal, ushort>},
{cmpMat_v1<equal_to, short> , cmpMat_v1<not_equal_to, short> , cmpMat_v1<less, short> , cmpMat_v1<less_equal, short> },
{cmpMat_v1<equal_to, int> , cmpMat_v1<not_equal_to, int> , cmpMat_v1<less, int> , cmpMat_v1<less_equal, int> },
{cmpMat_v1<equal_to, float> , cmpMat_v1<not_equal_to, float> , cmpMat_v1<less, float> , cmpMat_v1<less_equal, float> },
{cmpMat_v1<equal_to, double>, cmpMat_v1<not_equal_to, double>, cmpMat_v1<less, double>, cmpMat_v1<less_equal, double>}
};
typedef void (*func_v4_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream);
static const func_v4_t funcs_v4[] =
{
Cmp<Op<T>, T> op;
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, dst, op, WithOutMask(), stream);
}
cmpMatEq_v4, cmpMatNe_v4, cmpMatLt_v4, cmpMatLe_v4
};
const int depth = src1.depth();
template <typename T> void cmpMatEq(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
CV_DbgAssert( depth <= CV_64F );
static const int codes[] =
{
cmpMat<equal_to, T>(src1, src2, dst, stream);
}
template <typename T> void cmpMatNe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
0, 2, 3, 2, 3, 1
}
;
const GpuMat* psrc1[] =
{
cmpMat<not_equal_to, T>(src1, src2, dst, stream);
}
template <typename T> void cmpMatLt(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
&src1, &src2, &src2, &src1, &src1, &src1
}
;
const GpuMat* psrc2[] =
{
cmpMat<less, T>(src1, src2, dst, stream);
}
template <typename T> void cmpMatLe(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream)
&src2, &src1, &src1, &src2, &src2, &src2
};
const int code = codes[cmpop];
GpuMat src1_ = psrc1[cmpop]->reshape(1);
GpuMat src2_ = psrc2[cmpop]->reshape(1);
GpuMat dst_ = dst.reshape(1);
if (depth == CV_8U && (src1_.cols & 3) == 0)
{
cmpMat<less_equal, T>(src1, src2, dst, stream);
const intptr_t src1ptr = reinterpret_cast<intptr_t>(src1_.data);
const intptr_t src2ptr = reinterpret_cast<intptr_t>(src2_.data);
const intptr_t dstptr = reinterpret_cast<intptr_t>(dst_.data);
const bool isAllAligned = (src1ptr & 31) == 0 && (src2ptr & 31) == 0 && (dstptr & 31) == 0;
if (isAllAligned)
{
funcs_v4[code](src1_, src2_, dst_, stream);
return;
}
}
template void cmpMatEq<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void cmpMatEq<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void cmpMatEq<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void cmpMatEq<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void cmpMatEq<int >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void cmpMatEq<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void cmpMatEq<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void cmpMatNe<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void cmpMatNe<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void cmpMatNe<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void cmpMatNe<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void cmpMatNe<int >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void cmpMatNe<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void cmpMatNe<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void cmpMatLt<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void cmpMatLt<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void cmpMatLt<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void cmpMatLt<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void cmpMatLt<int >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void cmpMatLt<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void cmpMatLt<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void cmpMatLe<uchar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void cmpMatLe<schar >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void cmpMatLe<ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void cmpMatLe<short >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void cmpMatLe<int >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void cmpMatLe<float >(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
template void cmpMatLe<double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, cudaStream_t stream);
const func_t func = funcs[depth][code];
func(src1_, src2_, dst_, stream);
}
#endif
// CUDA_DISABLER
#endif
modules/cudaarithm/src/cuda/cmp_scalar.cu
View file @
ef9917ec
...
...
@@ -40,24 +40,23 @@
//
//M*/
#i
f !defined CUDA_DISABLER
#i
nclude "opencv2/opencv_modules.hpp"
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/transform.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/simd_functions.hpp"
#include "opencv2/core/cuda/vec_math.hpp"
#ifndef HAVE_OPENCV_CUDEV
#
include "arithm_func_traits.hpp
"
#
error "opencv_cudev is required
"
using namespace cv::cuda;
using namespace cv::cuda::device;
#else
namespace arithm
#include "opencv2/cudev.hpp"
using namespace cv::cudev;
void cmpScalar(const GpuMat& src, cv::Scalar val, bool inv, GpuMat& dst, const GpuMat&, double, Stream& stream, int cmpop);
namespace
{
template <class Op, typename T>
struct Cmp : binary_function<T, T, uchar>
template <class Op, typename T> struct CmpOp : binary_function<T, T, uchar>
{
__device__ __forceinline__ uchar operator()(T a, T b) const
{
...
...
@@ -66,219 +65,161 @@ namespace arithm
}
};
#define TYPE_VEC(type, cn) typename TypeVec<type, cn>::vec_type
#define MAKE_VEC(_type, _cn) typename MakeVec<_type, _cn>::type
template <class Op, typename T, int cn> struct CmpScalarOp;
template <class Op, typename T, int cn> struct CmpScalar;
template <class Op, typename T>
struct CmpScalar<Op, T, 1> : unary_function<T, uchar>
struct CmpScalar
Op
<Op, T, 1> : unary_function<T, uchar>
{
T val;
__host__ explicit CmpScalar(T val_) : val(val_) {}
__device__ __forceinline__ uchar operator()(T src) const
{
Cmp<Op, T> op;
Cmp
Op
<Op, T> op;
return op(src, val);
}
};
template <class Op, typename T>
struct CmpScalar
<Op, T, 2> : unary_function<TYPE_VEC(T, 2), TYP
E_VEC(uchar, 2)>
struct CmpScalar
Op<Op, T, 2> : unary_function<MAKE_VEC(T, 2), MAK
E_VEC(uchar, 2)>
{
TYP
E_VEC(T, 2) val;
MAK
E_VEC(T, 2) val;
__host__ explicit CmpScalar(TYPE_VEC(T, 2) val_) : val(val_) {}
__device__ __forceinline__ TYPE_VEC(uchar, 2) operator()(const TYPE_VEC(T, 2) & src) const
__device__ __forceinline__ MAKE_VEC(uchar, 2) operator()(const MAKE_VEC(T, 2) & src) const
{
Cmp<Op, T> op;
return VecTraits<
TYP
E_VEC(uchar, 2)>::make(op(src.x, val.x), op(src.y, val.y));
Cmp
Op
<Op, T> op;
return VecTraits<
MAK
E_VEC(uchar, 2)>::make(op(src.x, val.x), op(src.y, val.y));
}
};
template <class Op, typename T>
struct CmpScalar
<Op, T, 3> : unary_function<TYPE_VEC(T, 3), TYP
E_VEC(uchar, 3)>
struct CmpScalar
Op<Op, T, 3> : unary_function<MAKE_VEC(T, 3), MAK
E_VEC(uchar, 3)>
{
TYPE_VEC(T, 3) val;
__host__ explicit CmpScalar(TYPE_VEC(T, 3) val_) : val(val_) {}
MAKE_VEC(T, 3) val;
__device__ __forceinline__
TYPE_VEC(uchar, 3) operator()(const TYP
E_VEC(T, 3) & src) const
__device__ __forceinline__
MAKE_VEC(uchar, 3) operator()(const MAK
E_VEC(T, 3) & src) const
{
Cmp<Op, T> op;
return VecTraits<
TYP
E_VEC(uchar, 3)>::make(op(src.x, val.x), op(src.y, val.y), op(src.z, val.z));
Cmp
Op
<Op, T> op;
return VecTraits<
MAK
E_VEC(uchar, 3)>::make(op(src.x, val.x), op(src.y, val.y), op(src.z, val.z));
}
};
template <class Op, typename T>
struct CmpScalar
<Op, T, 4> : unary_function<TYPE_VEC(T, 4), TYP
E_VEC(uchar, 4)>
struct CmpScalar
Op<Op, T, 4> : unary_function<MAKE_VEC(T, 4), MAK
E_VEC(uchar, 4)>
{
TYPE_VEC(T, 4) val;
__host__ explicit CmpScalar(TYPE_VEC(T, 4) val_) : val(val_) {}
MAKE_VEC(T, 4) val;
__device__ __forceinline__
TYPE_VEC(uchar, 4) operator()(const TYP
E_VEC(T, 4) & src) const
__device__ __forceinline__
MAKE_VEC(uchar, 4) operator()(const MAK
E_VEC(T, 4) & src) const
{
Cmp<Op, T> op;
return VecTraits<
TYP
E_VEC(uchar, 4)>::make(op(src.x, val.x), op(src.y, val.y), op(src.z, val.z), op(src.w, val.w));
Cmp
Op
<Op, T> op;
return VecTraits<
MAK
E_VEC(uchar, 4)>::make(op(src.x, val.x), op(src.y, val.y), op(src.z, val.z), op(src.w, val.w));
}
};
#undef TYPE_VEC
}
namespace cv { namespace cuda { namespace device
{
template <class Op, typename T> struct TransformFunctorTraits< arithm::CmpScalar<Op, T, 1> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(uchar)>
template <typename ScalarDepth> struct TransformPolicy : DefaultTransformPolicy
{
};
template <> struct TransformPolicy<double> : DefaultTransformPolicy
{
enum {
shift = 1
};
};
}}}
namespace arithm
{
template <template <typename> class Op, typename T, int cn>
void cmpScalar
(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t
stream)
void cmpScalar
Impl(const GpuMat& src, cv::Scalar value, GpuMat& dst, Stream&
stream)
{
typedef typename TypeVec<T, cn>::vec_type src_t;
typedef typename TypeVec<uchar, cn>::vec_type dst_t;
typedef typename MakeVec<T, cn>::type src_type;
typedef typename MakeVec<uchar, cn>::type dst_type;
cv::Scalar_<T> value_ = value;
T sval[] = {static_cast<T>(val[0]), static_cast<T>(val[1]), static_cast<T>(val[2]), static_cast<T>(val[3])}
;
src_t val1 = VecTraits<src_t>::make(s
val);
CmpScalarOp<Op<T>, T, cn> op
;
op.val = VecTraits<src_type>::make(value_.
val);
CmpScalar<Op<T>, T, cn> op(val1);
device::transform((PtrStepSz<src_t>) src, (PtrStepSz<dst_t>) dst, op, WithOutMask(), stream);
gridTransformUnary_< TransformPolicy<T> >(globPtr<src_type>(src), globPtr<dst_type>(dst), op, stream);
}
}
template <typename T> void cmpScalarEq(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
void cmpScalar(const GpuMat& src, cv::Scalar val, bool inv, GpuMat& dst, const GpuMat&, double, Stream& stream, int cmpop)
{
typedef void (*func_t)(const GpuMat& src, cv::Scalar value, GpuMat& dst, Stream& stream);
static const func_t funcs[7][6][4] =
{
typedef void (*func_t)(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream);
static const func_t funcs[] =
{
0,
cmpScalar<equal_to, T, 1>,
cmpScalar<equal_to, T, 2>,
cmpScalar<equal_to, T, 3>,
cmpScalar<equal_to, T, 4>
};
funcs[cn](src, val, dst, stream);
}
template <typename T> void cmpScalarNe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
{
typedef void (*func_t)(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream);
static const func_t funcs[] =
{cmpScalarImpl<equal_to, uchar, 1>, cmpScalarImpl<equal_to, uchar, 2>, cmpScalarImpl<equal_to, uchar, 3>, cmpScalarImpl<equal_to, uchar, 4>},
{cmpScalarImpl<greater, uchar, 1>, cmpScalarImpl<greater, uchar, 2>, cmpScalarImpl<greater, uchar, 3>, cmpScalarImpl<greater, uchar, 4>},
{cmpScalarImpl<greater_equal, uchar, 1>, cmpScalarImpl<greater_equal, uchar, 2>, cmpScalarImpl<greater_equal, uchar, 3>, cmpScalarImpl<greater_equal, uchar, 4>},
{cmpScalarImpl<less, uchar, 1>, cmpScalarImpl<less, uchar, 2>, cmpScalarImpl<less, uchar, 3>, cmpScalarImpl<less, uchar, 4>},
{cmpScalarImpl<less_equal, uchar, 1>, cmpScalarImpl<less_equal, uchar, 2>, cmpScalarImpl<less_equal, uchar, 3>, cmpScalarImpl<less_equal, uchar, 4>},
{cmpScalarImpl<not_equal_to, uchar, 1>, cmpScalarImpl<not_equal_to, uchar, 2>, cmpScalarImpl<not_equal_to, uchar, 3>, cmpScalarImpl<not_equal_to, uchar, 4>}
},
{
0,
cmpScalar<not_equal_to, T, 1>,
cmpScalar<not_equal_to, T, 2>,
cmpScalar<not_equal_to, T, 3>,
cmpScalar<not_equal_to, T, 4>
};
funcs[cn](src, val, dst, stream);
}
template <typename T> void cmpScalarLt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
{
typedef void (*func_t)(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream);
static const func_t funcs[] =
{cmpScalarImpl<equal_to, schar, 1>, cmpScalarImpl<equal_to, schar, 2>, cmpScalarImpl<equal_to, schar, 3>, cmpScalarImpl<equal_to, schar, 4>},
{cmpScalarImpl<greater, schar, 1>, cmpScalarImpl<greater, schar, 2>, cmpScalarImpl<greater, schar, 3>, cmpScalarImpl<greater, schar, 4>},
{cmpScalarImpl<greater_equal, schar, 1>, cmpScalarImpl<greater_equal, schar, 2>, cmpScalarImpl<greater_equal, schar, 3>, cmpScalarImpl<greater_equal, schar, 4>},
{cmpScalarImpl<less, schar, 1>, cmpScalarImpl<less, schar, 2>, cmpScalarImpl<less, schar, 3>, cmpScalarImpl<less, schar, 4>},
{cmpScalarImpl<less_equal, schar, 1>, cmpScalarImpl<less_equal, schar, 2>, cmpScalarImpl<less_equal, schar, 3>, cmpScalarImpl<less_equal, schar, 4>},
{cmpScalarImpl<not_equal_to, schar, 1>, cmpScalarImpl<not_equal_to, schar, 2>, cmpScalarImpl<not_equal_to, schar, 3>, cmpScalarImpl<not_equal_to, schar, 4>}
},
{
0,
cmpScalar<less, T, 1>,
cmpScalar<less, T, 2>,
cmpScalar<less, T, 3>,
cmpScalar<less, T, 4>
};
funcs[cn](src, val, dst, stream);
}
template <typename T> void cmpScalarLe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
{
typedef void (*func_t)(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream);
static const func_t funcs[] =
{cmpScalarImpl<equal_to, ushort, 1>, cmpScalarImpl<equal_to, ushort, 2>, cmpScalarImpl<equal_to, ushort, 3>, cmpScalarImpl<equal_to, ushort, 4>},
{cmpScalarImpl<greater, ushort, 1>, cmpScalarImpl<greater, ushort, 2>, cmpScalarImpl<greater, ushort, 3>, cmpScalarImpl<greater, ushort, 4>},
{cmpScalarImpl<greater_equal, ushort, 1>, cmpScalarImpl<greater_equal, ushort, 2>, cmpScalarImpl<greater_equal, ushort, 3>, cmpScalarImpl<greater_equal, ushort, 4>},
{cmpScalarImpl<less, ushort, 1>, cmpScalarImpl<less, ushort, 2>, cmpScalarImpl<less, ushort, 3>, cmpScalarImpl<less, ushort, 4>},
{cmpScalarImpl<less_equal, ushort, 1>, cmpScalarImpl<less_equal, ushort, 2>, cmpScalarImpl<less_equal, ushort, 3>, cmpScalarImpl<less_equal, ushort, 4>},
{cmpScalarImpl<not_equal_to, ushort, 1>, cmpScalarImpl<not_equal_to, ushort, 2>, cmpScalarImpl<not_equal_to, ushort, 3>, cmpScalarImpl<not_equal_to, ushort, 4>}
},
{
0,
cmpScalar<less_equal, T, 1>,
cmpScalar<less_equal, T, 2>,
cmpScalar<less_equal, T, 3>,
cmpScalar<less_equal, T, 4>
};
funcs[cn](src, val, dst, stream);
}
template <typename T> void cmpScalarGt(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
{
typedef void (*func_t)(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream);
static const func_t funcs[] =
{cmpScalarImpl<equal_to, short, 1>, cmpScalarImpl<equal_to, short, 2>, cmpScalarImpl<equal_to, short, 3>, cmpScalarImpl<equal_to, short, 4>},
{cmpScalarImpl<greater, short, 1>, cmpScalarImpl<greater, short, 2>, cmpScalarImpl<greater, short, 3>, cmpScalarImpl<greater, short, 4>},
{cmpScalarImpl<greater_equal, short, 1>, cmpScalarImpl<greater_equal, short, 2>, cmpScalarImpl<greater_equal, short, 3>, cmpScalarImpl<greater_equal, short, 4>},
{cmpScalarImpl<less, short, 1>, cmpScalarImpl<less, short, 2>, cmpScalarImpl<less, short, 3>, cmpScalarImpl<less, short, 4>},
{cmpScalarImpl<less_equal, short, 1>, cmpScalarImpl<less_equal, short, 2>, cmpScalarImpl<less_equal, short, 3>, cmpScalarImpl<less_equal, short, 4>},
{cmpScalarImpl<not_equal_to, short, 1>, cmpScalarImpl<not_equal_to, short, 2>, cmpScalarImpl<not_equal_to, short, 3>, cmpScalarImpl<not_equal_to, short, 4>}
},
{
0,
cmpScalar<greater, T, 1>,
cmpScalar<greater, T, 2>,
cmpScalar<greater, T, 3>,
cmpScalar<greater, T, 4>
};
funcs[cn](src, val, dst, stream);
}
template <typename T> void cmpScalarGe(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream)
{
typedef void (*func_t)(PtrStepSzb src, double val[4], PtrStepSzb dst, cudaStream_t stream);
static const func_t funcs[] =
{cmpScalarImpl<equal_to, int, 1>, cmpScalarImpl<equal_to, int, 2>, cmpScalarImpl<equal_to, int, 3>, cmpScalarImpl<equal_to, int, 4>},
{cmpScalarImpl<greater, int, 1>, cmpScalarImpl<greater, int, 2>, cmpScalarImpl<greater, int, 3>, cmpScalarImpl<greater, int, 4>},
{cmpScalarImpl<greater_equal, int, 1>, cmpScalarImpl<greater_equal, int, 2>, cmpScalarImpl<greater_equal, int, 3>, cmpScalarImpl<greater_equal, int, 4>},
{cmpScalarImpl<less, int, 1>, cmpScalarImpl<less, int, 2>, cmpScalarImpl<less, int, 3>, cmpScalarImpl<less, int, 4>},
{cmpScalarImpl<less_equal, int, 1>, cmpScalarImpl<less_equal, int, 2>, cmpScalarImpl<less_equal, int, 3>, cmpScalarImpl<less_equal, int, 4>},
{cmpScalarImpl<not_equal_to, int, 1>, cmpScalarImpl<not_equal_to, int, 2>, cmpScalarImpl<not_equal_to, int, 3>, cmpScalarImpl<not_equal_to, int, 4>}
},
{
0,
cmpScalar<greater_equal, T, 1>,
cmpScalar<greater_equal, T, 2>,
cmpScalar<greater_equal, T, 3>,
cmpScalar<greater_equal, T, 4>
};
{cmpScalarImpl<equal_to, float, 1>, cmpScalarImpl<equal_to, float, 2>, cmpScalarImpl<equal_to, float, 3>, cmpScalarImpl<equal_to, float, 4>},
{cmpScalarImpl<greater, float, 1>, cmpScalarImpl<greater, float, 2>, cmpScalarImpl<greater, float, 3>, cmpScalarImpl<greater, float, 4>},
{cmpScalarImpl<greater_equal, float, 1>, cmpScalarImpl<greater_equal, float, 2>, cmpScalarImpl<greater_equal, float, 3>, cmpScalarImpl<greater_equal, float, 4>},
{cmpScalarImpl<less, float, 1>, cmpScalarImpl<less, float, 2>, cmpScalarImpl<less, float, 3>, cmpScalarImpl<less, float, 4>},
{cmpScalarImpl<less_equal, float, 1>, cmpScalarImpl<less_equal, float, 2>, cmpScalarImpl<less_equal, float, 3>, cmpScalarImpl<less_equal, float, 4>},
{cmpScalarImpl<not_equal_to, float, 1>, cmpScalarImpl<not_equal_to, float, 2>, cmpScalarImpl<not_equal_to, float, 3>, cmpScalarImpl<not_equal_to, float, 4>}
},
{
{cmpScalarImpl<equal_to, double, 1>, cmpScalarImpl<equal_to, double, 2>, cmpScalarImpl<equal_to, double, 3>, cmpScalarImpl<equal_to, double, 4>},
{cmpScalarImpl<greater, double, 1>, cmpScalarImpl<greater, double, 2>, cmpScalarImpl<greater, double, 3>, cmpScalarImpl<greater, double, 4>},
{cmpScalarImpl<greater_equal, double, 1>, cmpScalarImpl<greater_equal, double, 2>, cmpScalarImpl<greater_equal, double, 3>, cmpScalarImpl<greater_equal, double, 4>},
{cmpScalarImpl<less, double, 1>, cmpScalarImpl<less, double, 2>, cmpScalarImpl<less, double, 3>, cmpScalarImpl<less, double, 4>},
{cmpScalarImpl<less_equal, double, 1>, cmpScalarImpl<less_equal, double, 2>, cmpScalarImpl<less_equal, double, 3>, cmpScalarImpl<less_equal, double, 4>},
{cmpScalarImpl<not_equal_to, double, 1>, cmpScalarImpl<not_equal_to, double, 2>, cmpScalarImpl<not_equal_to, double, 3>, cmpScalarImpl<not_equal_to, double, 4>}
}
};
funcs[cn](src, val, dst, stream);
if (inv)
{
// src1 is a scalar; swap it with src2
cmpop = cmpop == cv::CMP_LT ? cv::CMP_GT : cmpop == cv::CMP_LE ? cv::CMP_GE :
cmpop == cv::CMP_GE ? cv::CMP_LE : cmpop == cv::CMP_GT ? cv::CMP_LT : cmpop;
}
template void cmpScalarEq<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarEq<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarEq<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarEq<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarEq<int >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarEq<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarEq<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarNe<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarNe<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarNe<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarNe<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarNe<int >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarNe<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarNe<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarLt<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarLt<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarLt<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarLt<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarLt<int >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarLt<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarLt<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarLe<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarLe<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarLe<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarLe<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarLe<int >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarLe<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarLe<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
const int depth = src.depth();
const int cn = src.channels();
template void cmpScalarGt<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarGt<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarGt<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarGt<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarGt<int >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarGt<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarGt<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
CV_DbgAssert( depth <= CV_64F && cn <= 4 );
template void cmpScalarGe<uchar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarGe<schar >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarGe<ushort>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarGe<short >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarGe<int >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarGe<float >(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
template void cmpScalarGe<double>(PtrStepSzb src, int cn, double val[4], PtrStepSzb dst, cudaStream_t stream);
funcs[depth][cmpop][cn - 1](src, val, dst, stream);
}
#endif
// CUDA_DISABLER
#endif
modules/cudaarithm/src/element_operations.cpp
View file @
ef9917ec
...
...
@@ -454,147 +454,9 @@ void cv::cuda::absdiff(InputArray src1, InputArray src2, OutputArray dst, Stream
//////////////////////////////////////////////////////////////////////////////
// compare
namespace
arithm
{
void
cmpMatEq_v4
(
PtrStepSz
<
uint
>
src1
,
PtrStepSz
<
uint
>
src2
,
PtrStepSz
<
uint
>
dst
,
cudaStream_t
stream
);
void
cmpMatNe_v4
(
PtrStepSz
<
uint
>
src1
,
PtrStepSz
<
uint
>
src2
,
PtrStepSz
<
uint
>
dst
,
cudaStream_t
stream
);
void
cmpMatLt_v4
(
PtrStepSz
<
uint
>
src1
,
PtrStepSz
<
uint
>
src2
,
PtrStepSz
<
uint
>
dst
,
cudaStream_t
stream
);
void
cmpMatLe_v4
(
PtrStepSz
<
uint
>
src1
,
PtrStepSz
<
uint
>
src2
,
PtrStepSz
<
uint
>
dst
,
cudaStream_t
stream
);
template
<
typename
T
>
void
cmpMatEq
(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
<
typename
T
>
void
cmpMatNe
(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
<
typename
T
>
void
cmpMatLt
(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
<
typename
T
>
void
cmpMatLe
(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
}
static
void
cmpMat
(
const
GpuMat
&
src1
,
const
GpuMat
&
src2
,
GpuMat
&
dst
,
const
GpuMat
&
,
double
,
Stream
&
_stream
,
int
cmpop
)
{
using
namespace
arithm
;
typedef
void
(
*
func_t
)(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
static
const
func_t
funcs
[
7
][
4
]
=
{
{
cmpMatEq
<
unsigned
char
>
,
cmpMatNe
<
unsigned
char
>
,
cmpMatLt
<
unsigned
char
>
,
cmpMatLe
<
unsigned
char
>
},
{
cmpMatEq
<
signed
char
>
,
cmpMatNe
<
signed
char
>
,
cmpMatLt
<
signed
char
>
,
cmpMatLe
<
signed
char
>
},
{
cmpMatEq
<
unsigned
short
>
,
cmpMatNe
<
unsigned
short
>
,
cmpMatLt
<
unsigned
short
>
,
cmpMatLe
<
unsigned
short
>
},
{
cmpMatEq
<
short
>
,
cmpMatNe
<
short
>
,
cmpMatLt
<
short
>
,
cmpMatLe
<
short
>
},
{
cmpMatEq
<
int
>
,
cmpMatNe
<
int
>
,
cmpMatLt
<
int
>
,
cmpMatLe
<
int
>
},
{
cmpMatEq
<
float
>
,
cmpMatNe
<
float
>
,
cmpMatLt
<
float
>
,
cmpMatLe
<
float
>
},
{
cmpMatEq
<
double
>
,
cmpMatNe
<
double
>
,
cmpMatLt
<
double
>
,
cmpMatLe
<
double
>
}
};
typedef
void
(
*
func_v4_t
)(
PtrStepSz
<
uint
>
src1
,
PtrStepSz
<
uint
>
src2
,
PtrStepSz
<
uint
>
dst
,
cudaStream_t
stream
);
static
const
func_v4_t
funcs_v4
[]
=
{
cmpMatEq_v4
,
cmpMatNe_v4
,
cmpMatLt_v4
,
cmpMatLe_v4
};
const
int
depth
=
src1
.
depth
();
const
int
cn
=
src1
.
channels
();
cudaStream_t
stream
=
StreamAccessor
::
getStream
(
_stream
);
static
const
int
codes
[]
=
{
0
,
2
,
3
,
2
,
3
,
1
};
const
GpuMat
*
psrc1
[]
=
{
&
src1
,
&
src2
,
&
src2
,
&
src1
,
&
src1
,
&
src1
};
const
GpuMat
*
psrc2
[]
=
{
&
src2
,
&
src1
,
&
src1
,
&
src2
,
&
src2
,
&
src2
};
const
int
code
=
codes
[
cmpop
];
PtrStepSzb
src1_
(
src1
.
rows
,
src1
.
cols
*
cn
,
psrc1
[
cmpop
]
->
data
,
psrc1
[
cmpop
]
->
step
);
PtrStepSzb
src2_
(
src1
.
rows
,
src1
.
cols
*
cn
,
psrc2
[
cmpop
]
->
data
,
psrc2
[
cmpop
]
->
step
);
PtrStepSzb
dst_
(
src1
.
rows
,
src1
.
cols
*
cn
,
dst
.
data
,
dst
.
step
);
if
(
depth
==
CV_8U
&&
(
src1_
.
cols
&
3
)
==
0
)
{
const
intptr_t
src1ptr
=
reinterpret_cast
<
intptr_t
>
(
src1_
.
data
);
const
intptr_t
src2ptr
=
reinterpret_cast
<
intptr_t
>
(
src2_
.
data
);
const
intptr_t
dstptr
=
reinterpret_cast
<
intptr_t
>
(
dst_
.
data
);
const
bool
isAllAligned
=
(
src1ptr
&
31
)
==
0
&&
(
src2ptr
&
31
)
==
0
&&
(
dstptr
&
31
)
==
0
;
if
(
isAllAligned
)
{
const
int
vcols
=
src1_
.
cols
>>
2
;
void
cmpMat
(
const
GpuMat
&
src1
,
const
GpuMat
&
src2
,
GpuMat
&
dst
,
const
GpuMat
&
,
double
,
Stream
&
stream
,
int
cmpop
);
funcs_v4
[
code
](
PtrStepSz
<
unsigned
int
>
(
src1_
.
rows
,
vcols
,
(
unsigned
int
*
)
src1_
.
data
,
src1_
.
step
),
PtrStepSz
<
unsigned
int
>
(
src1_
.
rows
,
vcols
,
(
unsigned
int
*
)
src2_
.
data
,
src2_
.
step
),
PtrStepSz
<
unsigned
int
>
(
src1_
.
rows
,
vcols
,
(
unsigned
int
*
)
dst_
.
data
,
dst_
.
step
),
stream
);
return
;
}
}
const
func_t
func
=
funcs
[
depth
][
code
];
func
(
src1_
,
src2_
,
dst_
,
stream
);
}
namespace
arithm
{
template
<
typename
T
>
void
cmpScalarEq
(
PtrStepSzb
src
,
int
cn
,
double
val
[
4
],
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
<
typename
T
>
void
cmpScalarNe
(
PtrStepSzb
src
,
int
cn
,
double
val
[
4
],
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
<
typename
T
>
void
cmpScalarLt
(
PtrStepSzb
src
,
int
cn
,
double
val
[
4
],
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
<
typename
T
>
void
cmpScalarLe
(
PtrStepSzb
src
,
int
cn
,
double
val
[
4
],
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
<
typename
T
>
void
cmpScalarGt
(
PtrStepSzb
src
,
int
cn
,
double
val
[
4
],
PtrStepSzb
dst
,
cudaStream_t
stream
);
template
<
typename
T
>
void
cmpScalarGe
(
PtrStepSzb
src
,
int
cn
,
double
val
[
4
],
PtrStepSzb
dst
,
cudaStream_t
stream
);
}
namespace
{
template
<
typename
T
>
void
castScalar
(
Scalar
&
sc
)
{
sc
.
val
[
0
]
=
saturate_cast
<
T
>
(
sc
.
val
[
0
]);
sc
.
val
[
1
]
=
saturate_cast
<
T
>
(
sc
.
val
[
1
]);
sc
.
val
[
2
]
=
saturate_cast
<
T
>
(
sc
.
val
[
2
]);
sc
.
val
[
3
]
=
saturate_cast
<
T
>
(
sc
.
val
[
3
]);
}
}
static
void
cmpScalar
(
const
GpuMat
&
src
,
Scalar
val
,
bool
inv
,
GpuMat
&
dst
,
const
GpuMat
&
,
double
,
Stream
&
stream
,
int
cmpop
)
{
using
namespace
arithm
;
typedef
void
(
*
func_t
)(
PtrStepSzb
src
,
int
cn
,
double
val
[
4
],
PtrStepSzb
dst
,
cudaStream_t
stream
);
static
const
func_t
funcs
[
7
][
6
]
=
{
{
cmpScalarEq
<
unsigned
char
>
,
cmpScalarGt
<
unsigned
char
>
,
cmpScalarGe
<
unsigned
char
>
,
cmpScalarLt
<
unsigned
char
>
,
cmpScalarLe
<
unsigned
char
>
,
cmpScalarNe
<
unsigned
char
>
},
{
cmpScalarEq
<
signed
char
>
,
cmpScalarGt
<
signed
char
>
,
cmpScalarGe
<
signed
char
>
,
cmpScalarLt
<
signed
char
>
,
cmpScalarLe
<
signed
char
>
,
cmpScalarNe
<
signed
char
>
},
{
cmpScalarEq
<
unsigned
short
>
,
cmpScalarGt
<
unsigned
short
>
,
cmpScalarGe
<
unsigned
short
>
,
cmpScalarLt
<
unsigned
short
>
,
cmpScalarLe
<
unsigned
short
>
,
cmpScalarNe
<
unsigned
short
>
},
{
cmpScalarEq
<
short
>
,
cmpScalarGt
<
short
>
,
cmpScalarGe
<
short
>
,
cmpScalarLt
<
short
>
,
cmpScalarLe
<
short
>
,
cmpScalarNe
<
short
>
},
{
cmpScalarEq
<
int
>
,
cmpScalarGt
<
int
>
,
cmpScalarGe
<
int
>
,
cmpScalarLt
<
int
>
,
cmpScalarLe
<
int
>
,
cmpScalarNe
<
int
>
},
{
cmpScalarEq
<
float
>
,
cmpScalarGt
<
float
>
,
cmpScalarGe
<
float
>
,
cmpScalarLt
<
float
>
,
cmpScalarLe
<
float
>
,
cmpScalarNe
<
float
>
},
{
cmpScalarEq
<
double
>
,
cmpScalarGt
<
double
>
,
cmpScalarGe
<
double
>
,
cmpScalarLt
<
double
>
,
cmpScalarLe
<
double
>
,
cmpScalarNe
<
double
>
}
};
typedef
void
(
*
cast_func_t
)(
Scalar
&
sc
);
static
const
cast_func_t
cast_func
[]
=
{
castScalar
<
unsigned
char
>
,
castScalar
<
signed
char
>
,
castScalar
<
unsigned
short
>
,
castScalar
<
short
>
,
castScalar
<
int
>
,
castScalar
<
float
>
,
castScalar
<
double
>
};
if
(
inv
)
{
// src1 is a scalar; swap it with src2
cmpop
=
cmpop
==
CMP_LT
?
CMP_GT
:
cmpop
==
CMP_LE
?
CMP_GE
:
cmpop
==
CMP_GE
?
CMP_LE
:
cmpop
==
CMP_GT
?
CMP_LT
:
cmpop
;
}
const
int
depth
=
src
.
depth
();
const
int
cn
=
src
.
channels
();
cast_func
[
depth
](
val
);
funcs
[
depth
][
cmpop
](
src
,
cn
,
val
.
val
,
dst
,
StreamAccessor
::
getStream
(
stream
));
}
void
cmpScalar
(
const
GpuMat
&
src
,
Scalar
val
,
bool
inv
,
GpuMat
&
dst
,
const
GpuMat
&
,
double
,
Stream
&
stream
,
int
cmpop
);
void
cv
::
cuda
::
compare
(
InputArray
src1
,
InputArray
src2
,
OutputArray
dst
,
int
cmpop
,
Stream
&
stream
)
{
...
...
modules/cudev/include/opencv2/cudev/util/vec_traits.hpp
View file @
ef9917ec
...
...
@@ -70,7 +70,7 @@ CV_CUDEV_MAKE_VEC_INST(double)
#undef CV_CUDEV_MAKE_VEC_INST
template
<>
struct
MakeVec
<
schar
,
1
>
{
typedef
char
type
;
};
template
<>
struct
MakeVec
<
schar
,
1
>
{
typedef
schar
type
;
};
template
<>
struct
MakeVec
<
schar
,
2
>
{
typedef
char2
type
;
};
template
<>
struct
MakeVec
<
schar
,
3
>
{
typedef
char3
type
;
};
template
<>
struct
MakeVec
<
schar
,
4
>
{
typedef
char4
type
;
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment