Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
156f86ea
Commit
156f86ea
authored
Jul 29, 2013
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
used new device layer for cv::gpu::subtract
parent
9c5da2ea
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
285 additions
and
430 deletions
+285
-430
sub_mat.cu
modules/cudaarithm/src/cuda/sub_mat.cu
+145
-105
sub_scalar.cu
modules/cudaarithm/src/cuda/sub_scalar.cu
+138
-84
element_operations.cpp
modules/cudaarithm/src/element_operations.cpp
+2
-241
No files found.
modules/cudaarithm/src/cuda/sub_mat.cu
View file @
156f86ea
...
...
@@ -40,146 +40,186 @@
//
//M*/
#i
f !defined CUDA_DISABLER
#i
nclude "opencv2/opencv_modules.hpp"
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/transform.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/simd_functions.hpp"
#ifndef HAVE_OPENCV_CUDEV
#
include "arithm_func_traits.hpp
"
#
error "opencv_cudev is required
"
using namespace cv::cuda;
using namespace cv::cuda::device;
#else
namespace arithm
#include "opencv2/cudev.hpp"
using namespace cv::cudev;
void subMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& _stream, int);
namespace
{
struct VSub4 : binary_function<uint, uint, uint
>
template <typename T, typename D> struct SubOp1 : binary_function<T, T, D
>
{
__device__ __forceinline__
uint operator ()(uint a, uint
b) const
__device__ __forceinline__
D operator ()(T a, T
b) const
{
return
vsub4(a,
b);
return
saturate_cast<D>(a -
b);
}
__host__ __device__ __forceinline__ VSub4() {}
__host__ __device__ __forceinline__ VSub4(const VSub4&) {}
};
struct VSub2 : binary_function<uint, uint, uint>
template <typename T, typename D>
void subMat_v1(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& stream)
{
if (mask.data)
gridTransformBinary(globPtr<T>(src1), globPtr<T>(src2), globPtr<D>(dst), SubOp1<T, D>(), globPtr<uchar>(mask), stream);
else
gridTransformBinary(globPtr<T>(src1), globPtr<T>(src2), globPtr<D>(dst), SubOp1<T, D>(), stream);
}
struct SubOp2 : binary_function<uint, uint, uint>
{
__device__ __forceinline__ uint operator ()(uint a, uint b) const
{
return vsub2(a, b);
}
__host__ __device__ __forceinline__ VSub2() {}
__host__ __device__ __forceinline__ VSub2(const VSub2&) {}
};
template <typename T, typename D> struct SubMat : binary_function<T, T, D>
{
__device__ __forceinline__ D operator ()(T a, T b) const
void subMat_v2(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
{
return saturate_cast<D>(a - b);
const int vcols = src1.cols >> 1;
GlobPtrSz<uint> src1_ = globPtr((uint*) src1.data, src1.step, src1.rows, vcols);
GlobPtrSz<uint> src2_ = globPtr((uint*) src2.data, src2.step, src1.rows, vcols);
GlobPtrSz<uint> dst_ = globPtr((uint*) dst.data, dst.step, src1.rows, vcols);
gridTransformBinary(src1_, src2_, dst_, SubOp2(), stream);
}
__host__ __device__ __forceinline__ SubMat() {}
__host__ __device__ __forceinline__ SubMat(const SubMat&) {}
struct SubOp4 : binary_function<uint, uint, uint>
{
__device__ __forceinline__ uint operator ()(uint a, uint b) const
{
return vsub4(a, b);
}
};
void subMat_v4(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
{
const int vcols = src1.cols >> 2;
GlobPtrSz<uint> src1_ = globPtr((uint*) src1.data, src1.step, src1.rows, vcols);
GlobPtrSz<uint> src2_ = globPtr((uint*) src2.data, src2.step, src1.rows, vcols);
GlobPtrSz<uint> dst_ = globPtr((uint*) dst.data, dst.step, src1.rows, vcols);
gridTransformBinary(src1_, src2_, dst_, SubOp4(), stream);
}
}
namespace cv { namespace cuda { namespace device
void subMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int)
{
template <> struct TransformFunctorTraits< arithm::VSub4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& stream);
static const func_t funcs[7][7] =
{
};
template <> struct TransformFunctorTraits< arithm::VSub2 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
{
};
template <typename T, typename D> struct TransformFunctorTraits< arithm::SubMat<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
subMat_v1<uchar, uchar>,
subMat_v1<uchar, schar>,
subMat_v1<uchar, ushort>,
subMat_v1<uchar, short>,
subMat_v1<uchar, int>,
subMat_v1<uchar, float>,
subMat_v1<uchar, double>
},
{
subMat_v1<schar, uchar>,
subMat_v1<schar, schar>,
subMat_v1<schar, ushort>,
subMat_v1<schar, short>,
subMat_v1<schar, int>,
subMat_v1<schar, float>,
subMat_v1<schar, double>
},
{
0 /*subMat_v1<ushort, uchar>*/,
0 /*subMat_v1<ushort, schar>*/,
subMat_v1<ushort, ushort>,
subMat_v1<ushort, short>,
subMat_v1<ushort, int>,
subMat_v1<ushort, float>,
subMat_v1<ushort, double>
},
{
0 /*subMat_v1<short, uchar>*/,
0 /*subMat_v1<short, schar>*/,
subMat_v1<short, ushort>,
subMat_v1<short, short>,
subMat_v1<short, int>,
subMat_v1<short, float>,
subMat_v1<short, double>
},
{
0 /*subMat_v1<int, uchar>*/,
0 /*subMat_v1<int, schar>*/,
0 /*subMat_v1<int, ushort>*/,
0 /*subMat_v1<int, short>*/,
subMat_v1<int, int>,
subMat_v1<int, float>,
subMat_v1<int, double>
},
{
0 /*subMat_v1<float, uchar>*/,
0 /*subMat_v1<float, schar>*/,
0 /*subMat_v1<float, ushort>*/,
0 /*subMat_v1<float, short>*/,
0 /*subMat_v1<float, int>*/,
subMat_v1<float, float>,
subMat_v1<float, double>
},
{
0 /*subMat_v1<double, uchar>*/,
0 /*subMat_v1<double, schar>*/,
0 /*subMat_v1<double, ushort>*/,
0 /*subMat_v1<double, short>*/,
0 /*subMat_v1<double, int>*/,
0 /*subMat_v1<double, float>*/,
subMat_v1<double, double>
}
};
}}}
namespace arithm
{
void subMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
const int sdepth = src1.depth();
const int ddepth = dst.depth();
CV_DbgAssert( sdepth < 7 && ddepth < 7 );
GpuMat src1_ = src1.reshape(1);
GpuMat src2_ = src2.reshape(1);
GpuMat dst_ = dst.reshape(1);
if (mask.empty() && (sdepth == CV_8U || sdepth == CV_16U) && ddepth == sdepth)
{
device::transform(src1, src2, dst, VSub4(), WithOutMask(), stream);
}
const intptr_t src1ptr = reinterpret_cast<intptr_t>(src1_.data);
const intptr_t src2ptr = reinterpret_cast<intptr_t>(src2_.data);
const intptr_t dstptr = reinterpret_cast<intptr_t>(dst_.data);
const bool isAllAligned = (src1ptr & 31) == 0 && (src2ptr & 31) == 0 && (dstptr & 31) == 0;
void subMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream
)
if (isAllAligned
)
{
device::transform(src1, src2, dst, VSub2(), WithOutMask(), stream);
if (sdepth == CV_8U && (src1_.cols & 3) == 0)
{
subMat_v4(src1_, src2_, dst_, stream);
return;
}
template <typename T, typename D>
void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
else if (sdepth == CV_16U && (src1_.cols & 1) == 0)
{
if (mask.data)
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, SubMat<T, D>(), mask, stream);
else
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, SubMat<T, D>(), WithOutMask(), stream);
subMat_v2(src1_, src2_, dst_, stream);
return;
}
}
}
const func_t func = funcs[sdepth][ddepth];
if (!func)
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
template void subMat<uchar, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<uchar, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<uchar, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<uchar, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<uchar, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<uchar, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<uchar, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<schar, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<schar, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<schar, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<schar, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<schar, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<schar, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<schar, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<ushort, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<ushort, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<ushort, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<ushort, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<ushort, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<ushort, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<ushort, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<short, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<short, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<short, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<short, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<short, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<short, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<short, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<int, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<int, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<int, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<int, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<int, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<int, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<int, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<float, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<double, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<double, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<double, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<double, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<double, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<double, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
func(src1_, src2_, dst_, mask, stream);
}
#endif
// CUDA_DISABLER
#endif
modules/cudaarithm/src/cuda/sub_scalar.cu
View file @
156f86ea
...
...
@@ -40,110 +40,164 @@
//
//M*/
#i
f !defined CUDA_DISABLER
#i
nclude "opencv2/opencv_modules.hpp"
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/transform.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/simd_functions.hpp"
#ifndef HAVE_OPENCV_CUDEV
#
include "arithm_func_traits.hpp
"
#
error "opencv_cudev is required
"
using namespace cv::cuda;
using namespace cv::cuda::device;
#else
namespace arithm
#include "opencv2/cudev.hpp"
using namespace cv::cudev;
void subScalar(const GpuMat& src, cv::Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int);
namespace
{
template <typename
T, typename S, typename D> struct SubScalar : unary_function<T, D
>
template <typename
SrcType, typename ScalarType, typename DstType> struct SubScalarOp : unary_function<SrcType, DstType
>
{
S val;
int scale;
ScalarType val;
__host__ SubScalar(S val_, int scale_) : val(val_), scale(scale_) {}
__device__ __forceinline__ DstType operator ()(SrcType a) const
{
return saturate_cast<DstType>(saturate_cast<ScalarType>(a) - val);
}
};
__device__ __forceinline__ D operator ()(T a) const
template <typename SrcType, typename ScalarType, typename DstType> struct SubScalarOpInv : unary_function<SrcType, DstType>
{
return saturate_cast<D>(scale * (a - val));
ScalarType val;
__device__ __forceinline__ DstType operator ()(SrcType a) const
{
return saturate_cast<DstType>(val - saturate_cast<ScalarType>(a));
}
};
}
namespace cv { namespace cuda { namespace device
{
template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::SubScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
template <typename ScalarDepth> struct TransformPolicy : DefaultTransformPolicy
{
};
}}}
template <> struct TransformPolicy<double> : DefaultTransformPolicy
{
enum {
shift = 1
};
};
namespace arithm
{
template <typename T, typename S, typename D>
void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
template <typename SrcType, typename ScalarDepth, typename DstType>
void subScalarImpl(const GpuMat& src, cv::Scalar value, bool inv, GpuMat& dst, const GpuMat& mask, Stream& stream)
{
typedef typename MakeVec<ScalarDepth, VecTraits<SrcType>::cn>::type ScalarType;
cv::Scalar_<ScalarDepth> value_ = value;
if (inv)
{
SubScalar<T, S, D> op(static_cast<S>(val), inv ? -1 : 1);
SubScalarOpInv<SrcType, ScalarType, DstType> op;
op.val = VecTraits<ScalarType>::make(value_.val);
if (mask.data)
device::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, mask
, stream);
gridTransformUnary_< TransformPolicy<ScalarDepth> >(globPtr<SrcType>(src), globPtr<DstType>(dst), op, globPtr<uchar>(mask)
, stream);
else
device::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask()
, stream);
gridTransformUnary_< TransformPolicy<ScalarDepth> >(globPtr<SrcType>(src), globPtr<DstType>(dst), op
, stream);
}
else
{
SubScalarOp<SrcType, ScalarType, DstType> op;
op.val = VecTraits<ScalarType>::make(value_.val);
if (mask.data)
gridTransformUnary_< TransformPolicy<ScalarDepth> >(globPtr<SrcType>(src), globPtr<DstType>(dst), op, globPtr<uchar>(mask), stream);
else
gridTransformUnary_< TransformPolicy<ScalarDepth> >(globPtr<SrcType>(src), globPtr<DstType>(dst), op, stream);
}
}
}
void subScalar(const GpuMat& src, cv::Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int)
{
typedef void (*func_t)(const GpuMat& src, cv::Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, Stream& stream);
static const func_t funcs[7][7][4] =
{
{
{subScalarImpl<uchar, float, uchar>, subScalarImpl<uchar2, float, uchar2>, subScalarImpl<uchar3, float, uchar3>, subScalarImpl<uchar4, float, uchar4>},
{subScalarImpl<uchar, float, schar>, subScalarImpl<uchar2, float, char2>, subScalarImpl<uchar3, float, char3>, subScalarImpl<uchar4, float, char4>},
{subScalarImpl<uchar, float, ushort>, subScalarImpl<uchar2, float, ushort2>, subScalarImpl<uchar3, float, ushort3>, subScalarImpl<uchar4, float, ushort4>},
{subScalarImpl<uchar, float, short>, subScalarImpl<uchar2, float, short2>, subScalarImpl<uchar3, float, short3>, subScalarImpl<uchar4, float, short4>},
{subScalarImpl<uchar, float, int>, subScalarImpl<uchar2, float, int2>, subScalarImpl<uchar3, float, int3>, subScalarImpl<uchar4, float, int4>},
{subScalarImpl<uchar, float, float>, subScalarImpl<uchar2, float, float2>, subScalarImpl<uchar3, float, float3>, subScalarImpl<uchar4, float, float4>},
{subScalarImpl<uchar, double, double>, subScalarImpl<uchar2, double, double2>, subScalarImpl<uchar3, double, double3>, subScalarImpl<uchar4, double, double4>}
},
{
{subScalarImpl<schar, float, uchar>, subScalarImpl<char2, float, uchar2>, subScalarImpl<char3, float, uchar3>, subScalarImpl<char4, float, uchar4>},
{subScalarImpl<schar, float, schar>, subScalarImpl<char2, float, char2>, subScalarImpl<char3, float, char3>, subScalarImpl<char4, float, char4>},
{subScalarImpl<schar, float, ushort>, subScalarImpl<char2, float, ushort2>, subScalarImpl<char3, float, ushort3>, subScalarImpl<char4, float, ushort4>},
{subScalarImpl<schar, float, short>, subScalarImpl<char2, float, short2>, subScalarImpl<char3, float, short3>, subScalarImpl<char4, float, short4>},
{subScalarImpl<schar, float, int>, subScalarImpl<char2, float, int2>, subScalarImpl<char3, float, int3>, subScalarImpl<char4, float, int4>},
{subScalarImpl<schar, float, float>, subScalarImpl<char2, float, float2>, subScalarImpl<char3, float, float3>, subScalarImpl<char4, float, float4>},
{subScalarImpl<schar, double, double>, subScalarImpl<char2, double, double2>, subScalarImpl<char3, double, double3>, subScalarImpl<char4, double, double4>}
},
{
{0 /*subScalarImpl<ushort, float, uchar>*/, 0 /*subScalarImpl<ushort2, float, uchar2>*/, 0 /*subScalarImpl<ushort3, float, uchar3>*/, 0 /*subScalarImpl<ushort4, float, uchar4>*/},
{0 /*subScalarImpl<ushort, float, schar>*/, 0 /*subScalarImpl<ushort2, float, char2>*/, 0 /*subScalarImpl<ushort3, float, char3>*/, 0 /*subScalarImpl<ushort4, float, char4>*/},
{subScalarImpl<ushort, float, ushort>, subScalarImpl<ushort2, float, ushort2>, subScalarImpl<ushort3, float, ushort3>, subScalarImpl<ushort4, float, ushort4>},
{subScalarImpl<ushort, float, short>, subScalarImpl<ushort2, float, short2>, subScalarImpl<ushort3, float, short3>, subScalarImpl<ushort4, float, short4>},
{subScalarImpl<ushort, float, int>, subScalarImpl<ushort2, float, int2>, subScalarImpl<ushort3, float, int3>, subScalarImpl<ushort4, float, int4>},
{subScalarImpl<ushort, float, float>, subScalarImpl<ushort2, float, float2>, subScalarImpl<ushort3, float, float3>, subScalarImpl<ushort4, float, float4>},
{subScalarImpl<ushort, double, double>, subScalarImpl<ushort2, double, double2>, subScalarImpl<ushort3, double, double3>, subScalarImpl<ushort4, double, double4>}
},
{
{0 /*subScalarImpl<short, float, uchar>*/, 0 /*subScalarImpl<short2, float, uchar2>*/, 0 /*subScalarImpl<short3, float, uchar3>*/, 0 /*subScalarImpl<short4, float, uchar4>*/},
{0 /*subScalarImpl<short, float, schar>*/, 0 /*subScalarImpl<short2, float, char2>*/, 0 /*subScalarImpl<short3, float, char3>*/, 0 /*subScalarImpl<short4, float, char4>*/},
{subScalarImpl<short, float, ushort>, subScalarImpl<short2, float, ushort2>, subScalarImpl<short3, float, ushort3>, subScalarImpl<short4, float, ushort4>},
{subScalarImpl<short, float, short>, subScalarImpl<short2, float, short2>, subScalarImpl<short3, float, short3>, subScalarImpl<short4, float, short4>},
{subScalarImpl<short, float, int>, subScalarImpl<short2, float, int2>, subScalarImpl<short3, float, int3>, subScalarImpl<short4, float, int4>},
{subScalarImpl<short, float, float>, subScalarImpl<short2, float, float2>, subScalarImpl<short3, float, float3>, subScalarImpl<short4, float, float4>},
{subScalarImpl<short, double, double>, subScalarImpl<short2, double, double2>, subScalarImpl<short3, double, double3>, subScalarImpl<short4, double, double4>}
},
{
{0 /*subScalarImpl<int, float, uchar>*/, 0 /*subScalarImpl<int2, float, uchar2>*/, 0 /*subScalarImpl<int3, float, uchar3>*/, 0 /*subScalarImpl<int4, float, uchar4>*/},
{0 /*subScalarImpl<int, float, schar>*/, 0 /*subScalarImpl<int2, float, char2>*/, 0 /*subScalarImpl<int3, float, char3>*/, 0 /*subScalarImpl<int4, float, char4>*/},
{0 /*subScalarImpl<int, float, ushort>*/, 0 /*subScalarImpl<int2, float, ushort2>*/, 0 /*subScalarImpl<int3, float, ushort3>*/, 0 /*subScalarImpl<int4, float, ushort4>*/},
{0 /*subScalarImpl<int, float, short>*/, 0 /*subScalarImpl<int2, float, short2>*/, 0 /*subScalarImpl<int3, float, short3>*/, 0 /*subScalarImpl<int4, float, short4>*/},
{subScalarImpl<int, float, int>, subScalarImpl<int2, float, int2>, subScalarImpl<int3, float, int3>, subScalarImpl<int4, float, int4>},
{subScalarImpl<int, float, float>, subScalarImpl<int2, float, float2>, subScalarImpl<int3, float, float3>, subScalarImpl<int4, float, float4>},
{subScalarImpl<int, double, double>, subScalarImpl<int2, double, double2>, subScalarImpl<int3, double, double3>, subScalarImpl<int4, double, double4>}
},
{
{0 /*subScalarImpl<float, float, uchar>*/, 0 /*subScalarImpl<float2, float, uchar2>*/, 0 /*subScalarImpl<float3, float, uchar3>*/, 0 /*subScalarImpl<float4, float, uchar4>*/},
{0 /*subScalarImpl<float, float, schar>*/, 0 /*subScalarImpl<float2, float, char2>*/, 0 /*subScalarImpl<float3, float, char3>*/, 0 /*subScalarImpl<float4, float, char4>*/},
{0 /*subScalarImpl<float, float, ushort>*/, 0 /*subScalarImpl<float2, float, ushort2>*/, 0 /*subScalarImpl<float3, float, ushort3>*/, 0 /*subScalarImpl<float4, float, ushort4>*/},
{0 /*subScalarImpl<float, float, short>*/, 0 /*subScalarImpl<float2, float, short2>*/, 0 /*subScalarImpl<float3, float, short3>*/, 0 /*subScalarImpl<float4, float, short4>*/},
{0 /*subScalarImpl<float, float, int>*/, 0 /*subScalarImpl<float2, float, int2>*/, 0 /*subScalarImpl<float3, float, int3>*/, 0 /*subScalarImpl<float4, float, int4>*/},
{subScalarImpl<float, float, float>, subScalarImpl<float2, float, float2>, subScalarImpl<float3, float, float3>, subScalarImpl<float4, float, float4>},
{subScalarImpl<float, double, double>, subScalarImpl<float2, double, double2>, subScalarImpl<float3, double, double3>, subScalarImpl<float4, double, double4>}
},
{
{0 /*subScalarImpl<double, double, uchar>*/, 0 /*subScalarImpl<double2, double, uchar2>*/, 0 /*subScalarImpl<double3, double, uchar3>*/, 0 /*subScalarImpl<double4, double, uchar4>*/},
{0 /*subScalarImpl<double, double, schar>*/, 0 /*subScalarImpl<double2, double, char2>*/, 0 /*subScalarImpl<double3, double, char3>*/, 0 /*subScalarImpl<double4, double, char4>*/},
{0 /*subScalarImpl<double, double, ushort>*/, 0 /*subScalarImpl<double2, double, ushort2>*/, 0 /*subScalarImpl<double3, double, ushort3>*/, 0 /*subScalarImpl<double4, double, ushort4>*/},
{0 /*subScalarImpl<double, double, short>*/, 0 /*subScalarImpl<double2, double, short2>*/, 0 /*subScalarImpl<double3, double, short3>*/, 0 /*subScalarImpl<double4, double, short4>*/},
{0 /*subScalarImpl<double, double, int>*/, 0 /*subScalarImpl<double2, double, int2>*/, 0 /*subScalarImpl<double3, double, int3>*/, 0 /*subScalarImpl<double4, double, int4>*/},
{0 /*subScalarImpl<double, double, float>*/, 0 /*subScalarImpl<double2, double, float2>*/, 0 /*subScalarImpl<double3, double, float3>*/, 0 /*subScalarImpl<double4, double, float4>*/},
{subScalarImpl<double, double, double>, subScalarImpl<double2, double, double2>, subScalarImpl<double3, double, double3>, subScalarImpl<double4, double, double4>}
}
};
const int sdepth = src.depth();
const int ddepth = dst.depth();
const int cn = src.channels();
CV_DbgAssert( sdepth < 7 && ddepth < 7 && cn <= 4 );
const func_t func = funcs[sdepth][ddepth][cn - 1];
if (!func)
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
template void subScalar<uchar, float, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<uchar, float, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<uchar, float, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<uchar, float, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<uchar, float, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<uchar, float, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<uchar, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<schar, float, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<schar, float, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<schar, float, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<schar, float, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<schar, float, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<schar, float, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<schar, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<ushort, float, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<ushort, float, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<ushort, float, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<ushort, float, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<ushort, float, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<ushort, float, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<ushort, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<short, float, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<short, float, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<short, float, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<short, float, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<short, float, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<short, float, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<short, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<int, float, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<int, float, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<int, float, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<int, float, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<int, float, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<int, float, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<int, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<float, float, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<float, float, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<float, float, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<float, float, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<float, float, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<float, float, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<float, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<double, double, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<double, double, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<double, double, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<double, double, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<double, double, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<double, double, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<double, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
func(src, val, inv, dst, mask, stream);
}
#endif
// CUDA_DISABLER
#endif
modules/cudaarithm/src/element_operations.cpp
View file @
156f86ea
...
...
@@ -348,248 +348,9 @@ void cv::cuda::add(InputArray src1, InputArray src2, OutputArray dst, InputArray
////////////////////////////////////////////////////////////////////////
// subtract
namespace
arithm
{
void
subMat_v4
(
PtrStepSz
<
unsigned
int
>
src1
,
PtrStepSz
<
unsigned
int
>
src2
,
PtrStepSz
<
unsigned
int
>
dst
,
cudaStream_t
stream
);
void
subMat_v2
(
PtrStepSz
<
unsigned
int
>
src1
,
PtrStepSz
<
unsigned
int
>
src2
,
PtrStepSz
<
unsigned
int
>
dst
,
cudaStream_t
stream
);
template
<
typename
T
,
typename
D
>
void
subMat
(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
PtrStepb
mask
,
cudaStream_t
stream
);
}
void
subMat
(
const
GpuMat
&
src1
,
const
GpuMat
&
src2
,
GpuMat
&
dst
,
const
GpuMat
&
mask
,
double
,
Stream
&
_stream
,
int
);
static
void
subMat
(
const
GpuMat
&
src1
,
const
GpuMat
&
src2
,
GpuMat
&
dst
,
const
GpuMat
&
mask
,
double
,
Stream
&
_stream
,
int
)
{
typedef
void
(
*
func_t
)(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
PtrStepb
mask
,
cudaStream_t
stream
);
static
const
func_t
funcs
[
7
][
7
]
=
{
{
arithm
::
subMat
<
unsigned
char
,
unsigned
char
>
,
arithm
::
subMat
<
unsigned
char
,
signed
char
>
,
arithm
::
subMat
<
unsigned
char
,
unsigned
short
>
,
arithm
::
subMat
<
unsigned
char
,
short
>
,
arithm
::
subMat
<
unsigned
char
,
int
>
,
arithm
::
subMat
<
unsigned
char
,
float
>
,
arithm
::
subMat
<
unsigned
char
,
double
>
},
{
arithm
::
subMat
<
signed
char
,
unsigned
char
>
,
arithm
::
subMat
<
signed
char
,
signed
char
>
,
arithm
::
subMat
<
signed
char
,
unsigned
short
>
,
arithm
::
subMat
<
signed
char
,
short
>
,
arithm
::
subMat
<
signed
char
,
int
>
,
arithm
::
subMat
<
signed
char
,
float
>
,
arithm
::
subMat
<
signed
char
,
double
>
},
{
0
/*arithm::subMat<unsigned short, unsigned char>*/
,
0
/*arithm::subMat<unsigned short, signed char>*/
,
arithm
::
subMat
<
unsigned
short
,
unsigned
short
>
,
arithm
::
subMat
<
unsigned
short
,
short
>
,
arithm
::
subMat
<
unsigned
short
,
int
>
,
arithm
::
subMat
<
unsigned
short
,
float
>
,
arithm
::
subMat
<
unsigned
short
,
double
>
},
{
0
/*arithm::subMat<short, unsigned char>*/
,
0
/*arithm::subMat<short, signed char>*/
,
arithm
::
subMat
<
short
,
unsigned
short
>
,
arithm
::
subMat
<
short
,
short
>
,
arithm
::
subMat
<
short
,
int
>
,
arithm
::
subMat
<
short
,
float
>
,
arithm
::
subMat
<
short
,
double
>
},
{
0
/*arithm::subMat<int, unsigned char>*/
,
0
/*arithm::subMat<int, signed char>*/
,
0
/*arithm::subMat<int, unsigned short>*/
,
0
/*arithm::subMat<int, short>*/
,
arithm
::
subMat
<
int
,
int
>
,
arithm
::
subMat
<
int
,
float
>
,
arithm
::
subMat
<
int
,
double
>
},
{
0
/*arithm::subMat<float, unsigned char>*/
,
0
/*arithm::subMat<float, signed char>*/
,
0
/*arithm::subMat<float, unsigned short>*/
,
0
/*arithm::subMat<float, short>*/
,
0
/*arithm::subMat<float, int>*/
,
arithm
::
subMat
<
float
,
float
>
,
arithm
::
subMat
<
float
,
double
>
},
{
0
/*arithm::subMat<double, unsigned char>*/
,
0
/*arithm::subMat<double, signed char>*/
,
0
/*arithm::subMat<double, unsigned short>*/
,
0
/*arithm::subMat<double, short>*/
,
0
/*arithm::subMat<double, int>*/
,
0
/*arithm::subMat<double, float>*/
,
arithm
::
subMat
<
double
,
double
>
}
};
const
int
sdepth
=
src1
.
depth
();
const
int
ddepth
=
dst
.
depth
();
const
int
cn
=
src1
.
channels
();
cudaStream_t
stream
=
StreamAccessor
::
getStream
(
_stream
);
PtrStepSzb
src1_
(
src1
.
rows
,
src1
.
cols
*
cn
,
src1
.
data
,
src1
.
step
);
PtrStepSzb
src2_
(
src1
.
rows
,
src1
.
cols
*
cn
,
src2
.
data
,
src2
.
step
);
PtrStepSzb
dst_
(
src1
.
rows
,
src1
.
cols
*
cn
,
dst
.
data
,
dst
.
step
);
if
(
mask
.
empty
()
&&
(
sdepth
==
CV_8U
||
sdepth
==
CV_16U
)
&&
ddepth
==
sdepth
)
{
const
intptr_t
src1ptr
=
reinterpret_cast
<
intptr_t
>
(
src1_
.
data
);
const
intptr_t
src2ptr
=
reinterpret_cast
<
intptr_t
>
(
src2_
.
data
);
const
intptr_t
dstptr
=
reinterpret_cast
<
intptr_t
>
(
dst_
.
data
);
const
bool
isAllAligned
=
(
src1ptr
&
31
)
==
0
&&
(
src2ptr
&
31
)
==
0
&&
(
dstptr
&
31
)
==
0
;
if
(
isAllAligned
)
{
if
(
sdepth
==
CV_8U
&&
(
src1_
.
cols
&
3
)
==
0
)
{
const
int
vcols
=
src1_
.
cols
>>
2
;
arithm
::
subMat_v4
(
PtrStepSz
<
unsigned
int
>
(
src1_
.
rows
,
vcols
,
(
unsigned
int
*
)
src1_
.
data
,
src1_
.
step
),
PtrStepSz
<
unsigned
int
>
(
src1_
.
rows
,
vcols
,
(
unsigned
int
*
)
src2_
.
data
,
src2_
.
step
),
PtrStepSz
<
unsigned
int
>
(
src1_
.
rows
,
vcols
,
(
unsigned
int
*
)
dst_
.
data
,
dst_
.
step
),
stream
);
return
;
}
else
if
(
sdepth
==
CV_16U
&&
(
src1_
.
cols
&
1
)
==
0
)
{
const
int
vcols
=
src1_
.
cols
>>
1
;
arithm
::
subMat_v2
(
PtrStepSz
<
unsigned
int
>
(
src1_
.
rows
,
vcols
,
(
unsigned
int
*
)
src1_
.
data
,
src1_
.
step
),
PtrStepSz
<
unsigned
int
>
(
src1_
.
rows
,
vcols
,
(
unsigned
int
*
)
src2_
.
data
,
src2_
.
step
),
PtrStepSz
<
unsigned
int
>
(
src1_
.
rows
,
vcols
,
(
unsigned
int
*
)
dst_
.
data
,
dst_
.
step
),
stream
);
return
;
}
}
}
const
func_t
func
=
funcs
[
sdepth
][
ddepth
];
if
(
!
func
)
CV_Error
(
cv
::
Error
::
StsUnsupportedFormat
,
"Unsupported combination of source and destination types"
);
func
(
src1_
,
src2_
,
dst_
,
mask
,
stream
);
}
namespace
arithm
{
template
<
typename
T
,
typename
S
,
typename
D
>
void
subScalar
(
PtrStepSzb
src1
,
double
val
,
bool
inv
,
PtrStepSzb
dst
,
PtrStepb
mask
,
cudaStream_t
stream
);
}
static
void
subScalar
(
const
GpuMat
&
src
,
Scalar
val
,
bool
inv
,
GpuMat
&
dst
,
const
GpuMat
&
mask
,
double
,
Stream
&
_stream
,
int
)
{
typedef
void
(
*
func_t
)(
PtrStepSzb
src1
,
double
val
,
bool
inv
,
PtrStepSzb
dst
,
PtrStepb
mask
,
cudaStream_t
stream
);
static
const
func_t
funcs
[
7
][
7
]
=
{
{
arithm
::
subScalar
<
unsigned
char
,
float
,
unsigned
char
>
,
arithm
::
subScalar
<
unsigned
char
,
float
,
signed
char
>
,
arithm
::
subScalar
<
unsigned
char
,
float
,
unsigned
short
>
,
arithm
::
subScalar
<
unsigned
char
,
float
,
short
>
,
arithm
::
subScalar
<
unsigned
char
,
float
,
int
>
,
arithm
::
subScalar
<
unsigned
char
,
float
,
float
>
,
arithm
::
subScalar
<
unsigned
char
,
double
,
double
>
},
{
arithm
::
subScalar
<
signed
char
,
float
,
unsigned
char
>
,
arithm
::
subScalar
<
signed
char
,
float
,
signed
char
>
,
arithm
::
subScalar
<
signed
char
,
float
,
unsigned
short
>
,
arithm
::
subScalar
<
signed
char
,
float
,
short
>
,
arithm
::
subScalar
<
signed
char
,
float
,
int
>
,
arithm
::
subScalar
<
signed
char
,
float
,
float
>
,
arithm
::
subScalar
<
signed
char
,
double
,
double
>
},
{
0
/*arithm::subScalar<unsigned short, float, unsigned char>*/
,
0
/*arithm::subScalar<unsigned short, float, signed char>*/
,
arithm
::
subScalar
<
unsigned
short
,
float
,
unsigned
short
>
,
arithm
::
subScalar
<
unsigned
short
,
float
,
short
>
,
arithm
::
subScalar
<
unsigned
short
,
float
,
int
>
,
arithm
::
subScalar
<
unsigned
short
,
float
,
float
>
,
arithm
::
subScalar
<
unsigned
short
,
double
,
double
>
},
{
0
/*arithm::subScalar<short, float, unsigned char>*/
,
0
/*arithm::subScalar<short, float, signed char>*/
,
arithm
::
subScalar
<
short
,
float
,
unsigned
short
>
,
arithm
::
subScalar
<
short
,
float
,
short
>
,
arithm
::
subScalar
<
short
,
float
,
int
>
,
arithm
::
subScalar
<
short
,
float
,
float
>
,
arithm
::
subScalar
<
short
,
double
,
double
>
},
{
0
/*arithm::subScalar<int, float, unsigned char>*/
,
0
/*arithm::subScalar<int, float, signed char>*/
,
0
/*arithm::subScalar<int, float, unsigned short>*/
,
0
/*arithm::subScalar<int, float, short>*/
,
arithm
::
subScalar
<
int
,
float
,
int
>
,
arithm
::
subScalar
<
int
,
float
,
float
>
,
arithm
::
subScalar
<
int
,
double
,
double
>
},
{
0
/*arithm::subScalar<float, float, unsigned char>*/
,
0
/*arithm::subScalar<float, float, signed char>*/
,
0
/*arithm::subScalar<float, float, unsigned short>*/
,
0
/*arithm::subScalar<float, float, short>*/
,
0
/*arithm::subScalar<float, float, int>*/
,
arithm
::
subScalar
<
float
,
float
,
float
>
,
arithm
::
subScalar
<
float
,
double
,
double
>
},
{
0
/*arithm::subScalar<double, double, unsigned char>*/
,
0
/*arithm::subScalar<double, double, signed char>*/
,
0
/*arithm::subScalar<double, double, unsigned short>*/
,
0
/*arithm::subScalar<double, double, short>*/
,
0
/*arithm::subScalar<double, double, int>*/
,
0
/*arithm::subScalar<double, double, float>*/
,
arithm
::
subScalar
<
double
,
double
,
double
>
}
};
typedef
void
(
*
npp_func_t
)(
const
PtrStepSzb
src
,
Scalar
sc
,
PtrStepb
dst
,
cudaStream_t
stream
);
static
const
npp_func_t
npp_funcs
[
7
][
4
]
=
{
{
NppArithmScalar
<
CV_8U
,
1
,
nppiSubC_8u_C1RSfs
>::
call
,
0
,
NppArithmScalar
<
CV_8U
,
3
,
nppiSubC_8u_C3RSfs
>::
call
,
NppArithmScalar
<
CV_8U
,
4
,
nppiSubC_8u_C4RSfs
>::
call
},
{
0
,
0
,
0
,
0
},
{
NppArithmScalar
<
CV_16U
,
1
,
nppiSubC_16u_C1RSfs
>::
call
,
0
,
NppArithmScalar
<
CV_16U
,
3
,
nppiSubC_16u_C3RSfs
>::
call
,
NppArithmScalar
<
CV_16U
,
4
,
nppiSubC_16u_C4RSfs
>::
call
},
{
NppArithmScalar
<
CV_16S
,
1
,
nppiSubC_16s_C1RSfs
>::
call
,
NppArithmScalar
<
CV_16S
,
2
,
nppiSubC_16sc_C1RSfs
>::
call
,
NppArithmScalar
<
CV_16S
,
3
,
nppiSubC_16s_C3RSfs
>::
call
,
NppArithmScalar
<
CV_16S
,
4
,
nppiSubC_16s_C4RSfs
>::
call
},
{
NppArithmScalar
<
CV_32S
,
1
,
nppiSubC_32s_C1RSfs
>::
call
,
NppArithmScalar
<
CV_32S
,
2
,
nppiSubC_32sc_C1RSfs
>::
call
,
NppArithmScalar
<
CV_32S
,
3
,
nppiSubC_32s_C3RSfs
>::
call
,
0
},
{
NppArithmScalar
<
CV_32F
,
1
,
nppiSubC_32f_C1R
>::
call
,
NppArithmScalar
<
CV_32F
,
2
,
nppiSubC_32fc_C1R
>::
call
,
NppArithmScalar
<
CV_32F
,
3
,
nppiSubC_32f_C3R
>::
call
,
NppArithmScalar
<
CV_32F
,
4
,
nppiSubC_32f_C4R
>::
call
},
{
0
,
0
,
0
,
0
}
};
const
int
sdepth
=
src
.
depth
();
const
int
ddepth
=
dst
.
depth
();
const
int
cn
=
src
.
channels
();
cudaStream_t
stream
=
StreamAccessor
::
getStream
(
_stream
);
const
npp_func_t
npp_func
=
npp_funcs
[
sdepth
][
cn
-
1
];
if
(
ddepth
==
sdepth
&&
cn
>
1
&&
npp_func
!=
0
&&
!
inv
)
{
npp_func
(
src
,
val
,
dst
,
stream
);
return
;
}
CV_Assert
(
cn
==
1
);
const
func_t
func
=
funcs
[
sdepth
][
ddepth
];
if
(
!
func
)
CV_Error
(
cv
::
Error
::
StsUnsupportedFormat
,
"Unsupported combination of source and destination types"
);
func
(
src
,
val
[
0
],
inv
,
dst
,
mask
,
stream
);
}
void
subScalar
(
const
GpuMat
&
src
,
Scalar
val
,
bool
inv
,
GpuMat
&
dst
,
const
GpuMat
&
mask
,
double
,
Stream
&
stream
,
int
);
void
cv
::
cuda
::
subtract
(
InputArray
src1
,
InputArray
src2
,
OutputArray
dst
,
InputArray
mask
,
int
dtype
,
Stream
&
stream
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment