Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
156f86ea
Commit
156f86ea
authored
Jul 29, 2013
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
used new device layer for cv::gpu::subtract
parent
9c5da2ea
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
288 additions
and
433 deletions
+288
-433
sub_mat.cu
modules/cudaarithm/src/cuda/sub_mat.cu
+147
-107
sub_scalar.cu
modules/cudaarithm/src/cuda/sub_scalar.cu
+139
-85
element_operations.cpp
modules/cudaarithm/src/element_operations.cpp
+2
-241
No files found.
modules/cudaarithm/src/cuda/sub_mat.cu
View file @
156f86ea
...
@@ -40,146 +40,186 @@
...
@@ -40,146 +40,186 @@
//
//
//M*/
//M*/
#i
f !defined CUDA_DISABLER
#i
nclude "opencv2/opencv_modules.hpp"
#include "opencv2/core/cuda/common.hpp"
#ifndef HAVE_OPENCV_CUDEV
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/transform.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/simd_functions.hpp"
#
include "arithm_func_traits.hpp
"
#
error "opencv_cudev is required
"
using namespace cv::cuda;
#else
using namespace cv::cuda::device;
namespace arithm
#include "opencv2/cudev.hpp"
using namespace cv::cudev;
void subMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& _stream, int);
namespace
{
{
struct VSub4 : binary_function<uint, uint, uint
>
template <typename T, typename D> struct SubOp1 : binary_function<T, T, D
>
{
{
__device__ __forceinline__
uint operator ()(uint a, uint
b) const
__device__ __forceinline__
D operator ()(T a, T
b) const
{
{
return
vsub4(a,
b);
return
saturate_cast<D>(a -
b);
}
}
__host__ __device__ __forceinline__ VSub4() {}
__host__ __device__ __forceinline__ VSub4(const VSub4&) {}
};
};
struct VSub2 : binary_function<uint, uint, uint>
template <typename T, typename D>
void subMat_v1(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& stream)
{
if (mask.data)
gridTransformBinary(globPtr<T>(src1), globPtr<T>(src2), globPtr<D>(dst), SubOp1<T, D>(), globPtr<uchar>(mask), stream);
else
gridTransformBinary(globPtr<T>(src1), globPtr<T>(src2), globPtr<D>(dst), SubOp1<T, D>(), stream);
}
struct SubOp2 : binary_function<uint, uint, uint>
{
{
__device__ __forceinline__ uint operator ()(uint a, uint b) const
__device__ __forceinline__ uint operator ()(uint a, uint b) const
{
{
return vsub2(a, b);
return vsub2(a, b);
}
}
__host__ __device__ __forceinline__ VSub2() {}
__host__ __device__ __forceinline__ VSub2(const VSub2&) {}
};
};
template <typename T, typename D> struct SubMat : binary_function<T, T, D>
void subMat_v2(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
{
{
__device__ __forceinline__ D operator ()(T a, T b) const
const int vcols = src1.cols >> 1;
GlobPtrSz<uint> src1_ = globPtr((uint*) src1.data, src1.step, src1.rows, vcols);
GlobPtrSz<uint> src2_ = globPtr((uint*) src2.data, src2.step, src1.rows, vcols);
GlobPtrSz<uint> dst_ = globPtr((uint*) dst.data, dst.step, src1.rows, vcols);
gridTransformBinary(src1_, src2_, dst_, SubOp2(), stream);
}
struct SubOp4 : binary_function<uint, uint, uint>
{
__device__ __forceinline__ uint operator ()(uint a, uint b) const
{
{
return
saturate_cast<D>(a -
b);
return
vsub4(a,
b);
}
}
__host__ __device__ __forceinline__ SubMat() {}
__host__ __device__ __forceinline__ SubMat(const SubMat&) {}
};
};
void subMat_v4(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
{
const int vcols = src1.cols >> 2;
GlobPtrSz<uint> src1_ = globPtr((uint*) src1.data, src1.step, src1.rows, vcols);
GlobPtrSz<uint> src2_ = globPtr((uint*) src2.data, src2.step, src1.rows, vcols);
GlobPtrSz<uint> dst_ = globPtr((uint*) dst.data, dst.step, src1.rows, vcols);
gridTransformBinary(src1_, src2_, dst_, SubOp4(), stream);
}
}
}
namespace cv { namespace cuda { namespace device
void subMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int)
{
{
template <> struct TransformFunctorTraits< arithm::VSub4 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, Stream& stream);
static const func_t funcs[7][7] =
{
{
{
subMat_v1<uchar, uchar>,
subMat_v1<uchar, schar>,
subMat_v1<uchar, ushort>,
subMat_v1<uchar, short>,
subMat_v1<uchar, int>,
subMat_v1<uchar, float>,
subMat_v1<uchar, double>
},
{
subMat_v1<schar, uchar>,
subMat_v1<schar, schar>,
subMat_v1<schar, ushort>,
subMat_v1<schar, short>,
subMat_v1<schar, int>,
subMat_v1<schar, float>,
subMat_v1<schar, double>
},
{
0 /*subMat_v1<ushort, uchar>*/,
0 /*subMat_v1<ushort, schar>*/,
subMat_v1<ushort, ushort>,
subMat_v1<ushort, short>,
subMat_v1<ushort, int>,
subMat_v1<ushort, float>,
subMat_v1<ushort, double>
},
{
0 /*subMat_v1<short, uchar>*/,
0 /*subMat_v1<short, schar>*/,
subMat_v1<short, ushort>,
subMat_v1<short, short>,
subMat_v1<short, int>,
subMat_v1<short, float>,
subMat_v1<short, double>
},
{
0 /*subMat_v1<int, uchar>*/,
0 /*subMat_v1<int, schar>*/,
0 /*subMat_v1<int, ushort>*/,
0 /*subMat_v1<int, short>*/,
subMat_v1<int, int>,
subMat_v1<int, float>,
subMat_v1<int, double>
},
{
0 /*subMat_v1<float, uchar>*/,
0 /*subMat_v1<float, schar>*/,
0 /*subMat_v1<float, ushort>*/,
0 /*subMat_v1<float, short>*/,
0 /*subMat_v1<float, int>*/,
subMat_v1<float, float>,
subMat_v1<float, double>
},
{
0 /*subMat_v1<double, uchar>*/,
0 /*subMat_v1<double, schar>*/,
0 /*subMat_v1<double, ushort>*/,
0 /*subMat_v1<double, short>*/,
0 /*subMat_v1<double, int>*/,
0 /*subMat_v1<double, float>*/,
subMat_v1<double, double>
}
};
};
template <> struct TransformFunctorTraits< arithm::VSub2 > : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
const int sdepth = src1.depth();
{
const int ddepth = dst.depth();
};
template <typename T, typename D> struct TransformFunctorTraits< arithm::SubMat<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
CV_DbgAssert( sdepth < 7 && ddepth < 7 );
{
};
}}}
namespace arithm
GpuMat src1_ = src1.reshape(1);
{
GpuMat src2_ = src2.reshape(1);
void subMat_v4(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream)
GpuMat dst_ = dst.reshape(1);
{
device::transform(src1, src2, dst, VSub4(), WithOutMask(), stream);
}
void subMat_v2(PtrStepSz<uint> src1, PtrStepSz<uint> src2, PtrStepSz<uint> dst, cudaStream_t stream
)
if (mask.empty() && (sdepth == CV_8U || sdepth == CV_16U) && ddepth == sdepth
)
{
{
device::transform(src1, src2, dst, VSub2(), WithOutMask(), stream);
const intptr_t src1ptr = reinterpret_cast<intptr_t>(src1_.data);
}
const intptr_t src2ptr = reinterpret_cast<intptr_t>(src2_.data);
const intptr_t dstptr = reinterpret_cast<intptr_t>(dst_.data);
template <typename T, typename D>
const bool isAllAligned = (src1ptr & 31) == 0 && (src2ptr & 31) == 0 && (dstptr & 31) == 0;
void subMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
{
if (isAllAligned)
if (mask.data)
{
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, SubMat<T, D>(), mask, stream);
if (sdepth == CV_8U && (src1_.cols & 3) == 0)
else
{
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, SubMat<T, D>(), WithOutMask(), stream);
subMat_v4(src1_, src2_, dst_, stream);
return;
}
else if (sdepth == CV_16U && (src1_.cols & 1) == 0)
{
subMat_v2(src1_, src2_, dst_, stream);
return;
}
}
}
}
template void subMat<uchar, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
const func_t func = funcs[sdepth][ddepth];
template void subMat<uchar, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<uchar, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
if (!func)
template void subMat<uchar, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
template void subMat<uchar, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<uchar, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
func(src1_, src2_, dst_, mask, stream);
template void subMat<uchar, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<schar, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<schar, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<schar, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<schar, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<schar, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<schar, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<schar, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<ushort, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<ushort, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<ushort, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<ushort, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<ushort, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<ushort, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<ushort, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<short, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<short, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<short, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<short, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<short, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<short, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<short, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<int, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<int, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<int, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<int, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<int, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<int, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<int, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<float, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<double, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<double, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<double, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<double, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<double, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subMat<double, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subMat<double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
}
}
#endif
// CUDA_DISABLER
#endif
modules/cudaarithm/src/cuda/sub_scalar.cu
View file @
156f86ea
...
@@ -40,110 +40,164 @@
...
@@ -40,110 +40,164 @@
//
//
//M*/
//M*/
#i
f !defined CUDA_DISABLER
#i
nclude "opencv2/opencv_modules.hpp"
#include "opencv2/core/cuda/common.hpp"
#ifndef HAVE_OPENCV_CUDEV
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/transform.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/simd_functions.hpp"
#
include "arithm_func_traits.hpp
"
#
error "opencv_cudev is required
"
using namespace cv::cuda;
#else
using namespace cv::cuda::device;
namespace arithm
#include "opencv2/cudev.hpp"
using namespace cv::cudev;
void subScalar(const GpuMat& src, cv::Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int);
namespace
{
{
template <typename
T, typename S, typename D> struct SubScalar : unary_function<T, D
>
template <typename
SrcType, typename ScalarType, typename DstType> struct SubScalarOp : unary_function<SrcType, DstType
>
{
{
S val;
ScalarType val;
int scale;
__device__ __forceinline__ DstType operator ()(SrcType a) const
{
return saturate_cast<DstType>(saturate_cast<ScalarType>(a) - val);
}
};
__host__ SubScalar(S val_, int scale_) : val(val_), scale(scale_) {}
template <typename SrcType, typename ScalarType, typename DstType> struct SubScalarOpInv : unary_function<SrcType, DstType>
{
ScalarType val;
__device__ __forceinline__ D
operator ()(T
a) const
__device__ __forceinline__ D
stType operator ()(SrcType
a) const
{
{
return saturate_cast<D
>(scale * (a - val
));
return saturate_cast<D
stType>(val - saturate_cast<ScalarType>(a
));
}
}
};
};
}
namespace cv { namespace cuda { namespace device
template <typename ScalarDepth> struct TransformPolicy : DefaultTransformPolicy
{
template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::SubScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
{
{
};
};
}}}
template <> struct TransformPolicy<double> : DefaultTransformPolicy
{
enum {
shift = 1
};
};
namespace arithm
template <typename SrcType, typename ScalarDepth, typename DstType>
{
void subScalarImpl(const GpuMat& src, cv::Scalar value, bool inv, GpuMat& dst, const GpuMat& mask, Stream& stream)
template <typename T, typename S, typename D>
void subScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream)
{
{
SubScalar<T, S, D> op(static_cast<S>(val), inv ? -1 : 1)
;
typedef typename MakeVec<ScalarDepth, VecTraits<SrcType>::cn>::type ScalarType
;
if (mask.data)
cv::Scalar_<ScalarDepth> value_ = value;
device::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, mask, stream);
if (inv)
{
SubScalarOpInv<SrcType, ScalarType, DstType> op;
op.val = VecTraits<ScalarType>::make(value_.val);
if (mask.data)
gridTransformUnary_< TransformPolicy<ScalarDepth> >(globPtr<SrcType>(src), globPtr<DstType>(dst), op, globPtr<uchar>(mask), stream);
else
gridTransformUnary_< TransformPolicy<ScalarDepth> >(globPtr<SrcType>(src), globPtr<DstType>(dst), op, stream);
}
else
else
device::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
{
SubScalarOp<SrcType, ScalarType, DstType> op;
op.val = VecTraits<ScalarType>::make(value_.val);
if (mask.data)
gridTransformUnary_< TransformPolicy<ScalarDepth> >(globPtr<SrcType>(src), globPtr<DstType>(dst), op, globPtr<uchar>(mask), stream);
else
gridTransformUnary_< TransformPolicy<ScalarDepth> >(globPtr<SrcType>(src), globPtr<DstType>(dst), op, stream);
}
}
}
}
void subScalar(const GpuMat& src, cv::Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int)
{
typedef void (*func_t)(const GpuMat& src, cv::Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, Stream& stream);
static const func_t funcs[7][7][4] =
{
{
{subScalarImpl<uchar, float, uchar>, subScalarImpl<uchar2, float, uchar2>, subScalarImpl<uchar3, float, uchar3>, subScalarImpl<uchar4, float, uchar4>},
{subScalarImpl<uchar, float, schar>, subScalarImpl<uchar2, float, char2>, subScalarImpl<uchar3, float, char3>, subScalarImpl<uchar4, float, char4>},
{subScalarImpl<uchar, float, ushort>, subScalarImpl<uchar2, float, ushort2>, subScalarImpl<uchar3, float, ushort3>, subScalarImpl<uchar4, float, ushort4>},
{subScalarImpl<uchar, float, short>, subScalarImpl<uchar2, float, short2>, subScalarImpl<uchar3, float, short3>, subScalarImpl<uchar4, float, short4>},
{subScalarImpl<uchar, float, int>, subScalarImpl<uchar2, float, int2>, subScalarImpl<uchar3, float, int3>, subScalarImpl<uchar4, float, int4>},
{subScalarImpl<uchar, float, float>, subScalarImpl<uchar2, float, float2>, subScalarImpl<uchar3, float, float3>, subScalarImpl<uchar4, float, float4>},
{subScalarImpl<uchar, double, double>, subScalarImpl<uchar2, double, double2>, subScalarImpl<uchar3, double, double3>, subScalarImpl<uchar4, double, double4>}
},
{
{subScalarImpl<schar, float, uchar>, subScalarImpl<char2, float, uchar2>, subScalarImpl<char3, float, uchar3>, subScalarImpl<char4, float, uchar4>},
{subScalarImpl<schar, float, schar>, subScalarImpl<char2, float, char2>, subScalarImpl<char3, float, char3>, subScalarImpl<char4, float, char4>},
{subScalarImpl<schar, float, ushort>, subScalarImpl<char2, float, ushort2>, subScalarImpl<char3, float, ushort3>, subScalarImpl<char4, float, ushort4>},
{subScalarImpl<schar, float, short>, subScalarImpl<char2, float, short2>, subScalarImpl<char3, float, short3>, subScalarImpl<char4, float, short4>},
{subScalarImpl<schar, float, int>, subScalarImpl<char2, float, int2>, subScalarImpl<char3, float, int3>, subScalarImpl<char4, float, int4>},
{subScalarImpl<schar, float, float>, subScalarImpl<char2, float, float2>, subScalarImpl<char3, float, float3>, subScalarImpl<char4, float, float4>},
{subScalarImpl<schar, double, double>, subScalarImpl<char2, double, double2>, subScalarImpl<char3, double, double3>, subScalarImpl<char4, double, double4>}
},
{
{0 /*subScalarImpl<ushort, float, uchar>*/, 0 /*subScalarImpl<ushort2, float, uchar2>*/, 0 /*subScalarImpl<ushort3, float, uchar3>*/, 0 /*subScalarImpl<ushort4, float, uchar4>*/},
{0 /*subScalarImpl<ushort, float, schar>*/, 0 /*subScalarImpl<ushort2, float, char2>*/, 0 /*subScalarImpl<ushort3, float, char3>*/, 0 /*subScalarImpl<ushort4, float, char4>*/},
{subScalarImpl<ushort, float, ushort>, subScalarImpl<ushort2, float, ushort2>, subScalarImpl<ushort3, float, ushort3>, subScalarImpl<ushort4, float, ushort4>},
{subScalarImpl<ushort, float, short>, subScalarImpl<ushort2, float, short2>, subScalarImpl<ushort3, float, short3>, subScalarImpl<ushort4, float, short4>},
{subScalarImpl<ushort, float, int>, subScalarImpl<ushort2, float, int2>, subScalarImpl<ushort3, float, int3>, subScalarImpl<ushort4, float, int4>},
{subScalarImpl<ushort, float, float>, subScalarImpl<ushort2, float, float2>, subScalarImpl<ushort3, float, float3>, subScalarImpl<ushort4, float, float4>},
{subScalarImpl<ushort, double, double>, subScalarImpl<ushort2, double, double2>, subScalarImpl<ushort3, double, double3>, subScalarImpl<ushort4, double, double4>}
},
{
{0 /*subScalarImpl<short, float, uchar>*/, 0 /*subScalarImpl<short2, float, uchar2>*/, 0 /*subScalarImpl<short3, float, uchar3>*/, 0 /*subScalarImpl<short4, float, uchar4>*/},
{0 /*subScalarImpl<short, float, schar>*/, 0 /*subScalarImpl<short2, float, char2>*/, 0 /*subScalarImpl<short3, float, char3>*/, 0 /*subScalarImpl<short4, float, char4>*/},
{subScalarImpl<short, float, ushort>, subScalarImpl<short2, float, ushort2>, subScalarImpl<short3, float, ushort3>, subScalarImpl<short4, float, ushort4>},
{subScalarImpl<short, float, short>, subScalarImpl<short2, float, short2>, subScalarImpl<short3, float, short3>, subScalarImpl<short4, float, short4>},
{subScalarImpl<short, float, int>, subScalarImpl<short2, float, int2>, subScalarImpl<short3, float, int3>, subScalarImpl<short4, float, int4>},
{subScalarImpl<short, float, float>, subScalarImpl<short2, float, float2>, subScalarImpl<short3, float, float3>, subScalarImpl<short4, float, float4>},
{subScalarImpl<short, double, double>, subScalarImpl<short2, double, double2>, subScalarImpl<short3, double, double3>, subScalarImpl<short4, double, double4>}
},
{
{0 /*subScalarImpl<int, float, uchar>*/, 0 /*subScalarImpl<int2, float, uchar2>*/, 0 /*subScalarImpl<int3, float, uchar3>*/, 0 /*subScalarImpl<int4, float, uchar4>*/},
{0 /*subScalarImpl<int, float, schar>*/, 0 /*subScalarImpl<int2, float, char2>*/, 0 /*subScalarImpl<int3, float, char3>*/, 0 /*subScalarImpl<int4, float, char4>*/},
{0 /*subScalarImpl<int, float, ushort>*/, 0 /*subScalarImpl<int2, float, ushort2>*/, 0 /*subScalarImpl<int3, float, ushort3>*/, 0 /*subScalarImpl<int4, float, ushort4>*/},
{0 /*subScalarImpl<int, float, short>*/, 0 /*subScalarImpl<int2, float, short2>*/, 0 /*subScalarImpl<int3, float, short3>*/, 0 /*subScalarImpl<int4, float, short4>*/},
{subScalarImpl<int, float, int>, subScalarImpl<int2, float, int2>, subScalarImpl<int3, float, int3>, subScalarImpl<int4, float, int4>},
{subScalarImpl<int, float, float>, subScalarImpl<int2, float, float2>, subScalarImpl<int3, float, float3>, subScalarImpl<int4, float, float4>},
{subScalarImpl<int, double, double>, subScalarImpl<int2, double, double2>, subScalarImpl<int3, double, double3>, subScalarImpl<int4, double, double4>}
},
{
{0 /*subScalarImpl<float, float, uchar>*/, 0 /*subScalarImpl<float2, float, uchar2>*/, 0 /*subScalarImpl<float3, float, uchar3>*/, 0 /*subScalarImpl<float4, float, uchar4>*/},
{0 /*subScalarImpl<float, float, schar>*/, 0 /*subScalarImpl<float2, float, char2>*/, 0 /*subScalarImpl<float3, float, char3>*/, 0 /*subScalarImpl<float4, float, char4>*/},
{0 /*subScalarImpl<float, float, ushort>*/, 0 /*subScalarImpl<float2, float, ushort2>*/, 0 /*subScalarImpl<float3, float, ushort3>*/, 0 /*subScalarImpl<float4, float, ushort4>*/},
{0 /*subScalarImpl<float, float, short>*/, 0 /*subScalarImpl<float2, float, short2>*/, 0 /*subScalarImpl<float3, float, short3>*/, 0 /*subScalarImpl<float4, float, short4>*/},
{0 /*subScalarImpl<float, float, int>*/, 0 /*subScalarImpl<float2, float, int2>*/, 0 /*subScalarImpl<float3, float, int3>*/, 0 /*subScalarImpl<float4, float, int4>*/},
{subScalarImpl<float, float, float>, subScalarImpl<float2, float, float2>, subScalarImpl<float3, float, float3>, subScalarImpl<float4, float, float4>},
{subScalarImpl<float, double, double>, subScalarImpl<float2, double, double2>, subScalarImpl<float3, double, double3>, subScalarImpl<float4, double, double4>}
},
{
{0 /*subScalarImpl<double, double, uchar>*/, 0 /*subScalarImpl<double2, double, uchar2>*/, 0 /*subScalarImpl<double3, double, uchar3>*/, 0 /*subScalarImpl<double4, double, uchar4>*/},
{0 /*subScalarImpl<double, double, schar>*/, 0 /*subScalarImpl<double2, double, char2>*/, 0 /*subScalarImpl<double3, double, char3>*/, 0 /*subScalarImpl<double4, double, char4>*/},
{0 /*subScalarImpl<double, double, ushort>*/, 0 /*subScalarImpl<double2, double, ushort2>*/, 0 /*subScalarImpl<double3, double, ushort3>*/, 0 /*subScalarImpl<double4, double, ushort4>*/},
{0 /*subScalarImpl<double, double, short>*/, 0 /*subScalarImpl<double2, double, short2>*/, 0 /*subScalarImpl<double3, double, short3>*/, 0 /*subScalarImpl<double4, double, short4>*/},
{0 /*subScalarImpl<double, double, int>*/, 0 /*subScalarImpl<double2, double, int2>*/, 0 /*subScalarImpl<double3, double, int3>*/, 0 /*subScalarImpl<double4, double, int4>*/},
{0 /*subScalarImpl<double, double, float>*/, 0 /*subScalarImpl<double2, double, float2>*/, 0 /*subScalarImpl<double3, double, float3>*/, 0 /*subScalarImpl<double4, double, float4>*/},
{subScalarImpl<double, double, double>, subScalarImpl<double2, double, double2>, subScalarImpl<double3, double, double3>, subScalarImpl<double4, double, double4>}
}
};
const int sdepth = src.depth();
const int ddepth = dst.depth();
const int cn = src.channels();
CV_DbgAssert( sdepth < 7 && ddepth < 7 && cn <= 4 );
const func_t func = funcs[sdepth][ddepth][cn - 1];
if (!func)
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
template void subScalar<uchar, float, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
func(src, val, inv, dst, mask, stream);
template void subScalar<uchar, float, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<uchar, float, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<uchar, float, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<uchar, float, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<uchar, float, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<uchar, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<schar, float, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<schar, float, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<schar, float, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<schar, float, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<schar, float, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<schar, float, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<schar, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<ushort, float, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<ushort, float, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<ushort, float, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<ushort, float, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<ushort, float, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<ushort, float, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<ushort, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<short, float, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<short, float, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<short, float, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<short, float, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<short, float, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<short, float, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<short, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<int, float, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<int, float, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<int, float, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<int, float, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<int, float, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<int, float, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<int, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<float, float, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<float, float, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<float, float, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<float, float, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<float, float, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<float, float, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<float, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<double, double, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<double, double, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<double, double, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<double, double, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<double, double, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
//template void subScalar<double, double, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
template void subScalar<double, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, PtrStepb mask, cudaStream_t stream);
}
}
#endif
// CUDA_DISABLER
#endif
modules/cudaarithm/src/element_operations.cpp
View file @
156f86ea
...
@@ -348,248 +348,9 @@ void cv::cuda::add(InputArray src1, InputArray src2, OutputArray dst, InputArray
...
@@ -348,248 +348,9 @@ void cv::cuda::add(InputArray src1, InputArray src2, OutputArray dst, InputArray
////////////////////////////////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////
// subtract
// subtract
namespace
arithm
void
subMat
(
const
GpuMat
&
src1
,
const
GpuMat
&
src2
,
GpuMat
&
dst
,
const
GpuMat
&
mask
,
double
,
Stream
&
_stream
,
int
);
{
void
subMat_v4
(
PtrStepSz
<
unsigned
int
>
src1
,
PtrStepSz
<
unsigned
int
>
src2
,
PtrStepSz
<
unsigned
int
>
dst
,
cudaStream_t
stream
);
void
subMat_v2
(
PtrStepSz
<
unsigned
int
>
src1
,
PtrStepSz
<
unsigned
int
>
src2
,
PtrStepSz
<
unsigned
int
>
dst
,
cudaStream_t
stream
);
template
<
typename
T
,
typename
D
>
void
subMat
(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
PtrStepb
mask
,
cudaStream_t
stream
);
}
static
void
subMat
(
const
GpuMat
&
src1
,
const
GpuMat
&
src2
,
GpuMat
&
dst
,
const
GpuMat
&
mask
,
double
,
Stream
&
_stream
,
int
)
{
typedef
void
(
*
func_t
)(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
PtrStepb
mask
,
cudaStream_t
stream
);
static
const
func_t
funcs
[
7
][
7
]
=
{
{
arithm
::
subMat
<
unsigned
char
,
unsigned
char
>
,
arithm
::
subMat
<
unsigned
char
,
signed
char
>
,
arithm
::
subMat
<
unsigned
char
,
unsigned
short
>
,
arithm
::
subMat
<
unsigned
char
,
short
>
,
arithm
::
subMat
<
unsigned
char
,
int
>
,
arithm
::
subMat
<
unsigned
char
,
float
>
,
arithm
::
subMat
<
unsigned
char
,
double
>
},
{
arithm
::
subMat
<
signed
char
,
unsigned
char
>
,
arithm
::
subMat
<
signed
char
,
signed
char
>
,
arithm
::
subMat
<
signed
char
,
unsigned
short
>
,
arithm
::
subMat
<
signed
char
,
short
>
,
arithm
::
subMat
<
signed
char
,
int
>
,
arithm
::
subMat
<
signed
char
,
float
>
,
arithm
::
subMat
<
signed
char
,
double
>
},
{
0
/*arithm::subMat<unsigned short, unsigned char>*/
,
0
/*arithm::subMat<unsigned short, signed char>*/
,
arithm
::
subMat
<
unsigned
short
,
unsigned
short
>
,
arithm
::
subMat
<
unsigned
short
,
short
>
,
arithm
::
subMat
<
unsigned
short
,
int
>
,
arithm
::
subMat
<
unsigned
short
,
float
>
,
arithm
::
subMat
<
unsigned
short
,
double
>
},
{
0
/*arithm::subMat<short, unsigned char>*/
,
0
/*arithm::subMat<short, signed char>*/
,
arithm
::
subMat
<
short
,
unsigned
short
>
,
arithm
::
subMat
<
short
,
short
>
,
arithm
::
subMat
<
short
,
int
>
,
arithm
::
subMat
<
short
,
float
>
,
arithm
::
subMat
<
short
,
double
>
},
{
0
/*arithm::subMat<int, unsigned char>*/
,
0
/*arithm::subMat<int, signed char>*/
,
0
/*arithm::subMat<int, unsigned short>*/
,
0
/*arithm::subMat<int, short>*/
,
arithm
::
subMat
<
int
,
int
>
,
arithm
::
subMat
<
int
,
float
>
,
arithm
::
subMat
<
int
,
double
>
},
{
0
/*arithm::subMat<float, unsigned char>*/
,
0
/*arithm::subMat<float, signed char>*/
,
0
/*arithm::subMat<float, unsigned short>*/
,
0
/*arithm::subMat<float, short>*/
,
0
/*arithm::subMat<float, int>*/
,
arithm
::
subMat
<
float
,
float
>
,
arithm
::
subMat
<
float
,
double
>
},
{
0
/*arithm::subMat<double, unsigned char>*/
,
0
/*arithm::subMat<double, signed char>*/
,
0
/*arithm::subMat<double, unsigned short>*/
,
0
/*arithm::subMat<double, short>*/
,
0
/*arithm::subMat<double, int>*/
,
0
/*arithm::subMat<double, float>*/
,
arithm
::
subMat
<
double
,
double
>
}
};
const
int
sdepth
=
src1
.
depth
();
const
int
ddepth
=
dst
.
depth
();
const
int
cn
=
src1
.
channels
();
cudaStream_t
stream
=
StreamAccessor
::
getStream
(
_stream
);
PtrStepSzb
src1_
(
src1
.
rows
,
src1
.
cols
*
cn
,
src1
.
data
,
src1
.
step
);
PtrStepSzb
src2_
(
src1
.
rows
,
src1
.
cols
*
cn
,
src2
.
data
,
src2
.
step
);
PtrStepSzb
dst_
(
src1
.
rows
,
src1
.
cols
*
cn
,
dst
.
data
,
dst
.
step
);
if
(
mask
.
empty
()
&&
(
sdepth
==
CV_8U
||
sdepth
==
CV_16U
)
&&
ddepth
==
sdepth
)
{
const
intptr_t
src1ptr
=
reinterpret_cast
<
intptr_t
>
(
src1_
.
data
);
const
intptr_t
src2ptr
=
reinterpret_cast
<
intptr_t
>
(
src2_
.
data
);
const
intptr_t
dstptr
=
reinterpret_cast
<
intptr_t
>
(
dst_
.
data
);
const
bool
isAllAligned
=
(
src1ptr
&
31
)
==
0
&&
(
src2ptr
&
31
)
==
0
&&
(
dstptr
&
31
)
==
0
;
if
(
isAllAligned
)
{
if
(
sdepth
==
CV_8U
&&
(
src1_
.
cols
&
3
)
==
0
)
{
const
int
vcols
=
src1_
.
cols
>>
2
;
arithm
::
subMat_v4
(
PtrStepSz
<
unsigned
int
>
(
src1_
.
rows
,
vcols
,
(
unsigned
int
*
)
src1_
.
data
,
src1_
.
step
),
PtrStepSz
<
unsigned
int
>
(
src1_
.
rows
,
vcols
,
(
unsigned
int
*
)
src2_
.
data
,
src2_
.
step
),
PtrStepSz
<
unsigned
int
>
(
src1_
.
rows
,
vcols
,
(
unsigned
int
*
)
dst_
.
data
,
dst_
.
step
),
stream
);
return
;
}
else
if
(
sdepth
==
CV_16U
&&
(
src1_
.
cols
&
1
)
==
0
)
{
const
int
vcols
=
src1_
.
cols
>>
1
;
arithm
::
subMat_v2
(
PtrStepSz
<
unsigned
int
>
(
src1_
.
rows
,
vcols
,
(
unsigned
int
*
)
src1_
.
data
,
src1_
.
step
),
PtrStepSz
<
unsigned
int
>
(
src1_
.
rows
,
vcols
,
(
unsigned
int
*
)
src2_
.
data
,
src2_
.
step
),
PtrStepSz
<
unsigned
int
>
(
src1_
.
rows
,
vcols
,
(
unsigned
int
*
)
dst_
.
data
,
dst_
.
step
),
stream
);
return
;
}
}
}
const
func_t
func
=
funcs
[
sdepth
][
ddepth
];
void
subScalar
(
const
GpuMat
&
src
,
Scalar
val
,
bool
inv
,
GpuMat
&
dst
,
const
GpuMat
&
mask
,
double
,
Stream
&
stream
,
int
);
if
(
!
func
)
CV_Error
(
cv
::
Error
::
StsUnsupportedFormat
,
"Unsupported combination of source and destination types"
);
func
(
src1_
,
src2_
,
dst_
,
mask
,
stream
);
}
namespace
arithm
{
template
<
typename
T
,
typename
S
,
typename
D
>
void
subScalar
(
PtrStepSzb
src1
,
double
val
,
bool
inv
,
PtrStepSzb
dst
,
PtrStepb
mask
,
cudaStream_t
stream
);
}
static
void
subScalar
(
const
GpuMat
&
src
,
Scalar
val
,
bool
inv
,
GpuMat
&
dst
,
const
GpuMat
&
mask
,
double
,
Stream
&
_stream
,
int
)
{
typedef
void
(
*
func_t
)(
PtrStepSzb
src1
,
double
val
,
bool
inv
,
PtrStepSzb
dst
,
PtrStepb
mask
,
cudaStream_t
stream
);
static
const
func_t
funcs
[
7
][
7
]
=
{
{
arithm
::
subScalar
<
unsigned
char
,
float
,
unsigned
char
>
,
arithm
::
subScalar
<
unsigned
char
,
float
,
signed
char
>
,
arithm
::
subScalar
<
unsigned
char
,
float
,
unsigned
short
>
,
arithm
::
subScalar
<
unsigned
char
,
float
,
short
>
,
arithm
::
subScalar
<
unsigned
char
,
float
,
int
>
,
arithm
::
subScalar
<
unsigned
char
,
float
,
float
>
,
arithm
::
subScalar
<
unsigned
char
,
double
,
double
>
},
{
arithm
::
subScalar
<
signed
char
,
float
,
unsigned
char
>
,
arithm
::
subScalar
<
signed
char
,
float
,
signed
char
>
,
arithm
::
subScalar
<
signed
char
,
float
,
unsigned
short
>
,
arithm
::
subScalar
<
signed
char
,
float
,
short
>
,
arithm
::
subScalar
<
signed
char
,
float
,
int
>
,
arithm
::
subScalar
<
signed
char
,
float
,
float
>
,
arithm
::
subScalar
<
signed
char
,
double
,
double
>
},
{
0
/*arithm::subScalar<unsigned short, float, unsigned char>*/
,
0
/*arithm::subScalar<unsigned short, float, signed char>*/
,
arithm
::
subScalar
<
unsigned
short
,
float
,
unsigned
short
>
,
arithm
::
subScalar
<
unsigned
short
,
float
,
short
>
,
arithm
::
subScalar
<
unsigned
short
,
float
,
int
>
,
arithm
::
subScalar
<
unsigned
short
,
float
,
float
>
,
arithm
::
subScalar
<
unsigned
short
,
double
,
double
>
},
{
0
/*arithm::subScalar<short, float, unsigned char>*/
,
0
/*arithm::subScalar<short, float, signed char>*/
,
arithm
::
subScalar
<
short
,
float
,
unsigned
short
>
,
arithm
::
subScalar
<
short
,
float
,
short
>
,
arithm
::
subScalar
<
short
,
float
,
int
>
,
arithm
::
subScalar
<
short
,
float
,
float
>
,
arithm
::
subScalar
<
short
,
double
,
double
>
},
{
0
/*arithm::subScalar<int, float, unsigned char>*/
,
0
/*arithm::subScalar<int, float, signed char>*/
,
0
/*arithm::subScalar<int, float, unsigned short>*/
,
0
/*arithm::subScalar<int, float, short>*/
,
arithm
::
subScalar
<
int
,
float
,
int
>
,
arithm
::
subScalar
<
int
,
float
,
float
>
,
arithm
::
subScalar
<
int
,
double
,
double
>
},
{
0
/*arithm::subScalar<float, float, unsigned char>*/
,
0
/*arithm::subScalar<float, float, signed char>*/
,
0
/*arithm::subScalar<float, float, unsigned short>*/
,
0
/*arithm::subScalar<float, float, short>*/
,
0
/*arithm::subScalar<float, float, int>*/
,
arithm
::
subScalar
<
float
,
float
,
float
>
,
arithm
::
subScalar
<
float
,
double
,
double
>
},
{
0
/*arithm::subScalar<double, double, unsigned char>*/
,
0
/*arithm::subScalar<double, double, signed char>*/
,
0
/*arithm::subScalar<double, double, unsigned short>*/
,
0
/*arithm::subScalar<double, double, short>*/
,
0
/*arithm::subScalar<double, double, int>*/
,
0
/*arithm::subScalar<double, double, float>*/
,
arithm
::
subScalar
<
double
,
double
,
double
>
}
};
typedef
void
(
*
npp_func_t
)(
const
PtrStepSzb
src
,
Scalar
sc
,
PtrStepb
dst
,
cudaStream_t
stream
);
static
const
npp_func_t
npp_funcs
[
7
][
4
]
=
{
{
NppArithmScalar
<
CV_8U
,
1
,
nppiSubC_8u_C1RSfs
>::
call
,
0
,
NppArithmScalar
<
CV_8U
,
3
,
nppiSubC_8u_C3RSfs
>::
call
,
NppArithmScalar
<
CV_8U
,
4
,
nppiSubC_8u_C4RSfs
>::
call
},
{
0
,
0
,
0
,
0
},
{
NppArithmScalar
<
CV_16U
,
1
,
nppiSubC_16u_C1RSfs
>::
call
,
0
,
NppArithmScalar
<
CV_16U
,
3
,
nppiSubC_16u_C3RSfs
>::
call
,
NppArithmScalar
<
CV_16U
,
4
,
nppiSubC_16u_C4RSfs
>::
call
},
{
NppArithmScalar
<
CV_16S
,
1
,
nppiSubC_16s_C1RSfs
>::
call
,
NppArithmScalar
<
CV_16S
,
2
,
nppiSubC_16sc_C1RSfs
>::
call
,
NppArithmScalar
<
CV_16S
,
3
,
nppiSubC_16s_C3RSfs
>::
call
,
NppArithmScalar
<
CV_16S
,
4
,
nppiSubC_16s_C4RSfs
>::
call
},
{
NppArithmScalar
<
CV_32S
,
1
,
nppiSubC_32s_C1RSfs
>::
call
,
NppArithmScalar
<
CV_32S
,
2
,
nppiSubC_32sc_C1RSfs
>::
call
,
NppArithmScalar
<
CV_32S
,
3
,
nppiSubC_32s_C3RSfs
>::
call
,
0
},
{
NppArithmScalar
<
CV_32F
,
1
,
nppiSubC_32f_C1R
>::
call
,
NppArithmScalar
<
CV_32F
,
2
,
nppiSubC_32fc_C1R
>::
call
,
NppArithmScalar
<
CV_32F
,
3
,
nppiSubC_32f_C3R
>::
call
,
NppArithmScalar
<
CV_32F
,
4
,
nppiSubC_32f_C4R
>::
call
},
{
0
,
0
,
0
,
0
}
};
const
int
sdepth
=
src
.
depth
();
const
int
ddepth
=
dst
.
depth
();
const
int
cn
=
src
.
channels
();
cudaStream_t
stream
=
StreamAccessor
::
getStream
(
_stream
);
const
npp_func_t
npp_func
=
npp_funcs
[
sdepth
][
cn
-
1
];
if
(
ddepth
==
sdepth
&&
cn
>
1
&&
npp_func
!=
0
&&
!
inv
)
{
npp_func
(
src
,
val
,
dst
,
stream
);
return
;
}
CV_Assert
(
cn
==
1
);
const
func_t
func
=
funcs
[
sdepth
][
ddepth
];
if
(
!
func
)
CV_Error
(
cv
::
Error
::
StsUnsupportedFormat
,
"Unsupported combination of source and destination types"
);
func
(
src
,
val
[
0
],
inv
,
dst
,
mask
,
stream
);
}
void
cv
::
cuda
::
subtract
(
InputArray
src1
,
InputArray
src2
,
OutputArray
dst
,
InputArray
mask
,
int
dtype
,
Stream
&
stream
)
void
cv
::
cuda
::
subtract
(
InputArray
src1
,
InputArray
src2
,
OutputArray
dst
,
InputArray
mask
,
int
dtype
,
Stream
&
stream
)
{
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment