Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
e7dba695
Commit
e7dba695
authored
Jul 29, 2013
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
used new device layer for cv::gpu::multiply
parent
156f86ea
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
269 additions
and
426 deletions
+269
-426
mul_mat.cu
modules/cudaarithm/src/cuda/mul_mat.cu
+142
-129
mul_scalar.cu
modules/cudaarithm/src/cuda/mul_scalar.cu
+121
-83
element_operations.cpp
modules/cudaarithm/src/element_operations.cpp
+6
-214
No files found.
modules/cudaarithm/src/cuda/mul_mat.cu
View file @
e7dba695
...
...
@@ -40,172 +40,185 @@
//
//M*/
#i
f !defined CUDA_DISABLER
#i
nclude "opencv2/opencv_modules.hpp"
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/transform.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/simd_functions.hpp"
#ifndef HAVE_OPENCV_CUDEV
#
include "arithm_func_traits.hpp
"
#
error "opencv_cudev is required
"
using namespace cv::cuda;
using namespace cv::cuda::device;
#else
namespace arithm
{
struct Mul_8uc4_32f : binary_function<uint, float, uint>
{
__device__ __forceinline__ uint operator ()(uint a, float b) const
{
uint res = 0;
res |= (saturate_cast<uchar>((0xffu & (a )) * b) );
res |= (saturate_cast<uchar>((0xffu & (a >> 8)) * b) << 8);
res |= (saturate_cast<uchar>((0xffu & (a >> 16)) * b) << 16);
res |= (saturate_cast<uchar>((0xffu & (a >> 24)) * b) << 24);
return res;
}
__host__ __device__ __forceinline__ Mul_8uc4_32f() {}
__host__ __device__ __forceinline__ Mul_8uc4_32f(const Mul_8uc4_32f&) {}
};
#include "opencv2/cudev.hpp"
struct Mul_16sc4_32f : binary_function<short4, float, short4>
{
__device__ __forceinline__ short4 operator ()(short4 a, float b) const
{
return make_short4(saturate_cast<short>(a.x * b), saturate_cast<short>(a.y * b),
saturate_cast<short>(a.z * b), saturate_cast<short>(a.w * b));
}
using namespace cv::cudev;
__host__ __device__ __forceinline__ Mul_16sc4_32f() {}
__host__ __device__ __forceinline__ Mul_16sc4_32f(const Mul_16sc4_32f&) {}
}
;
void mulMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double scale, Stream& stream, int);
void mulMat_8uc4_32f(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream);
void mulMat_16sc4_32f(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
;
template <typename T, typename D> struct Mul : binary_function<T, T, D>
namespace
{
template <typename T, typename D> struct MulOp : binary_function<T, T, D>
{
__device__ __forceinline__ D operator ()(T a, T b) const
{
return saturate_cast<D>(a * b);
}
__host__ __device__ __forceinline__ Mul() {}
__host__ __device__ __forceinline__ Mul(const Mul&) {}
};
template <typename T, typename S, typename D> struct MulScale : binary_function<T, T, D>
template <typename T, typename S, typename D> struct MulScale
Op
: binary_function<T, T, D>
{
S scale;
__host__ explicit MulScale(S scale_) : scale(scale_) {}
__device__ __forceinline__ D operator ()(T a, T b) const
{
return saturate_cast<D>(scale * a * b);
}
};
}
namespace cv { namespace cuda { namespace device
{
template <> struct TransformFunctorTraits<arithm::Mul_8uc4_32f> : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
{
};
template <typename
T, typename D> struct TransformFunctorTraits< arithm::Mul<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
template <typename
ScalarDepth> struct TransformPolicy : DefaultTransformPolicy
{
};
template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::MulScale<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
template <> struct TransformPolicy<double> : DefaultTransformPolicy
{
enum {
shift = 1
};
};
}}}
namespace arithm
{
void mulMat_8uc4_32f(PtrStepSz<uint> src1, PtrStepSzf src2, PtrStepSz<uint> dst, cudaStream_t stream)
{
device::transform(src1, src2, dst, Mul_8uc4_32f(), WithOutMask(), stream);
}
void mulMat_16sc4_32f(PtrStepSz<short4> src1, PtrStepSzf src2, PtrStepSz<short4> dst, cudaStream_t stream)
{
device::transform(src1, src2, dst, Mul_16sc4_32f(), WithOutMask(), stream);
}
template <typename T, typename S, typename D>
void mulMat
(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t
stream)
void mulMat
Impl(const GpuMat& src1, const GpuMat& src2, const GpuMat& dst, double scale, Stream&
stream)
{
if (scale == 1)
{
Mul<T, D> op;
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask()
, stream);
Mul
Op
<T, D> op;
gridTransformBinary_< TransformPolicy<S> >(globPtr<T>(src1), globPtr<T>(src2), globPtr<D>(dst), op
, stream);
}
else
{
MulScale<T, S, D> op(static_cast<S>(scale));
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
MulScaleOp<T, S, D> op;
op.scale = static_cast<S>(scale);
gridTransformBinary_< TransformPolicy<S> >(globPtr<T>(src1), globPtr<T>(src2), globPtr<D>(dst), op, stream);
}
}
}
void mulMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double scale, Stream& stream, int)
{
typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, const GpuMat& dst, double scale, Stream& stream);
static const func_t funcs[7][7] =
{
{
mulMatImpl<uchar, float, uchar>,
mulMatImpl<uchar, float, schar>,
mulMatImpl<uchar, float, ushort>,
mulMatImpl<uchar, float, short>,
mulMatImpl<uchar, float, int>,
mulMatImpl<uchar, float, float>,
mulMatImpl<uchar, double, double>
},
{
mulMatImpl<schar, float, uchar>,
mulMatImpl<schar, float, schar>,
mulMatImpl<schar, float, ushort>,
mulMatImpl<schar, float, short>,
mulMatImpl<schar, float, int>,
mulMatImpl<schar, float, float>,
mulMatImpl<schar, double, double>
},
{
0 /*mulMatImpl<ushort, float, uchar>*/,
0 /*mulMatImpl<ushort, float, schar>*/,
mulMatImpl<ushort, float, ushort>,
mulMatImpl<ushort, float, short>,
mulMatImpl<ushort, float, int>,
mulMatImpl<ushort, float, float>,
mulMatImpl<ushort, double, double>
},
{
0 /*mulMatImpl<short, float, uchar>*/,
0 /*mulMatImpl<short, float, schar>*/,
mulMatImpl<short, float, ushort>,
mulMatImpl<short, float, short>,
mulMatImpl<short, float, int>,
mulMatImpl<short, float, float>,
mulMatImpl<short, double, double>
},
{
0 /*mulMatImpl<int, float, uchar>*/,
0 /*mulMatImpl<int, float, schar>*/,
0 /*mulMatImpl<int, float, ushort>*/,
0 /*mulMatImpl<int, float, short>*/,
mulMatImpl<int, float, int>,
mulMatImpl<int, float, float>,
mulMatImpl<int, double, double>
},
{
0 /*mulMatImpl<float, float, uchar>*/,
0 /*mulMatImpl<float, float, schar>*/,
0 /*mulMatImpl<float, float, ushort>*/,
0 /*mulMatImpl<float, float, short>*/,
0 /*mulMatImpl<float, float, int>*/,
mulMatImpl<float, float, float>,
mulMatImpl<float, double, double>
},
{
0 /*mulMatImpl<double, double, uchar>*/,
0 /*mulMatImpl<double, double, schar>*/,
0 /*mulMatImpl<double, double, ushort>*/,
0 /*mulMatImpl<double, double, short>*/,
0 /*mulMatImpl<double, double, int>*/,
0 /*mulMatImpl<double, double, float>*/,
mulMatImpl<double, double, double>
}
};
const int sdepth = src1.depth();
const int ddepth = dst.depth();
CV_DbgAssert( sdepth < 7 && ddepth < 7 );
GpuMat src1_ = src1.reshape(1);
GpuMat src2_ = src2.reshape(1);
GpuMat dst_ = dst.reshape(1);
const func_t func = funcs[sdepth][ddepth];
if (!func)
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
func(src1_, src2_, dst_, scale, stream);
}
namespace
{
template <typename T>
struct MulOpSpecial : binary_function<T, float, T>
{
__device__ __forceinline__ T operator ()(const T& a, float b) const
{
typedef typename VecTraits<T>::elem_type elem_type;
T res;
res.x = saturate_cast<elem_type>(a.x * b);
res.y = saturate_cast<elem_type>(a.y * b);
res.z = saturate_cast<elem_type>(a.z * b);
res.w = saturate_cast<elem_type>(a.w * b);
return res;
}
};
}
template void mulMat<uchar, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void mulMat<uchar, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void mulMat<uchar, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void mulMat<uchar, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void mulMat<uchar, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void mulMat<uchar, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void mulMat<uchar, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void mulMat<schar, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void mulMat<schar, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void mulMat<schar, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void mulMat<schar, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void mulMat<schar, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void mulMat<schar, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void mulMat<schar, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void mulMat<ushort, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void mulMat<ushort, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void mulMat<ushort, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void mulMat<ushort, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void mulMat<ushort, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void mulMat<ushort, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void mulMat<ushort, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void mulMat<short, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void mulMat<short, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void mulMat<short, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void mulMat<short, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void mulMat<short, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void mulMat<short, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void mulMat<short, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void mulMat<int, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void mulMat<int, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void mulMat<int, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void mulMat<int, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void mulMat<int, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void mulMat<int, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void mulMat<int, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void mulMat<float, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void mulMat<float, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void mulMat<float, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void mulMat<float, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void mulMat<float, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void mulMat<float, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void mulMat<float, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void mulMat<double, double, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void mulMat<double, double, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void mulMat<double, double, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void mulMat<double, double, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void mulMat<double, double, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void mulMat<double, double, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void mulMat<double, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
void mulMat_8uc4_32f(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
{
gridTransformBinary(globPtr<uchar4>(src1), globPtr<float>(src2), globPtr<uchar4>(dst), MulOpSpecial<uchar4>(), stream);
}
void mulMat_16sc4_32f(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
{
gridTransformBinary(globPtr<short4>(src1), globPtr<float>(src2), globPtr<short4>(dst), MulOpSpecial<short4>(), stream);
}
#endif
// CUDA_DISABLER
#endif
modules/cudaarithm/src/cuda/mul_scalar.cu
View file @
e7dba695
...
...
@@ -40,105 +40,143 @@
//
//M*/
#i
f !defined CUDA_DISABLER
#i
nclude "opencv2/opencv_modules.hpp"
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/transform.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/simd_functions.hpp"
#ifndef HAVE_OPENCV_CUDEV
#
include "arithm_func_traits.hpp
"
#
error "opencv_cudev is required
"
using namespace cv::cuda;
using namespace cv::cuda::device;
#else
namespace arithm
#include "opencv2/cudev.hpp"
using namespace cv::cudev;
void mulScalar(const GpuMat& src, cv::Scalar val, bool, GpuMat& dst, const GpuMat& mask, double scale, Stream& stream, int);
namespace
{
template <typename
T, typename S, typename D> struct MulScalar : unary_function<T, D
>
template <typename
SrcType, typename ScalarType, typename DstType> struct MulScalarOp : unary_function<SrcType, DstType
>
{
S val;
__host__ explicit MulScalar(S val_) : val(val_) {}
ScalarType val;
__device__ __forceinline__ D
operator ()(T
a) const
__device__ __forceinline__ D
stType operator ()(SrcType
a) const
{
return saturate_cast<D
>(a
* val);
return saturate_cast<D
stType>(saturate_cast<ScalarType>(a)
* val);
}
};
}
namespace cv { namespace cuda { namespace device
{
template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::MulScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
template <typename ScalarDepth> struct TransformPolicy : DefaultTransformPolicy
{
};
template <> struct TransformPolicy<double> : DefaultTransformPolicy
{
enum {
shift = 1
};
};
}}}
namespace arithm
{
template <typename T, typename S, typename D>
void mulScalar(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream)
template <typename SrcType, typename ScalarDepth, typename DstType>
void mulScalarImpl(const GpuMat& src, cv::Scalar value, GpuMat& dst, Stream& stream)
{
MulScalar<T, S, D> op(static_cast<S>(val));
device::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
typedef typename MakeVec<ScalarDepth, VecTraits<SrcType>::cn>::type ScalarType;
cv::Scalar_<ScalarDepth> value_ = value;
MulScalarOp<SrcType, ScalarType, DstType> op;
op.val = VecTraits<ScalarType>::make(value_.val);
gridTransformUnary_< TransformPolicy<ScalarDepth> >(globPtr<SrcType>(src), globPtr<DstType>(dst), op, stream);
}
}
void mulScalar(const GpuMat& src, cv::Scalar val, bool, GpuMat& dst, const GpuMat&, double scale, Stream& stream, int)
{
typedef void (*func_t)(const GpuMat& src, cv::Scalar val, GpuMat& dst, Stream& stream);
static const func_t funcs[7][7][4] =
{
{
{mulScalarImpl<uchar, float, uchar>, mulScalarImpl<uchar2, float, uchar2>, mulScalarImpl<uchar3, float, uchar3>, mulScalarImpl<uchar4, float, uchar4>},
{mulScalarImpl<uchar, float, schar>, mulScalarImpl<uchar2, float, char2>, mulScalarImpl<uchar3, float, char3>, mulScalarImpl<uchar4, float, char4>},
{mulScalarImpl<uchar, float, ushort>, mulScalarImpl<uchar2, float, ushort2>, mulScalarImpl<uchar3, float, ushort3>, mulScalarImpl<uchar4, float, ushort4>},
{mulScalarImpl<uchar, float, short>, mulScalarImpl<uchar2, float, short2>, mulScalarImpl<uchar3, float, short3>, mulScalarImpl<uchar4, float, short4>},
{mulScalarImpl<uchar, float, int>, mulScalarImpl<uchar2, float, int2>, mulScalarImpl<uchar3, float, int3>, mulScalarImpl<uchar4, float, int4>},
{mulScalarImpl<uchar, float, float>, mulScalarImpl<uchar2, float, float2>, mulScalarImpl<uchar3, float, float3>, mulScalarImpl<uchar4, float, float4>},
{mulScalarImpl<uchar, double, double>, mulScalarImpl<uchar2, double, double2>, mulScalarImpl<uchar3, double, double3>, mulScalarImpl<uchar4, double, double4>}
},
{
{mulScalarImpl<schar, float, uchar>, mulScalarImpl<char2, float, uchar2>, mulScalarImpl<char3, float, uchar3>, mulScalarImpl<char4, float, uchar4>},
{mulScalarImpl<schar, float, schar>, mulScalarImpl<char2, float, char2>, mulScalarImpl<char3, float, char3>, mulScalarImpl<char4, float, char4>},
{mulScalarImpl<schar, float, ushort>, mulScalarImpl<char2, float, ushort2>, mulScalarImpl<char3, float, ushort3>, mulScalarImpl<char4, float, ushort4>},
{mulScalarImpl<schar, float, short>, mulScalarImpl<char2, float, short2>, mulScalarImpl<char3, float, short3>, mulScalarImpl<char4, float, short4>},
{mulScalarImpl<schar, float, int>, mulScalarImpl<char2, float, int2>, mulScalarImpl<char3, float, int3>, mulScalarImpl<char4, float, int4>},
{mulScalarImpl<schar, float, float>, mulScalarImpl<char2, float, float2>, mulScalarImpl<char3, float, float3>, mulScalarImpl<char4, float, float4>},
{mulScalarImpl<schar, double, double>, mulScalarImpl<char2, double, double2>, mulScalarImpl<char3, double, double3>, mulScalarImpl<char4, double, double4>}
},
{
{0 /*mulScalarImpl<ushort, float, uchar>*/, 0 /*mulScalarImpl<ushort2, float, uchar2>*/, 0 /*mulScalarImpl<ushort3, float, uchar3>*/, 0 /*mulScalarImpl<ushort4, float, uchar4>*/},
{0 /*mulScalarImpl<ushort, float, schar>*/, 0 /*mulScalarImpl<ushort2, float, char2>*/, 0 /*mulScalarImpl<ushort3, float, char3>*/, 0 /*mulScalarImpl<ushort4, float, char4>*/},
{mulScalarImpl<ushort, float, ushort>, mulScalarImpl<ushort2, float, ushort2>, mulScalarImpl<ushort3, float, ushort3>, mulScalarImpl<ushort4, float, ushort4>},
{mulScalarImpl<ushort, float, short>, mulScalarImpl<ushort2, float, short2>, mulScalarImpl<ushort3, float, short3>, mulScalarImpl<ushort4, float, short4>},
{mulScalarImpl<ushort, float, int>, mulScalarImpl<ushort2, float, int2>, mulScalarImpl<ushort3, float, int3>, mulScalarImpl<ushort4, float, int4>},
{mulScalarImpl<ushort, float, float>, mulScalarImpl<ushort2, float, float2>, mulScalarImpl<ushort3, float, float3>, mulScalarImpl<ushort4, float, float4>},
{mulScalarImpl<ushort, double, double>, mulScalarImpl<ushort2, double, double2>, mulScalarImpl<ushort3, double, double3>, mulScalarImpl<ushort4, double, double4>}
},
{
{0 /*mulScalarImpl<short, float, uchar>*/, 0 /*mulScalarImpl<short2, float, uchar2>*/, 0 /*mulScalarImpl<short3, float, uchar3>*/, 0 /*mulScalarImpl<short4, float, uchar4>*/},
{0 /*mulScalarImpl<short, float, schar>*/, 0 /*mulScalarImpl<short2, float, char2>*/, 0 /*mulScalarImpl<short3, float, char3>*/, 0 /*mulScalarImpl<short4, float, char4>*/},
{mulScalarImpl<short, float, ushort>, mulScalarImpl<short2, float, ushort2>, mulScalarImpl<short3, float, ushort3>, mulScalarImpl<short4, float, ushort4>},
{mulScalarImpl<short, float, short>, mulScalarImpl<short2, float, short2>, mulScalarImpl<short3, float, short3>, mulScalarImpl<short4, float, short4>},
{mulScalarImpl<short, float, int>, mulScalarImpl<short2, float, int2>, mulScalarImpl<short3, float, int3>, mulScalarImpl<short4, float, int4>},
{mulScalarImpl<short, float, float>, mulScalarImpl<short2, float, float2>, mulScalarImpl<short3, float, float3>, mulScalarImpl<short4, float, float4>},
{mulScalarImpl<short, double, double>, mulScalarImpl<short2, double, double2>, mulScalarImpl<short3, double, double3>, mulScalarImpl<short4, double, double4>}
},
{
{0 /*mulScalarImpl<int, float, uchar>*/, 0 /*mulScalarImpl<int2, float, uchar2>*/, 0 /*mulScalarImpl<int3, float, uchar3>*/, 0 /*mulScalarImpl<int4, float, uchar4>*/},
{0 /*mulScalarImpl<int, float, schar>*/, 0 /*mulScalarImpl<int2, float, char2>*/, 0 /*mulScalarImpl<int3, float, char3>*/, 0 /*mulScalarImpl<int4, float, char4>*/},
{0 /*mulScalarImpl<int, float, ushort>*/, 0 /*mulScalarImpl<int2, float, ushort2>*/, 0 /*mulScalarImpl<int3, float, ushort3>*/, 0 /*mulScalarImpl<int4, float, ushort4>*/},
{0 /*mulScalarImpl<int, float, short>*/, 0 /*mulScalarImpl<int2, float, short2>*/, 0 /*mulScalarImpl<int3, float, short3>*/, 0 /*mulScalarImpl<int4, float, short4>*/},
{mulScalarImpl<int, float, int>, mulScalarImpl<int2, float, int2>, mulScalarImpl<int3, float, int3>, mulScalarImpl<int4, float, int4>},
{mulScalarImpl<int, float, float>, mulScalarImpl<int2, float, float2>, mulScalarImpl<int3, float, float3>, mulScalarImpl<int4, float, float4>},
{mulScalarImpl<int, double, double>, mulScalarImpl<int2, double, double2>, mulScalarImpl<int3, double, double3>, mulScalarImpl<int4, double, double4>}
},
{
{0 /*mulScalarImpl<float, float, uchar>*/, 0 /*mulScalarImpl<float2, float, uchar2>*/, 0 /*mulScalarImpl<float3, float, uchar3>*/, 0 /*mulScalarImpl<float4, float, uchar4>*/},
{0 /*mulScalarImpl<float, float, schar>*/, 0 /*mulScalarImpl<float2, float, char2>*/, 0 /*mulScalarImpl<float3, float, char3>*/, 0 /*mulScalarImpl<float4, float, char4>*/},
{0 /*mulScalarImpl<float, float, ushort>*/, 0 /*mulScalarImpl<float2, float, ushort2>*/, 0 /*mulScalarImpl<float3, float, ushort3>*/, 0 /*mulScalarImpl<float4, float, ushort4>*/},
{0 /*mulScalarImpl<float, float, short>*/, 0 /*mulScalarImpl<float2, float, short2>*/, 0 /*mulScalarImpl<float3, float, short3>*/, 0 /*mulScalarImpl<float4, float, short4>*/},
{0 /*mulScalarImpl<float, float, int>*/, 0 /*mulScalarImpl<float2, float, int2>*/, 0 /*mulScalarImpl<float3, float, int3>*/, 0 /*mulScalarImpl<float4, float, int4>*/},
{mulScalarImpl<float, float, float>, mulScalarImpl<float2, float, float2>, mulScalarImpl<float3, float, float3>, mulScalarImpl<float4, float, float4>},
{mulScalarImpl<float, double, double>, mulScalarImpl<float2, double, double2>, mulScalarImpl<float3, double, double3>, mulScalarImpl<float4, double, double4>}
},
{
{0 /*mulScalarImpl<double, double, uchar>*/, 0 /*mulScalarImpl<double2, double, uchar2>*/, 0 /*mulScalarImpl<double3, double, uchar3>*/, 0 /*mulScalarImpl<double4, double, uchar4>*/},
{0 /*mulScalarImpl<double, double, schar>*/, 0 /*mulScalarImpl<double2, double, char2>*/, 0 /*mulScalarImpl<double3, double, char3>*/, 0 /*mulScalarImpl<double4, double, char4>*/},
{0 /*mulScalarImpl<double, double, ushort>*/, 0 /*mulScalarImpl<double2, double, ushort2>*/, 0 /*mulScalarImpl<double3, double, ushort3>*/, 0 /*mulScalarImpl<double4, double, ushort4>*/},
{0 /*mulScalarImpl<double, double, short>*/, 0 /*mulScalarImpl<double2, double, short2>*/, 0 /*mulScalarImpl<double3, double, short3>*/, 0 /*mulScalarImpl<double4, double, short4>*/},
{0 /*mulScalarImpl<double, double, int>*/, 0 /*mulScalarImpl<double2, double, int2>*/, 0 /*mulScalarImpl<double3, double, int3>*/, 0 /*mulScalarImpl<double4, double, int4>*/},
{0 /*mulScalarImpl<double, double, float>*/, 0 /*mulScalarImpl<double2, double, float2>*/, 0 /*mulScalarImpl<double3, double, float3>*/, 0 /*mulScalarImpl<double4, double, float4>*/},
{mulScalarImpl<double, double, double>, mulScalarImpl<double2, double, double2>, mulScalarImpl<double3, double, double3>, mulScalarImpl<double4, double, double4>}
}
};
const int sdepth = src.depth();
const int ddepth = dst.depth();
const int cn = src.channels();
CV_DbgAssert( sdepth < 7 && ddepth < 7 && cn <= 4 );
val[0] *= scale;
val[1] *= scale;
val[2] *= scale;
val[3] *= scale;
const func_t func = funcs[sdepth][ddepth][cn - 1];
if (!func)
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
template void mulScalar<uchar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
template void mulScalar<uchar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
template void mulScalar<uchar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
template void mulScalar<uchar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
template void mulScalar<uchar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
template void mulScalar<uchar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
template void mulScalar<uchar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
template void mulScalar<schar, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
template void mulScalar<schar, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
template void mulScalar<schar, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
template void mulScalar<schar, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
template void mulScalar<schar, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
template void mulScalar<schar, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
template void mulScalar<schar, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
//template void mulScalar<ushort, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
//template void mulScalar<ushort, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
template void mulScalar<ushort, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
template void mulScalar<ushort, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
template void mulScalar<ushort, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
template void mulScalar<ushort, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
template void mulScalar<ushort, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
//template void mulScalar<short, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
//template void mulScalar<short, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
template void mulScalar<short, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
template void mulScalar<short, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
template void mulScalar<short, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
template void mulScalar<short, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
template void mulScalar<short, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
//template void mulScalar<int, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
//template void mulScalar<int, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
//template void mulScalar<int, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
//template void mulScalar<int, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
template void mulScalar<int, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
template void mulScalar<int, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
template void mulScalar<int, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
//template void mulScalar<float, float, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
//template void mulScalar<float, float, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
//template void mulScalar<float, float, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
//template void mulScalar<float, float, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
//template void mulScalar<float, float, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
template void mulScalar<float, float, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
template void mulScalar<float, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
//template void mulScalar<double, double, uchar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
//template void mulScalar<double, double, schar>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
//template void mulScalar<double, double, ushort>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
//template void mulScalar<double, double, short>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
//template void mulScalar<double, double, int>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
//template void mulScalar<double, double, float>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
template void mulScalar<double, double, double>(PtrStepSzb src1, double val, PtrStepSzb dst, cudaStream_t stream);
func(src, val, dst, stream);
}
#endif
// CUDA_DISABLER
#endif
modules/cudaarithm/src/element_operations.cpp
View file @
e7dba695
...
...
@@ -360,219 +360,11 @@ void cv::cuda::subtract(InputArray src1, InputArray src2, OutputArray dst, Input
////////////////////////////////////////////////////////////////////////
// multiply
namespace
arithm
{
void
mulMat_8uc4_32f
(
PtrStepSz
<
unsigned
int
>
src1
,
PtrStepSzf
src2
,
PtrStepSz
<
unsigned
int
>
dst
,
cudaStream_t
stream
);
void
mulMat_16sc4_32f
(
PtrStepSz
<
short4
>
src1
,
PtrStepSzf
src2
,
PtrStepSz
<
short4
>
dst
,
cudaStream_t
stream
);
template
<
typename
T
,
typename
S
,
typename
D
>
void
mulMat
(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
double
scale
,
cudaStream_t
stream
);
}
static
void
mulMat
(
const
GpuMat
&
src1
,
const
GpuMat
&
src2
,
GpuMat
&
dst
,
const
GpuMat
&
,
double
scale
,
Stream
&
_stream
,
int
)
{
typedef
void
(
*
func_t
)(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
double
scale
,
cudaStream_t
stream
);
static
const
func_t
funcs
[
7
][
7
]
=
{
{
arithm
::
mulMat
<
unsigned
char
,
float
,
unsigned
char
>
,
arithm
::
mulMat
<
unsigned
char
,
float
,
signed
char
>
,
arithm
::
mulMat
<
unsigned
char
,
float
,
unsigned
short
>
,
arithm
::
mulMat
<
unsigned
char
,
float
,
short
>
,
arithm
::
mulMat
<
unsigned
char
,
float
,
int
>
,
arithm
::
mulMat
<
unsigned
char
,
float
,
float
>
,
arithm
::
mulMat
<
unsigned
char
,
double
,
double
>
},
{
arithm
::
mulMat
<
signed
char
,
float
,
unsigned
char
>
,
arithm
::
mulMat
<
signed
char
,
float
,
signed
char
>
,
arithm
::
mulMat
<
signed
char
,
float
,
unsigned
short
>
,
arithm
::
mulMat
<
signed
char
,
float
,
short
>
,
arithm
::
mulMat
<
signed
char
,
float
,
int
>
,
arithm
::
mulMat
<
signed
char
,
float
,
float
>
,
arithm
::
mulMat
<
signed
char
,
double
,
double
>
},
{
0
/*arithm::mulMat<unsigned short, float, unsigned char>*/
,
0
/*arithm::mulMat<unsigned short, float, signed char>*/
,
arithm
::
mulMat
<
unsigned
short
,
float
,
unsigned
short
>
,
arithm
::
mulMat
<
unsigned
short
,
float
,
short
>
,
arithm
::
mulMat
<
unsigned
short
,
float
,
int
>
,
arithm
::
mulMat
<
unsigned
short
,
float
,
float
>
,
arithm
::
mulMat
<
unsigned
short
,
double
,
double
>
},
{
0
/*arithm::mulMat<short, float, unsigned char>*/
,
0
/*arithm::mulMat<short, float, signed char>*/
,
arithm
::
mulMat
<
short
,
float
,
unsigned
short
>
,
arithm
::
mulMat
<
short
,
float
,
short
>
,
arithm
::
mulMat
<
short
,
float
,
int
>
,
arithm
::
mulMat
<
short
,
float
,
float
>
,
arithm
::
mulMat
<
short
,
double
,
double
>
},
{
0
/*arithm::mulMat<int, float, unsigned char>*/
,
0
/*arithm::mulMat<int, float, signed char>*/
,
0
/*arithm::mulMat<int, float, unsigned short>*/
,
0
/*arithm::mulMat<int, float, short>*/
,
arithm
::
mulMat
<
int
,
float
,
int
>
,
arithm
::
mulMat
<
int
,
float
,
float
>
,
arithm
::
mulMat
<
int
,
double
,
double
>
},
{
0
/*arithm::mulMat<float, float, unsigned char>*/
,
0
/*arithm::mulMat<float, float, signed char>*/
,
0
/*arithm::mulMat<float, float, unsigned short>*/
,
0
/*arithm::mulMat<float, float, short>*/
,
0
/*arithm::mulMat<float, float, int>*/
,
arithm
::
mulMat
<
float
,
float
,
float
>
,
arithm
::
mulMat
<
float
,
double
,
double
>
},
{
0
/*arithm::mulMat<double, double, unsigned char>*/
,
0
/*arithm::mulMat<double, double, signed char>*/
,
0
/*arithm::mulMat<double, double, unsigned short>*/
,
0
/*arithm::mulMat<double, double, short>*/
,
0
/*arithm::mulMat<double, double, int>*/
,
0
/*arithm::mulMat<double, double, float>*/
,
arithm
::
mulMat
<
double
,
double
,
double
>
}
};
const
int
sdepth
=
src1
.
depth
();
const
int
ddepth
=
dst
.
depth
();
const
int
cn
=
src1
.
channels
();
cudaStream_t
stream
=
StreamAccessor
::
getStream
(
_stream
);
PtrStepSzb
src1_
(
src1
.
rows
,
src1
.
cols
*
cn
,
src1
.
data
,
src1
.
step
);
PtrStepSzb
src2_
(
src1
.
rows
,
src1
.
cols
*
cn
,
src2
.
data
,
src2
.
step
);
PtrStepSzb
dst_
(
src1
.
rows
,
src1
.
cols
*
cn
,
dst
.
data
,
dst
.
step
);
const
func_t
func
=
funcs
[
sdepth
][
ddepth
];
if
(
!
func
)
CV_Error
(
cv
::
Error
::
StsUnsupportedFormat
,
"Unsupported combination of source and destination types"
);
func
(
src1_
,
src2_
,
dst_
,
scale
,
stream
);
}
namespace
arithm
{
template
<
typename
T
,
typename
S
,
typename
D
>
void
mulScalar
(
PtrStepSzb
src1
,
double
val
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
}
void
mulMat
(
const
GpuMat
&
src1
,
const
GpuMat
&
src2
,
GpuMat
&
dst
,
const
GpuMat
&
,
double
scale
,
Stream
&
stream
,
int
);
void
mulMat_8uc4_32f
(
const
GpuMat
&
src1
,
const
GpuMat
&
src2
,
GpuMat
&
dst
,
Stream
&
stream
);
void
mulMat_16sc4_32f
(
const
GpuMat
&
src1
,
const
GpuMat
&
src2
,
GpuMat
&
dst
,
Stream
&
stream
);
static
void
mulScalar
(
const
GpuMat
&
src
,
Scalar
val
,
bool
,
GpuMat
&
dst
,
const
GpuMat
&
,
double
scale
,
Stream
&
_stream
,
int
)
{
typedef
void
(
*
func_t
)(
PtrStepSzb
src1
,
double
val
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
static
const
func_t
funcs
[
7
][
7
]
=
{
{
arithm
::
mulScalar
<
unsigned
char
,
float
,
unsigned
char
>
,
arithm
::
mulScalar
<
unsigned
char
,
float
,
signed
char
>
,
arithm
::
mulScalar
<
unsigned
char
,
float
,
unsigned
short
>
,
arithm
::
mulScalar
<
unsigned
char
,
float
,
short
>
,
arithm
::
mulScalar
<
unsigned
char
,
float
,
int
>
,
arithm
::
mulScalar
<
unsigned
char
,
float
,
float
>
,
arithm
::
mulScalar
<
unsigned
char
,
double
,
double
>
},
{
arithm
::
mulScalar
<
signed
char
,
float
,
unsigned
char
>
,
arithm
::
mulScalar
<
signed
char
,
float
,
signed
char
>
,
arithm
::
mulScalar
<
signed
char
,
float
,
unsigned
short
>
,
arithm
::
mulScalar
<
signed
char
,
float
,
short
>
,
arithm
::
mulScalar
<
signed
char
,
float
,
int
>
,
arithm
::
mulScalar
<
signed
char
,
float
,
float
>
,
arithm
::
mulScalar
<
signed
char
,
double
,
double
>
},
{
0
/*arithm::mulScalar<unsigned short, float, unsigned char>*/
,
0
/*arithm::mulScalar<unsigned short, float, signed char>*/
,
arithm
::
mulScalar
<
unsigned
short
,
float
,
unsigned
short
>
,
arithm
::
mulScalar
<
unsigned
short
,
float
,
short
>
,
arithm
::
mulScalar
<
unsigned
short
,
float
,
int
>
,
arithm
::
mulScalar
<
unsigned
short
,
float
,
float
>
,
arithm
::
mulScalar
<
unsigned
short
,
double
,
double
>
},
{
0
/*arithm::mulScalar<short, float, unsigned char>*/
,
0
/*arithm::mulScalar<short, float, signed char>*/
,
arithm
::
mulScalar
<
short
,
float
,
unsigned
short
>
,
arithm
::
mulScalar
<
short
,
float
,
short
>
,
arithm
::
mulScalar
<
short
,
float
,
int
>
,
arithm
::
mulScalar
<
short
,
float
,
float
>
,
arithm
::
mulScalar
<
short
,
double
,
double
>
},
{
0
/*arithm::mulScalar<int, float, unsigned char>*/
,
0
/*arithm::mulScalar<int, float, signed char>*/
,
0
/*arithm::mulScalar<int, float, unsigned short>*/
,
0
/*arithm::mulScalar<int, float, short>*/
,
arithm
::
mulScalar
<
int
,
float
,
int
>
,
arithm
::
mulScalar
<
int
,
float
,
float
>
,
arithm
::
mulScalar
<
int
,
double
,
double
>
},
{
0
/*arithm::mulScalar<float, float, unsigned char>*/
,
0
/*arithm::mulScalar<float, float, signed char>*/
,
0
/*arithm::mulScalar<float, float, unsigned short>*/
,
0
/*arithm::mulScalar<float, float, short>*/
,
0
/*arithm::mulScalar<float, float, int>*/
,
arithm
::
mulScalar
<
float
,
float
,
float
>
,
arithm
::
mulScalar
<
float
,
double
,
double
>
},
{
0
/*arithm::mulScalar<double, double, unsigned char>*/
,
0
/*arithm::mulScalar<double, double, signed char>*/
,
0
/*arithm::mulScalar<double, double, unsigned short>*/
,
0
/*arithm::mulScalar<double, double, short>*/
,
0
/*arithm::mulScalar<double, double, int>*/
,
0
/*arithm::mulScalar<double, double, float>*/
,
arithm
::
mulScalar
<
double
,
double
,
double
>
}
};
typedef
void
(
*
npp_func_t
)(
const
PtrStepSzb
src
,
Scalar
sc
,
PtrStepb
dst
,
cudaStream_t
stream
);
static
const
npp_func_t
npp_funcs
[
7
][
4
]
=
{
{
NppArithmScalar
<
CV_8U
,
1
,
nppiMulC_8u_C1RSfs
>::
call
,
0
,
NppArithmScalar
<
CV_8U
,
3
,
nppiMulC_8u_C3RSfs
>::
call
,
NppArithmScalar
<
CV_8U
,
4
,
nppiMulC_8u_C4RSfs
>::
call
},
{
0
,
0
,
0
,
0
},
{
NppArithmScalar
<
CV_16U
,
1
,
nppiMulC_16u_C1RSfs
>::
call
,
0
,
NppArithmScalar
<
CV_16U
,
3
,
nppiMulC_16u_C3RSfs
>::
call
,
NppArithmScalar
<
CV_16U
,
4
,
nppiMulC_16u_C4RSfs
>::
call
},
{
NppArithmScalar
<
CV_16S
,
1
,
nppiMulC_16s_C1RSfs
>::
call
,
0
,
NppArithmScalar
<
CV_16S
,
3
,
nppiMulC_16s_C3RSfs
>::
call
,
NppArithmScalar
<
CV_16S
,
4
,
nppiMulC_16s_C4RSfs
>::
call
},
{
NppArithmScalar
<
CV_32S
,
1
,
nppiMulC_32s_C1RSfs
>::
call
,
0
,
NppArithmScalar
<
CV_32S
,
3
,
nppiMulC_32s_C3RSfs
>::
call
,
0
},
{
NppArithmScalar
<
CV_32F
,
1
,
nppiMulC_32f_C1R
>::
call
,
0
,
NppArithmScalar
<
CV_32F
,
3
,
nppiMulC_32f_C3R
>::
call
,
NppArithmScalar
<
CV_32F
,
4
,
nppiMulC_32f_C4R
>::
call
},
{
0
,
0
,
0
,
0
}
};
const
int
sdepth
=
src
.
depth
();
const
int
ddepth
=
dst
.
depth
();
const
int
cn
=
src
.
channels
();
cudaStream_t
stream
=
StreamAccessor
::
getStream
(
_stream
);
val
[
0
]
*=
scale
;
val
[
1
]
*=
scale
;
val
[
2
]
*=
scale
;
val
[
3
]
*=
scale
;
const
npp_func_t
npp_func
=
npp_funcs
[
sdepth
][
cn
-
1
];
if
(
ddepth
==
sdepth
&&
cn
>
1
&&
npp_func
!=
0
)
{
npp_func
(
src
,
val
,
dst
,
stream
);
return
;
}
CV_Assert
(
cn
==
1
);
const
func_t
func
=
funcs
[
sdepth
][
ddepth
];
if
(
!
func
)
CV_Error
(
cv
::
Error
::
StsUnsupportedFormat
,
"Unsupported combination of source and destination types"
);
func
(
src
,
val
[
0
],
dst
,
stream
);
}
void
mulScalar
(
const
GpuMat
&
src
,
cv
::
Scalar
val
,
bool
,
GpuMat
&
dst
,
const
GpuMat
&
mask
,
double
scale
,
Stream
&
stream
,
int
);
void
cv
::
cuda
::
multiply
(
InputArray
_src1
,
InputArray
_src2
,
OutputArray
_dst
,
double
scale
,
int
dtype
,
Stream
&
stream
)
{
...
...
@@ -586,7 +378,7 @@ void cv::cuda::multiply(InputArray _src1, InputArray _src2, OutputArray _dst, do
_dst
.
create
(
src1
.
size
(),
src1
.
type
());
GpuMat
dst
=
_dst
.
getGpuMat
();
arithm
::
mulMat_8uc4_32f
(
src1
,
src2
,
dst
,
StreamAccessor
::
getStream
(
stream
)
);
mulMat_8uc4_32f
(
src1
,
src2
,
dst
,
stream
);
}
else
if
(
_src1
.
type
()
==
CV_16SC4
&&
_src2
.
type
()
==
CV_32FC1
)
{
...
...
@@ -598,7 +390,7 @@ void cv::cuda::multiply(InputArray _src1, InputArray _src2, OutputArray _dst, do
_dst
.
create
(
src1
.
size
(),
src1
.
type
());
GpuMat
dst
=
_dst
.
getGpuMat
();
arithm
::
mulMat_16sc4_32f
(
src1
,
src2
,
dst
,
StreamAccessor
::
getStream
(
stream
)
);
mulMat_16sc4_32f
(
src1
,
src2
,
dst
,
stream
);
}
else
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment