Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
574ff471
Commit
574ff471
authored
Jul 29, 2013
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
used new device layer for cv::gpu::divide
parent
e7dba695
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
341 additions
and
451 deletions
+341
-451
div_mat.cu
modules/cudaarithm/src/cuda/div_mat.cu
+148
-136
div_scalar.cu
modules/cudaarithm/src/cuda/div_scalar.cu
+187
-91
element_operations.cpp
modules/cudaarithm/src/element_operations.cpp
+6
-224
No files found.
modules/cudaarithm/src/cuda/div_mat.cu
View file @
574ff471
...
...
@@ -40,191 +40,203 @@
//
//M*/
#i
f !defined CUDA_DISABLER
#i
nclude "opencv2/opencv_modules.hpp"
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/transform.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/simd_functions.hpp"
#ifndef HAVE_OPENCV_CUDEV
#
include "arithm_func_traits.hpp
"
#
error "opencv_cudev is required
"
using namespace cv::cuda;
using namespace cv::cuda::device;
#else
namespace arithm
{
struct Div_8uc4_32f : binary_function<uint, float, uint>
{
__device__ __forceinline__ uint operator ()(uint a, float b) const
{
uint res = 0;
if (b != 0)
{
b = 1.0f / b;
res |= (saturate_cast<uchar>((0xffu & (a )) * b) );
res |= (saturate_cast<uchar>((0xffu & (a >> 8)) * b) << 8);
res |= (saturate_cast<uchar>((0xffu & (a >> 16)) * b) << 16);
res |= (saturate_cast<uchar>((0xffu & (a >> 24)) * b) << 24);
}
#include "opencv2/cudev.hpp"
return res;
}
};
using namespace cv::cudev;
struct Div_16sc4_32f : binary_function<short4, float, short4>
{
__device__ __forceinline__ short4 operator ()(short4 a, float b) const
{
return b != 0 ? make_short4(saturate_cast<short>(a.x / b), saturate_cast<short>(a.y / b),
saturate_cast<short>(a.z / b), saturate_cast<short>(a.w / b))
: make_short4(0,0,0,0);
}
};
void divMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double scale, Stream& stream, int);
void divMat_8uc4_32f(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream);
void divMat_16sc4_32f(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream);
template <typename T, typename D> struct Div : binary_function<T, T, D>
namespace
{
template <typename T, typename D> struct DivOp : binary_function<T, T, D>
{
__device__ __forceinline__ D operator ()(T a, T b) const
{
return b != 0 ? saturate_cast<D>(a / b) : 0;
}
__host__ __device__ __forceinline__ Div() {}
__host__ __device__ __forceinline__ Div(const Div&) {}
};
template <typename T> struct Div<T, float> : binary_function<T, T, float>
template <typename T> struct Div
Op
<T, float> : binary_function<T, T, float>
{
__device__ __forceinline__ float operator ()(T a, T b) const
{
return b != 0 ? static_cast<float>(a) / b : 0;
return b != 0 ? static_cast<float>(a) / b : 0
.0f
;
}
__host__ __device__ __forceinline__ Div() {}
__host__ __device__ __forceinline__ Div(const Div&) {}
};
template <typename T> struct Div<T, double> : binary_function<T, T, double>
template <typename T> struct Div
Op
<T, double> : binary_function<T, T, double>
{
__device__ __forceinline__ double operator ()(T a, T b) const
{
return b != 0 ? static_cast<double>(a) / b : 0;
return b != 0 ? static_cast<double>(a) / b : 0
.0
;
}
__host__ __device__ __forceinline__ Div() {}
__host__ __device__ __forceinline__ Div(const Div&) {}
};
template <typename T, typename S, typename D> struct DivScale : binary_function<T, T, D>
template <typename T, typename S, typename D> struct DivScale
Op
: binary_function<T, T, D>
{
S scale;
__host__ explicit DivScale(S scale_) : scale(scale_) {}
__device__ __forceinline__ D operator ()(T a, T b) const
{
return b != 0 ? saturate_cast<D>(scale * a / b) : 0;
}
};
}
namespace cv { namespace cuda { namespace device
{
template <> struct TransformFunctorTraits<arithm::Div_8uc4_32f> : arithm::ArithmFuncTraits<sizeof(uint), sizeof(uint)>
template <typename ScalarDepth> struct TransformPolicy : DefaultTransformPolicy
{
};
template <typename T, typename D> struct TransformFunctorTraits< arithm::Div<T, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
template <> struct TransformPolicy<double> : DefaultTransformPolicy
{
enum {
shift = 1
};
template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::DivScale<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
{
};
}}}
namespace arithm
{
void divMat_8uc4_32f(PtrStepSz<uint> src1, PtrStepSzf src2, PtrStepSz<uint> dst, cudaStream_t stream)
template <typename T, typename S, typename D>
void divMatImpl(const GpuMat& src1, const GpuMat& src2, const GpuMat& dst, double scale, Stream& stream)
{
device::transform(src1, src2, dst, Div_8uc4_32f(), WithOutMask(), stream);
if (scale == 1)
{
DivOp<T, D> op;
gridTransformBinary_< TransformPolicy<S> >(globPtr<T>(src1), globPtr<T>(src2), globPtr<D>(dst), op, stream);
}
void divMat_16sc4_32f(PtrStepSz<short4> src1, PtrStepSzf src2, PtrStepSz<short4> dst, cudaStream_t stream)
else
{
device::transform(src1, src2, dst, Div_16sc4_32f(), WithOutMask(), stream);
DivScaleOp<T, S, D> op;
op.scale = static_cast<S>(scale);
gridTransformBinary_< TransformPolicy<S> >(globPtr<T>(src1), globPtr<T>(src2), globPtr<D>(dst), op, stream);
}
}
}
template <typename T, typename S, typename D>
void divMat(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream)
void divMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat&, double scale, Stream& stream, int)
{
typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, const GpuMat& dst, double scale, Stream& stream);
static const func_t funcs[7][7] =
{
{
divMatImpl<uchar, float, uchar>,
divMatImpl<uchar, float, schar>,
divMatImpl<uchar, float, ushort>,
divMatImpl<uchar, float, short>,
divMatImpl<uchar, float, int>,
divMatImpl<uchar, float, float>,
divMatImpl<uchar, double, double>
},
{
divMatImpl<schar, float, uchar>,
divMatImpl<schar, float, schar>,
divMatImpl<schar, float, ushort>,
divMatImpl<schar, float, short>,
divMatImpl<schar, float, int>,
divMatImpl<schar, float, float>,
divMatImpl<schar, double, double>
},
{
0 /*divMatImpl<ushort, float, uchar>*/,
0 /*divMatImpl<ushort, float, schar>*/,
divMatImpl<ushort, float, ushort>,
divMatImpl<ushort, float, short>,
divMatImpl<ushort, float, int>,
divMatImpl<ushort, float, float>,
divMatImpl<ushort, double, double>
},
{
0 /*divMatImpl<short, float, uchar>*/,
0 /*divMatImpl<short, float, schar>*/,
divMatImpl<short, float, ushort>,
divMatImpl<short, float, short>,
divMatImpl<short, float, int>,
divMatImpl<short, float, float>,
divMatImpl<short, double, double>
},
{
0 /*divMatImpl<int, float, uchar>*/,
0 /*divMatImpl<int, float, schar>*/,
0 /*divMatImpl<int, float, ushort>*/,
0 /*divMatImpl<int, float, short>*/,
divMatImpl<int, float, int>,
divMatImpl<int, float, float>,
divMatImpl<int, double, double>
},
{
0 /*divMatImpl<float, float, uchar>*/,
0 /*divMatImpl<float, float, schar>*/,
0 /*divMatImpl<float, float, ushort>*/,
0 /*divMatImpl<float, float, short>*/,
0 /*divMatImpl<float, float, int>*/,
divMatImpl<float, float, float>,
divMatImpl<float, double, double>
},
{
0 /*divMatImpl<double, double, uchar>*/,
0 /*divMatImpl<double, double, schar>*/,
0 /*divMatImpl<double, double, ushort>*/,
0 /*divMatImpl<double, double, short>*/,
0 /*divMatImpl<double, double, int>*/,
0 /*divMatImpl<double, double, float>*/,
divMatImpl<double, double, double>
}
};
const int sdepth = src1.depth();
const int ddepth = dst.depth();
CV_DbgAssert( sdepth < 7 && ddepth < 7 );
GpuMat src1_ = src1.reshape(1);
GpuMat src2_ = src2.reshape(1);
GpuMat dst_ = dst.reshape(1);
const func_t func = funcs[sdepth][ddepth];
if (!func)
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
func(src1_, src2_, dst_, scale, stream);
}
namespace
{
template <typename T>
struct DivOpSpecial : binary_function<T, float, T>
{
if (scale == 1)
__device__ __forceinline__ T operator ()(const T& a, float b) const
{
Div<T, D> op;
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
}
else
typedef typename VecTraits<T>::elem_type elem_type;
T res = VecTraits<T>::all(0);
if (b != 0)
{
DivScale<T, S, D> op(static_cast<S>(scale));
device::transform((PtrStepSz<T>) src1, (PtrStepSz<T>) src2, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
b = 1.0f / b;
res.x = saturate_cast<elem_type>(a.x * b);
res.y = saturate_cast<elem_type>(a.y * b);
res.z = saturate_cast<elem_type>(a.z * b);
res.w = saturate_cast<elem_type>(a.w * b);
}
return res;
}
};
}
template void divMat<uchar, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void divMat<uchar, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void divMat<uchar, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void divMat<uchar, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void divMat<uchar, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void divMat<uchar, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void divMat<uchar, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void divMat<schar, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void divMat<schar, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void divMat<schar, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void divMat<schar, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void divMat<schar, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void divMat<schar, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void divMat<schar, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void divMat<ushort, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void divMat<ushort, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void divMat<ushort, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void divMat<ushort, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void divMat<ushort, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void divMat<ushort, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void divMat<ushort, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void divMat<short, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void divMat<short, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void divMat<short, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void divMat<short, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void divMat<short, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void divMat<short, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void divMat<short, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void divMat<int, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void divMat<int, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void divMat<int, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void divMat<int, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void divMat<int, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void divMat<int, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void divMat<int, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void divMat<float, float, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void divMat<float, float, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void divMat<float, float, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void divMat<float, float, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void divMat<float, float, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void divMat<float, float, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void divMat<float, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void divMat<double, double, uchar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void divMat<double, double, schar>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void divMat<double, double, ushort>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void divMat<double, double, short>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void divMat<double, double, int>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
//template void divMat<double, double, float>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
template void divMat<double, double, double>(PtrStepSzb src1, PtrStepSzb src2, PtrStepSzb dst, double scale, cudaStream_t stream);
void divMat_8uc4_32f(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
{
gridTransformBinary(globPtr<uchar4>(src1), globPtr<float>(src2), globPtr<uchar4>(dst), DivOpSpecial<uchar4>(), stream);
}
void divMat_16sc4_32f(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
{
gridTransformBinary(globPtr<short4>(src1), globPtr<float>(src2), globPtr<short4>(dst), DivOpSpecial<short4>(), stream);
}
#endif
// CUDA_DISABLER
#endif
modules/cudaarithm/src/cuda/div_scalar.cu
View file @
574ff471
...
...
@@ -40,129 +40,225 @@
//
//M*/
#i
f !defined CUDA_DISABLER
#i
nclude "opencv2/opencv_modules.hpp"
#include "opencv2/core/cuda/common.hpp"
#include "opencv2/core/cuda/functional.hpp"
#include "opencv2/core/cuda/transform.hpp"
#include "opencv2/core/cuda/saturate_cast.hpp"
#include "opencv2/core/cuda/simd_functions.hpp"
#ifndef HAVE_OPENCV_CUDEV
#
include "arithm_func_traits.hpp
"
#
error "opencv_cudev is required
"
using namespace cv::cuda;
using namespace cv::cuda::device;
#else
namespace arithm
#include "opencv2/cudev.hpp"
using namespace cv::cudev;
void divScalar(const GpuMat& src, cv::Scalar val, bool inv, GpuMat& dst, const GpuMat& mask, double scale, Stream& stream, int);
namespace
{
template <typename T, typename S, typename D> struct DivScalar : unary_function<T, D>
template <int cn> struct SafeDiv;
template <> struct SafeDiv<1>
{
template <typename T>
__device__ __forceinline__ static T op(T a, T b)
{
return b != 0 ? a / b : 0;
}
};
template <> struct SafeDiv<2>
{
template <typename T>
__device__ __forceinline__ static T op(const T& a, const T& b)
{
S val
;
T res
;
__host__ explicit DivScalar(S val_) : val(val_) {}
res.x = b.x != 0 ? a.x / b.x : 0;
res.y = b.y != 0 ? a.y / b.y : 0;
__device__ __forceinline__ D operator ()(T a) const
{
return saturate_cast<D>(a / val);
return res;
}
};
template <> struct SafeDiv<3>
{
template <typename T>
__device__ __forceinline__ static T op(const T& a, const T& b)
{
T res;
res.x = b.x != 0 ? a.x / b.x : 0;
res.y = b.y != 0 ? a.y / b.y : 0;
res.z = b.z != 0 ? a.z / b.z : 0;
template <typename T, typename S, typename D> struct DivScalarInv : unary_function<T, D>
return res;
}
};
template <> struct SafeDiv<4>
{
template <typename T>
__device__ __forceinline__ static T op(const T& a, const T& b)
{
S val
;
T res
;
explicit DivScalarInv(S val_) : val(val_) {}
res.x = b.x != 0 ? a.x / b.x : 0;
res.y = b.y != 0 ? a.y / b.y : 0;
res.z = b.z != 0 ? a.z / b.z : 0;
res.w = b.w != 0 ? a.w / b.w : 0;
__device__ __forceinline__ D operator ()(T a) const
return res;
}
};
template <typename SrcType, typename ScalarType, typename DstType> struct DivScalarOp : unary_function<SrcType, DstType>
{
return a != 0 ? saturate_cast<D>(val / a) : 0;
ScalarType val;
__device__ __forceinline__ DstType operator ()(SrcType a) const
{
return saturate_cast<DstType>(SafeDiv<VecTraits<ScalarType>::cn>::op(saturate_cast<ScalarType>(a), val));
}
};
}
namespace cv { namespace cuda { namespace device
{
template <typename T, typename S, typename D> struct TransformFunctorTraits< arithm::DivScalar<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
template <typename SrcType, typename ScalarType, typename DstType> struct DivScalarOpInv : unary_function<SrcType, DstType>
{
ScalarType val;
__device__ __forceinline__ DstType operator ()(SrcType a) const
{
return saturate_cast<DstType>(SafeDiv<VecTraits<ScalarType>::cn>::op(val, saturate_cast<ScalarType>(a)));
}
};
template <typename
T, typename S, typename D> struct TransformFunctorTraits< arithm::DivScalarInv<T, S, D> > : arithm::ArithmFuncTraits<sizeof(T), sizeof(D)>
template <typename
ScalarDepth> struct TransformPolicy : DefaultTransformPolicy
{
};
}}}
template <> struct TransformPolicy<double> : DefaultTransformPolicy
{
enum {
shift = 1
};
};
namespace arithm
{
template <typename T, typename S, typename D>
void divScalar(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream)
template <typename SrcType, typename ScalarDepth, typename DstType>
void divScalarImpl(const GpuMat& src, cv::Scalar value, bool inv, GpuMat& dst, Stream& stream)
{
typedef typename MakeVec<ScalarDepth, VecTraits<SrcType>::cn>::type ScalarType;
cv::Scalar_<ScalarDepth> value_ = value;
if (inv)
{
DivScalarInv<T, S, D> op(static_cast<S>(val));
device::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
DivScalarOpInv<SrcType, ScalarType, DstType> op;
op.val = VecTraits<ScalarType>::make(value_.val);
gridTransformUnary_< TransformPolicy<ScalarDepth> >(globPtr<SrcType>(src), globPtr<DstType>(dst), op, stream);
}
else
{
DivScalar<T, S, D> op(static_cast<S>(val));
device::transform((PtrStepSz<T>) src1, (PtrStepSz<D>) dst, op, WithOutMask(), stream);
DivScalarOp<SrcType, ScalarType, DstType> op;
op.val = VecTraits<ScalarType>::make(value_.val);
gridTransformUnary_< TransformPolicy<ScalarDepth> >(globPtr<SrcType>(src), globPtr<DstType>(dst), op, stream);
}
}
}
void divScalar(const GpuMat& src, cv::Scalar val, bool inv, GpuMat& dst, const GpuMat&, double scale, Stream& stream, int)
{
typedef void (*func_t)(const GpuMat& src, cv::Scalar val, bool inv, GpuMat& dst, Stream& stream);
static const func_t funcs[7][7][4] =
{
{
{divScalarImpl<uchar, float, uchar>, divScalarImpl<uchar2, float, uchar2>, divScalarImpl<uchar3, float, uchar3>, divScalarImpl<uchar4, float, uchar4>},
{divScalarImpl<uchar, float, schar>, divScalarImpl<uchar2, float, char2>, divScalarImpl<uchar3, float, char3>, divScalarImpl<uchar4, float, char4>},
{divScalarImpl<uchar, float, ushort>, divScalarImpl<uchar2, float, ushort2>, divScalarImpl<uchar3, float, ushort3>, divScalarImpl<uchar4, float, ushort4>},
{divScalarImpl<uchar, float, short>, divScalarImpl<uchar2, float, short2>, divScalarImpl<uchar3, float, short3>, divScalarImpl<uchar4, float, short4>},
{divScalarImpl<uchar, float, int>, divScalarImpl<uchar2, float, int2>, divScalarImpl<uchar3, float, int3>, divScalarImpl<uchar4, float, int4>},
{divScalarImpl<uchar, float, float>, divScalarImpl<uchar2, float, float2>, divScalarImpl<uchar3, float, float3>, divScalarImpl<uchar4, float, float4>},
{divScalarImpl<uchar, double, double>, divScalarImpl<uchar2, double, double2>, divScalarImpl<uchar3, double, double3>, divScalarImpl<uchar4, double, double4>}
},
{
{divScalarImpl<schar, float, uchar>, divScalarImpl<char2, float, uchar2>, divScalarImpl<char3, float, uchar3>, divScalarImpl<char4, float, uchar4>},
{divScalarImpl<schar, float, schar>, divScalarImpl<char2, float, char2>, divScalarImpl<char3, float, char3>, divScalarImpl<char4, float, char4>},
{divScalarImpl<schar, float, ushort>, divScalarImpl<char2, float, ushort2>, divScalarImpl<char3, float, ushort3>, divScalarImpl<char4, float, ushort4>},
{divScalarImpl<schar, float, short>, divScalarImpl<char2, float, short2>, divScalarImpl<char3, float, short3>, divScalarImpl<char4, float, short4>},
{divScalarImpl<schar, float, int>, divScalarImpl<char2, float, int2>, divScalarImpl<char3, float, int3>, divScalarImpl<char4, float, int4>},
{divScalarImpl<schar, float, float>, divScalarImpl<char2, float, float2>, divScalarImpl<char3, float, float3>, divScalarImpl<char4, float, float4>},
{divScalarImpl<schar, double, double>, divScalarImpl<char2, double, double2>, divScalarImpl<char3, double, double3>, divScalarImpl<char4, double, double4>}
},
{
{0 /*divScalarImpl<ushort, float, uchar>*/, 0 /*divScalarImpl<ushort2, float, uchar2>*/, 0 /*divScalarImpl<ushort3, float, uchar3>*/, 0 /*divScalarImpl<ushort4, float, uchar4>*/},
{0 /*divScalarImpl<ushort, float, schar>*/, 0 /*divScalarImpl<ushort2, float, char2>*/, 0 /*divScalarImpl<ushort3, float, char3>*/, 0 /*divScalarImpl<ushort4, float, char4>*/},
{divScalarImpl<ushort, float, ushort>, divScalarImpl<ushort2, float, ushort2>, divScalarImpl<ushort3, float, ushort3>, divScalarImpl<ushort4, float, ushort4>},
{divScalarImpl<ushort, float, short>, divScalarImpl<ushort2, float, short2>, divScalarImpl<ushort3, float, short3>, divScalarImpl<ushort4, float, short4>},
{divScalarImpl<ushort, float, int>, divScalarImpl<ushort2, float, int2>, divScalarImpl<ushort3, float, int3>, divScalarImpl<ushort4, float, int4>},
{divScalarImpl<ushort, float, float>, divScalarImpl<ushort2, float, float2>, divScalarImpl<ushort3, float, float3>, divScalarImpl<ushort4, float, float4>},
{divScalarImpl<ushort, double, double>, divScalarImpl<ushort2, double, double2>, divScalarImpl<ushort3, double, double3>, divScalarImpl<ushort4, double, double4>}
},
{
{0 /*divScalarImpl<short, float, uchar>*/, 0 /*divScalarImpl<short2, float, uchar2>*/, 0 /*divScalarImpl<short3, float, uchar3>*/, 0 /*divScalarImpl<short4, float, uchar4>*/},
{0 /*divScalarImpl<short, float, schar>*/, 0 /*divScalarImpl<short2, float, char2>*/, 0 /*divScalarImpl<short3, float, char3>*/, 0 /*divScalarImpl<short4, float, char4>*/},
{divScalarImpl<short, float, ushort>, divScalarImpl<short2, float, ushort2>, divScalarImpl<short3, float, ushort3>, divScalarImpl<short4, float, ushort4>},
{divScalarImpl<short, float, short>, divScalarImpl<short2, float, short2>, divScalarImpl<short3, float, short3>, divScalarImpl<short4, float, short4>},
{divScalarImpl<short, float, int>, divScalarImpl<short2, float, int2>, divScalarImpl<short3, float, int3>, divScalarImpl<short4, float, int4>},
{divScalarImpl<short, float, float>, divScalarImpl<short2, float, float2>, divScalarImpl<short3, float, float3>, divScalarImpl<short4, float, float4>},
{divScalarImpl<short, double, double>, divScalarImpl<short2, double, double2>, divScalarImpl<short3, double, double3>, divScalarImpl<short4, double, double4>}
},
{
{0 /*divScalarImpl<int, float, uchar>*/, 0 /*divScalarImpl<int2, float, uchar2>*/, 0 /*divScalarImpl<int3, float, uchar3>*/, 0 /*divScalarImpl<int4, float, uchar4>*/},
{0 /*divScalarImpl<int, float, schar>*/, 0 /*divScalarImpl<int2, float, char2>*/, 0 /*divScalarImpl<int3, float, char3>*/, 0 /*divScalarImpl<int4, float, char4>*/},
{0 /*divScalarImpl<int, float, ushort>*/, 0 /*divScalarImpl<int2, float, ushort2>*/, 0 /*divScalarImpl<int3, float, ushort3>*/, 0 /*divScalarImpl<int4, float, ushort4>*/},
{0 /*divScalarImpl<int, float, short>*/, 0 /*divScalarImpl<int2, float, short2>*/, 0 /*divScalarImpl<int3, float, short3>*/, 0 /*divScalarImpl<int4, float, short4>*/},
{divScalarImpl<int, float, int>, divScalarImpl<int2, float, int2>, divScalarImpl<int3, float, int3>, divScalarImpl<int4, float, int4>},
{divScalarImpl<int, float, float>, divScalarImpl<int2, float, float2>, divScalarImpl<int3, float, float3>, divScalarImpl<int4, float, float4>},
{divScalarImpl<int, double, double>, divScalarImpl<int2, double, double2>, divScalarImpl<int3, double, double3>, divScalarImpl<int4, double, double4>}
},
{
{0 /*divScalarImpl<float, float, uchar>*/, 0 /*divScalarImpl<float2, float, uchar2>*/, 0 /*divScalarImpl<float3, float, uchar3>*/, 0 /*divScalarImpl<float4, float, uchar4>*/},
{0 /*divScalarImpl<float, float, schar>*/, 0 /*divScalarImpl<float2, float, char2>*/, 0 /*divScalarImpl<float3, float, char3>*/, 0 /*divScalarImpl<float4, float, char4>*/},
{0 /*divScalarImpl<float, float, ushort>*/, 0 /*divScalarImpl<float2, float, ushort2>*/, 0 /*divScalarImpl<float3, float, ushort3>*/, 0 /*divScalarImpl<float4, float, ushort4>*/},
{0 /*divScalarImpl<float, float, short>*/, 0 /*divScalarImpl<float2, float, short2>*/, 0 /*divScalarImpl<float3, float, short3>*/, 0 /*divScalarImpl<float4, float, short4>*/},
{0 /*divScalarImpl<float, float, int>*/, 0 /*divScalarImpl<float2, float, int2>*/, 0 /*divScalarImpl<float3, float, int3>*/, 0 /*divScalarImpl<float4, float, int4>*/},
{divScalarImpl<float, float, float>, divScalarImpl<float2, float, float2>, divScalarImpl<float3, float, float3>, divScalarImpl<float4, float, float4>},
{divScalarImpl<float, double, double>, divScalarImpl<float2, double, double2>, divScalarImpl<float3, double, double3>, divScalarImpl<float4, double, double4>}
},
{
{0 /*divScalarImpl<double, double, uchar>*/, 0 /*divScalarImpl<double2, double, uchar2>*/, 0 /*divScalarImpl<double3, double, uchar3>*/, 0 /*divScalarImpl<double4, double, uchar4>*/},
{0 /*divScalarImpl<double, double, schar>*/, 0 /*divScalarImpl<double2, double, char2>*/, 0 /*divScalarImpl<double3, double, char3>*/, 0 /*divScalarImpl<double4, double, char4>*/},
{0 /*divScalarImpl<double, double, ushort>*/, 0 /*divScalarImpl<double2, double, ushort2>*/, 0 /*divScalarImpl<double3, double, ushort3>*/, 0 /*divScalarImpl<double4, double, ushort4>*/},
{0 /*divScalarImpl<double, double, short>*/, 0 /*divScalarImpl<double2, double, short2>*/, 0 /*divScalarImpl<double3, double, short3>*/, 0 /*divScalarImpl<double4, double, short4>*/},
{0 /*divScalarImpl<double, double, int>*/, 0 /*divScalarImpl<double2, double, int2>*/, 0 /*divScalarImpl<double3, double, int3>*/, 0 /*divScalarImpl<double4, double, int4>*/},
{0 /*divScalarImpl<double, double, float>*/, 0 /*divScalarImpl<double2, double, float2>*/, 0 /*divScalarImpl<double3, double, float3>*/, 0 /*divScalarImpl<double4, double, float4>*/},
{divScalarImpl<double, double, double>, divScalarImpl<double2, double, double2>, divScalarImpl<double3, double, double3>, divScalarImpl<double4, double, double4>}
}
};
const int sdepth = src.depth();
const int ddepth = dst.depth();
const int cn = src.channels();
CV_DbgAssert( sdepth < 7 && ddepth < 7 && cn <= 4 );
if (inv)
{
val[0] *= scale;
val[1] *= scale;
val[2] *= scale;
val[3] *= scale;
}
else
{
val[0] /= scale;
val[1] /= scale;
val[2] /= scale;
val[3] /= scale;
}
template void divScalar<uchar, float, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
template void divScalar<uchar, float, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
template void divScalar<uchar, float, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
template void divScalar<uchar, float, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
template void divScalar<uchar, float, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
template void divScalar<uchar, float, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
template void divScalar<uchar, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
template void divScalar<schar, float, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
template void divScalar<schar, float, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
template void divScalar<schar, float, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
template void divScalar<schar, float, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
template void divScalar<schar, float, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
template void divScalar<schar, float, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
template void divScalar<schar, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
//template void divScalar<ushort, float, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
//template void divScalar<ushort, float, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
template void divScalar<ushort, float, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
template void divScalar<ushort, float, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
template void divScalar<ushort, float, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
template void divScalar<ushort, float, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
template void divScalar<ushort, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
//template void divScalar<short, float, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
//template void divScalar<short, float, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
template void divScalar<short, float, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
template void divScalar<short, float, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
template void divScalar<short, float, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
template void divScalar<short, float, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
template void divScalar<short, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
//template void divScalar<int, float, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
//template void divScalar<int, float, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
//template void divScalar<int, float, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
//template void divScalar<int, float, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
template void divScalar<int, float, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
template void divScalar<int, float, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
template void divScalar<int, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
//template void divScalar<float, float, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
//template void divScalar<float, float, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
//template void divScalar<float, float, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
//template void divScalar<float, float, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
//template void divScalar<float, float, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
template void divScalar<float, float, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
template void divScalar<float, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
//template void divScalar<double, double, uchar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
//template void divScalar<double, double, schar>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
//template void divScalar<double, double, ushort>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
//template void divScalar<double, double, short>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
//template void divScalar<double, double, int>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
//template void divScalar<double, double, float>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
template void divScalar<double, double, double>(PtrStepSzb src1, double val, bool inv, PtrStepSzb dst, cudaStream_t stream);
const func_t func = funcs[sdepth][ddepth][cn - 1];
if (!func)
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
func(src, val, inv, dst, stream);
}
#endif
// CUDA_DISABLER
#endif
modules/cudaarithm/src/element_operations.cpp
View file @
574ff471
...
...
@@ -401,229 +401,11 @@ void cv::cuda::multiply(InputArray _src1, InputArray _src2, OutputArray _dst, do
////////////////////////////////////////////////////////////////////////
// divide
namespace
arithm
{
void
divMat_8uc4_32f
(
PtrStepSz
<
unsigned
int
>
src1
,
PtrStepSzf
src2
,
PtrStepSz
<
unsigned
int
>
dst
,
cudaStream_t
stream
);
void
divMat_16sc4_32f
(
PtrStepSz
<
short4
>
src1
,
PtrStepSzf
src2
,
PtrStepSz
<
short4
>
dst
,
cudaStream_t
stream
);
template
<
typename
T
,
typename
S
,
typename
D
>
void
divMat
(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
double
scale
,
cudaStream_t
stream
);
}
static
void
divMat
(
const
GpuMat
&
src1
,
const
GpuMat
&
src2
,
GpuMat
&
dst
,
const
GpuMat
&
,
double
scale
,
Stream
&
_stream
,
int
)
{
typedef
void
(
*
func_t
)(
PtrStepSzb
src1
,
PtrStepSzb
src2
,
PtrStepSzb
dst
,
double
scale
,
cudaStream_t
stream
);
static
const
func_t
funcs
[
7
][
7
]
=
{
{
arithm
::
divMat
<
unsigned
char
,
float
,
unsigned
char
>
,
arithm
::
divMat
<
unsigned
char
,
float
,
signed
char
>
,
arithm
::
divMat
<
unsigned
char
,
float
,
unsigned
short
>
,
arithm
::
divMat
<
unsigned
char
,
float
,
short
>
,
arithm
::
divMat
<
unsigned
char
,
float
,
int
>
,
arithm
::
divMat
<
unsigned
char
,
float
,
float
>
,
arithm
::
divMat
<
unsigned
char
,
double
,
double
>
},
{
arithm
::
divMat
<
signed
char
,
float
,
unsigned
char
>
,
arithm
::
divMat
<
signed
char
,
float
,
signed
char
>
,
arithm
::
divMat
<
signed
char
,
float
,
unsigned
short
>
,
arithm
::
divMat
<
signed
char
,
float
,
short
>
,
arithm
::
divMat
<
signed
char
,
float
,
int
>
,
arithm
::
divMat
<
signed
char
,
float
,
float
>
,
arithm
::
divMat
<
signed
char
,
double
,
double
>
},
{
0
/*arithm::divMat<unsigned short, float, unsigned char>*/
,
0
/*arithm::divMat<unsigned short, float, signed char>*/
,
arithm
::
divMat
<
unsigned
short
,
float
,
unsigned
short
>
,
arithm
::
divMat
<
unsigned
short
,
float
,
short
>
,
arithm
::
divMat
<
unsigned
short
,
float
,
int
>
,
arithm
::
divMat
<
unsigned
short
,
float
,
float
>
,
arithm
::
divMat
<
unsigned
short
,
double
,
double
>
},
{
0
/*arithm::divMat<short, float, unsigned char>*/
,
0
/*arithm::divMat<short, float, signed char>*/
,
arithm
::
divMat
<
short
,
float
,
unsigned
short
>
,
arithm
::
divMat
<
short
,
float
,
short
>
,
arithm
::
divMat
<
short
,
float
,
int
>
,
arithm
::
divMat
<
short
,
float
,
float
>
,
arithm
::
divMat
<
short
,
double
,
double
>
},
{
0
/*arithm::divMat<int, float, unsigned char>*/
,
0
/*arithm::divMat<int, float, signed char>*/
,
0
/*arithm::divMat<int, float, unsigned short>*/
,
0
/*arithm::divMat<int, float, short>*/
,
arithm
::
divMat
<
int
,
float
,
int
>
,
arithm
::
divMat
<
int
,
float
,
float
>
,
arithm
::
divMat
<
int
,
double
,
double
>
},
{
0
/*arithm::divMat<float, float, unsigned char>*/
,
0
/*arithm::divMat<float, float, signed char>*/
,
0
/*arithm::divMat<float, float, unsigned short>*/
,
0
/*arithm::divMat<float, float, short>*/
,
0
/*arithm::divMat<float, float, int>*/
,
arithm
::
divMat
<
float
,
float
,
float
>
,
arithm
::
divMat
<
float
,
double
,
double
>
},
{
0
/*arithm::divMat<double, double, unsigned char>*/
,
0
/*arithm::divMat<double, double, signed char>*/
,
0
/*arithm::divMat<double, double, unsigned short>*/
,
0
/*arithm::divMat<double, double, short>*/
,
0
/*arithm::divMat<double, double, int>*/
,
0
/*arithm::divMat<double, double, float>*/
,
arithm
::
divMat
<
double
,
double
,
double
>
}
};
const
int
sdepth
=
src1
.
depth
();
const
int
ddepth
=
dst
.
depth
();
const
int
cn
=
src1
.
channels
();
cudaStream_t
stream
=
StreamAccessor
::
getStream
(
_stream
);
PtrStepSzb
src1_
(
src1
.
rows
,
src1
.
cols
*
cn
,
src1
.
data
,
src1
.
step
);
PtrStepSzb
src2_
(
src1
.
rows
,
src1
.
cols
*
cn
,
src2
.
data
,
src2
.
step
);
PtrStepSzb
dst_
(
src1
.
rows
,
src1
.
cols
*
cn
,
dst
.
data
,
dst
.
step
);
const
func_t
func
=
funcs
[
sdepth
][
ddepth
];
if
(
!
func
)
CV_Error
(
cv
::
Error
::
StsUnsupportedFormat
,
"Unsupported combination of source and destination types"
);
func
(
src1_
,
src2_
,
dst_
,
scale
,
stream
);
}
namespace
arithm
{
template
<
typename
T
,
typename
S
,
typename
D
>
void
divScalar
(
PtrStepSzb
src1
,
double
val
,
bool
inv
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
}
static
void
divScalar
(
const
GpuMat
&
src
,
Scalar
val
,
bool
inv
,
GpuMat
&
dst
,
const
GpuMat
&
,
double
scale
,
Stream
&
_stream
,
int
)
{
typedef
void
(
*
func_t
)(
PtrStepSzb
src1
,
double
val
,
bool
inv
,
PtrStepSzb
dst
,
cudaStream_t
stream
);
static
const
func_t
funcs
[
7
][
7
]
=
{
{
arithm
::
divScalar
<
unsigned
char
,
float
,
unsigned
char
>
,
arithm
::
divScalar
<
unsigned
char
,
float
,
signed
char
>
,
arithm
::
divScalar
<
unsigned
char
,
float
,
unsigned
short
>
,
arithm
::
divScalar
<
unsigned
char
,
float
,
short
>
,
arithm
::
divScalar
<
unsigned
char
,
float
,
int
>
,
arithm
::
divScalar
<
unsigned
char
,
float
,
float
>
,
arithm
::
divScalar
<
unsigned
char
,
double
,
double
>
},
{
arithm
::
divScalar
<
signed
char
,
float
,
unsigned
char
>
,
arithm
::
divScalar
<
signed
char
,
float
,
signed
char
>
,
arithm
::
divScalar
<
signed
char
,
float
,
unsigned
short
>
,
arithm
::
divScalar
<
signed
char
,
float
,
short
>
,
arithm
::
divScalar
<
signed
char
,
float
,
int
>
,
arithm
::
divScalar
<
signed
char
,
float
,
float
>
,
arithm
::
divScalar
<
signed
char
,
double
,
double
>
},
{
0
/*arithm::divScalar<unsigned short, float, unsigned char>*/
,
0
/*arithm::divScalar<unsigned short, float, signed char>*/
,
arithm
::
divScalar
<
unsigned
short
,
float
,
unsigned
short
>
,
arithm
::
divScalar
<
unsigned
short
,
float
,
short
>
,
arithm
::
divScalar
<
unsigned
short
,
float
,
int
>
,
arithm
::
divScalar
<
unsigned
short
,
float
,
float
>
,
arithm
::
divScalar
<
unsigned
short
,
double
,
double
>
},
{
0
/*arithm::divScalar<short, float, unsigned char>*/
,
0
/*arithm::divScalar<short, float, signed char>*/
,
arithm
::
divScalar
<
short
,
float
,
unsigned
short
>
,
arithm
::
divScalar
<
short
,
float
,
short
>
,
arithm
::
divScalar
<
short
,
float
,
int
>
,
arithm
::
divScalar
<
short
,
float
,
float
>
,
arithm
::
divScalar
<
short
,
double
,
double
>
},
{
0
/*arithm::divScalar<int, float, unsigned char>*/
,
0
/*arithm::divScalar<int, float, signed char>*/
,
0
/*arithm::divScalar<int, float, unsigned short>*/
,
0
/*arithm::divScalar<int, float, short>*/
,
arithm
::
divScalar
<
int
,
float
,
int
>
,
arithm
::
divScalar
<
int
,
float
,
float
>
,
arithm
::
divScalar
<
int
,
double
,
double
>
},
{
0
/*arithm::divScalar<float, float, unsigned char>*/
,
0
/*arithm::divScalar<float, float, signed char>*/
,
0
/*arithm::divScalar<float, float, unsigned short>*/
,
0
/*arithm::divScalar<float, float, short>*/
,
0
/*arithm::divScalar<float, float, int>*/
,
arithm
::
divScalar
<
float
,
float
,
float
>
,
arithm
::
divScalar
<
float
,
double
,
double
>
},
{
0
/*arithm::divScalar<double, double, unsigned char>*/
,
0
/*arithm::divScalar<double, double, signed char>*/
,
0
/*arithm::divScalar<double, double, unsigned short>*/
,
0
/*arithm::divScalar<double, double, short>*/
,
0
/*arithm::divScalar<double, double, int>*/
,
0
/*arithm::divScalar<double, double, float>*/
,
arithm
::
divScalar
<
double
,
double
,
double
>
}
};
typedef
void
(
*
npp_func_t
)(
const
PtrStepSzb
src
,
Scalar
sc
,
PtrStepb
dst
,
cudaStream_t
stream
);
static
const
npp_func_t
npp_funcs
[
7
][
4
]
=
{
{
NppArithmScalar
<
CV_8U
,
1
,
nppiDivC_8u_C1RSfs
>::
call
,
0
,
NppArithmScalar
<
CV_8U
,
3
,
nppiDivC_8u_C3RSfs
>::
call
,
NppArithmScalar
<
CV_8U
,
4
,
nppiDivC_8u_C4RSfs
>::
call
},
{
0
,
0
,
0
,
0
},
{
NppArithmScalar
<
CV_16U
,
1
,
nppiDivC_16u_C1RSfs
>::
call
,
0
,
NppArithmScalar
<
CV_16U
,
3
,
nppiDivC_16u_C3RSfs
>::
call
,
NppArithmScalar
<
CV_16U
,
4
,
nppiDivC_16u_C4RSfs
>::
call
},
{
NppArithmScalar
<
CV_16S
,
1
,
nppiDivC_16s_C1RSfs
>::
call
,
0
,
NppArithmScalar
<
CV_16S
,
3
,
nppiDivC_16s_C3RSfs
>::
call
,
NppArithmScalar
<
CV_16S
,
4
,
nppiDivC_16s_C4RSfs
>::
call
},
{
NppArithmScalar
<
CV_32S
,
1
,
nppiDivC_32s_C1RSfs
>::
call
,
0
,
NppArithmScalar
<
CV_32S
,
3
,
nppiDivC_32s_C3RSfs
>::
call
,
0
},
{
NppArithmScalar
<
CV_32F
,
1
,
nppiDivC_32f_C1R
>::
call
,
0
,
NppArithmScalar
<
CV_32F
,
3
,
nppiDivC_32f_C3R
>::
call
,
NppArithmScalar
<
CV_32F
,
4
,
nppiDivC_32f_C4R
>::
call
},
{
0
,
0
,
0
,
0
}
};
void
divMat
(
const
GpuMat
&
src1
,
const
GpuMat
&
src2
,
GpuMat
&
dst
,
const
GpuMat
&
,
double
scale
,
Stream
&
stream
,
int
);
void
divMat_8uc4_32f
(
const
GpuMat
&
src1
,
const
GpuMat
&
src2
,
GpuMat
&
dst
,
Stream
&
stream
);
void
divMat_16sc4_32f
(
const
GpuMat
&
src1
,
const
GpuMat
&
src2
,
GpuMat
&
dst
,
Stream
&
stream
);
const
int
sdepth
=
src
.
depth
();
const
int
ddepth
=
dst
.
depth
();
const
int
cn
=
src
.
channels
();
cudaStream_t
stream
=
StreamAccessor
::
getStream
(
_stream
);
if
(
inv
)
{
val
[
0
]
*=
scale
;
val
[
1
]
*=
scale
;
val
[
2
]
*=
scale
;
val
[
3
]
*=
scale
;
}
else
{
val
[
0
]
/=
scale
;
val
[
1
]
/=
scale
;
val
[
2
]
/=
scale
;
val
[
3
]
/=
scale
;
}
const
npp_func_t
npp_func
=
npp_funcs
[
sdepth
][
cn
-
1
];
if
(
ddepth
==
sdepth
&&
cn
>
1
&&
npp_func
!=
0
&&
!
inv
)
{
npp_func
(
src
,
val
,
dst
,
stream
);
return
;
}
CV_Assert
(
cn
==
1
);
const
func_t
func
=
funcs
[
sdepth
][
ddepth
];
if
(
!
func
)
CV_Error
(
cv
::
Error
::
StsUnsupportedFormat
,
"Unsupported combination of source and destination types"
);
func
(
src
,
val
[
0
],
inv
,
dst
,
stream
);
}
void
divScalar
(
const
GpuMat
&
src
,
cv
::
Scalar
val
,
bool
inv
,
GpuMat
&
dst
,
const
GpuMat
&
mask
,
double
scale
,
Stream
&
stream
,
int
);
void
cv
::
cuda
::
divide
(
InputArray
_src1
,
InputArray
_src2
,
OutputArray
_dst
,
double
scale
,
int
dtype
,
Stream
&
stream
)
{
...
...
@@ -637,7 +419,7 @@ void cv::cuda::divide(InputArray _src1, InputArray _src2, OutputArray _dst, doub
_dst
.
create
(
src1
.
size
(),
src1
.
type
());
GpuMat
dst
=
_dst
.
getGpuMat
();
arithm
::
divMat_8uc4_32f
(
src1
,
src2
,
dst
,
StreamAccessor
::
getStream
(
stream
)
);
divMat_8uc4_32f
(
src1
,
src2
,
dst
,
stream
);
}
else
if
(
_src1
.
type
()
==
CV_16SC4
&&
_src2
.
type
()
==
CV_32FC1
)
{
...
...
@@ -649,7 +431,7 @@ void cv::cuda::divide(InputArray _src1, InputArray _src2, OutputArray _dst, doub
_dst
.
create
(
src1
.
size
(),
src1
.
type
());
GpuMat
dst
=
_dst
.
getGpuMat
();
arithm
::
divMat_16sc4_32f
(
src1
,
src2
,
dst
,
StreamAccessor
::
getStream
(
stream
)
);
divMat_16sc4_32f
(
src1
,
src2
,
dst
,
stream
);
}
else
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment