Commit 9034a2d6 authored by Vladislav Vinogradov

fixed gpu arithm functions (mismatch with cpu version)

parent 15902284
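Reader's note (not part of the commit): the first hunk below splits the Multiply functor so that the scale is carried as a float and the product saturated for ordinary destination types, while a specialization keeps full double precision (without saturation) when the destination is double. The following stand-alone C++ sketch only illustrates that split; the saturate_u8 helper and the MultiplyToU8/MultiplyToF64 names are simplified assumptions for the example, not OpenCV code.

#include <algorithm>
#include <cstdio>

// Simplified stand-in for a saturating cast to unsigned char: round and clamp to [0, 255].
static unsigned char saturate_u8(float v)
{
    float clamped = std::min(std::max(v + 0.5f, 0.0f), 255.0f);
    return static_cast<unsigned char>(clamped);
}

// Generic case (analogous to Multiply<T, D> after the patch):
// the scale lives in a float and the product is saturated to the destination type.
struct MultiplyToU8
{
    explicit MultiplyToU8(float scale_) : scale(scale_) {}
    unsigned char operator()(unsigned char a, unsigned char b) const
    {
        return saturate_u8(scale * a * b);
    }
    float scale;
};

// Double destination (analogous to Multiply<T, double>): keep double precision, no saturation.
struct MultiplyToF64
{
    explicit MultiplyToF64(double scale_) : scale(scale_) {}
    double operator()(unsigned char a, unsigned char b) const
    {
        return scale * a * b;
    }
    double scale;
};

int main()
{
    MultiplyToU8  mul8(0.5f);
    MultiplyToF64 mul64(0.5);

    std::printf("u8 result:  %d\n", mul8(200, 3));   // 0.5 * 200 * 3 = 300, saturates to 255
    std::printf("f64 result: %f\n", mul64(200, 3));  // stays 300.0
    return 0;
}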
@@ -488,11 +488,29 @@ namespace cv { namespace gpu { namespace device
     template <typename T, typename D> struct Multiply : binary_function<T, T, D>
     {
-        Multiply(double scale_) : scale(scale_) {}
+        Multiply(float scale_) : scale(scale_) {}
         __device__ __forceinline__ D operator ()(T a, T b) const
         {
             return saturate_cast<D>(scale * a * b);
         }
+        const float scale;
+    };
+    template <typename T> struct Multiply<T, double> : binary_function<T, T, double>
+    {
+        Multiply(double scale_) : scale(scale_) {}
+        __device__ __forceinline__ double operator ()(T a, T b) const
+        {
+            return scale * a * b;
+        }
+        const double scale;
+    };
+    template <> struct Multiply<int, int> : binary_function<int, int, int>
+    {
+        Multiply(double scale_) : scale(scale_) {}
+        __device__ __forceinline__ int operator ()(int a, int b) const
+        {
+            return saturate_cast<int>(scale * a * b);
+        }
         const double scale;
     };
@@ -517,11 +535,36 @@ namespace cv { namespace gpu { namespace device
         enum { smart_shift = 4 };
     };

+    template <typename T, typename D> struct MultiplyCaller
+    {
+        static void call(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream)
+        {
+            Multiply<T, D> op(static_cast<float>(scale));
+            cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, op, WithOutMask(), stream);
+        }
+    };
+    template <typename T> struct MultiplyCaller<T, double>
+    {
+        static void call(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream)
+        {
+            cudaSafeCall( cudaSetDoubleForDevice(&scale) );
+            Multiply<T, double> op(scale);
+            cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<double>)dst, op, WithOutMask(), stream);
+        }
+    };
+    template <> struct MultiplyCaller<int, int>
+    {
+        static void call(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream)
+        {
+            cudaSafeCall( cudaSetDoubleForDevice(&scale) );
+            Multiply<int, int> op(scale);
+            cv::gpu::device::transform((DevMem2D_<int>)src1, (DevMem2D_<int>)src2, (DevMem2D_<int>)dst, op, WithOutMask(), stream);
+        }
+    };
+
     template <typename T, typename D> void multiply_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream)
     {
-        cudaSafeCall( cudaSetDoubleForDevice(&scale) );
-        Multiply<T, D> op(scale);
-        cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<D>)dst, op, WithOutMask(), stream);
+        MultiplyCaller<T, D>::call(src1, src2, dst, scale, stream);
     }

     template void multiply_gpu<uchar, uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
@@ -729,7 +772,7 @@ namespace cv { namespace gpu { namespace device
         Divide(double scale_) : scale(scale_) {}
         __device__ __forceinline__ D operator ()(T a, T b) const
         {
-            return b != 0 ? saturate_cast<D>(scale * a / b) : 0;
+            return b != 0 ? saturate_cast<D>(a * scale / b) : 0;
         }
         const double scale;
     };
@@ -115,7 +115,7 @@ namespace
     {
         typedef typename NppArithmFunc<DEPTH>::npp_t npp_t;

-        static void call(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream)
+        static void call(const DevMem2Db src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream)
         {
             NppStreamHandler h(stream);
@@ -124,21 +124,17 @@ namespace
             sz.height = src1.rows;

             nppSafeCall( func((const npp_t*)src1.data, static_cast<int>(src1.step), (const npp_t*)src2.data, static_cast<int>(src2.step),
                 (npp_t*)dst.data, static_cast<int>(dst.step), sz, 0) );

             if (stream == 0)
                 cudaSafeCall( cudaDeviceSynchronize() );
         }
-
-        static void call(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream)
-        {
-            call(src1, src2, dst, PtrStepb(), stream);
-        }
     };

     template <typename NppArithmFunc<CV_32F>::func_t func> struct NppArithm<CV_32F, func>
     {
         typedef typename NppArithmFunc<CV_32F>::npp_t npp_t;

-        static void call(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream)
+        static void call(const DevMem2Db src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream)
         {
             NppStreamHandler h(stream);
@@ -147,83 +143,13 @@ namespace
             sz.height = src1.rows;

             nppSafeCall( func((const npp_t*)src1.data, static_cast<int>(src1.step), (const npp_t*)src2.data, static_cast<int>(src2.step),
                 (npp_t*)dst.data, static_cast<int>(dst.step), sz) );

             if (stream == 0)
                 cudaSafeCall( cudaDeviceSynchronize() );
         }
-
-        static void call(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream)
-        {
-            call(src1, src2, dst, PtrStepb(), stream);
-        }
     };
-}
-
-////////////////////////////////////////////////////////////////////////
-// add
-
-namespace cv { namespace gpu { namespace device
-{
-    template <typename T, typename D>
-    void add_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
-
-    template <typename T, typename D>
-    void add_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
-}}}
-
-void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)
-{
-    using namespace ::cv::gpu::device;
-
-    typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
-
-    static const func_t funcs[7][7] =
-    {
-        {add_gpu<unsigned char, unsigned char>, 0/*add_gpu<unsigned char, signed char>*/, add_gpu<unsigned char, unsigned short>, add_gpu<unsigned char, short>, add_gpu<unsigned char, int>, add_gpu<unsigned char, float>, add_gpu<unsigned char, double>},
-        {0/*add_gpu<signed char, unsigned char>*/, 0/*add_gpu<signed char, signed char>*/, 0/*add_gpu<signed char, unsigned short>*/, 0/*add_gpu<signed char, short>*/, 0/*add_gpu<signed char, int>*/, 0/*add_gpu<signed char, float>*/, 0/*add_gpu<signed char, double>*/},
-        {0/*add_gpu<unsigned short, unsigned char>*/, 0/*add_gpu<unsigned short, signed char>*/, add_gpu<unsigned short, unsigned short>, 0/*add_gpu<unsigned short, short>*/, add_gpu<unsigned short, int>, add_gpu<unsigned short, float>, add_gpu<unsigned short, double>},
-        {0/*add_gpu<short, unsigned char>*/, 0/*add_gpu<short, signed char>*/, 0/*add_gpu<short, unsigned short>*/, add_gpu<short, short>, add_gpu<short, int>, add_gpu<short, float>, add_gpu<short, double>},
-        {0/*add_gpu<int, unsigned char>*/, 0/*add_gpu<int, signed char>*/, 0/*add_gpu<int, unsigned short>*/, 0/*add_gpu<int, short>*/, add_gpu<int, int>, add_gpu<int, float>, add_gpu<int, double>},
-        {0/*add_gpu<float, unsigned char>*/, 0/*add_gpu<float, signed char>*/, 0/*add_gpu<float, unsigned short>*/, 0/*add_gpu<float, short>*/, 0/*add_gpu<float, int>*/, add_gpu<float, float>, add_gpu<float, double>},
-        {0/*add_gpu<double, unsigned char>*/, 0/*add_gpu<double, signed char>*/, 0/*add_gpu<double, unsigned short>*/, 0/*add_gpu<double, short>*/, 0/*add_gpu<double, int>*/, 0/*add_gpu<double, float>*/, add_gpu<double, double>}
-    };
-
-    static const func_t npp_funcs[7] =
-    {
-        NppArithm<CV_8U, nppiAdd_8u_C1RSfs>::call,
-        0,
-        NppArithm<CV_16U, nppiAdd_16u_C1RSfs>::call,
-        NppArithm<CV_16S, nppiAdd_16s_C1RSfs>::call,
-        NppArithm<CV_32S, nppiAdd_32s_C1RSfs>::call,
-        NppArithm<CV_32F, nppiAdd_32f_C1R>::call,
-        add_gpu<double, double>
-    };
-
-    CV_Assert(src1.type() != CV_8S);
-    CV_Assert(src1.type() == src2.type() && src1.size() == src2.size());
-    CV_Assert(mask.empty() || (src1.channels() == 1 && mask.size() == src1.size() && mask.type() == CV_8U));
-
-    if (dtype < 0)
-        dtype = src1.depth();
-
-    dst.create(src1.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels()));
-
-    cudaStream_t stream = StreamAccessor::getStream(s);
-
-    if (mask.empty() && dst.type() == src1.type())
-    {
-        npp_funcs[src1.depth()](src1.reshape(1), src2.reshape(1), dst.reshape(1), PtrStepb(), stream);
-        return;
-    }
-
-    const func_t func = funcs[src1.depth()][dst.depth()];
-    CV_Assert(func != 0);
-
-    func(src1.reshape(1), src2.reshape(1), dst.reshape(1), mask, stream);
-}
-
-namespace
-{
     template<int DEPTH, int cn> struct NppArithmScalarFunc
     {
         typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
@@ -262,7 +188,7 @@ namespace
     {
         typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;

-        static void call(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream)
+        static void call(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream)
         {
             NppStreamHandler h(stream);
@@ -272,7 +198,7 @@ namespace
             const npp_t pConstants[] = { saturate_cast<npp_t>(sc.val[0]), saturate_cast<npp_t>(sc.val[1]), saturate_cast<npp_t>(sc.val[2]), saturate_cast<npp_t>(sc.val[3]) };

-            nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), pConstants, dst.ptr<npp_t>(), static_cast<int>(dst.step), sz, 0) );
+            nppSafeCall( func((const npp_t*)src.data, static_cast<int>(src.step), pConstants, (npp_t*)dst.data, static_cast<int>(dst.step), sz, 0) );

             if (stream == 0)
                 cudaSafeCall( cudaDeviceSynchronize() );
@@ -282,7 +208,7 @@ namespace
     {
         typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;

-        static void call(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream)
+        static void call(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream)
         {
             NppStreamHandler h(stream);
@@ -290,7 +216,7 @@ namespace
             sz.width = src.cols;
             sz.height = src.rows;

-            nppSafeCall( func(src.ptr<npp_t>(), static_cast<int>(src.step), saturate_cast<npp_t>(sc.val[0]), dst.ptr<npp_t>(), static_cast<int>(dst.step), sz, 0) );
+            nppSafeCall( func((const npp_t*)src.data, static_cast<int>(src.step), saturate_cast<npp_t>(sc.val[0]), (npp_t*)dst.data, static_cast<int>(dst.step), sz, 0) );

             if (stream == 0)
                 cudaSafeCall( cudaDeviceSynchronize() );
@@ -301,7 +227,7 @@ namespace
         typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
         typedef typename NppTypeTraits<DEPTH>::npp_complex_type npp_complex_type;

-        static void call(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream)
+        static void call(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream)
         {
             NppStreamHandler h(stream);
@@ -313,8 +239,8 @@ namespace
             nConstant.re = saturate_cast<npp_t>(sc.val[0]);
             nConstant.im = saturate_cast<npp_t>(sc.val[1]);

-            nppSafeCall( func(src.ptr<npp_complex_type>(), static_cast<int>(src.step), nConstant,
-                dst.ptr<npp_complex_type>(), static_cast<int>(dst.step), sz, 0) );
+            nppSafeCall( func((const npp_complex_type*)src.data, static_cast<int>(src.step), nConstant,
+                (npp_complex_type*)dst.data, static_cast<int>(dst.step), sz, 0) );

             if (stream == 0)
                 cudaSafeCall( cudaDeviceSynchronize() );
@@ -322,7 +248,9 @@ namespace
     };

     template<int cn, typename NppArithmScalarFunc<CV_32F, cn>::func_ptr func> struct NppArithmScalar<CV_32F, cn, func>
     {
-        static void call(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream)
+        typedef typename NppTypeTraits<CV_32F>::npp_t npp_t;
+
+        static void call(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream)
         {
             NppStreamHandler h(stream);
@@ -332,7 +260,7 @@ namespace
             const Npp32f pConstants[] = { saturate_cast<Npp32f>(sc.val[0]), saturate_cast<Npp32f>(sc.val[1]), saturate_cast<Npp32f>(sc.val[2]), saturate_cast<Npp32f>(sc.val[3]) };

-            nppSafeCall( func(src.ptr<Npp32f>(), static_cast<int>(src.step), pConstants, dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) );
+            nppSafeCall( func((const npp_t*)src.data, static_cast<int>(src.step), pConstants, (npp_t*)dst.data, static_cast<int>(dst.step), sz) );

             if (stream == 0)
                 cudaSafeCall( cudaDeviceSynchronize() );
@@ -340,7 +268,9 @@ namespace
     };

     template<typename NppArithmScalarFunc<CV_32F, 1>::func_ptr func> struct NppArithmScalar<CV_32F, 1, func>
     {
-        static void call(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream)
+        typedef typename NppTypeTraits<CV_32F>::npp_t npp_t;
+
+        static void call(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream)
         {
             NppStreamHandler h(stream);
@@ -348,7 +278,7 @@ namespace
             sz.width = src.cols;
             sz.height = src.rows;

-            nppSafeCall( func(src.ptr<Npp32f>(), static_cast<int>(src.step), saturate_cast<Npp32f>(sc.val[0]), dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) );
+            nppSafeCall( func((const npp_t*)src.data, static_cast<int>(src.step), saturate_cast<Npp32f>(sc.val[0]), (npp_t*)dst.data, static_cast<int>(dst.step), sz) );

             if (stream == 0)
                 cudaSafeCall( cudaDeviceSynchronize() );
@@ -356,7 +286,10 @@ namespace
     };

     template<typename NppArithmScalarFunc<CV_32F, 2>::func_ptr func> struct NppArithmScalar<CV_32F, 2, func>
     {
-        static void call(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream)
+        typedef typename NppTypeTraits<CV_32F>::npp_t npp_t;
+        typedef typename NppTypeTraits<CV_32F>::npp_complex_type npp_complex_type;
+
+        static void call(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream)
         {
             NppStreamHandler h(stream);
@@ -368,7 +301,7 @@ namespace
             nConstant.re = saturate_cast<Npp32f>(sc.val[0]);
             nConstant.im = saturate_cast<Npp32f>(sc.val[1]);

-            nppSafeCall( func(src.ptr<Npp32fc>(), static_cast<int>(src.step), nConstant, dst.ptr<Npp32fc>(), static_cast<int>(dst.step), sz) );
+            nppSafeCall( func((const npp_complex_type*)src.data, static_cast<int>(src.step), nConstant, (npp_complex_type*)dst.data, static_cast<int>(dst.step), sz) );

             if (stream == 0)
                 cudaSafeCall( cudaDeviceSynchronize() );
@@ -376,40 +309,117 @@ namespace
     };
 }

+////////////////////////////////////////////////////////////////////////
+// add
+
+namespace cv { namespace gpu { namespace device
+{
+    template <typename T, typename D>
+    void add_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
+
+    template <typename T, typename D>
+    void add_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
+}}}
+
+void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)
+{
+    using namespace cv::gpu::device;
+
+    typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
+    static const func_t funcs[7][7] =
+    {
+        {add_gpu<unsigned char, unsigned char>, 0 /*add_gpu<unsigned char, signed char>*/, add_gpu<unsigned char, unsigned short>, add_gpu<unsigned char, short>, add_gpu<unsigned char, int>, add_gpu<unsigned char, float>, add_gpu<unsigned char, double>},
+        {0 /*add_gpu<signed char, unsigned char>*/, 0 /*add_gpu<signed char, signed char>*/, 0 /*add_gpu<signed char, unsigned short>*/, 0 /*add_gpu<signed char, short>*/, 0 /*add_gpu<signed char, int>*/, 0 /*add_gpu<signed char, float>*/, 0 /*add_gpu<signed char, double>*/},
+        {0 /*add_gpu<unsigned short, unsigned char>*/, 0 /*add_gpu<unsigned short, signed char>*/, add_gpu<unsigned short, unsigned short>, 0 /*add_gpu<unsigned short, short>*/, add_gpu<unsigned short, int>, add_gpu<unsigned short, float>, add_gpu<unsigned short, double>},
+        {0 /*add_gpu<short, unsigned char>*/, 0 /*add_gpu<short, signed char>*/, 0 /*add_gpu<short, unsigned short>*/, add_gpu<short, short>, add_gpu<short, int>, add_gpu<short, float>, add_gpu<short, double>},
+        {0 /*add_gpu<int, unsigned char>*/, 0 /*add_gpu<int, signed char>*/, 0 /*add_gpu<int, unsigned short>*/, 0 /*add_gpu<int, short>*/, add_gpu<int, int>, add_gpu<int, float>, add_gpu<int, double>},
+        {0 /*add_gpu<float, unsigned char>*/, 0 /*add_gpu<float, signed char>*/, 0 /*add_gpu<float, unsigned short>*/, 0 /*add_gpu<float, short>*/, 0 /*add_gpu<float, int>*/, add_gpu<float, float>, add_gpu<float, double>},
+        {0 /*add_gpu<double, unsigned char>*/, 0 /*add_gpu<double, signed char>*/, 0 /*add_gpu<double, unsigned short>*/, 0 /*add_gpu<double, short>*/, 0 /*add_gpu<double, int>*/, 0 /*add_gpu<double, float>*/, add_gpu<double, double>}
+    };
+
+    typedef void (*npp_func_t)(const DevMem2Db src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream);
+    static const npp_func_t npp_funcs[] =
+    {
+        NppArithm<CV_8U , nppiAdd_8u_C1RSfs >::call,
+        0,
+        NppArithm<CV_16U, nppiAdd_16u_C1RSfs>::call,
+        NppArithm<CV_16S, nppiAdd_16s_C1RSfs>::call,
+        NppArithm<CV_32S, nppiAdd_32s_C1RSfs>::call,
+        NppArithm<CV_32F, nppiAdd_32f_C1R   >::call
+    };
+
+    if (dtype < 0)
+        dtype = src1.depth();
+
+    CV_Assert(src1.depth() <= CV_64F && CV_MAT_DEPTH(dtype) <= CV_64F);
+    CV_Assert(src1.type() == src2.type() && src1.size() == src2.size());
+    CV_Assert(mask.empty() || (src1.channels() == 1 && mask.size() == src1.size() && mask.type() == CV_8U));
+
+    if (src1.depth() == CV_64F || CV_MAT_DEPTH(dtype) == CV_64F)
+    {
+        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
+    }
+
+    dst.create(src1.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels()));
+
+    cudaStream_t stream = StreamAccessor::getStream(s);
+
+    if (mask.empty() && dst.type() == src1.type() && src1.depth() <= CV_32F)
+    {
+        npp_funcs[src1.depth()](src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);
+        return;
+    }
+
+    const func_t func = funcs[src1.depth()][dst.depth()];
+
+    if (!func)
+        CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types");
+
+    func(src1.reshape(1), src2.reshape(1), dst.reshape(1), mask, stream);
+}
+
 void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)
 {
-    using namespace ::cv::gpu::device;
+    using namespace cv::gpu::device;

     typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
     static const func_t funcs[7][7] =
     {
         {add_gpu<unsigned char, unsigned char>, 0/*add_gpu<unsigned char, signed char>*/, add_gpu<unsigned char, unsigned short>, add_gpu<unsigned char, short>, add_gpu<unsigned char, int>, add_gpu<unsigned char, float>, add_gpu<unsigned char, double>},
         {0/*add_gpu<signed char, unsigned char>*/, 0/*add_gpu<signed char, signed char>*/, 0/*add_gpu<signed char, unsigned short>*/, 0/*add_gpu<signed char, short>*/, 0/*add_gpu<signed char, int>*/, 0/*add_gpu<signed char, float>*/, 0/*add_gpu<signed char, double>*/},
         {0/*add_gpu<unsigned short, unsigned char>*/, 0/*add_gpu<unsigned short, signed char>*/, add_gpu<unsigned short, unsigned short>, 0/*add_gpu<unsigned short, short>*/, add_gpu<unsigned short, int>, add_gpu<unsigned short, float>, add_gpu<unsigned short, double>},
         {0/*add_gpu<short, unsigned char>*/, 0/*add_gpu<short, signed char>*/, 0/*add_gpu<short, unsigned short>*/, add_gpu<short, short>, add_gpu<short, int>, add_gpu<short, float>, add_gpu<short, double>},
         {0/*add_gpu<int, unsigned char>*/, 0/*add_gpu<int, signed char>*/, 0/*add_gpu<int, unsigned short>*/, 0/*add_gpu<int, short>*/, add_gpu<int, int>, add_gpu<int, float>, add_gpu<int, double>},
         {0/*add_gpu<float, unsigned char>*/, 0/*add_gpu<float, signed char>*/, 0/*add_gpu<float, unsigned short>*/, 0/*add_gpu<float, short>*/, 0/*add_gpu<float, int>*/, add_gpu<float, float>, add_gpu<float, double>},
         {0/*add_gpu<double, unsigned char>*/, 0/*add_gpu<double, signed char>*/, 0/*add_gpu<double, unsigned short>*/, 0/*add_gpu<double, short>*/, 0/*add_gpu<double, int>*/, 0/*add_gpu<double, float>*/, add_gpu<double, double>}
     };

-    typedef void (*npp_func_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream);
+    typedef void (*npp_func_t)(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream);
     static const npp_func_t npp_funcs[7][4] =
     {
         {NppArithmScalar<CV_8U, 1, nppiAddC_8u_C1RSfs>::call, 0, NppArithmScalar<CV_8U, 3, nppiAddC_8u_C3RSfs>::call, NppArithmScalar<CV_8U, 4, nppiAddC_8u_C4RSfs>::call},
         {0, 0, 0, 0},
         {NppArithmScalar<CV_16U, 1, nppiAddC_16u_C1RSfs>::call, 0, NppArithmScalar<CV_16U, 3, nppiAddC_16u_C3RSfs>::call, NppArithmScalar<CV_16U, 4, nppiAddC_16u_C4RSfs>::call},
         {NppArithmScalar<CV_16S, 1, nppiAddC_16s_C1RSfs>::call, NppArithmScalar<CV_16S, 2, nppiAddC_16sc_C1RSfs>::call, NppArithmScalar<CV_16S, 3, nppiAddC_16s_C3RSfs>::call, NppArithmScalar<CV_16S, 4, nppiAddC_16s_C4RSfs>::call},
         {NppArithmScalar<CV_32S, 1, nppiAddC_32s_C1RSfs>::call, NppArithmScalar<CV_32S, 2, nppiAddC_32sc_C1RSfs>::call, NppArithmScalar<CV_32S, 3, nppiAddC_32s_C3RSfs>::call, 0},
         {NppArithmScalar<CV_32F, 1, nppiAddC_32f_C1R>::call, NppArithmScalar<CV_32F, 2, nppiAddC_32fc_C1R>::call, NppArithmScalar<CV_32F, 3, nppiAddC_32f_C3R>::call, NppArithmScalar<CV_32F, 4, nppiAddC_32f_C4R>::call},
         {0, 0, 0, 0}
     };

-    CV_Assert(mask.empty() || (src.channels() == 1 && mask.size() == src.size() && mask.type() == CV_8U));
-
     if (dtype < 0)
         dtype = src.depth();

+    CV_Assert(src.depth() <= CV_64F && CV_MAT_DEPTH(dtype) <= CV_64F);
+    CV_Assert(src.channels() <= 4);
+    CV_Assert(mask.empty() || (src.channels() == 1 && mask.size() == src.size() && mask.type() == CV_8U));
+
+    if (src.depth() == CV_64F || CV_MAT_DEPTH(dtype) == CV_64F)
+    {
+        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
+    }
+
     dst.create(src.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()));

     cudaStream_t stream = StreamAccessor::getStream(s);
@@ -428,7 +438,9 @@ void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)
     CV_Assert(src.channels() == 1);

     const func_t func = funcs[src.depth()][dst.depth()];
-    CV_Assert(func != 0);
+
+    if (!func)
+        CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types");

     func(src, sc.val[0], dst, mask, stream);
 }
@@ -447,37 +459,43 @@ namespace cv { namespace gpu { namespace device
 void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)
 {
-    using namespace ::cv::gpu::device;
+    using namespace cv::gpu::device;

     typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
     static const func_t funcs[7][7] =
     {
         {subtract_gpu<unsigned char, unsigned char>, 0/*subtract_gpu<unsigned char, signed char>*/, subtract_gpu<unsigned char, unsigned short>, subtract_gpu<unsigned char, short>, subtract_gpu<unsigned char, int>, subtract_gpu<unsigned char, float>, subtract_gpu<unsigned char, double>},
         {0/*subtract_gpu<signed char, unsigned char>*/, 0/*subtract_gpu<signed char, signed char>*/, 0/*subtract_gpu<signed char, unsigned short>*/, 0/*subtract_gpu<signed char, short>*/, 0/*subtract_gpu<signed char, int>*/, 0/*subtract_gpu<signed char, float>*/, 0/*subtract_gpu<signed char, double>*/},
         {0/*subtract_gpu<unsigned short, unsigned char>*/, 0/*subtract_gpu<unsigned short, signed char>*/, subtract_gpu<unsigned short, unsigned short>, 0/*subtract_gpu<unsigned short, short>*/, subtract_gpu<unsigned short, int>, subtract_gpu<unsigned short, float>, subtract_gpu<unsigned short, double>},
         {0/*subtract_gpu<short, unsigned char>*/, 0/*subtract_gpu<short, signed char>*/, 0/*subtract_gpu<short, unsigned short>*/, subtract_gpu<short, short>, subtract_gpu<short, int>, subtract_gpu<short, float>, subtract_gpu<short, double>},
         {0/*subtract_gpu<int, unsigned char>*/, 0/*subtract_gpu<int, signed char>*/, 0/*subtract_gpu<int, unsigned short>*/, 0/*subtract_gpu<int, short>*/, subtract_gpu<int, int>, subtract_gpu<int, float>, subtract_gpu<int, double>},
         {0/*subtract_gpu<float, unsigned char>*/, 0/*subtract_gpu<float, signed char>*/, 0/*subtract_gpu<float, unsigned short>*/, 0/*subtract_gpu<float, short>*/, 0/*subtract_gpu<float, int>*/, subtract_gpu<float, float>, subtract_gpu<float, double>},
         {0/*subtract_gpu<double, unsigned char>*/, 0/*subtract_gpu<double, signed char>*/, 0/*subtract_gpu<double, unsigned short>*/, 0/*subtract_gpu<double, short>*/, 0/*subtract_gpu<double, int>*/, 0/*subtract_gpu<double, float>*/, subtract_gpu<double, double>}
     };

-    static const func_t npp_funcs[6] =
+    typedef void (*npp_func_t)(const DevMem2Db src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream);
+    static const npp_func_t npp_funcs[6] =
     {
         NppArithm<CV_8U, nppiSub_8u_C1RSfs>::call,
         0,
         NppArithm<CV_16U, nppiSub_16u_C1RSfs>::call,
         NppArithm<CV_16S, nppiSub_16s_C1RSfs>::call,
         NppArithm<CV_32S, nppiSub_32s_C1RSfs>::call,
         NppArithm<CV_32F, nppiSub_32f_C1R>::call
     };

-    CV_Assert(src1.type() != CV_8S);
+    if (dtype < 0)
+        dtype = src1.depth();
+
+    CV_Assert(src1.depth() <= CV_64F && CV_MAT_DEPTH(dtype) <= CV_64F);
     CV_Assert(src1.type() == src2.type() && src1.size() == src2.size());
     CV_Assert(mask.empty() || (src1.channels() == 1 && mask.size() == src1.size() && mask.type() == CV_8U));

-    if (dtype < 0)
-        dtype = src1.depth();
+    if (src1.depth() == CV_64F || CV_MAT_DEPTH(dtype) == CV_64F)
+    {
+        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
+    }

     dst.create(src1.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels()));
@@ -485,50 +503,59 @@ void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)
     if (mask.empty() && dst.type() == src1.type() && src1.depth() <= CV_32F)
     {
-        npp_funcs[src1.depth()](src2.reshape(1), src1.reshape(1), dst.reshape(1), PtrStepb(), stream);
+        npp_funcs[src1.depth()](src2.reshape(1), src1.reshape(1), dst.reshape(1), stream);
         return;
     }

     const func_t func = funcs[src1.depth()][dst.depth()];
-    CV_Assert(func != 0);
+
+    if (!func)
+        CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types");

     func(src1.reshape(1), src2.reshape(1), dst.reshape(1), mask, stream);
 }

 void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)
 {
-    using namespace ::cv::gpu::device;
+    using namespace cv::gpu::device;

     typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, const PtrStepb& mask, cudaStream_t stream);
     static const func_t funcs[7][7] =
     {
         {subtract_gpu<unsigned char, unsigned char>, 0/*subtract_gpu<unsigned char, signed char>*/, subtract_gpu<unsigned char, unsigned short>, subtract_gpu<unsigned char, short>, subtract_gpu<unsigned char, int>, subtract_gpu<unsigned char, float>, subtract_gpu<unsigned char, double>},
         {0/*subtract_gpu<signed char, unsigned char>*/, 0/*subtract_gpu<signed char, signed char>*/, 0/*subtract_gpu<signed char, unsigned short>*/, 0/*subtract_gpu<signed char, short>*/, 0/*subtract_gpu<signed char, int>*/, 0/*subtract_gpu<signed char, float>*/, 0/*subtract_gpu<signed char, double>*/},
         {0/*subtract_gpu<unsigned short, unsigned char>*/, 0/*subtract_gpu<unsigned short, signed char>*/, subtract_gpu<unsigned short, unsigned short>, 0/*subtract_gpu<unsigned short, short>*/, subtract_gpu<unsigned short, int>, subtract_gpu<unsigned short, float>, subtract_gpu<unsigned short, double>},
         {0/*subtract_gpu<short, unsigned char>*/, 0/*subtract_gpu<short, signed char>*/, 0/*subtract_gpu<short, unsigned short>*/, subtract_gpu<short, short>, subtract_gpu<short, int>, subtract_gpu<short, float>, subtract_gpu<short, double>},
         {0/*subtract_gpu<int, unsigned char>*/, 0/*subtract_gpu<int, signed char>*/, 0/*subtract_gpu<int, unsigned short>*/, 0/*subtract_gpu<int, short>*/, subtract_gpu<int, int>, subtract_gpu<int, float>, subtract_gpu<int, double>},
         {0/*subtract_gpu<float, unsigned char>*/, 0/*subtract_gpu<float, signed char>*/, 0/*subtract_gpu<float, unsigned short>*/, 0/*subtract_gpu<float, short>*/, 0/*subtract_gpu<float, int>*/, subtract_gpu<float, float>, subtract_gpu<float, double>},
         {0/*subtract_gpu<double, unsigned char>*/, 0/*subtract_gpu<double, signed char>*/, 0/*subtract_gpu<double, unsigned short>*/, 0/*subtract_gpu<double, short>*/, 0/*subtract_gpu<double, int>*/, 0/*subtract_gpu<double, float>*/, subtract_gpu<double, double>}
     };

-    typedef void (*npp_func_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream);
+    typedef void (*npp_func_t)(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream);
     static const npp_func_t npp_funcs[7][4] =
     {
         {NppArithmScalar<CV_8U, 1, nppiSubC_8u_C1RSfs>::call, 0, NppArithmScalar<CV_8U, 3, nppiSubC_8u_C3RSfs>::call, NppArithmScalar<CV_8U, 4, nppiSubC_8u_C4RSfs>::call},
         {0, 0, 0, 0},
         {NppArithmScalar<CV_16U, 1, nppiSubC_16u_C1RSfs>::call, 0, NppArithmScalar<CV_16U, 3, nppiSubC_16u_C3RSfs>::call, NppArithmScalar<CV_16U, 4, nppiSubC_16u_C4RSfs>::call},
         {NppArithmScalar<CV_16S, 1, nppiSubC_16s_C1RSfs>::call, NppArithmScalar<CV_16S, 2, nppiSubC_16sc_C1RSfs>::call, NppArithmScalar<CV_16S, 3, nppiSubC_16s_C3RSfs>::call, NppArithmScalar<CV_16S, 4, nppiSubC_16s_C4RSfs>::call},
         {NppArithmScalar<CV_32S, 1, nppiSubC_32s_C1RSfs>::call, NppArithmScalar<CV_32S, 2, nppiSubC_32sc_C1RSfs>::call, NppArithmScalar<CV_32S, 3, nppiSubC_32s_C3RSfs>::call, 0},
         {NppArithmScalar<CV_32F, 1, nppiSubC_32f_C1R>::call, NppArithmScalar<CV_32F, 2, nppiSubC_32fc_C1R>::call, NppArithmScalar<CV_32F, 3, nppiSubC_32f_C3R>::call, NppArithmScalar<CV_32F, 4, nppiSubC_32f_C4R>::call},
         {0, 0, 0, 0}
     };

-    CV_Assert(mask.empty() || (src.channels() == 1 && mask.size() == src.size() && mask.type() == CV_8U));
-
     if (dtype < 0)
         dtype = src.depth();

+    CV_Assert(src.depth() <= CV_64F && CV_MAT_DEPTH(dtype) <= CV_64F);
+    CV_Assert(src.channels() <= 4);
+    CV_Assert(mask.empty() || (src.channels() == 1 && mask.size() == src.size() && mask.type() == CV_8U));
+
+    if (src.depth() == CV_64F || CV_MAT_DEPTH(dtype) == CV_64F)
+    {
+        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
+    }
+
     dst.create(src.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()));

     cudaStream_t stream = StreamAccessor::getStream(s);
@@ -547,7 +574,9 @@ void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst, const GpuMat& mask, int dtype, Stream& s)
     CV_Assert(src.channels() == 1);

     const func_t func = funcs[src.depth()][dst.depth()];
-    CV_Assert(func != 0);
+
+    if (!func)
+        CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types");

     func(src, sc.val[0], dst, mask, stream);
 }
@@ -569,31 +598,7 @@ namespace cv { namespace gpu { namespace device
 void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double scale, int dtype, Stream& s)
 {
-    using namespace ::cv::gpu::device;
-
-    typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
-
-    static const func_t funcs[7][7] =
-    {
-        {multiply_gpu<unsigned char, unsigned char>, 0/*multiply_gpu<unsigned char, signed char>*/, multiply_gpu<unsigned char, unsigned short>, multiply_gpu<unsigned char, short>, multiply_gpu<unsigned char, int>, multiply_gpu<unsigned char, float>, multiply_gpu<unsigned char, double>},
-        {0/*multiply_gpu<signed char, unsigned char>*/, 0/*multiply_gpu<signed char, signed char>*/, 0/*multiply_gpu<signed char, unsigned short>*/, 0/*multiply_gpu<signed char, short>*/, 0/*multiply_gpu<signed char, int>*/, 0/*multiply_gpu<signed char, float>*/, 0/*multiply_gpu<signed char, double>*/},
-        {0/*multiply_gpu<unsigned short, unsigned char>*/, 0/*multiply_gpu<unsigned short, signed char>*/, multiply_gpu<unsigned short, unsigned short>, 0/*multiply_gpu<unsigned short, short>*/, multiply_gpu<unsigned short, int>, multiply_gpu<unsigned short, float>, multiply_gpu<unsigned short, double>},
-        {0/*multiply_gpu<short, unsigned char>*/, 0/*multiply_gpu<short, signed char>*/, 0/*multiply_gpu<short, unsigned short>*/, multiply_gpu<short, short>, multiply_gpu<short, int>, multiply_gpu<short, float>, multiply_gpu<short, double>},
-        {0/*multiply_gpu<int, unsigned char>*/, 0/*multiply_gpu<int, signed char>*/, 0/*multiply_gpu<int, unsigned short>*/, 0/*multiply_gpu<int, short>*/, multiply_gpu<int, int>, multiply_gpu<int, float>, multiply_gpu<int, double>},
-        {0/*multiply_gpu<float, unsigned char>*/, 0/*multiply_gpu<float, signed char>*/, 0/*multiply_gpu<float, unsigned short>*/, 0/*multiply_gpu<float, short>*/, 0/*multiply_gpu<float, int>*/, multiply_gpu<float, float>, multiply_gpu<float, double>},
-        {0/*multiply_gpu<double, unsigned char>*/, 0/*multiply_gpu<double, signed char>*/, 0/*multiply_gpu<double, unsigned short>*/, 0/*multiply_gpu<double, short>*/, 0/*multiply_gpu<double, int>*/, 0/*multiply_gpu<double, float>*/, multiply_gpu<double, double>}
-    };
-
-    static const func_t npp_funcs[7] =
-    {
-        NppArithm<CV_8U, nppiMul_8u_C1RSfs>::call,
-        0,
-        NppArithm<CV_16U, nppiMul_16u_C1RSfs>::call,
-        NppArithm<CV_16S, nppiMul_16s_C1RSfs>::call,
-        NppArithm<CV_32S, nppiMul_32s_C1RSfs>::call,
-        NppArithm<CV_32F, nppiMul_32f_C1R>::call,
-        multiply_gpu<double, double>
-    };
+    using namespace cv::gpu::device;

     cudaStream_t stream = StreamAccessor::getStream(s);
@@ -615,22 +620,53 @@ void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double scale, int dtype, Stream& s)
     }
     else
     {
-        CV_Assert(src1.type() != CV_8S);
-        CV_Assert(src1.type() == src2.type() && src1.size() == src2.size());
+        typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
+        static const func_t funcs[7][7] =
+        {
+            {multiply_gpu<unsigned char, unsigned char>, 0 /*multiply_gpu<unsigned char, signed char>*/, multiply_gpu<unsigned char, unsigned short>, multiply_gpu<unsigned char, short>, multiply_gpu<unsigned char, int>, multiply_gpu<unsigned char, float>, multiply_gpu<unsigned char, double>},
+            {0 /*multiply_gpu<signed char, unsigned char>*/, 0 /*multiply_gpu<signed char, signed char>*/, 0 /*multiply_gpu<signed char, unsigned short>*/, 0 /*multiply_gpu<signed char, short>*/, 0 /*multiply_gpu<signed char, int>*/, 0 /*multiply_gpu<signed char, float>*/, 0 /*multiply_gpu<signed char, double>*/},
+            {0 /*multiply_gpu<unsigned short, unsigned char>*/, 0 /*multiply_gpu<unsigned short, signed char>*/, multiply_gpu<unsigned short, unsigned short>, 0 /*multiply_gpu<unsigned short, short>*/, multiply_gpu<unsigned short, int>, multiply_gpu<unsigned short, float>, multiply_gpu<unsigned short, double>},
+            {0 /*multiply_gpu<short, unsigned char>*/, 0 /*multiply_gpu<short, signed char>*/, 0 /*multiply_gpu<short, unsigned short>*/, multiply_gpu<short, short>, multiply_gpu<short, int>, multiply_gpu<short, float>, multiply_gpu<short, double>},
+            {0 /*multiply_gpu<int, unsigned char>*/, 0 /*multiply_gpu<int, signed char>*/, 0 /*multiply_gpu<int, unsigned short>*/, 0 /*multiply_gpu<int, short>*/, multiply_gpu<int, int>, multiply_gpu<int, float>, multiply_gpu<int, double>},
+            {0 /*multiply_gpu<float, unsigned char>*/, 0 /*multiply_gpu<float, signed char>*/, 0 /*multiply_gpu<float, unsigned short>*/, 0 /*multiply_gpu<float, short>*/, 0 /*multiply_gpu<float, int>*/, multiply_gpu<float, float>, multiply_gpu<float, double>},
+            {0 /*multiply_gpu<double, unsigned char>*/, 0 /*multiply_gpu<double, signed char>*/, 0 /*multiply_gpu<double, unsigned short>*/, 0 /*multiply_gpu<double, short>*/, 0 /*multiply_gpu<double, int>*/, 0 /*multiply_gpu<double, float>*/, multiply_gpu<double, double>}
+        };
+
+        typedef void (*npp_func_t)(const DevMem2Db src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream);
+        static const npp_func_t npp_funcs[] =
+        {
+            NppArithm<CV_8U , nppiMul_8u_C1RSfs >::call,
+            0,
+            NppArithm<CV_16U, nppiMul_16u_C1RSfs>::call,
+            NppArithm<CV_16S, nppiMul_16s_C1RSfs>::call,
+            NppArithm<CV_32S, nppiMul_32s_C1RSfs>::call,
+            NppArithm<CV_32F, nppiMul_32f_C1R   >::call
+        };

         if (dtype < 0)
             dtype = src1.depth();

+        CV_Assert(src1.depth() <= CV_64F && CV_MAT_DEPTH(dtype) <= CV_64F);
+        CV_Assert(src1.type() == src2.type() && src1.size() == src2.size());
+
+        if (src1.depth() == CV_64F || CV_MAT_DEPTH(dtype) == CV_64F)
+        {
+            if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+                CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
+        }
+
         dst.create(src1.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels()));

-        if (scale == 1 && dst.type() == src1.type())
+        if (scale == 1 && dst.type() == src1.type() && src1.depth() <= CV_32F)
         {
-            npp_funcs[src1.depth()](src1.reshape(1), src2.reshape(1), dst.reshape(1), 1, stream);
+            npp_funcs[src1.depth()](src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);
             return;
         }

         const func_t func = funcs[src1.depth()][dst.depth()];
-        CV_Assert(func != 0);
+
+        if (!func)
+            CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types");

         func(src1.reshape(1), src2.reshape(1), dst.reshape(1), scale, stream);
     }
...@@ -646,56 +682,67 @@ namespace ...@@ -646,56 +682,67 @@ namespace
void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double scale, int dtype, Stream& s) void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst, double scale, int dtype, Stream& s)
{ {
using namespace ::cv::gpu::device; using namespace cv::gpu::device;
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream); typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
static const func_t funcs[7][7] = static const func_t funcs[7][7] =
{ {
{multiply_gpu<unsigned char, unsigned char>, 0/*multiply_gpu<unsigned char, signed char>*/, multiply_gpu<unsigned char, unsigned short>, multiply_gpu<unsigned char, short>, multiply_gpu<unsigned char, int>, multiply_gpu<unsigned char, float>, multiply_gpu<unsigned char, double>}, {multiply_gpu<unsigned char, unsigned char> , 0 /*multiply_gpu<unsigned char, signed char>*/ , multiply_gpu<unsigned char, unsigned short> , multiply_gpu<unsigned char, short> , multiply_gpu<unsigned char, int> , multiply_gpu<unsigned char, float> , multiply_gpu<unsigned char, double> },
{0/*multiply_gpu<signed char, unsigned char>*/, 0/*multiply_gpu<signed char, signed char>*/, 0/*multiply_gpu<signed char, unsigned short>*/, 0/*multiply_gpu<signed char, short>*/, 0/*multiply_gpu<signed char, int>*/, 0/*multiply_gpu<signed char, float>*/, 0/*multiply_gpu<signed char, double>*/}, {0 /*multiply_gpu<signed char, unsigned char>*/ , 0 /*multiply_gpu<signed char, signed char>*/ , 0 /*multiply_gpu<signed char, unsigned short>*/, 0 /*multiply_gpu<signed char, short>*/ , 0 /*multiply_gpu<signed char, int>*/, 0 /*multiply_gpu<signed char, float>*/, 0 /*multiply_gpu<signed char, double>*/},
{0/*multiply_gpu<unsigned short, unsigned char>*/, 0/*multiply_gpu<unsigned short, signed char>*/, multiply_gpu<unsigned short, unsigned short>, 0/*multiply_gpu<unsigned short, short>*/, multiply_gpu<unsigned short, int>, multiply_gpu<unsigned short, float>, multiply_gpu<unsigned short, double>}, {0 /*multiply_gpu<unsigned short, unsigned char>*/, 0 /*multiply_gpu<unsigned short, signed char>*/, multiply_gpu<unsigned short, unsigned short> , 0 /*multiply_gpu<unsigned short, short>*/, multiply_gpu<unsigned short, int> , multiply_gpu<unsigned short, float> , multiply_gpu<unsigned short, double> },
{0/*multiply_gpu<short, unsigned char>*/, 0/*multiply_gpu<short, signed char>*/, 0/*multiply_gpu<short, unsigned short>*/, multiply_gpu<short, short>, multiply_gpu<short, int>, multiply_gpu<short, float>, multiply_gpu<short, double>}, {0 /*multiply_gpu<short, unsigned char>*/ , 0 /*multiply_gpu<short, signed char>*/ , 0 /*multiply_gpu<short, unsigned short>*/ , multiply_gpu<short, short> , multiply_gpu<short, int> , multiply_gpu<short, float> , multiply_gpu<short, double> },
{0/*multiply_gpu<int, unsigned char>*/, 0/*multiply_gpu<int, signed char>*/, 0/*multiply_gpu<int, unsigned short>*/, 0/*multiply_gpu<int, short>*/, multiply_gpu<int, int>, multiply_gpu<int, float>, multiply_gpu<int, double>}, {0 /*multiply_gpu<int, unsigned char>*/ , 0 /*multiply_gpu<int, signed char>*/ , 0 /*multiply_gpu<int, unsigned short>*/ , 0 /*multiply_gpu<int, short>*/ , multiply_gpu<int, int> , multiply_gpu<int, float> , multiply_gpu<int, double> },
{0/*multiply_gpu<float, unsigned char>*/, 0/*multiply_gpu<float, signed char>*/, 0/*multiply_gpu<float, unsigned short>*/, 0/*multiply_gpu<float, short>*/, 0/*multiply_gpu<float, int>*/, multiply_gpu<float, float>, multiply_gpu<float, double>}, {0 /*multiply_gpu<float, unsigned char>*/ , 0 /*multiply_gpu<float, signed char>*/ , 0 /*multiply_gpu<float, unsigned short>*/ , 0 /*multiply_gpu<float, short>*/ , 0 /*multiply_gpu<float, int>*/ , multiply_gpu<float, float> , multiply_gpu<float, double> },
{0/*multiply_gpu<double, unsigned char>*/, 0/*multiply_gpu<double, signed char>*/, 0/*multiply_gpu<double, unsigned short>*/, 0/*multiply_gpu<double, short>*/, 0/*multiply_gpu<double, int>*/, 0/*multiply_gpu<double, float>*/, multiply_gpu<double, double>} {0 /*multiply_gpu<double, unsigned char>*/ , 0 /*multiply_gpu<double, signed char>*/ , 0 /*multiply_gpu<double, unsigned short>*/ , 0 /*multiply_gpu<double, short>*/ , 0 /*multiply_gpu<double, int>*/ , 0 /*multiply_gpu<double, float>*/ , multiply_gpu<double, double> }
}; };
typedef void (*npp_func_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream); typedef void (*npp_func_t)(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream);
static const npp_func_t npp_funcs[7][4] = static const npp_func_t npp_funcs[7][4] =
{ {
{NppArithmScalar<CV_8U, 1, nppiMulC_8u_C1RSfs>::call, 0, NppArithmScalar<CV_8U, 3, nppiMulC_8u_C3RSfs>::call, NppArithmScalar<CV_8U, 4, nppiMulC_8u_C4RSfs>::call}, {NppArithmScalar<CV_8U , 1, nppiMulC_8u_C1RSfs >::call, 0, NppArithmScalar<CV_8U , 3, nppiMulC_8u_C3RSfs >::call, NppArithmScalar<CV_8U , 4, nppiMulC_8u_C4RSfs >::call},
{0,0,0,0}, {0 , 0, 0 , 0 },
{NppArithmScalar<CV_16U, 1, nppiMulC_16u_C1RSfs>::call, 0, NppArithmScalar<CV_16U, 3, nppiMulC_16u_C3RSfs>::call, NppArithmScalar<CV_16U, 4, nppiMulC_16u_C4RSfs>::call}, {NppArithmScalar<CV_16U, 1, nppiMulC_16u_C1RSfs>::call, 0, NppArithmScalar<CV_16U, 3, nppiMulC_16u_C3RSfs>::call, NppArithmScalar<CV_16U, 4, nppiMulC_16u_C4RSfs>::call},
{NppArithmScalar<CV_16S, 1, nppiMulC_16s_C1RSfs>::call, 0, NppArithmScalar<CV_16S, 3, nppiMulC_16s_C3RSfs>::call, NppArithmScalar<CV_16S, 4, nppiMulC_16s_C4RSfs>::call}, {NppArithmScalar<CV_16S, 1, nppiMulC_16s_C1RSfs>::call, 0, NppArithmScalar<CV_16S, 3, nppiMulC_16s_C3RSfs>::call, NppArithmScalar<CV_16S, 4, nppiMulC_16s_C4RSfs>::call},
{NppArithmScalar<CV_32S, 1, nppiMulC_32s_C1RSfs>::call, 0, NppArithmScalar<CV_32S, 3, nppiMulC_32s_C3RSfs>::call, 0}, {NppArithmScalar<CV_32S, 1, nppiMulC_32s_C1RSfs>::call, 0, NppArithmScalar<CV_32S, 3, nppiMulC_32s_C3RSfs>::call, 0 },
{NppArithmScalar<CV_32F, 1, nppiMulC_32f_C1R>::call, 0, NppArithmScalar<CV_32F, 3, nppiMulC_32f_C3R>::call, NppArithmScalar<CV_32F, 4, nppiMulC_32f_C4R>::call}, {NppArithmScalar<CV_32F, 1, nppiMulC_32f_C1R >::call, 0, NppArithmScalar<CV_32F, 3, nppiMulC_32f_C3R >::call, NppArithmScalar<CV_32F, 4, nppiMulC_32f_C4R >::call},
{0,0,0,0} {0 , 0, 0 , 0 }
}; };
if (dtype < 0) if (dtype < 0)
dtype = src.depth(); dtype = src.depth();
CV_Assert(src.depth() <= CV_64F && CV_MAT_DEPTH(dtype) <= CV_64F);
CV_Assert(src.channels() <= 4);
if (src.depth() == CV_64F || CV_MAT_DEPTH(dtype) == CV_64F)
{
if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
}
dst.create(src.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels())); dst.create(src.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()));
cudaStream_t stream = StreamAccessor::getStream(s); cudaStream_t stream = StreamAccessor::getStream(s);
if (dst.type() == src.type() && scale == 1) if (dst.type() == src.type() && scale == 1 && (src.depth() == CV_32F || isIntScalar(sc)))
{ {
const npp_func_t npp_func = npp_funcs[src.depth()][src.channels() - 1]; const npp_func_t npp_func = npp_funcs[src.depth()][src.channels() - 1];
if (npp_func && (src.depth() == CV_32F || isIntScalar(sc))) if (npp_func)
{ {
npp_func(src, sc, dst, stream); npp_func(src, sc, dst, stream);
return; return;
} }
} }
CV_Assert(src.channels() == 1);
const func_t func = funcs[src.depth()][dst.depth()]; const func_t func = funcs[src.depth()][dst.depth()];
CV_Assert(func != 0); if (!func)
CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types");
func(src.reshape(1), sc.val[0], dst.reshape(1), scale, stream); func(src, sc.val[0], dst, scale, stream);
} }
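The NPP fast path above is only taken when scale is 1 and the scalar is integral (or the source is CV_32F); otherwise the generic CUDA kernel is used so that rounding matches the CPU implementation. The isIntScalar helper is not shown in this hunk; one plausible sketch of such a check (an assumption, not the committed implementation) is:

// Hypothetical sketch of the integer-scalar test assumed by the NPP fast path.
// The scaled-integer NPP routines (*Sfs) would round a fractional scalar
// differently from the generic kernel, so fall back unless every component
// is exactly an integer.
static bool isIntScalar(const cv::Scalar& sc)
{
    for (int i = 0; i < 4; ++i)
        if (sc.val[i] != static_cast<double>(cvRound(sc.val[i])))
            return false;
    return true;
}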
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
...@@ -718,30 +765,7 @@ namespace cv { namespace gpu { namespace device ...@@ -718,30 +765,7 @@ namespace cv { namespace gpu { namespace device
void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double scale, int dtype, Stream& s) void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double scale, int dtype, Stream& s)
{ {
using namespace ::cv::gpu::device; using namespace cv::gpu::device;
typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
static const func_t funcs[7][7] =
{
{divide_gpu<unsigned char, unsigned char>, 0/*divide_gpu<unsigned char, signed char>*/, divide_gpu<unsigned char, unsigned short>, divide_gpu<unsigned char, short>, divide_gpu<unsigned char, int>, divide_gpu<unsigned char, float>, divide_gpu<unsigned char, double>},
{0/*divide_gpu<signed char, unsigned char>*/, 0/*divide_gpu<signed char, signed char>*/, 0/*divide_gpu<signed char, unsigned short>*/, 0/*divide_gpu<signed char, short>*/, 0/*divide_gpu<signed char, int>*/, 0/*divide_gpu<signed char, float>*/, 0/*divide_gpu<signed char, double>*/},
{0/*divide_gpu<unsigned short, unsigned char>*/, 0/*divide_gpu<unsigned short, signed char>*/, divide_gpu<unsigned short, unsigned short>, 0/*divide_gpu<unsigned short, short>*/, divide_gpu<unsigned short, int>, divide_gpu<unsigned short, float>, divide_gpu<unsigned short, double>},
{0/*divide_gpu<short, unsigned char>*/, 0/*divide_gpu<short, signed char>*/, 0/*divide_gpu<short, unsigned short>*/, divide_gpu<short, short>, divide_gpu<short, int>, divide_gpu<short, float>, divide_gpu<short, double>},
{0/*divide_gpu<int, unsigned char>*/, 0/*divide_gpu<int, signed char>*/, 0/*divide_gpu<int, unsigned short>*/, 0/*divide_gpu<int, short>*/, divide_gpu<int, int>, divide_gpu<int, float>, divide_gpu<int, double>},
{0/*divide_gpu<float, unsigned char>*/, 0/*divide_gpu<float, signed char>*/, 0/*divide_gpu<float, unsigned short>*/, 0/*divide_gpu<float, short>*/, 0/*divide_gpu<float, int>*/, divide_gpu<float, float>, divide_gpu<float, double>},
{0/*divide_gpu<double, unsigned char>*/, 0/*divide_gpu<double, signed char>*/, 0/*divide_gpu<double, unsigned short>*/, 0/*divide_gpu<double, short>*/, 0/*divide_gpu<double, int>*/, 0/*divide_gpu<double, float>*/, divide_gpu<double, double>}
};
static const func_t npp_funcs[6] =
{
NppArithm<CV_8U, nppiDiv_8u_C1RSfs>::call,
0,
NppArithm<CV_16U, nppiDiv_16u_C1RSfs>::call,
NppArithm<CV_16S, nppiDiv_16s_C1RSfs>::call,
NppArithm<CV_32S, nppiDiv_32s_C1RSfs>::call,
NppArithm<CV_32F, nppiDiv_32f_C1R>::call
};
cudaStream_t stream = StreamAccessor::getStream(s); cudaStream_t stream = StreamAccessor::getStream(s);
...@@ -763,22 +787,53 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double ...@@ -763,22 +787,53 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double
} }
else else
{ {
CV_Assert(src1.type() != CV_8S); typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, double scale, cudaStream_t stream);
CV_Assert(src1.type() == src2.type() && src1.size() == src2.size()); static const func_t funcs[7][7] =
{
{divide_gpu<unsigned char, unsigned char> , 0 /*divide_gpu<unsigned char, signed char>*/ , divide_gpu<unsigned char, unsigned short> , divide_gpu<unsigned char, short> , divide_gpu<unsigned char, int> , divide_gpu<unsigned char, float> , divide_gpu<unsigned char, double> },
{0 /*divide_gpu<signed char, unsigned char>*/ , 0 /*divide_gpu<signed char, signed char>*/ , 0 /*divide_gpu<signed char, unsigned short>*/, 0 /*divide_gpu<signed char, short>*/ , 0 /*divide_gpu<signed char, int>*/, 0 /*divide_gpu<signed char, float>*/, 0 /*divide_gpu<signed char, double>*/},
{0 /*divide_gpu<unsigned short, unsigned char>*/, 0 /*divide_gpu<unsigned short, signed char>*/, divide_gpu<unsigned short, unsigned short> , 0 /*divide_gpu<unsigned short, short>*/, divide_gpu<unsigned short, int> , divide_gpu<unsigned short, float> , divide_gpu<unsigned short, double> },
{0 /*divide_gpu<short, unsigned char>*/ , 0 /*divide_gpu<short, signed char>*/ , 0 /*divide_gpu<short, unsigned short>*/ , divide_gpu<short, short> , divide_gpu<short, int> , divide_gpu<short, float> , divide_gpu<short, double> },
{0 /*divide_gpu<int, unsigned char>*/ , 0 /*divide_gpu<int, signed char>*/ , 0 /*divide_gpu<int, unsigned short>*/ , 0 /*divide_gpu<int, short>*/ , divide_gpu<int, int> , divide_gpu<int, float> , divide_gpu<int, double> },
{0 /*divide_gpu<float, unsigned char>*/ , 0 /*divide_gpu<float, signed char>*/ , 0 /*divide_gpu<float, unsigned short>*/ , 0 /*divide_gpu<float, short>*/ , 0 /*divide_gpu<float, int>*/ , divide_gpu<float, float> , divide_gpu<float, double> },
{0 /*divide_gpu<double, unsigned char>*/ , 0 /*divide_gpu<double, signed char>*/ , 0 /*divide_gpu<double, unsigned short>*/ , 0 /*divide_gpu<double, short>*/ , 0 /*divide_gpu<double, int>*/ , 0 /*divide_gpu<double, float>*/ , divide_gpu<double, double> }
};
typedef void (*npp_func_t)(const DevMem2Db src1, const PtrStepb src2, PtrStepb dst, cudaStream_t stream);
static const npp_func_t npp_funcs[6] =
{
NppArithm<CV_8U , nppiDiv_8u_C1RSfs >::call,
0,
NppArithm<CV_16U, nppiDiv_16u_C1RSfs>::call,
NppArithm<CV_16S, nppiDiv_16s_C1RSfs>::call,
NppArithm<CV_32S, nppiDiv_32s_C1RSfs>::call,
NppArithm<CV_32F, nppiDiv_32f_C1R >::call
};
if (dtype < 0) if (dtype < 0)
dtype = src1.depth(); dtype = src1.depth();
CV_Assert(src1.depth() <= CV_64F && CV_MAT_DEPTH(dtype) <= CV_64F);
CV_Assert(src1.type() == src2.type() && src1.size() == src2.size());
if (src1.depth() == CV_64F || CV_MAT_DEPTH(dtype) == CV_64F)
{
if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
}
dst.create(src1.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels())); dst.create(src1.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src1.channels()));
if (scale == 1 && dst.type() == src1.type() && src1.depth() <= CV_32F) if (scale == 1 && dst.type() == src1.type() && src1.depth() <= CV_32F)
{ {
npp_funcs[src1.depth()](src2.reshape(1), src1.reshape(1), dst.reshape(1), 1, stream); npp_funcs[src1.depth()](src2.reshape(1), src1.reshape(1), dst.reshape(1), stream);
return; return;
} }
const func_t func = funcs[src1.depth()][dst.depth()]; const func_t func = funcs[src1.depth()][dst.depth()];
CV_Assert(func != 0);
if (!func)
CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types");
func(src1.reshape(1), src2.reshape(1), dst.reshape(1), scale, stream); func(src1.reshape(1), src2.reshape(1), dst.reshape(1), scale, stream);
} }
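Both branches above rely on the NATIVE_DOUBLE guard introduced in this commit: a CV_64F source or destination is rejected with CV_StsUnsupportedFormat unless the library was built for, and the current device supports, native double precision. A minimal caller-side sketch of the same capability check, using only the cv::gpu queries that appear in the diff (header name assumed for a 2.4-era build):

#include <opencv2/gpu/gpu.hpp>

// Sketch: query double-precision support before requesting a CV_64F result.
bool doubleSupported()
{
    using namespace cv::gpu;
    return TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE);
}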
...@@ -786,86 +841,105 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double ...@@ -786,86 +841,105 @@ void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, double
void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double scale, int dtype, Stream& s) void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst, double scale, int dtype, Stream& s)
{ {
using namespace ::cv::gpu::device; using namespace cv::gpu::device;
typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream); typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, double scale, cudaStream_t stream);
static const func_t funcs[7][7] = static const func_t funcs[7][7] =
{ {
{divide_gpu<unsigned char, unsigned char>, 0/*divide_gpu<unsigned char, signed char>*/, divide_gpu<unsigned char, unsigned short>, divide_gpu<unsigned char, short>, divide_gpu<unsigned char, int>, divide_gpu<unsigned char, float>, divide_gpu<unsigned char, double>}, {divide_gpu<unsigned char, unsigned char> , 0 /*divide_gpu<unsigned char, signed char>*/ , divide_gpu<unsigned char, unsigned short> , divide_gpu<unsigned char, short> , divide_gpu<unsigned char, int> , divide_gpu<unsigned char, float> , divide_gpu<unsigned char, double> },
{0/*divide_gpu<signed char, unsigned char>*/, 0/*divide_gpu<signed char, signed char>*/, 0/*divide_gpu<signed char, unsigned short>*/, 0/*divide_gpu<signed char, short>*/, 0/*divide_gpu<signed char, int>*/, 0/*divide_gpu<signed char, float>*/, 0/*divide_gpu<signed char, double>*/}, {0 /*divide_gpu<signed char, unsigned char>*/ , 0 /*divide_gpu<signed char, signed char>*/ , 0 /*divide_gpu<signed char, unsigned short>*/, 0 /*divide_gpu<signed char, short>*/ , 0 /*divide_gpu<signed char, int>*/, 0 /*divide_gpu<signed char, float>*/, 0 /*divide_gpu<signed char, double>*/},
{0/*divide_gpu<unsigned short, unsigned char>*/, 0/*divide_gpu<unsigned short, signed char>*/, divide_gpu<unsigned short, unsigned short>, 0/*divide_gpu<unsigned short, short>*/, divide_gpu<unsigned short, int>, divide_gpu<unsigned short, float>, divide_gpu<unsigned short, double>}, {0 /*divide_gpu<unsigned short, unsigned char>*/, 0 /*divide_gpu<unsigned short, signed char>*/, divide_gpu<unsigned short, unsigned short> , 0 /*divide_gpu<unsigned short, short>*/, divide_gpu<unsigned short, int> , divide_gpu<unsigned short, float> , divide_gpu<unsigned short, double> },
{0/*divide_gpu<short, unsigned char>*/, 0/*divide_gpu<short, signed char>*/, 0/*divide_gpu<short, unsigned short>*/, divide_gpu<short, short>, divide_gpu<short, int>, divide_gpu<short, float>, divide_gpu<short, double>}, {0 /*divide_gpu<short, unsigned char>*/ , 0 /*divide_gpu<short, signed char>*/ , 0 /*divide_gpu<short, unsigned short>*/ , divide_gpu<short, short> , divide_gpu<short, int> , divide_gpu<short, float> , divide_gpu<short, double> },
{0/*divide_gpu<int, unsigned char>*/, 0/*divide_gpu<int, signed char>*/, 0/*divide_gpu<int, unsigned short>*/, 0/*divide_gpu<int, short>*/, divide_gpu<int, int>, divide_gpu<int, float>, divide_gpu<int, double>}, {0 /*divide_gpu<int, unsigned char>*/ , 0 /*divide_gpu<int, signed char>*/ , 0 /*divide_gpu<int, unsigned short>*/ , 0 /*divide_gpu<int, short>*/ , divide_gpu<int, int> , divide_gpu<int, float> , divide_gpu<int, double> },
{0/*divide_gpu<float, unsigned char>*/, 0/*divide_gpu<float, signed char>*/, 0/*divide_gpu<float, unsigned short>*/, 0/*divide_gpu<float, short>*/, 0/*divide_gpu<float, int>*/, divide_gpu<float, float>, divide_gpu<float, double>}, {0 /*divide_gpu<float, unsigned char>*/ , 0 /*divide_gpu<float, signed char>*/ , 0 /*divide_gpu<float, unsigned short>*/ , 0 /*divide_gpu<float, short>*/ , 0 /*divide_gpu<float, int>*/ , divide_gpu<float, float> , divide_gpu<float, double> },
{0/*divide_gpu<double, unsigned char>*/, 0/*divide_gpu<double, signed char>*/, 0/*divide_gpu<double, unsigned short>*/, 0/*divide_gpu<double, short>*/, 0/*divide_gpu<double, int>*/, 0/*divide_gpu<double, float>*/, divide_gpu<double, double>} {0 /*divide_gpu<double, unsigned char>*/ , 0 /*divide_gpu<double, signed char>*/ , 0 /*divide_gpu<double, unsigned short>*/ , 0 /*divide_gpu<double, short>*/ , 0 /*divide_gpu<double, int>*/ , 0 /*divide_gpu<double, float>*/ , divide_gpu<double, double> }
}; };
typedef void (*npp_func_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst, cudaStream_t stream); typedef void (*npp_func_t)(const DevMem2Db src, Scalar sc, PtrStepb dst, cudaStream_t stream);
static const npp_func_t npp_funcs[7][4] = static const npp_func_t npp_funcs[7][4] =
{ {
{NppArithmScalar<CV_8U, 1, nppiDivC_8u_C1RSfs>::call, 0, NppArithmScalar<CV_8U, 3, nppiDivC_8u_C3RSfs>::call, NppArithmScalar<CV_8U, 4, nppiDivC_8u_C4RSfs>::call}, {NppArithmScalar<CV_8U , 1, nppiDivC_8u_C1RSfs >::call, 0, NppArithmScalar<CV_8U , 3, nppiDivC_8u_C3RSfs >::call, NppArithmScalar<CV_8U , 4, nppiDivC_8u_C4RSfs >::call},
{0,0,0,0}, {0 , 0, 0 , 0 },
{NppArithmScalar<CV_16U, 1, nppiDivC_16u_C1RSfs>::call, 0, NppArithmScalar<CV_16U, 3, nppiDivC_16u_C3RSfs>::call, NppArithmScalar<CV_16U, 4, nppiDivC_16u_C4RSfs>::call}, {NppArithmScalar<CV_16U, 1, nppiDivC_16u_C1RSfs>::call, 0, NppArithmScalar<CV_16U, 3, nppiDivC_16u_C3RSfs>::call, NppArithmScalar<CV_16U, 4, nppiDivC_16u_C4RSfs>::call},
{NppArithmScalar<CV_16S, 1, nppiDivC_16s_C1RSfs>::call, 0, NppArithmScalar<CV_16S, 3, nppiDivC_16s_C3RSfs>::call, NppArithmScalar<CV_16S, 4, nppiDivC_16s_C4RSfs>::call}, {NppArithmScalar<CV_16S, 1, nppiDivC_16s_C1RSfs>::call, 0, NppArithmScalar<CV_16S, 3, nppiDivC_16s_C3RSfs>::call, NppArithmScalar<CV_16S, 4, nppiDivC_16s_C4RSfs>::call},
{NppArithmScalar<CV_32S, 1, nppiDivC_32s_C1RSfs>::call, 0, NppArithmScalar<CV_32S, 3, nppiDivC_32s_C3RSfs>::call, 0}, {NppArithmScalar<CV_32S, 1, nppiDivC_32s_C1RSfs>::call, 0, NppArithmScalar<CV_32S, 3, nppiDivC_32s_C3RSfs>::call, 0 },
{NppArithmScalar<CV_32F, 1, nppiDivC_32f_C1R>::call, 0, NppArithmScalar<CV_32F, 3, nppiDivC_32f_C3R>::call, NppArithmScalar<CV_32F, 4, nppiDivC_32f_C4R>::call}, {NppArithmScalar<CV_32F, 1, nppiDivC_32f_C1R >::call, 0, NppArithmScalar<CV_32F, 3, nppiDivC_32f_C3R >::call, NppArithmScalar<CV_32F, 4, nppiDivC_32f_C4R >::call},
{0,0,0,0} {0 , 0, 0 , 0 }
}; };
if (dtype < 0) if (dtype < 0)
dtype = src.depth(); dtype = src.depth();
CV_Assert(src.depth() <= CV_64F && CV_MAT_DEPTH(dtype) <= CV_64F);
CV_Assert(src.channels() <= 4);
if (src.depth() == CV_64F || CV_MAT_DEPTH(dtype) == CV_64F)
{
if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
}
dst.create(src.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels())); dst.create(src.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()));
cudaStream_t stream = StreamAccessor::getStream(s); cudaStream_t stream = StreamAccessor::getStream(s);
if (dst.type() == src.type() && scale == 1) if (dst.type() == src.type() && scale == 1 && (src.depth() == CV_32F || isIntScalar(sc)))
{ {
const npp_func_t npp_func = npp_funcs[src.depth()][src.channels() - 1]; const npp_func_t npp_func = npp_funcs[src.depth()][src.channels() - 1];
if (npp_func && (src.depth() == CV_32F || isIntScalar(sc))) if (npp_func)
{ {
npp_func(src, sc, dst, stream); npp_func(src, sc, dst, stream);
return; return;
} }
} }
CV_Assert(src.channels() == 1);
const func_t func = funcs[src.depth()][dst.depth()]; const func_t func = funcs[src.depth()][dst.depth()];
CV_Assert(func != 0); if (!func)
CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types");
func(src.reshape(1), sc.val[0], dst.reshape(1), scale, stream); func(src, sc.val[0], dst, scale, stream);
} }
void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, Stream& s) void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, Stream& s)
{ {
using namespace ::cv::gpu::device; using namespace cv::gpu::device;
typedef void (*func_t)(double scalar, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream); typedef void (*func_t)(double scalar, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
static const func_t funcs[7][7] = static const func_t funcs[7][7] =
{ {
{divide_gpu<unsigned char, unsigned char>, 0/*divide_gpu<unsigned char, signed char>*/, divide_gpu<unsigned char, unsigned short>, divide_gpu<unsigned char, short>, divide_gpu<unsigned char, int>, divide_gpu<unsigned char, float>, divide_gpu<unsigned char, double>}, {divide_gpu<unsigned char, unsigned char> , 0 /*divide_gpu<unsigned char, signed char>*/ , divide_gpu<unsigned char, unsigned short> , divide_gpu<unsigned char, short> , divide_gpu<unsigned char, int> , divide_gpu<unsigned char, float> , divide_gpu<unsigned char, double> },
{0/*divide_gpu<signed char, unsigned char>*/, 0/*divide_gpu<signed char, signed char>*/, 0/*divide_gpu<signed char, unsigned short>*/, 0/*divide_gpu<signed char, short>*/, 0/*divide_gpu<signed char, int>*/, 0/*divide_gpu<signed char, float>*/, 0/*divide_gpu<signed char, double>*/}, {0 /*divide_gpu<signed char, unsigned char>*/ , 0 /*divide_gpu<signed char, signed char>*/ , 0 /*divide_gpu<signed char, unsigned short>*/, 0 /*divide_gpu<signed char, short>*/ , 0 /*divide_gpu<signed char, int>*/, 0 /*divide_gpu<signed char, float>*/, 0 /*divide_gpu<signed char, double>*/},
{0/*divide_gpu<unsigned short, unsigned char>*/, 0/*divide_gpu<unsigned short, signed char>*/, divide_gpu<unsigned short, unsigned short>, 0/*divide_gpu<unsigned short, short>*/, divide_gpu<unsigned short, int>, divide_gpu<unsigned short, float>, divide_gpu<unsigned short, double>}, {0 /*divide_gpu<unsigned short, unsigned char>*/, 0 /*divide_gpu<unsigned short, signed char>*/, divide_gpu<unsigned short, unsigned short> , 0 /*divide_gpu<unsigned short, short>*/, divide_gpu<unsigned short, int> , divide_gpu<unsigned short, float> , divide_gpu<unsigned short, double> },
{0/*divide_gpu<short, unsigned char>*/, 0/*divide_gpu<short, signed char>*/, 0/*divide_gpu<short, unsigned short>*/, divide_gpu<short, short>, divide_gpu<short, int>, divide_gpu<short, float>, divide_gpu<short, double>}, {0 /*divide_gpu<short, unsigned char>*/ , 0 /*divide_gpu<short, signed char>*/ , 0 /*divide_gpu<short, unsigned short>*/ , divide_gpu<short, short> , divide_gpu<short, int> , divide_gpu<short, float> , divide_gpu<short, double> },
{0/*divide_gpu<int, unsigned char>*/, 0/*divide_gpu<int, signed char>*/, 0/*divide_gpu<int, unsigned short>*/, 0/*divide_gpu<int, short>*/, divide_gpu<int, int>, divide_gpu<int, float>, divide_gpu<int, double>}, {0 /*divide_gpu<int, unsigned char>*/ , 0 /*divide_gpu<int, signed char>*/ , 0 /*divide_gpu<int, unsigned short>*/ , 0 /*divide_gpu<int, short>*/ , divide_gpu<int, int> , divide_gpu<int, float> , divide_gpu<int, double> },
{0/*divide_gpu<float, unsigned char>*/, 0/*divide_gpu<float, signed char>*/, 0/*divide_gpu<float, unsigned short>*/, 0/*divide_gpu<float, short>*/, 0/*divide_gpu<float, int>*/, divide_gpu<float, float>, divide_gpu<float, double>}, {0 /*divide_gpu<float, unsigned char>*/ , 0 /*divide_gpu<float, signed char>*/ , 0 /*divide_gpu<float, unsigned short>*/ , 0 /*divide_gpu<float, short>*/ , 0 /*divide_gpu<float, int>*/ , divide_gpu<float, float> , divide_gpu<float, double> },
{0/*divide_gpu<double, unsigned char>*/, 0/*divide_gpu<double, signed char>*/, 0/*divide_gpu<double, unsigned short>*/, 0/*divide_gpu<double, short>*/, 0/*divide_gpu<double, int>*/, 0/*divide_gpu<double, float>*/, divide_gpu<double, double>} {0 /*divide_gpu<double, unsigned char>*/ , 0 /*divide_gpu<double, signed char>*/ , 0 /*divide_gpu<double, unsigned short>*/ , 0 /*divide_gpu<double, short>*/ , 0 /*divide_gpu<double, int>*/ , 0 /*divide_gpu<double, float>*/ , divide_gpu<double, double> }
}; };
CV_Assert(src.channels() == 1);
if (dtype < 0) if (dtype < 0)
dtype = src.depth(); dtype = src.depth();
CV_Assert(src.depth() <= CV_64F && CV_MAT_DEPTH(dtype) <= CV_64F);
CV_Assert(src.channels() == 1);
if (src.depth() == CV_64F || CV_MAT_DEPTH(dtype) == CV_64F)
{
if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
}
dst.create(src.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels())); dst.create(src.size(), CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()));
cudaStream_t stream = StreamAccessor::getStream(s); cudaStream_t stream = StreamAccessor::getStream(s);
const func_t func = funcs[src.depth()][dst.depth()]; const func_t func = funcs[src.depth()][dst.depth()];
CV_Assert(func != 0);
if (!func)
CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types");
func(scale, src, dst, stream); func(scale, src, dst, stream);
} }
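For reference, a short usage sketch of this scalar-over-matrix overload (the dtype and stream defaults are assumed to mirror the other divide overloads):

// dst(y, x) = 2.0 / src(y, x), computed in 32-bit float.
cv::gpu::GpuMat src, dst;
src.upload(cv::Mat::ones(4, 4, CV_32FC1));
cv::gpu::divide(2.0, src, dst, CV_32F);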
......
...@@ -55,7 +55,7 @@ void cv::gpu::StereoBM_GPU::operator() ( const GpuMat&, const GpuMat&, GpuMat&, ...@@ -55,7 +55,7 @@ void cv::gpu::StereoBM_GPU::operator() ( const GpuMat&, const GpuMat&, GpuMat&,
#else /* !defined (HAVE_CUDA) */ #else /* !defined (HAVE_CUDA) */
namespace cv { namespace gpu { namespace device namespace cv { namespace gpu { namespace device
{ {
namespace stereobm namespace stereobm
{ {
...@@ -65,10 +65,13 @@ namespace cv { namespace gpu { namespace device ...@@ -65,10 +65,13 @@ namespace cv { namespace gpu { namespace device
} }
}}} }}}
const float defaultAvgTexThreshold = 3; namespace
{
const float defaultAvgTexThreshold = 3;
}
cv::gpu::StereoBM_GPU::StereoBM_GPU() cv::gpu::StereoBM_GPU::StereoBM_GPU()
: preset(BASIC_PRESET), ndisp(DEFAULT_NDISP), winSize(DEFAULT_WINSZ), avergeTexThreshold(defaultAvgTexThreshold) : preset(BASIC_PRESET), ndisp(DEFAULT_NDISP), winSize(DEFAULT_WINSZ), avergeTexThreshold(defaultAvgTexThreshold)
{ {
} }
...@@ -100,9 +103,9 @@ namespace ...@@ -100,9 +103,9 @@ namespace
{ {
using namespace ::cv::gpu::device::stereobm; using namespace ::cv::gpu::device::stereobm;
CV_DbgAssert(left.rows == right.rows && left.cols == right.cols); CV_Assert(left.rows == right.rows && left.cols == right.cols);
CV_DbgAssert(left.type() == CV_8UC1); CV_Assert(left.type() == CV_8UC1);
CV_DbgAssert(right.type() == CV_8UC1); CV_Assert(right.type() == CV_8UC1);
disparity.create(left.size(), CV_8U); disparity.create(left.size(), CV_8U);
minSSD.create(left.size(), CV_32S); minSSD.create(left.size(), CV_32S);
...@@ -115,7 +118,7 @@ namespace ...@@ -115,7 +118,7 @@ namespace
leBuf.create( left.size(), left.type()); leBuf.create( left.size(), left.type());
riBuf.create(right.size(), right.type()); riBuf.create(right.size(), right.type());
prefilter_xsobel( left, leBuf, 31, stream); prefilter_xsobel( left, leBuf, 31, stream);
prefilter_xsobel(right, riBuf, 31, stream); prefilter_xsobel(right, riBuf, 31, stream);
le_for_bm = leBuf; le_for_bm = leBuf;
......
...@@ -50,7 +50,7 @@ PARAM_TEST_CASE(Add_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, Ma ...@@ -50,7 +50,7 @@ PARAM_TEST_CASE(Add_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, Ma
{ {
cv::gpu::DeviceInfo devInfo; cv::gpu::DeviceInfo devInfo;
cv::Size size; cv::Size size;
std::pair<MatType, MatType> depth; std::pair<MatDepth, MatDepth> depth;
int channels; int channels;
bool useRoi; bool useRoi;
...@@ -78,14 +78,29 @@ TEST_P(Add_Array, Accuracy) ...@@ -78,14 +78,29 @@ TEST_P(Add_Array, Accuracy)
cv::Mat mat2 = randomMat(size, stype); cv::Mat mat2 = randomMat(size, stype);
cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0); cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);
cv::gpu::GpuMat dst = createMat(size, dtype, useRoi); if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
dst.setTo(cv::Scalar::all(0)); {
cv::gpu::add(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, channels == 1 ? loadMat(mask, useRoi) : cv::gpu::GpuMat(), depth.second); try
{
cv::gpu::GpuMat dst;
cv::gpu::add(loadMat(mat1), loadMat(mat2), dst, cv::gpu::GpuMat(), depth.second);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
dst.setTo(cv::Scalar::all(0));
cv::gpu::add(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, channels == 1 ? loadMat(mask, useRoi) : cv::gpu::GpuMat(), depth.second);
cv::Mat dst_gold(size, dtype, cv::Scalar::all(0)); cv::Mat dst_gold(size, dtype, cv::Scalar::all(0));
cv::add(mat1, mat2, dst_gold, channels == 1 ? mask : cv::noArray(), depth.second); cv::add(mat1, mat2, dst_gold, channels == 1 ? mask : cv::noArray(), depth.second);
EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0); EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
}
} }
INSTANTIATE_TEST_CASE_P(GPU_Core, Add_Array, testing::Combine( INSTANTIATE_TEST_CASE_P(GPU_Core, Add_Array, testing::Combine(
...@@ -102,7 +117,7 @@ PARAM_TEST_CASE(Add_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, M ...@@ -102,7 +117,7 @@ PARAM_TEST_CASE(Add_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, M
{ {
cv::gpu::DeviceInfo devInfo; cv::gpu::DeviceInfo devInfo;
cv::Size size; cv::Size size;
std::pair<MatType, MatType> depth; std::pair<MatDepth, MatDepth> depth;
bool useRoi; bool useRoi;
virtual void SetUp() virtual void SetUp()
...@@ -116,20 +131,65 @@ PARAM_TEST_CASE(Add_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, M ...@@ -116,20 +131,65 @@ PARAM_TEST_CASE(Add_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, M
} }
}; };
TEST_P(Add_Scalar, Accuracy) TEST_P(Add_Scalar, WithOutMask)
{
cv::Mat mat = randomMat(size, depth.first);
cv::Scalar val = randomScalar(0, 255);
if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
{
try
{
cv::gpu::GpuMat dst;
cv::gpu::add(loadMat(mat), val, dst, cv::gpu::GpuMat(), depth.second);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
dst.setTo(cv::Scalar::all(0));
cv::gpu::add(loadMat(mat, useRoi), val, dst, cv::gpu::GpuMat(), depth.second);
cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
cv::add(mat, val, dst_gold, cv::noArray(), depth.second);
EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
}
}
TEST_P(Add_Scalar, WithMask)
{ {
cv::Mat mat = randomMat(size, depth.first); cv::Mat mat = randomMat(size, depth.first);
cv::Scalar val = randomScalar(0, 255); cv::Scalar val = randomScalar(0, 255);
cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0); cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);
cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi); if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
dst.setTo(cv::Scalar::all(0)); {
cv::gpu::add(loadMat(mat, useRoi), val, dst, loadMat(mask, useRoi), depth.second); try
{
cv::gpu::GpuMat dst;
cv::gpu::add(loadMat(mat), val, dst, cv::gpu::GpuMat(), depth.second);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
dst.setTo(cv::Scalar::all(0));
cv::gpu::add(loadMat(mat, useRoi), val, dst, loadMat(mask, useRoi), depth.second);
cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0)); cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
cv::add(mat, val, dst_gold, mask, depth.second); cv::add(mat, val, dst_gold, mask, depth.second);
EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0); EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
}
} }
INSTANTIATE_TEST_CASE_P(GPU_Core, Add_Scalar, testing::Combine( INSTANTIATE_TEST_CASE_P(GPU_Core, Add_Scalar, testing::Combine(
...@@ -145,7 +205,7 @@ PARAM_TEST_CASE(Subtract_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDept ...@@ -145,7 +205,7 @@ PARAM_TEST_CASE(Subtract_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDept
{ {
cv::gpu::DeviceInfo devInfo; cv::gpu::DeviceInfo devInfo;
cv::Size size; cv::Size size;
std::pair<MatType, MatType> depth; std::pair<MatDepth, MatDepth> depth;
int channels; int channels;
bool useRoi; bool useRoi;
...@@ -173,14 +233,29 @@ TEST_P(Subtract_Array, Accuracy) ...@@ -173,14 +233,29 @@ TEST_P(Subtract_Array, Accuracy)
cv::Mat mat2 = randomMat(size, stype); cv::Mat mat2 = randomMat(size, stype);
cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0); cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);
cv::gpu::GpuMat dst = createMat(size, dtype, useRoi); if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
dst.setTo(cv::Scalar::all(0)); {
cv::gpu::subtract(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, channels == 1 ? loadMat(mask, useRoi) : cv::gpu::GpuMat(), depth.second); try
{
cv::gpu::GpuMat dst;
cv::gpu::subtract(loadMat(mat1), loadMat(mat2), dst, cv::gpu::GpuMat(), depth.second);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
dst.setTo(cv::Scalar::all(0));
cv::gpu::subtract(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, channels == 1 ? loadMat(mask, useRoi) : cv::gpu::GpuMat(), depth.second);
cv::Mat dst_gold(size, dtype, cv::Scalar::all(0)); cv::Mat dst_gold(size, dtype, cv::Scalar::all(0));
cv::subtract(mat1, mat2, dst_gold, channels == 1 ? mask : cv::noArray(), depth.second); cv::subtract(mat1, mat2, dst_gold, channels == 1 ? mask : cv::noArray(), depth.second);
EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0); EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
}
} }
INSTANTIATE_TEST_CASE_P(GPU_Core, Subtract_Array, testing::Combine( INSTANTIATE_TEST_CASE_P(GPU_Core, Subtract_Array, testing::Combine(
...@@ -197,7 +272,7 @@ PARAM_TEST_CASE(Subtract_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDep ...@@ -197,7 +272,7 @@ PARAM_TEST_CASE(Subtract_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDep
{ {
cv::gpu::DeviceInfo devInfo; cv::gpu::DeviceInfo devInfo;
cv::Size size; cv::Size size;
std::pair<MatType, MatType> depth; std::pair<MatDepth, MatDepth> depth;
bool useRoi; bool useRoi;
virtual void SetUp() virtual void SetUp()
...@@ -211,20 +286,65 @@ PARAM_TEST_CASE(Subtract_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDep ...@@ -211,20 +286,65 @@ PARAM_TEST_CASE(Subtract_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDep
} }
}; };
TEST_P(Subtract_Scalar, Accuracy) TEST_P(Subtract_Scalar, WithOutMask)
{
cv::Mat mat = randomMat(size, depth.first);
cv::Scalar val = randomScalar(0, 255);
if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
{
try
{
cv::gpu::GpuMat dst;
cv::gpu::subtract(loadMat(mat), val, dst, cv::gpu::GpuMat(), depth.second);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
dst.setTo(cv::Scalar::all(0));
cv::gpu::subtract(loadMat(mat, useRoi), val, dst, cv::gpu::GpuMat(), depth.second);
cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
cv::subtract(mat, val, dst_gold, cv::noArray(), depth.second);
EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
}
}
TEST_P(Subtract_Scalar, WithMask)
{ {
cv::Mat mat = randomMat(size, depth.first); cv::Mat mat = randomMat(size, depth.first);
cv::Scalar val = randomScalar(0, 255); cv::Scalar val = randomScalar(0, 255);
cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0); cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);
cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi); if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
dst.setTo(cv::Scalar::all(0)); {
cv::gpu::subtract(loadMat(mat, useRoi), val, dst, loadMat(mask, useRoi), depth.second); try
{
cv::gpu::GpuMat dst;
cv::gpu::subtract(loadMat(mat), val, dst, cv::gpu::GpuMat(), depth.second);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
dst.setTo(cv::Scalar::all(0));
cv::gpu::subtract(loadMat(mat, useRoi), val, dst, loadMat(mask, useRoi), depth.second);
cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0)); cv::Mat dst_gold(size, depth.second, cv::Scalar::all(0));
cv::subtract(mat, val, dst_gold, mask, depth.second); cv::subtract(mat, val, dst_gold, mask, depth.second);
EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0); EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
}
} }
INSTANTIATE_TEST_CASE_P(GPU_Core, Subtract_Scalar, testing::Combine( INSTANTIATE_TEST_CASE_P(GPU_Core, Subtract_Scalar, testing::Combine(
...@@ -240,7 +360,7 @@ PARAM_TEST_CASE(Multiply_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDept ...@@ -240,7 +360,7 @@ PARAM_TEST_CASE(Multiply_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDept
{ {
cv::gpu::DeviceInfo devInfo; cv::gpu::DeviceInfo devInfo;
cv::Size size; cv::Size size;
std::pair<MatType, MatType> depth; std::pair<MatDepth, MatDepth> depth;
int channels; int channels;
bool useRoi; bool useRoi;
...@@ -262,19 +382,63 @@ PARAM_TEST_CASE(Multiply_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDept ...@@ -262,19 +382,63 @@ PARAM_TEST_CASE(Multiply_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDept
} }
}; };
TEST_P(Multiply_Array, Accuracy) TEST_P(Multiply_Array, WithOutScale)
{
cv::Mat mat1 = randomMat(size, stype);
cv::Mat mat2 = randomMat(size, stype);
if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
{
try
{
cv::gpu::GpuMat dst;
cv::gpu::multiply(loadMat(mat1), loadMat(mat2), dst, 1, depth.second);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
cv::gpu::multiply(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, 1, depth.second);
cv::Mat dst_gold;
cv::multiply(mat1, mat2, dst_gold, 1, depth.second);
EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
}
}
TEST_P(Multiply_Array, WithScale)
{ {
cv::Mat mat1 = randomMat(size, stype); cv::Mat mat1 = randomMat(size, stype);
cv::Mat mat2 = randomMat(size, stype); cv::Mat mat2 = randomMat(size, stype);
double scale = randomDouble(0.0, 255.0); double scale = randomDouble(0.0, 255.0);
cv::gpu::GpuMat dst = createMat(size, dtype, useRoi); if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
cv::gpu::multiply(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, scale, depth.second); {
try
{
cv::gpu::GpuMat dst;
cv::gpu::multiply(loadMat(mat1), loadMat(mat2), dst, scale, depth.second);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
cv::gpu::multiply(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, scale, depth.second);
cv::Mat dst_gold; cv::Mat dst_gold;
cv::multiply(mat1, mat2, dst_gold, scale, depth.second); cv::multiply(mat1, mat2, dst_gold, scale, depth.second);
EXPECT_MAT_NEAR(dst_gold, dst, 1.0); EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
}
} }
INSTANTIATE_TEST_CASE_P(GPU_Core, Multiply_Array, testing::Combine( INSTANTIATE_TEST_CASE_P(GPU_Core, Multiply_Array, testing::Combine(
...@@ -389,7 +553,7 @@ PARAM_TEST_CASE(Multiply_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDep ...@@ -389,7 +553,7 @@ PARAM_TEST_CASE(Multiply_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDep
{ {
cv::gpu::DeviceInfo devInfo; cv::gpu::DeviceInfo devInfo;
cv::Size size; cv::Size size;
std::pair<MatType, MatType> depth; std::pair<MatDepth, MatDepth> depth;
bool useRoi; bool useRoi;
virtual void SetUp() virtual void SetUp()
...@@ -403,19 +567,64 @@ PARAM_TEST_CASE(Multiply_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDep ...@@ -403,19 +567,64 @@ PARAM_TEST_CASE(Multiply_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDep
} }
}; };
TEST_P(Multiply_Scalar, Accuracy) TEST_P(Multiply_Scalar, WithOutScale)
{
cv::Mat mat = randomMat(size, depth.first);
cv::Scalar val = randomScalar(0, 255);
if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
{
try
{
cv::gpu::GpuMat dst;
cv::gpu::multiply(loadMat(mat), val, dst, 1, depth.second);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
cv::gpu::multiply(loadMat(mat, useRoi), val, dst, 1, depth.second);
cv::Mat dst_gold;
cv::multiply(mat, val, dst_gold, 1, depth.second);
EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-2 : 0.0);
}
}
TEST_P(Multiply_Scalar, WithScale)
{ {
cv::Mat mat = randomMat(size, depth.first); cv::Mat mat = randomMat(size, depth.first);
cv::Scalar val = randomScalar(0, 255); cv::Scalar val = randomScalar(0, 255);
double scale = randomDouble(0.0, 255.0); double scale = randomDouble(0.0, 255.0);
cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi); if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
cv::gpu::multiply(loadMat(mat, useRoi), val, dst, scale, depth.second); {
try
{
cv::gpu::GpuMat dst;
cv::gpu::multiply(loadMat(mat), val, dst, scale, depth.second);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
cv::gpu::multiply(loadMat(mat, useRoi), val, dst, scale, depth.second);
cv::Mat dst_gold; cv::Mat dst_gold;
cv::multiply(mat, val, dst_gold, scale, depth.second); cv::multiply(mat, val, dst_gold, scale, depth.second);
EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0); EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
}
} }
INSTANTIATE_TEST_CASE_P(GPU_Core, Multiply_Scalar, testing::Combine( INSTANTIATE_TEST_CASE_P(GPU_Core, Multiply_Scalar, testing::Combine(
...@@ -431,7 +640,7 @@ PARAM_TEST_CASE(Divide_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, ...@@ -431,7 +640,7 @@ PARAM_TEST_CASE(Divide_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth,
{ {
cv::gpu::DeviceInfo devInfo; cv::gpu::DeviceInfo devInfo;
cv::Size size; cv::Size size;
std::pair<MatType, MatType> depth; std::pair<MatDepth, MatDepth> depth;
int channels; int channels;
bool useRoi; bool useRoi;
...@@ -453,19 +662,64 @@ PARAM_TEST_CASE(Divide_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth, ...@@ -453,19 +662,64 @@ PARAM_TEST_CASE(Divide_Array, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth,
} }
}; };
TEST_P(Divide_Array, Accuracy) TEST_P(Divide_Array, WithOutScale)
{
cv::Mat mat1 = randomMat(size, stype);
cv::Mat mat2 = randomMat(size, stype, 1.0, 255.0);
if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
{
try
{
cv::gpu::GpuMat dst;
cv::gpu::divide(loadMat(mat1), loadMat(mat2), dst, 1, depth.second);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
cv::gpu::divide(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, 1, depth.second);
cv::Mat dst_gold;
cv::divide(mat1, mat2, dst_gold, 1, depth.second);
EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0);
}
}
TEST_P(Divide_Array, WithScale)
{ {
cv::Mat mat1 = randomMat(size, stype); cv::Mat mat1 = randomMat(size, stype);
cv::Mat mat2 = randomMat(size, stype, 1.0, 255.0); cv::Mat mat2 = randomMat(size, stype, 1.0, 255.0);
double scale = randomDouble(0.0, 255.0); double scale = randomDouble(0.0, 255.0);
cv::gpu::GpuMat dst = createMat(size, dtype, useRoi); if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
cv::gpu::divide(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, scale, depth.second); {
try
{
cv::gpu::GpuMat dst;
cv::gpu::divide(loadMat(mat1), loadMat(mat2), dst, scale, depth.second);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
cv::gpu::GpuMat dst = createMat(size, dtype, useRoi);
cv::gpu::divide(loadMat(mat1, useRoi), loadMat(mat2, useRoi), dst, scale, depth.second);
cv::Mat dst_gold; cv::Mat dst_gold;
cv::divide(mat1, mat2, dst_gold, scale, depth.second); cv::divide(mat1, mat2, dst_gold, scale, depth.second);
EXPECT_MAT_NEAR(dst_gold, dst, 1.0); EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 1.0);
}
} }
INSTANTIATE_TEST_CASE_P(GPU_Core, Divide_Array, testing::Combine( INSTANTIATE_TEST_CASE_P(GPU_Core, Divide_Array, testing::Combine(
...@@ -580,7 +834,7 @@ PARAM_TEST_CASE(Divide_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth ...@@ -580,7 +834,7 @@ PARAM_TEST_CASE(Divide_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth
{ {
cv::gpu::DeviceInfo devInfo; cv::gpu::DeviceInfo devInfo;
cv::Size size; cv::Size size;
std::pair<MatType, MatType> depth; std::pair<MatDepth, MatDepth> depth;
bool useRoi; bool useRoi;
virtual void SetUp() virtual void SetUp()
...@@ -594,19 +848,63 @@ PARAM_TEST_CASE(Divide_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth ...@@ -594,19 +848,63 @@ PARAM_TEST_CASE(Divide_Scalar, cv::gpu::DeviceInfo, cv::Size, std::pair<MatDepth
} }
}; };
TEST_P(Divide_Scalar, Accuracy) TEST_P(Divide_Scalar, WithOutScale)
{
cv::Mat mat = randomMat(size, depth.first);
cv::Scalar val = randomScalar(1.0, 255.0);
if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
{
try
{
cv::gpu::GpuMat dst;
cv::gpu::divide(loadMat(mat), val, dst, 1, depth.second);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
cv::gpu::divide(loadMat(mat, useRoi), val, dst, 1, depth.second);
cv::Mat dst_gold;
cv::divide(mat, val, dst_gold, 1, depth.second);
EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
}
}
TEST_P(Divide_Scalar, WithScale)
{ {
cv::Mat mat = randomMat(size, depth.first); cv::Mat mat = randomMat(size, depth.first);
cv::Scalar val = randomScalar(1.0, 255.0); cv::Scalar val = randomScalar(1.0, 255.0);
double scale = randomDouble(0.0, 255.0); double scale = randomDouble(0.0, 255.0);
cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi); if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
cv::gpu::divide(loadMat(mat, useRoi), val, dst, scale, depth.second); {
try
{
cv::gpu::GpuMat dst;
cv::gpu::divide(loadMat(mat), val, dst, scale, depth.second);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
cv::gpu::divide(loadMat(mat, useRoi), val, dst, scale, depth.second);
cv::Mat dst_gold; cv::Mat dst_gold;
cv::divide(mat, val, dst_gold, scale, depth.second); cv::divide(mat, val, dst_gold, scale, depth.second);
EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0); EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
}
} }
INSTANTIATE_TEST_CASE_P(GPU_Core, Divide_Scalar, testing::Combine( INSTANTIATE_TEST_CASE_P(GPU_Core, Divide_Scalar, testing::Combine(
...@@ -622,7 +920,7 @@ PARAM_TEST_CASE(Divide_Scalar_Inv, cv::gpu::DeviceInfo, cv::Size, std::pair<MatD ...@@ -622,7 +920,7 @@ PARAM_TEST_CASE(Divide_Scalar_Inv, cv::gpu::DeviceInfo, cv::Size, std::pair<MatD
{ {
cv::gpu::DeviceInfo devInfo; cv::gpu::DeviceInfo devInfo;
cv::Size size; cv::Size size;
std::pair<MatType, MatType> depth; std::pair<MatDepth, MatDepth> depth;
bool useRoi; bool useRoi;
virtual void SetUp() virtual void SetUp()
...@@ -641,13 +939,28 @@ TEST_P(Divide_Scalar_Inv, Accuracy) ...@@ -641,13 +939,28 @@ TEST_P(Divide_Scalar_Inv, Accuracy)
double scale = randomDouble(0.0, 255.0); double scale = randomDouble(0.0, 255.0);
cv::Mat mat = randomMat(size, depth.first, 1.0, 255.0); cv::Mat mat = randomMat(size, depth.first, 1.0, 255.0);
cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi); if ((depth.first == CV_64F || depth.second == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
cv::gpu::divide(scale, loadMat(mat, useRoi), dst, depth.second); {
try
{
cv::gpu::GpuMat dst;
cv::gpu::divide(scale, loadMat(mat), dst, depth.second);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
cv::gpu::GpuMat dst = createMat(size, depth.second, useRoi);
cv::gpu::divide(scale, loadMat(mat, useRoi), dst, depth.second);
cv::Mat dst_gold; cv::Mat dst_gold;
cv::divide(scale, mat, dst_gold, depth.second); cv::divide(scale, mat, dst_gold, depth.second);
EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0); EXPECT_MAT_NEAR(dst_gold, dst, depth.first >= CV_32F || depth.second >= CV_32F ? 1e-4 : 0.0);
}
} }
INSTANTIATE_TEST_CASE_P(GPU_Core, Divide_Scalar_Inv, testing::Combine( INSTANTIATE_TEST_CASE_P(GPU_Core, Divide_Scalar_Inv, testing::Combine(
......
...@@ -254,7 +254,7 @@ static void cvImageWidget_set_size(GtkWidget * widget, int max_width, int max_he ...@@ -254,7 +254,7 @@ static void cvImageWidget_set_size(GtkWidget * widget, int max_width, int max_he
} }
static void static void
cvImageWidget_size_allocate (GtkWidget *widget, cvImageWidget_size_allocate (GtkWidget *widget,
GtkAllocation *allocation) GtkAllocation *allocation)
{ {
CvImageWidget *image_widget; CvImageWidget *image_widget;
...@@ -719,7 +719,7 @@ namespace ...@@ -719,7 +719,7 @@ namespace
void generateBitmapFont(const std::string& family, int height, int weight, bool italic, bool underline, int start, int count, int base) const; void generateBitmapFont(const std::string& family, int height, int weight, bool italic, bool underline, int start, int count, int base) const;
bool isGlContextInitialized() const; bool isGlContextInitialized() const;
PFNGLGENBUFFERSPROC glGenBuffersExt; PFNGLGENBUFFERSPROC glGenBuffersExt;
PFNGLDELETEBUFFERSPROC glDeleteBuffersExt; PFNGLDELETEBUFFERSPROC glDeleteBuffersExt;
...@@ -866,22 +866,22 @@ namespace ...@@ -866,22 +866,22 @@ namespace
CV_FUNCNAME( "GlFuncTab_GTK::generateBitmapFont" ); CV_FUNCNAME( "GlFuncTab_GTK::generateBitmapFont" );
__BEGIN__; __BEGIN__;
fontDecr = pango_font_description_new(); fontDecr = pango_font_description_new();
pango_font_description_set_size(fontDecr, height); pango_font_description_set_size(fontDecr, height);
pango_font_description_set_family_static(fontDecr, family.c_str()); pango_font_description_set_family_static(fontDecr, family.c_str());
pango_font_description_set_weight(fontDecr, static_cast<PangoWeight>(weight)); pango_font_description_set_weight(fontDecr, static_cast<PangoWeight>(weight));
pango_font_description_set_style(fontDecr, italic ? PANGO_STYLE_ITALIC : PANGO_STYLE_NORMAL); pango_font_description_set_style(fontDecr, italic ? PANGO_STYLE_ITALIC : PANGO_STYLE_NORMAL);
pangoFont = gdk_gl_font_use_pango_font(fontDecr, start, count, base); pangoFont = gdk_gl_font_use_pango_font(fontDecr, start, count, base);
pango_font_description_free(fontDecr); pango_font_description_free(fontDecr);
if (!pangoFont) if (!pangoFont)
CV_ERROR(CV_OpenGlApiCallError, "Can't create font"); CV_ERROR(CV_OpenGlApiCallError, "Can't create font");
...@@ -960,13 +960,13 @@ namespace ...@@ -960,13 +960,13 @@ namespace
void releaseGlContext(CvWindow* window) void releaseGlContext(CvWindow* window)
{ {
CV_FUNCNAME( "releaseGlContext" ); //CV_FUNCNAME( "releaseGlContext" );
__BEGIN__; //__BEGIN__;
window->useGl = false; window->useGl = false;
__END__; //__END__;
} }
void drawGl(CvWindow* window) void drawGl(CvWindow* window)
......