Commit 26691e00 authored by Vladislav Vinogradov

fixed gpu core tests (added additional check for device's feature support)

added assertion on double types for old devices
parent 98d7b10c
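
Both fixes revolve around one recurring guard: before any CV_64F code path runs, the host code now checks that the binary was built with double support and that the active device actually provides it. A minimal sketch of the pattern with the 2.4-era cv::gpu API (the helper name is hypothetical; the commit writes the check out inline each time):

    // Hypothetical helper collecting the guard this commit repeats inline.
    void requireNativeDouble(int depth)
    {
        if (depth == CV_64F)
        {
            if (!cv::gpu::TargetArchs::builtWith(cv::gpu::NATIVE_DOUBLE) ||
                !cv::gpu::DeviceInfo().supports(cv::gpu::NATIVE_DOUBLE))
                CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
        }
    }

builtWith() answers "was any kernel compiled for an architecture with native double?", while DeviceInfo().supports() asks the device that will actually run the code; both must hold, hence the || of the negations.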
@@ -69,16 +69,7 @@ void cv::gpu::gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const G
 {
 #ifndef HAVE_CUBLAS
-    OPENCV_GPU_UNUSED(src1);
-    OPENCV_GPU_UNUSED(src2);
-    OPENCV_GPU_UNUSED(alpha);
-    OPENCV_GPU_UNUSED(src3);
-    OPENCV_GPU_UNUSED(beta);
-    OPENCV_GPU_UNUSED(dst);
-    OPENCV_GPU_UNUSED(flags);
-    OPENCV_GPU_UNUSED(stream);
-    throw_nogpu();
+    CV_Error(CV_StsNotImplemented, "The library was built without CUBLAS");
 #else
@@ -87,6 +78,12 @@ void cv::gpu::gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const G
     CV_Assert(src1.type() == CV_32FC1 || src1.type() == CV_32FC2 || src1.type() == CV_64FC1 || src1.type() == CV_64FC2);
     CV_Assert(src2.type() == src1.type() && (src3.empty() || src3.type() == src1.type()));
 
+    if (src1.depth() == CV_64F)
+    {
+        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
+    }
+
     bool tr1 = (flags & GEMM_1_T) != 0;
     bool tr2 = (flags & GEMM_2_T) != 0;
     bool tr3 = (flags & GEMM_3_T) != 0;
@@ -230,6 +227,9 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst, Stream& s)
     }
     else // if (src.elemSize() == 8)
     {
+        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
+
         NppStStreamHandler h(stream);
 
         NcvSize32u sz;
@@ -290,7 +290,6 @@ namespace
 void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode, Stream& stream)
 {
     typedef void (*func_t)(const GpuMat& src, GpuMat& dst, int flipCode, cudaStream_t stream);
-
     static const func_t funcs[6][4] =
     {
         {NppMirror<CV_8U, nppiMirror_8u_C1R>::call, 0, NppMirror<CV_8U, nppiMirror_8u_C3R>::call, NppMirror<CV_8U, nppiMirror_8u_C4R>::call},
@@ -403,12 +402,12 @@ namespace
 void cv::gpu::magnitude(const GpuMat& src, GpuMat& dst, Stream& stream)
 {
-    ::npp_magnitude(src, dst, nppiMagnitude_32fc32f_C1R, StreamAccessor::getStream(stream));
+    npp_magnitude(src, dst, nppiMagnitude_32fc32f_C1R, StreamAccessor::getStream(stream));
 }
 
 void cv::gpu::magnitudeSqr(const GpuMat& src, GpuMat& dst, Stream& stream)
 {
-    ::npp_magnitude(src, dst, nppiMagnitudeSqr_32fc32f_C1R, StreamAccessor::getStream(stream));
+    npp_magnitude(src, dst, nppiMagnitudeSqr_32fc32f_C1R, StreamAccessor::getStream(stream));
 }
 
 ////////////////////////////////////////////////////////////////////////
@@ -429,7 +428,7 @@ namespace
     {
         using namespace ::cv::gpu::device::mathfunc;
 
-        CV_DbgAssert(x.size() == y.size() && x.type() == y.type());
+        CV_Assert(x.size() == y.size() && x.type() == y.type());
         CV_Assert(x.depth() == CV_32F);
 
         if (mag)
@@ -449,7 +448,7 @@ namespace
     {
         using namespace ::cv::gpu::device::mathfunc;
 
-        CV_DbgAssert((mag.empty() || mag.size() == angle.size()) && mag.type() == angle.type());
+        CV_Assert((mag.empty() || mag.size() == angle.size()) && mag.type() == angle.type());
         CV_Assert(mag.depth() == CV_32F);
 
         x.create(mag.size(), mag.type());
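
The CV_DbgAssert to CV_Assert promotions above are behavioral, not cosmetic: CV_DbgAssert compiles to nothing outside debug builds, so these size/type checks used to vanish from release binaries. Roughly how the 2.4-era macro is defined (simplified sketch):

    #ifdef _DEBUG
    #  define CV_DbgAssert(expr) CV_Assert(expr)  // debug build: real check
    #else
    #  define CV_DbgAssert(expr)                  // release build: compiled out
    #endif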
...
@@ -1096,18 +1096,18 @@ namespace cv { namespace gpu { namespace device
         enum { smart_shift = 4 };
     };
 
-    template <typename T> void absdiff_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream)
+    template <typename T> void absdiff_gpu(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream)
     {
         cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<T>)dst, Absdiff<T>(), WithOutMask(), stream);
     }
 
-    template void absdiff_gpu<uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
-    template void absdiff_gpu<schar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
-    template void absdiff_gpu<ushort>(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
-    template void absdiff_gpu<short >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
-    template void absdiff_gpu<int   >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
-    template void absdiff_gpu<float >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
-    template void absdiff_gpu<double>(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
+    //template void absdiff_gpu<uchar >(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template void absdiff_gpu<schar >(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    //template void absdiff_gpu<ushort>(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template void absdiff_gpu<short >(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template void absdiff_gpu<int   >(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    //template void absdiff_gpu<float >(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template void absdiff_gpu<double>(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
 
     template <typename T> struct AbsdiffScalar : unary_function<T, T>
     {
@@ -1140,20 +1140,20 @@ namespace cv { namespace gpu { namespace device
         enum { smart_shift = 4 };
     };
 
-    template <typename T> void absdiff_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, cudaStream_t stream)
+    template <typename T> void absdiff_gpu(const DevMem2Db src1, double val, DevMem2Db dst, cudaStream_t stream)
     {
         cudaSafeCall( cudaSetDoubleForDevice(&val) );
         AbsdiffScalar<T> op(val);
         cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)dst, op, WithOutMask(), stream);
     }
 
-    //template void absdiff_gpu<uchar >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
-    template void absdiff_gpu<schar >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
-    //template void absdiff_gpu<ushort>(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
-    template void absdiff_gpu<short >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
-    template void absdiff_gpu<int   >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
-    //template void absdiff_gpu<float >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
-    template void absdiff_gpu<double>(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
+    //template void absdiff_gpu<uchar >(const DevMem2Db src1, double src2, DevMem2Db dst, cudaStream_t stream);
+    template void absdiff_gpu<schar >(const DevMem2Db src1, double src2, DevMem2Db dst, cudaStream_t stream);
+    //template void absdiff_gpu<ushort>(const DevMem2Db src1, double src2, DevMem2Db dst, cudaStream_t stream);
+    template void absdiff_gpu<short >(const DevMem2Db src1, double src2, DevMem2Db dst, cudaStream_t stream);
+    template void absdiff_gpu<int   >(const DevMem2Db src1, double src2, DevMem2Db dst, cudaStream_t stream);
+    //template void absdiff_gpu<float >(const DevMem2Db src1, double src2, DevMem2Db dst, cudaStream_t stream);
+    template void absdiff_gpu<double>(const DevMem2Db src1, double src2, DevMem2Db dst, cudaStream_t stream);
 
     //////////////////////////////////////////////////////////////////////////////////////
     // Compare
@@ -1587,60 +1587,60 @@ namespace cv { namespace gpu { namespace device
     };
 
     template <typename T>
-    void min_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream)
+    void min_gpu(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream)
     {
-        cv::gpu::device::transform(src1, src2, dst, minimum<T>(), WithOutMask(), stream);
+        cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<T>)dst, minimum<T>(), WithOutMask(), stream);
     }
 
-    template void min_gpu<uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
-    template void min_gpu<schar >(const DevMem2D_<schar>& src1, const DevMem2D_<schar>& src2, const DevMem2D_<schar>& dst, cudaStream_t stream);
-    template void min_gpu<ushort>(const DevMem2D_<ushort>& src1, const DevMem2D_<ushort>& src2, const DevMem2D_<ushort>& dst, cudaStream_t stream);
-    template void min_gpu<short >(const DevMem2D_<short>& src1, const DevMem2D_<short>& src2, const DevMem2D_<short>& dst, cudaStream_t stream);
-    template void min_gpu<int   >(const DevMem2D_<int>& src1, const DevMem2D_<int>& src2, const DevMem2D_<int>& dst, cudaStream_t stream);
-    template void min_gpu<float >(const DevMem2D_<float>& src1, const DevMem2D_<float>& src2, const DevMem2D_<float>& dst, cudaStream_t stream);
-    template void min_gpu<double>(const DevMem2D_<double>& src1, const DevMem2D_<double>& src2, const DevMem2D_<double>& dst, cudaStream_t stream);
+    template void min_gpu<uchar >(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template void min_gpu<schar >(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template void min_gpu<ushort>(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template void min_gpu<short >(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template void min_gpu<int   >(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template void min_gpu<float >(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template void min_gpu<double>(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
 
     template <typename T>
-    void max_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream)
+    void max_gpu(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream)
     {
-        cv::gpu::device::transform(src1, src2, dst, maximum<T>(), WithOutMask(), stream);
+        cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<T>)dst, maximum<T>(), WithOutMask(), stream);
     }
 
-    template void max_gpu<uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
-    template void max_gpu<schar >(const DevMem2D_<schar>& src1, const DevMem2D_<schar>& src2, const DevMem2D_<schar>& dst, cudaStream_t stream);
-    template void max_gpu<ushort>(const DevMem2D_<ushort>& src1, const DevMem2D_<ushort>& src2, const DevMem2D_<ushort>& dst, cudaStream_t stream);
-    template void max_gpu<short >(const DevMem2D_<short>& src1, const DevMem2D_<short>& src2, const DevMem2D_<short>& dst, cudaStream_t stream);
-    template void max_gpu<int   >(const DevMem2D_<int>& src1, const DevMem2D_<int>& src2, const DevMem2D_<int>& dst, cudaStream_t stream);
-    template void max_gpu<float >(const DevMem2D_<float>& src1, const DevMem2D_<float>& src2, const DevMem2D_<float>& dst, cudaStream_t stream);
-    template void max_gpu<double>(const DevMem2D_<double>& src1, const DevMem2D_<double>& src2, const DevMem2D_<double>& dst, cudaStream_t stream);
+    template void max_gpu<uchar >(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template void max_gpu<schar >(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template void max_gpu<ushort>(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template void max_gpu<short >(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template void max_gpu<int   >(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template void max_gpu<float >(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template void max_gpu<double>(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
 
     template <typename T>
-    void min_gpu(const DevMem2D_<T>& src1, T src2, const DevMem2D_<T>& dst, cudaStream_t stream)
+    void min_gpu(const DevMem2Db src, T val, DevMem2Db dst, cudaStream_t stream)
     {
-        cv::gpu::device::transform(src1, dst, device::bind2nd(minimum<T>(), src2), WithOutMask(), stream);
+        cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<T>)dst, device::bind2nd(minimum<T>(), val), WithOutMask(), stream);
     }
 
-    template void min_gpu<uchar >(const DevMem2Db& src1, uchar src2, const DevMem2Db& dst, cudaStream_t stream);
-    template void min_gpu<schar >(const DevMem2D_<schar>& src1, schar src2, const DevMem2D_<schar>& dst, cudaStream_t stream);
-    template void min_gpu<ushort>(const DevMem2D_<ushort>& src1, ushort src2, const DevMem2D_<ushort>& dst, cudaStream_t stream);
-    template void min_gpu<short >(const DevMem2D_<short>& src1, short src2, const DevMem2D_<short>& dst, cudaStream_t stream);
-    template void min_gpu<int   >(const DevMem2D_<int>& src1, int src2, const DevMem2D_<int>& dst, cudaStream_t stream);
-    template void min_gpu<float >(const DevMem2D_<float>& src1, float src2, const DevMem2D_<float>& dst, cudaStream_t stream);
-    template void min_gpu<double>(const DevMem2D_<double>& src1, double src2, const DevMem2D_<double>& dst, cudaStream_t stream);
+    template void min_gpu<uchar >(const DevMem2Db src, uchar val, DevMem2Db dst, cudaStream_t stream);
+    template void min_gpu<schar >(const DevMem2Db src, schar val, DevMem2Db dst, cudaStream_t stream);
+    template void min_gpu<ushort>(const DevMem2Db src, ushort val, DevMem2Db dst, cudaStream_t stream);
+    template void min_gpu<short >(const DevMem2Db src, short val, DevMem2Db dst, cudaStream_t stream);
+    template void min_gpu<int   >(const DevMem2Db src, int val, DevMem2Db dst, cudaStream_t stream);
+    template void min_gpu<float >(const DevMem2Db src, float val, DevMem2Db dst, cudaStream_t stream);
+    template void min_gpu<double>(const DevMem2Db src, double val, DevMem2Db dst, cudaStream_t stream);
 
     template <typename T>
-    void max_gpu(const DevMem2D_<T>& src1, T src2, const DevMem2D_<T>& dst, cudaStream_t stream)
+    void max_gpu(const DevMem2Db src, T val, DevMem2Db dst, cudaStream_t stream)
     {
-        cv::gpu::device::transform(src1, dst, device::bind2nd(maximum<T>(), src2), WithOutMask(), stream);
+        cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<T>)dst, device::bind2nd(maximum<T>(), val), WithOutMask(), stream);
     }
 
-    template void max_gpu<uchar >(const DevMem2Db& src1, uchar src2, const DevMem2Db& dst, cudaStream_t stream);
-    template void max_gpu<schar >(const DevMem2D_<schar>& src1, schar src2, const DevMem2D_<schar>& dst, cudaStream_t stream);
-    template void max_gpu<ushort>(const DevMem2D_<ushort>& src1, ushort src2, const DevMem2D_<ushort>& dst, cudaStream_t stream);
-    template void max_gpu<short >(const DevMem2D_<short>& src1, short src2, const DevMem2D_<short>& dst, cudaStream_t stream);
-    template void max_gpu<int   >(const DevMem2D_<int>& src1, int src2, const DevMem2D_<int>& dst, cudaStream_t stream);
-    template void max_gpu<float >(const DevMem2D_<float>& src1, float src2, const DevMem2D_<float>& dst, cudaStream_t stream);
-    template void max_gpu<double>(const DevMem2D_<double>& src1, double src2, const DevMem2D_<double>& dst, cudaStream_t stream);
+    template void max_gpu<uchar >(const DevMem2Db src, uchar val, DevMem2Db dst, cudaStream_t stream);
+    template void max_gpu<schar >(const DevMem2Db src, schar val, DevMem2Db dst, cudaStream_t stream);
+    template void max_gpu<ushort>(const DevMem2Db src, ushort val, DevMem2Db dst, cudaStream_t stream);
+    template void max_gpu<short >(const DevMem2Db src, short val, DevMem2Db dst, cudaStream_t stream);
+    template void max_gpu<int   >(const DevMem2Db src, int val, DevMem2Db dst, cudaStream_t stream);
+    template void max_gpu<float >(const DevMem2Db src, float val, DevMem2Db dst, cudaStream_t stream);
+    template void max_gpu<double>(const DevMem2Db src, double val, DevMem2Db dst, cudaStream_t stream);
 
     //////////////////////////////////////////////////////////////////////////
     // threshold
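
Passing DevMem2Db by value instead of DevMem2D_<T>& gives all seven instantiations of min_gpu/max_gpu a single host-visible signature, which is what lets the host code later in this commit drop the per-type caller wrappers and index the instantiations directly. The one typedef that now fits every entry (as declared on the host side further down):

    typedef void (*func_t)(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);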
@@ -1805,18 +1805,63 @@ namespace cv { namespace gpu { namespace device
     //////////////////////////////////////////////////////////////////////////
     // addWeighted
 
-    template <typename T1, typename T2, typename D> struct AddWeighted : binary_function<T1, T2, D>
-    {
-        __host__ __device__ __forceinline__ AddWeighted(double alpha_, double beta_, double gamma_) : alpha(alpha_), beta(beta_), gamma(gamma_) {}
-
-        __device__ __forceinline__ D operator ()(typename TypeTraits<T1>::ParameterType a, typename TypeTraits<T2>::ParameterType b) const
-        {
-            return saturate_cast<D>(alpha * a + beta * b + gamma);
-        }
-
-        const double alpha;
-        const double beta;
-        const double gamma;
-    };
+    namespace detail
+    {
+        template <typename T> struct UseDouble
+        {
+            enum {value = 0};
+        };
+        template <> struct UseDouble<int>
+        {
+            enum {value = 1};
+        };
+        template <> struct UseDouble<float>
+        {
+            enum {value = 1};
+        };
+        template <> struct UseDouble<double>
+        {
+            enum {value = 1};
+        };
+    }
+    template <typename T1, typename T2, typename D> struct UseDouble
+    {
+        enum {value = (detail::UseDouble<T1>::value || detail::UseDouble<T2>::value || detail::UseDouble<D>::value)};
+    };
+
+    namespace detail
+    {
+        template <typename T1, typename T2, typename D, bool useDouble> struct AddWeighted;
+        template <typename T1, typename T2, typename D> struct AddWeighted<T1, T2, D, false> : binary_function<T1, T2, D>
+        {
+            AddWeighted(double alpha_, double beta_, double gamma_) : alpha(static_cast<float>(alpha_)), beta(static_cast<float>(beta_)), gamma(static_cast<float>(gamma_)) {}
+
+            __device__ __forceinline__ D operator ()(T1 a, T2 b) const
+            {
+                return saturate_cast<D>(a * alpha + b * beta + gamma);
+            }
+
+            const float alpha;
+            const float beta;
+            const float gamma;
+        };
+        template <typename T1, typename T2, typename D> struct AddWeighted<T1, T2, D, true> : binary_function<T1, T2, D>
+        {
+            AddWeighted(double alpha_, double beta_, double gamma_) : alpha(alpha_), beta(beta_), gamma(gamma_) {}
+
+            __device__ __forceinline__ D operator ()(T1 a, T2 b) const
+            {
+                return saturate_cast<D>(a * alpha + b * beta + gamma);
+            }
+
+            const double alpha;
+            const double beta;
+            const double gamma;
+        };
+    }
+    template <typename T1, typename T2, typename D> struct AddWeighted : detail::AddWeighted<T1, T2, D, UseDouble<T1, T2, D>::value>
+    {
+        AddWeighted(double alpha_, double beta_, double gamma_) : detail::AddWeighted<T1, T2, D, UseDouble<T1, T2, D>::value>(alpha_, beta_, gamma_) {}
+    };
 
     template <> struct TransformFunctorTraits< AddWeighted<ushort, ushort, ushort> > : DefaultTransformFunctorTraits< AddWeighted<ushort, ushort, ushort> >
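
The UseDouble trait plus the bool-parameterized detail::AddWeighted is a compile-time dispatch: instantiations built only from small integer types store the coefficients as float (cheap on every device), while any int/float/double operand selects the double-precision specialization. A self-contained host-side sketch of the selection, trimmed to the trait itself (illustrative only):

    #include <cstdio>

    namespace detail
    {
        template <typename T> struct UseDouble         { enum { value = 0 }; };
        template <>           struct UseDouble<int>    { enum { value = 1 }; };
        template <>           struct UseDouble<float>  { enum { value = 1 }; };
        template <>           struct UseDouble<double> { enum { value = 1 }; };
    }

    int main()
    {
        // uchar stays on the float path; float promotes to double arithmetic
        std::printf("%d %d\n", detail::UseDouble<unsigned char>::value,
                               detail::UseDouble<float>::value);   // prints: 0 1
    }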
@@ -1878,9 +1923,12 @@ namespace cv { namespace gpu { namespace device
     template <typename T1, typename T2, typename D>
     void addWeighted_gpu(const DevMem2Db& src1, double alpha, const DevMem2Db& src2, double beta, double gamma, const DevMem2Db& dst, cudaStream_t stream)
     {
-        cudaSafeCall( cudaSetDoubleForDevice(&alpha) );
-        cudaSafeCall( cudaSetDoubleForDevice(&beta) );
-        cudaSafeCall( cudaSetDoubleForDevice(&gamma) );
+        if (UseDouble<T1, T2, D>::value)
+        {
+            cudaSafeCall( cudaSetDoubleForDevice(&alpha) );
+            cudaSafeCall( cudaSetDoubleForDevice(&beta) );
+            cudaSafeCall( cudaSetDoubleForDevice(&gamma) );
+        }
 
         AddWeighted<T1, T2, D> op(alpha, beta, gamma);
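
cudaSetDoubleForDevice() is a real (later deprecated) CUDA runtime call that rewrites a host double in place so that devices lacking native double arithmetic see a usable value. Guarding it with UseDouble is consistent with the new functor: when the coefficients are stored as float anyway there is nothing to convert. A hedged sketch of the guarded call:

    double alpha = 0.5;
    if (UseDouble<T1, T2, D>::value)                    // coefficients stay double on the device
        cudaSafeCall( cudaSetDoubleForDevice(&alpha) ); // demote in place for pre-double GPUs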
...
@@ -950,90 +950,62 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St
 namespace cv { namespace gpu { namespace device
 {
     template <typename T>
-    void absdiff_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
+    void absdiff_gpu(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
 
     template <typename T>
-    void absdiff_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, cudaStream_t stream);
+    void absdiff_gpu(const DevMem2Db src1, double val, DevMem2Db dst, cudaStream_t stream);
 }}}
 
-void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s)
+namespace
 {
-    using namespace ::cv::gpu::device;
-
-    typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
-
-    static const func_t funcs[] =
-    {
-        absdiff_gpu<unsigned char>, absdiff_gpu<signed char>, absdiff_gpu<unsigned short>, absdiff_gpu<short>, absdiff_gpu<int>, absdiff_gpu<float>, absdiff_gpu<double>
-    };
-
-    CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
-
-    dst.create( src1.size(), src1.type() );
-
-    cudaStream_t stream = StreamAccessor::getStream(s);
-
-    NppiSize sz;
-    sz.width  = src1.cols * src1.channels();
-    sz.height = src1.rows;
-
-    if (src1.depth() == CV_8U)
+    template <int DEPTH> struct NppAbsDiffFunc
     {
-        NppStreamHandler h(stream);
-
-        nppSafeCall( nppiAbsDiff_8u_C1R(src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step),
-            dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) );
-
-        if (stream == 0)
-            cudaSafeCall( cudaDeviceSynchronize() );
-    }
-    else if (src1.depth() == CV_16U)
+        typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
+
+        typedef NppStatus (*func_t)(const npp_t* src1, int src1_step, const npp_t* src2, int src2_step, npp_t* dst, int dst_step, NppiSize sz);
+    };
+
+    template <int DEPTH, typename NppAbsDiffFunc<DEPTH>::func_t func> struct NppAbsDiff
     {
-        NppStreamHandler h(stream);
-
-        nppSafeCall( nppiAbsDiff_16u_C1R(src1.ptr<Npp16u>(), static_cast<int>(src1.step), src2.ptr<Npp16u>(), static_cast<int>(src2.step),
-            dst.ptr<Npp16u>(), static_cast<int>(dst.step), sz) );
-
-        if (stream == 0)
-            cudaSafeCall( cudaDeviceSynchronize() );
-    }
-    else if (src1.depth() == CV_32F)
-    {
-        NppStreamHandler h(stream);
-
-        nppSafeCall( nppiAbsDiff_32f_C1R(src1.ptr<Npp32f>(), static_cast<int>(src1.step), src2.ptr<Npp32f>(), static_cast<int>(src2.step),
-            dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) );
-
-        if (stream == 0)
-            cudaSafeCall( cudaDeviceSynchronize() );
-    }
-    else
-    {
-        const func_t func = funcs[src1.depth()];
-        CV_Assert(func != 0);
-
-        func(src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);
-    }
-}
+        typedef typename NppAbsDiffFunc<DEPTH>::npp_t npp_t;
+
+        static void call(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream)
+        {
+            NppStreamHandler h(stream);
+
+            NppiSize sz;
+            sz.width  = src1.cols;
+            sz.height = src1.rows;
+
+            nppSafeCall( func((const npp_t*)src1.data, static_cast<int>(src1.step), (const npp_t*)src2.data, static_cast<int>(src2.step),
+                (npp_t*)dst.data, static_cast<int>(dst.step), sz) );
+
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
+        }
+    };
 
-namespace
-{
     template <int DEPTH> struct NppAbsDiffCFunc
     {
         typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
+        typedef npp_t scalar_t;
 
         typedef NppStatus (*func_t)(const npp_t* pSrc1, int nSrc1Step, npp_t* pDst, int nDstStep, NppiSize oSizeROI, npp_t nConstant);
     };
     template <> struct NppAbsDiffCFunc<CV_16U>
     {
+        typedef NppTypeTraits<CV_16U>::npp_t npp_t;
+        typedef Npp32u scalar_t;
+
         typedef NppStatus (*func_t)(const Npp16u* pSrc1, int nSrc1Step, Npp16u* pDst, int nDstStep, NppiSize oSizeROI, Npp32u nConstant);
     };
 
     template <int DEPTH, typename NppAbsDiffCFunc<DEPTH>::func_t func> struct NppAbsDiffC
     {
-        typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
+        typedef typename NppAbsDiffCFunc<DEPTH>::npp_t npp_t;
+        typedef typename NppAbsDiffCFunc<DEPTH>::scalar_t scalar_t;
 
-        static void call(const DevMem2Db& src1, double val, const DevMem2Db& dst, cudaStream_t stream)
+        static void call(const DevMem2Db src1, double val, DevMem2Db dst, cudaStream_t stream)
         {
             NppStreamHandler h(stream);
 
@@ -1041,8 +1013,8 @@ namespace
             sz.width  = src1.cols;
             sz.height = src1.rows;
 
-            nppSafeCall( func((const npp_t*)src1.data, static_cast<int>(src1.step), (npp_t*)dst.data, static_cast<int>(dst.step),
-                sz, static_cast<npp_t>(val)) );
+            nppSafeCall( func((const npp_t*)src1.data, static_cast<int>(src1.step),
+                (npp_t*)dst.data, static_cast<int>(dst.step), sz, static_cast<scalar_t>(val)) );
 
             if (stream == 0)
                 cudaSafeCall( cudaDeviceSynchronize() );
@@ -1050,12 +1022,41 @@ namespace
     };
 }
 
-void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Stream& s)
+void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
 {
     using namespace cv::gpu::device;
 
-    typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, cudaStream_t stream);
+    typedef void (*func_t)(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    static const func_t funcs[] =
+    {
+        NppAbsDiff<CV_8U, nppiAbsDiff_8u_C1R>::call,
+        absdiff_gpu<signed char>,
+        NppAbsDiff<CV_16U, nppiAbsDiff_16u_C1R>::call,
+        absdiff_gpu<short>,
+        absdiff_gpu<int>,
+        NppAbsDiff<CV_32F, nppiAbsDiff_32f_C1R>::call,
+        absdiff_gpu<double>
+    };
+
+    CV_Assert(src1.depth() <= CV_64F);
+    CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
+
+    if (src1.depth() == CV_64F)
+    {
+        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
+    }
+
+    dst.create(src1.size(), src1.type());
+
+    funcs[src1.depth()](src1.reshape(1), src2.reshape(1), dst.reshape(1), StreamAccessor::getStream(stream));
+}
+
+void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Stream& stream)
+{
+    using namespace cv::gpu::device;
+
+    typedef void (*func_t)(const DevMem2Db src1, double val, DevMem2Db dst, cudaStream_t stream);
     static const func_t funcs[] =
     {
         NppAbsDiffC<CV_8U, nppiAbsDiffC_8u_C1R>::call,
@@ -1067,13 +1068,18 @@ void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Strea
         absdiff_gpu<double>
     };
 
+    CV_Assert(src1.depth() <= CV_64F);
     CV_Assert(src1.channels() == 1);
 
-    dst.create(src1.size(), src1.type());
+    if (src1.depth() == CV_64F)
+    {
+        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
+    }
 
-    cudaStream_t stream = StreamAccessor::getStream(s);
+    dst.create(src1.size(), src1.type());
 
-    funcs[src1.depth()](src1, src2.val[0], dst, stream);
+    funcs[src1.depth()](src1, src2.val[0], dst, StreamAccessor::getStream(stream));
 }
 
 //////////////////////////////////////////////////////////////////////////////
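
The absdiff rewrite replaces the old if/else ladder over depths with one table indexed by src1.depth() (0 = CV_8U through 6 = CV_64F): NPP wrappers for the depths NPP covers (CV_8U, CV_16U, CV_32F), the template kernel for the rest, with the depth <= CV_64F assert bounding the index. A hedged usage sketch of the resulting entry points:

    cv::gpu::GpuMat a(64, 64, CV_16SC3), b(64, 64, CV_16SC3), d;
    cv::gpu::absdiff(a, b, d);               // any depth up to CV_64F; reshape(1) folds channels
    cv::gpu::GpuMat g(64, 64, CV_32FC1), e;
    cv::gpu::absdiff(g, cv::Scalar(0.5), e); // scalar overload: single-channel only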
@@ -1359,34 +1365,38 @@ namespace cv { namespace gpu { namespace device
 void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int cmpop, Stream& stream)
 {
-    using namespace ::cv::gpu::device;
+    using namespace cv::gpu::device;
 
     typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
     static const func_t funcs[7][4] =
     {
-        {compare_eq<unsigned char>, compare_ne<unsigned char>, compare_lt<unsigned char>, compare_le<unsigned char>},
-        {compare_eq<signed char>, compare_ne<signed char>, compare_lt<signed char>, compare_le<signed char>},
+        {compare_eq<unsigned char> , compare_ne<unsigned char> , compare_lt<unsigned char> , compare_le<unsigned char> },
+        {compare_eq<signed char>   , compare_ne<signed char>   , compare_lt<signed char>   , compare_le<signed char>   },
         {compare_eq<unsigned short>, compare_ne<unsigned short>, compare_lt<unsigned short>, compare_le<unsigned short>},
-        {compare_eq<short>, compare_ne<short>, compare_lt<short>, compare_le<short>},
-        {compare_eq<int>, compare_ne<int>, compare_lt<int>, compare_le<int>},
-        {compare_eq<float>, compare_ne<float>, compare_lt<float>, compare_le<float>},
-        {compare_eq<double>, compare_ne<double>, compare_lt<double>, compare_le<double>}
+        {compare_eq<short>         , compare_ne<short>         , compare_lt<short>         , compare_le<short>         },
+        {compare_eq<int>           , compare_ne<int>           , compare_lt<int>           , compare_le<int>           },
+        {compare_eq<float>         , compare_ne<float>         , compare_lt<float>         , compare_le<float>         },
+        {compare_eq<double>        , compare_ne<double>        , compare_lt<double>        , compare_le<double>        }
     };
 
+    CV_Assert(src1.depth() <= CV_64F);
     CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
     CV_Assert(cmpop >= CMP_EQ && cmpop <= CMP_NE);
 
+    if (src1.depth() == CV_64F)
+    {
+        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
+    }
+
     static const int codes[] =
     {
         0, 2, 3, 2, 3, 1
     };
 
     const GpuMat* psrc1[] =
     {
         &src1, &src2, &src2, &src1, &src1, &src1
     };
 
     const GpuMat* psrc2[] =
     {
         &src2, &src1, &src1, &src2, &src2, &src2
@@ -1415,17 +1425,15 @@ namespace
     {
         dst.create(src.size(), src.type());
 
-        ::cv::gpu::device::bitwiseNotCaller(src.rows, src.cols, src.elemSize1(), dst.channels(), src, dst, stream);
+        cv::gpu::device::bitwiseNotCaller(src.rows, src.cols, src.elemSize1(), dst.channels(), src, dst, stream);
     }
 
     void bitwiseNotCaller(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
     {
-        using namespace ::cv::gpu::device;
+        using namespace cv::gpu::device;
 
-        typedef void (*Caller)(int, int, int, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
-
-        static Caller callers[] =
+        typedef void (*func_t)(int, int, int, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
+        static func_t funcs[] =
         {
             bitwiseMaskNotCaller<unsigned char>, bitwiseMaskNotCaller<unsigned char>,
             bitwiseMaskNotCaller<unsigned short>, bitwiseMaskNotCaller<unsigned short>,
@@ -1433,19 +1441,19 @@ namespace
             bitwiseMaskNotCaller<unsigned int>
         };
 
+        CV_Assert(src.depth() <= CV_64F);
         CV_Assert(mask.type() == CV_8U && mask.size() == src.size());
 
         dst.create(src.size(), src.type());
 
-        Caller caller = callers[src.depth()];
-        CV_Assert(caller);
+        const func_t func = funcs[src.depth()];
 
         int cn = src.depth() != CV_64F ? src.channels() : src.channels() * (sizeof(double) / sizeof(unsigned int));
-        caller(src.rows, src.cols, cn, src, mask, dst, stream);
+
+        func(src.rows, src.cols, cn, src, mask, dst, stream);
     }
 }
 
 void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, Stream& stream)
 {
     if (mask.empty())
@@ -1454,7 +1462,6 @@ void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, St
         bitwiseNotCaller(src, dst, mask, StreamAccessor::getStream(stream));
 }
 
-
 //////////////////////////////////////////////////////////////////////////////
 // Binary bitwise logical operations
@@ -1481,18 +1488,18 @@ namespace
     void bitwiseOrCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream)
     {
         CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
 
         dst.create(src1.size(), src1.type());
 
-        ::cv::gpu::device::bitwiseOrCaller(dst.rows, dst.cols, dst.elemSize1(), dst.channels(), src1, src2, dst, stream);
+        cv::gpu::device::bitwiseOrCaller(dst.rows, dst.cols, dst.elemSize1(), dst.channels(), src1, src2, dst, stream);
     }
 
     void bitwiseOrCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
     {
-        using namespace ::cv::gpu::device;
+        using namespace cv::gpu::device;
 
-        typedef void (*Caller)(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
-
-        static Caller callers[] =
+        typedef void (*func_t)(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
+        static func_t funcs[] =
         {
             bitwiseMaskOrCaller<unsigned char>, bitwiseMaskOrCaller<unsigned char>,
             bitwiseMaskOrCaller<unsigned short>, bitwiseMaskOrCaller<unsigned short>,
@@ -1500,33 +1507,35 @@ namespace
             bitwiseMaskOrCaller<unsigned int>
         };
 
+        CV_Assert(src1.depth() <= CV_64F);
         CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
+        CV_Assert(mask.type() == CV_8U && mask.size() == src1.size());
 
         dst.create(src1.size(), src1.type());
 
-        Caller caller = callers[src1.depth()];
-        CV_Assert(caller);
+        const func_t func = funcs[src1.depth()];
 
         int cn = dst.depth() != CV_64F ? dst.channels() : dst.channels() * (sizeof(double) / sizeof(unsigned int));
-        caller(dst.rows, dst.cols, cn, src1, src2, mask, dst, stream);
+
+        func(dst.rows, dst.cols, cn, src1, src2, mask, dst, stream);
     }
 
     void bitwiseAndCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream)
     {
         CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
 
         dst.create(src1.size(), src1.type());
 
-        ::cv::gpu::device::bitwiseAndCaller(dst.rows, dst.cols, dst.elemSize1(), dst.channels(), src1, src2, dst, stream);
+        cv::gpu::device::bitwiseAndCaller(dst.rows, dst.cols, dst.elemSize1(), dst.channels(), src1, src2, dst, stream);
     }
 
     void bitwiseAndCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
     {
-        using namespace ::cv::gpu::device;
+        using namespace cv::gpu::device;
 
-        typedef void (*Caller)(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
-
-        static Caller callers[] =
+        typedef void (*func_t)(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
+        static func_t funcs[] =
         {
             bitwiseMaskAndCaller<unsigned char>, bitwiseMaskAndCaller<unsigned char>,
             bitwiseMaskAndCaller<unsigned short>, bitwiseMaskAndCaller<unsigned short>,
@@ -1534,33 +1543,35 @@ namespace
             bitwiseMaskAndCaller<unsigned int>
         };
 
+        CV_Assert(src1.depth() <= CV_64F);
         CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
+        CV_Assert(mask.type() == CV_8U && mask.size() == src1.size());
 
         dst.create(src1.size(), src1.type());
 
-        Caller caller = callers[src1.depth()];
-        CV_Assert(caller);
+        const func_t func = funcs[src1.depth()];
 
        int cn = dst.depth() != CV_64F ? dst.channels() : dst.channels() * (sizeof(double) / sizeof(unsigned int));
-        caller(dst.rows, dst.cols, cn, src1, src2, mask, dst, stream);
+
+        func(dst.rows, dst.cols, cn, src1, src2, mask, dst, stream);
     }
 
     void bitwiseXorCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream)
     {
         CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
 
         dst.create(src1.size(), src1.type());
 
-        ::cv::gpu::device::bitwiseXorCaller(dst.rows, dst.cols, dst.elemSize1(), dst.channels(), src1, src2, dst, stream);
+        cv::gpu::device::bitwiseXorCaller(dst.rows, dst.cols, dst.elemSize1(), dst.channels(), src1, src2, dst, stream);
     }
 
     void bitwiseXorCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
     {
-        using namespace ::cv::gpu::device;
+        using namespace cv::gpu::device;
 
-        typedef void (*Caller)(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
-
-        static Caller callers[] =
+        typedef void (*func_t)(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
+        static func_t funcs[] =
        {
             bitwiseMaskXorCaller<unsigned char>, bitwiseMaskXorCaller<unsigned char>,
             bitwiseMaskXorCaller<unsigned short>, bitwiseMaskXorCaller<unsigned short>,
@@ -1568,14 +1579,17 @@ namespace
             bitwiseMaskXorCaller<unsigned int>
         };
 
+        CV_Assert(src1.depth() <= CV_64F);
         CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
+        CV_Assert(mask.type() == CV_8U && mask.size() == src1.size());
 
         dst.create(src1.size(), src1.type());
 
-        Caller caller = callers[src1.depth()];
-        CV_Assert(caller);
+        const func_t func = funcs[src1.depth()];
 
         int cn = dst.depth() != CV_64F ? dst.channels() : dst.channels() * (sizeof(double) / sizeof(unsigned int));
-        caller(dst.rows, dst.cols, cn, src1, src2, mask, dst, stream);
+
+        func(dst.rows, dst.cols, cn, src1, src2, mask, dst, stream);
     }
 }
@@ -1661,10 +1675,9 @@ namespace
 void cv::gpu::bitwise_or(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream)
 {
     typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream);
-
     static const func_t funcs[5][4] =
     {
-        {NppBitwiseC<CV_8U, 1, nppiOrC_8u_C1R>::call, 0, NppBitwiseC<CV_8U, 3, nppiOrC_8u_C3R>::call, NppBitwiseC<CV_8U, 4, nppiOrC_8u_C4R>::call},
+        {NppBitwiseC<CV_8U , 1, nppiOrC_8u_C1R >::call, 0, NppBitwiseC<CV_8U , 3, nppiOrC_8u_C3R >::call, NppBitwiseC<CV_8U , 4, nppiOrC_8u_C4R >::call},
         {0,0,0,0},
         {NppBitwiseC<CV_16U, 1, nppiOrC_16u_C1R>::call, 0, NppBitwiseC<CV_16U, 3, nppiOrC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiOrC_16u_C4R>::call},
         {0,0,0,0},
@@ -1682,10 +1695,9 @@ void cv::gpu::bitwise_or(const GpuMat& src, const Scalar& sc, GpuMat& dst, Strea
 void cv::gpu::bitwise_and(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream)
 {
     typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream);
-
     static const func_t funcs[5][4] =
     {
-        {NppBitwiseC<CV_8U, 1, nppiAndC_8u_C1R>::call, 0, NppBitwiseC<CV_8U, 3, nppiAndC_8u_C3R>::call, NppBitwiseC<CV_8U, 4, nppiAndC_8u_C4R>::call},
+        {NppBitwiseC<CV_8U , 1, nppiAndC_8u_C1R >::call, 0, NppBitwiseC<CV_8U , 3, nppiAndC_8u_C3R >::call, NppBitwiseC<CV_8U , 4, nppiAndC_8u_C4R >::call},
        {0,0,0,0},
         {NppBitwiseC<CV_16U, 1, nppiAndC_16u_C1R>::call, 0, NppBitwiseC<CV_16U, 3, nppiAndC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiAndC_16u_C4R>::call},
         {0,0,0,0},
@@ -1703,10 +1715,9 @@ void cv::gpu::bitwise_and(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre
 void cv::gpu::bitwise_xor(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream)
 {
     typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream);
-
     static const func_t funcs[5][4] =
     {
-        {NppBitwiseC<CV_8U, 1, nppiXorC_8u_C1R>::call, 0, NppBitwiseC<CV_8U, 3, nppiXorC_8u_C3R>::call, NppBitwiseC<CV_8U, 4, nppiXorC_8u_C4R>::call},
+        {NppBitwiseC<CV_8U , 1, nppiXorC_8u_C1R >::call, 0, NppBitwiseC<CV_8U , 3, nppiXorC_8u_C3R >::call, NppBitwiseC<CV_8U , 4, nppiXorC_8u_C4R >::call},
         {0,0,0,0},
         {NppBitwiseC<CV_16U, 1, nppiXorC_16u_C1R>::call, 0, NppBitwiseC<CV_16U, 3, nppiXorC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiXorC_16u_C4R>::call},
         {0,0,0,0},
@@ -1822,107 +1833,140 @@ void cv::gpu::lshift(const GpuMat& src, Scalar_<int> sc, GpuMat& dst, Stream& st
 namespace cv { namespace gpu { namespace device
 {
-    template <typename T>
-    void min_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream);
-
-    template <typename T>
-    void max_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream);
-
-    template <typename T>
-    void min_gpu(const DevMem2D_<T>& src1, T src2, const DevMem2D_<T>& dst, cudaStream_t stream);
-
-    template <typename T>
-    void max_gpu(const DevMem2D_<T>& src1, T src2, const DevMem2D_<T>& dst, cudaStream_t stream);
+    template <typename T> void min_gpu(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template <typename T> void max_gpu(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+
+    template <typename T> void min_gpu(const DevMem2Db src, T val, DevMem2Db dst, cudaStream_t stream);
+    template <typename T> void max_gpu(const DevMem2Db src, T val, DevMem2Db dst, cudaStream_t stream);
 }}}
 
-namespace
-{
-    template <typename T>
-    void min_caller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream)
-    {
-        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
-        dst.create(src1.size(), src1.type());
-        ::cv::gpu::device::min_gpu<T>(src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);
-    }
-
-    template <typename T>
-    void min_caller(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream)
-    {
-        dst.create(src1.size(), src1.type());
-        ::cv::gpu::device::min_gpu<T>(src1.reshape(1), saturate_cast<T>(src2), dst.reshape(1), stream);
-    }
-
-    template <typename T>
-    void max_caller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream)
-    {
-        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
-        dst.create(src1.size(), src1.type());
-        ::cv::gpu::device::max_gpu<T>(src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);
-    }
-
-    template <typename T>
-    void max_caller(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream)
-    {
-        dst.create(src1.size(), src1.type());
-        ::cv::gpu::device::max_gpu<T>(src1.reshape(1), saturate_cast<T>(src2), dst.reshape(1), stream);
-    }
-}
-
 void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
 {
-    CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
-    CV_Assert((src1.depth() != CV_64F) ||
-        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
-
-    typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream);
+    using namespace cv::gpu::device;
+
+    typedef void (*func_t)(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
     static const func_t funcs[] =
     {
-        min_caller<unsigned char>, min_caller<signed char>, min_caller<unsigned short>, min_caller<short>, min_caller<int>,
-        min_caller<float>, min_caller<double>
+        min_gpu<unsigned char>,
+        min_gpu<signed char>,
+        min_gpu<unsigned short>,
+        min_gpu<short>,
+        min_gpu<int>,
+        min_gpu<float>,
+        min_gpu<double>
     };
-    funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
+
+    CV_Assert(src1.depth() <= CV_64F);
+    CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
+
+    if (src1.depth() == CV_64F)
+    {
+        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
+    }
+
+    dst.create(src1.size(), src1.type());
+
+    funcs[src1.depth()](src1.reshape(1), src2.reshape(1), dst.reshape(1), StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::min(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream)
+void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
 {
-    CV_Assert((src1.depth() != CV_64F) ||
-        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
-
-    typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream);
-    static const func_t funcs[] =
-    {
-        min_caller<unsigned char>, min_caller<signed char>, min_caller<unsigned short>, min_caller<short>, min_caller<int>,
-        min_caller<float>, min_caller<double>
-    };
-    funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
-}
-
-void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
-{
-    CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
-    CV_Assert((src1.depth() != CV_64F) ||
-        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
-
-    typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream);
+    using namespace cv::gpu::device;
+
+    typedef void (*func_t)(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
     static const func_t funcs[] =
     {
-        max_caller<unsigned char>, max_caller<signed char>, max_caller<unsigned short>, max_caller<short>, max_caller<int>,
-        max_caller<float>, max_caller<double>
+        max_gpu<unsigned char>,
+        max_gpu<signed char>,
+        max_gpu<unsigned short>,
+        max_gpu<short>,
+        max_gpu<int>,
+        max_gpu<float>,
+        max_gpu<double>
     };
-    funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
+
+    CV_Assert(src1.depth() <= CV_64F);
+    CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
+
+    if (src1.depth() == CV_64F)
+    {
+        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
+    }
+
+    dst.create(src1.size(), src1.type());
+
+    funcs[src1.depth()](src1.reshape(1), src2.reshape(1), dst.reshape(1), StreamAccessor::getStream(stream));
 }
 
-void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream)
+namespace
 {
-    CV_Assert((src1.depth() != CV_64F) ||
-        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
-
-    typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream);
-    static const func_t funcs[] =
-    {
-        max_caller<unsigned char>, max_caller<signed char>, max_caller<unsigned short>, max_caller<short>, max_caller<int>,
-        max_caller<float>, max_caller<double>
-    };
-    funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
+    template <typename T> void minScalar(const DevMem2Db src, double val, DevMem2Db dst, cudaStream_t stream)
+    {
+        cv::gpu::device::min_gpu(src, saturate_cast<T>(val), dst, stream);
+    }
+
+    template <typename T> void maxScalar(const DevMem2Db src, double val, DevMem2Db dst, cudaStream_t stream)
+    {
+        cv::gpu::device::max_gpu(src, saturate_cast<T>(val), dst, stream);
+    }
+}
+
+void cv::gpu::min(const GpuMat& src, double val, GpuMat& dst, Stream& stream)
+{
+    typedef void (*func_t)(const DevMem2Db src1, double src2, DevMem2Db dst, cudaStream_t stream);
+    static const func_t funcs[] =
+    {
+        minScalar<unsigned char>,
+        minScalar<signed char>,
+        minScalar<unsigned short>,
+        minScalar<short>,
+        minScalar<int>,
+        minScalar<float>,
+        minScalar<double>
+    };
+
+    CV_Assert(src.depth() <= CV_64F);
+    CV_Assert(src.channels() == 1);
+
+    if (src.depth() == CV_64F)
+    {
+        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
+    }
+
+    dst.create(src.size(), src.type());
+
+    funcs[src.depth()](src, val, dst, StreamAccessor::getStream(stream));
+}
+
+void cv::gpu::max(const GpuMat& src, double val, GpuMat& dst, Stream& stream)
+{
+    typedef void (*func_t)(const DevMem2Db src1, double src2, DevMem2Db dst, cudaStream_t stream);
+    static const func_t funcs[] =
+    {
+        maxScalar<unsigned char>,
+        maxScalar<signed char>,
+        maxScalar<unsigned short>,
+        maxScalar<short>,
+        maxScalar<int>,
+        maxScalar<float>,
+        maxScalar<double>
+    };
+
+    CV_Assert(src.depth() <= CV_64F);
+    CV_Assert(src.channels() == 1);
+
+    if (src.depth() == CV_64F)
+    {
+        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
+    }
+
+    dst.create(src.size(), src.type());
+
+    funcs[src.depth()](src, val, dst, StreamAccessor::getStream(stream));
 }
 
 ////////////////////////////////////////////////////////////////////////
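
The scalar min/max overloads saturate the host value into the matrix depth before launching (saturate_cast<T>(val) inside minScalar/maxScalar), so out-of-range constants clamp rather than wrap. A hedged usage sketch:

    cv::gpu::GpuMat src(16, 16, CV_8UC1, cv::Scalar(200)), dst;
    cv::gpu::min(src, 300.0, dst); // 300 saturates to 255 for CV_8U, so dst == src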
@@ -1947,6 +1991,12 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double
     CV_Assert(src.channels() == 1 && src.depth() <= CV_64F);
     CV_Assert(type <= THRESH_TOZERO_INV);
 
+    if (src.depth() == CV_64F)
+    {
+        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
+    }
+
     dst.create(src.size(), src.type());
 
     cudaStream_t stream = StreamAccessor::getStream(s);
@@ -1967,9 +2017,8 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double
     }
     else
     {
-        typedef void (*caller_t)(const GpuMat& src, GpuMat& dst, double thresh, double maxVal, int type, cudaStream_t stream);
-
-        static const caller_t callers[] =
+        typedef void (*func_t)(const GpuMat& src, GpuMat& dst, double thresh, double maxVal, int type, cudaStream_t stream);
+        static const func_t funcs[] =
         {
             threshold_caller<unsigned char>, threshold_caller<signed char>,
             threshold_caller<unsigned short>, threshold_caller<short>,
@@ -1982,7 +2031,7 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double
             maxVal = cvRound(maxVal);
         }
 
-        callers[src.depth()](src, dst, thresh, maxVal, type, stream);
+        funcs[src.depth()](src, dst, thresh, maxVal, type, stream);
     }
 
     return thresh;
...@@ -1993,8 +2042,7 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double ...@@ -1993,8 +2042,7 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double
namespace cv { namespace gpu { namespace device namespace cv { namespace gpu { namespace device
{ {
template<typename T> template<typename T> void pow_caller(DevMem2Db src, double power, DevMem2Db dst, cudaStream_t stream);
void pow_caller(DevMem2Db src, double power, DevMem2Db dst, cudaStream_t stream);
}}} }}}
void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream) void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream)
...@@ -2002,7 +2050,6 @@ void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream) ...@@ -2002,7 +2050,6 @@ void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream)
using namespace cv::gpu::device; using namespace cv::gpu::device;
typedef void (*func_t)(DevMem2Db src, double power, DevMem2Db dst, cudaStream_t stream); typedef void (*func_t)(DevMem2Db src, double power, DevMem2Db dst, cudaStream_t stream);
static const func_t funcs[] = static const func_t funcs[] =
{ {
pow_caller<unsigned char>, pow_caller<signed char>, pow_caller<unsigned char>, pow_caller<signed char>,
...@@ -2010,6 +2057,14 @@ void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream) ...@@ -2010,6 +2057,14 @@ void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream)
pow_caller<int>, pow_caller<float>, pow_caller<double> pow_caller<int>, pow_caller<float>, pow_caller<double>
}; };
CV_Assert(src.depth() <= CV_64F);
if (src.depth() == CV_64F)
{
if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
}
dst.create(src.size(), src.type());
funcs[src.depth()](src.reshape(1), power, dst.reshape(1), StreamAccessor::getStream(stream));
@@ -2075,8 +2130,7 @@ void cv::gpu::alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int
NppAlphaComp<CV_16U, nppiAlphaComp_16u_AC4R>::call,
0,
NppAlphaComp<CV_32S, nppiAlphaComp_32s_AC4R>::call,
NppAlphaComp<CV_32F, nppiAlphaComp_32f_AC4R>::call,
0
NppAlphaComp<CV_32F, nppiAlphaComp_32f_AC4R>::call
};
CV_Assert(img1.type() == CV_8UC4 || img1.type() == CV_16UC4 || img1.type() == CV_32SC4 || img1.type() == CV_32FC4);
@@ -2085,7 +2139,6 @@ void cv::gpu::alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int
dst.create(img1.size(), img1.type());
const func_t func = funcs[img1.depth()];
CV_Assert(func != 0);
func(img1, img2, dst, npp_alpha_ops[alpha_op], StreamAccessor::getStream(stream));
}
@@ -2569,6 +2622,14 @@ void cv::gpu::addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2,
dtype = dtype >= 0 ? CV_MAKETYPE(dtype, src1.channels()) : src1.type();
CV_Assert(src1.depth() <= CV_64F && src2.depth() <= CV_64F && CV_MAT_DEPTH(dtype) <= CV_64F);
if (src1.depth() == CV_64F || src2.depth() == CV_64F || CV_MAT_DEPTH(dtype) == CV_64F)
{
if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
}
dst.create(src1.size(), dtype);
const GpuMat* psrc1 = &src1;
@@ -2581,7 +2642,9 @@ void cv::gpu::addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2,
}
const func_t func = funcs[psrc1->depth()][psrc2->depth()][dst.depth()];
CV_Assert(func != 0);
if (!func)
CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types");
func(psrc1->reshape(1), alpha, psrc2->reshape(1), beta, gamma, dst.reshape(1), StreamAccessor::getStream(stream));
}
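Replacing the bare CV_Assert(func != 0) with an explicit CV_Error gives callers a stable error code to test against. Sketch of assumed caller-side usage (src1/src2 stand for already-uploaded CV_64F inputs; this snippet is not from the commit):

#include <iostream>
#include "opencv2/gpu/gpu.hpp"

void tryAddWeighted(const cv::gpu::GpuMat& src1, const cv::gpu::GpuMat& src2)
{
    cv::gpu::GpuMat dst;
    try
    {
        cv::gpu::addWeighted(src1, 0.5, src2, 0.5, 0.0, dst, CV_64F);
    }
    catch (const cv::Exception& e)
    {
        if (e.code == CV_StsUnsupportedFormat)   // raised on devices without NATIVE_DOUBLE
            std::cout << "falling back to CPU: " << e.what() << std::endl;
    }
}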
@@ -132,7 +132,7 @@ void cv::gpu::meanStdDev(const GpuMat& src, Scalar& mean, Scalar& stddev, GpuMat
nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), sz, buf.ptr<Npp8u>(), dbuf, (double*)dbuf + 1) );
cudaSafeCall( cudaDeviceSynchronize() );
double* ptrs[2] = {mean.val, stddev.val};
dbuf.download(ptrs);
}
@@ -148,6 +148,8 @@ double cv::gpu::norm(const GpuMat& src, int normType)
double cv::gpu::norm(const GpuMat& src, int normType, GpuMat& buf)
{
CV_Assert(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2);
GpuMat src_single_channel = src.reshape(1);
if (normType == NORM_L1)
@@ -156,22 +158,16 @@ double cv::gpu::norm(const GpuMat& src, int normType, GpuMat& buf)
if (normType == NORM_L2)
return std::sqrt(sqrSum(src_single_channel, buf)[0]);
if (normType == NORM_INF)
{
double min_val, max_val;
minMax(src_single_channel, &min_val, &max_val, GpuMat(), buf);
return std::max(std::abs(min_val), std::abs(max_val));
}
CV_Error(CV_StsBadArg, "norm: unsupported norm type");
return 0;
// NORM_INF
double min_val, max_val;
minMax(src_single_channel, &min_val, &max_val, GpuMat(), buf);
return std::max(std::abs(min_val), std::abs(max_val));
}
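The rewritten tail relies on the identity ||src||_inf = max(|min|, |max|); the old NORM_INF branch plus its unreachable CV_Error fall-through collapses into straight-line code because normType is now validated up front. A standalone check of the identity (a sketch in plain C++, not OpenCV code):

#include <algorithm>
#include <cmath>
#include <cassert>

int main()
{
    double vals[] = { -7.0, 3.0, 5.0 };
    double min_val = *std::min_element(vals, vals + 3);
    double max_val = *std::max_element(vals, vals + 3);
    // L-infinity norm: largest absolute value, recoverable from just min and max
    assert(std::max(std::abs(min_val), std::abs(max_val)) == 7.0);
    return 0;
}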
double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
{
CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
CV_Assert(src1.type() == CV_8UC1);
CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
CV_Assert(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2);
typedef NppStatus (*npp_norm_diff_func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2,
@@ -184,7 +180,7 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
sz.height = src1.rows;
int funcIdx = normType >> 1;
double retVal;
DeviceBuffer dbuf;
@@ -192,7 +188,7 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
nppSafeCall( npp_norm_diff_func[funcIdx](src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step), sz, dbuf) );
cudaSafeCall( cudaDeviceSynchronize() );
dbuf.download(&retVal);
return retVal;
@@ -201,9 +197,9 @@ double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
////////////////////////////////////////////////////////////////////////
// Sum
namespace cv { namespace gpu { namespace device
{
namespace matrix_reductions
{
namespace sum
{
@@ -230,34 +226,36 @@ namespace cv { namespace gpu { namespace device
}
}}}
Scalar cv::gpu::sum(const GpuMat& src)
{
GpuMat buf;
return sum(src, buf);
}
Scalar cv::gpu::sum(const GpuMat& src, GpuMat& buf)
{
using namespace ::cv::gpu::device::matrix_reductions::sum;
using namespace cv::gpu::device::matrix_reductions::sum;
typedef void (*Caller)(const DevMem2Db, PtrStepb, double*, int);
static Caller multipass_callers[7] =
static Caller multipass_callers[] =
{
sumMultipassCaller<unsigned char>, sumMultipassCaller<char>,
sumMultipassCaller<unsigned short>, sumMultipassCaller<short>,
sumMultipassCaller<int>, sumMultipassCaller<float>, 0
sumMultipassCaller<int>, sumMultipassCaller<float>
};
static Caller singlepass_callers[7] = {
static Caller singlepass_callers[] = {
sumCaller<unsigned char>, sumCaller<char>,
sumCaller<unsigned short>, sumCaller<short>,
sumCaller<int>, sumCaller<float>, 0
sumCaller<int>, sumCaller<float>
};
CV_Assert(src.depth() <= CV_32F);
Size buf_size;
getBufSizeRequired(src.cols, src.rows, src.channels(), buf_size.width, buf_size.height);
ensureSizeIsEnough(buf_size, CV_8U, buf);
Caller* callers = multipass_callers;
@@ -265,7 +263,6 @@ Scalar cv::gpu::sum(const GpuMat& src, GpuMat& buf)
callers = singlepass_callers;
Caller caller = callers[src.depth()];
if (!caller) CV_Error(CV_StsBadArg, "sum: unsupported type");
double result[4];
caller(src, buf, result, src.channels());
@@ -273,35 +270,37 @@ Scalar cv::gpu::sum(const GpuMat& src, GpuMat& buf)
}
Scalar cv::gpu::absSum(const GpuMat& src)
{
GpuMat buf;
return absSum(src, buf);
}
Scalar cv::gpu::absSum(const GpuMat& src, GpuMat& buf)
{
using namespace ::cv::gpu::device::matrix_reductions::sum;
using namespace cv::gpu::device::matrix_reductions::sum;
typedef void (*Caller)(const DevMem2Db, PtrStepb, double*, int);
static Caller multipass_callers[7] =
static Caller multipass_callers[] =
{
absSumMultipassCaller<unsigned char>, absSumMultipassCaller<char>,
absSumMultipassCaller<unsigned short>, absSumMultipassCaller<short>,
absSumMultipassCaller<int>, absSumMultipassCaller<float>, 0
absSumMultipassCaller<int>, absSumMultipassCaller<float>
};
static Caller singlepass_callers[7] =
static Caller singlepass_callers[] =
{
absSumCaller<unsigned char>, absSumCaller<char>,
absSumCaller<unsigned short>, absSumCaller<short>,
absSumCaller<int>, absSumCaller<float>, 0
absSumCaller<int>, absSumCaller<float>
};
CV_Assert(src.depth() <= CV_32F);
Size buf_size;
getBufSizeRequired(src.cols, src.rows, src.channels(), buf_size.width, buf_size.height);
ensureSizeIsEnough(buf_size, CV_8U, buf);
Caller* callers = multipass_callers;
@@ -309,7 +308,6 @@ Scalar cv::gpu::absSum(const GpuMat& src, GpuMat& buf)
callers = singlepass_callers;
Caller caller = callers[src.depth()];
if (!caller) CV_Error(CV_StsBadArg, "absSum: unsupported type");
double result[4];
caller(src, buf, result, src.channels());
@@ -317,43 +315,44 @@ Scalar cv::gpu::absSum(const GpuMat& src, GpuMat& buf)
}
Scalar cv::gpu::sqrSum(const GpuMat& src)
{
GpuMat buf;
return sqrSum(src, buf);
}
Scalar cv::gpu::sqrSum(const GpuMat& src, GpuMat& buf)
{
using namespace ::cv::gpu::device::matrix_reductions::sum;
using namespace cv::gpu::device::matrix_reductions::sum;
typedef void (*Caller)(const DevMem2Db, PtrStepb, double*, int);
static Caller multipass_callers[7] =
static Caller multipass_callers[] =
{
sqrSumMultipassCaller<unsigned char>, sqrSumMultipassCaller<char>,
sqrSumMultipassCaller<unsigned short>, sqrSumMultipassCaller<short>,
sqrSumMultipassCaller<int>, sqrSumMultipassCaller<float>, 0
sqrSumMultipassCaller<int>, sqrSumMultipassCaller<float>
};
static Caller singlepass_callers[7] =
{
sqrSumCaller<unsigned char>, sqrSumCaller<char>,
sqrSumCaller<unsigned short>, sqrSumCaller<short>,
sqrSumCaller<int>, sqrSumCaller<float>, 0
sqrSumCaller<int>, sqrSumCaller<float>
};
CV_Assert(src.depth() <= CV_32F);
Caller* callers = multipass_callers;
if (TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS))
callers = singlepass_callers;
Size buf_size;
getBufSizeRequired(src.cols, src.rows, src.channels(), buf_size.width, buf_size.height);
ensureSizeIsEnough(buf_size, CV_8U, buf);
Caller caller = callers[src.depth()];
if (!caller) CV_Error(CV_StsBadArg, "sqrSum: unsupported type");
double result[4];
caller(src, buf, result, src.channels());
@@ -363,24 +362,24 @@ Scalar cv::gpu::sqrSum(const GpuMat& src, GpuMat& buf)
////////////////////////////////////////////////////////////////////////
// Find min or max
namespace cv { namespace gpu { namespace device
{
namespace matrix_reductions
{
namespace minmax
{
void getBufSizeRequired(int cols, int rows, int elem_size, int& bufcols, int& bufrows);
template <typename T>
void minMaxCaller(const DevMem2Db src, double* minval, double* maxval, PtrStepb buf);
template <typename T>
void minMaxMaskCaller(const DevMem2Db src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
template <typename T>
void minMaxMultipassCaller(const DevMem2Db src, double* minval, double* maxval, PtrStepb buf);
template <typename T>
void minMaxMaskMultipassCaller(const DevMem2Db src, const PtrStepb mask, double* minval, double* maxval, PtrStepb buf);
}
}
@@ -401,41 +400,47 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp
typedef void (*Caller)(const DevMem2Db, double*, double*, PtrStepb);
typedef void (*MaskedCaller)(const DevMem2Db, const PtrStepb, double*, double*, PtrStepb);
static Caller multipass_callers[7] =
static Caller multipass_callers[] =
{
minMaxMultipassCaller<unsigned char>, minMaxMultipassCaller<char>,
minMaxMultipassCaller<unsigned short>, minMaxMultipassCaller<short>,
minMaxMultipassCaller<int>, minMaxMultipassCaller<float>, 0
};
static Caller singlepass_callers[7] =
static Caller singlepass_callers[] =
{
minMaxCaller<unsigned char>, minMaxCaller<char>,
minMaxCaller<unsigned short>, minMaxCaller<short>,
minMaxCaller<int>, minMaxCaller<float>, minMaxCaller<double>
};
static MaskedCaller masked_multipass_callers[7] =
static MaskedCaller masked_multipass_callers[] =
{
minMaxMaskMultipassCaller<unsigned char>, minMaxMaskMultipassCaller<char>,
minMaxMaskMultipassCaller<unsigned short>, minMaxMaskMultipassCaller<short>,
minMaxMaskMultipassCaller<int>, minMaxMaskMultipassCaller<float>, 0
};
static MaskedCaller masked_singlepass_callers[7] =
static MaskedCaller masked_singlepass_callers[] =
{
minMaxMaskCaller<unsigned char>, minMaxMaskCaller<char>,
minMaxMaskCaller<unsigned short>, minMaxMaskCaller<short>,
minMaxMaskCaller<int>, minMaxMaskCaller<float>, minMaxMaskCaller<double>
};
CV_Assert(src.depth() <= CV_64F);
CV_Assert(src.channels() == 1);
CV_Assert(mask.empty() || (mask.type() == CV_8U && src.size() == mask.size()));
if (src.depth() == CV_64F)
{
if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
}
double minVal_; if (!minVal) minVal = &minVal_;
double maxVal_; if (!maxVal) maxVal = &maxVal_;
Size buf_size;
getBufSizeRequired(src.cols, src.rows, static_cast<int>(src.elemSize()), buf_size.width, buf_size.height);
ensureSizeIsEnough(buf_size, CV_8U, buf);
@@ -447,7 +452,7 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp
callers = singlepass_callers;
Caller caller = callers[src.type()];
if (!caller) CV_Error(CV_StsBadArg, "minMax: unsupported type");
CV_Assert(caller != 0);
caller(src, minVal, maxVal, buf);
}
else
@@ -457,7 +462,7 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp
callers = masked_singlepass_callers;
MaskedCaller caller = callers[src.type()];
if (!caller) CV_Error(CV_StsBadArg, "minMax: unsupported type");
CV_Assert(caller != 0);
caller(src, mask, minVal, maxVal, buf);
}
}
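Indexing the seven-entry caller tables with src.type() rather than src.depth() is safe only because of the channels-equals-one assertion above: for single-channel matrices the OpenCV type equals the depth. A one-line standalone check (a sketch, assuming only the core headers):

#include "opencv2/core/core.hpp"
#include <cassert>

int main()
{
    // CV_MAKETYPE(depth, 1) == depth, so type() == depth() whenever channels() == 1
    assert(CV_MAKETYPE(CV_64F, 1) == CV_64F);
    return 0;
}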
@@ -466,36 +471,36 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp
////////////////////////////////////////////////////////////////////////
// Locate min and max
namespace cv { namespace gpu { namespace device
{
namespace matrix_reductions
{
namespace minmaxloc
{
void getBufSizeRequired(int cols, int rows, int elem_size, int& b1cols,
int& b1rows, int& b2cols, int& b2rows);
template <typename T>
void minMaxLocCaller(const DevMem2Db src, double* minval, double* maxval,
int minloc[2], int maxloc[2], PtrStepb valBuf, PtrStepb locBuf);
template <typename T>
void minMaxLocMaskCaller(const DevMem2Db src, const PtrStepb mask, double* minval, double* maxval,
int minloc[2], int maxloc[2], PtrStepb valBuf, PtrStepb locBuf);
template <typename T>
void minMaxLocMultipassCaller(const DevMem2Db src, double* minval, double* maxval,
int minloc[2], int maxloc[2], PtrStepb valBuf, PtrStepb locBuf);
template <typename T>
void minMaxLocMaskMultipassCaller(const DevMem2Db src, const PtrStepb mask, double* minval, double* maxval,
int minloc[2], int maxloc[2], PtrStepb valBuf, PtrStepb locBuf);
}
}
}}}
void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, const GpuMat& mask)
{
GpuMat valBuf, locBuf;
minMaxLoc(src, minVal, maxVal, minLoc, maxLoc, mask, valBuf, locBuf);
}
@@ -508,45 +513,51 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
typedef void (*Caller)(const DevMem2Db, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
typedef void (*MaskedCaller)(const DevMem2Db, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
static Caller multipass_callers[7] =
static Caller multipass_callers[] =
{
minMaxLocMultipassCaller<unsigned char>, minMaxLocMultipassCaller<char>,
minMaxLocMultipassCaller<unsigned short>, minMaxLocMultipassCaller<short>,
minMaxLocMultipassCaller<int>, minMaxLocMultipassCaller<float>, 0
};
static Caller singlepass_callers[7] =
static Caller singlepass_callers[] =
{
minMaxLocCaller<unsigned char>, minMaxLocCaller<char>,
minMaxLocCaller<unsigned short>, minMaxLocCaller<short>,
minMaxLocCaller<int>, minMaxLocCaller<float>, minMaxLocCaller<double>
};
static MaskedCaller masked_multipass_callers[7] =
static MaskedCaller masked_multipass_callers[] =
{
minMaxLocMaskMultipassCaller<unsigned char>, minMaxLocMaskMultipassCaller<char>,
minMaxLocMaskMultipassCaller<unsigned short>, minMaxLocMaskMultipassCaller<short>,
minMaxLocMaskMultipassCaller<int>, minMaxLocMaskMultipassCaller<float>, 0
};
static MaskedCaller masked_singlepass_callers[7] =
static MaskedCaller masked_singlepass_callers[] =
{
minMaxLocMaskCaller<unsigned char>, minMaxLocMaskCaller<char>,
minMaxLocMaskCaller<unsigned short>, minMaxLocMaskCaller<short>,
minMaxLocMaskCaller<int>, minMaxLocMaskCaller<float>, minMaxLocMaskCaller<double>
};
CV_Assert(src.depth() <= CV_64F);
CV_Assert(src.channels() == 1);
CV_Assert(mask.empty() || (mask.type() == CV_8U && src.size() == mask.size()));
if (src.depth() == CV_64F)
{
if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
}
double minVal_; if (!minVal) minVal = &minVal_;
double maxVal_; if (!maxVal) maxVal = &maxVal_;
int minLoc_[2];
int maxLoc_[2];
Size valbuf_size, locbuf_size;
getBufSizeRequired(src.cols, src.rows, static_cast<int>(src.elemSize()), valbuf_size.width,
valbuf_size.height, locbuf_size.width, locbuf_size.height);
ensureSizeIsEnough(valbuf_size, CV_8U, valBuf);
ensureSizeIsEnough(locbuf_size, CV_8U, locBuf);
@@ -558,7 +569,7 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
callers = singlepass_callers;
Caller caller = callers[src.type()];
if (!caller) CV_Error(CV_StsBadArg, "minMaxLoc: unsupported type");
CV_Assert(caller != 0);
caller(src, minVal, maxVal, minLoc_, maxLoc_, valBuf, locBuf);
}
else
@@ -568,7 +579,7 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
callers = masked_singlepass_callers;
MaskedCaller caller = callers[src.type()];
if (!caller) CV_Error(CV_StsBadArg, "minMaxLoc: unsupported type");
CV_Assert(caller != 0);
caller(src, mask, minVal, maxVal, minLoc_, maxLoc_, valBuf, locBuf);
}
@@ -579,18 +590,18 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
//////////////////////////////////////////////////////////////////////////////
// Count non-zero elements
namespace cv { namespace gpu { namespace device
{
namespace matrix_reductions
{
namespace countnonzero
{
void getBufSizeRequired(int cols, int rows, int& bufcols, int& bufrows);
template <typename T>
int countNonZeroCaller(const DevMem2Db src, PtrStepb buf);
template <typename T>
int countNonZeroMultipassCaller(const DevMem2Db src, PtrStepb buf);
}
}
@@ -609,21 +620,28 @@ int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf)
typedef int (*Caller)(const DevMem2Db src, PtrStepb buf);
static Caller multipass_callers[7] =
{
countNonZeroMultipassCaller<unsigned char>, countNonZeroMultipassCaller<char>,
countNonZeroMultipassCaller<unsigned short>, countNonZeroMultipassCaller<short>,
countNonZeroMultipassCaller<int>, countNonZeroMultipassCaller<float>, 0
};
static Caller singlepass_callers[7] =
{
countNonZeroCaller<unsigned char>, countNonZeroCaller<char>,
countNonZeroCaller<unsigned short>, countNonZeroCaller<short>,
countNonZeroCaller<int>, countNonZeroCaller<float>, countNonZeroCaller<double> };
CV_Assert(src.depth() <= CV_64F);
CV_Assert(src.channels() == 1);
if (src.depth() == CV_64F)
{
if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
}
Size buf_size;
getBufSizeRequired(src.cols, src.rows, buf_size.width, buf_size.height);
ensureSizeIsEnough(buf_size, CV_8U, buf);
@@ -633,16 +651,16 @@ int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf)
callers = singlepass_callers;
Caller caller = callers[src.type()];
if (!caller) CV_Error(CV_StsBadArg, "countNonZero: unsupported type");
CV_Assert(caller != 0);
return caller(src, buf);
}
//////////////////////////////////////////////////////////////////////////////
// reduce
namespace cv { namespace gpu { namespace device
{
namespace matrix_reductions
{
template <typename T, typename S, typename D> void reduceRows_gpu(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
template <typename T, typename S, typename D> void reduceCols_gpu(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
@@ -666,7 +684,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int
{
typedef void (*caller_t)(const DevMem2Db& src, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
static const caller_t callers[6][6] =
{
{
reduceRows_gpu<unsigned char, int, unsigned char>,
@@ -719,6 +737,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int
};
const caller_t func = callers[src.depth()][dst.depth()];
if (!func)
CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of input and output array formats");
@@ -728,7 +747,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int
{
typedef void (*caller_t)(const DevMem2Db& src, int cn, const DevMem2Db& dst, int reduceOp, cudaStream_t stream);
static const caller_t callers[6][6] =
{
{
reduceCols_gpu<unsigned char, int, unsigned char>,
@@ -781,10 +800,11 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int
};
const caller_t func = callers[src.depth()][dst.depth()];
if (!func)
CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of input and output array formats");
func(src, src.channels(), dst, reduceOp, StreamAccessor::getStream(stream));
}
}
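reduce() dispatches through a table indexed by [src.depth()][dst.depth()], where null entries mark unsupported depth pairs and are now reported as CV_StsUnsupportedFormat. A minimal standalone illustration of the pattern (not OpenCV code; names are invented for the sketch):

#include <cstdio>

typedef void (*caller_t)();
static void impl_8u_to_32f() { std::puts("8u -> 32f"); }

int main()
{
    static const caller_t callers[2][2] =
    {
        { 0, impl_8u_to_32f },   // row: source depth; column: destination depth
        { 0, 0 }                 // a fully unsupported source depth
    };
    const caller_t func = callers[0][1];
    if (!func)
        return 1;                // the library raises CV_StsUnsupportedFormat here
    func();
    return 0;
}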
@@ -74,7 +74,7 @@
#include "cuda.h"
#include "cuda_runtime_api.h"
#include "npp.h"
#ifdef HAVE_CUFFT
#include "cufft.h"
#endif
@@ -85,7 +85,7 @@
#include "internal_shared.hpp"
#include "opencv2/gpu/stream_accessor.hpp"
#include "nvidia/core/NCV.hpp"
#include "nvidia/NPP_staging/NPP_staging.hpp"
#include "nvidia/NCVHaarObjectDetection.hpp"
@@ -106,7 +106,7 @@
#error "OpenCV GPU module doesn't support NVIDIA compute capability 1.0"
#endif
static inline void throw_nogpu() { CV_Error(CV_GpuNotSupported, "The called functionality is disabled for current build or platform"); }
static inline void throw_nogpu() { CV_Error(CV_StsNotImplemented, "The called functionality is disabled for current build or platform"); }
#else /* defined(HAVE_CUDA) */
@@ -995,13 +995,28 @@ TEST_P(AbsDiff, Array)
cv::Mat src1 = randomMat(size, depth);
cv::Mat src2 = randomMat(size, depth);
cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
cv::gpu::absdiff(loadMat(src1, useRoi), loadMat(src2, useRoi), dst);
if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
{
try
{
cv::gpu::GpuMat dst;
cv::gpu::absdiff(loadMat(src1), loadMat(src2), dst);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
cv::gpu::absdiff(loadMat(src1, useRoi), loadMat(src2, useRoi), dst);
cv::Mat dst_gold;
cv::absdiff(src1, src2, dst_gold);
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
}
TEST_P(AbsDiff, Scalar)
@@ -1009,13 +1024,28 @@ TEST_P(AbsDiff, Scalar)
cv::Mat src = randomMat(size, depth);
cv::Scalar val = randomScalar(0.0, 255.0);
cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
cv::gpu::absdiff(loadMat(src, useRoi), val, dst);
if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
{
try
{
cv::gpu::GpuMat dst;
cv::gpu::absdiff(loadMat(src), val, dst);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
cv::gpu::absdiff(loadMat(src, useRoi), val, dst);
cv::Mat dst_gold;
cv::absdiff(src, val, dst_gold);
EXPECT_MAT_NEAR(dst_gold, dst, depth <= CV_32F ? 1.0 : 1e-5);
}
}
INSTANTIATE_TEST_CASE_P(GPU_Core, AbsDiff, testing::Combine(
@@ -1243,6 +1273,40 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Log, testing::Combine(
////////////////////////////////////////////////////////////////////////////////
// Exp
template <typename T> void expImpl(const cv::Mat& src, cv::Mat& dst)
{
dst.create(src.size(), src.type());
for (int y = 0; y < src.rows; ++y)
{
for (int x = 0; x < src.cols; ++x)
dst.at<T>(y, x) = cv::saturate_cast<T>(static_cast<int>(std::exp(static_cast<float>(src.at<T>(y, x)))));
}
}
void expImpl_float(const cv::Mat& src, cv::Mat& dst)
{
dst.create(src.size(), src.type());
for (int y = 0; y < src.rows; ++y)
{
for (int x = 0; x < src.cols; ++x)
dst.at<float>(y, x) = std::exp(static_cast<float>(src.at<float>(y, x)));
}
}
void expGold(const cv::Mat& src, cv::Mat& dst)
{
typedef void (*func_t)(const cv::Mat& src, cv::Mat& dst);
const func_t funcs[] =
{
expImpl<uchar>, expImpl<schar>, expImpl<ushort>, expImpl<short>,
expImpl<int>, expImpl_float
};
funcs[src.depth()](src, dst);
}
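expGold mirrors the GPU kernel's integer behavior: the result is computed in float, truncated to int, then saturated to the destination type, so for example exp(6) ≈ 403.43 becomes 255 in an 8-bit matrix. A standalone check of that rounding chain (a sketch mimicking cv::saturate_cast for non-negative values):

#include <cmath>
#include <cassert>

int main()
{
    float e = std::exp(6.0f);                 // ~403.43
    int i = static_cast<int>(e);              // truncates to 403
    unsigned char r = i > 255 ? 255 : static_cast<unsigned char>(i);
    assert(r == 255);                         // saturated, matching expImpl<uchar>
    return 0;
}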
PARAM_TEST_CASE(Exp, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
{
cv::gpu::DeviceInfo devInfo;
@@ -1269,7 +1333,7 @@ TEST_P(Exp, Accuracy)
cv::gpu::exp(loadMat(src, useRoi), dst);
cv::Mat dst_gold;
cv::exp(src, dst_gold);
expGold(src, dst_gold);
EXPECT_MAT_NEAR(dst_gold, dst, 1e-2);
}
@@ -1277,7 +1341,10 @@ TEST_P(Exp, Accuracy)
INSTANTIATE_TEST_CASE_P(GPU_Core, Exp, testing::Combine(
ALL_DEVICES,
DIFFERENT_SIZES,
testing::Values(MatType(CV_32FC1)),
testing::Values(MatType(CV_8UC1),
MatType(CV_16UC1),
MatType(CV_16SC1),
MatType(CV_32FC1)),
WHOLE_SUBMAT));
////////////////////////////////////////////////////////////////////////////////
@@ -1311,13 +1378,28 @@ TEST_P(Compare, Accuracy)
cv::Mat src1 = randomMat(size, depth);
cv::Mat src2 = randomMat(size, depth);
cv::gpu::GpuMat dst = createMat(size, CV_8UC1, useRoi);
cv::gpu::compare(loadMat(src1, useRoi), loadMat(src2, useRoi), dst, cmp_code);
if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
{
try
{
cv::gpu::GpuMat dst;
cv::gpu::compare(loadMat(src1), loadMat(src2), dst, cmp_code);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
cv::gpu::GpuMat dst = createMat(size, CV_8UC1, useRoi);
cv::gpu::compare(loadMat(src1, useRoi), loadMat(src2, useRoi), dst, cmp_code);
cv::Mat dst_gold;
cv::compare(src1, src2, dst_gold, cmp_code);
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
}
INSTANTIATE_TEST_CASE_P(GPU_Core, Compare, testing::Combine(
@@ -1635,17 +1717,60 @@ PARAM_TEST_CASE(Min, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
}
};
TEST_P(Min, Accuracy)
TEST_P(Min, Array)
{
cv::Mat src1 = randomMat(size, depth);
cv::Mat src2 = randomMat(size, depth);
cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
cv::gpu::min(loadMat(src1, useRoi), loadMat(src2, useRoi), dst);
if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
{
try
{
cv::gpu::GpuMat dst;
cv::gpu::min(loadMat(src1), loadMat(src2), dst);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
cv::gpu::min(loadMat(src1, useRoi), loadMat(src2, useRoi), dst);
cv::Mat dst_gold = cv::min(src1, src2);
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
}
TEST_P(Min, Scalar)
{
cv::Mat src = randomMat(size, depth);
double val = randomDouble(0.0, 255.0);
if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
{
try
{
cv::gpu::GpuMat dst;
cv::gpu::min(loadMat(src), val, dst);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
cv::gpu::min(loadMat(src, useRoi), val, dst);
cv::Mat dst_gold = cv::min(src, val);
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
} }
INSTANTIATE_TEST_CASE_P(GPU_Core, Min, testing::Combine(
@@ -1675,17 +1800,60 @@ PARAM_TEST_CASE(Max, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
}
};
TEST_P(Max, Accuracy)
TEST_P(Max, Array)
{
cv::Mat src1 = randomMat(size, depth);
cv::Mat src2 = randomMat(size, depth);
cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
cv::gpu::max(loadMat(src1, useRoi), loadMat(src2, useRoi), dst);
if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
{
try
{
cv::gpu::GpuMat dst;
cv::gpu::max(loadMat(src1), loadMat(src2), dst);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
cv::gpu::max(loadMat(src1, useRoi), loadMat(src2, useRoi), dst);
cv::Mat dst_gold = cv::max(src1, src2);
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
}
cv::Mat dst_gold = cv::max(src1, src2);
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
TEST_P(Max, Scalar)
{
cv::Mat src = randomMat(size, depth);
double val = randomDouble(0.0, 255.0);
if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
{
try
{
cv::gpu::GpuMat dst;
cv::gpu::max(loadMat(src), val, dst);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
cv::gpu::max(loadMat(src, useRoi), val, dst);
cv::Mat dst_gold = cv::max(src, val);
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
}
INSTANTIATE_TEST_CASE_P(GPU_Core, Max, testing::Combine(
@@ -1723,13 +1891,28 @@ TEST_P(Pow, Accuracy)
if (src.depth() < CV_32F)
power = static_cast<int>(power);
cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
cv::gpu::pow(loadMat(src, useRoi), power, dst);
if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
{
try
{
cv::gpu::GpuMat dst;
cv::gpu::pow(loadMat(src), power, dst);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
cv::gpu::pow(loadMat(src, useRoi), power, dst);
cv::Mat dst_gold;
cv::pow(src, power, dst_gold);
EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 0.0 : 1e-1);
}
}
INSTANTIATE_TEST_CASE_P(GPU_Core, Pow, testing::Combine(
@@ -1750,7 +1933,6 @@ PARAM_TEST_CASE(AddWeighted, cv::gpu::DeviceInfo, cv::Size, MatDepth, MatDepth,
int dst_depth;
bool useRoi;
virtual void SetUp()
{
devInfo = GET_PARAM(0);
@@ -1772,13 +1954,28 @@ TEST_P(AddWeighted, Accuracy)
double beta = randomDouble(-10.0, 10.0);
double gamma = randomDouble(-10.0, 10.0);
cv::gpu::GpuMat dst = createMat(size, dst_depth, useRoi);
cv::gpu::addWeighted(loadMat(src1, useRoi), alpha, loadMat(src2, useRoi), beta, gamma, dst, dst_depth);
if ((depth1 == CV_64F || depth2 == CV_64F || dst_depth == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
{
try
{
cv::gpu::GpuMat dst;
cv::gpu::addWeighted(loadMat(src1), alpha, loadMat(src2), beta, gamma, dst, dst_depth);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
cv::gpu::GpuMat dst = createMat(size, dst_depth, useRoi);
cv::gpu::addWeighted(loadMat(src1, useRoi), alpha, loadMat(src2, useRoi), beta, gamma, dst, dst_depth);
cv::Mat dst_gold;
cv::addWeighted(src1, alpha, src2, beta, gamma, dst_gold, dst_depth);
EXPECT_MAT_NEAR(dst_gold, dst, dst_depth < CV_32F ? 1.0 : 1e-12);
}
}
INSTANTIATE_TEST_CASE_P(GPU_Core, AddWeighted, testing::Combine(
@@ -1823,13 +2020,52 @@ TEST_P(GEMM, Accuracy)
double alpha = randomDouble(-10.0, 10.0);
double beta = randomDouble(-10.0, 10.0);
cv::gpu::GpuMat dst = createMat(size, type, useRoi);
cv::gpu::gemm(loadMat(src1, useRoi), loadMat(src2, useRoi), alpha, loadMat(src3, useRoi), beta, dst, flags);
#ifndef HAVE_CUBLAS
try
{
cv::gpu::GpuMat dst;
cv::gpu::gemm(loadMat(src1), loadMat(src2), alpha, loadMat(src3), beta, dst, flags);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsNotImplemented, e.code);
}
#else
if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
{
try
{
cv::gpu::GpuMat dst;
cv::gpu::gemm(loadMat(src1), loadMat(src2), alpha, loadMat(src3), beta, dst, flags);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else if (type == CV_64FC2 && flags != 0)
{
try
{
cv::gpu::GpuMat dst;
cv::gpu::gemm(loadMat(src1), loadMat(src2), alpha, loadMat(src3), beta, dst, flags);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsNotImplemented, e.code);
}
}
else
{
cv::gpu::GpuMat dst = createMat(size, type, useRoi);
cv::gpu::gemm(loadMat(src1, useRoi), loadMat(src2, useRoi), alpha, loadMat(src3, useRoi), beta, dst, flags);
cv::Mat dst_gold;
cv::gemm(src1, src2, alpha, src3, beta, dst_gold, flags);
EXPECT_MAT_NEAR(dst_gold, dst, CV_MAT_DEPTH(type) == CV_32F ? 1e-1 : 1e-10);
}
#endif
}
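The GEMM test now distinguishes three expected failure modes before falling through to the CPU comparison (a summary of the branches above, drawn only from this test's logic):

// built without CUBLAS                           -> CV_StsNotImplemented, any input
// CV_64F input on a device without NATIVE_DOUBLE -> CV_StsUnsupportedFormat
// CV_64FC2 input combined with any GEMM_*_T flag -> CV_StsNotImplemented
// everything else                                -> EXPECT_MAT_NEAR against cv::gemm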
INSTANTIATE_TEST_CASE_P(GPU_Core, GEMM, testing::Combine(
@@ -1864,13 +2100,28 @@ TEST_P(Transpose, Accuracy)
{
cv::Mat src = randomMat(size, type);
cv::gpu::GpuMat dst = createMat(cv::Size(size.height, size.width), type, useRoi);
cv::gpu::transpose(loadMat(src, useRoi), dst);
if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
{
try
{
cv::gpu::GpuMat dst;
cv::gpu::transpose(loadMat(src), dst);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
cv::gpu::GpuMat dst = createMat(cv::Size(size.height, size.width), type, useRoi);
cv::gpu::transpose(loadMat(src, useRoi), dst);
cv::Mat dst_gold;
cv::transpose(src, dst_gold);
EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
}
}
INSTANTIATE_TEST_CASE_P(GPU_Core, Transpose, testing::Combine(
@@ -2498,14 +2749,29 @@ TEST_P(MinMax, WithoutMask)
{
cv::Mat src = randomMat(size, depth);
double minVal, maxVal;
cv::gpu::minMax(loadMat(src, useRoi), &minVal, &maxVal);
if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
{
try
{
double minVal, maxVal;
cv::gpu::minMax(loadMat(src), &minVal, &maxVal);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
double minVal, maxVal;
cv::gpu::minMax(loadMat(src, useRoi), &minVal, &maxVal);
double minVal_gold, maxVal_gold;
minMaxLocGold(src, &minVal_gold, &maxVal_gold);
EXPECT_DOUBLE_EQ(minVal_gold, minVal);
EXPECT_DOUBLE_EQ(maxVal_gold, maxVal);
}
}
TEST_P(MinMax, WithMask)
@@ -2513,21 +2779,60 @@ TEST_P(MinMax, WithMask)
cv::Mat src = randomMat(size, depth);
cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);
double minVal, maxVal;
cv::gpu::minMax(loadMat(src, useRoi), &minVal, &maxVal, loadMat(mask, useRoi));
if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
{
try
{
double minVal, maxVal;
cv::gpu::minMax(loadMat(src), &minVal, &maxVal, loadMat(mask));
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
double minVal, maxVal;
cv::gpu::minMax(loadMat(src, useRoi), &minVal, &maxVal, loadMat(mask, useRoi));
double minVal_gold, maxVal_gold;
minMaxLocGold(src, &minVal_gold, &maxVal_gold, 0, 0, mask);
EXPECT_DOUBLE_EQ(minVal_gold, minVal);
EXPECT_DOUBLE_EQ(maxVal_gold, maxVal);
}
}
TEST_P(MinMax, NullPtr)
{
cv::Mat src = randomMat(size, depth);
cv::gpu::minMax(loadMat(src, useRoi), 0, 0);
if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
{
try
{
double minVal, maxVal;
cv::gpu::minMax(loadMat(src), &minVal, 0);
cv::gpu::minMax(loadMat(src), 0, &maxVal);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
double minVal, maxVal;
cv::gpu::minMax(loadMat(src, useRoi), &minVal, 0);
cv::gpu::minMax(loadMat(src, useRoi), 0, &maxVal);
double minVal_gold, maxVal_gold;
minMaxLocGold(src, &minVal_gold, &maxVal_gold, 0, 0);
EXPECT_DOUBLE_EQ(minVal_gold, minVal);
EXPECT_DOUBLE_EQ(maxVal_gold, maxVal);
}
}
INSTANTIATE_TEST_CASE_P(GPU_Core, MinMax, testing::Combine(
@@ -2585,19 +2890,35 @@ TEST_P(MinMaxLoc, WithoutMask)
{
cv::Mat src = randomMat(size, depth);
double minVal, maxVal;
cv::Point minLoc, maxLoc;
cv::gpu::minMaxLoc(loadMat(src, useRoi), &minVal, &maxVal, &minLoc, &maxLoc);
if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
{
try
{
double minVal, maxVal;
cv::Point minLoc, maxLoc;
cv::gpu::minMaxLoc(loadMat(src), &minVal, &maxVal, &minLoc, &maxLoc);
}
catch (const cv::Exception& e)
{
ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
}
}
else
{
double minVal, maxVal;
cv::Point minLoc, maxLoc;
cv::gpu::minMaxLoc(loadMat(src, useRoi), &minVal, &maxVal, &minLoc, &maxLoc);
double minVal_gold, maxVal_gold; double minVal_gold, maxVal_gold;
cv::Point minLoc_gold, maxLoc_gold; cv::Point minLoc_gold, maxLoc_gold;
minMaxLocGold(src, &minVal_gold, &maxVal_gold, &minLoc_gold, &maxLoc_gold); minMaxLocGold(src, &minVal_gold, &maxVal_gold, &minLoc_gold, &maxLoc_gold);
EXPECT_DOUBLE_EQ(minVal_gold, minVal); EXPECT_DOUBLE_EQ(minVal_gold, minVal);
EXPECT_DOUBLE_EQ(maxVal_gold, maxVal); EXPECT_DOUBLE_EQ(maxVal_gold, maxVal);
expectEqual(src, minLoc_gold, minLoc); expectEqual(src, minLoc_gold, minLoc);
expectEqual(src, maxLoc_gold, maxLoc); expectEqual(src, maxLoc_gold, maxLoc);
}
} }
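The location assertions compare pixel values rather than coordinates, since an extremum can legitimately occur at several positions. A minimal sketch of what such a check amounts to, under a hypothetical name (the suite's actual expectEqual helper may be defined differently):

// Hypothetical value-based location check: the found location need not
// coincide with the gold location, it only has to hold the same pixel value.
template <typename T>
void expectEqualValue(const cv::Mat& src, cv::Point loc_gold, cv::Point loc)
{
    EXPECT_EQ(src.at<T>(loc_gold), src.at<T>(loc));
}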
TEST_P(MinMaxLoc, WithMask)
...@@ -2605,26 +2926,76 @@ TEST_P(MinMaxLoc, WithMask)
    cv::Mat src = randomMat(size, depth);
    cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);

    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
    {
        try
        {
            double minVal, maxVal;
            cv::Point minLoc, maxLoc;
            cv::gpu::minMaxLoc(loadMat(src), &minVal, &maxVal, &minLoc, &maxLoc, loadMat(mask));
        }
        catch (const cv::Exception& e)
        {
            ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
        }
    }
    else
    {
        double minVal, maxVal;
        cv::Point minLoc, maxLoc;
        cv::gpu::minMaxLoc(loadMat(src, useRoi), &minVal, &maxVal, &minLoc, &maxLoc, loadMat(mask, useRoi));

        double minVal_gold, maxVal_gold;
        cv::Point minLoc_gold, maxLoc_gold;
        minMaxLocGold(src, &minVal_gold, &maxVal_gold, &minLoc_gold, &maxLoc_gold, mask);

        EXPECT_DOUBLE_EQ(minVal_gold, minVal);
        EXPECT_DOUBLE_EQ(maxVal_gold, maxVal);

        expectEqual(src, minLoc_gold, minLoc);
        expectEqual(src, maxLoc_gold, maxLoc);
    }
}
TEST_P(MinMaxLoc, NullPtr)
{
    cv::Mat src = randomMat(size, depth);

    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
    {
        try
        {
            double minVal, maxVal;
            cv::Point minLoc, maxLoc;
            cv::gpu::minMaxLoc(loadMat(src, useRoi), &minVal, 0, 0, 0);
            cv::gpu::minMaxLoc(loadMat(src, useRoi), 0, &maxVal, 0, 0);
            cv::gpu::minMaxLoc(loadMat(src, useRoi), 0, 0, &minLoc, 0);
            cv::gpu::minMaxLoc(loadMat(src, useRoi), 0, 0, 0, &maxLoc);
        }
        catch (const cv::Exception& e)
        {
            ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
        }
    }
    else
    {
        double minVal, maxVal;
        cv::Point minLoc, maxLoc;
        cv::gpu::minMaxLoc(loadMat(src, useRoi), &minVal, 0, 0, 0);
        cv::gpu::minMaxLoc(loadMat(src, useRoi), 0, &maxVal, 0, 0);
        cv::gpu::minMaxLoc(loadMat(src, useRoi), 0, 0, &minLoc, 0);
        cv::gpu::minMaxLoc(loadMat(src, useRoi), 0, 0, 0, &maxLoc);

        double minVal_gold, maxVal_gold;
        cv::Point minLoc_gold, maxLoc_gold;
        minMaxLocGold(src, &minVal_gold, &maxVal_gold, &minLoc_gold, &maxLoc_gold);

        EXPECT_DOUBLE_EQ(minVal_gold, minVal);
        EXPECT_DOUBLE_EQ(maxVal_gold, maxVal);

        expectEqual(src, minLoc_gold, minLoc);
        expectEqual(src, maxLoc_gold, maxLoc);
    }
}
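For reference, the supportFeature predicate used in every guard presumably folds the compile-time and run-time checks into one call. A minimal sketch of that shape (an assumption about the test utility, not taken from this commit):

// Assumed shape of a supportFeature-style predicate: true only when the
// binary was compiled for the feature and the current device reports it.
bool supportFeature(const cv::gpu::DeviceInfo& info, cv::gpu::FeatureSet feature)
{
    return cv::gpu::TargetArchs::builtWith(feature) && info.supports(feature);
}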
INSTANTIATE_TEST_CASE_P(GPU_Core, MinMaxLoc, testing::Combine(
...@@ -2661,12 +3032,25 @@ TEST_P(CountNonZero, Accuracy)
    cv::Mat src;
    srcBase.convertTo(src, depth);

    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
    {
        try
        {
            cv::gpu::countNonZero(loadMat(src));
        }
        catch (const cv::Exception& e)
        {
            ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
        }
    }
    else
    {
        int val = cv::gpu::countNonZero(loadMat(src, useRoi));

        int val_gold = cv::countNonZero(src);

        ASSERT_EQ(val_gold, val);
    }
}
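Outside the test suite, caller code can apply the same feature check before dispatching double-precision work to the GPU. A minimal usage sketch (hypothetical application code, using only the public cv::gpu API):

// Hypothetical caller-side dispatch (not from this commit): route CV_64F
// input to the CPU when the device lacks native double support.
void safeMinMax(const cv::Mat& src, double* minVal, double* maxVal)
{
    if (src.depth() != CV_64F ||
        (cv::gpu::TargetArchs::builtWith(cv::gpu::NATIVE_DOUBLE) &&
         cv::gpu::DeviceInfo().supports(cv::gpu::NATIVE_DOUBLE)))
    {
        cv::gpu::minMax(cv::gpu::GpuMat(src), minVal, maxVal);
    }
    else
    {
        cv::minMaxLoc(src, minVal, maxVal); // CPU fallback
    }
}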
INSTANTIATE_TEST_CASE_P(GPU_Core, CountNonZero, testing::Combine(
...