Commit 26691e00 authored by Vladislav Vinogradov

fixed gpu core tests (added additional check for device's feature support)

added assertion on double types for old devices
parent 98d7b10c
@@ -69,16 +69,7 @@ void cv::gpu::gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const G
 {
 #ifndef HAVE_CUBLAS
-    OPENCV_GPU_UNUSED(src1);
-    OPENCV_GPU_UNUSED(src2);
-    OPENCV_GPU_UNUSED(alpha);
-    OPENCV_GPU_UNUSED(src3);
-    OPENCV_GPU_UNUSED(beta);
-    OPENCV_GPU_UNUSED(dst);
-    OPENCV_GPU_UNUSED(flags);
-    OPENCV_GPU_UNUSED(stream);
-    throw_nogpu();
+    CV_Error(CV_StsNotImplemented, "The library was build without CUBLAS");
 #else
@@ -87,6 +78,12 @@ void cv::gpu::gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const G
     CV_Assert(src1.type() == CV_32FC1 || src1.type() == CV_32FC2 || src1.type() == CV_64FC1 || src1.type() == CV_64FC2);
     CV_Assert(src2.type() == src1.type() && (src3.empty() || src3.type() == src1.type()));

+    if (src1.depth() == CV_64F)
+    {
+        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
+    }
+
     bool tr1 = (flags & GEMM_1_T) != 0;
     bool tr2 = (flags & GEMM_2_T) != 0;
     bool tr3 = (flags & GEMM_3_T) != 0;
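[Editor's note] This guard is the recurring change of the commit, and both halves matter: TargetArchs::builtWith(NATIVE_DOUBLE) asks whether the library binary was compiled with code for double-capable architectures, while DeviceInfo().supports(NATIVE_DOUBLE) asks whether the device present at runtime can actually execute it (native double arithmetic requires compute capability 1.3 or newer). A minimal sketch of the pattern factored into a standalone helper; the helper name is hypothetical and not part of the commit:

    #include <opencv2/gpu/gpu.hpp>

    // Hypothetical helper (not in the commit): fail fast when a CV_64F matrix
    // reaches a build or a device without native double support, instead of
    // letting a kernel silently produce garbage on pre-1.3 hardware.
    static void requireNativeDouble(int depth)
    {
        using namespace cv::gpu;

        if (depth == CV_64F &&
            (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE)))
        {
            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
        }
    }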
@@ -230,6 +227,9 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst, Stream& s)
     }
     else // if (src.elemSize() == 8)
     {
+        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
+
         NppStStreamHandler h(stream);

         NcvSize32u sz;
@@ -290,7 +290,6 @@ namespace
 void cv::gpu::flip(const GpuMat& src, GpuMat& dst, int flipCode, Stream& stream)
 {
     typedef void (*func_t)(const GpuMat& src, GpuMat& dst, int flipCode, cudaStream_t stream);
-
     static const func_t funcs[6][4] =
     {
         {NppMirror<CV_8U, nppiMirror_8u_C1R>::call, 0, NppMirror<CV_8U, nppiMirror_8u_C3R>::call, NppMirror<CV_8U, nppiMirror_8u_C4R>::call},
@@ -403,12 +402,12 @@ namespace
 void cv::gpu::magnitude(const GpuMat& src, GpuMat& dst, Stream& stream)
 {
-    ::npp_magnitude(src, dst, nppiMagnitude_32fc32f_C1R, StreamAccessor::getStream(stream));
+    npp_magnitude(src, dst, nppiMagnitude_32fc32f_C1R, StreamAccessor::getStream(stream));
 }

 void cv::gpu::magnitudeSqr(const GpuMat& src, GpuMat& dst, Stream& stream)
 {
-    ::npp_magnitude(src, dst, nppiMagnitudeSqr_32fc32f_C1R, StreamAccessor::getStream(stream));
+    npp_magnitude(src, dst, nppiMagnitudeSqr_32fc32f_C1R, StreamAccessor::getStream(stream));
 }

 ////////////////////////////////////////////////////////////////////////
@@ -429,7 +428,7 @@ namespace
     {
         using namespace ::cv::gpu::device::mathfunc;

-        CV_DbgAssert(x.size() == y.size() && x.type() == y.type());
+        CV_Assert(x.size() == y.size() && x.type() == y.type());
         CV_Assert(x.depth() == CV_32F);

         if (mag)
@@ -449,7 +448,7 @@ namespace
     {
         using namespace ::cv::gpu::device::mathfunc;

-        CV_DbgAssert((mag.empty() || mag.size() == angle.size()) && mag.type() == angle.type());
+        CV_Assert((mag.empty() || mag.size() == angle.size()) && mag.type() == angle.type());
         CV_Assert(mag.depth() == CV_32F);

         x.create(mag.size(), mag.type());
...
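[Editor's note] Worth spelling out for the two hunks above: CV_DbgAssert compiles to a no-op outside debug builds, so these size/type consistency checks previously ran only in debug configurations. Promoting them to CV_Assert makes them unconditional, in line with the commit's theme of failing loudly on unsupported input. Simplified from OpenCV's core headers (an illustration, not the verbatim definition):

    #ifdef _DEBUG
        #define CV_DbgAssert(expr) CV_Assert(expr)   // active in debug builds only
    #else
        #define CV_DbgAssert(expr)                   // compiled out in release
    #endif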
@@ -1096,18 +1096,18 @@ namespace cv { namespace gpu { namespace device
         enum { smart_shift = 4 };
     };

-    template <typename T> void absdiff_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream)
+    template <typename T> void absdiff_gpu(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream)
     {
         cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<T>)dst, Absdiff<T>(), WithOutMask(), stream);
     }

-    template void absdiff_gpu<uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
-    template void absdiff_gpu<schar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
-    template void absdiff_gpu<ushort>(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
-    template void absdiff_gpu<short >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
-    template void absdiff_gpu<int   >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
-    template void absdiff_gpu<float >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
-    template void absdiff_gpu<double>(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
+    //template void absdiff_gpu<uchar >(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template void absdiff_gpu<schar >(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    //template void absdiff_gpu<ushort>(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template void absdiff_gpu<short >(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template void absdiff_gpu<int   >(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    //template void absdiff_gpu<float >(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template void absdiff_gpu<double>(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);

     template <typename T> struct AbsdiffScalar : unary_function<T, T>
     {
@@ -1140,20 +1140,20 @@ namespace cv { namespace gpu { namespace device
         enum { smart_shift = 4 };
     };

-    template <typename T> void absdiff_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, cudaStream_t stream)
+    template <typename T> void absdiff_gpu(const DevMem2Db src1, double val, DevMem2Db dst, cudaStream_t stream)
     {
         cudaSafeCall( cudaSetDoubleForDevice(&val) );
         AbsdiffScalar<T> op(val);
         cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)dst, op, WithOutMask(), stream);
     }

-    //template void absdiff_gpu<uchar >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
-    template void absdiff_gpu<schar >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
-    //template void absdiff_gpu<ushort>(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
-    template void absdiff_gpu<short >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
-    template void absdiff_gpu<int   >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
-    //template void absdiff_gpu<float >(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
-    template void absdiff_gpu<double>(const DevMem2Db& src1, double src2, const DevMem2Db& dst, cudaStream_t stream);
+    //template void absdiff_gpu<uchar >(const DevMem2Db src1, double src2, DevMem2Db dst, cudaStream_t stream);
+    template void absdiff_gpu<schar >(const DevMem2Db src1, double src2, DevMem2Db dst, cudaStream_t stream);
+    //template void absdiff_gpu<ushort>(const DevMem2Db src1, double src2, DevMem2Db dst, cudaStream_t stream);
+    template void absdiff_gpu<short >(const DevMem2Db src1, double src2, DevMem2Db dst, cudaStream_t stream);
+    template void absdiff_gpu<int   >(const DevMem2Db src1, double src2, DevMem2Db dst, cudaStream_t stream);
+    //template void absdiff_gpu<float >(const DevMem2Db src1, double src2, DevMem2Db dst, cudaStream_t stream);
+    template void absdiff_gpu<double>(const DevMem2Db src1, double src2, DevMem2Db dst, cudaStream_t stream);

     //////////////////////////////////////////////////////////////////////////////////////
     // Compare
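[Editor's note] A pattern to notice throughout this file: the exported kernels now take DevMem2Db by value rather than by const reference. DevMem2Db is a small plain-old-data view (pointer, step, size; it does not own the memory), so copying it is cheap, and with the element type erased to unsigned char every instantiation shares one signature. Roughly, as a sketch from memory of the OpenCV 2.x type (field order may differ from the real header):

    // Sketch of the view type the signatures pass around (not the exact header):
    template <typename T> struct DevMem2D_
    {
        int cols;      // width in elements
        int rows;      // height
        T* data;       // device pointer (not owned)
        size_t step;   // stride in bytes between rows
    };
    typedef DevMem2D_<unsigned char> DevMem2Db;  // byte-typed alias used for type erasure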
@@ -1587,60 +1587,60 @@ namespace cv { namespace gpu { namespace device
     };

     template <typename T>
-    void min_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream)
+    void min_gpu(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream)
     {
-        cv::gpu::device::transform(src1, src2, dst, minimum<T>(), WithOutMask(), stream);
+        cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<T>)dst, minimum<T>(), WithOutMask(), stream);
     }

-    template void min_gpu<uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
-    template void min_gpu<schar >(const DevMem2D_<schar>& src1, const DevMem2D_<schar>& src2, const DevMem2D_<schar>& dst, cudaStream_t stream);
-    template void min_gpu<ushort>(const DevMem2D_<ushort>& src1, const DevMem2D_<ushort>& src2, const DevMem2D_<ushort>& dst, cudaStream_t stream);
-    template void min_gpu<short >(const DevMem2D_<short>& src1, const DevMem2D_<short>& src2, const DevMem2D_<short>& dst, cudaStream_t stream);
-    template void min_gpu<int   >(const DevMem2D_<int>& src1, const DevMem2D_<int>& src2, const DevMem2D_<int>& dst, cudaStream_t stream);
-    template void min_gpu<float >(const DevMem2D_<float>& src1, const DevMem2D_<float>& src2, const DevMem2D_<float>& dst, cudaStream_t stream);
-    template void min_gpu<double>(const DevMem2D_<double>& src1, const DevMem2D_<double>& src2, const DevMem2D_<double>& dst, cudaStream_t stream);
+    template void min_gpu<uchar >(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template void min_gpu<schar >(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template void min_gpu<ushort>(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template void min_gpu<short >(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template void min_gpu<int   >(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template void min_gpu<float >(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template void min_gpu<double>(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);

     template <typename T>
-    void max_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream)
+    void max_gpu(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream)
     {
-        cv::gpu::device::transform(src1, src2, dst, maximum<T>(), WithOutMask(), stream);
+        cv::gpu::device::transform((DevMem2D_<T>)src1, (DevMem2D_<T>)src2, (DevMem2D_<T>)dst, maximum<T>(), WithOutMask(), stream);
     }

-    template void max_gpu<uchar >(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
-    template void max_gpu<schar >(const DevMem2D_<schar>& src1, const DevMem2D_<schar>& src2, const DevMem2D_<schar>& dst, cudaStream_t stream);
-    template void max_gpu<ushort>(const DevMem2D_<ushort>& src1, const DevMem2D_<ushort>& src2, const DevMem2D_<ushort>& dst, cudaStream_t stream);
-    template void max_gpu<short >(const DevMem2D_<short>& src1, const DevMem2D_<short>& src2, const DevMem2D_<short>& dst, cudaStream_t stream);
-    template void max_gpu<int   >(const DevMem2D_<int>& src1, const DevMem2D_<int>& src2, const DevMem2D_<int>& dst, cudaStream_t stream);
-    template void max_gpu<float >(const DevMem2D_<float>& src1, const DevMem2D_<float>& src2, const DevMem2D_<float>& dst, cudaStream_t stream);
-    template void max_gpu<double>(const DevMem2D_<double>& src1, const DevMem2D_<double>& src2, const DevMem2D_<double>& dst, cudaStream_t stream);
+    template void max_gpu<uchar >(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template void max_gpu<schar >(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template void max_gpu<ushort>(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template void max_gpu<short >(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template void max_gpu<int   >(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template void max_gpu<float >(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template void max_gpu<double>(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);

     template <typename T>
-    void min_gpu(const DevMem2D_<T>& src1, T src2, const DevMem2D_<T>& dst, cudaStream_t stream)
+    void min_gpu(const DevMem2Db src, T val, DevMem2Db dst, cudaStream_t stream)
     {
-        cv::gpu::device::transform(src1, dst, device::bind2nd(minimum<T>(), src2), WithOutMask(), stream);
+        cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<T>)dst, device::bind2nd(minimum<T>(), val), WithOutMask(), stream);
     }

-    template void min_gpu<uchar >(const DevMem2Db& src1, uchar src2, const DevMem2Db& dst, cudaStream_t stream);
-    template void min_gpu<schar >(const DevMem2D_<schar>& src1, schar src2, const DevMem2D_<schar>& dst, cudaStream_t stream);
-    template void min_gpu<ushort>(const DevMem2D_<ushort>& src1, ushort src2, const DevMem2D_<ushort>& dst, cudaStream_t stream);
-    template void min_gpu<short >(const DevMem2D_<short>& src1, short src2, const DevMem2D_<short>& dst, cudaStream_t stream);
-    template void min_gpu<int   >(const DevMem2D_<int>& src1, int src2, const DevMem2D_<int>& dst, cudaStream_t stream);
-    template void min_gpu<float >(const DevMem2D_<float>& src1, float src2, const DevMem2D_<float>& dst, cudaStream_t stream);
-    template void min_gpu<double>(const DevMem2D_<double>& src1, double src2, const DevMem2D_<double>& dst, cudaStream_t stream);
+    template void min_gpu<uchar >(const DevMem2Db src, uchar val, DevMem2Db dst, cudaStream_t stream);
+    template void min_gpu<schar >(const DevMem2Db src, schar val, DevMem2Db dst, cudaStream_t stream);
+    template void min_gpu<ushort>(const DevMem2Db src, ushort val, DevMem2Db dst, cudaStream_t stream);
+    template void min_gpu<short >(const DevMem2Db src, short val, DevMem2Db dst, cudaStream_t stream);
+    template void min_gpu<int   >(const DevMem2Db src, int val, DevMem2Db dst, cudaStream_t stream);
+    template void min_gpu<float >(const DevMem2Db src, float val, DevMem2Db dst, cudaStream_t stream);
+    template void min_gpu<double>(const DevMem2Db src, double val, DevMem2Db dst, cudaStream_t stream);

     template <typename T>
-    void max_gpu(const DevMem2D_<T>& src1, T src2, const DevMem2D_<T>& dst, cudaStream_t stream)
+    void max_gpu(const DevMem2Db src, T val, DevMem2Db dst, cudaStream_t stream)
     {
-        cv::gpu::device::transform(src1, dst, device::bind2nd(maximum<T>(), src2), WithOutMask(), stream);
+        cv::gpu::device::transform((DevMem2D_<T>)src, (DevMem2D_<T>)dst, device::bind2nd(maximum<T>(), val), WithOutMask(), stream);
     }

-    template void max_gpu<uchar >(const DevMem2Db& src1, uchar src2, const DevMem2Db& dst, cudaStream_t stream);
-    template void max_gpu<schar >(const DevMem2D_<schar>& src1, schar src2, const DevMem2D_<schar>& dst, cudaStream_t stream);
-    template void max_gpu<ushort>(const DevMem2D_<ushort>& src1, ushort src2, const DevMem2D_<ushort>& dst, cudaStream_t stream);
-    template void max_gpu<short >(const DevMem2D_<short>& src1, short src2, const DevMem2D_<short>& dst, cudaStream_t stream);
-    template void max_gpu<int   >(const DevMem2D_<int>& src1, int src2, const DevMem2D_<int>& dst, cudaStream_t stream);
-    template void max_gpu<float >(const DevMem2D_<float>& src1, float src2, const DevMem2D_<float>& dst, cudaStream_t stream);
-    template void max_gpu<double>(const DevMem2D_<double>& src1, double src2, const DevMem2D_<double>& dst, cudaStream_t stream);
+    template void max_gpu<uchar >(const DevMem2Db src, uchar val, DevMem2Db dst, cudaStream_t stream);
+    template void max_gpu<schar >(const DevMem2Db src, schar val, DevMem2Db dst, cudaStream_t stream);
+    template void max_gpu<ushort>(const DevMem2Db src, ushort val, DevMem2Db dst, cudaStream_t stream);
+    template void max_gpu<short >(const DevMem2Db src, short val, DevMem2Db dst, cudaStream_t stream);
+    template void max_gpu<int   >(const DevMem2Db src, int val, DevMem2Db dst, cudaStream_t stream);
+    template void max_gpu<float >(const DevMem2Db src, float val, DevMem2Db dst, cudaStream_t stream);
+    template void max_gpu<double>(const DevMem2Db src, double val, DevMem2Db dst, cudaStream_t stream);

     //////////////////////////////////////////////////////////////////////////
     // threshold
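[Editor's note] The signature change above is what enables the host-side refactor later in this commit: once every instantiation takes untyped DevMem2Db views, all of them share one function-pointer type and can sit in a single depth-indexed dispatch table. A self-contained sketch of the idea in isolation (names are illustrative, not from the commit):

    #include <cstdio>

    // Type-erased signature: every instantiation has the same function type.
    typedef void (*func_t)(const void* src, void* dst);

    template <typename T> void copy_one(const void* src, void* dst)
    {
        *static_cast<T*>(dst) = *static_cast<const T*>(src);  // cast back inside
    }

    int main()
    {
        static const func_t funcs[] = { copy_one<int>, copy_one<float> };

        int si = 42, di = 0;
        funcs[0](&si, &di);       // a depth-style index selects the instantiation
        std::printf("%d\n", di);  // prints 42
        return 0;
    }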
@@ -1805,19 +1805,64 @@ namespace cv { namespace gpu { namespace device
     //////////////////////////////////////////////////////////////////////////
     // addWeighted

-    template <typename T1, typename T2, typename D> struct AddWeighted : binary_function<T1, T2, D>
-    {
-        __host__ __device__ __forceinline__ AddWeighted(double alpha_, double beta_, double gamma_) : alpha(alpha_), beta(beta_), gamma(gamma_) {}
-
-        __device__ __forceinline__ D operator ()(typename TypeTraits<T1>::ParameterType a, typename TypeTraits<T2>::ParameterType b) const
-        {
-            return saturate_cast<D>(alpha * a + beta * b + gamma);
-        }
-
-        const double alpha;
-        const double beta;
-        const double gamma;
-    };
+    namespace detail
+    {
+        template <typename T> struct UseDouble
+        {
+            enum {value = 0};
+        };
+        template <> struct UseDouble<int>
+        {
+            enum {value = 1};
+        };
+        template <> struct UseDouble<float>
+        {
+            enum {value = 1};
+        };
+        template <> struct UseDouble<double>
+        {
+            enum {value = 1};
+        };
+    }
+    template <typename T1, typename T2, typename D> struct UseDouble
+    {
+        enum {value = (detail::UseDouble<T1>::value || detail::UseDouble<T2>::value || detail::UseDouble<D>::value)};
+    };
+
+    namespace detail
+    {
+        template <typename T1, typename T2, typename D, bool useDouble> struct AddWeighted;
+        template <typename T1, typename T2, typename D> struct AddWeighted<T1, T2, D, false> : binary_function<T1, T2, D>
+        {
+            AddWeighted(double alpha_, double beta_, double gamma_) : alpha(static_cast<float>(alpha_)), beta(static_cast<float>(beta_)), gamma(static_cast<float>(gamma_)) {}
+
+            __device__ __forceinline__ D operator ()(T1 a, T2 b) const
+            {
+                return saturate_cast<D>(a * alpha + b * beta + gamma);
+            }
+
+            const float alpha;
+            const float beta;
+            const float gamma;
+        };
+        template <typename T1, typename T2, typename D> struct AddWeighted<T1, T2, D, true> : binary_function<T1, T2, D>
+        {
+            AddWeighted(double alpha_, double beta_, double gamma_) : alpha(alpha_), beta(beta_), gamma(gamma_) {}
+
+            __device__ __forceinline__ D operator ()(T1 a, T2 b) const
+            {
+                return saturate_cast<D>(a * alpha + b * beta + gamma);
+            }
+
+            const double alpha;
+            const double beta;
+            const double gamma;
+        };
+    }
+    template <typename T1, typename T2, typename D> struct AddWeighted : detail::AddWeighted<T1, T2, D, UseDouble<T1, T2, D>::value>
+    {
+        AddWeighted(double alpha_, double beta_, double gamma_) : detail::AddWeighted<T1, T2, D, UseDouble<T1, T2, D>::value>(alpha_, beta_, gamma_) {}
+    };

     template <> struct TransformFunctorTraits< AddWeighted<ushort, ushort, ushort> > : DefaultTransformFunctorTraits< AddWeighted<ushort, ushort, ushort> >
     {
@@ -1877,10 +1922,13 @@ namespace cv { namespace gpu { namespace device
     template <typename T1, typename T2, typename D>
     void addWeighted_gpu(const DevMem2Db& src1, double alpha, const DevMem2Db& src2, double beta, double gamma, const DevMem2Db& dst, cudaStream_t stream)
     {
-        cudaSafeCall( cudaSetDoubleForDevice(&alpha) );
-        cudaSafeCall( cudaSetDoubleForDevice(&beta) );
-        cudaSafeCall( cudaSetDoubleForDevice(&gamma) );
+        if (UseDouble<T1, T2, D>::value)
+        {
+            cudaSafeCall( cudaSetDoubleForDevice(&alpha) );
+            cudaSafeCall( cudaSetDoubleForDevice(&beta) );
+            cudaSafeCall( cudaSetDoubleForDevice(&gamma) );
+        }

         AddWeighted<T1, T2, D> op(alpha, beta, gamma);
...
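[Editor's note] Two details in the addWeighted hunks are easy to miss. First, the UseDouble trait decides at compile time whether the functor stores float or double weights: for 8- and 16-bit images the weights fit comfortably in float, so double arithmetic (and the native-double requirement) is avoided entirely. Second, cudaSetDoubleForDevice is the old CUDA runtime hook that demotes a host double to a value representable on hardware without native doubles; guarding it with UseDouble means it now runs only when the functor really keeps double weights. A self-contained compile-time check of how the trait logic resolves (C++11 static_assert and the UsesDouble name are purely illustrative; the commit itself targets C++03):

    // Minimal re-statement of the detail::UseDouble logic so the asserts compile standalone:
    template <typename T> struct UsesDouble         { enum { value = 0 }; };
    template <>           struct UsesDouble<int>    { enum { value = 1 }; };
    template <>           struct UsesDouble<float>  { enum { value = 1 }; };
    template <>           struct UsesDouble<double> { enum { value = 1 }; };

    static_assert(!UsesDouble<unsigned char>::value, "8-bit inputs keep float weights");
    static_assert( UsesDouble<int>::value,           "int operands promote the weights to double");

    int main() { return 0; }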
@@ -950,90 +950,62 @@ void cv::gpu::divide(double scale, const GpuMat& src, GpuMat& dst, int dtype, St
 namespace cv { namespace gpu { namespace device
 {
     template <typename T>
-    void absdiff_gpu(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
+    void absdiff_gpu(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);

     template <typename T>
-    void absdiff_gpu(const DevMem2Db& src1, double val, const DevMem2Db& dst, cudaStream_t stream);
+    void absdiff_gpu(const DevMem2Db src1, double val, DevMem2Db dst, cudaStream_t stream);
 }}}
-void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& s)
-{
-    using namespace ::cv::gpu::device;
-
-    typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
-
-    static const func_t funcs[] =
-    {
-        absdiff_gpu<unsigned char>, absdiff_gpu<signed char>, absdiff_gpu<unsigned short>, absdiff_gpu<short>, absdiff_gpu<int>, absdiff_gpu<float>, absdiff_gpu<double>
-    };
-
-    CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
-
-    dst.create( src1.size(), src1.type() );
-
-    cudaStream_t stream = StreamAccessor::getStream(s);
-
-    NppiSize sz;
-    sz.width  = src1.cols * src1.channels();
-    sz.height = src1.rows;
-
-    if (src1.depth() == CV_8U)
-    {
-        NppStreamHandler h(stream);
-
-        nppSafeCall( nppiAbsDiff_8u_C1R(src1.ptr<Npp8u>(), static_cast<int>(src1.step), src2.ptr<Npp8u>(), static_cast<int>(src2.step),
-            dst.ptr<Npp8u>(), static_cast<int>(dst.step), sz) );
-
-        if (stream == 0)
-            cudaSafeCall( cudaDeviceSynchronize() );
-    }
-    else if (src1.depth() == CV_16U)
-    {
-        NppStreamHandler h(stream);
-
-        nppSafeCall( nppiAbsDiff_16u_C1R(src1.ptr<Npp16u>(), static_cast<int>(src1.step), src2.ptr<Npp16u>(), static_cast<int>(src2.step),
-            dst.ptr<Npp16u>(), static_cast<int>(dst.step), sz) );
-
-        if (stream == 0)
-            cudaSafeCall( cudaDeviceSynchronize() );
-    }
-    else if (src1.depth() == CV_32F)
-    {
-        NppStreamHandler h(stream);
-
-        nppSafeCall( nppiAbsDiff_32f_C1R(src1.ptr<Npp32f>(), static_cast<int>(src1.step), src2.ptr<Npp32f>(), static_cast<int>(src2.step),
-            dst.ptr<Npp32f>(), static_cast<int>(dst.step), sz) );
-
-        if (stream == 0)
-            cudaSafeCall( cudaDeviceSynchronize() );
-    }
-    else
-    {
-        const func_t func = funcs[src1.depth()];
-        CV_Assert(func != 0);
-
-        func(src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);
-    }
-}
-
 namespace
 {
+    template <int DEPTH> struct NppAbsDiffFunc
+    {
+        typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
+
+        typedef NppStatus (*func_t)(const npp_t* src1, int src1_step, const npp_t* src2, int src2_step, npp_t* dst, int dst_step, NppiSize sz);
+    };
+
+    template <int DEPTH, typename NppAbsDiffFunc<DEPTH>::func_t func> struct NppAbsDiff
+    {
+        typedef typename NppAbsDiffFunc<DEPTH>::npp_t npp_t;
+
+        static void call(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream)
+        {
+            NppStreamHandler h(stream);
+
+            NppiSize sz;
+            sz.width  = src1.cols;
+            sz.height = src1.rows;
+
+            nppSafeCall( func((const npp_t*)src1.data, static_cast<int>(src1.step), (const npp_t*)src2.data, static_cast<int>(src2.step),
+                (npp_t*)dst.data, static_cast<int>(dst.step), sz) );
+
+            if (stream == 0)
+                cudaSafeCall( cudaDeviceSynchronize() );
+        }
+    };
+
     template <int DEPTH> struct NppAbsDiffCFunc
     {
         typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
+        typedef npp_t scalar_t;

         typedef NppStatus (*func_t)(const npp_t* pSrc1, int nSrc1Step, npp_t* pDst, int nDstStep, NppiSize oSizeROI, npp_t nConstant);
     };
     template <> struct NppAbsDiffCFunc<CV_16U>
     {
+        typedef NppTypeTraits<CV_16U>::npp_t npp_t;
+        typedef Npp32u scalar_t;
+
         typedef NppStatus (*func_t)(const Npp16u* pSrc1, int nSrc1Step, Npp16u* pDst, int nDstStep, NppiSize oSizeROI, Npp32u nConstant);
     };

     template <int DEPTH, typename NppAbsDiffCFunc<DEPTH>::func_t func> struct NppAbsDiffC
     {
-        typedef typename NppTypeTraits<DEPTH>::npp_t npp_t;
+        typedef typename NppAbsDiffCFunc<DEPTH>::npp_t npp_t;
+        typedef typename NppAbsDiffCFunc<DEPTH>::scalar_t scalar_t;

-        static void call(const DevMem2Db& src1, double val, const DevMem2Db& dst, cudaStream_t stream)
+        static void call(const DevMem2Db src1, double val, DevMem2Db dst, cudaStream_t stream)
         {
             NppStreamHandler h(stream);
@@ -1041,8 +1013,8 @@ namespace
             sz.width  = src1.cols;
             sz.height = src1.rows;

-            nppSafeCall( func((const npp_t*)src1.data, static_cast<int>(src1.step), (npp_t*)dst.data, static_cast<int>(dst.step),
-                sz, static_cast<npp_t>(val)) );
+            nppSafeCall( func((const npp_t*)src1.data, static_cast<int>(src1.step),
+                (npp_t*)dst.data, static_cast<int>(dst.step), sz, static_cast<scalar_t>(val)) );

             if (stream == 0)
                 cudaSafeCall( cudaDeviceSynchronize() );
@@ -1050,12 +1022,41 @@ namespace
     };
 }

-void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Stream& s)
+void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
 {
     using namespace cv::gpu::device;

-    typedef void (*func_t)(const DevMem2Db& src1, double val, const DevMem2Db& dst, cudaStream_t stream);
+    typedef void (*func_t)(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    static const func_t funcs[] =
+    {
+        NppAbsDiff<CV_8U, nppiAbsDiff_8u_C1R>::call,
+        absdiff_gpu<signed char>,
+        NppAbsDiff<CV_16U, nppiAbsDiff_16u_C1R>::call,
+        absdiff_gpu<short>,
+        absdiff_gpu<int>,
+        NppAbsDiff<CV_32F, nppiAbsDiff_32f_C1R>::call,
+        absdiff_gpu<double>
+    };
+
+    CV_Assert(src1.depth() <= CV_64F);
+    CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
+
+    if (src1.depth() == CV_64F)
+    {
+        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
+    }
+
+    dst.create(src1.size(), src1.type());
+
+    funcs[src1.depth()](src1.reshape(1), src2.reshape(1), dst.reshape(1), StreamAccessor::getStream(stream));
+}
+
+void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Stream& stream)
+{
+    using namespace cv::gpu::device;
+
+    typedef void (*func_t)(const DevMem2Db src1, double val, DevMem2Db dst, cudaStream_t stream);
     static const func_t funcs[] =
     {
         NppAbsDiffC<CV_8U, nppiAbsDiffC_8u_C1R>::call,
@@ -1067,13 +1068,18 @@ void cv::gpu::absdiff(const GpuMat& src1, const Scalar& src2, GpuMat& dst, Strea
         absdiff_gpu<double>
     };

+    CV_Assert(src1.depth() <= CV_64F);
     CV_Assert(src1.channels() == 1);

-    dst.create(src1.size(), src1.type());
+    if (src1.depth() == CV_64F)
+    {
+        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
+    }

-    cudaStream_t stream = StreamAccessor::getStream(s);
+    dst.create(src1.size(), src1.type());

-    funcs[src1.depth()](src1, src2.val[0], dst, stream);
+    funcs[src1.depth()](src1, src2.val[0], dst, StreamAccessor::getStream(stream));
 }

 //////////////////////////////////////////////////////////////////////////////
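[Editor's note] The net effect of the absdiff rewrite: the long if/else chain over NPP calls becomes a flat depth-indexed table in which CV_8U, CV_16U and CV_32F route to NPP primitives through the NppAbsDiff wrapper, and the remaining depths fall through to the custom CUDA kernels. From the caller's side nothing changes; a hedged usage sketch against the OpenCV 2.x gpu API (requires a CUDA device at runtime):

    #include <opencv2/gpu/gpu.hpp>

    int main()
    {
        cv::gpu::GpuMat a(480, 640, CV_32FC1, cv::Scalar(1.0f));
        cv::gpu::GpuMat b(480, 640, CV_32FC1, cv::Scalar(3.0f));
        cv::gpu::GpuMat c;

        // Depth CV_32F indexes the table entry NppAbsDiff<CV_32F, nppiAbsDiff_32f_C1R>::call.
        cv::gpu::absdiff(a, b, c);  // c is filled with |1 - 3| = 2
        return 0;
    }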
@@ -1359,34 +1365,38 @@ namespace cv { namespace gpu { namespace device
 void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int cmpop, Stream& stream)
 {
-    using namespace ::cv::gpu::device;
+    using namespace cv::gpu::device;

     typedef void (*func_t)(const DevMem2Db& src1, const DevMem2Db& src2, const DevMem2Db& dst, cudaStream_t stream);
     static const func_t funcs[7][4] =
     {
-        {compare_eq<unsigned char>, compare_ne<unsigned char>, compare_lt<unsigned char>, compare_le<unsigned char>},
-        {compare_eq<signed char>, compare_ne<signed char>, compare_lt<signed char>, compare_le<signed char>},
-        {compare_eq<unsigned short>, compare_ne<unsigned short>, compare_lt<unsigned short>, compare_le<unsigned short>},
-        {compare_eq<short>, compare_ne<short>, compare_lt<short>, compare_le<short>},
-        {compare_eq<int>, compare_ne<int>, compare_lt<int>, compare_le<int>},
-        {compare_eq<float>, compare_ne<float>, compare_lt<float>, compare_le<float>},
-        {compare_eq<double>, compare_ne<double>, compare_lt<double>, compare_le<double>}
+        {compare_eq<unsigned char> , compare_ne<unsigned char> , compare_lt<unsigned char> , compare_le<unsigned char> },
+        {compare_eq<signed char>   , compare_ne<signed char>   , compare_lt<signed char>   , compare_le<signed char>   },
+        {compare_eq<unsigned short>, compare_ne<unsigned short>, compare_lt<unsigned short>, compare_le<unsigned short>},
+        {compare_eq<short>         , compare_ne<short>         , compare_lt<short>         , compare_le<short>         },
+        {compare_eq<int>           , compare_ne<int>           , compare_lt<int>           , compare_le<int>           },
+        {compare_eq<float>         , compare_ne<float>         , compare_lt<float>         , compare_le<float>         },
+        {compare_eq<double>        , compare_ne<double>        , compare_lt<double>        , compare_le<double>        }
     };

+    CV_Assert(src1.depth() <= CV_64F);
     CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
     CV_Assert(cmpop >= CMP_EQ && cmpop <= CMP_NE);

+    if (src1.depth() == CV_64F)
+    {
+        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
+    }
+
     static const int codes[] =
     {
         0, 2, 3, 2, 3, 1
     };

     const GpuMat* psrc1[] =
     {
         &src1, &src2, &src2, &src1, &src1, &src1
     };

     const GpuMat* psrc2[] =
     {
         &src2, &src1, &src1, &src2, &src2, &src2
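[Editor's note] The codes/psrc tables above implement a small trick: only eq, ne, lt and le kernels exist, and CMP_GT/CMP_GE are folded onto CMP_LT/CMP_LE by swapping the operand pointers. A scalar sketch of the same identity (plain C++, nothing from the diff):

    #include <cassert>

    // Greater-than is less-than with swapped operands, so the GPU side only
    // needs four comparison kernels for six comparison codes.
    static bool lt(int a, int b) { return a < b; }

    int main()
    {
        int src1 = 7, src2 = 3;
        // codes[CMP_GT] selects the lt kernel, and psrc1/psrc2 swap the inputs:
        assert(lt(src2, src1) == (src1 > src2));
        return 0;
    }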
@@ -1415,17 +1425,15 @@ namespace
     {
         dst.create(src.size(), src.type());

-        ::cv::gpu::device::bitwiseNotCaller(src.rows, src.cols, src.elemSize1(), dst.channels(), src, dst, stream);
+        cv::gpu::device::bitwiseNotCaller(src.rows, src.cols, src.elemSize1(), dst.channels(), src, dst, stream);
     }

     void bitwiseNotCaller(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
     {
-        using namespace ::cv::gpu::device;
+        using namespace cv::gpu::device;

-        typedef void (*Caller)(int, int, int, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
-        static Caller callers[] =
+        typedef void (*func_t)(int, int, int, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
+        static func_t funcs[] =
         {
             bitwiseMaskNotCaller<unsigned char>, bitwiseMaskNotCaller<unsigned char>,
             bitwiseMaskNotCaller<unsigned short>, bitwiseMaskNotCaller<unsigned short>,
@@ -1433,19 +1441,19 @@ namespace
             bitwiseMaskNotCaller<unsigned int>
         };

+        CV_Assert(src.depth() <= CV_64F);
         CV_Assert(mask.type() == CV_8U && mask.size() == src.size());

         dst.create(src.size(), src.type());

-        Caller caller = callers[src.depth()];
-        CV_Assert(caller);
+        const func_t func = funcs[src.depth()];

         int cn = src.depth() != CV_64F ? src.channels() : src.channels() * (sizeof(double) / sizeof(unsigned int));
-        caller(src.rows, src.cols, cn, src, mask, dst, stream);
+
+        func(src.rows, src.cols, cn, src, mask, dst, stream);
     }
 }

 void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, Stream& stream)
 {
     if (mask.empty())
@@ -1454,7 +1462,6 @@ void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, St
         bitwiseNotCaller(src, dst, mask, StreamAccessor::getStream(stream));
 }

-
 //////////////////////////////////////////////////////////////////////////////
 // Binary bitwise logical operations
@@ -1481,18 +1488,18 @@ namespace
     void bitwiseOrCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream)
     {
         CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());

         dst.create(src1.size(), src1.type());

-        ::cv::gpu::device::bitwiseOrCaller(dst.rows, dst.cols, dst.elemSize1(), dst.channels(), src1, src2, dst, stream);
+        cv::gpu::device::bitwiseOrCaller(dst.rows, dst.cols, dst.elemSize1(), dst.channels(), src1, src2, dst, stream);
     }

     void bitwiseOrCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
     {
-        using namespace ::cv::gpu::device;
+        using namespace cv::gpu::device;

-        typedef void (*Caller)(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
-        static Caller callers[] =
+        typedef void (*func_t)(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
+        static func_t funcs[] =
         {
             bitwiseMaskOrCaller<unsigned char>, bitwiseMaskOrCaller<unsigned char>,
             bitwiseMaskOrCaller<unsigned short>, bitwiseMaskOrCaller<unsigned short>,
@@ -1500,33 +1507,35 @@ namespace
             bitwiseMaskOrCaller<unsigned int>
         };

+        CV_Assert(src1.depth() <= CV_64F);
         CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
+        CV_Assert(mask.type() == CV_8U && mask.size() == src1.size());

         dst.create(src1.size(), src1.type());

-        Caller caller = callers[src1.depth()];
-        CV_Assert(caller);
+        const func_t func = funcs[src1.depth()];

         int cn = dst.depth() != CV_64F ? dst.channels() : dst.channels() * (sizeof(double) / sizeof(unsigned int));
-        caller(dst.rows, dst.cols, cn, src1, src2, mask, dst, stream);
+
+        func(dst.rows, dst.cols, cn, src1, src2, mask, dst, stream);
     }

     void bitwiseAndCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream)
     {
         CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());

         dst.create(src1.size(), src1.type());

-        ::cv::gpu::device::bitwiseAndCaller(dst.rows, dst.cols, dst.elemSize1(), dst.channels(), src1, src2, dst, stream);
+        cv::gpu::device::bitwiseAndCaller(dst.rows, dst.cols, dst.elemSize1(), dst.channels(), src1, src2, dst, stream);
     }

     void bitwiseAndCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
     {
-        using namespace ::cv::gpu::device;
+        using namespace cv::gpu::device;

-        typedef void (*Caller)(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
-        static Caller callers[] =
+        typedef void (*func_t)(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
+        static func_t funcs[] =
         {
             bitwiseMaskAndCaller<unsigned char>, bitwiseMaskAndCaller<unsigned char>,
             bitwiseMaskAndCaller<unsigned short>, bitwiseMaskAndCaller<unsigned short>,
@@ -1534,33 +1543,35 @@ namespace
             bitwiseMaskAndCaller<unsigned int>
         };

+        CV_Assert(src1.depth() <= CV_64F);
         CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
+        CV_Assert(mask.type() == CV_8U && mask.size() == src1.size());

         dst.create(src1.size(), src1.type());

-        Caller caller = callers[src1.depth()];
-        CV_Assert(caller);
+        const func_t func = funcs[src1.depth()];

         int cn = dst.depth() != CV_64F ? dst.channels() : dst.channels() * (sizeof(double) / sizeof(unsigned int));
-        caller(dst.rows, dst.cols, cn, src1, src2, mask, dst, stream);
+
+        func(dst.rows, dst.cols, cn, src1, src2, mask, dst, stream);
     }

     void bitwiseXorCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream)
     {
         CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());

         dst.create(src1.size(), src1.type());

-        ::cv::gpu::device::bitwiseXorCaller(dst.rows, dst.cols, dst.elemSize1(), dst.channels(), src1, src2, dst, stream);
+        cv::gpu::device::bitwiseXorCaller(dst.rows, dst.cols, dst.elemSize1(), dst.channels(), src1, src2, dst, stream);
     }

     void bitwiseXorCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
     {
-        using namespace ::cv::gpu::device;
+        using namespace cv::gpu::device;

-        typedef void (*Caller)(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
-        static Caller callers[] =
+        typedef void (*func_t)(int, int, int, const PtrStepb, const PtrStepb, const PtrStepb, PtrStepb, cudaStream_t);
+        static func_t funcs[] =
         {
             bitwiseMaskXorCaller<unsigned char>, bitwiseMaskXorCaller<unsigned char>,
             bitwiseMaskXorCaller<unsigned short>, bitwiseMaskXorCaller<unsigned short>,
@@ -1568,14 +1579,17 @@ namespace
             bitwiseMaskXorCaller<unsigned int>
         };

+        CV_Assert(src1.depth() <= CV_64F);
         CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
+        CV_Assert(mask.type() == CV_8U && mask.size() == src1.size());

         dst.create(src1.size(), src1.type());

-        Caller caller = callers[src1.depth()];
-        CV_Assert(caller);
+        const func_t func = funcs[src1.depth()];

         int cn = dst.depth() != CV_64F ? dst.channels() : dst.channels() * (sizeof(double) / sizeof(unsigned int));
-        caller(dst.rows, dst.cols, cn, src1, src2, mask, dst, stream);
+
+        func(dst.rows, dst.cols, cn, src1, src2, mask, dst, stream);
     }
 }
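[Editor's note] One subtlety shared by all the masked bitwise callers: there are no 64-bit kernels, so the channel count is widened for CV_64F data and each double is processed as two 32-bit lanes. The arithmetic, spelled out (plain C++):

    #include <cstdio>

    int main()
    {
        // For a CV_64FC3 matrix the callers do not dispatch a double kernel;
        // they reinterpret each row as unsigned int lanes instead.
        int channels = 3;
        int cn = channels * (int)(sizeof(double) / sizeof(unsigned int));
        std::printf("cn = %d\n", cn);  // prints 6: three doubles = six 32-bit words
        return 0;
    }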
@@ -1661,10 +1675,9 @@ namespace
 void cv::gpu::bitwise_or(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream)
 {
     typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream);
-
     static const func_t funcs[5][4] =
     {
-        {NppBitwiseC<CV_8U, 1, nppiOrC_8u_C1R>::call, 0, NppBitwiseC<CV_8U, 3, nppiOrC_8u_C3R>::call, NppBitwiseC<CV_8U, 4, nppiOrC_8u_C4R>::call},
+        {NppBitwiseC<CV_8U , 1, nppiOrC_8u_C1R >::call, 0, NppBitwiseC<CV_8U , 3, nppiOrC_8u_C3R >::call, NppBitwiseC<CV_8U , 4, nppiOrC_8u_C4R >::call},
         {0,0,0,0},
         {NppBitwiseC<CV_16U, 1, nppiOrC_16u_C1R>::call, 0, NppBitwiseC<CV_16U, 3, nppiOrC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiOrC_16u_C4R>::call},
         {0,0,0,0},
@@ -1682,10 +1695,9 @@ void cv::gpu::bitwise_or(const GpuMat& src, const Scalar& sc, GpuMat& dst, Strea
 void cv::gpu::bitwise_and(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream)
 {
     typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream);
-
     static const func_t funcs[5][4] =
     {
-        {NppBitwiseC<CV_8U, 1, nppiAndC_8u_C1R>::call, 0, NppBitwiseC<CV_8U, 3, nppiAndC_8u_C3R>::call, NppBitwiseC<CV_8U, 4, nppiAndC_8u_C4R>::call},
+        {NppBitwiseC<CV_8U , 1, nppiAndC_8u_C1R >::call, 0, NppBitwiseC<CV_8U , 3, nppiAndC_8u_C3R >::call, NppBitwiseC<CV_8U , 4, nppiAndC_8u_C4R >::call},
         {0,0,0,0},
         {NppBitwiseC<CV_16U, 1, nppiAndC_16u_C1R>::call, 0, NppBitwiseC<CV_16U, 3, nppiAndC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiAndC_16u_C4R>::call},
         {0,0,0,0},
@@ -1703,10 +1715,9 @@ void cv::gpu::bitwise_and(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stre
 void cv::gpu::bitwise_xor(const GpuMat& src, const Scalar& sc, GpuMat& dst, Stream& stream)
 {
     typedef void (*func_t)(const GpuMat& src, Scalar sc, GpuMat& dst, cudaStream_t stream);
-
     static const func_t funcs[5][4] =
     {
-        {NppBitwiseC<CV_8U, 1, nppiXorC_8u_C1R>::call, 0, NppBitwiseC<CV_8U, 3, nppiXorC_8u_C3R>::call, NppBitwiseC<CV_8U, 4, nppiXorC_8u_C4R>::call},
+        {NppBitwiseC<CV_8U , 1, nppiXorC_8u_C1R >::call, 0, NppBitwiseC<CV_8U , 3, nppiXorC_8u_C3R >::call, NppBitwiseC<CV_8U , 4, nppiXorC_8u_C4R >::call},
         {0,0,0,0},
         {NppBitwiseC<CV_16U, 1, nppiXorC_16u_C1R>::call, 0, NppBitwiseC<CV_16U, 3, nppiXorC_16u_C3R>::call, NppBitwiseC<CV_16U, 4, nppiXorC_16u_C4R>::call},
         {0,0,0,0},
@@ -1822,107 +1833,140 @@ void cv::gpu::lshift(const GpuMat& src, Scalar_<int> sc, GpuMat& dst, Stream& st
 namespace cv { namespace gpu { namespace device
 {
-    template <typename T>
-    void min_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream);
-    template <typename T>
-    void max_gpu(const DevMem2D_<T>& src1, const DevMem2D_<T>& src2, const DevMem2D_<T>& dst, cudaStream_t stream);
-
-    template <typename T>
-    void min_gpu(const DevMem2D_<T>& src1, T src2, const DevMem2D_<T>& dst, cudaStream_t stream);
-    template <typename T>
-    void max_gpu(const DevMem2D_<T>& src1, T src2, const DevMem2D_<T>& dst, cudaStream_t stream);
+    template <typename T> void min_gpu(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    template <typename T> void max_gpu(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+
+    template <typename T> void min_gpu(const DevMem2Db src, T val, DevMem2Db dst, cudaStream_t stream);
+    template <typename T> void max_gpu(const DevMem2Db src, T val, DevMem2Db dst, cudaStream_t stream);
 }}}

-namespace
-{
-    template <typename T>
-    void min_caller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream)
-    {
-        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
-        dst.create(src1.size(), src1.type());
-        ::cv::gpu::device::min_gpu<T>(src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);
-    }
-
-    template <typename T>
-    void min_caller(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream)
-    {
-        dst.create(src1.size(), src1.type());
-        ::cv::gpu::device::min_gpu<T>(src1.reshape(1), saturate_cast<T>(src2), dst.reshape(1), stream);
-    }
-
-    template <typename T>
-    void max_caller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream)
-    {
-        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
-        dst.create(src1.size(), src1.type());
-        ::cv::gpu::device::max_gpu<T>(src1.reshape(1), src2.reshape(1), dst.reshape(1), stream);
-    }
-
-    template <typename T>
-    void max_caller(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream)
-    {
-        dst.create(src1.size(), src1.type());
-        ::cv::gpu::device::max_gpu<T>(src1.reshape(1), saturate_cast<T>(src2), dst.reshape(1), stream);
-    }
-}
-
-void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
-{
-    CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
-    CV_Assert((src1.depth() != CV_64F) ||
-        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
-
-    typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream);
-    static const func_t funcs[] =
-    {
-        min_caller<unsigned char>, min_caller<signed char>, min_caller<unsigned short>, min_caller<short>, min_caller<int>,
-        min_caller<float>, min_caller<double>
-    };
-    funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
-}
-
-void cv::gpu::min(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream)
-{
-    CV_Assert((src1.depth() != CV_64F) ||
-        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
-
-    typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream);
-    static const func_t funcs[] =
-    {
-        min_caller<unsigned char>, min_caller<signed char>, min_caller<unsigned short>, min_caller<short>, min_caller<int>,
-        min_caller<float>, min_caller<double>
-    };
-    funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
-}
-
-void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
-{
-    CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
-    CV_Assert((src1.depth() != CV_64F) ||
-        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
-
-    typedef void (*func_t)(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream);
-    static const func_t funcs[] =
-    {
-        max_caller<unsigned char>, max_caller<signed char>, max_caller<unsigned short>, max_caller<short>, max_caller<int>,
-        max_caller<float>, max_caller<double>
-    };
-    funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
-}
-
-void cv::gpu::max(const GpuMat& src1, double src2, GpuMat& dst, Stream& stream)
-{
-    CV_Assert((src1.depth() != CV_64F) ||
-        (TargetArchs::builtWith(NATIVE_DOUBLE) && DeviceInfo().supports(NATIVE_DOUBLE)));
-
-    typedef void (*func_t)(const GpuMat& src1, double src2, GpuMat& dst, cudaStream_t stream);
-    static const func_t funcs[] =
-    {
-        max_caller<unsigned char>, max_caller<signed char>, max_caller<unsigned short>, max_caller<short>, max_caller<int>,
-        max_caller<float>, max_caller<double>
-    };
-    funcs[src1.depth()](src1, src2, dst, StreamAccessor::getStream(stream));
-}
+void cv::gpu::min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
+{
+    using namespace cv::gpu::device;
+
+    typedef void (*func_t)(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    static const func_t funcs[] =
+    {
+        min_gpu<unsigned char>,
+        min_gpu<signed char>,
+        min_gpu<unsigned short>,
+        min_gpu<short>,
+        min_gpu<int>,
+        min_gpu<float>,
+        min_gpu<double>
+    };
+
+    CV_Assert(src1.depth() <= CV_64F);
+    CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
+
+    if (src1.depth() == CV_64F)
+    {
+        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
+    }
+
+    dst.create(src1.size(), src1.type());
+
+    funcs[src1.depth()](src1.reshape(1), src2.reshape(1), dst.reshape(1), StreamAccessor::getStream(stream));
+}
+
+void cv::gpu::max(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, Stream& stream)
+{
+    using namespace cv::gpu::device;
+
+    typedef void (*func_t)(const DevMem2Db src1, const DevMem2Db src2, DevMem2Db dst, cudaStream_t stream);
+    static const func_t funcs[] =
+    {
+        max_gpu<unsigned char>,
+        max_gpu<signed char>,
+        max_gpu<unsigned short>,
+        max_gpu<short>,
+        max_gpu<int>,
+        max_gpu<float>,
+        max_gpu<double>
+    };
+
+    CV_Assert(src1.depth() <= CV_64F);
+    CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
+
+    if (src1.depth() == CV_64F)
+    {
+        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
+    }
+
+    dst.create(src1.size(), src1.type());
+
+    funcs[src1.depth()](src1.reshape(1), src2.reshape(1), dst.reshape(1), StreamAccessor::getStream(stream));
+}
+
+namespace
+{
+    template <typename T> void minScalar(const DevMem2Db src, double val, DevMem2Db dst, cudaStream_t stream)
+    {
+        cv::gpu::device::min_gpu(src, saturate_cast<T>(val), dst, stream);
+    }
+
+    template <typename T> void maxScalar(const DevMem2Db src, double val, DevMem2Db dst, cudaStream_t stream)
+    {
+        cv::gpu::device::max_gpu(src, saturate_cast<T>(val), dst, stream);
+    }
+}
+
+void cv::gpu::min(const GpuMat& src, double val, GpuMat& dst, Stream& stream)
+{
+    typedef void (*func_t)(const DevMem2Db src1, double src2, DevMem2Db dst, cudaStream_t stream);
+    static const func_t funcs[] =
+    {
+        minScalar<unsigned char>,
+        minScalar<signed char>,
+        minScalar<unsigned short>,
+        minScalar<short>,
+        minScalar<int>,
+        minScalar<float>,
+        minScalar<double>
+    };
+
+    CV_Assert(src.depth() <= CV_64F);
+    CV_Assert(src.channels() == 1);
+
+    if (src.depth() == CV_64F)
+    {
+        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
+    }
+
+    dst.create(src.size(), src.type());
+
+    funcs[src.depth()](src, val, dst, StreamAccessor::getStream(stream));
+}
+
+void cv::gpu::max(const GpuMat& src, double val, GpuMat& dst, Stream& stream)
+{
+    typedef void (*func_t)(const DevMem2Db src1, double src2, DevMem2Db dst, cudaStream_t stream);
+    static const func_t funcs[] =
+    {
+        maxScalar<unsigned char>,
+        maxScalar<signed char>,
+        maxScalar<unsigned short>,
+        maxScalar<short>,
+        maxScalar<int>,
+        maxScalar<float>,
+        maxScalar<double>
+    };
+
+    CV_Assert(src.depth() <= CV_64F);
+    CV_Assert(src.channels() == 1);
+
+    if (src.depth() == CV_64F)
+    {
+        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
+    }
+
+    dst.create(src.size(), src.type());
+
+    funcs[src.depth()](src, val, dst, StreamAccessor::getStream(stream));
+}

 ////////////////////////////////////////////////////////////////////////
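[Editor's note] With the hunk above, the per-type min_caller/max_caller trampolines disappear: the public functions index the type-erased min_gpu/max_gpu instantiations directly, the NATIVE_DOUBLE guard replaces the old combined CV_Assert, and the scalar overloads gain an explicit single-channel check. Callers are unaffected; a hedged usage sketch against the OpenCV 2.x gpu API (requires a CUDA device at runtime):

    #include <opencv2/gpu/gpu.hpp>

    int main()
    {
        cv::gpu::GpuMat a(64, 64, CV_8UC1, cv::Scalar(10));
        cv::gpu::GpuMat b(64, 64, CV_8UC1, cv::Scalar(20));
        cv::gpu::GpuMat lo, hi, capped;

        cv::gpu::min(a, b, lo);        // element-wise minimum -> all 10
        cv::gpu::max(a, b, hi);        // element-wise maximum -> all 20
        cv::gpu::min(b, 15.0, capped); // scalar overload: val saturate_cast'ed to uchar 15
        return 0;
    }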
@@ -1947,6 +1991,12 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double
     CV_Assert(src.channels() == 1 && src.depth() <= CV_64F);
     CV_Assert(type <= THRESH_TOZERO_INV);

+    if (src.depth() == CV_64F)
+    {
+        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
+            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
+    }
+
     dst.create(src.size(), src.type());

     cudaStream_t stream = StreamAccessor::getStream(s);
@@ -1967,9 +2017,8 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double
     }
     else
     {
-        typedef void (*caller_t)(const GpuMat& src, GpuMat& dst, double thresh, double maxVal, int type, cudaStream_t stream);
-
-        static const caller_t callers[] =
+        typedef void (*func_t)(const GpuMat& src, GpuMat& dst, double thresh, double maxVal, int type, cudaStream_t stream);
+        static const func_t funcs[] =
         {
             threshold_caller<unsigned char>, threshold_caller<signed char>,
             threshold_caller<unsigned short>, threshold_caller<short>,
...@@ -1982,7 +2031,7 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double ...@@ -1982,7 +2031,7 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double
maxVal = cvRound(maxVal); maxVal = cvRound(maxVal);
} }
callers[src.depth()](src, dst, thresh, maxVal, type, stream); funcs[src.depth()](src, dst, thresh, maxVal, type, stream);
} }
return thresh; return thresh;
@@ -1993,8 +2042,7 @@ double cv::gpu::threshold(const GpuMat& src, GpuMat& dst, double thresh, double
namespace cv { namespace gpu { namespace device
{
    template<typename T> void pow_caller(DevMem2Db src, double power, DevMem2Db dst, cudaStream_t stream);
}}}

void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream)
@@ -2002,7 +2050,6 @@ void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream)
    using namespace cv::gpu::device;

    typedef void (*func_t)(DevMem2Db src, double power, DevMem2Db dst, cudaStream_t stream);
    static const func_t funcs[] =
    {
        pow_caller<unsigned char>, pow_caller<signed char>,
@@ -2010,6 +2057,14 @@ void cv::gpu::pow(const GpuMat& src, double power, GpuMat& dst, Stream& stream)
        pow_caller<int>, pow_caller<float>, pow_caller<double>
    };

    CV_Assert(src.depth() <= CV_64F);

    if (src.depth() == CV_64F)
    {
        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
    }

    dst.create(src.size(), src.type());

    funcs[src.depth()](src.reshape(1), power, dst.reshape(1), StreamAccessor::getStream(stream));
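With the new guard, a CV_64F call fails fast on a pre-1.3 device instead of dispatching to a kernel the hardware cannot run; a caller-side sketch of the resulting contract (the mats here are hypothetical):

    cv::gpu::GpuMat src64(32, 32, CV_64FC1), dst64;
    try
    {
        cv::gpu::pow(src64, 2.0, dst64); // throws on devices without NATIVE_DOUBLE
    }
    catch (const cv::Exception& e)
    {
        CV_Assert(e.code == CV_StsUnsupportedFormat);
    }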
@@ -2075,8 +2130,7 @@ void cv::gpu::alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int
        NppAlphaComp<CV_16U, nppiAlphaComp_16u_AC4R>::call,
        0,
        NppAlphaComp<CV_32S, nppiAlphaComp_32s_AC4R>::call,
        NppAlphaComp<CV_32F, nppiAlphaComp_32f_AC4R>::call
    };

    CV_Assert(img1.type() == CV_8UC4 || img1.type() == CV_16UC4 || img1.type() == CV_32SC4 || img1.type() == CV_32FC4);
@@ -2085,7 +2139,6 @@ void cv::gpu::alphaComp(const GpuMat& img1, const GpuMat& img2, GpuMat& dst, int
    dst.create(img1.size(), img1.type());

    const func_t func = funcs[img1.depth()];

    func(img1, img2, dst, npp_alpha_ops[alpha_op], StreamAccessor::getStream(stream));
}
@@ -2569,6 +2622,14 @@ void cv::gpu::addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2,
    dtype = dtype >= 0 ? CV_MAKETYPE(dtype, src1.channels()) : src1.type();

    CV_Assert(src1.depth() <= CV_64F && src2.depth() <= CV_64F && CV_MAT_DEPTH(dtype) <= CV_64F);

    if (src1.depth() == CV_64F || src2.depth() == CV_64F || CV_MAT_DEPTH(dtype) == CV_64F)
    {
        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
    }

    dst.create(src1.size(), dtype);

    const GpuMat* psrc1 = &src1;
@@ -2581,7 +2642,9 @@ void cv::gpu::addWeighted(const GpuMat& src1, double alpha, const GpuMat& src2,
    }

    const func_t func = funcs[psrc1->depth()][psrc2->depth()][dst.depth()];

    if (!func)
        CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of source and destination types");

    func(psrc1->reshape(1), alpha, psrc2->reshape(1), beta, gamma, dst.reshape(1), StreamAccessor::getStream(stream));
}
...
@@ -148,6 +148,8 @@ double cv::gpu::norm(const GpuMat& src, int normType)
double cv::gpu::norm(const GpuMat& src, int normType, GpuMat& buf)
{
    CV_Assert(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2);

    GpuMat src_single_channel = src.reshape(1);

    if (normType == NORM_L1)
@@ -156,22 +158,16 @@ double cv::gpu::norm(const GpuMat& src, int normType, GpuMat& buf)
    if (normType == NORM_L2)
        return std::sqrt(sqrSum(src_single_channel, buf)[0]);

    // NORM_INF
    double min_val, max_val;
    minMax(src_single_channel, &min_val, &max_val, GpuMat(), buf);
    return std::max(std::abs(min_val), std::abs(max_val));
}
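The unconditional fall-through to the NORM_INF branch is safe because the assertion at the top already rejects every other norm type; the identity it relies on, sketched with the CPU API for a hypothetical single-channel mat:

    double minv, maxv;
    cv::minMaxLoc(mat.reshape(1), &minv, &maxv);
    double linf = std::max(std::abs(minv), std::abs(maxv)); // equals cv::norm(mat, cv::NORM_INF) for single-channel data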
double cv::gpu::norm(const GpuMat& src1, const GpuMat& src2, int normType)
{
    CV_Assert(src1.type() == CV_8UC1);
    CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
    CV_Assert(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2);

    typedef NppStatus (*npp_norm_diff_func_t)(const Npp8u* pSrc1, int nSrcStep1, const Npp8u* pSrc2, int nSrcStep2,
@@ -239,23 +235,25 @@ Scalar cv::gpu::sum(const GpuMat& src)
Scalar cv::gpu::sum(const GpuMat& src, GpuMat& buf)
{
    using namespace cv::gpu::device::matrix_reductions::sum;

    typedef void (*Caller)(const DevMem2Db, PtrStepb, double*, int);

    static Caller multipass_callers[] =
    {
        sumMultipassCaller<unsigned char>, sumMultipassCaller<char>,
        sumMultipassCaller<unsigned short>, sumMultipassCaller<short>,
        sumMultipassCaller<int>, sumMultipassCaller<float>
    };

    static Caller singlepass_callers[] = {
        sumCaller<unsigned char>, sumCaller<char>,
        sumCaller<unsigned short>, sumCaller<short>,
        sumCaller<int>, sumCaller<float>
    };

    CV_Assert(src.depth() <= CV_32F);

    Size buf_size;
    getBufSizeRequired(src.cols, src.rows, src.channels(), buf_size.width, buf_size.height);
    ensureSizeIsEnough(buf_size, CV_8U, buf);
@@ -265,7 +263,6 @@ Scalar cv::gpu::sum(const GpuMat& src, GpuMat& buf)
        callers = singlepass_callers;

    Caller caller = callers[src.depth()];

    double result[4];
    caller(src, buf, result, src.channels());
@@ -282,24 +279,26 @@ Scalar cv::gpu::absSum(const GpuMat& src)
Scalar cv::gpu::absSum(const GpuMat& src, GpuMat& buf)
{
    using namespace cv::gpu::device::matrix_reductions::sum;

    typedef void (*Caller)(const DevMem2Db, PtrStepb, double*, int);

    static Caller multipass_callers[] =
    {
        absSumMultipassCaller<unsigned char>, absSumMultipassCaller<char>,
        absSumMultipassCaller<unsigned short>, absSumMultipassCaller<short>,
        absSumMultipassCaller<int>, absSumMultipassCaller<float>
    };

    static Caller singlepass_callers[] =
    {
        absSumCaller<unsigned char>, absSumCaller<char>,
        absSumCaller<unsigned short>, absSumCaller<short>,
        absSumCaller<int>, absSumCaller<float>
    };

    CV_Assert(src.depth() <= CV_32F);

    Size buf_size;
    getBufSizeRequired(src.cols, src.rows, src.channels(), buf_size.width, buf_size.height);
    ensureSizeIsEnough(buf_size, CV_8U, buf);
@@ -309,7 +308,6 @@ Scalar cv::gpu::absSum(const GpuMat& src, GpuMat& buf)
        callers = singlepass_callers;

    Caller caller = callers[src.depth()];

    double result[4];
    caller(src, buf, result, src.channels());
@@ -326,24 +324,26 @@ Scalar cv::gpu::sqrSum(const GpuMat& src)
Scalar cv::gpu::sqrSum(const GpuMat& src, GpuMat& buf)
{
    using namespace cv::gpu::device::matrix_reductions::sum;

    typedef void (*Caller)(const DevMem2Db, PtrStepb, double*, int);

    static Caller multipass_callers[] =
    {
        sqrSumMultipassCaller<unsigned char>, sqrSumMultipassCaller<char>,
        sqrSumMultipassCaller<unsigned short>, sqrSumMultipassCaller<short>,
        sqrSumMultipassCaller<int>, sqrSumMultipassCaller<float>
    };

    static Caller singlepass_callers[7] =
    {
        sqrSumCaller<unsigned char>, sqrSumCaller<char>,
        sqrSumCaller<unsigned short>, sqrSumCaller<short>,
        sqrSumCaller<int>, sqrSumCaller<float>
    };

    CV_Assert(src.depth() <= CV_32F);

    Caller* callers = multipass_callers;
    if (TargetArchs::builtWith(GLOBAL_ATOMICS) && DeviceInfo().supports(GLOBAL_ATOMICS))
        callers = singlepass_callers;
@@ -353,7 +353,6 @@ Scalar cv::gpu::sqrSum(const GpuMat& src, GpuMat& buf)
    ensureSizeIsEnough(buf_size, CV_8U, buf);

    Caller caller = callers[src.depth()];

    double result[4];
    caller(src, buf, result, src.channels());
@@ -401,38 +400,44 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp
    typedef void (*Caller)(const DevMem2Db, double*, double*, PtrStepb);
    typedef void (*MaskedCaller)(const DevMem2Db, const PtrStepb, double*, double*, PtrStepb);

    static Caller multipass_callers[] =
    {
        minMaxMultipassCaller<unsigned char>, minMaxMultipassCaller<char>,
        minMaxMultipassCaller<unsigned short>, minMaxMultipassCaller<short>,
        minMaxMultipassCaller<int>, minMaxMultipassCaller<float>, 0
    };

    static Caller singlepass_callers[] =
    {
        minMaxCaller<unsigned char>, minMaxCaller<char>,
        minMaxCaller<unsigned short>, minMaxCaller<short>,
        minMaxCaller<int>, minMaxCaller<float>, minMaxCaller<double>
    };

    static MaskedCaller masked_multipass_callers[] =
    {
        minMaxMaskMultipassCaller<unsigned char>, minMaxMaskMultipassCaller<char>,
        minMaxMaskMultipassCaller<unsigned short>, minMaxMaskMultipassCaller<short>,
        minMaxMaskMultipassCaller<int>, minMaxMaskMultipassCaller<float>, 0
    };

    static MaskedCaller masked_singlepass_callers[] =
    {
        minMaxMaskCaller<unsigned char>, minMaxMaskCaller<char>,
        minMaxMaskCaller<unsigned short>, minMaxMaskCaller<short>,
        minMaxMaskCaller<int>, minMaxMaskCaller<float>, minMaxMaskCaller<double>
    };

    CV_Assert(src.depth() <= CV_64F);
    CV_Assert(src.channels() == 1);
    CV_Assert(mask.empty() || (mask.type() == CV_8U && src.size() == mask.size()));

    if (src.depth() == CV_64F)
    {
        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
    }

    double minVal_; if (!minVal) minVal = &minVal_;
    double maxVal_; if (!maxVal) maxVal = &maxVal_;
@@ -447,7 +452,7 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp
            callers = singlepass_callers;

        Caller caller = callers[src.type()];
        CV_Assert(caller != 0);
        caller(src, minVal, maxVal, buf);
    }
    else
@@ -457,7 +462,7 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp
            callers = masked_singlepass_callers;

        MaskedCaller caller = callers[src.type()];
        CV_Assert(caller != 0);
        caller(src, mask, minVal, maxVal, buf);
    }
}
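For reference, a caller-side sketch of the masked path served by the second branch (mats hypothetical):

    cv::gpu::GpuMat img(480, 640, CV_8UC1), mask(480, 640, CV_8UC1), buf;
    double mn, mx;
    cv::gpu::minMax(img, &mn, &mx, mask, buf); // reduction restricted to mask != 0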
@@ -508,38 +513,44 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
    typedef void (*Caller)(const DevMem2Db, double*, double*, int[2], int[2], PtrStepb, PtrStepb);
    typedef void (*MaskedCaller)(const DevMem2Db, const PtrStepb, double*, double*, int[2], int[2], PtrStepb, PtrStepb);

    static Caller multipass_callers[] =
    {
        minMaxLocMultipassCaller<unsigned char>, minMaxLocMultipassCaller<char>,
        minMaxLocMultipassCaller<unsigned short>, minMaxLocMultipassCaller<short>,
        minMaxLocMultipassCaller<int>, minMaxLocMultipassCaller<float>, 0
    };

    static Caller singlepass_callers[] =
    {
        minMaxLocCaller<unsigned char>, minMaxLocCaller<char>,
        minMaxLocCaller<unsigned short>, minMaxLocCaller<short>,
        minMaxLocCaller<int>, minMaxLocCaller<float>, minMaxLocCaller<double>
    };

    static MaskedCaller masked_multipass_callers[] =
    {
        minMaxLocMaskMultipassCaller<unsigned char>, minMaxLocMaskMultipassCaller<char>,
        minMaxLocMaskMultipassCaller<unsigned short>, minMaxLocMaskMultipassCaller<short>,
        minMaxLocMaskMultipassCaller<int>, minMaxLocMaskMultipassCaller<float>, 0
    };

    static MaskedCaller masked_singlepass_callers[] =
    {
        minMaxLocMaskCaller<unsigned char>, minMaxLocMaskCaller<char>,
        minMaxLocMaskCaller<unsigned short>, minMaxLocMaskCaller<short>,
        minMaxLocMaskCaller<int>, minMaxLocMaskCaller<float>, minMaxLocMaskCaller<double>
    };

    CV_Assert(src.depth() <= CV_64F);
    CV_Assert(src.channels() == 1);
    CV_Assert(mask.empty() || (mask.type() == CV_8U && src.size() == mask.size()));

    if (src.depth() == CV_64F)
    {
        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
    }

    double minVal_; if (!minVal) minVal = &minVal_;
    double maxVal_; if (!maxVal) maxVal = &maxVal_;
    int minLoc_[2];
@@ -558,7 +569,7 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
            callers = singlepass_callers;

        Caller caller = callers[src.type()];
        CV_Assert(caller != 0);
        caller(src, minVal, maxVal, minLoc_, maxLoc_, valBuf, locBuf);
    }
    else
@@ -568,7 +579,7 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
            callers = masked_singlepass_callers;

        MaskedCaller caller = callers[src.type()];
        CV_Assert(caller != 0);
        caller(src, mask, minVal, maxVal, minLoc_, maxLoc_, valBuf, locBuf);
    }
@@ -622,8 +633,15 @@ int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf)
        countNonZeroCaller<unsigned short>, countNonZeroCaller<short>,
        countNonZeroCaller<int>, countNonZeroCaller<float>, countNonZeroCaller<double> };

    CV_Assert(src.depth() <= CV_64F);
    CV_Assert(src.channels() == 1);

    if (src.depth() == CV_64F)
    {
        if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
            CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
    }

    Size buf_size;
    getBufSizeRequired(src.cols, src.rows, buf_size.width, buf_size.height);
    ensureSizeIsEnough(buf_size, CV_8U, buf);
@@ -633,7 +651,7 @@ int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf)
        callers = singlepass_callers;

    Caller caller = callers[src.type()];
    CV_Assert(caller != 0);

    return caller(src, buf);
}
@@ -719,6 +737,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int
    };

    const caller_t func = callers[src.depth()][dst.depth()];

    if (!func)
        CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of input and output array formats");
@@ -781,6 +800,7 @@ void cv::gpu::reduce(const GpuMat& src, GpuMat& dst, int dim, int reduceOp, int
    };

    const caller_t func = callers[src.depth()][dst.depth()];

    if (!func)
        CV_Error(CV_StsUnsupportedFormat, "Unsupported combination of input and output array formats");
...
@@ -106,7 +106,7 @@
    #error "OpenCV GPU module doesn't support NVIDIA compute capability 1.0"
#endif

static inline void throw_nogpu() { CV_Error(CV_StsNotImplemented, "The called functionality is disabled for current build or platform"); }

#else /* defined(HAVE_CUDA) */
...
@@ -995,6 +995,20 @@ TEST_P(AbsDiff, Array)
    cv::Mat src1 = randomMat(size, depth);
    cv::Mat src2 = randomMat(size, depth);

    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
    {
        try
        {
            cv::gpu::GpuMat dst;
            cv::gpu::absdiff(loadMat(src1), loadMat(src2), dst);
        }
        catch (const cv::Exception& e)
        {
            ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
        }
    }
    else
    {
        cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
        cv::gpu::absdiff(loadMat(src1, useRoi), loadMat(src2, useRoi), dst);
@@ -1002,6 +1016,7 @@ TEST_P(AbsDiff, Array)
        cv::absdiff(src1, src2, dst_gold);

        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
    }
}
TEST_P(AbsDiff, Scalar)
@@ -1009,6 +1024,20 @@ TEST_P(AbsDiff, Scalar)
    cv::Mat src = randomMat(size, depth);
    cv::Scalar val = randomScalar(0.0, 255.0);

    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
    {
        try
        {
            cv::gpu::GpuMat dst;
            cv::gpu::absdiff(loadMat(src), val, dst);
        }
        catch (const cv::Exception& e)
        {
            ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
        }
    }
    else
    {
        cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
        cv::gpu::absdiff(loadMat(src, useRoi), val, dst);
@@ -1016,6 +1045,7 @@ TEST_P(AbsDiff, Scalar)
        cv::absdiff(src, val, dst_gold);

        EXPECT_MAT_NEAR(dst_gold, dst, depth <= CV_32F ? 1.0 : 1e-5);
    }
}
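The guard-and-catch shape above repeats in every updated test; one way it could be shared, assuming a hypothetical helper in the test utilities (not part of this commit):

    template <typename Op>
    void expectUnsupportedDouble(Op op)
    {
        try
        {
            op(); // expected to throw: the device lacks NATIVE_DOUBLE
        }
        catch (const cv::Exception& e)
        {
            ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
        }
    }

As with the inline version, a call that fails to throw would still pass silently; adding FAIL() after op() would tighten that.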
INSTANTIATE_TEST_CASE_P(GPU_Core, AbsDiff, testing::Combine(
@@ -1243,6 +1273,40 @@ INSTANTIATE_TEST_CASE_P(GPU_Core, Log, testing::Combine(
////////////////////////////////////////////////////////////////////////////////
// Exp
// CPU reference for gpu::exp: for integer inputs the GPU path computes expf and
// saturates the rounded result back into the source type, so mimic that here.
template <typename T> void expImpl(const cv::Mat& src, cv::Mat& dst)
{
    dst.create(src.size(), src.type());

    for (int y = 0; y < src.rows; ++y)
    {
        for (int x = 0; x < src.cols; ++x)
            dst.at<T>(y, x) = cv::saturate_cast<T>(static_cast<int>(std::exp(static_cast<float>(src.at<T>(y, x)))));
    }
}

void expImpl_float(const cv::Mat& src, cv::Mat& dst)
{
    dst.create(src.size(), src.type());

    for (int y = 0; y < src.rows; ++y)
    {
        for (int x = 0; x < src.cols; ++x)
            dst.at<float>(y, x) = std::exp(static_cast<float>(src.at<float>(y, x)));
    }
}

// Dispatch the reference implementation on the source depth (CV_8U..CV_32F).
void expGold(const cv::Mat& src, cv::Mat& dst)
{
    typedef void (*func_t)(const cv::Mat& src, cv::Mat& dst);

    const func_t funcs[] =
    {
        expImpl<uchar>, expImpl<schar>, expImpl<ushort>, expImpl<short>,
        expImpl<int>, expImpl_float
    };

    funcs[src.depth()](src, dst);
}
PARAM_TEST_CASE(Exp, cv::gpu::DeviceInfo, cv::Size, MatType, UseRoi)
{
    cv::gpu::DeviceInfo devInfo;
@@ -1269,7 +1333,7 @@ TEST_P(Exp, Accuracy)
    cv::gpu::exp(loadMat(src, useRoi), dst);

    cv::Mat dst_gold;
    expGold(src, dst_gold);

    EXPECT_MAT_NEAR(dst_gold, dst, 1e-2);
}
@@ -1277,7 +1341,10 @@ TEST_P(Exp, Accuracy)
INSTANTIATE_TEST_CASE_P(GPU_Core, Exp, testing::Combine(
    ALL_DEVICES,
    DIFFERENT_SIZES,
    testing::Values(MatType(CV_8UC1),
                    MatType(CV_16UC1),
                    MatType(CV_16SC1),
                    MatType(CV_32FC1)),
    WHOLE_SUBMAT));
////////////////////////////////////////////////////////////////////////////////
@@ -1311,6 +1378,20 @@ TEST_P(Compare, Accuracy)
    cv::Mat src1 = randomMat(size, depth);
    cv::Mat src2 = randomMat(size, depth);

    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
    {
        try
        {
            cv::gpu::GpuMat dst;
            cv::gpu::compare(loadMat(src1), loadMat(src2), dst, cmp_code);
        }
        catch (const cv::Exception& e)
        {
            ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
        }
    }
    else
    {
        cv::gpu::GpuMat dst = createMat(size, CV_8UC1, useRoi);
        cv::gpu::compare(loadMat(src1, useRoi), loadMat(src2, useRoi), dst, cmp_code);
@@ -1318,6 +1399,7 @@ TEST_P(Compare, Accuracy)
        cv::compare(src1, src2, dst_gold, cmp_code);

        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
    }
}
INSTANTIATE_TEST_CASE_P(GPU_Core, Compare, testing::Combine(
@@ -1635,17 +1717,60 @@ PARAM_TEST_CASE(Min, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
    }
};
TEST_P(Min, Array)
{
    cv::Mat src1 = randomMat(size, depth);
    cv::Mat src2 = randomMat(size, depth);

    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
    {
        try
        {
            cv::gpu::GpuMat dst;
            cv::gpu::min(loadMat(src1), loadMat(src2), dst);
        }
        catch (const cv::Exception& e)
        {
            ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
        }
    }
    else
    {
        cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
        cv::gpu::min(loadMat(src1, useRoi), loadMat(src2, useRoi), dst);

        cv::Mat dst_gold = cv::min(src1, src2);

        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
    }
}
TEST_P(Min, Scalar)
{
    cv::Mat src = randomMat(size, depth);
    double val = randomDouble(0.0, 255.0);

    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
    {
        try
        {
            cv::gpu::GpuMat dst;
            cv::gpu::min(loadMat(src), val, dst);
        }
        catch (const cv::Exception& e)
        {
            ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
        }
    }
    else
    {
        cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
        cv::gpu::min(loadMat(src, useRoi), val, dst);

        cv::Mat dst_gold = cv::min(src, val);

        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
    }
}
INSTANTIATE_TEST_CASE_P(GPU_Core, Min, testing::Combine(
@@ -1675,17 +1800,60 @@ PARAM_TEST_CASE(Max, cv::gpu::DeviceInfo, cv::Size, MatDepth, UseRoi)
    }
};
TEST_P(Max, Array)
{
    cv::Mat src1 = randomMat(size, depth);
    cv::Mat src2 = randomMat(size, depth);

    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
    {
        try
        {
            cv::gpu::GpuMat dst;
            cv::gpu::max(loadMat(src1), loadMat(src2), dst);
        }
        catch (const cv::Exception& e)
        {
            ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
        }
    }
    else
    {
        cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
        cv::gpu::max(loadMat(src1, useRoi), loadMat(src2, useRoi), dst);

        cv::Mat dst_gold = cv::max(src1, src2);

        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
    }
}
TEST_P(Max, Scalar)
{
    cv::Mat src = randomMat(size, depth);
    double val = randomDouble(0.0, 255.0);

    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
    {
        try
        {
            cv::gpu::GpuMat dst;
            cv::gpu::max(loadMat(src), val, dst);
        }
        catch (const cv::Exception& e)
        {
            ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
        }
    }
    else
    {
        cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
        cv::gpu::max(loadMat(src, useRoi), val, dst);

        cv::Mat dst_gold = cv::max(src, val);

        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
    }
}
INSTANTIATE_TEST_CASE_P(GPU_Core, Max, testing::Combine(
@@ -1723,6 +1891,20 @@ TEST_P(Pow, Accuracy)
    if (src.depth() < CV_32F)
        power = static_cast<int>(power);
    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
    {
        try
        {
            cv::gpu::GpuMat dst;
            cv::gpu::pow(loadMat(src), power, dst);
        }
        catch (const cv::Exception& e)
        {
            ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
        }
    }
    else
    {
        cv::gpu::GpuMat dst = createMat(size, depth, useRoi);
        cv::gpu::pow(loadMat(src, useRoi), power, dst);
@@ -1730,6 +1912,7 @@ TEST_P(Pow, Accuracy)
        cv::pow(src, power, dst_gold);

        EXPECT_MAT_NEAR(dst_gold, dst, depth < CV_32F ? 0.0 : 1e-1);
    }
}
INSTANTIATE_TEST_CASE_P(GPU_Core, Pow, testing::Combine(
@@ -1750,7 +1933,6 @@ PARAM_TEST_CASE(AddWeighted, cv::gpu::DeviceInfo, cv::Size, MatDepth, MatDepth,
    int dst_depth;
    bool useRoi;

    virtual void SetUp()
    {
        devInfo = GET_PARAM(0);
@@ -1772,6 +1954,20 @@ TEST_P(AddWeighted, Accuracy)
    double beta = randomDouble(-10.0, 10.0);
    double gamma = randomDouble(-10.0, 10.0);
    if ((depth1 == CV_64F || depth2 == CV_64F || dst_depth == CV_64F) && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
    {
        try
        {
            cv::gpu::GpuMat dst;
            cv::gpu::addWeighted(loadMat(src1), alpha, loadMat(src2), beta, gamma, dst, dst_depth);
        }
        catch (const cv::Exception& e)
        {
            ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
        }
    }
    else
    {
        cv::gpu::GpuMat dst = createMat(size, dst_depth, useRoi);
        cv::gpu::addWeighted(loadMat(src1, useRoi), alpha, loadMat(src2, useRoi), beta, gamma, dst, dst_depth);
@@ -1779,6 +1975,7 @@ TEST_P(AddWeighted, Accuracy)
        cv::addWeighted(src1, alpha, src2, beta, gamma, dst_gold, dst_depth);

        EXPECT_MAT_NEAR(dst_gold, dst, dst_depth < CV_32F ? 1.0 : 1e-12);
    }
}
INSTANTIATE_TEST_CASE_P(GPU_Core, AddWeighted, testing::Combine(
@@ -1823,6 +2020,43 @@ TEST_P(GEMM, Accuracy)
    double alpha = randomDouble(-10.0, 10.0);
    double beta = randomDouble(-10.0, 10.0);
#ifndef HAVE_CUBLAS
    try
    {
        cv::gpu::GpuMat dst;
        cv::gpu::gemm(loadMat(src1), loadMat(src2), alpha, loadMat(src3), beta, dst, flags);
    }
    catch (const cv::Exception& e)
    {
        ASSERT_EQ(CV_StsNotImplemented, e.code);
    }
#else
    if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
    {
        try
        {
            cv::gpu::GpuMat dst;
            cv::gpu::gemm(loadMat(src1), loadMat(src2), alpha, loadMat(src3), beta, dst, flags);
        }
        catch (const cv::Exception& e)
        {
            ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
        }
    }
    else if (type == CV_64FC2 && flags != 0)
    {
        try
        {
            cv::gpu::GpuMat dst;
            cv::gpu::gemm(loadMat(src1), loadMat(src2), alpha, loadMat(src3), beta, dst, flags);
        }
        catch (const cv::Exception& e)
        {
            ASSERT_EQ(CV_StsNotImplemented, e.code);
        }
    }
    else
    {
        cv::gpu::GpuMat dst = createMat(size, type, useRoi);
        cv::gpu::gemm(loadMat(src1, useRoi), loadMat(src2, useRoi), alpha, loadMat(src3, useRoi), beta, dst, flags);
@@ -1830,6 +2064,8 @@ TEST_P(GEMM, Accuracy)
        cv::gemm(src1, src2, alpha, src3, beta, dst_gold, flags);

        EXPECT_MAT_NEAR(dst_gold, dst, CV_MAT_DEPTH(type) == CV_32F ? 1e-1 : 1e-10);
    }
#endif
}
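For contrast with the error branches above, the shape of a fully supported call that the accuracy branch exercises (sizes and values hypothetical):

    cv::gpu::GpuMat a(64, 64, CV_32FC1), b(64, 64, CV_32FC1), c(64, 64, CV_32FC1), d;
    cv::gpu::gemm(a, b, 1.0, c, 1.0, d); // d = 1.0*a*b + 1.0*c via CUBLAS, flags = 0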
INSTANTIATE_TEST_CASE_P(GPU_Core, GEMM, testing::Combine(
@@ -1864,6 +2100,20 @@ TEST_P(Transpose, Accuracy)
{
    cv::Mat src = randomMat(size, type);

    if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
    {
        try
        {
            cv::gpu::GpuMat dst;
            cv::gpu::transpose(loadMat(src), dst);
        }
        catch (const cv::Exception& e)
        {
            ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
        }
    }
    else
    {
        cv::gpu::GpuMat dst = createMat(cv::Size(size.height, size.width), type, useRoi);
        cv::gpu::transpose(loadMat(src, useRoi), dst);
@@ -1871,6 +2121,7 @@ TEST_P(Transpose, Accuracy)
        cv::transpose(src, dst_gold);

        EXPECT_MAT_NEAR(dst_gold, dst, 0.0);
    }
}
INSTANTIATE_TEST_CASE_P(GPU_Core, Transpose, testing::Combine(
@@ -2498,6 +2749,20 @@ TEST_P(MinMax, WithoutMask)
{
    cv::Mat src = randomMat(size, depth);

    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
    {
        try
        {
            double minVal, maxVal;
            cv::gpu::minMax(loadMat(src), &minVal, &maxVal);
        }
        catch (const cv::Exception& e)
        {
            ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
        }
    }
    else
    {
        double minVal, maxVal;
        cv::gpu::minMax(loadMat(src, useRoi), &minVal, &maxVal);
@@ -2506,6 +2771,7 @@ TEST_P(MinMax, WithoutMask)
        EXPECT_DOUBLE_EQ(minVal_gold, minVal);
        EXPECT_DOUBLE_EQ(maxVal_gold, maxVal);
    }
}
TEST_P(MinMax, WithMask)
@@ -2513,6 +2779,20 @@ TEST_P(MinMax, WithMask)
    cv::Mat src = randomMat(size, depth);
    cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);

    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
    {
        try
        {
            double minVal, maxVal;
            cv::gpu::minMax(loadMat(src), &minVal, &maxVal, loadMat(mask));
        }
        catch (const cv::Exception& e)
        {
            ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
        }
    }
    else
    {
        double minVal, maxVal;
        cv::gpu::minMax(loadMat(src, useRoi), &minVal, &maxVal, loadMat(mask, useRoi));
@@ -2521,13 +2801,38 @@ TEST_P(MinMax, WithMask)
        EXPECT_DOUBLE_EQ(minVal_gold, minVal);
        EXPECT_DOUBLE_EQ(maxVal_gold, maxVal);
    }
}
TEST_P(MinMax, NullPtr)
{
    cv::Mat src = randomMat(size, depth);

    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
    {
        try
        {
            double minVal, maxVal;
            cv::gpu::minMax(loadMat(src), &minVal, 0);
            cv::gpu::minMax(loadMat(src), 0, &maxVal);
        }
        catch (const cv::Exception& e)
        {
            ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
        }
    }
    else
    {
        double minVal, maxVal;
        cv::gpu::minMax(loadMat(src, useRoi), &minVal, 0);
        cv::gpu::minMax(loadMat(src, useRoi), 0, &maxVal);

        double minVal_gold, maxVal_gold;
        minMaxLocGold(src, &minVal_gold, &maxVal_gold, 0, 0);

        EXPECT_DOUBLE_EQ(minVal_gold, minVal);
        EXPECT_DOUBLE_EQ(maxVal_gold, maxVal);
    }
}
INSTANTIATE_TEST_CASE_P(GPU_Core, MinMax, testing::Combine(
@@ -2585,6 +2890,21 @@ TEST_P(MinMaxLoc, WithoutMask)
{
    cv::Mat src = randomMat(size, depth);

    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
    {
        try
        {
            double minVal, maxVal;
            cv::Point minLoc, maxLoc;
            cv::gpu::minMaxLoc(loadMat(src), &minVal, &maxVal, &minLoc, &maxLoc);
        }
        catch (const cv::Exception& e)
        {
            ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
        }
    }
    else
    {
        double minVal, maxVal;
        cv::Point minLoc, maxLoc;
        cv::gpu::minMaxLoc(loadMat(src, useRoi), &minVal, &maxVal, &minLoc, &maxLoc);
@@ -2598,6 +2918,7 @@ TEST_P(MinMaxLoc, WithoutMask)
        expectEqual(src, minLoc_gold, minLoc);
        expectEqual(src, maxLoc_gold, maxLoc);
    }
}
TEST_P(MinMaxLoc, WithMask)
@@ -2605,6 +2926,21 @@ TEST_P(MinMaxLoc, WithMask)
    cv::Mat src = randomMat(size, depth);
    cv::Mat mask = randomMat(size, CV_8UC1, 0.0, 2.0);

    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
    {
        try
        {
            double minVal, maxVal;
            cv::Point minLoc, maxLoc;
            cv::gpu::minMaxLoc(loadMat(src), &minVal, &maxVal, &minLoc, &maxLoc, loadMat(mask));
        }
        catch (const cv::Exception& e)
        {
            ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
        }
    }
    else
    {
        double minVal, maxVal;
        cv::Point minLoc, maxLoc;
        cv::gpu::minMaxLoc(loadMat(src, useRoi), &minVal, &maxVal, &minLoc, &maxLoc, loadMat(mask, useRoi));
@@ -2618,13 +2954,48 @@ TEST_P(MinMaxLoc, WithMask)
        expectEqual(src, minLoc_gold, minLoc);
        expectEqual(src, maxLoc_gold, maxLoc);
    }
}
TEST_P(MinMaxLoc, NullPtr)
{
    cv::Mat src = randomMat(size, depth);

    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
    {
        try
        {
            double minVal, maxVal;
            cv::Point minLoc, maxLoc;
            cv::gpu::minMaxLoc(loadMat(src, useRoi), &minVal, 0, 0, 0);
            cv::gpu::minMaxLoc(loadMat(src, useRoi), 0, &maxVal, 0, 0);
            cv::gpu::minMaxLoc(loadMat(src, useRoi), 0, 0, &minLoc, 0);
            cv::gpu::minMaxLoc(loadMat(src, useRoi), 0, 0, 0, &maxLoc);
        }
        catch (const cv::Exception& e)
        {
            ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
        }
    }
    else
    {
        double minVal, maxVal;
        cv::Point minLoc, maxLoc;
        cv::gpu::minMaxLoc(loadMat(src, useRoi), &minVal, 0, 0, 0);
        cv::gpu::minMaxLoc(loadMat(src, useRoi), 0, &maxVal, 0, 0);
        cv::gpu::minMaxLoc(loadMat(src, useRoi), 0, 0, &minLoc, 0);
        cv::gpu::minMaxLoc(loadMat(src, useRoi), 0, 0, 0, &maxLoc);

        double minVal_gold, maxVal_gold;
        cv::Point minLoc_gold, maxLoc_gold;
        minMaxLocGold(src, &minVal_gold, &maxVal_gold, &minLoc_gold, &maxLoc_gold);

        EXPECT_DOUBLE_EQ(minVal_gold, minVal);
        EXPECT_DOUBLE_EQ(maxVal_gold, maxVal);
        expectEqual(src, minLoc_gold, minLoc);
        expectEqual(src, maxLoc_gold, maxLoc);
    }
}
INSTANTIATE_TEST_CASE_P(GPU_Core, MinMaxLoc, testing::Combine(
@@ -2661,12 +3032,25 @@ TEST_P(CountNonZero, Accuracy)
    cv::Mat src;
    srcBase.convertTo(src, depth);

    if (depth == CV_64F && !supportFeature(devInfo, cv::gpu::NATIVE_DOUBLE))
    {
        try
        {
            cv::gpu::countNonZero(loadMat(src));
        }
        catch (const cv::Exception& e)
        {
            ASSERT_EQ(CV_StsUnsupportedFormat, e.code);
        }
    }
    else
    {
        int val = cv::gpu::countNonZero(loadMat(src, useRoi));

        int val_gold = cv::countNonZero(src);

        ASSERT_EQ(val_gold, val);
    }
}
INSTANTIATE_TEST_CASE_P(GPU_Core, CountNonZero, testing::Combine(
...