Commit 7454189c authored by Vladislav Vinogradov's avatar Vladislav Vinogradov

use new getInputMat/getOutputMat/syncOutput methods in cudaarithm routines

parent 3d0410c1
...@@ -169,9 +169,9 @@ void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray ...@@ -169,9 +169,9 @@ void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray
#else #else
// CUBLAS works with column-major matrices // CUBLAS works with column-major matrices
GpuMat src1 = _src1.getGpuMat(); GpuMat src1 = getInputMat(_src1, stream);
GpuMat src2 = _src2.getGpuMat(); GpuMat src2 = getInputMat(_src2, stream);
GpuMat src3 = _src3.getGpuMat(); GpuMat src3 = getInputMat(_src3, stream);
CV_Assert( src1.type() == CV_32FC1 || src1.type() == CV_32FC2 || src1.type() == CV_64FC1 || src1.type() == CV_64FC2 ); CV_Assert( src1.type() == CV_32FC1 || src1.type() == CV_32FC2 || src1.type() == CV_64FC1 || src1.type() == CV_64FC2 );
CV_Assert( src2.type() == src1.type() && (src3.empty() || src3.type() == src1.type()) ); CV_Assert( src2.type() == src1.type() && (src3.empty() || src3.type() == src1.type()) );
...@@ -200,8 +200,7 @@ void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray ...@@ -200,8 +200,7 @@ void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray
CV_Assert( src1Size.width == src2Size.height ); CV_Assert( src1Size.width == src2Size.height );
CV_Assert( src3.empty() || src3Size == dstSize ); CV_Assert( src3.empty() || src3Size == dstSize );
_dst.create(dstSize, src1.type()); GpuMat dst = getOutputMat(_dst, dstSize, src1.type(), stream);
GpuMat dst = _dst.getGpuMat();
if (beta != 0) if (beta != 0)
{ {
...@@ -281,6 +280,8 @@ void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray ...@@ -281,6 +280,8 @@ void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray
} }
cublasSafeCall( cublasDestroy_v2(handle) ); cublasSafeCall( cublasDestroy_v2(handle) );
syncOutput(dst, _dst, stream);
#endif #endif
} }
...@@ -297,7 +298,7 @@ void cv::cuda::dft(InputArray _src, OutputArray _dst, Size dft_size, int flags, ...@@ -297,7 +298,7 @@ void cv::cuda::dft(InputArray _src, OutputArray _dst, Size dft_size, int flags,
(void) stream; (void) stream;
throw_no_cuda(); throw_no_cuda();
#else #else
GpuMat src = _src.getGpuMat(); GpuMat src = getInputMat(_src, stream);
CV_Assert( src.type() == CV_32FC1 || src.type() == CV_32FC2 ); CV_Assert( src.type() == CV_32FC1 || src.type() == CV_32FC2 );
...@@ -462,16 +463,15 @@ namespace ...@@ -462,16 +463,15 @@ namespace
void ConvolutionImpl::convolve(InputArray _image, InputArray _templ, OutputArray _result, bool ccorr, Stream& _stream) void ConvolutionImpl::convolve(InputArray _image, InputArray _templ, OutputArray _result, bool ccorr, Stream& _stream)
{ {
GpuMat image = _image.getGpuMat(); GpuMat image = getInputMat(_image, _stream);
GpuMat templ = _templ.getGpuMat(); GpuMat templ = getInputMat(_templ, _stream);
CV_Assert( image.type() == CV_32FC1 ); CV_Assert( image.type() == CV_32FC1 );
CV_Assert( templ.type() == CV_32FC1 ); CV_Assert( templ.type() == CV_32FC1 );
create(image.size(), templ.size()); create(image.size(), templ.size());
_result.create(result_size, CV_32FC1); GpuMat result = getOutputMat(_result, result_size, CV_32FC1, _stream);
GpuMat result = _result.getGpuMat();
cudaStream_t stream = StreamAccessor::getStream(_stream); cudaStream_t stream = StreamAccessor::getStream(_stream);
...@@ -520,6 +520,8 @@ namespace ...@@ -520,6 +520,8 @@ namespace
cufftSafeCall( cufftDestroy(planR2C) ); cufftSafeCall( cufftDestroy(planR2C) );
cufftSafeCall( cufftDestroy(planC2R) ); cufftSafeCall( cufftDestroy(planC2R) );
syncOutput(result, _result, _stream);
} }
} }
......
...@@ -119,15 +119,17 @@ void cv::cuda::flip(InputArray _src, OutputArray _dst, int flipCode, Stream& str ...@@ -119,15 +119,17 @@ void cv::cuda::flip(InputArray _src, OutputArray _dst, int flipCode, Stream& str
{NppMirror<CV_32F, nppiMirror_32f_C1R>::call, 0, NppMirror<CV_32F, nppiMirror_32f_C3R>::call, NppMirror<CV_32F, nppiMirror_32f_C4R>::call} {NppMirror<CV_32F, nppiMirror_32f_C1R>::call, 0, NppMirror<CV_32F, nppiMirror_32f_C3R>::call, NppMirror<CV_32F, nppiMirror_32f_C4R>::call}
}; };
GpuMat src = _src.getGpuMat(); GpuMat src = getInputMat(_src, stream);
CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S || src.depth() == CV_32F); CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S || src.depth() == CV_32F);
CV_Assert(src.channels() == 1 || src.channels() == 3 || src.channels() == 4); CV_Assert(src.channels() == 1 || src.channels() == 3 || src.channels() == 4);
_dst.create(src.size(), src.type()); _dst.create(src.size(), src.type());
GpuMat dst = _dst.getGpuMat(); GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
funcs[src.depth()][src.channels() - 1](src, dst, flipCode, StreamAccessor::getStream(stream)); funcs[src.depth()][src.channels() - 1](src, dst, flipCode, StreamAccessor::getStream(stream));
syncOutput(dst, _dst, stream);
} }
#endif /* !defined (HAVE_CUDA) */ #endif /* !defined (HAVE_CUDA) */
...@@ -50,7 +50,10 @@ ...@@ -50,7 +50,10 @@
#include "opencv2/cudaarithm.hpp" #include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp" #include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev; using namespace cv::cudev;
namespace namespace
...@@ -63,7 +66,7 @@ namespace ...@@ -63,7 +66,7 @@ namespace
__device__ __forceinline__ D operator ()(T1 a, T2 b) const __device__ __forceinline__ D operator ()(T1 a, T2 b) const
{ {
return saturate_cast<D>(a * alpha + b * beta + gamma); return cudev::saturate_cast<D>(a * alpha + b * beta + gamma);
} }
}; };
...@@ -555,8 +558,8 @@ void cv::cuda::addWeighted(InputArray _src1, double alpha, InputArray _src2, dou ...@@ -555,8 +558,8 @@ void cv::cuda::addWeighted(InputArray _src1, double alpha, InputArray _src2, dou
} }
}; };
GpuMat src1 = _src1.getGpuMat(); GpuMat src1 = getInputMat(_src1, stream);
GpuMat src2 = _src2.getGpuMat(); GpuMat src2 = getInputMat(_src2, stream);
int sdepth1 = src1.depth(); int sdepth1 = src1.depth();
int sdepth2 = src2.depth(); int sdepth2 = src2.depth();
...@@ -564,19 +567,18 @@ void cv::cuda::addWeighted(InputArray _src1, double alpha, InputArray _src2, dou ...@@ -564,19 +567,18 @@ void cv::cuda::addWeighted(InputArray _src1, double alpha, InputArray _src2, dou
ddepth = ddepth >= 0 ? CV_MAT_DEPTH(ddepth) : std::max(sdepth1, sdepth2); ddepth = ddepth >= 0 ? CV_MAT_DEPTH(ddepth) : std::max(sdepth1, sdepth2);
const int cn = src1.channels(); const int cn = src1.channels();
CV_DbgAssert( src2.size() == src1.size() && src2.channels() == cn ); CV_Assert( src2.size() == src1.size() && src2.channels() == cn );
CV_DbgAssert( sdepth1 <= CV_64F && sdepth2 <= CV_64F && ddepth <= CV_64F ); CV_Assert( sdepth1 <= CV_64F && sdepth2 <= CV_64F && ddepth <= CV_64F );
_dst.create(src1.size(), CV_MAKE_TYPE(ddepth, cn)); GpuMat dst = getOutputMat(_dst, src1.size(), CV_MAKE_TYPE(ddepth, cn), stream);
GpuMat dst = _dst.getGpuMat();
GpuMat src1_ = src1.reshape(1); GpuMat src1_single = src1.reshape(1);
GpuMat src2_ = src2.reshape(1); GpuMat src2_single = src2.reshape(1);
GpuMat dst_ = dst.reshape(1); GpuMat dst_single = dst.reshape(1);
if (sdepth1 > sdepth2) if (sdepth1 > sdepth2)
{ {
src1_.swap(src2_); src1_single.swap(src2_single);
std::swap(alpha, beta); std::swap(alpha, beta);
std::swap(sdepth1, sdepth2); std::swap(sdepth1, sdepth2);
} }
...@@ -586,7 +588,9 @@ void cv::cuda::addWeighted(InputArray _src1, double alpha, InputArray _src2, dou ...@@ -586,7 +588,9 @@ void cv::cuda::addWeighted(InputArray _src1, double alpha, InputArray _src2, dou
if (!func) if (!func)
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
func(src1_, alpha, src2_, beta, gamma, dst_, stream); func(src1_single, alpha, src2_single, beta, gamma, dst_single, stream);
syncOutput(dst, _dst, stream);
} }
#endif #endif
...@@ -50,7 +50,10 @@ ...@@ -50,7 +50,10 @@
#include "opencv2/cudaarithm.hpp" #include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp" #include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev; using namespace cv::cudev;
void bitMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int op); void bitMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int op);
...@@ -60,16 +63,15 @@ void bitMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& m ...@@ -60,16 +63,15 @@ void bitMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& m
void cv::cuda::bitwise_not(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream) void cv::cuda::bitwise_not(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream)
{ {
GpuMat src = _src.getGpuMat(); GpuMat src = getInputMat(_src, stream);
GpuMat mask = _mask.getGpuMat(); GpuMat mask = getInputMat(_mask, stream);
const int depth = src.depth(); const int depth = src.depth();
CV_DbgAssert( depth <= CV_32F ); CV_DbgAssert( depth <= CV_32F );
CV_DbgAssert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) ); CV_DbgAssert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );
_dst.create(src.size(), src.type()); GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
GpuMat dst = _dst.getGpuMat();
if (mask.empty()) if (mask.empty())
{ {
...@@ -125,6 +127,8 @@ void cv::cuda::bitwise_not(InputArray _src, OutputArray _dst, InputArray _mask, ...@@ -125,6 +127,8 @@ void cv::cuda::bitwise_not(InputArray _src, OutputArray _dst, InputArray _mask,
gridTransformUnary(vsrc, vdst, bit_not<uchar>(), singleMaskChannels(globPtr<uchar>(mask), src.channels()), stream); gridTransformUnary(vsrc, vdst, bit_not<uchar>(), singleMaskChannels(globPtr<uchar>(mask), src.channels()), stream);
} }
} }
syncOutput(dst, _dst, stream);
} }
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
......
...@@ -50,7 +50,10 @@ ...@@ -50,7 +50,10 @@
#include "opencv2/cudaarithm.hpp" #include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp" #include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev; using namespace cv::cudev;
namespace namespace
...@@ -133,7 +136,7 @@ void cv::cuda::copyMakeBorder(InputArray _src, OutputArray _dst, int top, int bo ...@@ -133,7 +136,7 @@ void cv::cuda::copyMakeBorder(InputArray _src, OutputArray _dst, int top, int bo
{ copyMakeBorderImpl<float , 1> , 0 /*copyMakeBorderImpl<float , 2>*/, copyMakeBorderImpl<float , 3> , copyMakeBorderImpl<float ,4> } { copyMakeBorderImpl<float , 1> , 0 /*copyMakeBorderImpl<float , 2>*/, copyMakeBorderImpl<float , 3> , copyMakeBorderImpl<float ,4> }
}; };
GpuMat src = _src.getGpuMat(); GpuMat src = getInputMat(_src, stream);
const int depth = src.depth(); const int depth = src.depth();
const int cn = src.channels(); const int cn = src.channels();
...@@ -141,8 +144,7 @@ void cv::cuda::copyMakeBorder(InputArray _src, OutputArray _dst, int top, int bo ...@@ -141,8 +144,7 @@ void cv::cuda::copyMakeBorder(InputArray _src, OutputArray _dst, int top, int bo
CV_Assert( depth <= CV_32F && cn <= 4 ); CV_Assert( depth <= CV_32F && cn <= 4 );
CV_Assert( borderType == BORDER_REFLECT_101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP ); CV_Assert( borderType == BORDER_REFLECT_101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP );
_dst.create(src.rows + top + bottom, src.cols + left + right, src.type()); GpuMat dst = getOutputMat(_dst, src.rows + top + bottom, src.cols + left + right, src.type(), stream);
GpuMat dst = _dst.getGpuMat();
const func_t func = funcs[depth][cn - 1]; const func_t func = funcs[depth][cn - 1];
...@@ -150,6 +152,8 @@ void cv::cuda::copyMakeBorder(InputArray _src, OutputArray _dst, int top, int bo ...@@ -150,6 +152,8 @@ void cv::cuda::copyMakeBorder(InputArray _src, OutputArray _dst, int top, int bo
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types"); CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
func(src, dst, top, left, borderType, value, stream); func(src, dst, top, left, borderType, value, stream);
syncOutput(dst, _dst, stream);
} }
#endif #endif
...@@ -50,8 +50,10 @@ ...@@ -50,8 +50,10 @@
#include "opencv2/cudaarithm.hpp" #include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp" #include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv; using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev; using namespace cv::cudev;
namespace namespace
...@@ -165,7 +167,7 @@ namespace ...@@ -165,7 +167,7 @@ namespace
void LookUpTableImpl::transform(InputArray _src, OutputArray _dst, Stream& stream) void LookUpTableImpl::transform(InputArray _src, OutputArray _dst, Stream& stream)
{ {
GpuMat src = _src.getGpuMat(); GpuMat src = getInputMat(_src, stream);
const int cn = src.channels(); const int cn = src.channels();
const int lut_cn = d_lut.channels(); const int lut_cn = d_lut.channels();
...@@ -173,8 +175,7 @@ namespace ...@@ -173,8 +175,7 @@ namespace
CV_Assert( src.type() == CV_8UC1 || src.type() == CV_8UC3 ); CV_Assert( src.type() == CV_8UC1 || src.type() == CV_8UC3 );
CV_Assert( lut_cn == 1 || lut_cn == cn ); CV_Assert( lut_cn == 1 || lut_cn == cn );
_dst.create(src.size(), src.type()); GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
GpuMat dst = _dst.getGpuMat();
if (lut_cn == 1) if (lut_cn == 1)
{ {
...@@ -196,6 +197,8 @@ namespace ...@@ -196,6 +197,8 @@ namespace
dst3.assign(lut_(src3, tbl), stream); dst3.assign(lut_(src3, tbl), stream);
} }
syncOutput(dst, _dst, stream);
} }
} }
......
...@@ -50,7 +50,10 @@ ...@@ -50,7 +50,10 @@
#include "opencv2/cudaarithm.hpp" #include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp" #include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev; using namespace cv::cudev;
namespace namespace
...@@ -92,16 +95,15 @@ void cv::cuda::abs(InputArray _src, OutputArray _dst, Stream& stream) ...@@ -92,16 +95,15 @@ void cv::cuda::abs(InputArray _src, OutputArray _dst, Stream& stream)
absMat<double> absMat<double>
}; };
GpuMat src = _src.getGpuMat(); GpuMat src = getInputMat(_src, stream);
const int depth = src.depth(); CV_Assert( src.depth() <= CV_64F );
CV_DbgAssert( depth <= CV_64F ); GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
_dst.create(src.size(), src.type()); funcs[src.depth()](src.reshape(1), dst.reshape(1), stream);
GpuMat dst = _dst.getGpuMat();
funcs[depth](src.reshape(1), dst.reshape(1), stream); syncOutput(dst, _dst, stream);
} }
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
...@@ -113,7 +115,7 @@ namespace ...@@ -113,7 +115,7 @@ namespace
{ {
__device__ __forceinline__ T operator ()(T x) const __device__ __forceinline__ T operator ()(T x) const
{ {
return saturate_cast<T>(x * x); return cudev::saturate_cast<T>(x * x);
} }
}; };
...@@ -138,16 +140,15 @@ void cv::cuda::sqr(InputArray _src, OutputArray _dst, Stream& stream) ...@@ -138,16 +140,15 @@ void cv::cuda::sqr(InputArray _src, OutputArray _dst, Stream& stream)
sqrMat<double> sqrMat<double>
}; };
GpuMat src = _src.getGpuMat(); GpuMat src = getInputMat(_src, stream);
const int depth = src.depth(); CV_Assert( src.depth() <= CV_64F );
CV_DbgAssert( depth <= CV_64F ); GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
_dst.create(src.size(), src.type()); funcs[src.depth()](src.reshape(1), dst.reshape(1), stream);
GpuMat dst = _dst.getGpuMat();
funcs[depth](src.reshape(1), dst.reshape(1), stream); syncOutput(dst, _dst, stream);
} }
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
...@@ -176,16 +177,15 @@ void cv::cuda::sqrt(InputArray _src, OutputArray _dst, Stream& stream) ...@@ -176,16 +177,15 @@ void cv::cuda::sqrt(InputArray _src, OutputArray _dst, Stream& stream)
sqrtMat<double> sqrtMat<double>
}; };
GpuMat src = _src.getGpuMat(); GpuMat src = getInputMat(_src, stream);
const int depth = src.depth(); CV_Assert( src.depth() <= CV_64F );
CV_DbgAssert( depth <= CV_64F ); GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
_dst.create(src.size(), src.type()); funcs[src.depth()](src.reshape(1), dst.reshape(1), stream);
GpuMat dst = _dst.getGpuMat();
funcs[depth](src.reshape(1), dst.reshape(1), stream); syncOutput(dst, _dst, stream);
} }
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
...@@ -198,7 +198,7 @@ namespace ...@@ -198,7 +198,7 @@ namespace
__device__ __forceinline__ T operator ()(T x) const __device__ __forceinline__ T operator ()(T x) const
{ {
exp_func<T> f; exp_func<T> f;
return saturate_cast<T>(f(x)); return cudev::saturate_cast<T>(f(x));
} }
}; };
...@@ -223,16 +223,15 @@ void cv::cuda::exp(InputArray _src, OutputArray _dst, Stream& stream) ...@@ -223,16 +223,15 @@ void cv::cuda::exp(InputArray _src, OutputArray _dst, Stream& stream)
expMat<double> expMat<double>
}; };
GpuMat src = _src.getGpuMat(); GpuMat src = getInputMat(_src, stream);
const int depth = src.depth(); CV_Assert( src.depth() <= CV_64F );
CV_DbgAssert( depth <= CV_64F ); GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
_dst.create(src.size(), src.type()); funcs[src.depth()](src.reshape(1), dst.reshape(1), stream);
GpuMat dst = _dst.getGpuMat();
funcs[depth](src.reshape(1), dst.reshape(1), stream); syncOutput(dst, _dst, stream);
} }
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
...@@ -261,16 +260,15 @@ void cv::cuda::log(InputArray _src, OutputArray _dst, Stream& stream) ...@@ -261,16 +260,15 @@ void cv::cuda::log(InputArray _src, OutputArray _dst, Stream& stream)
logMat<double> logMat<double>
}; };
GpuMat src = _src.getGpuMat(); GpuMat src = getInputMat(_src, stream);
const int depth = src.depth(); CV_Assert( src.depth() <= CV_64F );
CV_DbgAssert( depth <= CV_64F ); GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
_dst.create(src.size(), src.type()); funcs[src.depth()](src.reshape(1), dst.reshape(1), stream);
GpuMat dst = _dst.getGpuMat();
funcs[depth](src.reshape(1), dst.reshape(1), stream); syncOutput(dst, _dst, stream);
} }
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
...@@ -284,7 +282,7 @@ namespace ...@@ -284,7 +282,7 @@ namespace
__device__ __forceinline__ T operator()(T e) const __device__ __forceinline__ T operator()(T e) const
{ {
return saturate_cast<T>(__powf((float)e, power)); return cudev::saturate_cast<T>(__powf((float)e, power));
} }
}; };
template<typename T> struct PowOp<T, true> : unary_function<T, T> template<typename T> struct PowOp<T, true> : unary_function<T, T>
...@@ -293,7 +291,7 @@ namespace ...@@ -293,7 +291,7 @@ namespace
__device__ __forceinline__ T operator()(T e) const __device__ __forceinline__ T operator()(T e) const
{ {
T res = saturate_cast<T>(__powf((float)e, power)); T res = cudev::saturate_cast<T>(__powf((float)e, power));
if ((e < 0) && (1 & static_cast<int>(power))) if ((e < 0) && (1 & static_cast<int>(power)))
res *= -1; res *= -1;
...@@ -344,16 +342,15 @@ void cv::cuda::pow(InputArray _src, double power, OutputArray _dst, Stream& stre ...@@ -344,16 +342,15 @@ void cv::cuda::pow(InputArray _src, double power, OutputArray _dst, Stream& stre
powMat<double> powMat<double>
}; };
GpuMat src = _src.getGpuMat(); GpuMat src = getInputMat(_src, stream);
const int depth = src.depth(); CV_Assert( src.depth() <= CV_64F );
CV_DbgAssert(depth <= CV_64F); GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
_dst.create(src.size(), src.type()); funcs[src.depth()](src.reshape(1), power, dst.reshape(1), stream);
GpuMat dst = _dst.getGpuMat();
funcs[depth](src.reshape(1), power, dst.reshape(1), stream); syncOutput(dst, _dst, stream);
} }
#endif #endif
...@@ -50,7 +50,10 @@ ...@@ -50,7 +50,10 @@
#include "opencv2/cudaarithm.hpp" #include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp" #include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev; using namespace cv::cudev;
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
...@@ -120,33 +123,33 @@ void cv::cuda::mulSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst ...@@ -120,33 +123,33 @@ void cv::cuda::mulSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst
{ {
(void) flags; (void) flags;
GpuMat src1 = _src1.getGpuMat(); GpuMat src1 = getInputMat(_src1, stream);
GpuMat src2 = _src2.getGpuMat(); GpuMat src2 = getInputMat(_src2, stream);
CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2 ); CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2 );
CV_Assert( src1.size() == src2.size() ); CV_Assert( src1.size() == src2.size() );
_dst.create(src1.size(), CV_32FC2); GpuMat dst = getOutputMat(_dst, src1.size(), CV_32FC2, stream);
GpuMat dst = _dst.getGpuMat();
if (conjB) if (conjB)
gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), comlex_mul_conj(), stream); gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), comlex_mul_conj(), stream);
else else
gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), comlex_mul(), stream); gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), comlex_mul(), stream);
syncOutput(dst, _dst, stream);
} }
void cv::cuda::mulAndScaleSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst, int flags, float scale, bool conjB, Stream& stream) void cv::cuda::mulAndScaleSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst, int flags, float scale, bool conjB, Stream& stream)
{ {
(void) flags; (void) flags;
GpuMat src1 = _src1.getGpuMat(); GpuMat src1 = getInputMat(_src1, stream);
GpuMat src2 = _src2.getGpuMat(); GpuMat src2 = getInputMat(_src2, stream);
CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2); CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2);
CV_Assert( src1.size() == src2.size() ); CV_Assert( src1.size() == src2.size() );
_dst.create(src1.size(), CV_32FC2); GpuMat dst = getOutputMat(_dst, src1.size(), CV_32FC2, stream);
GpuMat dst = _dst.getGpuMat();
if (conjB) if (conjB)
{ {
...@@ -160,6 +163,8 @@ void cv::cuda::mulAndScaleSpectrums(InputArray _src1, InputArray _src2, OutputAr ...@@ -160,6 +163,8 @@ void cv::cuda::mulAndScaleSpectrums(InputArray _src1, InputArray _src2, OutputAr
op.scale = scale; op.scale = scale;
gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), op, stream); gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), op, stream);
} }
syncOutput(dst, _dst, stream);
} }
#endif #endif
...@@ -50,55 +50,59 @@ ...@@ -50,55 +50,59 @@
#include "opencv2/cudaarithm.hpp" #include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp" #include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev; using namespace cv::cudev;
void cv::cuda::magnitude(InputArray _x, InputArray _y, OutputArray _dst, Stream& stream) void cv::cuda::magnitude(InputArray _x, InputArray _y, OutputArray _dst, Stream& stream)
{ {
GpuMat x = _x.getGpuMat(); GpuMat x = getInputMat(_x, stream);
GpuMat y = _y.getGpuMat(); GpuMat y = getInputMat(_y, stream);
CV_DbgAssert( x.depth() == CV_32F ); CV_Assert( x.depth() == CV_32F );
CV_DbgAssert( y.type() == x.type() && y.size() == x.size() ); CV_Assert( y.type() == x.type() && y.size() == x.size() );
_dst.create(x.size(), CV_32FC1); GpuMat dst = getOutputMat(_dst, x.size(), CV_32FC1, stream);
GpuMat dst = _dst.getGpuMat();
GpuMat_<float> xc(x.reshape(1)); GpuMat_<float> xc(x.reshape(1));
GpuMat_<float> yc(y.reshape(1)); GpuMat_<float> yc(y.reshape(1));
GpuMat_<float> magc(dst.reshape(1)); GpuMat_<float> magc(dst.reshape(1));
gridTransformBinary(xc, yc, magc, magnitude_func<float>(), stream); gridTransformBinary(xc, yc, magc, magnitude_func<float>(), stream);
syncOutput(dst, _dst, stream);
} }
void cv::cuda::magnitudeSqr(InputArray _x, InputArray _y, OutputArray _dst, Stream& stream) void cv::cuda::magnitudeSqr(InputArray _x, InputArray _y, OutputArray _dst, Stream& stream)
{ {
GpuMat x = _x.getGpuMat(); GpuMat x = getInputMat(_x, stream);
GpuMat y = _y.getGpuMat(); GpuMat y = getInputMat(_y, stream);
CV_DbgAssert( x.depth() == CV_32F ); CV_Assert( x.depth() == CV_32F );
CV_DbgAssert( y.type() == x.type() && y.size() == x.size() ); CV_Assert( y.type() == x.type() && y.size() == x.size() );
_dst.create(x.size(), CV_32FC1); GpuMat dst = getOutputMat(_dst, x.size(), CV_32FC1, stream);
GpuMat dst = _dst.getGpuMat();
GpuMat_<float> xc(x.reshape(1)); GpuMat_<float> xc(x.reshape(1));
GpuMat_<float> yc(y.reshape(1)); GpuMat_<float> yc(y.reshape(1));
GpuMat_<float> magc(dst.reshape(1)); GpuMat_<float> magc(dst.reshape(1));
gridTransformBinary(xc, yc, magc, magnitude_sqr_func<float>(), stream); gridTransformBinary(xc, yc, magc, magnitude_sqr_func<float>(), stream);
syncOutput(dst, _dst, stream);
} }
void cv::cuda::phase(InputArray _x, InputArray _y, OutputArray _dst, bool angleInDegrees, Stream& stream) void cv::cuda::phase(InputArray _x, InputArray _y, OutputArray _dst, bool angleInDegrees, Stream& stream)
{ {
GpuMat x = _x.getGpuMat(); GpuMat x = getInputMat(_x, stream);
GpuMat y = _y.getGpuMat(); GpuMat y = getInputMat(_y, stream);
CV_DbgAssert( x.depth() == CV_32F ); CV_Assert( x.depth() == CV_32F );
CV_DbgAssert( y.type() == x.type() && y.size() == x.size() ); CV_Assert( y.type() == x.type() && y.size() == x.size() );
_dst.create(x.size(), CV_32FC1); GpuMat dst = getOutputMat(_dst, x.size(), CV_32FC1, stream);
GpuMat dst = _dst.getGpuMat();
GpuMat_<float> xc(x.reshape(1)); GpuMat_<float> xc(x.reshape(1));
GpuMat_<float> yc(y.reshape(1)); GpuMat_<float> yc(y.reshape(1));
...@@ -108,21 +112,20 @@ void cv::cuda::phase(InputArray _x, InputArray _y, OutputArray _dst, bool angleI ...@@ -108,21 +112,20 @@ void cv::cuda::phase(InputArray _x, InputArray _y, OutputArray _dst, bool angleI
gridTransformBinary(xc, yc, anglec, direction_func<float, true>(), stream); gridTransformBinary(xc, yc, anglec, direction_func<float, true>(), stream);
else else
gridTransformBinary(xc, yc, anglec, direction_func<float, false>(), stream); gridTransformBinary(xc, yc, anglec, direction_func<float, false>(), stream);
syncOutput(dst, _dst, stream);
} }
void cv::cuda::cartToPolar(InputArray _x, InputArray _y, OutputArray _mag, OutputArray _angle, bool angleInDegrees, Stream& stream) void cv::cuda::cartToPolar(InputArray _x, InputArray _y, OutputArray _mag, OutputArray _angle, bool angleInDegrees, Stream& stream)
{ {
GpuMat x = _x.getGpuMat(); GpuMat x = getInputMat(_x, stream);
GpuMat y = _y.getGpuMat(); GpuMat y = getInputMat(_y, stream);
CV_DbgAssert( x.depth() == CV_32F );
CV_DbgAssert( y.type() == x.type() && y.size() == x.size() );
_mag.create(x.size(), CV_32FC1); CV_Assert( x.depth() == CV_32F );
GpuMat mag = _mag.getGpuMat(); CV_Assert( y.type() == x.type() && y.size() == x.size() );
_angle.create(x.size(), CV_32FC1); GpuMat mag = getOutputMat(_mag, x.size(), CV_32FC1, stream);
GpuMat angle = _angle.getGpuMat(); GpuMat angle = getOutputMat(_angle, x.size(), CV_32FC1, stream);
GpuMat_<float> xc(x.reshape(1)); GpuMat_<float> xc(x.reshape(1));
GpuMat_<float> yc(y.reshape(1)); GpuMat_<float> yc(y.reshape(1));
...@@ -147,6 +150,9 @@ void cv::cuda::cartToPolar(InputArray _x, InputArray _y, OutputArray _mag, Outpu ...@@ -147,6 +150,9 @@ void cv::cuda::cartToPolar(InputArray _x, InputArray _y, OutputArray _mag, Outpu
binaryTupleAdapter<0, 1>(direction_func<float, false>())), binaryTupleAdapter<0, 1>(direction_func<float, false>())),
stream); stream);
} }
syncOutput(mag, _mag, stream);
syncOutput(angle, _angle, stream);
} }
namespace namespace
...@@ -173,17 +179,14 @@ namespace ...@@ -173,17 +179,14 @@ namespace
void cv::cuda::polarToCart(InputArray _mag, InputArray _angle, OutputArray _x, OutputArray _y, bool angleInDegrees, Stream& _stream) void cv::cuda::polarToCart(InputArray _mag, InputArray _angle, OutputArray _x, OutputArray _y, bool angleInDegrees, Stream& _stream)
{ {
GpuMat mag = _mag.getGpuMat(); GpuMat mag = getInputMat(_mag, _stream);
GpuMat angle = _angle.getGpuMat(); GpuMat angle = getInputMat(_angle, _stream);
CV_DbgAssert( angle.depth() == CV_32F );
CV_DbgAssert( mag.empty() || (mag.type() == angle.type() && mag.size() == angle.size()) );
_x.create(angle.size(), CV_32FC1); CV_Assert( angle.depth() == CV_32F );
GpuMat x = _x.getGpuMat(); CV_Assert( mag.empty() || (mag.type() == angle.type() && mag.size() == angle.size()) );
_y.create(angle.size(), CV_32FC1); GpuMat x = getOutputMat(_x, angle.size(), CV_32FC1, _stream);
GpuMat y = _y.getGpuMat(); GpuMat y = getOutputMat(_y, angle.size(), CV_32FC1, _stream);
GpuMat_<float> xc(x.reshape(1)); GpuMat_<float> xc(x.reshape(1));
GpuMat_<float> yc(y.reshape(1)); GpuMat_<float> yc(y.reshape(1));
...@@ -204,6 +207,9 @@ void cv::cuda::polarToCart(InputArray _mag, InputArray _angle, OutputArray _x, O ...@@ -204,6 +207,9 @@ void cv::cuda::polarToCart(InputArray _mag, InputArray _angle, OutputArray _x, O
CV_CUDEV_SAFE_CALL( cudaGetLastError() ); CV_CUDEV_SAFE_CALL( cudaGetLastError() );
syncOutput(x, _x, _stream);
syncOutput(y, _y, _stream);
if (stream == 0) if (stream == 0)
CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() ); CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
} }
......
...@@ -50,7 +50,10 @@ ...@@ -50,7 +50,10 @@
#include "opencv2/cudaarithm.hpp" #include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp" #include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev; using namespace cv::cudev;
namespace namespace
...@@ -125,7 +128,7 @@ namespace ...@@ -125,7 +128,7 @@ namespace
void cv::cuda::reduce(InputArray _src, OutputArray _dst, int dim, int reduceOp, int dtype, Stream& stream) void cv::cuda::reduce(InputArray _src, OutputArray _dst, int dim, int reduceOp, int dtype, Stream& stream)
{ {
GpuMat src = _src.getGpuMat(); GpuMat src = getInputMat(_src, stream);
CV_Assert( src.channels() <= 4 ); CV_Assert( src.channels() <= 4 );
CV_Assert( dim == 0 || dim == 1 ); CV_Assert( dim == 0 || dim == 1 );
...@@ -134,8 +137,7 @@ void cv::cuda::reduce(InputArray _src, OutputArray _dst, int dim, int reduceOp, ...@@ -134,8 +137,7 @@ void cv::cuda::reduce(InputArray _src, OutputArray _dst, int dim, int reduceOp,
if (dtype < 0) if (dtype < 0)
dtype = src.depth(); dtype = src.depth();
_dst.create(1, dim == 0 ? src.cols : src.rows, CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels())); GpuMat dst = getOutputMat(_dst, 1, dim == 0 ? src.cols : src.rows, CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()), stream);
GpuMat dst = _dst.getGpuMat();
if (dim == 0) if (dim == 0)
{ {
...@@ -292,6 +294,8 @@ void cv::cuda::reduce(InputArray _src, OutputArray _dst, int dim, int reduceOp, ...@@ -292,6 +294,8 @@ void cv::cuda::reduce(InputArray _src, OutputArray _dst, int dim, int reduceOp,
func(src, dst, reduceOp, stream); func(src, dst, reduceOp, stream);
} }
syncOutput(dst, _dst, stream);
} }
#endif #endif
...@@ -50,7 +50,10 @@ ...@@ -50,7 +50,10 @@
#include "opencv2/cudaarithm.hpp" #include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp" #include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev; using namespace cv::cudev;
//////////////////////////////////////////////////////////////////////// ////////////////////////////////////////////////////////////////////////
...@@ -92,20 +95,18 @@ namespace ...@@ -92,20 +95,18 @@ namespace
void mergeImpl(const GpuMat* src, size_t n, cv::OutputArray _dst, Stream& stream) void mergeImpl(const GpuMat* src, size_t n, cv::OutputArray _dst, Stream& stream)
{ {
CV_DbgAssert( src != 0 ); CV_Assert( src != 0 );
CV_DbgAssert( n > 0 && n <= 4 ); CV_Assert( n > 0 && n <= 4 );
const int depth = src[0].depth(); const int depth = src[0].depth();
const cv::Size size = src[0].size(); const cv::Size size = src[0].size();
#ifdef _DEBUG
for (size_t i = 0; i < n; ++i) for (size_t i = 0; i < n; ++i)
{ {
CV_Assert( src[i].size() == size ); CV_Assert( src[i].size() == size );
CV_Assert( src[i].depth() == depth ); CV_Assert( src[i].depth() == depth );
CV_Assert( src[i].channels() == 1 ); CV_Assert( src[i].channels() == 1 );
} }
#endif
if (n == 1) if (n == 1)
{ {
...@@ -123,8 +124,7 @@ namespace ...@@ -123,8 +124,7 @@ namespace
const int channels = static_cast<int>(n); const int channels = static_cast<int>(n);
_dst.create(size, CV_MAKE_TYPE(depth, channels)); GpuMat dst = getOutputMat(_dst, size, CV_MAKE_TYPE(depth, channels), stream);
GpuMat dst = _dst.getGpuMat();
const func_t func = funcs[channels - 2][CV_ELEM_SIZE(depth) / 2]; const func_t func = funcs[channels - 2][CV_ELEM_SIZE(depth) / 2];
...@@ -132,6 +132,8 @@ namespace ...@@ -132,6 +132,8 @@ namespace
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported channel count or data type"); CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported channel count or data type");
func(src, dst, stream); func(src, dst, stream);
syncOutput(dst, _dst, stream);
} }
} }
} }
...@@ -203,12 +205,12 @@ namespace ...@@ -203,12 +205,12 @@ namespace
{SplitFunc<4, uchar>::call, SplitFunc<4, ushort>::call, SplitFunc<4, int>::call, 0, SplitFunc<4, double>::call} {SplitFunc<4, uchar>::call, SplitFunc<4, ushort>::call, SplitFunc<4, int>::call, 0, SplitFunc<4, double>::call}
}; };
CV_DbgAssert( dst != 0 ); CV_Assert( dst != 0 );
const int depth = src.depth(); const int depth = src.depth();
const int channels = src.channels(); const int channels = src.channels();
CV_DbgAssert( channels <= 4 ); CV_Assert( channels <= 4 );
if (channels == 0) if (channels == 0)
return; return;
...@@ -233,13 +235,13 @@ namespace ...@@ -233,13 +235,13 @@ namespace
void cv::cuda::split(InputArray _src, GpuMat* dst, Stream& stream) void cv::cuda::split(InputArray _src, GpuMat* dst, Stream& stream)
{ {
GpuMat src = _src.getGpuMat(); GpuMat src = getInputMat(_src, stream);
splitImpl(src, dst, stream); splitImpl(src, dst, stream);
} }
void cv::cuda::split(InputArray _src, std::vector<GpuMat>& dst, Stream& stream) void cv::cuda::split(InputArray _src, std::vector<GpuMat>& dst, Stream& stream)
{ {
GpuMat src = _src.getGpuMat(); GpuMat src = getInputMat(_src, stream);
dst.resize(src.channels()); dst.resize(src.channels());
if (src.channels() > 0) if (src.channels() > 0)
splitImpl(src, &dst[0], stream); splitImpl(src, &dst[0], stream);
......
...@@ -52,6 +52,8 @@ ...@@ -52,6 +52,8 @@
#include "opencv2/cudev.hpp" #include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp" #include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev; using namespace cv::cudev;
namespace namespace
...@@ -95,15 +97,14 @@ namespace ...@@ -95,15 +97,14 @@ namespace
double cv::cuda::threshold(InputArray _src, OutputArray _dst, double thresh, double maxVal, int type, Stream& stream) double cv::cuda::threshold(InputArray _src, OutputArray _dst, double thresh, double maxVal, int type, Stream& stream)
{ {
GpuMat src = _src.getGpuMat(); GpuMat src = getInputMat(_src, stream);
const int depth = src.depth(); const int depth = src.depth();
CV_DbgAssert( src.channels() == 1 && depth <= CV_64F ); CV_Assert( src.channels() == 1 && depth <= CV_64F );
CV_DbgAssert( type <= 4 /*THRESH_TOZERO_INV*/ ); CV_Assert( type <= 4 /*THRESH_TOZERO_INV*/ );
_dst.create(src.size(), src.type()); GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
GpuMat dst = _dst.getGpuMat();
if (depth == CV_32F && type == 2 /*THRESH_TRUNC*/) if (depth == CV_32F && type == 2 /*THRESH_TRUNC*/)
{ {
...@@ -142,6 +143,8 @@ double cv::cuda::threshold(InputArray _src, OutputArray _dst, double thresh, dou ...@@ -142,6 +143,8 @@ double cv::cuda::threshold(InputArray _src, OutputArray _dst, double thresh, dou
funcs[depth](src, dst, thresh, maxVal, type, stream); funcs[depth](src, dst, thresh, maxVal, type, stream);
} }
syncOutput(dst, _dst, stream);
return thresh; return thresh;
} }
......
...@@ -52,18 +52,19 @@ ...@@ -52,18 +52,19 @@
#include "opencv2/cudev.hpp" #include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp" #include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev; using namespace cv::cudev;
void cv::cuda::transpose(InputArray _src, OutputArray _dst, Stream& stream) void cv::cuda::transpose(InputArray _src, OutputArray _dst, Stream& stream)
{ {
GpuMat src = _src.getGpuMat(); GpuMat src = getInputMat(_src, stream);
const size_t elemSize = src.elemSize(); const size_t elemSize = src.elemSize();
CV_Assert( elemSize == 1 || elemSize == 4 || elemSize == 8 ); CV_Assert( elemSize == 1 || elemSize == 4 || elemSize == 8 );
_dst.create( src.cols, src.rows, src.type() ); GpuMat dst = getOutputMat(_dst, src.cols, src.rows, src.type(), stream);
GpuMat dst = _dst.getGpuMat();
if (elemSize == 1) if (elemSize == 1)
{ {
...@@ -87,6 +88,8 @@ void cv::cuda::transpose(InputArray _src, OutputArray _dst, Stream& stream) ...@@ -87,6 +88,8 @@ void cv::cuda::transpose(InputArray _src, OutputArray _dst, Stream& stream)
{ {
gridTranspose(globPtr<double>(src), globPtr<double>(dst), stream); gridTranspose(globPtr<double>(src), globPtr<double>(dst), stream);
} }
syncOutput(dst, _dst, stream);
} }
#endif #endif
...@@ -107,11 +107,11 @@ namespace ...@@ -107,11 +107,11 @@ namespace
GpuMat src1; GpuMat src1;
if (!isScalar1) if (!isScalar1)
src1 = _src1.getGpuMat(); src1 = getInputMat(_src1, stream);
GpuMat src2; GpuMat src2;
if (!isScalar2) if (!isScalar2)
src2 = _src2.getGpuMat(); src2 = getInputMat(_src2, stream);
Mat scalar; Mat scalar;
if (isScalar1) if (isScalar1)
...@@ -126,7 +126,7 @@ namespace ...@@ -126,7 +126,7 @@ namespace
scalar.convertTo(Mat_<double>(scalar.rows, scalar.cols, &val[0]), CV_64F); scalar.convertTo(Mat_<double>(scalar.rows, scalar.cols, &val[0]), CV_64F);
} }
GpuMat mask = _mask.getGpuMat(); GpuMat mask = getInputMat(_mask, stream);
const int sdepth = src1.empty() ? src2.depth() : src1.depth(); const int sdepth = src1.empty() ? src2.depth() : src1.depth();
const int cn = src1.empty() ? src2.channels() : src1.channels(); const int cn = src1.empty() ? src2.channels() : src1.channels();
...@@ -147,8 +147,7 @@ namespace ...@@ -147,8 +147,7 @@ namespace
CV_Error(Error::StsUnsupportedFormat, "The device doesn't support double"); CV_Error(Error::StsUnsupportedFormat, "The device doesn't support double");
} }
_dst.create(size, CV_MAKE_TYPE(ddepth, cn)); GpuMat dst = getOutputMat(_dst, size, CV_MAKE_TYPE(ddepth, cn), stream);
GpuMat dst = _dst.getGpuMat();
if (isScalar1) if (isScalar1)
mat_scalar_func(src2, val, true, dst, mask, scale, stream, op); mat_scalar_func(src2, val, true, dst, mask, scale, stream, op);
...@@ -156,6 +155,8 @@ namespace ...@@ -156,6 +155,8 @@ namespace
mat_scalar_func(src1, val, false, dst, mask, scale, stream, op); mat_scalar_func(src1, val, false, dst, mask, scale, stream, op);
else else
mat_mat_func(src1, src2, dst, mask, scale, stream, op); mat_mat_func(src1, src2, dst, mask, scale, stream, op);
syncOutput(dst, _dst, stream);
} }
} }
...@@ -196,27 +197,29 @@ void cv::cuda::multiply(InputArray _src1, InputArray _src2, OutputArray _dst, do ...@@ -196,27 +197,29 @@ void cv::cuda::multiply(InputArray _src1, InputArray _src2, OutputArray _dst, do
{ {
if (_src1.type() == CV_8UC4 && _src2.type() == CV_32FC1) if (_src1.type() == CV_8UC4 && _src2.type() == CV_32FC1)
{ {
GpuMat src1 = _src1.getGpuMat(); GpuMat src1 = getInputMat(_src1, stream);
GpuMat src2 = _src2.getGpuMat(); GpuMat src2 = getInputMat(_src2, stream);
CV_Assert( src1.size() == src2.size() ); CV_Assert( src1.size() == src2.size() );
_dst.create(src1.size(), src1.type()); GpuMat dst = getOutputMat(_dst, src1.size(), src1.type(), stream);
GpuMat dst = _dst.getGpuMat();
mulMat_8uc4_32f(src1, src2, dst, stream); mulMat_8uc4_32f(src1, src2, dst, stream);
syncOutput(dst, _dst, stream);
} }
else if (_src1.type() == CV_16SC4 && _src2.type() == CV_32FC1) else if (_src1.type() == CV_16SC4 && _src2.type() == CV_32FC1)
{ {
GpuMat src1 = _src1.getGpuMat(); GpuMat src1 = getInputMat(_src1, stream);
GpuMat src2 = _src2.getGpuMat(); GpuMat src2 = getInputMat(_src2, stream);
CV_Assert( src1.size() == src2.size() ); CV_Assert( src1.size() == src2.size() );
_dst.create(src1.size(), src1.type()); GpuMat dst = getOutputMat(_dst, src1.size(), src1.type(), stream);
GpuMat dst = _dst.getGpuMat();
mulMat_16sc4_32f(src1, src2, dst, stream); mulMat_16sc4_32f(src1, src2, dst, stream);
syncOutput(dst, _dst, stream);
} }
else else
{ {
...@@ -237,27 +240,29 @@ void cv::cuda::divide(InputArray _src1, InputArray _src2, OutputArray _dst, doub ...@@ -237,27 +240,29 @@ void cv::cuda::divide(InputArray _src1, InputArray _src2, OutputArray _dst, doub
{ {
if (_src1.type() == CV_8UC4 && _src2.type() == CV_32FC1) if (_src1.type() == CV_8UC4 && _src2.type() == CV_32FC1)
{ {
GpuMat src1 = _src1.getGpuMat(); GpuMat src1 = getInputMat(_src1, stream);
GpuMat src2 = _src2.getGpuMat(); GpuMat src2 = getInputMat(_src2, stream);
CV_Assert( src1.size() == src2.size() ); CV_Assert( src1.size() == src2.size() );
_dst.create(src1.size(), src1.type()); GpuMat dst = getOutputMat(_dst, src1.size(), src1.type(), stream);
GpuMat dst = _dst.getGpuMat();
divMat_8uc4_32f(src1, src2, dst, stream); divMat_8uc4_32f(src1, src2, dst, stream);
syncOutput(dst, _dst, stream);
} }
else if (_src1.type() == CV_16SC4 && _src2.type() == CV_32FC1) else if (_src1.type() == CV_16SC4 && _src2.type() == CV_32FC1)
{ {
GpuMat src1 = _src1.getGpuMat(); GpuMat src1 = getInputMat(_src1, stream);
GpuMat src2 = _src2.getGpuMat(); GpuMat src2 = getInputMat(_src2, stream);
CV_Assert( src1.size() == src2.size() ); CV_Assert( src1.size() == src2.size() );
_dst.create(src1.size(), src1.type()); GpuMat dst = getOutputMat(_dst, src1.size(), src1.type(), stream);
GpuMat dst = _dst.getGpuMat();
divMat_16sc4_32f(src1, src2, dst, stream); divMat_16sc4_32f(src1, src2, dst, stream);
syncOutput(dst, _dst, stream);
} }
else else
{ {
...@@ -389,15 +394,16 @@ void cv::cuda::rshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Strea ...@@ -389,15 +394,16 @@ void cv::cuda::rshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Strea
{NppShift<CV_32S, 1, nppiRShiftC_32s_C1R>::call, 0, NppShift<CV_32S, 3, nppiRShiftC_32s_C3R>::call, NppShift<CV_32S, 4, nppiRShiftC_32s_C4R>::call}, {NppShift<CV_32S, 1, nppiRShiftC_32s_C1R>::call, 0, NppShift<CV_32S, 3, nppiRShiftC_32s_C3R>::call, NppShift<CV_32S, 4, nppiRShiftC_32s_C4R>::call},
}; };
GpuMat src = _src.getGpuMat(); GpuMat src = getInputMat(_src, stream);
CV_Assert( src.depth() < CV_32F ); CV_Assert( src.depth() < CV_32F );
CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 ); CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );
_dst.create(src.size(), src.type()); GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
GpuMat dst = _dst.getGpuMat();
funcs[src.depth()][src.channels() - 1](src, val, dst, StreamAccessor::getStream(stream)); funcs[src.depth()][src.channels() - 1](src, val, dst, StreamAccessor::getStream(stream));
syncOutput(dst, _dst, stream);
} }
void cv::cuda::lshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Stream& stream) void cv::cuda::lshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Stream& stream)
...@@ -412,15 +418,16 @@ void cv::cuda::lshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Strea ...@@ -412,15 +418,16 @@ void cv::cuda::lshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Strea
{NppShift<CV_32S, 1, nppiLShiftC_32s_C1R>::call, 0, NppShift<CV_32S, 3, nppiLShiftC_32s_C3R>::call, NppShift<CV_32S, 4, nppiLShiftC_32s_C4R>::call}, {NppShift<CV_32S, 1, nppiLShiftC_32s_C1R>::call, 0, NppShift<CV_32S, 3, nppiLShiftC_32s_C3R>::call, NppShift<CV_32S, 4, nppiLShiftC_32s_C4R>::call},
}; };
GpuMat src = _src.getGpuMat(); GpuMat src = getInputMat(_src, stream);
CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S ); CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S );
CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 ); CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );
_dst.create(src.size(), src.type()); GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
GpuMat dst = _dst.getGpuMat();
funcs[src.depth()][src.channels() - 1](src, val, dst, StreamAccessor::getStream(stream)); funcs[src.depth()][src.channels() - 1](src, val, dst, StreamAccessor::getStream(stream));
syncOutput(dst, _dst, stream);
} }
////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////
...@@ -475,22 +482,24 @@ namespace ...@@ -475,22 +482,24 @@ namespace
void cv::cuda::magnitude(InputArray _src, OutputArray _dst, Stream& stream) void cv::cuda::magnitude(InputArray _src, OutputArray _dst, Stream& stream)
{ {
GpuMat src = _src.getGpuMat(); GpuMat src = getInputMat(_src, stream);
_dst.create(src.size(), CV_32FC1); GpuMat dst = getOutputMat(_dst, src.size(), CV_32FC1, stream);
GpuMat dst = _dst.getGpuMat();
npp_magnitude(src, dst, nppiMagnitude_32fc32f_C1R, StreamAccessor::getStream(stream)); npp_magnitude(src, dst, nppiMagnitude_32fc32f_C1R, StreamAccessor::getStream(stream));
syncOutput(dst, _dst, stream);
} }
void cv::cuda::magnitudeSqr(InputArray _src, OutputArray _dst, Stream& stream) void cv::cuda::magnitudeSqr(InputArray _src, OutputArray _dst, Stream& stream)
{ {
GpuMat src = _src.getGpuMat(); GpuMat src = getInputMat(_src, stream);
_dst.create(src.size(), CV_32FC1); GpuMat dst = getOutputMat(_dst, src.size(), CV_32FC1, stream);
GpuMat dst = _dst.getGpuMat();
npp_magnitude(src, dst, nppiMagnitudeSqr_32fc32f_C1R, StreamAccessor::getStream(stream)); npp_magnitude(src, dst, nppiMagnitudeSqr_32fc32f_C1R, StreamAccessor::getStream(stream));
syncOutput(dst, _dst, stream);
} }
#endif #endif
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment