Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
7454189c
Commit
7454189c
authored
Dec 24, 2014
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
use new getInputMat/getOutputMat/syncOutput methods in cudaarithm routines
parent
3d0410c1
Hide whitespace changes
Inline
Side-by-side
Showing
14 changed files
with
220 additions
and
172 deletions
+220
-172
arithm.cpp
modules/cudaarithm/src/arithm.cpp
+12
-10
core.cpp
modules/cudaarithm/src/core.cpp
+4
-2
add_weighted.cu
modules/cudaarithm/src/cuda/add_weighted.cu
+16
-12
bitwise_mat.cu
modules/cudaarithm/src/cuda/bitwise_mat.cu
+8
-4
copy_make_border.cu
modules/cudaarithm/src/cuda/copy_make_border.cu
+7
-3
lut.cu
modules/cudaarithm/src/cuda/lut.cu
+6
-3
math.cu
modules/cudaarithm/src/cuda/math.cu
+37
-40
mul_spectrums.cu
modules/cudaarithm/src/cuda/mul_spectrums.cu
+13
-8
polar_cart.cu
modules/cudaarithm/src/cuda/polar_cart.cu
+42
-36
reduce.cu
modules/cudaarithm/src/cuda/reduce.cu
+7
-3
split_merge.cu
modules/cudaarithm/src/cuda/split_merge.cu
+12
-10
threshold.cu
modules/cudaarithm/src/cuda/threshold.cu
+8
-5
transpose.cu
modules/cudaarithm/src/cuda/transpose.cu
+6
-3
element_operations.cpp
modules/cudaarithm/src/element_operations.cpp
+42
-33
No files found.
modules/cudaarithm/src/arithm.cpp
View file @
7454189c
...
...
@@ -169,9 +169,9 @@ void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray
#else
// CUBLAS works with column-major matrices
GpuMat
src1
=
_src1
.
getGpuMat
(
);
GpuMat
src2
=
_src2
.
getGpuMat
(
);
GpuMat
src3
=
_src3
.
getGpuMat
(
);
GpuMat
src1
=
getInputMat
(
_src1
,
stream
);
GpuMat
src2
=
getInputMat
(
_src2
,
stream
);
GpuMat
src3
=
getInputMat
(
_src3
,
stream
);
CV_Assert
(
src1
.
type
()
==
CV_32FC1
||
src1
.
type
()
==
CV_32FC2
||
src1
.
type
()
==
CV_64FC1
||
src1
.
type
()
==
CV_64FC2
);
CV_Assert
(
src2
.
type
()
==
src1
.
type
()
&&
(
src3
.
empty
()
||
src3
.
type
()
==
src1
.
type
())
);
...
...
@@ -200,8 +200,7 @@ void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray
CV_Assert
(
src1Size
.
width
==
src2Size
.
height
);
CV_Assert
(
src3
.
empty
()
||
src3Size
==
dstSize
);
_dst
.
create
(
dstSize
,
src1
.
type
());
GpuMat
dst
=
_dst
.
getGpuMat
();
GpuMat
dst
=
getOutputMat
(
_dst
,
dstSize
,
src1
.
type
(),
stream
);
if
(
beta
!=
0
)
{
...
...
@@ -281,6 +280,8 @@ void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray
}
cublasSafeCall
(
cublasDestroy_v2
(
handle
)
);
syncOutput
(
dst
,
_dst
,
stream
);
#endif
}
...
...
@@ -297,7 +298,7 @@ void cv::cuda::dft(InputArray _src, OutputArray _dst, Size dft_size, int flags,
(
void
)
stream
;
throw_no_cuda
();
#else
GpuMat
src
=
_src
.
getGpuMat
(
);
GpuMat
src
=
getInputMat
(
_src
,
stream
);
CV_Assert
(
src
.
type
()
==
CV_32FC1
||
src
.
type
()
==
CV_32FC2
);
...
...
@@ -462,16 +463,15 @@ namespace
void
ConvolutionImpl
::
convolve
(
InputArray
_image
,
InputArray
_templ
,
OutputArray
_result
,
bool
ccorr
,
Stream
&
_stream
)
{
GpuMat
image
=
_image
.
getGpuMat
(
);
GpuMat
templ
=
_templ
.
getGpuMat
(
);
GpuMat
image
=
getInputMat
(
_image
,
_stream
);
GpuMat
templ
=
getInputMat
(
_templ
,
_stream
);
CV_Assert
(
image
.
type
()
==
CV_32FC1
);
CV_Assert
(
templ
.
type
()
==
CV_32FC1
);
create
(
image
.
size
(),
templ
.
size
());
_result
.
create
(
result_size
,
CV_32FC1
);
GpuMat
result
=
_result
.
getGpuMat
();
GpuMat
result
=
getOutputMat
(
_result
,
result_size
,
CV_32FC1
,
_stream
);
cudaStream_t
stream
=
StreamAccessor
::
getStream
(
_stream
);
...
...
@@ -520,6 +520,8 @@ namespace
cufftSafeCall
(
cufftDestroy
(
planR2C
)
);
cufftSafeCall
(
cufftDestroy
(
planC2R
)
);
syncOutput
(
result
,
_result
,
_stream
);
}
}
...
...
modules/cudaarithm/src/core.cpp
View file @
7454189c
...
...
@@ -119,15 +119,17 @@ void cv::cuda::flip(InputArray _src, OutputArray _dst, int flipCode, Stream& str
{
NppMirror
<
CV_32F
,
nppiMirror_32f_C1R
>::
call
,
0
,
NppMirror
<
CV_32F
,
nppiMirror_32f_C3R
>::
call
,
NppMirror
<
CV_32F
,
nppiMirror_32f_C4R
>::
call
}
};
GpuMat
src
=
_src
.
getGpuMat
(
);
GpuMat
src
=
getInputMat
(
_src
,
stream
);
CV_Assert
(
src
.
depth
()
==
CV_8U
||
src
.
depth
()
==
CV_16U
||
src
.
depth
()
==
CV_32S
||
src
.
depth
()
==
CV_32F
);
CV_Assert
(
src
.
channels
()
==
1
||
src
.
channels
()
==
3
||
src
.
channels
()
==
4
);
_dst
.
create
(
src
.
size
(),
src
.
type
());
GpuMat
dst
=
_dst
.
getGpuMat
(
);
GpuMat
dst
=
getOutputMat
(
_dst
,
src
.
size
(),
src
.
type
(),
stream
);
funcs
[
src
.
depth
()][
src
.
channels
()
-
1
](
src
,
dst
,
flipCode
,
StreamAccessor
::
getStream
(
stream
));
syncOutput
(
dst
,
_dst
,
stream
);
}
#endif
/* !defined (HAVE_CUDA) */
modules/cudaarithm/src/cuda/add_weighted.cu
View file @
7454189c
...
...
@@ -50,7 +50,10 @@
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
namespace
...
...
@@ -63,7 +66,7 @@ namespace
__device__ __forceinline__ D operator ()(T1 a, T2 b) const
{
return saturate_cast<D>(a * alpha + b * beta + gamma);
return
cudev::
saturate_cast<D>(a * alpha + b * beta + gamma);
}
};
...
...
@@ -555,8 +558,8 @@ void cv::cuda::addWeighted(InputArray _src1, double alpha, InputArray _src2, dou
}
};
GpuMat src1 =
_src1.getGpuMat(
);
GpuMat src2 =
_src2.getGpuMat(
);
GpuMat src1 =
getInputMat(_src1, stream
);
GpuMat src2 =
getInputMat(_src2, stream
);
int sdepth1 = src1.depth();
int sdepth2 = src2.depth();
...
...
@@ -564,19 +567,18 @@ void cv::cuda::addWeighted(InputArray _src1, double alpha, InputArray _src2, dou
ddepth = ddepth >= 0 ? CV_MAT_DEPTH(ddepth) : std::max(sdepth1, sdepth2);
const int cn = src1.channels();
CV_
Dbg
Assert( src2.size() == src1.size() && src2.channels() == cn );
CV_
Dbg
Assert( sdepth1 <= CV_64F && sdepth2 <= CV_64F && ddepth <= CV_64F );
CV_Assert( src2.size() == src1.size() && src2.channels() == cn );
CV_Assert( sdepth1 <= CV_64F && sdepth2 <= CV_64F && ddepth <= CV_64F );
_dst.create(src1.size(), CV_MAKE_TYPE(ddepth, cn));
GpuMat dst = _dst.getGpuMat();
GpuMat dst = getOutputMat(_dst, src1.size(), CV_MAKE_TYPE(ddepth, cn), stream);
GpuMat src1_ = src1.reshape(1);
GpuMat src2_ = src2.reshape(1);
GpuMat dst_ = dst.reshape(1);
GpuMat src1_
single
= src1.reshape(1);
GpuMat src2_
single
= src2.reshape(1);
GpuMat dst_
single
= dst.reshape(1);
if (sdepth1 > sdepth2)
{
src1_
.swap(src2_
);
src1_
single.swap(src2_single
);
std::swap(alpha, beta);
std::swap(sdepth1, sdepth2);
}
...
...
@@ -586,7 +588,9 @@ void cv::cuda::addWeighted(InputArray _src1, double alpha, InputArray _src2, dou
if (!func)
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
func(src1_, alpha, src2_, beta, gamma, dst_, stream);
func(src1_single, alpha, src2_single, beta, gamma, dst_single, stream);
syncOutput(dst, _dst, stream);
}
#endif
modules/cudaarithm/src/cuda/bitwise_mat.cu
View file @
7454189c
...
...
@@ -50,7 +50,10 @@
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
void bitMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int op);
...
...
@@ -60,16 +63,15 @@ void bitMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& m
void cv::cuda::bitwise_not(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream)
{
GpuMat src =
_src.getGpuMat(
);
GpuMat mask =
_mask.getGpuMat(
);
GpuMat src =
getInputMat(_src, stream
);
GpuMat mask =
getInputMat(_mask, stream
);
const int depth = src.depth();
CV_DbgAssert( depth <= CV_32F );
CV_DbgAssert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );
_dst.create(src.size(), src.type());
GpuMat dst = _dst.getGpuMat();
GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
if (mask.empty())
{
...
...
@@ -125,6 +127,8 @@ void cv::cuda::bitwise_not(InputArray _src, OutputArray _dst, InputArray _mask,
gridTransformUnary(vsrc, vdst, bit_not<uchar>(), singleMaskChannels(globPtr<uchar>(mask), src.channels()), stream);
}
}
syncOutput(dst, _dst, stream);
}
//////////////////////////////////////////////////////////////////////////////
...
...
modules/cudaarithm/src/cuda/copy_make_border.cu
View file @
7454189c
...
...
@@ -50,7 +50,10 @@
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
namespace
...
...
@@ -133,7 +136,7 @@ void cv::cuda::copyMakeBorder(InputArray _src, OutputArray _dst, int top, int bo
{ copyMakeBorderImpl<float , 1> , 0 /*copyMakeBorderImpl<float , 2>*/, copyMakeBorderImpl<float , 3> , copyMakeBorderImpl<float ,4> }
};
GpuMat src =
_src.getGpuMat(
);
GpuMat src =
getInputMat(_src, stream
);
const int depth = src.depth();
const int cn = src.channels();
...
...
@@ -141,8 +144,7 @@ void cv::cuda::copyMakeBorder(InputArray _src, OutputArray _dst, int top, int bo
CV_Assert( depth <= CV_32F && cn <= 4 );
CV_Assert( borderType == BORDER_REFLECT_101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP );
_dst.create(src.rows + top + bottom, src.cols + left + right, src.type());
GpuMat dst = _dst.getGpuMat();
GpuMat dst = getOutputMat(_dst, src.rows + top + bottom, src.cols + left + right, src.type(), stream);
const func_t func = funcs[depth][cn - 1];
...
...
@@ -150,6 +152,8 @@ void cv::cuda::copyMakeBorder(InputArray _src, OutputArray _dst, int top, int bo
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
func(src, dst, top, left, borderType, value, stream);
syncOutput(dst, _dst, stream);
}
#endif
modules/cudaarithm/src/cuda/lut.cu
View file @
7454189c
...
...
@@ -50,8 +50,10 @@
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
namespace
...
...
@@ -165,7 +167,7 @@ namespace
void LookUpTableImpl::transform(InputArray _src, OutputArray _dst, Stream& stream)
{
GpuMat src =
_src.getGpuMat(
);
GpuMat src =
getInputMat(_src, stream
);
const int cn = src.channels();
const int lut_cn = d_lut.channels();
...
...
@@ -173,8 +175,7 @@ namespace
CV_Assert( src.type() == CV_8UC1 || src.type() == CV_8UC3 );
CV_Assert( lut_cn == 1 || lut_cn == cn );
_dst.create(src.size(), src.type());
GpuMat dst = _dst.getGpuMat();
GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
if (lut_cn == 1)
{
...
...
@@ -196,6 +197,8 @@ namespace
dst3.assign(lut_(src3, tbl), stream);
}
syncOutput(dst, _dst, stream);
}
}
...
...
modules/cudaarithm/src/cuda/math.cu
View file @
7454189c
...
...
@@ -50,7 +50,10 @@
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
namespace
...
...
@@ -92,16 +95,15 @@ void cv::cuda::abs(InputArray _src, OutputArray _dst, Stream& stream)
absMat<double>
};
GpuMat src =
_src.getGpuMat(
);
GpuMat src =
getInputMat(_src, stream
);
const int depth = src.depth(
);
CV_Assert( src.depth() <= CV_64F
);
CV_DbgAssert( depth <= CV_64F
);
GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream
);
_dst.create(src.size(), src.type());
GpuMat dst = _dst.getGpuMat();
funcs[src.depth()](src.reshape(1), dst.reshape(1), stream);
funcs[depth](src.reshape(1), dst.reshape(1)
, stream);
syncOutput(dst, _dst
, stream);
}
//////////////////////////////////////////////////////////////////////////////
...
...
@@ -113,7 +115,7 @@ namespace
{
__device__ __forceinline__ T operator ()(T x) const
{
return saturate_cast<T>(x * x);
return
cudev::
saturate_cast<T>(x * x);
}
};
...
...
@@ -138,16 +140,15 @@ void cv::cuda::sqr(InputArray _src, OutputArray _dst, Stream& stream)
sqrMat<double>
};
GpuMat src =
_src.getGpuMat(
);
GpuMat src =
getInputMat(_src, stream
);
const int depth = src.depth(
);
CV_Assert( src.depth() <= CV_64F
);
CV_DbgAssert( depth <= CV_64F
);
GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream
);
_dst.create(src.size(), src.type());
GpuMat dst = _dst.getGpuMat();
funcs[src.depth()](src.reshape(1), dst.reshape(1), stream);
funcs[depth](src.reshape(1), dst.reshape(1)
, stream);
syncOutput(dst, _dst
, stream);
}
//////////////////////////////////////////////////////////////////////////////
...
...
@@ -176,16 +177,15 @@ void cv::cuda::sqrt(InputArray _src, OutputArray _dst, Stream& stream)
sqrtMat<double>
};
GpuMat src =
_src.getGpuMat(
);
GpuMat src =
getInputMat(_src, stream
);
const int depth = src.depth(
);
CV_Assert( src.depth() <= CV_64F
);
CV_DbgAssert( depth <= CV_64F
);
GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream
);
_dst.create(src.size(), src.type());
GpuMat dst = _dst.getGpuMat();
funcs[src.depth()](src.reshape(1), dst.reshape(1), stream);
funcs[depth](src.reshape(1), dst.reshape(1)
, stream);
syncOutput(dst, _dst
, stream);
}
////////////////////////////////////////////////////////////////////////
...
...
@@ -198,7 +198,7 @@ namespace
__device__ __forceinline__ T operator ()(T x) const
{
exp_func<T> f;
return saturate_cast<T>(f(x));
return
cudev::
saturate_cast<T>(f(x));
}
};
...
...
@@ -223,16 +223,15 @@ void cv::cuda::exp(InputArray _src, OutputArray _dst, Stream& stream)
expMat<double>
};
GpuMat src =
_src.getGpuMat(
);
GpuMat src =
getInputMat(_src, stream
);
const int depth = src.depth(
);
CV_Assert( src.depth() <= CV_64F
);
CV_DbgAssert( depth <= CV_64F
);
GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream
);
_dst.create(src.size(), src.type());
GpuMat dst = _dst.getGpuMat();
funcs[src.depth()](src.reshape(1), dst.reshape(1), stream);
funcs[depth](src.reshape(1), dst.reshape(1)
, stream);
syncOutput(dst, _dst
, stream);
}
////////////////////////////////////////////////////////////////////////
...
...
@@ -261,16 +260,15 @@ void cv::cuda::log(InputArray _src, OutputArray _dst, Stream& stream)
logMat<double>
};
GpuMat src =
_src.getGpuMat(
);
GpuMat src =
getInputMat(_src, stream
);
const int depth = src.depth(
);
CV_Assert( src.depth() <= CV_64F
);
CV_DbgAssert( depth <= CV_64F
);
GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream
);
_dst.create(src.size(), src.type());
GpuMat dst = _dst.getGpuMat();
funcs[src.depth()](src.reshape(1), dst.reshape(1), stream);
funcs[depth](src.reshape(1), dst.reshape(1)
, stream);
syncOutput(dst, _dst
, stream);
}
////////////////////////////////////////////////////////////////////////
...
...
@@ -284,7 +282,7 @@ namespace
__device__ __forceinline__ T operator()(T e) const
{
return saturate_cast<T>(__powf((float)e, power));
return
cudev::
saturate_cast<T>(__powf((float)e, power));
}
};
template<typename T> struct PowOp<T, true> : unary_function<T, T>
...
...
@@ -293,7 +291,7 @@ namespace
__device__ __forceinline__ T operator()(T e) const
{
T res = saturate_cast<T>(__powf((float)e, power));
T res =
cudev::
saturate_cast<T>(__powf((float)e, power));
if ((e < 0) && (1 & static_cast<int>(power)))
res *= -1;
...
...
@@ -344,16 +342,15 @@ void cv::cuda::pow(InputArray _src, double power, OutputArray _dst, Stream& stre
powMat<double>
};
GpuMat src =
_src.getGpuMat(
);
GpuMat src =
getInputMat(_src, stream
);
const int depth = src.depth(
);
CV_Assert( src.depth() <= CV_64F
);
CV_DbgAssert(depth <= CV_64F
);
GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream
);
_dst.create(src.size(), src.type());
GpuMat dst = _dst.getGpuMat();
funcs[src.depth()](src.reshape(1), power, dst.reshape(1), stream);
funcs[depth](src.reshape(1), power, dst.reshape(1)
, stream);
syncOutput(dst, _dst
, stream);
}
#endif
modules/cudaarithm/src/cuda/mul_spectrums.cu
View file @
7454189c
...
...
@@ -50,7 +50,10 @@
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
//////////////////////////////////////////////////////////////////////////////
...
...
@@ -120,33 +123,33 @@ void cv::cuda::mulSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst
{
(void) flags;
GpuMat src1 =
_src1.getGpuMat(
);
GpuMat src2 =
_src2.getGpuMat(
);
GpuMat src1 =
getInputMat(_src1, stream
);
GpuMat src2 =
getInputMat(_src2, stream
);
CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2 );
CV_Assert( src1.size() == src2.size() );
_dst.create(src1.size(), CV_32FC2);
GpuMat dst = _dst.getGpuMat();
GpuMat dst = getOutputMat(_dst, src1.size(), CV_32FC2, stream);
if (conjB)
gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), comlex_mul_conj(), stream);
else
gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), comlex_mul(), stream);
syncOutput(dst, _dst, stream);
}
void cv::cuda::mulAndScaleSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst, int flags, float scale, bool conjB, Stream& stream)
{
(void) flags;
GpuMat src1 =
_src1.getGpuMat(
);
GpuMat src2 =
_src2.getGpuMat(
);
GpuMat src1 =
getInputMat(_src1, stream
);
GpuMat src2 =
getInputMat(_src2, stream
);
CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2);
CV_Assert( src1.size() == src2.size() );
_dst.create(src1.size(), CV_32FC2);
GpuMat dst = _dst.getGpuMat();
GpuMat dst = getOutputMat(_dst, src1.size(), CV_32FC2, stream);
if (conjB)
{
...
...
@@ -160,6 +163,8 @@ void cv::cuda::mulAndScaleSpectrums(InputArray _src1, InputArray _src2, OutputAr
op.scale = scale;
gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), op, stream);
}
syncOutput(dst, _dst, stream);
}
#endif
modules/cudaarithm/src/cuda/polar_cart.cu
View file @
7454189c
...
...
@@ -50,55 +50,59 @@
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
void cv::cuda::magnitude(InputArray _x, InputArray _y, OutputArray _dst, Stream& stream)
{
GpuMat x =
_x.getGpuMat(
);
GpuMat y =
_y.getGpuMat(
);
GpuMat x =
getInputMat(_x, stream
);
GpuMat y =
getInputMat(_y, stream
);
CV_
Dbg
Assert( x.depth() == CV_32F );
CV_
Dbg
Assert( y.type() == x.type() && y.size() == x.size() );
CV_Assert( x.depth() == CV_32F );
CV_Assert( y.type() == x.type() && y.size() == x.size() );
_dst.create(x.size(), CV_32FC1);
GpuMat dst = _dst.getGpuMat();
GpuMat dst = getOutputMat(_dst, x.size(), CV_32FC1, stream);
GpuMat_<float> xc(x.reshape(1));
GpuMat_<float> yc(y.reshape(1));
GpuMat_<float> magc(dst.reshape(1));
gridTransformBinary(xc, yc, magc, magnitude_func<float>(), stream);
syncOutput(dst, _dst, stream);
}
void cv::cuda::magnitudeSqr(InputArray _x, InputArray _y, OutputArray _dst, Stream& stream)
{
GpuMat x =
_x.getGpuMat(
);
GpuMat y =
_y.getGpuMat(
);
GpuMat x =
getInputMat(_x, stream
);
GpuMat y =
getInputMat(_y, stream
);
CV_
Dbg
Assert( x.depth() == CV_32F );
CV_
Dbg
Assert( y.type() == x.type() && y.size() == x.size() );
CV_Assert( x.depth() == CV_32F );
CV_Assert( y.type() == x.type() && y.size() == x.size() );
_dst.create(x.size(), CV_32FC1);
GpuMat dst = _dst.getGpuMat();
GpuMat dst = getOutputMat(_dst, x.size(), CV_32FC1, stream);
GpuMat_<float> xc(x.reshape(1));
GpuMat_<float> yc(y.reshape(1));
GpuMat_<float> magc(dst.reshape(1));
gridTransformBinary(xc, yc, magc, magnitude_sqr_func<float>(), stream);
syncOutput(dst, _dst, stream);
}
void cv::cuda::phase(InputArray _x, InputArray _y, OutputArray _dst, bool angleInDegrees, Stream& stream)
{
GpuMat x =
_x.getGpuMat(
);
GpuMat y =
_y.getGpuMat(
);
GpuMat x =
getInputMat(_x, stream
);
GpuMat y =
getInputMat(_y, stream
);
CV_
Dbg
Assert( x.depth() == CV_32F );
CV_
Dbg
Assert( y.type() == x.type() && y.size() == x.size() );
CV_Assert( x.depth() == CV_32F );
CV_Assert( y.type() == x.type() && y.size() == x.size() );
_dst.create(x.size(), CV_32FC1);
GpuMat dst = _dst.getGpuMat();
GpuMat dst = getOutputMat(_dst, x.size(), CV_32FC1, stream);
GpuMat_<float> xc(x.reshape(1));
GpuMat_<float> yc(y.reshape(1));
...
...
@@ -108,21 +112,20 @@ void cv::cuda::phase(InputArray _x, InputArray _y, OutputArray _dst, bool angleI
gridTransformBinary(xc, yc, anglec, direction_func<float, true>(), stream);
else
gridTransformBinary(xc, yc, anglec, direction_func<float, false>(), stream);
syncOutput(dst, _dst, stream);
}
void cv::cuda::cartToPolar(InputArray _x, InputArray _y, OutputArray _mag, OutputArray _angle, bool angleInDegrees, Stream& stream)
{
GpuMat x = _x.getGpuMat();
GpuMat y = _y.getGpuMat();
CV_DbgAssert( x.depth() == CV_32F );
CV_DbgAssert( y.type() == x.type() && y.size() == x.size() );
GpuMat x = getInputMat(_x, stream);
GpuMat y = getInputMat(_y, stream);
_mag.create(x.size(), CV_32FC1
);
GpuMat mag = _mag.getGpuMat(
);
CV_Assert( x.depth() == CV_32F
);
CV_Assert( y.type() == x.type() && y.size() == x.size()
);
_angle.create(x.size(), CV_32FC1
);
GpuMat angle =
_angle.getGpuMat(
);
GpuMat mag = getOutputMat(_mag, x.size(), CV_32FC1, stream
);
GpuMat angle =
getOutputMat(_angle, x.size(), CV_32FC1, stream
);
GpuMat_<float> xc(x.reshape(1));
GpuMat_<float> yc(y.reshape(1));
...
...
@@ -147,6 +150,9 @@ void cv::cuda::cartToPolar(InputArray _x, InputArray _y, OutputArray _mag, Outpu
binaryTupleAdapter<0, 1>(direction_func<float, false>())),
stream);
}
syncOutput(mag, _mag, stream);
syncOutput(angle, _angle, stream);
}
namespace
...
...
@@ -173,17 +179,14 @@ namespace
void cv::cuda::polarToCart(InputArray _mag, InputArray _angle, OutputArray _x, OutputArray _y, bool angleInDegrees, Stream& _stream)
{
GpuMat mag = _mag.getGpuMat();
GpuMat angle = _angle.getGpuMat();
CV_DbgAssert( angle.depth() == CV_32F );
CV_DbgAssert( mag.empty() || (mag.type() == angle.type() && mag.size() == angle.size()) );
GpuMat mag = getInputMat(_mag, _stream);
GpuMat angle = getInputMat(_angle, _stream);
_x.create(angle.size(), CV_32FC1
);
GpuMat x = _x.getGpuMat(
);
CV_Assert( angle.depth() == CV_32F
);
CV_Assert( mag.empty() || (mag.type() == angle.type() && mag.size() == angle.size())
);
_y.create(angle.size(), CV_32FC1
);
GpuMat y =
_y.getGpuMat(
);
GpuMat x = getOutputMat(_x, angle.size(), CV_32FC1, _stream
);
GpuMat y =
getOutputMat(_y, angle.size(), CV_32FC1, _stream
);
GpuMat_<float> xc(x.reshape(1));
GpuMat_<float> yc(y.reshape(1));
...
...
@@ -204,6 +207,9 @@ void cv::cuda::polarToCart(InputArray _mag, InputArray _angle, OutputArray _x, O
CV_CUDEV_SAFE_CALL( cudaGetLastError() );
syncOutput(x, _x, _stream);
syncOutput(y, _y, _stream);
if (stream == 0)
CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
}
...
...
modules/cudaarithm/src/cuda/reduce.cu
View file @
7454189c
...
...
@@ -50,7 +50,10 @@
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
namespace
...
...
@@ -125,7 +128,7 @@ namespace
void cv::cuda::reduce(InputArray _src, OutputArray _dst, int dim, int reduceOp, int dtype, Stream& stream)
{
GpuMat src =
_src.getGpuMat(
);
GpuMat src =
getInputMat(_src, stream
);
CV_Assert( src.channels() <= 4 );
CV_Assert( dim == 0 || dim == 1 );
...
...
@@ -134,8 +137,7 @@ void cv::cuda::reduce(InputArray _src, OutputArray _dst, int dim, int reduceOp,
if (dtype < 0)
dtype = src.depth();
_dst.create(1, dim == 0 ? src.cols : src.rows, CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()));
GpuMat dst = _dst.getGpuMat();
GpuMat dst = getOutputMat(_dst, 1, dim == 0 ? src.cols : src.rows, CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()), stream);
if (dim == 0)
{
...
...
@@ -292,6 +294,8 @@ void cv::cuda::reduce(InputArray _src, OutputArray _dst, int dim, int reduceOp,
func(src, dst, reduceOp, stream);
}
syncOutput(dst, _dst, stream);
}
#endif
modules/cudaarithm/src/cuda/split_merge.cu
View file @
7454189c
...
...
@@ -50,7 +50,10 @@
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
////////////////////////////////////////////////////////////////////////
...
...
@@ -92,20 +95,18 @@ namespace
void mergeImpl(const GpuMat* src, size_t n, cv::OutputArray _dst, Stream& stream)
{
CV_
Dbg
Assert( src != 0 );
CV_
Dbg
Assert( n > 0 && n <= 4 );
CV_Assert( src != 0 );
CV_Assert( n > 0 && n <= 4 );
const int depth = src[0].depth();
const cv::Size size = src[0].size();
#ifdef _DEBUG
for (size_t i = 0; i < n; ++i)
{
CV_Assert( src[i].size() == size );
CV_Assert( src[i].depth() == depth );
CV_Assert( src[i].channels() == 1 );
}
#endif
if (n == 1)
{
...
...
@@ -123,8 +124,7 @@ namespace
const int channels = static_cast<int>(n);
_dst.create(size, CV_MAKE_TYPE(depth, channels));
GpuMat dst = _dst.getGpuMat();
GpuMat dst = getOutputMat(_dst, size, CV_MAKE_TYPE(depth, channels), stream);
const func_t func = funcs[channels - 2][CV_ELEM_SIZE(depth) / 2];
...
...
@@ -132,6 +132,8 @@ namespace
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported channel count or data type");
func(src, dst, stream);
syncOutput(dst, _dst, stream);
}
}
}
...
...
@@ -203,12 +205,12 @@ namespace
{SplitFunc<4, uchar>::call, SplitFunc<4, ushort>::call, SplitFunc<4, int>::call, 0, SplitFunc<4, double>::call}
};
CV_
Dbg
Assert( dst != 0 );
CV_Assert( dst != 0 );
const int depth = src.depth();
const int channels = src.channels();
CV_
Dbg
Assert( channels <= 4 );
CV_Assert( channels <= 4 );
if (channels == 0)
return;
...
...
@@ -233,13 +235,13 @@ namespace
void cv::cuda::split(InputArray _src, GpuMat* dst, Stream& stream)
{
GpuMat src =
_src.getGpuMat(
);
GpuMat src =
getInputMat(_src, stream
);
splitImpl(src, dst, stream);
}
void cv::cuda::split(InputArray _src, std::vector<GpuMat>& dst, Stream& stream)
{
GpuMat src =
_src.getGpuMat(
);
GpuMat src =
getInputMat(_src, stream
);
dst.resize(src.channels());
if (src.channels() > 0)
splitImpl(src, &dst[0], stream);
...
...
modules/cudaarithm/src/cuda/threshold.cu
View file @
7454189c
...
...
@@ -52,6 +52,8 @@
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
namespace
...
...
@@ -95,15 +97,14 @@ namespace
double cv::cuda::threshold(InputArray _src, OutputArray _dst, double thresh, double maxVal, int type, Stream& stream)
{
GpuMat src =
_src.getGpuMat(
);
GpuMat src =
getInputMat(_src, stream
);
const int depth = src.depth();
CV_
Dbg
Assert( src.channels() == 1 && depth <= CV_64F );
CV_
Dbg
Assert( type <= 4 /*THRESH_TOZERO_INV*/ );
CV_Assert( src.channels() == 1 && depth <= CV_64F );
CV_Assert( type <= 4 /*THRESH_TOZERO_INV*/ );
_dst.create(src.size(), src.type());
GpuMat dst = _dst.getGpuMat();
GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
if (depth == CV_32F && type == 2 /*THRESH_TRUNC*/)
{
...
...
@@ -142,6 +143,8 @@ double cv::cuda::threshold(InputArray _src, OutputArray _dst, double thresh, dou
funcs[depth](src, dst, thresh, maxVal, type, stream);
}
syncOutput(dst, _dst, stream);
return thresh;
}
...
...
modules/cudaarithm/src/cuda/transpose.cu
View file @
7454189c
...
...
@@ -52,18 +52,19 @@
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
void cv::cuda::transpose(InputArray _src, OutputArray _dst, Stream& stream)
{
GpuMat src =
_src.getGpuMat(
);
GpuMat src =
getInputMat(_src, stream
);
const size_t elemSize = src.elemSize();
CV_Assert( elemSize == 1 || elemSize == 4 || elemSize == 8 );
_dst.create( src.cols, src.rows, src.type() );
GpuMat dst = _dst.getGpuMat();
GpuMat dst = getOutputMat(_dst, src.cols, src.rows, src.type(), stream);
if (elemSize == 1)
{
...
...
@@ -87,6 +88,8 @@ void cv::cuda::transpose(InputArray _src, OutputArray _dst, Stream& stream)
{
gridTranspose(globPtr<double>(src), globPtr<double>(dst), stream);
}
syncOutput(dst, _dst, stream);
}
#endif
modules/cudaarithm/src/element_operations.cpp
View file @
7454189c
...
...
@@ -107,11 +107,11 @@ namespace
GpuMat
src1
;
if
(
!
isScalar1
)
src1
=
_src1
.
getGpuMat
(
);
src1
=
getInputMat
(
_src1
,
stream
);
GpuMat
src2
;
if
(
!
isScalar2
)
src2
=
_src2
.
getGpuMat
(
);
src2
=
getInputMat
(
_src2
,
stream
);
Mat
scalar
;
if
(
isScalar1
)
...
...
@@ -126,7 +126,7 @@ namespace
scalar
.
convertTo
(
Mat_
<
double
>
(
scalar
.
rows
,
scalar
.
cols
,
&
val
[
0
]),
CV_64F
);
}
GpuMat
mask
=
_mask
.
getGpuMat
(
);
GpuMat
mask
=
getInputMat
(
_mask
,
stream
);
const
int
sdepth
=
src1
.
empty
()
?
src2
.
depth
()
:
src1
.
depth
();
const
int
cn
=
src1
.
empty
()
?
src2
.
channels
()
:
src1
.
channels
();
...
...
@@ -147,8 +147,7 @@ namespace
CV_Error
(
Error
::
StsUnsupportedFormat
,
"The device doesn't support double"
);
}
_dst
.
create
(
size
,
CV_MAKE_TYPE
(
ddepth
,
cn
));
GpuMat
dst
=
_dst
.
getGpuMat
();
GpuMat
dst
=
getOutputMat
(
_dst
,
size
,
CV_MAKE_TYPE
(
ddepth
,
cn
),
stream
);
if
(
isScalar1
)
mat_scalar_func
(
src2
,
val
,
true
,
dst
,
mask
,
scale
,
stream
,
op
);
...
...
@@ -156,6 +155,8 @@ namespace
mat_scalar_func
(
src1
,
val
,
false
,
dst
,
mask
,
scale
,
stream
,
op
);
else
mat_mat_func
(
src1
,
src2
,
dst
,
mask
,
scale
,
stream
,
op
);
syncOutput
(
dst
,
_dst
,
stream
);
}
}
...
...
@@ -196,27 +197,29 @@ void cv::cuda::multiply(InputArray _src1, InputArray _src2, OutputArray _dst, do
{
if
(
_src1
.
type
()
==
CV_8UC4
&&
_src2
.
type
()
==
CV_32FC1
)
{
GpuMat
src1
=
_src1
.
getGpuMat
(
);
GpuMat
src2
=
_src2
.
getGpuMat
(
);
GpuMat
src1
=
getInputMat
(
_src1
,
stream
);
GpuMat
src2
=
getInputMat
(
_src2
,
stream
);
CV_Assert
(
src1
.
size
()
==
src2
.
size
()
);
_dst
.
create
(
src1
.
size
(),
src1
.
type
());
GpuMat
dst
=
_dst
.
getGpuMat
();
GpuMat
dst
=
getOutputMat
(
_dst
,
src1
.
size
(),
src1
.
type
(),
stream
);
mulMat_8uc4_32f
(
src1
,
src2
,
dst
,
stream
);
syncOutput
(
dst
,
_dst
,
stream
);
}
else
if
(
_src1
.
type
()
==
CV_16SC4
&&
_src2
.
type
()
==
CV_32FC1
)
{
GpuMat
src1
=
_src1
.
getGpuMat
(
);
GpuMat
src2
=
_src2
.
getGpuMat
(
);
GpuMat
src1
=
getInputMat
(
_src1
,
stream
);
GpuMat
src2
=
getInputMat
(
_src2
,
stream
);
CV_Assert
(
src1
.
size
()
==
src2
.
size
()
);
_dst
.
create
(
src1
.
size
(),
src1
.
type
());
GpuMat
dst
=
_dst
.
getGpuMat
();
GpuMat
dst
=
getOutputMat
(
_dst
,
src1
.
size
(),
src1
.
type
(),
stream
);
mulMat_16sc4_32f
(
src1
,
src2
,
dst
,
stream
);
syncOutput
(
dst
,
_dst
,
stream
);
}
else
{
...
...
@@ -237,27 +240,29 @@ void cv::cuda::divide(InputArray _src1, InputArray _src2, OutputArray _dst, doub
{
if
(
_src1
.
type
()
==
CV_8UC4
&&
_src2
.
type
()
==
CV_32FC1
)
{
GpuMat
src1
=
_src1
.
getGpuMat
(
);
GpuMat
src2
=
_src2
.
getGpuMat
(
);
GpuMat
src1
=
getInputMat
(
_src1
,
stream
);
GpuMat
src2
=
getInputMat
(
_src2
,
stream
);
CV_Assert
(
src1
.
size
()
==
src2
.
size
()
);
_dst
.
create
(
src1
.
size
(),
src1
.
type
());
GpuMat
dst
=
_dst
.
getGpuMat
();
GpuMat
dst
=
getOutputMat
(
_dst
,
src1
.
size
(),
src1
.
type
(),
stream
);
divMat_8uc4_32f
(
src1
,
src2
,
dst
,
stream
);
syncOutput
(
dst
,
_dst
,
stream
);
}
else
if
(
_src1
.
type
()
==
CV_16SC4
&&
_src2
.
type
()
==
CV_32FC1
)
{
GpuMat
src1
=
_src1
.
getGpuMat
(
);
GpuMat
src2
=
_src2
.
getGpuMat
(
);
GpuMat
src1
=
getInputMat
(
_src1
,
stream
);
GpuMat
src2
=
getInputMat
(
_src2
,
stream
);
CV_Assert
(
src1
.
size
()
==
src2
.
size
()
);
_dst
.
create
(
src1
.
size
(),
src1
.
type
());
GpuMat
dst
=
_dst
.
getGpuMat
();
GpuMat
dst
=
getOutputMat
(
_dst
,
src1
.
size
(),
src1
.
type
(),
stream
);
divMat_16sc4_32f
(
src1
,
src2
,
dst
,
stream
);
syncOutput
(
dst
,
_dst
,
stream
);
}
else
{
...
...
@@ -389,15 +394,16 @@ void cv::cuda::rshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Strea
{
NppShift
<
CV_32S
,
1
,
nppiRShiftC_32s_C1R
>::
call
,
0
,
NppShift
<
CV_32S
,
3
,
nppiRShiftC_32s_C3R
>::
call
,
NppShift
<
CV_32S
,
4
,
nppiRShiftC_32s_C4R
>::
call
},
};
GpuMat
src
=
_src
.
getGpuMat
(
);
GpuMat
src
=
getInputMat
(
_src
,
stream
);
CV_Assert
(
src
.
depth
()
<
CV_32F
);
CV_Assert
(
src
.
channels
()
==
1
||
src
.
channels
()
==
3
||
src
.
channels
()
==
4
);
_dst
.
create
(
src
.
size
(),
src
.
type
());
GpuMat
dst
=
_dst
.
getGpuMat
();
GpuMat
dst
=
getOutputMat
(
_dst
,
src
.
size
(),
src
.
type
(),
stream
);
funcs
[
src
.
depth
()][
src
.
channels
()
-
1
](
src
,
val
,
dst
,
StreamAccessor
::
getStream
(
stream
));
syncOutput
(
dst
,
_dst
,
stream
);
}
void
cv
::
cuda
::
lshift
(
InputArray
_src
,
Scalar_
<
int
>
val
,
OutputArray
_dst
,
Stream
&
stream
)
...
...
@@ -412,15 +418,16 @@ void cv::cuda::lshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Strea
{
NppShift
<
CV_32S
,
1
,
nppiLShiftC_32s_C1R
>::
call
,
0
,
NppShift
<
CV_32S
,
3
,
nppiLShiftC_32s_C3R
>::
call
,
NppShift
<
CV_32S
,
4
,
nppiLShiftC_32s_C4R
>::
call
},
};
GpuMat
src
=
_src
.
getGpuMat
(
);
GpuMat
src
=
getInputMat
(
_src
,
stream
);
CV_Assert
(
src
.
depth
()
==
CV_8U
||
src
.
depth
()
==
CV_16U
||
src
.
depth
()
==
CV_32S
);
CV_Assert
(
src
.
channels
()
==
1
||
src
.
channels
()
==
3
||
src
.
channels
()
==
4
);
_dst
.
create
(
src
.
size
(),
src
.
type
());
GpuMat
dst
=
_dst
.
getGpuMat
();
GpuMat
dst
=
getOutputMat
(
_dst
,
src
.
size
(),
src
.
type
(),
stream
);
funcs
[
src
.
depth
()][
src
.
channels
()
-
1
](
src
,
val
,
dst
,
StreamAccessor
::
getStream
(
stream
));
syncOutput
(
dst
,
_dst
,
stream
);
}
//////////////////////////////////////////////////////////////////////////////
...
...
@@ -475,22 +482,24 @@ namespace
void
cv
::
cuda
::
magnitude
(
InputArray
_src
,
OutputArray
_dst
,
Stream
&
stream
)
{
GpuMat
src
=
_src
.
getGpuMat
(
);
GpuMat
src
=
getInputMat
(
_src
,
stream
);
_dst
.
create
(
src
.
size
(),
CV_32FC1
);
GpuMat
dst
=
_dst
.
getGpuMat
();
GpuMat
dst
=
getOutputMat
(
_dst
,
src
.
size
(),
CV_32FC1
,
stream
);
npp_magnitude
(
src
,
dst
,
nppiMagnitude_32fc32f_C1R
,
StreamAccessor
::
getStream
(
stream
));
syncOutput
(
dst
,
_dst
,
stream
);
}
void
cv
::
cuda
::
magnitudeSqr
(
InputArray
_src
,
OutputArray
_dst
,
Stream
&
stream
)
{
GpuMat
src
=
_src
.
getGpuMat
(
);
GpuMat
src
=
getInputMat
(
_src
,
stream
);
_dst
.
create
(
src
.
size
(),
CV_32FC1
);
GpuMat
dst
=
_dst
.
getGpuMat
();
GpuMat
dst
=
getOutputMat
(
_dst
,
src
.
size
(),
CV_32FC1
,
stream
);
npp_magnitude
(
src
,
dst
,
nppiMagnitudeSqr_32fc32f_C1R
,
StreamAccessor
::
getStream
(
stream
));
syncOutput
(
dst
,
_dst
,
stream
);
}
#endif
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment