Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
16b56e71
Commit
16b56e71
authored
Jan 15, 2015
by
Vadim Pisarevsky
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #3561 from jet47:cuda-arithm-refactoring
parents
c58373db
b4e7ee46
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
34 changed files
with
1168 additions
and
368 deletions
+1168
-368
private.cuda.hpp
modules/core/include/opencv2/core/private.cuda.hpp
+10
-0
cuda_gpu_mat.cpp
modules/core/src/cuda_gpu_mat.cpp
+69
-0
cascadeclassifier.cpp
modules/cuda/src/cascadeclassifier.cpp
+1
-2
cudaarithm.hpp
modules/cudaarithm/include/opencv2/cudaarithm.hpp
+0
-0
perf_reductions.cpp
modules/cudaarithm/perf/perf_reductions.cpp
+12
-22
arithm.cpp
modules/cudaarithm/src/arithm.cpp
+23
-14
core.cpp
modules/cudaarithm/src/core.cpp
+4
-2
add_weighted.cu
modules/cudaarithm/src/cuda/add_weighted.cu
+16
-12
bitwise_mat.cu
modules/cudaarithm/src/cuda/bitwise_mat.cu
+8
-4
copy_make_border.cu
modules/cudaarithm/src/cuda/copy_make_border.cu
+7
-3
countnonzero.cu
modules/cudaarithm/src/cuda/countnonzero.cu
+37
-20
integral.cu
modules/cudaarithm/src/cuda/integral.cu
+17
-10
lut.cu
modules/cudaarithm/src/cuda/lut.cu
+6
-3
math.cu
modules/cudaarithm/src/cuda/math.cu
+37
-40
minmax.cu
modules/cudaarithm/src/cuda/minmax.cu
+108
-30
minmaxloc.cu
modules/cudaarithm/src/cuda/minmaxloc.cu
+78
-46
mul_spectrums.cu
modules/cudaarithm/src/cuda/mul_spectrums.cu
+13
-8
norm.cu
modules/cudaarithm/src/cuda/norm.cu
+98
-28
normalize.cu
modules/cudaarithm/src/cuda/normalize.cu
+290
-0
polar_cart.cu
modules/cudaarithm/src/cuda/polar_cart.cu
+42
-36
reduce.cu
modules/cudaarithm/src/cuda/reduce.cu
+7
-3
split_merge.cu
modules/cudaarithm/src/cuda/split_merge.cu
+12
-10
sum.cu
modules/cudaarithm/src/cuda/sum.cu
+0
-0
threshold.cu
modules/cudaarithm/src/cuda/threshold.cu
+8
-5
transpose.cu
modules/cudaarithm/src/cuda/transpose.cu
+6
-3
element_operations.cpp
modules/cudaarithm/src/element_operations.cpp
+42
-33
reductions.cpp
modules/cudaarithm/src/reductions.cpp
+0
-0
test_element_operations.cpp
modules/cudaarithm/test/test_element_operations.cpp
+2
-2
test_reductions.cpp
modules/cudaarithm/test/test_reductions.cpp
+198
-7
fgd.cpp
modules/cudabgsegm/src/fgd.cpp
+3
-5
filtering.cpp
modules/cudafilters/src/filtering.cpp
+1
-1
gftt.cpp
modules/cudaimgproc/src/gftt.cpp
+1
-2
match_template.cpp
modules/cudaimgproc/src/match_template.cpp
+9
-14
tests.cpp
samples/gpu/performance/tests.cpp
+3
-3
No files found.
modules/core/include/opencv2/core/private.cuda.hpp
View file @
16b56e71
...
...
@@ -80,6 +80,16 @@
namespace
cv
{
namespace
cuda
{
CV_EXPORTS
cv
::
String
getNppErrorMessage
(
int
code
);
CV_EXPORTS
cv
::
String
getCudaDriverApiErrorMessage
(
int
code
);
CV_EXPORTS
GpuMat
getInputMat
(
InputArray
_src
,
Stream
&
stream
);
CV_EXPORTS
GpuMat
getOutputMat
(
OutputArray
_dst
,
int
rows
,
int
cols
,
int
type
,
Stream
&
stream
);
static
inline
GpuMat
getOutputMat
(
OutputArray
_dst
,
Size
size
,
int
type
,
Stream
&
stream
)
{
return
getOutputMat
(
_dst
,
size
.
height
,
size
.
width
,
type
,
stream
);
}
CV_EXPORTS
void
syncOutput
(
const
GpuMat
&
dst
,
OutputArray
_dst
,
Stream
&
stream
);
}}
#ifndef HAVE_CUDA
...
...
modules/core/src/cuda_gpu_mat.cpp
View file @
16b56e71
...
...
@@ -342,6 +342,75 @@ void cv::cuda::ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr)
}
}
GpuMat
cv
::
cuda
::
getInputMat
(
InputArray
_src
,
Stream
&
stream
)
{
GpuMat
src
;
#ifndef HAVE_CUDA
(
void
)
_src
;
(
void
)
stream
;
throw_no_cuda
();
#else
if
(
_src
.
kind
()
==
_InputArray
::
CUDA_GPU_MAT
)
{
src
=
_src
.
getGpuMat
();
}
else
if
(
!
_src
.
empty
())
{
BufferPool
pool
(
stream
);
src
=
pool
.
getBuffer
(
_src
.
size
(),
_src
.
type
());
src
.
upload
(
_src
,
stream
);
}
#endif
return
src
;
}
GpuMat
cv
::
cuda
::
getOutputMat
(
OutputArray
_dst
,
int
rows
,
int
cols
,
int
type
,
Stream
&
stream
)
{
GpuMat
dst
;
#ifndef HAVE_CUDA
(
void
)
_dst
;
(
void
)
rows
;
(
void
)
cols
;
(
void
)
type
;
(
void
)
stream
;
throw_no_cuda
();
#else
if
(
_dst
.
kind
()
==
_InputArray
::
CUDA_GPU_MAT
)
{
_dst
.
create
(
rows
,
cols
,
type
);
dst
=
_dst
.
getGpuMat
();
}
else
{
BufferPool
pool
(
stream
);
dst
=
pool
.
getBuffer
(
rows
,
cols
,
type
);
}
#endif
return
dst
;
}
void
cv
::
cuda
::
syncOutput
(
const
GpuMat
&
dst
,
OutputArray
_dst
,
Stream
&
stream
)
{
#ifndef HAVE_CUDA
(
void
)
dst
;
(
void
)
_dst
;
(
void
)
stream
;
throw_no_cuda
();
#else
if
(
_dst
.
kind
()
!=
_InputArray
::
CUDA_GPU_MAT
)
{
if
(
stream
)
dst
.
download
(
_dst
,
stream
);
else
dst
.
download
(
_dst
);
}
#endif
}
#ifndef HAVE_CUDA
GpuMat
::
Allocator
*
cv
::
cuda
::
GpuMat
::
defaultAllocator
()
...
...
modules/cuda/src/cascadeclassifier.cpp
View file @
16b56e71
...
...
@@ -454,11 +454,10 @@ public:
// create sutable matrix headers
GpuMat
src
=
resuzeBuffer
(
cv
::
Rect
(
0
,
0
,
level
.
sFrame
.
width
,
level
.
sFrame
.
height
));
GpuMat
sint
=
integral
(
cv
::
Rect
(
prev
,
0
,
level
.
sFrame
.
width
+
1
,
level
.
sFrame
.
height
+
1
));
GpuMat
buff
=
integralBuffer
;
// generate integral for scale
cuda
::
resize
(
image
,
src
,
level
.
sFrame
,
0
,
0
,
cv
::
INTER_LINEAR
);
cuda
::
integral
(
src
,
sint
,
buff
);
cuda
::
integral
(
src
,
sint
);
// calculate job
int
totalWidth
=
level
.
workArea
.
width
/
step
;
...
...
modules/cudaarithm/include/opencv2/cudaarithm.hpp
View file @
16b56e71
This diff is collapsed.
Click to expand it.
modules/cudaarithm/perf/perf_reductions.cpp
View file @
16b56e71
...
...
@@ -108,10 +108,9 @@ PERF_TEST_P(Sz_Norm, NormDiff,
{
const
cv
::
cuda
::
GpuMat
d_src1
(
src1
);
const
cv
::
cuda
::
GpuMat
d_src2
(
src2
);
cv
::
cuda
::
GpuMat
d_buf
;
double
gpu_dst
;
TEST_CYCLE
()
gpu_dst
=
cv
::
cuda
::
norm
(
d_src1
,
d_src2
,
d_buf
,
normType
);
TEST_CYCLE
()
gpu_dst
=
cv
::
cuda
::
norm
(
d_src1
,
d_src2
,
normType
);
SANITY_CHECK
(
gpu_dst
);
...
...
@@ -146,10 +145,9 @@ PERF_TEST_P(Sz_Depth_Cn, Sum,
if
(
PERF_RUN_CUDA
())
{
const
cv
::
cuda
::
GpuMat
d_src
(
src
);
cv
::
cuda
::
GpuMat
d_buf
;
cv
::
Scalar
gpu_dst
;
TEST_CYCLE
()
gpu_dst
=
cv
::
cuda
::
sum
(
d_src
,
d_buf
);
TEST_CYCLE
()
gpu_dst
=
cv
::
cuda
::
sum
(
d_src
);
SANITY_CHECK
(
gpu_dst
,
1e-5
,
ERROR_RELATIVE
);
}
...
...
@@ -183,10 +181,9 @@ PERF_TEST_P(Sz_Depth_Cn, SumAbs,
if
(
PERF_RUN_CUDA
())
{
const
cv
::
cuda
::
GpuMat
d_src
(
src
);
cv
::
cuda
::
GpuMat
d_buf
;
cv
::
Scalar
gpu_dst
;
TEST_CYCLE
()
gpu_dst
=
cv
::
cuda
::
absSum
(
d_src
,
d_buf
);
TEST_CYCLE
()
gpu_dst
=
cv
::
cuda
::
absSum
(
d_src
);
SANITY_CHECK
(
gpu_dst
,
1e-6
,
ERROR_RELATIVE
);
}
...
...
@@ -216,10 +213,9 @@ PERF_TEST_P(Sz_Depth_Cn, SumSqr,
if
(
PERF_RUN_CUDA
())
{
const
cv
::
cuda
::
GpuMat
d_src
(
src
);
cv
::
cuda
::
GpuMat
d_buf
;
cv
::
Scalar
gpu_dst
;
TEST_CYCLE
()
gpu_dst
=
cv
::
cuda
::
sqrSum
(
d_src
,
d_buf
);
TEST_CYCLE
()
gpu_dst
=
cv
::
cuda
::
sqrSum
(
d_src
);
SANITY_CHECK
(
gpu_dst
,
1e-6
,
ERROR_RELATIVE
);
}
...
...
@@ -248,10 +244,9 @@ PERF_TEST_P(Sz_Depth, MinMax,
if
(
PERF_RUN_CUDA
())
{
const
cv
::
cuda
::
GpuMat
d_src
(
src
);
cv
::
cuda
::
GpuMat
d_buf
;
double
gpu_minVal
,
gpu_maxVal
;
TEST_CYCLE
()
cv
::
cuda
::
minMax
(
d_src
,
&
gpu_minVal
,
&
gpu_maxVal
,
cv
::
cuda
::
GpuMat
()
,
d_buf
);
TEST_CYCLE
()
cv
::
cuda
::
minMax
(
d_src
,
&
gpu_minVal
,
&
gpu_maxVal
,
cv
::
cuda
::
GpuMat
());
SANITY_CHECK
(
gpu_minVal
,
1e-10
);
SANITY_CHECK
(
gpu_maxVal
,
1e-10
);
...
...
@@ -286,11 +281,10 @@ PERF_TEST_P(Sz_Depth, MinMaxLoc,
if
(
PERF_RUN_CUDA
())
{
const
cv
::
cuda
::
GpuMat
d_src
(
src
);
cv
::
cuda
::
GpuMat
d_valbuf
,
d_locbuf
;
double
gpu_minVal
,
gpu_maxVal
;
cv
::
Point
gpu_minLoc
,
gpu_maxLoc
;
TEST_CYCLE
()
cv
::
cuda
::
minMaxLoc
(
d_src
,
&
gpu_minVal
,
&
gpu_maxVal
,
&
gpu_minLoc
,
&
gpu_maxLoc
,
cv
::
cuda
::
GpuMat
(),
d_valbuf
,
d_locbuf
);
TEST_CYCLE
()
cv
::
cuda
::
minMaxLoc
(
d_src
,
&
gpu_minVal
,
&
gpu_maxVal
,
&
gpu_minLoc
,
&
gpu_maxLoc
);
SANITY_CHECK
(
gpu_minVal
,
1e-10
);
SANITY_CHECK
(
gpu_maxVal
,
1e-10
);
...
...
@@ -323,10 +317,9 @@ PERF_TEST_P(Sz_Depth, CountNonZero,
if
(
PERF_RUN_CUDA
())
{
const
cv
::
cuda
::
GpuMat
d_src
(
src
);
cv
::
cuda
::
GpuMat
d_buf
;
int
gpu_dst
=
0
;
TEST_CYCLE
()
gpu_dst
=
cv
::
cuda
::
countNonZero
(
d_src
,
d_buf
);
TEST_CYCLE
()
gpu_dst
=
cv
::
cuda
::
countNonZero
(
d_src
);
SANITY_CHECK
(
gpu_dst
);
}
...
...
@@ -414,9 +407,8 @@ PERF_TEST_P(Sz_Depth_NormType, Normalize,
{
const
cv
::
cuda
::
GpuMat
d_src
(
src
);
cv
::
cuda
::
GpuMat
dst
;
cv
::
cuda
::
GpuMat
d_norm_buf
,
d_cvt_buf
;
TEST_CYCLE
()
cv
::
cuda
::
normalize
(
d_src
,
dst
,
alpha
,
beta
,
norm_type
,
type
,
cv
::
cuda
::
GpuMat
()
,
d_norm_buf
,
d_cvt_buf
);
TEST_CYCLE
()
cv
::
cuda
::
normalize
(
d_src
,
dst
,
alpha
,
beta
,
norm_type
,
type
,
cv
::
cuda
::
GpuMat
());
CUDA_SANITY_CHECK
(
dst
,
1e-6
);
}
...
...
@@ -445,11 +437,10 @@ PERF_TEST_P(Sz, MeanStdDev,
if
(
PERF_RUN_CUDA
())
{
const
cv
::
cuda
::
GpuMat
d_src
(
src
);
cv
::
cuda
::
GpuMat
d_buf
;
cv
::
Scalar
gpu_mean
;
cv
::
Scalar
gpu_stddev
;
TEST_CYCLE
()
cv
::
cuda
::
meanStdDev
(
d_src
,
gpu_mean
,
gpu_stddev
,
d_buf
);
TEST_CYCLE
()
cv
::
cuda
::
meanStdDev
(
d_src
,
gpu_mean
,
gpu_stddev
);
SANITY_CHECK
(
gpu_mean
);
SANITY_CHECK
(
gpu_stddev
);
...
...
@@ -481,9 +472,8 @@ PERF_TEST_P(Sz, Integral,
{
const
cv
::
cuda
::
GpuMat
d_src
(
src
);
cv
::
cuda
::
GpuMat
dst
;
cv
::
cuda
::
GpuMat
d_buf
;
TEST_CYCLE
()
cv
::
cuda
::
integral
(
d_src
,
dst
,
d_buf
);
TEST_CYCLE
()
cv
::
cuda
::
integral
(
d_src
,
dst
);
CUDA_SANITY_CHECK
(
dst
);
}
...
...
@@ -511,9 +501,9 @@ PERF_TEST_P(Sz, IntegralSqr,
if
(
PERF_RUN_CUDA
())
{
const
cv
::
cuda
::
GpuMat
d_src
(
src
);
cv
::
cuda
::
GpuMat
dst
,
buf
;
cv
::
cuda
::
GpuMat
dst
;
TEST_CYCLE
()
cv
::
cuda
::
sqrIntegral
(
d_src
,
dst
,
buf
);
TEST_CYCLE
()
cv
::
cuda
::
sqrIntegral
(
d_src
,
dst
);
CUDA_SANITY_CHECK
(
dst
);
}
...
...
modules/cudaarithm/src/arithm.cpp
View file @
16b56e71
...
...
@@ -169,9 +169,9 @@ void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray
#else
// CUBLAS works with column-major matrices
GpuMat
src1
=
_src1
.
getGpuMat
(
);
GpuMat
src2
=
_src2
.
getGpuMat
(
);
GpuMat
src3
=
_src3
.
getGpuMat
(
);
GpuMat
src1
=
getInputMat
(
_src1
,
stream
);
GpuMat
src2
=
getInputMat
(
_src2
,
stream
);
GpuMat
src3
=
getInputMat
(
_src3
,
stream
);
CV_Assert
(
src1
.
type
()
==
CV_32FC1
||
src1
.
type
()
==
CV_32FC2
||
src1
.
type
()
==
CV_64FC1
||
src1
.
type
()
==
CV_64FC2
);
CV_Assert
(
src2
.
type
()
==
src1
.
type
()
&&
(
src3
.
empty
()
||
src3
.
type
()
==
src1
.
type
())
);
...
...
@@ -200,8 +200,7 @@ void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray
CV_Assert
(
src1Size
.
width
==
src2Size
.
height
);
CV_Assert
(
src3
.
empty
()
||
src3Size
==
dstSize
);
_dst
.
create
(
dstSize
,
src1
.
type
());
GpuMat
dst
=
_dst
.
getGpuMat
();
GpuMat
dst
=
getOutputMat
(
_dst
,
dstSize
,
src1
.
type
(),
stream
);
if
(
beta
!=
0
)
{
...
...
@@ -281,6 +280,8 @@ void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray
}
cublasSafeCall
(
cublasDestroy_v2
(
handle
)
);
syncOutput
(
dst
,
_dst
,
stream
);
#endif
}
...
...
@@ -297,7 +298,7 @@ void cv::cuda::dft(InputArray _src, OutputArray _dst, Size dft_size, int flags,
(
void
)
stream
;
throw_no_cuda
();
#else
GpuMat
src
=
_src
.
getGpuMat
(
);
GpuMat
src
=
getInputMat
(
_src
,
stream
);
CV_Assert
(
src
.
type
()
==
CV_32FC1
||
src
.
type
()
==
CV_32FC2
);
...
...
@@ -314,13 +315,20 @@ void cv::cuda::dft(InputArray _src, OutputArray _dst, Size dft_size, int flags,
// We don't support real-to-real transform
CV_Assert
(
is_complex_input
||
is_complex_output
);
GpuMat
src_cont
=
src
;
// Make sure here we work with the continuous input,
// as CUFFT can't handle gaps
createContinuous
(
src
.
rows
,
src
.
cols
,
src
.
type
(),
src_cont
);
if
(
src_cont
.
data
!=
src
.
data
)
GpuMat
src_cont
;
if
(
src
.
isContinuous
())
{
src_cont
=
src
;
}
else
{
BufferPool
pool
(
stream
);
src_cont
.
allocator
=
pool
.
getAllocator
();
createContinuous
(
src
.
rows
,
src
.
cols
,
src
.
type
(),
src_cont
);
src
.
copyTo
(
src_cont
,
stream
);
}
Size
dft_size_opt
=
dft_size
;
if
(
is_1d_input
&&
!
is_row_dft
)
...
...
@@ -462,16 +470,15 @@ namespace
void
ConvolutionImpl
::
convolve
(
InputArray
_image
,
InputArray
_templ
,
OutputArray
_result
,
bool
ccorr
,
Stream
&
_stream
)
{
GpuMat
image
=
_image
.
getGpuMat
(
);
GpuMat
templ
=
_templ
.
getGpuMat
(
);
GpuMat
image
=
getInputMat
(
_image
,
_stream
);
GpuMat
templ
=
getInputMat
(
_templ
,
_stream
);
CV_Assert
(
image
.
type
()
==
CV_32FC1
);
CV_Assert
(
templ
.
type
()
==
CV_32FC1
);
create
(
image
.
size
(),
templ
.
size
());
_result
.
create
(
result_size
,
CV_32FC1
);
GpuMat
result
=
_result
.
getGpuMat
();
GpuMat
result
=
getOutputMat
(
_result
,
result_size
,
CV_32FC1
,
_stream
);
cudaStream_t
stream
=
StreamAccessor
::
getStream
(
_stream
);
...
...
@@ -520,6 +527,8 @@ namespace
cufftSafeCall
(
cufftDestroy
(
planR2C
)
);
cufftSafeCall
(
cufftDestroy
(
planC2R
)
);
syncOutput
(
result
,
_result
,
_stream
);
}
}
...
...
modules/cudaarithm/src/core.cpp
View file @
16b56e71
...
...
@@ -119,15 +119,17 @@ void cv::cuda::flip(InputArray _src, OutputArray _dst, int flipCode, Stream& str
{
NppMirror
<
CV_32F
,
nppiMirror_32f_C1R
>::
call
,
0
,
NppMirror
<
CV_32F
,
nppiMirror_32f_C3R
>::
call
,
NppMirror
<
CV_32F
,
nppiMirror_32f_C4R
>::
call
}
};
GpuMat
src
=
_src
.
getGpuMat
(
);
GpuMat
src
=
getInputMat
(
_src
,
stream
);
CV_Assert
(
src
.
depth
()
==
CV_8U
||
src
.
depth
()
==
CV_16U
||
src
.
depth
()
==
CV_32S
||
src
.
depth
()
==
CV_32F
);
CV_Assert
(
src
.
channels
()
==
1
||
src
.
channels
()
==
3
||
src
.
channels
()
==
4
);
_dst
.
create
(
src
.
size
(),
src
.
type
());
GpuMat
dst
=
_dst
.
getGpuMat
(
);
GpuMat
dst
=
getOutputMat
(
_dst
,
src
.
size
(),
src
.
type
(),
stream
);
funcs
[
src
.
depth
()][
src
.
channels
()
-
1
](
src
,
dst
,
flipCode
,
StreamAccessor
::
getStream
(
stream
));
syncOutput
(
dst
,
_dst
,
stream
);
}
#endif
/* !defined (HAVE_CUDA) */
modules/cudaarithm/src/cuda/add_weighted.cu
View file @
16b56e71
...
...
@@ -50,7 +50,10 @@
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
namespace
...
...
@@ -63,7 +66,7 @@ namespace
__device__ __forceinline__ D operator ()(T1 a, T2 b) const
{
return saturate_cast<D>(a * alpha + b * beta + gamma);
return
cudev::
saturate_cast<D>(a * alpha + b * beta + gamma);
}
};
...
...
@@ -555,8 +558,8 @@ void cv::cuda::addWeighted(InputArray _src1, double alpha, InputArray _src2, dou
}
};
GpuMat src1 =
_src1.getGpuMat(
);
GpuMat src2 =
_src2.getGpuMat(
);
GpuMat src1 =
getInputMat(_src1, stream
);
GpuMat src2 =
getInputMat(_src2, stream
);
int sdepth1 = src1.depth();
int sdepth2 = src2.depth();
...
...
@@ -564,19 +567,18 @@ void cv::cuda::addWeighted(InputArray _src1, double alpha, InputArray _src2, dou
ddepth = ddepth >= 0 ? CV_MAT_DEPTH(ddepth) : std::max(sdepth1, sdepth2);
const int cn = src1.channels();
CV_
Dbg
Assert( src2.size() == src1.size() && src2.channels() == cn );
CV_
Dbg
Assert( sdepth1 <= CV_64F && sdepth2 <= CV_64F && ddepth <= CV_64F );
CV_Assert( src2.size() == src1.size() && src2.channels() == cn );
CV_Assert( sdepth1 <= CV_64F && sdepth2 <= CV_64F && ddepth <= CV_64F );
_dst.create(src1.size(), CV_MAKE_TYPE(ddepth, cn));
GpuMat dst = _dst.getGpuMat();
GpuMat dst = getOutputMat(_dst, src1.size(), CV_MAKE_TYPE(ddepth, cn), stream);
GpuMat src1_ = src1.reshape(1);
GpuMat src2_ = src2.reshape(1);
GpuMat dst_ = dst.reshape(1);
GpuMat src1_
single
= src1.reshape(1);
GpuMat src2_
single
= src2.reshape(1);
GpuMat dst_
single
= dst.reshape(1);
if (sdepth1 > sdepth2)
{
src1_
.swap(src2_
);
src1_
single.swap(src2_single
);
std::swap(alpha, beta);
std::swap(sdepth1, sdepth2);
}
...
...
@@ -586,7 +588,9 @@ void cv::cuda::addWeighted(InputArray _src1, double alpha, InputArray _src2, dou
if (!func)
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
func(src1_, alpha, src2_, beta, gamma, dst_, stream);
func(src1_single, alpha, src2_single, beta, gamma, dst_single, stream);
syncOutput(dst, _dst, stream);
}
#endif
modules/cudaarithm/src/cuda/bitwise_mat.cu
View file @
16b56e71
...
...
@@ -50,7 +50,10 @@
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
void bitMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int op);
...
...
@@ -60,16 +63,15 @@ void bitMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& m
void cv::cuda::bitwise_not(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream)
{
GpuMat src =
_src.getGpuMat(
);
GpuMat mask =
_mask.getGpuMat(
);
GpuMat src =
getInputMat(_src, stream
);
GpuMat mask =
getInputMat(_mask, stream
);
const int depth = src.depth();
CV_DbgAssert( depth <= CV_32F );
CV_DbgAssert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );
_dst.create(src.size(), src.type());
GpuMat dst = _dst.getGpuMat();
GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
if (mask.empty())
{
...
...
@@ -125,6 +127,8 @@ void cv::cuda::bitwise_not(InputArray _src, OutputArray _dst, InputArray _mask,
gridTransformUnary(vsrc, vdst, bit_not<uchar>(), singleMaskChannels(globPtr<uchar>(mask), src.channels()), stream);
}
}
syncOutput(dst, _dst, stream);
}
//////////////////////////////////////////////////////////////////////////////
...
...
modules/cudaarithm/src/cuda/copy_make_border.cu
View file @
16b56e71
...
...
@@ -50,7 +50,10 @@
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
namespace
...
...
@@ -133,7 +136,7 @@ void cv::cuda::copyMakeBorder(InputArray _src, OutputArray _dst, int top, int bo
{ copyMakeBorderImpl<float , 1> , 0 /*copyMakeBorderImpl<float , 2>*/, copyMakeBorderImpl<float , 3> , copyMakeBorderImpl<float ,4> }
};
GpuMat src =
_src.getGpuMat(
);
GpuMat src =
getInputMat(_src, stream
);
const int depth = src.depth();
const int cn = src.channels();
...
...
@@ -141,8 +144,7 @@ void cv::cuda::copyMakeBorder(InputArray _src, OutputArray _dst, int top, int bo
CV_Assert( depth <= CV_32F && cn <= 4 );
CV_Assert( borderType == BORDER_REFLECT_101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP );
_dst.create(src.rows + top + bottom, src.cols + left + right, src.type());
GpuMat dst = _dst.getGpuMat();
GpuMat dst = getOutputMat(_dst, src.rows + top + bottom, src.cols + left + right, src.type(), stream);
const func_t func = funcs[depth][cn - 1];
...
...
@@ -150,6 +152,8 @@ void cv::cuda::copyMakeBorder(InputArray _src, OutputArray _dst, int top, int bo
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
func(src, dst, top, left, borderType, value, stream);
syncOutput(dst, _dst, stream);
}
#endif
modules/cudaarithm/src/cuda/countnonzero.cu
View file @
16b56e71
...
...
@@ -50,47 +50,64 @@
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
namespace
{
template <typename T>
int countNonZeroImpl(const GpuMat& _src, GpuMat& _buf
)
template <typename T
, typename D
>
void countNonZeroImpl(const GpuMat& _src, GpuMat& _dst, Stream& stream
)
{
const GpuMat_<T>& src = (const GpuMat_<T>&) _src;
GpuMat_<
int>& buf = (GpuMat_<int>&) _buf
;
GpuMat_<
D>& dst = (GpuMat_<D>&) _dst
;
gridCountNonZero(src, buf);
int data;
buf.download(cv::Mat(1, 1, buf.type(), &data));
return data;
gridCountNonZero(src, dst, stream);
}
}
int cv::cuda::countNonZero(InputArray _src, GpuMat& buf
)
void cv::cuda::countNonZero(InputArray _src, OutputArray _dst, Stream& stream
)
{
typedef
int (*func_t)(const GpuMat& _src, GpuMat& _buf
);
typedef
void (*func_t)(const GpuMat& src, GpuMat& dst, Stream& stream
);
static const func_t funcs[] =
{
countNonZeroImpl<uchar>,
countNonZeroImpl<schar>,
countNonZeroImpl<ushort>,
countNonZeroImpl<short>,
countNonZeroImpl<int>,
countNonZeroImpl<float>,
countNonZeroImpl<double
>
countNonZeroImpl<uchar
, int
>,
countNonZeroImpl<schar
, int
>,
countNonZeroImpl<ushort
, int
>,
countNonZeroImpl<short
, int
>,
countNonZeroImpl<int
, int
>,
countNonZeroImpl<float
, int
>,
countNonZeroImpl<double
, int>,
};
GpuMat src =
_src.getGpuMat(
);
GpuMat src =
getInputMat(_src, stream
);
CV_Assert( src.depth() <= CV_64F );
CV_Assert( src.channels() == 1 );
GpuMat dst = getOutputMat(_dst, 1, 1, CV_32SC1, stream);
const func_t func = funcs[src.depth()];
func(src, dst, stream);
syncOutput(dst, _dst, stream);
}
int cv::cuda::countNonZero(InputArray _src)
{
Stream& stream = Stream::Null();
BufferPool pool(stream);
GpuMat buf = pool.getBuffer(1, 1, CV_32SC1);
countNonZero(_src, buf, stream);
int data;
buf.download(Mat(1, 1, CV_32SC1, &data));
return
func(src, buf)
;
return
data
;
}
#endif
modules/cudaarithm/src/cuda/integral.cu
View file @
16b56e71
...
...
@@ -50,51 +50,58 @@
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
////////////////////////////////////////////////////////////////////////
// integral
void cv::cuda::integral(InputArray _src, OutputArray _dst,
GpuMat& buffer,
Stream& stream)
void cv::cuda::integral(InputArray _src, OutputArray _dst, Stream& stream)
{
GpuMat src =
_src.getGpuMat(
);
GpuMat src =
getInputMat(_src, stream
);
CV_Assert( src.type() == CV_8UC1 );
GpuMat_<int>& res = (GpuMat_<int>&) buffer;
BufferPool pool(stream);
GpuMat_<int> res(src.size(), pool.getAllocator());
gridIntegral(globPtr<uchar>(src), res, stream);
_dst.create(src.rows + 1, src.cols + 1, CV_32SC1);
GpuMat dst = _dst.getGpuMat();
GpuMat dst = getOutputMat(_dst, src.rows + 1, src.cols + 1, CV_32SC1, stream);
dst.setTo(Scalar::all(0), stream);
GpuMat inner = dst(Rect(1, 1, src.cols, src.rows));
res.copyTo(inner, stream);
syncOutput(dst, _dst, stream);
}
//////////////////////////////////////////////////////////////////////////////
// sqrIntegral
void cv::cuda::sqrIntegral(InputArray _src, OutputArray _dst,
GpuMat& buf,
Stream& stream)
void cv::cuda::sqrIntegral(InputArray _src, OutputArray _dst, Stream& stream)
{
GpuMat src =
_src.getGpuMat(
);
GpuMat src =
getInputMat(_src, stream
);
CV_Assert( src.type() == CV_8UC1 );
GpuMat_<double>& res = (GpuMat_<double>&) buf;
BufferPool pool(Stream::Null());
GpuMat_<double> res(pool.getBuffer(src.size(), CV_64FC1));
gridIntegral(sqr_(cvt_<int>(globPtr<uchar>(src))), res, stream);
_dst.create(src.rows + 1, src.cols + 1, CV_64FC1);
GpuMat dst = _dst.getGpuMat();
GpuMat dst = getOutputMat(_dst, src.rows + 1, src.cols + 1, CV_64FC1, stream);
dst.setTo(Scalar::all(0), stream);
GpuMat inner = dst(Rect(1, 1, src.cols, src.rows));
res.copyTo(inner, stream);
syncOutput(dst, _dst, stream);
}
#endif
modules/cudaarithm/src/cuda/lut.cu
View file @
16b56e71
...
...
@@ -50,8 +50,10 @@
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
namespace
...
...
@@ -165,7 +167,7 @@ namespace
void LookUpTableImpl::transform(InputArray _src, OutputArray _dst, Stream& stream)
{
GpuMat src =
_src.getGpuMat(
);
GpuMat src =
getInputMat(_src, stream
);
const int cn = src.channels();
const int lut_cn = d_lut.channels();
...
...
@@ -173,8 +175,7 @@ namespace
CV_Assert( src.type() == CV_8UC1 || src.type() == CV_8UC3 );
CV_Assert( lut_cn == 1 || lut_cn == cn );
_dst.create(src.size(), src.type());
GpuMat dst = _dst.getGpuMat();
GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
if (lut_cn == 1)
{
...
...
@@ -196,6 +197,8 @@ namespace
dst3.assign(lut_(src3, tbl), stream);
}
syncOutput(dst, _dst, stream);
}
}
...
...
modules/cudaarithm/src/cuda/math.cu
View file @
16b56e71
...
...
@@ -50,7 +50,10 @@
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
namespace
...
...
@@ -92,16 +95,15 @@ void cv::cuda::abs(InputArray _src, OutputArray _dst, Stream& stream)
absMat<double>
};
GpuMat src =
_src.getGpuMat(
);
GpuMat src =
getInputMat(_src, stream
);
const int depth = src.depth(
);
CV_Assert( src.depth() <= CV_64F
);
CV_DbgAssert( depth <= CV_64F
);
GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream
);
_dst.create(src.size(), src.type());
GpuMat dst = _dst.getGpuMat();
funcs[src.depth()](src.reshape(1), dst.reshape(1), stream);
funcs[depth](src.reshape(1), dst.reshape(1)
, stream);
syncOutput(dst, _dst
, stream);
}
//////////////////////////////////////////////////////////////////////////////
...
...
@@ -113,7 +115,7 @@ namespace
{
__device__ __forceinline__ T operator ()(T x) const
{
return saturate_cast<T>(x * x);
return
cudev::
saturate_cast<T>(x * x);
}
};
...
...
@@ -138,16 +140,15 @@ void cv::cuda::sqr(InputArray _src, OutputArray _dst, Stream& stream)
sqrMat<double>
};
GpuMat src =
_src.getGpuMat(
);
GpuMat src =
getInputMat(_src, stream
);
const int depth = src.depth(
);
CV_Assert( src.depth() <= CV_64F
);
CV_DbgAssert( depth <= CV_64F
);
GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream
);
_dst.create(src.size(), src.type());
GpuMat dst = _dst.getGpuMat();
funcs[src.depth()](src.reshape(1), dst.reshape(1), stream);
funcs[depth](src.reshape(1), dst.reshape(1)
, stream);
syncOutput(dst, _dst
, stream);
}
//////////////////////////////////////////////////////////////////////////////
...
...
@@ -176,16 +177,15 @@ void cv::cuda::sqrt(InputArray _src, OutputArray _dst, Stream& stream)
sqrtMat<double>
};
GpuMat src =
_src.getGpuMat(
);
GpuMat src =
getInputMat(_src, stream
);
const int depth = src.depth(
);
CV_Assert( src.depth() <= CV_64F
);
CV_DbgAssert( depth <= CV_64F
);
GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream
);
_dst.create(src.size(), src.type());
GpuMat dst = _dst.getGpuMat();
funcs[src.depth()](src.reshape(1), dst.reshape(1), stream);
funcs[depth](src.reshape(1), dst.reshape(1)
, stream);
syncOutput(dst, _dst
, stream);
}
////////////////////////////////////////////////////////////////////////
...
...
@@ -198,7 +198,7 @@ namespace
__device__ __forceinline__ T operator ()(T x) const
{
exp_func<T> f;
return saturate_cast<T>(f(x));
return
cudev::
saturate_cast<T>(f(x));
}
};
...
...
@@ -223,16 +223,15 @@ void cv::cuda::exp(InputArray _src, OutputArray _dst, Stream& stream)
expMat<double>
};
GpuMat src =
_src.getGpuMat(
);
GpuMat src =
getInputMat(_src, stream
);
const int depth = src.depth(
);
CV_Assert( src.depth() <= CV_64F
);
CV_DbgAssert( depth <= CV_64F
);
GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream
);
_dst.create(src.size(), src.type());
GpuMat dst = _dst.getGpuMat();
funcs[src.depth()](src.reshape(1), dst.reshape(1), stream);
funcs[depth](src.reshape(1), dst.reshape(1)
, stream);
syncOutput(dst, _dst
, stream);
}
////////////////////////////////////////////////////////////////////////
...
...
@@ -261,16 +260,15 @@ void cv::cuda::log(InputArray _src, OutputArray _dst, Stream& stream)
logMat<double>
};
GpuMat src =
_src.getGpuMat(
);
GpuMat src =
getInputMat(_src, stream
);
const int depth = src.depth(
);
CV_Assert( src.depth() <= CV_64F
);
CV_DbgAssert( depth <= CV_64F
);
GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream
);
_dst.create(src.size(), src.type());
GpuMat dst = _dst.getGpuMat();
funcs[src.depth()](src.reshape(1), dst.reshape(1), stream);
funcs[depth](src.reshape(1), dst.reshape(1)
, stream);
syncOutput(dst, _dst
, stream);
}
////////////////////////////////////////////////////////////////////////
...
...
@@ -284,7 +282,7 @@ namespace
__device__ __forceinline__ T operator()(T e) const
{
return saturate_cast<T>(__powf((float)e, power));
return
cudev::
saturate_cast<T>(__powf((float)e, power));
}
};
template<typename T> struct PowOp<T, true> : unary_function<T, T>
...
...
@@ -293,7 +291,7 @@ namespace
__device__ __forceinline__ T operator()(T e) const
{
T res = saturate_cast<T>(__powf((float)e, power));
T res =
cudev::
saturate_cast<T>(__powf((float)e, power));
if ((e < 0) && (1 & static_cast<int>(power)))
res *= -1;
...
...
@@ -344,16 +342,15 @@ void cv::cuda::pow(InputArray _src, double power, OutputArray _dst, Stream& stre
powMat<double>
};
GpuMat src =
_src.getGpuMat(
);
GpuMat src =
getInputMat(_src, stream
);
const int depth = src.depth(
);
CV_Assert( src.depth() <= CV_64F
);
CV_DbgAssert(depth <= CV_64F
);
GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream
);
_dst.create(src.size(), src.type());
GpuMat dst = _dst.getGpuMat();
funcs[src.depth()](src.reshape(1), power, dst.reshape(1), stream);
funcs[depth](src.reshape(1), power, dst.reshape(1)
, stream);
syncOutput(dst, _dst
, stream);
}
#endif
modules/cudaarithm/src/cuda/minmax.cu
View file @
16b56e71
...
...
@@ -50,62 +50,140 @@
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
namespace
{
template <typename T>
void minMaxImpl(const GpuMat& _src, const GpuMat& mask, GpuMat& _
buf, double* minVal, double* maxVal
)
template <typename T
, typename R
>
void minMaxImpl(const GpuMat& _src, const GpuMat& mask, GpuMat& _
dst, Stream& stream
)
{
typedef typename SelectIf<
TypesEquals<T, double>::value,
double,
typename SelectIf<TypesEquals<T, float>::value, float, int>::type
>::type work_type;
const GpuMat_<T>& src = (const GpuMat_<T>&) _src;
GpuMat_<
work_type>& buf = (GpuMat_<work_type>&) _buf
;
GpuMat_<
R>& dst = (GpuMat_<R>&) _dst
;
if (mask.empty())
gridFindMinMaxVal(src,
buf
);
gridFindMinMaxVal(src,
dst, stream
);
else
gridFindMinMaxVal(src, buf, globPtr<uchar>(mask));
gridFindMinMaxVal(src, dst, globPtr<uchar>(mask), stream);
}
template <typename T, typename R>
void minMaxImpl(const GpuMat& src, const GpuMat& mask, double* minVal, double* maxVal)
{
BufferPool pool(Stream::Null());
GpuMat buf(pool.getBuffer(1, 2, DataType<R>::type));
work_type data[2];
buf.download(cv::Mat(1, 2, buf.type(), data));
minMaxImpl<T, R>(src, mask, buf, Stream::Null());
if (minVal)
*minVal = data[0]
;
R data[2];
buf.download(Mat(1, 2, buf.type(), data))
;
if (maxVal)
*maxVal = data[1];
}
}
void cv::cuda::
minMax(InputArray _src, double* minVal, double* maxVal, InputArray _mask, GpuMat& buf
)
void cv::cuda::
findMinMax(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream
)
{
typedef void (*func_t)(const GpuMat& _src, const GpuMat& mask, GpuMat& _
buf, double* minVal, double* maxVal
);
typedef void (*func_t)(const GpuMat& _src, const GpuMat& mask, GpuMat& _
dst, Stream& stream
);
static const func_t funcs[] =
{
minMaxImpl<uchar>,
minMaxImpl<schar>,
minMaxImpl<ushort>,
minMaxImpl<short>,
minMaxImpl<int>,
minMaxImpl<float>,
minMaxImpl<double>
minMaxImpl<uchar
, int
>,
minMaxImpl<schar
, int
>,
minMaxImpl<ushort
, int
>,
minMaxImpl<short
, int
>,
minMaxImpl<int
, int
>,
minMaxImpl<float
, float
>,
minMaxImpl<double
, double
>
};
GpuMat src = _src.getGpuMat(
);
GpuMat mask = _mask.getGpuMat(
);
const GpuMat src = getInputMat(_src, stream
);
const GpuMat mask = getInputMat(_mask, stream
);
CV_Assert( src.channels() == 1 );
CV_DbgAssert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );
CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );
const int src_depth = src.depth();
const int dst_depth = src_depth < CV_32F ? CV_32S : src_depth;
GpuMat dst = getOutputMat(_dst, 1, 2, dst_depth, stream);
const func_t func = funcs[src.depth()];
func(src, mask, dst, stream);
syncOutput(dst, _dst, stream);
}
// Blocking minMax: delegates to the asynchronous findMinMax on the default
// stream, waits for completion, and returns the two values through the
// optional output pointers.
void cv::cuda::minMax(InputArray _src, double* minVal, double* maxVal, InputArray _mask)
{
    Stream& stream = Stream::Null();

    HostMem results;
    findMinMax(_src, results, _mask, stream);

    stream.waitForCompletion();

    // Convert whatever depth findMinMax produced into two doubles.
    double hostVals[2];
    results.createMatHeader().convertTo(Mat(1, 2, CV_64FC1, &hostVals[0]), CV_64F);

    if (minVal)
        *minVal = hostVals[0];

    if (maxVal)
        *maxVal = hostVals[1];
}
// Cross-translation-unit declaration: asynchronous max(|src|) reduction
// shared with other cudaarithm sources; the definition follows below.
namespace cv { namespace cuda { namespace internal {
void findMaxAbs(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream);
}}}
namespace
{
template <typename T, typename R>
void findMaxAbsImpl(const GpuMat& _src, const GpuMat& mask, GpuMat& _dst, Stream& stream)
{
const GpuMat_<T>& src = (const GpuMat_<T>&) _src;
GpuMat_<R>& dst = (GpuMat_<R>&) _dst;
if (mask.empty())
gridFindMaxVal(abs_(src), dst, stream);
else
gridFindMaxVal(abs_(src), dst, globPtr<uchar>(mask), stream);
}
}
// Asynchronous max(|src|) of a single-channel matrix into a 1x1 output
// (CV_32S for integral sources, else source depth).
// Fixes merge residue: a stale call `func(src, mask, buf, minVal, maxVal)`
// (referencing undeclared variables) was left in next to the real call.
void cv::cuda::internal::findMaxAbs(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream)
{
    typedef void (*func_t)(const GpuMat& _src, const GpuMat& mask, GpuMat& _dst, Stream& stream);
    // Indexed by source depth (CV_8U .. CV_64F).
    static const func_t funcs[] =
    {
        findMaxAbsImpl<uchar, int>,
        findMaxAbsImpl<schar, int>,
        findMaxAbsImpl<ushort, int>,
        findMaxAbsImpl<short, int>,
        findMaxAbsImpl<int, int>,
        findMaxAbsImpl<float, float>,
        findMaxAbsImpl<double, double>
    };

    const GpuMat src = getInputMat(_src, stream);
    const GpuMat mask = getInputMat(_mask, stream);

    CV_Assert( src.channels() == 1 );
    CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );

    const int src_depth = src.depth();
    const int dst_depth = src_depth < CV_32F ? CV_32S : src_depth;

    GpuMat dst = getOutputMat(_dst, 1, 1, dst_depth, stream);

    const func_t func = funcs[src_depth];
    func(src, mask, dst, stream);

    syncOutput(dst, _dst, stream);
}
#endif
modules/cudaarithm/src/cuda/minmaxloc.cu
View file @
16b56e71
...
...
@@ -50,78 +50,110 @@
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
namespace
{
template <typename T>
void minMaxLocImpl(const GpuMat& _src, const GpuMat& mask, GpuMat& _valBuf, GpuMat& _locBuf,
double* minVal, double* maxVal, cv::Point* minLoc, cv::Point* maxLoc
)
template <typename T
, typename R
>
void minMaxLocImpl(const GpuMat& _src, const GpuMat& mask, GpuMat& _valBuf, GpuMat& _locBuf,
Stream& stream
)
{
typedef typename SelectIf<
TypesEquals<T, double>::value,
double,
typename SelectIf<TypesEquals<T, float>::value, float, int>::type
>::type work_type;
const GpuMat_<T>& src = (const GpuMat_<T>&) _src;
GpuMat_<
work_type>& valBuf = (GpuMat_<work_type
>&) _valBuf;
GpuMat_<
R>& valBuf = (GpuMat_<R
>&) _valBuf;
GpuMat_<int>& locBuf = (GpuMat_<int>&) _locBuf;
if (mask.empty())
gridMinMaxLoc(src, valBuf, locBuf);
gridMinMaxLoc(src, valBuf, locBuf
, stream
);
else
gridMinMaxLoc(src, valBuf, locBuf, globPtr<uchar>(mask));
gridMinMaxLoc(src, valBuf, locBuf, globPtr<uchar>(mask), stream);
}
}
// Asynchronous min/max with locations. _minMaxVals receives a 2x1 matrix
// {min, max}; _loc receives a 2x1 CV_32S matrix of flattened indices.
// Results stay on the device when the output is a GpuMat, otherwise they are
// downloaded asynchronously on `stream`.
// Fixes merge residue: the old host-side h_valBuf/h_locBuf download code was
// interleaved with the new stream-based implementation.
void cv::cuda::findMinMaxLoc(InputArray _src, OutputArray _minMaxVals, OutputArray _loc, InputArray _mask, Stream& stream)
{
    typedef void (*func_t)(const GpuMat& _src, const GpuMat& mask, GpuMat& _valBuf, GpuMat& _locBuf, Stream& stream);
    // Indexed by source depth (CV_8U .. CV_64F).
    static const func_t funcs[] =
    {
        minMaxLocImpl<uchar, int>,
        minMaxLocImpl<schar, int>,
        minMaxLocImpl<ushort, int>,
        minMaxLocImpl<short, int>,
        minMaxLocImpl<int, int>,
        minMaxLocImpl<float, float>,
        minMaxLocImpl<double, double>
    };

    const GpuMat src = getInputMat(_src, stream);
    const GpuMat mask = getInputMat(_mask, stream);

    CV_Assert( src.channels() == 1 );
    CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );

    const int src_depth = src.depth();

    // Scratch buffers come from the stream's buffer pool.
    BufferPool pool(stream);
    GpuMat valBuf(pool.getAllocator());
    GpuMat locBuf(pool.getAllocator());

    const func_t func = funcs[src_depth];
    func(src, mask, valBuf, locBuf, stream);

    GpuMat minMaxVals = valBuf.colRange(0, 1);
    GpuMat loc = locBuf.colRange(0, 1);

    if (_minMaxVals.kind() == _InputArray::CUDA_GPU_MAT)
    {
        minMaxVals.copyTo(_minMaxVals, stream);
    }
    else
    {
        minMaxVals.download(_minMaxVals, stream);
    }

    if (_loc.kind() == _InputArray::CUDA_GPU_MAT)
    {
        loc.copyTo(_loc, stream);
    }
    else
    {
        loc.download(_loc, stream);
    }
}
// Blocking minMaxLoc: runs findMinMaxLoc on the default stream, waits, then
// converts the downloaded values/indices into doubles and 2-D points.
// Fixes merge residue: the old buffered overload (valBuf/locBuf parameters,
// function table, getGpuMat calls) was interleaved with the new wrapper.
void cv::cuda::minMaxLoc(InputArray _src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, InputArray _mask)
{
    Stream& stream = Stream::Null();

    HostMem minMaxVals, locVals;
    findMinMaxLoc(_src, minMaxVals, locVals, _mask, stream);

    stream.waitForCompletion();

    double vals[2];
    minMaxVals.createMatHeader().convertTo(Mat(minMaxVals.size(), CV_64FC1, &vals[0]), CV_64F);

    int locs[2];
    locVals.createMatHeader().copyTo(Mat(locVals.size(), CV_32SC1, &locs[0]));

    // Convert flattened indices back into (x, y) using the source width.
    Size size = _src.size();
    cv::Point locs2D[] = {
        cv::Point(locs[0] % size.width, locs[0] / size.width),
        cv::Point(locs[1] % size.width, locs[1] / size.width),
    };

    if (minVal)
        *minVal = vals[0];

    if (maxVal)
        *maxVal = vals[1];

    if (minLoc)
        *minLoc = locs2D[0];

    if (maxLoc)
        *maxLoc = locs2D[1];
}
#endif
modules/cudaarithm/src/cuda/mul_spectrums.cu
View file @
16b56e71
...
...
@@ -50,7 +50,10 @@
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
//////////////////////////////////////////////////////////////////////////////
...
...
@@ -120,33 +123,33 @@ void cv::cuda::mulSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst
{
(void) flags;
GpuMat src1 =
_src1.getGpuMat(
);
GpuMat src2 =
_src2.getGpuMat(
);
GpuMat src1 =
getInputMat(_src1, stream
);
GpuMat src2 =
getInputMat(_src2, stream
);
CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2 );
CV_Assert( src1.size() == src2.size() );
_dst.create(src1.size(), CV_32FC2);
GpuMat dst = _dst.getGpuMat();
GpuMat dst = getOutputMat(_dst, src1.size(), CV_32FC2, stream);
if (conjB)
gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), comlex_mul_conj(), stream);
else
gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), comlex_mul(), stream);
syncOutput(dst, _dst, stream);
}
void cv::cuda::mulAndScaleSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst, int flags, float scale, bool conjB, Stream& stream)
{
(void) flags;
GpuMat src1 =
_src1.getGpuMat(
);
GpuMat src2 =
_src2.getGpuMat(
);
GpuMat src1 =
getInputMat(_src1, stream
);
GpuMat src2 =
getInputMat(_src2, stream
);
CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2);
CV_Assert( src1.size() == src2.size() );
_dst.create(src1.size(), CV_32FC2);
GpuMat dst = _dst.getGpuMat();
GpuMat dst = getOutputMat(_dst, src1.size(), CV_32FC2, stream);
if (conjB)
{
...
...
@@ -160,6 +163,8 @@ void cv::cuda::mulAndScaleSpectrums(InputArray _src1, InputArray _src2, OutputAr
op.scale = scale;
gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), op, stream);
}
syncOutput(dst, _dst, stream);
}
#endif
modules/cudaarithm/src/cuda/norm.cu
View file @
16b56e71
...
...
@@ -50,70 +50,140 @@
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
namespace
{
double normDiffInf(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _buf
)
void normDiffInf(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _dst, Stream& stream
)
{
const GpuMat_<uchar>& src1 = (const GpuMat_<uchar>&) _src1;
const GpuMat_<uchar>& src2 = (const GpuMat_<uchar>&) _src2;
GpuMat_<int>&
buf = (GpuMat_<int>&) _buf
;
GpuMat_<int>&
dst = (GpuMat_<int>&) _dst
;
gridFindMinMaxVal(abs_(cvt_<int>(src1) - cvt_<int>(src2)), buf);
int data[2];
buf.download(cv::Mat(1, 2, buf.type(), data));
return data[1];
gridFindMaxVal(abs_(cvt_<int>(src1) - cvt_<int>(src2)), dst, stream);
}
double normDiffL1(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _buf
)
void normDiffL1(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _dst, Stream& stream
)
{
const GpuMat_<uchar>& src1 = (const GpuMat_<uchar>&) _src1;
const GpuMat_<uchar>& src2 = (const GpuMat_<uchar>&) _src2;
GpuMat_<int>& buf = (GpuMat_<int>&) _buf;
gridCalcSum(abs_(cvt_<int>(src1) - cvt_<int>(src2)), buf);
GpuMat_<int>& dst = (GpuMat_<int>&) _dst;
int data;
buf.download(cv::Mat(1, 1, buf.type(), &data));
return data;
gridCalcSum(abs_(cvt_<int>(src1) - cvt_<int>(src2)), dst, stream);
}
double normDiffL2(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _buf
)
void normDiffL2(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _dst, Stream& stream
)
{
const GpuMat_<uchar>& src1 = (const GpuMat_<uchar>&) _src1;
const GpuMat_<uchar>& src2 = (const GpuMat_<uchar>&) _src2;
GpuMat_<double>& buf = (GpuMat_<double>&) _buf;
gridCalcSum(sqr_(cvt_<double>(src1) - cvt_<double>(src2)), buf);
GpuMat_<double>& dst = (GpuMat_<double>&) _dst;
double data
;
buf.download(cv::Mat(1, 1, buf.type(), &data
));
BufferPool pool(stream)
;
GpuMat_<double> buf(1, 1, pool.getAllocator(
));
return std::sqrt(data);
gridCalcSum(sqr_(cvt_<double>(src1) - cvt_<double>(src2)), buf, stream);
gridTransformUnary(buf, dst, sqrt_func<double>(), stream);
}
}
double cv::cuda::norm(InputArray _src1, InputArray _src2, GpuMat& buf, int normType
)
void cv::cuda::calcNormDiff(InputArray _src1, InputArray _src2, OutputArray _dst, int normType, Stream& stream
)
{
typedef
double (*func_t)(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _buf
);
typedef
void (*func_t)(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _dst, Stream& stream
);
static const func_t funcs[] =
{
0, normDiffInf, normDiffL1, 0, normDiffL2
};
GpuMat src1 =
_src1.getGpuMat(
);
GpuMat src2 =
_src2.getGpuMat(
);
GpuMat src1 =
getInputMat(_src1, stream
);
GpuMat src2 =
getInputMat(_src2, stream
);
CV_Assert( src1.type() == CV_8UC1 );
CV_Assert( src1.size() == src2.size() && src1.type() == src2.type() );
CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 );
return funcs[normType](src1, src2, buf);
GpuMat dst = getOutputMat(_dst, 1, 1, normType == NORM_L2 ? CV_64FC1 : CV_32SC1, stream);
const func_t func = funcs[normType];
func(src1, src2, dst, stream);
syncOutput(dst, _dst, stream);
}
// Blocking wrapper over calcNormDiff: computes ||src1 - src2|| on the default
// stream and returns the scalar result as a double.
double cv::cuda::norm(InputArray _src1, InputArray _src2, int normType)
{
    Stream& stream = Stream::Null();

    HostMem result;
    calcNormDiff(_src1, _src2, result, normType, stream);

    stream.waitForCompletion();

    double normValue;
    result.createMatHeader().convertTo(Mat(1, 1, CV_64FC1, &normValue), CV_64F);

    return normValue;
}
// Cross-translation-unit declaration: asynchronous L2 norm of a
// single-channel matrix; the definition follows below.
namespace cv { namespace cuda { namespace internal {
void normL2(cv::InputArray _src, cv::OutputArray _dst, cv::InputArray _mask, Stream& stream);
}}}
namespace
{
template <typename T, typename R>
void normL2Impl(const GpuMat& _src, const GpuMat& mask, GpuMat& _dst, Stream& stream)
{
const GpuMat_<T>& src = (const GpuMat_<T>&) _src;
GpuMat_<R>& dst = (GpuMat_<R>&) _dst;
BufferPool pool(stream);
GpuMat_<double> buf(1, 1, pool.getAllocator());
if (mask.empty())
{
gridCalcSum(sqr_(cvt_<double>(src)), buf, stream);
}
else
{
gridCalcSum(sqr_(cvt_<double>(src)), buf, globPtr<uchar>(mask), stream);
}
gridTransformUnary(buf, dst, sqrt_func<double>(), stream);
}
}
// Asynchronous L2 norm of a single-channel matrix of any supported depth.
// The scalar result is written to a 1x1 CV_64F matrix in _dst on `stream`.
void cv::cuda::internal::normL2(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream)
{
    typedef void (*func_t)(const GpuMat& _src, const GpuMat& mask, GpuMat& _dst, Stream& stream);
    // Indexed by source depth (CV_8U .. CV_64F); all accumulate in double.
    static const func_t funcs[] =
    {
        normL2Impl<uchar, double>,
        normL2Impl<schar, double>,
        normL2Impl<ushort, double>,
        normL2Impl<short, double>,
        normL2Impl<int, double>,
        normL2Impl<float, double>,
        normL2Impl<double, double>
    };

    const GpuMat src = getInputMat(_src, stream);
    const GpuMat mask = getInputMat(_mask, stream);

    CV_Assert( src.channels() == 1 );
    CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );

    GpuMat dst = getOutputMat(_dst, 1, 1, CV_64FC1, stream);

    const func_t func = funcs[src.depth()];
    func(src, mask, dst, stream);

    syncOutput(dst, _dst, stream);
}
#endif
modules/cudaarithm/src/cuda/normalize.cu
0 → 100644
View file @
16b56e71
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "opencv2/opencv_modules.hpp"
#ifndef HAVE_OPENCV_CUDEV
#error "opencv_cudev is required"
#else
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
namespace {
    // Device functor that linearly rescales source values from the computed
    // input range [minMaxVals[0], minMaxVals[1]] into the target range
    // [dmin, dmax]. T = source type, R = result type, I = min/max value type.
    template <typename T, typename R, typename I>
    struct ConvertorMinMax : unary_function<T, R>
    {
        typedef typename LargerType<T, R>::type larger_type1;
        typedef typename LargerType<larger_type1, I>::type larger_type2;
        typedef typename LargerType<larger_type2, float>::type scalar_type;

        scalar_type dmin, dmax; // target range, dmin <= dmax
        const I* minMaxVals;    // device pointer to {src min, src max}

        __device__ R operator ()(typename TypeTraits<T>::parameter_type src) const
        {
            const scalar_type smin = minMaxVals[0];
            const scalar_type smax = minMaxVals[1];
            // Guard against a constant image: when smax == smin the scale
            // collapses to 0 and every pixel maps to dmin.
            const scalar_type scale = (dmax - dmin) * (smax - smin > numeric_limits<scalar_type>::epsilon() ? 1.0 / (smax - smin) : 0.0);
            const scalar_type shift = dmin - smin * scale;

            return cudev::saturate_cast<R>(scale * src + shift);
        }
    };
    // NORM_MINMAX normalization: finds the source min/max on the device, then
    // linearly maps values into [min(a,b), max(a,b)]. Fully asynchronous; the
    // min/max stay on the device and are read by the transform kernel.
    template <typename T, typename R, typename I>
    void normalizeMinMax(const GpuMat& _src, GpuMat& _dst, double a, double b, const GpuMat& mask, Stream& stream)
    {
        const GpuMat_<T>& src = (const GpuMat_<T>&)_src;
        GpuMat_<R>& dst = (GpuMat_<R>&)_dst;

        // 1x2 device buffer holding {min, max}, allocated from the stream's pool.
        BufferPool pool(stream);
        GpuMat_<I> minMaxVals(1, 2, pool.getAllocator());

        if (mask.empty())
        {
            gridFindMinMaxVal(src, minMaxVals, stream);
        }
        else
        {
            gridFindMinMaxVal(src, minMaxVals, globPtr<uchar>(mask), stream);
        }

        ConvertorMinMax<T, R, I> cvt;
        cvt.dmin = std::min(a, b);
        cvt.dmax = std::max(a, b);
        cvt.minMaxVals = minMaxVals[0]; // device pointer to row 0, dereferenced in the kernel

        if (mask.empty())
        {
            gridTransformUnary(src, dst, cvt, stream);
        }
        else
        {
            // Zero masked-out pixels first so the output is fully defined.
            dst.setTo(Scalar::all(0), stream);
            gridTransformUnary(src, dst, cvt, globPtr<uchar>(mask), stream);
        }
    }
    // Device functor that scales source values by a / norm, where the norm is
    // read from a device pointer. When normL2 is true the stored value is a
    // sum of squares and its square root is taken first. A (near-)zero norm
    // maps every pixel to zero instead of dividing by zero.
    template <typename T, typename R, typename I, bool normL2>
    struct ConvertorNorm : unary_function<T, R>
    {
        typedef typename LargerType<T, R>::type larger_type1;
        typedef typename LargerType<larger_type1, I>::type larger_type2;
        typedef typename LargerType<larger_type2, float>::type scalar_type;

        scalar_type a;    // requested target norm
        const I* normVal; // device pointer to the computed norm (or sum of squares)

        __device__ R operator ()(typename TypeTraits<T>::parameter_type src) const
        {
            sqrt_func<scalar_type> sqrt;

            scalar_type scale = normL2 ? sqrt(*normVal) : *normVal;
            scale = scale > numeric_limits<scalar_type>::epsilon() ? a / scale : 0.0;

            return cudev::saturate_cast<R>(scale * src);
        }
    };
    // NORM_INF / NORM_L1 / NORM_L2 normalization: computes the requested norm
    // of src on the device, then rescales so the result's norm equals `a`.
    // T = source type, R = destination type, I = accumulator type.
    template <typename T, typename R, typename I>
    void normalizeNorm(const GpuMat& _src, GpuMat& _dst, double a, int normType, const GpuMat& mask, Stream& stream)
    {
        const GpuMat_<T>& src = (const GpuMat_<T>&)_src;
        GpuMat_<R>& dst = (GpuMat_<R>&)_dst;

        // 1x1 device scalar: sum|x| (L1), sum x^2 (L2) or max|x| (INF).
        BufferPool pool(stream);
        GpuMat_<I> normVal(1, 1, pool.getAllocator());

        if (normType == NORM_L1)
        {
            if (mask.empty())
            {
                gridCalcSum(abs_(cvt_<I>(src)), normVal, stream);
            }
            else
            {
                gridCalcSum(abs_(cvt_<I>(src)), normVal, globPtr<uchar>(mask), stream);
            }
        }
        else if (normType == NORM_L2)
        {
            // The square root is deferred to ConvertorNorm<..., true> below.
            if (mask.empty())
            {
                gridCalcSum(sqr_(cvt_<I>(src)), normVal, stream);
            }
            else
            {
                gridCalcSum(sqr_(cvt_<I>(src)), normVal, globPtr<uchar>(mask), stream);
            }
        }
        else // NORM_INF
        {
            if (mask.empty())
            {
                gridFindMaxVal(abs_(cvt_<I>(src)), normVal, stream);
            }
            else
            {
                gridFindMaxVal(abs_(cvt_<I>(src)), normVal, globPtr<uchar>(mask), stream);
            }
        }

        // The two branches differ only in the functor's compile-time normL2 flag.
        if (normType == NORM_L2)
        {
            ConvertorNorm<T, R, I, true> cvt;
            cvt.a = a;
            cvt.normVal = normVal[0]; // device pointer, read inside the kernel

            if (mask.empty())
            {
                gridTransformUnary(src, dst, cvt, stream);
            }
            else
            {
                // Zero masked-out pixels first so the output is fully defined.
                dst.setTo(Scalar::all(0), stream);
                gridTransformUnary(src, dst, cvt, globPtr<uchar>(mask), stream);
            }
        }
        else
        {
            ConvertorNorm<T, R, I, false> cvt;
            cvt.a = a;
            cvt.normVal = normVal[0];

            if (mask.empty())
            {
                gridTransformUnary(src, dst, cvt, stream);
            }
            else
            {
                dst.setTo(Scalar::all(0), stream);
                gridTransformUnary(src, dst, cvt, globPtr<uchar>(mask), stream);
            }
        }
    }
} // namespace
// Normalizes a single-channel matrix by range (NORM_MINMAX) or by norm
// (NORM_INF / NORM_L1 / NORM_L2), optionally masked, into the requested
// output depth. Computation happens at tmp_depth (CV_32F, or the source
// depth for 64-bit sources); a final convertTo handles any other dtype.
// Fix: removed the redundant `_dst.create(src.size(), tmp_depth)` that
// preceded getOutputMat — getOutputMat already performs the allocation.
void cv::cuda::normalize(InputArray _src, OutputArray _dst, double a, double b, int normType, int dtype, InputArray _mask, Stream& stream)
{
    typedef void (*func_minmax_t)(const GpuMat& _src, GpuMat& _dst, double a, double b, const GpuMat& mask, Stream& stream);
    typedef void (*func_norm_t)(const GpuMat& _src, GpuMat& _dst, double a, int normType, const GpuMat& mask, Stream& stream);

    // Both tables are indexed by source depth (CV_8U .. CV_64F).
    static const func_minmax_t funcs_minmax[] =
    {
        normalizeMinMax<uchar, float, float>,
        normalizeMinMax<schar, float, float>,
        normalizeMinMax<ushort, float, float>,
        normalizeMinMax<short, float, float>,
        normalizeMinMax<int, float, float>,
        normalizeMinMax<float, float, float>,
        normalizeMinMax<double, double, double>
    };
    static const func_norm_t funcs_norm[] =
    {
        normalizeNorm<uchar, float, float>,
        normalizeNorm<schar, float, float>,
        normalizeNorm<ushort, float, float>,
        normalizeNorm<short, float, float>,
        normalizeNorm<int, float, float>,
        normalizeNorm<float, float, float>,
        normalizeNorm<double, double, double>
    };

    CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_MINMAX );

    const GpuMat src = getInputMat(_src, stream);
    const GpuMat mask = getInputMat(_mask, stream);

    CV_Assert( src.channels() == 1 );
    CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );

    dtype = CV_MAT_DEPTH(dtype);

    const int src_depth = src.depth();
    const int tmp_depth = src_depth <= CV_32F ? CV_32F : src_depth;

    GpuMat dst;
    if (dtype == tmp_depth)
    {
        // Output depth matches the working depth: write straight to the output.
        dst = getOutputMat(_dst, src.size(), tmp_depth, stream);
    }
    else
    {
        // Otherwise work in a pooled temporary and convert at the end.
        BufferPool pool(stream);
        dst = pool.getBuffer(src.size(), tmp_depth);
    }

    if (normType == NORM_MINMAX)
    {
        const func_minmax_t func = funcs_minmax[src_depth];
        func(src, dst, a, b, mask, stream);
    }
    else
    {
        const func_norm_t func = funcs_norm[src_depth];
        func(src, dst, a, normType, mask, stream);
    }

    if (dtype == tmp_depth)
    {
        syncOutput(dst, _dst, stream);
    }
    else
    {
        dst.convertTo(_dst, dtype, stream);
    }
}
#endif
modules/cudaarithm/src/cuda/polar_cart.cu
View file @
16b56e71
...
...
@@ -50,55 +50,59 @@
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
// Per-element magnitude sqrt(x^2 + y^2) of two CV_32F matrices.
// Reconstructed from the interleaved diff: duplicate x/y/dst declarations and
// the split "CV_ Dbg Assert" fragments are removed; the merged result uses
// getInputMat/getOutputMat with hard CV_Assert checks.
void cv::cuda::magnitude(InputArray _x, InputArray _y, OutputArray _dst, Stream& stream)
{
    GpuMat x = getInputMat(_x, stream);
    GpuMat y = getInputMat(_y, stream);

    CV_Assert( x.depth() == CV_32F );
    CV_Assert( y.type() == x.type() && y.size() == x.size() );

    GpuMat dst = getOutputMat(_dst, x.size(), CV_32FC1, stream);

    // Flatten to single-channel views for the element-wise transform.
    GpuMat_<float> xc(x.reshape(1));
    GpuMat_<float> yc(y.reshape(1));
    GpuMat_<float> magc(dst.reshape(1));

    gridTransformBinary(xc, yc, magc, magnitude_func<float>(), stream);

    syncOutput(dst, _dst, stream);
}
// Per-element squared magnitude x^2 + y^2 of two CV_32F matrices (avoids the
// square root of magnitude()). Reconstructed from the interleaved diff the
// same way as cv::cuda::magnitude above.
void cv::cuda::magnitudeSqr(InputArray _x, InputArray _y, OutputArray _dst, Stream& stream)
{
    GpuMat x = getInputMat(_x, stream);
    GpuMat y = getInputMat(_y, stream);

    CV_Assert( x.depth() == CV_32F );
    CV_Assert( y.type() == x.type() && y.size() == x.size() );

    GpuMat dst = getOutputMat(_dst, x.size(), CV_32FC1, stream);

    // Flatten to single-channel views for the element-wise transform.
    GpuMat_<float> xc(x.reshape(1));
    GpuMat_<float> yc(y.reshape(1));
    GpuMat_<float> magc(dst.reshape(1));

    gridTransformBinary(xc, yc, magc, magnitude_sqr_func<float>(), stream);

    syncOutput(dst, _dst, stream);
}
void cv::cuda::phase(InputArray _x, InputArray _y, OutputArray _dst, bool angleInDegrees, Stream& stream)
{
GpuMat x =
_x.getGpuMat(
);
GpuMat y =
_y.getGpuMat(
);
GpuMat x =
getInputMat(_x, stream
);
GpuMat y =
getInputMat(_y, stream
);
CV_
Dbg
Assert( x.depth() == CV_32F );
CV_
Dbg
Assert( y.type() == x.type() && y.size() == x.size() );
CV_Assert( x.depth() == CV_32F );
CV_Assert( y.type() == x.type() && y.size() == x.size() );
_dst.create(x.size(), CV_32FC1);
GpuMat dst = _dst.getGpuMat();
GpuMat dst = getOutputMat(_dst, x.size(), CV_32FC1, stream);
GpuMat_<float> xc(x.reshape(1));
GpuMat_<float> yc(y.reshape(1));
...
...
@@ -108,21 +112,20 @@ void cv::cuda::phase(InputArray _x, InputArray _y, OutputArray _dst, bool angleI
gridTransformBinary(xc, yc, anglec, direction_func<float, true>(), stream);
else
gridTransformBinary(xc, yc, anglec, direction_func<float, false>(), stream);
syncOutput(dst, _dst, stream);
}
void cv::cuda::cartToPolar(InputArray _x, InputArray _y, OutputArray _mag, OutputArray _angle, bool angleInDegrees, Stream& stream)
{
GpuMat x = _x.getGpuMat();
GpuMat y = _y.getGpuMat();
CV_DbgAssert( x.depth() == CV_32F );
CV_DbgAssert( y.type() == x.type() && y.size() == x.size() );
GpuMat x = getInputMat(_x, stream);
GpuMat y = getInputMat(_y, stream);
_mag.create(x.size(), CV_32FC1
);
GpuMat mag = _mag.getGpuMat(
);
CV_Assert( x.depth() == CV_32F
);
CV_Assert( y.type() == x.type() && y.size() == x.size()
);
_angle.create(x.size(), CV_32FC1
);
GpuMat angle =
_angle.getGpuMat(
);
GpuMat mag = getOutputMat(_mag, x.size(), CV_32FC1, stream
);
GpuMat angle =
getOutputMat(_angle, x.size(), CV_32FC1, stream
);
GpuMat_<float> xc(x.reshape(1));
GpuMat_<float> yc(y.reshape(1));
...
...
@@ -147,6 +150,9 @@ void cv::cuda::cartToPolar(InputArray _x, InputArray _y, OutputArray _mag, Outpu
binaryTupleAdapter<0, 1>(direction_func<float, false>())),
stream);
}
syncOutput(mag, _mag, stream);
syncOutput(angle, _angle, stream);
}
namespace
...
...
@@ -173,17 +179,14 @@ namespace
void cv::cuda::polarToCart(InputArray _mag, InputArray _angle, OutputArray _x, OutputArray _y, bool angleInDegrees, Stream& _stream)
{
GpuMat mag = _mag.getGpuMat();
GpuMat angle = _angle.getGpuMat();
CV_DbgAssert( angle.depth() == CV_32F );
CV_DbgAssert( mag.empty() || (mag.type() == angle.type() && mag.size() == angle.size()) );
GpuMat mag = getInputMat(_mag, _stream);
GpuMat angle = getInputMat(_angle, _stream);
_x.create(angle.size(), CV_32FC1
);
GpuMat x = _x.getGpuMat(
);
CV_Assert( angle.depth() == CV_32F
);
CV_Assert( mag.empty() || (mag.type() == angle.type() && mag.size() == angle.size())
);
_y.create(angle.size(), CV_32FC1
);
GpuMat y =
_y.getGpuMat(
);
GpuMat x = getOutputMat(_x, angle.size(), CV_32FC1, _stream
);
GpuMat y =
getOutputMat(_y, angle.size(), CV_32FC1, _stream
);
GpuMat_<float> xc(x.reshape(1));
GpuMat_<float> yc(y.reshape(1));
...
...
@@ -204,6 +207,9 @@ void cv::cuda::polarToCart(InputArray _mag, InputArray _angle, OutputArray _x, O
CV_CUDEV_SAFE_CALL( cudaGetLastError() );
syncOutput(x, _x, _stream);
syncOutput(y, _y, _stream);
if (stream == 0)
CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
}
...
...
modules/cudaarithm/src/cuda/reduce.cu
View file @
16b56e71
...
...
@@ -50,7 +50,10 @@
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
namespace
...
...
@@ -125,7 +128,7 @@ namespace
void cv::cuda::reduce(InputArray _src, OutputArray _dst, int dim, int reduceOp, int dtype, Stream& stream)
{
GpuMat src =
_src.getGpuMat(
);
GpuMat src =
getInputMat(_src, stream
);
CV_Assert( src.channels() <= 4 );
CV_Assert( dim == 0 || dim == 1 );
...
...
@@ -134,8 +137,7 @@ void cv::cuda::reduce(InputArray _src, OutputArray _dst, int dim, int reduceOp,
if (dtype < 0)
dtype = src.depth();
_dst.create(1, dim == 0 ? src.cols : src.rows, CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()));
GpuMat dst = _dst.getGpuMat();
GpuMat dst = getOutputMat(_dst, 1, dim == 0 ? src.cols : src.rows, CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()), stream);
if (dim == 0)
{
...
...
@@ -292,6 +294,8 @@ void cv::cuda::reduce(InputArray _src, OutputArray _dst, int dim, int reduceOp,
func(src, dst, reduceOp, stream);
}
syncOutput(dst, _dst, stream);
}
#endif
modules/cudaarithm/src/cuda/split_merge.cu
View file @
16b56e71
...
...
@@ -50,7 +50,10 @@
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
////////////////////////////////////////////////////////////////////////
...
...
@@ -92,20 +95,18 @@ namespace
void mergeImpl(const GpuMat* src, size_t n, cv::OutputArray _dst, Stream& stream)
{
CV_
Dbg
Assert( src != 0 );
CV_
Dbg
Assert( n > 0 && n <= 4 );
CV_Assert( src != 0 );
CV_Assert( n > 0 && n <= 4 );
const int depth = src[0].depth();
const cv::Size size = src[0].size();
#ifdef _DEBUG
for (size_t i = 0; i < n; ++i)
{
CV_Assert( src[i].size() == size );
CV_Assert( src[i].depth() == depth );
CV_Assert( src[i].channels() == 1 );
}
#endif
if (n == 1)
{
...
...
@@ -123,8 +124,7 @@ namespace
const int channels = static_cast<int>(n);
_dst.create(size, CV_MAKE_TYPE(depth, channels));
GpuMat dst = _dst.getGpuMat();
GpuMat dst = getOutputMat(_dst, size, CV_MAKE_TYPE(depth, channels), stream);
const func_t func = funcs[channels - 2][CV_ELEM_SIZE(depth) / 2];
...
...
@@ -132,6 +132,8 @@ namespace
CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported channel count or data type");
func(src, dst, stream);
syncOutput(dst, _dst, stream);
}
}
}
...
...
@@ -203,12 +205,12 @@ namespace
{SplitFunc<4, uchar>::call, SplitFunc<4, ushort>::call, SplitFunc<4, int>::call, 0, SplitFunc<4, double>::call}
};
CV_
Dbg
Assert( dst != 0 );
CV_Assert( dst != 0 );
const int depth = src.depth();
const int channels = src.channels();
CV_
Dbg
Assert( channels <= 4 );
CV_Assert( channels <= 4 );
if (channels == 0)
return;
...
...
@@ -233,13 +235,13 @@ namespace
void cv::cuda::split(InputArray _src, GpuMat* dst, Stream& stream)
{
GpuMat src =
_src.getGpuMat(
);
GpuMat src =
getInputMat(_src, stream
);
splitImpl(src, dst, stream);
}
void cv::cuda::split(InputArray _src, std::vector<GpuMat>& dst, Stream& stream)
{
GpuMat src =
_src.getGpuMat(
);
GpuMat src =
getInputMat(_src, stream
);
dst.resize(src.channels());
if (src.channels() > 0)
splitImpl(src, &dst[0], stream);
...
...
modules/cudaarithm/src/cuda/sum.cu
View file @
16b56e71
This diff is collapsed.
Click to expand it.
modules/cudaarithm/src/cuda/threshold.cu
View file @
16b56e71
...
...
@@ -52,6 +52,8 @@
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
namespace
...
...
@@ -95,15 +97,14 @@ namespace
double cv::cuda::threshold(InputArray _src, OutputArray _dst, double thresh, double maxVal, int type, Stream& stream)
{
GpuMat src =
_src.getGpuMat(
);
GpuMat src =
getInputMat(_src, stream
);
const int depth = src.depth();
CV_
Dbg
Assert( src.channels() == 1 && depth <= CV_64F );
CV_
Dbg
Assert( type <= 4 /*THRESH_TOZERO_INV*/ );
CV_Assert( src.channels() == 1 && depth <= CV_64F );
CV_Assert( type <= 4 /*THRESH_TOZERO_INV*/ );
_dst.create(src.size(), src.type());
GpuMat dst = _dst.getGpuMat();
GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
if (depth == CV_32F && type == 2 /*THRESH_TRUNC*/)
{
...
...
@@ -142,6 +143,8 @@ double cv::cuda::threshold(InputArray _src, OutputArray _dst, double thresh, dou
funcs[depth](src, dst, thresh, maxVal, type, stream);
}
syncOutput(dst, _dst, stream);
return thresh;
}
...
...
modules/cudaarithm/src/cuda/transpose.cu
View file @
16b56e71
...
...
@@ -52,18 +52,19 @@
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
void cv::cuda::transpose(InputArray _src, OutputArray _dst, Stream& stream)
{
GpuMat src =
_src.getGpuMat(
);
GpuMat src =
getInputMat(_src, stream
);
const size_t elemSize = src.elemSize();
CV_Assert( elemSize == 1 || elemSize == 4 || elemSize == 8 );
_dst.create( src.cols, src.rows, src.type() );
GpuMat dst = _dst.getGpuMat();
GpuMat dst = getOutputMat(_dst, src.cols, src.rows, src.type(), stream);
if (elemSize == 1)
{
...
...
@@ -87,6 +88,8 @@ void cv::cuda::transpose(InputArray _src, OutputArray _dst, Stream& stream)
{
gridTranspose(globPtr<double>(src), globPtr<double>(dst), stream);
}
syncOutput(dst, _dst, stream);
}
#endif
modules/cudaarithm/src/element_operations.cpp
View file @
16b56e71
...
...
@@ -107,11 +107,11 @@ namespace
GpuMat
src1
;
if
(
!
isScalar1
)
src1
=
_src1
.
getGpuMat
(
);
src1
=
getInputMat
(
_src1
,
stream
);
GpuMat
src2
;
if
(
!
isScalar2
)
src2
=
_src2
.
getGpuMat
(
);
src2
=
getInputMat
(
_src2
,
stream
);
Mat
scalar
;
if
(
isScalar1
)
...
...
@@ -126,7 +126,7 @@ namespace
scalar
.
convertTo
(
Mat_
<
double
>
(
scalar
.
rows
,
scalar
.
cols
,
&
val
[
0
]),
CV_64F
);
}
GpuMat
mask
=
_mask
.
getGpuMat
(
);
GpuMat
mask
=
getInputMat
(
_mask
,
stream
);
const
int
sdepth
=
src1
.
empty
()
?
src2
.
depth
()
:
src1
.
depth
();
const
int
cn
=
src1
.
empty
()
?
src2
.
channels
()
:
src1
.
channels
();
...
...
@@ -147,8 +147,7 @@ namespace
CV_Error
(
Error
::
StsUnsupportedFormat
,
"The device doesn't support double"
);
}
_dst
.
create
(
size
,
CV_MAKE_TYPE
(
ddepth
,
cn
));
GpuMat
dst
=
_dst
.
getGpuMat
();
GpuMat
dst
=
getOutputMat
(
_dst
,
size
,
CV_MAKE_TYPE
(
ddepth
,
cn
),
stream
);
if
(
isScalar1
)
mat_scalar_func
(
src2
,
val
,
true
,
dst
,
mask
,
scale
,
stream
,
op
);
...
...
@@ -156,6 +155,8 @@ namespace
mat_scalar_func
(
src1
,
val
,
false
,
dst
,
mask
,
scale
,
stream
,
op
);
else
mat_mat_func
(
src1
,
src2
,
dst
,
mask
,
scale
,
stream
,
op
);
syncOutput
(
dst
,
_dst
,
stream
);
}
}
...
...
@@ -196,27 +197,29 @@ void cv::cuda::multiply(InputArray _src1, InputArray _src2, OutputArray _dst, do
{
if
(
_src1
.
type
()
==
CV_8UC4
&&
_src2
.
type
()
==
CV_32FC1
)
{
GpuMat
src1
=
_src1
.
getGpuMat
(
);
GpuMat
src2
=
_src2
.
getGpuMat
(
);
GpuMat
src1
=
getInputMat
(
_src1
,
stream
);
GpuMat
src2
=
getInputMat
(
_src2
,
stream
);
CV_Assert
(
src1
.
size
()
==
src2
.
size
()
);
_dst
.
create
(
src1
.
size
(),
src1
.
type
());
GpuMat
dst
=
_dst
.
getGpuMat
();
GpuMat
dst
=
getOutputMat
(
_dst
,
src1
.
size
(),
src1
.
type
(),
stream
);
mulMat_8uc4_32f
(
src1
,
src2
,
dst
,
stream
);
syncOutput
(
dst
,
_dst
,
stream
);
}
else
if
(
_src1
.
type
()
==
CV_16SC4
&&
_src2
.
type
()
==
CV_32FC1
)
{
GpuMat
src1
=
_src1
.
getGpuMat
(
);
GpuMat
src2
=
_src2
.
getGpuMat
(
);
GpuMat
src1
=
getInputMat
(
_src1
,
stream
);
GpuMat
src2
=
getInputMat
(
_src2
,
stream
);
CV_Assert
(
src1
.
size
()
==
src2
.
size
()
);
_dst
.
create
(
src1
.
size
(),
src1
.
type
());
GpuMat
dst
=
_dst
.
getGpuMat
();
GpuMat
dst
=
getOutputMat
(
_dst
,
src1
.
size
(),
src1
.
type
(),
stream
);
mulMat_16sc4_32f
(
src1
,
src2
,
dst
,
stream
);
syncOutput
(
dst
,
_dst
,
stream
);
}
else
{
...
...
@@ -237,27 +240,29 @@ void cv::cuda::divide(InputArray _src1, InputArray _src2, OutputArray _dst, doub
{
if
(
_src1
.
type
()
==
CV_8UC4
&&
_src2
.
type
()
==
CV_32FC1
)
{
GpuMat
src1
=
_src1
.
getGpuMat
(
);
GpuMat
src2
=
_src2
.
getGpuMat
(
);
GpuMat
src1
=
getInputMat
(
_src1
,
stream
);
GpuMat
src2
=
getInputMat
(
_src2
,
stream
);
CV_Assert
(
src1
.
size
()
==
src2
.
size
()
);
_dst
.
create
(
src1
.
size
(),
src1
.
type
());
GpuMat
dst
=
_dst
.
getGpuMat
();
GpuMat
dst
=
getOutputMat
(
_dst
,
src1
.
size
(),
src1
.
type
(),
stream
);
divMat_8uc4_32f
(
src1
,
src2
,
dst
,
stream
);
syncOutput
(
dst
,
_dst
,
stream
);
}
else
if
(
_src1
.
type
()
==
CV_16SC4
&&
_src2
.
type
()
==
CV_32FC1
)
{
GpuMat
src1
=
_src1
.
getGpuMat
(
);
GpuMat
src2
=
_src2
.
getGpuMat
(
);
GpuMat
src1
=
getInputMat
(
_src1
,
stream
);
GpuMat
src2
=
getInputMat
(
_src2
,
stream
);
CV_Assert
(
src1
.
size
()
==
src2
.
size
()
);
_dst
.
create
(
src1
.
size
(),
src1
.
type
());
GpuMat
dst
=
_dst
.
getGpuMat
();
GpuMat
dst
=
getOutputMat
(
_dst
,
src1
.
size
(),
src1
.
type
(),
stream
);
divMat_16sc4_32f
(
src1
,
src2
,
dst
,
stream
);
syncOutput
(
dst
,
_dst
,
stream
);
}
else
{
...
...
@@ -389,15 +394,16 @@ void cv::cuda::rshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Strea
{
NppShift
<
CV_32S
,
1
,
nppiRShiftC_32s_C1R
>::
call
,
0
,
NppShift
<
CV_32S
,
3
,
nppiRShiftC_32s_C3R
>::
call
,
NppShift
<
CV_32S
,
4
,
nppiRShiftC_32s_C4R
>::
call
},
};
GpuMat
src
=
_src
.
getGpuMat
(
);
GpuMat
src
=
getInputMat
(
_src
,
stream
);
CV_Assert
(
src
.
depth
()
<
CV_32F
);
CV_Assert
(
src
.
channels
()
==
1
||
src
.
channels
()
==
3
||
src
.
channels
()
==
4
);
_dst
.
create
(
src
.
size
(),
src
.
type
());
GpuMat
dst
=
_dst
.
getGpuMat
();
GpuMat
dst
=
getOutputMat
(
_dst
,
src
.
size
(),
src
.
type
(),
stream
);
funcs
[
src
.
depth
()][
src
.
channels
()
-
1
](
src
,
val
,
dst
,
StreamAccessor
::
getStream
(
stream
));
syncOutput
(
dst
,
_dst
,
stream
);
}
void
cv
::
cuda
::
lshift
(
InputArray
_src
,
Scalar_
<
int
>
val
,
OutputArray
_dst
,
Stream
&
stream
)
...
...
@@ -412,15 +418,16 @@ void cv::cuda::lshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Strea
{
NppShift
<
CV_32S
,
1
,
nppiLShiftC_32s_C1R
>::
call
,
0
,
NppShift
<
CV_32S
,
3
,
nppiLShiftC_32s_C3R
>::
call
,
NppShift
<
CV_32S
,
4
,
nppiLShiftC_32s_C4R
>::
call
},
};
GpuMat
src
=
_src
.
getGpuMat
(
);
GpuMat
src
=
getInputMat
(
_src
,
stream
);
CV_Assert
(
src
.
depth
()
==
CV_8U
||
src
.
depth
()
==
CV_16U
||
src
.
depth
()
==
CV_32S
);
CV_Assert
(
src
.
channels
()
==
1
||
src
.
channels
()
==
3
||
src
.
channels
()
==
4
);
_dst
.
create
(
src
.
size
(),
src
.
type
());
GpuMat
dst
=
_dst
.
getGpuMat
();
GpuMat
dst
=
getOutputMat
(
_dst
,
src
.
size
(),
src
.
type
(),
stream
);
funcs
[
src
.
depth
()][
src
.
channels
()
-
1
](
src
,
val
,
dst
,
StreamAccessor
::
getStream
(
stream
));
syncOutput
(
dst
,
_dst
,
stream
);
}
//////////////////////////////////////////////////////////////////////////////
...
...
@@ -475,22 +482,24 @@ namespace
void
cv
::
cuda
::
magnitude
(
InputArray
_src
,
OutputArray
_dst
,
Stream
&
stream
)
{
GpuMat
src
=
_src
.
getGpuMat
(
);
GpuMat
src
=
getInputMat
(
_src
,
stream
);
_dst
.
create
(
src
.
size
(),
CV_32FC1
);
GpuMat
dst
=
_dst
.
getGpuMat
();
GpuMat
dst
=
getOutputMat
(
_dst
,
src
.
size
(),
CV_32FC1
,
stream
);
npp_magnitude
(
src
,
dst
,
nppiMagnitude_32fc32f_C1R
,
StreamAccessor
::
getStream
(
stream
));
syncOutput
(
dst
,
_dst
,
stream
);
}
void
cv
::
cuda
::
magnitudeSqr
(
InputArray
_src
,
OutputArray
_dst
,
Stream
&
stream
)
{
GpuMat
src
=
_src
.
getGpuMat
(
);
GpuMat
src
=
getInputMat
(
_src
,
stream
);
_dst
.
create
(
src
.
size
(),
CV_32FC1
);
GpuMat
dst
=
_dst
.
getGpuMat
();
GpuMat
dst
=
getOutputMat
(
_dst
,
src
.
size
(),
CV_32FC1
,
stream
);
npp_magnitude
(
src
,
dst
,
nppiMagnitudeSqr_32fc32f_C1R
,
StreamAccessor
::
getStream
(
stream
));
syncOutput
(
dst
,
_dst
,
stream
);
}
#endif
modules/cudaarithm/src/reductions.cpp
View file @
16b56e71
This diff is collapsed.
Click to expand it.
modules/cudaarithm/test/test_element_operations.cpp
View file @
16b56e71
...
...
@@ -1329,7 +1329,7 @@ CUDA_TEST_P(Divide_Scalar_First, Accuracy)
try
{
cv
::
cuda
::
GpuMat
dst
;
cv
::
cuda
::
divide
(
scale
,
loadMat
(
mat
),
dst
,
depth
.
second
);
cv
::
cuda
::
divide
(
scale
,
loadMat
(
mat
),
dst
,
1.0
,
depth
.
second
);
}
catch
(
const
cv
::
Exception
&
e
)
{
...
...
@@ -1339,7 +1339,7 @@ CUDA_TEST_P(Divide_Scalar_First, Accuracy)
else
{
cv
::
cuda
::
GpuMat
dst
=
createMat
(
size
,
depth
.
second
,
useRoi
);
cv
::
cuda
::
divide
(
scale
,
loadMat
(
mat
,
useRoi
),
dst
,
depth
.
second
);
cv
::
cuda
::
divide
(
scale
,
loadMat
(
mat
,
useRoi
),
dst
,
1.0
,
depth
.
second
);
cv
::
Mat
dst_gold
;
cv
::
divide
(
scale
,
mat
,
dst_gold
,
depth
.
second
);
...
...
modules/cudaarithm/test/test_reductions.cpp
View file @
16b56e71
...
...
@@ -74,8 +74,27 @@ CUDA_TEST_P(Norm, Accuracy)
cv
::
Mat
src
=
randomMat
(
size
,
depth
);
cv
::
Mat
mask
=
randomMat
(
size
,
CV_8UC1
,
0
,
2
);
cv
::
cuda
::
GpuMat
d_buf
;
double
val
=
cv
::
cuda
::
norm
(
loadMat
(
src
,
useRoi
),
normCode
,
loadMat
(
mask
,
useRoi
),
d_buf
);
double
val
=
cv
::
cuda
::
norm
(
loadMat
(
src
,
useRoi
),
normCode
,
loadMat
(
mask
,
useRoi
));
double
val_gold
=
cv
::
norm
(
src
,
normCode
,
mask
);
EXPECT_NEAR
(
val_gold
,
val
,
depth
<
CV_32F
?
0.0
:
1.0
);
}
CUDA_TEST_P
(
Norm
,
Async
)
{
cv
::
Mat
src
=
randomMat
(
size
,
depth
);
cv
::
Mat
mask
=
randomMat
(
size
,
CV_8UC1
,
0
,
2
);
cv
::
cuda
::
Stream
stream
;
cv
::
cuda
::
HostMem
dst
;
cv
::
cuda
::
calcNorm
(
loadMat
(
src
,
useRoi
),
dst
,
normCode
,
loadMat
(
mask
,
useRoi
),
stream
);
stream
.
waitForCompletion
();
double
val
;
dst
.
createMatHeader
().
convertTo
(
cv
::
Mat
(
1
,
1
,
CV_64FC1
,
&
val
),
CV_64F
);
double
val_gold
=
cv
::
norm
(
src
,
normCode
,
mask
);
...
...
@@ -127,6 +146,27 @@ CUDA_TEST_P(NormDiff, Accuracy)
EXPECT_NEAR
(
val_gold
,
val
,
0.0
);
}
CUDA_TEST_P
(
NormDiff
,
Async
)
{
cv
::
Mat
src1
=
randomMat
(
size
,
CV_8UC1
);
cv
::
Mat
src2
=
randomMat
(
size
,
CV_8UC1
);
cv
::
cuda
::
Stream
stream
;
cv
::
cuda
::
HostMem
dst
;
cv
::
cuda
::
calcNormDiff
(
loadMat
(
src1
,
useRoi
),
loadMat
(
src2
,
useRoi
),
dst
,
normCode
,
stream
);
stream
.
waitForCompletion
();
double
val
;
const
cv
::
Mat
val_mat
(
1
,
1
,
CV_64FC1
,
&
val
);
dst
.
createMatHeader
().
convertTo
(
val_mat
,
CV_64F
);
double
val_gold
=
cv
::
norm
(
src1
,
src2
,
normCode
);
EXPECT_NEAR
(
val_gold
,
val
,
0.0
);
}
INSTANTIATE_TEST_CASE_P
(
CUDA_Arithm
,
NormDiff
,
testing
::
Combine
(
ALL_DEVICES
,
DIFFERENT_SIZES
,
...
...
@@ -247,6 +287,24 @@ CUDA_TEST_P(Sum, Simple)
EXPECT_SCALAR_NEAR
(
val_gold
,
val
,
CV_MAT_DEPTH
(
type
)
<
CV_32F
?
0.0
:
0.5
);
}
CUDA_TEST_P
(
Sum
,
Simple_Async
)
{
cv
::
cuda
::
Stream
stream
;
cv
::
cuda
::
HostMem
dst
;
cv
::
cuda
::
calcSum
(
loadMat
(
src
,
useRoi
),
dst
,
cv
::
noArray
(),
stream
);
stream
.
waitForCompletion
();
cv
::
Scalar
val
;
cv
::
Mat
val_mat
(
dst
.
size
(),
CV_64FC
(
dst
.
channels
()),
val
.
val
);
dst
.
createMatHeader
().
convertTo
(
val_mat
,
CV_64F
);
cv
::
Scalar
val_gold
=
cv
::
sum
(
src
);
EXPECT_SCALAR_NEAR
(
val_gold
,
val
,
CV_MAT_DEPTH
(
type
)
<
CV_32F
?
0.0
:
0.5
);
}
CUDA_TEST_P
(
Sum
,
Abs
)
{
cv
::
Scalar
val
=
cv
::
cuda
::
absSum
(
loadMat
(
src
,
useRoi
));
...
...
@@ -256,6 +314,24 @@ CUDA_TEST_P(Sum, Abs)
EXPECT_SCALAR_NEAR
(
val_gold
,
val
,
CV_MAT_DEPTH
(
type
)
<
CV_32F
?
0.0
:
0.5
);
}
CUDA_TEST_P
(
Sum
,
Abs_Async
)
{
cv
::
cuda
::
Stream
stream
;
cv
::
cuda
::
HostMem
dst
;
cv
::
cuda
::
calcAbsSum
(
loadMat
(
src
,
useRoi
),
dst
,
cv
::
noArray
(),
stream
);
stream
.
waitForCompletion
();
cv
::
Scalar
val
;
cv
::
Mat
val_mat
(
dst
.
size
(),
CV_64FC
(
dst
.
channels
()),
val
.
val
);
dst
.
createMatHeader
().
convertTo
(
val_mat
,
CV_64F
);
cv
::
Scalar
val_gold
=
absSumGold
(
src
);
EXPECT_SCALAR_NEAR
(
val_gold
,
val
,
CV_MAT_DEPTH
(
type
)
<
CV_32F
?
0.0
:
0.5
);
}
CUDA_TEST_P
(
Sum
,
Sqr
)
{
cv
::
Scalar
val
=
cv
::
cuda
::
sqrSum
(
loadMat
(
src
,
useRoi
));
...
...
@@ -265,6 +341,24 @@ CUDA_TEST_P(Sum, Sqr)
EXPECT_SCALAR_NEAR
(
val_gold
,
val
,
CV_MAT_DEPTH
(
type
)
<
CV_32F
?
0.0
:
0.5
);
}
CUDA_TEST_P
(
Sum
,
Sqr_Async
)
{
cv
::
cuda
::
Stream
stream
;
cv
::
cuda
::
HostMem
dst
;
cv
::
cuda
::
calcSqrSum
(
loadMat
(
src
,
useRoi
),
dst
,
cv
::
noArray
(),
stream
);
stream
.
waitForCompletion
();
cv
::
Scalar
val
;
cv
::
Mat
val_mat
(
dst
.
size
(),
CV_64FC
(
dst
.
channels
()),
val
.
val
);
dst
.
createMatHeader
().
convertTo
(
val_mat
,
CV_64F
);
cv
::
Scalar
val_gold
=
sqrSumGold
(
src
);
EXPECT_SCALAR_NEAR
(
val_gold
,
val
,
CV_MAT_DEPTH
(
type
)
<
CV_32F
?
0.0
:
0.5
);
}
INSTANTIATE_TEST_CASE_P
(
CUDA_Arithm
,
Sum
,
testing
::
Combine
(
ALL_DEVICES
,
DIFFERENT_SIZES
,
...
...
@@ -321,6 +415,28 @@ CUDA_TEST_P(MinMax, WithoutMask)
}
}
CUDA_TEST_P
(
MinMax
,
Async
)
{
cv
::
Mat
src
=
randomMat
(
size
,
depth
);
cv
::
cuda
::
Stream
stream
;
cv
::
cuda
::
HostMem
dst
;
cv
::
cuda
::
findMinMax
(
loadMat
(
src
,
useRoi
),
dst
,
cv
::
noArray
(),
stream
);
stream
.
waitForCompletion
();
double
vals
[
2
];
const
cv
::
Mat
vals_mat
(
1
,
2
,
CV_64FC1
,
&
vals
[
0
]);
dst
.
createMatHeader
().
convertTo
(
vals_mat
,
CV_64F
);
double
minVal_gold
,
maxVal_gold
;
minMaxLocGold
(
src
,
&
minVal_gold
,
&
maxVal_gold
);
EXPECT_DOUBLE_EQ
(
minVal_gold
,
vals
[
0
]);
EXPECT_DOUBLE_EQ
(
maxVal_gold
,
vals
[
1
]);
}
CUDA_TEST_P
(
MinMax
,
WithMask
)
{
cv
::
Mat
src
=
randomMat
(
size
,
depth
);
...
...
@@ -471,6 +587,41 @@ CUDA_TEST_P(MinMaxLoc, WithoutMask)
}
}
CUDA_TEST_P
(
MinMaxLoc
,
Async
)
{
cv
::
Mat
src
=
randomMat
(
size
,
depth
);
cv
::
cuda
::
Stream
stream
;
cv
::
cuda
::
HostMem
minMaxVals
,
locVals
;
cv
::
cuda
::
findMinMaxLoc
(
loadMat
(
src
,
useRoi
),
minMaxVals
,
locVals
,
cv
::
noArray
(),
stream
);
stream
.
waitForCompletion
();
double
vals
[
2
];
const
cv
::
Mat
vals_mat
(
2
,
1
,
CV_64FC1
,
&
vals
[
0
]);
minMaxVals
.
createMatHeader
().
convertTo
(
vals_mat
,
CV_64F
);
int
locs
[
2
];
const
cv
::
Mat
locs_mat
(
2
,
1
,
CV_32SC1
,
&
locs
[
0
]);
locVals
.
createMatHeader
().
copyTo
(
locs_mat
);
cv
::
Point
locs2D
[]
=
{
cv
::
Point
(
locs
[
0
]
%
src
.
cols
,
locs
[
0
]
/
src
.
cols
),
cv
::
Point
(
locs
[
1
]
%
src
.
cols
,
locs
[
1
]
/
src
.
cols
),
};
double
minVal_gold
,
maxVal_gold
;
cv
::
Point
minLoc_gold
,
maxLoc_gold
;
minMaxLocGold
(
src
,
&
minVal_gold
,
&
maxVal_gold
,
&
minLoc_gold
,
&
maxLoc_gold
);
EXPECT_DOUBLE_EQ
(
minVal_gold
,
vals
[
0
]);
EXPECT_DOUBLE_EQ
(
maxVal_gold
,
vals
[
1
]);
expectEqual
(
src
,
minLoc_gold
,
locs2D
[
0
]);
expectEqual
(
src
,
maxLoc_gold
,
locs2D
[
1
]);
}
CUDA_TEST_P
(
MinMaxLoc
,
WithMask
)
{
cv
::
Mat
src
=
randomMat
(
size
,
depth
);
...
...
@@ -564,6 +715,7 @@ PARAM_TEST_CASE(CountNonZero, cv::cuda::DeviceInfo, cv::Size, MatDepth, UseRoi)
int
depth
;
bool
useRoi
;
cv
::
Mat
src
;
virtual
void
SetUp
()
{
...
...
@@ -573,15 +725,14 @@ PARAM_TEST_CASE(CountNonZero, cv::cuda::DeviceInfo, cv::Size, MatDepth, UseRoi)
useRoi
=
GET_PARAM
(
3
);
cv
::
cuda
::
setDevice
(
devInfo
.
deviceID
());
cv
::
Mat
srcBase
=
randomMat
(
size
,
CV_8U
,
0.0
,
1.5
);
srcBase
.
convertTo
(
src
,
depth
);
}
};
CUDA_TEST_P
(
CountNonZero
,
Accuracy
)
{
cv
::
Mat
srcBase
=
randomMat
(
size
,
CV_8U
,
0.0
,
1.5
);
cv
::
Mat
src
;
srcBase
.
convertTo
(
src
,
depth
);
if
(
depth
==
CV_64F
&&
!
supportFeature
(
devInfo
,
cv
::
cuda
::
NATIVE_DOUBLE
))
{
try
...
...
@@ -603,6 +754,24 @@ CUDA_TEST_P(CountNonZero, Accuracy)
}
}
CUDA_TEST_P
(
CountNonZero
,
Async
)
{
cv
::
cuda
::
Stream
stream
;
cv
::
cuda
::
HostMem
dst
;
cv
::
cuda
::
countNonZero
(
loadMat
(
src
,
useRoi
),
dst
,
stream
);
stream
.
waitForCompletion
();
int
val
;
const
cv
::
Mat
val_mat
(
1
,
1
,
CV_32SC1
,
&
val
);
dst
.
createMatHeader
().
copyTo
(
val_mat
);
int
val_gold
=
cv
::
countNonZero
(
src
);
ASSERT_EQ
(
val_gold
,
val
);
}
INSTANTIATE_TEST_CASE_P
(
CUDA_Arithm
,
CountNonZero
,
testing
::
Combine
(
ALL_DEVICES
,
DIFFERENT_SIZES
,
...
...
@@ -750,7 +919,7 @@ CUDA_TEST_P(Normalize, WithMask)
dst_gold
.
setTo
(
cv
::
Scalar
::
all
(
0
));
cv
::
normalize
(
src
,
dst_gold
,
alpha
,
beta
,
norm_type
,
type
,
mask
);
EXPECT_MAT_NEAR
(
dst_gold
,
dst
,
1e-6
);
EXPECT_MAT_NEAR
(
dst_gold
,
dst
,
type
<
CV_32F
?
1.0
:
1e-4
);
}
INSTANTIATE_TEST_CASE_P
(
CUDA_Arithm
,
Normalize
,
testing
::
Combine
(
...
...
@@ -811,6 +980,28 @@ CUDA_TEST_P(MeanStdDev, Accuracy)
}
}
CUDA_TEST_P
(
MeanStdDev
,
Async
)
{
cv
::
Mat
src
=
randomMat
(
size
,
CV_8UC1
);
cv
::
cuda
::
Stream
stream
;
cv
::
cuda
::
HostMem
dst
;
cv
::
cuda
::
meanStdDev
(
loadMat
(
src
,
useRoi
),
dst
,
stream
);
stream
.
waitForCompletion
();
double
vals
[
2
];
dst
.
createMatHeader
().
copyTo
(
cv
::
Mat
(
1
,
2
,
CV_64FC1
,
&
vals
[
0
]));
cv
::
Scalar
mean_gold
;
cv
::
Scalar
stddev_gold
;
cv
::
meanStdDev
(
src
,
mean_gold
,
stddev_gold
);
EXPECT_SCALAR_NEAR
(
mean_gold
,
cv
::
Scalar
(
vals
[
0
]),
1e-5
);
EXPECT_SCALAR_NEAR
(
stddev_gold
,
cv
::
Scalar
(
vals
[
1
]),
1e-5
);
}
INSTANTIATE_TEST_CASE_P
(
CUDA_Arithm
,
MeanStdDev
,
testing
::
Combine
(
ALL_DEVICES
,
DIFFERENT_SIZES
,
...
...
modules/cudabgsegm/src/fgd.cpp
View file @
16b56e71
...
...
@@ -266,7 +266,7 @@ namespace
{
int
bgfgClassification
(
const
GpuMat
&
prevFrame
,
const
GpuMat
&
curFrame
,
const
GpuMat
&
Ftd
,
const
GpuMat
&
Fbd
,
GpuMat
&
foreground
,
GpuMat
&
countBuf
,
GpuMat
&
foreground
,
const
FGDParams
&
params
,
int
out_cn
)
{
typedef
void
(
*
func_t
)(
PtrStepSzb
prevFrame
,
PtrStepSzb
curFrame
,
PtrStepSzb
Ftd
,
PtrStepSzb
Fbd
,
PtrStepSzb
foreground
,
...
...
@@ -298,7 +298,7 @@ namespace
deltaC
,
deltaCC
,
params
.
alpha2
,
params
.
N1c
,
params
.
N1cc
,
0
);
int
count
=
cuda
::
countNonZero
(
foreground
,
countBuf
);
int
count
=
cuda
::
countNonZero
(
foreground
);
cuda
::
multiply
(
foreground
,
Scalar
::
all
(
255
),
foreground
);
...
...
@@ -605,8 +605,6 @@ namespace
GpuMat
hist_
;
GpuMat
histBuf_
;
GpuMat
countBuf_
;
GpuMat
buf_
;
GpuMat
filterBrd_
;
...
...
@@ -649,7 +647,7 @@ namespace
changeDetection
(
prevFrame_
,
curFrame
,
Ftd_
,
hist_
,
histBuf_
);
changeDetection
(
background_
,
curFrame
,
Fbd_
,
hist_
,
histBuf_
);
int
FG_pixels_count
=
bgfgClassification
(
prevFrame_
,
curFrame
,
Ftd_
,
Fbd_
,
foreground_
,
countBuf_
,
params_
,
4
);
int
FG_pixels_count
=
bgfgClassification
(
prevFrame_
,
curFrame
,
Ftd_
,
Fbd_
,
foreground_
,
params_
,
4
);
#ifdef HAVE_OPENCV_CUDAFILTERS
if
(
params_
.
perform_morphing
>
0
)
...
...
modules/cudafilters/src/filtering.cpp
View file @
16b56e71
...
...
@@ -542,7 +542,7 @@ namespace
anchor_
=
Point
(
iters_
,
iters_
);
iters_
=
1
;
}
else
if
(
iters_
>
1
&&
countNonZero
(
kernel
)
==
(
int
)
kernel
.
total
())
else
if
(
iters_
>
1
&&
c
v
::
c
ountNonZero
(
kernel
)
==
(
int
)
kernel
.
total
())
{
anchor_
=
Point
(
anchor_
.
x
*
iters_
,
anchor_
.
y
*
iters_
);
kernel
=
getStructuringElement
(
MORPH_RECT
,
...
...
modules/cudaimgproc/src/gftt.cpp
View file @
16b56e71
...
...
@@ -81,7 +81,6 @@ namespace
GpuMat
Dy_
;
GpuMat
buf_
;
GpuMat
eig_
;
GpuMat
minMaxbuf_
;
GpuMat
tmpCorners_
;
};
...
...
@@ -112,7 +111,7 @@ namespace
cornerCriteria_
->
compute
(
image
,
eig_
);
double
maxVal
=
0
;
cuda
::
minMax
(
eig_
,
0
,
&
maxVal
,
noArray
(),
minMaxbuf_
);
cuda
::
minMax
(
eig_
,
0
,
&
maxVal
);
ensureSizeIsEnough
(
1
,
std
::
max
(
1000
,
static_cast
<
int
>
(
image
.
size
().
area
()
*
0.05
)),
CV_32FC2
,
tmpCorners_
);
...
...
modules/cudaimgproc/src/match_template.cpp
View file @
16b56e71
...
...
@@ -271,7 +271,6 @@ namespace
private
:
Match_CCORR_8U
match_CCORR_
;
GpuMat
image_sqsums_
;
GpuMat
intBuffer_
;
};
void
Match_CCORR_NORMED_8U
::
match
(
InputArray
_image
,
InputArray
_templ
,
OutputArray
_result
,
Stream
&
stream
)
...
...
@@ -288,7 +287,7 @@ namespace
match_CCORR_
.
match
(
image
,
templ
,
_result
,
stream
);
GpuMat
result
=
_result
.
getGpuMat
();
cuda
::
sqrIntegral
(
image
.
reshape
(
1
),
image_sqsums_
,
intBuffer_
,
stream
);
cuda
::
sqrIntegral
(
image
.
reshape
(
1
),
image_sqsums_
,
stream
);
double
templ_sqsum
=
cuda
::
sqrSum
(
templ
.
reshape
(
1
))[
0
];
...
...
@@ -335,7 +334,6 @@ namespace
private
:
GpuMat
image_sqsums_
;
GpuMat
intBuffer_
;
Match_CCORR_8U
match_CCORR_
;
};
...
...
@@ -359,7 +357,7 @@ namespace
return
;
}
cuda
::
sqrIntegral
(
image
.
reshape
(
1
),
image_sqsums_
,
intBuffer_
,
stream
);
cuda
::
sqrIntegral
(
image
.
reshape
(
1
),
image_sqsums_
,
stream
);
double
templ_sqsum
=
cuda
::
sqrSum
(
templ
.
reshape
(
1
))[
0
];
...
...
@@ -383,7 +381,6 @@ namespace
private
:
GpuMat
image_sqsums_
;
GpuMat
intBuffer_
;
Match_CCORR_8U
match_CCORR_
;
};
...
...
@@ -398,7 +395,7 @@ namespace
CV_Assert
(
image
.
type
()
==
templ
.
type
()
);
CV_Assert
(
image
.
cols
>=
templ
.
cols
&&
image
.
rows
>=
templ
.
rows
);
cuda
::
sqrIntegral
(
image
.
reshape
(
1
),
image_sqsums_
,
intBuffer_
,
stream
);
cuda
::
sqrIntegral
(
image
.
reshape
(
1
),
image_sqsums_
,
stream
);
double
templ_sqsum
=
cuda
::
sqrSum
(
templ
.
reshape
(
1
))[
0
];
...
...
@@ -421,7 +418,6 @@ namespace
void
match
(
InputArray
image
,
InputArray
templ
,
OutputArray
result
,
Stream
&
stream
=
Stream
::
Null
());
private
:
GpuMat
intBuffer_
;
std
::
vector
<
GpuMat
>
images_
;
std
::
vector
<
GpuMat
>
image_sums_
;
Match_CCORR_8U
match_CCORR_
;
...
...
@@ -444,7 +440,7 @@ namespace
if
(
image
.
channels
()
==
1
)
{
image_sums_
.
resize
(
1
);
cuda
::
integral
(
image
,
image_sums_
[
0
],
intBuffer_
,
stream
);
cuda
::
integral
(
image
,
image_sums_
[
0
],
stream
);
int
templ_sum
=
(
int
)
cuda
::
sum
(
templ
)[
0
];
...
...
@@ -456,7 +452,7 @@ namespace
image_sums_
.
resize
(
images_
.
size
());
for
(
int
i
=
0
;
i
<
image
.
channels
();
++
i
)
cuda
::
integral
(
images_
[
i
],
image_sums_
[
i
],
intBuffer_
,
stream
);
cuda
::
integral
(
images_
[
i
],
image_sums_
[
i
],
stream
);
Scalar
templ_sum
=
cuda
::
sum
(
templ
);
...
...
@@ -501,7 +497,6 @@ namespace
private
:
GpuMat
imagef_
,
templf_
;
Match_CCORR_32F
match_CCORR_32F_
;
GpuMat
intBuffer_
;
std
::
vector
<
GpuMat
>
images_
;
std
::
vector
<
GpuMat
>
image_sums_
;
std
::
vector
<
GpuMat
>
image_sqsums_
;
...
...
@@ -527,10 +522,10 @@ namespace
if
(
image
.
channels
()
==
1
)
{
image_sums_
.
resize
(
1
);
cuda
::
integral
(
image
,
image_sums_
[
0
],
intBuffer_
,
stream
);
cuda
::
integral
(
image
,
image_sums_
[
0
],
stream
);
image_sqsums_
.
resize
(
1
);
cuda
::
sqrIntegral
(
image
,
image_sqsums_
[
0
],
intBuffer_
,
stream
);
cuda
::
sqrIntegral
(
image
,
image_sqsums_
[
0
],
stream
);
int
templ_sum
=
(
int
)
cuda
::
sum
(
templ
)[
0
];
double
templ_sqsum
=
cuda
::
sqrSum
(
templ
)[
0
];
...
...
@@ -547,8 +542,8 @@ namespace
image_sqsums_
.
resize
(
images_
.
size
());
for
(
int
i
=
0
;
i
<
image
.
channels
();
++
i
)
{
cuda
::
integral
(
images_
[
i
],
image_sums_
[
i
],
intBuffer_
,
stream
);
cuda
::
sqrIntegral
(
images_
[
i
],
image_sqsums_
[
i
],
intBuffer_
,
stream
);
cuda
::
integral
(
images_
[
i
],
image_sums_
[
i
],
stream
);
cuda
::
sqrIntegral
(
images_
[
i
],
image_sqsums_
[
i
],
stream
);
}
Scalar
templ_sum
=
cuda
::
sum
(
templ
);
...
...
samples/gpu/performance/tests.cpp
View file @
16b56e71
...
...
@@ -193,7 +193,7 @@ TEST(cornerHarris)
TEST
(
integral
)
{
Mat
src
,
sum
;
cuda
::
GpuMat
d_src
,
d_sum
,
d_buf
;
cuda
::
GpuMat
d_src
,
d_sum
;
for
(
int
size
=
1000
;
size
<=
4000
;
size
*=
2
)
{
...
...
@@ -209,10 +209,10 @@ TEST(integral)
d_src
.
upload
(
src
);
cuda
::
integral
Buffered
(
d_src
,
d_sum
,
d_buf
);
cuda
::
integral
(
d_src
,
d_sum
);
CUDA_ON
;
cuda
::
integral
Buffered
(
d_src
,
d_sum
,
d_buf
);
cuda
::
integral
(
d_src
,
d_sum
);
CUDA_OFF
;
}
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment