opencv / Commits

Commit 16b56e71
Authored 10 years ago by Vadim Pisarevsky
Merge pull request #3561 from jet47:cuda-arithm-refactoring
Parents: c58373db, b4e7ee46
Showing 34 changed files with 1403 additions and 712 deletions.
Changed files:

  modules/core/include/opencv2/core/private.cuda.hpp    +10   -0
  modules/core/src/cuda_gpu_mat.cpp                     +69   -0
  modules/cuda/src/cascadeclassifier.cpp                +1    -2
  modules/cudaarithm/include/opencv2/cudaarithm.hpp     +34   -165
  modules/cudaarithm/perf/perf_reductions.cpp           +12   -22
  modules/cudaarithm/src/arithm.cpp                     +23   -14
  modules/cudaarithm/src/core.cpp                       +4    -2
  modules/cudaarithm/src/cuda/add_weighted.cu           +16   -12
  modules/cudaarithm/src/cuda/bitwise_mat.cu            +8    -4
  modules/cudaarithm/src/cuda/copy_make_border.cu       +7    -3
  modules/cudaarithm/src/cuda/countnonzero.cu           +37   -20
  modules/cudaarithm/src/cuda/integral.cu               +17   -10
  modules/cudaarithm/src/cuda/lut.cu                    +6    -3
  modules/cudaarithm/src/cuda/math.cu                   +37   -40
  modules/cudaarithm/src/cuda/minmax.cu                 +108  -30
  modules/cudaarithm/src/cuda/minmaxloc.cu              +78   -46
  modules/cudaarithm/src/cuda/mul_spectrums.cu          +13   -8
  modules/cudaarithm/src/cuda/norm.cu                   +98   -28
  modules/cudaarithm/src/cuda/normalize.cu              +290  -0
  modules/cudaarithm/src/cuda/polar_cart.cu             +42   -36
  modules/cudaarithm/src/cuda/reduce.cu                 +7    -3
  modules/cudaarithm/src/cuda/split_merge.cu            +12   -10
  modules/cudaarithm/src/cuda/sum.cu                    +111  -63
  modules/cudaarithm/src/cuda/threshold.cu              +8    -5
  modules/cudaarithm/src/cuda/transpose.cu              +6    -3
  modules/cudaarithm/src/element_operations.cpp         +42   -33
  modules/cudaarithm/src/reductions.cpp                 +90   -116
  modules/cudaarithm/test/test_element_operations.cpp   +2    -2
  modules/cudaarithm/test/test_reductions.cpp           +198  -7
  modules/cudabgsegm/src/fgd.cpp                        +3    -5
  modules/cudafilters/src/filtering.cpp                 +1    -1
  modules/cudaimgproc/src/gftt.cpp                      +1    -2
  modules/cudaimgproc/src/match_template.cpp            +9    -14
  samples/gpu/performance/tests.cpp                     +3    -3
modules/core/include/opencv2/core/private.cuda.hpp
@@ -80,6 +80,16 @@
 namespace cv { namespace cuda {
 
 CV_EXPORTS cv::String getNppErrorMessage(int code);
 CV_EXPORTS cv::String getCudaDriverApiErrorMessage(int code);
 
+CV_EXPORTS GpuMat getInputMat(InputArray _src, Stream& stream);
+
+CV_EXPORTS GpuMat getOutputMat(OutputArray _dst, int rows, int cols, int type, Stream& stream);
+static inline GpuMat getOutputMat(OutputArray _dst, Size size, int type, Stream& stream)
+{
+    return getOutputMat(_dst, size.height, size.width, type, stream);
+}
+
+CV_EXPORTS void syncOutput(const GpuMat& dst, OutputArray _dst, Stream& stream);
+
 }}
 
 #ifndef HAVE_CUDA
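Taken together, these declarations establish the calling pattern used throughout the rest of this merge: fetch inputs with getInputMat (uploading host data through the stream's BufferPool when necessary), allocate the result with getOutputMat, run the kernel asynchronously, and publish the result with syncOutput, which downloads only when the caller passed host memory. A minimal sketch of that pattern; someGpuKernel is a placeholder, not a function from this commit:

    void myUnaryOp(InputArray _src, OutputArray _dst, Stream& stream)
    {
        GpuMat src = getInputMat(_src, stream);                          // device view of the input
        GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream); // device buffer for the result

        someGpuKernel(src, dst, stream);                                 // asynchronous work on the stream

        syncOutput(dst, _dst, stream);                                   // no-op when _dst is already a GpuMat
    }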
modules/core/src/cuda_gpu_mat.cpp
@@ -342,6 +342,75 @@ void cv::cuda::ensureSizeIsEnough(int rows, int cols, int type, OutputArray arr)
     }
 }
 
+GpuMat cv::cuda::getInputMat(InputArray _src, Stream& stream)
+{
+    GpuMat src;
+
+#ifndef HAVE_CUDA
+    (void) _src;
+    (void) stream;
+    throw_no_cuda();
+#else
+    if (_src.kind() == _InputArray::CUDA_GPU_MAT)
+    {
+        src = _src.getGpuMat();
+    }
+    else if (!_src.empty())
+    {
+        BufferPool pool(stream);
+        src = pool.getBuffer(_src.size(), _src.type());
+        src.upload(_src, stream);
+    }
+#endif
+
+    return src;
+}
+
+GpuMat cv::cuda::getOutputMat(OutputArray _dst, int rows, int cols, int type, Stream& stream)
+{
+    GpuMat dst;
+
+#ifndef HAVE_CUDA
+    (void) _dst;
+    (void) rows;
+    (void) cols;
+    (void) type;
+    (void) stream;
+    throw_no_cuda();
+#else
+    if (_dst.kind() == _InputArray::CUDA_GPU_MAT)
+    {
+        _dst.create(rows, cols, type);
+        dst = _dst.getGpuMat();
+    }
+    else
+    {
+        BufferPool pool(stream);
+        dst = pool.getBuffer(rows, cols, type);
+    }
+#endif
+
+    return dst;
+}
+
+void cv::cuda::syncOutput(const GpuMat& dst, OutputArray _dst, Stream& stream)
+{
+#ifndef HAVE_CUDA
+    (void) dst;
+    (void) _dst;
+    (void) stream;
+    throw_no_cuda();
+#else
+    if (_dst.kind() != _InputArray::CUDA_GPU_MAT)
+    {
+        if (stream)
+            dst.download(_dst, stream);
+        else
+            dst.download(_dst);
+    }
+#endif
+}
+
 #ifndef HAVE_CUDA
 
 GpuMat::Allocator* cv::cuda::GpuMat::defaultAllocator()
modules/cuda/src/cascadeclassifier.cpp
@@ -454,11 +454,10 @@ public:
             // create sutable matrix headers
             GpuMat src = resuzeBuffer(cv::Rect(0, 0, level.sFrame.width, level.sFrame.height));
             GpuMat sint = integral(cv::Rect(prev, 0, level.sFrame.width + 1, level.sFrame.height + 1));
-            GpuMat buff = integralBuffer;
 
             // generate integral for scale
             cuda::resize(image, src, level.sFrame, 0, 0, cv::INTER_LINEAR);
-            cuda::integral(src, sint, buff);
+            cuda::integral(src, sint);
 
             // calculate job
             int totalWidth = level.workArea.width / step;
modules/cudaarithm/include/opencv2/cudaarithm.hpp
@@ -130,12 +130,6 @@ This function, in contrast to divide, uses a round-down rounding mode.
 */
 CV_EXPORTS void divide(InputArray src1, InputArray src2, OutputArray dst, double scale = 1, int dtype = -1, Stream& stream = Stream::Null());
 
-//! computes element-wise weighted reciprocal of an array (dst = scale/src2)
-static inline void divide(double src1, InputArray src2, OutputArray dst, int dtype = -1, Stream& stream = Stream::Null())
-{
-    divide(src1, src2, dst, 1.0, dtype, stream);
-}
-
 /** @brief Computes per-element absolute difference of two matrices (or of a matrix and scalar).
 
 @param src1 First source matrix or scalar.
@@ -530,116 +524,53 @@ CV_EXPORTS void copyMakeBorder(InputArray src, OutputArray dst, int top, int bot
 @param src1 Source matrix. Any matrices except 64F are supported.
 @param normType Norm type. NORM_L1 , NORM_L2 , and NORM_INF are supported for now.
 @param mask optional operation mask; it must have the same size as src1 and CV_8UC1 type.
-@param buf Optional buffer to avoid extra memory allocations. It is resized automatically.
 @sa norm
 */
-CV_EXPORTS double norm(InputArray src1, int normType, InputArray mask, GpuMat& buf);
-/** @overload
-uses new buffer, no mask
-*/
-static inline double norm(InputArray src, int normType)
-{
-    GpuMat buf;
-    return norm(src, normType, GpuMat(), buf);
-}
-/** @overload
-no mask
-*/
-static inline double norm(InputArray src, int normType, GpuMat& buf)
-{
-    return norm(src, normType, GpuMat(), buf);
-}
+CV_EXPORTS double norm(InputArray src1, int normType, InputArray mask = noArray());
+/** @overload */
+CV_EXPORTS void calcNorm(InputArray src, OutputArray dst, int normType, InputArray mask = noArray(), Stream& stream = Stream::Null());
 
 /** @brief Returns the difference of two matrices.
 
 @param src1 Source matrix. Any matrices except 64F are supported.
 @param src2 Second source matrix (if any) with the same size and type as src1.
 @param normType Norm type. NORM_L1 , NORM_L2 , and NORM_INF are supported for now.
-@param buf Optional buffer to avoid extra memory allocations. It is resized automatically.
 @sa norm
 */
-CV_EXPORTS double norm(InputArray src1, InputArray src2, GpuMat& buf, int normType = NORM_L2);
-/** @overload
-uses new buffer
-*/
-static inline double norm(InputArray src1, InputArray src2, int normType = NORM_L2)
-{
-    GpuMat buf;
-    return norm(src1, src2, buf, normType);
-}
+CV_EXPORTS double norm(InputArray src1, InputArray src2, int normType = NORM_L2);
+/** @overload */
+CV_EXPORTS void calcNormDiff(InputArray src1, InputArray src2, OutputArray dst, int normType = NORM_L2, Stream& stream = Stream::Null());
 
 /** @brief Returns the sum of matrix elements.
 
 @param src Source image of any depth except for CV_64F .
 @param mask optional operation mask; it must have the same size as src1 and CV_8UC1 type.
-@param buf Optional buffer to avoid extra memory allocations. It is resized automatically.
 @sa sum
 */
-CV_EXPORTS Scalar sum(InputArray src, InputArray mask, GpuMat& buf);
-/** @overload
-uses new buffer, no mask
-*/
-static inline Scalar sum(InputArray src)
-{
-    GpuMat buf;
-    return sum(src, GpuMat(), buf);
-}
-/** @overload
-no mask
-*/
-static inline Scalar sum(InputArray src, GpuMat& buf)
-{
-    return sum(src, GpuMat(), buf);
-}
+CV_EXPORTS Scalar sum(InputArray src, InputArray mask = noArray());
+/** @overload */
+CV_EXPORTS void calcSum(InputArray src, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null());
 
 /** @brief Returns the sum of absolute values for matrix elements.
 
 @param src Source image of any depth except for CV_64F .
 @param mask optional operation mask; it must have the same size as src1 and CV_8UC1 type.
-@param buf Optional buffer to avoid extra memory allocations. It is resized automatically.
 */
-CV_EXPORTS Scalar absSum(InputArray src, InputArray mask, GpuMat& buf);
-/** @overload
-uses new buffer, no mask
-*/
-static inline Scalar absSum(InputArray src)
-{
-    GpuMat buf;
-    return absSum(src, GpuMat(), buf);
-}
-/** @overload
-no mask
-*/
-static inline Scalar absSum(InputArray src, GpuMat& buf)
-{
-    return absSum(src, GpuMat(), buf);
-}
+CV_EXPORTS Scalar absSum(InputArray src, InputArray mask = noArray());
+/** @overload */
+CV_EXPORTS void calcAbsSum(InputArray src, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null());
 
 /** @brief Returns the squared sum of matrix elements.
 
 @param src Source image of any depth except for CV_64F .
 @param mask optional operation mask; it must have the same size as src1 and CV_8UC1 type.
-@param buf Optional buffer to avoid extra memory allocations. It is resized automatically.
 */
-CV_EXPORTS Scalar sqrSum(InputArray src, InputArray mask, GpuMat& buf);
-/** @overload
-uses new buffer, no mask
-*/
-static inline Scalar sqrSum(InputArray src)
-{
-    GpuMat buf;
-    return sqrSum(src, GpuMat(), buf);
-}
-/** @overload
-no mask
-*/
-static inline Scalar sqrSum(InputArray src, GpuMat& buf)
-{
-    return sqrSum(src, GpuMat(), buf);
-}
+CV_EXPORTS Scalar sqrSum(InputArray src, InputArray mask = noArray());
+/** @overload */
+CV_EXPORTS void calcSqrSum(InputArray src, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null());
 
 /** @brief Finds global minimum and maximum matrix elements and returns their values.
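The new calc* variants return the reduction through an OutputArray, so the result can stay on the device or be downloaded asynchronously into pinned memory. A short usage sketch, assuming (as the synchronous wrappers later in this merge suggest) that calcSum writes a 1x1 CV_64F result:

    cv::cuda::GpuMat d_src(480, 640, CV_8UC1, cv::Scalar(1));

    cv::cuda::Stream stream;
    cv::cuda::HostMem result;                                 // pinned host memory target
    cv::cuda::calcSum(d_src, result, cv::noArray(), stream);  // queued; returns immediately

    // ... queue more GPU work here ...

    stream.waitForCompletion();                               // one sync point, when the value is needed
    double total = result.createMatHeader().at<double>(0);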
@@ -647,21 +578,14 @@ static inline Scalar sqrSum(InputArray src, GpuMat& buf)
 @param minVal Pointer to the returned minimum value. Use NULL if not required.
 @param maxVal Pointer to the returned maximum value. Use NULL if not required.
 @param mask Optional mask to select a sub-matrix.
-@param buf Optional buffer to avoid extra memory allocations. It is resized automatically.
 
 The function does not work with CV_64F images on GPUs with the compute capability \< 1.3.
 
 @sa minMaxLoc
 */
-CV_EXPORTS void minMax(InputArray src, double* minVal, double* maxVal, InputArray mask, GpuMat& buf);
-/** @overload
-uses new buffer
-*/
-static inline void minMax(InputArray src, double* minVal, double* maxVal = 0, InputArray mask = noArray())
-{
-    GpuMat buf;
-    minMax(src, minVal, maxVal, mask, buf);
-}
+CV_EXPORTS void minMax(InputArray src, double* minVal, double* maxVal, InputArray mask = noArray());
+/** @overload */
+CV_EXPORTS void findMinMax(InputArray src, OutputArray dst, InputArray mask = noArray(), Stream& stream = Stream::Null());
 
 /** @brief Finds global minimum and maximum matrix elements and returns their values with locations.
@@ -671,44 +595,28 @@ static inline void minMax(InputArray src, double* minVal, double* maxVal=0, Inpu
 @param minLoc Pointer to the returned minimum location. Use NULL if not required.
 @param maxLoc Pointer to the returned maximum location. Use NULL if not required.
 @param mask Optional mask to select a sub-matrix.
-@param valbuf Optional values buffer to avoid extra memory allocations. It is resized
-automatically.
-@param locbuf Optional locations buffer to avoid extra memory allocations. It is resized
-automatically.
 
 The function does not work with CV_64F images on GPU with the compute capability \< 1.3.
 
 @sa minMaxLoc
 */
 CV_EXPORTS void minMaxLoc(InputArray src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc,
-                          InputArray mask, GpuMat& valbuf, GpuMat& locbuf);
-/** @overload
-uses new buffer
-*/
-static inline void minMaxLoc(InputArray src, double* minVal, double* maxVal = 0, Point* minLoc = 0, Point* maxLoc = 0,
-                             InputArray mask = noArray())
-{
-    GpuMat valBuf, locBuf;
-    minMaxLoc(src, minVal, maxVal, minLoc, maxLoc, mask, valBuf, locBuf);
-}
+                          InputArray mask = noArray());
+/** @overload */
+CV_EXPORTS void findMinMaxLoc(InputArray src, OutputArray minMaxVals, OutputArray loc, InputArray mask = noArray(), Stream& stream = Stream::Null());
 
 /** @brief Counts non-zero matrix elements.
 
 @param src Single-channel source image.
-@param buf Optional buffer to avoid extra memory allocations. It is resized automatically.
 
 The function does not work with CV_64F images on GPUs with the compute capability \< 1.3.
 
 @sa countNonZero
 */
-CV_EXPORTS int countNonZero(InputArray src, GpuMat& buf);
-/** @overload
-uses new buffer
-*/
-static inline int countNonZero(const GpuMat& src)
-{
-    GpuMat buf;
-    return countNonZero(src, buf);
-}
+CV_EXPORTS int countNonZero(InputArray src);
+/** @overload */
+CV_EXPORTS void countNonZero(InputArray src, OutputArray dst, Stream& stream = Stream::Null());
 
 /** @brief Reduces a matrix to a vector.
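countNonZero now has a blocking form and a stream form; the stream form leaves the count in a 1x1 CV_32SC1 array (see the countnonzero.cu changes later in this diff), so the download can ride on the same stream. A sketch, assuming those signatures:

    int countAsync(const cv::cuda::GpuMat& d_src, cv::cuda::Stream& stream)
    {
        cv::cuda::HostMem count;                       // becomes a pinned 1x1 CV_32SC1 result
        cv::cuda::countNonZero(d_src, count, stream);  // asynchronous overload

        stream.waitForCompletion();
        return count.createMatHeader().at<int>(0);
    }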
@@ -743,19 +651,12 @@ CV_EXPORTS void reduce(InputArray mtx, OutputArray vec, int dim, int reduceOp, i
 @param mtx Source matrix. CV_8UC1 matrices are supported for now.
 @param mean Mean value.
 @param stddev Standard deviation value.
-@param buf Optional buffer to avoid extra memory allocations. It is resized automatically.
 @sa meanStdDev
 */
-CV_EXPORTS void meanStdDev(InputArray mtx, Scalar& mean, Scalar& stddev, GpuMat& buf);
-/** @overload
-uses new buffer
-*/
-static inline void meanStdDev(InputArray src, Scalar& mean, Scalar& stddev)
-{
-    GpuMat buf;
-    meanStdDev(src, mean, stddev, buf);
-}
+CV_EXPORTS void meanStdDev(InputArray mtx, Scalar& mean, Scalar& stddev);
+/** @overload */
+CV_EXPORTS void meanStdDev(InputArray mtx, OutputArray dst, Stream& stream = Stream::Null());
 
 /** @brief Computes a standard deviation of integral images.
@@ -779,64 +680,32 @@ normalization.
 @param dtype When negative, the output array has the same type as src; otherwise, it has the same
 number of channels as src and the depth =CV_MAT_DEPTH(dtype).
 @param mask Optional operation mask.
-@param norm_buf Optional buffer to avoid extra memory allocations. It is resized automatically.
-@param cvt_buf Optional buffer to avoid extra memory allocations. It is resized automatically.
+@param stream Stream for the asynchronous version.
 @sa normalize
 */
 CV_EXPORTS void normalize(InputArray src, OutputArray dst, double alpha, double beta,
-                          int norm_type, int dtype, InputArray mask, GpuMat& norm_buf, GpuMat& cvt_buf);
-/** @overload
-uses new buffers
-*/
-static inline void normalize(InputArray src, OutputArray dst, double alpha = 1, double beta = 0,
-                             int norm_type = NORM_L2, int dtype = -1, InputArray mask = noArray())
-{
-    GpuMat norm_buf;
-    GpuMat cvt_buf;
-    normalize(src, dst, alpha, beta, norm_type, dtype, mask, norm_buf, cvt_buf);
-}
+                          int norm_type, int dtype, InputArray mask = noArray(), Stream& stream = Stream::Null());
 
 /** @brief Computes an integral image.
 
 @param src Source image. Only CV_8UC1 images are supported for now.
 @param sum Integral image containing 32-bit unsigned integer values packed into CV_32SC1 .
-@param buffer Optional buffer to avoid extra memory allocations. It is resized automatically.
 @param stream Stream for the asynchronous version.
 @sa integral
 */
-CV_EXPORTS void integral(InputArray src, OutputArray sum, GpuMat& buffer, Stream& stream = Stream::Null());
-static inline void integralBuffered(InputArray src, OutputArray sum, GpuMat& buffer, Stream& stream = Stream::Null())
-{
-    integral(src, sum, buffer, stream);
-}
-/** @overload
-uses new buffer
-*/
-static inline void integral(InputArray src, OutputArray sum, Stream& stream = Stream::Null())
-{
-    GpuMat buffer;
-    integral(src, sum, buffer, stream);
-}
+CV_EXPORTS void integral(InputArray src, OutputArray sum, Stream& stream = Stream::Null());
 
 /** @brief Computes a squared integral image.
 
 @param src Source image. Only CV_8UC1 images are supported for now.
 @param sqsum Squared integral image containing 64-bit unsigned integer values packed into
 CV_64FC1 .
-@param buf Optional buffer to avoid extra memory allocations. It is resized automatically.
 @param stream Stream for the asynchronous version.
 */
-CV_EXPORTS void sqrIntegral(InputArray src, OutputArray sqsum, GpuMat& buf, Stream& stream = Stream::Null());
-/** @overload
-uses new buffer
-*/
-static inline void sqrIntegral(InputArray src, OutputArray sqsum, Stream& stream = Stream::Null())
-{
-    GpuMat buffer;
-    sqrIntegral(src, sqsum, buffer, stream);
-}
+CV_EXPORTS void sqrIntegral(InputArray src, OutputArray sqsum, Stream& stream = Stream::Null());
 
 //! @} cudaarithm_reduce
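For callers of the removed buffered overloads, migration is mechanical: drop the scratch GpuMat argument and let the implementation draw temporaries from the stream's BufferPool, as the cascadeclassifier.cpp change earlier in this diff does. For example:

    cv::cuda::GpuMat src, sum, buffer;

    // before this commit:
    //   cv::cuda::integral(src, sum, buffer);
    //   cv::cuda::integralBuffered(src, sum, buffer);

    // after this commit:
    cv::cuda::integral(src, sum);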
modules/cudaarithm/perf/perf_reductions.cpp
@@ -108,10 +108,9 @@ PERF_TEST_P(Sz_Norm, NormDiff,
     {
         const cv::cuda::GpuMat d_src1(src1);
         const cv::cuda::GpuMat d_src2(src2);
-        cv::cuda::GpuMat d_buf;
         double gpu_dst;
 
-        TEST_CYCLE() gpu_dst = cv::cuda::norm(d_src1, d_src2, d_buf, normType);
+        TEST_CYCLE() gpu_dst = cv::cuda::norm(d_src1, d_src2, normType);
 
         SANITY_CHECK(gpu_dst);

@@ -146,10 +145,9 @@ PERF_TEST_P(Sz_Depth_Cn, Sum,
     if (PERF_RUN_CUDA())
     {
         const cv::cuda::GpuMat d_src(src);
-        cv::cuda::GpuMat d_buf;
         cv::Scalar gpu_dst;
 
-        TEST_CYCLE() gpu_dst = cv::cuda::sum(d_src, d_buf);
+        TEST_CYCLE() gpu_dst = cv::cuda::sum(d_src);
 
         SANITY_CHECK(gpu_dst, 1e-5, ERROR_RELATIVE);
     }

@@ -183,10 +181,9 @@ PERF_TEST_P(Sz_Depth_Cn, SumAbs,
     if (PERF_RUN_CUDA())
     {
         const cv::cuda::GpuMat d_src(src);
-        cv::cuda::GpuMat d_buf;
         cv::Scalar gpu_dst;
 
-        TEST_CYCLE() gpu_dst = cv::cuda::absSum(d_src, d_buf);
+        TEST_CYCLE() gpu_dst = cv::cuda::absSum(d_src);
 
         SANITY_CHECK(gpu_dst, 1e-6, ERROR_RELATIVE);
     }

@@ -216,10 +213,9 @@ PERF_TEST_P(Sz_Depth_Cn, SumSqr,
     if (PERF_RUN_CUDA())
     {
         const cv::cuda::GpuMat d_src(src);
-        cv::cuda::GpuMat d_buf;
         cv::Scalar gpu_dst;
 
-        TEST_CYCLE() gpu_dst = cv::cuda::sqrSum(d_src, d_buf);
+        TEST_CYCLE() gpu_dst = cv::cuda::sqrSum(d_src);
 
         SANITY_CHECK(gpu_dst, 1e-6, ERROR_RELATIVE);
     }

@@ -248,10 +244,9 @@ PERF_TEST_P(Sz_Depth, MinMax,
     if (PERF_RUN_CUDA())
     {
         const cv::cuda::GpuMat d_src(src);
-        cv::cuda::GpuMat d_buf;
         double gpu_minVal, gpu_maxVal;
 
-        TEST_CYCLE() cv::cuda::minMax(d_src, &gpu_minVal, &gpu_maxVal, cv::cuda::GpuMat(), d_buf);
+        TEST_CYCLE() cv::cuda::minMax(d_src, &gpu_minVal, &gpu_maxVal, cv::cuda::GpuMat());
 
         SANITY_CHECK(gpu_minVal, 1e-10);
         SANITY_CHECK(gpu_maxVal, 1e-10);

@@ -286,11 +281,10 @@ PERF_TEST_P(Sz_Depth, MinMaxLoc,
     if (PERF_RUN_CUDA())
     {
         const cv::cuda::GpuMat d_src(src);
-        cv::cuda::GpuMat d_valbuf, d_locbuf;
         double gpu_minVal, gpu_maxVal;
         cv::Point gpu_minLoc, gpu_maxLoc;
 
-        TEST_CYCLE() cv::cuda::minMaxLoc(d_src, &gpu_minVal, &gpu_maxVal, &gpu_minLoc, &gpu_maxLoc, cv::cuda::GpuMat(), d_valbuf, d_locbuf);
+        TEST_CYCLE() cv::cuda::minMaxLoc(d_src, &gpu_minVal, &gpu_maxVal, &gpu_minLoc, &gpu_maxLoc);
 
         SANITY_CHECK(gpu_minVal, 1e-10);
         SANITY_CHECK(gpu_maxVal, 1e-10);

@@ -323,10 +317,9 @@ PERF_TEST_P(Sz_Depth, CountNonZero,
     if (PERF_RUN_CUDA())
     {
         const cv::cuda::GpuMat d_src(src);
-        cv::cuda::GpuMat d_buf;
         int gpu_dst = 0;
 
-        TEST_CYCLE() gpu_dst = cv::cuda::countNonZero(d_src, d_buf);
+        TEST_CYCLE() gpu_dst = cv::cuda::countNonZero(d_src);
 
         SANITY_CHECK(gpu_dst);
     }

@@ -414,9 +407,8 @@ PERF_TEST_P(Sz_Depth_NormType, Normalize,
     {
         const cv::cuda::GpuMat d_src(src);
         cv::cuda::GpuMat dst;
-        cv::cuda::GpuMat d_norm_buf, d_cvt_buf;
 
-        TEST_CYCLE() cv::cuda::normalize(d_src, dst, alpha, beta, norm_type, type, cv::cuda::GpuMat(), d_norm_buf, d_cvt_buf);
+        TEST_CYCLE() cv::cuda::normalize(d_src, dst, alpha, beta, norm_type, type, cv::cuda::GpuMat());
 
         CUDA_SANITY_CHECK(dst, 1e-6);
     }

@@ -445,11 +437,10 @@ PERF_TEST_P(Sz, MeanStdDev,
     if (PERF_RUN_CUDA())
     {
         const cv::cuda::GpuMat d_src(src);
-        cv::cuda::GpuMat d_buf;
         cv::Scalar gpu_mean;
         cv::Scalar gpu_stddev;
 
-        TEST_CYCLE() cv::cuda::meanStdDev(d_src, gpu_mean, gpu_stddev, d_buf);
+        TEST_CYCLE() cv::cuda::meanStdDev(d_src, gpu_mean, gpu_stddev);
 
         SANITY_CHECK(gpu_mean);
         SANITY_CHECK(gpu_stddev);

@@ -481,9 +472,8 @@ PERF_TEST_P(Sz, Integral,
     {
         const cv::cuda::GpuMat d_src(src);
         cv::cuda::GpuMat dst;
-        cv::cuda::GpuMat d_buf;
 
-        TEST_CYCLE() cv::cuda::integral(d_src, dst, d_buf);
+        TEST_CYCLE() cv::cuda::integral(d_src, dst);
 
         CUDA_SANITY_CHECK(dst);
     }

@@ -511,9 +501,9 @@ PERF_TEST_P(Sz, IntegralSqr,
     if (PERF_RUN_CUDA())
     {
         const cv::cuda::GpuMat d_src(src);
-        cv::cuda::GpuMat dst, buf;
+        cv::cuda::GpuMat dst;
 
-        TEST_CYCLE() cv::cuda::sqrIntegral(d_src, dst, buf);
+        TEST_CYCLE() cv::cuda::sqrIntegral(d_src, dst);
 
         CUDA_SANITY_CHECK(dst);
     }
modules/cudaarithm/src/arithm.cpp
@@ -169,9 +169,9 @@ void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray
 #else
     // CUBLAS works with column-major matrices
 
-    GpuMat src1 = _src1.getGpuMat();
-    GpuMat src2 = _src2.getGpuMat();
-    GpuMat src3 = _src3.getGpuMat();
+    GpuMat src1 = getInputMat(_src1, stream);
+    GpuMat src2 = getInputMat(_src2, stream);
+    GpuMat src3 = getInputMat(_src3, stream);
 
     CV_Assert( src1.type() == CV_32FC1 || src1.type() == CV_32FC2 || src1.type() == CV_64FC1 || src1.type() == CV_64FC2 );
     CV_Assert( src2.type() == src1.type() && (src3.empty() || src3.type() == src1.type()) );

@@ -200,8 +200,7 @@ void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray
     CV_Assert( src1Size.width == src2Size.height );
     CV_Assert( src3.empty() || src3Size == dstSize );
 
-    _dst.create(dstSize, src1.type());
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, dstSize, src1.type(), stream);
 
     if (beta != 0)
     {

@@ -281,6 +280,8 @@ void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray
     }
 
     cublasSafeCall( cublasDestroy_v2(handle) );
 
+    syncOutput(dst, _dst, stream);
+
 #endif
 }

@@ -297,7 +298,7 @@ void cv::cuda::dft(InputArray _src, OutputArray _dst, Size dft_size, int flags,
     (void) stream;
     throw_no_cuda();
 #else
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);
 
     CV_Assert( src.type() == CV_32FC1 || src.type() == CV_32FC2 );

@@ -314,13 +315,20 @@ void cv::cuda::dft(InputArray _src, OutputArray _dst, Size dft_size, int flags,
     // We don't support real-to-real transform
     CV_Assert( is_complex_input || is_complex_output );
 
-    GpuMat src_cont = src;
-
     // Make sure here we work with the continuous input,
     // as CUFFT can't handle gaps
-    createContinuous(src.rows, src.cols, src.type(), src_cont);
-    if (src_cont.data != src.data)
+    GpuMat src_cont;
+    if (src.isContinuous())
+    {
+        src_cont = src;
+    }
+    else
+    {
+        BufferPool pool(stream);
+        src_cont.allocator = pool.getAllocator();
+        createContinuous(src.rows, src.cols, src.type(), src_cont);
+        src.copyTo(src_cont, stream);
+    }
 
     Size dft_size_opt = dft_size;
     if (is_1d_input && !is_row_dft)

@@ -462,16 +470,15 @@ namespace
     void ConvolutionImpl::convolve(InputArray _image, InputArray _templ, OutputArray _result, bool ccorr, Stream& _stream)
     {
-        GpuMat image = _image.getGpuMat();
-        GpuMat templ = _templ.getGpuMat();
+        GpuMat image = getInputMat(_image, _stream);
+        GpuMat templ = getInputMat(_templ, _stream);
 
         CV_Assert( image.type() == CV_32FC1 );
         CV_Assert( templ.type() == CV_32FC1 );
 
         create(image.size(), templ.size());
 
-        _result.create(result_size, CV_32FC1);
-        GpuMat result = _result.getGpuMat();
+        GpuMat result = getOutputMat(_result, result_size, CV_32FC1, _stream);
 
         cudaStream_t stream = StreamAccessor::getStream(_stream);

@@ -520,6 +527,8 @@ namespace
         cufftSafeCall( cufftDestroy(planR2C) );
         cufftSafeCall( cufftDestroy(planC2R) );
 
+        syncOutput(result, _result, _stream);
     }
 }
modules/cudaarithm/src/core.cpp
@@ -119,15 +119,17 @@ void cv::cuda::flip(InputArray _src, OutputArray _dst, int flipCode, Stream& str
         {NppMirror<CV_32F, nppiMirror_32f_C1R>::call, 0, NppMirror<CV_32F, nppiMirror_32f_C3R>::call, NppMirror<CV_32F, nppiMirror_32f_C4R>::call}
     };
 
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);
 
     CV_Assert(src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S || src.depth() == CV_32F);
     CV_Assert(src.channels() == 1 || src.channels() == 3 || src.channels() == 4);
 
-    _dst.create(src.size(), src.type());
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
 
     funcs[src.depth()][src.channels() - 1](src, dst, flipCode, StreamAccessor::getStream(stream));
 
+    syncOutput(dst, _dst, stream);
 }
 
 #endif /* !defined (HAVE_CUDA) */
modules/cudaarithm/src/cuda/add_weighted.cu
@@ -50,7 +50,10 @@
 #include "opencv2/cudaarithm.hpp"
 #include "opencv2/cudev.hpp"
+#include "opencv2/core/private.cuda.hpp"
 
+using namespace cv;
+using namespace cv::cuda;
 using namespace cv::cudev;
 
 namespace

@@ -63,7 +66,7 @@ namespace
         __device__ __forceinline__ D operator ()(T1 a, T2 b) const
         {
-            return saturate_cast<D>(a * alpha + b * beta + gamma);
+            return cudev::saturate_cast<D>(a * alpha + b * beta + gamma);
         }
     };

@@ -555,8 +558,8 @@ void cv::cuda::addWeighted(InputArray _src1, double alpha, InputArray _src2, dou
         }
     };
 
-    GpuMat src1 = _src1.getGpuMat();
-    GpuMat src2 = _src2.getGpuMat();
+    GpuMat src1 = getInputMat(_src1, stream);
+    GpuMat src2 = getInputMat(_src2, stream);
 
     int sdepth1 = src1.depth();
     int sdepth2 = src2.depth();

@@ -564,19 +567,18 @@ void cv::cuda::addWeighted(InputArray _src1, double alpha, InputArray _src2, dou
     ddepth = ddepth >= 0 ? CV_MAT_DEPTH(ddepth) : std::max(sdepth1, sdepth2);
     const int cn = src1.channels();
 
-    CV_DbgAssert( src2.size() == src1.size() && src2.channels() == cn );
-    CV_DbgAssert( sdepth1 <= CV_64F && sdepth2 <= CV_64F && ddepth <= CV_64F );
+    CV_Assert( src2.size() == src1.size() && src2.channels() == cn );
+    CV_Assert( sdepth1 <= CV_64F && sdepth2 <= CV_64F && ddepth <= CV_64F );
 
-    _dst.create(src1.size(), CV_MAKE_TYPE(ddepth, cn));
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src1.size(), CV_MAKE_TYPE(ddepth, cn), stream);
 
-    GpuMat src1_ = src1.reshape(1);
-    GpuMat src2_ = src2.reshape(1);
-    GpuMat dst_ = dst.reshape(1);
+    GpuMat src1_single = src1.reshape(1);
+    GpuMat src2_single = src2.reshape(1);
+    GpuMat dst_single = dst.reshape(1);
 
     if (sdepth1 > sdepth2)
     {
-        src1_.swap(src2_);
+        src1_single.swap(src2_single);
         std::swap(alpha, beta);
         std::swap(sdepth1, sdepth2);
     }

@@ -586,7 +588,9 @@ void cv::cuda::addWeighted(InputArray _src1, double alpha, InputArray _src2, dou
     if (!func)
         CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
 
-    func(src1_, alpha, src2_, beta, gamma, dst_, stream);
+    func(src1_single, alpha, src2_single, beta, gamma, dst_single, stream);
+
+    syncOutput(dst, _dst, stream);
 }
 
 #endif
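The cudev::saturate_cast qualifications added here are presumably needed because these files now have both using namespace cv and using namespace cv::cudev in scope, and each namespace provides a saturate_cast template; qualifying the call picks the device-side overload unambiguously.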
modules/cudaarithm/src/cuda/bitwise_mat.cu
@@ -50,7 +50,10 @@
 #include "opencv2/cudaarithm.hpp"
 #include "opencv2/cudev.hpp"
+#include "opencv2/core/private.cuda.hpp"
 
+using namespace cv;
+using namespace cv::cuda;
 using namespace cv::cudev;
 
 void bitMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, double, Stream& stream, int op);

@@ -60,16 +63,15 @@ void bitMat(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& m
 void cv::cuda::bitwise_not(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream)
 {
-    GpuMat src = _src.getGpuMat();
-    GpuMat mask = _mask.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);
+    GpuMat mask = getInputMat(_mask, stream);
 
     const int depth = src.depth();
 
     CV_DbgAssert( depth <= CV_32F );
     CV_DbgAssert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );
 
-    _dst.create(src.size(), src.type());
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
 
     if (mask.empty())
     {

@@ -125,6 +127,8 @@ void cv::cuda::bitwise_not(InputArray _src, OutputArray _dst, InputArray _mask,
             gridTransformUnary(vsrc, vdst, bit_not<uchar>(), singleMaskChannels(globPtr<uchar>(mask), src.channels()), stream);
         }
     }
 
+    syncOutput(dst, _dst, stream);
 }
 
 //////////////////////////////////////////////////////////////////////////////
modules/cudaarithm/src/cuda/copy_make_border.cu
@@ -50,7 +50,10 @@
 #include "opencv2/cudaarithm.hpp"
 #include "opencv2/cudev.hpp"
+#include "opencv2/core/private.cuda.hpp"
 
+using namespace cv;
+using namespace cv::cuda;
 using namespace cv::cudev;
 
 namespace

@@ -133,7 +136,7 @@ void cv::cuda::copyMakeBorder(InputArray _src, OutputArray _dst, int top, int bo
         { copyMakeBorderImpl<float , 1> , 0 /*copyMakeBorderImpl<float , 2>*/, copyMakeBorderImpl<float , 3> , copyMakeBorderImpl<float ,4> }
     };
 
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);
 
     const int depth = src.depth();
     const int cn = src.channels();

@@ -141,8 +144,7 @@ void cv::cuda::copyMakeBorder(InputArray _src, OutputArray _dst, int top, int bo
     CV_Assert( depth <= CV_32F && cn <= 4 );
     CV_Assert( borderType == BORDER_REFLECT_101 || borderType == BORDER_REPLICATE || borderType == BORDER_CONSTANT || borderType == BORDER_REFLECT || borderType == BORDER_WRAP );
 
-    _dst.create(src.rows + top + bottom, src.cols + left + right, src.type());
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src.rows + top + bottom, src.cols + left + right, src.type(), stream);
 
     const func_t func = funcs[depth][cn - 1];

@@ -150,6 +152,8 @@ void cv::cuda::copyMakeBorder(InputArray _src, OutputArray _dst, int top, int bo
         CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported combination of source and destination types");
 
     func(src, dst, top, left, borderType, value, stream);
 
+    syncOutput(dst, _dst, stream);
 }
 
 #endif
modules/cudaarithm/src/cuda/countnonzero.cu
@@ -50,47 +50,64 @@
 #include "opencv2/cudaarithm.hpp"
 #include "opencv2/cudev.hpp"
+#include "opencv2/core/private.cuda.hpp"
 
+using namespace cv;
+using namespace cv::cuda;
 using namespace cv::cudev;
 
 namespace
 {
-    template <typename T>
-    int countNonZeroImpl(const GpuMat& _src, GpuMat& _buf)
+    template <typename T, typename D>
+    void countNonZeroImpl(const GpuMat& _src, GpuMat& _dst, Stream& stream)
     {
         const GpuMat_<T>& src = (const GpuMat_<T>&) _src;
-        GpuMat_<int>& buf = (GpuMat_<int>&) _buf;
+        GpuMat_<D>& dst = (GpuMat_<D>&) _dst;
 
-        gridCountNonZero(src, buf);
-
-        int data;
-        buf.download(cv::Mat(1, 1, buf.type(), &data));
-
-        return data;
+        gridCountNonZero(src, dst, stream);
     }
 }
 
-int cv::cuda::countNonZero(InputArray _src, GpuMat& buf)
+void cv::cuda::countNonZero(InputArray _src, OutputArray _dst, Stream& stream)
 {
-    typedef int (*func_t)(const GpuMat& _src, GpuMat& _buf);
+    typedef void (*func_t)(const GpuMat& src, GpuMat& dst, Stream& stream);
     static const func_t funcs[] =
     {
-        countNonZeroImpl<uchar>,
-        countNonZeroImpl<schar>,
-        countNonZeroImpl<ushort>,
-        countNonZeroImpl<short>,
-        countNonZeroImpl<int>,
-        countNonZeroImpl<float>,
-        countNonZeroImpl<double>
+        countNonZeroImpl<uchar, int>,
+        countNonZeroImpl<schar, int>,
+        countNonZeroImpl<ushort, int>,
+        countNonZeroImpl<short, int>,
+        countNonZeroImpl<int, int>,
+        countNonZeroImpl<float, int>,
+        countNonZeroImpl<double, int>,
     };
 
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);
 
     CV_Assert( src.depth() <= CV_64F );
     CV_Assert( src.channels() == 1 );
 
+    GpuMat dst = getOutputMat(_dst, 1, 1, CV_32SC1, stream);
 
     const func_t func = funcs[src.depth()];
+    func(src, dst, stream);
 
-    return func(src, buf);
+    syncOutput(dst, _dst, stream);
+}
+
+int cv::cuda::countNonZero(InputArray _src)
+{
+    Stream& stream = Stream::Null();
+
+    BufferPool pool(stream);
+    GpuMat buf = pool.getBuffer(1, 1, CV_32SC1);
+
+    countNonZero(_src, buf, stream);
+
+    int data;
+    buf.download(Mat(1, 1, CV_32SC1, &data));
+
+    return data;
 }
 
 #endif
modules/cudaarithm/src/cuda/integral.cu
@@ -50,51 +50,58 @@
 #include "opencv2/cudaarithm.hpp"
 #include "opencv2/cudev.hpp"
+#include "opencv2/core/private.cuda.hpp"
 
+using namespace cv;
+using namespace cv::cuda;
 using namespace cv::cudev;
 
 ////////////////////////////////////////////////////////////////////////
 // integral
 
-void cv::cuda::integral(InputArray _src, OutputArray _dst, GpuMat& buffer, Stream& stream)
+void cv::cuda::integral(InputArray _src, OutputArray _dst, Stream& stream)
 {
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);
 
     CV_Assert( src.type() == CV_8UC1 );
 
-    GpuMat_<int>& res = (GpuMat_<int>&) buffer;
+    BufferPool pool(stream);
+    GpuMat_<int> res(src.size(), pool.getAllocator());
 
     gridIntegral(globPtr<uchar>(src), res, stream);
 
-    _dst.create(src.rows + 1, src.cols + 1, CV_32SC1);
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src.rows + 1, src.cols + 1, CV_32SC1, stream);
 
     dst.setTo(Scalar::all(0), stream);
 
     GpuMat inner = dst(Rect(1, 1, src.cols, src.rows));
     res.copyTo(inner, stream);
 
+    syncOutput(dst, _dst, stream);
 }
 
 //////////////////////////////////////////////////////////////////////////////
 // sqrIntegral
 
-void cv::cuda::sqrIntegral(InputArray _src, OutputArray _dst, GpuMat& buf, Stream& stream)
+void cv::cuda::sqrIntegral(InputArray _src, OutputArray _dst, Stream& stream)
 {
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);
 
     CV_Assert( src.type() == CV_8UC1 );
 
-    GpuMat_<double>& res = (GpuMat_<double>&) buf;
+    BufferPool pool(Stream::Null());
+    GpuMat_<double> res(pool.getBuffer(src.size(), CV_64FC1));
 
    gridIntegral(sqr_(cvt_<int>(globPtr<uchar>(src))), res, stream);
 
-    _dst.create(src.rows + 1, src.cols + 1, CV_64FC1);
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src.rows + 1, src.cols + 1, CV_64FC1, stream);
 
     dst.setTo(Scalar::all(0), stream);
 
     GpuMat inner = dst(Rect(1, 1, src.cols, src.rows));
     res.copyTo(inner, stream);
 
+    syncOutput(dst, _dst, stream);
 }
 
 #endif
modules/cudaarithm/src/cuda/lut.cu
@@ -50,8 +50,10 @@
 #include "opencv2/cudaarithm.hpp"
 #include "opencv2/cudev.hpp"
+#include "opencv2/core/private.cuda.hpp"
 
 using namespace cv;
+using namespace cv::cuda;
 using namespace cv::cudev;
 
 namespace

@@ -165,7 +167,7 @@ namespace
     void LookUpTableImpl::transform(InputArray _src, OutputArray _dst, Stream& stream)
     {
-        GpuMat src = _src.getGpuMat();
+        GpuMat src = getInputMat(_src, stream);
 
         const int cn = src.channels();
         const int lut_cn = d_lut.channels();

@@ -173,8 +175,7 @@ namespace
         CV_Assert( src.type() == CV_8UC1 || src.type() == CV_8UC3 );
         CV_Assert( lut_cn == 1 || lut_cn == cn );
 
-        _dst.create(src.size(), src.type());
-        GpuMat dst = _dst.getGpuMat();
+        GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
 
         if (lut_cn == 1)
         {

@@ -196,6 +197,8 @@ namespace
             dst3.assign(lut_(src3, tbl), stream);
         }
 
+        syncOutput(dst, _dst, stream);
     }
 }
modules/cudaarithm/src/cuda/math.cu
@@ -50,7 +50,10 @@
 #include "opencv2/cudaarithm.hpp"
 #include "opencv2/cudev.hpp"
+#include "opencv2/core/private.cuda.hpp"
 
+using namespace cv;
+using namespace cv::cuda;
 using namespace cv::cudev;
 
 namespace

@@ -92,16 +95,15 @@ void cv::cuda::abs(InputArray _src, OutputArray _dst, Stream& stream)
         absMat<double>
     };
 
-    GpuMat src = _src.getGpuMat();
-
-    const int depth = src.depth();
+    GpuMat src = getInputMat(_src, stream);
 
-    CV_DbgAssert( depth <= CV_64F );
+    CV_Assert( src.depth() <= CV_64F );
 
-    _dst.create(src.size(), src.type());
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
 
-    funcs[depth](src.reshape(1), dst.reshape(1), stream);
+    funcs[src.depth()](src.reshape(1), dst.reshape(1), stream);
+
+    syncOutput(dst, _dst, stream);
 }
 
 //////////////////////////////////////////////////////////////////////////////

@@ -113,7 +115,7 @@ namespace
     {
         __device__ __forceinline__ T operator ()(T x) const
         {
-            return saturate_cast<T>(x * x);
+            return cudev::saturate_cast<T>(x * x);
         }
     };

@@ -138,16 +140,15 @@ void cv::cuda::sqr(InputArray _src, OutputArray _dst, Stream& stream)
         sqrMat<double>
     };
 
-    GpuMat src = _src.getGpuMat();
-
-    const int depth = src.depth();
+    GpuMat src = getInputMat(_src, stream);
 
-    CV_DbgAssert( depth <= CV_64F );
+    CV_Assert( src.depth() <= CV_64F );
 
-    _dst.create(src.size(), src.type());
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
 
-    funcs[depth](src.reshape(1), dst.reshape(1), stream);
+    funcs[src.depth()](src.reshape(1), dst.reshape(1), stream);
+
+    syncOutput(dst, _dst, stream);
 }
 
 //////////////////////////////////////////////////////////////////////////////

@@ -176,16 +177,15 @@ void cv::cuda::sqrt(InputArray _src, OutputArray _dst, Stream& stream)
         sqrtMat<double>
     };
 
-    GpuMat src = _src.getGpuMat();
-
-    const int depth = src.depth();
+    GpuMat src = getInputMat(_src, stream);
 
-    CV_DbgAssert( depth <= CV_64F );
+    CV_Assert( src.depth() <= CV_64F );
 
-    _dst.create(src.size(), src.type());
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
 
-    funcs[depth](src.reshape(1), dst.reshape(1), stream);
+    funcs[src.depth()](src.reshape(1), dst.reshape(1), stream);
+
+    syncOutput(dst, _dst, stream);
 }
 
 ////////////////////////////////////////////////////////////////////////

@@ -198,7 +198,7 @@ namespace
         __device__ __forceinline__ T operator ()(T x) const
         {
             exp_func<T> f;
-            return saturate_cast<T>(f(x));
+            return cudev::saturate_cast<T>(f(x));
         }
     };

@@ -223,16 +223,15 @@ void cv::cuda::exp(InputArray _src, OutputArray _dst, Stream& stream)
         expMat<double>
     };
 
-    GpuMat src = _src.getGpuMat();
-
-    const int depth = src.depth();
+    GpuMat src = getInputMat(_src, stream);
 
-    CV_DbgAssert( depth <= CV_64F );
+    CV_Assert( src.depth() <= CV_64F );
 
-    _dst.create(src.size(), src.type());
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
 
-    funcs[depth](src.reshape(1), dst.reshape(1), stream);
+    funcs[src.depth()](src.reshape(1), dst.reshape(1), stream);
+
+    syncOutput(dst, _dst, stream);
 }
 
 ////////////////////////////////////////////////////////////////////////

@@ -261,16 +260,15 @@ void cv::cuda::log(InputArray _src, OutputArray _dst, Stream& stream)
         logMat<double>
     };
 
-    GpuMat src = _src.getGpuMat();
-
-    const int depth = src.depth();
+    GpuMat src = getInputMat(_src, stream);
 
-    CV_DbgAssert( depth <= CV_64F );
+    CV_Assert( src.depth() <= CV_64F );
 
-    _dst.create(src.size(), src.type());
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
 
-    funcs[depth](src.reshape(1), dst.reshape(1), stream);
+    funcs[src.depth()](src.reshape(1), dst.reshape(1), stream);
+
+    syncOutput(dst, _dst, stream);
 }
 
 ////////////////////////////////////////////////////////////////////////

@@ -284,7 +282,7 @@ namespace
         __device__ __forceinline__ T operator()(T e) const
         {
-            return saturate_cast<T>(__powf((float)e, power));
+            return cudev::saturate_cast<T>(__powf((float)e, power));
         }
     };
     template<typename T> struct PowOp<T, true> : unary_function<T, T>

@@ -293,7 +291,7 @@ namespace
         __device__ __forceinline__ T operator()(T e) const
         {
-            T res = saturate_cast<T>(__powf((float)e, power));
+            T res = cudev::saturate_cast<T>(__powf((float)e, power));
 
             if ((e < 0) && (1 & static_cast<int>(power)))
                 res *= -1;

@@ -344,16 +342,15 @@ void cv::cuda::pow(InputArray _src, double power, OutputArray _dst, Stream& stre
         powMat<double>
     };
 
-    GpuMat src = _src.getGpuMat();
-
-    const int depth = src.depth();
+    GpuMat src = getInputMat(_src, stream);
 
-    CV_DbgAssert(depth <= CV_64F);
+    CV_Assert( src.depth() <= CV_64F );
 
-    _dst.create(src.size(), src.type());
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);
 
-    funcs[depth](src.reshape(1), power, dst.reshape(1), stream);
+    funcs[src.depth()](src.reshape(1), power, dst.reshape(1), stream);
+
+    syncOutput(dst, _dst, stream);
 }
 
 #endif
modules/cudaarithm/src/cuda/minmax.cu
@@ -50,62 +50,140 @@
 #include "opencv2/cudaarithm.hpp"
 #include "opencv2/cudev.hpp"
+#include "opencv2/core/private.cuda.hpp"
 
+using namespace cv;
+using namespace cv::cuda;
 using namespace cv::cudev;
 
 namespace
 {
-    template <typename T>
-    void minMaxImpl(const GpuMat& _src, const GpuMat& mask, GpuMat& _buf, double* minVal, double* maxVal)
+    template <typename T, typename R>
+    void minMaxImpl(const GpuMat& _src, const GpuMat& mask, GpuMat& _dst, Stream& stream)
     {
-        typedef typename SelectIf<
-                TypesEquals<T, double>::value,
-                double,
-                typename SelectIf<TypesEquals<T, float>::value, float, int>::type
-                >::type work_type;
-
         const GpuMat_<T>& src = (const GpuMat_<T>&) _src;
-        GpuMat_<work_type>& buf = (GpuMat_<work_type>&) _buf;
+        GpuMat_<R>& dst = (GpuMat_<R>&) _dst;
 
         if (mask.empty())
-            gridFindMinMaxVal(src, buf);
+            gridFindMinMaxVal(src, dst, stream);
         else
-            gridFindMinMaxVal(src, buf, globPtr<uchar>(mask));
-
-        work_type data[2];
-        buf.download(cv::Mat(1, 2, buf.type(), data));
-
-        if (minVal)
-            *minVal = data[0];
-
-        if (maxVal)
-            *maxVal = data[1];
+            gridFindMinMaxVal(src, dst, globPtr<uchar>(mask), stream);
+    }
+
+    template <typename T, typename R>
+    void minMaxImpl(const GpuMat& src, const GpuMat& mask, double* minVal, double* maxVal)
+    {
+        BufferPool pool(Stream::Null());
+        GpuMat buf(pool.getBuffer(1, 2, DataType<R>::type));
+
+        minMaxImpl<T, R>(src, mask, buf, Stream::Null());
+
+        R data[2];
+        buf.download(Mat(1, 2, buf.type(), data));
+
+        if (minVal)
+            *minVal = data[0];
+
+        if (maxVal)
+            *maxVal = data[1];
     }
 }
 
-void cv::cuda::minMax(InputArray _src, double* minVal, double* maxVal, InputArray _mask, GpuMat& buf)
+void cv::cuda::findMinMax(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream)
 {
-    typedef void (*func_t)(const GpuMat& _src, const GpuMat& mask, GpuMat& _buf, double* minVal, double* maxVal);
+    typedef void (*func_t)(const GpuMat& _src, const GpuMat& mask, GpuMat& _dst, Stream& stream);
     static const func_t funcs[] =
     {
-        minMaxImpl<uchar>,
-        minMaxImpl<schar>,
-        minMaxImpl<ushort>,
-        minMaxImpl<short>,
-        minMaxImpl<int>,
-        minMaxImpl<float>,
-        minMaxImpl<double>
+        minMaxImpl<uchar, int>,
+        minMaxImpl<schar, int>,
+        minMaxImpl<ushort, int>,
+        minMaxImpl<short, int>,
+        minMaxImpl<int, int>,
+        minMaxImpl<float, float>,
+        minMaxImpl<double, double>
     };
 
-    GpuMat src = _src.getGpuMat();
-    GpuMat mask = _mask.getGpuMat();
+    const GpuMat src = getInputMat(_src, stream);
+    const GpuMat mask = getInputMat(_mask, stream);
 
     CV_Assert( src.channels() == 1 );
-    CV_DbgAssert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );
+    CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );
+
+    const int src_depth = src.depth();
+    const int dst_depth = src_depth < CV_32F ? CV_32S : src_depth;
+
+    GpuMat dst = getOutputMat(_dst, 1, 2, dst_depth, stream);
 
     const func_t func = funcs[src.depth()];
+    func(src, mask, dst, stream);
 
-    func(src, mask, buf, minVal, maxVal);
+    syncOutput(dst, _dst, stream);
+}
+
+void cv::cuda::minMax(InputArray _src, double* minVal, double* maxVal, InputArray _mask)
+{
+    Stream& stream = Stream::Null();
+
+    HostMem dst;
+    findMinMax(_src, dst, _mask, stream);
+
+    stream.waitForCompletion();
+
+    double vals[2];
+    dst.createMatHeader().convertTo(Mat(1, 2, CV_64FC1, &vals[0]), CV_64F);
+
+    if (minVal)
+        *minVal = vals[0];
+
+    if (maxVal)
+        *maxVal = vals[1];
+}
+
+namespace cv { namespace cuda { namespace internal {
+
+void findMaxAbs(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream);
+
+}}}
+
+namespace
+{
+    template <typename T, typename R>
+    void findMaxAbsImpl(const GpuMat& _src, const GpuMat& mask, GpuMat& _dst, Stream& stream)
+    {
+        const GpuMat_<T>& src = (const GpuMat_<T>&) _src;
+        GpuMat_<R>& dst = (GpuMat_<R>&) _dst;
+
+        if (mask.empty())
+            gridFindMaxVal(abs_(src), dst, stream);
+        else
+            gridFindMaxVal(abs_(src), dst, globPtr<uchar>(mask), stream);
+    }
+}
+
+void cv::cuda::internal::findMaxAbs(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream)
+{
+    typedef void (*func_t)(const GpuMat& _src, const GpuMat& mask, GpuMat& _dst, Stream& stream);
+    static const func_t funcs[] =
+    {
+        findMaxAbsImpl<uchar, int>,
+        findMaxAbsImpl<schar, int>,
+        findMaxAbsImpl<ushort, int>,
+        findMaxAbsImpl<short, int>,
+        findMaxAbsImpl<int, int>,
+        findMaxAbsImpl<float, float>,
+        findMaxAbsImpl<double, double>
+    };
+
+    const GpuMat src = getInputMat(_src, stream);
+    const GpuMat mask = getInputMat(_mask, stream);
+
+    CV_Assert( src.channels() == 1 );
+    CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );
+
+    const int src_depth = src.depth();
+    const int dst_depth = src_depth < CV_32F ? CV_32S : src_depth;
+
+    GpuMat dst = getOutputMat(_dst, 1, 1, dst_depth, stream);
+
+    const func_t func = funcs[src.depth()];
+    func(src, mask, dst, stream);
+
+    syncOutput(dst, _dst, stream);
 }
 
 #endif
modules/cudaarithm/src/cuda/minmaxloc.cu
@@ -50,78 +50,110 @@
 #include "opencv2/cudaarithm.hpp"
 #include "opencv2/cudev.hpp"
+#include "opencv2/core/private.cuda.hpp"
 
+using namespace cv;
+using namespace cv::cuda;
 using namespace cv::cudev;
 
 namespace
 {
-    template <typename T>
-    void minMaxLocImpl(const GpuMat& _src, const GpuMat& mask, GpuMat& _valBuf, GpuMat& _locBuf, double* minVal, double* maxVal, cv::Point* minLoc, cv::Point* maxLoc)
+    template <typename T, typename R>
+    void minMaxLocImpl(const GpuMat& _src, const GpuMat& mask, GpuMat& _valBuf, GpuMat& _locBuf, Stream& stream)
     {
-        typedef typename SelectIf<
-                TypesEquals<T, double>::value,
-                double,
-                typename SelectIf<TypesEquals<T, float>::value, float, int>::type
-                >::type work_type;
-
         const GpuMat_<T>& src = (const GpuMat_<T>&) _src;
-        GpuMat_<work_type>& valBuf = (GpuMat_<work_type>&) _valBuf;
+        GpuMat_<R>& valBuf = (GpuMat_<R>&) _valBuf;
         GpuMat_<int>& locBuf = (GpuMat_<int>&) _locBuf;
 
         if (mask.empty())
-            gridMinMaxLoc(src, valBuf, locBuf);
+            gridMinMaxLoc(src, valBuf, locBuf, stream);
         else
-            gridMinMaxLoc(src, valBuf, locBuf, globPtr<uchar>(mask));
-
-        cv::Mat_<work_type> h_valBuf;
-        cv::Mat_<int> h_locBuf;
-
-        valBuf.download(h_valBuf);
-        locBuf.download(h_locBuf);
-
-        if (minVal)
-            *minVal = h_valBuf(0, 0);
-
-        if (maxVal)
-            *maxVal = h_valBuf(1, 0);
-
-        if (minLoc)
-        {
-            const int idx = h_locBuf(0, 0);
-            *minLoc = cv::Point(idx % src.cols, idx / src.cols);
-        }
-
-        if (maxLoc)
-        {
-            const int idx = h_locBuf(1, 0);
-            *maxLoc = cv::Point(idx % src.cols, idx / src.cols);
-        }
+            gridMinMaxLoc(src, valBuf, locBuf, globPtr<uchar>(mask), stream);
     }
 }
 
-void cv::cuda::minMaxLoc(InputArray _src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, InputArray _mask, GpuMat& valBuf, GpuMat& locBuf)
+void cv::cuda::findMinMaxLoc(InputArray _src, OutputArray _minMaxVals, OutputArray _loc, InputArray _mask, Stream& stream)
 {
-    typedef void (*func_t)(const GpuMat& _src, const GpuMat& mask, GpuMat& _valBuf, GpuMat& _locBuf, double* minVal, double* maxVal, cv::Point* minLoc, cv::Point* maxLoc);
+    typedef void (*func_t)(const GpuMat& _src, const GpuMat& mask, GpuMat& _valBuf, GpuMat& _locBuf, Stream& stream);
     static const func_t funcs[] =
     {
-        minMaxLocImpl<uchar>,
-        minMaxLocImpl<schar>,
-        minMaxLocImpl<ushort>,
-        minMaxLocImpl<short>,
-        minMaxLocImpl<int>,
-        minMaxLocImpl<float>,
-        minMaxLocImpl<double>
+        minMaxLocImpl<uchar, int>,
+        minMaxLocImpl<schar, int>,
+        minMaxLocImpl<ushort, int>,
+        minMaxLocImpl<short, int>,
+        minMaxLocImpl<int, int>,
+        minMaxLocImpl<float, float>,
+        minMaxLocImpl<double, double>
    };
 
-    GpuMat src = _src.getGpuMat();
-    GpuMat mask = _mask.getGpuMat();
+    const GpuMat src = getInputMat(_src, stream);
+    const GpuMat mask = getInputMat(_mask, stream);
 
     CV_Assert( src.channels() == 1 );
-    CV_DbgAssert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );
+    CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );
 
-    const func_t func = funcs[src.depth()];
+    const int src_depth = src.depth();
 
-    func(src, mask, valBuf, locBuf, minVal, maxVal, minLoc, maxLoc);
+    BufferPool pool(stream);
+    GpuMat valBuf(pool.getAllocator());
+    GpuMat locBuf(pool.getAllocator());
+
+    const func_t func = funcs[src_depth];
+    func(src, mask, valBuf, locBuf, stream);
+
+    GpuMat minMaxVals = valBuf.colRange(0, 1);
+    GpuMat loc = locBuf.colRange(0, 1);
+
+    if (_minMaxVals.kind() == _InputArray::CUDA_GPU_MAT)
+    {
+        minMaxVals.copyTo(_minMaxVals, stream);
+    }
+    else
+    {
+        minMaxVals.download(_minMaxVals, stream);
+    }
+
+    if (_loc.kind() == _InputArray::CUDA_GPU_MAT)
+    {
+        loc.copyTo(_loc, stream);
+    }
+    else
+    {
+        loc.download(_loc, stream);
+    }
+}
+
+void cv::cuda::minMaxLoc(InputArray _src, double* minVal, double* maxVal, Point* minLoc, Point* maxLoc, InputArray _mask)
+{
+    Stream& stream = Stream::Null();
+
+    HostMem minMaxVals, locVals;
+    findMinMaxLoc(_src, minMaxVals, locVals, _mask, stream);
+
+    stream.waitForCompletion();
+
+    double vals[2];
+    minMaxVals.createMatHeader().convertTo(Mat(minMaxVals.size(), CV_64FC1, &vals[0]), CV_64F);
+
+    int locs[2];
+    locVals.createMatHeader().copyTo(Mat(locVals.size(), CV_32SC1, &locs[0]));
+    Size size = _src.size();
+    cv::Point locs2D[] = {
+        cv::Point(locs[0] % size.width, locs[0] / size.width),
+        cv::Point(locs[1] % size.width, locs[1] / size.width),
+    };
+
+    if (minVal)
+        *minVal = vals[0];
+
+    if (maxVal)
+        *maxVal = vals[1];
+
+    if (minLoc)
+        *minLoc = locs2D[0];
+
+    if (maxLoc)
+        *maxLoc = locs2D[1];
 }
 
 #endif
modules/cudaarithm/src/cuda/mul_spectrums.cu
@@ -50,7 +50,10 @@
 #include "opencv2/cudaarithm.hpp"
 #include "opencv2/cudev.hpp"
+#include "opencv2/core/private.cuda.hpp"
 
+using namespace cv;
+using namespace cv::cuda;
 using namespace cv::cudev;
 
 //////////////////////////////////////////////////////////////////////////////

@@ -120,33 +123,33 @@ void cv::cuda::mulSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst
 {
     (void) flags;
 
-    GpuMat src1 = _src1.getGpuMat();
-    GpuMat src2 = _src2.getGpuMat();
+    GpuMat src1 = getInputMat(_src1, stream);
+    GpuMat src2 = getInputMat(_src2, stream);
 
     CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2 );
     CV_Assert( src1.size() == src2.size() );
 
-    _dst.create(src1.size(), CV_32FC2);
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src1.size(), CV_32FC2, stream);
 
     if (conjB)
         gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), comlex_mul_conj(), stream);
     else
         gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), comlex_mul(), stream);
 
+    syncOutput(dst, _dst, stream);
 }
 
 void cv::cuda::mulAndScaleSpectrums(InputArray _src1, InputArray _src2, OutputArray _dst, int flags, float scale, bool conjB, Stream& stream)
 {
     (void) flags;
 
-    GpuMat src1 = _src1.getGpuMat();
-    GpuMat src2 = _src2.getGpuMat();
+    GpuMat src1 = getInputMat(_src1, stream);
+    GpuMat src2 = getInputMat(_src2, stream);
 
     CV_Assert( src1.type() == src2.type() && src1.type() == CV_32FC2);
     CV_Assert( src1.size() == src2.size() );
 
-    _dst.create(src1.size(), CV_32FC2);
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src1.size(), CV_32FC2, stream);
 
     if (conjB)
     {

@@ -160,6 +163,8 @@ void cv::cuda::mulAndScaleSpectrums(InputArray _src1, InputArray _src2, OutputAr
         op.scale = scale;
         gridTransformBinary(globPtr<float2>(src1), globPtr<float2>(src2), globPtr<float2>(dst), op, stream);
     }
 
+    syncOutput(dst, _dst, stream);
 }
 
 #endif
modules/cudaarithm/src/cuda/norm.cu
View file @
16b56e71
...
...
@@ -50,70 +50,140 @@
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
namespace
{
double normDiffInf(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _buf
)
void normDiffInf(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _dst, Stream& stream
)
{
const GpuMat_<uchar>& src1 = (const GpuMat_<uchar>&) _src1;
const GpuMat_<uchar>& src2 = (const GpuMat_<uchar>&) _src2;
GpuMat_<int>&
buf = (GpuMat_<int>&) _buf
;
GpuMat_<int>&
dst = (GpuMat_<int>&) _dst
;
gridFindMinMaxVal(abs_(cvt_<int>(src1) - cvt_<int>(src2)), buf);
int data[2];
buf.download(cv::Mat(1, 2, buf.type(), data));
return data[1];
gridFindMaxVal(abs_(cvt_<int>(src1) - cvt_<int>(src2)), dst, stream);
}
double normDiffL1(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _buf
)
void normDiffL1(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _dst, Stream& stream
)
{
const GpuMat_<uchar>& src1 = (const GpuMat_<uchar>&) _src1;
const GpuMat_<uchar>& src2 = (const GpuMat_<uchar>&) _src2;
GpuMat_<int>& buf = (GpuMat_<int>&) _buf;
gridCalcSum(abs_(cvt_<int>(src1) - cvt_<int>(src2)), buf);
GpuMat_<int>& dst = (GpuMat_<int>&) _dst;
int data;
buf.download(cv::Mat(1, 1, buf.type(), &data));
return data;
gridCalcSum(abs_(cvt_<int>(src1) - cvt_<int>(src2)), dst, stream);
}
double normDiffL2(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _buf
)
void normDiffL2(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _dst, Stream& stream
)
{
const GpuMat_<uchar>& src1 = (const GpuMat_<uchar>&) _src1;
const GpuMat_<uchar>& src2 = (const GpuMat_<uchar>&) _src2;
GpuMat_<double>& buf = (GpuMat_<double>&) _buf;
gridCalcSum(sqr_(cvt_<double>(src1) - cvt_<double>(src2)), buf);
GpuMat_<double>& dst = (GpuMat_<double>&) _dst;
double data
;
buf.download(cv::Mat(1, 1, buf.type(), &data
));
BufferPool pool(stream)
;
GpuMat_<double> buf(1, 1, pool.getAllocator(
));
return std::sqrt(data);
gridCalcSum(sqr_(cvt_<double>(src1) - cvt_<double>(src2)), buf, stream);
gridTransformUnary(buf, dst, sqrt_func<double>(), stream);
}
}
-double cv::cuda::norm(InputArray _src1, InputArray _src2, GpuMat& buf, int normType)
+void cv::cuda::calcNormDiff(InputArray _src1, InputArray _src2, OutputArray _dst, int normType, Stream& stream)
 {
-    typedef double (*func_t)(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _buf);
+    typedef void (*func_t)(const GpuMat& _src1, const GpuMat& _src2, GpuMat& _dst, Stream& stream);
     static const func_t funcs[] =
     {
         0, normDiffInf, normDiffL1, 0, normDiffL2
     };

-    GpuMat src1 = _src1.getGpuMat();
-    GpuMat src2 = _src2.getGpuMat();
+    GpuMat src1 = getInputMat(_src1, stream);
+    GpuMat src2 = getInputMat(_src2, stream);

     CV_Assert( src1.type() == CV_8UC1 );
     CV_Assert( src1.size() == src2.size() && src1.type() == src2.type() );
     CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 );

-    return funcs[normType](src1, src2, buf);
+    GpuMat dst = getOutputMat(_dst, 1, 1, normType == NORM_L2 ? CV_64FC1 : CV_32SC1, stream);
+
+    const func_t func = funcs[normType];
+    func(src1, src2, dst, stream);
+
+    syncOutput(dst, _dst, stream);
 }
double cv::cuda::norm(InputArray _src1, InputArray _src2, int normType)
{
Stream& stream = Stream::Null();
HostMem dst;
calcNormDiff(_src1, _src2, dst, normType, stream);
stream.waitForCompletion();
double val;
dst.createMatHeader().convertTo(Mat(1, 1, CV_64FC1, &val), CV_64F);
return val;
}
namespace cv { namespace cuda { namespace internal {
void normL2(cv::InputArray _src, cv::OutputArray _dst, cv::InputArray _mask, Stream& stream);
}}}
namespace
{
template <typename T, typename R>
void normL2Impl(const GpuMat& _src, const GpuMat& mask, GpuMat& _dst, Stream& stream)
{
const GpuMat_<T>& src = (const GpuMat_<T>&) _src;
GpuMat_<R>& dst = (GpuMat_<R>&) _dst;
BufferPool pool(stream);
GpuMat_<double> buf(1, 1, pool.getAllocator());
if (mask.empty())
{
gridCalcSum(sqr_(cvt_<double>(src)), buf, stream);
}
else
{
gridCalcSum(sqr_(cvt_<double>(src)), buf, globPtr<uchar>(mask), stream);
}
gridTransformUnary(buf, dst, sqrt_func<double>(), stream);
}
}
void cv::cuda::internal::normL2(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream)
{
typedef void (*func_t)(const GpuMat& _src, const GpuMat& mask, GpuMat& _dst, Stream& stream);
static const func_t funcs[] =
{
normL2Impl<uchar, double>,
normL2Impl<schar, double>,
normL2Impl<ushort, double>,
normL2Impl<short, double>,
normL2Impl<int, double>,
normL2Impl<float, double>,
normL2Impl<double, double>
};
const GpuMat src = getInputMat(_src, stream);
const GpuMat mask = getInputMat(_mask, stream);
CV_Assert( src.channels() == 1 );
CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );
GpuMat dst = getOutputMat(_dst, 1, 1, CV_64FC1, stream);
const func_t func = funcs[src.depth()];
func(src, mask, dst, stream);
syncOutput(dst, _dst, stream);
}
#endif
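The pattern above is the heart of this PR: the old blocking norm overloads become thin wrappers over stream-aware calc* functions that write their scalar result into an OutputArray. A minimal caller-side sketch of the asynchronous path (the CV_8UC1 inputs d_a and d_b are hypothetical, pre-uploaded GpuMats):

    cv::cuda::Stream stream;
    cv::cuda::HostMem result;    // pinned host memory the device can write into

    // Enqueue the whole reduction; nothing blocks the CPU here.
    cv::cuda::calcNormDiff(d_a, d_b, result, cv::NORM_L1, stream);

    stream.waitForCompletion();  // after this, the downloaded value is safe to read

    double val;
    result.createMatHeader().convertTo(cv::Mat(1, 1, CV_64FC1, &val), CV_64F);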
modules/cudaarithm/src/cuda/normalize.cu
0 → 100644
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "opencv2/opencv_modules.hpp"
#ifndef HAVE_OPENCV_CUDEV
#error "opencv_cudev is required"
#else
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
namespace {
template <typename T, typename R, typename I>
struct ConvertorMinMax : unary_function<T, R>
{
typedef typename LargerType<T, R>::type larger_type1;
typedef typename LargerType<larger_type1, I>::type larger_type2;
typedef typename LargerType<larger_type2, float>::type scalar_type;
scalar_type dmin, dmax;
const I* minMaxVals;
__device__ R operator ()(typename TypeTraits<T>::parameter_type src) const
{
const scalar_type smin = minMaxVals[0];
const scalar_type smax = minMaxVals[1];
const scalar_type scale = (dmax - dmin) * (smax - smin > numeric_limits<scalar_type>::epsilon() ? 1.0 / (smax - smin) : 0.0);
const scalar_type shift = dmin - smin * scale;
return cudev::saturate_cast<R>(scale * src + shift);
}
};
template <typename T, typename R, typename I>
void normalizeMinMax(const GpuMat& _src, GpuMat& _dst, double a, double b, const GpuMat& mask, Stream& stream)
{
const GpuMat_<T>& src = (const GpuMat_<T>&)_src;
GpuMat_<R>& dst = (GpuMat_<R>&)_dst;
BufferPool pool(stream);
GpuMat_<I> minMaxVals(1, 2, pool.getAllocator());
if (mask.empty())
{
gridFindMinMaxVal(src, minMaxVals, stream);
}
else
{
gridFindMinMaxVal(src, minMaxVals, globPtr<uchar>(mask), stream);
}
ConvertorMinMax<T, R, I> cvt;
cvt.dmin = std::min(a, b);
cvt.dmax = std::max(a, b);
cvt.minMaxVals = minMaxVals[0];
if (mask.empty())
{
gridTransformUnary(src, dst, cvt, stream);
}
else
{
dst.setTo(Scalar::all(0), stream);
gridTransformUnary(src, dst, cvt, globPtr<uchar>(mask), stream);
}
}
template <typename T, typename R, typename I, bool normL2>
struct ConvertorNorm : unary_function<T, R>
{
typedef typename LargerType<T, R>::type larger_type1;
typedef typename LargerType<larger_type1, I>::type larger_type2;
typedef typename LargerType<larger_type2, float>::type scalar_type;
scalar_type a;
const I* normVal;
__device__ R operator ()(typename TypeTraits<T>::parameter_type src) const
{
sqrt_func<scalar_type> sqrt;
scalar_type scale = normL2 ? sqrt(*normVal) : *normVal;
scale = scale > numeric_limits<scalar_type>::epsilon() ? a / scale : 0.0;
return cudev::saturate_cast<R>(scale * src);
}
};
template <typename T, typename R, typename I>
void normalizeNorm(const GpuMat& _src, GpuMat& _dst, double a, int normType, const GpuMat& mask, Stream& stream)
{
const GpuMat_<T>& src = (const GpuMat_<T>&)_src;
GpuMat_<R>& dst = (GpuMat_<R>&)_dst;
BufferPool pool(stream);
GpuMat_<I> normVal(1, 1, pool.getAllocator());
if (normType == NORM_L1)
{
if (mask.empty())
{
gridCalcSum(abs_(cvt_<I>(src)), normVal, stream);
}
else
{
gridCalcSum(abs_(cvt_<I>(src)), normVal, globPtr<uchar>(mask), stream);
}
}
else if (normType == NORM_L2)
{
if (mask.empty())
{
gridCalcSum(sqr_(cvt_<I>(src)), normVal, stream);
}
else
{
gridCalcSum(sqr_(cvt_<I>(src)), normVal, globPtr<uchar>(mask), stream);
}
}
else // NORM_INF
{
if (mask.empty())
{
gridFindMaxVal(abs_(cvt_<I>(src)), normVal, stream);
}
else
{
gridFindMaxVal(abs_(cvt_<I>(src)), normVal, globPtr<uchar>(mask), stream);
}
}
if (normType == NORM_L2)
{
ConvertorNorm<T, R, I, true> cvt;
cvt.a = a;
cvt.normVal = normVal[0];
if (mask.empty())
{
gridTransformUnary(src, dst, cvt, stream);
}
else
{
dst.setTo(Scalar::all(0), stream);
gridTransformUnary(src, dst, cvt, globPtr<uchar>(mask), stream);
}
}
else
{
ConvertorNorm<T, R, I, false> cvt;
cvt.a = a;
cvt.normVal = normVal[0];
if (mask.empty())
{
gridTransformUnary(src, dst, cvt, stream);
}
else
{
dst.setTo(Scalar::all(0), stream);
gridTransformUnary(src, dst, cvt, globPtr<uchar>(mask), stream);
}
}
}
} // namespace
void cv::cuda::normalize(InputArray _src, OutputArray _dst, double a, double b, int normType, int dtype, InputArray _mask, Stream& stream)
{
typedef void (*func_minmax_t)(const GpuMat& _src, GpuMat& _dst, double a, double b, const GpuMat& mask, Stream& stream);
typedef void (*func_norm_t)(const GpuMat& _src, GpuMat& _dst, double a, int normType, const GpuMat& mask, Stream& stream);
static const func_minmax_t funcs_minmax[] =
{
normalizeMinMax<uchar, float, float>,
normalizeMinMax<schar, float, float>,
normalizeMinMax<ushort, float, float>,
normalizeMinMax<short, float, float>,
normalizeMinMax<int, float, float>,
normalizeMinMax<float, float, float>,
normalizeMinMax<double, double, double>
};
static const func_norm_t funcs_norm[] =
{
normalizeNorm<uchar, float, float>,
normalizeNorm<schar, float, float>,
normalizeNorm<ushort, float, float>,
normalizeNorm<short, float, float>,
normalizeNorm<int, float, float>,
normalizeNorm<float, float, float>,
normalizeNorm<double, double, double>
};
CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_MINMAX );
const GpuMat src = getInputMat(_src, stream);
const GpuMat mask = getInputMat(_mask, stream);
CV_Assert( src.channels() == 1 );
CV_Assert( mask.empty() || (mask.size() == src.size() && mask.type() == CV_8U) );
dtype = CV_MAT_DEPTH(dtype);
const int src_depth = src.depth();
const int tmp_depth = src_depth <= CV_32F ? CV_32F : src_depth;
GpuMat dst;
if (dtype == tmp_depth)
{
dst = getOutputMat(_dst, src.size(), tmp_depth, stream);
}
else
{
BufferPool pool(stream);
dst = pool.getBuffer(src.size(), tmp_depth);
}
if (normType == NORM_MINMAX)
{
const func_minmax_t func = funcs_minmax[src_depth];
func(src, dst, a, b, mask, stream);
}
else
{
const func_norm_t func = funcs_norm[src_depth];
func(src, dst, a, normType, mask, stream);
}
if (dtype == tmp_depth)
{
syncOutput(dst, _dst, stream);
}
else
{
dst.convertTo(_dst, dtype, stream);
}
}
#endif
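With the buffer-taking overload gone, normalize is driven entirely by a stream plus an internal BufferPool. A minimal sketch of a call (the single-channel input d_src is an assumption):

    cv::cuda::Stream stream;
    cv::cuda::GpuMat d_dst;

    // Min-max rescale into [0, 255] with an 8-bit result, queued on the stream.
    cv::cuda::normalize(d_src, d_dst, 0, 255, cv::NORM_MINMAX, CV_8U, cv::noArray(), stream);

    stream.waitForCompletion();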
modules/cudaarithm/src/cuda/polar_cart.cu
...
...
@@ -50,55 +50,59 @@
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
 void cv::cuda::magnitude(InputArray _x, InputArray _y, OutputArray _dst, Stream& stream)
 {
-    GpuMat x = _x.getGpuMat();
-    GpuMat y = _y.getGpuMat();
+    GpuMat x = getInputMat(_x, stream);
+    GpuMat y = getInputMat(_y, stream);

-    CV_DbgAssert( x.depth() == CV_32F );
-    CV_DbgAssert( y.type() == x.type() && y.size() == x.size() );
+    CV_Assert( x.depth() == CV_32F );
+    CV_Assert( y.type() == x.type() && y.size() == x.size() );

-    _dst.create(x.size(), CV_32FC1);
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, x.size(), CV_32FC1, stream);

     GpuMat_<float> xc(x.reshape(1));
     GpuMat_<float> yc(y.reshape(1));
     GpuMat_<float> magc(dst.reshape(1));

     gridTransformBinary(xc, yc, magc, magnitude_func<float>(), stream);
+
+    syncOutput(dst, _dst, stream);
 }

 void cv::cuda::magnitudeSqr(InputArray _x, InputArray _y, OutputArray _dst, Stream& stream)
 {
-    GpuMat x = _x.getGpuMat();
-    GpuMat y = _y.getGpuMat();
+    GpuMat x = getInputMat(_x, stream);
+    GpuMat y = getInputMat(_y, stream);

-    CV_DbgAssert( x.depth() == CV_32F );
-    CV_DbgAssert( y.type() == x.type() && y.size() == x.size() );
+    CV_Assert( x.depth() == CV_32F );
+    CV_Assert( y.type() == x.type() && y.size() == x.size() );

-    _dst.create(x.size(), CV_32FC1);
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, x.size(), CV_32FC1, stream);

     GpuMat_<float> xc(x.reshape(1));
     GpuMat_<float> yc(y.reshape(1));
     GpuMat_<float> magc(dst.reshape(1));

     gridTransformBinary(xc, yc, magc, magnitude_sqr_func<float>(), stream);
+
+    syncOutput(dst, _dst, stream);
 }

 void cv::cuda::phase(InputArray _x, InputArray _y, OutputArray _dst, bool angleInDegrees, Stream& stream)
 {
-    GpuMat x = _x.getGpuMat();
-    GpuMat y = _y.getGpuMat();
+    GpuMat x = getInputMat(_x, stream);
+    GpuMat y = getInputMat(_y, stream);

-    CV_DbgAssert( x.depth() == CV_32F );
-    CV_DbgAssert( y.type() == x.type() && y.size() == x.size() );
+    CV_Assert( x.depth() == CV_32F );
+    CV_Assert( y.type() == x.type() && y.size() == x.size() );

-    _dst.create(x.size(), CV_32FC1);
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, x.size(), CV_32FC1, stream);

     GpuMat_<float> xc(x.reshape(1));
     GpuMat_<float> yc(y.reshape(1));
...
...
@@ -108,21 +112,20 @@ void cv::cuda::phase(InputArray _x, InputArray _y, OutputArray _dst, bool angleI
         gridTransformBinary(xc, yc, anglec, direction_func<float, true>(), stream);
     else
         gridTransformBinary(xc, yc, anglec, direction_func<float, false>(), stream);
+
+    syncOutput(dst, _dst, stream);
 }

 void cv::cuda::cartToPolar(InputArray _x, InputArray _y, OutputArray _mag, OutputArray _angle, bool angleInDegrees, Stream& stream)
 {
-    GpuMat x = _x.getGpuMat();
-    GpuMat y = _y.getGpuMat();
+    GpuMat x = getInputMat(_x, stream);
+    GpuMat y = getInputMat(_y, stream);

-    CV_DbgAssert( x.depth() == CV_32F );
-    CV_DbgAssert( y.type() == x.type() && y.size() == x.size() );
+    CV_Assert( x.depth() == CV_32F );
+    CV_Assert( y.type() == x.type() && y.size() == x.size() );

-    _mag.create(x.size(), CV_32FC1);
-    GpuMat mag = _mag.getGpuMat();
-
-    _angle.create(x.size(), CV_32FC1);
-    GpuMat angle = _angle.getGpuMat();
+    GpuMat mag = getOutputMat(_mag, x.size(), CV_32FC1, stream);
+    GpuMat angle = getOutputMat(_angle, x.size(), CV_32FC1, stream);

     GpuMat_<float> xc(x.reshape(1));
     GpuMat_<float> yc(y.reshape(1));
...
...
@@ -147,6 +150,9 @@ void cv::cuda::cartToPolar(InputArray _x, InputArray _y, OutputArray _mag, Outpu
                                  binaryTupleAdapter<0, 1>(direction_func<float, false>())),
                           stream);
     }
+
+    syncOutput(mag, _mag, stream);
+    syncOutput(angle, _angle, stream);
 }
namespace
...
...
@@ -173,17 +179,14 @@ namespace
 void cv::cuda::polarToCart(InputArray _mag, InputArray _angle, OutputArray _x, OutputArray _y, bool angleInDegrees, Stream& _stream)
 {
-    GpuMat mag = _mag.getGpuMat();
-    GpuMat angle = _angle.getGpuMat();
+    GpuMat mag = getInputMat(_mag, _stream);
+    GpuMat angle = getInputMat(_angle, _stream);

-    CV_DbgAssert( angle.depth() == CV_32F );
-    CV_DbgAssert( mag.empty() || (mag.type() == angle.type() && mag.size() == angle.size()) );
+    CV_Assert( angle.depth() == CV_32F );
+    CV_Assert( mag.empty() || (mag.type() == angle.type() && mag.size() == angle.size()) );

-    _x.create(angle.size(), CV_32FC1);
-    GpuMat x = _x.getGpuMat();
-
-    _y.create(angle.size(), CV_32FC1);
-    GpuMat y = _y.getGpuMat();
+    GpuMat x = getOutputMat(_x, angle.size(), CV_32FC1, _stream);
+    GpuMat y = getOutputMat(_y, angle.size(), CV_32FC1, _stream);

     GpuMat_<float> xc(x.reshape(1));
     GpuMat_<float> yc(y.reshape(1));
...
...
@@ -204,6 +207,9 @@ void cv::cuda::polarToCart(InputArray _mag, InputArray _angle, OutputArray _x, O
     CV_CUDEV_SAFE_CALL( cudaGetLastError() );

+    syncOutput(x, _x, _stream);
+    syncOutput(y, _y, _stream);
+
     if (stream == 0)
         CV_CUDEV_SAFE_CALL( cudaDeviceSynchronize() );
 }
...
...
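All four polar/Cartesian conversions now end in syncOutput, so they compose cleanly on one stream. A minimal sketch (the CV_32FC1 gradients d_x and d_y are assumptions):

    cv::cuda::Stream stream;
    cv::cuda::GpuMat d_mag, d_angle;

    // Magnitude and angle (in degrees) in a single fused pass on the stream.
    cv::cuda::cartToPolar(d_x, d_y, d_mag, d_angle, true, stream);

    stream.waitForCompletion();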
modules/cudaarithm/src/cuda/reduce.cu
...
...
@@ -50,7 +50,10 @@
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
namespace
...
...
@@ -125,7 +128,7 @@ namespace
 void cv::cuda::reduce(InputArray _src, OutputArray _dst, int dim, int reduceOp, int dtype, Stream& stream)
 {
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);

     CV_Assert( src.channels() <= 4 );
     CV_Assert( dim == 0 || dim == 1 );
...
...
@@ -134,8 +137,7 @@ void cv::cuda::reduce(InputArray _src, OutputArray _dst, int dim, int reduceOp,
     if (dtype < 0)
         dtype = src.depth();

-    _dst.create(1, dim == 0 ? src.cols : src.rows, CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()));
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, 1, dim == 0 ? src.cols : src.rows, CV_MAKE_TYPE(CV_MAT_DEPTH(dtype), src.channels()), stream);

     if (dim == 0)
     {
...
...
@@ -292,6 +294,8 @@ void cv::cuda::reduce(InputArray _src, OutputArray _dst, int dim, int reduceOp,
         func(src, dst, reduceOp, stream);
     }

+    syncOutput(dst, _dst, stream);
 }
#endif
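reduce keeps its public signature; only the input/output plumbing changed. A minimal sketch (assuming a CV_32FC1 matrix d_src already on the device):

    cv::cuda::Stream stream;
    cv::cuda::GpuMat d_colSums;

    // dim == 0 folds the rows, producing a 1 x cols vector of per-column sums.
    cv::cuda::reduce(d_src, d_colSums, 0, cv::REDUCE_SUM, CV_32F, stream);

    stream.waitForCompletion();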
modules/cudaarithm/src/cuda/split_merge.cu
...
...
@@ -50,7 +50,10 @@
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
////////////////////////////////////////////////////////////////////////
...
...
@@ -92,20 +95,18 @@ namespace
     void mergeImpl(const GpuMat* src, size_t n, cv::OutputArray _dst, Stream& stream)
     {
-        CV_DbgAssert( src != 0 );
-        CV_DbgAssert( n > 0 && n <= 4 );
+        CV_Assert( src != 0 );
+        CV_Assert( n > 0 && n <= 4 );

         const int depth = src[0].depth();
         const cv::Size size = src[0].size();

-#ifdef _DEBUG
         for (size_t i = 0; i < n; ++i)
         {
             CV_Assert( src[i].size() == size );
             CV_Assert( src[i].depth() == depth );
             CV_Assert( src[i].channels() == 1 );
         }
-#endif

         if (n == 1)
         {
...
...
@@ -123,8 +124,7 @@ namespace
             const int channels = static_cast<int>(n);

-            _dst.create(size, CV_MAKE_TYPE(depth, channels));
-            GpuMat dst = _dst.getGpuMat();
+            GpuMat dst = getOutputMat(_dst, size, CV_MAKE_TYPE(depth, channels), stream);

             const func_t func = funcs[channels - 2][CV_ELEM_SIZE(depth) / 2];
...
...
@@ -132,6 +132,8 @@ namespace
                 CV_Error(cv::Error::StsUnsupportedFormat, "Unsupported channel count or data type");

             func(src, dst, stream);
+
+            syncOutput(dst, _dst, stream);
         }
     }
 }
...
...
@@ -203,12 +205,12 @@ namespace
             {SplitFunc<4, uchar>::call, SplitFunc<4, ushort>::call, SplitFunc<4, int>::call, 0, SplitFunc<4, double>::call}
         };

-        CV_DbgAssert( dst != 0 );
+        CV_Assert( dst != 0 );

         const int depth = src.depth();
         const int channels = src.channels();

-        CV_DbgAssert( channels <= 4 );
+        CV_Assert( channels <= 4 );

         if (channels == 0)
             return;
...
...
@@ -233,13 +235,13 @@ namespace
 void cv::cuda::split(InputArray _src, GpuMat* dst, Stream& stream)
 {
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);
     splitImpl(src, dst, stream);
 }

 void cv::cuda::split(InputArray _src, std::vector<GpuMat>& dst, Stream& stream)
 {
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);
     dst.resize(src.channels());
     if (src.channels() > 0)
         splitImpl(src, &dst[0], stream);
...
...
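split and merge keep their public shape; the debug-only per-plane checks simply became unconditional asserts. A minimal round-trip sketch (the CV_8UC3 image d_bgr is an assumption):

    cv::cuda::Stream stream;

    std::vector<cv::cuda::GpuMat> d_planes;
    cv::cuda::split(d_bgr, d_planes, stream);    // one single-channel GpuMat per plane

    cv::cuda::GpuMat d_merged;
    cv::cuda::merge(d_planes, d_merged, stream); // reinterleave the planes

    stream.waitForCompletion();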
modules/cudaarithm/src/cuda/sum.cu
...
...
@@ -50,126 +50,153 @@
#include "opencv2/cudaarithm.hpp"
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
namespace
{
     template <typename T, typename R, int cn>
-    cv::Scalar sumImpl(const GpuMat& _src, const GpuMat& mask, GpuMat& _buf)
+    void sumImpl(const GpuMat& _src, GpuMat& _dst, const GpuMat& mask, Stream& stream)
     {
         typedef typename MakeVec<T, cn>::type src_type;
         typedef typename MakeVec<R, cn>::type res_type;

         const GpuMat_<src_type>& src = (const GpuMat_<src_type>&) _src;
-        GpuMat_<res_type>& buf = (GpuMat_<res_type>&) _buf;
+        GpuMat_<res_type>& dst = (GpuMat_<res_type>&) _dst;

         if (mask.empty())
-            gridCalcSum(src, buf);
+            gridCalcSum(src, dst, stream);
         else
-            gridCalcSum(src, buf, globPtr<uchar>(mask));
-
-        cv::Scalar_<R> res;
-        cv::Mat res_mat(buf.size(), buf.type(), res.val);
-        buf.download(res_mat);
-
-        return res;
+            gridCalcSum(src, dst, globPtr<uchar>(mask), stream);
     }
     template <typename T, typename R, int cn>
-    cv::Scalar sumAbsImpl(const GpuMat& _src, const GpuMat& mask, GpuMat& _buf)
+    void sumAbsImpl(const GpuMat& _src, GpuMat& _dst, const GpuMat& mask, Stream& stream)
     {
         typedef typename MakeVec<T, cn>::type src_type;
         typedef typename MakeVec<R, cn>::type res_type;

         const GpuMat_<src_type>& src = (const GpuMat_<src_type>&) _src;
-        GpuMat_<res_type>& buf = (GpuMat_<res_type>&) _buf;
+        GpuMat_<res_type>& dst = (GpuMat_<res_type>&) _dst;

         if (mask.empty())
-            gridCalcSum(abs_(cvt_<res_type>(src)), buf);
+            gridCalcSum(abs_(cvt_<res_type>(src)), dst, stream);
         else
-            gridCalcSum(abs_(cvt_<res_type>(src)), buf, globPtr<uchar>(mask));
-
-        cv::Scalar_<R> res;
-        cv::Mat res_mat(buf.size(), buf.type(), res.val);
-        buf.download(res_mat);
-
-        return res;
+            gridCalcSum(abs_(cvt_<res_type>(src)), dst, globPtr<uchar>(mask), stream);
     }
     template <typename T, typename R, int cn>
-    cv::Scalar sumSqrImpl(const GpuMat& _src, const GpuMat& mask, GpuMat& _buf)
+    void sumSqrImpl(const GpuMat& _src, GpuMat& _dst, const GpuMat& mask, Stream& stream)
     {
         typedef typename MakeVec<T, cn>::type src_type;
         typedef typename MakeVec<R, cn>::type res_type;

         const GpuMat_<src_type>& src = (const GpuMat_<src_type>&) _src;
-        GpuMat_<res_type>& buf = (GpuMat_<res_type>&) _buf;
+        GpuMat_<res_type>& dst = (GpuMat_<res_type>&) _dst;

         if (mask.empty())
-            gridCalcSum(sqr_(cvt_<res_type>(src)), buf);
+            gridCalcSum(sqr_(cvt_<res_type>(src)), dst, stream);
         else
-            gridCalcSum(sqr_(cvt_<res_type>(src)), buf, globPtr<uchar>(mask));
-
-        cv::Scalar_<R> res;
-        cv::Mat res_mat(buf.size(), buf.type(), res.val);
-        buf.download(res_mat);
-
-        return res;
+            gridCalcSum(sqr_(cvt_<res_type>(src)), dst, globPtr<uchar>(mask), stream);
     }
}
-cv::Scalar cv::cuda::sum(InputArray _src, InputArray _mask, GpuMat& buf)
+void cv::cuda::calcSum(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream)
 {
-    typedef cv::Scalar (*func_t)(const GpuMat& _src, const GpuMat& mask, GpuMat& _buf);
+    typedef void (*func_t)(const GpuMat& _src, GpuMat& _dst, const GpuMat& mask, Stream& stream);
     static const func_t funcs[7][4] =
     {
-        {sumImpl<uchar , uint  , 1>, sumImpl<uchar , uint  , 2>, sumImpl<uchar , uint  , 3>, sumImpl<uchar , uint  , 4>},
-        {sumImpl<schar , int   , 1>, sumImpl<schar , int   , 2>, sumImpl<schar , int   , 3>, sumImpl<schar , int   , 4>},
-        {sumImpl<ushort, uint  , 1>, sumImpl<ushort, uint  , 2>, sumImpl<ushort, uint  , 3>, sumImpl<ushort, uint  , 4>},
-        {sumImpl<short , int   , 1>, sumImpl<short , int   , 2>, sumImpl<short , int   , 3>, sumImpl<short , int   , 4>},
-        {sumImpl<int   , int   , 1>, sumImpl<int   , int   , 2>, sumImpl<int   , int   , 3>, sumImpl<int   , int   , 4>},
-        {sumImpl<float , float , 1>, sumImpl<float , float , 2>, sumImpl<float , float , 3>, sumImpl<float , float , 4>},
+        {sumImpl<uchar , double, 1>, sumImpl<uchar , double, 2>, sumImpl<uchar , double, 3>, sumImpl<uchar , double, 4>},
+        {sumImpl<schar , double, 1>, sumImpl<schar , double, 2>, sumImpl<schar , double, 3>, sumImpl<schar , double, 4>},
+        {sumImpl<ushort, double, 1>, sumImpl<ushort, double, 2>, sumImpl<ushort, double, 3>, sumImpl<ushort, double, 4>},
+        {sumImpl<short , double, 1>, sumImpl<short , double, 2>, sumImpl<short , double, 3>, sumImpl<short , double, 4>},
+        {sumImpl<int   , double, 1>, sumImpl<int   , double, 2>, sumImpl<int   , double, 3>, sumImpl<int   , double, 4>},
+        {sumImpl<float , double, 1>, sumImpl<float , double, 2>, sumImpl<float , double, 3>, sumImpl<float , double, 4>},
         {sumImpl<double, double, 1>, sumImpl<double, double, 2>, sumImpl<double, double, 3>, sumImpl<double, double, 4>}
     };

-    GpuMat src = _src.getGpuMat();
-    GpuMat mask = _mask.getGpuMat();
+    const GpuMat src = getInputMat(_src, stream);
+    const GpuMat mask = getInputMat(_mask, stream);

-    CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );
+    CV_DbgAssert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );

-    const func_t func = funcs[src.depth()][src.channels() - 1];
-    return func(src, mask, buf);
+    const int src_depth = src.depth();
+    const int channels = src.channels();
+
+    GpuMat dst = getOutputMat(_dst, 1, 1, CV_64FC(channels), stream);
+
+    const func_t func = funcs[src_depth][channels - 1];
+    func(src, dst, mask, stream);
+
+    syncOutput(dst, _dst, stream);
 }
-cv::Scalar cv::cuda::absSum(InputArray _src, InputArray _mask, GpuMat& buf)
+cv::Scalar cv::cuda::sum(InputArray _src, InputArray _mask)
 {
-    typedef cv::Scalar (*func_t)(const GpuMat& _src, const GpuMat& mask, GpuMat& _buf);
+    Stream& stream = Stream::Null();
+
+    HostMem dst;
+    calcSum(_src, dst, _mask, stream);
+
+    stream.waitForCompletion();
+
+    cv::Scalar val;
+    dst.createMatHeader().convertTo(cv::Mat(dst.size(), CV_64FC(dst.channels()), val.val), CV_64F);
+
+    return val;
+}
+
+void cv::cuda::calcAbsSum(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream)
+{
+    typedef void (*func_t)(const GpuMat& _src, GpuMat& _dst, const GpuMat& mask, Stream& stream);
     static const func_t funcs[7][4] =
     {
-        {sumAbsImpl<uchar , uint  , 1>, sumAbsImpl<uchar , uint  , 2>, sumAbsImpl<uchar , uint  , 3>, sumAbsImpl<uchar , uint  , 4>},
-        {sumAbsImpl<schar , int   , 1>, sumAbsImpl<schar , int   , 2>, sumAbsImpl<schar , int   , 3>, sumAbsImpl<schar , int   , 4>},
-        {sumAbsImpl<ushort, uint  , 1>, sumAbsImpl<ushort, uint  , 2>, sumAbsImpl<ushort, uint  , 3>, sumAbsImpl<ushort, uint  , 4>},
-        {sumAbsImpl<short , int   , 1>, sumAbsImpl<short , int   , 2>, sumAbsImpl<short , int   , 3>, sumAbsImpl<short , int   , 4>},
-        {sumAbsImpl<int   , int   , 1>, sumAbsImpl<int   , int   , 2>, sumAbsImpl<int   , int   , 3>, sumAbsImpl<int   , int   , 4>},
-        {sumAbsImpl<float , float , 1>, sumAbsImpl<float , float , 2>, sumAbsImpl<float , float , 3>, sumAbsImpl<float , float , 4>},
+        {sumAbsImpl<uchar , double, 1>, sumAbsImpl<uchar , double, 2>, sumAbsImpl<uchar , double, 3>, sumAbsImpl<uchar , double, 4>},
+        {sumAbsImpl<schar , double, 1>, sumAbsImpl<schar , double, 2>, sumAbsImpl<schar , double, 3>, sumAbsImpl<schar , double, 4>},
+        {sumAbsImpl<ushort, double, 1>, sumAbsImpl<ushort, double, 2>, sumAbsImpl<ushort, double, 3>, sumAbsImpl<ushort, double, 4>},
+        {sumAbsImpl<short , double, 1>, sumAbsImpl<short , double, 2>, sumAbsImpl<short , double, 3>, sumAbsImpl<short , double, 4>},
+        {sumAbsImpl<int   , double, 1>, sumAbsImpl<int   , double, 2>, sumAbsImpl<int   , double, 3>, sumAbsImpl<int   , double, 4>},
+        {sumAbsImpl<float , double, 1>, sumAbsImpl<float , double, 2>, sumAbsImpl<float , double, 3>, sumAbsImpl<float , double, 4>},
         {sumAbsImpl<double, double, 1>, sumAbsImpl<double, double, 2>, sumAbsImpl<double, double, 3>, sumAbsImpl<double, double, 4>}
     };

-    GpuMat src = _src.getGpuMat();
-    GpuMat mask = _mask.getGpuMat();
+    const GpuMat src = getInputMat(_src, stream);
+    const GpuMat mask = getInputMat(_mask, stream);

-    CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );
+    CV_DbgAssert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );

-    const func_t func = funcs[src.depth()][src.channels() - 1];
-    return func(src, mask, buf);
+    const int src_depth = src.depth();
+    const int channels = src.channels();
+
+    GpuMat dst = getOutputMat(_dst, 1, 1, CV_64FC(channels), stream);
+
+    const func_t func = funcs[src_depth][channels - 1];
+    func(src, dst, mask, stream);
+
+    syncOutput(dst, _dst, stream);
 }
-cv::Scalar cv::cuda::sqrSum(InputArray _src, InputArray _mask, GpuMat& buf)
+cv::Scalar cv::cuda::absSum(InputArray _src, InputArray _mask)
 {
-    typedef cv::Scalar (*func_t)(const GpuMat& _src, const GpuMat& mask, GpuMat& _buf);
+    Stream& stream = Stream::Null();
+
+    HostMem dst;
+    calcAbsSum(_src, dst, _mask, stream);
+
+    stream.waitForCompletion();
+
+    cv::Scalar val;
+    dst.createMatHeader().convertTo(cv::Mat(dst.size(), CV_64FC(dst.channels()), val.val), CV_64F);
+
+    return val;
+}
+
+void cv::cuda::calcSqrSum(InputArray _src, OutputArray _dst, InputArray _mask, Stream& stream)
+{
+    typedef void (*func_t)(const GpuMat& _src, GpuMat& _dst, const GpuMat& mask, Stream& stream);
     static const func_t funcs[7][4] =
     {
         {sumSqrImpl<uchar , double, 1>, sumSqrImpl<uchar , double, 2>, sumSqrImpl<uchar , double, 3>, sumSqrImpl<uchar , double, 4>},
...
...
@@ -181,14 +208,35 @@ cv::Scalar cv::cuda::sqrSum(InputArray _src, InputArray _mask, GpuMat& buf)
         {sumSqrImpl<double, double, 1>, sumSqrImpl<double, double, 2>, sumSqrImpl<double, double, 3>, sumSqrImpl<double, double, 4>}
     };

-    GpuMat src = _src.getGpuMat();
-    GpuMat mask = _mask.getGpuMat();
+    const GpuMat src = getInputMat(_src, stream);
+    const GpuMat mask = getInputMat(_mask, stream);

-    CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );
+    CV_DbgAssert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size()) );

-    const func_t func = funcs[src.depth()][src.channels() - 1];
-    return func(src, mask, buf);
+    const int src_depth = src.depth();
+    const int channels = src.channels();
+
+    GpuMat dst = getOutputMat(_dst, 1, 1, CV_64FC(channels), stream);
+
+    const func_t func = funcs[src_depth][channels - 1];
+    func(src, dst, mask, stream);
+
+    syncOutput(dst, _dst, stream);
+}
+
+cv::Scalar cv::cuda::sqrSum(InputArray _src, InputArray _mask)
+{
+    Stream& stream = Stream::Null();
+
+    HostMem dst;
+    calcSqrSum(_src, dst, _mask, stream);
+
+    stream.waitForCompletion();
+
+    cv::Scalar val;
+    dst.createMatHeader().convertTo(cv::Mat(dst.size(), CV_64FC(dst.channels()), val.val), CV_64F);
+
+    return val;
 }
#endif
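The sums follow the same recipe as the norms: calcSum/calcAbsSum/calcSqrSum run on a stream and leave a 1x1 CV_64FC(cn) result, which the old Scalar-returning wrappers merely download. A minimal asynchronous sketch (the pre-uploaded GpuMat d_src is an assumption):

    cv::cuda::Stream stream;
    cv::cuda::HostMem result;

    cv::cuda::calcSum(d_src, result, cv::noArray(), stream);  // enqueue only

    stream.waitForCompletion();

    cv::Scalar val;
    cv::Mat val_mat(result.size(), CV_64FC(result.channels()), val.val);
    result.createMatHeader().convertTo(val_mat, CV_64F);      // unpack per-channel sums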
modules/cudaarithm/src/cuda/threshold.cu
...
...
@@ -52,6 +52,8 @@
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
namespace
...
...
@@ -95,15 +97,14 @@ namespace
 double cv::cuda::threshold(InputArray _src, OutputArray _dst, double thresh, double maxVal, int type, Stream& stream)
 {
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);

     const int depth = src.depth();

-    CV_DbgAssert( src.channels() == 1 && depth <= CV_64F );
-    CV_DbgAssert( type <= 4 /*THRESH_TOZERO_INV*/ );
+    CV_Assert( src.channels() == 1 && depth <= CV_64F );
+    CV_Assert( type <= 4 /*THRESH_TOZERO_INV*/ );

-    _dst.create(src.size(), src.type());
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);

     if (depth == CV_32F && type == 2 /*THRESH_TRUNC*/)
     {
...
...
@@ -142,6 +143,8 @@ double cv::cuda::threshold(InputArray _src, OutputArray _dst, double thresh, dou
         funcs[depth](src, dst, thresh, maxVal, type, stream);
     }

+    syncOutput(dst, _dst, stream);
+
     return thresh;
 }
...
...
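threshold still returns the threshold value synchronously while the pixel work rides on the stream; a minimal sketch (the CV_32FC1 input d_src is an assumption):

    cv::cuda::Stream stream;
    cv::cuda::GpuMat d_dst;

    double used = cv::cuda::threshold(d_src, d_dst, 0.5, 1.0, cv::THRESH_BINARY, stream);

    stream.waitForCompletion();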
modules/cudaarithm/src/cuda/transpose.cu
...
...
@@ -52,18 +52,19 @@
#include "opencv2/cudev.hpp"
#include "opencv2/core/private.cuda.hpp"
using namespace cv;
using namespace cv::cuda;
using namespace cv::cudev;
 void cv::cuda::transpose(InputArray _src, OutputArray _dst, Stream& stream)
 {
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);

     const size_t elemSize = src.elemSize();

     CV_Assert( elemSize == 1 || elemSize == 4 || elemSize == 8 );

-    _dst.create( src.cols, src.rows, src.type() );
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src.cols, src.rows, src.type(), stream);

     if (elemSize == 1)
     {
...
...
@@ -87,6 +88,8 @@ void cv::cuda::transpose(InputArray _src, OutputArray _dst, Stream& stream)
     {
         gridTranspose(globPtr<double>(src), globPtr<double>(dst), stream);
     }

+    syncOutput(dst, _dst, stream);
 }
#endif
modules/cudaarithm/src/element_operations.cpp
...
...
@@ -107,11 +107,11 @@ namespace
     GpuMat src1;
     if (!isScalar1)
-        src1 = _src1.getGpuMat();
+        src1 = getInputMat(_src1, stream);

     GpuMat src2;
     if (!isScalar2)
-        src2 = _src2.getGpuMat();
+        src2 = getInputMat(_src2, stream);

     Mat scalar;
     if (isScalar1)
...
...
@@ -126,7 +126,7 @@ namespace
         scalar.convertTo(Mat_<double>(scalar.rows, scalar.cols, &val[0]), CV_64F);
     }

-    GpuMat mask = _mask.getGpuMat();
+    GpuMat mask = getInputMat(_mask, stream);

     const int sdepth = src1.empty() ? src2.depth() : src1.depth();
     const int cn = src1.empty() ? src2.channels() : src1.channels();
...
...
@@ -147,8 +147,7 @@ namespace
         CV_Error(Error::StsUnsupportedFormat, "The device doesn't support double");
     }

-    _dst.create(size, CV_MAKE_TYPE(ddepth, cn));
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, size, CV_MAKE_TYPE(ddepth, cn), stream);

     if (isScalar1)
         mat_scalar_func(src2, val, true, dst, mask, scale, stream, op);
...
...
@@ -156,6 +155,8 @@ namespace
         mat_scalar_func(src1, val, false, dst, mask, scale, stream, op);
     else
         mat_mat_func(src1, src2, dst, mask, scale, stream, op);
+
+    syncOutput(dst, _dst, stream);
 }
 }
...
...
@@ -196,27 +197,29 @@ void cv::cuda::multiply(InputArray _src1, InputArray _src2, OutputArray _dst, do
{
if
(
_src1
.
type
()
==
CV_8UC4
&&
_src2
.
type
()
==
CV_32FC1
)
{
GpuMat
src1
=
_src1
.
getGpuMat
(
);
GpuMat
src2
=
_src2
.
getGpuMat
(
);
GpuMat
src1
=
getInputMat
(
_src1
,
stream
);
GpuMat
src2
=
getInputMat
(
_src2
,
stream
);
CV_Assert
(
src1
.
size
()
==
src2
.
size
()
);
_dst
.
create
(
src1
.
size
(),
src1
.
type
());
GpuMat
dst
=
_dst
.
getGpuMat
();
GpuMat
dst
=
getOutputMat
(
_dst
,
src1
.
size
(),
src1
.
type
(),
stream
);
mulMat_8uc4_32f
(
src1
,
src2
,
dst
,
stream
);
syncOutput
(
dst
,
_dst
,
stream
);
}
else
if
(
_src1
.
type
()
==
CV_16SC4
&&
_src2
.
type
()
==
CV_32FC1
)
{
GpuMat
src1
=
_src1
.
getGpuMat
(
);
GpuMat
src2
=
_src2
.
getGpuMat
(
);
GpuMat
src1
=
getInputMat
(
_src1
,
stream
);
GpuMat
src2
=
getInputMat
(
_src2
,
stream
);
CV_Assert
(
src1
.
size
()
==
src2
.
size
()
);
_dst
.
create
(
src1
.
size
(),
src1
.
type
());
GpuMat
dst
=
_dst
.
getGpuMat
();
GpuMat
dst
=
getOutputMat
(
_dst
,
src1
.
size
(),
src1
.
type
(),
stream
);
mulMat_16sc4_32f
(
src1
,
src2
,
dst
,
stream
);
syncOutput
(
dst
,
_dst
,
stream
);
}
else
{
...
...
@@ -237,27 +240,29 @@ void cv::cuda::divide(InputArray _src1, InputArray _src2, OutputArray _dst, doub
{
if
(
_src1
.
type
()
==
CV_8UC4
&&
_src2
.
type
()
==
CV_32FC1
)
{
GpuMat
src1
=
_src1
.
getGpuMat
(
);
GpuMat
src2
=
_src2
.
getGpuMat
(
);
GpuMat
src1
=
getInputMat
(
_src1
,
stream
);
GpuMat
src2
=
getInputMat
(
_src2
,
stream
);
CV_Assert
(
src1
.
size
()
==
src2
.
size
()
);
_dst
.
create
(
src1
.
size
(),
src1
.
type
());
GpuMat
dst
=
_dst
.
getGpuMat
();
GpuMat
dst
=
getOutputMat
(
_dst
,
src1
.
size
(),
src1
.
type
(),
stream
);
divMat_8uc4_32f
(
src1
,
src2
,
dst
,
stream
);
syncOutput
(
dst
,
_dst
,
stream
);
}
else
if
(
_src1
.
type
()
==
CV_16SC4
&&
_src2
.
type
()
==
CV_32FC1
)
{
GpuMat
src1
=
_src1
.
getGpuMat
(
);
GpuMat
src2
=
_src2
.
getGpuMat
(
);
GpuMat
src1
=
getInputMat
(
_src1
,
stream
);
GpuMat
src2
=
getInputMat
(
_src2
,
stream
);
CV_Assert
(
src1
.
size
()
==
src2
.
size
()
);
_dst
.
create
(
src1
.
size
(),
src1
.
type
());
GpuMat
dst
=
_dst
.
getGpuMat
();
GpuMat
dst
=
getOutputMat
(
_dst
,
src1
.
size
(),
src1
.
type
(),
stream
);
divMat_16sc4_32f
(
src1
,
src2
,
dst
,
stream
);
syncOutput
(
dst
,
_dst
,
stream
);
}
else
{
...
...
@@ -389,15 +394,16 @@ void cv::cuda::rshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Strea
         {NppShift<CV_32S, 1, nppiRShiftC_32s_C1R>::call, 0, NppShift<CV_32S, 3, nppiRShiftC_32s_C3R>::call, NppShift<CV_32S, 4, nppiRShiftC_32s_C4R>::call},
     };

-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);

     CV_Assert( src.depth() < CV_32F );
     CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );

-    _dst.create(src.size(), src.type());
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);

     funcs[src.depth()][src.channels() - 1](src, val, dst, StreamAccessor::getStream(stream));
+
+    syncOutput(dst, _dst, stream);
 }

 void cv::cuda::lshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Stream& stream)
...
...
@@ -412,15 +418,16 @@ void cv::cuda::lshift(InputArray _src, Scalar_<int> val, OutputArray _dst, Strea
         {NppShift<CV_32S, 1, nppiLShiftC_32s_C1R>::call, 0, NppShift<CV_32S, 3, nppiLShiftC_32s_C3R>::call, NppShift<CV_32S, 4, nppiLShiftC_32s_C4R>::call},
     };

-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);

     CV_Assert( src.depth() == CV_8U || src.depth() == CV_16U || src.depth() == CV_32S );
     CV_Assert( src.channels() == 1 || src.channels() == 3 || src.channels() == 4 );

-    _dst.create(src.size(), src.type());
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src.size(), src.type(), stream);

     funcs[src.depth()][src.channels() - 1](src, val, dst, StreamAccessor::getStream(stream));
+
+    syncOutput(dst, _dst, stream);
 }
//////////////////////////////////////////////////////////////////////////////
...
...
@@ -475,22 +482,24 @@ namespace
 void cv::cuda::magnitude(InputArray _src, OutputArray _dst, Stream& stream)
 {
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);

-    _dst.create(src.size(), CV_32FC1);
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src.size(), CV_32FC1, stream);

     npp_magnitude(src, dst, nppiMagnitude_32fc32f_C1R, StreamAccessor::getStream(stream));
+
+    syncOutput(dst, _dst, stream);
 }

 void cv::cuda::magnitudeSqr(InputArray _src, OutputArray _dst, Stream& stream)
 {
-    GpuMat src = _src.getGpuMat();
+    GpuMat src = getInputMat(_src, stream);

-    _dst.create(src.size(), CV_32FC1);
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src.size(), CV_32FC1, stream);

     npp_magnitude(src, dst, nppiMagnitudeSqr_32fc32f_C1R, StreamAccessor::getStream(stream));
+
+    syncOutput(dst, _dst, stream);
 }
#endif
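Every function in this file was rewritten against the same three private helpers from private.cuda.hpp: getInputMat (may upload a host argument on the stream), getOutputMat (may hand back a BufferPool temporary instead of the caller's destination), and syncOutput (publishes the temporary back to the real destination). A hedged sketch of the shape of a refactored wrapper; exampleOp is illustrative, not a real cudaarithm entry point:

    void exampleOp(cv::InputArray _src, cv::OutputArray _dst, cv::cuda::Stream& stream)
    {
        // May trigger an asynchronous host-to-device copy on 'stream'.
        cv::cuda::GpuMat src = cv::cuda::getInputMat(_src, stream);

        // Allocates _dst directly, or a pooled temporary when _dst is host memory.
        cv::cuda::GpuMat dst = cv::cuda::getOutputMat(_dst, src.size(), src.type(), stream);

        // ... enqueue the actual kernel(s) on 'stream' here ...

        // Copies the temporary back to _dst if needed, still on 'stream'.
        cv::cuda::syncOutput(dst, _dst, stream);
    }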
modules/cudaarithm/src/reductions.cpp
...
...
@@ -47,110 +47,106 @@ using namespace cv::cuda;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
-double cv::cuda::norm(InputArray, int, InputArray, GpuMat&) { throw_no_cuda(); return 0.0; }
-double cv::cuda::norm(InputArray, InputArray, GpuMat&, int) { throw_no_cuda(); return 0.0; }
-Scalar cv::cuda::sum(InputArray, InputArray, GpuMat&) { throw_no_cuda(); return Scalar(); }
-Scalar cv::cuda::absSum(InputArray, InputArray, GpuMat&) { throw_no_cuda(); return Scalar(); }
-Scalar cv::cuda::sqrSum(InputArray, InputArray, GpuMat&) { throw_no_cuda(); return Scalar(); }
-void cv::cuda::minMax(InputArray, double*, double*, InputArray, GpuMat&) { throw_no_cuda(); }
-void cv::cuda::minMaxLoc(InputArray, double*, double*, Point*, Point*, InputArray, GpuMat&, GpuMat&) { throw_no_cuda(); }
-int cv::cuda::countNonZero(InputArray, GpuMat&) { throw_no_cuda(); return 0; }
+double cv::cuda::norm(InputArray, int, InputArray) { throw_no_cuda(); return 0.0; }
+void cv::cuda::calcNorm(InputArray, OutputArray, int, InputArray, Stream&) { throw_no_cuda(); }
+double cv::cuda::norm(InputArray, InputArray, int) { throw_no_cuda(); return 0.0; }
+void cv::cuda::calcNormDiff(InputArray, InputArray, OutputArray, int, Stream&) { throw_no_cuda(); }
+Scalar cv::cuda::sum(InputArray, InputArray) { throw_no_cuda(); return Scalar(); }
+void cv::cuda::calcSum(InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
+Scalar cv::cuda::absSum(InputArray, InputArray) { throw_no_cuda(); return Scalar(); }
+void cv::cuda::calcAbsSum(InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
+Scalar cv::cuda::sqrSum(InputArray, InputArray) { throw_no_cuda(); return Scalar(); }
+void cv::cuda::calcSqrSum(InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::minMax(InputArray, double*, double*, InputArray) { throw_no_cuda(); }
+void cv::cuda::findMinMax(InputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::minMaxLoc(InputArray, double*, double*, Point*, Point*, InputArray) { throw_no_cuda(); }
+void cv::cuda::findMinMaxLoc(InputArray, OutputArray, OutputArray, InputArray, Stream&) { throw_no_cuda(); }
+int cv::cuda::countNonZero(InputArray) { throw_no_cuda(); return 0; }
+void cv::cuda::countNonZero(InputArray, OutputArray, Stream&) { throw_no_cuda(); }

 void cv::cuda::reduce(InputArray, OutputArray, int, int, int, Stream&) { throw_no_cuda(); }

-void cv::cuda::meanStdDev(InputArray, Scalar&, Scalar&, GpuMat&) { throw_no_cuda(); }
 void cv::cuda::meanStdDev(InputArray, Scalar&, Scalar&) { throw_no_cuda(); }
+void cv::cuda::meanStdDev(InputArray, OutputArray, Stream&) { throw_no_cuda(); }

 void cv::cuda::rectStdDev(InputArray, InputArray, OutputArray, Rect, Stream&) { throw_no_cuda(); }

-void cv::cuda::normalize(InputArray, OutputArray, double, double, int, int, InputArray, GpuMat&, GpuMat&) { throw_no_cuda(); }
+void cv::cuda::normalize(InputArray, OutputArray, double, double, int, int, InputArray, Stream&) { throw_no_cuda(); }

-void cv::cuda::integral(InputArray, OutputArray, GpuMat&, Stream&) { throw_no_cuda(); }
-void cv::cuda::sqrIntegral(InputArray, OutputArray, GpuMat&, Stream&) { throw_no_cuda(); }
+void cv::cuda::integral(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
+void cv::cuda::sqrIntegral(InputArray, OutputArray, Stream&) { throw_no_cuda(); }
#else
-namespace
-{
-    class DeviceBuffer
-    {
-    public:
-        explicit DeviceBuffer(int count_ = 1) : count(count_)
-        {
-            cudaSafeCall( cudaMalloc(&pdev, count * sizeof(double)) );
-        }
-        ~DeviceBuffer()
-        {
-            cudaSafeCall( cudaFree(pdev) );
-        }
-
-        operator double*() { return pdev; }
-
-        void download(double* hptr)
-        {
-            double hbuf;
-            cudaSafeCall( cudaMemcpy(&hbuf, pdev, sizeof(double), cudaMemcpyDeviceToHost) );
-            *hptr = hbuf;
-        }
-
-        void download(double** hptrs)
-        {
-            AutoBuffer<double, 2 * sizeof(double)> hbuf(count);
-            cudaSafeCall( cudaMemcpy((void*)hbuf, pdev, count * sizeof(double), cudaMemcpyDeviceToHost) );
-            for (int i = 0; i < count; ++i)
-                *hptrs[i] = hbuf[i];
-        }
-
-    private:
-        double* pdev;
-        int count;
-    };
-}
////////////////////////////////////////////////////////////////////////
// norm
-double cv::cuda::norm(InputArray _src, int normType, InputArray _mask, GpuMat& buf)
-{
-    GpuMat src = _src.getGpuMat();
-    GpuMat mask = _mask.getGpuMat();
+namespace cv { namespace cuda { namespace internal {
+
+void normL2(cv::InputArray _src, cv::OutputArray _dst, cv::InputArray _mask, Stream& stream);
+
+void findMaxAbs(cv::InputArray _src, cv::OutputArray _dst, cv::InputArray _mask, Stream& stream);
+
+}}}
+
+void cv::cuda::calcNorm(InputArray _src, OutputArray dst, int normType, InputArray mask, Stream& stream)
+{
     CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 );
-    CV_Assert( mask.empty() || (mask.type() == CV_8UC1 && mask.size() == src.size() && src.channels() == 1) );
+
+    GpuMat src = getInputMat(_src, stream);

     GpuMat src_single_channel = src.reshape(1);

     if (normType == NORM_L1)
-        return cuda::absSum(src_single_channel, mask, buf)[0];
-
-    if (normType == NORM_L2)
-        return std::sqrt(cuda::sqrSum(src_single_channel, mask, buf)[0]);
-
-    // NORM_INF
-    double min_val, max_val;
-    cuda::minMax(src_single_channel, &min_val, &max_val, mask, buf);
-    return std::max(std::abs(min_val), std::abs(max_val));
+    {
+        calcAbsSum(src_single_channel, dst, mask, stream);
+    }
+    else if (normType == NORM_L2)
+    {
+        internal::normL2(src_single_channel, dst, mask, stream);
+    }
+    else // NORM_INF
+    {
+        internal::findMaxAbs(src_single_channel, dst, mask, stream);
+    }
+}
+
+double cv::cuda::norm(InputArray _src, int normType, InputArray _mask)
+{
+    Stream& stream = Stream::Null();
+
+    HostMem dst;
+    calcNorm(_src, dst, normType, _mask, stream);
+
+    stream.waitForCompletion();
+
+    double val;
+    dst.createMatHeader().convertTo(Mat(1, 1, CV_64FC1, &val), CV_64F);
+
+    return val;
 }
////////////////////////////////////////////////////////////////////////
// meanStdDev
-void cv::cuda::meanStdDev(InputArray _src, Scalar& mean, Scalar& stddev, GpuMat& buf)
+void cv::cuda::meanStdDev(InputArray _src, OutputArray _dst, Stream& stream)
 {
-    GpuMat src = _src.getGpuMat();
-
-    CV_Assert( src.type() == CV_8UC1 );
-
     if (!deviceSupports(FEATURE_SET_COMPUTE_13))
         CV_Error(cv::Error::StsNotImplemented, "Not sufficient compute capebility");

+    const GpuMat src = getInputMat(_src, stream);
+
+    CV_Assert( src.type() == CV_8UC1 );
+
+    GpuMat dst = getOutputMat(_dst, 1, 2, CV_64FC1, stream);
+
     NppiSize sz;
     sz.width  = src.cols;
     sz.height = src.rows;

-    DeviceBuffer dbuf(2);
-
     int bufSize;
 #if (CUDA_VERSION <= 4020)
     nppSafeCall( nppiMeanStdDev8uC1RGetBufferHostSize(sz, &bufSize) );
...
...
@@ -158,14 +154,30 @@ void cv::cuda::meanStdDev(InputArray _src, Scalar& mean, Scalar& stddev, GpuMat&
     nppSafeCall( nppiMeanStdDevGetBufferHostSize_8u_C1R(sz, &bufSize) );
 #endif

-    ensureSizeIsEnough(1, bufSize, CV_8UC1, buf);
+    BufferPool pool(stream);
+    GpuMat buf = pool.getBuffer(1, bufSize, CV_8UC1);

-    nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), sz, buf.ptr<Npp8u>(), dbuf, (double*)dbuf + 1) );
+    NppStreamHandler h(StreamAccessor::getStream(stream));

-    cudaSafeCall( cudaDeviceSynchronize() );
+    nppSafeCall( nppiMean_StdDev_8u_C1R(src.ptr<Npp8u>(), static_cast<int>(src.step), sz, buf.ptr<Npp8u>(), dst.ptr<Npp64f>(), dst.ptr<Npp64f>() + 1) );

-    double* ptrs[2] = {mean.val, stddev.val};
-    dbuf.download(ptrs);
+    syncOutput(dst, _dst, stream);
+}
+
+void cv::cuda::meanStdDev(InputArray _src, Scalar& mean, Scalar& stddev)
+{
+    Stream& stream = Stream::Null();
+
+    HostMem dst;
+    meanStdDev(_src, dst, stream);
+
+    stream.waitForCompletion();
+
+    double vals[2];
+    dst.createMatHeader().copyTo(Mat(1, 2, CV_64FC1, &vals[0]));
+
+    mean = Scalar(vals[0]);
+    stddev = Scalar(vals[1]);
 }
//////////////////////////////////////////////////////////////////////////////
...
...
@@ -173,13 +185,12 @@ void cv::cuda::meanStdDev(InputArray _src, Scalar& mean, Scalar& stddev, GpuMat&
 void cv::cuda::rectStdDev(InputArray _src, InputArray _sqr, OutputArray _dst, Rect rect, Stream& _stream)
 {
-    GpuMat src = _src.getGpuMat();
-    GpuMat sqr = _sqr.getGpuMat();
+    GpuMat src = getInputMat(_src, _stream);
+    GpuMat sqr = getInputMat(_sqr, _stream);

     CV_Assert( src.type() == CV_32SC1 && sqr.type() == CV_64FC1 );

-    _dst.create(src.size(), CV_32FC1);
-    GpuMat dst = _dst.getGpuMat();
+    GpuMat dst = getOutputMat(_dst, src.size(), CV_32FC1, _stream);

     NppiSize sz;
     sz.width = src.cols;
...
...
@@ -200,45 +211,8 @@ void cv::cuda::rectStdDev(InputArray _src, InputArray _sqr, OutputArray _dst, Re
     if (stream == 0)
         cudaSafeCall( cudaDeviceSynchronize() );

+    syncOutput(dst, _dst, _stream);
 }
-
-////////////////////////////////////////////////////////////////////////
-// normalize
-
-void cv::cuda::normalize(InputArray _src, OutputArray dst, double a, double b, int norm_type, int dtype, InputArray mask, GpuMat& norm_buf, GpuMat& cvt_buf)
-{
-    GpuMat src = _src.getGpuMat();
-
-    double scale = 1, shift = 0;
-
-    if (norm_type == NORM_MINMAX)
-    {
-        double smin = 0, smax = 0;
-        double dmin = std::min(a, b), dmax = std::max(a, b);
-        cuda::minMax(src, &smin, &smax, mask, norm_buf);
-        scale = (dmax - dmin) * (smax - smin > std::numeric_limits<double>::epsilon() ? 1.0 / (smax - smin) : 0.0);
-        shift = dmin - smin * scale;
-    }
-    else if (norm_type == NORM_L2 || norm_type == NORM_L1 || norm_type == NORM_INF)
-    {
-        scale = cuda::norm(src, norm_type, mask, norm_buf);
-        scale = scale > std::numeric_limits<double>::epsilon() ? a / scale : 0.0;
-        shift = 0;
-    }
-    else
-    {
-        CV_Error(cv::Error::StsBadArg, "Unknown/unsupported norm type");
-    }
-
-    if (mask.empty())
-    {
-        src.convertTo(dst, dtype, scale, shift);
-    }
-    else
-    {
-        src.convertTo(cvt_buf, dtype, scale, shift);
-        cvt_buf.copyTo(dst, mask);
-    }
-}
#endif
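The blocking norm and meanStdDev overloads above show the standard recipe for preserving the synchronous API: run the stream-based variant into pinned HostMem on the null stream, wait, then unpack. A minimal caller-side sketch for the new asynchronous meanStdDev (the CV_8UC1 input d_src is an assumption):

    cv::cuda::Stream stream;
    cv::cuda::HostMem result;   // receives a 1x2 CV_64FC1 row: { mean, stddev }

    cv::cuda::meanStdDev(d_src, result, stream);

    stream.waitForCompletion();

    double vals[2];
    result.createMatHeader().copyTo(cv::Mat(1, 2, CV_64FC1, vals));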
modules/cudaarithm/test/test_element_operations.cpp
...
...
@@ -1329,7 +1329,7 @@ CUDA_TEST_P(Divide_Scalar_First, Accuracy)
         try
         {
             cv::cuda::GpuMat dst;
-            cv::cuda::divide(scale, loadMat(mat), dst, depth.second);
+            cv::cuda::divide(scale, loadMat(mat), dst, 1.0, depth.second);
         }
         catch (const cv::Exception& e)
         {
...
...
@@ -1339,7 +1339,7 @@ CUDA_TEST_P(Divide_Scalar_First, Accuracy)
     else
     {
         cv::cuda::GpuMat dst = createMat(size, depth.second, useRoi);
-        cv::cuda::divide(scale, loadMat(mat, useRoi), dst, depth.second);
+        cv::cuda::divide(scale, loadMat(mat, useRoi), dst, 1.0, depth.second);

         cv::Mat dst_gold;
         cv::divide(scale, mat, dst_gold, depth.second);
...
...
modules/cudaarithm/test/test_reductions.cpp
...
...
@@ -74,8 +74,27 @@ CUDA_TEST_P(Norm, Accuracy)
     cv::Mat src = randomMat(size, depth);
     cv::Mat mask = randomMat(size, CV_8UC1, 0, 2);

-    cv::cuda::GpuMat d_buf;
-    double val = cv::cuda::norm(loadMat(src, useRoi), normCode, loadMat(mask, useRoi), d_buf);
+    double val = cv::cuda::norm(loadMat(src, useRoi), normCode, loadMat(mask, useRoi));

     double val_gold = cv::norm(src, normCode, mask);

     EXPECT_NEAR(val_gold, val, depth < CV_32F ? 0.0 : 1.0);
 }

+CUDA_TEST_P(Norm, Async)
+{
+    cv::Mat src = randomMat(size, depth);
+    cv::Mat mask = randomMat(size, CV_8UC1, 0, 2);
+
+    cv::cuda::Stream stream;
+
+    cv::cuda::HostMem dst;
+    cv::cuda::calcNorm(loadMat(src, useRoi), dst, normCode, loadMat(mask, useRoi), stream);
+
+    stream.waitForCompletion();
+
+    double val;
+    dst.createMatHeader().convertTo(cv::Mat(1, 1, CV_64FC1, &val), CV_64F);
+
+    double val_gold = cv::norm(src, normCode, mask);
...
...
@@ -127,6 +146,27 @@ CUDA_TEST_P(NormDiff, Accuracy)
     EXPECT_NEAR(val_gold, val, 0.0);
 }

+CUDA_TEST_P(NormDiff, Async)
+{
+    cv::Mat src1 = randomMat(size, CV_8UC1);
+    cv::Mat src2 = randomMat(size, CV_8UC1);
+
+    cv::cuda::Stream stream;
+
+    cv::cuda::HostMem dst;
+    cv::cuda::calcNormDiff(loadMat(src1, useRoi), loadMat(src2, useRoi), dst, normCode, stream);
+
+    stream.waitForCompletion();
+
+    double val;
+    const cv::Mat val_mat(1, 1, CV_64FC1, &val);
+    dst.createMatHeader().convertTo(val_mat, CV_64F);
+
+    double val_gold = cv::norm(src1, src2, normCode);
+
+    EXPECT_NEAR(val_gold, val, 0.0);
+}
+
 INSTANTIATE_TEST_CASE_P(CUDA_Arithm, NormDiff, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
...
...
@@ -247,6 +287,24 @@ CUDA_TEST_P(Sum, Simple)
     EXPECT_SCALAR_NEAR(val_gold, val, CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.5);
 }

+CUDA_TEST_P(Sum, Simple_Async)
+{
+    cv::cuda::Stream stream;
+
+    cv::cuda::HostMem dst;
+    cv::cuda::calcSum(loadMat(src, useRoi), dst, cv::noArray(), stream);
+
+    stream.waitForCompletion();
+
+    cv::Scalar val;
+    cv::Mat val_mat(dst.size(), CV_64FC(dst.channels()), val.val);
+    dst.createMatHeader().convertTo(val_mat, CV_64F);
+
+    cv::Scalar val_gold = cv::sum(src);
+
+    EXPECT_SCALAR_NEAR(val_gold, val, CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.5);
+}
+
 CUDA_TEST_P(Sum, Abs)
 {
     cv::Scalar val = cv::cuda::absSum(loadMat(src, useRoi));
...
...
@@ -256,6 +314,24 @@ CUDA_TEST_P(Sum, Abs)
     EXPECT_SCALAR_NEAR(val_gold, val, CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.5);
 }

+CUDA_TEST_P(Sum, Abs_Async)
+{
+    cv::cuda::Stream stream;
+
+    cv::cuda::HostMem dst;
+    cv::cuda::calcAbsSum(loadMat(src, useRoi), dst, cv::noArray(), stream);
+
+    stream.waitForCompletion();
+
+    cv::Scalar val;
+    cv::Mat val_mat(dst.size(), CV_64FC(dst.channels()), val.val);
+    dst.createMatHeader().convertTo(val_mat, CV_64F);
+
+    cv::Scalar val_gold = absSumGold(src);
+
+    EXPECT_SCALAR_NEAR(val_gold, val, CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.5);
+}
+
 CUDA_TEST_P(Sum, Sqr)
 {
     cv::Scalar val = cv::cuda::sqrSum(loadMat(src, useRoi));
...
...
@@ -265,6 +341,24 @@ CUDA_TEST_P(Sum, Sqr)
     EXPECT_SCALAR_NEAR(val_gold, val, CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.5);
 }

+CUDA_TEST_P(Sum, Sqr_Async)
+{
+    cv::cuda::Stream stream;
+
+    cv::cuda::HostMem dst;
+    cv::cuda::calcSqrSum(loadMat(src, useRoi), dst, cv::noArray(), stream);
+
+    stream.waitForCompletion();
+
+    cv::Scalar val;
+    cv::Mat val_mat(dst.size(), CV_64FC(dst.channels()), val.val);
+    dst.createMatHeader().convertTo(val_mat, CV_64F);
+
+    cv::Scalar val_gold = sqrSumGold(src);
+
+    EXPECT_SCALAR_NEAR(val_gold, val, CV_MAT_DEPTH(type) < CV_32F ? 0.0 : 0.5);
+}
+
 INSTANTIATE_TEST_CASE_P(CUDA_Arithm, Sum, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
...
@@ -321,6 +415,28 @@ CUDA_TEST_P(MinMax, WithoutMask)
}
     }
 }

+CUDA_TEST_P(MinMax, Async)
+{
+    cv::Mat src = randomMat(size, depth);
+
+    cv::cuda::Stream stream;
+
+    cv::cuda::HostMem dst;
+    cv::cuda::findMinMax(loadMat(src, useRoi), dst, cv::noArray(), stream);
+
+    stream.waitForCompletion();
+
+    double vals[2];
+    const cv::Mat vals_mat(1, 2, CV_64FC1, &vals[0]);
+    dst.createMatHeader().convertTo(vals_mat, CV_64F);
+
+    double minVal_gold, maxVal_gold;
+    minMaxLocGold(src, &minVal_gold, &maxVal_gold);
+
+    EXPECT_DOUBLE_EQ(minVal_gold, vals[0]);
+    EXPECT_DOUBLE_EQ(maxVal_gold, vals[1]);
+}
+
 CUDA_TEST_P(MinMax, WithMask)
 {
     cv::Mat src = randomMat(size, depth);
...
...
@@ -471,6 +587,41 @@ CUDA_TEST_P(MinMaxLoc, WithoutMask)
     }
 }

+CUDA_TEST_P(MinMaxLoc, Async)
+{
+    cv::Mat src = randomMat(size, depth);
+
+    cv::cuda::Stream stream;
+
+    cv::cuda::HostMem minMaxVals, locVals;
+    cv::cuda::findMinMaxLoc(loadMat(src, useRoi), minMaxVals, locVals, cv::noArray(), stream);
+
+    stream.waitForCompletion();
+
+    double vals[2];
+    const cv::Mat vals_mat(2, 1, CV_64FC1, &vals[0]);
+    minMaxVals.createMatHeader().convertTo(vals_mat, CV_64F);
+
+    int locs[2];
+    const cv::Mat locs_mat(2, 1, CV_32SC1, &locs[0]);
+    locVals.createMatHeader().copyTo(locs_mat);
+
+    cv::Point locs2D[] = {
+        cv::Point(locs[0] % src.cols, locs[0] / src.cols),
+        cv::Point(locs[1] % src.cols, locs[1] / src.cols),
+    };
+
+    double minVal_gold, maxVal_gold;
+    cv::Point minLoc_gold, maxLoc_gold;
+    minMaxLocGold(src, &minVal_gold, &maxVal_gold, &minLoc_gold, &maxLoc_gold);
+
+    EXPECT_DOUBLE_EQ(minVal_gold, vals[0]);
+    EXPECT_DOUBLE_EQ(maxVal_gold, vals[1]);
+
+    expectEqual(src, minLoc_gold, locs2D[0]);
+    expectEqual(src, maxLoc_gold, locs2D[1]);
+}
+
 CUDA_TEST_P(MinMaxLoc, WithMask)
 {
     cv::Mat src = randomMat(size, depth);
...
...
@@ -564,6 +715,7 @@ PARAM_TEST_CASE(CountNonZero, cv::cuda::DeviceInfo, cv::Size, MatDepth, UseRoi)
     int depth;
     bool useRoi;

+    cv::Mat src;
+
     virtual void SetUp()
     {
...
...
@@ -573,15 +725,14 @@ PARAM_TEST_CASE(CountNonZero, cv::cuda::DeviceInfo, cv::Size, MatDepth, UseRoi)
         useRoi = GET_PARAM(3);

         cv::cuda::setDevice(devInfo.deviceID());
+
+        cv::Mat srcBase = randomMat(size, CV_8U, 0.0, 1.5);
+        srcBase.convertTo(src, depth);
     }
 };

 CUDA_TEST_P(CountNonZero, Accuracy)
 {
-    cv::Mat srcBase = randomMat(size, CV_8U, 0.0, 1.5);
-    cv::Mat src;
-    srcBase.convertTo(src, depth);
-
     if (depth == CV_64F && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     {
         try
...
@@ -603,6 +754,24 @@ CUDA_TEST_P(CountNonZero, Accuracy)
     }
 }

+CUDA_TEST_P(CountNonZero, Async)
+{
+    cv::cuda::Stream stream;
+
+    cv::cuda::HostMem dst;
+    cv::cuda::countNonZero(loadMat(src, useRoi), dst, stream);
+
+    stream.waitForCompletion();
+
+    int val;
+    const cv::Mat val_mat(1, 1, CV_32SC1, &val);
+    dst.createMatHeader().copyTo(val_mat);
+
+    int val_gold = cv::countNonZero(src);
+
+    ASSERT_EQ(val_gold, val);
+}
+
 INSTANTIATE_TEST_CASE_P(CUDA_Arithm, CountNonZero, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
...
@@ -750,7 +919,7 @@ CUDA_TEST_P(Normalize, WithMask)
     dst_gold.setTo(cv::Scalar::all(0));
     cv::normalize(src, dst_gold, alpha, beta, norm_type, type, mask);

-    EXPECT_MAT_NEAR(dst_gold, dst, 1e-6);
+    EXPECT_MAT_NEAR(dst_gold, dst, type < CV_32F ? 1.0 : 1e-4);
 }

 INSTANTIATE_TEST_CASE_P(CUDA_Arithm, Normalize, testing::Combine(
...
...
@@ -811,6 +980,28 @@ CUDA_TEST_P(MeanStdDev, Accuracy)
     }
 }

+CUDA_TEST_P(MeanStdDev, Async)
+{
+    cv::Mat src = randomMat(size, CV_8UC1);
+
+    cv::cuda::Stream stream;
+
+    cv::cuda::HostMem dst;
+    cv::cuda::meanStdDev(loadMat(src, useRoi), dst, stream);
+
+    stream.waitForCompletion();
+
+    double vals[2];
+    dst.createMatHeader().copyTo(cv::Mat(1, 2, CV_64FC1, &vals[0]));
+
+    cv::Scalar mean_gold;
+    cv::Scalar stddev_gold;
+    cv::meanStdDev(src, mean_gold, stddev_gold);
+
+    EXPECT_SCALAR_NEAR(mean_gold, cv::Scalar(vals[0]), 1e-5);
+    EXPECT_SCALAR_NEAR(stddev_gold, cv::Scalar(vals[1]), 1e-5);
+}
+
 INSTANTIATE_TEST_CASE_P(CUDA_Arithm, MeanStdDev, testing::Combine(
     ALL_DEVICES,
     DIFFERENT_SIZES,
...
...
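The same staging idiom applies to meanStdDev; a sketch under the assumption that the destination holds two doubles, mean followed by standard deviation, as the test above reads them (function name and d_img are illustrative, headers as in the first sketch):

    // Sketch: asynchronous meanStdDev with a HostMem-staged result.
    // d_img is a hypothetical CV_8UC1 GpuMat.
    void asyncMeanStdDev(const cv::cuda::GpuMat& d_img)
    {
        cv::cuda::Stream stream;
        cv::cuda::HostMem dst;

        cv::cuda::meanStdDev(d_img, dst, stream);
        stream.waitForCompletion();

        double vals[2];
        dst.createMatHeader().copyTo(cv::Mat(1, 2, CV_64FC1, &vals[0]));
        double mean = vals[0], stddev = vals[1];
        (void)mean; (void)stddev;
    }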
modules/cudabgsegm/src/fgd.cpp
...
@@ -266,7 +266,7 @@ namespace
{
    int bgfgClassification(const GpuMat& prevFrame, const GpuMat& curFrame,
                           const GpuMat& Ftd, const GpuMat& Fbd,
-                          GpuMat& foreground, GpuMat& countBuf,
+                          GpuMat& foreground,
                           const FGDParams& params, int out_cn)
    {
        typedef void (*func_t)(PtrStepSzb prevFrame, PtrStepSzb curFrame, PtrStepSzb Ftd, PtrStepSzb Fbd, PtrStepSzb foreground,
...
@@ -298,7 +298,7 @@ namespace
            deltaC, deltaCC, params.alpha2, params.N1c, params.N1cc, 0);

-        int count = cuda::countNonZero(foreground, countBuf);
+        int count = cuda::countNonZero(foreground);

        cuda::multiply(foreground, Scalar::all(255), foreground);
...
@@ -605,8 +605,6 @@ namespace
        GpuMat hist_;
        GpuMat histBuf_;
-        GpuMat countBuf_;

        GpuMat buf_;
        GpuMat filterBrd_;
...
@@ -649,7 +647,7 @@ namespace
        changeDetection(prevFrame_, curFrame, Ftd_, hist_, histBuf_);
        changeDetection(background_, curFrame, Fbd_, hist_, histBuf_);

-        int FG_pixels_count = bgfgClassification(prevFrame_, curFrame, Ftd_, Fbd_, foreground_, countBuf_, params_, 4);
+        int FG_pixels_count = bgfgClassification(prevFrame_, curFrame, Ftd_, Fbd_, foreground_, params_, 4);

#ifdef HAVE_OPENCV_CUDAFILTERS
        if (params_.perform_morphing > 0)
...
modules/cudafilters/src/filtering.cpp
...
@@ -542,7 +542,7 @@ namespace
            anchor_ = Point(iters_, iters_);
            iters_ = 1;
        }
-        else if (iters_ > 1 && countNonZero(kernel) == (int) kernel.total())
+        else if (iters_ > 1 && cv::countNonZero(kernel) == (int) kernel.total())
        {
            anchor_ = Point(anchor_.x * iters_, anchor_.y * iters_);
            kernel = getStructuringElement(MORPH_RECT,
...
modules/cudaimgproc/src/gftt.cpp
...
@@ -81,7 +81,6 @@ namespace
        GpuMat Dy_;
        GpuMat buf_;
        GpuMat eig_;
-        GpuMat minMaxbuf_;
        GpuMat tmpCorners_;
    };
...
@@ -112,7 +111,7 @@ namespace
        cornerCriteria_->compute(image, eig_);

        double maxVal = 0;
-        cuda::minMax(eig_, 0, &maxVal, noArray(), minMaxbuf_);
+        cuda::minMax(eig_, 0, &maxVal);

        ensureSizeIsEnough(1, std::max(1000, static_cast<int>(image.size().area() * 0.05)), CV_32FC2, tmpCorners_);
...
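The call-site simplification here mirrors the rest of the commit: the explicit minMaxbuf_ staging buffer disappears and internal allocations are handled by the library. A sketch of the simplified blocking call (wrapper name is illustrative):

    // Sketch: blocking minMax; pass 0 for outputs that are not needed,
    // as the call above does. eig is a hypothetical CV_32FC1 GpuMat.
    double maxCornerResponse(const cv::cuda::GpuMat& eig)
    {
        double maxVal = 0.0;
        cv::cuda::minMax(eig, 0, &maxVal);
        return maxVal;
    }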
modules/cudaimgproc/src/match_template.cpp
...
@@ -271,7 +271,6 @@ namespace
    private:
        Match_CCORR_8U match_CCORR_;
        GpuMat image_sqsums_;
-        GpuMat intBuffer_;
    };

    void Match_CCORR_NORMED_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
...
@@ -288,7 +287,7 @@ namespace
        match_CCORR_.match(image, templ, _result, stream);
        GpuMat result = _result.getGpuMat();

-        cuda::sqrIntegral(image.reshape(1), image_sqsums_, intBuffer_, stream);
+        cuda::sqrIntegral(image.reshape(1), image_sqsums_, stream);

        double templ_sqsum = cuda::sqrSum(templ.reshape(1))[0];
...
@@ -335,7 +334,6 @@ namespace
    private:
        GpuMat image_sqsums_;
-        GpuMat intBuffer_;
        Match_CCORR_8U match_CCORR_;
    };
...
@@ -359,7 +357,7 @@ namespace
            return;
        }

-        cuda::sqrIntegral(image.reshape(1), image_sqsums_, intBuffer_, stream);
+        cuda::sqrIntegral(image.reshape(1), image_sqsums_, stream);

        double templ_sqsum = cuda::sqrSum(templ.reshape(1))[0];
...
@@ -383,7 +381,6 @@ namespace
    private:
        GpuMat image_sqsums_;
-        GpuMat intBuffer_;
        Match_CCORR_8U match_CCORR_;
    };
...
@@ -398,7 +395,7 @@ namespace
        CV_Assert( image.type() == templ.type() );
        CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );

-        cuda::sqrIntegral(image.reshape(1), image_sqsums_, intBuffer_, stream);
+        cuda::sqrIntegral(image.reshape(1), image_sqsums_, stream);

        double templ_sqsum = cuda::sqrSum(templ.reshape(1))[0];
...
@@ -421,7 +418,6 @@ namespace
        void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());

    private:
-        GpuMat intBuffer_;
        std::vector<GpuMat> images_;
        std::vector<GpuMat> image_sums_;
        Match_CCORR_8U match_CCORR_;
...
@@ -444,7 +440,7 @@ namespace
        if (image.channels() == 1)
        {
            image_sums_.resize(1);
-            cuda::integral(image, image_sums_[0], intBuffer_, stream);
+            cuda::integral(image, image_sums_[0], stream);

            int templ_sum = (int) cuda::sum(templ)[0];
...
@@ -456,7 +452,7 @@ namespace
            image_sums_.resize(images_.size());
            for (int i = 0; i < image.channels(); ++i)
-                cuda::integral(images_[i], image_sums_[i], intBuffer_, stream);
+                cuda::integral(images_[i], image_sums_[i], stream);

            Scalar templ_sum = cuda::sum(templ);
...
@@ -501,7 +497,6 @@ namespace
    private:
        GpuMat imagef_, templf_;
        Match_CCORR_32F match_CCORR_32F_;
-        GpuMat intBuffer_;
        std::vector<GpuMat> images_;
        std::vector<GpuMat> image_sums_;
        std::vector<GpuMat> image_sqsums_;
...
@@ -527,10 +522,10 @@ namespace
        if (image.channels() == 1)
        {
            image_sums_.resize(1);
-            cuda::integral(image, image_sums_[0], intBuffer_, stream);
+            cuda::integral(image, image_sums_[0], stream);

            image_sqsums_.resize(1);
-            cuda::sqrIntegral(image, image_sqsums_[0], intBuffer_, stream);
+            cuda::sqrIntegral(image, image_sqsums_[0], stream);

            int templ_sum = (int) cuda::sum(templ)[0];
            double templ_sqsum = cuda::sqrSum(templ)[0];
...
@@ -547,8 +542,8 @@ namespace
            image_sqsums_.resize(images_.size());
            for (int i = 0; i < image.channels(); ++i)
            {
-                cuda::integral(images_[i], image_sums_[i], intBuffer_, stream);
-                cuda::sqrIntegral(images_[i], image_sqsums_[i], intBuffer_, stream);
+                cuda::integral(images_[i], image_sums_[i], stream);
+                cuda::sqrIntegral(images_[i], image_sqsums_[i], stream);
            }

            Scalar templ_sum = cuda::sum(templ);
...
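All the changes in this file follow one pattern: the intBuffer_ scratch GpuMat is dropped from cuda::integral / cuda::sqrIntegral, which now take only source, destination, and stream. A sketch of the resulting call shape (wrapper name and d_img are illustrative, headers as in the first sketch):

    // Sketch: stream-ordered integral images without a user-managed
    // scratch buffer. d_img is a hypothetical CV_8UC1 GpuMat.
    void computeIntegrals(const cv::cuda::GpuMat& d_img, cv::cuda::Stream& stream)
    {
        cv::cuda::GpuMat sum, sqsum;
        cv::cuda::integral(d_img, sum, stream);      // integral image
        cv::cuda::sqrIntegral(d_img, sqsum, stream); // squared integral image
        // Both calls are ordered on `stream`; synchronize before reading back.
    }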
samples/gpu/performance/tests.cpp
...
@@ -193,7 +193,7 @@ TEST(cornerHarris)
TEST(integral)
{
    Mat src, sum;
-    cuda::GpuMat d_src, d_sum, d_buf;
+    cuda::GpuMat d_src, d_sum;

    for (int size = 1000; size <= 4000; size *= 2)
    {
...
@@ -209,10 +209,10 @@ TEST(integral)
        d_src.upload(src);

-        cuda::integralBuffered(d_src, d_sum, d_buf);
+        cuda::integral(d_src, d_sum);

        CUDA_ON;
-        cuda::integralBuffered(d_src, d_sum, d_buf);
+        cuda::integral(d_src, d_sum);
        CUDA_OFF;
    }
}
...