Commit 0465b89e, authored Dec 20, 2010 by Alexey Spizhevoy
gpu module refactoring: moved per-element operations into a separate file
parent 6891a601
Showing 6 changed files with 1049 additions and 915 deletions
modules/gpu/include/opencv2/gpu/gpu.hpp      +77   -71
modules/gpu/src/arithm.cpp                   +0    -530
modules/gpu/src/cuda/element_operations.cu   +348  -0
modules/gpu/src/cuda/mathfunc.cu             +23   -307
modules/gpu/src/element_operations.cpp       +600  -6
tests/gpu/src/bitwise_oper.cpp               +1    -1
modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -362,46 +362,10 @@ namespace cv

////////////////////////////// Arithmetics ///////////////////////////////////

//! adds one matrix to another (c = a + b)
//! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types
CV_EXPORTS void add(const GpuMat& a, const GpuMat& b, GpuMat& c);
//! adds scalar to a matrix (c = a + s)
//! supports CV_32FC1 and CV_32FC2 type
CV_EXPORTS void add(const GpuMat& a, const Scalar& sc, GpuMat& c);
//! subtracts one matrix from another (c = a - b)
//! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types
CV_EXPORTS void subtract(const GpuMat& a, const GpuMat& b, GpuMat& c);
//! subtracts scalar from a matrix (c = a - s)
//! supports CV_32FC1 and CV_32FC2 type
CV_EXPORTS void subtract(const GpuMat& a, const Scalar& sc, GpuMat& c);
//! computes element-wise product of the two arrays (c = a * b)
//! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types
CV_EXPORTS void multiply(const GpuMat& a, const GpuMat& b, GpuMat& c);
//! multiplies matrix to a scalar (c = a * s)
//! supports CV_32FC1 and CV_32FC2 type
CV_EXPORTS void multiply(const GpuMat& a, const Scalar& sc, GpuMat& c);
//! computes element-wise quotient of the two arrays (c = a / b)
//! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types
CV_EXPORTS void divide(const GpuMat& a, const GpuMat& b, GpuMat& c);
//! computes element-wise quotient of matrix and scalar (c = a / s)
//! supports CV_32FC1 and CV_32FC2 type
CV_EXPORTS void divide(const GpuMat& a, const Scalar& sc, GpuMat& c);

//! transposes the matrix
//! supports CV_8UC1, CV_8SC1, CV_8UC4, CV_8SC4, CV_16UC2, CV_16SC2, CV_32SC1, CV_32FC1 type
CV_EXPORTS void transpose(const GpuMat& src1, GpuMat& dst);

//! computes element-wise absolute difference of two arrays (c = abs(a - b))
//! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types
CV_EXPORTS void absdiff(const GpuMat& a, const GpuMat& b, GpuMat& c);
//! computes element-wise absolute difference of array and scalar (c = abs(a - s))
//! supports only CV_32FC1 type
CV_EXPORTS void absdiff(const GpuMat& a, const Scalar& s, GpuMat& c);

//! compares elements of two arrays (c = a <cmpop> b)
//! supports CV_8UC4, CV_32FC1 types
CV_EXPORTS void compare(const GpuMat& a, const GpuMat& b, GpuMat& c, int cmpop);

//! computes mean value and standard deviation of all or selected array elements
//! supports only CV_8UC1 type
CV_EXPORTS void meanStdDev(const GpuMat& mtx, Scalar& mean, Scalar& stddev);
@@ -485,14 +449,6 @@ namespace cv

//! copies each plane of a multi-channel array to a dedicated array (async version)
CV_EXPORTS void split(const GpuMat& src, vector<GpuMat>& dst, const Stream& stream);

//! computes exponent of each matrix element (b = e**a)
//! supports only CV_32FC1 type
CV_EXPORTS void exp(const GpuMat& a, GpuMat& b);

//! computes natural logarithm of absolute value of each matrix element: b = log(abs(a))
//! supports only CV_32FC1 type
CV_EXPORTS void log(const GpuMat& a, GpuMat& b);

//! computes magnitude of complex (x(i).re, x(i).im) vector
//! supports only CV_32FC2 type
CV_EXPORTS void magnitude(const GpuMat& x, GpuMat& magnitude);
@@ -531,33 +487,6 @@ namespace cv

//! async version
CV_EXPORTS void polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat& x, GpuMat& y, bool angleInDegrees, const Stream& stream);

//! performs per-element bit-wise inversion
CV_EXPORTS void bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask = GpuMat());
//! async version
CV_EXPORTS void bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, const Stream& stream);
//! calculates per-element bit-wise disjunction of two arrays
CV_EXPORTS void bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask = GpuMat());
//! async version
CV_EXPORTS void bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, const Stream& stream);
//! calculates per-element bit-wise conjunction of two arrays
CV_EXPORTS void bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask = GpuMat());
//! async version
CV_EXPORTS void bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, const Stream& stream);
//! calculates per-element bit-wise "exclusive or" operation
CV_EXPORTS void bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask = GpuMat());
//! async version
CV_EXPORTS void bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, const Stream& stream);

//! Logical operators
CV_EXPORTS GpuMat operator ~ (const GpuMat& src);
CV_EXPORTS GpuMat operator | (const GpuMat& src1, const GpuMat& src2);
CV_EXPORTS GpuMat operator & (const GpuMat& src1, const GpuMat& src2);
CV_EXPORTS GpuMat operator ^ (const GpuMat& src1, const GpuMat& src2);

//! computes per-element minimum of two arrays (dst = min(src1, src2))
CV_EXPORTS void min(const GpuMat& src1, const GpuMat& src2, GpuMat& dst);
//! Async version
@@ -578,6 +507,83 @@ namespace cv

//! Async version
CV_EXPORTS void max(const GpuMat& src1, double src2, GpuMat& dst, const Stream& stream);

//////////////////////////// Per-element operations ////////////////////////////////////

//! adds one matrix to another (c = a + b)
//! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types
CV_EXPORTS void add(const GpuMat& a, const GpuMat& b, GpuMat& c);
//! adds scalar to a matrix (c = a + s)
//! supports CV_32FC1 and CV_32FC2 type
CV_EXPORTS void add(const GpuMat& a, const Scalar& sc, GpuMat& c);
//! subtracts one matrix from another (c = a - b)
//! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types
CV_EXPORTS void subtract(const GpuMat& a, const GpuMat& b, GpuMat& c);
//! subtracts scalar from a matrix (c = a - s)
//! supports CV_32FC1 and CV_32FC2 type
CV_EXPORTS void subtract(const GpuMat& a, const Scalar& sc, GpuMat& c);
//! computes element-wise product of the two arrays (c = a * b)
//! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types
CV_EXPORTS void multiply(const GpuMat& a, const GpuMat& b, GpuMat& c);
//! multiplies matrix to a scalar (c = a * s)
//! supports CV_32FC1 and CV_32FC2 type
CV_EXPORTS void multiply(const GpuMat& a, const Scalar& sc, GpuMat& c);
//! computes element-wise quotient of the two arrays (c = a / b)
//! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types
CV_EXPORTS void divide(const GpuMat& a, const GpuMat& b, GpuMat& c);
//! computes element-wise quotient of matrix and scalar (c = a / s)
//! supports CV_32FC1 and CV_32FC2 type
CV_EXPORTS void divide(const GpuMat& a, const Scalar& sc, GpuMat& c);

//! computes exponent of each matrix element (b = e**a)
//! supports only CV_32FC1 type
CV_EXPORTS void exp(const GpuMat& a, GpuMat& b);

//! computes natural logarithm of absolute value of each matrix element: b = log(abs(a))
//! supports only CV_32FC1 type
CV_EXPORTS void log(const GpuMat& a, GpuMat& b);

//! computes element-wise absolute difference of two arrays (c = abs(a - b))
//! supports CV_8UC1, CV_8UC4, CV_32SC1, CV_32FC1 types
CV_EXPORTS void absdiff(const GpuMat& a, const GpuMat& b, GpuMat& c);
//! computes element-wise absolute difference of array and scalar (c = abs(a - s))
//! supports only CV_32FC1 type
CV_EXPORTS void absdiff(const GpuMat& a, const Scalar& s, GpuMat& c);

//! compares elements of two arrays (c = a <cmpop> b)
//! supports CV_8UC4, CV_32FC1 types
CV_EXPORTS void compare(const GpuMat& a, const GpuMat& b, GpuMat& c, int cmpop);

//! performs per-element bit-wise inversion
CV_EXPORTS void bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask = GpuMat());
//! version without mask
CV_EXPORTS GpuMat operator ~ (const GpuMat& src);
//! async version
CV_EXPORTS void bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, const Stream& stream);

//! calculates per-element bit-wise disjunction of two arrays
CV_EXPORTS void bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask = GpuMat());
//! version without mask
CV_EXPORTS GpuMat operator | (const GpuMat& src1, const GpuMat& src2);
//! async version
CV_EXPORTS void bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, const Stream& stream);

//! calculates per-element bit-wise conjunction of two arrays
CV_EXPORTS void bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask = GpuMat());
//! version without mask
CV_EXPORTS GpuMat operator & (const GpuMat& src1, const GpuMat& src2);
//! async version
CV_EXPORTS void bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, const Stream& stream);

//! calculates per-element bit-wise "exclusive or" operation
CV_EXPORTS void bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask = GpuMat());
//! version without mask
CV_EXPORTS GpuMat operator ^ (const GpuMat& src1, const GpuMat& src2);
//! async version
CV_EXPORTS void bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, const Stream& stream);
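The block above is the new "Per-element operations" section as it lands in the header. A minimal host-side usage sketch (not part of the diff; assumes an OpenCV 2.x-era build with the gpu module and a CUDA device):

// Sketch only: exercises the per-element API declared above.
#include <opencv2/gpu/gpu.hpp>

void example(const cv::Mat& hostA, const cv::Mat& hostB)
{
    cv::gpu::GpuMat a(hostA), b(hostB), sum, diff;  // constructors upload to the device
    cv::gpu::add(a, b, sum);                        // c = a + b, e.g. CV_32FC1 inputs
    cv::gpu::absdiff(a, b, diff);                   // c = abs(a - b)
    cv::gpu::GpuMat inv = ~sum;                     // operator ~ forwards to bitwise_not
    cv::Mat result;
    inv.download(result);                           // copy the result back to the host
}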
////////////////////////////// Image processing //////////////////////////////
modules/gpu/src/arithm.cpp
@@ -48,18 +48,7 @@ using namespace std;

#if !defined (HAVE_CUDA)

void cv::gpu::add(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::add(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); }
void cv::gpu::subtract(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::subtract(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); }
void cv::gpu::multiply(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::multiply(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); }
void cv::gpu::divide(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::divide(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); }
void cv::gpu::transpose(const GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::absdiff(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::absdiff(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); }
void cv::gpu::compare(const GpuMat&, const GpuMat&, GpuMat&, int) { throw_nogpu(); }
void cv::gpu::meanStdDev(const GpuMat&, Scalar&, Scalar&) { throw_nogpu(); }
double cv::gpu::norm(const GpuMat&, int) { throw_nogpu(); return 0.0; }
double cv::gpu::norm(const GpuMat&, const GpuMat&, int) { throw_nogpu(); return 0.0; }
@@ -89,18 +78,6 @@ void cv::gpu::cartToPolar(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool)
void cv::gpu::cartToPolar(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool, const Stream&) { throw_nogpu(); }
void cv::gpu::polarToCart(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool) { throw_nogpu(); }
void cv::gpu::polarToCart(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, bool, const Stream&) { throw_nogpu(); }
void cv::gpu::bitwise_not(const GpuMat&, GpuMat&, const GpuMat&) { throw_nogpu(); }
void cv::gpu::bitwise_not(const GpuMat&, GpuMat&, const GpuMat&, const Stream&) { throw_nogpu(); }
void cv::gpu::bitwise_or(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&) { throw_nogpu(); }
void cv::gpu::bitwise_or(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, const Stream&) { throw_nogpu(); }
void cv::gpu::bitwise_and(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&) { throw_nogpu(); }
void cv::gpu::bitwise_and(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, const Stream&) { throw_nogpu(); }
void cv::gpu::bitwise_xor(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&) { throw_nogpu(); }
void cv::gpu::bitwise_xor(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, const Stream&) { throw_nogpu(); }
cv::gpu::GpuMat cv::gpu::operator ~ (const GpuMat&) { throw_nogpu(); return GpuMat(); }
cv::gpu::GpuMat cv::gpu::operator | (const GpuMat&, const GpuMat&) { throw_nogpu(); return GpuMat(); }
cv::gpu::GpuMat cv::gpu::operator & (const GpuMat&, const GpuMat&) { throw_nogpu(); return GpuMat(); }
cv::gpu::GpuMat cv::gpu::operator ^ (const GpuMat&, const GpuMat&) { throw_nogpu(); return GpuMat(); }
void cv::gpu::min(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::min(const GpuMat&, const GpuMat&, GpuMat&, const Stream&) { throw_nogpu(); }
void cv::gpu::min(const GpuMat&, double, GpuMat&) { throw_nogpu(); }
@@ -112,164 +89,6 @@ void cv::gpu::max(const GpuMat&, double, GpuMat&, const Stream&) { throw_nogpu()

#else /* !defined (HAVE_CUDA) */

////////////////////////////////////////////////////////////////////////
// add subtract multiply divide

namespace
{
    typedef NppStatus (*npp_arithm_8u_t)(const Npp8u* pSrc1, int nSrc1Step, const Npp8u* pSrc2, int nSrc2Step, Npp8u* pDst, int nDstStep, NppiSize oSizeROI, int nScaleFactor);
    typedef NppStatus (*npp_arithm_32s_t)(const Npp32s* pSrc1, int nSrc1Step, const Npp32s* pSrc2, int nSrc2Step, Npp32s* pDst, int nDstStep, NppiSize oSizeROI);
    typedef NppStatus (*npp_arithm_32f_t)(const Npp32f* pSrc1, int nSrc1Step, const Npp32f* pSrc2, int nSrc2Step, Npp32f* pDst, int nDstStep, NppiSize oSizeROI);

    void nppArithmCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst,
                         npp_arithm_8u_t npp_func_8uc1, npp_arithm_8u_t npp_func_8uc4,
                         npp_arithm_32s_t npp_func_32sc1, npp_arithm_32f_t npp_func_32fc1)
    {
        CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
        CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32SC1 || src1.type() == CV_32FC1);

        dst.create(src1.size(), src1.type());

        NppiSize sz;
        sz.width  = src1.cols;
        sz.height = src1.rows;

        switch (src1.type())
        {
        case CV_8UC1:
            nppSafeCall(npp_func_8uc1(src1.ptr<Npp8u>(), src1.step, src2.ptr<Npp8u>(), src2.step, dst.ptr<Npp8u>(), dst.step, sz, 0));
            break;
        case CV_8UC4:
            nppSafeCall(npp_func_8uc4(src1.ptr<Npp8u>(), src1.step, src2.ptr<Npp8u>(), src2.step, dst.ptr<Npp8u>(), dst.step, sz, 0));
            break;
        case CV_32SC1:
            nppSafeCall(npp_func_32sc1(src1.ptr<Npp32s>(), src1.step, src2.ptr<Npp32s>(), src2.step, dst.ptr<Npp32s>(), dst.step, sz));
            break;
        case CV_32FC1:
            nppSafeCall(npp_func_32fc1(src1.ptr<Npp32f>(), src1.step, src2.ptr<Npp32f>(), src2.step, dst.ptr<Npp32f>(), dst.step, sz));
            break;
        default:
            CV_Assert(!"Unsupported source type");
        }
    }

    template<int SCN> struct NppArithmScalarFunc;
    template<> struct NppArithmScalarFunc<1>
    {
        typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, Npp32f nValue, Npp32f* pDst, int nDstStep, NppiSize oSizeROI);
    };
    template<> struct NppArithmScalarFunc<2>
    {
        typedef NppStatus (*func_ptr)(const Npp32fc* pSrc, int nSrcStep, Npp32fc nValue, Npp32fc* pDst, int nDstStep, NppiSize oSizeROI);
    };

    template<int SCN, typename NppArithmScalarFunc<SCN>::func_ptr func> struct NppArithmScalar;
    template<typename NppArithmScalarFunc<1>::func_ptr func> struct NppArithmScalar<1, func>
    {
        static void calc(const GpuMat& src, const Scalar& sc, GpuMat& dst)
        {
            dst.create(src.size(), src.type());

            NppiSize sz;
            sz.width  = src.cols;
            sz.height = src.rows;

            nppSafeCall(func(src.ptr<Npp32f>(), src.step, (Npp32f)sc[0], dst.ptr<Npp32f>(), dst.step, sz));
        }
    };
    template<typename NppArithmScalarFunc<2>::func_ptr func> struct NppArithmScalar<2, func>
    {
        static void calc(const GpuMat& src, const Scalar& sc, GpuMat& dst)
        {
            dst.create(src.size(), src.type());

            NppiSize sz;
            sz.width  = src.cols;
            sz.height = src.rows;

            Npp32fc nValue;
            nValue.re = (Npp32f)sc[0];
            nValue.im = (Npp32f)sc[1];

            nppSafeCall(func(src.ptr<Npp32fc>(), src.step, nValue, dst.ptr<Npp32fc>(), dst.step, sz));
        }
    };
}

void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{
    nppArithmCaller(src1, src2, dst, nppiAdd_8u_C1RSfs, nppiAdd_8u_C4RSfs, nppiAdd_32s_C1R, nppiAdd_32f_C1R);
}

void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{
    nppArithmCaller(src2, src1, dst, nppiSub_8u_C1RSfs, nppiSub_8u_C4RSfs, nppiSub_32s_C1R, nppiSub_32f_C1R);
}

void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{
    nppArithmCaller(src1, src2, dst, nppiMul_8u_C1RSfs, nppiMul_8u_C4RSfs, nppiMul_32s_C1R, nppiMul_32f_C1R);
}

void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{
    nppArithmCaller(src2, src1, dst, nppiDiv_8u_C1RSfs, nppiDiv_8u_C4RSfs, nppiDiv_32s_C1R, nppiDiv_32f_C1R);
}

void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst)
{
    typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst);
    static const caller_t callers[] = {0, NppArithmScalar<1, nppiAddC_32f_C1R>::calc, NppArithmScalar<2, nppiAddC_32fc_C1R>::calc};

    CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);

    callers[src.channels()](src, sc, dst);
}

void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst)
{
    typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst);
    static const caller_t callers[] = {0, NppArithmScalar<1, nppiSubC_32f_C1R>::calc, NppArithmScalar<2, nppiSubC_32fc_C1R>::calc};

    CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);

    callers[src.channels()](src, sc, dst);
}

void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst)
{
    typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst);
    static const caller_t callers[] = {0, NppArithmScalar<1, nppiMulC_32f_C1R>::calc, NppArithmScalar<2, nppiMulC_32fc_C1R>::calc};

    CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);

    callers[src.channels()](src, sc, dst);
}

void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst)
{
    typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst);
    static const caller_t callers[] = {0, NppArithmScalar<1, nppiDivC_32f_C1R>::calc, NppArithmScalar<2, nppiDivC_32fc_C1R>::calc};

    CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);

    callers[src.channels()](src, sc, dst);
}
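The scalar overloads above all share one dispatch shape: a function-pointer table indexed by src.channels(), guarded by an assert so only 1- and 2-channel float matrices reach it. A stripped-down sketch of the idiom (hypothetical names, independent of NPP):

// Stand-alone illustration of the callers[src.channels()] dispatch used above.
#include <cassert>

typedef void (*caller_t)(float value);

static void calc_c1(float) { /* single-channel (CV_32FC1) path */ }
static void calc_c2(float) { /* two-channel (CV_32FC2, complex) path */ }

static void dispatch(int channels, float v)
{
    static const caller_t callers[] = { 0, calc_c1, calc_c2 };  // slot 0 is unused
    assert(channels == 1 || channels == 2);  // mirrors the CV_Assert above
    callers[channels](v);  // the channel count picks the implementation
}

Note also that subtract and divide hand (src2, src1) to nppArithmCaller: the corresponding NPP primitives compute their result from the second operand against the first, so swapping the arguments preserves the c = a - b and c = a / b semantics.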
////////////////////////////////////////////////////////////////////////
// transpose
@@ -299,112 +118,6 @@ void cv::gpu::transpose(const GpuMat& src, GpuMat& dst)
    }
}

////////////////////////////////////////////////////////////////////////
// absdiff

void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{
    CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
    CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32SC1 || src1.type() == CV_32FC1);

    dst.create(src1.size(), src1.type());

    NppiSize sz;
    sz.width  = src1.cols;
    sz.height = src1.rows;

    switch (src1.type())
    {
    case CV_8UC1:
        nppSafeCall(nppiAbsDiff_8u_C1R(src1.ptr<Npp8u>(), src1.step, src2.ptr<Npp8u>(), src2.step, dst.ptr<Npp8u>(), dst.step, sz));
        break;
    case CV_8UC4:
        nppSafeCall(nppiAbsDiff_8u_C4R(src1.ptr<Npp8u>(), src1.step, src2.ptr<Npp8u>(), src2.step, dst.ptr<Npp8u>(), dst.step, sz));
        break;
    case CV_32SC1:
        nppSafeCall(nppiAbsDiff_32s_C1R(src1.ptr<Npp32s>(), src1.step, src2.ptr<Npp32s>(), src2.step, dst.ptr<Npp32s>(), dst.step, sz));
        break;
    case CV_32FC1:
        nppSafeCall(nppiAbsDiff_32f_C1R(src1.ptr<Npp32f>(), src1.step, src2.ptr<Npp32f>(), src2.step, dst.ptr<Npp32f>(), dst.step, sz));
        break;
    default:
        CV_Assert(!"Unsupported source type");
    }
}

void cv::gpu::absdiff(const GpuMat& src, const Scalar& s, GpuMat& dst)
{
    CV_Assert(src.type() == CV_32FC1);

    dst.create(src.size(), src.type());

    NppiSize sz;
    sz.width  = src.cols;
    sz.height = src.rows;

    nppSafeCall(nppiAbsDiffC_32f_C1R(src.ptr<Npp32f>(), src.step, dst.ptr<Npp32f>(), dst.step, sz, (Npp32f)s[0]));
}

////////////////////////////////////////////////////////////////////////
// compare

namespace cv { namespace gpu { namespace mathfunc
{
    void compare_ne_8uc4(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst);
    void compare_ne_32f(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst);
}}}

void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int cmpop)
{
    CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
    CV_Assert(src1.type() == CV_8UC4 || src1.type() == CV_32FC1);

    dst.create(src1.size(), CV_8UC1);

    static const NppCmpOp nppCmpOp[] = { NPP_CMP_EQ, NPP_CMP_GREATER, NPP_CMP_GREATER_EQ, NPP_CMP_LESS, NPP_CMP_LESS_EQ };

    NppiSize sz;
    sz.width  = src1.cols;
    sz.height = src1.rows;

    if (src1.type() == CV_8UC4)
    {
        if (cmpop != CMP_NE)
        {
            nppSafeCall(nppiCompare_8u_C4R(src1.ptr<Npp8u>(), src1.step, src2.ptr<Npp8u>(), src2.step, dst.ptr<Npp8u>(), dst.step, sz, nppCmpOp[cmpop]));
        }
        else
        {
            mathfunc::compare_ne_8uc4(src1, src2, dst);
        }
    }
    else
    {
        if (cmpop != CMP_NE)
        {
            nppSafeCall(nppiCompare_32f_C1R(src1.ptr<Npp32f>(), src1.step, src2.ptr<Npp32f>(), src2.step, dst.ptr<Npp8u>(), dst.step, sz, nppCmpOp[cmpop]));
        }
        else
        {
            mathfunc::compare_ne_32f(src1, src2, dst);
        }
    }
}
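The nppCmpOp table above covers only EQ/GT/GE/LT/LE, so the not-equal case falls through to the hand-written compare_ne kernels that this commit moves into element_operations.cu. A minimal call-site sketch (assumes the standard OpenCV comparison constants):

// Sketch only: both branches of cv::gpu::compare on CV_32FC1 inputs.
cv::gpu::GpuMat a, b, mask;                // assume a and b hold CV_32FC1 data
cv::gpu::compare(a, b, mask, cv::CMP_GT);  // served by nppiCompare_32f_C1R
cv::gpu::compare(a, b, mask, cv::CMP_NE);  // served by mathfunc::compare_ne_32f
// mask is created as CV_8UC1, set to 255 where the predicate holds.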
////////////////////////////////////////////////////////////////////////
// meanStdDev
@@ -997,249 +710,6 @@ void cv::gpu::polarToCart(const GpuMat& magnitude, const GpuMat& angle, GpuMat&
    ::polarToCart_caller(magnitude, angle, x, y, angleInDegrees, StreamAccessor::getStream(stream));
}

//////////////////////////////////////////////////////////////////////////////
// Per-element bit-wise logical matrix operations

namespace cv { namespace gpu { namespace mathfunc
{
    void bitwise_not_caller(int rows, int cols, int elem_size1, int cn, const PtrStep src, PtrStep dst, cudaStream_t stream);

    template <typename T>
    void bitwise_mask_not_caller(int rows, int cols, int cn, const PtrStep src, const PtrStep mask, PtrStep dst, cudaStream_t stream);

    void bitwise_or_caller(int rows, int cols, int elem_size1, int cn, const PtrStep src1, const PtrStep src2, PtrStep dst, cudaStream_t stream);

    template <typename T>
    void bitwise_mask_or_caller(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2, const PtrStep mask, PtrStep dst, cudaStream_t stream);

    void bitwise_and_caller(int rows, int cols, int elem_size1, int cn, const PtrStep src1, const PtrStep src2, PtrStep dst, cudaStream_t stream);

    template <typename T>
    void bitwise_mask_and_caller(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2, const PtrStep mask, PtrStep dst, cudaStream_t stream);

    void bitwise_xor_caller(int rows, int cols, int elem_size1, int cn, const PtrStep src1, const PtrStep src2, PtrStep dst, cudaStream_t stream);

    template <typename T>
    void bitwise_mask_xor_caller(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2, const PtrStep mask, PtrStep dst, cudaStream_t stream);
}}}

namespace
{
    void bitwise_not_caller(const GpuMat& src, GpuMat& dst, cudaStream_t stream)
    {
        dst.create(src.size(), src.type());

        cv::gpu::mathfunc::bitwise_not_caller(src.rows, src.cols, src.elemSize1(), dst.channels(), src, dst, stream);
    }

    void bitwise_not_caller(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
    {
        using namespace cv::gpu;

        typedef void (*Caller)(int, int, int, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
        static Caller callers[] = {mathfunc::bitwise_mask_not_caller<unsigned char>, mathfunc::bitwise_mask_not_caller<unsigned char>,
                                   mathfunc::bitwise_mask_not_caller<unsigned short>, mathfunc::bitwise_mask_not_caller<unsigned short>,
                                   mathfunc::bitwise_mask_not_caller<unsigned int>, mathfunc::bitwise_mask_not_caller<unsigned int>,
                                   mathfunc::bitwise_mask_not_caller<unsigned int>};

        CV_Assert(mask.type() == CV_8U && mask.size() == src.size());
        dst.create(src.size(), src.type());

        Caller caller = callers[src.depth()];
        CV_Assert(caller);

        int cn = src.depth() != CV_64F ? src.channels() : src.channels() * (sizeof(double) / sizeof(unsigned int));
        caller(src.rows, src.cols, cn, src, mask, dst, stream);
    }

    void bitwise_or_caller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream)
    {
        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
        dst.create(src1.size(), src1.type());

        cv::gpu::mathfunc::bitwise_or_caller(dst.rows, dst.cols, dst.elemSize1(), dst.channels(), src1, src2, dst, stream);
    }

    void bitwise_or_caller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
    {
        using namespace cv::gpu;

        typedef void (*Caller)(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
        static Caller callers[] = {mathfunc::bitwise_mask_or_caller<unsigned char>, mathfunc::bitwise_mask_or_caller<unsigned char>,
                                   mathfunc::bitwise_mask_or_caller<unsigned short>, mathfunc::bitwise_mask_or_caller<unsigned short>,
                                   mathfunc::bitwise_mask_or_caller<unsigned int>, mathfunc::bitwise_mask_or_caller<unsigned int>,
                                   mathfunc::bitwise_mask_or_caller<unsigned int>};

        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
        dst.create(src1.size(), src1.type());

        Caller caller = callers[src1.depth()];
        CV_Assert(caller);

        int cn = dst.depth() != CV_64F ? dst.channels() : dst.channels() * (sizeof(double) / sizeof(unsigned int));
        caller(dst.rows, dst.cols, cn, src1, src2, mask, dst, stream);
    }

    void bitwise_and_caller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream)
    {
        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
        dst.create(src1.size(), src1.type());

        cv::gpu::mathfunc::bitwise_and_caller(dst.rows, dst.cols, dst.elemSize1(), dst.channels(), src1, src2, dst, stream);
    }

    void bitwise_and_caller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
    {
        using namespace cv::gpu;

        typedef void (*Caller)(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
        static Caller callers[] = {mathfunc::bitwise_mask_and_caller<unsigned char>, mathfunc::bitwise_mask_and_caller<unsigned char>,
                                   mathfunc::bitwise_mask_and_caller<unsigned short>, mathfunc::bitwise_mask_and_caller<unsigned short>,
                                   mathfunc::bitwise_mask_and_caller<unsigned int>, mathfunc::bitwise_mask_and_caller<unsigned int>,
                                   mathfunc::bitwise_mask_and_caller<unsigned int>};

        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
        dst.create(src1.size(), src1.type());

        Caller caller = callers[src1.depth()];
        CV_Assert(caller);

        int cn = dst.depth() != CV_64F ? dst.channels() : dst.channels() * (sizeof(double) / sizeof(unsigned int));
        caller(dst.rows, dst.cols, cn, src1, src2, mask, dst, stream);
    }

    void bitwise_xor_caller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream)
    {
        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
        dst.create(src1.size(), src1.type());

        cv::gpu::mathfunc::bitwise_xor_caller(dst.rows, dst.cols, dst.elemSize1(), dst.channels(), src1, src2, dst, stream);
    }

    void bitwise_xor_caller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
    {
        using namespace cv::gpu;

        typedef void (*Caller)(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
        static Caller callers[] = {mathfunc::bitwise_mask_xor_caller<unsigned char>, mathfunc::bitwise_mask_xor_caller<unsigned char>,
                                   mathfunc::bitwise_mask_xor_caller<unsigned short>, mathfunc::bitwise_mask_xor_caller<unsigned short>,
                                   mathfunc::bitwise_mask_xor_caller<unsigned int>, mathfunc::bitwise_mask_xor_caller<unsigned int>,
                                   mathfunc::bitwise_mask_xor_caller<unsigned int>};

        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
        dst.create(src1.size(), src1.type());

        Caller caller = callers[src1.depth()];
        CV_Assert(caller);

        int cn = dst.depth() != CV_64F ? dst.channels() : dst.channels() * (sizeof(double) / sizeof(unsigned int));
        caller(dst.rows, dst.cols, cn, src1, src2, mask, dst, stream);
    }
}

void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask)
{
    if (mask.empty())
        ::bitwise_not_caller(src, dst, 0);
    else
        ::bitwise_not_caller(src, dst, mask, 0);
}

void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, const Stream& stream)
{
    if (mask.empty())
        ::bitwise_not_caller(src, dst, StreamAccessor::getStream(stream));
    else
        ::bitwise_not_caller(src, dst, mask, StreamAccessor::getStream(stream));
}

void cv::gpu::bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask)
{
    if (mask.empty())
        ::bitwise_or_caller(src1, src2, dst, 0);
    else
        ::bitwise_or_caller(src1, src2, dst, mask, 0);
}

void cv::gpu::bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, const Stream& stream)
{
    if (mask.empty())
        ::bitwise_or_caller(src1, src2, dst, StreamAccessor::getStream(stream));
    else
        ::bitwise_or_caller(src1, src2, dst, mask, StreamAccessor::getStream(stream));
}

void cv::gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask)
{
    if (mask.empty())
        ::bitwise_and_caller(src1, src2, dst, 0);
    else
        ::bitwise_and_caller(src1, src2, dst, mask, 0);
}

void cv::gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, const Stream& stream)
{
    if (mask.empty())
        ::bitwise_and_caller(src1, src2, dst, StreamAccessor::getStream(stream));
    else
        ::bitwise_and_caller(src1, src2, dst, mask, StreamAccessor::getStream(stream));
}

void cv::gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask)
{
    if (mask.empty())
        ::bitwise_xor_caller(src1, src2, dst, 0);
    else
        ::bitwise_xor_caller(src1, src2, dst, mask, 0);
}

void cv::gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, const Stream& stream)
{
    if (mask.empty())
        ::bitwise_xor_caller(src1, src2, dst, StreamAccessor::getStream(stream));
    else
        ::bitwise_xor_caller(src1, src2, dst, mask, StreamAccessor::getStream(stream));
}

cv::gpu::GpuMat cv::gpu::operator ~ (const GpuMat& src)
{
    GpuMat dst;
    bitwise_not(src, dst);
    return dst;
}

cv::gpu::GpuMat cv::gpu::operator | (const GpuMat& src1, const GpuMat& src2)
{
    GpuMat dst;
    bitwise_or(src1, src2, dst);
    return dst;
}

cv::gpu::GpuMat cv::gpu::operator & (const GpuMat& src1, const GpuMat& src2)
{
    GpuMat dst;
    bitwise_and(src1, src2, dst);
    return dst;
}

cv::gpu::GpuMat cv::gpu::operator ^ (const GpuMat& src1, const GpuMat& src2)
{
    GpuMat dst;
    bitwise_xor(src1, src2, dst);
    return dst;
}
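One subtle line recurs in every masked wrapper above: the channel count is widened for CV_64F because the callers table has no 64-bit entry and routes that depth to the unsigned int kernel. A worked instance:

// For a CV_64FC3 source: depth() == CV_64F and channels() == 3, so
//   cn = 3 * (sizeof(double) / sizeof(unsigned int)) = 3 * 2 = 6
// and each double is processed as two consecutive 32-bit words; the kernel
// still tests one mask byte per original pixel via mask.ptr(y)[x / cn].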
//////////////////////////////////////////////////////////////////////////////
// min/max
modules/gpu/src/cuda/element_operations.cu
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "opencv2/gpu/device/vecmath.hpp"
#include "transform.hpp"
#include "internal_shared.hpp"
using namespace cv::gpu;
using namespace cv::gpu::device;
namespace cv { namespace gpu { namespace mathfunc
{
//////////////////////////////////////////////////////////////////////////////////////
// Compare
template <typename T1, typename T2>
struct NotEqual
{
__device__ uchar operator()(const T1& src1, const T2& src2)
{
return static_cast<uchar>(static_cast<int>(src1 != src2) * 255);
}
};
template <typename T1, typename T2>
inline void compare_ne(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst)
{
NotEqual<T1, T2> op;
transform(static_cast< DevMem2D_<T1> >(src1), static_cast< DevMem2D_<T2> >(src2), dst, op, 0);
}
void compare_ne_8uc4(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst)
{
compare_ne<uint, uint>(src1, src2, dst);
}
void compare_ne_32f(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst)
{
compare_ne<float, float>(src1, src2, dst);
}
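// Note: compare_ne_8uc4 reinterprets each 8UC4 pixel as a single 32-bit uint,
// so one comparison per pixel yields 255 whenever any of the four channels
// differs; the host side allocates the destination as a CV_8UC1 mask.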
//////////////////////////////////////////////////////////////////////////
// Unary bitwise logical matrix operations
enum { UN_OP_NOT };
template <typename T, int opid>
struct UnOp;
template <typename T>
struct UnOp<T, UN_OP_NOT>
{
static __device__ T call(T v) { return ~v; }
};
template <int opid>
__global__ void bitwiseUnOpKernel(int rows, int width, const PtrStep src, PtrStep dst)
{
const int x = (blockDim.x * blockIdx.x + threadIdx.x) * 4;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (y < rows)
{
uchar* dst_ptr = dst.ptr(y) + x;
const uchar* src_ptr = src.ptr(y) + x;
if (x + sizeof(uint) - 1 < width)
{
*(uint*)dst_ptr = UnOp<uint, opid>::call(*(uint*)src_ptr);
}
else
{
const uchar* src_end = src.ptr(y) + width;
while (src_ptr < src_end)
{
*dst_ptr++ = UnOp<uchar, opid>::call(*src_ptr++);
}
}
}
}
template <int opid>
void bitwiseUnOp(int rows, int width, const PtrStep src, PtrStep dst,
cudaStream_t stream)
{
dim3 threads(16, 16);
dim3 grid(divUp(width, threads.x * sizeof(uint)),
divUp(rows, threads.y));
bitwiseUnOpKernel<opid><<<grid, threads>>>(rows, width, src, dst);
if (stream == 0)
cudaSafeCall(cudaThreadSynchronize());
}
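// Worked example of the launch arithmetic (divUp(a, b) == (a + b - 1) / b in
// the gpu module's shared headers): for rows == 480 and width == 1000 bytes,
//   grid.x = divUp(1000, 16 * sizeof(uint)) = divUp(1000, 64) = 16
//   grid.y = divUp(480, 16) = 30
// Each thread handles four bytes through a uint load; threads past the last
// whole word fall back to the byte-wise loop inside the kernel above.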
template <typename T, int opid>
__global__ void bitwiseUnOpKernel(int rows, int cols, int cn, const PtrStep src,
const PtrStep mask, PtrStep dst)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < cols && y < rows && mask.ptr(y)[x / cn])
{
T* dst_row = (T*)dst.ptr(y);
const T* src_row = (const T*)src.ptr(y);
dst_row[x] = UnOp<T, opid>::call(src_row[x]);
}
}
template <typename T, int opid>
void bitwiseUnOp(int rows, int cols, int cn, const PtrStep src,
const PtrStep mask, PtrStep dst, cudaStream_t stream)
{
dim3 threads(16, 16);
dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
bitwiseUnOpKernel<T, opid><<<grid, threads>>>(rows, cols, cn, src, mask, dst);
if (stream == 0)
cudaSafeCall(cudaThreadSynchronize());
}
void bitwiseNotCaller(int rows, int cols, int elem_size1, int cn,
const PtrStep src, PtrStep dst, cudaStream_t stream)
{
bitwiseUnOp<UN_OP_NOT>(rows, cols * elem_size1 * cn, src, dst, stream);
}
template <typename T>
void bitwiseMaskNotCaller(int rows, int cols, int cn, const PtrStep src,
const PtrStep mask, PtrStep dst, cudaStream_t stream)
{
bitwiseUnOp<T, UN_OP_NOT>(rows, cols * cn, cn, src, mask, dst, stream);
}
template void bitwiseMaskNotCaller<uchar>(int, int, int, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
template void bitwiseMaskNotCaller<ushort>(int, int, int, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
template void bitwiseMaskNotCaller<uint>(int, int, int, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
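// (These explicit instantiations let the masked templates be compiled here by
// nvcc while being called from element_operations.cpp, which the host compiler
// builds separately; without them the symbols would not exist at link time.)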
//////////////////////////////////////////////////////////////////////////
// Binary bitwise logical matrix operations
enum { BIN_OP_OR, BIN_OP_AND, BIN_OP_XOR };
template <typename T, int opid>
struct BinOp;
template <typename T>
struct BinOp<T, BIN_OP_OR>
{
static __device__ T call(T a, T b) { return a | b; }
};
template <typename T>
struct BinOp<T, BIN_OP_AND>
{
static __device__ T call(T a, T b) { return a & b; }
};
template <typename T>
struct BinOp<T, BIN_OP_XOR>
{
static __device__ T call(T a, T b) { return a ^ b; }
};
template <int opid>
__global__ void bitwiseBinOpKernel(int rows, int width, const PtrStep src1,
const PtrStep src2, PtrStep dst)
{
const int x = (blockDim.x * blockIdx.x + threadIdx.x) * 4;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (y < rows)
{
uchar* dst_ptr = dst.ptr(y) + x;
const uchar* src1_ptr = src1.ptr(y) + x;
const uchar* src2_ptr = src2.ptr(y) + x;
if (x + sizeof(uint) - 1 < width)
{
*(uint*)dst_ptr = BinOp<uint, opid>::call(*(uint*)src1_ptr, *(uint*)src2_ptr);
}
else
{
const uchar* src1_end = src1.ptr(y) + width;
while (src1_ptr < src1_end)
{
*dst_ptr++ = BinOp<uchar, opid>::call(*src1_ptr++, *src2_ptr++);
}
}
}
}
template <int opid>
void bitwiseBinOp(int rows, int width, const PtrStep src1, const PtrStep src2,
PtrStep dst, cudaStream_t stream)
{
dim3 threads(16, 16);
dim3 grid(divUp(width, threads.x * sizeof(uint)), divUp(rows, threads.y));
bitwiseBinOpKernel<opid><<<grid, threads>>>(rows, width, src1, src2, dst);
if (stream == 0)
cudaSafeCall(cudaThreadSynchronize());
}
template <typename T, int opid>
__global__ void bitwiseBinOpKernel(
int rows, int cols, int cn, const PtrStep src1, const PtrStep src2,
const PtrStep mask, PtrStep dst)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < cols && y < rows && mask.ptr(y)[x / cn])
{
T* dst_row = (T*)dst.ptr(y);
const T* src1_row = (const T*)src1.ptr(y);
const T* src2_row = (const T*)src2.ptr(y);
dst_row[x] = BinOp<T, opid>::call(src1_row[x], src2_row[x]);
}
}
template <typename T, int opid>
void bitwiseBinOp(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2,
const PtrStep mask, PtrStep dst, cudaStream_t stream)
{
dim3 threads(16, 16);
dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
bitwiseBinOpKernel<T, opid><<<grid, threads>>>(rows, cols, cn, src1, src2, mask, dst);
if (stream == 0)
cudaSafeCall(cudaThreadSynchronize());
}
void bitwiseOrCaller(int rows, int cols, int elem_size1, int cn, const PtrStep src1,
const PtrStep src2, PtrStep dst, cudaStream_t stream)
{
bitwiseBinOp<BIN_OP_OR>(rows, cols * elem_size1 * cn, src1, src2, dst, stream);
}
template <typename T>
void bitwiseMaskOrCaller(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2,
const PtrStep mask, PtrStep dst, cudaStream_t stream)
{
bitwiseBinOp<T, BIN_OP_OR>(rows, cols * cn, cn, src1, src2, mask, dst, stream);
}
template void bitwiseMaskOrCaller<uchar>(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
template void bitwiseMaskOrCaller<ushort>(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
template void bitwiseMaskOrCaller<uint>(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
void bitwiseAndCaller(int rows, int cols, int elem_size1, int cn, const PtrStep src1,
const PtrStep src2, PtrStep dst, cudaStream_t stream)
{
bitwiseBinOp<BIN_OP_AND>(rows, cols * elem_size1 * cn, src1, src2, dst, stream);
}
template <typename T>
void bitwiseMaskAndCaller(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2,
const PtrStep mask, PtrStep dst, cudaStream_t stream)
{
bitwiseBinOp<T, BIN_OP_AND>(rows, cols * cn, cn, src1, src2, mask, dst, stream);
}
template void bitwiseMaskAndCaller<uchar>(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
template void bitwiseMaskAndCaller<ushort>(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
template void bitwiseMaskAndCaller<uint>(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
void bitwiseXorCaller(int rows, int cols, int elem_size1, int cn, const PtrStep src1,
const PtrStep src2, PtrStep dst, cudaStream_t stream)
{
bitwiseBinOp<BIN_OP_XOR>(rows, cols * elem_size1 * cn, src1, src2, dst, stream);
}
template <typename T>
void bitwiseMaskXorCaller(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2,
const PtrStep mask, PtrStep dst, cudaStream_t stream)
{
bitwiseBinOp<T, BIN_OP_XOR>(rows, cols * cn, cn, src1, src2, mask, dst, stream);
}
template void bitwiseMaskXorCaller<uchar>(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
template void bitwiseMaskXorCaller<ushort>(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
template void bitwiseMaskXorCaller<uint>(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
}}}
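The unmasked callers above flatten each row to raw bytes (cols * elem_size1 * cn), which is why a single kernel instantiation serves every depth. A hedged sketch of a host-side invocation, using the names declared in this file (GpuMat converts implicitly to PtrStep in this codebase, as the wrappers in arithm.cpp rely on):

// Sketch only: raw-byte NOT over a 640x480 CV_16UC3 matrix;
// elem_size1 == 2 and cn == 3, so each row is treated as 640 * 2 * 3 bytes.
cv::gpu::mathfunc::bitwiseNotCaller(480, 640, /*elem_size1*/ 2, /*cn*/ 3,
                                    src, dst, /*stream*/ 0);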
modules/gpu/src/cuda/mathfunc.cu
@@ -78,6 +78,29 @@ namespace cv { namespace gpu { namespace mathfunc
}
}
struct Mask8U
{
explicit Mask8U(PtrStep mask): mask(mask) {}
__device__ bool operator()(int y, int x) const
{
return mask.ptr(y)[x];
}
PtrStep mask;
};
struct MaskTrue
{
__device__ bool operator()(int y, int x) const
{
return true;
}
};
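// These predicate functors let one kernel template serve both the masked
// (Mask8U) and unmasked (MaskTrue) paths: with MaskTrue the test is a
// compile-time constant and the branch optimizes away.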
struct Nothing
{
static __device__ void calc(int, int, float, float, float*, size_t, float)
@@ -235,313 +258,6 @@ namespace cv { namespace gpu { namespace mathfunc
callers[mag.data == 0](mag, angle, x, y, angleInDegrees, stream);
}
//////////////////////////////////////////////////////////////////////////////////////
// Compare
template <typename T1, typename T2>
struct NotEqual
{
__device__ uchar operator()(const T1& src1, const T2& src2)
{
return static_cast<uchar>(static_cast<int>(src1 != src2) * 255);
}
};
template <typename T1, typename T2>
inline void compare_ne(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst)
{
NotEqual<T1, T2> op;
transform(static_cast< DevMem2D_<T1> >(src1), static_cast< DevMem2D_<T2> >(src2), dst, op, 0);
}
void compare_ne_8uc4(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst)
{
compare_ne<uint, uint>(src1, src2, dst);
}
void compare_ne_32f(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst)
{
compare_ne<float, float>(src1, src2, dst);
}
//////////////////////////////////////////////////////////////////////////////
// Per-element bit-wise logical matrix operations
struct Mask8U
{
explicit Mask8U(PtrStep mask): mask(mask) {}
__device__ bool operator()(int y, int x) const
{
return mask.ptr(y)[x];
}
PtrStep mask;
};
struct MaskTrue
{
__device__ bool operator()(int y, int x) const
{
return true;
}
};
//------------------------------------------------------------------------
// Unary operations
enum { UN_OP_NOT };
template <typename T, int opid>
struct UnOp;
template <typename T>
struct UnOp<T, UN_OP_NOT>
{
static __device__ T call(T v) { return ~v; }
};
template <int opid>
__global__ void bitwise_un_op_kernel(int rows, int width, const PtrStep src, PtrStep dst)
{
const int x = (blockDim.x * blockIdx.x + threadIdx.x) * 4;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (y < rows)
{
uchar* dst_ptr = dst.ptr(y) + x;
const uchar* src_ptr = src.ptr(y) + x;
if (x + sizeof(uint) - 1 < width)
{
*(uint*)dst_ptr = UnOp<uint, opid>::call(*(uint*)src_ptr);
}
else
{
const uchar* src_end = src.ptr(y) + width;
while (src_ptr < src_end)
{
*dst_ptr++ = UnOp<uchar, opid>::call(*src_ptr++);
}
}
}
}
template <int opid>
void bitwise_un_op(int rows, int width, const PtrStep src, PtrStep dst, cudaStream_t stream)
{
dim3 threads(16, 16);
dim3 grid(divUp(width, threads.x * sizeof(uint)),
divUp(rows, threads.y));
bitwise_un_op_kernel<opid><<<grid, threads>>>(rows, width, src, dst);
if (stream == 0)
cudaSafeCall(cudaThreadSynchronize());
}
template <typename T, int opid>
__global__ void bitwise_un_op_kernel(int rows, int cols, int cn, const PtrStep src, const PtrStep mask, PtrStep dst)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < cols && y < rows && mask.ptr(y)[x / cn])
{
T* dst_row = (T*)dst.ptr(y);
const T* src_row = (const T*)src.ptr(y);
dst_row[x] = UnOp<T, opid>::call(src_row[x]);
}
}
template <typename T, int opid>
void bitwise_un_op(int rows, int cols, int cn, const PtrStep src, const PtrStep mask, PtrStep dst, cudaStream_t stream)
{
dim3 threads(16, 16);
dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
bitwise_un_op_kernel<T, opid><<<grid, threads>>>(rows, cols, cn, src, mask, dst);
if (stream == 0)
cudaSafeCall(cudaThreadSynchronize());
}
void bitwise_not_caller(int rows, int cols, int elem_size1, int cn, const PtrStep src, PtrStep dst, cudaStream_t stream)
{
bitwise_un_op<UN_OP_NOT>(rows, cols * elem_size1 * cn, src, dst, stream);
}
template <typename T>
void bitwise_mask_not_caller(int rows, int cols, int cn, const PtrStep src, const PtrStep mask, PtrStep dst, cudaStream_t stream)
{
bitwise_un_op<T, UN_OP_NOT>(rows, cols * cn, cn, src, mask, dst, stream);
}
template void bitwise_mask_not_caller<uchar>(int, int, int, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
template void bitwise_mask_not_caller<ushort>(int, int, int, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
template void bitwise_mask_not_caller<uint>(int, int, int, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
//------------------------------------------------------------------------
// Binary operations
enum { BIN_OP_OR, BIN_OP_AND, BIN_OP_XOR };
template <typename T, int opid>
struct BinOp;
template <typename T>
struct BinOp<T, BIN_OP_OR>
{
static __device__ T call(T a, T b) { return a | b; }
};
template <typename T>
struct BinOp<T, BIN_OP_AND>
{
static __device__ T call(T a, T b) { return a & b; }
};
template <typename T>
struct BinOp<T, BIN_OP_XOR>
{
static __device__ T call(T a, T b) { return a ^ b; }
};
template <int opid>
__global__ void bitwise_bin_op_kernel(int rows, int width, const PtrStep src1, const PtrStep src2, PtrStep dst)
{
const int x = (blockDim.x * blockIdx.x + threadIdx.x) * 4;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (y < rows)
{
uchar* dst_ptr = dst.ptr(y) + x;
const uchar* src1_ptr = src1.ptr(y) + x;
const uchar* src2_ptr = src2.ptr(y) + x;
if (x + sizeof(uint) - 1 < width)
{
*(uint*)dst_ptr = BinOp<uint, opid>::call(*(uint*)src1_ptr, *(uint*)src2_ptr);
}
else
{
const uchar* src1_end = src1.ptr(y) + width;
while (src1_ptr < src1_end)
{
*dst_ptr++ = BinOp<uchar, opid>::call(*src1_ptr++, *src2_ptr++);
}
}
}
}
template <int opid>
void bitwise_bin_op(int rows, int width, const PtrStep src1, const PtrStep src2, PtrStep dst,
cudaStream_t stream)
{
dim3 threads(16, 16);
dim3 grid(divUp(width, threads.x * sizeof(uint)), divUp(rows, threads.y));
bitwise_bin_op_kernel<opid><<<grid, threads>>>(rows, width, src1, src2, dst);
if (stream == 0)
cudaSafeCall(cudaThreadSynchronize());
}
template <typename T, int opid>
__global__ void bitwise_bin_op_kernel(
int rows, int cols, int cn, const PtrStep src1, const PtrStep src2,
const PtrStep mask, PtrStep dst)
{
const int x = blockDim.x * blockIdx.x + threadIdx.x;
const int y = blockDim.y * blockIdx.y + threadIdx.y;
if (x < cols && y < rows && mask.ptr(y)[x / cn])
{
T* dst_row = (T*)dst.ptr(y);
const T* src1_row = (const T*)src1.ptr(y);
const T* src2_row = (const T*)src2.ptr(y);
dst_row[x] = BinOp<T, opid>::call(src1_row[x], src2_row[x]);
}
}
template <typename T, int opid>
void bitwise_bin_op(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2,
const PtrStep mask, PtrStep dst, cudaStream_t stream)
{
dim3 threads(16, 16);
dim3 grid(divUp(cols, threads.x), divUp(rows, threads.y));
bitwise_bin_op_kernel<T, opid><<<grid, threads>>>(rows, cols, cn, src1, src2, mask, dst);
if (stream == 0)
cudaSafeCall(cudaThreadSynchronize());
}
void bitwise_or_caller(int rows, int cols, int elem_size1, int cn, const PtrStep src1, const PtrStep src2, PtrStep dst, cudaStream_t stream)
{
bitwise_bin_op<BIN_OP_OR>(rows, cols * elem_size1 * cn, src1, src2, dst, stream);
}
template <typename T>
void bitwise_mask_or_caller(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2, const PtrStep mask, PtrStep dst, cudaStream_t stream)
{
bitwise_bin_op<T, BIN_OP_OR>(rows, cols * cn, cn, src1, src2, mask, dst, stream);
}
template void bitwise_mask_or_caller<uchar>(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
template void bitwise_mask_or_caller<ushort>(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
template void bitwise_mask_or_caller<uint>(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
void bitwise_and_caller(int rows, int cols, int elem_size1, int cn, const PtrStep src1, const PtrStep src2, PtrStep dst, cudaStream_t stream)
{
bitwise_bin_op<BIN_OP_AND>(rows, cols * elem_size1 * cn, src1, src2, dst, stream);
}
template <typename T>
void bitwise_mask_and_caller(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2, const PtrStep mask, PtrStep dst, cudaStream_t stream)
{
bitwise_bin_op<T, BIN_OP_AND>(rows, cols * cn, cn, src1, src2, mask, dst, stream);
}
template void bitwise_mask_and_caller<uchar>(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
template void bitwise_mask_and_caller<ushort>(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
template void bitwise_mask_and_caller<uint>(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
void bitwise_xor_caller(int rows, int cols, int elem_size1, int cn, const PtrStep src1, const PtrStep src2, PtrStep dst, cudaStream_t stream)
{
bitwise_bin_op<BIN_OP_XOR>(rows, cols * elem_size1 * cn, src1, src2, dst, stream);
}
template <typename T>
void bitwise_mask_xor_caller(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2, const PtrStep mask, PtrStep dst, cudaStream_t stream)
{
bitwise_bin_op<T, BIN_OP_XOR>(rows, cols * cn, cn, src1, src2, mask, dst, stream);
}
template void bitwise_mask_xor_caller<uchar>(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
template void bitwise_mask_xor_caller<ushort>(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
template void bitwise_mask_xor_caller<uint>(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
//////////////////////////////////////////////////////////////////////////////
// Min max
modules/gpu/src/element_operations.cpp
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::gpu;
#if !defined (HAVE_CUDA)
void cv::gpu::add(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::add(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); }
void cv::gpu::subtract(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::subtract(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); }
void cv::gpu::multiply(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::multiply(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); }
void cv::gpu::divide(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::divide(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); }
void cv::gpu::absdiff(const GpuMat&, const GpuMat&, GpuMat&) { throw_nogpu(); }
void cv::gpu::absdiff(const GpuMat&, const Scalar&, GpuMat&) { throw_nogpu(); }
void cv::gpu::compare(const GpuMat&, const GpuMat&, GpuMat&, int) { throw_nogpu(); }
void cv::gpu::bitwise_not(const GpuMat&, GpuMat&, const GpuMat&) { throw_nogpu(); }
void cv::gpu::bitwise_not(const GpuMat&, GpuMat&, const GpuMat&, const Stream&) { throw_nogpu(); }
void cv::gpu::bitwise_or(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&) { throw_nogpu(); }
void cv::gpu::bitwise_or(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, const Stream&) { throw_nogpu(); }
void cv::gpu::bitwise_and(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&) { throw_nogpu(); }
void cv::gpu::bitwise_and(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, const Stream&) { throw_nogpu(); }
void cv::gpu::bitwise_xor(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&) { throw_nogpu(); }
void cv::gpu::bitwise_xor(const GpuMat&, const GpuMat&, GpuMat&, const GpuMat&, const Stream&) { throw_nogpu(); }
cv::gpu::GpuMat cv::gpu::operator ~ (const GpuMat&) { throw_nogpu(); return GpuMat(); }
cv::gpu::GpuMat cv::gpu::operator | (const GpuMat&, const GpuMat&) { throw_nogpu(); return GpuMat(); }
cv::gpu::GpuMat cv::gpu::operator & (const GpuMat&, const GpuMat&) { throw_nogpu(); return GpuMat(); }
cv::gpu::GpuMat cv::gpu::operator ^ (const GpuMat&, const GpuMat&) { throw_nogpu(); return GpuMat(); }
#else
//////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////// Unary per-element operations /////////////////////////////////////////
// operation(GpuMat src, GpuMat dst)

//////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////// Binary per-element operations ////////////////////////////////////////
// operation(GpuMat src1, GpuMat src2, GpuMat dst)

////////////////////////////////////////////////////////////////////////
// Basic arithmetical operations (add subtract multiply divide)

namespace
{
    typedef NppStatus (*npp_arithm_8u_t)(const Npp8u* pSrc1, int nSrc1Step, const Npp8u* pSrc2, int nSrc2Step, Npp8u* pDst, int nDstStep, NppiSize oSizeROI, int nScaleFactor);
    typedef NppStatus (*npp_arithm_32s_t)(const Npp32s* pSrc1, int nSrc1Step, const Npp32s* pSrc2, int nSrc2Step, Npp32s* pDst, int nDstStep, NppiSize oSizeROI);
    typedef NppStatus (*npp_arithm_32f_t)(const Npp32f* pSrc1, int nSrc1Step, const Npp32f* pSrc2, int nSrc2Step, Npp32f* pDst, int nDstStep, NppiSize oSizeROI);

    void nppArithmCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst,
                         npp_arithm_8u_t npp_func_8uc1, npp_arithm_8u_t npp_func_8uc4,
                         npp_arithm_32s_t npp_func_32sc1, npp_arithm_32f_t npp_func_32fc1)
    {
        CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
        CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32SC1 || src1.type() == CV_32FC1);

        dst.create(src1.size(), src1.type());

        NppiSize sz;
        sz.width  = src1.cols;
        sz.height = src1.rows;

        switch (src1.type())
        {
        case CV_8UC1:
            nppSafeCall( npp_func_8uc1(src1.ptr<Npp8u>(), src1.step, src2.ptr<Npp8u>(), src2.step, dst.ptr<Npp8u>(), dst.step, sz, 0) );
            break;
        case CV_8UC4:
            nppSafeCall( npp_func_8uc4(src1.ptr<Npp8u>(), src1.step, src2.ptr<Npp8u>(), src2.step, dst.ptr<Npp8u>(), dst.step, sz, 0) );
            break;
        case CV_32SC1:
            nppSafeCall( npp_func_32sc1(src1.ptr<Npp32s>(), src1.step, src2.ptr<Npp32s>(), src2.step, dst.ptr<Npp32s>(), dst.step, sz) );
            break;
        case CV_32FC1:
            nppSafeCall( npp_func_32fc1(src1.ptr<Npp32f>(), src1.step, src2.ptr<Npp32f>(), src2.step, dst.ptr<Npp32f>(), dst.step, sz) );
            break;
        default:
            CV_Assert(!"Unsupported source type");
        }
    }

    template<int SCN> struct NppArithmScalarFunc;
    template<> struct NppArithmScalarFunc<1>
    {
        typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, Npp32f nValue, Npp32f* pDst, int nDstStep, NppiSize oSizeROI);
    };
    template<> struct NppArithmScalarFunc<2>
    {
        typedef NppStatus (*func_ptr)(const Npp32fc* pSrc, int nSrcStep, Npp32fc nValue, Npp32fc* pDst, int nDstStep, NppiSize oSizeROI);
    };

    template<int SCN, typename NppArithmScalarFunc<SCN>::func_ptr func> struct NppArithmScalar;
    template<typename NppArithmScalarFunc<1>::func_ptr func> struct NppArithmScalar<1, func>
    {
        static void calc(const GpuMat& src, const Scalar& sc, GpuMat& dst)
        {
            dst.create(src.size(), src.type());

            NppiSize sz;
            sz.width  = src.cols;
            sz.height = src.rows;

            nppSafeCall( func(src.ptr<Npp32f>(), src.step, (Npp32f)sc[0], dst.ptr<Npp32f>(), dst.step, sz) );
        }
    };
    template<typename NppArithmScalarFunc<2>::func_ptr func> struct NppArithmScalar<2, func>
    {
        static void calc(const GpuMat& src, const Scalar& sc, GpuMat& dst)
        {
            dst.create(src.size(), src.type());

            NppiSize sz;
            sz.width  = src.cols;
            sz.height = src.rows;

            Npp32fc nValue;
            nValue.re = (Npp32f)sc[0];
            nValue.im = (Npp32f)sc[1];

            nppSafeCall( func(src.ptr<Npp32fc>(), src.step, nValue, dst.ptr<Npp32fc>(), dst.step, sz) );
        }
    };
}
void cv::gpu::add(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{
    nppArithmCaller(src1, src2, dst, nppiAdd_8u_C1RSfs, nppiAdd_8u_C4RSfs, nppiAdd_32s_C1R, nppiAdd_32f_C1R);
}

void cv::gpu::subtract(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{
    nppArithmCaller(src2, src1, dst, nppiSub_8u_C1RSfs, nppiSub_8u_C4RSfs, nppiSub_32s_C1R, nppiSub_32f_C1R);
}

void cv::gpu::multiply(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{
    nppArithmCaller(src1, src2, dst, nppiMul_8u_C1RSfs, nppiMul_8u_C4RSfs, nppiMul_32s_C1R, nppiMul_32f_C1R);
}

void cv::gpu::divide(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{
    nppArithmCaller(src2, src1, dst, nppiDiv_8u_C1RSfs, nppiDiv_8u_C4RSfs, nppiDiv_32s_C1R, nppiDiv_32f_C1R);
}
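For context, this is how the wrappers are driven from host code — a minimal, hypothetical sketch (names illustrative; assumes OpenCV built with CUDA):

#include <opencv2/gpu/gpu.hpp>

void add_on_gpu(const cv::Mat& a, const cv::Mat& b, cv::Mat& sum)   // a, b: CV_32FC1, same size
{
    cv::gpu::GpuMat d_a(a), d_b(b), d_sum;   // constructors upload to the device
    cv::gpu::add(d_a, d_b, d_sum);           // dispatches to nppiAdd_32f_C1R
    d_sum.download(sum);                     // copy the result back to the host
}

Note that subtract and divide pass their operands as (src2, src1): the underlying NPP primitives compute pSrc2 - pSrc1 and pSrc2 / pSrc1, so the swap yields the conventional dst = src1 - src2 and dst = src1 / src2.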
void cv::gpu::add(const GpuMat& src, const Scalar& sc, GpuMat& dst)
{
    typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst);
    static const caller_t callers[] = {0, NppArithmScalar<1, nppiAddC_32f_C1R>::calc, NppArithmScalar<2, nppiAddC_32fc_C1R>::calc};

    CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);

    callers[src.channels()](src, sc, dst);
}

void cv::gpu::subtract(const GpuMat& src, const Scalar& sc, GpuMat& dst)
{
    typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst);
    static const caller_t callers[] = {0, NppArithmScalar<1, nppiSubC_32f_C1R>::calc, NppArithmScalar<2, nppiSubC_32fc_C1R>::calc};

    CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);

    callers[src.channels()](src, sc, dst);
}

void cv::gpu::multiply(const GpuMat& src, const Scalar& sc, GpuMat& dst)
{
    typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst);
    static const caller_t callers[] = {0, NppArithmScalar<1, nppiMulC_32f_C1R>::calc, NppArithmScalar<2, nppiMulC_32fc_C1R>::calc};

    CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);

    callers[src.channels()](src, sc, dst);
}

void cv::gpu::divide(const GpuMat& src, const Scalar& sc, GpuMat& dst)
{
    typedef void (*caller_t)(const GpuMat& src, const Scalar& sc, GpuMat& dst);
    static const caller_t callers[] = {0, NppArithmScalar<1, nppiDivC_32f_C1R>::calc, NppArithmScalar<2, nppiDivC_32fc_C1R>::calc};

    CV_Assert(src.type() == CV_32FC1 || src.type() == CV_32FC2);

    callers[src.channels()](src, sc, dst);
}
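The scalar overloads accept only CV_32FC1 and CV_32FC2; in the two-channel case the Scalar is packed into an Npp32fc, i.e. the data is treated as complex numbers. A hypothetical use (names illustrative):

cv::gpu::GpuMat d_z(z), d_rot;                         // z: CV_32FC2, elements read as re/im pairs
cv::gpu::multiply(d_z, cv::Scalar(0.0, 1.0), d_rot);   // complex multiply by i: sc[0] -> re, sc[1] -> im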
//////////////////////////////////////////////////////////////////////////////
// Absolute difference
void cv::gpu::absdiff(const GpuMat& src1, const GpuMat& src2, GpuMat& dst)
{
    CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
    CV_Assert(src1.type() == CV_8UC1 || src1.type() == CV_8UC4 || src1.type() == CV_32SC1 || src1.type() == CV_32FC1);

    dst.create(src1.size(), src1.type());

    NppiSize sz;
    sz.width  = src1.cols;
    sz.height = src1.rows;

    switch (src1.type())
    {
    case CV_8UC1:
        nppSafeCall( nppiAbsDiff_8u_C1R(src1.ptr<Npp8u>(), src1.step, src2.ptr<Npp8u>(), src2.step, dst.ptr<Npp8u>(), dst.step, sz) );
        break;
    case CV_8UC4:
        nppSafeCall( nppiAbsDiff_8u_C4R(src1.ptr<Npp8u>(), src1.step, src2.ptr<Npp8u>(), src2.step, dst.ptr<Npp8u>(), dst.step, sz) );
        break;
    case CV_32SC1:
        nppSafeCall( nppiAbsDiff_32s_C1R(src1.ptr<Npp32s>(), src1.step, src2.ptr<Npp32s>(), src2.step, dst.ptr<Npp32s>(), dst.step, sz) );
        break;
    case CV_32FC1:
        nppSafeCall( nppiAbsDiff_32f_C1R(src1.ptr<Npp32f>(), src1.step, src2.ptr<Npp32f>(), src2.step, dst.ptr<Npp32f>(), dst.step, sz) );
        break;
    default:
        CV_Assert(!"Unsupported source type");
    }
}

void cv::gpu::absdiff(const GpuMat& src, const Scalar& s, GpuMat& dst)
{
    CV_Assert(src.type() == CV_32FC1);

    dst.create(src.size(), src.type());

    NppiSize sz;
    sz.width  = src.cols;
    sz.height = src.rows;

    nppSafeCall( nppiAbsDiffC_32f_C1R(src.ptr<Npp32f>(), src.step, dst.ptr<Npp32f>(), dst.step, sz, (Npp32f)s[0]) );
}
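absdiff is the usual building block for simple change detection; a hypothetical host-side use with two grayscale CV_8UC1 frames (names illustrative):

cv::gpu::GpuMat d_prev(prev), d_curr(curr), d_diff;
cv::gpu::absdiff(d_prev, d_curr, d_diff);   // per-pixel |curr - prev|, saturated for 8-bit input
cv::Mat diff;
d_diff.download(diff);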
//////////////////////////////////////////////////////////////////////////////
// Comparison of two matrices
namespace cv { namespace gpu { namespace mathfunc
{
    void compare_ne_8uc4(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst);
    void compare_ne_32f(const DevMem2D& src1, const DevMem2D& src2, const DevMem2D& dst);
}}}

void cv::gpu::compare(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, int cmpop)
{
    CV_DbgAssert(src1.size() == src2.size() && src1.type() == src2.type());
    CV_Assert(src1.type() == CV_8UC4 || src1.type() == CV_32FC1);

    dst.create(src1.size(), CV_8UC1);

    static const NppCmpOp nppCmpOp[] = { NPP_CMP_EQ, NPP_CMP_GREATER, NPP_CMP_GREATER_EQ, NPP_CMP_LESS, NPP_CMP_LESS_EQ };

    NppiSize sz;
    sz.width  = src1.cols;
    sz.height = src1.rows;

    if (src1.type() == CV_8UC4)
    {
        if (cmpop != CMP_NE)
        {
            nppSafeCall( nppiCompare_8u_C4R(src1.ptr<Npp8u>(), src1.step, src2.ptr<Npp8u>(), src2.step, dst.ptr<Npp8u>(), dst.step, sz, nppCmpOp[cmpop]) );
        }
        else
        {
            mathfunc::compare_ne_8uc4(src1, src2, dst);
        }
    }
    else
    {
        if (cmpop != CMP_NE)
        {
            nppSafeCall( nppiCompare_32f_C1R(src1.ptr<Npp32f>(), src1.step, src2.ptr<Npp32f>(), src2.step, dst.ptr<Npp8u>(), dst.step, sz, nppCmpOp[cmpop]) );
        }
        else
        {
            mathfunc::compare_ne_32f(src1, src2, dst);
        }
    }
}
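The nppCmpOp table is indexed directly by cmpop and relies on OpenCV's enum order (CMP_EQ=0, CMP_GT, CMP_GE, CMP_LT, CMP_LE); CMP_NE (=5) falls past the end of the table and has no NPP counterpart here, so it is routed to the custom compare_ne_* kernels instead. The result is always a CV_8UC1 mask regardless of the input type — a hypothetical call (names illustrative):

cv::gpu::GpuMat d_a(a), d_b(b), d_mask;          // a, b: CV_32FC1
cv::gpu::compare(d_a, d_b, d_mask, cv::CMP_NE);  // d_mask: CV_8UC1, nonzero where a != b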
//////////////////////////////////////////////////////////////////////////////
// Unary bitwise logical operations
namespace cv { namespace gpu { namespace mathfunc
{
    void bitwiseNotCaller(int rows, int cols, int elem_size1, int cn, const PtrStep src, PtrStep dst, cudaStream_t stream);

    template <typename T>
    void bitwiseMaskNotCaller(int rows, int cols, int cn, const PtrStep src, const PtrStep mask, PtrStep dst, cudaStream_t stream);
}}}

namespace
{
    void bitwiseNotCaller(const GpuMat& src, GpuMat& dst, cudaStream_t stream)
    {
        dst.create(src.size(), src.type());
        cv::gpu::mathfunc::bitwiseNotCaller(src.rows, src.cols, src.elemSize1(), dst.channels(), src, dst, stream);
    }

    void bitwiseNotCaller(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
    {
        using namespace cv::gpu;

        typedef void (*Caller)(int, int, int, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
        static Caller callers[] =
        {
            mathfunc::bitwiseMaskNotCaller<unsigned char>,  mathfunc::bitwiseMaskNotCaller<unsigned char>,
            mathfunc::bitwiseMaskNotCaller<unsigned short>, mathfunc::bitwiseMaskNotCaller<unsigned short>,
            mathfunc::bitwiseMaskNotCaller<unsigned int>,   mathfunc::bitwiseMaskNotCaller<unsigned int>,
            mathfunc::bitwiseMaskNotCaller<unsigned int>
        };

        CV_Assert(mask.type() == CV_8U && mask.size() == src.size());
        dst.create(src.size(), src.type());

        Caller caller = callers[src.depth()];
        CV_Assert(caller);

        int cn = src.depth() != CV_64F ? src.channels() : src.channels() * (sizeof(double) / sizeof(unsigned int));
        caller(src.rows, src.cols, cn, src, mask, dst, stream);
    }
}
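A subtlety in the masked dispatch: the caller table is indexed by depth (CV_8U..CV_64F map to 0..6) and picks the unsigned type of matching element width; there is no 64-bit instantiation, so CV_64F data reuses the 32-bit kernel with the channel count scaled up. Worked numbers for illustration (not code from the commit):

// For a CV_64FC3 matrix:
//   callers[CV_64F] selects bitwiseMaskNotCaller<unsigned int>
//   cn = 3 * (sizeof(double) / sizeof(unsigned int)) = 3 * 2 = 6   // 32-bit words per pixel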
void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask)
{
    if (mask.empty())
        ::bitwiseNotCaller(src, dst, 0);
    else
        ::bitwiseNotCaller(src, dst, mask, 0);
}

void cv::gpu::bitwise_not(const GpuMat& src, GpuMat& dst, const GpuMat& mask, const Stream& stream)
{
    if (mask.empty())
        ::bitwiseNotCaller(src, dst, StreamAccessor::getStream(stream));
    else
        ::bitwiseNotCaller(src, dst, mask, StreamAccessor::getStream(stream));
}

cv::gpu::GpuMat cv::gpu::operator ~ (const GpuMat& src)
{
    GpuMat dst;
    bitwise_not(src, dst);
    return dst;
}
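operator~ is a convenience wrapper that allocates a fresh GpuMat on every call; the explicit form lets a caller reuse the destination buffer. A hypothetical comparison (names illustrative):

cv::gpu::GpuMat d_mask(mask), d_inv;
cv::gpu::bitwise_not(d_mask, d_inv);   // explicit form: d_inv's buffer is reused on repeated calls
cv::gpu::GpuMat d_inv2 = ~d_mask;      // operator form: allocates a new matrix each time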
//////////////////////////////////////////////////////////////////////////////
// Binary bitwise logical operations
namespace cv { namespace gpu { namespace mathfunc
{
    void bitwiseOrCaller(int rows, int cols, int elem_size1, int cn, const PtrStep src1, const PtrStep src2, PtrStep dst, cudaStream_t stream);

    template <typename T>
    void bitwiseMaskOrCaller(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2, const PtrStep mask, PtrStep dst, cudaStream_t stream);

    void bitwiseAndCaller(int rows, int cols, int elem_size1, int cn, const PtrStep src1, const PtrStep src2, PtrStep dst, cudaStream_t stream);

    template <typename T>
    void bitwiseMaskAndCaller(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2, const PtrStep mask, PtrStep dst, cudaStream_t stream);

    void bitwiseXorCaller(int rows, int cols, int elem_size1, int cn, const PtrStep src1, const PtrStep src2, PtrStep dst, cudaStream_t stream);

    template <typename T>
    void bitwiseMaskXorCaller(int rows, int cols, int cn, const PtrStep src1, const PtrStep src2, const PtrStep mask, PtrStep dst, cudaStream_t stream);
}}}

namespace
{
    void bitwiseOrCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream)
    {
        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
        dst.create(src1.size(), src1.type());
        cv::gpu::mathfunc::bitwiseOrCaller(dst.rows, dst.cols, dst.elemSize1(), dst.channels(), src1, src2, dst, stream);
    }

    void bitwiseOrCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
    {
        using namespace cv::gpu;

        typedef void (*Caller)(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
        static Caller callers[] =
        {
            mathfunc::bitwiseMaskOrCaller<unsigned char>,  mathfunc::bitwiseMaskOrCaller<unsigned char>,
            mathfunc::bitwiseMaskOrCaller<unsigned short>, mathfunc::bitwiseMaskOrCaller<unsigned short>,
            mathfunc::bitwiseMaskOrCaller<unsigned int>,   mathfunc::bitwiseMaskOrCaller<unsigned int>,
            mathfunc::bitwiseMaskOrCaller<unsigned int>
        };

        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
        dst.create(src1.size(), src1.type());

        Caller caller = callers[src1.depth()];
        CV_Assert(caller);

        int cn = dst.depth() != CV_64F ? dst.channels() : dst.channels() * (sizeof(double) / sizeof(unsigned int));
        caller(dst.rows, dst.cols, cn, src1, src2, mask, dst, stream);
    }

    void bitwiseAndCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream)
    {
        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
        dst.create(src1.size(), src1.type());
        cv::gpu::mathfunc::bitwiseAndCaller(dst.rows, dst.cols, dst.elemSize1(), dst.channels(), src1, src2, dst, stream);
    }

    void bitwiseAndCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
    {
        using namespace cv::gpu;

        typedef void (*Caller)(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
        static Caller callers[] =
        {
            mathfunc::bitwiseMaskAndCaller<unsigned char>,  mathfunc::bitwiseMaskAndCaller<unsigned char>,
            mathfunc::bitwiseMaskAndCaller<unsigned short>, mathfunc::bitwiseMaskAndCaller<unsigned short>,
            mathfunc::bitwiseMaskAndCaller<unsigned int>,   mathfunc::bitwiseMaskAndCaller<unsigned int>,
            mathfunc::bitwiseMaskAndCaller<unsigned int>
        };

        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
        dst.create(src1.size(), src1.type());

        Caller caller = callers[src1.depth()];
        CV_Assert(caller);

        int cn = dst.depth() != CV_64F ? dst.channels() : dst.channels() * (sizeof(double) / sizeof(unsigned int));
        caller(dst.rows, dst.cols, cn, src1, src2, mask, dst, stream);
    }

    void bitwiseXorCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, cudaStream_t stream)
    {
        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
        dst.create(src1.size(), src1.type());
        cv::gpu::mathfunc::bitwiseXorCaller(dst.rows, dst.cols, dst.elemSize1(), dst.channels(), src1, src2, dst, stream);
    }

    void bitwiseXorCaller(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
    {
        using namespace cv::gpu;

        typedef void (*Caller)(int, int, int, const PtrStep, const PtrStep, const PtrStep, PtrStep, cudaStream_t);
        static Caller callers[] =
        {
            mathfunc::bitwiseMaskXorCaller<unsigned char>,  mathfunc::bitwiseMaskXorCaller<unsigned char>,
            mathfunc::bitwiseMaskXorCaller<unsigned short>, mathfunc::bitwiseMaskXorCaller<unsigned short>,
            mathfunc::bitwiseMaskXorCaller<unsigned int>,   mathfunc::bitwiseMaskXorCaller<unsigned int>,
            mathfunc::bitwiseMaskXorCaller<unsigned int>
        };

        CV_Assert(src1.size() == src2.size() && src1.type() == src2.type());
        dst.create(src1.size(), src1.type());

        Caller caller = callers[src1.depth()];
        CV_Assert(caller);

        int cn = dst.depth() != CV_64F ? dst.channels() : dst.channels() * (sizeof(double) / sizeof(unsigned int));
        caller(dst.rows, dst.cols, cn, src1, src2, mask, dst, stream);
    }
}
void cv::gpu::bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask)
{
    if (mask.empty())
        ::bitwiseOrCaller(src1, src2, dst, 0);
    else
        ::bitwiseOrCaller(src1, src2, dst, mask, 0);
}

void cv::gpu::bitwise_or(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, const Stream& stream)
{
    if (mask.empty())
        ::bitwiseOrCaller(src1, src2, dst, StreamAccessor::getStream(stream));
    else
        ::bitwiseOrCaller(src1, src2, dst, mask, StreamAccessor::getStream(stream));
}

void cv::gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask)
{
    if (mask.empty())
        ::bitwiseAndCaller(src1, src2, dst, 0);
    else
        ::bitwiseAndCaller(src1, src2, dst, mask, 0);
}

void cv::gpu::bitwise_and(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, const Stream& stream)
{
    if (mask.empty())
        ::bitwiseAndCaller(src1, src2, dst, StreamAccessor::getStream(stream));
    else
        ::bitwiseAndCaller(src1, src2, dst, mask, StreamAccessor::getStream(stream));
}

void cv::gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask)
{
    if (mask.empty())
        ::bitwiseXorCaller(src1, src2, dst, 0);
    else
        ::bitwiseXorCaller(src1, src2, dst, mask, 0);
}

void cv::gpu::bitwise_xor(const GpuMat& src1, const GpuMat& src2, GpuMat& dst, const GpuMat& mask, const Stream& stream)
{
    if (mask.empty())
        ::bitwiseXorCaller(src1, src2, dst, StreamAccessor::getStream(stream));
    else
        ::bitwiseXorCaller(src1, src2, dst, mask, StreamAccessor::getStream(stream));
}

cv::gpu::GpuMat cv::gpu::operator | (const GpuMat& src1, const GpuMat& src2)
{
    GpuMat dst;
    bitwise_or(src1, src2, dst);
    return dst;
}

cv::gpu::GpuMat cv::gpu::operator & (const GpuMat& src1, const GpuMat& src2)
{
    GpuMat dst;
    bitwise_and(src1, src2, dst);
    return dst;
}

cv::gpu::GpuMat cv::gpu::operator ^ (const GpuMat& src1, const GpuMat& src2)
{
    GpuMat dst;
    bitwise_xor(src1, src2, dst);
    return dst;
}
#endif
\ No newline at end of file
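The Stream overloads above enqueue the kernels asynchronously; a hypothetical host-side sketch of masked, streamed use (names illustrative):

cv::gpu::Stream stream;
cv::gpu::GpuMat d_a(a), d_b(b), d_mask(mask), d_dst;
cv::gpu::bitwise_and(d_a, d_b, d_dst, d_mask, stream);   // enqueued on the stream, returns immediately
// ... overlap other host work here ...
stream.waitForCompletion();                               // block until the device work finishes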
tests/gpu/src/bitwise_oper.cpp
View file @
0465b89e
...
...
@@ -53,7 +53,7 @@ using namespace std;
struct CV_GpuBitwiseTest : public CvTest
{
-    CV_GpuBitwiseTest() : CvTest("GPU-BitwiseOpers", "bitwiseMatOperators") {}
+    CV_GpuBitwiseTest() : CvTest("GPU-BitwiseOpersTest", "bitwiseMatOperators") {}

    void run(int)
    {
...
...