Commit db1178b5, authored Apr 16, 2013 by Vladislav Vinogradov
moved GpuMat implementation to separate file
parent 2153a148
Showing 7 changed files with 1377 additions and 1303 deletions:

    modules/core/include/opencv2/core/gpu.hpp       +175  -163
    modules/core/include/opencv2/core/gpu.inl.hpp    +90   -46
    modules/core/src/cuda/matrix_operations.cu       +55   -73
    modules/core/src/cuda/matrix_operations.hpp      +57    -0
    modules/core/src/gpu.cpp                           +0 -1014
    modules/core/src/gpu_mat.cpp                    +993    -0
    modules/core/src/gpu_stream.cpp                   +7    -7
modules/core/include/opencv2/core/gpu.hpp
...
...
@@ -53,6 +53,181 @@
namespace cv { namespace gpu {

//////////////////////////////// GpuMat ///////////////////////////////

// Smart pointer for GPU memory with reference counting.
// Its interface is mostly similar to cv::Mat.
class CV_EXPORTS GpuMat
{
public:
    //! default constructor
    GpuMat();

    //! constructs GpuMat of the specified size and type
    GpuMat(int rows, int cols, int type);
    GpuMat(Size size, int type);

    //! constructs GpuMat and fills it with the specified value s
    GpuMat(int rows, int cols, int type, Scalar s);
    GpuMat(Size size, int type, Scalar s);

    //! copy constructor
    GpuMat(const GpuMat& m);

    //! constructor for GpuMat headers pointing to user-allocated data
    GpuMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP);
    GpuMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP);

    //! creates a GpuMat header for a part of the bigger matrix
    GpuMat(const GpuMat& m, Range rowRange, Range colRange);
    GpuMat(const GpuMat& m, Rect roi);

    //! builds GpuMat from Mat. Performs blocking upload to device
    explicit GpuMat(const Mat& m);

    //! destructor - calls release()
    ~GpuMat();

    //! assignment operators
    GpuMat& operator =(const GpuMat& m);

    //! allocates new GpuMat data unless the GpuMat already has specified size and type
    void create(int rows, int cols, int type);
    void create(Size size, int type);

    //! decreases reference counter, deallocates the data when reference counter reaches 0
    void release();

    //! swaps with other smart pointer
    void swap(GpuMat& mat);

    //! performs blocking upload of data to GpuMat
    void upload(const Mat& m);

    //! downloads data from device to host memory (blocking call)
    void download(Mat& m) const;

    //! returns deep copy of the GpuMat, i.e. the data is copied
    GpuMat clone() const;

    //! copies the GpuMat content to "m"
    void copyTo(GpuMat& m) const;

    //! copies those GpuMat elements to "m" that are marked with non-zero mask elements
    void copyTo(GpuMat& m, const GpuMat& mask) const;

    //! sets some of the GpuMat elements to s, according to the mask
    GpuMat& setTo(Scalar s, const GpuMat& mask = GpuMat());

    //! converts GpuMat to another datatype with optional scaling
    void convertTo(GpuMat& m, int rtype, double alpha = 1, double beta = 0) const;

    void assignTo(GpuMat& m, int type = -1) const;

    //! returns pointer to y-th row
    uchar* ptr(int y = 0);
    const uchar* ptr(int y = 0) const;

    //! template version of the above method
    template<typename _Tp> _Tp* ptr(int y = 0);
    template<typename _Tp> const _Tp* ptr(int y = 0) const;

    template <typename _Tp> operator PtrStepSz<_Tp>() const;
    template <typename _Tp> operator PtrStep<_Tp>() const;

    //! returns a new GpuMat header for the specified row
    GpuMat row(int y) const;

    //! returns a new GpuMat header for the specified column
    GpuMat col(int x) const;

    //! ... for the specified row span
    GpuMat rowRange(int startrow, int endrow) const;
    GpuMat rowRange(Range r) const;

    //! ... for the specified column span
    GpuMat colRange(int startcol, int endcol) const;
    GpuMat colRange(Range r) const;

    //! extracts a rectangular sub-GpuMat (this is a generalized form of row, rowRange etc.)
    GpuMat operator ()(Range rowRange, Range colRange) const;
    GpuMat operator ()(Rect roi) const;

    //! creates alternative GpuMat header for the same data, with different
    //! number of channels and/or different number of rows
    GpuMat reshape(int cn, int rows = 0) const;

    //! locates GpuMat header within a parent GpuMat
    void locateROI(Size& wholeSize, Point& ofs) const;

    //! moves/resizes the current GpuMat ROI inside the parent GpuMat
    GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright);

    //! returns true iff the GpuMat data is continuous
    //! (i.e. when there are no gaps between successive rows)
    bool isContinuous() const;

    //! returns element size in bytes
    size_t elemSize() const;

    //! returns the size of element channel in bytes
    size_t elemSize1() const;

    //! returns element type
    int type() const;

    //! returns element depth
    int depth() const;

    //! returns number of channels
    int channels() const;

    //! returns step/elemSize1()
    size_t step1() const;

    //! returns GpuMat size : width == number of columns, height == number of rows
    Size size() const;

    //! returns true if GpuMat data is NULL
    bool empty() const;

    /*! includes several bit-fields:
        - the magic signature
        - continuity flag
        - depth
        - number of channels
    */
    int flags;

    //! the number of rows and columns
    int rows, cols;

    //! a distance between successive rows in bytes; includes the gap if any
    size_t step;

    //! pointer to the data
    uchar* data;

    //! pointer to the reference counter;
    //! when GpuMat points to user-allocated data, the pointer is NULL
    int* refcount;

    //! helper fields used in locateROI and adjustROI
    uchar* datastart;
    uchar* dataend;
};

//! Creates continuous GPU matrix
CV_EXPORTS void createContinuous(int rows, int cols, int type, GpuMat& m);

//! Ensures that the size of the given matrix is not less than (rows, cols)
//! and that the matrix type matches the specified one
CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m);

CV_EXPORTS GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat& mat);

//////////////////////////////// CudaMem ////////////////////////////////
// CudaMem is limited cv::Mat with page locked memory allocation.
// Page locked memory is only needed for async and faster copying to GPU.
...
...
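As an aside (not part of the diff), here is a minimal usage sketch of the GpuMat interface declared above. It assumes this branch's include layout (opencv2/core/gpu.hpp) and a CUDA-capable build; the image path is purely illustrative.

#include <opencv2/core/core.hpp>
#include <opencv2/core/gpu.hpp>
#include <opencv2/highgui/highgui.hpp>

int main()
{
    cv::Mat host = cv::imread("input.png", 0);       // 8-bit single-channel image (path illustrative)

    cv::gpu::GpuMat d_img(host);                     // blocking upload via the explicit Mat constructor
    cv::gpu::GpuMat d_float;
    d_img.convertTo(d_float, CV_32F, 1.0 / 255.0);   // type conversion with scaling on the device

    cv::gpu::GpuMat d_roi = d_float(cv::Rect(0, 0, 64, 64));   // ROI header, shares device memory
    d_roi.setTo(cv::Scalar::all(0));                           // fill runs on the GPU

    cv::Mat result;
    d_float.download(result);                        // blocking download back to host memory
    return 0;
}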
@@ -289,169 +464,6 @@ CV_EXPORTS void printCudaDeviceInfo(int device);
CV_EXPORTS void printShortCudaDeviceInfo(int device);
//////////////////////////////// GpuMat ///////////////////////////////

//! Smart pointer for GPU memory with reference counting. Its interface is mostly similar to cv::Mat.
class CV_EXPORTS GpuMat
{
public:
    //! default constructor
    GpuMat();

    //! constructs GpuMatrix of the specified size and type (_type is CV_8UC1, CV_64FC3, CV_32SC(12) etc.)
    GpuMat(int rows, int cols, int type);
    GpuMat(Size size, int type);

    //! constructs GpuMatrix and fills it with the specified value _s.
    GpuMat(int rows, int cols, int type, Scalar s);
    GpuMat(Size size, int type, Scalar s);

    //! copy constructor
    GpuMat(const GpuMat& m);

    //! constructor for GpuMatrix headers pointing to user-allocated data
    GpuMat(int rows, int cols, int type, void* data, size_t step = Mat::AUTO_STEP);
    GpuMat(Size size, int type, void* data, size_t step = Mat::AUTO_STEP);

    //! creates a matrix header for a part of the bigger matrix
    GpuMat(const GpuMat& m, Range rowRange, Range colRange);
    GpuMat(const GpuMat& m, Rect roi);

    //! builds GpuMat from Mat. Performs blocking upload to device.
    explicit GpuMat(const Mat& m);

    //! destructor - calls release()
    ~GpuMat();

    //! assignment operators
    GpuMat& operator =(const GpuMat& m);

    //! performs blocking upload of data to GpuMat.
    void upload(const Mat& m);

    //! downloads data from device to host memory. Blocking call.
    void download(Mat& m) const;

    //! returns a new GpuMatrix header for the specified row
    GpuMat row(int y) const;
    //! returns a new GpuMatrix header for the specified column
    GpuMat col(int x) const;
    //! ... for the specified row span
    GpuMat rowRange(int startrow, int endrow) const;
    GpuMat rowRange(Range r) const;
    //! ... for the specified column span
    GpuMat colRange(int startcol, int endcol) const;
    GpuMat colRange(Range r) const;

    //! returns deep copy of the GpuMatrix, i.e. the data is copied
    GpuMat clone() const;

    //! copies the GpuMatrix content to "m".
    // It calls m.create(this->size(), this->type()).
    void copyTo(GpuMat& m) const;

    //! copies those GpuMatrix elements to "m" that are marked with non-zero mask elements.
    void copyTo(GpuMat& m, const GpuMat& mask) const;

    //! converts GpuMatrix to another datatype with optional scaling. See cvConvertScale.
    void convertTo(GpuMat& m, int rtype, double alpha = 1, double beta = 0) const;

    void assignTo(GpuMat& m, int type = -1) const;

    //! sets every GpuMatrix element to s
    GpuMat& operator =(Scalar s);

    //! sets some of the GpuMatrix elements to s, according to the mask
    GpuMat& setTo(Scalar s, const GpuMat& mask = GpuMat());

    //! creates alternative GpuMatrix header for the same data, with different
    // number of channels and/or different number of rows. see cvReshape.
    GpuMat reshape(int cn, int rows = 0) const;

    //! allocates new GpuMatrix data unless the GpuMatrix already has specified size and type.
    // previous data is unreferenced if needed.
    void create(int rows, int cols, int type);
    void create(Size size, int type);

    //! decreases reference counter;
    // deallocates the data when reference counter reaches 0.
    void release();

    //! swaps with other smart pointer
    void swap(GpuMat& mat);

    //! locates GpuMatrix header within a parent GpuMatrix. See below
    void locateROI(Size& wholeSize, Point& ofs) const;

    //! moves/resizes the current GpuMatrix ROI inside the parent GpuMatrix.
    GpuMat& adjustROI(int dtop, int dbottom, int dleft, int dright);

    //! extracts a rectangular sub-GpuMatrix
    // (this is a generalized form of row, rowRange etc.)
    GpuMat operator ()(Range rowRange, Range colRange) const;
    GpuMat operator ()(Rect roi) const;

    //! returns true iff the GpuMatrix data is continuous
    // (i.e. when there are no gaps between successive rows).
    // similar to CV_IS_GpuMat_CONT(cvGpuMat->type)
    bool isContinuous() const;

    //! returns element size in bytes,
    // similar to CV_ELEM_SIZE(cvMat->type)
    size_t elemSize() const;

    //! returns the size of element channel in bytes.
    size_t elemSize1() const;

    //! returns element type, similar to CV_MAT_TYPE(cvMat->type)
    int type() const;

    //! returns element depth, similar to CV_MAT_DEPTH(cvMat->type)
    int depth() const;

    //! returns number of channels, similar to CV_MAT_CN(cvMat->type)
    int channels() const;

    //! returns step/elemSize1()
    size_t step1() const;

    //! returns GpuMatrix size:
    // width == number of columns, height == number of rows
    Size size() const;

    //! returns true if GpuMatrix data is NULL
    bool empty() const;

    //! returns pointer to y-th row
    uchar* ptr(int y = 0);
    const uchar* ptr(int y = 0) const;

    //! template version of the above method
    template<typename _Tp> _Tp* ptr(int y = 0);
    template<typename _Tp> const _Tp* ptr(int y = 0) const;

    template <typename _Tp> operator PtrStepSz<_Tp>() const;
    template <typename _Tp> operator PtrStep<_Tp>() const;

    /*! includes several bit-fields:
        - the magic signature
        - continuity flag
        - depth
        - number of channels
    */
    int flags;

    //! the number of rows and columns
    int rows, cols;

    //! a distance between successive rows in bytes; includes the gap if any
    size_t step;

    //! pointer to the data
    uchar* data;

    //! pointer to the reference counter;
    // when GpuMatrix points to user-allocated data, the pointer is NULL
    int* refcount;

    //! helper fields used in locateROI and adjustROI
    uchar* datastart;
    uchar* dataend;
};

//! Creates continuous GPU matrix
CV_EXPORTS void createContinuous(int rows, int cols, int type, GpuMat& m);

//! Ensures that the size of the given matrix is not less than (rows, cols)
//! and that the matrix type matches the specified one
CV_EXPORTS void ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m);

CV_EXPORTS GpuMat allocMatFromBuf(int rows, int cols, int type, GpuMat& mat);

}} // cv::gpu

#include "opencv2/core/gpu.inl.hpp"
...
...
modules/core/include/opencv2/core/gpu.inl.hpp
...
...
@@ -94,12 +94,58 @@ GpuMat::GpuMat(Size size_, int type_, Scalar s_)
    }
}

inline
GpuMat::GpuMat(const GpuMat& m)
    : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data),
      refcount(m.refcount), datastart(m.datastart), dataend(m.dataend)
{
    if (refcount)
        CV_XADD(refcount, 1);
}

inline
GpuMat::GpuMat(const Mat& m)
    : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
    upload(m);
}

inline
GpuMat::~GpuMat()
{
    release();
}

inline
GpuMat& GpuMat::operator =(const GpuMat& m)
{
    if (this != &m)
    {
        GpuMat temp(m);
        swap(temp);
    }

    return *this;
}

inline
void GpuMat::create(Size size_, int type_)
{
    create(size_.height, size_.width, type_);
}

inline
void GpuMat::swap(GpuMat& b)
{
    std::swap(flags, b.flags);
    std::swap(rows, b.rows);
    std::swap(cols, b.cols);
    std::swap(step, b.step);
    std::swap(data, b.data);
    std::swap(datastart, b.datastart);
    std::swap(dataend, b.dataend);
    std::swap(refcount, b.refcount);
}

inline
GpuMat GpuMat::clone() const
{
...
...
@@ -118,15 +164,17 @@ void GpuMat::assignTo(GpuMat& m, int _type) const
}

inline
-size_t GpuMat::step1() const
+uchar* GpuMat::ptr(int y)
{
-    return step / elemSize1();
+    CV_DbgAssert((unsigned) y < (unsigned) rows);
+    return data + step * y;
}

inline
-bool GpuMat::empty() const
+const uchar* GpuMat::ptr(int y) const
{
-    return data == 0;
+    CV_DbgAssert((unsigned) y < (unsigned) rows);
+    return data + step * y;
}

template <typename _Tp> inline
...
...
@@ -141,6 +189,18 @@ const _Tp* GpuMat::ptr(int y) const
    return (const _Tp*)ptr(y);
}

template <class T> inline
GpuMat::operator PtrStepSz<T>() const
{
    return PtrStepSz<T>(rows, cols, (T*)data, step);
}

template <class T> inline
GpuMat::operator PtrStep<T>() const
{
    return PtrStep<T>((T*)data, step);
}

inline
GpuMat GpuMat::row(int y) const
{
...
...
@@ -178,19 +238,13 @@ GpuMat GpuMat::colRange(Range r) const
}

-inline
-void GpuMat::create(Size size_, int type_)
-{
-    create(size_.height, size_.width, type_);
-}
-
inline
-GpuMat GpuMat::operator ()(Range _rowRange, Range _colRange) const
+GpuMat GpuMat::operator ()(Range rowRange_, Range colRange_) const
{
-    return GpuMat(*this, _rowRange, _colRange);
+    return GpuMat(*this, rowRange_, colRange_);
}

inline
GpuMat GpuMat::operator ()(Rect roi) const
{
    return GpuMat(*this, roi);
}
...
...
@@ -232,48 +286,21 @@ int GpuMat::channels() const
}

inline
-Size GpuMat::size() const
-{
-    return Size(cols, rows);
-}
-
-inline
-uchar* GpuMat::ptr(int y)
+size_t GpuMat::step1() const
{
-    CV_DbgAssert((unsigned) y < (unsigned) rows);
-    return data + step * y;
+    return step / elemSize1();
}

inline
-const uchar* GpuMat::ptr(int y) const
+Size GpuMat::size() const
{
-    CV_DbgAssert((unsigned) y < (unsigned) rows);
-    return data + step * y;
+    return Size(cols, rows);
}

inline
-GpuMat& GpuMat::operator =(Scalar s)
-{
-    setTo(s);
-    return *this;
-}
-
-template <class T> inline
-GpuMat::operator PtrStepSz<T>() const
-{
-    return PtrStepSz<T>(rows, cols, (T*)data, step);
-}
-
-template <class T> inline
-GpuMat::operator PtrStep<T>() const
-{
-    return PtrStep<T>((T*)data, step);
-}
-
-static inline
-void swap(GpuMat& a, GpuMat& b)
+bool GpuMat::empty() const
{
-    a.swap(b);
+    return data == 0;
}

static inline
...
...
@@ -304,6 +331,23 @@ void ensureSizeIsEnough(Size size, int type, GpuMat& m)
    ensureSizeIsEnough(size.height, size.width, type, m);
}

static inline
void swap(GpuMat& a, GpuMat& b)
{
    a.swap(b);
}

}} // namespace cv { namespace gpu

namespace cv
{
    inline
    Mat::Mat(const gpu::GpuMat& m)
        : flags(0), dims(0), rows(0), cols(0), data(0), refcount(0), datastart(0), dataend(0), datalimit(0), allocator(0), size(&rows)
    {
        m.download(*this);
    }
}

#endif // __OPENCV_CORE_GPUINL_HPP__
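As a side note (not part of the diff), the Mat constructor added above makes the blocking host/device round-trip a one-liner in each direction. A small sketch, assuming a CUDA-capable device and this branch's headers:

#include <opencv2/core/core.hpp>
#include <opencv2/core/gpu.hpp>

void roundTripExample()
{
    cv::Mat host = cv::Mat::ones(480, 640, CV_8UC1);

    cv::gpu::GpuMat device(host);   // GpuMat(const Mat&) performs a blocking upload
    cv::Mat back(device);           // Mat(const gpu::GpuMat&) performs a blocking download

    CV_Assert(back.size() == host.size() && back.type() == host.type());
}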
modules/core/src/cuda/matrix_operations.cu
...
...
@@ -45,18 +45,7 @@
 #include "opencv2/core/cuda/functional.hpp"
 #include "opencv2/core/cuda/type_traits.hpp"

-namespace cv { namespace gpu { namespace cudev
-{
-    void writeScalar(const uchar*);
-    void writeScalar(const schar*);
-    void writeScalar(const ushort*);
-    void writeScalar(const short int*);
-    void writeScalar(const int*);
-    void writeScalar(const float*);
-    void writeScalar(const double*);
-    void copyToWithMask_gpu(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool colorMask, cudaStream_t stream);
-    void convert_gpu(PtrStepSzb, int, PtrStepSzb, int, double, double, cudaStream_t);
-}}}
+#include "matrix_operations.hpp"

 namespace cv { namespace gpu { namespace cudev
 {
...
...
@@ -73,32 +62,33 @@ namespace cv { namespace gpu { namespace cudev
    ////////////////////////////////// CopyTo /////////////////////////////////
    ///////////////////////////////////////////////////////////////////////////

-   template <typename T> void copyToWithMask(PtrStepSzb src, PtrStepSzb dst, int cn, PtrStepSzb mask, bool colorMask, cudaStream_t stream)
+   template <typename T>
+   void copyWithMask(PtrStepSzb src, PtrStepSzb dst, int cn, PtrStepSzb mask, bool multiChannelMask, cudaStream_t stream)
    {
-       if (colorMask)
+       if (multiChannelMask)
            cv::gpu::cudev::transform((PtrStepSz<T>)src, (PtrStepSz<T>)dst, identity<T>(), SingleMask(mask), stream);
        else
            cv::gpu::cudev::transform((PtrStepSz<T>)src, (PtrStepSz<T>)dst, identity<T>(), SingleMaskChannels(mask, cn), stream);
    }

-   void copyToWithMask_gpu(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool colorMask, cudaStream_t stream)
+   void copyWithMask(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool multiChannelMask, cudaStream_t stream)
    {
-       typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, int cn, PtrStepSzb mask, bool colorMask, cudaStream_t stream);
+       typedef void (*func_t)(PtrStepSzb src, PtrStepSzb dst, int cn, PtrStepSzb mask, bool multiChannelMask, cudaStream_t stream);

        static func_t tab[] =
        {
            0,
-           copyToWithMask<unsigned char>,
-           copyToWithMask<unsigned short>,
+           copyWithMask<unsigned char>,
+           copyWithMask<unsigned short>,
            0,
-           copyToWithMask<int>,
+           copyWithMask<int>,
            0,
            0,
            0,
-           copyToWithMask<double>
+           copyWithMask<double>
        };

-       tab[elemSize1](src, dst, cn, mask, colorMask, stream);
+       tab[elemSize1](src, dst, cn, mask, multiChannelMask, stream);
    }
///////////////////////////////////////////////////////////////////////////
...
...
@@ -122,37 +112,37 @@ namespace cv { namespace gpu { namespace cudev
    template <> __device__ __forceinline__ float readScalar<float>(int i) {return scalar_32f[i];}
    template <> __device__ __forceinline__ double readScalar<double>(int i) {return scalar_64f[i];}

-   void writeScalar(const uchar* vals)
+   static inline void writeScalar(const uchar* vals)
    {
        cudaSafeCall( cudaMemcpyToSymbol(scalar_8u, vals, sizeof(uchar) * 4) );
    }
-   void writeScalar(const schar* vals)
+   static inline void writeScalar(const schar* vals)
    {
        cudaSafeCall( cudaMemcpyToSymbol(scalar_8s, vals, sizeof(schar) * 4) );
    }
-   void writeScalar(const ushort* vals)
+   static inline void writeScalar(const ushort* vals)
    {
        cudaSafeCall( cudaMemcpyToSymbol(scalar_16u, vals, sizeof(ushort) * 4) );
    }
-   void writeScalar(const short* vals)
+   static inline void writeScalar(const short* vals)
    {
        cudaSafeCall( cudaMemcpyToSymbol(scalar_16s, vals, sizeof(short) * 4) );
    }
-   void writeScalar(const int* vals)
+   static inline void writeScalar(const int* vals)
    {
        cudaSafeCall( cudaMemcpyToSymbol(scalar_32s, vals, sizeof(int) * 4) );
    }
-   void writeScalar(const float* vals)
+   static inline void writeScalar(const float* vals)
    {
        cudaSafeCall( cudaMemcpyToSymbol(scalar_32f, vals, sizeof(float) * 4) );
    }
-   void writeScalar(const double* vals)
+   static inline void writeScalar(const double* vals)
    {
        cudaSafeCall( cudaMemcpyToSymbol(scalar_64f, vals, sizeof(double) * 4) );
    }

    template<typename T>
-   __global__ void set_to_without_mask(T* mat, int cols, int rows, size_t step, int channels)
+   __global__ void set(T* mat, int cols, int rows, size_t step, int channels)
    {
        size_t x = blockIdx.x * blockDim.x + threadIdx.x;
        size_t y = blockIdx.y * blockDim.y + threadIdx.y;
...
...
@@ -164,64 +154,65 @@ namespace cv { namespace gpu { namespace cudev
        }
    }

-   template<typename T>
-   __global__ void set_to_with_mask(T* mat, const uchar* mask, int cols, int rows, size_t step, int channels, size_t step_mask)
-   {
-       size_t x = blockIdx.x * blockDim.x + threadIdx.x;
-       size_t y = blockIdx.y * blockDim.y + threadIdx.y;
-
-       if ((x < cols * channels ) && (y < rows))
-           if (mask[y * step_mask + x / channels] != 0)
-           {
-               size_t idx = y * ( step >> shift_and_sizeof<T>::shift ) + x;
-               mat[idx] = readScalar<T>(x % channels);
-           }
-   }
-
    template <typename T>
-   void set_to_gpu(PtrStepSzb mat, const T* scalar, PtrStepSzb mask, int channels, cudaStream_t stream)
+   void set(PtrStepSz<T> mat, const T* scalar, int channels, cudaStream_t stream)
    {
        writeScalar(scalar);

        dim3 threadsPerBlock(32, 8, 1);
        dim3 numBlocks(mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);

-       set_to_with_mask<T><<<numBlocks, threadsPerBlock, 0, stream>>>((T*)mat.data, (uchar*)mask.data, mat.cols, mat.rows, mat.step, channels, mask.step);
+       set<T><<<numBlocks, threadsPerBlock, 0, stream>>>(mat.data, mat.cols, mat.rows, mat.step, channels);
        cudaSafeCall( cudaGetLastError() );

        if (stream == 0)
            cudaSafeCall ( cudaDeviceSynchronize() );
    }

-   template void set_to_gpu<uchar >(PtrStepSzb mat, const uchar* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
-   template void set_to_gpu<schar >(PtrStepSzb mat, const schar* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
-   template void set_to_gpu<ushort>(PtrStepSzb mat, const ushort* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
-   template void set_to_gpu<short >(PtrStepSzb mat, const short* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
-   template void set_to_gpu<int >(PtrStepSzb mat, const int* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
-   template void set_to_gpu<float >(PtrStepSzb mat, const float* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
-   template void set_to_gpu<double>(PtrStepSzb mat, const double* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
+   template void set<uchar >(PtrStepSz<uchar > mat, const uchar* scalar, int channels, cudaStream_t stream);
+   template void set<schar >(PtrStepSz<schar > mat, const schar* scalar, int channels, cudaStream_t stream);
+   template void set<ushort>(PtrStepSz<ushort> mat, const ushort* scalar, int channels, cudaStream_t stream);
+   template void set<short >(PtrStepSz<short > mat, const short* scalar, int channels, cudaStream_t stream);
+   template void set<int >(PtrStepSz<int > mat, const int* scalar, int channels, cudaStream_t stream);
+   template void set<float >(PtrStepSz<float > mat, const float* scalar, int channels, cudaStream_t stream);
+   template void set<double>(PtrStepSz<double> mat, const double* scalar, int channels, cudaStream_t stream);

+   template<typename T>
+   __global__ void set(T* mat, const uchar* mask, int cols, int rows, size_t step, int channels, size_t step_mask)
+   {
+       size_t x = blockIdx.x * blockDim.x + threadIdx.x;
+       size_t y = blockIdx.y * blockDim.y + threadIdx.y;
+
+       if ((x < cols * channels ) && (y < rows))
+           if (mask[y * step_mask + x / channels] != 0)
+           {
+               size_t idx = y * ( step >> shift_and_sizeof<T>::shift ) + x;
+               mat[idx] = readScalar<T>(x % channels);
+           }
+   }
+
    template <typename T>
-   void set_to_gpu(PtrStepSzb mat, const T* scalar, int channels, cudaStream_t stream)
+   void set(PtrStepSz<T> mat, const T* scalar, PtrStepSzb mask, int channels, cudaStream_t stream)
    {
        writeScalar(scalar);

        dim3 threadsPerBlock(32, 8, 1);
        dim3 numBlocks(mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);

-       set_to_without_mask<T><<<numBlocks, threadsPerBlock, 0, stream>>>((T*)mat.data, mat.cols, mat.rows, mat.step, channels);
+       set<T><<<numBlocks, threadsPerBlock, 0, stream>>>(mat.data, mask.data, mat.cols, mat.rows, mat.step, channels, mask.step);
        cudaSafeCall( cudaGetLastError() );

        if (stream == 0)
            cudaSafeCall ( cudaDeviceSynchronize() );
    }

-   template void set_to_gpu<uchar >(PtrStepSzb mat, const uchar* scalar, int channels, cudaStream_t stream);
-   template void set_to_gpu<schar >(PtrStepSzb mat, const schar* scalar, int channels, cudaStream_t stream);
-   template void set_to_gpu<ushort>(PtrStepSzb mat, const ushort* scalar, int channels, cudaStream_t stream);
-   template void set_to_gpu<short >(PtrStepSzb mat, const short* scalar, int channels, cudaStream_t stream);
-   template void set_to_gpu<int >(PtrStepSzb mat, const int* scalar, int channels, cudaStream_t stream);
-   template void set_to_gpu<float >(PtrStepSzb mat, const float* scalar, int channels, cudaStream_t stream);
-   template void set_to_gpu<double>(PtrStepSzb mat, const double* scalar, int channels, cudaStream_t stream);
+   template void set<uchar >(PtrStepSz<uchar > mat, const uchar* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
+   template void set<schar >(PtrStepSz<schar > mat, const schar* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
+   template void set<ushort>(PtrStepSz<ushort> mat, const ushort* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
+   template void set<short >(PtrStepSz<short > mat, const short* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
+   template void set<int >(PtrStepSz<int > mat, const int* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
+   template void set<float >(PtrStepSz<float > mat, const float* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
+   template void set<double>(PtrStepSz<double> mat, const double* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);
///////////////////////////////////////////////////////////////////////////
//////////////////////////////// ConvertTo ////////////////////////////////
...
...
@@ -296,12 +287,7 @@ namespace cv { namespace gpu { namespace cudev
        cv::gpu::cudev::transform((PtrStepSz<T>)src, (PtrStepSz<D>)dst, op, WithOutMask(), stream);
    }

-#if defined __clang__
-# pragma clang diagnostic push
-# pragma clang diagnostic ignored "-Wmissing-declarations"
-#endif
-
-   void convert_gpu(PtrStepSzb src, int sdepth, PtrStepSzb dst, int ddepth, double alpha, double beta, cudaStream_t stream)
+   void convert(PtrStepSzb src, int sdepth, PtrStepSzb dst, int ddepth, double alpha, double beta, cudaStream_t stream)
    {
        typedef void (*caller_t)(PtrStepSzb src, PtrStepSzb dst, double alpha, double beta, cudaStream_t stream);
...
@@ -372,11 +358,7 @@ namespace cv { namespace gpu { namespace cudev
            }
        };

-       caller_t func = tab[sdepth][ddepth];
+       const caller_t func = tab[sdepth][ddepth];

        func(src, dst, alpha, beta, stream);
    }

-#if defined __clang__
-# pragma clang diagnostic pop
-#endif
-
}}} // namespace cv { namespace gpu { namespace cudev
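As an aside (not part of the diff), the launch geometry used by the set() launchers above can be checked with a small host-side sketch; the matrix size below is illustrative only.

#include <cstdio>

int main()
{
    const int rows = 480, cols = 640, channels = 3;   // example CV_8UC3 matrix
    const int blockX = 32, blockY = 8;                // threadsPerBlock(32, 8, 1)

    // One thread per element component (cols * channels in x, rows in y),
    // rounded up with the "+ 1" used by the launchers above.
    const int gridX = cols * channels / blockX + 1;   // 640 * 3 / 32 + 1 = 61
    const int gridY = rows / blockY + 1;              // 480 / 8 + 1 = 61

    std::printf("grid = %d x %d blocks of %d x %d threads\n", gridX, gridY, blockX, blockY);
    return 0;
}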
modules/core/src/cuda/matrix_operations.hpp
new file (0 → 100644)
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "opencv2/core/cuda/common.hpp"

namespace cv { namespace gpu { namespace cudev
{
    void copyWithMask(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool multiChannelMask, cudaStream_t stream);

    template <typename T>
    void set(PtrStepSz<T> mat, const T* scalar, int channels, cudaStream_t stream);

    template <typename T>
    void set(PtrStepSz<T> mat, const T* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);

    void convert(PtrStepSzb src, int sdepth, PtrStepSzb dst, int ddepth, double alpha, double beta, cudaStream_t stream);
}}}
modules/core/src/gpu.cpp
...
...
@@ -509,1020 +509,6 @@ void cv::gpu::printShortCudaDeviceInfo(int device)
#endif // HAVE_CUDA
//////////////////////////////// GpuMat ///////////////////////////////
cv::gpu::GpuMat::GpuMat(const GpuMat& m) :
    flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data),
    refcount(m.refcount), datastart(m.datastart), dataend(m.dataend)
{
    if (refcount)
        CV_XADD(refcount, 1);
}

cv::gpu::GpuMat::GpuMat(int rows_, int cols_, int type_, void* data_, size_t step_) :
    flags(Mat::MAGIC_VAL + (type_ & Mat::TYPE_MASK)), rows(rows_), cols(cols_),
    step(step_), data((uchar*)data_), refcount(0),
    datastart((uchar*)data_), dataend((uchar*)data_)
{
    size_t minstep = cols * elemSize();

    if (step == Mat::AUTO_STEP)
    {
        step = minstep;
        flags |= Mat::CONTINUOUS_FLAG;
    }
    else
    {
        if (rows == 1)
            step = minstep;

        CV_DbgAssert(step >= minstep);

        flags |= step == minstep ? Mat::CONTINUOUS_FLAG : 0;
    }

    dataend += step * (rows - 1) + minstep;
}

cv::gpu::GpuMat::GpuMat(Size size_, int type_, void* data_, size_t step_) :
    flags(Mat::MAGIC_VAL + (type_ & Mat::TYPE_MASK)), rows(size_.height), cols(size_.width),
    step(step_), data((uchar*)data_), refcount(0),
    datastart((uchar*)data_), dataend((uchar*)data_)
{
    size_t minstep = cols * elemSize();

    if (step == Mat::AUTO_STEP)
    {
        step = minstep;
        flags |= Mat::CONTINUOUS_FLAG;
    }
    else
    {
        if (rows == 1)
            step = minstep;

        CV_DbgAssert(step >= minstep);

        flags |= step == minstep ? Mat::CONTINUOUS_FLAG : 0;
    }

    dataend += step * (rows - 1) + minstep;
}

cv::gpu::GpuMat::GpuMat(const GpuMat& m, Range _rowRange, Range _colRange)
{
    flags = m.flags;
    step = m.step;
    refcount = m.refcount;
    data = m.data;
    datastart = m.datastart;
    dataend = m.dataend;

    if (_rowRange == Range::all())
        rows = m.rows;
    else
    {
        CV_Assert(0 <= _rowRange.start && _rowRange.start <= _rowRange.end && _rowRange.end <= m.rows);

        rows = _rowRange.size();
        data += step * _rowRange.start;
    }

    if (_colRange == Range::all())
        cols = m.cols;
    else
    {
        CV_Assert(0 <= _colRange.start && _colRange.start <= _colRange.end && _colRange.end <= m.cols);

        cols = _colRange.size();
        data += _colRange.start * elemSize();
        flags &= cols < m.cols ? ~Mat::CONTINUOUS_FLAG : -1;
    }

    if (rows == 1)
        flags |= Mat::CONTINUOUS_FLAG;

    if (refcount)
        CV_XADD(refcount, 1);

    if (rows <= 0 || cols <= 0)
        rows = cols = 0;
}

cv::gpu::GpuMat::GpuMat(const GpuMat& m, Rect roi) :
    flags(m.flags), rows(roi.height), cols(roi.width),
    step(m.step), data(m.data + roi.y * step), refcount(m.refcount),
    datastart(m.datastart), dataend(m.dataend)
{
    flags &= roi.width < m.cols ? ~Mat::CONTINUOUS_FLAG : -1;
    data += roi.x * elemSize();

    CV_Assert(0 <= roi.x && 0 <= roi.width && roi.x + roi.width <= m.cols && 0 <= roi.y && 0 <= roi.height && roi.y + roi.height <= m.rows);

    if (refcount)
        CV_XADD(refcount, 1);

    if (rows <= 0 || cols <= 0)
        rows = cols = 0;
}

cv::gpu::GpuMat::GpuMat(const Mat& m) :
    flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
{
    upload(m);
}

GpuMat& cv::gpu::GpuMat::operator =(const GpuMat& m)
{
    if (this != &m)
    {
        GpuMat temp(m);
        swap(temp);
    }

    return *this;
}

void cv::gpu::GpuMat::swap(GpuMat& b)
{
    std::swap(flags, b.flags);
    std::swap(rows, b.rows);
    std::swap(cols, b.cols);
    std::swap(step, b.step);
    std::swap(data, b.data);
    std::swap(datastart, b.datastart);
    std::swap(dataend, b.dataend);
    std::swap(refcount, b.refcount);
}

void cv::gpu::GpuMat::locateROI(Size& wholeSize, Point& ofs) const
{
    size_t esz = elemSize();
    ptrdiff_t delta1 = data - datastart;
    ptrdiff_t delta2 = dataend - datastart;

    CV_DbgAssert(step > 0);

    if (delta1 == 0)
        ofs.x = ofs.y = 0;
    else
    {
        ofs.y = static_cast<int>(delta1 / step);
        ofs.x = static_cast<int>((delta1 - step * ofs.y) / esz);

        CV_DbgAssert(data == datastart + ofs.y * step + ofs.x * esz);
    }

    size_t minstep = (ofs.x + cols) * esz;

    wholeSize.height = std::max(static_cast<int>((delta2 - minstep) / step + 1), ofs.y + rows);
    wholeSize.width = std::max(static_cast<int>((delta2 - step * (wholeSize.height - 1)) / esz), ofs.x + cols);
}

GpuMat& cv::gpu::GpuMat::adjustROI(int dtop, int dbottom, int dleft, int dright)
{
    Size wholeSize;
    Point ofs;
    locateROI(wholeSize, ofs);

    size_t esz = elemSize();

    int row1 = std::max(ofs.y - dtop, 0);
    int row2 = std::min(ofs.y + rows + dbottom, wholeSize.height);

    int col1 = std::max(ofs.x - dleft, 0);
    int col2 = std::min(ofs.x + cols + dright, wholeSize.width);

    data += (row1 - ofs.y) * step + (col1 - ofs.x) * esz;
    rows = row2 - row1;
    cols = col2 - col1;

    if (esz * cols == step || rows == 1)
        flags |= Mat::CONTINUOUS_FLAG;
    else
        flags &= ~Mat::CONTINUOUS_FLAG;

    return *this;
}

GpuMat cv::gpu::GpuMat::reshape(int new_cn, int new_rows) const
{
    GpuMat hdr = *this;

    int cn = channels();
    if (new_cn == 0)
        new_cn = cn;

    int total_width = cols * cn;

    if ((new_cn > total_width || total_width % new_cn != 0) && new_rows == 0)
        new_rows = rows * total_width / new_cn;

    if (new_rows != 0 && new_rows != rows)
    {
        int total_size = total_width * rows;

        if (!isContinuous())
            CV_Error(CV_BadStep, "The matrix is not continuous, thus its number of rows can not be changed");

        if ((unsigned)new_rows > (unsigned)total_size)
            CV_Error(CV_StsOutOfRange, "Bad new number of rows");

        total_width = total_size / new_rows;

        if (total_width * new_rows != total_size)
            CV_Error(CV_StsBadArg, "The total number of matrix elements is not divisible by the new number of rows");

        hdr.rows = new_rows;
        hdr.step = total_width * elemSize1();
    }

    int new_width = total_width / new_cn;

    if (new_width * new_cn != total_width)
        CV_Error(CV_BadNumChannels, "The total width is not divisible by the new number of channels");

    hdr.cols = new_width;
    hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn - 1) << CV_CN_SHIFT);

    return hdr;
}

cv::Mat::Mat(const GpuMat& m) :
    flags(0), dims(0), rows(0), cols(0), data(0), refcount(0), datastart(0), dataend(0), datalimit(0), allocator(0), size(&rows)
{
    m.download(*this);
}

void cv::gpu::createContinuous(int rows, int cols, int type, GpuMat& m)
{
    int area = rows * cols;
    if (m.empty() || m.type() != type || !m.isContinuous() || m.size().area() < area)
        m.create(1, area, type);

    m.cols = cols;
    m.rows = rows;
    m.step = m.elemSize() * cols;
    m.flags |= Mat::CONTINUOUS_FLAG;
}

void cv::gpu::ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m)
{
    if (m.empty() || m.type() != type || m.data != m.datastart)
        m.create(rows, cols, type);
    else
    {
        const size_t esz = m.elemSize();
        const ptrdiff_t delta2 = m.dataend - m.datastart;

        const size_t minstep = m.cols * esz;

        Size wholeSize;
        wholeSize.height = std::max(static_cast<int>((delta2 - minstep) / m.step + 1), m.rows);
        wholeSize.width = std::max(static_cast<int>((delta2 - m.step * (wholeSize.height - 1)) / esz), m.cols);

        if (wholeSize.height < rows || wholeSize.width < cols)
            m.create(rows, cols, type);
        else
        {
            m.cols = cols;
            m.rows = rows;
        }
    }
}

GpuMat cv::gpu::allocMatFromBuf(int rows, int cols, int type, GpuMat& mat)
{
    if (!mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols)
        return mat(Rect(0, 0, cols, rows));

    return mat = GpuMat(rows, cols, type);
}
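A quick aside on how these helpers are commonly used (not part of the commit; the buffer name and sizes below are illustrative): ensureSizeIsEnough lets callers keep one scratch GpuMat alive and skip reallocation whenever the requested size already fits.

#include <opencv2/core/core.hpp>
#include <opencv2/core/gpu.hpp>

void processFrames(int frameCount)
{
    cv::gpu::GpuMat buf;   // persistent scratch buffer reused across iterations

    for (int i = 0; i < frameCount; ++i)
    {
        // Reallocates only when the existing buffer is too small or has the wrong
        // type, so steady-state iterations perform no device allocation at all.
        cv::gpu::ensureSizeIsEnough(480, 640, CV_32FC1, buf);
        buf.setTo(cv::Scalar::all(0));
        // ... run kernels that write into buf ...
    }
}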
namespace
{
    class GpuFuncTable
    {
    public:
        virtual ~GpuFuncTable() {}

        virtual void copy(const Mat& src, GpuMat& dst) const = 0;
        virtual void copy(const GpuMat& src, Mat& dst) const = 0;
        virtual void copy(const GpuMat& src, GpuMat& dst) const = 0;

        virtual void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask) const = 0;

        virtual void convert(const GpuMat& src, GpuMat& dst) const = 0;
        virtual void convert(const GpuMat& src, GpuMat& dst, double alpha, double beta) const = 0;

        virtual void setTo(GpuMat& m, Scalar s, const GpuMat& mask) const = 0;

        virtual void mallocPitch(void** devPtr, size_t* step, size_t width, size_t height) const = 0;
        virtual void free(void* devPtr) const = 0;
    };
}

#ifndef HAVE_CUDA

namespace
{
    class EmptyFuncTable : public GpuFuncTable
    {
    public:
        void copy(const Mat&, GpuMat&) const { throw_no_cuda(); }
        void copy(const GpuMat&, Mat&) const { throw_no_cuda(); }
        void copy(const GpuMat&, GpuMat&) const { throw_no_cuda(); }

        void copyWithMask(const GpuMat&, GpuMat&, const GpuMat&) const { throw_no_cuda(); }

        void convert(const GpuMat&, GpuMat&) const { throw_no_cuda(); }
        void convert(const GpuMat&, GpuMat&, double, double) const { throw_no_cuda(); }

        void setTo(GpuMat&, Scalar, const GpuMat&) const { throw_no_cuda(); }

        void mallocPitch(void**, size_t*, size_t, size_t) const { throw_no_cuda(); }
        void free(void*) const {}
    };

    const GpuFuncTable* gpuFuncTable()
    {
        static EmptyFuncTable empty;
        return &empty;
    }
}

#else // HAVE_CUDA

namespace cv { namespace gpu { namespace cudev
{
    void copyToWithMask_gpu(PtrStepSzb src, PtrStepSzb dst, size_t elemSize1, int cn, PtrStepSzb mask, bool colorMask, cudaStream_t stream);

    template <typename T>
    void set_to_gpu(PtrStepSzb mat, const T* scalar, int channels, cudaStream_t stream);

    template <typename T>
    void set_to_gpu(PtrStepSzb mat, const T* scalar, PtrStepSzb mask, int channels, cudaStream_t stream);

    void convert_gpu(PtrStepSzb src, int sdepth, PtrStepSzb dst, int ddepth, double alpha, double beta, cudaStream_t stream);
}}}

namespace
{
    template <typename T> void kernelSetCaller(GpuMat& src, Scalar s, cudaStream_t stream)
    {
        Scalar_<T> sf = s;
        cv::gpu::cudev::set_to_gpu(src, sf.val, src.channels(), stream);
    }

    template <typename T> void kernelSetCaller(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream)
    {
        Scalar_<T> sf = s;
        cv::gpu::cudev::set_to_gpu(src, sf.val, mask, src.channels(), stream);
    }
}

namespace cv { namespace gpu
{
    CV_EXPORTS void copyWithMask(const cv::gpu::GpuMat&, cv::gpu::GpuMat&, const cv::gpu::GpuMat&, CUstream_st*);
    CV_EXPORTS void convertTo(const cv::gpu::GpuMat&, cv::gpu::GpuMat&);
    CV_EXPORTS void convertTo(const cv::gpu::GpuMat&, cv::gpu::GpuMat&, double, double, CUstream_st*);
    CV_EXPORTS void setTo(cv::gpu::GpuMat&, cv::Scalar, CUstream_st*);
    CV_EXPORTS void setTo(cv::gpu::GpuMat&, cv::Scalar, const cv::gpu::GpuMat&, CUstream_st*);
    CV_EXPORTS void setTo(cv::gpu::GpuMat&, cv::Scalar);
    CV_EXPORTS void setTo(cv::gpu::GpuMat&, cv::Scalar, const cv::gpu::GpuMat&);
}}

namespace cv { namespace gpu
{
    void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream = 0)
    {
        CV_Assert(src.size() == dst.size() && src.type() == dst.type());
        CV_Assert(src.size() == mask.size() && mask.depth() == CV_8U && (mask.channels() == 1 || mask.channels() == src.channels()));

        cv::gpu::cudev::copyToWithMask_gpu(src.reshape(1), dst.reshape(1), src.elemSize1(), src.channels(), mask.reshape(1), mask.channels() != 1, stream);
    }

    void convertTo(const GpuMat& src, GpuMat& dst)
    {
        cv::gpu::cudev::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), 1.0, 0.0, 0);
    }

    void convertTo(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream = 0)
    {
        cv::gpu::cudev::convert_gpu(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), alpha, beta, stream);
    }

    void setTo(GpuMat& src, Scalar s, cudaStream_t stream)
    {
        typedef void (*caller_t)(GpuMat& src, Scalar s, cudaStream_t stream);

        static const caller_t callers[] =
        {
            kernelSetCaller<uchar>, kernelSetCaller<schar>, kernelSetCaller<ushort>, kernelSetCaller<short>,
            kernelSetCaller<int>, kernelSetCaller<float>, kernelSetCaller<double>
        };

        callers[src.depth()](src, s, stream);
    }

    void setTo(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream)
    {
        typedef void (*caller_t)(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream);

        static const caller_t callers[] =
        {
            kernelSetCaller<uchar>, kernelSetCaller<schar>, kernelSetCaller<ushort>, kernelSetCaller<short>,
            kernelSetCaller<int>, kernelSetCaller<float>, kernelSetCaller<double>
        };

        callers[src.depth()](src, s, mask, stream);
    }

    void setTo(GpuMat& src, Scalar s)
    {
        setTo(src, s, 0);
    }

    void setTo(GpuMat& src, Scalar s, const GpuMat& mask)
    {
        setTo(src, s, mask, 0);
    }
}}
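The setTo() wrappers above pick a kernelSetCaller instantiation from a static array indexed by src.depth(), and the same idiom drives the NPP dispatch tables that follow. A self-contained miniature of the pattern (names and values are illustrative, not from OpenCV):

#include <cstdio>

// Illustrative miniature of the dispatch idiom: a static table of function
// pointers indexed by element depth, with 0 marking entries that have no handler.
typedef void (*op_t)(int rows, int cols);

static void op8u (int r, int c) { std::printf("8-bit path: %dx%d\n", r, c); }
static void op32f(int r, int c) { std::printf("float path: %dx%d\n", r, c); }

int main()
{
    enum { DEPTH_8U = 0, DEPTH_32F = 5 };                 // mirrors CV_8U / CV_32F ordering
    static const op_t table[7] = { op8u, 0, 0, 0, 0, op32f, 0 };

    const int depth = DEPTH_32F;                          // would come from src.depth()
    if (table[depth])
        table[depth](480, 640);
    return 0;
}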
namespace
{
    template<int n> struct NPPTypeTraits;
    template<> struct NPPTypeTraits<CV_8U>  { typedef Npp8u npp_type; };
    template<> struct NPPTypeTraits<CV_8S>  { typedef Npp8s npp_type; };
    template<> struct NPPTypeTraits<CV_16U> { typedef Npp16u npp_type; };
    template<> struct NPPTypeTraits<CV_16S> { typedef Npp16s npp_type; };
    template<> struct NPPTypeTraits<CV_32S> { typedef Npp32s npp_type; };
    template<> struct NPPTypeTraits<CV_32F> { typedef Npp32f npp_type; };
    template<> struct NPPTypeTraits<CV_64F> { typedef Npp64f npp_type; };

    //////////////////////////////////////////////////////////////////////////
    // Convert

    template<int SDEPTH, int DDEPTH> struct NppConvertFunc
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
        typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;

        typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, dst_t* pDst, int nDstStep, NppiSize oSizeROI);
    };
    template<int DDEPTH> struct NppConvertFunc<CV_32F, DDEPTH>
    {
        typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;

        typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, dst_t* pDst, int nDstStep, NppiSize oSizeROI, NppRoundMode eRoundMode);
    };

    template<int SDEPTH, int DDEPTH, typename NppConvertFunc<SDEPTH, DDEPTH>::func_ptr func> struct NppCvt
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
        typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;

        static void call(const GpuMat& src, GpuMat& dst)
        {
            NppiSize sz;
            sz.width = src.cols;
            sz.height = src.rows;

            nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz) );

            cudaSafeCall( cudaDeviceSynchronize() );
        }
    };
    template<int DDEPTH, typename NppConvertFunc<CV_32F, DDEPTH>::func_ptr func> struct NppCvt<CV_32F, DDEPTH, func>
    {
        typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;

        static void call(const GpuMat& src, GpuMat& dst)
        {
            NppiSize sz;
            sz.width = src.cols;
            sz.height = src.rows;

            nppSafeCall( func(src.ptr<Npp32f>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz, NPP_RND_NEAR) );

            cudaSafeCall( cudaDeviceSynchronize() );
        }
    };

    //////////////////////////////////////////////////////////////////////////
    // Set

    template<int SDEPTH, int SCN> struct NppSetFunc
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        typedef NppStatus (*func_ptr)(const src_t values[], src_t* pSrc, int nSrcStep, NppiSize oSizeROI);
    };
    template<int SDEPTH> struct NppSetFunc<SDEPTH, 1>
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI);
    };
    template<int SCN> struct NppSetFunc<CV_8S, SCN>
    {
        typedef NppStatus (*func_ptr)(Npp8s values[], Npp8s* pSrc, int nSrcStep, NppiSize oSizeROI);
    };
    template<> struct NppSetFunc<CV_8S, 1>
    {
        typedef NppStatus (*func_ptr)(Npp8s val, Npp8s* pSrc, int nSrcStep, NppiSize oSizeROI);
    };

    template<int SDEPTH, int SCN, typename NppSetFunc<SDEPTH, SCN>::func_ptr func> struct NppSet
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        static void call(GpuMat& src, Scalar s)
        {
            NppiSize sz;
            sz.width = src.cols;
            sz.height = src.rows;

            Scalar_<src_t> nppS = s;

            nppSafeCall( func(nppS.val, src.ptr<src_t>(), static_cast<int>(src.step), sz) );

            cudaSafeCall( cudaDeviceSynchronize() );
        }
    };
    template<int SDEPTH, typename NppSetFunc<SDEPTH, 1>::func_ptr func> struct NppSet<SDEPTH, 1, func>
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        static void call(GpuMat& src, Scalar s)
        {
            NppiSize sz;
            sz.width = src.cols;
            sz.height = src.rows;

            Scalar_<src_t> nppS = s;

            nppSafeCall( func(nppS[0], src.ptr<src_t>(), static_cast<int>(src.step), sz) );

            cudaSafeCall( cudaDeviceSynchronize() );
        }
    };

    template<int SDEPTH, int SCN> struct NppSetMaskFunc
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        typedef NppStatus (*func_ptr)(const src_t values[], src_t* pSrc, int nSrcStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep);
    };
    template<int SDEPTH> struct NppSetMaskFunc<SDEPTH, 1>
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep);
    };

    template<int SDEPTH, int SCN, typename NppSetMaskFunc<SDEPTH, SCN>::func_ptr func> struct NppSetMask
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        static void call(GpuMat& src, Scalar s, const GpuMat& mask)
        {
            NppiSize sz;
            sz.width = src.cols;
            sz.height = src.rows;

            Scalar_<src_t> nppS = s;

            nppSafeCall( func(nppS.val, src.ptr<src_t>(), static_cast<int>(src.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );

            cudaSafeCall( cudaDeviceSynchronize() );
        }
    };
    template<int SDEPTH, typename NppSetMaskFunc<SDEPTH, 1>::func_ptr func> struct NppSetMask<SDEPTH, 1, func>
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        static void call(GpuMat& src, Scalar s, const GpuMat& mask)
        {
            NppiSize sz;
            sz.width = src.cols;
            sz.height = src.rows;

            Scalar_<src_t> nppS = s;

            nppSafeCall( func(nppS[0], src.ptr<src_t>(), static_cast<int>(src.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );

            cudaSafeCall( cudaDeviceSynchronize() );
        }
    };

    //////////////////////////////////////////////////////////////////////////
    // CopyMasked

    template<int SDEPTH> struct NppCopyMaskedFunc
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, src_t* pDst, int nDstStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep);
    };

    template<int SDEPTH, typename NppCopyMaskedFunc<SDEPTH>::func_ptr func> struct NppCopyMasked
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        static void call(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t /*stream*/)
        {
            NppiSize sz;
            sz.width = src.cols;
            sz.height = src.rows;

            nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), dst.ptr<src_t>(), static_cast<int>(dst.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );

            cudaSafeCall( cudaDeviceSynchronize() );
        }
    };

    template<typename T> static inline bool isAligned(const T* ptr, size_t size)
    {
        return reinterpret_cast<size_t>(ptr) % size == 0;
    }
//////////////////////////////////////////////////////////////////////////
// CudaFuncTable
class
CudaFuncTable
:
public
GpuFuncTable
{
public
:
void
copy
(
const
Mat
&
src
,
GpuMat
&
dst
)
const
{
cudaSafeCall
(
cudaMemcpy2D
(
dst
.
data
,
dst
.
step
,
src
.
data
,
src
.
step
,
src
.
cols
*
src
.
elemSize
(),
src
.
rows
,
cudaMemcpyHostToDevice
)
);
}
void
copy
(
const
GpuMat
&
src
,
Mat
&
dst
)
const
{
cudaSafeCall
(
cudaMemcpy2D
(
dst
.
data
,
dst
.
step
,
src
.
data
,
src
.
step
,
src
.
cols
*
src
.
elemSize
(),
src
.
rows
,
cudaMemcpyDeviceToHost
)
);
}
void
copy
(
const
GpuMat
&
src
,
GpuMat
&
dst
)
const
{
cudaSafeCall
(
cudaMemcpy2D
(
dst
.
data
,
dst
.
step
,
src
.
data
,
src
.
step
,
src
.
cols
*
src
.
elemSize
(),
src
.
rows
,
cudaMemcpyDeviceToDevice
)
);
}
void
copyWithMask
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
const
GpuMat
&
mask
)
const
{
CV_Assert
(
src
.
depth
()
<=
CV_64F
&&
src
.
channels
()
<=
4
);
CV_Assert
(
src
.
size
()
==
dst
.
size
()
&&
src
.
type
()
==
dst
.
type
());
CV_Assert
(
src
.
size
()
==
mask
.
size
()
&&
mask
.
depth
()
==
CV_8U
&&
(
mask
.
channels
()
==
1
||
mask
.
channels
()
==
src
.
channels
()));
if
(
src
.
depth
()
==
CV_64F
)
{
if
(
!
TargetArchs
::
builtWith
(
NATIVE_DOUBLE
)
||
!
DeviceInfo
().
supports
(
NATIVE_DOUBLE
))
CV_Error
(
CV_StsUnsupportedFormat
,
"The device doesn't support double"
);
}
typedef
void
(
*
func_t
)(
const
GpuMat
&
src
,
GpuMat
&
dst
,
const
GpuMat
&
mask
,
cudaStream_t
stream
);
static
const
func_t
funcs
[
7
][
4
]
=
{
/* 8U */
{
NppCopyMasked
<
CV_8U
,
nppiCopy_8u_C1MR
>::
call
,
cv
::
gpu
::
copyWithMask
,
NppCopyMasked
<
CV_8U
,
nppiCopy_8u_C3MR
>::
call
,
NppCopyMasked
<
CV_8U
,
nppiCopy_8u_C4MR
>::
call
},
/* 8S */
{
cv
::
gpu
::
copyWithMask
,
cv
::
gpu
::
copyWithMask
,
cv
::
gpu
::
copyWithMask
,
cv
::
gpu
::
copyWithMask
},
/* 16U */
{
NppCopyMasked
<
CV_16U
,
nppiCopy_16u_C1MR
>::
call
,
cv
::
gpu
::
copyWithMask
,
NppCopyMasked
<
CV_16U
,
nppiCopy_16u_C3MR
>::
call
,
NppCopyMasked
<
CV_16U
,
nppiCopy_16u_C4MR
>::
call
},
/* 16S */
{
NppCopyMasked
<
CV_16S
,
nppiCopy_16s_C1MR
>::
call
,
cv
::
gpu
::
copyWithMask
,
NppCopyMasked
<
CV_16S
,
nppiCopy_16s_C3MR
>::
call
,
NppCopyMasked
<
CV_16S
,
nppiCopy_16s_C4MR
>::
call
},
/* 32S */
{
NppCopyMasked
<
CV_32S
,
nppiCopy_32s_C1MR
>::
call
,
cv
::
gpu
::
copyWithMask
,
NppCopyMasked
<
CV_32S
,
nppiCopy_32s_C3MR
>::
call
,
NppCopyMasked
<
CV_32S
,
nppiCopy_32s_C4MR
>::
call
},
/* 32F */
{
NppCopyMasked
<
CV_32F
,
nppiCopy_32f_C1MR
>::
call
,
cv
::
gpu
::
copyWithMask
,
NppCopyMasked
<
CV_32F
,
nppiCopy_32f_C3MR
>::
call
,
NppCopyMasked
<
CV_32F
,
nppiCopy_32f_C4MR
>::
call
},
/* 64F */
{
cv
::
gpu
::
copyWithMask
,
cv
::
gpu
::
copyWithMask
,
cv
::
gpu
::
copyWithMask
,
cv
::
gpu
::
copyWithMask
}
};
const
func_t
func
=
mask
.
channels
()
==
src
.
channels
()
?
funcs
[
src
.
depth
()][
src
.
channels
()
-
1
]
:
cv
::
gpu
::
copyWithMask
;
func
(
src
,
dst
,
mask
,
0
);
}
    void convert(const GpuMat& src, GpuMat& dst) const
    {
        typedef void (*func_t)(const GpuMat& src, GpuMat& dst);
        static const func_t funcs[7][7][4] =
        {
            {
                /* 8U ->  8U */ {0, 0, 0, 0},
                /* 8U ->  8S */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
                /* 8U -> 16U */ {NppCvt<CV_8U, CV_16U, nppiConvert_8u16u_C1R>::call, cv::gpu::convertTo, cv::gpu::convertTo, NppCvt<CV_8U, CV_16U, nppiConvert_8u16u_C4R>::call},
                /* 8U -> 16S */ {NppCvt<CV_8U, CV_16S, nppiConvert_8u16s_C1R>::call, cv::gpu::convertTo, cv::gpu::convertTo, NppCvt<CV_8U, CV_16S, nppiConvert_8u16s_C4R>::call},
                /* 8U -> 32S */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
                /* 8U -> 32F */ {NppCvt<CV_8U, CV_32F, nppiConvert_8u32f_C1R>::call, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
                /* 8U -> 64F */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo}
            },
            {
                /* 8S ->  8U */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
                /* 8S ->  8S */ {0, 0, 0, 0},
                /* 8S -> 16U */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
                /* 8S -> 16S */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
                /* 8S -> 32S */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
                /* 8S -> 32F */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
                /* 8S -> 64F */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo}
            },
            {
                /* 16U ->  8U */ {NppCvt<CV_16U, CV_8U, nppiConvert_16u8u_C1R>::call, cv::gpu::convertTo, cv::gpu::convertTo, NppCvt<CV_16U, CV_8U, nppiConvert_16u8u_C4R>::call},
                /* 16U ->  8S */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
                /* 16U -> 16U */ {0, 0, 0, 0},
                /* 16U -> 16S */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
                /* 16U -> 32S */ {NppCvt<CV_16U, CV_32S, nppiConvert_16u32s_C1R>::call, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
                /* 16U -> 32F */ {NppCvt<CV_16U, CV_32F, nppiConvert_16u32f_C1R>::call, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
                /* 16U -> 64F */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo}
            },
            {
                /* 16S ->  8U */ {NppCvt<CV_16S, CV_8U, nppiConvert_16s8u_C1R>::call, cv::gpu::convertTo, cv::gpu::convertTo, NppCvt<CV_16S, CV_8U, nppiConvert_16s8u_C4R>::call},
                /* 16S ->  8S */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
                /* 16S -> 16U */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
                /* 16S -> 16S */ {0, 0, 0, 0},
                /* 16S -> 32S */ {NppCvt<CV_16S, CV_32S, nppiConvert_16s32s_C1R>::call, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
                /* 16S -> 32F */ {NppCvt<CV_16S, CV_32F, nppiConvert_16s32f_C1R>::call, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
                /* 16S -> 64F */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo}
            },
            {
                /* 32S ->  8U */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
                /* 32S ->  8S */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
                /* 32S -> 16U */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
                /* 32S -> 16S */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
                /* 32S -> 32S */ {0, 0, 0, 0},
                /* 32S -> 32F */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
                /* 32S -> 64F */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo}
            },
            {
                /* 32F ->  8U */ {NppCvt<CV_32F, CV_8U, nppiConvert_32f8u_C1R>::call, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
                /* 32F ->  8S */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
                /* 32F -> 16U */ {NppCvt<CV_32F, CV_16U, nppiConvert_32f16u_C1R>::call, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
                /* 32F -> 16S */ {NppCvt<CV_32F, CV_16S, nppiConvert_32f16s_C1R>::call, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
                /* 32F -> 32S */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
                /* 32F -> 32F */ {0, 0, 0, 0},
                /* 32F -> 64F */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo}
            },
            {
                /* 64F ->  8U */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
                /* 64F ->  8S */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
                /* 64F -> 16U */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
                /* 64F -> 16S */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
                /* 64F -> 32S */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
                /* 64F -> 32F */ {cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo, cv::gpu::convertTo},
                /* 64F -> 64F */ {0, 0, 0, 0}
            }
        };

        CV_Assert(src.depth() <= CV_64F && src.channels() <= 4);
        CV_Assert(dst.depth() <= CV_64F);
        CV_Assert(src.size() == dst.size() && src.channels() == dst.channels());

        if (src.depth() == CV_64F || dst.depth() == CV_64F)
        {
            if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
                CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
        }

        bool aligned = isAligned(src.data, 16) && isAligned(dst.data, 16);
        if (!aligned)
        {
            cv::gpu::convertTo(src, dst);
            return;
        }

        const func_t func = funcs[src.depth()][dst.depth()][src.channels() - 1];
        CV_DbgAssert(func != 0);

        func(src, dst);
    }
    void convert(const GpuMat& src, GpuMat& dst, double alpha, double beta) const
    {
        CV_Assert(src.depth() <= CV_64F && src.channels() <= 4);
        CV_Assert(dst.depth() <= CV_64F);

        if (src.depth() == CV_64F || dst.depth() == CV_64F)
        {
            if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
                CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
        }

        cv::gpu::convertTo(src, dst, alpha, beta);
    }
    void setTo(GpuMat& m, Scalar s, const GpuMat& mask) const
    {
        if (mask.empty())
        {
            if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0)
            {
                cudaSafeCall( cudaMemset2D(m.data, m.step, 0, m.cols * m.elemSize(), m.rows) );
                return;
            }

            if (m.depth() == CV_8U)
            {
                int cn = m.channels();

                if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3]))
                {
                    int val = saturate_cast<uchar>(s[0]);
                    cudaSafeCall( cudaMemset2D(m.data, m.step, val, m.cols * m.elemSize(), m.rows) );
                    return;
                }
            }

            typedef void (*func_t)(GpuMat& src, Scalar s);
            static const func_t funcs[7][4] =
            {
                {NppSet<CV_8U, 1, nppiSet_8u_C1R>::call, cv::gpu::setTo, cv::gpu::setTo, NppSet<CV_8U, 4, nppiSet_8u_C4R>::call},
                {NppSet<CV_8S, 1, nppiSet_8s_C1R>::call, NppSet<CV_8S, 2, nppiSet_8s_C2R>::call, NppSet<CV_8S, 3, nppiSet_8s_C3R>::call, NppSet<CV_8S, 4, nppiSet_8s_C4R>::call},
                {NppSet<CV_16U, 1, nppiSet_16u_C1R>::call, NppSet<CV_16U, 2, nppiSet_16u_C2R>::call, cv::gpu::setTo, NppSet<CV_16U, 4, nppiSet_16u_C4R>::call},
                {NppSet<CV_16S, 1, nppiSet_16s_C1R>::call, NppSet<CV_16S, 2, nppiSet_16s_C2R>::call, cv::gpu::setTo, NppSet<CV_16S, 4, nppiSet_16s_C4R>::call},
                {NppSet<CV_32S, 1, nppiSet_32s_C1R>::call, cv::gpu::setTo, cv::gpu::setTo, NppSet<CV_32S, 4, nppiSet_32s_C4R>::call},
                {NppSet<CV_32F, 1, nppiSet_32f_C1R>::call, cv::gpu::setTo, cv::gpu::setTo, NppSet<CV_32F, 4, nppiSet_32f_C4R>::call},
                {cv::gpu::setTo, cv::gpu::setTo, cv::gpu::setTo, cv::gpu::setTo}
            };

            CV_Assert(m.depth() <= CV_64F && m.channels() <= 4);

            if (m.depth() == CV_64F)
            {
                if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
                    CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
            }

            funcs[m.depth()][m.channels() - 1](m, s);
        }
        else
        {
            typedef void (*func_t)(GpuMat& src, Scalar s, const GpuMat& mask);
            static const func_t funcs[7][4] =
            {
                {NppSetMask<CV_8U, 1, nppiSet_8u_C1MR>::call, cv::gpu::setTo, cv::gpu::setTo, NppSetMask<CV_8U, 4, nppiSet_8u_C4MR>::call},
                {cv::gpu::setTo, cv::gpu::setTo, cv::gpu::setTo, cv::gpu::setTo},
                {NppSetMask<CV_16U, 1, nppiSet_16u_C1MR>::call, cv::gpu::setTo, cv::gpu::setTo, NppSetMask<CV_16U, 4, nppiSet_16u_C4MR>::call},
                {NppSetMask<CV_16S, 1, nppiSet_16s_C1MR>::call, cv::gpu::setTo, cv::gpu::setTo, NppSetMask<CV_16S, 4, nppiSet_16s_C4MR>::call},
                {NppSetMask<CV_32S, 1, nppiSet_32s_C1MR>::call, cv::gpu::setTo, cv::gpu::setTo, NppSetMask<CV_32S, 4, nppiSet_32s_C4MR>::call},
                {NppSetMask<CV_32F, 1, nppiSet_32f_C1MR>::call, cv::gpu::setTo, cv::gpu::setTo, NppSetMask<CV_32F, 4, nppiSet_32f_C4MR>::call},
                {cv::gpu::setTo, cv::gpu::setTo, cv::gpu::setTo, cv::gpu::setTo}
            };

            CV_Assert(m.depth() <= CV_64F && m.channels() <= 4);

            if (m.depth() == CV_64F)
            {
                if (!TargetArchs::builtWith(NATIVE_DOUBLE) || !DeviceInfo().supports(NATIVE_DOUBLE))
                    CV_Error(CV_StsUnsupportedFormat, "The device doesn't support double");
            }

            funcs[m.depth()][m.channels() - 1](m, s, mask);
        }
    }
    void mallocPitch(void** devPtr, size_t* step, size_t width, size_t height) const
    {
        cudaSafeCall( cudaMallocPitch(devPtr, step, width, height) );
    }

    void free(void* devPtr) const
    {
        cudaFree(devPtr);
    }
};

const GpuFuncTable* gpuFuncTable()
{
    static CudaFuncTable funcTable;
    return &funcTable;
}
}

#endif // HAVE_CUDA
void cv::gpu::GpuMat::upload(const Mat& m)
{
    CV_DbgAssert(!m.empty());

    create(m.size(), m.type());

    gpuFuncTable()->copy(m, *this);
}

void cv::gpu::GpuMat::download(Mat& m) const
{
    CV_DbgAssert(!empty());

    m.create(size(), type());

    gpuFuncTable()->copy(*this, m);
}

void cv::gpu::GpuMat::copyTo(GpuMat& m) const
{
    CV_DbgAssert(!empty());

    m.create(size(), type());

    gpuFuncTable()->copy(*this, m);
}

void cv::gpu::GpuMat::copyTo(GpuMat& mat, const GpuMat& mask) const
{
    if (mask.empty())
        copyTo(mat);
    else
    {
        mat.create(size(), type());

        gpuFuncTable()->copyWithMask(*this, mat, mask);
    }
}

void cv::gpu::GpuMat::convertTo(GpuMat& dst, int rtype, double alpha, double beta) const
{
    bool noScale = fabs(alpha - 1) < std::numeric_limits<double>::epsilon() && fabs(beta) < std::numeric_limits<double>::epsilon();

    if (rtype < 0)
        rtype = type();
    else
        rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels());

    int sdepth = depth();
    int ddepth = CV_MAT_DEPTH(rtype);
    if (sdepth == ddepth && noScale)
    {
        copyTo(dst);
        return;
    }

    GpuMat temp;
    const GpuMat* psrc = this;
    if (sdepth != ddepth && psrc == &dst)
    {
        temp = *this;
        psrc = &temp;
    }

    dst.create(size(), rtype);

    if (noScale)
        gpuFuncTable()->convert(*psrc, dst);
    else
        gpuFuncTable()->convert(*psrc, dst, alpha, beta);
}

GpuMat& cv::gpu::GpuMat::setTo(Scalar s, const GpuMat& mask)
{
    CV_Assert(mask.empty() || mask.type() == CV_8UC1);
    CV_DbgAssert(!empty());

    gpuFuncTable()->setTo(*this, s, mask);

    return *this;
}

void cv::gpu::GpuMat::create(int _rows, int _cols, int _type)
{
    _type &= Mat::TYPE_MASK;

    if (rows == _rows && cols == _cols && type() == _type && data)
        return;

    if (data)
        release();

    CV_DbgAssert(_rows >= 0 && _cols >= 0);

    if (_rows > 0 && _cols > 0)
    {
        flags = Mat::MAGIC_VAL + _type;
        rows = _rows;
        cols = _cols;

        size_t esz = elemSize();

        void* devPtr;
        gpuFuncTable()->mallocPitch(&devPtr, &step, esz * cols, rows);

        // Single row must be continuous
        if (rows == 1)
            step = esz * cols;

        if (esz * cols == step)
            flags |= Mat::CONTINUOUS_FLAG;

        int64 _nettosize = static_cast<int64>(step) * rows;
        size_t nettosize = static_cast<size_t>(_nettosize);

        datastart = data = static_cast<uchar*>(devPtr);
        dataend = data + nettosize;

        refcount = static_cast<int*>(fastMalloc(sizeof(*refcount)));
        *refcount = 1;
    }
}

void cv::gpu::GpuMat::release()
{
    if (refcount && CV_XADD(refcount, -1) == 1)
    {
        fastFree(refcount);

        gpuFuncTable()->free(datastart);
    }

    data = datastart = dataend = 0;
    step = rows = cols = 0;
    refcount = 0;
}
////////////////////////////////////////////////////////////////////////
// Error handling
...
...
modules/core/src/gpu_mat.cpp
0 → 100644
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using namespace cv;
using namespace cv::gpu;

/////////////////////////// matrix operations /////////////////////////

#ifdef HAVE_CUDA

// CUDA implementation

#include "cuda/matrix_operations.hpp"

namespace
{
    template <typename T> void cudaSet_(GpuMat& src, Scalar s, cudaStream_t stream)
    {
        Scalar_<T> sf = s;
        cudev::set<T>(PtrStepSz<T>(src), sf.val, src.channels(), stream);
    }

    void cudaSet(GpuMat& src, Scalar s, cudaStream_t stream)
    {
        typedef void (*func_t)(GpuMat& src, Scalar s, cudaStream_t stream);
        static const func_t funcs[] =
        {
            cudaSet_<uchar>, cudaSet_<schar>, cudaSet_<ushort>, cudaSet_<short>, cudaSet_<int>, cudaSet_<float>, cudaSet_<double>
        };

        funcs[src.depth()](src, s, stream);
    }

    template <typename T> void cudaSet_(GpuMat& src, Scalar s, PtrStepSzb mask, cudaStream_t stream)
    {
        Scalar_<T> sf = s;
        cudev::set<T>(PtrStepSz<T>(src), sf.val, mask, src.channels(), stream);
    }

    void cudaSet(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream)
    {
        typedef void (*func_t)(GpuMat& src, Scalar s, PtrStepSzb mask, cudaStream_t stream);
        static const func_t funcs[] =
        {
            cudaSet_<uchar>, cudaSet_<schar>, cudaSet_<ushort>, cudaSet_<short>, cudaSet_<int>, cudaSet_<float>, cudaSet_<double>
        };

        funcs[src.depth()](src, s, mask, stream);
    }

    void cudaCopyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
    {
        cudev::copyWithMask(src.reshape(1), dst.reshape(1), src.elemSize1(), src.channels(), mask.reshape(1), mask.channels() != 1, stream);
    }

    void cudaConvert(const GpuMat& src, GpuMat& dst, cudaStream_t stream)
    {
        cudev::convert(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), 1.0, 0.0, stream);
    }

    void cudaConvert(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream)
    {
        cudev::convert(src.reshape(1), src.depth(), dst.reshape(1), dst.depth(), alpha, beta, stream);
    }
}
// NPP implementation

namespace
{
    //////////////////////////////////////////////////////////////////////////
    // Convert

    template<int SDEPTH, int DDEPTH> struct NppConvertFunc
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
        typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;

        typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, dst_t* pDst, int nDstStep, NppiSize oSizeROI);
    };
    template<int DDEPTH> struct NppConvertFunc<CV_32F, DDEPTH>
    {
        typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;

        typedef NppStatus (*func_ptr)(const Npp32f* pSrc, int nSrcStep, dst_t* pDst, int nDstStep, NppiSize oSizeROI, NppRoundMode eRoundMode);
    };

    template<int SDEPTH, int DDEPTH, typename NppConvertFunc<SDEPTH, DDEPTH>::func_ptr func> struct NppCvt
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;
        typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;

        static void call(const GpuMat& src, GpuMat& dst, cudaStream_t stream)
        {
            NppiSize sz;
            sz.width = src.cols;
            sz.height = src.rows;

            NppStreamHandler h(stream);

            nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz) );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
    };
    template<int DDEPTH, typename NppConvertFunc<CV_32F, DDEPTH>::func_ptr func> struct NppCvt<CV_32F, DDEPTH, func>
    {
        typedef typename NPPTypeTraits<DDEPTH>::npp_type dst_t;

        static void call(const GpuMat& src, GpuMat& dst, cudaStream_t stream)
        {
            NppiSize sz;
            sz.width = src.cols;
            sz.height = src.rows;

            NppStreamHandler h(stream);

            nppSafeCall( func(src.ptr<Npp32f>(), static_cast<int>(src.step), dst.ptr<dst_t>(), static_cast<int>(dst.step), sz, NPP_RND_NEAR) );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
    };

    //////////////////////////////////////////////////////////////////////////
    // Set

    template<int SDEPTH, int SCN> struct NppSetFunc
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        typedef NppStatus (*func_ptr)(const src_t values[], src_t* pSrc, int nSrcStep, NppiSize oSizeROI);
    };
    template<int SDEPTH> struct NppSetFunc<SDEPTH, 1>
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI);
    };
    template<int SCN> struct NppSetFunc<CV_8S, SCN>
    {
        typedef NppStatus (*func_ptr)(Npp8s values[], Npp8s* pSrc, int nSrcStep, NppiSize oSizeROI);
    };
    template<> struct NppSetFunc<CV_8S, 1>
    {
        typedef NppStatus (*func_ptr)(Npp8s val, Npp8s* pSrc, int nSrcStep, NppiSize oSizeROI);
    };

    template<int SDEPTH, int SCN, typename NppSetFunc<SDEPTH, SCN>::func_ptr func> struct NppSet
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        static void call(GpuMat& src, Scalar s, cudaStream_t stream)
        {
            NppiSize sz;
            sz.width = src.cols;
            sz.height = src.rows;

            Scalar_<src_t> nppS = s;

            NppStreamHandler h(stream);

            nppSafeCall( func(nppS.val, src.ptr<src_t>(), static_cast<int>(src.step), sz) );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
    };
    template<int SDEPTH, typename NppSetFunc<SDEPTH, 1>::func_ptr func> struct NppSet<SDEPTH, 1, func>
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        static void call(GpuMat& src, Scalar s, cudaStream_t stream)
        {
            NppiSize sz;
            sz.width = src.cols;
            sz.height = src.rows;

            Scalar_<src_t> nppS = s;

            NppStreamHandler h(stream);

            nppSafeCall( func(nppS[0], src.ptr<src_t>(), static_cast<int>(src.step), sz) );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
    };

    template<int SDEPTH, int SCN> struct NppSetMaskFunc
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        typedef NppStatus (*func_ptr)(const src_t values[], src_t* pSrc, int nSrcStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep);
    };
    template<int SDEPTH> struct NppSetMaskFunc<SDEPTH, 1>
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        typedef NppStatus (*func_ptr)(src_t val, src_t* pSrc, int nSrcStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep);
    };

    template<int SDEPTH, int SCN, typename NppSetMaskFunc<SDEPTH, SCN>::func_ptr func> struct NppSetMask
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        static void call(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream)
        {
            NppiSize sz;
            sz.width = src.cols;
            sz.height = src.rows;

            Scalar_<src_t> nppS = s;

            NppStreamHandler h(stream);

            nppSafeCall( func(nppS.val, src.ptr<src_t>(), static_cast<int>(src.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
    };
    template<int SDEPTH, typename NppSetMaskFunc<SDEPTH, 1>::func_ptr func> struct NppSetMask<SDEPTH, 1, func>
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        static void call(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream)
        {
            NppiSize sz;
            sz.width = src.cols;
            sz.height = src.rows;

            Scalar_<src_t> nppS = s;

            NppStreamHandler h(stream);

            nppSafeCall( func(nppS[0], src.ptr<src_t>(), static_cast<int>(src.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
    };

    //////////////////////////////////////////////////////////////////////////
    // CopyMasked

    template<int SDEPTH> struct NppCopyWithMaskFunc
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        typedef NppStatus (*func_ptr)(const src_t* pSrc, int nSrcStep, src_t* pDst, int nDstStep, NppiSize oSizeROI, const Npp8u* pMask, int nMaskStep);
    };

    template<int SDEPTH, typename NppCopyWithMaskFunc<SDEPTH>::func_ptr func> struct NppCopyWithMask
    {
        typedef typename NPPTypeTraits<SDEPTH>::npp_type src_t;

        static void call(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
        {
            NppiSize sz;
            sz.width = src.cols;
            sz.height = src.rows;

            NppStreamHandler h(stream);

            nppSafeCall( func(src.ptr<src_t>(), static_cast<int>(src.step), dst.ptr<src_t>(), static_cast<int>(dst.step), sz, mask.ptr<Npp8u>(), static_cast<int>(mask.step)) );

            if (stream == 0)
                cudaSafeCall( cudaDeviceSynchronize() );
        }
    };
}
// Dispatcher

namespace cv { namespace gpu
{
    void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream = 0);

    void convert(const GpuMat& src, GpuMat& dst, cudaStream_t stream = 0);
    void convert(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream = 0);

    void set(GpuMat& m, Scalar s, cudaStream_t stream = 0);
    void set(GpuMat& m, Scalar s, const GpuMat& mask, cudaStream_t stream = 0);
}}
namespace cv { namespace gpu
{
    void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream)
    {
        CV_DbgAssert( src.size() == dst.size() && src.type() == dst.type() );
        CV_Assert( src.depth() <= CV_64F && src.channels() <= 4 );
        CV_Assert( src.size() == mask.size() && mask.depth() == CV_8U && (mask.channels() == 1 || mask.channels() == src.channels()) );

        if (src.depth() == CV_64F)
        {
            CV_Assert( deviceSupports(NATIVE_DOUBLE) );
        }

        typedef void (*func_t)(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream);
        static const func_t funcs[7][4] =
        {
            /*  8U */ {NppCopyWithMask<CV_8U, nppiCopy_8u_C1MR>::call, cudaCopyWithMask, NppCopyWithMask<CV_8U, nppiCopy_8u_C3MR>::call, NppCopyWithMask<CV_8U, nppiCopy_8u_C4MR>::call},
            /*  8S */ {cudaCopyWithMask, cudaCopyWithMask, cudaCopyWithMask, cudaCopyWithMask},
            /* 16U */ {NppCopyWithMask<CV_16U, nppiCopy_16u_C1MR>::call, cudaCopyWithMask, NppCopyWithMask<CV_16U, nppiCopy_16u_C3MR>::call, NppCopyWithMask<CV_16U, nppiCopy_16u_C4MR>::call},
            /* 16S */ {NppCopyWithMask<CV_16S, nppiCopy_16s_C1MR>::call, cudaCopyWithMask, NppCopyWithMask<CV_16S, nppiCopy_16s_C3MR>::call, NppCopyWithMask<CV_16S, nppiCopy_16s_C4MR>::call},
            /* 32S */ {NppCopyWithMask<CV_32S, nppiCopy_32s_C1MR>::call, cudaCopyWithMask, NppCopyWithMask<CV_32S, nppiCopy_32s_C3MR>::call, NppCopyWithMask<CV_32S, nppiCopy_32s_C4MR>::call},
            /* 32F */ {NppCopyWithMask<CV_32F, nppiCopy_32f_C1MR>::call, cudaCopyWithMask, NppCopyWithMask<CV_32F, nppiCopy_32f_C3MR>::call, NppCopyWithMask<CV_32F, nppiCopy_32f_C4MR>::call},
            /* 64F */ {cudaCopyWithMask, cudaCopyWithMask, cudaCopyWithMask, cudaCopyWithMask}
        };

        const func_t func = mask.channels() == src.channels() ? funcs[src.depth()][src.channels() - 1] : cudaCopyWithMask;

        func(src, dst, mask, stream);
    }
    void convert(const GpuMat& src, GpuMat& dst, cudaStream_t stream)
    {
        CV_DbgAssert( src.size() == dst.size() && src.channels() == dst.channels() );
        CV_Assert( src.depth() <= CV_64F && src.channels() <= 4 );
        CV_Assert( dst.depth() <= CV_64F );

        if (src.depth() == CV_64F || dst.depth() == CV_64F)
        {
            CV_Assert( deviceSupports(NATIVE_DOUBLE) );
        }

        typedef void (*func_t)(const GpuMat& src, GpuMat& dst, cudaStream_t stream);
        static const func_t funcs[7][7][4] =
        {
            {
                /* 8U ->  8U */ {0, 0, 0, 0},
                /* 8U ->  8S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 8U -> 16U */ {NppCvt<CV_8U, CV_16U, nppiConvert_8u16u_C1R>::call, cudaConvert, cudaConvert, NppCvt<CV_8U, CV_16U, nppiConvert_8u16u_C4R>::call},
                /* 8U -> 16S */ {NppCvt<CV_8U, CV_16S, nppiConvert_8u16s_C1R>::call, cudaConvert, cudaConvert, NppCvt<CV_8U, CV_16S, nppiConvert_8u16s_C4R>::call},
                /* 8U -> 32S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 8U -> 32F */ {NppCvt<CV_8U, CV_32F, nppiConvert_8u32f_C1R>::call, cudaConvert, cudaConvert, cudaConvert},
                /* 8U -> 64F */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert}
            },
            {
                /* 8S ->  8U */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 8S ->  8S */ {0, 0, 0, 0},
                /* 8S -> 16U */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 8S -> 16S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 8S -> 32S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 8S -> 32F */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 8S -> 64F */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert}
            },
            {
                /* 16U ->  8U */ {NppCvt<CV_16U, CV_8U, nppiConvert_16u8u_C1R>::call, cudaConvert, cudaConvert, NppCvt<CV_16U, CV_8U, nppiConvert_16u8u_C4R>::call},
                /* 16U ->  8S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 16U -> 16U */ {0, 0, 0, 0},
                /* 16U -> 16S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 16U -> 32S */ {NppCvt<CV_16U, CV_32S, nppiConvert_16u32s_C1R>::call, cudaConvert, cudaConvert, cudaConvert},
                /* 16U -> 32F */ {NppCvt<CV_16U, CV_32F, nppiConvert_16u32f_C1R>::call, cudaConvert, cudaConvert, cudaConvert},
                /* 16U -> 64F */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert}
            },
            {
                /* 16S ->  8U */ {NppCvt<CV_16S, CV_8U, nppiConvert_16s8u_C1R>::call, cudaConvert, cudaConvert, NppCvt<CV_16S, CV_8U, nppiConvert_16s8u_C4R>::call},
                /* 16S ->  8S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 16S -> 16U */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 16S -> 16S */ {0, 0, 0, 0},
                /* 16S -> 32S */ {NppCvt<CV_16S, CV_32S, nppiConvert_16s32s_C1R>::call, cudaConvert, cudaConvert, cudaConvert},
                /* 16S -> 32F */ {NppCvt<CV_16S, CV_32F, nppiConvert_16s32f_C1R>::call, cudaConvert, cudaConvert, cudaConvert},
                /* 16S -> 64F */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert}
            },
            {
                /* 32S ->  8U */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 32S ->  8S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 32S -> 16U */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 32S -> 16S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 32S -> 32S */ {0, 0, 0, 0},
                /* 32S -> 32F */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 32S -> 64F */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert}
            },
            {
                /* 32F ->  8U */ {NppCvt<CV_32F, CV_8U, nppiConvert_32f8u_C1R>::call, cudaConvert, cudaConvert, cudaConvert},
                /* 32F ->  8S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 32F -> 16U */ {NppCvt<CV_32F, CV_16U, nppiConvert_32f16u_C1R>::call, cudaConvert, cudaConvert, cudaConvert},
                /* 32F -> 16S */ {NppCvt<CV_32F, CV_16S, nppiConvert_32f16s_C1R>::call, cudaConvert, cudaConvert, cudaConvert},
                /* 32F -> 32S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 32F -> 32F */ {0, 0, 0, 0},
                /* 32F -> 64F */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert}
            },
            {
                /* 64F ->  8U */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 64F ->  8S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 64F -> 16U */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 64F -> 16S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 64F -> 32S */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 64F -> 32F */ {cudaConvert, cudaConvert, cudaConvert, cudaConvert},
                /* 64F -> 64F */ {0, 0, 0, 0}
            }
        };

        const bool aligned = isAligned(src.data, 16) && isAligned(dst.data, 16);
        if (!aligned)
        {
            cudaConvert(src, dst, stream);
            return;
        }

        const func_t func = funcs[src.depth()][dst.depth()][src.channels() - 1];
        CV_DbgAssert( func != 0 );

        func(src, dst, stream);
    }
    void convert(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream)
    {
        CV_DbgAssert( src.size() == dst.size() && src.channels() == dst.channels() );
        CV_Assert( src.depth() <= CV_64F && src.channels() <= 4 );
        CV_Assert( dst.depth() <= CV_64F );

        if (src.depth() == CV_64F || dst.depth() == CV_64F)
        {
            CV_Assert( deviceSupports(NATIVE_DOUBLE) );
        }

        cudaConvert(src, dst, alpha, beta, stream);
    }
    void set(GpuMat& m, Scalar s, cudaStream_t stream)
    {
        if (s[0] == 0.0 && s[1] == 0.0 && s[2] == 0.0 && s[3] == 0.0)
        {
            if (stream)
                cudaSafeCall( cudaMemset2DAsync(m.data, m.step, 0, m.cols * m.elemSize(), m.rows, stream) );
            else
                cudaSafeCall( cudaMemset2D(m.data, m.step, 0, m.cols * m.elemSize(), m.rows) );
            return;
        }

        if (m.depth() == CV_8U)
        {
            int cn = m.channels();

            if (cn == 1 || (cn == 2 && s[0] == s[1]) || (cn == 3 && s[0] == s[1] && s[0] == s[2]) || (cn == 4 && s[0] == s[1] && s[0] == s[2] && s[0] == s[3]))
            {
                int val = saturate_cast<uchar>(s[0]);
                if (stream)
                    cudaSafeCall( cudaMemset2DAsync(m.data, m.step, val, m.cols * m.elemSize(), m.rows, stream) );
                else
                    cudaSafeCall( cudaMemset2D(m.data, m.step, val, m.cols * m.elemSize(), m.rows) );
                return;
            }
        }

        typedef void (*func_t)(GpuMat& src, Scalar s, cudaStream_t stream);
        static const func_t funcs[7][4] =
        {
            {NppSet<CV_8U, 1, nppiSet_8u_C1R>::call, cudaSet, cudaSet, NppSet<CV_8U, 4, nppiSet_8u_C4R>::call},
            {NppSet<CV_8S, 1, nppiSet_8s_C1R>::call, NppSet<CV_8S, 2, nppiSet_8s_C2R>::call, NppSet<CV_8S, 3, nppiSet_8s_C3R>::call, NppSet<CV_8S, 4, nppiSet_8s_C4R>::call},
            {NppSet<CV_16U, 1, nppiSet_16u_C1R>::call, NppSet<CV_16U, 2, nppiSet_16u_C2R>::call, cudaSet, NppSet<CV_16U, 4, nppiSet_16u_C4R>::call},
            {NppSet<CV_16S, 1, nppiSet_16s_C1R>::call, NppSet<CV_16S, 2, nppiSet_16s_C2R>::call, cudaSet, NppSet<CV_16S, 4, nppiSet_16s_C4R>::call},
            {NppSet<CV_32S, 1, nppiSet_32s_C1R>::call, cudaSet, cudaSet, NppSet<CV_32S, 4, nppiSet_32s_C4R>::call},
            {NppSet<CV_32F, 1, nppiSet_32f_C1R>::call, cudaSet, cudaSet, NppSet<CV_32F, 4, nppiSet_32f_C4R>::call},
            {cudaSet, cudaSet, cudaSet, cudaSet}
        };

        CV_Assert( m.depth() <= CV_64F && m.channels() <= 4 );

        if (m.depth() == CV_64F)
        {
            CV_Assert( deviceSupports(NATIVE_DOUBLE) );
        }

        funcs[m.depth()][m.channels() - 1](m, s, stream);
    }
    void set(GpuMat& m, Scalar s, const GpuMat& mask, cudaStream_t stream)
    {
        CV_DbgAssert( !mask.empty() );

        CV_Assert( m.depth() <= CV_64F && m.channels() <= 4 );

        if (m.depth() == CV_64F)
        {
            CV_Assert( deviceSupports(NATIVE_DOUBLE) );
        }

        typedef void (*func_t)(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream);
        static const func_t funcs[7][4] =
        {
            {NppSetMask<CV_8U, 1, nppiSet_8u_C1MR>::call, cudaSet, cudaSet, NppSetMask<CV_8U, 4, nppiSet_8u_C4MR>::call},
            {cudaSet, cudaSet, cudaSet, cudaSet},
            {NppSetMask<CV_16U, 1, nppiSet_16u_C1MR>::call, cudaSet, cudaSet, NppSetMask<CV_16U, 4, nppiSet_16u_C4MR>::call},
            {NppSetMask<CV_16S, 1, nppiSet_16s_C1MR>::call, cudaSet, cudaSet, NppSetMask<CV_16S, 4, nppiSet_16s_C4MR>::call},
            {NppSetMask<CV_32S, 1, nppiSet_32s_C1MR>::call, cudaSet, cudaSet, NppSetMask<CV_32S, 4, nppiSet_32s_C4MR>::call},
            {NppSetMask<CV_32F, 1, nppiSet_32f_C1MR>::call, cudaSet, cudaSet, NppSetMask<CV_32F, 4, nppiSet_32f_C4MR>::call},
            {cudaSet, cudaSet, cudaSet, cudaSet}
        };

        funcs[m.depth()][m.channels() - 1](m, s, mask, stream);
    }
}}

#endif // HAVE_CUDA
cv::gpu::GpuMat::GpuMat(int rows_, int cols_, int type_, void* data_, size_t step_) :
    flags(Mat::MAGIC_VAL + (type_ & Mat::TYPE_MASK)), rows(rows_), cols(cols_),
    step(step_), data((uchar*)data_), refcount(0),
    datastart((uchar*)data_), dataend((uchar*)data_)
{
    size_t minstep = cols * elemSize();

    if (step == Mat::AUTO_STEP)
    {
        step = minstep;
        flags |= Mat::CONTINUOUS_FLAG;
    }
    else
    {
        if (rows == 1)
            step = minstep;

        CV_DbgAssert( step >= minstep );

        flags |= step == minstep ? Mat::CONTINUOUS_FLAG : 0;
    }

    dataend += step * (rows - 1) + minstep;
}

cv::gpu::GpuMat::GpuMat(Size size_, int type_, void* data_, size_t step_) :
    flags(Mat::MAGIC_VAL + (type_ & Mat::TYPE_MASK)), rows(size_.height), cols(size_.width),
    step(step_), data((uchar*)data_), refcount(0),
    datastart((uchar*)data_), dataend((uchar*)data_)
{
    size_t minstep = cols * elemSize();

    if (step == Mat::AUTO_STEP)
    {
        step = minstep;
        flags |= Mat::CONTINUOUS_FLAG;
    }
    else
    {
        if (rows == 1)
            step = minstep;

        CV_DbgAssert( step >= minstep );

        flags |= step == minstep ? Mat::CONTINUOUS_FLAG : 0;
    }

    dataend += step * (rows - 1) + minstep;
}
cv::gpu::GpuMat::GpuMat(const GpuMat& m, Range rowRange_, Range colRange_)
{
    flags = m.flags;
    step = m.step; refcount = m.refcount;
    data = m.data; datastart = m.datastart; dataend = m.dataend;

    if (rowRange_ == Range::all())
    {
        rows = m.rows;
    }
    else
    {
        CV_Assert( 0 <= rowRange_.start && rowRange_.start <= rowRange_.end && rowRange_.end <= m.rows );

        rows = rowRange_.size();
        data += step * rowRange_.start;
    }

    if (colRange_ == Range::all())
    {
        cols = m.cols;
    }
    else
    {
        CV_Assert( 0 <= colRange_.start && colRange_.start <= colRange_.end && colRange_.end <= m.cols );

        cols = colRange_.size();
        data += colRange_.start * elemSize();

        flags &= cols < m.cols ? ~Mat::CONTINUOUS_FLAG : -1;
    }

    if (rows == 1)
        flags |= Mat::CONTINUOUS_FLAG;

    if (refcount)
        CV_XADD(refcount, 1);

    if (rows <= 0 || cols <= 0)
        rows = cols = 0;
}

cv::gpu::GpuMat::GpuMat(const GpuMat& m, Rect roi) :
    flags(m.flags), rows(roi.height), cols(roi.width),
    step(m.step), data(m.data + roi.y * step), refcount(m.refcount),
    datastart(m.datastart), dataend(m.dataend)
{
    flags &= roi.width < m.cols ? ~Mat::CONTINUOUS_FLAG : -1;
    data += roi.x * elemSize();

    CV_Assert( 0 <= roi.x && 0 <= roi.width && roi.x + roi.width <= m.cols && 0 <= roi.y && 0 <= roi.height && roi.y + roi.height <= m.rows );

    if (refcount)
        CV_XADD(refcount, 1);

    if (rows <= 0 || cols <= 0)
        rows = cols = 0;
}
void cv::gpu::GpuMat::create(int _rows, int _cols, int _type)
{
#ifndef HAVE_CUDA
    (void) _rows;
    (void) _cols;
    (void) _type;
    throw_no_cuda();
#else
    _type &= Mat::TYPE_MASK;

    if (rows == _rows && cols == _cols && type() == _type && data)
        return;

    if (data)
        release();

    CV_DbgAssert( _rows >= 0 && _cols >= 0 );

    if (_rows > 0 && _cols > 0)
    {
        flags = Mat::MAGIC_VAL + _type;
        rows = _rows;
        cols = _cols;

        size_t esz = elemSize();

        void* devPtr;
        cudaSafeCall( cudaMallocPitch(&devPtr, &step, esz * cols, rows) );

        // Single row must be continuous
        if (rows == 1)
            step = esz * cols;

        if (esz * cols == step)
            flags |= Mat::CONTINUOUS_FLAG;

        int64 _nettosize = static_cast<int64>(step) * rows;
        size_t nettosize = static_cast<size_t>(_nettosize);

        datastart = data = static_cast<uchar*>(devPtr);
        dataend = data + nettosize;

        refcount = static_cast<int*>(fastMalloc(sizeof(*refcount)));
        *refcount = 1;
    }
#endif
}

void cv::gpu::GpuMat::release()
{
#ifdef HAVE_CUDA
    if (refcount && CV_XADD(refcount, -1) == 1)
    {
        cudaFree(datastart);
        fastFree(refcount);
    }

    data = datastart = dataend = 0;
    step = rows = cols = 0;
    refcount = 0;
#endif
}
void cv::gpu::GpuMat::upload(const Mat& m)
{
#ifndef HAVE_CUDA
    (void) m;
    throw_no_cuda();
#else
    CV_DbgAssert( !m.empty() );

    create(m.size(), m.type());

    cudaSafeCall( cudaMemcpy2D(data, step, m.data, m.step, cols * elemSize(), rows, cudaMemcpyHostToDevice) );
#endif
}

void cv::gpu::GpuMat::download(Mat& m) const
{
#ifndef HAVE_CUDA
    (void) m;
    throw_no_cuda();
#else
    CV_DbgAssert( !empty() );

    m.create(size(), type());

    cudaSafeCall( cudaMemcpy2D(m.data, m.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToHost) );
#endif
}

void cv::gpu::GpuMat::copyTo(GpuMat& m) const
{
#ifndef HAVE_CUDA
    (void) m;
    throw_no_cuda();
#else
    CV_DbgAssert( !empty() );

    m.create(size(), type());

    cudaSafeCall( cudaMemcpy2D(m.data, m.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToDevice) );
#endif
}

void cv::gpu::GpuMat::copyTo(GpuMat& mat, const GpuMat& mask) const
{
#ifndef HAVE_CUDA
    (void) mat;
    (void) mask;
    throw_no_cuda();
#else
    CV_DbgAssert( !empty() );

    if (mask.empty())
    {
        copyTo(mat);
    }
    else
    {
        mat.create(size(), type());

        copyWithMask(*this, mat, mask);
    }
#endif
}

GpuMat& cv::gpu::GpuMat::setTo(Scalar s, const GpuMat& mask)
{
#ifndef HAVE_CUDA
    (void) s;
    (void) mask;
    throw_no_cuda();
    return *this;
#else
    CV_DbgAssert( !empty() );

    if (mask.empty())
        set(*this, s);
    else
        set(*this, s, mask);

    return *this;
#endif
}
void cv::gpu::GpuMat::convertTo(GpuMat& dst, int rtype, double alpha, double beta) const
{
#ifndef HAVE_CUDA
    (void) dst;
    (void) rtype;
    (void) alpha;
    (void) beta;
    throw_no_cuda();
#else
    bool noScale = fabs(alpha - 1) < std::numeric_limits<double>::epsilon() && fabs(beta) < std::numeric_limits<double>::epsilon();

    if (rtype < 0)
        rtype = type();
    else
        rtype = CV_MAKETYPE(CV_MAT_DEPTH(rtype), channels());

    int sdepth = depth();
    int ddepth = CV_MAT_DEPTH(rtype);
    if (sdepth == ddepth && noScale)
    {
        copyTo(dst);
        return;
    }

    GpuMat temp;
    const GpuMat* psrc = this;
    if (sdepth != ddepth && psrc == &dst)
    {
        temp = *this;
        psrc = &temp;
    }

    dst.create(size(), rtype);

    if (noScale)
        convert(*psrc, dst);
    else
        convert(*psrc, dst, alpha, beta);
#endif
}
GpuMat cv::gpu::GpuMat::reshape(int new_cn, int new_rows) const
{
    GpuMat hdr = *this;

    int cn = channels();
    if (new_cn == 0)
        new_cn = cn;

    int total_width = cols * cn;

    if ((new_cn > total_width || total_width % new_cn != 0) && new_rows == 0)
        new_rows = rows * total_width / new_cn;

    if (new_rows != 0 && new_rows != rows)
    {
        int total_size = total_width * rows;

        if (!isContinuous())
            CV_Error(cv::Error::BadStep, "The matrix is not continuous, thus its number of rows can not be changed");

        if ((unsigned)new_rows > (unsigned)total_size)
            CV_Error(cv::Error::StsOutOfRange, "Bad new number of rows");

        total_width = total_size / new_rows;

        if (total_width * new_rows != total_size)
            CV_Error(cv::Error::StsBadArg, "The total number of matrix elements is not divisible by the new number of rows");

        hdr.rows = new_rows;
        hdr.step = total_width * elemSize1();
    }

    int new_width = total_width / new_cn;

    if (new_width * new_cn != total_width)
        CV_Error(cv::Error::BadNumChannels, "The total width is not divisible by the new number of channels");

    hdr.cols = new_width;
    hdr.flags = (hdr.flags & ~CV_MAT_CN_MASK) | ((new_cn - 1) << CV_CN_SHIFT);

    return hdr;
}
void cv::gpu::GpuMat::locateROI(Size& wholeSize, Point& ofs) const
{
    CV_DbgAssert( step > 0 );

    size_t esz = elemSize();
    ptrdiff_t delta1 = data - datastart;
    ptrdiff_t delta2 = dataend - datastart;

    if (delta1 == 0)
    {
        ofs.x = ofs.y = 0;
    }
    else
    {
        ofs.y = static_cast<int>(delta1 / step);
        ofs.x = static_cast<int>((delta1 - step * ofs.y) / esz);

        CV_DbgAssert( data == datastart + ofs.y * step + ofs.x * esz );
    }

    size_t minstep = (ofs.x + cols) * esz;

    wholeSize.height = std::max(static_cast<int>((delta2 - minstep) / step + 1), ofs.y + rows);
    wholeSize.width = std::max(static_cast<int>((delta2 - step * (wholeSize.height - 1)) / esz), ofs.x + cols);
}
GpuMat& cv::gpu::GpuMat::adjustROI(int dtop, int dbottom, int dleft, int dright)
{
    Size wholeSize;
    Point ofs;
    locateROI(wholeSize, ofs);

    size_t esz = elemSize();

    int row1 = std::max(ofs.y - dtop, 0);
    int row2 = std::min(ofs.y + rows + dbottom, wholeSize.height);

    int col1 = std::max(ofs.x - dleft, 0);
    int col2 = std::min(ofs.x + cols + dright, wholeSize.width);

    data += (row1 - ofs.y) * step + (col1 - ofs.x) * esz;
    rows = row2 - row1;
    cols = col2 - col1;

    if (esz * cols == step || rows == 1)
        flags |= Mat::CONTINUOUS_FLAG;
    else
        flags &= ~Mat::CONTINUOUS_FLAG;

    return *this;
}
void cv::gpu::createContinuous(int rows, int cols, int type, GpuMat& m)
{
    const int area = rows * cols;

    if (m.empty() || m.type() != type || !m.isContinuous() || m.size().area() < area)
        m.create(1, area, type);

    m.cols = cols;
    m.rows = rows;
    m.step = m.elemSize() * cols;
    m.flags |= Mat::CONTINUOUS_FLAG;
}
void cv::gpu::ensureSizeIsEnough(int rows, int cols, int type, GpuMat& m)
{
    if (m.empty() || m.type() != type || m.data != m.datastart)
    {
        m.create(rows, cols, type);
    }
    else
    {
        const size_t esz = m.elemSize();
        const ptrdiff_t delta2 = m.dataend - m.datastart;

        const size_t minstep = m.cols * esz;

        Size wholeSize;
        wholeSize.height = std::max(static_cast<int>((delta2 - minstep) / m.step + 1), m.rows);
        wholeSize.width = std::max(static_cast<int>((delta2 - m.step * (wholeSize.height - 1)) / esz), m.cols);

        if (wholeSize.height < rows || wholeSize.width < cols)
        {
            m.create(rows, cols, type);
        }
        else
        {
            m.cols = cols;
            m.rows = rows;
        }
    }
}
GpuMat cv::gpu::allocMatFromBuf(int rows, int cols, int type, GpuMat& mat)
{
    if (!mat.empty() && mat.type() == type && mat.rows >= rows && mat.cols >= cols)
        return mat(Rect(0, 0, cols, rows));

    return mat = GpuMat(rows, cols, type);
}
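For orientation, a minimal usage sketch of the GpuMat interface defined in this file. This is not part of the commit: it is an illustrative example assuming an OpenCV build with CUDA support; the function name example() and the concrete sizes/types are placeholders, and every call used below (upload, convertTo, setTo, download, createContinuous, ensureSizeIsEnough) appears in the code above.

// Hypothetical example: round-trip a host Mat through GpuMat and reuse a scratch buffer.
#include "opencv2/core/gpu.hpp"

void example(const cv::Mat& host8u)
{
    cv::gpu::GpuMat d_src;
    d_src.upload(host8u);                          // blocking host -> device copy

    cv::gpu::GpuMat d_dst;
    d_src.convertTo(d_dst, CV_32F, 1.0 / 255, 0.0); // dispatched to NPP or the CUDA kernel

    d_dst.setTo(cv::Scalar::all(0));               // empty mask -> plain cudaMemset2D path

    // Persistent scratch buffer: createContinuous guarantees step == cols * elemSize();
    // ensureSizeIsEnough reallocates only when the existing buffer is too small.
    static cv::gpu::GpuMat buf;
    cv::gpu::createContinuous(d_src.rows, d_src.cols, CV_32FC1, buf);
    cv::gpu::ensureSizeIsEnough(d_src.rows, d_src.cols, CV_32FC1, buf);

    cv::Mat result;
    d_dst.download(result);                        // blocking device -> host copy
}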
modules/core/src/gpu_stream.cpp
...
...
@@ -72,10 +72,10 @@ void cv::gpu::Stream::release() { throw_no_cuda(); }
 namespace cv { namespace gpu
 {
-    void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream);
-    void convertTo(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream);
-    void setTo(GpuMat& src, Scalar s, cudaStream_t stream);
-    void setTo(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream);
+    void copyWithMask(const GpuMat& src, GpuMat& dst, const GpuMat& mask, cudaStream_t stream = 0);
+    void convert(const GpuMat& src, GpuMat& dst, double alpha, double beta, cudaStream_t stream = 0);
+    void set(GpuMat& m, Scalar s, cudaStream_t stream = 0);
+    void set(GpuMat& m, Scalar s, const GpuMat& mask, cudaStream_t stream = 0);
 }}

 struct Stream::Impl
...
...
@@ -217,7 +217,7 @@ void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val)
         }
     }

-    setTo(src, val, stream);
+    set(src, val, stream);
 }

 void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask)
...
...
@@ -234,7 +234,7 @@ void cv::gpu::Stream::enqueueMemSet(GpuMat& src, Scalar val, const GpuMat& mask)
     cudaStream_t stream = Impl::getStream(impl);

-    setTo(src, val, mask, stream);
+    set(src, val, mask, stream);
 }

 void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int dtype, double alpha, double beta)
...
...
@@ -265,7 +265,7 @@ void cv::gpu::Stream::enqueueConvert(const GpuMat& src, GpuMat& dst, int dtype,
     dst.create(src.size(), dtype);

     cudaStream_t stream = Impl::getStream(impl);

-    convertTo(src, dst, alpha, beta, stream);
+    convert(src, dst, alpha, beta, stream);
 }

 #if CUDART_VERSION >= 5000
...
...
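For completeness, a small sketch of how the Stream entry points touched by this hunk are typically driven from user code. It is illustrative only and not taken from the commit: it assumes a CUDA-enabled build, the function name exampleAsync() is a placeholder, and Stream::waitForCompletion() is assumed from the public cv::gpu::Stream API rather than shown in this diff.

// Hypothetical example: asynchronous memset and convert on a user-created stream.
#include "opencv2/core/gpu.hpp"

void exampleAsync(cv::gpu::GpuMat& d_img)
{
    cv::gpu::Stream stream;

    // After this commit, enqueueMemSet forwards to set(src, val, stream).
    stream.enqueueMemSet(d_img, cv::Scalar::all(0));

    // After this commit, enqueueConvert forwards to convert(src, dst, alpha, beta, stream).
    cv::gpu::GpuMat d_f32;
    stream.enqueueConvert(d_img, d_f32, CV_32F, 1.0 / 255, 0.0);

    stream.waitForCompletion(); // block until the queued work has finished
}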