Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
769564c1
Commit
769564c1
authored
Jul 26, 2010
by
Andrey Morozov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
implemented asynchronous call for gpumat::setTo(), gpumat::copyTo(), gpumat::converTo()
parent
1ead3a5b
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
57 additions
and
57 deletions
+57
-57
gpu.hpp
modules/gpu/include/opencv2/gpu/gpu.hpp
+51
-52
cuda_shared.hpp
modules/gpu/src/cuda/cuda_shared.hpp
+4
-4
matrix_operations.cu
modules/gpu/src/cuda/matrix_operations.cu
+0
-0
cudastream.cpp
modules/gpu/src/cudastream.cpp
+2
-1
No files found.
modules/gpu/include/opencv2/gpu/gpu.hpp
View file @
769564c1
...
...
@@ -49,24 +49,24 @@
namespace
cv
{
namespace
gpu
{
{
//////////////////////////////// Initialization ////////////////////////
//! This is the only function that does not throw exceptions if the library is compiled without Cuda.
CV_EXPORTS
int
getCudaEnabledDeviceCount
();
//! Functions below throw cv::Exception if the library is compiled without Cuda.
CV_EXPORTS
string
getDeviceName
(
int
device
);
CV_EXPORTS
void
setDevice
(
int
device
);
CV_EXPORTS
int
getDevice
();
CV_EXPORTS
void
setDevice
(
int
device
);
CV_EXPORTS
int
getDevice
();
CV_EXPORTS
void
getComputeCapability
(
int
device
,
int
*
major
,
int
*
minor
);
CV_EXPORTS
int
getNumberOfSMs
(
int
device
);
//////////////////////////////// GpuMat ////////////////////////////////
class
CudaStrem
;
class
CudaStre
a
m
;
//! Smart pointer for GPU memory with reference counting. Its interface is mostly similar with cv::Mat.
//! Smart pointer for GPU memory with reference counting. Its interface is mostly similar with cv::Mat.
class
CV_EXPORTS
GpuMat
{
public
:
...
...
@@ -81,7 +81,7 @@ namespace cv
GpuMat
(
Size
_size
,
int
_type
,
const
Scalar
&
_s
);
//! copy constructor
GpuMat
(
const
GpuMat
&
m
);
//! constructor for GpuMatrix headers pointing to user-allocated data
GpuMat
(
int
_rows
,
int
_cols
,
int
_type
,
void
*
_data
,
size_t
_step
=
Mat
::
AUTO_STEP
);
GpuMat
(
Size
_size
,
int
_type
,
void
*
_data
,
size_t
_step
=
Mat
::
AUTO_STEP
);
...
...
@@ -89,7 +89,7 @@ namespace cv
//! creates a matrix header for a part of the bigger matrix
GpuMat
(
const
GpuMat
&
m
,
const
Range
&
rowRange
,
const
Range
&
colRange
);
GpuMat
(
const
GpuMat
&
m
,
const
Rect
&
roi
);
//! builds GpuMat from Mat. Performs blocking upload to device.
explicit
GpuMat
(
const
Mat
&
m
);
...
...
@@ -99,7 +99,7 @@ namespace cv
//! assignment operators
GpuMat
&
operator
=
(
const
GpuMat
&
m
);
//! assignment operator. Performs blocking upload to device.
GpuMat
&
operator
=
(
const
Mat
&
m
);
GpuMat
&
operator
=
(
const
Mat
&
m
);
//! returns lightweight DevMem2D_ structure for passing to nvcc-compiled code.
// Contains just image size, data ptr and step.
...
...
@@ -110,7 +110,7 @@ namespace cv
//! Downloads data from device to host memory. Blocking calls.
operator
Mat
()
const
;
void
download
(
cv
::
Mat
&
m
)
const
;
void
download
(
cv
::
Mat
&
m
)
const
;
//! returns a new GpuMatrix header for the specified row
GpuMat
row
(
int
y
)
const
;
...
...
@@ -161,7 +161,7 @@ namespace cv
//! extracts a rectangular sub-GpuMatrix
// (this is a generalized form of row, rowRange etc.)
GpuMat
operator
()(
Range
rowRange
,
Range
colRange
)
const
;
GpuMat
operator
()(
const
Rect
&
roi
)
const
;
GpuMat
operator
()(
const
Rect
&
roi
)
const
;
//! returns true iff the GpuMatrix data is continuous
// (i.e. when there are no gaps between successive rows).
...
...
@@ -222,33 +222,33 @@ namespace cv
// Page locked memory is only needed for async and faster copying to GPU.
// It is convertible to cv::Mat header without reference counting
// so you can use it with other opencv functions.
class
CV_EXPORTS
MatPL
{
public
:
public
:
//Not supported. Now behaviour is like ALLOC_DEFAULT.
//enum { ALLOC_DEFAULT = 0, ALLOC_PORTABLE = 1, ALLOC_WRITE_COMBINED = 4 }
MatPL
();
MatPL
(
const
MatPL
&
m
);
MatPL
();
MatPL
(
const
MatPL
&
m
);
MatPL
(
int
_rows
,
int
_cols
,
int
_type
);
MatPL
(
Size
_size
,
int
_type
);
MatPL
(
Size
_size
,
int
_type
);
//! creates from cv::Mat, copying the data
explicit
MatPL
(
const
Mat
&
m
);
~
MatPL
();
~
MatPL
();
MatPL
&
operator
=
(
const
MatPL
&
m
);
//! returns deep copy of the matrix, i.e. the data is copied
MatPL
clone
()
const
;
//! allocates new matrix data unless the matrix already has specified size and type.
//! allocates new matrix data unless the matrix already has specified size and type.
void
create
(
int
_rows
,
int
_cols
,
int
_type
);
void
create
(
Size
_size
,
int
_type
);
void
create
(
Size
_size
,
int
_type
);
//! decrements reference counter and releases memory if needed.
void
release
();
...
...
@@ -256,25 +256,25 @@ namespace cv
//! returns matrix header with disabled reference counting for MatPL data.
Mat
createMatHeader
()
const
;
operator
Mat
()
const
;
// Please see cv::Mat for descriptions
bool
isContinuous
()
const
;
size_t
elemSize
()
const
;
size_t
elemSize1
()
const
;
int
type
()
const
;
int
depth
()
const
;
int
channels
()
const
;
size_t
step1
()
const
;
Size
size
()
const
;
bool
isContinuous
()
const
;
size_t
elemSize
()
const
;
size_t
elemSize1
()
const
;
int
type
()
const
;
int
depth
()
const
;
int
channels
()
const
;
size_t
step1
()
const
;
Size
size
()
const
;
bool
empty
()
const
;
// Please see cv::Mat for descriptions
int
flags
;
int
rows
,
cols
;
int
flags
;
int
rows
,
cols
;
size_t
step
;
uchar
*
data
;
int
*
refcount
;
uchar
*
data
;
int
*
refcount
;
uchar
*
datastart
;
uchar
*
dataend
;
...
...
@@ -288,37 +288,37 @@ namespace cv
class
CV_EXPORTS
CudaStream
{
public
:
CudaStream
();
CudaStream
();
~
CudaStream
();
CudaStream
(
const
CudaStream
&
);
CudaStream
(
const
CudaStream
&
);
CudaStream
&
operator
=
(
const
CudaStream
&
);
bool
queryIfComplete
();
void
waitForCompletion
();
void
waitForCompletion
();
//! downloads asynchronously.
//! downloads asynchronously.
// Warning! cv::Mat must point to page locked memory (i.e. to MatPL data or to its subMat)
void
enqueueDownload
(
const
GpuMat
&
src
,
MatPL
&
dst
);
void
enqueueDownload
(
const
GpuMat
&
src
,
Mat
&
dst
);
//! uploads asynchronously.
//! uploads asynchronously.
// Warning! cv::Mat must point to page locked memory (i.e. to MatPL data or to its ROI)
void
enqueueUpload
(
const
MatPL
&
src
,
GpuMat
&
dst
);
void
enqueueUpload
(
const
MatPL
&
src
,
GpuMat
&
dst
);
void
enqueueUpload
(
const
Mat
&
src
,
GpuMat
&
dst
);
void
enqueueCopy
(
const
GpuMat
&
src
,
GpuMat
&
dst
);
void
enqueueMemSet
(
const
GpuMat
&
src
,
Scalar
val
);
void
enqueueMemSet
(
const
GpuMat
&
src
,
Scalar
val
);
void
enqueueMemSet
(
const
GpuMat
&
src
,
Scalar
val
,
const
GpuMat
&
mask
);
// converts matrix type, ex from float to uchar depending on type
void
enqueueConvert
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
int
type
,
double
a
=
1
,
double
b
=
0
);
void
enqueueConvert
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
int
type
,
double
a
=
1
,
double
b
=
0
);
private
:
void
create
();
void
release
();
struct
Impl
;
Impl
*
impl
;
Impl
*
impl
;
friend
struct
StreamAccessor
;
};
...
...
@@ -348,7 +348,7 @@ namespace cv
//! Async version
void
operator
()
(
const
GpuMat
&
left
,
const
GpuMat
&
right
,
GpuMat
&
disparity
,
const
CudaStream
&
stream
);
//! Some heuristics that try to estimate
//! Some heuristics that try to estimate
// if current GPU will be faster than CPU in this algorithm.
// It queries current active device.
static
bool
checkIfGpuCallReasonable
();
...
...
@@ -356,11 +356,11 @@ namespace cv
int
ndisp
;
int
winSize
;
int
preset
;
// If avergeTexThreshold == 0 => post processing is disabled
// If avergeTexThreshold != 0 then disparity is set 0 in each point (x,y) where for left image
// SumOfHorizontalGradiensInWindow(x, y, winSize) < (winSize * winSize) * avergeTexThreshold
// i.e. input left image is low textured.
// i.e. input left image is low textured.
float
avergeTexThreshold
;
private
:
GpuMat
minSSD
,
leBuf
,
riBuf
;
...
...
@@ -369,4 +369,4 @@ namespace cv
}
#include "opencv2/gpu/matrix_operations.hpp"
#endif
/* __OPENCV_GPU_HPP__ */
\ No newline at end of file
#endif
/* __OPENCV_GPU_HPP__ */
modules/gpu/src/cuda/cuda_shared.hpp
View file @
769564c1
...
...
@@ -61,12 +61,12 @@ namespace cv
{
// Divides a by b and bumps the quotient by one whenever the division
// leaves a remainder. For non-negative a and positive b this is the
// ceiling of a / b (e.g. the number of b-sized chunks needed to cover a).
static inline int divUp(int a, int b)
{
    const int quotient = a / b;

    // A non-zero remainder means the truncated quotient is one short.
    if (a % b != 0)
        return quotient + 1;

    return quotient;
}
extern
"C"
void
copy_to_with_mask
(
const
DevMem2D
&
mat_src
,
const
DevMem2D
&
mat_dst
,
int
depth
,
const
DevMem2D
&
mask
,
int
channels
);
extern
"C"
void
copy_to_with_mask
(
const
DevMem2D
&
mat_src
,
const
DevMem2D
&
mat_dst
,
int
depth
,
const
DevMem2D
&
mask
,
int
channels
,
const
cudaStream_t
&
stream
=
0
);
extern
"C"
void
set_to_without_mask
(
const
DevMem2D
&
mat
,
int
depth
,
const
double
*
scalar
,
int
channels
);
extern
"C"
void
set_to_with_mask
(
const
DevMem2D
&
mat
,
int
depth
,
const
double
*
scalar
,
const
DevMem2D
&
mask
,
int
channels
);
extern
"C"
void
set_to_without_mask
(
const
DevMem2D
&
mat
,
int
depth
,
const
double
*
scalar
,
int
channels
,
const
cudaStream_t
&
stream
=
0
);
extern
"C"
void
set_to_with_mask
(
const
DevMem2D
&
mat
,
int
depth
,
const
double
*
scalar
,
const
DevMem2D
&
mask
,
int
channels
,
const
cudaStream_t
&
stream
=
0
);
extern
"C"
void
convert_to
(
const
DevMem2D
&
src
,
int
sdepth
,
DevMem2D
dst
,
int
ddepth
,
size_t
width
,
size_t
height
,
double
alpha
,
double
beta
);
extern
"C"
void
convert_to
(
const
DevMem2D
&
src
,
int
sdepth
,
DevMem2D
dst
,
int
ddepth
,
size_t
width
,
size_t
height
,
double
alpha
,
double
beta
,
const
cudaStream_t
&
stream
=
0
);
}
}
}
...
...
modules/gpu/src/cuda/matrix_operations.cu
View file @
769564c1
This diff is collapsed.
Click to expand it.
modules/gpu/src/cudastream.cpp
View file @
769564c1
...
...
@@ -74,6 +74,7 @@ struct CudaStream::Impl
cudaStream_t
stream
;
int
ref_counter
;
};
namespace
{
template
<
class
S
,
class
D
>
void
devcopy
(
const
S
&
src
,
D
&
dst
,
cudaStream_t
s
,
cudaMemcpyKind
k
)
...
...
@@ -147,7 +148,7 @@ void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, Mat& dst)
{
// if not -> allocation will be done, but after that dst will not point to page locked memory
CV_Assert
(
src
.
cols
==
dst
.
cols
&&
src
.
rows
==
dst
.
rows
&&
src
.
type
()
==
dst
.
type
()
)
devcopy
(
src
,
dst
,
impl
->
stream
,
cudaMemcpyDeviceToHost
);
devcopy
(
src
,
dst
,
impl
->
stream
,
cudaMemcpyDeviceToHost
);
}
// MatPL overload of enqueueDownload: hands src and dst to devcopy together
// with this object's impl->stream and the cudaMemcpyDeviceToHost copy kind.
// NOTE(review): no size/type check on dst is performed here before the call;
// presumably devcopy (defined elsewhere) deals with dst allocation - confirm.
void
cv
::
gpu
::
CudaStream
::
enqueueDownload
(
const
GpuMat
&
src
,
MatPL
&
dst
)
{
devcopy
(
src
,
dst
,
impl
->
stream
,
cudaMemcpyDeviceToHost
);
}
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment