Commit 769564c1 · authored Jul 26, 2010 by Andrey Morozov
implemented asynchronous calls for GpuMat::setTo(), GpuMat::copyTo(), GpuMat::convertTo()
parent 1ead3a5b
Showing 4 changed files with 114 additions and 87 deletions:

- modules/gpu/include/opencv2/gpu/gpu.hpp (+51 / -52)
- modules/gpu/src/cuda/cuda_shared.hpp (+4 / -4)
- modules/gpu/src/cuda/matrix_operations.cu (+57 / -30)
- modules/gpu/src/cudastream.cpp (+2 / -1)
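For context on the commit message, here is a minimal usage sketch of the asynchronous path this commit enables, written against the declarations visible in gpu.hpp below (MatPL, CudaStream and its enqueue* methods). The GpuMat(rows, cols, type) constructor is assumed to mirror cv::Mat, as the header's other constructors do; sizes and values are illustrative, not from the commit.

    #include <opencv2/opencv.hpp>
    #include <opencv2/gpu/gpu.hpp>

    using namespace cv;
    using namespace cv::gpu;

    int main()
    {
        MatPL host_src(480, 640, CV_8UC1);   // page-locked input buffer
        MatPL host_dst(480, 640, CV_32FC1);  // page-locked result buffer
        Mat h = host_src;                    // Mat header over the pinned data
        h.setTo(Scalar(128));                // fill with something to upload

        GpuMat d_src(480, 640, CV_8UC1);
        GpuMat d_dst(480, 640, CV_32FC1);

        CudaStream stream;
        stream.enqueueUpload(host_src, d_src);                  // async host-to-device copy
        stream.enqueueConvert(d_src, d_dst, CV_32F, 1.0 / 255); // async convertTo
        stream.enqueueDownload(d_dst, host_dst);                // async device-to-host copy

        // Other CPU work can overlap with the GPU here.
        stream.waitForCompletion();          // block until everything enqueued is done

        Mat result = host_dst;               // header over the downloaded data
        return 0;
    }

Each enqueue* call only queues work on the stream, so none of them blocks; the single waitForCompletion() at the end is the only synchronization point.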
modules/gpu/include/opencv2/gpu/gpu.hpp
@@ -49,24 +49,24 @@
 namespace cv
 {
     namespace gpu
     {
         //////////////////////////////// Initialization ////////////////////////

         //! This is the only function that do not throw exceptions if the library is compiled without Cuda.
         CV_EXPORTS int getCudaEnabledDeviceCount();

         //! Functions below throw cv::Expception if the library is compiled without Cuda.
         CV_EXPORTS string getDeviceName(int device);
         CV_EXPORTS void setDevice(int device);
         CV_EXPORTS int getDevice();
         CV_EXPORTS void getComputeCapability(int device, int* major, int* minor);
         CV_EXPORTS int getNumberOfSMs(int device);

         //////////////////////////////// GpuMat ////////////////////////////////

-        class CudaStrem;
+        class CudaStream;

         //! Smart pointer for GPU memory with reference counting. Its interface is mostly similar with cv::Mat.
         class CV_EXPORTS GpuMat
         {
         public:

@@ -81,7 +81,7 @@ namespace cv
             GpuMat(Size _size, int _type, const Scalar& _s);
             //! copy constructor
             GpuMat(const GpuMat& m);
             //! constructor for GpuMatrix headers pointing to user-allocated data
             GpuMat(int _rows, int _cols, int _type, void* _data, size_t _step = Mat::AUTO_STEP);
             GpuMat(Size _size, int _type, void* _data, size_t _step = Mat::AUTO_STEP);

@@ -89,7 +89,7 @@ namespace cv
             //! creates a matrix header for a part of the bigger matrix
             GpuMat(const GpuMat& m, const Range& rowRange, const Range& colRange);
             GpuMat(const GpuMat& m, const Rect& roi);
             //! builds GpuMat from Mat. Perfom blocking upload to device.
             explicit GpuMat(const Mat& m);

@@ -99,7 +99,7 @@ namespace cv
             //! assignment operators
             GpuMat& operator = (const GpuMat& m);
             //! assignment operator. Perfom blocking upload to device.
             GpuMat& operator = (const Mat& m);

             //! returns lightweight DevMem2D_ structure for passing to nvcc-compiled code.
             // Contains just image size, data ptr and step.

@@ -110,7 +110,7 @@ namespace cv
             //! Downloads data from device to host memory. Blocking calls.
             operator Mat() const;
             void download(cv::Mat& m) const;

             //! returns a new GpuMatrix header for the specified row
             GpuMat row(int y) const;

@@ -161,7 +161,7 @@ namespace cv
             //! extracts a rectangular sub-GpuMatrix
             // (this is a generalized form of row, rowRange etc.)
             GpuMat operator()(Range rowRange, Range colRange) const;
             GpuMat operator()(const Rect& roi) const;

             //! returns true iff the GpuMatrix data is continuous
             // (i.e. when there are no gaps between successive rows).

@@ -222,33 +222,33 @@ namespace cv
         // Page locked memory is only needed for async and faster coping to GPU.
         // It is convertable to cv::Mat header without reference counting
         // so you can use it with other opencv functions.
         class CV_EXPORTS MatPL
         {
         public:
             //Not supported. Now behaviour is like ALLOC_DEFAULT.
             //enum { ALLOC_DEFAULT = 0, ALLOC_PORTABLE = 1, ALLOC_WRITE_COMBINED = 4 }

             MatPL();
             MatPL(const MatPL& m);
             MatPL(int _rows, int _cols, int _type);
             MatPL(Size _size, int _type);
             //! creates from cv::Mat with coping data
             explicit MatPL(const Mat& m);

             ~MatPL();

             MatPL& operator = (const MatPL& m);

             //! returns deep copy of the matrix, i.e. the data is copied
             MatPL clone() const;

             //! allocates new matrix data unless the matrix already has specified size and type.
             void create(int _rows, int _cols, int _type);
             void create(Size _size, int _type);

             //! decrements reference counter and released memory if needed.
             void release();

@@ -256,25 +256,25 @@ namespace cv
             //! returns matrix header with disabled reference counting for MatPL data.
             Mat createMatHeader() const;
             operator Mat() const;

             // Please see cv::Mat for descriptions
             bool isContinuous() const;
             size_t elemSize() const;
             size_t elemSize1() const;
             int type() const;
             int depth() const;
             int channels() const;
             size_t step1() const;
             Size size() const;
             bool empty() const;

             // Please see cv::Mat for descriptions
             int flags;
             int rows, cols;
             size_t step;

             uchar* data;
             int* refcount;

             uchar* datastart;
             uchar* dataend;

@@ -288,37 +288,37 @@ namespace cv
         class CV_EXPORTS CudaStream
         {
         public:
             CudaStream();
             ~CudaStream();

             CudaStream(const CudaStream&);
             CudaStream& operator =(const CudaStream&);

             bool queryIfComplete();
             void waitForCompletion();

             //! downloads asynchronously.
             // Warning! cv::Mat must point to page locked memory (i.e. to MatPL data or to its subMat)
             void enqueueDownload(const GpuMat& src, MatPL& dst);
             void enqueueDownload(const GpuMat& src, Mat& dst);

             //! uploads asynchronously.
             // Warning! cv::Mat must point to page locked memory (i.e. to MatPL data or to its ROI)
             void enqueueUpload(const MatPL& src, GpuMat& dst);
             void enqueueUpload(const Mat& src, GpuMat& dst);

             void enqueueCopy(const GpuMat& src, GpuMat& dst);

             void enqueueMemSet(const GpuMat& src, Scalar val);
             void enqueueMemSet(const GpuMat& src, Scalar val, const GpuMat& mask);

             // converts matrix type, ex from float to uchar depending on type
             void enqueueConvert(const GpuMat& src, GpuMat& dst, int type, double a = 1, double b = 0);
         private:
             void create();
             void release();
             struct Impl;
             Impl *impl;
             friend struct StreamAccessor;
         };

@@ -348,7 +348,7 @@ namespace cv
             //! Acync version
             void operator() (const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream & stream);

             //! Some heuristics that tries to estmate
             // if current GPU will be faster then CPU in this algorithm.
             // It queries current active device.
             static bool checkIfGpuCallReasonable();

@@ -356,11 +356,11 @@ namespace cv
             int ndisp;
             int winSize;
             int preset;

             // If avergeTexThreshold == 0 => post procesing is disabled
             // If avergeTexThreshold != 0 then disparity is set 0 in each point (x,y) where for left image
             // SumOfHorizontalGradiensInWindow(x, y, winSize) < (winSize * winSize) * avergeTexThreshold
             // i.e. input left image is low textured.
             float avergeTexThreshold;
         private:
             GpuMat minSSD, leBuf, riBuf;

@@ -369,4 +369,4 @@ namespace cv
 }

 #include "opencv2/gpu/matrix_operations.hpp"

 #endif /* __OPENCV_GPU_HPP__ */
\ No newline at end of file
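A note on MatPL, as declared above: it owns page-locked (pinned) host memory, which CUDA needs for genuinely asynchronous copies, while createMatHeader() / operator Mat() expose the same buffer to the rest of OpenCV with reference counting disabled. A small sketch, assuming only the declarations shown (the blur is an arbitrary stand-in for "other opencv functions"):

    #include <opencv2/opencv.hpp>
    #include <opencv2/gpu/gpu.hpp>

    int main()
    {
        cv::gpu::MatPL pinned(480, 640, CV_8UC1);  // page-locked allocation

        // Header over the pinned buffer: no copy, and no refcount on MatPL's side,
        // so 'pinned' must outlive 'view'.
        cv::Mat view = pinned.createMatHeader();

        cv::Mat blurred;
        cv::GaussianBlur(view, blurred, cv::Size(3, 3), 0);  // ordinary OpenCV call

        return 0;
    }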
modules/gpu/src/cuda/cuda_shared.hpp
@@ -61,12 +61,12 @@ namespace cv
 {
     static inline int divUp(int a, int b) { return (a % b == 0) ? a/b : a/b + 1; }

-    extern "C" void copy_to_with_mask(const DevMem2D& mat_src, const DevMem2D& mat_dst, int depth, const DevMem2D& mask, int channels);
+    extern "C" void copy_to_with_mask(const DevMem2D& mat_src, const DevMem2D& mat_dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);

-    extern "C" void set_to_without_mask(const DevMem2D& mat, int depth, const double * scalar, int channels);
+    extern "C" void set_to_without_mask(const DevMem2D& mat, int depth, const double * scalar, int channels, const cudaStream_t & stream = 0);
-    extern "C" void set_to_with_mask(const DevMem2D& mat, int depth, const double * scalar, const DevMem2D& mask, int channels);
+    extern "C" void set_to_with_mask(const DevMem2D& mat, int depth, const double * scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream = 0);

-    extern "C" void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, size_t width, size_t height, double alpha, double beta);
+    extern "C" void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream = 0);
 }
 }
 }

The only change here is threading a cudaStream_t through each entry point, defaulted to 0 (the CUDA null stream), so existing blocking call sites compile unchanged while cudastream.cpp can pass a real stream.
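Separately from the stream plumbing, divUp() above is the ceiling division used throughout matrix_operations.cu to size CUDA grids: it rounds a/b upward so a trailing partial block of data still receives a full block of threads. A standalone check:

    #include <cassert>

    static inline int divUp(int a, int b) { return (a % b == 0) ? a / b : a / b + 1; }

    int main()
    {
        assert(divUp(100, 32) == 4);  // 100 columns, 32 threads/block -> 4 blocks, last partially idle
        assert(divUp(96, 32) == 3);   // exact multiple -> no extra block
        return 0;
    }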
modules/gpu/src/cuda/matrix_operations.cu
@@ -42,7 +42,6 @@
 #include <stddef.h>
 #include <stdio.h>
-//#include <iostream>
 #include "cuda_shared.hpp"
 #include "cuda_runtime.h"

@@ -239,19 +238,27 @@ namespace cv
         ////////////////////////////////// CopyTo /////////////////////////////////
         ///////////////////////////////////////////////////////////////////////////

-        typedef void (*CopyToFunc)(const DevMem2D& mat_src, const DevMem2D& mat_dst, const DevMem2D& mask, int channels);
+        typedef void (*CopyToFunc)(const DevMem2D& mat_src, const DevMem2D& mat_dst, const DevMem2D& mask, int channels, const cudaStream_t & stream);

         template<typename T>
-        void copy_to_with_mask_run(const DevMem2D& mat_src, const DevMem2D& mat_dst, const DevMem2D& mask, int channels)
+        void copy_to_with_mask_run(const DevMem2D& mat_src, const DevMem2D& mat_dst, const DevMem2D& mask, int channels, const cudaStream_t & stream)
         {
             dim3 threadsPerBlock(16,16, 1);
             dim3 numBlocks ( divUp(mat_src.cols * channels , threadsPerBlock.x) , divUp(mat_src.rows , threadsPerBlock.y), 1);
-            ::mat_operators::kernel_copy_to_with_mask<T><<<numBlocks,threadsPerBlock>>>
-                ((T*)mat_src.ptr, (T*)mat_dst.ptr, (unsigned char*)mask.ptr, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels);
-            cudaSafeCall ( cudaThreadSynchronize() );
+            if (stream == 0)
+            {
+                ::mat_operators::kernel_copy_to_with_mask<T><<<numBlocks,threadsPerBlock>>>
+                    ((T*)mat_src.ptr, (T*)mat_dst.ptr, (unsigned char*)mask.ptr, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels);
+                cudaSafeCall ( cudaThreadSynchronize() );
+            }
+            else
+            {
+                ::mat_operators::kernel_copy_to_with_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>
+                    ((T*)mat_src.ptr, (T*)mat_dst.ptr, (unsigned char*)mask.ptr, mat_src.cols, mat_src.rows, mat_src.step, mask.step, channels);
+            }
         }

-        extern "C" void copy_to_with_mask(const DevMem2D& mat_src, const DevMem2D& mat_dst, int depth, const DevMem2D& mask, int channels)
+        extern "C" void copy_to_with_mask(const DevMem2D& mat_src, const DevMem2D& mat_dst, int depth, const DevMem2D& mask, int channels, const cudaStream_t & stream)
         {
             static CopyToFunc tab[8] =
             {

@@ -269,7 +276,7 @@ namespace cv
             if (func == 0) cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__);

-            func(mat_src, mat_dst, mask, channels);
+            func(mat_src, mat_dst, mask, channels, stream);
         }

@@ -277,28 +284,43 @@ namespace cv
         ////////////////////////////////// SetTo //////////////////////////////////
         ///////////////////////////////////////////////////////////////////////////

-        typedef void (*SetToFunc_with_mask)(const DevMem2D& mat, const DevMem2D& mask, int channels);
+        typedef void (*SetToFunc_with_mask)(const DevMem2D& mat, const DevMem2D& mask, int channels, const cudaStream_t & stream);
-        typedef void (*SetToFunc_without_mask)(const DevMem2D& mat, int channels);
+        typedef void (*SetToFunc_without_mask)(const DevMem2D& mat, int channels, const cudaStream_t & stream);

         template <typename T>
-        void set_to_with_mask_run(const DevMem2D& mat, const DevMem2D& mask, int channels)
+        void set_to_with_mask_run(const DevMem2D& mat, const DevMem2D& mask, int channels, const cudaStream_t & stream)
         {
             dim3 threadsPerBlock(32, 8, 1);
             dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
-            ::mat_operators::kernel_set_to_with_mask<T><<<numBlocks,threadsPerBlock>>>((T*)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, channels, mask.step);
-            cudaSafeCall ( cudaThreadSynchronize() );
+            if (stream == 0)
+            {
+                ::mat_operators::kernel_set_to_with_mask<T><<<numBlocks,threadsPerBlock>>>((T*)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, channels, mask.step);
+                cudaSafeCall ( cudaThreadSynchronize() );
+            }
+            else
+            {
+                ::mat_operators::kernel_set_to_with_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>((T*)mat.ptr, (unsigned char *)mask.ptr, mat.cols, mat.rows, mat.step, channels, mask.step);
+            }
         }

         template <typename T>
-        void set_to_without_mask_run(const DevMem2D& mat, int channels)
+        void set_to_without_mask_run(const DevMem2D& mat, int channels, const cudaStream_t & stream)
         {
             dim3 threadsPerBlock(32, 8, 1);
             dim3 numBlocks (mat.cols * channels / threadsPerBlock.x + 1, mat.rows / threadsPerBlock.y + 1, 1);
-            ::mat_operators::kernel_set_to_without_mask<T><<<numBlocks,threadsPerBlock>>>((T*)mat.ptr, mat.cols, mat.rows, mat.step, channels);
-            cudaSafeCall ( cudaThreadSynchronize() );
+            if (stream == 0)
+            {
+                ::mat_operators::kernel_set_to_without_mask<T><<<numBlocks,threadsPerBlock>>>((T*)mat.ptr, mat.cols, mat.rows, mat.step, channels);
+                cudaSafeCall ( cudaThreadSynchronize() );
+            }
+            else
+            {
+                ::mat_operators::kernel_set_to_without_mask<T><<<numBlocks,threadsPerBlock, 0, stream>>>((T*)mat.ptr, mat.cols, mat.rows, mat.step, channels);
+            }
         }

-        extern "C" void set_to_without_mask(const DevMem2D& mat, int depth, const double * scalar, int channels)
+        extern "C" void set_to_without_mask(const DevMem2D& mat, int depth, const double * scalar, int channels, const cudaStream_t & stream)
         {
             double data[4];
             data[0] = scalar[0];

@@ -323,11 +345,11 @@ namespace cv
             if (func == 0) cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__);

-            func(mat, channels);
+            func(mat, channels, stream);
         }

-        extern "C" void set_to_with_mask(const DevMem2D& mat, int depth, const double * scalar, const DevMem2D& mask, int channels)
+        extern "C" void set_to_with_mask(const DevMem2D& mat, int depth, const double * scalar, const DevMem2D& mask, int channels, const cudaStream_t & stream)
         {
             double data[4];
             data[0] = scalar[0];

@@ -352,7 +374,7 @@ namespace cv
             if (func == 0) cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__);

-            func(mat, mask, channels);
+            func(mat, mask, channels, stream);
         }

@@ -360,22 +382,27 @@ namespace cv
         //////////////////////////////// ConvertTo ////////////////////////////////
         ///////////////////////////////////////////////////////////////////////////

-        typedef void (*CvtFunc)(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta);
+        typedef void (*CvtFunc)(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream);

         template<typename T, typename DT>
-        void cvt_(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta)
+        void cvt_(const DevMem2D& src, DevMem2D& dst, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream)
         {
             const int shift = ::mat_operators::ReadWriteTraits<T, DT, sizeof(T), sizeof(DT)>::shift;

             dim3 block(32, 8);
             dim3 grid(divUp(width, block.x * shift), divUp(height, block.y));
-            ::mat_operators::kernel_convert_to<T, DT><<<grid, block>>>(src.ptr, src.step, dst.ptr, dst.step, width, height, alpha, beta);
-            cudaSafeCall( cudaThreadSynchronize() );
+            if (stream == 0)
+            {
+                ::mat_operators::kernel_convert_to<T, DT><<<grid, block>>>(src.ptr, src.step, dst.ptr, dst.step, width, height, alpha, beta);
+                cudaSafeCall( cudaThreadSynchronize() );
+            }
+            else
+            {
+                ::mat_operators::kernel_convert_to<T, DT><<<grid, block, 0, stream>>>(src.ptr, src.step, dst.ptr, dst.step, width, height, alpha, beta);
+            }
         }

-        extern "C" void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, size_t width, size_t height, double alpha, double beta)
+        extern "C" void convert_to(const DevMem2D& src, int sdepth, DevMem2D dst, int ddepth, size_t width, size_t height, double alpha, double beta, const cudaStream_t & stream)
         {
             static CvtFunc tab[8][8] =
             {

@@ -406,7 +433,7 @@ namespace cv
             CvtFunc func = tab[sdepth][ddepth];
             if (func == 0)
                 cv::gpu::error("Unsupported convert operation", __FILE__, __LINE__);
-            func(src, dst, width, height, alpha, beta);
+            func(src, dst, width, height, alpha, beta, stream);
         }
         } // namespace impl
     } // namespace gpu
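The pattern repeated in copy_to_with_mask_run, the set_to_*_run functions and cvt_ above is: when stream == 0, launch on the default stream and synchronize afterwards (preserving the old blocking behaviour); otherwise launch with the caller's stream as the fourth <<<...>>> parameter and skip the synchronization. A minimal self-contained CUDA sketch of that dispatch, with a trivial fill kernel, and cudaDeviceSynchronize() standing in for the (since deprecated) cudaThreadSynchronize() this 2010 code calls:

    #include <cuda_runtime.h>

    __global__ void fill(float* p, int n, float v)
    {
        int i = blockIdx.x * blockDim.x + threadIdx.x;
        if (i < n) p[i] = v;
    }

    static void fill_run(float* p, int n, float v, cudaStream_t stream)
    {
        dim3 block(256);
        dim3 grid((n + block.x - 1) / block.x);        // same rounding as divUp

        if (stream == 0)
        {
            fill<<<grid, block>>>(p, n, v);            // default stream
            cudaDeviceSynchronize();                   // keep blocking semantics
        }
        else
        {
            fill<<<grid, block, 0, stream>>>(p, n, v); // caller's stream, no sync here
        }
    }

    int main()
    {
        const int n = 1 << 20;
        float* d = 0;
        cudaMalloc(&d, n * sizeof(float));

        fill_run(d, n, 1.0f, 0);      // blocking path

        cudaStream_t s;
        cudaStreamCreate(&s);
        fill_run(d, n, 2.0f, s);      // asynchronous path
        cudaStreamSynchronize(s);     // caller synchronizes explicitly

        cudaStreamDestroy(s);
        cudaFree(d);
        return 0;
    }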
modules/gpu/src/cudastream.cpp
@@ -74,6 +74,7 @@ struct CudaStream::Impl
     cudaStream_t stream;
     int ref_counter;
 };

 namespace
 {
     template <class S, class D> void devcopy(const S& src, D& dst, cudaStream_t s, cudaMemcpyKind k)

@@ -147,7 +148,7 @@ void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, Mat& dst)
 {
     // if not -> allocation will be done, but after that dst will not point to page locked memory
     CV_Assert( src.cols == dst.cols && src.rows == dst.rows && src.type() == dst.type() );
     devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost);
 }

 void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, MatPL& dst)
 {
     devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost);
 }
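The page-locked requirement flagged in the enqueueDownload comment above is a CUDA constraint, not an OpenCV one: cudaMemcpyAsync only overlaps copies with computation when the host buffer is pinned, and with ordinary pageable memory the transfer degrades to an effectively synchronous copy. MatPL exists to hand these functions such buffers. The same requirement in raw CUDA, independent of OpenCV:

    #include <cuda_runtime.h>

    int main()
    {
        const size_t bytes = 1 << 20;

        float* h_pinned = 0;
        cudaMallocHost(&h_pinned, bytes);   // page-locked host allocation

        float* d = 0;
        cudaMalloc(&d, bytes);

        cudaStream_t s;
        cudaStreamCreate(&s);

        // Truly asynchronous only because h_pinned is page-locked.
        cudaMemcpyAsync(d, h_pinned, bytes, cudaMemcpyHostToDevice, s);
        cudaMemcpyAsync(h_pinned, d, bytes, cudaMemcpyDeviceToHost, s);
        cudaStreamSynchronize(s);

        cudaStreamDestroy(s);
        cudaFree(d);
        cudaFreeHost(h_pinned);
        return 0;
    }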