Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
76f4b02b
Commit
76f4b02b
authored
Apr 16, 2013
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
refactored gpu::Stream (minor fixes)
parent
a52af84d
Hide whitespace changes
Inline
Side-by-side
Showing
16 changed files
with
227 additions
and
411 deletions
+227
-411
gpu.hpp
modules/core/include/opencv2/core/gpu.hpp
+37
-35
gpu.inl.hpp
modules/core/include/opencv2/core/gpu.inl.hpp
+46
-3
gpu_stream.cpp
modules/core/src/gpu_stream.cpp
+72
-158
arithm.cpp
modules/gpuarithm/src/arithm.cpp
+6
-20
gmg.cpp
modules/gpubgsegm/src/gmg.cpp
+1
-4
brute_force_matcher.cpp
modules/gpufeatures2d/src/brute_force_matcher.cpp
+4
-16
filtering.cpp
modules/gpufilters/src/filtering.cpp
+4
-16
match_template.cpp
modules/gpuimgproc/src/match_template.cpp
+5
-20
farneback.cpp
modules/gpuoptflow/src/farneback.cpp
+9
-9
disparity_bilateral_filter.cpp
modules/gpustereo/src/disparity_bilateral_filter.cpp
+1
-4
stereobp.cpp
modules/gpustereo/src/stereobp.cpp
+10
-38
stereocsbp.cpp
modules/gpustereo/src/stereocsbp.cpp
+14
-38
pyramids.cpp
modules/gpuwarping/src/pyramids.cpp
+2
-8
resize.cpp
modules/gpuwarping/src/resize.cpp
+1
-4
detector_cuda.cpp
modules/softcascade/src/detector_cuda.cpp
+7
-30
gpu-basics-similarity.cpp
..._code/gpu/gpu-basics-similarity/gpu-basics-similarity.cpp
+8
-8
No files found.
modules/core/include/opencv2/core/gpu.hpp
View file @
76f4b02b
...
@@ -51,8 +51,7 @@
...
@@ -51,8 +51,7 @@
#include "opencv2/core.hpp"
#include "opencv2/core.hpp"
#include "opencv2/core/gpu_types.hpp"
#include "opencv2/core/gpu_types.hpp"
namespace
cv
{
namespace
gpu
namespace
cv
{
namespace
gpu
{
{
//////////////////////////////// GpuMat ///////////////////////////////
//////////////////////////////// GpuMat ///////////////////////////////
...
@@ -337,59 +336,56 @@ CV_EXPORTS void registerPageLocked(Mat& m);
...
@@ -337,59 +336,56 @@ CV_EXPORTS void registerPageLocked(Mat& m);
//! unmaps the memory of matrix m, and makes it pageable again
//! unmaps the memory of matrix m, and makes it pageable again
CV_EXPORTS
void
unregisterPageLocked
(
Mat
&
m
);
CV_EXPORTS
void
unregisterPageLocked
(
Mat
&
m
);
//////////////////////////////// CudaStream ////////////////////////////////
///////////////////////////////// Stream //////////////////////////////////
// Encapculates Cuda Stream. Provides interface for async coping.
// Encapculates Cuda Stream. Provides interface for async coping.
// Passed to each function that supports async kernel execution.
// Passed to each function that supports async kernel execution.
// Reference counting is enabled
// Reference counting is enabled
.
class
CV_EXPORTS
Stream
class
CV_EXPORTS
Stream
{
{
typedef
void
(
Stream
::*
bool_type
)()
const
;
void
this_type_does_not_support_comparisons
()
const
{}
public
:
public
:
typedef
void
(
*
StreamCallback
)(
int
status
,
void
*
userData
);
//! creates a new asynchronous stream
Stream
();
Stream
();
~
Stream
();
Stream
(
const
Stream
&
);
//! queries an asynchronous stream for completion status
Stream
&
operator
=
(
const
Stream
&
)
;
bool
queryIfComplete
()
const
;
bool
queryIfComplete
();
//! waits for stream tasks to complete
void
waitForCompletion
();
void
waitForCompletion
();
//! downloads asynchronously
//! adds a callback to be called on the host after all currently enqueued items in the stream have completed
// Warning! cv::Mat must point to page locked memory (i.e. to CudaMem data or to its subMat)
void
enqueueHostCallback
(
StreamCallback
callback
,
void
*
userData
);
void
enqueueDownload
(
const
GpuMat
&
src
,
CudaMem
&
dst
);
void
enqueueDownload
(
const
GpuMat
&
src
,
Mat
&
dst
);
//! uploads asynchronously
//! return Stream object for default CUDA stream
// Warning! cv::Mat must point to page locked memory (i.e. to CudaMem data or to its ROI)
static
Stream
&
Null
();
void
enqueueUpload
(
const
CudaMem
&
src
,
GpuMat
&
dst
);
void
enqueueUpload
(
const
Mat
&
src
,
GpuMat
&
dst
);
//!
copy asynchronously
//!
returns true if stream object is not default (!= 0)
void
enqueueCopy
(
const
GpuMat
&
src
,
GpuMat
&
dst
)
;
operator
bool_type
()
const
;
//! memory set asynchronously
// obsolete methods
void
enqueueMemSet
(
GpuMat
&
src
,
Scalar
val
);
void
enqueueMemSet
(
GpuMat
&
src
,
Scalar
val
,
const
GpuMat
&
mask
);
//! converts matrix type, ex from float to uchar depending on type
void
enqueueDownload
(
const
GpuMat
&
src
,
OutputArray
dst
);
void
enqueueConvert
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
int
dtype
,
double
a
=
1
,
double
b
=
0
);
//! adds a callback to be called on the host after all currently enqueued items in the stream have completed
void
enqueueUpload
(
InputArray
src
,
GpuMat
&
dst
);
typedef
void
(
*
StreamCallback
)(
Stream
&
stream
,
int
status
,
void
*
userData
);
void
enqueueHostCallback
(
StreamCallback
callback
,
void
*
userData
);
static
Stream
&
Null
(
);
void
enqueueCopy
(
const
GpuMat
&
src
,
OutputArray
dst
);
operator
bool
()
const
;
void
enqueueMemSet
(
GpuMat
&
src
,
Scalar
val
);
void
enqueueMemSet
(
GpuMat
&
src
,
Scalar
val
,
InputArray
mask
);
private
:
void
enqueueConvert
(
const
GpuMat
&
src
,
OutputArray
dst
,
int
dtype
,
double
alpha
=
1.0
,
double
beta
=
0.0
);
struct
Impl
;
explicit
Stream
(
Impl
*
impl
);
class
Impl
;
void
create
();
void
release
();
Impl
*
impl
;
private
:
Ptr
<
Impl
>
impl_
;
Stream
(
const
Ptr
<
Impl
>&
impl
);
friend
struct
StreamAccessor
;
friend
struct
StreamAccessor
;
};
};
...
@@ -498,7 +494,13 @@ CV_EXPORTS void printCudaDeviceInfo(int device);
...
@@ -498,7 +494,13 @@ CV_EXPORTS void printCudaDeviceInfo(int device);
CV_EXPORTS
void
printShortCudaDeviceInfo
(
int
device
);
CV_EXPORTS
void
printShortCudaDeviceInfo
(
int
device
);
}}
// cv::gpu
}}
// namespace cv { namespace gpu {
namespace
cv
{
template
<>
CV_EXPORTS
void
Ptr
<
cv
::
gpu
::
Stream
::
Impl
>::
delete_obj
();
}
#include "opencv2/core/gpu.inl.hpp"
#include "opencv2/core/gpu.inl.hpp"
...
...
modules/core/include/opencv2/core/gpu.inl.hpp
View file @
76f4b02b
...
@@ -46,8 +46,7 @@
...
@@ -46,8 +46,7 @@
#include "opencv2/core/gpu.hpp"
#include "opencv2/core/gpu.hpp"
namespace
cv
{
namespace
gpu
namespace
cv
{
namespace
gpu
{
{
//////////////////////////////// GpuMat ///////////////////////////////
//////////////////////////////// GpuMat ///////////////////////////////
...
@@ -524,7 +523,51 @@ void swap(CudaMem& a, CudaMem& b)
...
@@ -524,7 +523,51 @@ void swap(CudaMem& a, CudaMem& b)
a
.
swap
(
b
);
a
.
swap
(
b
);
}
}
}}
// namespace cv { namespace gpu
//////////////////////////////// Stream ///////////////////////////////
inline
void
Stream
::
enqueueDownload
(
const
GpuMat
&
src
,
OutputArray
dst
)
{
src
.
download
(
dst
,
*
this
);
}
inline
void
Stream
::
enqueueUpload
(
InputArray
src
,
GpuMat
&
dst
)
{
dst
.
upload
(
src
,
*
this
);
}
inline
void
Stream
::
enqueueCopy
(
const
GpuMat
&
src
,
OutputArray
dst
)
{
src
.
copyTo
(
dst
,
*
this
);
}
inline
void
Stream
::
enqueueMemSet
(
GpuMat
&
src
,
Scalar
val
)
{
src
.
setTo
(
val
,
*
this
);
}
inline
void
Stream
::
enqueueMemSet
(
GpuMat
&
src
,
Scalar
val
,
InputArray
mask
)
{
src
.
setTo
(
val
,
mask
,
*
this
);
}
inline
void
Stream
::
enqueueConvert
(
const
GpuMat
&
src
,
OutputArray
dst
,
int
dtype
,
double
alpha
,
double
beta
)
{
src
.
convertTo
(
dst
,
dtype
,
alpha
,
beta
,
*
this
);
}
inline
Stream
::
Stream
(
const
Ptr
<
Impl
>&
impl
)
:
impl_
(
impl
)
{
}
}}
// namespace cv { namespace gpu {
//////////////////////////////// Mat ////////////////////////////////
//////////////////////////////// Mat ////////////////////////////////
...
...
modules/core/src/gpu_stream.cpp
View file @
76f4b02b
...
@@ -45,170 +45,103 @@
...
@@ -45,170 +45,103 @@
using
namespace
cv
;
using
namespace
cv
;
using
namespace
cv
::
gpu
;
using
namespace
cv
::
gpu
;
#if
!defined (HAVE_CUDA)
#if
ndef HAVE_CUDA
cv
::
gpu
::
Stream
::
Stream
()
{
throw_no_cuda
();
}
class
cv
::
gpu
::
Stream
::
Impl
cv
::
gpu
::
Stream
::~
Stream
()
{}
cv
::
gpu
::
Stream
::
Stream
(
const
Stream
&
)
{
throw_no_cuda
();
}
Stream
&
cv
::
gpu
::
Stream
::
operator
=
(
const
Stream
&
)
{
throw_no_cuda
();
return
*
this
;
}
bool
cv
::
gpu
::
Stream
::
queryIfComplete
()
{
throw_no_cuda
();
return
false
;
}
void
cv
::
gpu
::
Stream
::
waitForCompletion
()
{
throw_no_cuda
();
}
void
cv
::
gpu
::
Stream
::
enqueueDownload
(
const
GpuMat
&
,
Mat
&
)
{
throw_no_cuda
();
}
void
cv
::
gpu
::
Stream
::
enqueueDownload
(
const
GpuMat
&
,
CudaMem
&
)
{
throw_no_cuda
();
}
void
cv
::
gpu
::
Stream
::
enqueueUpload
(
const
CudaMem
&
,
GpuMat
&
)
{
throw_no_cuda
();
}
void
cv
::
gpu
::
Stream
::
enqueueUpload
(
const
Mat
&
,
GpuMat
&
)
{
throw_no_cuda
();
}
void
cv
::
gpu
::
Stream
::
enqueueCopy
(
const
GpuMat
&
,
GpuMat
&
)
{
throw_no_cuda
();
}
void
cv
::
gpu
::
Stream
::
enqueueMemSet
(
GpuMat
&
,
Scalar
)
{
throw_no_cuda
();
}
void
cv
::
gpu
::
Stream
::
enqueueMemSet
(
GpuMat
&
,
Scalar
,
const
GpuMat
&
)
{
throw_no_cuda
();
}
void
cv
::
gpu
::
Stream
::
enqueueConvert
(
const
GpuMat
&
,
GpuMat
&
,
int
,
double
,
double
)
{
throw_no_cuda
();
}
void
cv
::
gpu
::
Stream
::
enqueueHostCallback
(
StreamCallback
,
void
*
)
{
throw_no_cuda
();
}
Stream
&
cv
::
gpu
::
Stream
::
Null
()
{
throw_no_cuda
();
static
Stream
s
;
return
s
;
}
cv
::
gpu
::
Stream
::
operator
bool
()
const
{
throw_no_cuda
();
return
false
;
}
cv
::
gpu
::
Stream
::
Stream
(
Impl
*
)
{
throw_no_cuda
();
}
void
cv
::
gpu
::
Stream
::
create
()
{
throw_no_cuda
();
}
void
cv
::
gpu
::
Stream
::
release
()
{
throw_no_cuda
();
}
#else
/* !defined (HAVE_CUDA) */
struct
Stream
::
Impl
{
{
static
cudaStream_t
getStream
(
const
Impl
*
impl
)
public
:
Impl
(
void
*
ptr
=
0
)
{
{
return
impl
?
impl
->
stream
:
0
;
(
void
)
ptr
;
throw_no_cuda
();
}
}
};
#else
class
cv
::
gpu
::
Stream
::
Impl
{
public
:
cudaStream_t
stream
;
cudaStream_t
stream
;
int
ref_counter
;
Impl
();
Impl
(
cudaStream_t
stream
);
~
Impl
();
};
};
c
udaStream_t
cv
::
gpu
::
StreamAccessor
::
getStream
(
const
Stream
&
stream
)
c
v
::
gpu
::
Stream
::
Impl
::
Impl
()
:
stream
(
0
)
{
{
return
Stream
::
Impl
::
getStream
(
stream
.
impl
);
cudaSafeCall
(
cudaStreamCreate
(
&
stream
)
);
}
}
cv
::
gpu
::
Stream
::
Stream
()
:
impl
(
0
)
cv
::
gpu
::
Stream
::
Impl
::
Impl
(
cudaStream_t
stream_
)
:
stream
(
stream_
)
{
{
create
();
}
}
cv
::
gpu
::
Stream
::
~
Stream
()
cv
::
gpu
::
Stream
::
Impl
::~
Impl
()
{
{
release
();
if
(
stream
)
cudaStreamDestroy
(
stream
);
}
}
c
v
::
gpu
::
Stream
::
Stream
(
const
Stream
&
stream
)
:
impl
(
stream
.
impl
)
c
udaStream_t
cv
::
gpu
::
StreamAccessor
::
getStream
(
const
Stream
&
stream
)
{
{
if
(
impl
)
return
stream
.
impl_
->
stream
;
CV_XADD
(
&
impl
->
ref_counter
,
1
);
}
}
Stream
&
cv
::
gpu
::
Stream
::
operator
=
(
const
Stream
&
stream
)
#endif
{
if
(
this
!=
&
stream
)
{
release
();
impl
=
stream
.
impl
;
if
(
impl
)
CV_XADD
(
&
impl
->
ref_counter
,
1
);
}
return
*
this
;
cv
::
gpu
::
Stream
::
Stream
()
{
#ifndef HAVE_CUDA
throw_no_cuda
();
#else
impl_
=
new
Impl
;
#endif
}
}
bool
cv
::
gpu
::
Stream
::
queryIfComplete
()
bool
cv
::
gpu
::
Stream
::
queryIfComplete
()
const
{
{
cudaStream_t
stream
=
Impl
::
getStream
(
impl
);
#ifndef HAVE_CUDA
cudaError_t
err
=
cudaStreamQuery
(
stream
);
throw_no_cuda
();
return
false
;
#else
cudaError_t
err
=
cudaStreamQuery
(
impl_
->
stream
);
if
(
err
==
cudaErrorNotReady
||
err
==
cudaSuccess
)
if
(
err
==
cudaErrorNotReady
||
err
==
cudaSuccess
)
return
err
==
cudaSuccess
;
return
err
==
cudaSuccess
;
cudaSafeCall
(
err
);
cudaSafeCall
(
err
);
return
false
;
return
false
;
#endif
}
}
void
cv
::
gpu
::
Stream
::
waitForCompletion
()
void
cv
::
gpu
::
Stream
::
waitForCompletion
()
{
{
cudaStream_t
stream
=
Impl
::
getStream
(
impl
);
#ifndef HAVE_CUDA
cudaSafeCall
(
cudaStreamSynchronize
(
stream
)
);
throw_no_cuda
();
}
#else
cudaSafeCall
(
cudaStreamSynchronize
(
impl_
->
stream
)
);
void
cv
::
gpu
::
Stream
::
enqueueDownload
(
const
GpuMat
&
src
,
Mat
&
dst
)
#endif
{
// if not -> allocation will be done, but after that dst will not point to page locked memory
CV_Assert
(
src
.
size
()
==
dst
.
size
()
&&
src
.
type
()
==
dst
.
type
()
);
cudaStream_t
stream
=
Impl
::
getStream
(
impl
);
size_t
bwidth
=
src
.
cols
*
src
.
elemSize
();
cudaSafeCall
(
cudaMemcpy2DAsync
(
dst
.
data
,
dst
.
step
,
src
.
data
,
src
.
step
,
bwidth
,
src
.
rows
,
cudaMemcpyDeviceToHost
,
stream
)
);
}
void
cv
::
gpu
::
Stream
::
enqueueDownload
(
const
GpuMat
&
src
,
CudaMem
&
dst
)
{
dst
.
create
(
src
.
size
(),
src
.
type
());
cudaStream_t
stream
=
Impl
::
getStream
(
impl
);
size_t
bwidth
=
src
.
cols
*
src
.
elemSize
();
cudaSafeCall
(
cudaMemcpy2DAsync
(
dst
.
data
,
dst
.
step
,
src
.
data
,
src
.
step
,
bwidth
,
src
.
rows
,
cudaMemcpyDeviceToHost
,
stream
)
);
}
void
cv
::
gpu
::
Stream
::
enqueueUpload
(
const
CudaMem
&
src
,
GpuMat
&
dst
)
{
dst
.
create
(
src
.
size
(),
src
.
type
());
cudaStream_t
stream
=
Impl
::
getStream
(
impl
);
size_t
bwidth
=
src
.
cols
*
src
.
elemSize
();
cudaSafeCall
(
cudaMemcpy2DAsync
(
dst
.
data
,
dst
.
step
,
src
.
data
,
src
.
step
,
bwidth
,
src
.
rows
,
cudaMemcpyHostToDevice
,
stream
)
);
}
void
cv
::
gpu
::
Stream
::
enqueueUpload
(
const
Mat
&
src
,
GpuMat
&
dst
)
{
dst
.
create
(
src
.
size
(),
src
.
type
());
cudaStream_t
stream
=
Impl
::
getStream
(
impl
);
size_t
bwidth
=
src
.
cols
*
src
.
elemSize
();
cudaSafeCall
(
cudaMemcpy2DAsync
(
dst
.
data
,
dst
.
step
,
src
.
data
,
src
.
step
,
bwidth
,
src
.
rows
,
cudaMemcpyHostToDevice
,
stream
)
);
}
void
cv
::
gpu
::
Stream
::
enqueueCopy
(
const
GpuMat
&
src
,
GpuMat
&
dst
)
{
dst
.
create
(
src
.
size
(),
src
.
type
());
cudaStream_t
stream
=
Impl
::
getStream
(
impl
);
size_t
bwidth
=
src
.
cols
*
src
.
elemSize
();
cudaSafeCall
(
cudaMemcpy2DAsync
(
dst
.
data
,
dst
.
step
,
src
.
data
,
src
.
step
,
bwidth
,
src
.
rows
,
cudaMemcpyDeviceToDevice
,
stream
)
);
}
void
cv
::
gpu
::
Stream
::
enqueueMemSet
(
GpuMat
&
src
,
Scalar
val
)
{
src
.
setTo
(
val
,
*
this
);
}
void
cv
::
gpu
::
Stream
::
enqueueMemSet
(
GpuMat
&
src
,
Scalar
val
,
const
GpuMat
&
mask
)
{
src
.
setTo
(
val
,
mask
,
*
this
);
}
void
cv
::
gpu
::
Stream
::
enqueueConvert
(
const
GpuMat
&
src
,
GpuMat
&
dst
,
int
dtype
,
double
alpha
,
double
beta
)
{
src
.
convertTo
(
dst
,
dtype
,
alpha
,
beta
,
*
this
);
}
}
#if
CUDART_VERSION >= 5000
#if
defined(HAVE_CUDA) && (CUDART_VERSION >= 5000)
namespace
namespace
{
{
struct
CallbackData
struct
CallbackData
{
{
cv
::
gpu
::
Stream
::
StreamCallback
callback
;
Stream
::
StreamCallback
callback
;
void
*
userData
;
void
*
userData
;
Stream
stream
;
CallbackData
(
Stream
::
StreamCallback
callback_
,
void
*
userData_
)
:
callback
(
callback_
),
userData
(
userData_
)
{}
};
};
void
CUDART_CB
cudaStreamCallback
(
cudaStream_t
,
cudaError_t
status
,
void
*
userData
)
void
CUDART_CB
cudaStreamCallback
(
cudaStream_t
,
cudaError_t
status
,
void
*
userData
)
{
{
CallbackData
*
data
=
reinterpret_cast
<
CallbackData
*>
(
userData
);
CallbackData
*
data
=
reinterpret_cast
<
CallbackData
*>
(
userData
);
data
->
callback
(
data
->
stream
,
static_cast
<
int
>
(
status
),
data
->
userData
);
data
->
callback
(
static_cast
<
int
>
(
status
),
data
->
userData
);
delete
data
;
delete
data
;
}
}
}
}
...
@@ -217,58 +150,39 @@ namespace
...
@@ -217,58 +150,39 @@ namespace
void
cv
::
gpu
::
Stream
::
enqueueHostCallback
(
StreamCallback
callback
,
void
*
userData
)
void
cv
::
gpu
::
Stream
::
enqueueHostCallback
(
StreamCallback
callback
,
void
*
userData
)
{
{
#if CUDART_VERSION >= 5000
#ifndef HAVE_CUDA
CallbackData
*
data
=
new
CallbackData
;
data
->
callback
=
callback
;
data
->
userData
=
userData
;
data
->
stream
=
*
this
;
cudaStream_t
stream
=
Impl
::
getStream
(
impl
);
cudaSafeCall
(
cudaStreamAddCallback
(
stream
,
cudaStreamCallback
,
data
,
0
)
);
#else
(
void
)
callback
;
(
void
)
callback
;
(
void
)
userData
;
(
void
)
userData
;
CV_Error
(
CV_StsNotImplemented
,
"This function requires CUDA 5.0"
);
throw_no_cuda
();
#else
#if CUDART_VERSION < 5000
(
void
)
callback
;
(
void
)
userData
;
CV_Error
(
cv
::
Error
::
StsNotImplemented
,
"This function requires CUDA 5.0"
);
#else
CallbackData
*
data
=
new
CallbackData
(
callback
,
userData
);
cudaSafeCall
(
cudaStreamAddCallback
(
impl_
->
stream
,
cudaStreamCallback
,
data
,
0
)
);
#endif
#endif
#endif
}
}
cv
::
gpu
::
Stream
&
cv
::
gpu
::
Stream
::
Null
()
Stream
&
cv
::
gpu
::
Stream
::
Null
()
{
{
static
Stream
s
(
(
Impl
*
)
0
);
static
Stream
s
(
new
Impl
(
0
)
);
return
s
;
return
s
;
}
}
cv
::
gpu
::
Stream
::
operator
bool
()
const
cv
::
gpu
::
Stream
::
operator
bool_type
()
const
{
return
impl
&&
impl
->
stream
;
}
cv
::
gpu
::
Stream
::
Stream
(
Impl
*
impl_
)
:
impl
(
impl_
)
{
{
#ifndef HAVE_CUDA
return
0
;
#else
return
(
impl_
->
stream
!=
0
)
?
&
Stream
::
this_type_does_not_support_comparisons
:
0
;
#endif
}
}
void
cv
::
gpu
::
Stream
::
create
()
template
<>
void
cv
::
Ptr
<
Stream
::
Impl
>::
delete_obj
()
{
if
(
impl
)
release
();
cudaStream_t
stream
;
cudaSafeCall
(
cudaStreamCreate
(
&
stream
)
);
impl
=
(
Stream
::
Impl
*
)
fastMalloc
(
sizeof
(
Stream
::
Impl
));
impl
->
stream
=
stream
;
impl
->
ref_counter
=
1
;
}
void
cv
::
gpu
::
Stream
::
release
()
{
{
if
(
impl
&&
CV_XADD
(
&
impl
->
ref_counter
,
-
1
)
==
1
)
if
(
obj
)
delete
obj
;
{
cudaSafeCall
(
cudaStreamDestroy
(
impl
->
stream
)
);
cv
::
fastFree
(
impl
);
}
}
}
#endif
/* !defined (HAVE_CUDA) */
modules/gpuarithm/src/arithm.cpp
View file @
76f4b02b
...
@@ -217,10 +217,7 @@ void cv::gpu::gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const G
...
@@ -217,10 +217,7 @@ void cv::gpu::gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const G
{
{
if
(
src3
.
empty
())
if
(
src3
.
empty
())
{
{
if
(
stream
)
dst
.
setTo
(
Scalar
::
all
(
0
),
stream
);
stream
.
enqueueMemSet
(
dst
,
Scalar
::
all
(
0
));
else
dst
.
setTo
(
Scalar
::
all
(
0
));
}
}
else
else
{
{
...
@@ -230,10 +227,7 @@ void cv::gpu::gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const G
...
@@ -230,10 +227,7 @@ void cv::gpu::gemm(const GpuMat& src1, const GpuMat& src2, double alpha, const G
}
}
else
else
{
{
if
(
stream
)
src3
.
copyTo
(
dst
,
stream
);
stream
.
enqueueCopy
(
src3
,
dst
);
else
src3
.
copyTo
(
dst
);
}
}
}
}
}
}
...
@@ -336,18 +330,13 @@ void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, S
...
@@ -336,18 +330,13 @@ void cv::gpu::integralBuffered(const GpuMat& src, GpuMat& sum, GpuMat& buffer, S
cv
::
gpu
::
cudev
::
imgproc
::
shfl_integral_gpu
(
src
,
buffer
,
stream
);
cv
::
gpu
::
cudev
::
imgproc
::
shfl_integral_gpu
(
src
,
buffer
,
stream
);
sum
.
create
(
src
.
rows
+
1
,
src
.
cols
+
1
,
CV_32SC1
);
sum
.
create
(
src
.
rows
+
1
,
src
.
cols
+
1
,
CV_32SC1
);
if
(
s
)
s
.
enqueueMemSet
(
sum
,
Scalar
::
all
(
0
));
sum
.
setTo
(
Scalar
::
all
(
0
),
s
);
else
sum
.
setTo
(
Scalar
::
all
(
0
));
GpuMat
inner
=
sum
(
Rect
(
1
,
1
,
src
.
cols
,
src
.
rows
));
GpuMat
inner
=
sum
(
Rect
(
1
,
1
,
src
.
cols
,
src
.
rows
));
GpuMat
res
=
buffer
(
Rect
(
0
,
0
,
src
.
cols
,
src
.
rows
));
GpuMat
res
=
buffer
(
Rect
(
0
,
0
,
src
.
cols
,
src
.
rows
));
if
(
s
)
res
.
copyTo
(
inner
,
s
);
s
.
enqueueCopy
(
res
,
inner
);
else
res
.
copyTo
(
inner
);
}
}
else
else
{
{
...
@@ -720,10 +709,7 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
...
@@ -720,10 +709,7 @@ void cv::gpu::convolve(const GpuMat& image, const GpuMat& templ, GpuMat& result,
GpuMat
result_block
(
result_roi_size
,
result_data
.
type
(),
GpuMat
result_block
(
result_roi_size
,
result_data
.
type
(),
result_data
.
ptr
(),
result_data
.
step
);
result_data
.
ptr
(),
result_data
.
step
);
if
(
stream
)
result_block
.
copyTo
(
result_roi
,
stream
);
stream
.
enqueueCopy
(
result_block
,
result_roi
);
else
result_block
.
copyTo
(
result_roi
);
}
}
}
}
...
...
modules/gpubgsegm/src/gmg.cpp
View file @
76f4b02b
...
@@ -134,10 +134,7 @@ void cv::gpu::GMG_GPU::operator ()(const cv::gpu::GpuMat& frame, cv::gpu::GpuMat
...
@@ -134,10 +134,7 @@ void cv::gpu::GMG_GPU::operator ()(const cv::gpu::GpuMat& frame, cv::gpu::GpuMat
initialize
(
frame
.
size
(),
0.0
f
,
frame
.
depth
()
==
CV_8U
?
255.0
f
:
frame
.
depth
()
==
CV_16U
?
std
::
numeric_limits
<
ushort
>::
max
()
:
1.0
f
);
initialize
(
frame
.
size
(),
0.0
f
,
frame
.
depth
()
==
CV_8U
?
255.0
f
:
frame
.
depth
()
==
CV_16U
?
std
::
numeric_limits
<
ushort
>::
max
()
:
1.0
f
);
fgmask
.
create
(
frameSize_
,
CV_8UC1
);
fgmask
.
create
(
frameSize_
,
CV_8UC1
);
if
(
stream
)
fgmask
.
setTo
(
cv
::
Scalar
::
all
(
0
),
stream
);
stream
.
enqueueMemSet
(
fgmask
,
cv
::
Scalar
::
all
(
0
));
else
fgmask
.
setTo
(
cv
::
Scalar
::
all
(
0
));
funcs
[
frame
.
depth
()][
frame
.
channels
()
-
1
](
frame
,
fgmask
,
colors_
,
weights_
,
nfeatures_
,
frameNum_
,
learningRate
,
updateBackgroundModel
,
cv
::
gpu
::
StreamAccessor
::
getStream
(
stream
));
funcs
[
frame
.
depth
()][
frame
.
channels
()
-
1
](
frame
,
fgmask
,
colors_
,
weights_
,
nfeatures_
,
frameNum_
,
learningRate
,
updateBackgroundModel
,
cv
::
gpu
::
StreamAccessor
::
getStream
(
stream
));
...
...
modules/gpufeatures2d/src/brute_force_matcher.cpp
View file @
76f4b02b
...
@@ -497,10 +497,7 @@ void cv::gpu::BFMatcher_GPU::knnMatchSingle(const GpuMat& query, const GpuMat& t
...
@@ -497,10 +497,7 @@ void cv::gpu::BFMatcher_GPU::knnMatchSingle(const GpuMat& query, const GpuMat& t
ensureSizeIsEnough
(
nQuery
,
nTrain
,
CV_32FC1
,
allDist
);
ensureSizeIsEnough
(
nQuery
,
nTrain
,
CV_32FC1
,
allDist
);
}
}
if
(
stream
)
trainIdx
.
setTo
(
Scalar
::
all
(
-
1
),
stream
);
stream
.
enqueueMemSet
(
trainIdx
,
Scalar
::
all
(
-
1
));
else
trainIdx
.
setTo
(
Scalar
::
all
(
-
1
));
caller_t
func
=
callers
[
query
.
depth
()];
caller_t
func
=
callers
[
query
.
depth
()];
CV_Assert
(
func
!=
0
);
CV_Assert
(
func
!=
0
);
...
@@ -616,10 +613,7 @@ void cv::gpu::BFMatcher_GPU::knnMatch2Collection(const GpuMat& query, const GpuM
...
@@ -616,10 +613,7 @@ void cv::gpu::BFMatcher_GPU::knnMatch2Collection(const GpuMat& query, const GpuM
ensureSizeIsEnough
(
1
,
nQuery
,
CV_32SC2
,
imgIdx
);
ensureSizeIsEnough
(
1
,
nQuery
,
CV_32SC2
,
imgIdx
);
ensureSizeIsEnough
(
1
,
nQuery
,
CV_32FC2
,
distance
);
ensureSizeIsEnough
(
1
,
nQuery
,
CV_32FC2
,
distance
);
if
(
stream
)
trainIdx
.
setTo
(
Scalar
::
all
(
-
1
),
stream
);
stream
.
enqueueMemSet
(
trainIdx
,
Scalar
::
all
(
-
1
));
else
trainIdx
.
setTo
(
Scalar
::
all
(
-
1
));
caller_t
func
=
callers
[
query
.
depth
()];
caller_t
func
=
callers
[
query
.
depth
()];
CV_Assert
(
func
!=
0
);
CV_Assert
(
func
!=
0
);
...
@@ -803,10 +797,7 @@ void cv::gpu::BFMatcher_GPU::radiusMatchSingle(const GpuMat& query, const GpuMat
...
@@ -803,10 +797,7 @@ void cv::gpu::BFMatcher_GPU::radiusMatchSingle(const GpuMat& query, const GpuMat
ensureSizeIsEnough
(
nQuery
,
std
::
max
((
nTrain
/
100
),
10
),
CV_32FC1
,
distance
);
ensureSizeIsEnough
(
nQuery
,
std
::
max
((
nTrain
/
100
),
10
),
CV_32FC1
,
distance
);
}
}
if
(
stream
)
nMatches
.
setTo
(
Scalar
::
all
(
0
),
stream
);
stream
.
enqueueMemSet
(
nMatches
,
Scalar
::
all
(
0
));
else
nMatches
.
setTo
(
Scalar
::
all
(
0
));
caller_t
func
=
callers
[
query
.
depth
()];
caller_t
func
=
callers
[
query
.
depth
()];
CV_Assert
(
func
!=
0
);
CV_Assert
(
func
!=
0
);
...
@@ -931,10 +922,7 @@ void cv::gpu::BFMatcher_GPU::radiusMatchCollection(const GpuMat& query, GpuMat&
...
@@ -931,10 +922,7 @@ void cv::gpu::BFMatcher_GPU::radiusMatchCollection(const GpuMat& query, GpuMat&
ensureSizeIsEnough
(
nQuery
,
std
::
max
((
nQuery
/
100
),
10
),
CV_32FC1
,
distance
);
ensureSizeIsEnough
(
nQuery
,
std
::
max
((
nQuery
/
100
),
10
),
CV_32FC1
,
distance
);
}
}
if
(
stream
)
nMatches
.
setTo
(
Scalar
::
all
(
0
),
stream
);
stream
.
enqueueMemSet
(
nMatches
,
Scalar
::
all
(
0
));
else
nMatches
.
setTo
(
Scalar
::
all
(
0
));
caller_t
func
=
callers
[
query
.
depth
()];
caller_t
func
=
callers
[
query
.
depth
()];
CV_Assert
(
func
!=
0
);
CV_Assert
(
func
!=
0
);
...
...
modules/gpufilters/src/filtering.cpp
View file @
76f4b02b
...
@@ -157,10 +157,7 @@ namespace
...
@@ -157,10 +157,7 @@ namespace
if
(
roi
.
size
()
!=
src_size
)
if
(
roi
.
size
()
!=
src_size
)
{
{
if
(
stream
)
dst
.
setTo
(
Scalar
::
all
(
0
),
stream
);
stream
.
enqueueMemSet
(
dst
,
Scalar
::
all
(
0
));
else
dst
.
setTo
(
Scalar
::
all
(
0
));
}
}
normalizeROI
(
roi
,
filter2D
->
ksize
,
filter2D
->
anchor
,
src_size
);
normalizeROI
(
roi
,
filter2D
->
ksize
,
filter2D
->
anchor
,
src_size
);
...
@@ -221,10 +218,7 @@ namespace
...
@@ -221,10 +218,7 @@ namespace
if
(
roi
.
size
()
!=
src_size
)
if
(
roi
.
size
()
!=
src_size
)
{
{
if
(
stream
)
dst
.
setTo
(
Scalar
::
all
(
0
),
stream
);
stream
.
enqueueMemSet
(
dst
,
Scalar
::
all
(
0
));
else
dst
.
setTo
(
Scalar
::
all
(
0
));
}
}
ensureSizeIsEnough
(
src_size
,
bufType
,
*
pbuf
);
ensureSizeIsEnough
(
src_size
,
bufType
,
*
pbuf
);
...
@@ -487,10 +481,7 @@ namespace
...
@@ -487,10 +481,7 @@ namespace
if
(
roi
.
size
()
!=
src_size
)
if
(
roi
.
size
()
!=
src_size
)
{
{
if
(
stream
)
dst
.
setTo
(
Scalar
::
all
(
0
),
stream
);
stream
.
enqueueMemSet
(
dst
,
Scalar
::
all
(
0
));
else
dst
.
setTo
(
Scalar
::
all
(
0
));
}
}
normalizeROI
(
roi
,
filter2D
->
ksize
,
filter2D
->
anchor
,
src_size
);
normalizeROI
(
roi
,
filter2D
->
ksize
,
filter2D
->
anchor
,
src_size
);
...
@@ -557,10 +548,7 @@ namespace
...
@@ -557,10 +548,7 @@ namespace
if
(
iterations
==
0
||
_kernel
.
rows
*
_kernel
.
cols
==
1
)
if
(
iterations
==
0
||
_kernel
.
rows
*
_kernel
.
cols
==
1
)
{
{
if
(
stream
)
src
.
copyTo
(
dst
,
stream
);
stream
.
enqueueCopy
(
src
,
dst
);
else
src
.
copyTo
(
dst
);
return
;
return
;
}
}
...
...
modules/gpuimgproc/src/match_template.cpp
View file @
76f4b02b
...
@@ -196,16 +196,9 @@ namespace
...
@@ -196,16 +196,9 @@ namespace
return
;
return
;
}
}
if
(
stream
)
image
.
convertTo
(
buf
.
imagef
,
CV_32F
,
stream
);
{
templ
.
convertTo
(
buf
.
templf
,
CV_32F
,
stream
);
stream
.
enqueueConvert
(
image
,
buf
.
imagef
,
CV_32F
);
stream
.
enqueueConvert
(
templ
,
buf
.
templf
,
CV_32F
);
}
else
{
image
.
convertTo
(
buf
.
imagef
,
CV_32F
);
templ
.
convertTo
(
buf
.
templf
,
CV_32F
);
}
matchTemplate_CCORR_32F
(
buf
.
imagef
,
buf
.
templf
,
result
,
buf
,
stream
);
matchTemplate_CCORR_32F
(
buf
.
imagef
,
buf
.
templf
,
result
,
buf
,
stream
);
}
}
...
@@ -317,16 +310,8 @@ namespace
...
@@ -317,16 +310,8 @@ namespace
void
matchTemplate_CCOFF_NORMED_8U
(
void
matchTemplate_CCOFF_NORMED_8U
(
const
GpuMat
&
image
,
const
GpuMat
&
templ
,
GpuMat
&
result
,
MatchTemplateBuf
&
buf
,
Stream
&
stream
)
const
GpuMat
&
image
,
const
GpuMat
&
templ
,
GpuMat
&
result
,
MatchTemplateBuf
&
buf
,
Stream
&
stream
)
{
{
if
(
stream
)
image
.
convertTo
(
buf
.
imagef
,
CV_32F
,
stream
);
{
templ
.
convertTo
(
buf
.
templf
,
CV_32F
,
stream
);
stream
.
enqueueConvert
(
image
,
buf
.
imagef
,
CV_32F
);
stream
.
enqueueConvert
(
templ
,
buf
.
templf
,
CV_32F
);
}
else
{
image
.
convertTo
(
buf
.
imagef
,
CV_32F
);
templ
.
convertTo
(
buf
.
templf
,
CV_32F
);
}
matchTemplate_CCORR_32F
(
buf
.
imagef
,
buf
.
templf
,
result
,
buf
,
stream
);
matchTemplate_CCORR_32F
(
buf
.
imagef
,
buf
.
templf
,
result
,
buf
,
stream
);
...
...
modules/gpuoptflow/src/farneback.cpp
View file @
76f4b02b
...
@@ -235,8 +235,8 @@ void cv::gpu::FarnebackOpticalFlow::operator ()(
...
@@ -235,8 +235,8 @@ void cv::gpu::FarnebackOpticalFlow::operator ()(
break
;
break
;
}
}
streams
[
0
].
enqueueConvert
(
frame0
,
frames_
[
0
],
CV_32F
);
frame0
.
convertTo
(
frames_
[
0
],
CV_32F
,
streams
[
0
]
);
streams
[
1
].
enqueueConvert
(
frame1
,
frames_
[
1
],
CV_32F
);
frame1
.
convertTo
(
frames_
[
1
],
CV_32F
,
streams
[
1
]
);
if
(
fastPyramids
)
if
(
fastPyramids
)
{
{
...
@@ -293,21 +293,21 @@ void cv::gpu::FarnebackOpticalFlow::operator ()(
...
@@ -293,21 +293,21 @@ void cv::gpu::FarnebackOpticalFlow::operator ()(
{
{
gpu
::
resize
(
flowx0
,
curFlowX
,
Size
(
width
,
height
),
0
,
0
,
INTER_LINEAR
,
streams
[
0
]);
gpu
::
resize
(
flowx0
,
curFlowX
,
Size
(
width
,
height
),
0
,
0
,
INTER_LINEAR
,
streams
[
0
]);
gpu
::
resize
(
flowy0
,
curFlowY
,
Size
(
width
,
height
),
0
,
0
,
INTER_LINEAR
,
streams
[
1
]);
gpu
::
resize
(
flowy0
,
curFlowY
,
Size
(
width
,
height
),
0
,
0
,
INTER_LINEAR
,
streams
[
1
]);
streams
[
0
].
enqueueConvert
(
curFlowX
,
curFlowX
,
curFlowX
.
depth
(),
scale
);
curFlowX
.
convertTo
(
curFlowX
,
curFlowX
.
depth
(),
scale
,
streams
[
0
]
);
streams
[
1
].
enqueueConvert
(
curFlowY
,
curFlowY
,
curFlowY
.
depth
(),
scale
);
curFlowY
.
convertTo
(
curFlowY
,
curFlowY
.
depth
(),
scale
,
streams
[
1
]
);
}
}
else
else
{
{
streams
[
0
].
enqueueMemSet
(
curFlowX
,
0
);
curFlowX
.
setTo
(
0
,
streams
[
0
]
);
streams
[
1
].
enqueueMemSet
(
curFlowY
,
0
);
curFlowY
.
setTo
(
0
,
streams
[
1
]
);
}
}
}
}
else
else
{
{
gpu
::
resize
(
prevFlowX
,
curFlowX
,
Size
(
width
,
height
),
0
,
0
,
INTER_LINEAR
,
streams
[
0
]);
gpu
::
resize
(
prevFlowX
,
curFlowX
,
Size
(
width
,
height
),
0
,
0
,
INTER_LINEAR
,
streams
[
0
]);
gpu
::
resize
(
prevFlowY
,
curFlowY
,
Size
(
width
,
height
),
0
,
0
,
INTER_LINEAR
,
streams
[
1
]);
gpu
::
resize
(
prevFlowY
,
curFlowY
,
Size
(
width
,
height
),
0
,
0
,
INTER_LINEAR
,
streams
[
1
]);
streams
[
0
].
enqueueConvert
(
curFlowX
,
curFlowX
,
curFlowX
.
depth
(),
1.
/
pyrScale
);
curFlowX
.
convertTo
(
curFlowX
,
curFlowX
.
depth
(),
1.
/
pyrScale
,
streams
[
0
]
);
streams
[
1
].
enqueueConvert
(
curFlowY
,
curFlowY
,
curFlowY
.
depth
(),
1.
/
pyrScale
);
curFlowY
.
convertTo
(
curFlowY
,
curFlowY
.
depth
(),
1.
/
pyrScale
,
streams
[
1
]
);
}
}
GpuMat
M
=
allocMatFromBuf
(
5
*
height
,
width
,
CV_32F
,
M_
);
GpuMat
M
=
allocMatFromBuf
(
5
*
height
,
width
,
CV_32F
,
M_
);
...
@@ -343,7 +343,7 @@ void cv::gpu::FarnebackOpticalFlow::operator ()(
...
@@ -343,7 +343,7 @@ void cv::gpu::FarnebackOpticalFlow::operator ()(
{
{
cudev
::
optflow_farneback
::
gaussianBlurGpu
(
cudev
::
optflow_farneback
::
gaussianBlurGpu
(
frames_
[
i
],
smoothSize
/
2
,
blurredFrame
[
i
],
BORDER_REFLECT101
,
S
(
streams
[
i
]));
frames_
[
i
],
smoothSize
/
2
,
blurredFrame
[
i
],
BORDER_REFLECT101
,
S
(
streams
[
i
]));
gpu
::
resize
(
blurredFrame
[
i
],
pyrLevel
[
i
],
Size
(
width
,
height
),
INTER_LINEAR
,
streams
[
i
]);
gpu
::
resize
(
blurredFrame
[
i
],
pyrLevel
[
i
],
Size
(
width
,
height
),
0.0
,
0.0
,
INTER_LINEAR
,
streams
[
i
]);
cudev
::
optflow_farneback
::
polynomialExpansionGpu
(
pyrLevel
[
i
],
polyN
,
R
[
i
],
S
(
streams
[
i
]));
cudev
::
optflow_farneback
::
polynomialExpansionGpu
(
pyrLevel
[
i
],
polyN
,
R
[
i
],
S
(
streams
[
i
]));
}
}
}
}
...
...
modules/gpustereo/src/disparity_bilateral_filter.cpp
View file @
76f4b02b
...
@@ -113,10 +113,7 @@ namespace
...
@@ -113,10 +113,7 @@ namespace
if
(
&
dst
!=
&
disp
)
if
(
&
dst
!=
&
disp
)
{
{
if
(
stream
)
disp
.
copyTo
(
dst
,
stream
);
stream
.
enqueueCopy
(
disp
,
dst
);
else
disp
.
copyTo
(
dst
);
}
}
disp_bilateral_filter
<
T
>
(
dst
,
img
,
img
.
channels
(),
iters
,
StreamAccessor
::
getStream
(
stream
));
disp_bilateral_filter
<
T
>
(
dst
,
img
,
img
.
channels
(),
iters
,
StreamAccessor
::
getStream
(
stream
));
...
...
modules/gpustereo/src/stereobp.cpp
View file @
76f4b02b
...
@@ -194,20 +194,10 @@ namespace
...
@@ -194,20 +194,10 @@ namespace
if
(
rthis
.
levels
&
1
)
if
(
rthis
.
levels
&
1
)
{
{
//can clear less area
//can clear less area
if
(
stream
)
u
.
setTo
(
zero
,
stream
);
{
d
.
setTo
(
zero
,
stream
);
stream
.
enqueueMemSet
(
u
,
zero
);
l
.
setTo
(
zero
,
stream
);
stream
.
enqueueMemSet
(
d
,
zero
);
r
.
setTo
(
zero
,
stream
);
stream
.
enqueueMemSet
(
l
,
zero
);
stream
.
enqueueMemSet
(
r
,
zero
);
}
else
{
u
.
setTo
(
zero
);
d
.
setTo
(
zero
);
l
.
setTo
(
zero
);
r
.
setTo
(
zero
);
}
}
}
if
(
rthis
.
levels
>
1
)
if
(
rthis
.
levels
>
1
)
...
@@ -222,20 +212,10 @@ namespace
...
@@ -222,20 +212,10 @@ namespace
if
((
rthis
.
levels
&
1
)
==
0
)
if
((
rthis
.
levels
&
1
)
==
0
)
{
{
if
(
stream
)
u2
.
setTo
(
zero
,
stream
);
{
d2
.
setTo
(
zero
,
stream
);
stream
.
enqueueMemSet
(
u2
,
zero
);
l2
.
setTo
(
zero
,
stream
);
stream
.
enqueueMemSet
(
d2
,
zero
);
r2
.
setTo
(
zero
,
stream
);
stream
.
enqueueMemSet
(
l2
,
zero
);
stream
.
enqueueMemSet
(
r2
,
zero
);
}
else
{
u2
.
setTo
(
zero
);
d2
.
setTo
(
zero
);
l2
.
setTo
(
zero
);
r2
.
setTo
(
zero
);
}
}
}
}
}
...
@@ -313,20 +293,12 @@ namespace
...
@@ -313,20 +293,12 @@ namespace
out
=
((
disp
.
type
()
==
CV_16S
)
?
disp
:
(
out
.
create
(
rows
,
cols
,
CV_16S
),
out
));
out
=
((
disp
.
type
()
==
CV_16S
)
?
disp
:
(
out
.
create
(
rows
,
cols
,
CV_16S
),
out
));
if
(
stream
)
out
.
setTo
(
zero
,
stream
);
stream
.
enqueueMemSet
(
out
,
zero
);
else
out
.
setTo
(
zero
);
output_callers
[
funcIdx
](
u
,
d
,
l
,
r
,
datas
.
front
(),
out
,
cudaStream
);
output_callers
[
funcIdx
](
u
,
d
,
l
,
r
,
datas
.
front
(),
out
,
cudaStream
);
if
(
disp
.
type
()
!=
CV_16S
)
if
(
disp
.
type
()
!=
CV_16S
)
{
out
.
convertTo
(
disp
,
disp
.
type
(),
stream
);
if
(
stream
)
stream
.
enqueueConvert
(
out
,
disp
,
disp
.
type
());
else
out
.
convertTo
(
disp
,
disp
.
type
());
}
}
}
StereoBeliefPropagation
&
rthis
;
StereoBeliefPropagation
&
rthis
;
...
...
modules/gpustereo/src/stereocsbp.cpp
View file @
76f4b02b
...
@@ -213,36 +213,18 @@ static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat& mbuf, GpuMat& te
...
@@ -213,36 +213,18 @@ static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat& mbuf, GpuMat& te
load_constants
(
rthis
.
ndisp
,
rthis
.
max_data_term
,
rthis
.
data_weight
,
rthis
.
max_disc_term
,
rthis
.
disc_single_jump
,
rthis
.
min_disp_th
,
left
,
right
,
temp
);
load_constants
(
rthis
.
ndisp
,
rthis
.
max_data_term
,
rthis
.
data_weight
,
rthis
.
max_disc_term
,
rthis
.
disc_single_jump
,
rthis
.
min_disp_th
,
left
,
right
,
temp
);
if
(
stream
)
l
[
0
].
setTo
(
zero
,
stream
);
{
d
[
0
].
setTo
(
zero
,
stream
);
stream
.
enqueueMemSet
(
l
[
0
],
zero
);
r
[
0
].
setTo
(
zero
,
stream
);
stream
.
enqueueMemSet
(
d
[
0
],
zero
);
u
[
0
].
setTo
(
zero
,
stream
);
stream
.
enqueueMemSet
(
r
[
0
],
zero
);
stream
.
enqueueMemSet
(
u
[
0
],
zero
);
l
[
1
].
setTo
(
zero
,
stream
);
d
[
1
].
setTo
(
zero
,
stream
);
stream
.
enqueueMemSet
(
l
[
1
],
zero
);
r
[
1
].
setTo
(
zero
,
stream
);
stream
.
enqueueMemSet
(
d
[
1
],
zero
);
u
[
1
].
setTo
(
zero
,
stream
);
stream
.
enqueueMemSet
(
r
[
1
],
zero
);
stream
.
enqueueMemSet
(
u
[
1
],
zero
);
data_cost
.
setTo
(
zero
,
stream
);
data_cost_selected
.
setTo
(
zero
,
stream
);
stream
.
enqueueMemSet
(
data_cost
,
zero
);
stream
.
enqueueMemSet
(
data_cost_selected
,
zero
);
}
else
{
l
[
0
].
setTo
(
zero
);
d
[
0
].
setTo
(
zero
);
r
[
0
].
setTo
(
zero
);
u
[
0
].
setTo
(
zero
);
l
[
1
].
setTo
(
zero
);
d
[
1
].
setTo
(
zero
);
r
[
1
].
setTo
(
zero
);
u
[
1
].
setTo
(
zero
);
data_cost
.
setTo
(
zero
);
data_cost_selected
.
setTo
(
zero
);
}
int
cur_idx
=
0
;
int
cur_idx
=
0
;
...
@@ -279,20 +261,14 @@ static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat& mbuf, GpuMat& te
...
@@ -279,20 +261,14 @@ static void csbp_operator(StereoConstantSpaceBP& rthis, GpuMat& mbuf, GpuMat& te
out
=
((
disp
.
type
()
==
CV_16S
)
?
disp
:
(
out
.
create
(
rows
,
cols
,
CV_16S
),
out
));
out
=
((
disp
.
type
()
==
CV_16S
)
?
disp
:
(
out
.
create
(
rows
,
cols
,
CV_16S
),
out
));
if
(
stream
)
out
.
setTo
(
zero
,
stream
);
stream
.
enqueueMemSet
(
out
,
zero
);
else
out
.
setTo
(
zero
);
compute_disp
(
u
[
cur_idx
].
ptr
<
T
>
(),
d
[
cur_idx
].
ptr
<
T
>
(),
l
[
cur_idx
].
ptr
<
T
>
(),
r
[
cur_idx
].
ptr
<
T
>
(),
compute_disp
(
u
[
cur_idx
].
ptr
<
T
>
(),
d
[
cur_idx
].
ptr
<
T
>
(),
l
[
cur_idx
].
ptr
<
T
>
(),
r
[
cur_idx
].
ptr
<
T
>
(),
data_cost_selected
.
ptr
<
T
>
(),
disp_selected_pyr
[
cur_idx
].
ptr
<
T
>
(),
elem_step
,
out
,
nr_plane_pyr
[
0
],
cudaStream
);
data_cost_selected
.
ptr
<
T
>
(),
disp_selected_pyr
[
cur_idx
].
ptr
<
T
>
(),
elem_step
,
out
,
nr_plane_pyr
[
0
],
cudaStream
);
if
(
disp
.
type
()
!=
CV_16S
)
if
(
disp
.
type
()
!=
CV_16S
)
{
{
if
(
stream
)
out
.
convertTo
(
disp
,
disp
.
type
(),
stream
);
stream
.
enqueueConvert
(
out
,
disp
,
disp
.
type
());
else
out
.
convertTo
(
disp
,
disp
.
type
());
}
}
}
}
...
...
modules/gpuwarping/src/pyramids.cpp
View file @
76f4b02b
...
@@ -184,10 +184,7 @@ void cv::gpu::ImagePyramid::getLayer(GpuMat& outImg, Size outRoi, Stream& stream
...
@@ -184,10 +184,7 @@ void cv::gpu::ImagePyramid::getLayer(GpuMat& outImg, Size outRoi, Stream& stream
if
(
outRoi
.
width
==
layer0_
.
cols
&&
outRoi
.
height
==
layer0_
.
rows
)
if
(
outRoi
.
width
==
layer0_
.
cols
&&
outRoi
.
height
==
layer0_
.
rows
)
{
{
if
(
stream
)
layer0_
.
copyTo
(
outImg
,
stream
);
stream
.
enqueueCopy
(
layer0_
,
outImg
);
else
layer0_
.
copyTo
(
outImg
);
}
}
float
lastScale
=
1.0
f
;
float
lastScale
=
1.0
f
;
...
@@ -202,10 +199,7 @@ void cv::gpu::ImagePyramid::getLayer(GpuMat& outImg, Size outRoi, Stream& stream
...
@@ -202,10 +199,7 @@ void cv::gpu::ImagePyramid::getLayer(GpuMat& outImg, Size outRoi, Stream& stream
if
(
outRoi
.
width
==
curLayer
.
cols
&&
outRoi
.
height
==
curLayer
.
rows
)
if
(
outRoi
.
width
==
curLayer
.
cols
&&
outRoi
.
height
==
curLayer
.
rows
)
{
{
if
(
stream
)
curLayer
.
copyTo
(
outImg
,
stream
);
stream
.
enqueueCopy
(
curLayer
,
outImg
);
else
curLayer
.
copyTo
(
outImg
);
}
}
if
(
outRoi
.
width
>=
curLayer
.
cols
&&
outRoi
.
height
>=
curLayer
.
rows
)
if
(
outRoi
.
width
>=
curLayer
.
cols
&&
outRoi
.
height
>=
curLayer
.
rows
)
...
...
modules/gpuwarping/src/resize.cpp
View file @
76f4b02b
...
@@ -77,10 +77,7 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, doub
...
@@ -77,10 +77,7 @@ void cv::gpu::resize(const GpuMat& src, GpuMat& dst, Size dsize, double fx, doub
if
(
dsize
==
src
.
size
())
if
(
dsize
==
src
.
size
())
{
{
if
(
s
)
src
.
copyTo
(
dst
,
s
);
s
.
enqueueCopy
(
src
,
dst
);
else
src
.
copyTo
(
dst
);
return
;
return
;
}
}
...
...
modules/softcascade/src/detector_cuda.cpp
View file @
76f4b02b
...
@@ -335,10 +335,7 @@ struct cv::softcascade::SCascade::Fields
...
@@ -335,10 +335,7 @@ struct cv::softcascade::SCascade::Fields
void
detect
(
cv
::
gpu
::
GpuMat
&
objects
,
cv
::
gpu
::
Stream
&
s
)
const
void
detect
(
cv
::
gpu
::
GpuMat
&
objects
,
cv
::
gpu
::
Stream
&
s
)
const
{
{
if
(
s
)
objects
.
setTo
(
Scalar
::
all
(
0
),
s
);
s
.
enqueueMemSet
(
objects
,
0
);
else
cudaMemset
(
objects
.
data
,
0
,
sizeof
(
Detection
));
cudaSafeCall
(
cudaGetLastError
());
cudaSafeCall
(
cudaGetLastError
());
...
@@ -354,16 +351,8 @@ struct cv::softcascade::SCascade::Fields
...
@@ -354,16 +351,8 @@ struct cv::softcascade::SCascade::Fields
cv
::
gpu
::
GpuMat
ndetections
=
cv
::
gpu
::
GpuMat
(
objects
,
cv
::
Rect
(
0
,
0
,
sizeof
(
Detection
),
1
));
cv
::
gpu
::
GpuMat
ndetections
=
cv
::
gpu
::
GpuMat
(
objects
,
cv
::
Rect
(
0
,
0
,
sizeof
(
Detection
),
1
));
ensureSizeIsEnough
(
objects
.
rows
,
objects
.
cols
,
CV_8UC1
,
overlaps
);
ensureSizeIsEnough
(
objects
.
rows
,
objects
.
cols
,
CV_8UC1
,
overlaps
);
if
(
s
)
overlaps
.
setTo
(
0
,
s
);
{
suppressed
.
setTo
(
0
,
s
);
s
.
enqueueMemSet
(
overlaps
,
0
);
s
.
enqueueMemSet
(
suppressed
,
0
);
}
else
{
overlaps
.
setTo
(
0
);
suppressed
.
setTo
(
0
);
}
cudaStream_t
stream
=
cv
::
gpu
::
StreamAccessor
::
getStream
(
s
);
cudaStream_t
stream
=
cv
::
gpu
::
StreamAccessor
::
getStream
(
s
);
cudev
::
suppress
(
objects
,
overlaps
,
ndetections
,
suppressed
,
stream
);
cudev
::
suppress
(
objects
,
overlaps
,
ndetections
,
suppressed
,
stream
);
...
@@ -488,18 +477,12 @@ void integral(const cv::gpu::GpuMat& src, cv::gpu::GpuMat& sum, cv::gpu::GpuMat&
...
@@ -488,18 +477,12 @@ void integral(const cv::gpu::GpuMat& src, cv::gpu::GpuMat& sum, cv::gpu::GpuMat&
cv
::
softcascade
::
cudev
::
shfl_integral
(
src
,
buffer
,
stream
);
cv
::
softcascade
::
cudev
::
shfl_integral
(
src
,
buffer
,
stream
);
sum
.
create
(
src
.
rows
+
1
,
src
.
cols
+
1
,
CV_32SC1
);
sum
.
create
(
src
.
rows
+
1
,
src
.
cols
+
1
,
CV_32SC1
);
if
(
s
)
sum
.
setTo
(
cv
::
Scalar
::
all
(
0
),
s
);
s
.
enqueueMemSet
(
sum
,
cv
::
Scalar
::
all
(
0
));
else
sum
.
setTo
(
cv
::
Scalar
::
all
(
0
));
cv
::
gpu
::
GpuMat
inner
=
sum
(
cv
::
Rect
(
1
,
1
,
src
.
cols
,
src
.
rows
));
cv
::
gpu
::
GpuMat
inner
=
sum
(
cv
::
Rect
(
1
,
1
,
src
.
cols
,
src
.
rows
));
cv
::
gpu
::
GpuMat
res
=
buffer
(
cv
::
Rect
(
0
,
0
,
src
.
cols
,
src
.
rows
));
cv
::
gpu
::
GpuMat
res
=
buffer
(
cv
::
Rect
(
0
,
0
,
src
.
cols
,
src
.
rows
));
if
(
s
)
res
.
copyTo
(
inner
,
s
);
s
.
enqueueCopy
(
res
,
inner
);
else
res
.
copyTo
(
inner
);
}
}
else
{
CV_Error
(
cv
::
Error
::
GpuNotSupported
,
": CC 3.x required."
);}
else
{
CV_Error
(
cv
::
Error
::
GpuNotSupported
,
": CC 3.x required."
);}
}
}
...
@@ -541,10 +524,7 @@ void cv::softcascade::SCascade::detect(InputArray _image, InputArray _rois, Outp
...
@@ -541,10 +524,7 @@ void cv::softcascade::SCascade::detect(InputArray _image, InputArray _rois, Outp
}
}
else
else
{
{
if
(
s
)
image
.
copyTo
(
flds
.
hogluv
,
s
);
s
.
enqueueCopy
(
image
,
flds
.
hogluv
);
else
image
.
copyTo
(
flds
.
hogluv
);
}
}
flds
.
detect
(
objects
,
s
);
flds
.
detect
(
objects
,
s
);
...
@@ -571,10 +551,7 @@ using cv::gpu::GpuMat;
...
@@ -571,10 +551,7 @@ using cv::gpu::GpuMat;
inline
void
setZero
(
cv
::
gpu
::
GpuMat
&
m
,
cv
::
gpu
::
Stream
&
s
)
inline
void
setZero
(
cv
::
gpu
::
GpuMat
&
m
,
cv
::
gpu
::
Stream
&
s
)
{
{
if
(
s
)
m
.
setTo
(
0
,
s
);
s
.
enqueueMemSet
(
m
,
0
);
else
m
.
setTo
(
0
);
}
}
struct
SeparablePreprocessor
:
public
cv
::
softcascade
::
ChannelsProcessor
struct
SeparablePreprocessor
:
public
cv
::
softcascade
::
ChannelsProcessor
...
...
samples/cpp/tutorial_code/gpu/gpu-basics-similarity/gpu-basics-similarity.cpp
View file @
76f4b02b
...
@@ -368,8 +368,8 @@ Scalar getMSSIM_GPU_optimized( const Mat& i1, const Mat& i2, BufferMSSIM& b)
...
@@ -368,8 +368,8 @@ Scalar getMSSIM_GPU_optimized( const Mat& i1, const Mat& i2, BufferMSSIM& b)
gpu
::
Stream
stream
;
gpu
::
Stream
stream
;
stream
.
enqueueConvert
(
b
.
gI1
,
b
.
t1
,
CV_32F
);
b
.
gI1
.
convertTo
(
b
.
t1
,
CV_32F
,
stream
);
stream
.
enqueueConvert
(
b
.
gI2
,
b
.
t2
,
CV_32F
);
b
.
gI2
.
convertTo
(
b
.
t2
,
CV_32F
,
stream
);
gpu
::
split
(
b
.
t1
,
b
.
vI1
,
stream
);
gpu
::
split
(
b
.
t1
,
b
.
vI1
,
stream
);
gpu
::
split
(
b
.
t2
,
b
.
vI2
,
stream
);
gpu
::
split
(
b
.
t2
,
b
.
vI2
,
stream
);
...
@@ -379,16 +379,16 @@ Scalar getMSSIM_GPU_optimized( const Mat& i1, const Mat& i2, BufferMSSIM& b)
...
@@ -379,16 +379,16 @@ Scalar getMSSIM_GPU_optimized( const Mat& i1, const Mat& i2, BufferMSSIM& b)
for
(
int
i
=
0
;
i
<
b
.
gI1
.
channels
();
++
i
)
for
(
int
i
=
0
;
i
<
b
.
gI1
.
channels
();
++
i
)
{
{
gpu
::
multiply
(
b
.
vI2
[
i
],
b
.
vI2
[
i
],
b
.
I2_2
,
stream
);
// I2^2
gpu
::
multiply
(
b
.
vI2
[
i
],
b
.
vI2
[
i
],
b
.
I2_2
,
1
,
-
1
,
stream
);
// I2^2
gpu
::
multiply
(
b
.
vI1
[
i
],
b
.
vI1
[
i
],
b
.
I1_2
,
stream
);
// I1^2
gpu
::
multiply
(
b
.
vI1
[
i
],
b
.
vI1
[
i
],
b
.
I1_2
,
1
,
-
1
,
stream
);
// I1^2
gpu
::
multiply
(
b
.
vI1
[
i
],
b
.
vI2
[
i
],
b
.
I1_I2
,
stream
);
// I1 * I2
gpu
::
multiply
(
b
.
vI1
[
i
],
b
.
vI2
[
i
],
b
.
I1_I2
,
1
,
-
1
,
stream
);
// I1 * I2
gpu
::
GaussianBlur
(
b
.
vI1
[
i
],
b
.
mu1
,
Size
(
11
,
11
),
buf
,
1.5
,
0
,
BORDER_DEFAULT
,
-
1
,
stream
);
gpu
::
GaussianBlur
(
b
.
vI1
[
i
],
b
.
mu1
,
Size
(
11
,
11
),
buf
,
1.5
,
0
,
BORDER_DEFAULT
,
-
1
,
stream
);
gpu
::
GaussianBlur
(
b
.
vI2
[
i
],
b
.
mu2
,
Size
(
11
,
11
),
buf
,
1.5
,
0
,
BORDER_DEFAULT
,
-
1
,
stream
);
gpu
::
GaussianBlur
(
b
.
vI2
[
i
],
b
.
mu2
,
Size
(
11
,
11
),
buf
,
1.5
,
0
,
BORDER_DEFAULT
,
-
1
,
stream
);
gpu
::
multiply
(
b
.
mu1
,
b
.
mu1
,
b
.
mu1_2
,
stream
);
gpu
::
multiply
(
b
.
mu1
,
b
.
mu1
,
b
.
mu1_2
,
1
,
-
1
,
stream
);
gpu
::
multiply
(
b
.
mu2
,
b
.
mu2
,
b
.
mu2_2
,
stream
);
gpu
::
multiply
(
b
.
mu2
,
b
.
mu2
,
b
.
mu2_2
,
1
,
-
1
,
stream
);
gpu
::
multiply
(
b
.
mu1
,
b
.
mu2
,
b
.
mu1_mu2
,
stream
);
gpu
::
multiply
(
b
.
mu1
,
b
.
mu2
,
b
.
mu1_mu2
,
1
,
-
1
,
stream
);
gpu
::
GaussianBlur
(
b
.
I1_2
,
b
.
sigma1_2
,
Size
(
11
,
11
),
buf
,
1.5
,
0
,
BORDER_DEFAULT
,
-
1
,
stream
);
gpu
::
GaussianBlur
(
b
.
I1_2
,
b
.
sigma1_2
,
Size
(
11
,
11
),
buf
,
1.5
,
0
,
BORDER_DEFAULT
,
-
1
,
stream
);
gpu
::
subtract
(
b
.
sigma1_2
,
b
.
mu1_2
,
b
.
sigma1_2
,
gpu
::
GpuMat
(),
-
1
,
stream
);
gpu
::
subtract
(
b
.
sigma1_2
,
b
.
mu1_2
,
b
.
sigma1_2
,
gpu
::
GpuMat
(),
-
1
,
stream
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment