submodule / opencv / Commits / 1bdd86ed

Commit 1bdd86ed, authored Dec 24, 2014 by Vadim Pisarevsky

Merge pull request #3523 from jet47:fix-cuda-buffer-pool

Parents: cddee22c, 68e08bbe

Showing 4 changed files, with 480 additions and 477 deletions:

    modules/core/include/opencv2/core/cuda.hpp            +1    -0
    modules/core/include/opencv2/core/private.cuda.hpp    +2    -20
    modules/core/src/cuda_buffer_pool.cpp                 +0    -435
    modules/core/src/cuda_stream.cpp                      +477  -22
modules/core/include/opencv2/core/cuda.hpp
...
...
@@ -490,6 +490,7 @@ private:
     friend struct StreamAccessor;
     friend class BufferPool;
+    friend class DefaultDeviceInitializer;
 };

 class CV_EXPORTS Event
...
...
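The one-line addition above registers DefaultDeviceInitializer (introduced in cuda_stream.cpp below) as a friend of Stream, so the initializer can reach Stream's private Impl-based constructor when it builds the per-device null streams. A minimal standalone sketch of that pattern, with hypothetical names rather than the actual OpenCV classes:

#include <memory>

class Stream2  // hypothetical stand-in for cv::cuda::Stream
{
public:
    Stream2() {}

private:
    struct Impl {};  // private implementation type

    // Private constructor: only friends can build a Stream2 around an Impl.
    explicit Stream2(std::shared_ptr<Impl> impl) : impl_(impl) {}

    std::shared_ptr<Impl> impl_;

    friend class Initializer;  // mirrors: friend class DefaultDeviceInitializer;
};

class Initializer
{
public:
    static Stream2 makeNull()
    {
        // Allowed only because Initializer is a friend of Stream2.
        return Stream2(std::make_shared<Stream2::Impl>());
    }
};

int main()
{
    Stream2 s = Initializer::makeNull();
    (void) s;
    return 0;
}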
modules/core/include/opencv2/core/private.cuda.hpp
...
...
@@ -92,26 +92,6 @@ static inline void throw_no_cuda() { CV_Error(cv::Error::StsNotImplemented, "The
 namespace cv { namespace cuda
 {
-    class MemoryStack;
-
-    class CV_EXPORTS StackAllocator : public GpuMat::Allocator
-    {
-    public:
-        explicit StackAllocator(cudaStream_t stream);
-        ~StackAllocator();
-
-        bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize);
-        void free(GpuMat* mat);
-
-    private:
-        StackAllocator(const StackAllocator&);
-        StackAllocator& operator =(const StackAllocator&);
-
-        cudaStream_t stream_;
-        MemoryStack* memStack_;
-        size_t alignment_;
-    };
-
     class CV_EXPORTS BufferPool
     {
     public:
...
...
@@ -120,6 +100,8 @@ namespace cv { namespace cuda
         GpuMat getBuffer(int rows, int cols, int type);
         GpuMat getBuffer(Size size, int type) { return getBuffer(size.height, size.width, type); }

+        GpuMat::Allocator* getAllocator() const { return allocator_; }
+
     private:
         GpuMat::Allocator* allocator_;
     };
...
...
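The BufferPool declaration above only exposes getBuffer and the underlying allocator; the pool itself is driven through the public setBufferPoolUsage / setBufferPoolConfig functions touched later in this diff. A usage sketch, assuming a CUDA-capable OpenCV build — the stack size and matrix dimensions here are illustrative, not defaults:

#include <opencv2/core/cuda.hpp>

using namespace cv::cuda;

int main()
{
    // Enable pooling before any Stream is created, then carve two
    // 64 MB stacks on the current device.
    setBufferPoolUsage(true);
    setBufferPoolConfig(getDevice(), 64 * 1024 * 1024, 2);

    Stream stream1, stream2;              // each stream grabs one memory stack
    BufferPool pool1(stream1), pool2(stream2);

    GpuMat d_buf1 = pool1.getBuffer(1024, 1024, CV_8UC1);   // served from stack 1
    GpuMat d_buf2 = pool2.getBuffer(1024, 1024, CV_32FC1);  // served from stack 2
    return 0;
}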
modules/core/src/cuda_buffer_pool.cpp (deleted, 100644 → 0)
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Copyright (C) 2013, OpenCV Foundation, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include "precomp.hpp"
using
namespace
cv
;
using
namespace
cv
::
cuda
;
#ifdef HAVE_CUDA
#include "opencv2/cudev/common.hpp"
/////////////////////////////////////////////////////////////
/// MemoryStack
namespace
{
class
MemoryPool
;
}
class
cv
::
cuda
::
MemoryStack
{
public
:
uchar
*
requestMemory
(
size_t
size
);
void
returnMemory
(
uchar
*
ptr
);
uchar
*
datastart
;
uchar
*
dataend
;
uchar
*
tip
;
bool
isFree
;
MemoryPool
*
pool
;
#if defined(DEBUG) || defined(_DEBUG)
std
::
vector
<
size_t
>
allocations
;
#endif
};
uchar
*
cv
::
cuda
::
MemoryStack
::
requestMemory
(
size_t
size
)
{
const
size_t
freeMem
=
dataend
-
tip
;
if
(
size
>
freeMem
)
return
0
;
uchar
*
ptr
=
tip
;
tip
+=
size
;
#if defined(DEBUG) || defined(_DEBUG)
allocations
.
push_back
(
size
);
#endif
return
ptr
;
}
void
cv
::
cuda
::
MemoryStack
::
returnMemory
(
uchar
*
ptr
)
{
CV_DbgAssert
(
ptr
>=
datastart
&&
ptr
<
dataend
);
#if defined(DEBUG) || defined(_DEBUG)
const
size_t
allocSize
=
tip
-
ptr
;
CV_Assert
(
allocSize
==
allocations
.
back
()
);
allocations
.
pop_back
();
#endif
tip
=
ptr
;
}
/////////////////////////////////////////////////////////////
/// MemoryPool
namespace
{
class
MemoryPool
{
public
:
MemoryPool
();
void
initialize
(
size_t
stackSize
,
int
stackCount
);
void
release
();
MemoryStack
*
getFreeMemStack
();
void
returnMemStack
(
MemoryStack
*
memStack
);
private
:
void
initilizeImpl
();
Mutex
mtx_
;
bool
initialized_
;
size_t
stackSize_
;
int
stackCount_
;
uchar
*
mem_
;
std
::
vector
<
MemoryStack
>
stacks_
;
};
MemoryPool
::
MemoryPool
()
:
initialized_
(
false
),
mem_
(
0
)
{
// default : 10 Mb, 5 stacks
stackSize_
=
10
*
1024
*
1024
;
stackCount_
=
5
;
}
void
MemoryPool
::
initialize
(
size_t
stackSize
,
int
stackCount
)
{
AutoLock
lock
(
mtx_
);
release
();
stackSize_
=
stackSize
;
stackCount_
=
stackCount
;
initilizeImpl
();
}
void
MemoryPool
::
initilizeImpl
()
{
const
size_t
totalSize
=
stackSize_
*
stackCount_
;
if
(
totalSize
>
0
)
{
cudaError_t
err
=
cudaMalloc
(
&
mem_
,
totalSize
);
if
(
err
!=
cudaSuccess
)
return
;
stacks_
.
resize
(
stackCount_
);
uchar
*
ptr
=
mem_
;
for
(
int
i
=
0
;
i
<
stackCount_
;
++
i
)
{
stacks_
[
i
].
datastart
=
ptr
;
stacks_
[
i
].
dataend
=
ptr
+
stackSize_
;
stacks_
[
i
].
tip
=
ptr
;
stacks_
[
i
].
isFree
=
true
;
stacks_
[
i
].
pool
=
this
;
ptr
+=
stackSize_
;
}
initialized_
=
true
;
}
}
void
MemoryPool
::
release
()
{
if
(
mem_
)
{
#if defined(DEBUG) || defined(_DEBUG)
for
(
int
i
=
0
;
i
<
stackCount_
;
++
i
)
{
CV_DbgAssert
(
stacks_
[
i
].
isFree
);
CV_DbgAssert
(
stacks_
[
i
].
tip
==
stacks_
[
i
].
datastart
);
}
#endif
cudaFree
(
mem_
);
mem_
=
0
;
initialized_
=
false
;
}
}
MemoryStack
*
MemoryPool
::
getFreeMemStack
()
{
AutoLock
lock
(
mtx_
);
if
(
!
initialized_
)
initilizeImpl
();
if
(
!
mem_
)
return
0
;
for
(
int
i
=
0
;
i
<
stackCount_
;
++
i
)
{
if
(
stacks_
[
i
].
isFree
)
{
stacks_
[
i
].
isFree
=
false
;
return
&
stacks_
[
i
];
}
}
return
0
;
}
void
MemoryPool
::
returnMemStack
(
MemoryStack
*
memStack
)
{
AutoLock
lock
(
mtx_
);
CV_DbgAssert
(
!
memStack
->
isFree
);
#if defined(DEBUG) || defined(_DEBUG)
bool
found
=
false
;
for
(
int
i
=
0
;
i
<
stackCount_
;
++
i
)
{
if
(
memStack
==
&
stacks_
[
i
])
{
found
=
true
;
break
;
}
}
CV_DbgAssert
(
found
);
#endif
CV_DbgAssert
(
memStack
->
tip
==
memStack
->
datastart
);
memStack
->
isFree
=
true
;
}
}
/////////////////////////////////////////////////////////////
/// MemoryPoolManager
namespace
{
Mutex
mtx_
;
bool
memory_pool_manager_initialized
;
class
MemoryPoolManager
{
public
:
MemoryPoolManager
();
~
MemoryPoolManager
();
void
Init
();
MemoryPool
*
getPool
(
int
deviceId
);
private
:
std
::
vector
<
MemoryPool
>
pools_
;
}
manager
;
//MemoryPoolManager ;
MemoryPoolManager
::
MemoryPoolManager
()
{
}
void
MemoryPoolManager
::
Init
()
{
int
deviceCount
=
getCudaEnabledDeviceCount
();
if
(
deviceCount
>
0
)
pools_
.
resize
(
deviceCount
);
}
MemoryPoolManager
::~
MemoryPoolManager
()
{
for
(
size_t
i
=
0
;
i
<
pools_
.
size
();
++
i
)
{
cudaSetDevice
(
static_cast
<
int
>
(
i
));
pools_
[
i
].
release
();
}
}
MemoryPool
*
MemoryPoolManager
::
getPool
(
int
deviceId
)
{
CV_DbgAssert
(
deviceId
>=
0
&&
deviceId
<
static_cast
<
int
>
(
pools_
.
size
())
);
return
&
pools_
[
deviceId
];
}
MemoryPool
*
memPool
(
int
deviceId
)
{
{
AutoLock
lock
(
mtx_
);
if
(
!
memory_pool_manager_initialized
)
{
memory_pool_manager_initialized
=
true
;
manager
.
Init
();
}
}
return
manager
.
getPool
(
deviceId
);
}
}
/////////////////////////////////////////////////////////////
/// StackAllocator
namespace
{
bool
enableMemoryPool
=
true
;
}
cv
::
cuda
::
StackAllocator
::
StackAllocator
(
cudaStream_t
stream
)
:
stream_
(
stream
),
memStack_
(
0
)
{
if
(
enableMemoryPool
)
{
const
int
deviceId
=
getDevice
();
{
AutoLock
lock
(
mtx_
);
memStack_
=
memPool
(
deviceId
)
->
getFreeMemStack
();
}
DeviceInfo
devInfo
(
deviceId
);
alignment_
=
devInfo
.
textureAlignment
();
}
}
cv
::
cuda
::
StackAllocator
::~
StackAllocator
()
{
cudaStreamSynchronize
(
stream_
);
if
(
memStack_
!=
0
)
memStack_
->
pool
->
returnMemStack
(
memStack_
);
}
namespace
{
size_t
alignUp
(
size_t
what
,
size_t
alignment
)
{
size_t
alignMask
=
alignment
-
1
;
size_t
inverseAlignMask
=
~
alignMask
;
size_t
res
=
(
what
+
alignMask
)
&
inverseAlignMask
;
return
res
;
}
}
bool
cv
::
cuda
::
StackAllocator
::
allocate
(
GpuMat
*
mat
,
int
rows
,
int
cols
,
size_t
elemSize
)
{
if
(
memStack_
==
0
)
return
false
;
size_t
pitch
,
memSize
;
if
(
rows
>
1
&&
cols
>
1
)
{
pitch
=
alignUp
(
cols
*
elemSize
,
alignment_
);
memSize
=
pitch
*
rows
;
}
else
{
// Single row or single column must be continuous
pitch
=
elemSize
*
cols
;
memSize
=
alignUp
(
elemSize
*
cols
*
rows
,
64
);
}
uchar
*
ptr
=
memStack_
->
requestMemory
(
memSize
);
if
(
ptr
==
0
)
return
false
;
mat
->
data
=
ptr
;
mat
->
step
=
pitch
;
mat
->
refcount
=
(
int
*
)
fastMalloc
(
sizeof
(
int
));
return
true
;
}
void
cv
::
cuda
::
StackAllocator
::
free
(
GpuMat
*
mat
)
{
if
(
memStack_
==
0
)
return
;
memStack_
->
returnMemory
(
mat
->
datastart
);
fastFree
(
mat
->
refcount
);
}
void
cv
::
cuda
::
setBufferPoolUsage
(
bool
on
)
{
enableMemoryPool
=
on
;
}
void
cv
::
cuda
::
setBufferPoolConfig
(
int
deviceId
,
size_t
stackSize
,
int
stackCount
)
{
const
int
currentDevice
=
getDevice
();
if
(
deviceId
>=
0
)
{
setDevice
(
deviceId
);
memPool
(
deviceId
)
->
initialize
(
stackSize
,
stackCount
);
}
else
{
const
int
deviceCount
=
getCudaEnabledDeviceCount
();
for
(
deviceId
=
0
;
deviceId
<
deviceCount
;
++
deviceId
)
{
setDevice
(
deviceId
);
memPool
(
deviceId
)
->
initialize
(
stackSize
,
stackCount
);
}
}
setDevice
(
currentDevice
);
}
/////////////////////////////////////////////////////////////
/// BufferPool
GpuMat
cv
::
cuda
::
BufferPool
::
getBuffer
(
int
rows
,
int
cols
,
int
type
)
{
GpuMat
buf
(
allocator_
);
buf
.
create
(
rows
,
cols
,
type
);
return
buf
;
}
#endif
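The pitch computation in StackAllocator::allocate rounds each row up to the device texture alignment via alignUp, so a matrix consumes pitch * rows bytes from its stack. A small self-contained check of that round-up arithmetic (the 512-byte alignment is just an example value, not a device constant):

#include <cassert>
#include <cstddef>

// (what + alignment-1) & ~(alignment-1) rounds `what` up to the next
// multiple of `alignment`, assuming alignment is a power of two.
static std::size_t alignUp(std::size_t what, std::size_t alignment)
{
    const std::size_t alignMask = alignment - 1;
    return (what + alignMask) & ~alignMask;
}

int main()
{
    // e.g. a 100-column CV_8UC3 row is 300 bytes; with a 512-byte
    // alignment the pitch becomes 512, so a 10-row matrix would take
    // pitch * rows = 5120 bytes from the stack.
    assert(alignUp(300, 512) == 512);
    assert(alignUp(512, 512) == 512);   // already aligned: unchanged
    assert(alignUp(513, 512) == 1024);  // one byte over: next multiple
    return 0;
}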
modules/core/src/cuda_stream.cpp
...
...
@@ -45,8 +45,217 @@
 using namespace cv;
 using namespace cv::cuda;

+/////////////////////////////////////////////////////////////
+/// MemoryStack
+
+#ifdef HAVE_CUDA
+
+namespace
+{
+    class MemoryPool;
+
+    class MemoryStack
+    {
+    public:
+        uchar* requestMemory(size_t size);
+        void returnMemory(uchar* ptr);
+
+        uchar* datastart;
+        uchar* dataend;
+        uchar* tip;
+
+        bool isFree;
+        MemoryPool* pool;
+
+#if !defined(NDEBUG)
+        std::vector<size_t> allocations;
+#endif
+    };
+
+    uchar* MemoryStack::requestMemory(size_t size)
+    {
+        const size_t freeMem = dataend - tip;
+
+        if (size > freeMem)
+            return 0;
+
+        uchar* ptr = tip;
+
+        tip += size;
+
+#if !defined(NDEBUG)
+        allocations.push_back(size);
+#endif
+
+        return ptr;
+    }
+
+    void MemoryStack::returnMemory(uchar* ptr)
+    {
+        CV_DbgAssert( ptr >= datastart && ptr < dataend );
+
+#if !defined(NDEBUG)
+        const size_t allocSize = tip - ptr;
+        CV_Assert( allocSize == allocations.back() );
+        allocations.pop_back();
+#endif
+
+        tip = ptr;
+    }
+}
+
+#endif
+
+/////////////////////////////////////////////////////////////
+/// MemoryPool
+
+#ifdef HAVE_CUDA
+
+namespace
+{
+    class MemoryPool
+    {
+    public:
+        MemoryPool();
+
+        void initialize(size_t stackSize, int stackCount);
+        void release();
+
+        MemoryStack* getFreeMemStack();
+        void returnMemStack(MemoryStack* memStack);
+
+    private:
+        void initilizeImpl();
+
+        Mutex mtx_;
+
+        bool initialized_;
+        size_t stackSize_;
+        int stackCount_;
+
+        uchar* mem_;
+
+        std::vector<MemoryStack> stacks_;
+    };
+
+    MemoryPool::MemoryPool() : initialized_(false), mem_(0)
+    {
+        // default : 10 Mb, 5 stacks
+        stackSize_ = 10 * 1024 * 1024;
+        stackCount_ = 5;
+    }
+
+    void MemoryPool::initialize(size_t stackSize, int stackCount)
+    {
+        AutoLock lock(mtx_);
+
+        release();
+
+        stackSize_ = stackSize;
+        stackCount_ = stackCount;
+
+        initilizeImpl();
+    }
+
+    void MemoryPool::initilizeImpl()
+    {
+        const size_t totalSize = stackSize_ * stackCount_;
+
+        if (totalSize > 0)
+        {
+            cudaError_t err = cudaMalloc(&mem_, totalSize);
+            if (err != cudaSuccess)
+                return;
+
+            stacks_.resize(stackCount_);
+
+            uchar* ptr = mem_;
+
+            for (int i = 0; i < stackCount_; ++i)
+            {
+                stacks_[i].datastart = ptr;
+                stacks_[i].dataend = ptr + stackSize_;
+                stacks_[i].tip = ptr;
+                stacks_[i].isFree = true;
+                stacks_[i].pool = this;
+
+                ptr += stackSize_;
+            }
+
+            initialized_ = true;
+        }
+    }
+
+    void MemoryPool::release()
+    {
+        if (mem_)
+        {
+#if !defined(NDEBUG)
+            for (int i = 0; i < stackCount_; ++i)
+            {
+                CV_DbgAssert( stacks_[i].isFree );
+                CV_DbgAssert( stacks_[i].tip == stacks_[i].datastart );
+            }
+#endif
+
+            cudaFree(mem_);
+
+            mem_ = 0;
+            initialized_ = false;
+        }
+    }
+
+    MemoryStack* MemoryPool::getFreeMemStack()
+    {
+        AutoLock lock(mtx_);
+
+        if (!initialized_)
+            initilizeImpl();
+
+        if (!mem_)
+            return 0;
+
+        for (int i = 0; i < stackCount_; ++i)
+        {
+            if (stacks_[i].isFree)
+            {
+                stacks_[i].isFree = false;
+                return &stacks_[i];
+            }
+        }
+
+        return 0;
+    }
+
+    void MemoryPool::returnMemStack(MemoryStack* memStack)
+    {
+        AutoLock lock(mtx_);
+
+        CV_DbgAssert( !memStack->isFree );
+
+#if !defined(NDEBUG)
+        bool found = false;
+        for (int i = 0; i < stackCount_; ++i)
+        {
+            if (memStack == &stacks_[i])
+            {
+                found = true;
+                break;
+            }
+        }
+        CV_DbgAssert( found );
+#endif
+
+        CV_DbgAssert( memStack->tip == memStack->datastart );
+
+        memStack->isFree = true;
+    }
+}
+
+#endif
+
 ////////////////////////////////////////////////////////////////
-// Stream
+/// Stream::Impl

 #ifndef HAVE_CUDA
...
...
@@ -62,6 +271,11 @@ public:
 #else

+namespace
+{
+    class StackAllocator;
+}
+
 class cv::cuda::Stream::Impl
 {
 public:
...
...
@@ -74,10 +288,6 @@ public:
     ~Impl();
 };

-cv::cuda::BufferPool::BufferPool(Stream& stream) : allocator_(stream.impl_->stackAllocator_.get())
-{
-}
-
 cv::cuda::Stream::Impl::Impl() : stream(0)
 {
     cudaSafeCall( cudaStreamCreate(&stream) );
...
...
@@ -98,13 +308,101 @@ cv::cuda::Stream::Impl::~Impl()
         cudaStreamDestroy(stream);
 }

-cudaStream_t cv::cuda::StreamAccessor::getStream(const Stream& stream)
-{
-    return stream.impl_->stream;
-}
+#endif
+
+/////////////////////////////////////////////////////////////
+/// DefaultDeviceInitializer
+
+#ifdef HAVE_CUDA
+
+namespace cv { namespace cuda
+{
+    class DefaultDeviceInitializer
+    {
+    public:
+        DefaultDeviceInitializer();
+        ~DefaultDeviceInitializer();
+
+        Stream& getNullStream(int deviceId);
+        MemoryPool* getMemoryPool(int deviceId);
+
+    private:
+        void initStreams();
+        void initPools();
+
+        std::vector<Ptr<Stream> > streams_;
+        Mutex streams_mtx_;
+
+        std::vector<MemoryPool> pools_;
+        Mutex pools_mtx_;
+    };
+
+    DefaultDeviceInitializer::DefaultDeviceInitializer()
+    {
+    }
+
+    DefaultDeviceInitializer::~DefaultDeviceInitializer()
+    {
+        streams_.clear();
+
+        for (size_t i = 0; i < pools_.size(); ++i)
+        {
+            cudaSetDevice(static_cast<int>(i));
+            pools_[i].release();
+        }
+
+        pools_.clear();
+    }
+
+    Stream& DefaultDeviceInitializer::getNullStream(int deviceId)
+    {
+        AutoLock lock(streams_mtx_);
+
+        if (streams_.empty())
+        {
+            int deviceCount = getCudaEnabledDeviceCount();
+
+            if (deviceCount > 0)
+                streams_.resize(deviceCount);
+        }
+
+        CV_DbgAssert( deviceId >= 0 && deviceId < static_cast<int>(streams_.size()) );
+
+        if (streams_[deviceId].empty())
+        {
+            cudaStream_t stream = NULL;
+            Ptr<Stream::Impl> impl = makePtr<Stream::Impl>(stream);
+            streams_[deviceId] = Ptr<Stream>(new Stream(impl));
+        }
+
+        return *streams_[deviceId];
+    }
+
+    MemoryPool* DefaultDeviceInitializer::getMemoryPool(int deviceId)
+    {
+        AutoLock lock(pools_mtx_);
+
+        if (pools_.empty())
+        {
+            int deviceCount = getCudaEnabledDeviceCount();
+
+            if (deviceCount > 0)
+                pools_.resize(deviceCount);
+        }
+
+        CV_DbgAssert( deviceId >= 0 && deviceId < static_cast<int>(pools_.size()) );
+
+        return &pools_[deviceId];
+    }
+
+    DefaultDeviceInitializer initializer;
+}}
+
+#endif
+
+/////////////////////////////////////////////////////////////
+/// Stream

 cv::cuda::Stream::Stream()
 {
 #ifndef HAVE_CUDA
...
...
@@ -181,7 +479,7 @@ void cv::cuda::Stream::enqueueHostCallback(StreamCallback callback, void* userData)
 #if CUDART_VERSION < 5000
     (void) callback;
     (void) userData;
-    CV_Error(cv::Error::StsNotImplemented, "This function requires CUDA 5.0");
+    CV_Error(cv::Error::StsNotImplemented, "This function requires CUDA >= 5.0");
 #else
     CallbackData* data = new CallbackData(callback, userData);
...
...
@@ -190,33 +488,190 @@ void cv::cuda::Stream::enqueueHostCallback(StreamCallback callback, void* userData)
 #endif
 }

-namespace
-{
-    bool default_stream_is_initialized;
-    Mutex mtx;
-    Ptr<Stream> default_stream;
-}
-
-Stream& cv::cuda::Stream::Null()
-{
-    AutoLock lock(mtx);
-    if (!default_stream_is_initialized)
-    {
-        default_stream = Ptr<Stream>(new Stream(Ptr<Impl>(new Impl(0))));
-        default_stream_is_initialized = true;
-    }
-    return *default_stream;
-}
-
-cv::cuda::Stream::operator bool_type() const
-{
-#ifndef HAVE_CUDA
-    return 0;
-#else
-    return (impl_->stream != 0) ? &Stream::this_type_does_not_support_comparisons : 0;
-#endif
-}
+Stream& cv::cuda::Stream::Null()
+{
+#ifndef HAVE_CUDA
+    throw_no_cuda();
+    static Stream stream;
+    return stream;
+#else
+    const int deviceId = getDevice();
+    return initializer.getNullStream(deviceId);
+#endif
+}
+
+cv::cuda::Stream::operator bool_type() const
+{
+#ifndef HAVE_CUDA
+    return 0;
+#else
+    return (impl_->stream != 0) ? &Stream::this_type_does_not_support_comparisons : 0;
+#endif
+}
+
+#ifdef HAVE_CUDA
+
+cudaStream_t cv::cuda::StreamAccessor::getStream(const Stream& stream)
+{
+    return stream.impl_->stream;
+}
+
+#endif
+
+/////////////////////////////////////////////////////////////
+/// StackAllocator
+
+#ifdef HAVE_CUDA
+
+namespace
+{
+    bool enableMemoryPool = true;
+
+    class StackAllocator : public GpuMat::Allocator
+    {
+    public:
+        explicit StackAllocator(cudaStream_t stream);
+        ~StackAllocator();
+
+        bool allocate(GpuMat* mat, int rows, int cols, size_t elemSize);
+        void free(GpuMat* mat);
+
+    private:
+        StackAllocator(const StackAllocator&);
+        StackAllocator& operator =(const StackAllocator&);
+
+        cudaStream_t stream_;
+        MemoryStack* memStack_;
+        size_t alignment_;
+    };
+
+    StackAllocator::StackAllocator(cudaStream_t stream) : stream_(stream), memStack_(0)
+    {
+        if (enableMemoryPool)
+        {
+            const int deviceId = getDevice();
+            memStack_ = initializer.getMemoryPool(deviceId)->getFreeMemStack();
+            DeviceInfo devInfo(deviceId);
+            alignment_ = devInfo.textureAlignment();
+        }
+    }
+
+    StackAllocator::~StackAllocator()
+    {
+        cudaStreamSynchronize(stream_);
+
+        if (memStack_ != 0)
+            memStack_->pool->returnMemStack(memStack_);
+    }
+
+    size_t alignUp(size_t what, size_t alignment)
+    {
+        size_t alignMask = alignment - 1;
+        size_t inverseAlignMask = ~alignMask;
+        size_t res = (what + alignMask) & inverseAlignMask;
+        return res;
+    }
+
+    bool StackAllocator::allocate(GpuMat* mat, int rows, int cols, size_t elemSize)
+    {
+        if (memStack_ == 0)
+            return false;
+
+        size_t pitch, memSize;
+
+        if (rows > 1 && cols > 1)
+        {
+            pitch = alignUp(cols * elemSize, alignment_);
+            memSize = pitch * rows;
+        }
+        else
+        {
+            // Single row or single column must be continuous
+            pitch = elemSize * cols;
+            memSize = alignUp(elemSize * cols * rows, 64);
+        }
+
+        uchar* ptr = memStack_->requestMemory(memSize);
+
+        if (ptr == 0)
+            return false;
+
+        mat->data = ptr;
+        mat->step = pitch;
+        mat->refcount = (int*) fastMalloc(sizeof(int));
+
+        return true;
+    }
+
+    void StackAllocator::free(GpuMat* mat)
+    {
+        if (memStack_ == 0)
+            return;
+
+        memStack_->returnMemory(mat->datastart);
+        fastFree(mat->refcount);
+    }
+}
+
+#endif
+
+/////////////////////////////////////////////////////////////
+/// BufferPool
+
+void cv::cuda::setBufferPoolUsage(bool on)
+{
+#ifndef HAVE_CUDA
+    (void) on;
+    throw_no_cuda();
+#else
+    enableMemoryPool = on;
+#endif
+}
+
+void cv::cuda::setBufferPoolConfig(int deviceId, size_t stackSize, int stackCount)
+{
+#ifndef HAVE_CUDA
+    (void) deviceId;
+    (void) stackSize;
+    (void) stackCount;
+    throw_no_cuda();
+#else
+    const int currentDevice = getDevice();
+
+    if (deviceId >= 0)
+    {
+        setDevice(deviceId);
+        initializer.getMemoryPool(deviceId)->initialize(stackSize, stackCount);
+    }
+    else
+    {
+        const int deviceCount = getCudaEnabledDeviceCount();
+
+        for (deviceId = 0; deviceId < deviceCount; ++deviceId)
+        {
+            setDevice(deviceId);
+            initializer.getMemoryPool(deviceId)->initialize(stackSize, stackCount);
+        }
+    }
+
+    setDevice(currentDevice);
+#endif
+}
+
+#ifdef HAVE_CUDA
+
+cv::cuda::BufferPool::BufferPool(Stream& stream) : allocator_(stream.impl_->stackAllocator_.get())
+{
+}
+
+GpuMat cv::cuda::BufferPool::getBuffer(int rows, int cols, int type)
+{
+    GpuMat buf(allocator_);
+    buf.create(rows, cols, type);
+    return buf;
+}
+
+#endif

 ////////////////////////////////////////////////////////////////
 // Event
...
...
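Because MemoryStack hands out memory by bumping tip, and returnMemory asserts that the block being returned is the most recent allocation, buffers drawn from a BufferPool must be released in reverse (LIFO) order. A minimal sketch of that discipline, independent of OpenCV (the names here are illustrative, not the patch's types):

#include <cassert>
#include <cstddef>
#include <vector>

struct Stack
{
    std::vector<unsigned char> buf;
    std::size_t tip = 0;
    std::vector<std::size_t> allocations;  // debug bookkeeping, as in the patch

    unsigned char* request(std::size_t size)
    {
        if (size > buf.size() - tip) return nullptr;  // stack exhausted
        unsigned char* p = buf.data() + tip;          // bump-pointer allocation
        tip += size;
        allocations.push_back(size);
        return p;
    }

    void release(unsigned char* p)
    {
        std::size_t offset = static_cast<std::size_t>(p - buf.data());
        assert(tip - offset == allocations.back());   // must free the newest block
        allocations.pop_back();
        tip = offset;
    }
};

int main()
{
    Stack s;
    s.buf.resize(1024);
    unsigned char* a = s.request(128);
    unsigned char* b = s.request(256);
    s.release(b);  // OK: b is the most recent allocation
    s.release(a);  // OK afterwards
    // Releasing a before b would trip the assert, mirroring the LIFO
    // requirement on GpuMats taken from a BufferPool.
    return 0;
}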