Commit 07825bad, authored Jul 19, 2010 by Anatoly Baksheev

    compilation with no cuda re factored

Parent: 20e2dc84

Showing 15 changed files with 550 additions and 313 deletions:
    modules/gpu/CMakeLists.txt                              +15   −14
    modules/gpu/cuda/mat_operators.cu                        +0  −151
    modules/gpu/include/opencv2/gpu/devmem2d.hpp             +3    −2
    modules/gpu/include/opencv2/gpu/gpu.hpp                 +105  −29
    modules/gpu/include/opencv2/gpu/matpl.hpp                +0    −0
    modules/gpu/include/opencv2/gpu/matrix_operations.hpp   +87   −11
    modules/gpu/include/opencv2/gpu/stream_accessor.hpp     +65    −0
    modules/gpu/src/cuda/cuda_shared.hpp                    +14    −8
    modules/gpu/src/cuda/stereobm.cu                         +0    −0
    modules/gpu/src/cudastream.cpp                          +91   −29
    modules/gpu/src/initialization.cpp                      +17    −8
    modules/gpu/src/matrix_operations.cpp                   +92   −31
    modules/gpu/src/precomp.cpp                             +10    −4
    modules/gpu/src/precomp.hpp                              +8   −21
    modules/gpu/src/stereobm_gpu.cpp                        +43    −5
modules/gpu/CMakeLists.txt

 set(name "gpu")
 set(DEPS "opencv_core")

 set(the_target "opencv_${name}")
...
@@ -9,25 +10,25 @@ project(${the_target})
 add_definitions(-DCVAPI_EXPORTS)

 include_directories("${CMAKE_CURRENT_SOURCE_DIR}/include"
-                    "${CMAKE_CURRENT_SOURCE_DIR}/cuda"
+                    "${CMAKE_CURRENT_SOURCE_DIR}/src/cuda"
                     "${CMAKE_CURRENT_SOURCE_DIR}/src"
                     "${CMAKE_CURRENT_BINARY_DIR}")

 foreach(d ${DEPS})
     if(${d} MATCHES "opencv_")
         string(REPLACE "opencv_" "${CMAKE_CURRENT_SOURCE_DIR}/../" d_dir ${d})
         include_directories("${d_dir}/include")
     endif()
 endforeach()

 file(GLOB lib_srcs "src/*.cpp")
 file(GLOB lib_int_hdrs "src/*.h*")
-file(GLOB lib_cuda "cuda/*.cu")
-file(GLOB lib_cuda_hdrs "cuda/*.h*")
+file(GLOB lib_cuda "src/cuda/*.cu")
+file(GLOB lib_cuda_hdrs "src/cuda/*.h*")
 source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs})
 source_group("Cuda" FILES ${lib_cuda} ${lib_cuda_hdrs})
 file(GLOB lib_hdrs "include/opencv2/${name}/*.h*")
 source_group("Include" FILES ${lib_hdrs})

 if (HAVE_CUDA)
...
@@ -35,13 +36,13 @@ if (HAVE_CUDA)
     link_directories(${CUDA_LIBRARIES})

     if (UNIX OR APPLE)
-        set (CUDA_NVCC_FLAGS "-Xcompiler;-fPIC")
+        set (CUDA_NVCC_FLAGS "-Xcompiler;-fPIC;")
     endif()

     CUDA_COMPILE(cuda_objs ${lib_cuda})
     #CUDA_BUILD_CLEAN_TARGET()
 endif()

 add_library(${the_target} ${lib_srcs} ${lib_hdrs} ${lib_int_hdrs} ${lib_cuda} ${lib_cuda_hdrs} ${cuda_objs})
...
@@ -50,7 +51,7 @@ if(PCHSupport_FOUND)
     if(${CMAKE_GENERATOR} MATCHES "Visual*" OR ${CMAKE_GENERATOR} MATCHES "Xcode*")
         if(${CMAKE_GENERATOR} MATCHES "Visual*")
             set(${the_target}_pch "src/precomp.cpp")
         endif()
         add_native_precompiled_header(${the_target} ${pch_header})
     elseif(CMAKE_COMPILER_IS_GNUCXX AND ${CMAKE_GENERATOR} MATCHES ".*Makefiles")
         add_precompiled_header(${the_target} ${pch_header})
...
modules/gpu/cuda/mat_operators.cu (deleted, 100644 → 0)
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#include <stddef.h>
#include "cuda_shared.hpp"
#include "cuda_runtime.h"
__constant__ float scalar_d[4];

namespace mat_operators
{
    template <typename T, int channels, int count = channels>
    struct unroll
    {
        __device__ static void unroll_set(T * mat, size_t i)
        {
            mat[i] = static_cast<T>(scalar_d[i % channels]);
            unroll<T, channels, count - 1>::unroll_set(mat, i+1);
        }

        __device__ static void unroll_set_with_mask(T * mat, float mask, size_t i)
        {
            mat[i] = mask * static_cast<T>(scalar_d[i % channels]);
            unroll<T, channels, count - 1>::unroll_set_with_mask(mat, mask, i+1);
        }
    };

    template <typename T, int channels>
    struct unroll<T, channels, 0>
    {
        __device__ static void unroll_set(T * , size_t){}
        __device__ static void unroll_set_with_mask(T * , float, size_t){}
    };

    template <typename T, int channels>
    __global__ void kernel_set_to_without_mask(T * mat)
    {
        size_t i = (blockIdx.x * blockDim.x + threadIdx.x) * sizeof(T);
        unroll<T, channels>::unroll_set(mat, i);
    }

    template <typename T, int channels>
    __global__ void kernel_set_to_with_mask(T * mat, const float * mask)
    {
        size_t i = (blockIdx.x * blockDim.x + threadIdx.x) * sizeof(T);
        unroll<T, channels>::unroll_set_with_mask(mat, i, mask[i]);
    }
}

extern "C" void cv::gpu::impl::set_to_with_mask(const DevMem2D& mat, const double * scalar, const DevMem2D& mask, int depth, int channels)
{
    scalar_d[0] = scalar[0];
    scalar_d[1] = scalar[1];
    scalar_d[2] = scalar[2];
    scalar_d[3] = scalar[3];

    int numBlocks = mat.rows * mat.step / 256;
    dim3 threadsPerBlock(256);

    if (channels == 1)
    {
        if (depth == 1) ::mat_operators::kernel_set_to_with_mask<unsigned char, 1><<<numBlocks,threadsPerBlock>>>(mat.ptr, (float *)mask.ptr);
        if (depth == 2) ::mat_operators::kernel_set_to_with_mask<unsigned short, 1><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr, (float *)mask.ptr);
        if (depth == 4) ::mat_operators::kernel_set_to_with_mask<unsigned int, 1><<<numBlocks,threadsPerBlock>>>((unsigned int *)mat.ptr, (float *)mask.ptr);
    }
    if (channels == 2)
    {
        if (depth == 1) ::mat_operators::kernel_set_to_with_mask<unsigned char, 2><<<numBlocks,threadsPerBlock>>>(mat.ptr, (float *)mask.ptr);
        if (depth == 2) ::mat_operators::kernel_set_to_with_mask<unsigned short, 2><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr, (float *)mask.ptr);
        if (depth == 4) ::mat_operators::kernel_set_to_with_mask<unsigned int, 2><<<numBlocks,threadsPerBlock>>>((unsigned int *)mat.ptr, (float *)mask.ptr);
    }
    if (channels == 3)
    {
        if (depth == 1) ::mat_operators::kernel_set_to_with_mask<unsigned char, 3><<<numBlocks,threadsPerBlock>>>(mat.ptr, (float *)mask.ptr);
        if (depth == 2) ::mat_operators::kernel_set_to_with_mask<unsigned short, 3><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr, (float *)mask.ptr);
        if (depth == 4) ::mat_operators::kernel_set_to_with_mask<unsigned int, 3><<<numBlocks,threadsPerBlock>>>((unsigned int *)mat.ptr, (float *)mask.ptr);
    }
}

extern "C" void cv::gpu::impl::set_to_without_mask(const DevMem2D& mat, const double * scalar, int depth, int channels)
{
    scalar_d[0] = scalar[0];
    scalar_d[1] = scalar[1];
    scalar_d[2] = scalar[2];
    scalar_d[3] = scalar[3];

    int numBlocks = mat.rows * mat.step / 256;
    dim3 threadsPerBlock(256);

    if (channels == 1)
    {
        if (depth == 1) ::mat_operators::kernel_set_to_without_mask<unsigned char, 1><<<numBlocks,threadsPerBlock>>>(mat.ptr);
        if (depth == 2) ::mat_operators::kernel_set_to_without_mask<unsigned short, 1><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr);
        if (depth == 4) ::mat_operators::kernel_set_to_without_mask<unsigned int, 1><<<numBlocks,threadsPerBlock>>>((unsigned int *)mat.ptr);
    }
    if (channels == 2)
    {
        if (depth == 1) ::mat_operators::kernel_set_to_without_mask<unsigned char, 2><<<numBlocks,threadsPerBlock>>>(mat.ptr);
        if (depth == 2) ::mat_operators::kernel_set_to_without_mask<unsigned short, 2><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr);
        if (depth == 4) ::mat_operators::kernel_set_to_without_mask<unsigned int, 2><<<numBlocks,threadsPerBlock>>>((unsigned int *)mat.ptr);
    }
    if (channels == 3)
    {
        if (depth == 1) ::mat_operators::kernel_set_to_without_mask<unsigned char, 3><<<numBlocks,threadsPerBlock>>>(mat.ptr);
        if (depth == 2) ::mat_operators::kernel_set_to_without_mask<unsigned short, 3><<<numBlocks,threadsPerBlock>>>((unsigned short *)mat.ptr);
        if (depth == 4) ::mat_operators::kernel_set_to_without_mask<unsigned int, 3><<<numBlocks,threadsPerBlock>>>((unsigned int *)mat.ptr);
    }
}
modules/gpu/include/opencv2/gpu/devmem2d.hpp

...
@@ -48,12 +48,13 @@ namespace cv
     namespace gpu
     {
         // Simple lightweight structure that encapsulates image ptr on device, its pitch and its sizes.
-        // It is intended to pass to nvcc-compiled code.
+        // It is intended to pass to nvcc-compiled code. GpuMat depends on headers that nvcc can't compile
         template <typename T = unsigned char> struct DevMem2D_
         {
-            enum { elem_size = sizeof(T) };
+            typedef T elem_t;
+            enum { elem_size = sizeof(elem_t) };

             int cols;
             int rows;
...
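The point of DevMem2D_ is that nvcc-compiled translation units never have to see GpuMat itself. A minimal illustrative kernel follows, assuming the usual DevMem2D = DevMem2D_<unsigned char> default and the ptr/step members that mat_operators.cu accesses (step taken as the pitch in bytes); the kernel name is hypothetical, not part of this commit:

    // Fills a pitched 8-bit device image with a constant value.
    __global__ void fill_with(cv::gpu::DevMem2D img, unsigned char val)
    {
        int x = blockIdx.x * blockDim.x + threadIdx.x;
        int y = blockIdx.y * blockDim.y + threadIdx.y;
        if (x < img.cols && y < img.rows)
            img.ptr[y * img.step + x] = val;   // step is the pitch in bytes, not cols
    }

On the host side, a GpuMat converts to DevMem2D implicitly (stereobm_gpu.cpp below does exactly that with "DevMem2D disp = disparity;"), so the kernel can be launched without the .cu file including gpu.hpp.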
modules/gpu/include/opencv2/gpu/gpu.hpp

...
@@ -52,15 +52,20 @@ namespace cv
 {
     //////////////////////////////// Initialization ////////////////////////

     //! This is the only function that do not throw exceptions if the library is compiled without Cuda.
     CV_EXPORTS int getCudaEnabledDeviceCount();

     //! Functions below throw cv::Expception if the library is compiled without Cuda.
     CV_EXPORTS string getDeviceName(int device);
     CV_EXPORTS void setDevice(int device);
+    CV_EXPORTS int getDevice();

     CV_EXPORTS void getComputeCapability(int device, int* major, int* minor);
     CV_EXPORTS int getNumberOfSMs(int device);

     //////////////////////////////// GpuMat ////////////////////////////////

     //! Smart pointer for GPU memory with reference counting. Its interface is mostly similar with cv::Mat.
     class CV_EXPORTS GpuMat
     {
     public:
...
@@ -85,7 +90,7 @@ namespace cv
         GpuMat(const GpuMat& m, const Rect& roi);

         //! builds GpuMat from Mat. Perfom blocking upload to device.
-        GpuMat (const Mat& m);
+        explicit GpuMat (const Mat& m);

         //! destructor - calls release()
         ~GpuMat();
...
@@ -211,44 +216,109 @@ namespace cv
         uchar* dataend;
     };

+    //////////////////////////////// MatPL ////////////////////////////////
+    // MatPL is limited cv::Mat with page locked memory allocation.
+    // Page locked memory is only needed for async and faster coping to GPU.
+    // It is convertable to cv::Mat header without reference counting
+    // so you can use it with other opencv functions.
+    class CV_EXPORTS MatPL
+    {
+    public:
+        //Not supported. Now behaviour is like ALLOC_DEFAULT.
+        //enum { ALLOC_DEFAULT = 0, ALLOC_PORTABLE = 1, ALLOC_WRITE_COMBINED = 4 }
+
+        MatPL();
+        MatPL(const MatPL& m);
+        MatPL(int _rows, int _cols, int _type);
+        MatPL(Size _size, int _type);
+
+        //! creates from cv::Mat with coping data
+        explicit MatPL(const Mat& m);
+
+        ~MatPL();
+
+        MatPL& operator = (const MatPL& m);
+
+        //! returns deep copy of the matrix, i.e. the data is copied
+        MatPL clone() const;
+
+        //! allocates new matrix data unless the matrix already has specified size and type.
+        void create(int _rows, int _cols, int _type);
+        void create(Size _size, int _type);
+
+        //! decrements reference counter and released memory if needed.
+        void release();
+
+        //! returns matrix header with disabled reference counting for MatPL data.
+        Mat createMatHeader() const;
+        operator Mat() const;
+
+        // Please see cv::Mat for descriptions
+        bool isContinuous() const;
+        size_t elemSize() const;
+        size_t elemSize1() const;
+        int type() const;
+        int depth() const;
+        int channels() const;
+        size_t step1() const;
+        Size size() const;
+        bool empty() const;
+
+        // Please see cv::Mat for descriptions
+        int flags;
+        int rows, cols;
+        size_t step;
+
+        uchar* data;
+        int* refcount;
+
+        uchar* datastart;
+        uchar* dataend;
+    };

     //////////////////////////////// CudaStream ////////////////////////////////
     // Encapculates Cuda Stream. Provides interface for async coping.
     // Passed to each function that supports async kernel execution.
     // Reference counting is enabled
-    class CudaStream
+    class CV_EXPORTS CudaStream
     {
     public:
+        static CudaStream empty();
+
         CudaStream();
         ~CudaStream();

+        CudaStream(const CudaStream&);
+        CudaStream& operator=(const CudaStream&);
+
         bool queryIfComplete();
         void waitForCompletion();

-        //calls cudaMemcpyAsync
+        //! downloads asynchronously.
+        // Warning! cv::Mat must point to page locked memory (i.e. to MatPL data or to its subMat)
+        void enqueueDownload(const GpuMat& src, MatPL& dst);
         void enqueueDownload(const GpuMat& src, Mat& dst);
-        void enqueueUpload(const Mat& src, GpuMat& dst);
-        void enqueueCopy(const GpuMat& src, GpuMat& dst);
-        // calls cudaMemset2D asynchronous for single channel. Invoke kernel for some multichannel.
-        void enqueueMemSet(const GpuMat& src, Scalar val);
+
+        //! uploads asynchronously.
+        // Warning! cv::Mat must point to page locked memory (i.e. to MatPL data or to its ROI)
+        void enqueueUpload(const MatPL& src, GpuMat& dst);
+        void enqueueUpload(const Mat& src, GpuMat& dst);
+
+        void enqueueCopy(const GpuMat& src, GpuMat& dst);
+
+        // invoke kernel asynchronous because of mask
+        void enqueueMemSet(const GpuMat& src, Scalar val);
+        void enqueueMemSet(const GpuMat& src, Scalar val, const GpuMat& mask);
+
         // converts matrix type, ex from float to uchar depending on type
-        void enqueueConvert(const GpuMat& src, GpuMat& dst, int type);
-
-        struct Impl;
-        const Impl& getImpl() const;
-    private:
-        Impl *impl;
-
-        CudaStream(const CudaStream&);
-        CudaStream& operator=(const CudaStream&);
+        void enqueueConvert(const GpuMat& src, GpuMat& dst, int type, double a = 1, double b = 0);
+    private:
+        void create();
+        void release();
+
+        struct Impl;
+        Impl *impl;
+
+        friend struct StreamAccessor;
     };

     //////////////////////////////// StereoBM_GPU ////////////////////////////////
...
@@ -265,17 +335,22 @@ namespace cv
         StereoBM_GPU(int preset, int ndisparities = 0);

         //! the stereo correspondence operator. Finds the disparity for the specified rectified stereo pair
         //! Output disparity has CV_8U type.
-        void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity) const;
+        void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity);
+        //! Acync version
+        void operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream& stream);
+
+        //! Some heuristics that tries to estmate
+        // if current GPU will be faster then CPU in this algorithm.
+        // It queries current active device.
+        static bool checkIfGpuCallReasonable();
     private:
-        mutable GpuMat minSSD;
+        GpuMat minSSD;
         int preset;
         int ndisp;
     };
 }
 }

-#include "opencv2/gpu/gpumat.hpp"
+#include "opencv2/gpu/matrix_operations.hpp"

 #endif /* __OPENCV_GPU_HPP__ */
\ No newline at end of file
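Taken together, MatPL and CudaStream enable the overlap the header comments describe: uploads and downloads only run asynchronously when the host side is page-locked. A minimal round-trip sketch, assuming a CUDA-enabled build; async_roundtrip is a hypothetical helper, not part of the commit:

    #include "opencv2/gpu/gpu.hpp"

    void async_roundtrip(const cv::Mat& src, cv::Mat& dst)
    {
        cv::gpu::MatPL pinned_src(src);                      // blocking copy into page-locked memory
        cv::gpu::MatPL pinned_dst(src.size(), src.type());
        cv::gpu::GpuMat d_img;

        cv::gpu::CudaStream stream;
        stream.enqueueUpload(pinned_src, d_img);             // async: source is page-locked
        stream.enqueueDownload(d_img, pinned_dst);           // async: destination is page-locked
        stream.waitForCompletion();                          // block until both copies finish

        pinned_dst.createMatHeader().copyTo(dst);            // detach result from the pinned buffer
    }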
modules/gpu/include/opencv2/gpu/matpl.hpp (deleted, 100644 → 0)

(diff collapsed — not shown)
modules/gpu/include/opencv2/gpu/gpumat.hpp → modules/gpu/include/opencv2/gpu/matrix_operations.hpp
...
@@ -43,27 +43,25 @@
 #ifndef __OPENCV_GPU_MATRIX_OPERATIONS_HPP__
 #define __OPENCV_GPU_MATRIX_OPERATIONS_HPP__

 namespace cv
 {

 namespace gpu
 {

 ////////////////////////////////////////////////////////////////////////
 //////////////////////////////// GpuMat ////////////////////////////////
 ////////////////////////////////////////////////////////////////////////

 inline GpuMat::GpuMat() : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) {}

 inline GpuMat::GpuMat(int _rows, int _cols, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
 {
     if (_rows > 0 && _cols > 0)
         create(_rows, _cols, _type);
 }

 inline GpuMat::GpuMat(Size _size, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
 {
     if (_size.height > 0 && _size.width > 0)
         create(_size.height, _size.width, _type);
...
@@ -249,12 +247,9 @@ inline void GpuMat::assignTo( GpuMat& m, int type ) const
 //CPP GpuMat& GpuMat::operator = (const Scalar& s);
 //CPP GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask=GpuMat());
 //CPP GpuMat GpuMat::reshape(int _cn, int _rows=0) const;
-//CPP void GpuMat::create(int _rows, int _cols, int _type);

 inline void GpuMat::create(Size _size, int _type) { create(_size.height, _size.width, _type); }
+//CPP void GpuMat::create(int _rows, int _cols, int _type);
 //CPP void GpuMat::release();

 inline void GpuMat::swap(GpuMat& b)
...
@@ -343,6 +338,87 @@ template<typename _Tp> inline const _Tp* GpuMat::ptr(int y) const
 static inline void swap(GpuMat& a, GpuMat& b) { a.swap(b); }

+///////////////////////////////////////////////////////////////////////
+//////////////////////////////// MatPL ////////////////////////////////
+///////////////////////////////////////////////////////////////////////
+
+MatPL::MatPL() : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0) {}
+
+MatPL::MatPL(int _rows, int _cols, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+{
+    if (_rows > 0 && _cols > 0)
+        create(_rows, _cols, _type);
+}
+
+MatPL::MatPL(Size _size, int _type) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+{
+    if (_size.height > 0 && _size.width > 0)
+        create(_size.height, _size.width, _type);
+}
+
+MatPL::MatPL(const MatPL& m) : flags(m.flags), rows(m.rows), cols(m.cols), step(m.step), data(m.data), refcount(m.refcount), datastart(0), dataend(0)
+{
+    if (refcount)
+        CV_XADD(refcount, 1);
+}
+
+MatPL::MatPL(const Mat& m) : flags(0), rows(0), cols(0), step(0), data(0), refcount(0), datastart(0), dataend(0)
+{
+    if (m.rows > 0 && m.cols > 0)
+        create(m.size(), m.type());
+
+    Mat tmp = createMatHeader();
+    m.copyTo(tmp);
+}
+
+MatPL::~MatPL()
+{
+    release();
+}
+
+MatPL& MatPL::operator = (const MatPL& m)
+{
+    if (this != &m)
+    {
+        if (m.refcount)
+            CV_XADD(m.refcount, 1);
+        release();
+
+        flags = m.flags;
+        rows = m.rows;
+        cols = m.cols;
+        step = m.step;
+        data = m.data;
+        datastart = m.datastart;
+        dataend = m.dataend;
+        refcount = m.refcount;
+    }
+    return *this;
+}
+
+MatPL MatPL::clone() const
+{
+    MatPL m(size(), type());
+    Mat to = m;
+    Mat from = *this;
+    from.copyTo(to);
+    return m;
+}
+
+inline void MatPL::create(Size _size, int _type) { create(_size.height, _size.width, _type); }
+//CCP void MatPL::create(int _rows, int _cols, int _type);
+//CPP void MatPL::release();
+
+inline Mat MatPL::createMatHeader() const { return Mat(size(), type(), data); }
+inline MatPL::operator Mat() const { return createMatHeader(); }
+
+inline bool MatPL::isContinuous() const { return (flags & Mat::CONTINUOUS_FLAG) != 0; }
+inline size_t MatPL::elemSize() const { return CV_ELEM_SIZE(flags); }
+inline size_t MatPL::elemSize1() const { return CV_ELEM_SIZE1(flags); }
+inline int MatPL::type() const { return CV_MAT_TYPE(flags); }
+inline int MatPL::depth() const { return CV_MAT_DEPTH(flags); }
+inline int MatPL::channels() const { return CV_MAT_CN(flags); }
+inline size_t MatPL::step1() const { return step/elemSize1(); }
+inline Size MatPL::size() const { return Size(cols, rows); }
+inline bool MatPL::empty() const { return data == 0; }

 } /* end of namespace gpu */

 } /* end of namespace cv */
...
modules/gpu/include/opencv2/gpu/stream_accessor.hpp (new file, 0 → 100644)
/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other GpuMaterials provided with the distribution.
//
// * The name of the copyright holders may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/
#ifndef __OPENCV_GPU_STREAM_ACCESSOR_HPP__
#define __OPENCV_GPU_STREAM_ACCESSOR_HPP__

#include "opencv2/gpu/gpu.hpp"
#include "cuda_runtime_api.h"

namespace cv
{
    namespace gpu
    {
        // This is only header file that depends on Cuda. All other headers are independent.
        // So if you use OpenCV binaries you do noot need to install Cuda Toolkit.
        // But of you wanna use GPU by yourself, may get cuda stream instance using the class below.
        // In this case you have to install Cuda Toolkit.
        struct StreamAccessor
        {
            CV_EXPORTS static cudaStream_t getStream(const CudaStream& stream);
        };
    }
}

#endif /* __OPENCV_GPU_STREAM_ACCESSOR_HPP__ */
\ No newline at end of file
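As the header comment says, this is the one public header that pulls in the CUDA Toolkit. A hypothetical interop sketch compiled with nvcc — my_kernel and run_on_opencv_stream are assumptions for illustration, not part of the commit:

    #include "opencv2/gpu/gpu.hpp"
    #include "opencv2/gpu/stream_accessor.hpp"

    __global__ void my_kernel(unsigned char* data, size_t step, int rows, int cols);  // defined elsewhere

    void run_on_opencv_stream(cv::gpu::GpuMat& img, const cv::gpu::CudaStream& s)
    {
        cudaStream_t raw = cv::gpu::StreamAccessor::getStream(s);

        dim3 threads(16, 16);
        dim3 grid((img.cols + 15) / 16, (img.rows + 15) / 16);
        // Work queued on 'raw' is ordered with the copies enqueued through CudaStream.
        my_kernel<<<grid, threads, 0, raw>>>(img.data, img.step, img.rows, img.cols);
    }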
modules/gpu/cuda/cuda_shared.hpp → modules/gpu/src/cuda/cuda_shared.hpp
...
@@ -44,31 +44,37 @@
 #define __OPENCV_CUDA_SHARED_HPP__

 #include "opencv2/gpu/devmem2d.hpp"
+#include "cuda_runtime_api.h"

 namespace cv
 {
     namespace gpu
     {
         typedef unsigned char uchar;
         typedef unsigned short ushort;
         typedef unsigned int uint;

+        extern "C" void error(const char *error_string, const char *file, const int line, const char *func = "");

         namespace impl
         {
             static inline int divUp(int a, int b) { return (a % b == 0) ? a/b : a/b + 1; }

             extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, DevMem2D& disp, int maxdisp, DevMem2D_<uint>& minSSD_buf);

             extern "C" void set_to_without_mask(const DevMem2D& mat, const double * scalar, int depth, int channels);
             extern "C" void set_to_with_mask(const DevMem2D& mat, const double * scalar, const DevMem2D& mask, int depth, int channels);
         }
     }
 }

-#ifdef __CUDACC__
-    #define cudaSafeCall(expr) { cudaError_t err = expr; if( cudaSuccess != err) cv::gpu::error(cudaGetErrorString(err), __FILE__, __LINE__); }
+#if defined(__GNUC__)
+    #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__, __func__);
+#else /* defined(__CUDACC__) || defined(__MSVC__) */
+    #define cudaSafeCall(expr) ___cudaSafeCall(expr, __FILE__, __LINE__)
+#endif

+static inline void ___cudaSafeCall(cudaError_t err, const char *file, const int line, const char *func = "")
+{
+    if (cudaSuccess != err)
+        cv::gpu::error(cudaGetErrorString(err), __FILE__, __LINE__, func);
+}

 #endif /* __OPENCV_CUDA_SHARED_HPP__ */
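divUp above is the usual rounding-up integer division used for grid sizing: divUp(10, 8) is 2, so the last block covers the remainder. A short sketch of the intended use, under the assumption of a 16x16 block and a hypothetical kernel:

    void launch(int rows, int cols)
    {
        using cv::gpu::impl::divUp;

        dim3 threads(16, 16);
        dim3 grid(divUp(cols, 16), divUp(rows, 16));   // enough blocks to cover every pixel
        // some_kernel<<<grid, threads>>>(...);        // hypothetical launch; bounds-check inside
    }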
modules/gpu/cuda/stereobm.cu → modules/gpu/src/cuda/stereobm.cu (file moved)
modules/gpu/src/cudastream.cpp

...
@@ -41,57 +41,119 @@
 //M*/

 #include "precomp.hpp"
 //#include "opencv2/gpu/stream_access.hpp"

 using namespace cv;
 using namespace cv::gpu;

-cv::gpu::CudaStream::CudaStream() //: impl( (Impl*)fastMalloc(sizeof(Impl)) )
-{
-    //cudaSafeCall( cudaStreamCreate( &impl->stream) );
-}
-
-cv::gpu::CudaStream::~CudaStream()
-{
-    if (impl)
-    {
-        cudaSafeCall( cudaStreamDestroy( *(cudaStream_t*)impl ) );
-        cv::fastFree( impl );
-    }
-}
-
-bool cv::gpu::CudaStream::queryIfComplete()
-{
-    //cudaError_t err = cudaStreamQuery( *(cudaStream_t*)impl );
-    //if (err == cudaSuccess)
-    //    return true;
-    //if (err == cudaErrorNotReady)
-    //    return false;
-    ////cudaErrorInvalidResourceHandle
-    //cudaSafeCall( err );
-    return true;
-}
-
-void cv::gpu::CudaStream::waitForCompletion()
-{
-    cudaSafeCall( cudaStreamSynchronize( *(cudaStream_t*)impl ) );
-}
-
-void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, Mat& dst)
-{
-    // cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, src.cols * src.elemSize(), src.rows, cudaMemcpyDeviceToHost,
-}
-
-void cv::gpu::CudaStream::enqueueUpload(const Mat& src, GpuMat& dst)
-{
-    CV_Assert(!"Not implemented");
-}
-
-void cv::gpu::CudaStream::enqueueCopy(const GpuMat& src, GpuMat& dst)
-{
-    CV_Assert(!"Not implemented");
-}
+#if !defined (HAVE_CUDA)
+
+void cv::gpu::CudaStream::create() { throw_nogpu(); }
+void cv::gpu::CudaStream::release() { throw_nogpu(); }
+cv::gpu::CudaStream::CudaStream() : impl(0) { throw_nogpu(); }
+cv::gpu::CudaStream::~CudaStream() { throw_nogpu(); }
+cv::gpu::CudaStream::CudaStream(const CudaStream& stream) { throw_nogpu(); }
+CudaStream& cv::gpu::CudaStream::operator=(const CudaStream& stream) { throw_nogpu(); return *this; }
+bool cv::gpu::CudaStream::queryIfComplete() { throw_nogpu(); return true; }
+void cv::gpu::CudaStream::waitForCompletion() { throw_nogpu(); }
+void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, Mat& dst) { throw_nogpu(); }
+void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, MatPL& dst) { throw_nogpu(); }
+void cv::gpu::CudaStream::enqueueUpload(const MatPL& src, GpuMat& dst) { throw_nogpu(); }
+void cv::gpu::CudaStream::enqueueUpload(const Mat& src, GpuMat& dst) { throw_nogpu(); }
+void cv::gpu::CudaStream::enqueueCopy(const GpuMat& src, GpuMat& dst) { throw_nogpu(); }
+void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& src, Scalar val) { throw_nogpu(); }
+void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& src, Scalar val, const GpuMat& mask) { throw_nogpu(); }
+void cv::gpu::CudaStream::enqueueConvert(const GpuMat& src, GpuMat& dst, int type, double a, double b) { throw_nogpu(); }
+
+#else /* !defined (HAVE_CUDA) */
+
+#include "opencv2/gpu/stream_accessor.hpp"
+
+struct CudaStream::Impl
+{
+    cudaStream_t stream;
+    int ref_counter;
+};
+
+namespace
+{
+    template<class S, class D> void devcopy(const S& src, D& dst, cudaStream_t s, cudaMemcpyKind k)
+    {
+        dst.create(src.size(), src.type());
+        size_t bwidth = src.cols * src.elemSize();
+        cudaSafeCall( cudaMemcpy2DAsync(dst.data, dst.step, src.data, src.step, bwidth, src.rows, k, s) );
+    };
+}
+
+CV_EXPORTS cudaStream_t cv::gpu::StreamAccessor::getStream(const CudaStream& stream) { return stream.impl->stream; };
+
+void cv::gpu::CudaStream::create()
+{
+    if (impl)
+        release();
+
+    cudaStream_t stream;
+    cudaSafeCall( cudaStreamCreate( &stream ) );
+
+    impl = (CudaStream::Impl*)fastMalloc(sizeof(CudaStream::Impl));
+
+    impl->stream = stream;
+    impl->ref_counter = 1;
+}
+
+void cv::gpu::CudaStream::release()
+{
+    if (impl && CV_XADD(&impl->ref_counter, -1) == 1)
+    {
+        cudaSafeCall( cudaStreamDestroy( impl->stream ) );
+        cv::fastFree( impl );
+    }
+}
+
+cv::gpu::CudaStream::CudaStream() : impl(0) { create(); }
+cv::gpu::CudaStream::~CudaStream() { release(); }
+
+cv::gpu::CudaStream::CudaStream(const CudaStream& stream) : impl(stream.impl)
+{
+    if (impl)
+        CV_XADD(&impl->ref_counter, 1);
+}
+
+CudaStream& cv::gpu::CudaStream::operator=(const CudaStream& stream)
+{
+    if (this != &stream)
+    {
+        if (stream.impl)
+            CV_XADD(&stream.impl->ref_counter, 1);
+
+        release();
+        impl = stream.impl;
+    }
+    return *this;
+}
+
+bool cv::gpu::CudaStream::queryIfComplete()
+{
+    cudaError_t err = cudaStreamQuery( impl->stream );
+
+    if (err == cudaErrorNotReady || err == cudaSuccess)
+        return err == cudaSuccess;
+
+    cudaSafeCall(err);
+}
+
+void cv::gpu::CudaStream::waitForCompletion() { cudaSafeCall( cudaStreamSynchronize( impl->stream ) ); }
+
+void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, Mat& dst)
+{
+    // if not -> allocation will be done, but after that dst will not point to page locked memory
+    CV_Assert(src.cols == dst.cols && src.rows == dst.rows && src.type() == dst.type() );
+    devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost);
+}
+void cv::gpu::CudaStream::enqueueDownload(const GpuMat& src, MatPL& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToHost); }
+
+void cv::gpu::CudaStream::enqueueUpload(const MatPL& src, GpuMat& dst){ devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); }
+void cv::gpu::CudaStream::enqueueUpload(const Mat& src, GpuMat& dst)  { devcopy(src, dst, impl->stream, cudaMemcpyHostToDevice); }
+void cv::gpu::CudaStream::enqueueCopy(const GpuMat& src, GpuMat& dst) { devcopy(src, dst, impl->stream, cudaMemcpyDeviceToDevice); }

 void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& src, Scalar val)
 {
     CV_Assert(!"Not implemented");
...
@@ -102,11 +164,10 @@ void cv::gpu::CudaStream::enqueueMemSet(const GpuMat& src, Scalar val, const Gpu
     CV_Assert(!"Not implemented");
 }

-void cv::gpu::CudaStream::enqueueConvert(const GpuMat& src, GpuMat& dst, int type)
+void cv::gpu::CudaStream::enqueueConvert(const GpuMat& src, GpuMat& dst, int type, double a, double b)
 {
     CV_Assert(!"Not implemented");
 }

 //struct cudaStream_t& cv::gpu::CudaStream::getStream() { return stream; }
 #endif /* !defined (HAVE_CUDA) */
\ No newline at end of file
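A design note on the new implementation: Impl is a ref-counted handle, so copies of a CudaStream share one cudaStream_t and the last release() destroys it. queryIfComplete() also makes non-blocking use possible; a hedged sketch, with the caller's buffers assumed to exist:

    void overlap_host_work(const cv::gpu::MatPL& pinned, cv::gpu::GpuMat& d_img)
    {
        cv::gpu::CudaStream stream;
        stream.enqueueUpload(pinned, d_img);   // async because 'pinned' is page-locked
        while (!stream.queryIfComplete())
        {
            // do unrelated host-side work here instead of blocking in waitForCompletion()
        }
    }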
modules/gpu/src/initialization.cpp

...
@@ -45,15 +45,18 @@
 using namespace cv;
 using namespace cv::gpu;

-#ifndef HAVE_CUDA
+#if !defined (HAVE_CUDA)

 CV_EXPORTS int cv::gpu::getCudaEnabledDeviceCount() { return 0; }
-CV_EXPORTS string cv::gpu::getDeviceName(int /*device*/) { cudaSafeCall(0); return 0; }
-CV_EXPORTS void cv::gpu::setDevice(int /*device*/) { cudaSafeCall(0); }
-CV_EXPORTS void cv::gpu::getComputeCapability(int /*device*/, int* /*major*/, int* /*minor*/) { cudaSafeCall(0); }
-CV_EXPORTS int cv::gpu::getNumberOfSMs(int /*device*/) { cudaSafeCall(0); return 0; }
+CV_EXPORTS string cv::gpu::getDeviceName(int /*device*/) { throw_nogpu(); return 0; }
+CV_EXPORTS void cv::gpu::setDevice(int /*device*/) { throw_nogpu(); }
+CV_EXPORTS int cv::gpu::getDevice() { throw_nogpu(); return 0; }
+CV_EXPORTS void cv::gpu::getComputeCapability(int /*device*/, int* /*major*/, int* /*minor*/) { throw_nogpu(); }
+CV_EXPORTS int cv::gpu::getNumberOfSMs(int /*device*/) { throw_nogpu(); return 0; }

-#else
+#else /* !defined (HAVE_CUDA) */

 CV_EXPORTS int cv::gpu::getCudaEnabledDeviceCount()
 {
...
@@ -73,6 +76,12 @@ CV_EXPORTS void cv::gpu::setDevice(int device)
 {
     cudaSafeCall( cudaSetDevice( device ) );
 }

+CV_EXPORTS int cv::gpu::getDevice()
+{
+    int device;
+    cudaSafeCall( cudaGetDevice( &device ) );
+    return device;
+}

 CV_EXPORTS void cv::gpu::getComputeCapability(int device, int* major, int* minor)
 {
...
@@ -90,4 +99,5 @@ CV_EXPORTS int cv::gpu::getNumberOfSMs(int device)
     return prop.multiProcessorCount;
 }

-#endif
\ No newline at end of file
+#endif /* !defined (HAVE_CUDA) */
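The intended call sequence for these device-query functions, as a small self-contained sketch (per the header comment, getCudaEnabledDeviceCount is the one call that never throws in a no-CUDA build):

    #include <iostream>
    #include "opencv2/gpu/gpu.hpp"

    int main()
    {
        if (cv::gpu::getCudaEnabledDeviceCount() == 0)
        {
            std::cout << "no CUDA-enabled devices" << std::endl;
            return 0;
        }

        cv::gpu::setDevice(0);

        int major, minor;
        cv::gpu::getComputeCapability(cv::gpu::getDevice(), &major, &minor);
        std::cout << cv::gpu::getDeviceName(0) << ": compute capability "
                  << major << "." << minor << ", "
                  << cv::gpu::getNumberOfSMs(0) << " SMs" << std::endl;
        return 0;
    }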
modules/gpu/src/gpumat.cpp → modules/gpu/src/matrix_operations.cpp
...
@@ -45,23 +45,53 @@
 using namespace cv;
 using namespace cv::gpu;

 ////////////////////////////////////////////////////////////////////////
 //////////////////////////////// GpuMat ////////////////////////////////
 ////////////////////////////////////////////////////////////////////////

+#if !defined (HAVE_CUDA)
+
+namespace cv
+{
+    namespace gpu
+    {
+        void GpuMat::upload(const Mat& /*m*/) { throw_nogpu(); }
+        void GpuMat::download(cv::Mat& /*m*/) const { throw_nogpu(); }
+        void GpuMat::copyTo( GpuMat& /*m*/ ) const { throw_nogpu(); }
+        void GpuMat::copyTo( GpuMat& /*m*/, const GpuMat& /* mask */ ) const { throw_nogpu(); }
+        void GpuMat::convertTo( GpuMat& /*m*/, int /*rtype*/, double /*alpha*/, double /*beta*/ ) const { throw_nogpu(); }
+        GpuMat& GpuMat::operator = (const Scalar& /*s*/) { throw_nogpu(); return *this; }
+        GpuMat& GpuMat::setTo(const Scalar& /*s*/, const GpuMat& /*mask*/) { throw_nogpu(); return *this; }
+        GpuMat GpuMat::reshape(int /*new_cn*/, int /*new_rows*/) const { throw_nogpu(); return GpuMat(); }
+        void GpuMat::create(int /*_rows*/, int /*_cols*/, int /*_type*/) { throw_nogpu(); }
+        void GpuMat::release() { throw_nogpu(); }
+
+        void MatPL::create(int /*_rows*/, int /*_cols*/, int /*_type*/) { throw_nogpu(); }
+        void MatPL::release() { throw_nogpu(); }
+    }
+}
+
+#else /* !defined (HAVE_CUDA) */
+
-void GpuMat::upload(const Mat& m)
+void cv::gpu::GpuMat::upload(const Mat& m)
 {
     CV_DbgAssert(!m.empty());
     create(m.size(), m.type());
     cudaSafeCall( cudaMemcpy2D(data, step, m.data, m.step, cols * elemSize(), rows, cudaMemcpyHostToDevice) );
 }

-void GpuMat::download(cv::Mat& m) const
+void cv::gpu::GpuMat::download(cv::Mat& m) const
 {
     CV_DbgAssert(!this->empty());
     m.create(size(), type());
     cudaSafeCall( cudaMemcpy2D(m.data, m.step, data, step, cols * elemSize(), rows, cudaMemcpyDeviceToHost) );
 }

-void GpuMat::copyTo( GpuMat& m ) const
+void cv::gpu::GpuMat::copyTo( GpuMat& m ) const
 {
     CV_DbgAssert(!this->empty());
     m.create(size(), type());
...
@@ -69,45 +99,30 @@ void GpuMat::copyTo( GpuMat& m ) const
     cudaSafeCall( cudaThreadSynchronize() );
 }

-void GpuMat::copyTo( GpuMat& /*m*/, const GpuMat& /* mask */) const
+void cv::gpu::GpuMat::copyTo( GpuMat& /*m*/, const GpuMat& /* mask */) const
 {
     CV_Assert(!"Not implemented");
 }

-void GpuMat::convertTo( GpuMat& /*m*/, int /*rtype*/, double /*alpha*/, double /*beta*/ ) const
+void cv::gpu::GpuMat::convertTo( GpuMat& /*m*/, int /*rtype*/, double /*alpha*/, double /*beta*/ ) const
 {
     CV_Assert(!"Not implemented");
 }

-GpuMat& GpuMat::operator = (const Scalar& s)
+GpuMat& cv::gpu::GpuMat::operator = (const Scalar& /*s*/)
 {
-    cv::gpu::impl::set_to_without_mask(*this, s.val, this->depth(), this->channels());
+    CV_Assert(!"Not implemented");
     return *this;
 }

-GpuMat& GpuMat::setTo(const Scalar& s, const GpuMat& mask)
+GpuMat& cv::gpu::GpuMat::setTo(const Scalar& /*s*/, const GpuMat& /*mask*/)
 {
-    CV_Assert(mask.type() == CV_32F);
-
-    CV_DbgAssert(!this->empty());
-
-    this->channels();
-    this->depth();
-
-    if (mask.empty())
-    {
-        cv::gpu::impl::set_to_without_mask(*this, s.val, this->depth(), this->channels());
-    }
-    else
-    {
-        cv::gpu::impl::set_to_with_mask(*this, s.val, mask, this->depth(), this->channels());
-    }
+    CV_Assert(!"Not implemented");

     return *this;
 }

-GpuMat GpuMat::reshape(int new_cn, int new_rows) const
+GpuMat cv::gpu::GpuMat::reshape(int new_cn, int new_rows) const
 {
     GpuMat hdr = *this;
...
@@ -148,7 +163,7 @@ GpuMat GpuMat::reshape(int new_cn, int new_rows) const
     return hdr;
 }

-void GpuMat::create(int _rows, int _cols, int _type)
+void cv::gpu::GpuMat::create(int _rows, int _cols, int _type)
 {
     _type &= TYPE_MASK;
     if( rows == _rows && cols == _cols && type() == _type && data )
...
@@ -162,7 +177,7 @@ void GpuMat::create(int _rows, int _cols, int _type)
         rows = _rows;
         cols = _cols;

         size_t esz = elemSize();

         void *dev_ptr;
         cudaSafeCall( cudaMallocPitch(&dev_ptr, &step, esz * cols, rows) );
...
@@ -174,19 +189,19 @@ void GpuMat::create(int _rows, int _cols, int _type)
         size_t nettosize = (size_t)_nettosize;

         datastart = data = (uchar*)dev_ptr;
         dataend = data + nettosize;

         refcount = (int*)fastMalloc(sizeof(*refcount));
         *refcount = 1;
     }
 }

-void GpuMat::release()
+void cv::gpu::GpuMat::release()
 {
     if( refcount && CV_XADD(refcount, -1) == 1 )
     {
         fastFree(refcount);
         cudaSafeCall( cudaFree(datastart) );
     }
     data = datastart = dataend = 0;
     step = rows = cols = 0;
...
@@ -194,7 +209,52 @@ void GpuMat::release()
 }

 ///////////////////////////////////////////////////////////////////////
 //////////////////////////////// MatPL ////////////////////////////////
 ///////////////////////////////////////////////////////////////////////

+void cv::gpu::MatPL::create(int _rows, int _cols, int _type)
+{
+    _type &= TYPE_MASK;
+    if( rows == _rows && cols == _cols && type() == _type && data )
+        return;
+    if( data )
+        release();
+    CV_DbgAssert( _rows >= 0 && _cols >= 0 );
+    if( _rows > 0 && _cols > 0 )
+    {
+        flags = Mat::MAGIC_VAL + Mat::CONTINUOUS_FLAG + _type;
+        rows = _rows;
+        cols = _cols;
+        step = elemSize()*cols;
+        int64 _nettosize = (int64)step*rows;
+        size_t nettosize = (size_t)_nettosize;
+        if( _nettosize != (int64)nettosize )
+            CV_Error(CV_StsNoMem, "Too big buffer is allocated");
+        size_t datasize = alignSize(nettosize, (int)sizeof(*refcount));
+
+        //datastart = data = (uchar*)fastMalloc(datasize + sizeof(*refcount));
+        void *ptr;
+        cudaSafeCall( cudaHostAlloc( &ptr, datasize, cudaHostAllocDefault) );
+
+        datastart = data = (uchar*)ptr;
+        dataend = data + nettosize;
+
+        refcount = (int*)cv::fastMalloc(sizeof(*refcount));
+        *refcount = 1;
+    }
+}
+
+void cv::gpu::MatPL::release()
+{
+    if( refcount && CV_XADD(refcount, -1) == 1 )
+    {
+        cudaSafeCall( cudaFreeHost(datastart) );
+        fastFree(refcount);
+    }
+    data = datastart = dataend = 0;
+    step = rows = cols = 0;
+    refcount = 0;
+}

 #endif /* !defined (HAVE_CUDA) */
\ No newline at end of file
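Because GpuMat::create allocates with cudaMallocPitch, step can exceed cols * elemSize(): each row is padded for alignment. Device code must therefore address rows via the pitch, as in this minimal sketch (row_ptr is a hypothetical helper, not part of the commit):

    __device__ unsigned char* row_ptr(unsigned char* data, size_t step, int y)
    {
        return data + y * step;   // rows are 'step' (pitch) bytes apart, not cols * elemSize()
    }

MatPL::create, by contrast, uses cudaHostAlloc with no pitch, so page-locked host buffers stay continuous and convert cleanly to cv::Mat headers.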
modules/gpu/src/precomp.cpp

...
@@ -44,7 +44,13 @@
 /* End of file. */

-extern "C" void cv::gpu::error( const char *error_string, const char *file, const int line, const char *func)
-{
-    cv::error( cv::Exception(CV_GpuApiCallError, error_string, func, file, line) );
-}
+namespace cv
+{
+    namespace gpu
+    {
+        extern "C" void error(const char *error_string, const char *file, const int line, const char *func)
+        {
+            cv::error( cv::Exception(CV_GpuApiCallError, error_string, func, file, line) );
+        }
+    }
+}
modules/gpu/src/precomp.hpp

...
@@ -53,30 +53,17 @@
 #include <iostream>
 #include "opencv2/gpu/gpu.hpp"
-#include "cuda_shared.hpp"

-#ifndef HAVE_CUDA
-    #define cudaSafeCall(expr) CV_Error(CV_GpuNotFound, "The library is compilled with no GPU support")
-    #define cudaCallerSafeCall(expr) CV_Error(CV_GpuNotFound, "The library is compilled with no GPU support")
+#if defined(HAVE_CUDA)

-#else /* HAVE_CUDA */
+    #include "cuda_shared.hpp"
+    #include "cuda_runtime_api.h"

     #if _MSC_VER >= 1200
         #pragma warning (disable : 4100 4211 4201 4408)
     #endif
-    #include "cuda_runtime_api.h"
-
-    #ifdef __GNUC__
-        #define cudaSafeCall(expr) { cudaError_t err = expr; if(cudaSuccess != err) cv::gpu::error(cudaGetErrorString(err), __FILE__, __LINE__, __func__); }
-    #else
-        #define cudaSafeCall(expr) { cudaError_t err = expr; if(cudaSuccess != err) cv::gpu::error(cudaGetErrorString(err), __FILE__, __LINE__); }
-    #endif
-
-    #define cudaCallerSafeCall(expr) expr;
+#else /* defined(HAVE_CUDA) */

-#endif /* HAVE_CUDA */
+    static inline void throw_nogpu() { CV_Error(CV_GpuNotFound, "The library is compilled with no GPU support"); }
+
+#endif /* defined(HAVE_CUDA) */

 #endif /* __OPENCV_PRECOMP_H__ */
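throw_nogpu() is the heart of the refactoring this commit performs: instead of redefining cudaSafeCall away, every GPU entry point in a no-CUDA build compiles to a stub that raises CV_GpuNotFound at run time, while still linking. A generic sketch of the pattern the .cpp files below follow (someGpuFunction is hypothetical, assumed declared in gpu.hpp):

    #if !defined (HAVE_CUDA)

    // No-CUDA build: the symbol exists, so user code links, but calling it
    // reports CV_GpuNotFound via throw_nogpu().
    void cv::gpu::someGpuFunction() { throw_nogpu(); }

    #else /* !defined (HAVE_CUDA) */

    void cv::gpu::someGpuFunction()
    {
        // real CUDA implementation goes here
    }

    #endif /* !defined (HAVE_CUDA) */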
modules/gpu/src/stereobm_gpu.cpp

...
@@ -44,15 +44,45 @@
 using namespace cv;
 using namespace cv::gpu;

+#if !defined (HAVE_CUDA)
+
+cv::gpu::StereoBM_GPU::StereoBM_GPU() { throw_nogpu(); }
+cv::gpu::StereoBM_GPU::StereoBM_GPU(int preset_, int ndisparities_) { throw_nogpu(); }
+
+bool cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable() { throw_nogpu(); return false; }
+void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity) { throw_nogpu(); }
+void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream& stream) { throw_nogpu(); }
+
+#else /* !defined (HAVE_CUDA) */
+
-StereoBM_GPU::StereoBM_GPU() : preset(BASIC_PRESET), ndisp(64) {}
-StereoBM_GPU::StereoBM_GPU(int preset_, int ndisparities_) : preset(preset_), ndisp(ndisparities_)
+cv::gpu::StereoBM_GPU::StereoBM_GPU() : preset(BASIC_PRESET), ndisp(64) {}
+cv::gpu::StereoBM_GPU::StereoBM_GPU(int preset_, int ndisparities_) : preset(preset_), ndisp(ndisparities_)
 {
     const int max_supported_ndisp = 1 << (sizeof(unsigned char) * 8);

     CV_Assert(ndisp <= max_supported_ndisp);
     CV_Assert(ndisp % 8 == 0);
 }

+bool cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable()
+{
+    if (0 == getCudaEnabledDeviceCount())
+        return false;
+
+    int device = getDevice();
+
+    int minor, major;
+    getComputeCapability(device, &major, &minor);
+    int numSM = getNumberOfSMs(device);
+
+    if (major > 1 || numSM > 16)
+        return true;
+
+    return false;
+}
+
-void StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity) const
+void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity)
 {
     CV_DbgAssert(left.rows == right.rows && left.cols == right.cols);
     CV_DbgAssert(left.type() == CV_8UC1);
...
@@ -67,6 +97,13 @@ void StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat&
     }

     DevMem2D disp = disparity;
-    DevMem2D_<uint> mssd = minSSD;
-    cudaCallerSafeCall( impl::stereoBM_GPU(left, right, disp, ndisp, mssd) );
+    DevMem2D_<unsigned int> mssd = minSSD;
+    impl::stereoBM_GPU(left, right, disp, ndisp, mssd);
 }

+void cv::gpu::StereoBM_GPU::operator() ( const GpuMat& left, const GpuMat& right, GpuMat& disparity, const CudaStream& stream)
+{
+    CV_Assert(!"Not implemented");
+}
+
 #endif /* !defined (HAVE_CUDA) */
\ No newline at end of file
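A usage sketch for the class above, guarded by the new heuristic — assuming a CUDA build and 8-bit single-channel rectified inputs; gpu_disparity is a hypothetical wrapper, not part of the commit:

    #include "opencv2/gpu/gpu.hpp"

    void gpu_disparity(const cv::Mat& left, const cv::Mat& right, cv::Mat& disparity)
    {
        if (!cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable())
            return;                               // caller falls back to the CPU StereoBM

        cv::gpu::GpuMat d_left(left), d_right(right), d_disp;   // blocking uploads
        cv::gpu::StereoBM_GPU bm;                 // BASIC_PRESET, ndisp = 64 (multiple of 8, <= 256)
        bm(d_left, d_right, d_disp);              // inputs must be CV_8UC1; output is CV_8U
        d_disp.download(disparity);
    }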