Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
574b3f94
Commit
574b3f94
authored
Jan 20, 2011
by
Alexey Spizhevoy
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
updated gpu initialization functions, added compile-time error on CC 1.0
parent
6187b971
Hide whitespace changes
Inline
Side-by-side
Showing
6 changed files
with
71 additions
and
147 deletions
+71
-147
CMakeLists.txt
CMakeLists.txt
+18
-29
cvconfig.h.cmake
cvconfig.h.cmake
+8
-15
gpu_image_processing.tex
doc/gpu_image_processing.tex
+2
-2
gpu_object_detection.tex
doc/gpu_object_detection.tex
+13
-13
initialization.cpp
modules/gpu/src/initialization.cpp
+26
-88
precomp.hpp
modules/gpu/src/precomp.hpp
+4
-0
No files found.
CMakeLists.txt
View file @
574b3f94
...
...
@@ -708,47 +708,36 @@ if(WITH_CUDA)
message
(
STATUS
"CUDA detected: "
${
CUDA_VERSION
}
)
set
(
CUDA_ARCH_GPU
"1.3 2.0"
CACHE STRING
"Specify 'real' GPU architectures to build binaries for"
)
set
(
CUDA_ARCH_PTX
"1.1 1.3"
CACHE STRING
"Specify 'virtual' PTX architectures to build PTX intermediate code for"
)
set
(
CUDA_ARCH_PTX
"1.1 1.3"
CACHE STRING
"Specify 'virtual' PTX architectures to build PTX intermediate code for"
)
# Architectures to be searched for in user's input
set
(
CUDA_ARCH_ALL 1.0 1.1 1.2 1.3 2.0 2.1
)
# These variables are used in config templates
string
(
REGEX REPLACE
"
\\
."
""
ARCH_GPU_NO_POINTS
"
${
CUDA_ARCH_GPU
}
"
)
string
(
REGEX REPLACE
"
\\
."
""
ARCH_PTX_NO_POINTS
"
${
CUDA_ARCH_PTX
}
"
)
# Parse user's input
foreach
(
ARCH IN LISTS CUDA_ARCH_ALL
)
string
(
REGEX MATCH
${
ARCH
}
ARCH_GPU_MATCH
"
${
CUDA_ARCH_GPU
}
"
)
string
(
REGEX MATCH
${
ARCH
}
ARCH_PTX_MATCH
"
${
CUDA_ARCH_PTX
}
"
)
string
(
REGEX REPLACE
"
\\
."
""
ARCH_GPU_AS_NUM
"
${
ARCH_GPU_MATCH
}
"
)
string
(
REGEX REPLACE
"
\\
."
""
ARCH_PTX_AS_NUM
"
${
ARCH_PTX_MATCH
}
"
)
# Define variables indicating the architectures specified by user
if
(
NOT
${
ARCH_GPU_AS_NUM
}
STREQUAL
""
)
set
(
OPENCV_ARCH_GPU_
${
ARCH_GPU_AS_NUM
}
1
)
endif
()
if
(
NOT
${
ARCH_PTX_AS_NUM
}
STREQUAL
""
)
set
(
OPENCV_ARCH_PTX_
${
ARCH_PTX_AS_NUM
}
1
)
endif
()
endforeach
()
# Check if user specified 1.0 compute capability
string
(
REGEX MATCH
"1.0"
HAS_ARCH_10
"
${
CUDA_ARCH_GPU
}
${
CUDA_ARCH_PTX
}
"
)
if
(
NOT
${
HAS_ARCH_10
}
STREQUAL
""
)
set
(
OPENCV_ARCH_GPU_OR_PTX_10 1
)
endif
()
set
(
NVCC_FLAGS_EXTRA
""
)
# Tell nvcc to add binaries for the specified GPUs
string
(
REGEX REPLACE
"
\\
."
""
CUDA_ARCH_GPU
"
${
CUDA_ARCH_GPU
}
"
)
string
(
REGEX MATCHALL
"[0-9]+"
CUDA_ARCH_GPU_LIST
"
${
CUDA_ARCH_GPU
}
"
)
foreach
(
ARCH_GPU IN LISTS CUDA_ARCH_GPU_LIST
)
set
(
NVCC_FLAGS_EXTRA
${
NVCC_FLAGS_EXTRA
}
-gencode arch=compute_
${
ARCH_GPU
}
,code=sm_
${
ARCH_GPU
}
)
string
(
REGEX MATCHALL
"[0-9]+"
ARCH_LIST
"
${
ARCH_GPU_NO_POINTS
}
"
)
foreach
(
ARCH IN LISTS ARCH_LIST
)
set
(
NVCC_FLAGS_EXTRA
${
NVCC_FLAGS_EXTRA
}
-gencode arch=compute_
${
ARCH
}
,code=sm_
${
ARCH
}
)
endforeach
()
# Tell nvcc to add PTX intermediate code for the specified architectures
string
(
REGEX REPLACE
"
\\
."
""
CUDA_ARCH_PTX
"
${
CUDA_ARCH_PTX
}
"
)
string
(
REGEX MATCHALL
"[0-9]+"
CUDA_ARCH_PTX_LIST
"
${
CUDA_ARCH_PTX
}
"
)
foreach
(
ARCH_PTX IN LISTS CUDA_ARCH_PTX_LIST
)
set
(
NVCC_FLAGS_EXTRA
${
NVCC_FLAGS_EXTRA
}
-gencode arch=compute_
${
ARCH_PTX
}
,code=compute_
${
ARCH_PTX
}
)
string
(
REGEX MATCHALL
"[0-9]+"
ARCH_LIST
"
${
ARCH_PTX_NO_POINTS
}
"
)
foreach
(
ARCH IN LISTS ARCH_LIST
)
set
(
NVCC_FLAGS_EXTRA
${
NVCC_FLAGS_EXTRA
}
-gencode arch=compute_
${
ARCH
}
,code=compute_
${
ARCH
}
)
endforeach
()
set
(
CUDA_NVCC_FLAGS
${
CUDA_NVCC_FLAGS
}
${
NVCC_FLAGS_EXTRA
}
)
message
(
STATUS
"CUDA NVCC flags:
${
CUDA_NVCC_FLAGS
}
"
)
set
(
CUDA_NVCC_FLAGS
${
CUDA_NVCC_FLAGS
}
${
NVCC_FLAGS_EXTRA
}
)
set
(
OpenCV_CUDA_CC
"
${
NVCC_FLAGS_EXTRA
}
"
)
message
(
STATUS
"CUDA NVCC flags:
${
CUDA_NVCC_FLAGS
}
"
)
endif
()
endif
()
...
...
cvconfig.h.cmake
View file @
574b3f94
...
...
@@ -163,21 +163,14 @@
/* NVidia Cuda Runtime API*/
#cmakedefine HAVE_CUDA
/* Compile for 'real' NVIDIA GPU architecture */
#cmakedefine OPENCV_ARCH_GPU_10
#cmakedefine OPENCV_ARCH_GPU_11
#cmakedefine OPENCV_ARCH_GPU_12
#cmakedefine OPENCV_ARCH_GPU_13
#cmakedefine OPENCV_ARCH_GPU_20
#cmakedefine OPENCV_ARCH_GPU_21
/* Compile for 'virtual' NVIDIA PTX architecture */
#cmakedefine OPENCV_ARCH_PTX_10
#cmakedefine OPENCV_ARCH_PTX_11
#cmakedefine OPENCV_ARCH_PTX_12
#cmakedefine OPENCV_ARCH_PTX_13
#cmakedefine OPENCV_ARCH_PTX_20
#cmakedefine OPENCV_ARCH_PTX_21
/* Compile for 'real' NVIDIA GPU architectures */
#define OPENCV_ARCH_GPU "${ARCH_GPU_NO_POINTS}"
/* Compile for 'virtual' NVIDIA PTX architectures */
#define OPENCV_ARCH_PTX "${ARCH_PTX_NO_POINTS}"
/* Create PTX or CUBIN for 1.0 compute capability */
#cmakedefine OPENCV_ARCH_GPU_OR_PTX_10
/* VideoInput library */
#cmakedefine HAVE_VIDEOINPUT
...
...
doc/gpu_image_processing.tex
View file @
574b3f94
...
...
@@ -232,10 +232,10 @@ private:
\cvCppFunc
{
gpu::ConvolveBuf::ConvolveBuf
}
\cvdefCpp
{
ConvolveBuf();
}
\cvdefCpp
{
ConvolveBuf
::ConvolveBuf
();
}
Constructs an empty buffer which will be properly resized after the first call to the convolve function.
\cvdefCpp
{
ConvolveBuf(Size image
\_
size, Size templ
\_
size);
}
\cvdefCpp
{
ConvolveBuf
::ConvolveBuf
(Size image
\_
size, Size templ
\_
size);
}
Constructs a buffer for the convolve function with the respective arguments.
...
...
doc/gpu_object_detection.tex
View file @
574b3f94
...
...
@@ -82,13 +82,13 @@ Creates HOG descriptor and detector.
\cvCppFunc
{
gpu::HOGDescriptor::getDescriptorSize
}
Returns number of coefficients required for the classification.
\cvdefCpp
{
size
\_
t getDescriptorSize() const;
}
\cvdefCpp
{
size
\_
t
HOGDescriptor::
getDescriptorSize() const;
}
\cvCppFunc
{
gpu::HOGDescriptor::getBlockHistogramSize
}
Returns block histogram size.
\cvdefCpp
{
size
\_
t getBlockHistogramSize() const;
}
\cvdefCpp
{
size
\_
t
HOGDescriptor::
getBlockHistogramSize() const;
}
\cvCppFunc
{
gpu::HOGDescriptor::setSVMDetector
}
...
...
@@ -100,25 +100,25 @@ Sets coefficients for the linear SVM classifier.
\cvCppFunc
{
gpu::HOGDescriptor::getDefaultPeopleDetector
}
Returns coefficients of the classifier trained for people detection (for default window size).
\cvdefCpp
{
static vector<float> getDefaultPeopleDetector();
}
\cvdefCpp
{
static vector<float>
HOGDescriptor::
getDefaultPeopleDetector();
}
\cvCppFunc
{
gpu::HOGDescriptor::getPeopleDetector48x96
}
Returns coefficients of the classifier trained for people detection (for 48x96 windows).
\cvdefCpp
{
static vector<float> getPeopleDetector48x96();
}
\cvdefCpp
{
static vector<float>
HOGDescriptor::
getPeopleDetector48x96();
}
\cvCppFunc
{
gpu::HOGDescriptor::getPeopleDetector64x128
}
Returns coefficients of the classifier trained for people detection (for 64x128 windows).
\cvdefCpp
{
static vector<float> getPeopleDetector64x128();
}
\cvdefCpp
{
static vector<float>
HOGDescriptor::
getPeopleDetector64x128();
}
\cvCppFunc
{
gpu::HOGDescriptor::detect
}
Performs object detection without multiscale window.
\cvdefCpp
{
void detect(const GpuMat
\&
img, vector<Point>
\&
found
\_
locations,
\par
\cvdefCpp
{
void
HOGDescriptor::
detect(const GpuMat
\&
img, vector<Point>
\&
found
\_
locations,
\par
double hit
\_
threshold=0, Size win
\_
stride=Size(),
\par
Size padding=Size());
}
...
...
@@ -134,10 +134,10 @@ Perfroms object detection without multiscale window.
\cvCppFunc
{
gpu::HOGDescriptor::detectMultiScale
}
Performs object detection with multiscale window.
\cvdefCpp
{
void
detectMultiScale(const GpuMat
\&
img, vector<Rect>
\&
found
\_
locations
,
\par
double hit
\_
threshold=0, Size win
\_
stride=Size()
,
\par
Size padding=Size(), double scale0=1.05
,
\par
int group
\_
threshold=2);
}
\cvdefCpp
{
void
HOGDescriptor::detectMultiScale(const GpuMat
\&
img
,
\par
vector<Rect>
\&
found
\_
locations, double hit
\_
threshold=0
,
\par
Size win
\_
stride=Size(), Size padding=Size()
,
\par
double scale0=1.05,
int group
\_
threshold=2);
}
\begin{description}
\cvarg
{
img
}{
Source image. See
\cvCppCross
{
gpu::HOGDescriptor::detect
}
for type limitations.
}
...
...
@@ -154,9 +154,9 @@ See \cvCppCross{groupRectangles}.}
\cvCppFunc
{
gpu::HOGDescriptor::getDescriptors
}
Returns block descriptors computed for the whole image. It's mainly used for classifier learning purposes.
\cvdefCpp
{
void
getDescriptors(const GpuMat
\&
img, Size win
\_
stride
,
\par
GpuMat
\&
descriptors,
\par
int descr
\_
format=DESCR
\_
FORMAT
\_
COL
\_
BY
\_
COL);
}
\cvdefCpp
{
void
HOGDescriptor::getDescriptors(const GpuMat
\&
img
,
\par
Size win
\_
stride,
GpuMat
\&
descriptors,
\par
int descr
\_
format=DESCR
\_
FORMAT
\_
COL
\_
BY
\_
COL);
}
\begin{description}
\cvarg
{
img
}{
Source image. See
\cvCppCross
{
gpu::HOGDescriptor::detect
}
for type limitations.
}
...
...
modules/gpu/src/initialization.cpp
View file @
574b3f94
...
...
@@ -41,6 +41,7 @@
//M*/
#include "precomp.hpp"
#include <functional>
using
namespace
cv
;
using
namespace
cv
::
gpu
;
...
...
@@ -58,12 +59,12 @@ CV_EXPORTS void cv::gpu::getGpuMemInfo(size_t& /*free*/, size_t& /*total*/) { t
CV_EXPORTS
bool
cv
::
gpu
::
hasNativeDoubleSupport
(
int
/*device*/
)
{
throw_nogpu
();
return
false
;
}
CV_EXPORTS
bool
cv
::
gpu
::
hasAtomicsSupport
(
int
/*device*/
)
{
throw_nogpu
();
return
false
;
}
CV_EXPORTS
bool
cv
::
gpu
::
hasPtxVersion
(
int
major
,
int
minor
)
{
throw_nogpu
();
return
false
;
}
CV_EXPORTS
bool
cv
::
gpu
::
hasLessOrEqualPtxVersion
(
int
major
,
int
minor
)
{
throw_nogpu
();
return
false
;
}
CV_EXPORTS
bool
cv
::
gpu
::
hasGreaterOrEqualPtxVersion
(
int
major
,
int
minor
)
{
throw_nogpu
();
return
false
;
}
CV_EXPORTS
bool
cv
::
gpu
::
hasCubinVersion
(
int
major
,
int
minor
)
{
throw_nogpu
();
return
false
;
}
CV_EXPORTS
bool
cv
::
gpu
::
hasGreaterOrEqualCubinVersion
(
int
major
,
int
minor
)
{
throw_nogpu
();
return
false
;
}
CV_EXPORTS
bool
cv
::
gpu
::
hasVersion
(
int
major
,
int
minor
)
{
throw_nogpu
();
return
false
;
}
CV_EXPORTS
bool
cv
::
gpu
::
hasGreaterOrEqualVersion
(
int
major
,
int
minor
)
{
throw_nogpu
();
return
false
;
}
CV_EXPORTS
bool
cv
::
gpu
::
hasLessOrEqualPtxVersion
(
int
major
,
int
minor
)
{
return
false
;
}
CV_EXPORTS
bool
cv
::
gpu
::
hasGreaterOrEqualPtxVersion
(
int
major
,
int
minor
)
{
return
false
;
}
CV_EXPORTS
bool
cv
::
gpu
::
hasCubinVersion
(
int
major
,
int
minor
)
{
return
false
;
}
CV_EXPORTS
bool
cv
::
gpu
::
hasGreaterOrEqualCubinVersion
(
int
major
,
int
minor
)
{
return
false
;
}
CV_EXPORTS
bool
cv
::
gpu
::
hasVersion
(
int
major
,
int
minor
)
{
return
false
;
}
CV_EXPORTS
bool
cv
::
gpu
::
hasGreaterOrEqualVersion
(
int
major
,
int
minor
)
{
return
false
;
}
CV_EXPORTS
bool
cv
::
gpu
::
isCompatibleWith
(
int
device
)
{
throw_nogpu
();
return
false
;
}
...
...
@@ -142,118 +143,55 @@ CV_EXPORTS bool cv::gpu::hasAtomicsSupport(int device)
namespace
{
template
<
typename
Comparer
>
bool
c
heckPtxVersion
(
int
major
,
int
minor
,
Comparer
cmp
)
bool
c
ompare
(
const
std
::
string
&
str
,
int
x
,
Comparer
cmp
)
{
#ifdef OPENCV_ARCH_PTX_10
if
(
cmp
(
1
,
0
,
major
,
minor
))
return
true
;
#endif
#ifdef OPENCV_ARCH_PTX_11
if
(
cmp
(
1
,
1
,
major
,
minor
))
return
true
;
#endif
#ifdef OPENCV_ARCH_PTX_12
if
(
cmp
(
1
,
2
,
major
,
minor
))
return
true
;
#endif
#ifdef OPENCV_ARCH_PTX_13
if
(
cmp
(
1
,
3
,
major
,
minor
))
return
true
;
#endif
#ifdef OPENCV_ARCH_PTX_20
if
(
cmp
(
2
,
0
,
major
,
minor
))
return
true
;
#endif
#ifdef OPENCV_ARCH_PTX_21
if
(
cmp
(
2
,
1
,
major
,
minor
))
return
true
;
#endif
return
false
;
}
template
<
typename
Comparer
>
bool
checkCubinVersion
(
int
major
,
int
minor
,
Comparer
cmp
)
{
#ifdef OPENCV_ARCH_GPU_10
if
(
cmp
(
1
,
0
,
major
,
minor
))
return
true
;
#endif
#ifdef OPENCV_ARCH_GPU_11
if
(
cmp
(
1
,
1
,
major
,
minor
))
return
true
;
#endif
std
::
stringstream
stream
(
str
);
#ifdef OPENCV_ARCH_GPU_12
if
(
cmp
(
1
,
2
,
major
,
minor
))
return
true
;
#endif
#ifdef OPENCV_ARCH_GPU_13
if
(
cmp
(
1
,
3
,
major
,
minor
))
return
true
;
#endif
#ifdef OPENCV_ARCH_GPU_20
if
(
cmp
(
2
,
0
,
major
,
minor
))
return
true
;
#endif
#ifdef OPENCV_ARCH_GPU_21
if
(
cmp
(
2
,
1
,
major
,
minor
))
return
true
;
#endif
return
false
;
}
int
val
;
stream
>>
val
;
struct
ComparerEqual
{
bool
operator
()(
int
lhs1
,
int
lhs2
,
int
rhs1
,
int
rhs2
)
const
{
return
lhs1
==
rhs1
&&
lhs2
==
rhs2
;
}
};
struct
ComparerLessOrEqual
{
bool
operator
()(
int
lhs1
,
int
lhs2
,
int
rhs1
,
int
rhs2
)
const
while
(
!
stream
.
eof
()
&&
!
stream
.
fail
())
{
return
lhs1
<
rhs1
||
(
lhs1
==
rhs1
&&
lhs2
<=
rhs2
);
if
(
cmp
(
val
,
x
))
return
true
;
stream
>>
val
;
}
};
struct
ComparerGreaterOrEqual
{
bool
operator
()(
int
lhs1
,
int
lhs2
,
int
rhs1
,
int
rhs2
)
const
{
return
lhs1
>
rhs1
||
(
lhs1
==
rhs1
&&
lhs2
>=
rhs2
);
}
};
return
false
;
}
}
CV_EXPORTS
bool
cv
::
gpu
::
hasPtxVersion
(
int
major
,
int
minor
)
{
return
checkPtxVersion
(
major
,
minor
,
ComparerEqual
());
return
::
compare
(
OPENCV_ARCH_PTX
,
major
*
10
+
minor
,
std
::
equal_to
<
int
>
());
}
CV_EXPORTS
bool
cv
::
gpu
::
hasLessOrEqualPtxVersion
(
int
major
,
int
minor
)
{
return
checkPtxVersion
(
major
,
minor
,
ComparerLessOrEqual
());
return
::
compare
(
OPENCV_ARCH_PTX
,
major
*
10
+
minor
,
std
::
less_equal
<
int
>
());
}
CV_EXPORTS
bool
cv
::
gpu
::
hasGreaterOrEqualPtxVersion
(
int
major
,
int
minor
)
{
return
checkPtxVersion
(
major
,
minor
,
ComparerGreaterOrEqual
());
return
::
compare
(
OPENCV_ARCH_PTX
,
major
*
10
+
minor
,
std
::
greater_equal
<
int
>
());
}
CV_EXPORTS
bool
cv
::
gpu
::
hasCubinVersion
(
int
major
,
int
minor
)
{
return
checkCubinVersion
(
major
,
minor
,
ComparerEqual
());
return
::
compare
(
OPENCV_ARCH_GPU
,
major
*
10
+
minor
,
std
::
equal_to
<
int
>
());
}
CV_EXPORTS
bool
cv
::
gpu
::
hasGreaterOrEqualCubinVersion
(
int
major
,
int
minor
)
{
return
checkCubinVersion
(
major
,
minor
,
ComparerGreaterOrEqual
());
return
::
compare
(
OPENCV_ARCH_GPU
,
major
*
10
+
minor
,
std
::
greater_equal
<
int
>
());
}
...
...
@@ -284,7 +222,7 @@ CV_EXPORTS bool cv::gpu::isCompatibleWith(int device)
return
true
;
// Check CUBIN compatibility
for
(
int
i
=
0
;
i
<=
minor
;
++
i
)
for
(
int
i
=
minor
;
i
>=
0
;
--
i
)
if
(
hasCubinVersion
(
major
,
i
))
return
true
;
...
...
modules/gpu/src/precomp.hpp
View file @
574b3f94
...
...
@@ -85,6 +85,10 @@
#error "Insufficient NPP version, please update it."
#endif
#if defined(OPENCV_ARCH_GPU_OR_PTX_10)
#error "OpenCV GPU module doesn't support NVIDIA compute capability 1.0"
#endif
static
inline
void
throw_nogpu
()
{
CV_Error
(
CV_GpuNotSupported
,
"The called functionality is disabled for current build or platform"
);
}
#else
/* defined(HAVE_CUDA) */
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment