Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
d38ca097
Commit
d38ca097
authored
Jan 30, 2012
by
Vladislav Vinogradov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added gpu::printCudaDeviceInfo to all samples
parent
ed038ef9
Show whitespace changes
Inline
Side-by-side
Showing
17 changed files
with
213 additions
and
1 deletion
+213
-1
gpu.hpp
modules/gpu/include/opencv2/gpu/gpu.hpp
+3
-0
initialization.cpp
modules/gpu/src/initialization.cpp
+158
-0
precomp.hpp
modules/gpu/src/precomp.hpp
+1
-0
cascadeclassifier.cpp
samples/gpu/cascadeclassifier.cpp
+2
-0
cascadeclassifier_nvidia_api.cpp
samples/gpu/cascadeclassifier_nvidia_api.cpp
+2
-0
driver_api_multi.cpp
samples/gpu/driver_api_multi.cpp
+2
-0
driver_api_stereo_multi.cpp
samples/gpu/driver_api_stereo_multi.cpp
+2
-0
hog.cpp
samples/gpu/hog.cpp
+2
-0
morfology.cpp
samples/gpu/morfology.cpp
+2
-0
multi.cpp
samples/gpu/multi.cpp
+2
-0
optical_flow.cpp
samples/gpu/optical_flow.cpp
+2
-0
opticalflow_nvidia_api.cpp
samples/gpu/opticalflow_nvidia_api.cpp
+2
-0
performance.cpp
samples/gpu/performance/performance.cpp
+26
-1
performance.h
samples/gpu/performance/performance.h
+1
-0
stereo_match.cpp
samples/gpu/stereo_match.cpp
+2
-0
stereo_multi.cpp
samples/gpu/stereo_multi.cpp
+2
-0
surf_keypoint_matcher.cpp
samples/gpu/surf_keypoint_matcher.cpp
+2
-0
No files found.
modules/gpu/include/opencv2/gpu/gpu.hpp
View file @
d38ca097
...
...
@@ -139,6 +139,9 @@ private:
int
minorVersion_
;
};
// Prints a detailed, multi-line report about CUDA device `device` to stdout
// (name, driver/runtime versions, compute capability, memory sizes, limits,
// feature flags, compute mode) and flushes stdout.
// If `device` is not in [0, getCudaEnabledDeviceCount()), every device is
// reported instead of just one.
CV_EXPORTS
void
printCudaDeviceInfo
(
int
device
);
// Prints a one-line summary per device to stdout (device index, name, global
// memory in Mb, sm_XY version, total core count, driver/runtime versions) and
// flushes stdout.
// If `device` is not in [0, getCudaEnabledDeviceCount()), every device is
// listed instead of just one.
CV_EXPORTS
void
printShortCudaDeviceInfo
(
int
device
);
//////////////////////////////// CudaMem ////////////////////////////////
// CudaMem is limited cv::Mat with page locked memory allocation.
// Page locked memory is only needed for async and faster copying to GPU.
...
...
modules/gpu/src/initialization.cpp
View file @
d38ca097
...
...
@@ -171,6 +171,8 @@ bool cv::gpu::DeviceInfo::supports(cv::gpu::FeatureSet) const { throw_nogpu(); r
// Stub from the branch that precedes `#else /* !defined (HAVE_CUDA) */`.
// Hands control to throw_nogpu(); the `return false` that follows only
// completes the bool signature in case that call returns.
bool cv::gpu::DeviceInfo::isCompatible() const
{
    throw_nogpu();
    return false;
}
// Stub from the branch that precedes `#else /* !defined (HAVE_CUDA) */`.
// Does nothing except call throw_nogpu().
void cv::gpu::DeviceInfo::query()
{
    throw_nogpu();
}
// Stub from the branch that precedes `#else /* !defined (HAVE_CUDA) */`.
// Both output references are left unnamed and untouched; the body only calls
// throw_nogpu().
void cv::gpu::DeviceInfo::queryMemory(size_t&, size_t&) const
{
    throw_nogpu();
}
// Stub from the branch that precedes `#else /* !defined (HAVE_CUDA) */`.
// The device index is deliberately left unnamed, like the parameters of the
// sibling stubs (e.g. queryMemory(size_t&, size_t&)), so this branch does not
// produce an unused-parameter warning. The body only calls throw_nogpu().
void cv::gpu::printCudaDeviceInfo(int)
{
    throw_nogpu();
}
// Stub from the branch that precedes `#else /* !defined (HAVE_CUDA) */`.
// The device index is deliberately left unnamed, like the parameters of the
// sibling stubs (e.g. queryMemory(size_t&, size_t&)), so this branch does not
// produce an unused-parameter warning. The body only calls throw_nogpu().
void cv::gpu::printShortCudaDeviceInfo(int)
{
    throw_nogpu();
}
#else
/* !defined (HAVE_CUDA) */
...
...
@@ -271,5 +273,161 @@ void cv::gpu::DeviceInfo::queryMemory(size_t& free_memory, size_t& total_memory)
setDevice
(
prev_device_id
);
}
namespace
{
template
<
class
T
>
void
getCudaAttribute
(
T
*
attribute
,
CUdevice_attribute
device_attribute
,
int
device
)
{
*
attribute
=
T
();
CUresult
error
=
CUDA_SUCCESS
;
// = cuDeviceGetAttribute( attribute, device_attribute, device ); why link erros under ubuntu??
if
(
CUDA_SUCCESS
==
error
)
return
;
printf
(
"Driver API error = %04d
\n
"
,
error
);
cv
::
gpu
::
error
(
"driver API error"
,
__FILE__
,
__LINE__
);
}
// Returns the number of CUDA cores per multiprocessor for compute capability
// `major`.`minor`.
//
// Returns -1, after printing a diagnostic to stdout, when the version is not
// in the table. Callers multiply the result by the multiprocessor count, so
// an unknown version shows up as a negative core count in their output.
//
// The table used to end at SM 2.1; it now also covers later architectures so
// newer devices are reported with real core counts instead of -1.
int convertSMVer2Cores(int major, int minor)
{
    // SM is encoded as 0xMm (hexadecimal notation):
    // M = SM major version, m = SM minor version.
    struct SMtoCores
    {
        int SM;
        int Cores;
    };

    static const SMtoCores gpuArchCoresPerSM[] =
    {
        { 0x10,   8 }, { 0x11,   8 }, { 0x12,   8 }, { 0x13,   8 },
        { 0x20,  32 }, { 0x21,  48 },
        { 0x30, 192 }, { 0x32, 192 }, { 0x35, 192 }, { 0x37, 192 },
        { 0x50, 128 }, { 0x52, 128 }, { 0x53, 128 },
        { 0x60,  64 }, { 0x61, 128 }, { 0x62, 128 },
        { 0x70,  64 }, { 0x72,  64 }, { 0x75,  64 },
        { 0x80,  64 }, { 0x86, 128 }, { 0x87, 128 }, { 0x89, 128 },
        { 0x90, 128 }
    };

    const int entryCount =
        static_cast<int>(sizeof(gpuArchCoresPerSM) / sizeof(gpuArchCoresPerSM[0]));
    const int smVersion = (major << 4) + minor;

    for (int i = 0; i < entryCount; ++i)
    {
        if (gpuArchCoresPerSM[i].SM == smVersion)
            return gpuArchCoresPerSM[i].Cores;
    }

    printf("MapSMtoCores undefined SMversion %d.%d!\n", major, minor);
    return -1;
}
}
void
cv
::
gpu
::
printCudaDeviceInfo
(
int
device
)
{
int
count
=
getCudaEnabledDeviceCount
();
bool
valid
=
(
device
>=
0
)
&&
(
device
<
count
);
int
beg
=
valid
?
device
:
0
;
int
end
=
valid
?
device
+
1
:
count
;
printf
(
"*** CUDA Device Query (Runtime API) version (CUDART static linking) ***
\n\n
"
);
printf
(
"Device count: %d
\n
"
,
count
);
int
driverVersion
=
0
,
runtimeVersion
=
0
;
cudaSafeCall
(
cudaDriverGetVersion
(
&
driverVersion
)
);
cudaSafeCall
(
cudaRuntimeGetVersion
(
&
runtimeVersion
)
);
const
char
*
computeMode
[]
=
{
"Default (multiple host threads can use ::cudaSetDevice() with device simultaneously)"
,
"Exclusive (only one host thread in one process is able to use ::cudaSetDevice() with this device)"
,
"Prohibited (no host thread can use ::cudaSetDevice() with this device)"
,
"Exclusive Process (many threads in one process is able to use ::cudaSetDevice() with this device)"
,
"Unknown"
,
NULL
};
for
(
int
dev
=
beg
;
dev
<
end
;
++
dev
)
{
cudaDeviceProp
prop
;
cudaSafeCall
(
cudaGetDeviceProperties
(
&
prop
,
dev
)
);
printf
(
"
\n
Device %d:
\"
%s
\"\n
"
,
dev
,
prop
.
name
);
printf
(
" CUDA Driver Version / Runtime Version %d.%d / %d.%d
\n
"
,
driverVersion
/
1000
,
driverVersion
%
100
,
runtimeVersion
/
1000
,
runtimeVersion
%
100
);
printf
(
" CUDA Capability Major/Minor version number: %d.%d
\n
"
,
prop
.
major
,
prop
.
minor
);
printf
(
" Total amount of global memory: %.0f MBytes (%llu bytes)
\n
"
,
(
float
)
prop
.
totalGlobalMem
/
1048576.0
f
,
(
unsigned
long
long
)
prop
.
totalGlobalMem
);
printf
(
" (%2d) Multiprocessors x (%2d) CUDA Cores/MP: %d CUDA Cores
\n
"
,
prop
.
multiProcessorCount
,
convertSMVer2Cores
(
prop
.
major
,
prop
.
minor
),
convertSMVer2Cores
(
prop
.
major
,
prop
.
minor
)
*
prop
.
multiProcessorCount
);
printf
(
" GPU Clock Speed: %.2f GHz
\n
"
,
prop
.
clockRate
*
1e-6
f
);
#if (CUDART_VERSION >= 4000)
// This is not available in the CUDA Runtime API, so we make the necessary calls the driver API to support this for output
int
memoryClock
,
memBusWidth
,
L2CacheSize
;
getCudaAttribute
<
int
>
(
&
memoryClock
,
CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE
,
dev
);
getCudaAttribute
<
int
>
(
&
memBusWidth
,
CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH
,
dev
);
getCudaAttribute
<
int
>
(
&
L2CacheSize
,
CU_DEVICE_ATTRIBUTE_L2_CACHE_SIZE
,
dev
);
printf
(
" Memory Clock rate: %.2f Mhz
\n
"
,
memoryClock
*
1e-3
f
);
printf
(
" Memory Bus Width: %d-bit
\n
"
,
memBusWidth
);
if
(
L2CacheSize
)
printf
(
" L2 Cache Size: %d bytes
\n
"
,
L2CacheSize
);
printf
(
" Max Texture Dimension Size (x,y,z) 1D=(%d), 2D=(%d,%d), 3D=(%d,%d,%d)
\n
"
,
prop
.
maxTexture1D
,
prop
.
maxTexture2D
[
0
],
prop
.
maxTexture2D
[
1
],
prop
.
maxTexture3D
[
0
],
prop
.
maxTexture3D
[
1
],
prop
.
maxTexture3D
[
2
]);
printf
(
" Max Layered Texture Size (dim) x layers 1D=(%d) x %d, 2D=(%d,%d) x %d
\n
"
,
prop
.
maxTexture1DLayered
[
0
],
prop
.
maxTexture1DLayered
[
1
],
prop
.
maxTexture2DLayered
[
0
],
prop
.
maxTexture2DLayered
[
1
],
prop
.
maxTexture2DLayered
[
2
]);
#endif
printf
(
" Total amount of constant memory: %u bytes
\n
"
,
(
int
)
prop
.
totalConstMem
);
printf
(
" Total amount of shared memory per block: %u bytes
\n
"
,
(
int
)
prop
.
sharedMemPerBlock
);
printf
(
" Total number of registers available per block: %d
\n
"
,
prop
.
regsPerBlock
);
printf
(
" Warp size: %d
\n
"
,
prop
.
warpSize
);
printf
(
" Maximum number of threads per block: %d
\n
"
,
prop
.
maxThreadsPerBlock
);
printf
(
" Maximum sizes of each dimension of a block: %d x %d x %d
\n
"
,
prop
.
maxThreadsDim
[
0
],
prop
.
maxThreadsDim
[
1
],
prop
.
maxThreadsDim
[
2
]);
printf
(
" Maximum sizes of each dimension of a grid: %d x %d x %d
\n
"
,
prop
.
maxGridSize
[
0
],
prop
.
maxGridSize
[
1
],
prop
.
maxGridSize
[
2
]);
printf
(
" Maximum memory pitch: %u bytes
\n
"
,
(
int
)
prop
.
memPitch
);
printf
(
" Texture alignment: %u bytes
\n
"
,
(
int
)
prop
.
textureAlignment
);
#if CUDART_VERSION >= 4000
printf
(
" Concurrent copy and execution: %s with %d copy engine(s)
\n
"
,
(
prop
.
deviceOverlap
?
"Yes"
:
"No"
),
prop
.
asyncEngineCount
);
#else
printf
(
" Concurrent copy and execution: %s
\n
"
,
prop
.
deviceOverlap
?
"Yes"
:
"No"
);
#endif
printf
(
" Run time limit on kernels: %s
\n
"
,
prop
.
kernelExecTimeoutEnabled
?
"Yes"
:
"No"
);
printf
(
" Integrated GPU sharing Host Memory: %s
\n
"
,
prop
.
integrated
?
"Yes"
:
"No"
);
printf
(
" Support host page-locked memory mapping: %s
\n
"
,
prop
.
canMapHostMemory
?
"Yes"
:
"No"
);
printf
(
" Concurrent kernel execution: %s
\n
"
,
prop
.
concurrentKernels
?
"Yes"
:
"No"
);
printf
(
" Alignment requirement for Surfaces: %s
\n
"
,
prop
.
surfaceAlignment
?
"Yes"
:
"No"
);
printf
(
" Device has ECC support enabled: %s
\n
"
,
prop
.
ECCEnabled
?
"Yes"
:
"No"
);
printf
(
" Device is using TCC driver mode: %s
\n
"
,
prop
.
tccDriver
?
"Yes"
:
"No"
);
#if CUDART_VERSION >= 4000
printf
(
" Device supports Unified Addressing (UVA): %s
\n
"
,
prop
.
unifiedAddressing
?
"Yes"
:
"No"
);
printf
(
" Device PCI Bus ID / PCI location ID: %d / %d
\n
"
,
prop
.
pciBusID
,
prop
.
pciDeviceID
);
#endif
printf
(
" Compute Mode:
\n
"
);
printf
(
" %s
\n
"
,
computeMode
[
prop
.
computeMode
]);
}
printf
(
"
\n
"
);
printf
(
"deviceQuery, CUDA Driver = CUDART"
);
printf
(
", CUDA Driver Version = %d.%d"
,
driverVersion
/
1000
,
driverVersion
%
100
);
printf
(
", CUDA Runtime Version = %d.%d"
,
runtimeVersion
/
1000
,
runtimeVersion
%
100
);
printf
(
", NumDevs = %d
\n\n
"
,
count
);
fflush
(
stdout
);
}
// Prints a one-line summary for one CUDA device, or for all of them, to stdout.
//
// device: index of the device to describe. When it is outside
//         [0, getCudaEnabledDeviceCount()) every device is listed.
//
// Each line carries the device index, name, global memory in Mb, sm_XY
// version (tagged " (not Fermi)" when major < 2), total core count and the
// driver/runtime versions.
//
// Fix relative to the previous version: CUDA encodes versions as
// 1000 * major + 10 * minor, so the minor part is (v % 1000) / 10. Printing
// v % 100 showed 4.1 as "4.10".
void cv::gpu::printShortCudaDeviceInfo(int device)
{
    const int count = getCudaEnabledDeviceCount();
    const bool valid = (device >= 0) && (device < count);

    // A valid index selects that single device; anything else selects all.
    const int beg = valid ? device : 0;
    const int end = valid ? device + 1 : count;

    int driverVersion = 0, runtimeVersion = 0;
    cudaSafeCall( cudaDriverGetVersion(&driverVersion) );
    cudaSafeCall( cudaRuntimeGetVersion(&runtimeVersion) );

    const int driverMajor  = driverVersion / 1000;
    const int driverMinor  = (driverVersion % 1000) / 10;
    const int runtimeMajor = runtimeVersion / 1000;
    const int runtimeMinor = (runtimeVersion % 1000) / 10;

    for (int dev = beg; dev < end; ++dev)
    {
        cudaDeviceProp prop;
        cudaSafeCall( cudaGetDeviceProperties(&prop, dev) );

        const char* arch_str = prop.major < 2 ? " (not Fermi)" : "";
        printf("Device %d: \"%s\" %.0fMb", dev, prop.name, (float)prop.totalGlobalMem / 1048576.0f);

        // convertSMVer2Cores returns -1 for versions missing from its table,
        // in which case the total printed here is negative.
        const int totalCores = convertSMVer2Cores(prop.major, prop.minor) * prop.multiProcessorCount;
        printf(", sm_%d%d%s, %d cores", prop.major, prop.minor, arch_str, totalCores);
        printf(", Driver/Runtime ver.%d.%d/%d.%d\n", driverMajor, driverMinor, runtimeMajor, runtimeMinor);
    }
    fflush(stdout);
}
#endif
modules/gpu/src/precomp.hpp
View file @
d38ca097
...
...
@@ -70,6 +70,7 @@
#ifdef HAVE_CUDA
#include "cuda.h"
#include "cuda_runtime_api.h"
#include "npp.h"
...
...
samples/gpu/cascadeclassifier.cpp
View file @
d38ca097
...
...
@@ -109,6 +109,8 @@ int main(int argc, const char *argv[])
return
cerr
<<
"No GPU found or the library is compiled without GPU support"
<<
endl
,
-
1
;
}
cv
::
gpu
::
printShortCudaDeviceInfo
(
cv
::
gpu
::
getDevice
());
string
cascadeName
;
string
inputName
;
bool
isInputImage
=
false
;
...
...
samples/gpu/cascadeclassifier_nvidia_api.cpp
View file @
d38ca097
...
...
@@ -154,6 +154,8 @@ int main(int argc, const char** argv)
ncvAssertPrintReturn
(
cv
::
gpu
::
getCudaEnabledDeviceCount
()
!=
0
,
"No GPU found or the library is compiled without GPU support"
,
-
1
);
ncvAssertPrintReturn
(
argc
==
3
,
"Invalid number of arguments"
,
-
1
);
cv
::
gpu
::
printShortCudaDeviceInfo
(
cv
::
gpu
::
getDevice
());
string
cascadeName
=
argv
[
1
];
string
inputName
=
argv
[
2
];
...
...
samples/gpu/driver_api_multi.cpp
View file @
d38ca097
...
...
@@ -71,6 +71,8 @@ int main(int argc, char **argv)
for
(
int
i
=
0
;
i
<
num_devices
;
++
i
)
{
cv
::
gpu
::
printShortCudaDeviceInfo
(
i
);
DeviceInfo
dev_info
(
i
);
if
(
!
dev_info
.
isCompatible
())
{
...
...
samples/gpu/driver_api_stereo_multi.cpp
View file @
d38ca097
...
...
@@ -98,6 +98,8 @@ int main(int argc, char** argv)
for
(
int
i
=
0
;
i
<
num_devices
;
++
i
)
{
cv
::
gpu
::
printShortCudaDeviceInfo
(
i
);
DeviceInfo
dev_info
(
i
);
if
(
!
dev_info
.
isCompatible
())
{
...
...
samples/gpu/hog.cpp
View file @
d38ca097
...
...
@@ -193,6 +193,8 @@ Args Args::read(int argc, char** argv)
App
::
App
(
const
Args
&
s
)
{
cv
::
gpu
::
printShortCudaDeviceInfo
(
cv
::
gpu
::
getDevice
());
args
=
s
;
cout
<<
"
\n
Controls:
\n
"
<<
"
\t
ESC - exit
\n
"
...
...
samples/gpu/morfology.cpp
View file @
d38ca097
...
...
@@ -74,6 +74,8 @@ int main( int argc, char** argv )
return
-
1
;
}
cv
::
gpu
::
printShortCudaDeviceInfo
(
cv
::
gpu
::
getDevice
());
help
();
...
...
samples/gpu/multi.cpp
View file @
d38ca097
...
...
@@ -46,6 +46,8 @@ int main()
}
for
(
int
i
=
0
;
i
<
num_devices
;
++
i
)
{
cv
::
gpu
::
printShortCudaDeviceInfo
(
i
);
DeviceInfo
dev_info
(
i
);
if
(
!
dev_info
.
isCompatible
())
{
...
...
samples/gpu/optical_flow.cpp
View file @
d38ca097
...
...
@@ -71,6 +71,8 @@ int main(int argc, const char* argv[])
return
-
1
;
}
cv
::
gpu
::
printShortCudaDeviceInfo
(
cv
::
gpu
::
getDevice
());
cout
<<
"OpenCV / NVIDIA Computer Vision"
<<
endl
;
cout
<<
"Optical Flow Demo: Frame Interpolation"
<<
endl
;
cout
<<
"========================================="
<<
endl
;
...
...
samples/gpu/opticalflow_nvidia_api.cpp
View file @
d38ca097
...
...
@@ -393,6 +393,8 @@ int main(int argc, char **argv)
return
result
;
}
cv
::
gpu
::
printShortCudaDeviceInfo
(
cv
::
gpu
::
getDevice
());
std
::
cout
<<
"OpenCV / NVIDIA Computer Vision
\n
"
;
std
::
cout
<<
"Optical Flow Demo: Frame Interpolation
\n
"
;
std
::
cout
<<
"=========================================
\n
"
;
...
...
samples/gpu/performance/performance.cpp
View file @
d38ca097
...
...
@@ -5,6 +5,7 @@
using
namespace
std
;
using
namespace
cv
;
using
namespace
cv
::
gpu
;
void
TestSystem
::
run
()
{
...
...
@@ -75,6 +76,7 @@ void TestSystem::finishCurrentSubtest()
void
TestSystem
::
printHeading
()
{
cout
<<
endl
;
cout
<<
setiosflags
(
ios_base
::
left
);
cout
<<
TAB
<<
setw
(
10
)
<<
"CPU, ms"
<<
setw
(
10
)
<<
"GPU, ms"
<<
setw
(
14
)
<<
"SPEEDUP"
...
...
@@ -145,13 +147,21 @@ int CV_CDECL cvErrorCallback(int /*status*/, const char* /*func_name*/,
int
main
(
int
argc
,
const
char
*
argv
[])
{
int
num_devices
=
getCudaEnabledDeviceCount
();
if
(
num_devices
==
0
)
{
cerr
<<
"No GPU found or the library was compiled without GPU support"
;
return
-
1
;
}
redirectError
(
cvErrorCallback
);
const
char
*
keys
=
"{ h | help | false | print help message }"
"{ f | filter | | filter for test }"
"{ w | workdir | | set working directory }"
"{ l | list | false | show all tests }"
;
"{ l | list | false | show all tests }"
"{ d | device | 0 | device id }"
;
CommandLineParser
cmd
(
argc
,
argv
,
keys
);
...
...
@@ -162,6 +172,21 @@ int main(int argc, const char* argv[])
return
0
;
}
int
device
=
cmd
.
get
<
int
>
(
"device"
);
if
(
device
<
0
||
device
>=
num_devices
)
{
cerr
<<
"Invalid device ID"
<<
endl
;
return
-
1
;
}
DeviceInfo
dev_info
(
device
);
if
(
!
dev_info
.
isCompatible
())
{
cerr
<<
"GPU module isn't built for GPU #"
<<
device
<<
" "
<<
dev_info
.
name
()
<<
", CC "
<<
dev_info
.
majorVersion
()
<<
'.'
<<
dev_info
.
minorVersion
()
<<
endl
;
return
-
1
;
}
setDevice
(
device
);
printShortCudaDeviceInfo
(
device
);
string
filter
=
cmd
.
get
<
string
>
(
"filter"
);
string
workdir
=
cmd
.
get
<
string
>
(
"workdir"
);
bool
list
=
cmd
.
get
<
bool
>
(
"list"
);
...
...
samples/gpu/performance/performance.h
View file @
d38ca097
...
...
@@ -6,6 +6,7 @@
#include <vector>
#include <string>
#include "opencv2/core/core.hpp"
#include "opencv2/gpu/gpu.hpp"
#define TAB " "
...
...
samples/gpu/stereo_match.cpp
View file @
d38ca097
...
...
@@ -139,6 +139,8 @@ Params Params::read(int argc, char** argv)
App
::
App
(
const
Params
&
p
)
:
p
(
p
),
running
(
false
)
{
cv
::
gpu
::
printShortCudaDeviceInfo
(
cv
::
gpu
::
getDevice
());
cout
<<
"stereo_match_gpu sample
\n
"
;
cout
<<
"
\n
Controls:
\n
"
<<
"
\t
esc - exit
\n
"
...
...
samples/gpu/stereo_multi.cpp
View file @
d38ca097
...
...
@@ -68,6 +68,8 @@ int main(int argc, char** argv)
}
for
(
int
i
=
0
;
i
<
num_devices
;
++
i
)
{
cv
::
gpu
::
printShortCudaDeviceInfo
(
i
);
DeviceInfo
dev_info
(
i
);
if
(
!
dev_info
.
isCompatible
())
{
...
...
samples/gpu/surf_keypoint_matcher.cpp
View file @
d38ca097
...
...
@@ -43,6 +43,8 @@ int main(int argc, char* argv[])
}
}
cv
::
gpu
::
printShortCudaDeviceInfo
(
cv
::
gpu
::
getDevice
());
SURF_GPU
surf
;
// detecting keypoints & computing descriptors
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment