Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
O
opencv
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
opencv
Commits
ea94b435
Commit
ea94b435
authored
Feb 03, 2011
by
Alexey Spizhevoy
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
added stereo_multi_gpu sample, cosmetic changes in multi_gpu sample
parent
ed779556
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
218 additions
and
22 deletions
+218
-22
stereobm.cu
modules/gpu/src/cuda/stereobm.cu
+1
-1
multi.cpp
samples/gpu/multi.cpp
+34
-21
stereo_multi.cpp
samples/gpu/stereo_multi.cpp
+183
-0
No files found.
modules/gpu/src/cuda/stereobm.cu
View file @
ea94b435
...
@@ -357,7 +357,7 @@ extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const
...
@@ -357,7 +357,7 @@ extern "C" void stereoBM_GPU(const DevMem2D& left, const DevMem2D& right, const
cudaSafeCall( cudaMemset2D(disp.data, disp.step, 0, disp.cols, disp.rows) );
cudaSafeCall( cudaMemset2D(disp.data, disp.step, 0, disp.cols, disp.rows) );
cudaSafeCall( cudaMemset2D(minSSD_buf.data, minSSD_buf.step, 0xFF, minSSD_buf.cols * minSSD_buf.elemSize(), disp.rows) );
cudaSafeCall( cudaMemset2D(minSSD_buf.data, minSSD_buf.step, 0xFF, minSSD_buf.cols * minSSD_buf.elemSize(), disp.rows) );
cudaSafeCall( cudaMemcpyToSymbol(
cwidth, &left.cols, sizeof(left.cols) ) );
cudaSafeCall( cudaMemcpyToSymbol( cwidth, &left.cols, sizeof(left.cols) ) );
cudaSafeCall( cudaMemcpyToSymbol( cheight, &left.rows, sizeof(left.rows) ) );
cudaSafeCall( cudaMemcpyToSymbol( cheight, &left.rows, sizeof(left.rows) ) );
cudaSafeCall( cudaMemcpyToSymbol( cminSSDImage, &minSSD_buf.data, sizeof(minSSD_buf.data) ) );
cudaSafeCall( cudaMemcpyToSymbol( cminSSDImage, &minSSD_buf.data, sizeof(minSSD_buf.data) ) );
...
...
samples/gpu/multi.cpp
View file @
ea94b435
/* This sample demonstrates the way you can perform independed tasks
on the different GPUs */
// Disable some warnings which are caused with CUDA headers
// Disable some warnings which are caused with CUDA headers
#pragma warning(disable: 4201 4408 4100)
#pragma warning(disable: 4201 4408 4100)
...
@@ -34,41 +37,50 @@ using namespace cv::gpu;
...
@@ -34,41 +37,50 @@ using namespace cv::gpu;
struct
Worker
{
void
operator
()(
int
device_id
)
const
;
};
struct
Worker
{
void
operator
()(
int
device_id
)
const
;
};
void
destroyContexts
();
void
destroyContexts
();
#define
cuS
afeCall(code) if (code != CUDA_SUCCESS) { \
#define
s
afeCall(code) if (code != CUDA_SUCCESS) { \
cout
<<
"CUDA driver API error: code "
<<
code
\
cout
<<
"CUDA driver API error: code "
<<
code
\
<<
", file "
<<
__FILE__
<<
", line "
<<
__LINE__
<<
endl
;
\
<<
", file "
<<
__FILE__
<<
", line "
<<
__LINE__
<<
endl
;
\
destroyContexts
();
\
destroyContexts
();
\
exit
(
-
1
);
\
exit
(
-
1
);
\
}
}
// Each GPU is associated with its own context
// Each GPU is associated with its own context
CUcontext
contexts
[
2
];
CUcontext
contexts
[
2
];
int
main
()
int
main
()
{
{
if
(
getCudaEnabledDeviceCount
()
<
2
)
int
num_devices
=
getCudaEnabledDeviceCount
();
if
(
num_devices
<
2
)
{
{
cout
<<
"Two or more GPUs are required
\n
"
;
cout
<<
"Two or more GPUs are required
\n
"
;
return
-
1
;
return
-
1
;
}
}
cuSafeCall
(
cuInit
(
0
));
for
(
int
i
=
0
;
i
<
num_devices
;
++
i
)
{
if
(
!
DeviceInfo
(
i
).
isCompatible
())
{
cout
<<
"GPU module isn't built for GPU #"
<<
i
<<
" ("
<<
DeviceInfo
(
i
).
name
()
<<
")"
;
return
-
1
;
}
}
safeCall
(
cuInit
(
0
));
// Create context for
the first GPU
// Create context for
GPU #0
CUdevice
device
;
CUdevice
device
;
cuS
afeCall
(
cuDeviceGet
(
&
device
,
0
));
s
afeCall
(
cuDeviceGet
(
&
device
,
0
));
cuS
afeCall
(
cuCtxCreate
(
&
contexts
[
0
],
0
,
device
));
s
afeCall
(
cuCtxCreate
(
&
contexts
[
0
],
0
,
device
));
CUcontext
prev_context
;
CUcontext
prev_context
;
cuS
afeCall
(
cuCtxPopCurrent
(
&
prev_context
));
s
afeCall
(
cuCtxPopCurrent
(
&
prev_context
));
// Create context for
the second GPU
// Create context for
GPU #1
cuS
afeCall
(
cuDeviceGet
(
&
device
,
1
));
s
afeCall
(
cuDeviceGet
(
&
device
,
1
));
cuS
afeCall
(
cuCtxCreate
(
&
contexts
[
1
],
0
,
device
));
s
afeCall
(
cuCtxCreate
(
&
contexts
[
1
],
0
,
device
));
cuS
afeCall
(
cuCtxPopCurrent
(
&
prev_context
));
s
afeCall
(
cuCtxPopCurrent
(
&
prev_context
));
// Execute calculation in two threads using two GPUs
// Execute calculation in two threads using two GPUs
int
devices
[]
=
{
0
,
1
};
int
devices
[]
=
{
0
,
1
};
...
@@ -81,8 +93,8 @@ int main()
...
@@ -81,8 +93,8 @@ int main()
void
Worker
::
operator
()(
int
device_id
)
const
void
Worker
::
operator
()(
int
device_id
)
const
{
{
// Set proper context
// Set
the
proper context
cuS
afeCall
(
cuCtxPushCurrent
(
contexts
[
device_id
]));
s
afeCall
(
cuCtxPushCurrent
(
contexts
[
device_id
]));
Mat
src
(
1000
,
1000
,
CV_32F
);
Mat
src
(
1000
,
1000
,
CV_32F
);
Mat
dst
;
Mat
dst
;
...
@@ -93,15 +105,15 @@ void Worker::operator()(int device_id) const
...
@@ -93,15 +105,15 @@ void Worker::operator()(int device_id) const
// CPU works
// CPU works
transpose
(
src
,
dst
);
transpose
(
src
,
dst
);
// GPU works
GpuMat
d_src
(
src
);
GpuMat
d_src
(
src
);
GpuMat
d_dst
;
GpuMat
d_dst
;
// GPU works
transpose
(
d_src
,
d_dst
);
transpose
(
d_src
,
d_dst
);
// Check results
// Check results
bool
passed
=
norm
(
dst
-
Mat
(
d_dst
),
NORM_INF
)
<
1e-3
;
bool
passed
=
norm
(
dst
-
Mat
(
d_dst
),
NORM_INF
)
<
1e-3
;
cout
<<
"GPU #"
<<
device_id
<<
": "
<<
(
passed
?
"passed"
:
"FAILED"
)
<<
endl
;
cout
<<
"GPU #"
<<
device_id
<<
" ("
<<
DeviceInfo
().
name
()
<<
"): "
<<
(
passed
?
"passed"
:
"FAILED"
)
<<
endl
;
// Deallocate data here, otherwise deallocation will be performed
// Deallocate data here, otherwise deallocation will be performed
// after context is extracted from the stack
// after context is extracted from the stack
...
@@ -109,14 +121,14 @@ void Worker::operator()(int device_id) const
...
@@ -109,14 +121,14 @@ void Worker::operator()(int device_id) const
d_dst
.
release
();
d_dst
.
release
();
CUcontext
prev_context
;
CUcontext
prev_context
;
cuS
afeCall
(
cuCtxPopCurrent
(
&
prev_context
));
s
afeCall
(
cuCtxPopCurrent
(
&
prev_context
));
}
}
void
destroyContexts
()
void
destroyContexts
()
{
{
cuS
afeCall
(
cuCtxDestroy
(
contexts
[
0
]));
s
afeCall
(
cuCtxDestroy
(
contexts
[
0
]));
cuS
afeCall
(
cuCtxDestroy
(
contexts
[
1
]));
s
afeCall
(
cuCtxDestroy
(
contexts
[
1
]));
}
}
#endif
#endif
\ No newline at end of file
samples/gpu/stereo_multi.cpp
0 → 100644
View file @
ea94b435
/* This sample demonstrates working on one piece of data using two GPUs.
It splits input into two parts and processes them separately on different
GPUs. */
// Disable some warnings which are caused with CUDA headers
#pragma warning(disable: 4201 4408 4100)
#include <iostream>
#include <cvconfig.h>
#include <opencv2/core/core.hpp>
#include <opencv2/highgui/highgui.hpp>
#include <opencv2/gpu/gpu.hpp>
#if !defined(HAVE_CUDA) || !defined(HAVE_TBB)
int
main
()
{
#if !defined(HAVE_CUDA)
cout
<<
"CUDA support is required (CMake key 'WITH_CUDA' must be true).
\n
"
;
#endif
#if !defined(HAVE_TBB)
cout
<<
"TBB support is required (CMake key 'WITH_TBB' must be true).
\n
"
;
#endif
return
0
;
}
#else
#include <cuda.h>
#include <cuda_runtime.h>
#include "opencv2/core/internal.hpp" // For TBB wrappers
using
namespace
std
;
using
namespace
cv
;
using
namespace
cv
::
gpu
;
struct
Worker
{
void
operator
()(
int
device_id
)
const
;
};
void
destroyContexts
();
#define safeCall(code) if (code != CUDA_SUCCESS) { \
cout
<<
"CUDA driver API error: code "
<<
code
\
<<
", file "
<<
__FILE__
<<
", line "
<<
__LINE__
<<
endl
;
\
destroyContexts
();
\
exit
(
-
1
);
\
}
// Each GPU is associated with its own context
CUcontext
contexts
[
2
];
void
inline
contextOn
(
int
id
)
{
safeCall
(
cuCtxPushCurrent
(
contexts
[
id
]));
}
void
inline
contextOff
()
{
CUcontext
prev_context
;
safeCall
(
cuCtxPopCurrent
(
&
prev_context
));
}
GpuMat
d_left
[
2
];
GpuMat
d_right
[
2
];
StereoBM_GPU
*
bm
[
2
];
GpuMat
d_result
[
2
];
Mat
result
;
int
main
(
int
argc
,
char
**
argv
)
{
if
(
argc
<
3
)
{
cout
<<
"Usage: stereo_multi_gpu <left_image> <right_image>
\n
"
;
return
-
1
;
}
int
num_devices
=
getCudaEnabledDeviceCount
();
if
(
num_devices
<
2
)
{
cout
<<
"Two or more GPUs are required
\n
"
;
return
-
1
;
}
for
(
int
i
=
0
;
i
<
num_devices
;
++
i
)
{
if
(
!
DeviceInfo
(
i
).
isCompatible
())
{
cout
<<
"GPU module isn't built for GPU #"
<<
i
<<
" ("
<<
DeviceInfo
(
i
).
name
()
<<
")"
;
return
-
1
;
}
}
// Load input data
Mat
left
=
imread
(
argv
[
1
],
CV_LOAD_IMAGE_GRAYSCALE
);
Mat
right
=
imread
(
argv
[
2
],
CV_LOAD_IMAGE_GRAYSCALE
);
if
(
left
.
empty
())
{
cout
<<
"Cannot open '"
<<
argv
[
1
]
<<
"'
\n
"
;
return
-
1
;
}
if
(
right
.
empty
())
{
cout
<<
"Cannot open '"
<<
argv
[
2
]
<<
"'
\n
"
;
return
-
1
;
}
safeCall
(
cuInit
(
0
));
// Create context for the first GPU
CUdevice
device
;
safeCall
(
cuDeviceGet
(
&
device
,
0
));
safeCall
(
cuCtxCreate
(
&
contexts
[
0
],
0
,
device
));
contextOff
();
// Create context for the second GPU
safeCall
(
cuDeviceGet
(
&
device
,
1
));
safeCall
(
cuCtxCreate
(
&
contexts
[
1
],
0
,
device
));
contextOff
();
// Split source images for processing on the first GPU
contextOn
(
0
);
d_left
[
0
].
upload
(
left
.
rowRange
(
0
,
left
.
rows
/
2
));
d_right
[
0
].
upload
(
right
.
rowRange
(
0
,
right
.
rows
/
2
));
bm
[
0
]
=
new
StereoBM_GPU
();
contextOff
();
// Split source images for processing on the second GPU
contextOn
(
1
);
d_left
[
1
].
upload
(
left
.
rowRange
(
left
.
rows
/
2
,
left
.
rows
));
d_right
[
1
].
upload
(
right
.
rowRange
(
right
.
rows
/
2
,
right
.
rows
));
bm
[
1
]
=
new
StereoBM_GPU
();
contextOff
();
// Execute calculation in two threads using two GPUs
int
devices
[]
=
{
0
,
1
};
parallel_do
(
devices
,
devices
+
2
,
Worker
());
// Release the first GPU resources
contextOn
(
0
);
imshow
(
"GPU #0 result"
,
Mat
(
d_result
[
0
]));
d_left
[
0
].
release
();
d_right
[
0
].
release
();
d_result
[
0
].
release
();
delete
bm
[
0
];
contextOff
();
// Release the second GPU resources
contextOn
(
1
);
imshow
(
"GPU #1 result"
,
Mat
(
d_result
[
1
]));
d_left
[
1
].
release
();
d_right
[
1
].
release
();
d_result
[
1
].
release
();
delete
bm
[
1
];
contextOff
();
waitKey
();
destroyContexts
();
return
0
;
}
void
Worker
::
operator
()(
int
device_id
)
const
{
contextOn
(
device_id
);
bm
[
device_id
]
->
operator
()(
d_left
[
device_id
],
d_right
[
device_id
],
d_result
[
device_id
]);
cout
<<
"GPU #"
<<
device_id
<<
" ("
<<
DeviceInfo
().
name
()
<<
"): finished
\n
"
;
contextOff
();
}
void
destroyContexts
()
{
safeCall
(
cuCtxDestroy
(
contexts
[
0
]));
safeCall
(
cuCtxDestroy
(
contexts
[
1
]));
}
#endif
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment