1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
/* This sample demonstrates working on one piece of data using two GPUs.
It splits input into two parts and processes them separately on different
GPUs. */
// Disable some warnings that are caused by CUDA headers
#if defined(_MSC_VER)
#pragma warning(disable: 4201 4408 4100)
#endif
#include <iostream>
#include "cvconfig.h"
#include "opencv2/core/core.hpp"
#include "opencv2/highgui/highgui.hpp"
#include "opencv2/gpu/gpu.hpp"
#if !defined(HAVE_CUDA) || !defined(HAVE_TBB)
// Fallback entry point, compiled only when the build lacks CUDA and/or TBB.
// It names every missing prerequisite on standard output and returns 0
// without doing any processing.
int main()
{
#ifndef HAVE_CUDA
    static const char cuda_required[] =
        "CUDA support is required (CMake key 'WITH_CUDA' must be true).\n";
    std::cout << cuda_required;
#endif

#ifndef HAVE_TBB
    static const char tbb_required[] =
        "TBB support is required (CMake key 'WITH_TBB' must be true).\n";
    std::cout << tbb_required;
#endif

    return 0;
}
#else
#include <cuda.h>
#include <cuda_runtime.h>
#include "opencv2/core/internal.hpp" // For TBB wrappers
using namespace std;
using namespace cv;
using namespace cv::gpu;
// Functor handed to parallel_do() in main(); it is invoked once per GPU index
// (0 and 1). The call operator is defined near the end of this file.
struct Worker { void operator()(int device_id) const; };
// Destroys both GPU contexts. Declared here because safeCall_() calls it
// before its definition at the end of this file.
void destroyContexts();
// safeCall(expr) evaluates a CUDA driver API call and hands its result code to
// safeCall_() together with the stringified expression and the call site.
#define safeCall(expr) safeCall_(expr, #expr, __FILE__, __LINE__)

// Checks the result of a CUDA driver API call.
//   code - value returned by the call (compared against CUDA_SUCCESS)
//   expr - source text of the call, used in the error message
//   file - source file of the call site
//   line - source line of the call site
// Returns normally on success. On failure it prints a diagnostic to standard
// output, calls destroyContexts() and terminates the process with exit code -1.
inline void safeCall_(int code, const char* expr, const char* file, int line)
{
    if (code == CUDA_SUCCESS)
        return;

    std::cout << "CUDA driver API error: code " << code
              << ", expr " << expr
              << ", file " << file
              << ", line " << line << std::endl;
    destroyContexts();
    exit(-1);
}
// Each GPU is associated with its own context: main() creates contexts[0] for
// device 0 and contexts[1] for device 1, and destroyContexts() releases both.
CUcontext contexts[2];
// Pushes the context of GPU #id with cuCtxPushCurrent() (per the CUDA driver
// API this makes it the current context of the calling thread). Every call is
// paired with a later contextOff() in this file.
// id indexes contexts[] directly and is not range-checked; callers pass 0 or 1.
// A failing driver call terminates the program inside safeCall().
void inline contextOn(int id)
{
safeCall(cuCtxPushCurrent(contexts[id]));
}
// Pops the current context with cuCtxPopCurrent(), undoing the matching
// contextOn() (or the implicit push done by cuCtxCreate() in main()).
// A failing driver call terminates the program inside safeCall().
void inline contextOff()
{
// The popped handle is required by the API but is not used afterwards.
CUcontext prev_context;
safeCall(cuCtxPopCurrent(&prev_context));
}
// GPUs data; every array is indexed by GPU number (0 or 1).
// d_left / d_right hold the part of the left / right input image uploaded to
// that GPU in main(): GPU #0 gets the upper half of the rows, GPU #1 the rest.
GpuMat d_left[2];
GpuMat d_right[2];
// Per-GPU stereo block matcher; allocated and deleted in main() while the
// corresponding context is pushed.
StereoBM_GPU* bm[2];
// Per-GPU matcher output, written by Worker::operator() and shown in main().
GpuMat d_result[2];
// CPU result
// NOTE(review): not referenced anywhere in the visible code - possibly unused.
Mat result;
// Writes the one-line command-line synopsis of this sample to standard output.
void printHelp()
{
    static const char usage[] =
        "Usage: driver_api_stereo_multi_gpu --left <left_image> --right <right_image>\n";
    std::cout << usage;
}
int main(int argc, char** argv)
{
if (argc < 5)
{
printHelp();
return -1;
}
int num_devices = getCudaEnabledDeviceCount();
if (num_devices < 2)
{
std::cout << "Two or more GPUs are required\n";
return -1;
}
for (int i = 0; i < num_devices; ++i)
{
cv::gpu::printShortCudaDeviceInfo(i);
DeviceInfo dev_info(i);
if (!dev_info.isCompatible())
{
std::cout << "GPU module isn't built for GPU #" << i << " ("
<< dev_info.name() << ", CC " << dev_info.majorVersion()
<< dev_info.minorVersion() << "\n";
return -1;
}
}
// Load input data
Mat left, right;
for (int i = 1; i < argc; ++i)
{
if (string(argv[i]) == "--left")
{
left = imread(argv[++i], CV_LOAD_IMAGE_GRAYSCALE);
CV_Assert(!left.empty());
}
else if (string(argv[i]) == "--right")
{
right = imread(argv[++i], CV_LOAD_IMAGE_GRAYSCALE);
CV_Assert(!right.empty());
}
else if (string(argv[i]) == "--help")
{
printHelp();
return -1;
}
}
// Init CUDA Driver API
safeCall(cuInit(0));
// Create context for GPU #0
CUdevice device;
safeCall(cuDeviceGet(&device, 0));
safeCall(cuCtxCreate(&contexts[0], 0, device));
contextOff();
// Create context for GPU #1
safeCall(cuDeviceGet(&device, 1));
safeCall(cuCtxCreate(&contexts[1], 0, device));
contextOff();
// Split source images for processing on GPU #0
contextOn(0);
d_left[0].upload(left.rowRange(0, left.rows / 2));
d_right[0].upload(right.rowRange(0, right.rows / 2));
bm[0] = new StereoBM_GPU();
contextOff();
// Split source images for processing on the GPU #1
contextOn(1);
d_left[1].upload(left.rowRange(left.rows / 2, left.rows));
d_right[1].upload(right.rowRange(right.rows / 2, right.rows));
bm[1] = new StereoBM_GPU();
contextOff();
// Execute calculation in two threads using two GPUs
int devices[] = {0, 1};
parallel_do(devices, devices + 2, Worker());
// Release the first GPU resources
contextOn(0);
imshow("GPU #0 result", Mat(d_result[0]));
d_left[0].release();
d_right[0].release();
d_result[0].release();
delete bm[0];
contextOff();
// Release the second GPU resources
contextOn(1);
imshow("GPU #1 result", Mat(d_result[1]));
d_left[1].release();
d_right[1].release();
d_result[1].release();
delete bm[1];
contextOff();
waitKey();
destroyContexts();
return 0;
}
// Runs the stereo matcher of GPU #device_id on that GPU's image halves.
// The GPU's context is pushed for the duration of the call and popped before
// returning; the output is stored in d_result[device_id]. A completion line
// naming the device is written to standard output.
void Worker::operator()(int device_id) const
{
    contextOn(device_id);

    (*bm[device_id])(d_left[device_id], d_right[device_id], d_result[device_id]);

    std::cout << "GPU #" << device_id << " (" << DeviceInfo().name() << "): finished\n";

    contextOff();
}
void destroyContexts()
{
safeCall(cuCtxDestroy(contexts[0]));
safeCall(cuCtxDestroy(contexts[1]));
}
#endif