// multi.cpp
/* This sample demonstrates how to perform independent tasks
   on different GPUs */

// Silence MSVC warnings that are triggered by the CUDA headers rather than by
// this sample: C4201 (nameless struct/union), C4408 (anonymous struct/union
// without data members) and C4100 (unreferenced formal parameter).
// The conditional must be closed here so it does not swallow the rest of the
// file on non-MSVC compilers.
#if defined(_MSC_VER)
#pragma warning(disable: 4201 4408 4100)
#endif

#include <iostream>
#include "cvconfig.h"
#include "opencv2/core/core.hpp"
#include "opencv2/gpu/gpu.hpp"

// Pull in TBB only when the build enabled it AND the installed version is
// recent enough. Otherwise HAVE_TBB is dropped, so the code below that tests
// HAVE_TBB takes the "TBB support is required" path.
#ifdef HAVE_TBB
#  include "tbb/tbb_stddef.h"
// NOTE(review): the version threshold (2.2) was reconstructed to match the
// nested #else/#endif that follow; confirm it against the project's TBB
// requirement. TBB_VERSION_MAJOR/MINOR are expected to come from tbb_stddef.h.
#  if TBB_VERSION_MAJOR*100 + TBB_VERSION_MINOR >= 202
#    include "tbb/tbb.h"
#    include "tbb/task.h"
// Remove any min/max macros left defined after the includes above so they
// cannot collide with functions of the same name.
#    undef min
#    undef max
#  else
#    undef HAVE_TBB
#  endif
#endif

#if !defined(HAVE_CUDA) || !defined(HAVE_TBB)

// Fallback entry point, compiled when the sample cannot be built for real.
// It only reports which required build option is missing and exits with 0.
int main()
{
#if !defined(HAVE_CUDA)
    std::cout << "CUDA support is required (CMake key 'WITH_CUDA' must be true).\n";
#endif

#if !defined(HAVE_TBB)
    std::cout << "TBB support is required (CMake key 'WITH_TBB' must be true).\n";
#endif

    return 0;
}

#else

using namespace std;
using namespace cv;
using namespace cv::gpu;

// Functor run once per GPU by tbb::parallel_do; device_id is the index of the
// GPU the invocation has to work on.
struct Worker { void operator()(int device_id) const; };

// Checks that at least two CUDA devices are present and that the GPU module
// was built for every one of them, then runs one Worker on GPU 0 and one on
// GPU 1 in parallel.
// Returns 0 on success, -1 if a precondition is not met.
int main()
{
    int num_devices = getCudaEnabledDeviceCount();
    if (num_devices < 2)
    {
        std::cout << "Two or more GPUs are required\n";
        return -1;
    }

    for (int i = 0; i < num_devices; ++i)
    {
        DeviceInfo dev_info(i);
        if (!dev_info.isCompatible())
        {
            // Print the device name and its compute capability as "major.minor".
            std::cout << "GPU module isn't built for GPU #" << i << " ("
                 << dev_info.name() << ", CC " << dev_info.majorVersion()
                 << '.' << dev_info.minorVersion() << ")\n";
            return -1;
        }
    }

    // Execute calculation in two threads using two GPUs
    int devices[] = {0, 1};
    tbb::parallel_do(devices, devices + 2, Worker());

    return 0;
}

// Transposes the same random 1000x1000 float matrix on the CPU and on the GPU
// identified by device_id, and prints whether the two results agree.
void Worker::operator()(int device_id) const
{
    // Bind this thread to the requested GPU; without this every worker would
    // run on the default device and device_id would only affect the printout.
    setDevice(device_id);

    Mat src(1000, 1000, CV_32F);
    Mat dst;

    // Fixed seed, so every worker checks identical input data.
    RNG rng(0);
    rng.fill(src, RNG::UNIFORM, 0, 1);

    // CPU works
    transpose(src, dst);

    // GPU works
    GpuMat d_src(src);
    GpuMat d_dst;
    transpose(d_src, d_dst);

    // Check results: the largest absolute element difference must stay below 1e-3.
    // NOTE(review): DeviceInfo() without an index is expected to describe the
    // device selected above - confirm against the gpu module documentation.
    bool passed = norm(dst - Mat(d_dst), NORM_INF) < 1e-3;
    std::cout << "GPU #" << device_id << " (" << DeviceInfo().name() << "): "
        << (passed ? "passed" : "FAILED") << std::endl;

    // Deallocate the GPU buffers explicitly here instead of leaving it to the
    // destructors at scope exit, so the memory is freed while this worker's
    // device is still the current one.
    d_src.release();
    d_dst.release();
}

#endif