refactored gpu info structures (TargetArchs and DeviceInfo)

DeviceInfo now provides full information about the device (from cudaDeviceProp)

refactored gpu info structures (TargetArchs and DeviceInfo)
DeviceInfo now provides full information about the device (from cudaDeviceProp)
2dab93c2 · Vladislav Vinogradov · 76f4b02b · 2dab93c2 · 2dab93c2 · 2dab93c2
Commit 2dab93c2 authored Apr 16, 2013 by Vladislav Vinogradov
13 changed files
--- a/modules/core/include/opencv2/core/gpu.hpp
+++ b/modules/core/include/opencv2/core/gpu.hpp
@@ -392,17 +392,17 @@ private:
 //////////////////////////////// Initialization & Info ////////////////////////
-//! This is the only function that do not throw exceptions if the library is compiled without Cuda.
+//! this is the only function that does not throw exceptions if the library is compiled without CUDA
 CV_EXPORTS int getCudaEnabledDeviceCount();
-//! Functions below throw cv::Expception if the library is compiled without Cuda.
+//! set device to be used for GPU executions for the calling host thread
 CV_EXPORTS void setDevice(int device);
+//! returns which device is currently being used for the calling host thread
 CV_EXPORTS int getDevice();
-//! Explicitly destroys and cleans up all resources associated with the current device in the current process.
+//! explicitly destroys and cleans up all resources associated with the current device in the current process
-//! Any subsequent API call to this device will reinitialize the device.
+//! any subsequent API call to this device will reinitialize the device
 CV_EXPORTS void resetDevice();
 enum FeatureSet
@@ -423,75 +423,218 @@ enum FeatureSet
    DYNAMIC_PARALLELISM = FEATURE_SET_COMPUTE_35
 };
-// Checks whether current device supports the given feature
+//! checks whether current device supports the given feature
 CV_EXPORTS bool deviceSupports(FeatureSet feature_set);
-// Gives information about what GPU archs this OpenCV GPU module was
+//! information about what GPU archs this OpenCV GPU module was compiled for
-// compiled for
 class CV_EXPORTS TargetArchs
 {
 public:
    static bool builtWith(FeatureSet feature_set);
    static bool has(int major, int minor);
    static bool hasPtx(int major, int minor);
    static bool hasBin(int major, int minor);
    static bool hasEqualOrLessPtx(int major, int minor);
    static bool hasEqualOrGreater(int major, int minor);
    static bool hasEqualOrGreaterPtx(int major, int minor);
    static bool hasEqualOrGreaterBin(int major, int minor);
-private:
-    TargetArchs();
 };
-// Gives information about the given GPU
+//! information about the given GPU.
 class CV_EXPORTS DeviceInfo
 {
 public:
-    // Creates DeviceInfo object for the current GPU
+    //! creates DeviceInfo object for the current GPU
-    DeviceInfo() : device_id_(getDevice()) { query(); }
+    DeviceInfo();
-    // Creates DeviceInfo object for the given GPU
+    //! creates DeviceInfo object for the given GPU
-    DeviceInfo(int device_id) : device_id_(device_id) { query(); }
+    DeviceInfo(int device_id);
-    String name() const { return name_; }
+    //! device number.
+    int deviceID() const;
-    // Return compute capability versions
+    //! ASCII string identifying device
-    int majorVersion() const { return majorVersion_; }
+    const char* name() const;
-    int minorVersion() const { return minorVersion_; }
-    int multiProcessorCount() const { return multi_processor_count_; }
+    //! global memory available on device in bytes
+    size_t totalGlobalMem() const;
+    //! shared memory available per block in bytes
    size_t sharedMemPerBlock() const;
+    //! 32-bit registers available per block
+    int regsPerBlock() const;
+    //! warp size in threads
+    int warpSize() const;
+    //! maximum pitch in bytes allowed by memory copies
+    size_t memPitch() const;
+    //! maximum number of threads per block
+    int maxThreadsPerBlock() const;
+    //! maximum size of each dimension of a block
+    Vec3i maxThreadsDim() const;
+    //! maximum size of each dimension of a grid
+    Vec3i maxGridSize() const;
+    //! clock frequency in kilohertz
+    int clockRate() const;
+    //! constant memory available on device in bytes
+    size_t totalConstMem() const;
+    //! major compute capability
+    int major() const;
+    //! minor compute capability
+    int minor() const;
+    //! alignment requirement for textures
+    size_t textureAlignment() const;
+    //! pitch alignment requirement for texture references bound to pitched memory
+    size_t texturePitchAlignment() const;
+    //! number of multiprocessors on device
+    int multiProcessorCount() const;
+    //! specifies whether there is a run time limit on kernels
+    bool kernelExecTimeoutEnabled() const;
+    //! device is integrated as opposed to discrete
+    bool integrated() const;
+    //! device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
+    bool canMapHostMemory() const;
+    enum ComputeMode
+    {
+        ComputeModeDefault,         /**< default compute mode (Multiple threads can use ::cudaSetDevice() with this device) */
+        ComputeModeExclusive,       /**< compute-exclusive-thread mode (Only one thread in one process will be able to use ::cudaSetDevice() with this device) */
+        ComputeModeProhibited,      /**< compute-prohibited mode (No threads can use ::cudaSetDevice() with this device) */
+        ComputeModeExclusiveProcess /**< compute-exclusive-process mode (Many threads in one process will be able to use ::cudaSetDevice() with this device) */
+    };
+    //! compute mode
+    ComputeMode computeMode() const;
+    //! maximum 1D texture size
+    int maxTexture1D() const;
+    //! maximum 1D mipmapped texture size
+    int maxTexture1DMipmap() const;
+    //! maximum size for 1D textures bound to linear memory
+    int maxTexture1DLinear() const;
+    //! maximum 2D texture dimensions
+    Vec2i maxTexture2D() const;
+    //! maximum 2D mipmapped texture dimensions
+    Vec2i maxTexture2DMipmap() const;
+    //! maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory
+    Vec3i maxTexture2DLinear() const;
+    //! maximum 2D texture dimensions if texture gather operations have to be performed
+    Vec2i maxTexture2DGather() const;
+    //! maximum 3D texture dimensions
+    Vec3i maxTexture3D() const;
+    //! maximum Cubemap texture dimensions
+    int maxTextureCubemap() const;
+    //! maximum 1D layered texture dimensions
+    Vec2i maxTexture1DLayered() const;
+    //! maximum 2D layered texture dimensions
+    Vec3i maxTexture2DLayered() const;
+    //! maximum Cubemap layered texture dimensions
+    Vec2i maxTextureCubemapLayered() const;
+    //! maximum 1D surface size
+    int maxSurface1D() const;
+    //! maximum 2D surface dimensions
+    Vec2i maxSurface2D() const;
+    //! maximum 3D surface dimensions
+    Vec3i maxSurface3D() const;
+    //! maximum 1D layered surface dimensions
+    Vec2i maxSurface1DLayered() const;
+    //! maximum 2D layered surface dimensions
+    Vec3i maxSurface2DLayered() const;
+    //! maximum Cubemap surface dimensions
+    int maxSurfaceCubemap() const;
+    //! maximum Cubemap layered surface dimensions
+    Vec2i maxSurfaceCubemapLayered() const;
+    //! alignment requirements for surfaces
+    size_t surfaceAlignment() const;
+    //! device can possibly execute multiple kernels concurrently
+    bool concurrentKernels() const;
+    //! device has ECC support enabled
+    bool ECCEnabled() const;
+    //! PCI bus ID of the device
+    int pciBusID() const;
+    //! PCI device ID of the device
+    int pciDeviceID() const;
+    //! PCI domain ID of the device
+    int pciDomainID() const;
+    //! true if device is a Tesla device using TCC driver, false otherwise
+    bool tccDriver() const;
+    //! number of asynchronous engines
+    int asyncEngineCount() const;
+    //! device shares a unified address space with the host
+    bool unifiedAddressing() const;
+    //! peak memory clock frequency in kilohertz
+    int memoryClockRate() const;
+    //! global memory bus width in bits
+    int memoryBusWidth() const;
+    //! size of L2 cache in bytes
+    int l2CacheSize() const;
+    //! maximum resident threads per multiprocessor
+    int maxThreadsPerMultiProcessor() const;
+    //! gets free and total device memory
    void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
    size_t freeMemory() const;
    size_t totalMemory() const;
-    // Checks whether device supports the given feature
+    //! checks whether device supports the given feature
    bool supports(FeatureSet feature_set) const;
-    // Checks whether the GPU module can be run on the given device
+    //! checks whether the GPU module can be run on the given device
    bool isCompatible() const;
-    bool canMapHostMemory() const;
-    size_t textureAlignment() const;
-    int deviceID() const { return device_id_; }
 private:
-    void query();
    int device_id_;
-    String name_;
-    int multi_processor_count_;
-    int majorVersion_;
-    int minorVersion_;
 };
 CV_EXPORTS void printCudaDeviceInfo(int device);
 CV_EXPORTS void printShortCudaDeviceInfo(int device);
 }} // namespace cv { namespace gpu {

--- a/modules/core/include/opencv2/core/gpu.inl.hpp
+++ b/modules/core/include/opencv2/core/gpu.inl.hpp
@@ -567,6 +567,62 @@ Stream::Stream(const Ptr<Impl>& impl)
 {
 }
+//////////////////////////////// Initialization & Info ////////////////////////
+inline
+bool TargetArchs::has(int major, int minor)
+{
+    return hasPtx(major, minor) || hasBin(major, minor);
+}
+inline
+bool TargetArchs::hasEqualOrGreater(int major, int minor)
+{
+    return hasEqualOrGreaterPtx(major, minor) || hasEqualOrGreaterBin(major, minor);
+}
+inline
+DeviceInfo::DeviceInfo()
+{
+    device_id_ = getDevice();
+}
+inline
+DeviceInfo::DeviceInfo(int device_id)
+{
+    CV_Assert( device_id >= 0 && device_id < getCudaEnabledDeviceCount() );
+    device_id_ = device_id;
+}
+inline
+int DeviceInfo::deviceID() const
+{
+    return device_id_;
+}
+inline
+size_t DeviceInfo::freeMemory() const
+{
+    size_t _totalMemory, _freeMemory;
+    queryMemory(_totalMemory, _freeMemory);
+    return _freeMemory;
+}
+inline
+size_t DeviceInfo::totalMemory() const
+{
+    size_t _totalMemory, _freeMemory;
+    queryMemory(_totalMemory, _freeMemory);
+    return _totalMemory;
+}
+inline
+bool DeviceInfo::supports(FeatureSet feature_set) const
+{
+    int version = major() * 10 + minor();
+    return version >= feature_set;
+}
 }} // namespace cv { namespace gpu {
 //////////////////////////////// Mat ////////////////////////////////

--- a/modules/core/src/gpu.cpp
+++ b/modules/core/src/gpu.cpp
--- a/modules/gpufilters/src/filtering.cpp
+++ b/modules/gpufilters/src/filtering.cpp
@@ -878,7 +878,7 @@ namespace
        virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
        {
            DeviceInfo devInfo;
-            int cc = devInfo.majorVersion() * 10 + devInfo.minorVersion();
+            int cc = devInfo.major() * 10 + devInfo.minor();
            func(src, dst, kernel.ptr<float>(), ksize, anchor, brd_type, cc, StreamAccessor::getStream(s));
        }
@@ -977,7 +977,7 @@ namespace
        virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
        {
            DeviceInfo devInfo;
-            int cc = devInfo.majorVersion() * 10 + devInfo.minorVersion();
+            int cc = devInfo.major() * 10 + devInfo.minor();
            if (ksize > 16 && cc < 20)
                CV_Error(cv::Error::StsNotImplemented, "column linear filter doesn't implemented for kernel size > 16 for device with compute capabilities less than 2.0");

--- a/modules/gpuoptflow/test/test_optflow.cpp
+++ b/modules/gpuoptflow/test/test_optflow.cpp
@@ -80,7 +80,7 @@ GPU_TEST_P(BroxOpticalFlow, Regression)
    brox(loadMat(frame0), loadMat(frame1), u, v);
    std::string fname(cvtest::TS::ptr()->get_data_path());
-    if (devInfo.majorVersion() >= 2)
+    if (devInfo.major() >= 2)
        fname += "opticalflow/brox_optical_flow_cc20.bin";
    else
        fname += "opticalflow/brox_optical_flow.bin";

--- a/modules/gpustereo/src/stereobm.cpp
+++ b/modules/gpustereo/src/stereobm.cpp
@@ -91,7 +91,7 @@ bool cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable()
    DeviceInfo device_info;
-    if (device_info.majorVersion() > 1 || device_info.multiProcessorCount() > 16)
+    if (device_info.major() > 1 || device_info.multiProcessorCount() > 16)
        return true;
    return false;

--- a/modules/ts/src/gpu_perf.cpp
+++ b/modules/ts/src/gpu_perf.cpp
@@ -287,8 +287,8 @@ namespace perf
            cv::gpu::DeviceInfo info(i);
            printf("[----------]\n"), fflush(stdout);
-            printf("[ DEVICE   ] \t# %d %s.\n", i, info.name().c_str()), fflush(stdout);
+            printf("[ DEVICE   ] \t# %d %s.\n", i, info.name()), fflush(stdout);
-            printf("[          ] \tCompute capability: %d.%d\n", (int)info.majorVersion(), (int)info.minorVersion()), fflush(stdout);
+            printf("[          ] \tCompute capability: %d.%d\n", (int)info.major(), (int)info.minor()), fflush(stdout);
            printf("[          ] \tMulti Processor Count:  %d\n", info.multiProcessorCount()), fflush(stdout);
            printf("[          ] \tTotal memory: %d Mb\n", static_cast<int>(static_cast<int>(info.totalMemory() / 1024.0) / 1024.0)), fflush(stdout);
            printf("[          ] \tFree  memory: %d Mb\n", static_cast<int>(static_cast<int>(info.freeMemory()  / 1024.0) / 1024.0)), fflush(stdout);

--- a/modules/ts/src/ts_perf.cpp
+++ b/modules/ts/src/ts_perf.cpp
@@ -682,13 +682,13 @@ void TestBase::Init(int argc, const char* const argv[])
        cv::gpu::DeviceInfo info(param_cuda_device);
        if (!info.isCompatible())
        {
-            printf("[----------]\n[ FAILURE  ] \tDevice %s is NOT compatible with current GPU module build.\n[----------]\n", info.name().c_str()), fflush(stdout);
+            printf("[----------]\n[ FAILURE  ] \tDevice %s is NOT compatible with current GPU module build.\n[----------]\n", info.name()), fflush(stdout);
            exit(-1);
        }
        cv::gpu::setDevice(param_cuda_device);
-        printf("[----------]\n[ GPU INFO ] \tRun test suite on %s GPU.\n[----------]\n", info.name().c_str()), fflush(stdout);
+        printf("[----------]\n[ GPU INFO ] \tRun test suite on %s GPU.\n[----------]\n", info.name()), fflush(stdout);
    }
 #endif

--- a/samples/gpu/driver_api_multi.cpp
+++ b/samples/gpu/driver_api_multi.cpp
@@ -82,8 +82,8 @@ int main()
        if (!dev_info.isCompatible())
        {
            std::cout << "GPU module isn't built for GPU #" << i << " ("
-                 << dev_info.name() << ", CC " << dev_info.majorVersion()
+                 << dev_info.name() << ", CC " << dev_info.major()
-                 << dev_info.minorVersion() << "\n";
+                 << dev_info.minor() << "\n";
            return -1;
        }
    }

--- a/samples/gpu/driver_api_stereo_multi.cpp
+++ b/samples/gpu/driver_api_stereo_multi.cpp
@@ -112,8 +112,8 @@ int main(int argc, char** argv)
        if (!dev_info.isCompatible())
        {
            std::cout << "GPU module isn't built for GPU #" << i << " ("
-                 << dev_info.name() << ", CC " << dev_info.majorVersion()
+                 << dev_info.name() << ", CC " << dev_info.major()
-                 << dev_info.minorVersion() << "\n";
+                 << dev_info.minor() << "\n";
            return -1;
        }
    }

--- a/samples/gpu/multi.cpp
+++ b/samples/gpu/multi.cpp
@@ -62,8 +62,8 @@ int main()
        if (!dev_info.isCompatible())
        {
            std::cout << "GPU module isn't built for GPU #" << i << " ("
-                 << dev_info.name() << ", CC " << dev_info.majorVersion()
+                 << dev_info.name() << ", CC " << dev_info.major()
-                 << dev_info.minorVersion() << "\n";
+                 << dev_info.minor() << "\n";
            return -1;
        }
    }

--- a/samples/gpu/performance/performance.cpp
+++ b/samples/gpu/performance/performance.cpp
@@ -191,7 +191,7 @@ int main(int argc, const char* argv[])
    DeviceInfo dev_info(device);
    if (!dev_info.isCompatible())
    {
-        cerr << "GPU module isn't built for GPU #" << device << " " << dev_info.name() << ", CC " << dev_info.majorVersion() << '.' << dev_info.minorVersion() << endl;
+        cerr << "GPU module isn't built for GPU #" << device << " " << dev_info.name() << ", CC " << dev_info.major() << '.' << dev_info.minor() << endl;
        return -1;
    }
    setDevice(device);

--- a/samples/gpu/stereo_multi.cpp
+++ b/samples/gpu/stereo_multi.cpp
@@ -81,8 +81,8 @@ int main(int argc, char** argv)
        if (!dev_info.isCompatible())
        {
            std::cout << "GPU module isn't built for GPU #" << i << " ("
-                 << dev_info.name() << ", CC " << dev_info.majorVersion()
+                 << dev_info.name() << ", CC " << dev_info.major()
-                 << dev_info.minorVersion() << "\n";
+                 << dev_info.minor() << "\n";
            return -1;
        }
    }