Commit 2dab93c2 authored by Vladislav Vinogradov's avatar Vladislav Vinogradov

refactored gpu info structures (TargetArchs and DeviceInfo)

now DeviceInfo provides full information about device (from cudaDeviceProp)
parent 76f4b02b
......@@ -392,17 +392,17 @@ private:
//////////////////////////////// Initialization & Info ////////////////////////
//! This is the only function that do not throw exceptions if the library is compiled without Cuda.
//! this is the only function that does not throw exceptions if the library is compiled without CUDA
CV_EXPORTS int getCudaEnabledDeviceCount();
//! Functions below throw cv::Exception if the library is compiled without Cuda.
//! set device to be used for GPU executions for the calling host thread
CV_EXPORTS void setDevice(int device);
//! returns which device is currently being used for the calling host thread
CV_EXPORTS int getDevice();
//! Explicitly destroys and cleans up all resources associated with the current device in the current process.
//! Any subsequent API call to this device will reinitialize the device.
//! explicitly destroys and cleans up all resources associated with the current device in the current process
//! any subsequent API call to this device will reinitialize the device
CV_EXPORTS void resetDevice();
enum FeatureSet
......@@ -423,75 +423,218 @@ enum FeatureSet
DYNAMIC_PARALLELISM = FEATURE_SET_COMPUTE_35
};
// Checks whether current device supports the given feature
//! checks whether current device supports the given feature
CV_EXPORTS bool deviceSupports(FeatureSet feature_set);
// Gives information about what GPU archs this OpenCV GPU module was
// compiled for
//! information about what GPU archs this OpenCV GPU module was compiled for
//! Static-only query interface describing the GPU architectures this module was built for.
//! Every public member is static and the constructor is private, so outside code
//! cannot create instances.
class CV_EXPORTS TargetArchs
{
public:
//! NOTE(review): presumably reports whether the build covers the given feature set;
//! the definition is not visible in this header - confirm against the implementation
static bool builtWith(FeatureSet feature_set);
//! true when either hasPtx(major, minor) or hasBin(major, minor) is true
static bool has(int major, int minor);
//! NOTE(review): presumably true when PTX code for exactly major.minor is present - confirm
static bool hasPtx(int major, int minor);
//! NOTE(review): presumably true when binary code for exactly major.minor is present - confirm
static bool hasBin(int major, int minor);
//! NOTE(review): presumably true when PTX for a version <= major.minor is present - confirm
static bool hasEqualOrLessPtx(int major, int minor);
//! true when either hasEqualOrGreaterPtx(major, minor) or hasEqualOrGreaterBin(major, minor) is true
static bool hasEqualOrGreater(int major, int minor);
//! NOTE(review): presumably true when PTX for a version >= major.minor is present - confirm
static bool hasEqualOrGreaterPtx(int major, int minor);
//! NOTE(review): presumably true when binary code for a version >= major.minor is present - confirm
static bool hasEqualOrGreaterBin(int major, int minor);
private:
//! private constructor: the class is used only through its static members
TargetArchs();
};
// Gives information about the given GPU
//! information about the given GPU.
class CV_EXPORTS DeviceInfo
{
public:
// Creates DeviceInfo object for the current GPU
DeviceInfo() : device_id_(getDevice()) { query(); }
//! creates DeviceInfo object for the current GPU
DeviceInfo();
// Creates DeviceInfo object for the given GPU
DeviceInfo(int device_id) : device_id_(device_id) { query(); }
//! creates DeviceInfo object for the given GPU
DeviceInfo(int device_id);
String name() const { return name_; }
//! device number.
int deviceID() const;
// Return compute capability versions
int majorVersion() const { return majorVersion_; }
int minorVersion() const { return minorVersion_; }
//! ASCII string identifying device
const char* name() const;
int multiProcessorCount() const { return multi_processor_count_; }
//! global memory available on device in bytes
size_t totalGlobalMem() const;
//! shared memory available per block in bytes
size_t sharedMemPerBlock() const;
//! 32-bit registers available per block
int regsPerBlock() const;
//! warp size in threads
int warpSize() const;
//! maximum pitch in bytes allowed by memory copies
size_t memPitch() const;
//! maximum number of threads per block
int maxThreadsPerBlock() const;
//! maximum size of each dimension of a block
Vec3i maxThreadsDim() const;
//! maximum size of each dimension of a grid
Vec3i maxGridSize() const;
//! clock frequency in kilohertz
int clockRate() const;
//! constant memory available on device in bytes
size_t totalConstMem() const;
//! major compute capability
int major() const;
//! minor compute capability
int minor() const;
//! alignment requirement for textures
size_t textureAlignment() const;
//! pitch alignment requirement for texture references bound to pitched memory
size_t texturePitchAlignment() const;
//! number of multiprocessors on device
int multiProcessorCount() const;
//! specifies whether there is a run time limit on kernels
bool kernelExecTimeoutEnabled() const;
//! device is integrated as opposed to discrete
bool integrated() const;
//! device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer
bool canMapHostMemory() const;
enum ComputeMode
{
ComputeModeDefault, /**< default compute mode (Multiple threads can use ::cudaSetDevice() with this device) */
ComputeModeExclusive, /**< compute-exclusive-thread mode (Only one thread in one process will be able to use ::cudaSetDevice() with this device) */
ComputeModeProhibited, /**< compute-prohibited mode (No threads can use ::cudaSetDevice() with this device) */
ComputeModeExclusiveProcess /**< compute-exclusive-process mode (Many threads in one process will be able to use ::cudaSetDevice() with this device) */
};
//! compute mode
ComputeMode computeMode() const;
//! maximum 1D texture size
int maxTexture1D() const;
//! maximum 1D mipmapped texture size
int maxTexture1DMipmap() const;
//! maximum size for 1D textures bound to linear memory
int maxTexture1DLinear() const;
//! maximum 2D texture dimensions
Vec2i maxTexture2D() const;
//! maximum 2D mipmapped texture dimensions
Vec2i maxTexture2DMipmap() const;
//! maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory
Vec3i maxTexture2DLinear() const;
//! maximum 2D texture dimensions if texture gather operations have to be performed
Vec2i maxTexture2DGather() const;
//! maximum 3D texture dimensions
Vec3i maxTexture3D() const;
//! maximum Cubemap texture dimensions
int maxTextureCubemap() const;
//! maximum 1D layered texture dimensions
Vec2i maxTexture1DLayered() const;
//! maximum 2D layered texture dimensions
Vec3i maxTexture2DLayered() const;
//! maximum Cubemap layered texture dimensions
Vec2i maxTextureCubemapLayered() const;
//! maximum 1D surface size
int maxSurface1D() const;
//! maximum 2D surface dimensions
Vec2i maxSurface2D() const;
//! maximum 3D surface dimensions
Vec3i maxSurface3D() const;
//! maximum 1D layered surface dimensions
Vec2i maxSurface1DLayered() const;
//! maximum 2D layered surface dimensions
Vec3i maxSurface2DLayered() const;
//! maximum Cubemap surface dimensions
int maxSurfaceCubemap() const;
//! maximum Cubemap layered surface dimensions
Vec2i maxSurfaceCubemapLayered() const;
//! alignment requirements for surfaces
size_t surfaceAlignment() const;
//! device can possibly execute multiple kernels concurrently
bool concurrentKernels() const;
//! device has ECC support enabled
bool ECCEnabled() const;
//! PCI bus ID of the device
int pciBusID() const;
//! PCI device ID of the device
int pciDeviceID() const;
//! PCI domain ID of the device
int pciDomainID() const;
//! true if device is a Tesla device using TCC driver, false otherwise
bool tccDriver() const;
//! number of asynchronous engines
int asyncEngineCount() const;
//! device shares a unified address space with the host
bool unifiedAddressing() const;
//! peak memory clock frequency in kilohertz
int memoryClockRate() const;
//! global memory bus width in bits
int memoryBusWidth() const;
//! size of L2 cache in bytes
int l2CacheSize() const;
//! maximum resident threads per multiprocessor
int maxThreadsPerMultiProcessor() const;
//! gets free and total device memory
void queryMemory(size_t& totalMemory, size_t& freeMemory) const;
size_t freeMemory() const;
size_t totalMemory() const;
// Checks whether device supports the given feature
//! checks whether device supports the given feature
bool supports(FeatureSet feature_set) const;
// Checks whether the GPU module can be run on the given device
//! checks whether the GPU module can be run on the given device
bool isCompatible() const;
bool canMapHostMemory() const;
size_t textureAlignment() const;
int deviceID() const { return device_id_; }
private:
void query();
int device_id_;
String name_;
int multi_processor_count_;
int majorVersion_;
int minorVersion_;
};
CV_EXPORTS void printCudaDeviceInfo(int device);
CV_EXPORTS void printShortCudaDeviceInfo(int device);
}} // namespace cv { namespace gpu {
......
......@@ -567,6 +567,62 @@ Stream::Stream(const Ptr<Impl>& impl)
{
}
//////////////////////////////// Initialization & Info ////////////////////////
//! Returns true when hasPtx() or hasBin() reports true for (major, minor).
//! hasBin() is consulted only when hasPtx() returned false.
inline bool TargetArchs::has(int major, int minor)
{
    if (hasPtx(major, minor))
        return true;

    return hasBin(major, minor);
}
//! Returns true when hasEqualOrGreaterPtx() or hasEqualOrGreaterBin() reports true
//! for (major, minor). The Bin variant is consulted only when the Ptx variant returned false.
inline bool TargetArchs::hasEqualOrGreater(int major, int minor)
{
    if (hasEqualOrGreaterPtx(major, minor))
        return true;

    return hasEqualOrGreaterBin(major, minor);
}
//! Builds a DeviceInfo bound to whatever device index getDevice() returns
//! at construction time.
inline DeviceInfo::DeviceInfo()
    : device_id_(getDevice())
{
}
//! Builds a DeviceInfo bound to the given device index.
//! The index must lie in [0, getCudaEnabledDeviceCount()); CV_Assert fires otherwise.
inline DeviceInfo::DeviceInfo(int device_id)
    : device_id_(device_id)
{
    // The asserted expression is kept verbatim: CV_Assert embeds its text in the error message.
    CV_Assert( device_id >= 0 && device_id < getCudaEnabledDeviceCount() );
}
inline
int DeviceInfo::deviceID() const
{
return device_id_;
}
//! Returns the free-memory value reported by queryMemory().
//! The total value obtained by the same call is discarded.
inline size_t DeviceInfo::freeMemory() const
{
    size_t totalBytes;
    size_t freeBytes;

    // queryMemory takes (total, free) in that order
    queryMemory(totalBytes, freeBytes);

    return freeBytes;
}
//! Returns the total-memory value reported by queryMemory().
//! The free value obtained by the same call is discarded.
inline size_t DeviceInfo::totalMemory() const
{
    size_t totalBytes;
    size_t freeBytes;

    // queryMemory takes (total, free) in that order
    queryMemory(totalBytes, freeBytes);

    return totalBytes;
}
//! Checks whether the device supports the given feature set.
//! The compute capability is encoded as major() * 10 + minor() and compared against
//! the numeric value of feature_set; returns true when it is greater than or equal.
//! NOTE(review): glibc's <sys/sysmacros.h> defines function-like macros named
//! major/minor; confirm these accessor names do not collide on the target platforms.
inline bool DeviceInfo::supports(FeatureSet feature_set) const
{
    return (major() * 10 + minor()) >= feature_set;
}
}} // namespace cv { namespace gpu {
//////////////////////////////// Mat ////////////////////////////////
......
......@@ -878,7 +878,7 @@ namespace
virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
{
DeviceInfo devInfo;
int cc = devInfo.majorVersion() * 10 + devInfo.minorVersion();
int cc = devInfo.major() * 10 + devInfo.minor();
func(src, dst, kernel.ptr<float>(), ksize, anchor, brd_type, cc, StreamAccessor::getStream(s));
}
......@@ -977,7 +977,7 @@ namespace
virtual void operator()(const GpuMat& src, GpuMat& dst, Stream& s = Stream::Null())
{
DeviceInfo devInfo;
int cc = devInfo.majorVersion() * 10 + devInfo.minorVersion();
int cc = devInfo.major() * 10 + devInfo.minor();
if (ksize > 16 && cc < 20)
CV_Error(cv::Error::StsNotImplemented, "column linear filter doesn't implemented for kernel size > 16 for device with compute capabilities less than 2.0");
......
......@@ -80,7 +80,7 @@ GPU_TEST_P(BroxOpticalFlow, Regression)
brox(loadMat(frame0), loadMat(frame1), u, v);
std::string fname(cvtest::TS::ptr()->get_data_path());
if (devInfo.majorVersion() >= 2)
if (devInfo.major() >= 2)
fname += "opticalflow/brox_optical_flow_cc20.bin";
else
fname += "opticalflow/brox_optical_flow.bin";
......
......@@ -91,7 +91,7 @@ bool cv::gpu::StereoBM_GPU::checkIfGpuCallReasonable()
DeviceInfo device_info;
if (device_info.majorVersion() > 1 || device_info.multiProcessorCount() > 16)
if (device_info.major() > 1 || device_info.multiProcessorCount() > 16)
return true;
return false;
......
......@@ -287,8 +287,8 @@ namespace perf
cv::gpu::DeviceInfo info(i);
printf("[----------]\n"), fflush(stdout);
printf("[ DEVICE ] \t# %d %s.\n", i, info.name().c_str()), fflush(stdout);
printf("[ ] \tCompute capability: %d.%d\n", (int)info.majorVersion(), (int)info.minorVersion()), fflush(stdout);
printf("[ DEVICE ] \t# %d %s.\n", i, info.name()), fflush(stdout);
printf("[ ] \tCompute capability: %d.%d\n", (int)info.major(), (int)info.minor()), fflush(stdout);
printf("[ ] \tMulti Processor Count: %d\n", info.multiProcessorCount()), fflush(stdout);
printf("[ ] \tTotal memory: %d Mb\n", static_cast<int>(static_cast<int>(info.totalMemory() / 1024.0) / 1024.0)), fflush(stdout);
printf("[ ] \tFree memory: %d Mb\n", static_cast<int>(static_cast<int>(info.freeMemory() / 1024.0) / 1024.0)), fflush(stdout);
......
......@@ -682,13 +682,13 @@ void TestBase::Init(int argc, const char* const argv[])
cv::gpu::DeviceInfo info(param_cuda_device);
if (!info.isCompatible())
{
printf("[----------]\n[ FAILURE ] \tDevice %s is NOT compatible with current GPU module build.\n[----------]\n", info.name().c_str()), fflush(stdout);
printf("[----------]\n[ FAILURE ] \tDevice %s is NOT compatible with current GPU module build.\n[----------]\n", info.name()), fflush(stdout);
exit(-1);
}
cv::gpu::setDevice(param_cuda_device);
printf("[----------]\n[ GPU INFO ] \tRun test suite on %s GPU.\n[----------]\n", info.name().c_str()), fflush(stdout);
printf("[----------]\n[ GPU INFO ] \tRun test suite on %s GPU.\n[----------]\n", info.name()), fflush(stdout);
}
#endif
......
......@@ -82,8 +82,8 @@ int main()
if (!dev_info.isCompatible())
{
std::cout << "GPU module isn't built for GPU #" << i << " ("
<< dev_info.name() << ", CC " << dev_info.majorVersion()
<< dev_info.minorVersion() << "\n";
<< dev_info.name() << ", CC " << dev_info.major()
<< dev_info.minor() << "\n";
return -1;
}
}
......
......@@ -112,8 +112,8 @@ int main(int argc, char** argv)
if (!dev_info.isCompatible())
{
std::cout << "GPU module isn't built for GPU #" << i << " ("
<< dev_info.name() << ", CC " << dev_info.majorVersion()
<< dev_info.minorVersion() << "\n";
<< dev_info.name() << ", CC " << dev_info.major()
<< dev_info.minor() << "\n";
return -1;
}
}
......
......@@ -62,8 +62,8 @@ int main()
if (!dev_info.isCompatible())
{
std::cout << "GPU module isn't built for GPU #" << i << " ("
<< dev_info.name() << ", CC " << dev_info.majorVersion()
<< dev_info.minorVersion() << "\n";
<< dev_info.name() << ", CC " << dev_info.major()
<< dev_info.minor() << "\n";
return -1;
}
}
......
......@@ -191,7 +191,7 @@ int main(int argc, const char* argv[])
DeviceInfo dev_info(device);
if (!dev_info.isCompatible())
{
cerr << "GPU module isn't built for GPU #" << device << " " << dev_info.name() << ", CC " << dev_info.majorVersion() << '.' << dev_info.minorVersion() << endl;
cerr << "GPU module isn't built for GPU #" << device << " " << dev_info.name() << ", CC " << dev_info.major() << '.' << dev_info.minor() << endl;
return -1;
}
setDevice(device);
......
......@@ -81,8 +81,8 @@ int main(int argc, char** argv)
if (!dev_info.isCompatible())
{
std::cout << "GPU module isn't built for GPU #" << i << " ("
<< dev_info.name() << ", CC " << dev_info.majorVersion()
<< dev_info.minorVersion() << "\n";
<< dev_info.name() << ", CC " << dev_info.major()
<< dev_info.minor() << "\n";
return -1;
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment