Commit 87793068 authored by Alexey Spizhevoy

updated main CMakeLists.txt gpu module section, now user can manage binary and…

updated main CMakeLists.txt gpu module section, now user can manage binary and intermediate code versions of the gpu module image
added more functions to check version of gpu code in runtime
parent 1e1a1392
...@@ -700,48 +700,55 @@ endif() ...@@ -700,48 +700,55 @@ endif()
############################### CUDA ################################ ############################### CUDA ################################
if (WITH_CUDA) if(WITH_CUDA)
find_package(CUDA 3.2) find_package(CUDA 3.2)
if (CUDA_FOUND)
if(CUDA_FOUND)
set(HAVE_CUDA 1) set(HAVE_CUDA 1)
message(STATUS "CUDA detected: " ${CUDA_VERSION}) message(STATUS "CUDA detected: " ${CUDA_VERSION})
set(CUDA_COMPUTE_CAPABILITIES " 1.1 1.2 1.3 2.0 " CACHE STRING "Add or remove compute capability") set(CUDA_ARCH_GPU "1.1 1.2 1.3 2.0" CACHE STRING "Specify 'real' GPU architectures to build binaries for")
set(CUDA_NVCC_FLAGS_ARCH ${CUDA_COMPUTE_CAPABILITIES}) set(CUDA_ARCH_PTX "1.1 1.3 2.0" CACHE STRING "Specify 'virtual' PTX architectures to build PTX intermediate code for")
string(REGEX MATCH "1\\.0" STR_OPENCV_GPU_CUDA_ARCH_10 ${CUDA_COMPUTE_CAPABILITIES}) # Architectures to be searched for in user's input
string(REGEX MATCH "1\\.1" STR_OPENCV_GPU_CUDA_ARCH_11 ${CUDA_COMPUTE_CAPABILITIES}) set (CUDA_ARCH_ALL 1.0 1.1 1.2 1.3 2.0 2.1)
string(REGEX MATCH "1\\.2" STR_OPENCV_GPU_CUDA_ARCH_12 ${CUDA_COMPUTE_CAPABILITIES})
string(REGEX MATCH "1\\.3" STR_OPENCV_GPU_CUDA_ARCH_13 ${CUDA_COMPUTE_CAPABILITIES}) # Parse user's input
string(REGEX MATCH "2\\.0" STR_OPENCV_GPU_CUDA_ARCH_20 ${CUDA_COMPUTE_CAPABILITIES}) foreach(ARCH IN LISTS CUDA_ARCH_ALL)
string(REGEX MATCH "2\\.1" STR_OPENCV_GPU_CUDA_ARCH_21 ${CUDA_COMPUTE_CAPABILITIES}) string(REGEX MATCH ${ARCH} ARCH_GPU_MATCH "${CUDA_ARCH_GPU}")
string(REGEX MATCH ${ARCH} ARCH_PTX_MATCH "${CUDA_ARCH_PTX}")
string(COMPARE EQUAL "1.0" "${STR_OPENCV_GPU_CUDA_ARCH_10}" OPENCV_GPU_CUDA_ARCH_10) string(REGEX REPLACE "\\." "" ARCH_GPU_AS_NUM "${ARCH_GPU_MATCH}")
string(COMPARE EQUAL "1.1" "${STR_OPENCV_GPU_CUDA_ARCH_11}" OPENCV_GPU_CUDA_ARCH_11) string(REGEX REPLACE "\\." "" ARCH_PTX_AS_NUM "${ARCH_PTX_MATCH}")
string(COMPARE EQUAL "1.2" "${STR_OPENCV_GPU_CUDA_ARCH_12}" OPENCV_GPU_CUDA_ARCH_12)
string(COMPARE EQUAL "1.3" "${STR_OPENCV_GPU_CUDA_ARCH_13}" OPENCV_GPU_CUDA_ARCH_13) # Define variables indicating the architectures specified by user
string(COMPARE EQUAL "2.0" "${STR_OPENCV_GPU_CUDA_ARCH_20}" OPENCV_GPU_CUDA_ARCH_20) if(NOT ${ARCH_GPU_AS_NUM} STREQUAL "")
string(COMPARE EQUAL "2.1" "${STR_OPENCV_GPU_CUDA_ARCH_21}" OPENCV_GPU_CUDA_ARCH_21) set(OPENCV_ARCH_GPU_${ARCH_GPU_AS_NUM} 1)
endif()
set(CUDA_NVCC_FLAGS_NUM "") if(NOT ${ARCH_PTX_AS_NUM} STREQUAL "")
set(OPENCV_ARCH_PTX_${ARCH_PTX_AS_NUM} 1)
while(NOT ${CUDA_NVCC_FLAGS_ARCH} STREQUAL "") endif()
string(REGEX MATCH "[0-9]+.[0-9]+" RESULT_NUM ${CUDA_NVCC_FLAGS_ARCH}) endforeach()
string(REGEX MATCHALL "[0-9]" RESULT_STR ${RESULT_NUM})
string(REGEX REPLACE ";" "\ " RESULT ${RESULT_STR}) set(NVCC_FLAGS_EXTRA "")
list(APPEND CUDA_NVCC_FLAGS_NUM ${RESULT})
string(REGEX REPLACE "${RESULT_NUM}" "\ " CUDA_NVCC_FLAGS_ARCH_STR ${CUDA_NVCC_FLAGS_ARCH}) # Tell nvcc to add binaries for the specified GPUs
string(STRIP ${CUDA_NVCC_FLAGS_ARCH_STR} CUDA_NVCC_FLAGS_ARCH) string(REGEX REPLACE "\\." "" CUDA_ARCH_GPU "${CUDA_ARCH_GPU}")
endwhile() string(REGEX MATCHALL "[0-9]+" CUDA_ARCH_GPU_LIST "${CUDA_ARCH_GPU}")
foreach(ARCH_GPU IN LISTS CUDA_ARCH_GPU_LIST)
set (OpenCV_CUDA_CC "") set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH_GPU},code=sm_${ARCH_GPU})
set (loop_var "")
foreach( loop_var IN LISTS CUDA_NVCC_FLAGS_NUM)
set (CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} -gencode arch=compute_${loop_var},code=sm_${loop_var})
set (OpenCV_CUDA_CC ${OpenCV_CUDA_CC} -gencode arch=compute_${loop_var},code=sm_${loop_var})
endforeach() endforeach()
### set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${OpenCV_COMPUTE_CAPABILITIES}) # Tell nvcc to add PTX intermediate code for the specified architectures
string(REGEX REPLACE "\\." "" CUDA_ARCH_PTX "${CUDA_ARCH_PTX}")
string(REGEX MATCHALL "[0-9]+" CUDA_ARCH_PTX_LIST "${CUDA_ARCH_PTX}")
foreach(ARCH_PTX IN LISTS CUDA_ARCH_PTX_LIST)
set(NVCC_FLAGS_EXTRA ${NVCC_FLAGS_EXTRA} -gencode arch=compute_${ARCH_PTX},code=compute_${ARCH_PTX})
endforeach()
set(CUDA_NVCC_FLAGS ${CUDA_NVCC_FLAGS} ${NVCC_FLAGS_EXTRA})
message(STATUS "CUDA NVCC flags: ${CUDA_NVCC_FLAGS}")
set(OpenCV_CUDA_CC "${NVCC_FLAGS_EXTRA}")
endif() endif()
endif() endif()
......
...@@ -163,23 +163,21 @@ ...@@ -163,23 +163,21 @@
/* NVidia Cuda Runtime API*/ /* NVidia Cuda Runtime API*/
#cmakedefine HAVE_CUDA #cmakedefine HAVE_CUDA
/* The project was generated with 1.0 NVIDIA device arch support */ /* Compile for 'real' NVIDIA GPU architecture */
#cmakedefine OPENCV_GPU_CUDA_ARCH_10 #cmakedefine OPENCV_ARCH_GPU_10
#cmakedefine OPENCV_ARCH_GPU_11
/* The project was generated with 1.1 NVIDIA device arch support */ #cmakedefine OPENCV_ARCH_GPU_12
#cmakedefine OPENCV_GPU_CUDA_ARCH_11 #cmakedefine OPENCV_ARCH_GPU_13
#cmakedefine OPENCV_ARCH_GPU_20
/* The project was generated with 1.2 NVIDIA device arch support */ #cmakedefine OPENCV_ARCH_GPU_21
#cmakedefine OPENCV_GPU_CUDA_ARCH_12
/* Compile for 'virtual' NVIDIA PTX architecture */
/* The project was generated with 1.3 NVIDIA device arch support */ #cmakedefine OPENCV_ARCH_PTX_10
#cmakedefine OPENCV_GPU_CUDA_ARCH_13 #cmakedefine OPENCV_ARCH_PTX_11
#cmakedefine OPENCV_ARCH_PTX_12
/* The project was generated with 2.0 NVIDIA device arch support */ #cmakedefine OPENCV_ARCH_PTX_13
#cmakedefine OPENCV_GPU_CUDA_ARCH_20 #cmakedefine OPENCV_ARCH_PTX_20
#cmakedefine OPENCV_ARCH_PTX_21
/* The project was generated with 2.1 NVIDIA device arch support */
#cmakedefine OPENCV_GPU_CUDA_ARCH_21
/* VideoInput library */ /* VideoInput library */
#cmakedefine HAVE_VIDEOINPUT #cmakedefine HAVE_VIDEOINPUT
......
...@@ -69,30 +69,70 @@ Returns true, if the specified GPU has atomics support, otherwise false. ...@@ -69,30 +69,70 @@ Returns true, if the specified GPU has atomics support, otherwise false.
\end{description} \end{description}
\cvCppFunc{gpu::ptxVersionIs} \cvCppFunc{gpu::hasPtxVersion}
Returns true, if the GPU module was built with PTX support of the given compute capability, otherwise false. Returns true, if the GPU module has PTX code for the given architecture, otherwise false.
\cvdefCpp{bool ptxVersionIs(int major, int minor);} \cvdefCpp{bool hasPtxVersion(int major, int minor);}
\begin{description} \begin{description}
\cvarg{major}{Major compute capability version.} \cvarg{major}{Major compute capability version.}
\cvarg{minor}{Minor compute capability version.} \cvarg{minor}{Minor compute capability version.}
\end{description} \end{description}
\cvCppFunc{gpu::ptxVersionIsLessOrEqual} \cvCppFunc{gpu::hasLessOrEqualPtxVersion}
Returns true, if the GPU module was built with PTX support of the given compute capability or less, otherwise false. Returns true, if the GPU module has PTX code for the given architecture or older one, otherwise false.
\cvdefCpp{bool ptxVersionIsLessOrEqual(int major, int minor);} \cvdefCpp{bool hasLessOrEqualPtxVersion(int major, int minor);}
\begin{description} \begin{description}
\cvarg{major}{Major compute capability version.} \cvarg{major}{Major compute capability version.}
\cvarg{minor}{Minor compute capability version.} \cvarg{minor}{Minor compute capability version.}
\end{description} \end{description}
\cvCppFunc{gpu::ptxVersionIsGreaterOrEqual} \cvCppFunc{gpu::hasGreaterOrEqualPtxVersion}
Returns true, if the GPU module was built with PTX support of the given compute capability or greater, otherwise false. Returns true, if the GPU module has PTX code for the given architecture or newer one, otherwise false.
\cvdefCpp{bool ptxVersionIsGreaterOrEqual(int major, int minor);} \cvdefCpp{bool hasGreaterOrEqualPtxVersion(int major, int minor);}
\begin{description}
\cvarg{major}{Major compute capability version.}
\cvarg{minor}{Minor compute capability version.}
\end{description}
\cvCppFunc{gpu::hasCubinVersion}
Returns true, if the GPU module has CUBIN code for the given architecture, otherwise false.
\cvdefCpp{bool hasCubinVersion(int major, int minor);}
\begin{description}
\cvarg{major}{Major compute capability version.}
\cvarg{minor}{Minor compute capability version.}
\end{description}
\cvCppFunc{gpu::hasGreaterOrEqualCubinVersion}
Returns true, if the GPU module has CUBIN code for the given architecture or newer one, otherwise false.
\cvdefCpp{bool hasGreaterOrEqualCubinVersion(int major, int minor);}
\begin{description}
\cvarg{major}{Major compute capability version.}
\cvarg{minor}{Minor compute capability version.}
\end{description}
\cvCppFunc{gpu::hasVersion}
Returns true, if the GPU module has PTX or CUBIN code for the given architecture, otherwise false.
\cvdefCpp{bool hasVersion(int major, int minor);}
\begin{description}
\cvarg{major}{Major compute capability version.}
\cvarg{minor}{Minor compute capability version.}
\end{description}
\cvCppFunc{gpu::hasGreaterOrEqualVersion}
Returns true, if the GPU module has PTX or CUBIN code for the given architecture or newer one, otherwise false.
\cvdefCpp{bool hasGreaterOrEqualVersion(int major, int minor);}
\begin{description} \begin{description}
\cvarg{major}{Major compute capability version.} \cvarg{major}{Major compute capability version.}
\cvarg{minor}{Minor compute capability version.} \cvarg{minor}{Minor compute capability version.}
...@@ -100,7 +140,7 @@ Returns true, if the GPU module was built with PTX support of the given compute ...@@ -100,7 +140,7 @@ Returns true, if the GPU module was built with PTX support of the given compute
\cvCppFunc{gpu::isCompatibleWith} \cvCppFunc{gpu::isCompatibleWith}
Returns true, if the GPU module is PTX compatible with the given NVIDIA GPU device, otherwise false. Returns true, if the GPU module is PTX or CUBIN compatible with the given GPU device, otherwise false.
\cvdefCpp{bool isCompatibleWith(int device);} \cvdefCpp{bool isCompatibleWith(int device);}
\begin{description} \begin{description}
......
...@@ -72,11 +72,16 @@ namespace cv ...@@ -72,11 +72,16 @@ namespace cv
CV_EXPORTS bool hasNativeDoubleSupport(int device); CV_EXPORTS bool hasNativeDoubleSupport(int device);
CV_EXPORTS bool hasAtomicsSupport(int device); CV_EXPORTS bool hasAtomicsSupport(int device);
CV_EXPORTS bool ptxVersionIs(int major, int minor); CV_EXPORTS bool hasPtxVersion(int major, int minor);
CV_EXPORTS bool ptxVersionIsLessOrEqual(int major, int minor); CV_EXPORTS bool hasLessOrEqualPtxVersion(int major, int minor);
CV_EXPORTS bool ptxVersionIsGreaterOrEqual(int major, int minor); CV_EXPORTS bool hasGreaterOrEqualPtxVersion(int major, int minor);
CV_EXPORTS bool hasCubinVersion(int major, int minor);
CV_EXPORTS bool hasGreaterOrEqualCubinVersion(int major, int minor);
CV_EXPORTS bool hasVersion(int major, int minor);
CV_EXPORTS bool hasGreaterOrEqualVersion(int major, int minor);
//! Checks if the GPU module is PTX compatible with the given NVIDIA device
CV_EXPORTS bool isCompatibleWith(int device); CV_EXPORTS bool isCompatibleWith(int device);
//////////////////////////////// Error handling //////////////////////// //////////////////////////////// Error handling ////////////////////////
......
...@@ -57,9 +57,13 @@ CV_EXPORTS int cv::gpu::getNumberOfSMs(int /*device*/) { throw_nogpu(); return 0 ...@@ -57,9 +57,13 @@ CV_EXPORTS int cv::gpu::getNumberOfSMs(int /*device*/) { throw_nogpu(); return 0
CV_EXPORTS void cv::gpu::getGpuMemInfo(size_t& /*free*/, size_t& /*total*/) { throw_nogpu(); } CV_EXPORTS void cv::gpu::getGpuMemInfo(size_t& /*free*/, size_t& /*total*/) { throw_nogpu(); }
CV_EXPORTS bool cv::gpu::hasNativeDoubleSupport(int /*device*/) { throw_nogpu(); return false; } CV_EXPORTS bool cv::gpu::hasNativeDoubleSupport(int /*device*/) { throw_nogpu(); return false; }
CV_EXPORTS bool cv::gpu::hasAtomicsSupport(int /*device*/) { throw_nogpu(); return false; } CV_EXPORTS bool cv::gpu::hasAtomicsSupport(int /*device*/) { throw_nogpu(); return false; }
CV_EXPORTS bool cv::gpu::ptxVersionIs(int major, int minor) { throw_nogpu(); return false; } CV_EXPORTS bool cv::gpu::hasPtxVersion(int major, int minor) { throw_nogpu(); return false; }
CV_EXPORTS bool cv::gpu::ptxVersionIsLessOrEqual(int major, int minor) { throw_nogpu(); return false; } CV_EXPORTS bool cv::gpu::hasLessOrEqualPtxVersion(int major, int minor) { throw_nogpu(); return false; }
CV_EXPORTS bool cv::gpu::ptxVersionIsGreaterOrEqual(int major, int minor) { throw_nogpu(); return false; } CV_EXPORTS bool cv::gpu::hasGreaterOrEqualPtxVersion(int major, int minor) { throw_nogpu(); return false; }
CV_EXPORTS bool cv::gpu::hasCubinVersion(int major, int minor) { throw_nogpu(); return false; }
CV_EXPORTS bool cv::gpu::hasGreaterOrEqualCubinVersion(int major, int minor) { throw_nogpu(); return false; }
CV_EXPORTS bool cv::gpu::hasVersion(int major, int minor) { throw_nogpu(); return false; }
CV_EXPORTS bool cv::gpu::hasGreaterOrEqualVersion(int major, int minor) { throw_nogpu(); return false; }
CV_EXPORTS bool cv::gpu::isCompatibleWith(int device) { throw_nogpu(); return false; } CV_EXPORTS bool cv::gpu::isCompatibleWith(int device) { throw_nogpu(); return false; }
...@@ -140,37 +144,63 @@ namespace ...@@ -140,37 +144,63 @@ namespace
template <typename Comparer> template <typename Comparer>
bool checkPtxVersion(int major, int minor, Comparer cmp) bool checkPtxVersion(int major, int minor, Comparer cmp)
{ {
#ifdef OPENCV_GPU_CUDA_ARCH_10 #ifdef OPENCV_ARCH_PTX_10
if (cmp(1, 0, major, minor)) return true; if (cmp(1, 0, major, minor)) return true;
#endif #endif
#ifdef OPENCV_GPU_CUDA_ARCH_11 #ifdef OPENCV_ARCH_PTX_11
if (cmp(1, 1, major, minor)) return true; if (cmp(1, 1, major, minor)) return true;
#endif #endif
#ifdef OPENCV_GPU_CUDA_ARCH_12 #ifdef OPENCV_ARCH_PTX_12
if (cmp(1, 2, major, minor)) return true; if (cmp(1, 2, major, minor)) return true;
#endif #endif
#ifdef OPENCV_GPU_CUDA_ARCH_13 #ifdef OPENCV_ARCH_PTX_13
if (cmp(1, 3, major, minor)) return true; if (cmp(1, 3, major, minor)) return true;
#endif #endif
#ifdef OPENCV_GPU_CUDA_ARCH_20 #ifdef OPENCV_ARCH_PTX_20
if (cmp(2, 0, major, minor)) return true; if (cmp(2, 0, major, minor)) return true;
#endif #endif
#ifdef OPENCV_GPU_CUDA_ARCH_21 #ifdef OPENCV_ARCH_PTX_21
if (cmp(2, 1, major, minor)) return true; if (cmp(2, 1, major, minor)) return true;
#endif #endif
return false; return false;
} }
}
template <typename Comparer>
bool checkCubinVersion(int major, int minor, Comparer cmp)
{
#ifdef OPENCV_ARCH_GPU_10
if (cmp(1, 0, major, minor)) return true;
#endif
#ifdef OPENCV_ARCH_GPU_11
if (cmp(1, 1, major, minor)) return true;
#endif
#ifdef OPENCV_ARCH_GPU_12
if (cmp(1, 2, major, minor)) return true;
#endif
#ifdef OPENCV_ARCH_GPU_13
if (cmp(1, 3, major, minor)) return true;
#endif
#ifdef OPENCV_ARCH_GPU_20
if (cmp(2, 0, major, minor)) return true;
#endif
#ifdef OPENCV_ARCH_GPU_21
if (cmp(2, 1, major, minor)) return true;
#endif
return false;
}
CV_EXPORTS bool cv::gpu::ptxVersionIs(int major, int minor)
{
struct ComparerEqual struct ComparerEqual
{ {
bool operator()(int lhs1, int lhs2, int rhs1, int rhs2) const bool operator()(int lhs1, int lhs2, int rhs1, int rhs2) const
...@@ -178,12 +208,7 @@ CV_EXPORTS bool cv::gpu::ptxVersionIs(int major, int minor) ...@@ -178,12 +208,7 @@ CV_EXPORTS bool cv::gpu::ptxVersionIs(int major, int minor)
return lhs1 == rhs1 && lhs2 == rhs2; return lhs1 == rhs1 && lhs2 == rhs2;
} }
}; };
return checkPtxVersion(major, minor, ComparerEqual());
}
CV_EXPORTS bool cv::gpu::ptxVersionIsLessOrEqual(int major, int minor)
{
struct ComparerLessOrEqual struct ComparerLessOrEqual
{ {
bool operator()(int lhs1, int lhs2, int rhs1, int rhs2) const bool operator()(int lhs1, int lhs2, int rhs1, int rhs2) const
...@@ -191,12 +216,7 @@ CV_EXPORTS bool cv::gpu::ptxVersionIsLessOrEqual(int major, int minor) ...@@ -191,12 +216,7 @@ CV_EXPORTS bool cv::gpu::ptxVersionIsLessOrEqual(int major, int minor)
return lhs1 < rhs1 || (lhs1 == rhs1 && lhs2 <= rhs2); return lhs1 < rhs1 || (lhs1 == rhs1 && lhs2 <= rhs2);
} }
}; };
return checkPtxVersion(major, minor, ComparerLessOrEqual());
}
CV_EXPORTS bool cv::gpu::ptxVersionIsGreaterOrEqual(int major, int minor)
{
struct ComparerGreaterOrEqual struct ComparerGreaterOrEqual
{ {
bool operator()(int lhs1, int lhs2, int rhs1, int rhs2) const bool operator()(int lhs1, int lhs2, int rhs1, int rhs2) const
...@@ -204,10 +224,52 @@ CV_EXPORTS bool cv::gpu::ptxVersionIsGreaterOrEqual(int major, int minor) ...@@ -204,10 +224,52 @@ CV_EXPORTS bool cv::gpu::ptxVersionIsGreaterOrEqual(int major, int minor)
return lhs1 > rhs1 || (lhs1 == rhs1 && lhs2 >= rhs2); return lhs1 > rhs1 || (lhs1 == rhs1 && lhs2 >= rhs2);
} }
}; };
}
CV_EXPORTS bool cv::gpu::hasPtxVersion(int major, int minor)
{
return checkPtxVersion(major, minor, ComparerEqual());
}
CV_EXPORTS bool cv::gpu::hasLessOrEqualPtxVersion(int major, int minor)
{
return checkPtxVersion(major, minor, ComparerLessOrEqual());
}
CV_EXPORTS bool cv::gpu::hasGreaterOrEqualPtxVersion(int major, int minor)
{
return checkPtxVersion(major, minor, ComparerGreaterOrEqual()); return checkPtxVersion(major, minor, ComparerGreaterOrEqual());
} }
CV_EXPORTS bool cv::gpu::hasCubinVersion(int major, int minor)
{
return checkCubinVersion(major, minor, ComparerEqual());
}
CV_EXPORTS bool cv::gpu::hasGreaterOrEqualCubinVersion(int major, int minor)
{
return checkCubinVersion(major, minor, ComparerGreaterOrEqual());
}
CV_EXPORTS bool cv::gpu::hasVersion(int major, int minor)
{
return hasPtxVersion(major, minor) || hasCubinVersion(major, minor);
}
CV_EXPORTS bool cv::gpu::hasGreaterOrEqualVersion(int major, int minor)
{
return hasGreaterOrEqualPtxVersion(major, minor) ||
hasGreaterOrEqualCubinVersion(major, minor);
}
CV_EXPORTS bool cv::gpu::isCompatibleWith(int device) CV_EXPORTS bool cv::gpu::isCompatibleWith(int device)
{ {
// According to the CUDA C Programming Guide Version 3.2: "PTX code // According to the CUDA C Programming Guide Version 3.2: "PTX code
...@@ -217,7 +279,16 @@ CV_EXPORTS bool cv::gpu::isCompatibleWith(int device) ...@@ -217,7 +279,16 @@ CV_EXPORTS bool cv::gpu::isCompatibleWith(int device)
int major, minor; int major, minor;
getComputeCapability(device, major, minor); getComputeCapability(device, major, minor);
return ptxVersionIsLessOrEqual(major, minor); // Check PTX compatibility
if (hasLessOrEqualPtxVersion(major, minor))
return true;
// Check CUBIN compatibility
for (int i = 0; i <= minor; ++i)
if (hasCubinVersion(major, i))
return true;
return false;
} }
#endif #endif
......
...@@ -166,7 +166,7 @@ Scalar cv::gpu::sum(const GpuMat& src, GpuMat& buf) ...@@ -166,7 +166,7 @@ Scalar cv::gpu::sum(const GpuMat& src, GpuMat& buf)
ensureSizeIsEnough(buf_size, CV_8U, buf); ensureSizeIsEnough(buf_size, CV_8U, buf);
Caller* callers = multipass_callers; Caller* callers = multipass_callers;
if (ptxVersionIsGreaterOrEqual(1, 1) && hasAtomicsSupport(getDevice())) if (hasGreaterOrEqualVersion(1, 1) && hasAtomicsSupport(getDevice()))
callers = singlepass_callers; callers = singlepass_callers;
Caller caller = callers[src.depth()]; Caller caller = callers[src.depth()];
...@@ -202,7 +202,7 @@ Scalar cv::gpu::sqrSum(const GpuMat& src, GpuMat& buf) ...@@ -202,7 +202,7 @@ Scalar cv::gpu::sqrSum(const GpuMat& src, GpuMat& buf)
sqrSumCaller<int>, sqrSumCaller<float>, 0 }; sqrSumCaller<int>, sqrSumCaller<float>, 0 };
Caller* callers = multipass_callers; Caller* callers = multipass_callers;
if (ptxVersionIsGreaterOrEqual(1, 1) && hasAtomicsSupport(getDevice())) if (hasGreaterOrEqualVersion(1, 1) && hasAtomicsSupport(getDevice()))
callers = singlepass_callers; callers = singlepass_callers;
Size buf_size; Size buf_size;
...@@ -289,7 +289,7 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp ...@@ -289,7 +289,7 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp
if (mask.empty()) if (mask.empty())
{ {
Caller* callers = multipass_callers; Caller* callers = multipass_callers;
if (ptxVersionIsGreaterOrEqual(1, 1) && hasAtomicsSupport(getDevice())) if (hasGreaterOrEqualVersion(1, 1) && hasAtomicsSupport(getDevice()))
callers = singlepass_callers; callers = singlepass_callers;
Caller caller = callers[src.type()]; Caller caller = callers[src.type()];
...@@ -299,7 +299,7 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp ...@@ -299,7 +299,7 @@ void cv::gpu::minMax(const GpuMat& src, double* minVal, double* maxVal, const Gp
else else
{ {
MaskedCaller* callers = masked_multipass_callers; MaskedCaller* callers = masked_multipass_callers;
if (ptxVersionIsGreaterOrEqual(1, 1) && hasAtomicsSupport(getDevice())) if (hasGreaterOrEqualVersion(1, 1) && hasAtomicsSupport(getDevice()))
callers = masked_singlepass_callers; callers = masked_singlepass_callers;
MaskedCaller caller = callers[src.type()]; MaskedCaller caller = callers[src.type()];
...@@ -389,7 +389,7 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point ...@@ -389,7 +389,7 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
if (mask.empty()) if (mask.empty())
{ {
Caller* callers = multipass_callers; Caller* callers = multipass_callers;
if (ptxVersionIsGreaterOrEqual(1, 1) && hasAtomicsSupport(getDevice())) if (hasGreaterOrEqualVersion(1, 1) && hasAtomicsSupport(getDevice()))
callers = singlepass_callers; callers = singlepass_callers;
Caller caller = callers[src.type()]; Caller caller = callers[src.type()];
...@@ -399,7 +399,7 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point ...@@ -399,7 +399,7 @@ void cv::gpu::minMaxLoc(const GpuMat& src, double* minVal, double* maxVal, Point
else else
{ {
MaskedCaller* callers = masked_multipass_callers; MaskedCaller* callers = masked_multipass_callers;
if (ptxVersionIsGreaterOrEqual(1, 1) && hasAtomicsSupport(getDevice())) if (hasGreaterOrEqualVersion(1, 1) && hasAtomicsSupport(getDevice()))
callers = masked_singlepass_callers; callers = masked_singlepass_callers;
MaskedCaller caller = callers[src.type()]; MaskedCaller caller = callers[src.type()];
...@@ -459,7 +459,7 @@ int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf) ...@@ -459,7 +459,7 @@ int cv::gpu::countNonZero(const GpuMat& src, GpuMat& buf)
ensureSizeIsEnough(buf_size, CV_8U, buf); ensureSizeIsEnough(buf_size, CV_8U, buf);
Caller* callers = multipass_callers; Caller* callers = multipass_callers;
if (ptxVersionIsGreaterOrEqual(1, 1) && hasAtomicsSupport(getDevice())) if (hasGreaterOrEqualVersion(1, 1) && hasAtomicsSupport(getDevice()))
callers = singlepass_callers; callers = singlepass_callers;
Caller caller = callers[src.type()]; Caller caller = callers[src.type()];
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment