Commit 4e59d6fd authored by Alexander Alekhin's avatar Alexander Alekhin

Merge pull request #12224 from cv3d:improvments/CUDA_detection

parents 94758c18 e136c11c
......@@ -70,6 +70,12 @@ if(CUDA_FOUND)
unset(CUDA_ARCH_PTX CACHE)
endif()
SET(DETECT_ARCHS_COMMAND "${CUDA_NVCC_EXECUTABLE}" ${CUDA_NVCC_FLAGS} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run")
if(WIN32 AND CMAKE_LINKER) #Workaround for VS cl.exe not being in the env. path
get_filename_component(host_compiler_bindir ${CMAKE_LINKER} DIRECTORY)
SET(DETECT_ARCHS_COMMAND ${DETECT_ARCHS_COMMAND} "-ccbin" "${host_compiler_bindir}")
endif()
set(__cuda_arch_ptx "")
if(CUDA_GENERATION STREQUAL "Fermi")
set(__cuda_arch_bin "2.0")
......@@ -82,10 +88,11 @@ if(CUDA_FOUND)
elseif(CUDA_GENERATION STREQUAL "Volta")
set(__cuda_arch_bin "7.0")
elseif(CUDA_GENERATION STREQUAL "Auto")
execute_process( COMMAND "${CUDA_NVCC_EXECUTABLE}" ${CUDA_NVCC_FLAGS} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run"
execute_process( COMMAND ${DETECT_ARCHS_COMMAND}
WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
RESULT_VARIABLE _nvcc_res OUTPUT_VARIABLE _nvcc_out
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
string(REGEX REPLACE ".*\n" "" _nvcc_out "${_nvcc_out}") #Strip leading warning messages, if any
if(NOT _nvcc_res EQUAL 0)
message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.")
else()
......@@ -99,10 +106,11 @@ if(CUDA_FOUND)
set(__cuda_arch_bin "3.2")
set(__cuda_arch_ptx "")
elseif(AARCH64)
execute_process( COMMAND "${CUDA_NVCC_EXECUTABLE}" ${CUDA_NVCC_FLAGS} "${OpenCV_SOURCE_DIR}/cmake/checks/OpenCVDetectCudaArch.cu" "--run"
execute_process( COMMAND ${DETECT_ARCHS_COMMAND}
WORKING_DIRECTORY "${CMAKE_BINARY_DIR}${CMAKE_FILES_DIRECTORY}/CMakeTmp/"
RESULT_VARIABLE _nvcc_res OUTPUT_VARIABLE _nvcc_out
ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
string(REGEX REPLACE ".*\n" "" _nvcc_out "${_nvcc_out}") #Strip leading warning messages, if any
if(NOT _nvcc_res EQUAL 0)
message(STATUS "Automatic detection of CUDA generation failed. Going to build for all known architectures.")
set(__cuda_arch_bin "5.3 6.2 7.0")
......
#include <stdio.h>
#include <iostream>
#include <sstream>
#include <list>
int main()
{
std::ostringstream arch;
std::list<std::string> archs;
int count = 0;
if (cudaSuccess != cudaGetDeviceCount(&count)){return -1;}
if (count == 0) {return -1;}
if (cudaSuccess != cudaGetDeviceCount(&count)){ return -1; }
if (count == 0) { return -1; }
for (int device = 0; device < count; ++device)
{
cudaDeviceProp prop;
if (cudaSuccess != cudaGetDeviceProperties(&prop, device)){ continue;}
printf("%d.%d ", prop.major, prop.minor);
if (cudaSuccess != cudaGetDeviceProperties(&prop, device)){ continue; }
arch << prop.major << "." << prop.minor;
archs.push_back(arch.str());
arch.str("");
}
archs.unique(); #Some devices might have the same arch
for (std::list<std::string>::iterator it=archs.begin(); it!=archs.end(); ++it)
std::cout << *it << " ";
return 0;
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment