Added the capability of directly compiling cuda and ptx to CMake. (#906)

* Added the capability of directly compiling CUDA and PTX to CMake. * Added a comment to show an example use case for a CUDA source file. * Minimum compute/sm is 35 in order to support PTX intrinsics. * Updated CMake and added an example nvcc kernel. * Made cudart link statically. * Added compute/sm capabilities for code generation in nvcc based on either CUDA 8 or 9. * Header update. * Fixed CMake indentation.

Added the capability of directly compiling cuda and ptx to CMake. (#906)
* Added the capability of directly compiling CUDA and PTX to CMake. * Added a comment to show an example use case for a CUDA source file. * Minimum compute/sm is 35 in order to support PTX intrinsics. * Updated CMake and added an example nvcc kernel. * Made cudart link statically. * Added compute/sm capabilities for code generation in nvcc based on either CUDA 8 or 9. * Header update. * Fixed CMake indentation.
b66c9f07 · Chris Sullivan · Robert Kimball · 11421efd · b66c9f07 · b66c9f07
Commit b66c9f07 authored Oct 17, 2018 by Chris Sullivan Committed by Robert Kimball Oct 17, 2018
Showing with 94 additions and 3 deletions

CMakeLists.txt src/ngraph/runtime/gpu/CMakeLists.txt +34 -3

example.cu.cpp src/ngraph/runtime/gpu/nvcc/example.cu.cpp +32 -0

kernels.hpp src/ngraph/runtime/gpu/nvcc/kernels.hpp +28 -0

No files found.
--- a/src/ngraph/runtime/gpu/CMakeLists.txt
+++ b/src/ngraph/runtime/gpu/CMakeLists.txt
@@ -31,7 +31,7 @@ set(SRC
    gpu_external_function.cpp
    gpu_invoke.cpp
    gpu_kernel_emitters.cpp
-	gpu_memory_manager.cpp
+    gpu_memory_manager.cpp
    gpu_primitive_emitter.cpp
    gpu_runtime_context.cpp
    gpu_tensor_wrapper.cpp
@@ -44,19 +44,50 @@ set(SRC
    pass/gpu_rnn_fusion.cpp
    op/rnn.cpp
    )
+# Sources that are passed to cuda_compile() and whose resulting objects are
+# added to the gpu_backend library.
+set(CUDA_SRC
+    nvcc/example.cu.cpp
+    )
 if (NGRAPH_GPU_ENABLE)
-    add_library(gpu_backend SHARED ${SRC})
+    # Try CUDA 9 first; if that lookup fails, CUDA 8 is required instead.
+    find_package(CUDA 9 QUIET)
+    if (NOT CUDA_FOUND)
+        find_package(CUDA 8 REQUIRED)
+    else()
+        set(CUDA9_FOUND TRUE)
+    endif()
+    # nvcc flags used for every source built through cuda_compile():
+    # position-independent host code plus the code-generation targets.
+    list(APPEND CUDA_NVCC_FLAGS
+        --compiler-options -fPIC
+        -arch=sm_30
+        -gencode=arch=compute_35,code=sm_35
+        -gencode=arch=compute_50,code=sm_50
+        -gencode=arch=compute_52,code=sm_52
+        -gencode=arch=compute_60,code=sm_60
+        -gencode=arch=compute_61,code=sm_61
+        -gencode=arch=compute_61,code=compute_61)
+    # Additional code-generation targets requested only when CUDA 9 was found.
+    if (CUDA9_FOUND)
+        list(APPEND CUDA_NVCC_FLAGS
+            -gencode=arch=compute_62,code=sm_62
+            -gencode=arch=compute_70,code=sm_70
+            -gencode=arch=compute_70,code=compute_70)
+    endif()
+    # The CUDA sources do not end in ".cu", so the OBJ source-format property
+    # is set on them before they are compiled to objects and linked into the
+    # shared gpu_backend library together with the regular sources.
+    set_source_files_properties(${CUDA_SRC} PROPERTIES CUDA_SOURCE_PROPERTY_FORMAT OBJ)
+    cuda_compile(CUDA_OBJ ${CUDA_SRC} STATIC)
+    add_library(gpu_backend SHARED ${SRC} ${CUDA_OBJ})
    set_target_properties(gpu_backend PROPERTIES VERSION ${NGRAPH_VERSION} SOVERSION ${NGRAPH_API_VERSION})
    target_link_libraries(gpu_backend PUBLIC ngraph codegen)
    find_library(CUDA_nvrtc_LIBRARY nvrtc /usr/local/cuda/lib64)
    find_library(CUDA_cuda_LIBRARY cuda /usr/local/cuda/lib64/stubs)
-    find_package(CUDA 8 REQUIRED)
+    # Locate the static CUDA runtime. The toolkit directory reported by
+    # find_package(CUDA) is searched first so that installs outside
+    # /usr/local/cuda are found; the previous hard-coded path stays as a fallback.
+    find_library(CUDA_cudart_LIBRARY libcudart_static.a
+        "${CUDA_TOOLKIT_ROOT_DIR}/lib64"
+        /usr/local/cuda/lib64)
    find_package(CUDNN 7 REQUIRED)
    target_include_directories(gpu_backend SYSTEM PUBLIC ${CUDA_INCLUDE_DIRS} ${CUDNN_INCLUDE_DIR})
    target_link_libraries(gpu_backend PUBLIC
        ${CUDA_cuda_LIBRARY}
        ${CUDA_nvrtc_LIBRARY}
+        ${CUDA_cudart_LIBRARY}
        ${CUDA_LIBRARIES}
        ${CUDA_CUBLAS_LIBRARIES}
        ${CUDNN_LIBRARIES})

--- a/src/ngraph/runtime/gpu/nvcc/example.cu.cpp
+++ b/src/ngraph/runtime/gpu/nvcc/example.cu.cpp
+//*****************************************************************************
+// Copyright 2017-2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#include <iostream>
+#include "ngraph/runtime/gpu/nvcc/kernels.hpp"
+using namespace ngraph;
+// Example kernel: every thread computes its index along x across the grid
+// and prints it.
+__global__ void example()
+{
+    size_t tid = blockDim.x * blockIdx.x + threadIdx.x;
+    // tid is a size_t, which does not match "%d"; convert it explicitly and
+    // print it with the matching "%llu" specifier.
+    printf("Hello from tid = %llu\n", static_cast<unsigned long long>(tid));
+    __syncthreads();
+}
+// Host-side wrapper that launches the example kernel. No synchronization
+// call follows the launch inside this function.
+void runtime::gpu::example_kernel()
+{
+    const int num_blocks = 1;
+    const int threads_per_block = 32;
+    example<<<num_blocks, threads_per_block>>>();
+}
--- a/src/ngraph/runtime/gpu/nvcc/kernels.hpp
+++ b/src/ngraph/runtime/gpu/nvcc/kernels.hpp
+//*****************************************************************************
+// Copyright 2017-2018 Intel Corporation
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//*****************************************************************************
+#pragma once
+namespace ngraph
+{
+    namespace runtime
+    {
+        namespace gpu
+        {
+            // Host-callable entry point that launches the example kernel;
+            // the definition lives in nvcc/example.cu.cpp.
+            void example_kernel();
+        }
+    }
+}