Commit 1c74ad24 authored by fenglei.tian

fix bugs for add and maximum

parent 94429375
./build/test/unit-test --gtest_filter=GPU.ab
#./build/test/unit-test --gtest_filter=GPU.ab
./build/test/unit-test --gtest_filter=GPU.maximum
#./build/test/unit-test --gtest_filter=GPU.abs
#./build/test/unit-test --gtest_filter=GPU.dot*
......@@ -16,12 +16,9 @@
#include <fstream>
#include <stdio.h>
#include <cuda_runtime.h>
#include "cublas.h"
#include "ngraph/runtime/gpu/gpu_call_frame.hpp"
#include "ngraph/runtime/gpu/gpu_external_function.hpp"
#include "ngraph/runtime/gpu/gpu_tensor_view.hpp"
#include "ngraph/runtime/gpu/gpu_external_function.hpp"
using namespace std;
using namespace ngraph;
......@@ -31,19 +28,25 @@ runtime::gpu::GPU_CallFrame::GPU_CallFrame(std::shared_ptr<GPU_ExternalFunction>
: m_external_function(external_function)
, m_compiled_function(compiled_function)
{
cublasStatus_t stat = cublasCreate(&m_cublas_handle);
if (stat != CUBLAS_STATUS_SUCCESS)
cublasStatus_t cublasStatus = cublasCreate(&m_cublas_handle);
if (cublasStatus != CUBLAS_STATUS_SUCCESS)
{
throw runtime_error("cuBLAS create failed");
throw runtime_error("cuBLAS create handle failed");
}
cudnnStatus_t cudnnStatus = cudnnCreate(&m_cudnn_handle);
if (cudnnStatus != CUDNN_STATUS_SUCCESS)
{
throw runtime_error("cuDnn create handle failed");
}
// Pass scalars as reference on the Host
cublasSetPointerMode(m_cublas_handle, CUBLAS_POINTER_MODE_HOST);
// Pass scalars as reference on the device
cublasSetPointerMode(m_cublas_handle, CUBLAS_POINTER_MODE_DEVICE);
}
// Tear down the per-call-frame GPU library contexts. The constructor creates
// one cuBLAS handle (cublasCreate) and one cuDNN handle (cudnnCreate); both
// are released here so the frame owns their full lifetime (RAII-style).
// NOTE(review): the cublasStatus_t/cudnnStatus_t return codes of the destroy
// calls are deliberately ignored — a destructor must not throw.
runtime::gpu::GPU_CallFrame::~GPU_CallFrame()
{
// Free the cuBLAS context created by cublasCreate in the constructor.
cublasDestroy(m_cublas_handle);
// Free the cuDNN context created by cudnnCreate in the constructor.
cudnnDestroy(m_cudnn_handle);
}
void runtime::gpu::GPU_CallFrame::tensor_call(
......@@ -67,7 +70,7 @@ void runtime::gpu::GPU_CallFrame::tensor_call(
outputs.push_back(tv->m_allocated_buffer_pool);
}
m_compiled_function(inputs.data(), outputs.data(), m_cublas_handle);
m_compiled_function(inputs.data(), outputs.data(), m_cublas_handle, m_cudnn_handle);
}
void runtime::gpu::GPU_CallFrame::call(
......
......@@ -18,7 +18,9 @@
#include <memory>
#include <vector>
#include <cuda_runtime.h>
#include "cublas_v2.h"
#include <cudnn.h>
#include "ngraph/function.hpp"
#include "ngraph/runtime/call_frame.hpp"
......@@ -37,7 +39,8 @@ namespace ngraph
using EntryPoint_t = void(void** inputs,
void** outputs,
cublasHandle_t& cublas_handle);
cublasHandle_t& cublas_handle,
cudnnHandle_t& cudnn_handle);
using EntryPoint = std::function<EntryPoint_t>;
......@@ -66,6 +69,7 @@ namespace ngraph
std::shared_ptr<GPU_ExternalFunction> m_external_function;
EntryPoint m_compiled_function;
cublasHandle_t m_cublas_handle;
cudnnHandle_t m_cudnn_handle;
};
}
}
......
......@@ -260,20 +260,14 @@ void runtime::gpu::GPU_Emitter::EmitMaximum(codegen::CodeWriter& writer,
const vector<runtime::gpu::GPU_TensorViewWrapper>& args,
const vector<runtime::gpu::GPU_TensorViewWrapper>& out)
{
const Shape& arg0_shape = args[0].get_shape();
const Shape& arg1_shape = args[1].get_shape();
// clang-format off
writer << "{ // " << n->get_name() << "\n";
writer.indent++;
writer << "int count = " << out[0].get_size() << ";\n";
writer << "cublasSetPointerMode(cublas_handle, CUBLAS_POINTER_MODE_HOST);\n";;
writer += R"(
float alpha1 = 1.0, alpha2 = 1.0, beta = 0;
cudnnHandle_t cudnnHandle;
(cudnnCreate(&cudnnHandle));
cudnnTensorDescriptor_t descriptor;
(cudnnCreateTensorDescriptor(&descriptor));
(cudnnSetTensor4dDescriptor(descriptor,
float alpha1 = 1.0, alpha2 = 1.0, beta = 0;
cudnnTensorDescriptor_t descriptor;
(cudnnCreateTensorDescriptor(&descriptor));
(cudnnSetTensor4dDescriptor(descriptor,
/*format=*/CUDNN_TENSOR_NHWC,
/*dataType=*/CUDNN_DATA_FLOAT,
/*batch_size=*/1,
......@@ -281,15 +275,15 @@ void runtime::gpu::GPU_Emitter::EmitMaximum(codegen::CodeWriter& writer,
/*image_height=*/1,
/*image_width=*/count));
cudnnOpTensorDescriptor_t opTensorDesc;
(cudnnCreateOpTensorDescriptor(&opTensorDesc));
(cudnnSetOpTensorDescriptor(opTensorDesc,
cudnnOpTensorDescriptor_t opTensorDesc;
(cudnnCreateOpTensorDescriptor(&opTensorDesc));
(cudnnSetOpTensorDescriptor(opTensorDesc,
CUDNN_OP_TENSOR_MAX,
CUDNN_DATA_FLOAT,
CUDNN_NOT_PROPAGATE_NAN));
)";
writer << "cudnnOpTensor(cudnnHandle,"
writer << "cudnnOpTensor(cudnn_handle,"
<< "opTensorDesc,"
<< "&alpha1,"
<< "descriptor,"
......@@ -302,11 +296,9 @@ void runtime::gpu::GPU_Emitter::EmitMaximum(codegen::CodeWriter& writer,
<< out[0].get_name() << ");\n";
writer += R"(
cudnnDestroy(cudnnHandle);
)";
writer.indent--;
writer << "}\n";
// clang-format on
}
void runtime::gpu::GPU_Emitter::EmitMinimum(codegen::CodeWriter& writer,
......
......@@ -358,7 +358,7 @@ void runtime::gpu::GPU_ExternalFunction::compile()
for (shared_ptr<Function> f : pass_manager.get_state().get_functions())
{
writer << "extern \"C\" void " << f->get_name()
<< "(void** inputs, void** outputs, cublasHandle_t& cublas_handle);\n";
<< "(void** inputs, void** outputs, cublasHandle_t& cublas_handle, cudnnHandle_t& cudnn_handle);\n";
}
writer << "\n";
......@@ -476,7 +476,7 @@ void runtime::gpu::GPU_ExternalFunction::compile()
}
writer << "extern \"C\" void " << current_function->get_name();
writer << "(void** inputs, void** outputs, cublasHandle_t& cublas_handle)\n";
writer << "(void** inputs, void** outputs, cublasHandle_t& cublas_handle, cudnnHandle_t& cudnn_handle)\n";
writer << "{\n";
writer.indent++;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment