Fix merge bug and apply clang-format

2e295d27 · fenglei.tian · 809dda4f · 2e295d27 · 2e295d27 · 2e295d27
Commit 2e295d27 authored Mar 08, 2018 by fenglei.tian
Showing 3 changed files with 68 additions and 105 deletions

gpu_emitter.cpp src/ngraph/runtime/gpu/gpu_emitter.cpp +51 -94

gpu_emitter.hpp src/ngraph/runtime/gpu/gpu_emitter.hpp +2 -1

gpu_external_function.cpp src/ngraph/runtime/gpu/gpu_external_function.cpp +15 -10

No files found.
--- a/src/ngraph/runtime/gpu/gpu_emitter.cpp
+++ b/src/ngraph/runtime/gpu/gpu_emitter.cpp
@@ -91,20 +91,23 @@
 #include "ngraph/ops/sum.hpp"
 #include "ngraph/ops/tan.hpp"
 #include "ngraph/ops/tanh.hpp"
-#include "ngraph/runtime/gpu/gpu_emitter.hpp"
 #include "ngraph/runtime/gpu/gpu_cuda_kernel_emitters.hpp"
+#include "ngraph/runtime/gpu/gpu_emitter.hpp"
 #include "ngraph/runtime/gpu/gpu_kernel_emitters.hpp"
 #include "ngraph/util.hpp"

 using namespace std;
 using namespace ngraph;
+
 namespace ngraph
 {
-}
-
+    namespace runtime
+    {
+        namespace gpu
+        {
            template <>
            void GPU_Emitter::EMITTER_DECL(ngraph::op::Abs)
-{
+            {
                if (out[0].get_size() == 0)
                {
                    return;
@@ -112,15 +115,15 @@ namespace ngraph
                writer << "{  // " << node->get_name() << "\n";
                writer.indent++;
                writer << "int count = " << out[0].get_size() << ";\n";
-    writer << "ngraph::runtime::gpu::emit_abs((void*) " << args[0].get_name() << ", (void*) "
-           << out[0].get_name() << ", count);\n";
+                writer << "ngraph::runtime::gpu::emit_abs((void*) " << args[0].get_name()
+                       << ", (void*) " << out[0].get_name() << ", count);\n";
                writer.indent--;
                writer << "}\n";
-}
+            }

            template <>
            void GPU_Emitter::EMITTER_DECL(ngraph::op::Add)
-{
+            {
                if (out[0].get_size() == 0)
                {
                    return;
@@ -158,17 +161,17 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
                       << "descriptor," << out[0].get_name() << ");\n";
                writer.indent--;
                writer << "}\n";
-}
+            }

            template <>
            void GPU_Emitter::EMITTER_DECL(ngraph::op::Dot)
-{
+            {
                if (out[0].get_size() == 0)
                {
                    return;
                }

-    const ngraph::op::Dot* dot = static_cast<const ngraph::op::Dot*>(n);
+                const ngraph::op::Dot* dot = static_cast<const ngraph::op::Dot*>(node);
                const Shape& arg0_shape = args[0].get_shape();
                const Shape& arg1_shape = args[1].get_shape();
                if (arg0_shape.empty() || arg1_shape.empty())
@@ -184,7 +187,8 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
                           << "1," << out[0].get_name() << ", 1);\n";
                    writer << "cublasSscal("
                           << "cublas_handle,"
-               << "count ," << first.get_name() << "," << out[0].get_name() << ", 1);\n";
+                           << "count ," << first.get_name() << "," << out[0].get_name()
+                           << ", 1);\n";
                    writer.indent--;
                    writer << "}\n";
                    return;
@@ -195,8 +199,8 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
                {
                    writer << "{   // " << node->get_name() << "\n";
                    writer.indent++;
-        writer << "runtime::gpu::cuda_memset(" << out[0].get_name() << ", 0, " << out[0].get_size()
-               << " * sizeof(float));\n";
+                    writer << "runtime::gpu::cuda_memset(" << out[0].get_name() << ", 0, "
+                           << out[0].get_size() << " * sizeof(float));\n";
                    writer.indent--;
                    writer << "}\n";
                    return;
@@ -224,7 +228,8 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
                           << "cublas_handle,"
                           << "CUBLAS_OP_T," << arg0_shape[0] << "," << arg0_shape[1] << ","
                           << "&alpha," // Alpha
-               << args[0].get_name() << "," << arg0_shape[1] << "," << args[1].get_name() << ","
+                           << args[0].get_name() << "," << arg0_shape[1] << ","
+                           << args[1].get_name() << ","
                           << "1,"
                           << "&beta," // beta
                           << out[0].get_name() << ","
@@ -270,15 +275,14 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
                }
                else
                {
-        throw std::runtime_error(node->get_name() + " with more then 2D is not implemented.");
+                    throw std::runtime_error(node->get_name() +
+                                             " with more then 2D is not implemented.");
+                }
            }
-}
-
-

            template <>
            void GPU_Emitter::EMITTER_DECL(ngraph::op::Maximum)
-{
+            {
                if (out[0].get_size() == 0)
                {
                    return;
@@ -320,7 +324,7 @@ cudnnSetOpTensorDescriptor(opTensorDesc,

            template <>
            void GPU_Emitter::EMITTER_DECL(ngraph::op::Minimum)
-{
+            {
                if (out[0].get_size() == 0)
                {
                    return;
@@ -362,7 +366,7 @@ cudnnSetOpTensorDescriptor(opTensorDesc,

            template <>
            void GPU_Emitter::EMITTER_DECL(ngraph::op::Negative)
-{
+            {
                if (out[0].get_size() == 0)
                {
                    return;
@@ -400,35 +404,15 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
                       << "descriptor," << out[0].get_name() << ");\n";
                writer.indent--;
                writer << "}\n";
-}
+            }

            template <>
            void GPU_Emitter::EMITTER_DECL(ngraph::op::Broadcast)
-{
-    if (out[0].get_size() == 0)
            {
-        return;
-    }
-    auto broadcast = static_cast<const ngraph::op::Broadcast*>(n);
-    auto arg_shape = args[0].get_shape();
-    auto result_shape = out[0].get_shape();
-
-    auto& axes = broadcast->get_broadcast_axes();
-    //broadcast axes is empty, do a copy
-    if (axes.empty())
+                if (out[0].get_size() == 0)
                {
-        writer << "{   // " << node->get_name() << " \n";
-        writer.indent++;
-        writer << "runtime::gpu::cuda_memcpyDtD(" << out[0].get_name() << ", " << args[0].get_name()
-               << ", " << out[0].get_size() << " * " << out[0].get_element_type().size() << ");\n";
-        writer.indent--;
-        writer << "}\n";
                    return;
                }
-
-            template <>
-            void GPU_Emitter::EMITTER_DECL(ngraph::op::Broadcast)
-            {
                auto broadcast = static_cast<const ngraph::op::Broadcast*>(node);
                auto arg_shape = args[0].get_shape();
                auto result_shape = out[0].get_shape();
@@ -490,50 +474,20 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
                    throw std::runtime_error(node->get_name() + " is not implemented.");
                }
            }
-        }
-    }
-    if (is_one_axes)
-    {
-        int repeat_times = 1;
-        for (int i = 0; i < axes_v.size(); i++)
-        {
-            repeat_times *= result_shape[axes_v[i]];
-        }
-
-        int repeat_size = 1;
-        for (int i = *axes_v.rbegin() + 1; i < result_shape.size(); i++)
-        {
-            repeat_size *= result_shape[i];
-        }
-
-        writer << "{   // " << node->get_name() << " \n";
-        writer.indent++;
-        writer << "runtime::gpu::emit_broadcast(" << args[0].get_name() << ", " << out[0].get_name()
-               << ", " << repeat_size << ", " << repeat_times << ", " << out[0].get_size()
-               << ");\n";
-        writer.indent--;
-        writer << "}\n";
-    }
-    else
-    {
-        throw std::runtime_error(node->get_name() + " is not implemented.");
-    }
-}
-

            template <>
            void GPU_Emitter::EMITTER_DECL(ngraph::op::Constant)
-{
-}
+            {
+            }

            template <>
            void GPU_Emitter::EMITTER_DECL(ngraph::op::Reshape)
-{
+            {
                if (out[0].get_size() == 0)
                {
                    return;
                }
-    auto reshape = static_cast<const op::Reshape*>(n);
+                auto reshape = static_cast<const op::Reshape*>(node);
                writer << "{   // " << node->get_name() << "\n";
                writer.indent++;
                auto arg_shape = args[0].get_shape();
@@ -557,8 +511,9 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
                {
                    writer << "{   // " << node->get_name() << " 1\n";
                    writer.indent++;
-        writer << "runtime::gpu::cuda_memcpyDtD(" << out[0].get_name() << ", " << args[0].get_name()
-               << ", " << out[0].get_size() << " * " << out[0].get_element_type().size() << ");\n";
+                    writer << "runtime::gpu::cuda_memcpyDtD(" << out[0].get_name() << ", "
+                           << args[0].get_name() << ", " << out[0].get_size() << " * "
+                           << out[0].get_element_type().size() << ");\n";
                    writer.indent--;
                    writer << "}\n";
                }
@@ -578,8 +533,8 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
                           << "&alpha," // Alpha
                           << args[0].get_name() << "," << arg_shape[1] << ","
                           << "&beta," // beta
-               << args[0].get_name() << "," << arg_shape[1] << "," << out[0].get_name() << ","
-               << result_shape[1] << ");\n";
+                           << args[0].get_name() << "," << arg_shape[1] << "," << out[0].get_name()
+                           << "," << result_shape[1] << ");\n";
                    writer << "cublasSetPointerMode(cublas_handle, CUBLAS_POINTER_MODE_DEVICE);\n";
                    writer.indent--;
                    writer << "}\n";
@@ -588,11 +543,12 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
                else
                {
                    throw runtime_error(
-            "Axis permutation in reshape is not implemented yet for tensors with rank>2");
+                        "Axis permutation in reshape is not implemented yet for tensors with "
+                        "rank>2");
                }
                writer.indent--;
                writer << "}\n";
-}
+            }

            template <>
            void GPU_Emitter::EMITTER_DECL(ngraph::op::FunctionCall)
@@ -601,7 +557,7 @@ cudnnSetOpTensorDescriptor(opTensorDesc,

            template <>
            void GPU_Emitter::EMITTER_DECL(ngraph::op::Multiply)
-{
+            {
                if (out[0].get_size() == 0)
                {
                    return;
@@ -639,11 +595,11 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
                       << "descriptor," << out[0].get_name() << ");\n";
                writer.indent--;
                writer << "}\n";
-}
+            }

-v            template <>
+            template <>
            void GPU_Emitter::EMITTER_DECL(ngraph::op::Sqrt)
-{
+            {
                if (out[0].get_size() == 0)
                {
                    return;
@@ -682,18 +638,19 @@ cudnnSetOpTensorDescriptor(opTensorDesc,
                writer.indent--;
                writer << "}\n";
            }
-        }
-    }
-}

            template <>
            void GPU_Emitter::EMITTER_DECL(ngraph::op::Result)
-{
+            {
                writer << "{   //" << node->get_name() << "\n";
                writer.indent++;
-    writer << "runtime::gpu::cuda_memcpyDtD(" << out[0].get_name() << ", " << args[0].get_name()
-           << ", " << out[0].get_size() << " * " << out[0].get_element_type().size() << ");\n";
+                writer << "runtime::gpu::cuda_memcpyDtD(" << out[0].get_name() << ", "
+                       << args[0].get_name() << ", " << out[0].get_size() << " * "
+                       << out[0].get_element_type().size() << ");\n";
                writer.indent--;
                writer << "}\n";
                return;
+            }
+        }
+    }
 }
--- a/src/ngraph/runtime/gpu/gpu_emitter.hpp
+++ b/src/ngraph/runtime/gpu/gpu_emitter.hpp
@@ -46,7 +46,8 @@ namespace ngraph
                                 const std::vector<GPU_TensorViewWrapper>& args,
                                 const std::vector<GPU_TensorViewWrapper>& out)
                {
-                    throw std::runtime_error("Unimplemented op in GPU emitter for " + node->get_name());
+                    throw std::runtime_error("Unimplemented op in GPU emitter for " +
+                                             node->get_name());
                }

                static void nop(GPU_ExternalFunction* external_function,

--- a/src/ngraph/runtime/gpu/gpu_external_function.cpp
+++ b/src/ngraph/runtime/gpu/gpu_external_function.cpp
@@ -187,7 +187,8 @@ static const ngraph::runtime::gpu::OpMap dispatcher{
    {TI(ngraph::op::Convert), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Convert>},
    {TI(ngraph::op::Constant), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Constant>},
    {TI(ngraph::op::Reshape), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Reshape>},
-    {TI(ngraph::op::FunctionCall), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::FunctionCall>},
+    {TI(ngraph::op::FunctionCall),
+     &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::FunctionCall>},
    {TI(ngraph::op::Reduce), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Reduce>},
    {TI(ngraph::op::Sign), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Sign>},
    {TI(ngraph::op::Slice), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Slice>},
@@ -202,12 +203,14 @@ static const ngraph::runtime::gpu::OpMap dispatcher{
    {TI(ngraph::op::Asin), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Asin>},
    {TI(ngraph::op::Acos), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Acos>},
    {TI(ngraph::op::Atan), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Atan>},
-    {TI(ngraph::op::ReplaceSlice), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::ReplaceSlice>},
+    {TI(ngraph::op::ReplaceSlice),
+     &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::ReplaceSlice>},
    {TI(ngraph::op::OneHot), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::OneHot>},
    {TI(ngraph::op::Floor), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Floor>},
    {TI(ngraph::op::Ceiling), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Ceiling>},
    {TI(ngraph::op::Sqrt), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Sqrt>},
-    {TI(ngraph::op::Convolution), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Convolution>},
+    {TI(ngraph::op::Convolution),
+     &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Convolution>},
    {TI(ngraph::op::ConvolutionBackpropFilters),
     &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::ConvolutionBackpropFilters>},
    {TI(ngraph::op::ConvolutionBackpropData),
@@ -216,7 +219,8 @@ static const ngraph::runtime::gpu::OpMap dispatcher{
    {TI(ngraph::op::MaxPool), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::MaxPool>},
    {TI(ngraph::op::Reverse), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Reverse>},
    {TI(ngraph::op::Result), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Result>},
-    {TI(ngraph::op::ReduceWindow), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::ReduceWindow>},
+    {TI(ngraph::op::ReduceWindow),
+     &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::ReduceWindow>},
    {TI(ngraph::op::SelectAndScatter),
     &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::SelectAndScatter>},
    {TI(ngraph::op::AvgPool), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::AvgPool>},
@@ -232,7 +236,8 @@ static const ngraph::runtime::gpu::OpMap dispatcher{
    {TI(ngraph::op::Max), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Max>},
    {TI(ngraph::op::Min), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Min>},
    {TI(ngraph::op::Relu), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Relu>},
-    {TI(ngraph::op::ReluBackprop), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::ReluBackprop>},
+    {TI(ngraph::op::ReluBackprop),
+     &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::ReluBackprop>},
    {TI(ngraph::op::Softmax), &ngraph::runtime::gpu::GPU_Emitter::emit<ngraph::op::Softmax>},
 };

@@ -564,8 +569,8 @@ using namespace std;
            size_t temp_pool_size = current_function->get_temporary_pool_size();
            writer << "// Allocate the memory pool\n";
            // TODO memory pool malloc.
-            writer << "void* pool_base_ptr = ngraph::runtime::gpu::create_gpu_buffer(" << temp_pool_size
-                   << ");\n";
+            writer << "void* pool_base_ptr = ngraph::runtime::gpu::create_gpu_buffer("
+                   << temp_pool_size << ");\n";

            // Add temporaries to the variable name map
            for (shared_ptr<Node> node : current_function->get_ordered_ops())
@@ -641,9 +646,9 @@ using namespace std;
            {
                if (contains(constants, tv.get()))
                {
-                    writer << "ngraph::runtime::gpu::cuda_memcpyHtD(outputs[" << output_index << "], "
-                           << tv->get_tensor().get_name() << ", " << tv->get_tensor().size()
-                           << ");\n";
+                    writer << "ngraph::runtime::gpu::cuda_memcpyHtD(outputs[" << output_index
+                           << "], " << tv->get_tensor().get_name() << ", "
+                           << tv->get_tensor().size() << ");\n";
                }
                else
                {