clang format
b5414ba5 · fenglei.tian · 6204a154 · b5414ba5
Commit b5414ba5 authored Mar 08, 2018 by fenglei.tian
Show whitespace changes
Inline Side-by-side

Showing 1 changed file with 106 additions and 82 deletions

gpu_external_function.cpp src/ngraph/runtime/gpu/gpu_external_function.cpp +106 -82

No files found.
--- a/src/ngraph/runtime/gpu/gpu_external_function.cpp
+++ b/src/ngraph/runtime/gpu/gpu_external_function.cpp
@@ -165,7 +165,7 @@ namespace ngraph
    {
        namespace gpu
        {
-static const OpMap dispatcher{
+            static const OpMap dispatcher{
                {TI(ngraph::op::Add), &GPU_Emitter::emit<ngraph::op::Add>},
                {TI(ngraph::op::Dot), &GPU_Emitter::emit<ngraph::op::Dot>},
                {TI(ngraph::op::Multiply), &GPU_Emitter::emit<ngraph::op::Multiply>},
@@ -192,8 +192,7 @@ static const OpMap dispatcher{
                {TI(ngraph::op::Convert), &GPU_Emitter::emit<ngraph::op::Convert>},
                {TI(ngraph::op::Constant), &GPU_Emitter::emit<ngraph::op::Constant>},
                {TI(ngraph::op::Reshape), &GPU_Emitter::emit<ngraph::op::Reshape>},
-    {TI(ngraph::op::FunctionCall),
+                {TI(ngraph::op::FunctionCall), &GPU_Emitter::emit<ngraph::op::FunctionCall>},
-     &GPU_Emitter::emit<ngraph::op::FunctionCall>},
                {TI(ngraph::op::Reduce), &GPU_Emitter::emit<ngraph::op::Reduce>},
                {TI(ngraph::op::Sign), &GPU_Emitter::EmitUnaryElementwise},
                {TI(ngraph::op::Slice), &GPU_Emitter::emit<ngraph::op::Slice>},
@@ -208,14 +207,12 @@ static const OpMap dispatcher{
                {TI(ngraph::op::Asin), &GPU_Emitter::EmitUnaryElementwise},
                {TI(ngraph::op::Acos), &GPU_Emitter::EmitUnaryElementwise},
                {TI(ngraph::op::Atan), &GPU_Emitter::EmitUnaryElementwise},
-    {TI(ngraph::op::ReplaceSlice),
+                {TI(ngraph::op::ReplaceSlice), &GPU_Emitter::emit<ngraph::op::ReplaceSlice>},
-     &GPU_Emitter::emit<ngraph::op::ReplaceSlice>},
                {TI(ngraph::op::OneHot), &GPU_Emitter::emit<ngraph::op::OneHot>},
                {TI(ngraph::op::Floor), &GPU_Emitter::EmitUnaryElementwise},
                {TI(ngraph::op::Ceiling), &GPU_Emitter::EmitUnaryElementwise},
                {TI(ngraph::op::Sqrt), &GPU_Emitter::emit<ngraph::op::Sqrt>},
-    {TI(ngraph::op::Convolution),
+                {TI(ngraph::op::Convolution), &GPU_Emitter::emit<ngraph::op::Convolution>},
-     &GPU_Emitter::emit<ngraph::op::Convolution>},
                {TI(ngraph::op::ConvolutionBackpropFilters),
                 &GPU_Emitter::emit<ngraph::op::ConvolutionBackpropFilters>},
                {TI(ngraph::op::ConvolutionBackpropData),
@@ -224,50 +221,48 @@ static const OpMap dispatcher{
                {TI(ngraph::op::MaxPool), &GPU_Emitter::emit<ngraph::op::MaxPool>},
                {TI(ngraph::op::Reverse), &GPU_Emitter::emit<ngraph::op::Reverse>},
                {TI(ngraph::op::Result), &GPU_Emitter::emit<ngraph::op::Result>},
-    {TI(ngraph::op::ReduceWindow),
+                {TI(ngraph::op::ReduceWindow), &GPU_Emitter::emit<ngraph::op::ReduceWindow>},
-     &GPU_Emitter::emit<ngraph::op::ReduceWindow>},
                {TI(ngraph::op::SelectAndScatter),
                 &GPU_Emitter::emit<ngraph::op::SelectAndScatter>},
                {TI(ngraph::op::AvgPool), &GPU_Emitter::emit<ngraph::op::AvgPool>},
-    {TI(ngraph::op::AvgPoolBackprop),
+                {TI(ngraph::op::AvgPoolBackprop), &GPU_Emitter::emit<ngraph::op::AvgPoolBackprop>},
-     &GPU_Emitter::emit<ngraph::op::AvgPoolBackprop>},
                {TI(ngraph::op::Pad), &GPU_Emitter::emit<ngraph::op::Pad>},
                {TI(ngraph::op::BatchNorm), &GPU_Emitter::emit<ngraph::op::BatchNorm>},
                {TI(ngraph::op::BatchNormBackprop),
                 &GPU_Emitter::emit<ngraph::op::BatchNormBackprop>},
-    {TI(ngraph::op::MaxPoolBackprop),
+                {TI(ngraph::op::MaxPoolBackprop), &GPU_Emitter::emit<ngraph::op::MaxPoolBackprop>},
-     &GPU_Emitter::emit<ngraph::op::MaxPoolBackprop>},
                {TI(ngraph::op::Product), &GPU_Emitter::emit<ngraph::op::Product>},
                {TI(ngraph::op::Max), &GPU_Emitter::emit<ngraph::op::Max>},
                {TI(ngraph::op::Min), &GPU_Emitter::emit<ngraph::op::Min>},
                {TI(ngraph::op::Relu), &GPU_Emitter::emit<ngraph::op::Relu>},
-    {TI(ngraph::op::ReluBackprop),
+                {TI(ngraph::op::ReluBackprop), &GPU_Emitter::emit<ngraph::op::ReluBackprop>},
-     &GPU_Emitter::emit<ngraph::op::ReluBackprop>},
                {TI(ngraph::op::Softmax), &GPU_Emitter::emit<ngraph::op::Softmax>},
-};
+            };
-GPU_ExternalFunction::GPU_ExternalFunction(
+            GPU_ExternalFunction::GPU_ExternalFunction(const shared_ptr<ngraph::Function>& function,
-    const shared_ptr<ngraph::Function>& function, bool release_function)
+                                                       bool release_function)
                : ngraph::runtime::ExternalFunction(function, release_function)
                , m_compiled_function(nullptr)
                , m_emit_timing(std::getenv("NGRAPH_GPU_EMIT_TIMING") != nullptr)
-{
+            {
-}
+            }
-void GPU_ExternalFunction::compile()
+            void GPU_ExternalFunction::compile()
-{
+            {
                if (m_is_compiled)
                {
                    return;
                }
                string function_name = m_function->get_name();
-    string dump_filename = file_util::path_join(s_output_dir, function_name + "_ops.txt");
+                string dump_filename =
+                    file_util::path_join(s_output_dir, function_name + "_ops.txt");
                pass::Manager pass_manager;
                // pass_manager.register_pass<pass::TopologicalSort>();
                // For now, just make everyone row-major.
-    pass_manager.register_pass<pass::AssignLayout<descriptor::layout::DenseTensorViewLayout>>();
+                pass_manager
+                    .register_pass<pass::AssignLayout<descriptor::layout::DenseTensorViewLayout>>();
                pass_manager.register_pass<pass::Liveness>();
                pass_manager.register_pass<pass::MemoryLayout>(64);
                pass_manager.register_pass<pass::DumpSorted>(dump_filename);
@@ -313,7 +308,8 @@ using namespace std;
                {
                    writer << "// Declare debug timers\n";
                    vector<string> names;
-        for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
+                    for (shared_ptr<Function> current_function :
+                         pass_manager.get_state().get_functions())
                    {
                        for (shared_ptr<Node> node : current_function->get_ordered_ops())
                        {
@@ -327,8 +323,8 @@ using namespace std;
                    {
                        writer << "ngraph::stopwatch timer_" << s << ";\n";
                    }
-        writer << "extern \"C\" size_t get_debug_timer_count() { return " << names.size()
+                    writer << "extern \"C\" size_t get_debug_timer_count() { return "
-               << "; }\n";
+                           << names.size() << "; }\n";
                    writer << "extern \"C\" const char* get_debug_timer_name(size_t index)\n";
                    writer << "{\n";
                    writer.indent++;
@@ -344,7 +340,8 @@ using namespace std;
                    writer << "return rc;\n";
                    writer.indent--;
                    writer << "}\n";
-        writer << "extern \"C\" const size_t get_debug_timer_microseconds(size_t index)\n";
+                    writer
+                        << "extern \"C\" const size_t get_debug_timer_microseconds(size_t index)\n";
                    writer << "{\n";
                    writer.indent++;
                    writer << "size_t rc;\n";
@@ -360,7 +357,8 @@ using namespace std;
                    writer << "return rc;\n";
                    writer.indent--;
                    writer << "}\n";
-        writer << "extern \"C\" const size_t get_debug_timer_call_count(size_t index)\n";
+                    writer
+                        << "extern \"C\" const size_t get_debug_timer_call_count(size_t index)\n";
                    writer << "{\n";
                    writer.indent++;
                    writer << "size_t rc;\n";
@@ -368,7 +366,8 @@ using namespace std;
                    writer << "{\n";
                    for (size_t i = 0; i < names.size(); i++)
                    {
-            writer << "case " << i << ": rc = timer_" << names[i] << ".get_call_count(); break;\n";
+                        writer << "case " << i << ": rc = timer_" << names[i]
+                               << ".get_call_count(); break;\n";
                    }
                    writer << "default: rc = 0;\n";
                    writer << "}\n";
@@ -384,26 +383,31 @@ using namespace std;
                writer << "void *__dso_handle = 0;\n\n";
                writer << "// Declare all constants\n";
-    for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
+                for (shared_ptr<Function> current_function :
+                     pass_manager.get_state().get_functions())
                {
                    for (shared_ptr<Node> node : current_function->get_ordered_ops())
                    {
                        const op::Constant* c = dynamic_cast<ngraph::op::Constant*>(node.get());
                        if (c)
                        {
-                shared_ptr<descriptor::TensorView> tv = node->get_outputs()[0].get_tensor_view();
+                            shared_ptr<descriptor::TensorView> tv =
+                                node->get_outputs()[0].get_tensor_view();
                            auto c_value_strings = c->get_value_strings();
-                writer << "static " << tv->get_tensor().get_element_type().c_type_string() << " "
+                            writer << "static "
-                       << tv->get_tensor().get_name() << "_cpu[" << c_value_strings.size()
+                                   << tv->get_tensor().get_element_type().c_type_string() << " "
-                       << "] =\n";
+                                   << tv->get_tensor().get_name() << "_cpu["
+                                   << c_value_strings.size() << "] =\n";
                            writer << "{\n";
                            writer.indent++;
                            writer << emit_string_array(c_value_strings, 100 - writer.indent * 4);
                            writer.indent--;
                            writer << "\n};\n\n";
-                writer << "static " << tv->get_tensor().get_element_type().c_type_string() << " *"
+                            writer << "static "
+                                   << tv->get_tensor().get_element_type().c_type_string() << " *"
                                   << tv->get_tensor().get_name() << ";\n";
-                m_variable_name_map[tv->get_tensor().get_name()] = tv->get_tensor().get_name();
+                            m_variable_name_map[tv->get_tensor().get_name()] =
+                                tv->get_tensor().get_name();
                        }
                    }
                }
@@ -411,7 +415,8 @@ using namespace std;
                writer << "// Declare all functions\n";
                for (shared_ptr<Function> f : pass_manager.get_state().get_functions())
                {
-        writer << "extern \"C\" void " << f->get_name() << "(void** inputs, void** outputs, "
+                    writer << "extern \"C\" void " << f->get_name()
+                           << "(void** inputs, void** outputs, "
                              "cublasHandle_t& cublas_handle, "
                              "cudnnHandle_t& cudnn_handle);\n";
                }
@@ -419,7 +424,8 @@ using namespace std;
                writer << "\n";
                unordered_map<Node*, string> match_functions;
-    for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
+                for (shared_ptr<Function> current_function :
+                     pass_manager.get_state().get_functions())
                {
                    bool temporaries_used = false;
                    size_t worst_case_tmp_size = 0;
@@ -512,7 +518,8 @@ using namespace std;
                    }
                }
-    for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
+                for (shared_ptr<Function> current_function :
+                     pass_manager.get_state().get_functions())
                {
                    set<string> output_names;
                    for (shared_ptr<Node> op : current_function->get_results())
@@ -525,18 +532,21 @@ using namespace std;
                    {
                        if (dynamic_cast<ngraph::op::Constant*>(node.get()))
                        {
-                shared_ptr<descriptor::TensorView> tv = node->get_outputs()[0].get_tensor_view();
+                            shared_ptr<descriptor::TensorView> tv =
+                                node->get_outputs()[0].get_tensor_view();
                            constants.insert(tv.get());
                        }
                    }
                    writer << "extern \"C\" void " << current_function->get_name();
-        writer << "(void** inputs, void** outputs, cublasHandle_t& cublas_handle, cudnnHandle_t& "
+                    writer << "(void** inputs, void** outputs, cublasHandle_t& cublas_handle, "
+                              "cudnnHandle_t& "
                              "cudnn_handle)\n";
                    writer << "{\n";
                    writer.indent++;
-        for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
+                    for (shared_ptr<Function> current_function :
+                         pass_manager.get_state().get_functions())
                    {
                        for (shared_ptr<Node> node : current_function->get_ordered_ops())
                        {
@@ -548,9 +558,10 @@ using namespace std;
                                writer << "if(" << tv->get_tensor().get_name() << " == NULL)\n";
                                writer << "{\n";
                                writer.indent++;
-                    writer << "runtime::gpu::cuda_memcpyHtD(" << tv->get_tensor().get_name() << ", "
+                                writer << "runtime::gpu::cuda_memcpyHtD("
-                           << tv->get_tensor().get_name() << "_cpu, " << tv->get_tensor().size()
+                                       << tv->get_tensor().get_name() << ", "
-                           << ");\n";
+                                       << tv->get_tensor().get_name() << "_cpu, "
+                                       << tv->get_tensor().size() << ");\n";
                                writer.indent--;
                                writer << "}\n";
                            }
@@ -584,7 +595,8 @@ using namespace std;
                            {
                                stringstream ss;
                                ss << "((" << tensor->get_element_type().c_type_string()
-                       << "*)((char *)pool_base_ptr + " << tensor->get_pool_offset() << "))";
+                                   << "*)((char *)pool_base_ptr + " << tensor->get_pool_offset()
+                                   << "))";
                                m_variable_name_map[tensor->get_name()] = ss.str();
                            }
                        }
@@ -592,12 +604,15 @@ using namespace std;
                    // Add inputs to the variable name map
                    size_t arg_index = 0;
-        for (shared_ptr<ngraph::op::Parameter> param : current_function->get_parameters())
+                    for (shared_ptr<ngraph::op::Parameter> param :
+                         current_function->get_parameters())
                    {
                        for (size_t i = 0; i < param->get_output_size(); ++i)
                        {
-                shared_ptr<descriptor::TensorView> tv = param->get_output_tensor_view(i);
+                            shared_ptr<descriptor::TensorView> tv =
-                const element::Type& et = tv->get_tensor_view_type()->get_element_type();
+                                param->get_output_tensor_view(i);
+                            const element::Type& et =
+                                tv->get_tensor_view_type()->get_element_type();
                            string type = et.c_type_string();
                            stringstream ss;
                            ss << "((" << type << "*)(inputs[" << arg_index << "]))";
@@ -631,7 +646,8 @@ using namespace std;
                        shared_ptr<descriptor::TensorView> tv = op->get_output_tensor_view();
                        const element::Type& et = tv->get_tensor_view_type()->get_element_type();
                        bool parameter_as_output = false;
-            for (shared_ptr<ngraph::op::Parameter> param : current_function->get_parameters())
+                        for (shared_ptr<ngraph::op::Parameter> param :
+                             current_function->get_parameters())
                        {
                            for (const descriptor::Output& pout : param->get_outputs())
                            {
@@ -639,8 +655,10 @@ using namespace std;
                                if (tv == ptv)
                                {
                                    parameter_as_output = true;
-                        writer << "ngraph::runtime::gpu::cuda_memcpyDtD(reinterpret_cast<"
+                                    writer
-                               << et.c_type_string() << "*>(outputs[" << output_index << "]), "
+                                        << "ngraph::runtime::gpu::cuda_memcpyDtD(reinterpret_cast<"
+                                        << et.c_type_string() << "*>(outputs[" << output_index
+                                        << "]), "
                                        << m_variable_name_map[ptv->get_tensor().get_name()] << ", "
                                        << ptv->get_tensor().size() << ");\n";
                                    break;
@@ -651,9 +669,9 @@ using namespace std;
                        {
                            if (contains(constants, tv.get()))
                            {
-                    writer << "ngraph::runtime::gpu::cuda_memcpyHtD(outputs[" << output_index
+                                writer << "ngraph::runtime::gpu::cuda_memcpyHtD(outputs["
-                           << "], " << tv->get_tensor().get_name() << ", "
+                                       << output_index << "], " << tv->get_tensor().get_name()
-                           << tv->get_tensor().size() << ");\n";
+                                       << ", " << tv->get_tensor().size() << ");\n";
                            }
                            else
                            {
@@ -668,27 +686,29 @@ using namespace std;
                    for (shared_ptr<Node> node : current_function->get_ordered_ops())
                    {
-            auto& n = *node; // Work around a compiler warning (*node inside typeid may have effects
+                        auto& n =
+                            *node; // Work around a compiler warning (*node inside typeid may have effects
                        // with shared pointers, which is fine here but clang doesn't like it.)
                        auto handler = dispatcher.find(type_index(typeid(n)));
                        if (handler == dispatcher.end())
                        {
-                throw ngraph_error("Unhandled op during code generation : " + node->description());
+                            throw ngraph_error("Unhandled op during code generation : " +
+                                               node->description());
                        }
                        vector<GPU_TensorViewWrapper> in;
                        for (const descriptor::Input& input : node->get_inputs())
                        {
                            const descriptor::Output& output = input.get_output();
                            shared_ptr<descriptor::TensorView> tv = output.get_tensor_view();
-                in.push_back(
+                            in.push_back(GPU_TensorViewWrapper(
-                    GPU_TensorViewWrapper(tv, m_variable_name_map[tv->get_tensor().get_name()]));
+                                tv, m_variable_name_map[tv->get_tensor().get_name()]));
                        }
                        vector<GPU_TensorViewWrapper> out;
                        for (const descriptor::Output& output : node->get_outputs())
                        {
                            shared_ptr<descriptor::TensorView> tv = output.get_tensor_view();
-                out.push_back(
+                            out.push_back(GPU_TensorViewWrapper(
-                    GPU_TensorViewWrapper(tv, m_variable_name_map[tv->get_tensor().get_name()]));
+                                tv, m_variable_name_map[tv->get_tensor().get_name()]));
                        }
                        // Emit operation prologue
@@ -742,7 +762,8 @@ using namespace std;
                // TODO: Cleanup and make this a utility function
                file_util::make_directory(s_output_dir);
-    string filename = file_util::path_join(s_output_dir, function_name + "_codegen.cpp");
+                string filename =
+                    file_util::path_join(s_output_dir, function_name + "_codegen.cpp");
                ofstream out(filename);
                string code = writer.get_code();
                out << code;
@@ -761,7 +782,8 @@ using namespace std;
                }
                m_execution_engine->add_module(codegen_module);
                m_execution_engine->finalize();
-    m_compiled_function = m_execution_engine->find_function<EntryPoint_t>(function_name);
+                m_compiled_function =
+                    m_execution_engine->find_function<EntryPoint_t>(function_name);
                assert(m_compiled_function);
                m_is_compiled = true;
@@ -769,13 +791,13 @@ using namespace std;
                {
                    release_function();
                }
-}
+            }
-void GPU_ExternalFunction::handle_output_alias(
+            void GPU_ExternalFunction::handle_output_alias(
                codegen::CodeWriter& writer,
                const Node& node,
                const unordered_map<descriptor::TensorView*, vector<size_t>>& output_alias_map)
-{
+            {
                for (const descriptor::Output& output : node.get_outputs())
                {
                    shared_ptr<descriptor::TensorView> otv = output.get_tensor_view();
@@ -789,45 +811,46 @@ void GPU_ExternalFunction::handle_output_alias(
                            writer.indent++;
                            for (size_t i = 1; i < outputs.size(); i++)
                            {
-                    writer << "ngraph::runtime::gpu::cuda_memcpyDtD(static_cast<void*>(outputs["
+                                writer << "ngraph::runtime::gpu::cuda_memcpyDtD(static_cast<void*>("
-                           << outputs[i] << "]), static_cast<void*>(outputs[" << outputs[0]
+                                          "outputs["
-                           << "]), " << otv->get_tensor().size() << ");\n";
+                                       << outputs[i] << "]), static_cast<void*>(outputs["
+                                       << outputs[0] << "]), " << otv->get_tensor().size()
+                                       << ");\n";
                            }
                            writer.indent--;
                            writer << "}\n";
                        }
                    }
                }
-}
+            }
-shared_ptr<ngraph::runtime::CallFrame> GPU_ExternalFunction::make_call_frame()
+            shared_ptr<ngraph::runtime::CallFrame> GPU_ExternalFunction::make_call_frame()
-{
+            {
                if (!m_is_compiled)
                {
                    compile();
                }
-    return make_shared<GPU_CallFrame>(shared_from_this(),
+                return make_shared<GPU_CallFrame>(shared_from_this(), m_compiled_function);
-                                                            m_compiled_function);
+            }
-}
-void GPU_ExternalFunction::emit_debug_function_entry(
+            void GPU_ExternalFunction::emit_debug_function_entry(
                codegen::CodeWriter& writer,
                Node* node,
                const std::vector<GPU_TensorViewWrapper>& in,
                const std::vector<GPU_TensorViewWrapper>& out)
-{
+            {
                writer << "timer_" << node->get_name() << ".start();\n";
-}
+            }
-void GPU_ExternalFunction::emit_debug_function_exit(
+            void GPU_ExternalFunction::emit_debug_function_exit(
                codegen::CodeWriter& writer,
                Node* node,
                const std::vector<GPU_TensorViewWrapper>& in,
                const std::vector<GPU_TensorViewWrapper>& out)
-{
+            {
                writer << "timer_" << node->get_name() << ".stop();\n";
-}
+            }
        }
    }
 }
\ No newline at end of file