Commit 178de84e authored by fenglei.tian

external_function bug fix

parent 2db7022e
/******************************************************************************* // ----------------------------------------------------------------------------
* Copyright 2017-2018 Intel Corporation // Copyright 2017 Nervana Systems Inc.
* // Licensed under the Apache License, Version 2.0 (the "License");
* Licensed under the Apache License, Version 2.0 (the "License"); // you may not use this file except in compliance with the License.
* you may not use this file except in compliance with the License. // You may obtain a copy of the License at
* You may obtain a copy of the License at //
* // http://www.apache.org/licenses/LICENSE-2.0
* http://www.apache.org/licenses/LICENSE-2.0 //
* // Unless required by applicable law or agreed to in writing, software
* Unless required by applicable law or agreed to in writing, software // distributed under the License is distributed on an "AS IS" BASIS,
* distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and
* See the License for the specific language governing permissions and // ----------------------------------------------------------------------------
* limitations under the License.
*******************************************************************************/
#include <cstdlib> #include <cstdlib>
#include <fstream> #include <fstream>
...@@ -22,6 +20,12 @@ ...@@ -22,6 +20,12 @@
#include <typeindex> #include <typeindex>
#include <typeinfo> #include <typeinfo>
#include <unordered_map> #include <unordered_map>
#include <cuda_runtime.h>
#include <cudnn_v7.h>
#include "cublas_v2.h"
#include "cuda.h"
#include "ngraph/codegen/code_writer.hpp" #include "ngraph/codegen/code_writer.hpp"
#include "ngraph/codegen/compiler.hpp" #include "ngraph/codegen/compiler.hpp"
#include "ngraph/codegen/execution_engine.hpp" #include "ngraph/codegen/execution_engine.hpp"
...@@ -224,8 +228,8 @@ void runtime::gpu::GPU_ExternalFunction::compile() ...@@ -224,8 +228,8 @@ void runtime::gpu::GPU_ExternalFunction::compile()
writer += writer +=
R"(// Generated by the NGraph GPU backend R"(// Generated by the NGraph GPU backend
#include "cuda_runtime.h" #include <cuda_runtime.h>
#include "cudnn_v7.h" #include <cudnn_v7.h>
#include "cublas_v2.h" #include "cublas_v2.h"
#include "cuda.h" #include "cuda.h"
...@@ -249,6 +253,11 @@ void runtime::gpu::GPU_ExternalFunction::compile() ...@@ -249,6 +253,11 @@ void runtime::gpu::GPU_ExternalFunction::compile()
string pch_header_source = writer.get_code(); string pch_header_source = writer.get_code();
writer += R"(
using namespace ngraph;
using namespace std;
)";
if (m_emit_timing) if (m_emit_timing)
{ {
writer << "// Declare debug timers\n"; writer << "// Declare debug timers\n";
...@@ -324,7 +333,6 @@ void runtime::gpu::GPU_ExternalFunction::compile() ...@@ -324,7 +333,6 @@ void runtime::gpu::GPU_ExternalFunction::compile()
writer << "void *__dso_handle = 0;\n\n"; writer << "void *__dso_handle = 0;\n\n";
writer << "// Declare all constants\n"; writer << "// Declare all constants\n";
unordered_map<Node*, string> match_functions;
for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions()) for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
{ {
for (shared_ptr<Node> node : current_function->get_ordered_ops()) for (shared_ptr<Node> node : current_function->get_ordered_ops())
...@@ -355,8 +363,12 @@ void runtime::gpu::GPU_ExternalFunction::compile() ...@@ -355,8 +363,12 @@ void runtime::gpu::GPU_ExternalFunction::compile()
writer << "\n"; writer << "\n";
unordered_map<Node*, string> match_functions;
for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions()) for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
{ {
bool temporaries_used = false;
size_t worst_case_tmp_size = 0;
set<string> output_names; set<string> output_names;
for (shared_ptr<Node> op : current_function->get_results()) for (shared_ptr<Node> op : current_function->get_results())
{ {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment