Commit 178de84e authored by fenglei.tian

external_function bug fix

parent 2db7022e
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
#include <cstdlib>
#include <fstream>
......@@ -22,6 +20,12 @@
#include <typeindex>
#include <typeinfo>
#include <unordered_map>
#include <cuda_runtime.h>
#include <cudnn_v7.h>
#include "cublas_v2.h"
#include "cuda.h"
#include "ngraph/codegen/code_writer.hpp"
#include "ngraph/codegen/compiler.hpp"
#include "ngraph/codegen/execution_engine.hpp"
......@@ -224,8 +228,8 @@ void runtime::gpu::GPU_ExternalFunction::compile()
writer +=
R"(// Generated by the NGraph GPU backend
#include "cuda_runtime.h"
#include "cudnn_v7.h"
#include <cuda_runtime.h>
#include <cudnn_v7.h>
#include "cublas_v2.h"
#include "cuda.h"
......@@ -249,6 +253,11 @@ void runtime::gpu::GPU_ExternalFunction::compile()
string pch_header_source = writer.get_code();
writer += R"(
using namespace ngraph;
using namespace std;
)";
if (m_emit_timing)
{
writer << "// Declare debug timers\n";
......@@ -324,7 +333,6 @@ void runtime::gpu::GPU_ExternalFunction::compile()
writer << "void *__dso_handle = 0;\n\n";
writer << "// Declare all constants\n";
unordered_map<Node*, string> match_functions;
for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
{
for (shared_ptr<Node> node : current_function->get_ordered_ops())
......@@ -355,8 +363,12 @@ void runtime::gpu::GPU_ExternalFunction::compile()
writer << "\n";
unordered_map<Node*, string> match_functions;
for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
{
bool temporaries_used = false;
size_t worst_case_tmp_size = 0;
set<string> output_names;
for (shared_ptr<Node> op : current_function->get_results())
{
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment