Commit abb68627 authored by Jaikrishnan Menon's avatar Jaikrishnan Menon Committed by Scott Cyphers

Jmenon/dexec (#1092)

* CPU: Direct Execution
Part 1 with bare minimum infrastructure

* Refactor: Move build related functionality to a separate TU
and external function method

* Add TU back after merge

* Remove an assert

* Remove commented-out code
parent 79dd92d3
......@@ -16,6 +16,7 @@
set(SRC
cpu_backend.cpp
cpu_builder.cpp
cpu_call_frame.cpp
cpu_emitter.cpp
cpu_external_function.cpp
......
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include <algorithm>
#include <cmath>
#include <numeric>
#include <stdexcept>
#include <string>
#include <typeindex>
#include <unordered_map>
#include <vector>
#include "ngraph/node.hpp"
#include "ngraph/op/abs.hpp"
#include "ngraph/op/acos.hpp"
#include "ngraph/op/add.hpp"
#include "ngraph/op/allreduce.hpp"
#include "ngraph/op/and.hpp"
#include "ngraph/op/asin.hpp"
#include "ngraph/op/atan.hpp"
#include "ngraph/op/avg_pool.hpp"
#include "ngraph/op/batch_norm.hpp"
#include "ngraph/op/broadcast.hpp"
#include "ngraph/op/ceiling.hpp"
#include "ngraph/op/concat.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convert.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/cos.hpp"
#include "ngraph/op/cosh.hpp"
#include "ngraph/op/divide.hpp"
#include "ngraph/op/dot.hpp"
#include "ngraph/op/equal.hpp"
#include "ngraph/op/exp.hpp"
#include "ngraph/op/floor.hpp"
#include "ngraph/op/function_call.hpp"
#include "ngraph/op/get_output_element.hpp"
#include "ngraph/op/greater.hpp"
#include "ngraph/op/greater_eq.hpp"
#include "ngraph/op/less.hpp"
#include "ngraph/op/less_eq.hpp"
#include "ngraph/op/log.hpp"
#include "ngraph/op/max.hpp"
#include "ngraph/op/max_pool.hpp"
#include "ngraph/op/maximum.hpp"
#include "ngraph/op/min.hpp"
#include "ngraph/op/minimum.hpp"
#include "ngraph/op/multiply.hpp"
#include "ngraph/op/negative.hpp"
#include "ngraph/op/not.hpp"
#include "ngraph/op/not_equal.hpp"
#include "ngraph/op/one_hot.hpp"
#include "ngraph/op/op.hpp"
#include "ngraph/op/or.hpp"
#include "ngraph/op/pad.hpp"
#include "ngraph/op/parameter.hpp"
#include "ngraph/op/power.hpp"
#include "ngraph/op/product.hpp"
#include "ngraph/op/reduce.hpp"
#include "ngraph/op/reduce_window.hpp"
#include "ngraph/op/relu.hpp"
#include "ngraph/op/remainder.hpp"
#include "ngraph/op/replace_slice.hpp"
#include "ngraph/op/reshape.hpp"
#include "ngraph/op/result.hpp"
#include "ngraph/op/reverse.hpp"
#include "ngraph/op/reverse_sequence.hpp"
#include "ngraph/op/select.hpp"
#include "ngraph/op/select_and_scatter.hpp"
#include "ngraph/op/sign.hpp"
#include "ngraph/op/sin.hpp"
#include "ngraph/op/sinh.hpp"
#include "ngraph/op/slice.hpp"
#include "ngraph/op/softmax.hpp"
#include "ngraph/op/sqrt.hpp"
#include "ngraph/op/subtract.hpp"
#include "ngraph/op/sum.hpp"
#include "ngraph/op/tan.hpp"
#include "ngraph/op/tanh.hpp"
#include "ngraph/runtime/cpu/cpu_op_annotations.hpp"
#include "ngraph/runtime/cpu/kernel/abs.hpp"
#include "ngraph/runtime/cpu/kernel/add.hpp"
#include "ngraph/runtime/cpu/kernel/multiply.hpp"
#include "ngraph/runtime/cpu/kernel/result.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/cpu/op/batch_norm_relu.hpp"
#include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/runtime/cpu/op/convert_layout.hpp"
#include "ngraph/runtime/cpu/op/lstm.hpp"
#include "ngraph/runtime/cpu/op/matmul_bias.hpp"
#include "ngraph/runtime/cpu/op/max_pool_with_indices.hpp"
#include "ngraph/runtime/cpu/op/rnn.hpp"
#include "ngraph/runtime/cpu/op/sigmoid.hpp"
#include "ngraph/type/element_type.hpp"
#include "ngraph/util.hpp"
#ifdef NGRAPH_DISTRIBUTED
#include <mpi.h>
#include "ngraph/op/allreduce.hpp"
#endif
using namespace std;
using namespace ngraph;
// Per-type kernel selection macro.
//
// Assigns to KV the instantiation of the kernel template K that matches the
// runtime element type ET:
//   KV - assignable target (the builders below use a std::function)
//   ET - expression compared against the element::* type constants
//   K  - kernel function template taking a single type argument
//
// element::boolean is mapped to the `char` instantiation.
//
// If ET matches none of the listed element types the macro throws
// std::runtime_error immediately. Without that final branch KV would be left
// unassigned and the failure would only surface later, when the stored
// functor is invoked.
//
// The macro expands to an if/else-if chain and is meant to be used as a full
// statement: SELECT_KERNEL(kernel, type, some_kernel);
#define SELECT_KERNEL(KV, ET, K)                                                                   \
    if (ET == element::boolean)                                                                    \
    {                                                                                              \
        KV = K<char>;                                                                              \
    }                                                                                              \
    else if (ET == element::f32)                                                                   \
    {                                                                                              \
        KV = K<float>;                                                                             \
    }                                                                                              \
    else if (ET == element::f64)                                                                   \
    {                                                                                              \
        KV = K<double>;                                                                            \
    }                                                                                              \
    else if (ET == element::i8)                                                                    \
    {                                                                                              \
        KV = K<int8_t>;                                                                            \
    }                                                                                              \
    else if (ET == element::i16)                                                                   \
    {                                                                                              \
        KV = K<int16_t>;                                                                           \
    }                                                                                              \
    else if (ET == element::i32)                                                                   \
    {                                                                                              \
        KV = K<int32_t>;                                                                           \
    }                                                                                              \
    else if (ET == element::i64)                                                                   \
    {                                                                                              \
        KV = K<int64_t>;                                                                           \
    }                                                                                              \
    else if (ET == element::u8)                                                                    \
    {                                                                                              \
        KV = K<uint8_t>;                                                                           \
    }                                                                                              \
    else if (ET == element::u16)                                                                   \
    {                                                                                              \
        KV = K<uint16_t>;                                                                          \
    }                                                                                              \
    else if (ET == element::u32)                                                                   \
    {                                                                                              \
        KV = K<uint32_t>;                                                                          \
    }                                                                                              \
    else if (ET == element::u64)                                                                   \
    {                                                                                              \
        KV = K<uint64_t>;                                                                          \
    }                                                                                              \
    else                                                                                           \
    {                                                                                              \
        throw std::runtime_error("Unsupported element type in CPU builder kernel selection");      \
    }
namespace ngraph
{
namespace runtime
{
namespace cpu
{
// Builds the functor for an element-wise Add node and appends it to the
// external function's functor list.
//
// The kernel instantiation is chosen from the element type of the first
// output. The functor keeps references to the tensor_data slots of both
// arguments and the output, so it passes whatever pointers those slots hold
// at the moment it is invoked.
template <>
void Builder::BUILDER_DECL(ngraph::op::Add)
{
    std::function<void(void*, void*, void*, size_t)> add_kernel;
    SELECT_KERNEL(add_kernel, out[0].get_element_type(), runtime::cpu::kernel::add);
    const auto count = out[0].get_size();

    // Look up (creating if absent) the pointer slot of every tensor involved.
    auto& slots = external_function->get_tensor_data();
    auto& lhs = slots[args[0].get_name()];
    auto& rhs = slots[args[1].get_name()];
    auto& dst = slots[out[0].get_name()];

    // Slots are captured by reference, kernel and element count by value.
    external_function->get_functors().emplace_back(
        [&lhs, &rhs, &dst, add_kernel, count](CPURuntimeContext* /* ctx */) {
            add_kernel(lhs, rhs, dst, count);
        });
}
// Builds the functor for an element-wise Multiply node and appends it to the
// external function's functor list.
//
// The kernel instantiation is chosen from the element type of the first
// output. The functor keeps references to the tensor_data slots of both
// arguments and the output, so it passes whatever pointers those slots hold
// at the moment it is invoked.
template <>
void Builder::BUILDER_DECL(ngraph::op::Multiply)
{
    std::function<void(void*, void*, void*, size_t)> mul_kernel;
    SELECT_KERNEL(mul_kernel, out[0].get_element_type(), runtime::cpu::kernel::multiply);
    const auto count = out[0].get_size();

    // Look up (creating if absent) the pointer slot of every tensor involved.
    auto& slots = external_function->get_tensor_data();
    auto& lhs = slots[args[0].get_name()];
    auto& rhs = slots[args[1].get_name()];
    auto& dst = slots[out[0].get_name()];

    // Slots are captured by reference, kernel and element count by value.
    external_function->get_functors().emplace_back(
        [&lhs, &rhs, &dst, mul_kernel, count](CPURuntimeContext* /* ctx */) {
            mul_kernel(lhs, rhs, dst, count);
        });
}
// Builds the functor for an element-wise Abs node and appends it to the
// external function's functor list.
//
// The kernel instantiation is chosen from the element type of the first
// output. The functor keeps references to the tensor_data slots of the
// argument and the output, so it passes whatever pointers those slots hold
// at the moment it is invoked.
template <>
void Builder::BUILDER_DECL(ngraph::op::Abs)
{
    std::function<void(void*, void*, size_t)> abs_kernel;
    SELECT_KERNEL(abs_kernel, out[0].get_element_type(), runtime::cpu::kernel::abs);
    const auto count = out[0].get_size();

    // Look up (creating if absent) the pointer slot of both tensors.
    auto& slots = external_function->get_tensor_data();
    auto& src = slots[args[0].get_name()];
    auto& dst = slots[out[0].get_name()];

    // Slots are captured by reference, kernel and element count by value.
    external_function->get_functors().emplace_back(
        [&src, &dst, abs_kernel, count](CPURuntimeContext* /* ctx */) {
            abs_kernel(src, dst, count);
        });
}
// Builds the functor for a Result node and appends it to the external
// function's functor list.
//
// The functor hands the argument slot, the output slot and the element count
// (shape_size of the node's shape) to the type-specific result kernel. Both
// slots are captured by reference, so the pointers are read at invocation
// time rather than at build time.
template <>
void Builder::BUILDER_DECL(ngraph::op::Result)
{
    std::function<void(void*, void*, size_t)> copy_kernel;
    SELECT_KERNEL(copy_kernel, out[0].get_element_type(), runtime::cpu::kernel::result);

    // Look up (creating if absent) the pointer slot of both tensors.
    auto& slots = external_function->get_tensor_data();
    auto& src = slots[args[0].get_name()];
    auto& dst = slots[out[0].get_name()];

    const auto count = shape_size(node->get_shape());

    external_function->get_functors().emplace_back(
        [&src, &dst, copy_kernel, count](CPURuntimeContext* /* ctx */) {
            copy_kernel(src, dst, count);
        });
}
// Builds the functor for a Constant node and appends it to the external
// function's functor list.
//
// At build time this collects the tensor_data slot of every function result
// whose node pointer equals this constant node, and snapshots the constant's
// current data pointer and size. When invoked, the functor copies the
// constant's bytes into the address stored in each collected slot.
//
// NOTE(review): the match compares each entry of get_results() with the
// Constant node itself; confirm that this can ever be true, otherwise the
// functor never copies anything.
template <>
void Builder::BUILDER_DECL(ngraph::op::Constant)
{
    auto& slots = external_function->get_tensor_data();

    // Addresses of the pointer slots to fill; the slots are dereferenced only
    // when the functor runs.
    vector<void**> targets;
    for (auto& result : external_function->get_function()->get_results())
    {
        if (result.get() != node)
        {
            continue;
        }
        targets.push_back(&slots[result->get_output_tensor(0).get_name()]);
    }

    // The constant's data pointer is taken by value here, i.e. at build time.
    void* const_data = slots[node->get_output_tensor(0).get_name()];
    auto byte_count = node->get_output_tensor(0).size();

    external_function->get_functors().emplace_back(
        [targets, const_data, byte_count](CPURuntimeContext* /* ctx */) {
            for (void** slot : targets)
            {
                memcpy(*slot, const_data, byte_count);
            }
        });
}
// Shorthand producing the std::type_index key for an op class.
#define TI(x) type_index(typeid(x))

// Dispatch table for direct execution: maps the dynamic type of a graph node
// to the builder that emits its functor. Parameter nodes map to Builder::nop;
// op types that have no entry here are not supported by the builder.
const BuildOpMap build_dispatcher{
    {TI(ngraph::op::Add), &Builder::build<ngraph::op::Add>},
    {TI(ngraph::op::Multiply), &Builder::build<ngraph::op::Multiply>},
    {TI(ngraph::op::Parameter), &Builder::nop},
    {TI(ngraph::op::Abs), &Builder::build<ngraph::op::Abs>},
    {TI(ngraph::op::Result), &Builder::build<ngraph::op::Result>},
    {TI(ngraph::op::Constant), &Builder::build<ngraph::op::Constant>}};
}
}
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <string>
#include <vector>
#include "ngraph/node.hpp"
#include "ngraph/runtime/cpu/cpu_external_function.hpp"
#include "ngraph/runtime/cpu/cpu_tensor_view_wrapper.hpp"
// Expands to the name and parameter list of Builder::build<op_name>, so that
// an explicit specialization can be written as
//     template <> void Builder::BUILDER_DECL(SomeOp) { ... }
// Inside such a body the parameters are available as `external_function`,
// `node`, `args` and `out`.
#define BUILDER_DECL(op_name) \
build<op_name>(CPU_ExternalFunction * external_function, \
const ngraph::Node* node, \
const std::vector<TensorViewWrapper>& args, \
const std::vector<TensorViewWrapper>& out)
namespace ngraph
{
namespace runtime
{
namespace cpu
{
// Signature shared by all op builders: the external function being built,
// the node to build, and wrappers for the node's input and output tensors.
using BuildOpFunction =
std::function<void(CPU_ExternalFunction* external_function,
const ngraph::Node*,
const std::vector<TensorViewWrapper>& inputs,
const std::vector<TensorViewWrapper>& outputs)>;
// Lookup table from an op's std::type_index to its builder.
using BuildOpMap = std::unordered_map<std::type_index, BuildOpFunction>;
// Table instance; declared here and defined in another translation unit.
extern const BuildOpMap build_dispatcher;
class Builder
{
public:
template <typename OP>
static void build(CPU_ExternalFunction* external_function,
const ngraph::Node* node,
const std::vector<TensorViewWrapper>& args,
const std::vector<TensorViewWrapper>& out)
{
throw std::runtime_error("Unimplemented op in CPU builder");
}
static void nop(CPU_ExternalFunction* external_function,
const ngraph::Node* node,
const std::vector<TensorViewWrapper>& args,
const std::vector<TensorViewWrapper>& out)
{
}
static void buildBatchNorm(CPU_ExternalFunction* external_function,
const ngraph::Node* node,
const std::vector<TensorViewWrapper>& args,
const std::vector<TensorViewWrapper>& out,
bool append_relu = false);
};
}
}
}
......@@ -63,7 +63,14 @@ void runtime::cpu::CPU_CallFrame::call(
}
// Invoke compiled computation
m_compiled_function(inputs.data(), outputs.data(), ctx);
if (!m_external_function->is_direct_execution())
{
m_compiled_function(inputs.data(), outputs.data(), ctx);
}
else
{
m_external_function->get_executor()(ctx, inputs, outputs);
}
if (runtime::cpu::IsTracingEnabled())
{
......
......@@ -89,7 +89,6 @@
#include "ngraph/op/sum.hpp"
#include "ngraph/op/tan.hpp"
#include "ngraph/op/tanh.hpp"
#include "ngraph/runtime/cpu/cpu_emitter.hpp"
#include "ngraph/runtime/cpu/cpu_kernel_emitters.hpp"
#include "ngraph/runtime/cpu/cpu_op_annotations.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
......
......@@ -109,7 +109,9 @@
#include "ngraph/pass/memory_layout.hpp"
#include "ngraph/pass/nop_elimination.hpp"
#include "ngraph/pass/result_copy_elimination.hpp"
#include "ngraph/runtime/aligned_buffer.hpp"
#include "ngraph/runtime/cpu/cpu_backend.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/cpu_call_frame.hpp"
#include "ngraph/runtime/cpu/cpu_emitter.hpp"
#include "ngraph/runtime/cpu/cpu_external_function.hpp"
......@@ -314,6 +316,8 @@ runtime::cpu::CPU_ExternalFunction::CPU_ExternalFunction(
, m_emit_timing(false)
, m_use_tbb(std::getenv("NGRAPH_CPU_USE_TBB") != nullptr)
, m_function_name(function->get_name())
, m_is_built(false)
, m_direct_execution(std::getenv("NGRAPH_DEX") != nullptr)
{
}
......@@ -681,25 +685,6 @@ using namespace ngraph::runtime;
}
}
// create output alias map
/*
size_t output_index = 0;
unordered_map<descriptor::TensorView*, vector<size_t>> output_alias_map;
vector<size_t> aliases;
for (size_t i = 0; i < current_function->get_output_size(); ++i)
{
shared_ptr<Node> op = current_function->get_output_op(i);
shared_ptr<descriptor::TensorView> otv = op->get_output_tensor_view();
vector<size_t>& al = output_alias_map[otv.get()];
al.push_back(output_index);
if (al.size() > 1)
{
aliases.push_back(output_index);
}
output_index++;
}
*/
// Add outputs to the variable name map
for (size_t i = 0; i < current_function->get_output_size(); ++i)
{
......@@ -960,6 +945,34 @@ using namespace ngraph::runtime;
writer += "}\n\n";
}
// TODO: Cleanup and make this a utility function
file_util::make_directory(s_output_dir);
string filename = file_util::path_join(s_output_dir, m_function_name + "_codegen.cpp");
ofstream out(filename);
string code = writer.get_code();
out << code;
out.close();
m_compiler.reset(new codegen::Compiler());
m_execution_engine.reset(new codegen::ExecutionEngine());
m_compiler->set_precompiled_header_source(pch_header_source);
auto codegen_module = m_compiler->compile(code);
if (codegen_module == nullptr)
{
throw runtime_error("function failed to compile");
}
m_execution_engine->add_module(codegen_module);
m_execution_engine->finalize();
m_compiled_function = m_execution_engine->find_function<EntryPoint_t>(m_function_name);
if (m_compiled_function == nullptr)
{
throw runtime_error("could not find compiled function");
}
// Store layouts assigned for arguments
for (const auto& parameter : m_function->get_parameters())
{
......@@ -975,6 +988,7 @@ using namespace ngraph::runtime;
static_pointer_cast<runtime::cpu::LayoutDescriptor>(tv->get_tensor_view_layout()));
}
}
// Store layouts assigned for results
if (!result_layout_descriptors.empty())
{
......@@ -995,35 +1009,187 @@ using namespace ngraph::runtime;
}
}
// TODO: Cleanup and make this a utility function
file_util::make_directory(s_output_dir);
string filename = file_util::path_join(s_output_dir, m_function_name + "_codegen.cpp");
ofstream out(filename);
string code = writer.get_code();
out << code;
out.close();
m_is_compiled = true;
if (m_release_function)
{
release_function();
}
}
m_compiler.reset(new codegen::Compiler());
m_execution_engine.reset(new codegen::ExecutionEngine());
void runtime::cpu::CPU_ExternalFunction::build()
{
if (m_is_built)
{
return;
}
m_compiler->set_precompiled_header_source(pch_header_source);
m_mkldnn_emitter.reset(new MKLDNNEmitter());
auto codegen_module = m_compiler->compile(code);
ngraph::pass::Manager pass_manager;
if (codegen_module == nullptr)
//nv_cwi is required only by some frontends
//in which case they should run this pass(CPUWorkspaceInsertion) explicitly
NodeVector nv_cwi;
pass_manager.register_pass<ngraph::pass::NopElimination>();
pass_manager.register_pass<runtime::cpu::pass::LSTMFusion>();
pass_manager.register_pass<runtime::cpu::pass::RNNFusion>();
pass_manager.register_pass<runtime::cpu::pass::ConcatInputs>();
pass_manager.register_pass<ngraph::pass::AlgebraicSimplification>();
pass_manager.register_pass<ngraph::pass::CommonSubexpressionElimination>();
pass_manager.register_pass<ngraph::pass::CoreFusion>();
pass_manager.register_pass<runtime::cpu::pass::CPUFusion>();
pass_manager.register_pass<runtime::cpu::pass::CPUWorkspaceInsertion>(nv_cwi);
pass_manager.register_pass<runtime::cpu::pass::CPUAssignment>(this);
pass_manager.register_pass<runtime::cpu::pass::CPULayout>(this);
pass_manager.register_pass<runtime::cpu::pass::CPUPostLayoutOptimizations>();
pass_manager.register_pass<runtime::cpu::pass::CPUShuffleFolding>();
pass_manager.register_pass<ngraph::pass::ResultCopyElimination>();
pass_manager.register_pass<ngraph::pass::GetOutputElementElimination>();
pass_manager.register_pass<ngraph::pass::Liveness>();
pass_manager.register_pass<ngraph::pass::MemoryLayout>(s_memory_pool_alignment, true);
pass_manager.run_passes(m_function);
// Store layouts assigned for arguments
for (const auto& parameter : m_function->get_parameters())
{
throw runtime_error("function failed to compile");
for (size_t i = 0; i < parameter->get_output_size(); ++i)
{
auto tv = parameter->get_output_tensor_view(i);
if (tv->get_tensor_view_layout() == nullptr)
{
throw ngraph_error("layout missing on function parameter's tensor view: " +
tv->get_name());
}
parameter_layout_descriptors.emplace_back(
static_pointer_cast<runtime::cpu::LayoutDescriptor>(tv->get_tensor_view_layout()));
}
}
m_execution_engine->add_module(codegen_module);
m_execution_engine->finalize();
m_compiled_function = m_execution_engine->find_function<EntryPoint_t>(m_function_name);
if (m_compiled_function == nullptr)
// Store layouts assigned for results
if (!result_layout_descriptors.empty())
{
throw runtime_error("could not find compiled function");
throw ngraph_error("Function output layouts should not be pre-assigned");
}
for (size_t i = 0; i < m_function->get_output_size(); ++i)
{
const auto& output = m_function->get_output_op(i);
for (size_t j = 0; j < output->get_output_size(); ++j)
{
auto tv = output->get_output_tensor_view(j);
if (tv->get_tensor_view_layout() == nullptr)
{
throw ngraph_error("layout missing on function output tensor: " + tv->get_name());
}
result_layout_descriptors.emplace_back(
static_pointer_cast<runtime::cpu::LayoutDescriptor>(tv->get_tensor_view_layout()));
}
}
m_is_compiled = true;
// Build executor
// Inputs
size_t arg_index = 0;
for (auto& param : m_function->get_parameters())
{
for (size_t i = 0; i < param->get_output_size(); ++i)
{
shared_ptr<descriptor::TensorView> tv = param->get_output_tensor_view(i);
function_input_index[tv->get_tensor().get_name()] = arg_index;
arg_index++;
}
}
// Outputs
for (size_t i = 0; i < m_function->get_output_size(); ++i)
{
shared_ptr<Node> op = m_function->get_output_op(i);
shared_ptr<descriptor::TensorView> tv = op->get_output_tensor_view();
function_output_index[tv->get_tensor().get_name()] = i;
auto res = std::dynamic_pointer_cast<ngraph::op::Result>(op);
if (!res->needs_copy())
{
shared_ptr<descriptor::TensorView> itv =
res->get_inputs().at(0).get_output().get_tensor_view();
function_output_index[itv->get_tensor().get_name()] = i;
}
}
// Intermediates
if (m_function->get_temporary_pool_size())
{
m_memory_buffer_sizes.push_back(m_function->get_temporary_pool_size());
for (auto& node : m_function->get_ordered_ops())
{
for (auto tensor : node->liveness_new_list)
{
intermediates_offsets[tensor->get_name()] = tensor->get_pool_offset();
}
}
}
// Constants
for (auto& node : m_function->get_ordered_ops())
{
const auto c = dynamic_cast<ngraph::op::Constant*>(node.get());
if (c)
{
auto tv = node->get_outputs()[0].get_tensor_view();
tensor_data[tv->get_tensor().get_name()] = const_cast<void*>(c->get_data_ptr());
}
}
for (shared_ptr<Node> node : m_function->get_ordered_ops())
{
auto& n = *node; // Work around a compiler warning (*node inside typeid may have effects
// with shared pointers, which is fine here but clang doesn't like it.)
auto handler = build_dispatcher.find(type_index(typeid(n)));
if (handler == build_dispatcher.end())
{
throw ngraph_error("Unhandled op during code generation : " + node->description());
}
vector<TensorViewWrapper> in;
for (const descriptor::Input& input : node->get_inputs())
{
const descriptor::Output& output = input.get_output();
shared_ptr<descriptor::TensorView> tv = output.get_tensor_view();
in.push_back(TensorViewWrapper(tv, tv->get_tensor().get_name()));
}
vector<TensorViewWrapper> out;
for (const descriptor::Output& output : node->get_outputs())
{
shared_ptr<descriptor::TensorView> tv = output.get_tensor_view();
out.push_back(TensorViewWrapper(tv, tv->get_tensor().get_name()));
}
handler->second(this, node.get(), in, out);
}
executor = [&](CPURuntimeContext* ctx, vector<void*>& inputs, vector<void*>& outputs) {
for (auto& p : intermediates_offsets)
{
tensor_data[p.first] =
static_cast<uint8_t*>(ctx->memory_buffers[0]->get_ptr()) + p.second;
}
for (const auto& p : function_input_index)
{
tensor_data[p.first] = inputs[p.second];
}
for (const auto& p : function_output_index)
{
tensor_data[p.first] = outputs[p.second];
}
for (const auto& functor : functors)
{
functor(ctx);
}
};
m_is_built = true;
if (m_release_function)
{
release_function();
......@@ -1033,11 +1199,16 @@ using namespace ngraph::runtime;
// Creates a call frame for this external function, preparing it first if
// that has not happened yet:
//   - code-generation mode: compile() is run once;
//   - direct-execution mode: build() is run once instead.
//
// m_compiled_function is handed to the call frame in both modes; in
// direct-execution mode compile() is never called from here.
shared_ptr<ngraph::runtime::cpu::CPU_CallFrame>
    runtime::cpu::CPU_ExternalFunction::make_call_frame()
{
    // A single braced guard per mode; no unbraced outer `if` wrapping it.
    if (!m_direct_execution && !m_is_compiled)
    {
        compile();
    }

    if (m_direct_execution && !m_is_built)
    {
        build();
    }

    return make_shared<ngraph::runtime::cpu::CPU_CallFrame>(shared_from_this(),
                                                            m_compiled_function);
}
......
......@@ -17,6 +17,7 @@
#pragma once
#include <functional>
#include <list>
#include <map>
#include <memory>
#include <string>
......@@ -94,7 +95,19 @@ namespace ngraph
// Temporary Memory Pool alignment
static const size_t s_memory_pool_alignment;
// Returns a mutable reference to the list of per-op functors, so callers can
// append to it.
std::list<std::function<void(CPURuntimeContext*)>>& get_functors()
{
return functors;
}
// Returns a mutable reference to the map from tensor name to data pointer.
std::unordered_map<std::string, void*>& get_tensor_data() { return tensor_data; }
// Returns a reference to the executor callable, which takes the runtime
// context followed by the input and output pointer vectors.
std::function<void(CPURuntimeContext*, std::vector<void*>&, std::vector<void*>&)>&
get_executor()
{
return executor;
}
// Reports whether this function runs in direct-execution mode.
bool is_direct_execution() const { return m_direct_execution; }
protected:
void build();
void compile();
private:
......@@ -126,6 +139,7 @@ namespace ngraph
std::unique_ptr<codegen::ExecutionEngine> m_execution_engine;
bool m_emit_timing;
bool m_use_tbb;
std::unordered_map<std::string, std::string> m_variable_name_map;
std::map<std::string, size_t> m_name_index_map;
......@@ -142,6 +156,15 @@ namespace ngraph
std::unique_ptr<MKLDNNEmitter> m_mkldnn_emitter;
std::string m_function_name;
// --- Direct-execution state ---
// Per-op callables, exposed for appending through get_functors().
std::list<std::function<void(CPURuntimeContext*)>> functors;
// Callable exposed through get_executor(); takes the runtime context and the
// input and output pointer vectors.
std::function<void(CPURuntimeContext*, std::vector<void*>&, std::vector<void*>&)>
executor;
// Tensor name -> data pointer.
std::unordered_map<std::string, void*> tensor_data;
// Tensor name -> size_t value; presumably an offset into a memory pool for
// intermediate tensors (verify against the code that fills it).
std::unordered_map<std::string, size_t> intermediates_offsets;
// Tensor name -> size_t value; presumably the position in the input / output
// pointer vectors passed to the executor (verify against the code that fills it).
std::unordered_map<std::string, size_t> function_input_index, function_output_index;
// Presumably set once build() has completed - TODO confirm.
bool m_is_built;
// Backing field of is_direct_execution().
bool m_direct_execution;
};
}
}
......
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType>
void abs(void* input0, void* output, size_t count)
{
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in0(
static_cast<ElementType*>(input0), in_dims);
out.device(eigen::global_thread_pool_device) = in0.abs();
}
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType>
void add(void* input0, void* input1, void* output, size_t count)
{
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in0(
static_cast<ElementType*>(input0), in_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in1(
static_cast<ElementType*>(input1), in_dims);
out.device(eigen::global_thread_pool_device) = in0 + in1;
}
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType>
void multiply(void* input0, void* input1, void* output, size_t count)
{
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in0(
static_cast<ElementType*>(input0), in_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in1(
static_cast<ElementType*>(input1), in_dims);
out.device(eigen::global_thread_pool_device) = in0 * in1;
}
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
// Copies `count` elements of type ElementType from `arg` to `out`.
//
// The copy is a raw byte copy of sizeof(ElementType) * count bytes done with
// memcpy.
template <typename ElementType>
void result(const void* arg, void* out, size_t count)
{
    const size_t byte_count = sizeof(ElementType) * count;
    memcpy(out, arg, byte_count);
}
}
}
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment