Commit bdee1ec5 authored by Robert Kimball's avatar Robert Kimball

update

parent e090c6aa
...@@ -52,14 +52,14 @@ runtime::gcpu::GCPUBackend::GCPUBackend(const vector<string>& unsupported_op_nam ...@@ -52,14 +52,14 @@ runtime::gcpu::GCPUBackend::GCPUBackend(const vector<string>& unsupported_op_nam
shared_ptr<runtime::Tensor> runtime::gcpu::GCPUBackend::create_tensor(const element::Type& type, shared_ptr<runtime::Tensor> runtime::gcpu::GCPUBackend::create_tensor(const element::Type& type,
const Shape& shape) const Shape& shape)
{ {
return make_shared<runtime::HostTensor>(type, shape, this); return make_shared<runtime::HostTensor>(type, shape);
} }
shared_ptr<runtime::Tensor> runtime::gcpu::GCPUBackend::create_tensor(const element::Type& type, shared_ptr<runtime::Tensor> runtime::gcpu::GCPUBackend::create_tensor(const element::Type& type,
const Shape& shape, const Shape& shape,
void* memory_pointer) void* memory_pointer)
{ {
return make_shared<runtime::HostTensor>(type, shape, memory_pointer, this); return make_shared<runtime::HostTensor>(type, shape, memory_pointer);
} }
shared_ptr<runtime::Executable> shared_ptr<runtime::Executable>
......
...@@ -15,17 +15,22 @@ ...@@ -15,17 +15,22 @@
//***************************************************************************** //*****************************************************************************
#include "ngraph/runtime/generic_cpu/gcpu_executable.hpp" #include "ngraph/runtime/generic_cpu/gcpu_executable.hpp"
#include "ngraph/cpio.hpp"
#include "ngraph/descriptor/layout/dense_tensor_layout.hpp" #include "ngraph/descriptor/layout/dense_tensor_layout.hpp"
#include "ngraph/except.hpp" #include "ngraph/except.hpp"
#include "ngraph/op/convert.hpp" #include "ngraph/op/convert.hpp"
#include "ngraph/op/select.hpp" #include "ngraph/op/select.hpp"
#include "ngraph/op/util/binary_elementwise_comparison.hpp" #include "ngraph/op/util/binary_elementwise_comparison.hpp"
#include "ngraph/pass/assign_layout.hpp" #include "ngraph/pass/assign_layout.hpp"
#include "ngraph/pass/core_fusion.hpp"
#include "ngraph/pass/fused_op_decomposition.hpp"
#include "ngraph/pass/implicit_broadcast_elimination.hpp"
#include "ngraph/pass/like_replacement.hpp" #include "ngraph/pass/like_replacement.hpp"
#include "ngraph/pass/liveness.hpp" #include "ngraph/pass/liveness.hpp"
#include "ngraph/pass/manager.hpp" #include "ngraph/pass/manager.hpp"
#include "ngraph/pass/memory_layout.hpp" #include "ngraph/pass/memory_layout.hpp"
#include "ngraph/runtime/backend_manager.hpp" #include "ngraph/runtime/backend_manager.hpp"
#include "ngraph/serializer.hpp"
#include "ngraph/util.hpp" #include "ngraph/util.hpp"
using namespace std; using namespace std;
...@@ -35,21 +40,35 @@ using descriptor::layout::DenseTensorLayout; ...@@ -35,21 +40,35 @@ using descriptor::layout::DenseTensorLayout;
runtime::gcpu::GCPUExecutable::GCPUExecutable(const shared_ptr<Function>& function, runtime::gcpu::GCPUExecutable::GCPUExecutable(const shared_ptr<Function>& function,
bool enable_performance_collection) bool enable_performance_collection)
: m_is_compiled{true}
, m_performance_counters_enabled{enable_performance_collection}
{ {
m_function = clone_function(*function);
pass::Manager pass_manager;
pass_manager.register_pass<pass::LikeReplacement>();
pass_manager.register_pass<pass::FusedOpDecomposition>();
pass_manager.register_pass<pass::ImplicitBroadcastElimination>();
pass_manager.register_pass<pass::AssignLayout<DenseTensorLayout>>();
pass_manager.register_pass<pass::Liveness>();
pass_manager.run_passes(m_function);
for (const shared_ptr<Node>& node : m_function->get_ordered_ops())
{ {
m_is_compiled = true; m_wrapped_nodes.emplace_back(node);
pass::Manager pass_manager; }
pass_manager.register_pass<pass::LikeReplacement>(); set_parameters_and_results(*m_function);
pass_manager.register_pass<pass::AssignLayout<DenseTensorLayout>>(); }
pass_manager.register_pass<pass::Liveness>();
pass_manager.run_passes(function);
for (const shared_ptr<Node>& node : function->get_ordered_ops()) runtime::gcpu::GCPUExecutable::GCPUExecutable(const std::string& model_string)
{ : m_is_compiled{true}
m_wrapped_nodes.emplace_back(node); , m_performance_counters_enabled{false}
} {
m_function = deserialize(model_string);
for (const shared_ptr<Node>& node : m_function->get_ordered_ops())
{
m_wrapped_nodes.emplace_back(node);
} }
set_parameters_and_results(*function); set_parameters_and_results(*m_function);
} }
bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor>>& outputs, bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor>>& outputs,
...@@ -82,7 +101,7 @@ bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor ...@@ -82,7 +101,7 @@ bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor
{ {
for (size_t i = 0; i < param->get_output_size(); ++i) for (size_t i = 0; i < param->get_output_size(); ++i)
{ {
descriptor::Tensor* tensor = param->get_output_tensor_ptr(i).get(); descriptor::Tensor* tensor = &param->output(i).get_tensor();
tensor_map.insert({tensor, func_inputs[input_count++]}); tensor_map.insert({tensor, func_inputs[input_count++]});
} }
} }
...@@ -95,14 +114,14 @@ bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor ...@@ -95,14 +114,14 @@ bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor
{ {
throw ngraph_error("One of function's outputs isn't op::Result"); throw ngraph_error("One of function's outputs isn't op::Result");
} }
descriptor::Tensor* tensor = output->get_output_tensor_ptr(0).get(); descriptor::Tensor* tensor = &output->output(0).get_tensor();
tensor_map.insert({tensor, func_outputs[output_count]}); tensor_map.insert({tensor, func_outputs[output_count]});
} }
// for each ordered op in the graph // for each ordered op in the graph
for (const NodeWrapper& wrapped : m_wrapped_nodes) for (const NodeWrapper& wrapped : m_wrapped_nodes)
{ {
const Node* op = &wrapped.get_node(); auto op = wrapped.get_node();
auto type_id = wrapped.get_typeid(); auto type_id = wrapped.get_typeid();
if (type_id == OP_TYPEID::Parameter) if (type_id == OP_TYPEID::Parameter)
{ {
...@@ -111,9 +130,9 @@ bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor ...@@ -111,9 +130,9 @@ bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor
// get op inputs from map // get op inputs from map
vector<shared_ptr<HostTensor>> op_inputs; vector<shared_ptr<HostTensor>> op_inputs;
for (const descriptor::Input& input : op->get_inputs()) for (auto input : op->inputs())
{ {
descriptor::Tensor* tensor = input.get_output().get_tensor_ptr().get(); descriptor::Tensor* tensor = &input.get_tensor();
op_inputs.push_back(tensor_map.at(tensor)); op_inputs.push_back(tensor_map.at(tensor));
} }
...@@ -121,14 +140,14 @@ bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor ...@@ -121,14 +140,14 @@ bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor
vector<shared_ptr<HostTensor>> op_outputs; vector<shared_ptr<HostTensor>> op_outputs;
for (size_t i = 0; i < op->get_output_size(); ++i) for (size_t i = 0; i < op->get_output_size(); ++i)
{ {
descriptor::Tensor* tensor = op->get_output_tensor_ptr(i).get(); descriptor::Tensor* tensor = &op->output(i).get_tensor();
shared_ptr<HostTensor> host_tensor; shared_ptr<HostTensor> host_tensor;
auto it = tensor_map.find(tensor); auto it = tensor_map.find(tensor);
if (it == tensor_map.end()) if (it == tensor_map.end())
{ {
const Shape& shape = op->get_output_shape(i); const Shape& shape = op->get_output_shape(i);
const element::Type& type = op->get_output_element_type(i); const element::Type& type = op->get_output_element_type(i);
string name = op->get_output_tensor(i).get_name(); string name = op->output(i).get_tensor().get_name();
host_tensor = make_shared<runtime::HostTensor>(type, shape, name); host_tensor = make_shared<runtime::HostTensor>(type, shape, name);
tensor_map.insert({tensor, host_tensor}); tensor_map.insert({tensor, host_tensor});
} }
...@@ -177,7 +196,7 @@ bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor ...@@ -177,7 +196,7 @@ bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor
} }
if (m_nan_check_enabled) if (m_nan_check_enabled)
{ {
perform_nan_check(op_outputs, op); perform_nan_check(op_outputs, op.get());
} }
} }
...@@ -186,19 +205,9 @@ bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor ...@@ -186,19 +205,9 @@ bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor
void runtime::gcpu::GCPUExecutable::generate_calls(const element::Type& type, void runtime::gcpu::GCPUExecutable::generate_calls(const element::Type& type,
const NodeWrapper& op, const NodeWrapper& op,
const vector<shared_ptr<HostTensor>>& outputs, const vector<shared_ptr<HostTensor>>& out,
const vector<shared_ptr<HostTensor>>& inputs) const vector<shared_ptr<HostTensor>>& in)
{ {
vector<void*> out;
vector<const void*> in;
for (auto t : outputs)
{
out.push_back(t->get_data_ptr());
}
for (auto t : inputs)
{
in.push_back(t->get_data_ptr());
}
stringstream ss; stringstream ss;
switch (type.get_type_enum()) switch (type.get_type_enum())
{ {
...@@ -216,7 +225,8 @@ void runtime::gcpu::GCPUExecutable::generate_calls(const element::Type& type, ...@@ -216,7 +225,8 @@ void runtime::gcpu::GCPUExecutable::generate_calls(const element::Type& type,
case element::Type_t::undefined: case element::Type_t::undefined:
case element::Type_t::dynamic: case element::Type_t::dynamic:
case element::Type_t::bf16: case element::Type_t::bf16:
ss << "unsupported element type " << type << " op " << op.get_node().get_name(); case element::Type_t::f16:
ss << "unsupported element type " << type << " op " << op.get_node()->get_name();
throw ngraph_error(ss.str()); throw ngraph_error(ss.str());
} }
} }
...@@ -229,11 +239,9 @@ void runtime::gcpu::GCPUExecutable::set_nan_check(bool enable) ...@@ -229,11 +239,9 @@ void runtime::gcpu::GCPUExecutable::set_nan_check(bool enable)
vector<runtime::PerformanceCounter> runtime::gcpu::GCPUExecutable::get_performance_data() const vector<runtime::PerformanceCounter> runtime::gcpu::GCPUExecutable::get_performance_data() const
{ {
vector<runtime::PerformanceCounter> rc; vector<runtime::PerformanceCounter> rc;
for (const pair<const Node*, stopwatch> p : m_timer_map) for (const pair<shared_ptr<const Node>, stopwatch> p : m_timer_map)
{ {
rc.emplace_back(p.first->get_name().c_str(), rc.emplace_back(p.first, p.second.get_total_microseconds(), p.second.get_call_count());
p.second.get_total_microseconds(),
p.second.get_call_count());
} }
return rc; return rc;
} }
...@@ -286,3 +294,12 @@ void runtime::gcpu::GCPUExecutable::perform_nan_check(const vector<shared_ptr<Ho ...@@ -286,3 +294,12 @@ void runtime::gcpu::GCPUExecutable::perform_nan_check(const vector<shared_ptr<Ho
arg_number++; arg_number++;
} }
} }
void runtime::gcpu::GCPUExecutable::save(ostream& out)
{
cpio::Writer writer(out);
string si = "INTERPRETER Save File 1.0";
writer.write("save_info", si.data(), si.size());
string model = serialize(m_function, 0);
writer.write("model", model.data(), model.size());
}
...@@ -51,7 +51,7 @@ class ngraph::runtime::gcpu::NodeWrapper ...@@ -51,7 +51,7 @@ class ngraph::runtime::gcpu::NodeWrapper
public: public:
NodeWrapper(const std::shared_ptr<const ngraph::Node>& node); NodeWrapper(const std::shared_ptr<const ngraph::Node>& node);
const Node& get_node() const { return *m_node; } std::shared_ptr<const Node> get_node() const { return m_node; }
ngraph::runtime::gcpu::OP_TYPEID get_typeid() const { return m_typeid; } ngraph::runtime::gcpu::OP_TYPEID get_typeid() const { return m_typeid; }
private: private:
std::shared_ptr<const ngraph::Node> m_node; std::shared_ptr<const ngraph::Node> m_node;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment