Unverified commit 60252edd authored by Scott Cyphers, committed by GitHub

Merge branch 'master' into ayzhuang/batch_norm_infer_relu_fusion

parents 341205cf 47342339
@@ -18,10 +18,10 @@ include(ExternalProject)
 # Includes blas 3.8.0 in mkldnn
 set(NGRAPH_MKLDNN_SHORT_VERSION 0)
-set(NGRAPH_MKLDNN_FULL_VERSION 0.19.0.0)
-set(NGRAPH_MKLDNN_VERSION "v0.19")
+set(NGRAPH_MKLDNN_FULL_VERSION 0.20.0.0)
+set(NGRAPH_MKLDNN_VERSION "v0.20")
 set(NGRAPH_MKLDNN_SUB_VERSION "2019.0.5.20190502")
-set(NGRAPH_MKLDNN_GIT_TAG "027de76")
+set(NGRAPH_MKLDNN_GIT_TAG "v0.20")
 #------------------------------------------------------------------------------
 # Fetch and install MKL-DNN
......
@@ -28,16 +28,3 @@ index f10feb20..05f47961 100644
 set_property(TARGET ${LIB_NAME} PROPERTY PUBLIC_HEADER ${HEADERS})
 target_include_directories(${LIB_NAME} PUBLIC
diff --git a/src/cpu/jit_avx512_common_conv_kernel.cpp b/src/cpu/jit_avx512_common_conv_kernel.cpp
index 1bb98fa43..b8b54401f 100644
--- a/src/cpu/jit_avx512_common_conv_kernel.cpp
+++ b/src/cpu/jit_avx512_common_conv_kernel.cpp
@@ -3055,7 +3055,7 @@ void jit_avx512_common_conv_bwd_weights_kernel_f32::bias_kernel_3d() {
void jit_avx512_common_conv_bwd_weights_kernel_f32
::compute_oh_loop_common()
{
- assert(jcp.harness == harness_mb_reduction);
+ assert(one_of(jcp.harness, harness_mb_reduction, harness_3d_reduction));
int b_pad = jcp.b_pad;
int t_pad = jcp.t_pad;
bool is_dilated = jcp.dilate_h != 0;
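The relaxed assertion leans on mkl-dnn's variadic `one_of` helper to accept either harness kind. Below is a minimal sketch of how such a helper works, assuming it behaves like the utility in mkl-dnn's `src/common/utils.hpp` (the enum and `main` are illustrative only, not the actual mkl-dnn code):

```cpp
#include <cassert>

// Variadic membership test: true if val equals any of the listed items.
template <typename T>
bool one_of(T val, T item)
{
    return val == item;
}

template <typename T, typename... Rest>
bool one_of(T val, T item, Rest... rest)
{
    return val == item || one_of(val, rest...);
}

enum harness_kind { harness_mb_reduction, harness_3d_reduction };

int main()
{
    harness_kind h = harness_3d_reduction;
    // The old assert accepted only harness_mb_reduction; the patched one
    // also admits the 3D-reduction harness.
    assert(one_of(h, harness_mb_reduction, harness_3d_reduction));
    return 0;
}
```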
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
#
@@ -73,11 +71,11 @@ author = 'Intel Corporation'
 # built documents.
 #
 # The short X.Y version.
-version = '0.22'
+version = '0.23'
 # The Documentation full version, including alpha/beta/rc tags. Some features
 # available in the latest code will not necessarily be documented first
-release = '0.22.0'
+release = '0.23.0'
 # The language for content autogenerated by Sphinx. Refer to documentation
 # for a list of supported languages.
......
@@ -9,11 +9,11 @@
 <dt>{{ _('Recent Versions') }}</dt>
 <dd><!-- Until our https://docs.ngraph.ai/ publishing is set up, we link to GitHub -->
 <ul>
-<li><a href="https://github.com/NervanaSystems/ngraph/releases/tag/v0.22.0">0.22</a></li>
+<li><a href="https://github.com/NervanaSystems/ngraph/releases/tag/v0.23.0">0.23.0</a></li>
+<li><a href="https://github.com/NervanaSystems/ngraph/releases/tag/v0.22.0">0.22.0</a></li>
 <li><a href="https://github.com/NervanaSystems/ngraph/releases/tag/v0.21.0">0.21.0</a></li>
 <li><a href="https://github.com/NervanaSystems/ngraph/releases/tag/v0.20.0">0.20.0</a></li>
 <li><a href="https://github.com/NervanaSystems/ngraph/releases/tag/v0.19.0">0.19.0</a></li>
-<li><a href="https://github.com/NervanaSystems/ngraph/releases/tag/v0.18.1">0.18.1</a></li>
 </ul></dd>
 </dl>
 <dl>
@@ -26,4 +26,4 @@
 </dd>
 </dl>
 </div>
 </div>
\ No newline at end of file
@@ -6,28 +6,30 @@ Release Notes
 nGraph is provided as source code, APIs, build scripts, and some binary formats
 for various Compiler stack configurations and use cases.
+For downloads formatted as ``.zip`` and ``tar.gz``, see
+https://github.com/NervanaSystems/ngraph/releases.
 This page includes additional documentation updates.
 We are pleased to announce the release of version |version|-doc.
-==============================
 Core updates for |version|
 ~~~~~~~~~~~~~~~~~~~~~~~~~~~
++ PlaidML support
 + More ONNX ops
-+ Optimizations
-+ Don't reseed RNG on each use
++ Elementwise divide defaults to Python semantics
++ GenerateMask seed optional
-0.22-doc
---------
-+ Initial doc and API for IntelGPU backend.
-+ DynamicBackend API.
-+ Note deprecation of support of MXNet's ``ngraph-mxnet`` PyPI.
-+ Noted changes on graph inspection options resultant from PR 3016.
-+ Added better tips and details to doc-contributor-README.
+Latest doc updates
+~~~~~~~~~~~~~~~~~~
++ Document new debug tool
++ Note deprecation of MXNet's ``ngraph-mxnet`` PyPI
++ Note default change to `svg` files for graphs and visualization
++ Add more prominent tips for contributors who find the doc-contributor-README
 .. important:: Pre-releases (``-rc-0.*``) have newer features, and are less stable.
@@ -36,8 +38,15 @@ Core updates for |version|
 Changelog on Previous Releases
 ==============================
-For downloads formatted as ``.zip`` and ``tar.gz``, see
-https://github.com/NervanaSystems/ngraph/releases.
+0.22
+----
++ More ONNX ops
++ Optimizations
++ Don't reseed RNG on each use
++ Initial doc and API for IntelGPU backend
++ DynamicBackend API
 0.21
 ----
@@ -51,12 +60,6 @@ https://github.com/NervanaSystems/ngraph/releases.
 + offset arg for tensor creation is deprecated
 + static linking support
 + Initial test of 0.21-doc
-0.21-doc
---------
-Summary of documentation-related changes:
 + Updated :doc:`doc-contributor-README` for new community-based contributions.
 + Added instructions on how to test or display the installed nGraph version.
 + Added instructions on building nGraph bridge (ngraph-bridge).
@@ -82,8 +85,6 @@ Summary of documentation-related changes:
 0.19
 ----
-**Download** `0.19.0-rc.2`_
 + More dynamic shape preparation
 + Distributed interface factored out
 + fp16 and bfloat16 types
@@ -103,9 +104,6 @@ Summary of documentation-related changes:
 0.18
 ----
-**Download** `0.18.1`_
 + Python formatting issue
 + mkl-dnn work-around
 + Event tracing improvements
@@ -118,8 +116,6 @@ Summary of documentation-related changes:
 0.17
 ----
-**Download** `0.17.0-rc.1`_
 + Allow negative padding in more places
 + Add code generation for some quantized ops
 + Preliminary dynamic shape support
@@ -131,11 +127,6 @@ Summary of documentation-related changes:
 0.16
 ----
-* **Download**: `0.16.0-rc.3`_
-* **Download** `0.16.0-rc.2`_
-* **Download** `0.16.0-rc.1`_
 + NodeInput and NodeOutput classes prepare for simplifications of Node
 + Test improvements
 + Additional quantization ops
@@ -143,11 +134,3 @@ Summary of documentation-related changes:
 + Fix memory leak
 + Concat optimization
 + Doc updates
-.. _0.20.0-rc.0: https://github.com/NervanaSystems/ngraph/releases/tag/v0.20.0-rc.0
-.. _0.19.0-rc.2: https://github.com/NervanaSystems/ngraph/releases/tag/v0.19.0-rc.2
-.. _0.18.1: https://github.com/NervanaSystems/ngraph/releases/tag/v0.18.1
-.. _0.17.0-rc.1: https://github.com/NervanaSystems/ngraph/releases/tag/v0.17.0-rc.1
-.. _0.16.0-rc.3: https://github.com/NervanaSystems/ngraph/releases/tag/v0.16.0-rc.3
-.. _0.16.0-rc.2: https://github.com/NervanaSystems/ngraph/releases/tag/v0.16.0-rc.2
-.. _0.16.0-rc.1: https://github.com/NervanaSystems/ngraph/releases/tag/v0.16.0-rc.1
 pytest
 tox
+pydocstyle==3.0.0
 flake8
 flake8-commas
 flake8-comprehensions
......
@@ -370,7 +370,6 @@ set (SRC
 op/util/index_reduction.hpp
 op/util/logical_reduction.cpp
 op/util/logical_reduction.hpp
-op/util/reshape.hpp
 op/util/rnn_cell_base.cpp
 op/util/rnn_cell_base.hpp
 op/util/unary_elementwise_arithmetic.cpp
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <cstddef>
#include <memory>
#include <vector>
#include "ngraph/builder/reshape.hpp"
#include "ngraph/node.hpp"
#include "ngraph/shape.hpp"
namespace ngraph
{
    namespace op
    {
        namespace util
        {
            /// \brief Change shape of input tensor.
            ///
            /// \param[in] node The node producing the tensor to be reshaped.
            /// \param[in] shape The new shape for input tensor.
            ///
            /// \return The node representing a Reshape operation.
            ///
            std::shared_ptr<ngraph::Node> reshape(const std::shared_ptr<ngraph::Node>& node,
                                                  const Shape& shape)
            {
                return builder::reshape(node, shape);
            }

            /// \brief Permute axes according to specified axes_order parameter.
            ///
            /// \param node The node whose axes we want to permute.
            /// \param axes_order The permutation of node tensor axes.
            ///
            /// \return New node with permuted axes.
            std::shared_ptr<ngraph::Node> reorder_axes(const std::shared_ptr<ngraph::Node>& node,
                                                       std::vector<std::size_t> axes_order)
            {
                return builder::reorder_axes(node, axes_order);
            }

            /// \brief Return transposed tensor (with axes in reversed order).
            ///
            /// \param node Input tensor we want to transpose.
            ///
            /// \return New node with reversed dimensions.
            std::shared_ptr<ngraph::Node> transpose(const std::shared_ptr<ngraph::Node>& node)
            {
                return builder::transpose(node);
            }

            /// \brief Flatten the input tensor into a 2D matrix.
            ///
            /// \param node The tensor to be flattened.
            /// \param axis The axis dividing shape.
            ///
            /// \return The new node will be a 2D matrix representing the flattened input node.
            std::shared_ptr<ngraph::Node> flatten(const std::shared_ptr<ngraph::Node>& node,
                                                  int axis)
            {
                return builder::flatten(node, axis);
            }
        } // namespace util
    }     // namespace op
}         // namespace ngraph
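The deleted header only forwarded to the `ngraph::builder` equivalents; for example, `flatten(node, axis)` collapses the dimensions before `axis` into rows and the remainder into columns. A hedged sketch of just that shape computation, independent of the nGraph API (the helper name is hypothetical):

```cpp
#include <cstddef>
#include <numeric>
#include <vector>

using Shape = std::vector<std::size_t>;

// Resulting shape of flatten(t, axis): dims [0, axis) fold into rows,
// dims [axis, rank) into columns. Assumes axis <= in.size().
Shape flatten_shape(const Shape& in, std::size_t axis)
{
    auto mul = [](std::size_t a, std::size_t b) { return a * b; };
    std::size_t rows =
        std::accumulate(in.begin(), in.begin() + axis, std::size_t{1}, mul);
    std::size_t cols =
        std::accumulate(in.begin() + axis, in.end(), std::size_t{1}, mul);
    return Shape{rows, cols};
}

// flatten_shape({2, 3, 4}, 1) == {2, 12}; flatten_shape({2, 3, 4}, 0) == {1, 24}.
```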
@@ -49,7 +49,8 @@ public:
 }
 };

-std::unique_ptr<ngraph::runtime::Allocator> ngraph::runtime::create_default_allocator()
+ngraph::runtime::Allocator* ngraph::runtime::get_default_allocator()
 {
-    return std::unique_ptr<DefaultAllocator>(new DefaultAllocator());
+    static std::unique_ptr<DefaultAllocator> allocator(new DefaultAllocator());
+    return allocator.get();
 }
@@ -30,7 +30,7 @@ namespace ngraph
 class DefaultAllocator;
 /// \brief Create a default allocator that calls into system
 /// allocation libraries
-std::unique_ptr<Allocator> create_default_allocator();
+ngraph::runtime::Allocator* get_default_allocator();
 }
 }
......
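Together, these two hunks replace a per-call factory (`create_default_allocator`, returning a fresh `unique_ptr`) with a process-wide singleton (`get_default_allocator`, returning a borrowed pointer to a function-local static). A self-contained sketch of the pattern, with toy classes standing in for the nGraph ones:

```cpp
#include <cstddef>
#include <cstdlib>
#include <memory>

class Allocator
{
public:
    virtual ~Allocator() = default;
    virtual void* allocate(std::size_t size) = 0;
    virtual void deallocate(void* ptr) = 0;
};

class DefaultAllocator : public Allocator
{
public:
    void* allocate(std::size_t size) override { return std::malloc(size); }
    void deallocate(void* ptr) override { std::free(ptr); }
};

Allocator* get_default_allocator()
{
    // Initialized once on first call (thread-safe since C++11), destroyed at
    // process exit. Callers borrow the pointer and must never delete it.
    static std::unique_ptr<DefaultAllocator> allocator(new DefaultAllocator());
    return allocator.get();
}
```

The trade-off: callers no longer own the allocator, so its lifetime is tied to the process rather than to any one backend.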
@@ -185,7 +185,7 @@ runtime::Allocator* runtime::cpu::CPU_Backend::get_host_memory_allocator()
 {
     if (!m_allocator)
     {
-        m_allocator = create_default_allocator();
+        return runtime::get_default_allocator();
     }
     return m_allocator.get();
 }
......
@@ -15,10 +15,10 @@
 # ******************************************************************************
 if (NGRAPH_GENERIC_CPU_ENABLE)
-    find_package(OpenMP)
-    if (OPENMP_FOUND)
-        add_compile_options(${OpenMP_CXX_FLAGS})
-    endif()
+    # find_package(OpenMP)
+    # if (OPENMP_FOUND)
+    #     add_compile_options(${OpenMP_CXX_FLAGS})
+    # endif()
     add_library(gcpu_backend SHARED gcpu_backend.cpp gcpu_executable.cpp node_wrapper.cpp)
     if(NGRAPH_LIB_VERSIONING_ENABLE)
         set_target_properties(gcpu_backend PROPERTIES
......
@@ -52,14 +52,14 @@ runtime::gcpu::GCPUBackend::GCPUBackend(const vector<string>& unsupported_op_nam
 shared_ptr<runtime::Tensor> runtime::gcpu::GCPUBackend::create_tensor(const element::Type& type,
                                                                       const Shape& shape)
 {
-    return make_shared<runtime::HostTensor>(type, shape, this);
+    return make_shared<runtime::HostTensor>(type, shape);
 }

 shared_ptr<runtime::Tensor> runtime::gcpu::GCPUBackend::create_tensor(const element::Type& type,
                                                                       const Shape& shape,
                                                                       void* memory_pointer)
 {
-    return make_shared<runtime::HostTensor>(type, shape, memory_pointer, this);
+    return make_shared<runtime::HostTensor>(type, shape, memory_pointer);
 }

 shared_ptr<runtime::Executable>
......
@@ -15,17 +15,22 @@
 //*****************************************************************************

 #include "ngraph/runtime/generic_cpu/gcpu_executable.hpp"
+#include "ngraph/cpio.hpp"
 #include "ngraph/descriptor/layout/dense_tensor_layout.hpp"
 #include "ngraph/except.hpp"
 #include "ngraph/op/convert.hpp"
 #include "ngraph/op/select.hpp"
 #include "ngraph/op/util/binary_elementwise_comparison.hpp"
 #include "ngraph/pass/assign_layout.hpp"
+#include "ngraph/pass/core_fusion.hpp"
+#include "ngraph/pass/fused_op_decomposition.hpp"
+#include "ngraph/pass/implicit_broadcast_elimination.hpp"
 #include "ngraph/pass/like_replacement.hpp"
 #include "ngraph/pass/liveness.hpp"
 #include "ngraph/pass/manager.hpp"
 #include "ngraph/pass/memory_layout.hpp"
 #include "ngraph/runtime/backend_manager.hpp"
+#include "ngraph/serializer.hpp"
 #include "ngraph/util.hpp"

 using namespace std;
@@ -35,21 +40,35 @@ using descriptor::layout::DenseTensorLayout;
 runtime::gcpu::GCPUExecutable::GCPUExecutable(const shared_ptr<Function>& function,
                                               bool enable_performance_collection)
+    : m_is_compiled{true}
+    , m_performance_counters_enabled{enable_performance_collection}
 {
-    m_is_compiled = true;
-    pass::Manager pass_manager;
-    pass_manager.register_pass<pass::LikeReplacement>();
-    pass_manager.register_pass<pass::AssignLayout<DenseTensorLayout>>();
-    pass_manager.register_pass<pass::Liveness>();
-    pass_manager.run_passes(function);
-    for (const shared_ptr<Node>& node : function->get_ordered_ops())
+    m_function = clone_function(*function);
+    pass::Manager pass_manager;
+    pass_manager.register_pass<pass::LikeReplacement>();
+    pass_manager.register_pass<pass::FusedOpDecomposition>();
+    pass_manager.register_pass<pass::ImplicitBroadcastElimination>();
+    pass_manager.register_pass<pass::AssignLayout<DenseTensorLayout>>();
+    pass_manager.register_pass<pass::Liveness>();
+    pass_manager.run_passes(m_function);
+    for (const shared_ptr<Node>& node : m_function->get_ordered_ops())
     {
         m_wrapped_nodes.emplace_back(node);
     }
-    set_parameters_and_results(*function);
+    set_parameters_and_results(*m_function);
+}
+
+runtime::gcpu::GCPUExecutable::GCPUExecutable(const std::string& model_string)
+    : m_is_compiled{true}
+    , m_performance_counters_enabled{false}
+{
+    m_function = deserialize(model_string);
+    for (const shared_ptr<Node>& node : m_function->get_ordered_ops())
+    {
+        m_wrapped_nodes.emplace_back(node);
+    }
+    set_parameters_and_results(*m_function);
 }
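The rewritten constructor clones the incoming function before running the lowering passes, so compilation no longer mutates the caller's graph. A toy illustration of that clone-then-transform pattern (plain C++; `Function` and the passes below are stand-ins, not the nGraph types):

```cpp
#include <cassert>
#include <functional>
#include <memory>
#include <vector>

struct Function
{
    std::vector<int> nodes; // stand-in for a graph
};

std::shared_ptr<Function> clone_function(const Function& f)
{
    return std::make_shared<Function>(f); // deep copy
}

int main()
{
    auto original = std::make_shared<Function>(Function{{1, 2, 3}});

    // Compile against a private clone...
    auto compiled = clone_function(*original);
    std::vector<std::function<void(Function&)>> passes = {
        [](Function& f) { f.nodes.push_back(4); }, // e.g. a decomposition pass
    };
    for (auto& pass : passes)
    {
        pass(*compiled);
    }

    // ...so the caller's graph is untouched.
    assert(original->nodes.size() == 3);
    assert(compiled->nodes.size() == 4);
    return 0;
}
```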
 bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor>>& outputs,
@@ -82,7 +101,7 @@ bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor
 {
     for (size_t i = 0; i < param->get_output_size(); ++i)
     {
-        descriptor::Tensor* tensor = param->get_output_tensor_ptr(i).get();
+        descriptor::Tensor* tensor = &param->output(i).get_tensor();
         tensor_map.insert({tensor, func_inputs[input_count++]});
     }
 }
@@ -95,14 +114,14 @@ bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor
 {
     throw ngraph_error("One of function's outputs isn't op::Result");
 }
-descriptor::Tensor* tensor = output->get_output_tensor_ptr(0).get();
+descriptor::Tensor* tensor = &output->output(0).get_tensor();
 tensor_map.insert({tensor, func_outputs[output_count]});
 }
 // for each ordered op in the graph
 for (const NodeWrapper& wrapped : m_wrapped_nodes)
 {
-    const Node* op = &wrapped.get_node();
+    auto op = wrapped.get_node();
     auto type_id = wrapped.get_typeid();
     if (type_id == OP_TYPEID::Parameter)
     {
@@ -111,9 +130,9 @@ bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor
 // get op inputs from map
 vector<shared_ptr<HostTensor>> op_inputs;
-for (const descriptor::Input& input : op->get_inputs())
+for (auto input : op->inputs())
 {
-    descriptor::Tensor* tensor = input.get_output().get_tensor_ptr().get();
+    descriptor::Tensor* tensor = &input.get_tensor();
     op_inputs.push_back(tensor_map.at(tensor));
 }
@@ -121,14 +140,14 @@ bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor
 vector<shared_ptr<HostTensor>> op_outputs;
 for (size_t i = 0; i < op->get_output_size(); ++i)
 {
-    descriptor::Tensor* tensor = op->get_output_tensor_ptr(i).get();
+    descriptor::Tensor* tensor = &op->output(i).get_tensor();
     shared_ptr<HostTensor> host_tensor;
     auto it = tensor_map.find(tensor);
     if (it == tensor_map.end())
     {
         const Shape& shape = op->get_output_shape(i);
         const element::Type& type = op->get_output_element_type(i);
-        string name = op->get_output_tensor(i).get_name();
+        string name = op->output(i).get_tensor().get_name();
         host_tensor = make_shared<runtime::HostTensor>(type, shape, name);
         tensor_map.insert({tensor, host_tensor});
     }
@@ -177,7 +196,7 @@ bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor
 }
 if (m_nan_check_enabled)
 {
-    perform_nan_check(op_outputs, op);
+    perform_nan_check(op_outputs, op.get());
 }
 }
@@ -186,19 +205,9 @@ bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor
 void runtime::gcpu::GCPUExecutable::generate_calls(const element::Type& type,
                                                    const NodeWrapper& op,
-                                                   const vector<shared_ptr<HostTensor>>& outputs,
-                                                   const vector<shared_ptr<HostTensor>>& inputs)
+                                                   const vector<shared_ptr<HostTensor>>& out,
+                                                   const vector<shared_ptr<HostTensor>>& in)
 {
-    vector<void*> out;
-    vector<const void*> in;
-    for (auto t : outputs)
-    {
-        out.push_back(t->get_data_ptr());
-    }
-    for (auto t : inputs)
-    {
-        in.push_back(t->get_data_ptr());
-    }
     stringstream ss;
     switch (type.get_type_enum())
     {
@@ -216,7 +225,8 @@ void runtime::gcpu::GCPUExecutable::generate_calls(const element::Type& type,
     case element::Type_t::undefined:
     case element::Type_t::dynamic:
     case element::Type_t::bf16:
+    case element::Type_t::f16:
-        ss << "unsupported element type " << type << " op " << op.get_node().get_name();
+        ss << "unsupported element type " << type << " op " << op.get_node()->get_name();
         throw ngraph_error(ss.str());
     }
 }
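`generate_calls` maps the runtime element-type enum onto a template instantiation, with the unsupported types (now including `f16` alongside `bf16`) funneled into a single error path. A condensed sketch of that dispatch pattern with toy types (not the actual nGraph `op_engine` machinery):

```cpp
#include <cstdint>
#include <sstream>
#include <stdexcept>
#include <vector>

enum class Type_t { f32, f64, i32, bf16, f16 };

template <typename T>
void op_engine(const std::vector<const void*>& in, const std::vector<void*>& out)
{
    // The typed kernels would run here.
}

void generate_calls(Type_t type,
                    const std::vector<const void*>& in,
                    const std::vector<void*>& out)
{
    switch (type)
    {
    case Type_t::f32: op_engine<float>(in, out); break;
    case Type_t::f64: op_engine<double>(in, out); break;
    case Type_t::i32: op_engine<int32_t>(in, out); break;
    case Type_t::bf16:
    case Type_t::f16:
    {
        // No native C++ type to instantiate with, so reject at runtime.
        std::stringstream ss;
        ss << "unsupported element type";
        throw std::runtime_error(ss.str());
    }
    }
}
```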
@@ -229,11 +239,9 @@ void runtime::gcpu::GCPUExecutable::set_nan_check(bool enable)
 vector<runtime::PerformanceCounter> runtime::gcpu::GCPUExecutable::get_performance_data() const
 {
     vector<runtime::PerformanceCounter> rc;
-    for (const pair<const Node*, stopwatch> p : m_timer_map)
+    for (const pair<shared_ptr<const Node>, stopwatch> p : m_timer_map)
     {
-        rc.emplace_back(p.first->get_name().c_str(),
-                        p.second.get_total_microseconds(),
-                        p.second.get_call_count());
+        rc.emplace_back(p.first, p.second.get_total_microseconds(), p.second.get_call_count());
     }
     return rc;
 }
@@ -286,3 +294,12 @@ void runtime::gcpu::GCPUExecutable::perform_nan_check(const vector<shared_ptr<Ho
         arg_number++;
     }
 }
+
+void runtime::gcpu::GCPUExecutable::save(ostream& out)
+{
+    cpio::Writer writer(out);
+    string si = "INTERPRETER Save File 1.0";
+    writer.write("save_info", si.data(), si.size());
+    string model = serialize(m_function, 0);
+    writer.write("model", model.data(), model.size());
+}
@@ -140,6 +140,91 @@ namespace ngraph
 }
 }

+template <typename T>
+void broadcast_5d(const T* in,
+                  T* out,
+                  const Shape& in_shape,
+                  const Shape& out_shape,
+                  const AxisSet& broadcast_axes)
+{
+    size_t index[5];
+    size_t* out_index = 0;
+    for (size_t i = 0; i < 5; i++)
+    {
+        if (broadcast_axes.count(i) == 0)
+        {
+            out_index = &index[i];
+            break;
+        }
+    }
+    for (index[0] = 0; index[0] < out_shape[0]; ++index[0])
+    {
+        for (index[1] = 0; index[1] < out_shape[1]; ++index[1])
+        {
+            for (index[2] = 0; index[2] < out_shape[2]; ++index[2])
+            {
+                for (index[3] = 0; index[3] < out_shape[3]; ++index[3])
+                {
+                    for (index[4] = 0; index[4] < out_shape[4]; ++index[4])
+                    {
+                        out[index[0] * out_shape[1] * out_shape[2] * out_shape[3] *
+                                out_shape[4] +
+                            index[1] * out_shape[2] * out_shape[3] * out_shape[4] +
+                            index[2] * out_shape[3] * out_shape[4] +
+                            index[3] * out_shape[4] + index[4]] = in[*out_index];
+                    }
+                }
+            }
+        }
+    }
+}
+
+template <typename T>
+void broadcast_6d(const T* in,
+                  T* out,
+                  const Shape& in_shape,
+                  const Shape& out_shape,
+                  const AxisSet& broadcast_axes)
+{
+    size_t index[6];
+    size_t* out_index = 0;
+    for (size_t i = 0; i < 6; i++)
+    {
+        if (broadcast_axes.count(i) == 0)
+        {
+            out_index = &index[i];
+            break;
+        }
+    }
+    for (index[0] = 0; index[0] < out_shape[0]; ++index[0])
+    {
+        for (index[1] = 0; index[1] < out_shape[1]; ++index[1])
+        {
+            for (index[2] = 0; index[2] < out_shape[2]; ++index[2])
+            {
+                for (index[3] = 0; index[3] < out_shape[3]; ++index[3])
+                {
+                    for (index[4] = 0; index[4] < out_shape[4]; ++index[4])
+                    {
+                        for (index[5] = 0; index[5] < out_shape[5]; ++index[5])
+                        {
+                            out[index[0] * out_shape[1] * out_shape[2] *
+                                    out_shape[3] * out_shape[4] * out_shape[5] +
+                                index[1] * out_shape[2] * out_shape[3] *
+                                    out_shape[4] * out_shape[5] +
+                                index[2] * out_shape[3] * out_shape[4] *
+                                    out_shape[5] +
+                                index[3] * out_shape[4] * out_shape[5] +
+                                index[4] * out_shape[5] + index[5]] =
+                                in[*out_index];
+                        }
+                    }
+                }
+            }
+        }
+    }
+}
+
 template <typename T>
 void broadcast(const T* in,
                T* out,
@@ -167,6 +252,16 @@ namespace ngraph
     case 4:
         broadcast_4d<T>(in, out, in_shape, out_shape, broadcast_axes);
         break;
+    case 5:
+        broadcast_5d<T>(in, out, in_shape, out_shape, broadcast_axes);
+        break;
+    case 6:
+        broadcast_6d<T>(in, out, in_shape, out_shape, broadcast_axes);
+        break;
+    default:
+        runtime::reference::broadcast<T>(
+            in, out, in_shape, out_shape, broadcast_axes);
+        break;
     }
 }
 else
......
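The hand-unrolled 5-D and 6-D kernels spell out the row-major offset arithmetic inline; anything of higher rank now falls through to the generic reference kernel. For reference, the offset expression those nested loops compute is the usual positional encoding, sketched here as a rank-agnostic helper (illustrative, not the nGraph reference implementation):

```cpp
#include <cstddef>
#include <vector>

// Flat row-major offset of multi-index `idx` in a tensor of shape `shape`.
// Assumes idx.size() == shape.size().
std::size_t flat_index(const std::vector<std::size_t>& idx,
                       const std::vector<std::size_t>& shape)
{
    std::size_t offset = 0;
    for (std::size_t i = 0; i < shape.size(); ++i)
    {
        offset = offset * shape[i] + idx[i];
    }
    return offset;
}

// For shape {s0, s1, s2, s3, s4} this expands to
// i0*s1*s2*s3*s4 + i1*s2*s3*s4 + i2*s3*s4 + i3*s4 + i4,
// exactly the expression broadcast_5d writes out by hand.
```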
@@ -244,10 +244,7 @@ namespace ngraph
 case 4: reshape_in4<T>(in, out, in_shape, in_axis_order, out_shape); break;
 case 5: reshape_in5<T>(in, out, in_shape, in_axis_order, out_shape); break;
 case 6: reshape_in6<T>(in, out, in_shape, in_axis_order, out_shape); break;
-default:
-    NGRAPH_INFO << "reference::reshape";
-    reference::reshape(in, out, in_shape, in_axis_order, out_shape);
-    break;
+default: reference::reshape(in, out, in_shape, in_axis_order, out_shape); break;
 }
 }
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once

#include <algorithm>
#include <cmath>
#include <cstring>
#include <numeric>
#include <vector>

#include "ngraph/shape.hpp"

namespace ngraph
{
    namespace runtime
    {
        namespace gcpu
        {
            namespace kernel
            {
                template <typename T>
                void result(const T* arg, T* out, size_t count)
                {
                    // Plain copy: the Result op just forwards its input buffer.
                    memcpy(out, arg, sizeof(T) * count);
                }
            }
        }
    }
}
@@ -51,7 +51,7 @@ class ngraph::runtime::gcpu::NodeWrapper
 public:
     NodeWrapper(const std::shared_ptr<const ngraph::Node>& node);

-    const Node& get_node() const { return *m_node; }
+    std::shared_ptr<const Node> get_node() const { return m_node; }
     ngraph::runtime::gcpu::OP_TYPEID get_typeid() const { return m_typeid; }
 private:
     std::shared_ptr<const ngraph::Node> m_node;
......
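Returning the `shared_ptr` itself (instead of a reference into the wrapper) lets callers such as `perform_nan_check(op_outputs, op.get())` share ownership of the node and outlive the `NodeWrapper`. A minimal sketch of the lifetime difference, with toy types standing in for the nGraph classes:

```cpp
#include <memory>
#include <string>
#include <utility>

struct Node
{
    std::string name;
};

class NodeWrapper
{
public:
    explicit NodeWrapper(std::shared_ptr<const Node> node) : m_node(std::move(node)) {}

    // New style: the caller shares ownership and may outlive the wrapper.
    std::shared_ptr<const Node> get_node() const { return m_node; }

private:
    std::shared_ptr<const Node> m_node;
};

int main()
{
    std::shared_ptr<const Node> kept;
    {
        NodeWrapper w(std::make_shared<Node>(Node{"Add_0"}));
        kept = w.get_node(); // safe: ownership is shared
    } // wrapper destroyed here
    // `kept` still points at a live Node; a reference returned by the old
    // `const Node& get_node()` would dangle at this point.
    return 0;
}
```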