Unverified Commit 7cf242d3 authored by Jayaram Bobba's avatar Jayaram Bobba Committed by GitHub

Merge pull request #436 from NervanaSystems/jmenon/cpu_layout_infra

CPU Layout Part 1: Infrastructure
parents 88bc37f1 fd3147bc
......@@ -169,10 +169,15 @@ if (NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR AND
runtime/cpu/cpu_kernel_utils.cpp
runtime/cpu/cpu_emitter.cpp
runtime/cpu/cpu_external_function.cpp
runtime/cpu/cpu_tensor_view.cpp
runtime/cpu/cpu_tensor_view_wrapper.cpp
runtime/cpu/cpu_layout_descriptor.cpp
runtime/cpu/cpu_tracing.cpp
runtime/cpu/mkldnn_utils.cpp
runtime/cpu/ops/convert_layout.cpp
runtime/cpu/ops/matmul_bias.cpp
runtime/cpu/pass/cpu_fusion.cpp
runtime/cpu/pass/cpu_layout.cpp
)
# LLVM binary builds are typically built without RTTI
# The built-in headers are in a version-specific directory
......
......@@ -16,8 +16,8 @@
#include "ngraph/runtime/cpu/cpu_backend.hpp"
#include "ngraph/log.hpp"
#include "ngraph/runtime/cpu/cpu_tensor_view.hpp"
#include "ngraph/runtime/external_function.hpp"
#include "ngraph/runtime/host_tensor_view.hpp"
using namespace ngraph;
using namespace std;
......@@ -32,6 +32,6 @@ std::shared_ptr<ngraph::runtime::TensorView>
runtime::cpu::CPU_Backend::make_primary_tensor_view(const ngraph::element::Type& element_type,
const Shape& shape)
{
auto rc = make_shared<runtime::HostTensorView>(element_type, shape);
auto rc = make_shared<runtime::cpu::CPUTensorView>(element_type, shape);
return dynamic_pointer_cast<runtime::TensorView>(rc);
}
......@@ -18,8 +18,8 @@
#include "ngraph/runtime/cpu/cpu_call_frame.hpp"
#include "ngraph/runtime/cpu/cpu_external_function.hpp"
#include "ngraph/runtime/cpu/cpu_tensor_view.hpp"
#include "ngraph/runtime/cpu/cpu_tracing.hpp"
#include "ngraph/runtime/host_tensor_view.hpp"
using namespace std;
using namespace ngraph;
......@@ -43,16 +43,20 @@ void runtime::cpu::CPU_CallFrame::tensor_call(
{
vector<void*> inputs;
vector<void*> outputs;
propagate_layouts(input_tvs, m_external_function->get_parameter_layout_descriptors());
propagate_layouts(output_tvs, m_external_function->get_result_layout_descriptors());
for (size_t i = 0; i < input_tvs.size(); i++)
{
shared_ptr<runtime::HostTensorView> tv =
static_pointer_cast<runtime::HostTensorView>(input_tvs[i]);
shared_ptr<runtime::cpu::CPUTensorView> tv =
static_pointer_cast<runtime::cpu::CPUTensorView>(input_tvs[i]);
inputs.push_back(tv->get_data_ptr());
}
for (size_t i = 0; i < output_tvs.size(); i++)
{
shared_ptr<runtime::HostTensorView> tv =
static_pointer_cast<runtime::HostTensorView>(output_tvs[i]);
shared_ptr<runtime::cpu::CPUTensorView> tv =
static_pointer_cast<runtime::cpu::CPUTensorView>(output_tvs[i]);
outputs.push_back(tv->get_data_ptr());
}
......@@ -85,6 +89,26 @@ void runtime::cpu::CPU_CallFrame::call(
tensor_call(inputs, outputs);
}
// Attaches the i-th layout descriptor in `layouts` to the descriptor of the
// i-th tensor view in `tvs`.
//
// Throws ngraph_error when the two sequences differ in length, or when a
// layout entry is null. Entries are processed in order, so tensor views that
// precede a null entry have already been updated when the error is raised.
void runtime::cpu::CPU_CallFrame::propagate_layouts(
    const std::vector<std::shared_ptr<runtime::TensorView>>& tvs,
    const LayoutDescriptorPtrs& layouts) const
{
    if (tvs.size() != layouts.size())
    {
        throw ngraph_error(
            "Error propagating layouts - tensor view and layout descriptor counts do not match");
    }

    // Walk both sequences in lock-step; the sizes were verified above.
    auto layout_it = layouts.begin();
    for (const auto& tensor_view : tvs)
    {
        const auto& layout = *layout_it++;
        if (!layout)
        {
            throw ngraph_error(
                "Error propagating layouts - layout information missing from tensor view");
        }
        tensor_view->get_descriptor()->set_tensor_view_layout(layout);
    }
}
vector<runtime::PerformanceCounter> runtime::cpu::CPU_CallFrame::get_performance_data() const
{
vector<runtime::PerformanceCounter> rc;
......
......@@ -22,6 +22,7 @@
#include "ngraph/function.hpp"
#include "ngraph/runtime/call_frame.hpp"
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
#include "ngraph/runtime/cpu/cpu_runtime_context.hpp"
#include "ngraph/runtime/tensor_view.hpp"
......@@ -60,6 +61,9 @@ namespace ngraph
void tensor_call(const std::vector<std::shared_ptr<TensorView>>& inputs,
const std::vector<std::shared_ptr<TensorView>>& outputs) override;
void propagate_layouts(const std::vector<std::shared_ptr<runtime::TensorView>>& tvs,
const LayoutDescriptorPtrs& layouts) const;
std::vector<ngraph::runtime::PerformanceCounter>
get_performance_data() const override;
......
......@@ -27,7 +27,6 @@
#include "ngraph/codegen/compiler.hpp"
#include "ngraph/codegen/execution_engine.hpp"
#include "ngraph/descriptor/input.hpp"
#include "ngraph/descriptor/layout/dense_tensor_view_layout.hpp"
#include "ngraph/descriptor/output.hpp"
#include "ngraph/descriptor/primary_tensor_view.hpp"
#include "ngraph/file_util.hpp"
......@@ -85,7 +84,6 @@
#include "ngraph/ops/sum.hpp"
#include "ngraph/ops/tan.hpp"
#include "ngraph/ops/tanh.hpp"
#include "ngraph/pass/assign_layout.hpp"
#include "ngraph/pass/dump_sorted.hpp"
#include "ngraph/pass/liveness.hpp"
#include "ngraph/pass/manager.hpp"
......@@ -94,16 +92,21 @@
#include "ngraph/runtime/cpu/cpu_call_frame.hpp"
#include "ngraph/runtime/cpu/cpu_emitter.hpp"
#include "ngraph/runtime/cpu/cpu_external_function.hpp"
#include "ngraph/runtime/cpu/cpu_tensor_view.hpp"
#include "ngraph/runtime/cpu/cpu_tracing.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/cpu/ops/matmul_bias.hpp"
#include "ngraph/runtime/cpu/pass/cpu_fusion.hpp"
#include "ngraph/runtime/host_tensor_view.hpp"
#include "ngraph/runtime/cpu/pass/cpu_layout.hpp"
using namespace std;
using namespace ngraph;
static const string s_output_dir = "cpu_codegen";
// Temporary Memory Pool alignment
static const size_t s_memory_pool_alignment = 4096;
class StaticInitializers
{
public:
......@@ -226,12 +229,13 @@ void runtime::cpu::CPU_ExternalFunction::compile()
string function_name = m_function->get_name();
pass::Manager pass_manager;
// For now, just make everyone row-major.
pass_manager.register_pass<pass::CPUFusion>();
pass_manager.register_pass<pass::AssignLayout<descriptor::layout::DenseTensorViewLayout>>();
pass_manager.register_pass<pass::Liveness>();
pass_manager.register_pass<pass::MemoryLayout>(64);
ngraph::pass::Manager pass_manager;
pass_manager.register_pass<runtime::cpu::pass::CPUFusion>();
pass_manager.register_pass<runtime::cpu::pass::CPULayout>();
pass_manager.register_pass<ngraph::pass::Liveness>();
pass_manager.register_pass<ngraph::pass::MemoryLayout>(s_memory_pool_alignment);
pass_manager.run_passes(m_function);
codegen::CodeWriter writer;
......@@ -241,11 +245,7 @@ void runtime::cpu::CPU_ExternalFunction::compile()
{
for (shared_ptr<Node> node : current_function->get_ordered_ops())
{
if (dynamic_cast<op::Convolution*>(node.get()) ||
dynamic_cast<op::ConvolutionBackpropData*>(node.get()) ||
dynamic_cast<op::ConvolutionBackpropFilters*>(node.get()) ||
dynamic_cast<op::AvgPool*>(node.get()) || dynamic_cast<op::MaxPool*>(node.get()) ||
dynamic_cast<op::AvgPoolBackprop*>(node.get()))
if (ngraph::runtime::cpu::mkldnn_utils::IsMKLDNNOp(*node))
{
include_mkldnn_headers = true;
}
......@@ -520,7 +520,7 @@ using namespace ngraph::runtime;
writer << "// Memory pool size is " << temp_pool_size << " bytes\n";
writer << "// Worst case size is " << worst_case_tmp_size << " bytes\n";
writer << "ngraph::runtime::AlignedBuffer memory_handler(" << temp_pool_size << ", "
<< ngraph::runtime::alignment << ");\n";
<< s_memory_pool_alignment << ");\n";
writer << "size_t pool_base_ptr = (size_t)memory_handler.get_ptr();\n";
writer << "\n";
......@@ -762,6 +762,41 @@ using namespace ngraph::runtime;
writer += "}\n\n";
}
// Store layouts assigned for arguments
for (const auto& parameter : m_function->get_parameters())
{
for (size_t i = 0; i < parameter->get_output_size(); ++i)
{
auto tv = parameter->get_output_tensor_view(i);
if (tv->get_tensor_view_layout() == nullptr)
{
throw ngraph_error("layout missing on function parameter's tensor view: " +
tv->get_name());
}
parameter_layout_descriptors.emplace_back(
static_pointer_cast<runtime::cpu::LayoutDescriptor>(tv->get_tensor_view_layout()));
}
}
// Store layouts assigned for results
if (!result_layout_descriptors.empty())
{
throw ngraph_error("Function output layouts should not be pre-assigned");
}
for (size_t i = 0; i < m_function->get_output_size(); ++i)
{
const auto& output = m_function->get_output_op(i);
for (size_t j = 0; j < output->get_output_size(); ++j)
{
auto tv = output->get_output_tensor_view(j);
if (tv->get_tensor_view_layout() == nullptr)
{
throw ngraph_error("layout missing on function output tensor: " + tv->get_name());
}
result_layout_descriptors.emplace_back(
static_pointer_cast<runtime::cpu::LayoutDescriptor>(tv->get_tensor_view_layout()));
}
}
// TODO: Cleanup and make this a utility function
file_util::make_directory(s_output_dir);
......@@ -834,6 +869,18 @@ shared_ptr<ngraph::runtime::CallFrame> runtime::cpu::CPU_ExternalFunction::make_
m_compiled_function);
}
// Read-only access to the layout descriptors recorded for the function's
// parameters. The returned reference aliases a member of this object.
const runtime::cpu::LayoutDescriptorPtrs&
    runtime::cpu::CPU_ExternalFunction::get_parameter_layout_descriptors()
{
    return this->parameter_layout_descriptors;
}
// Read-only access to the layout descriptors recorded for the function's
// results. The returned reference aliases a member of this object.
const runtime::cpu::LayoutDescriptorPtrs&
    runtime::cpu::CPU_ExternalFunction::get_result_layout_descriptors()
{
    return this->result_layout_descriptors;
}
void runtime::cpu::CPU_ExternalFunction::emit_debug_function_entry(
codegen::CodeWriter& writer,
Node* node,
......
......@@ -29,6 +29,7 @@
#include "ngraph/codegen/execution_engine.hpp"
#include "ngraph/function.hpp"
#include "ngraph/runtime/cpu/cpu_call_frame.hpp"
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
#include "ngraph/runtime/cpu/cpu_tensor_view_wrapper.hpp"
#include "ngraph/runtime/external_function.hpp"
......@@ -74,6 +75,9 @@ namespace ngraph
bool release_function = true);
std::shared_ptr<ngraph::runtime::CallFrame> make_call_frame();
const LayoutDescriptorPtrs& get_parameter_layout_descriptors();
const LayoutDescriptorPtrs& get_result_layout_descriptors();
const std::vector<OpAttributes>& get_op_attrs() const { return m_op_attrs; }
protected:
void compile();
......@@ -106,6 +110,9 @@ namespace ngraph
bool m_emit_timing;
bool m_use_tbb;
std::unordered_map<std::string, std::string> m_variable_name_map;
LayoutDescriptorPtrs parameter_layout_descriptors;
LayoutDescriptorPtrs result_layout_descriptors;
std::vector<OpAttributes> m_op_attrs;
};
}
......
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <algorithm>
#include <numeric>
#include "cpu_layout_descriptor.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
const AxisVector LayoutDescriptor::Native2DAxisOrder{0, 1};
const AxisVector LayoutDescriptor::Native4DAxisOrder{0, 1, 2, 3};
const AxisVector LayoutDescriptor::CHWNAxisOrder{1, 2, 3, 0};
// Returns the identity axis order {0, 1, ..., rank - 1}.
AxisVector LayoutDescriptor::create_native_axis_order(size_t rank)
{
    AxisVector identity_order(rank);
    size_t axis = 0;
    for (auto& entry : identity_order)
    {
        entry = axis++;
    }
    return identity_order;
}
// Builds a layout for `tv` whose axes are arranged as given by
// `tv_axis_order`. The offset starts at 0, the size is the element count of
// the tensor view's shape, and the MKLDNN format starts out undefined.
//
// Strides are accumulated from the last entry of the axis order to the first,
// so strides[k] is the product of the extents of the axes listed after
// position k in `tv_axis_order`.
//
// Throws ngraph_error when the axis order does not have one entry per
// dimension, or when an entry is not a valid axis of the shape.
LayoutDescriptor::LayoutDescriptor(const ngraph::descriptor::TensorView& tv,
                                   const AxisVector& tv_axis_order)
    : TensorViewLayout(tv)
    , axis_order(tv_axis_order)
    , offset(0)
    , size(ngraph::shape_size(tv.get_tensor_view_type()->get_shape()))
    , mkldnn_format(mkldnn::memory::format::format_undef)
{
    const auto dims = get_shape();
    const size_t rank = dims.size();

    if (tv_axis_order.size() != rank)
    {
        throw ngraph_error("Axis order is incomplete");
    }

    size_t running_stride = 1;
    for (size_t pos = tv_axis_order.size(); pos > 0; --pos)
    {
        const size_t axis = tv_axis_order[pos - 1];
        if (axis >= rank)
        {
            throw ngraph_error("Axis is out of bounds");
        }
        strides.emplace_back(running_stride);
        running_stride *= dims[axis];
    }

    // The strides were collected back-to-front; put them in axis-order sequence.
    std::reverse(strides.begin(), strides.end());
}
// Converts a multi-dimensional index into a linear element offset.
//
// The offset is the dot product of `indices` with this layout's strides:
//     sum over i of indices[i] * strides[i]
// (The previous implementation added each stride to its index instead of
// multiplying, which produced a value unrelated to the element position.)
//
// Throws ngraph_error when `indices` does not have one entry per stride.
size_t LayoutDescriptor::get_index_offset(const std::vector<size_t>& indices)
{
    if (indices.size() != strides.size())
    {
        throw ngraph_error("Indices have incorrect rank");
    }

    // size_t{0} keeps the accumulation in size_t rather than int.
    return std::inner_product(indices.begin(), indices.end(), strides.begin(), size_t{0});
}
// Two layouts compare equal when `other` is also a LayoutDescriptor and the
// element type, strides and offset all match. Any other kind of
// TensorViewLayout compares unequal.
bool LayoutDescriptor::
    operator==(const ngraph::descriptor::layout::TensorViewLayout& other) const
{
    const auto* rhs = dynamic_cast<const LayoutDescriptor*>(&other);
    if (rhs == nullptr)
    {
        return false;
    }

    // Evaluated left to right with short-circuiting, in the same order as the
    // individual checks this replaces.
    const bool differs = (get_element_type() != rhs->get_element_type()) ||
                         (strides != rhs->strides) || (offset != rhs->offset);
    return !differs;
}
}
}
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <cstdint>
#include <memory>
#include <vector>
#include <mkldnn.hpp>
#include "ngraph/common.hpp"
#include "ngraph/descriptor/layout/tensor_view_layout.hpp"
#include "ngraph/shape.hpp"
#include "ngraph/types/type.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
// CPU-backend tensor view layout. It records the axis order a tensor view is
// stored in, the strides derived from that order, an offset, the element
// count, and the MKLDNN memory format associated with the tensor.
class LayoutDescriptor : public ngraph::descriptor::layout::TensorViewLayout
{
public:
// Builds the layout of `tv` for the given axis order. The implementation
// throws ngraph_error when the axis order has the wrong number of entries
// or names an axis outside the shape.
LayoutDescriptor(const ngraph::descriptor::TensorView& tv,
const AxisVector& tv_axis_order);
~LayoutDescriptor() {}
// Element count of the tensor view's shape (computed at construction).
size_t get_size() override { return size; }
// Offset of the layout; the constructor initializes it to 0.
size_t get_offset() const { return offset; }
// Maps a multi-dimensional index to an offset using the strides. Throws
// ngraph_error when the index rank differs from the stride rank.
size_t get_index_offset(const std::vector<size_t>& indices) override;
// Strides in the same sequence as the entries of the axis order.
const Strides& get_strides() const override { return strides; }
// Equal only to another LayoutDescriptor with the same element type,
// strides and offset.
bool operator==(const TensorViewLayout& other) const override;
// Records the MKLDNN memory format to associate with this layout.
void set_mkldnn_format(const mkldnn::memory::format& format)
{
mkldnn_format = format;
}
// MKLDNN memory format; format_undef until set_mkldnn_format() is called.
mkldnn::memory::format get_mkldnn_format() const { return mkldnn_format; }
// Axis order this layout was constructed with.
const AxisVector& get_axis_order() const { return axis_order; }
// Predefined axis orders: {0, 1}, {0, 1, 2, 3} and {1, 2, 3, 0}.
static const AxisVector Native2DAxisOrder;
static const AxisVector Native4DAxisOrder;
static const AxisVector CHWNAxisOrder;
// Returns the identity axis order {0, 1, ..., rank - 1}.
static AxisVector create_native_axis_order(size_t rank);
private:
AxisVector axis_order;
Strides strides;
size_t offset;
size_t size;
// Numeric backend-specific fields
mkldnn::memory::format mkldnn_format;
};
// Ordered collection of shared layout descriptors, e.g. one per function
// parameter or result.
typedef std::vector<std::shared_ptr<ngraph::runtime::cpu::LayoutDescriptor>>
LayoutDescriptorPtrs;
}
}
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <cstring>
#include <memory>
#include "cpu_tensor_view.hpp"
#include "ngraph/descriptor/layout/tensor_view_layout.hpp"
#include "ngraph/descriptor/primary_tensor_view.hpp"
#include "ngraph/except.hpp"
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
#include "ngraph/shape.hpp"
using namespace ngraph;
using namespace std;
// TODO(jmenon): Refactor all the alignment specifications into
// a single place and allow lower or no alignment when possible
const size_t runtime::cpu::CPUTensorView::BufferAlignment = 64;
// Creates a tensor view of the given element type and shape, backed by a
// malloc'ed buffer whose usable region starts at a BufferAlignment-aligned
// address. No buffer is allocated for a tensor of zero bytes.
//
// Throws ngraph_error when the allocation fails.
runtime::cpu::CPUTensorView::CPUTensorView(const ngraph::element::Type& element_type,
                                           const Shape& shape,
                                           const string& name)
    : runtime::TensorView(std::make_shared<ngraph::descriptor::PrimaryTensorView>(
          std::make_shared<ngraph::TensorViewType>(element_type, shape), name, true, true, false))
    , buffer(nullptr)
    , aligned_buffer(nullptr)
{
    // TODO(jmenon): A fallback layout should not be needed but is required
    // because of how some unit test functionality is written (ex. 'backprop_derivative')
    // This needs to be removed
    const auto native_order =
        runtime::cpu::LayoutDescriptor::create_native_axis_order(shape.size());
    m_descriptor->set_tensor_view_layout(
        std::make_shared<runtime::cpu::LayoutDescriptor>(*m_descriptor, native_order));

    buffer_size = shape_size(shape) * element_type.size();
    if (buffer_size == 0)
    {
        return;
    }

    // Over-allocate by one alignment unit so an aligned start address always
    // fits inside the allocation.
    size_t space = buffer_size + BufferAlignment;
    void* raw = malloc(space);
    if (raw == nullptr)
    {
        throw ngraph_error("Error allocating CPU Tensor View memory");
    }

    // `buffer` keeps the address returned by malloc (needed for free);
    // std::align advances `raw` to the aligned start used for data access.
    buffer = static_cast<char*>(raw);
    std::align(BufferAlignment, buffer_size, raw, space);
    aligned_buffer = static_cast<char*>(raw);
}
// Releases the backing allocation, if one was made.
runtime::cpu::CPUTensorView::~CPUTensorView()
{
    if (buffer != nullptr)
    {
        free(buffer);
    }
}
char* runtime::cpu::CPUTensorView::get_data_ptr()
{
return aligned_buffer;
}
const char* runtime::cpu::CPUTensorView::get_data_ptr() const
{
return aligned_buffer;
}
// Copies `n` bytes from `source` into the tensor buffer, starting
// `tensor_offset` bytes from the beginning of the data region.
//
// Throws std::out_of_range when [tensor_offset, tensor_offset + n) does not
// lie entirely inside the buffer.
void runtime::cpu::CPUTensorView::write(const void* source, size_t tensor_offset, size_t n)
{
    // Checked as two comparisons: `tensor_offset + n` could wrap around for
    // very large arguments and slip past a single-sum comparison.
    if (tensor_offset > buffer_size || n > buffer_size - tensor_offset)
    {
        throw out_of_range("write access past end of tensor");
    }
    if (n == 0)
    {
        // Nothing to copy; also avoids handing memcpy the null data pointer
        // of a zero-sized tensor.
        return;
    }
    char* target = get_data_ptr();
    memcpy(&target[tensor_offset], source, n);
}
// Copies `n` bytes out of the tensor buffer into `target`, starting
// `tensor_offset` bytes from the beginning of the data region.
//
// Throws std::out_of_range when [tensor_offset, tensor_offset + n) does not
// lie entirely inside the buffer.
void runtime::cpu::CPUTensorView::read(void* target, size_t tensor_offset, size_t n) const
{
    // Checked as two comparisons: `tensor_offset + n` could wrap around for
    // very large arguments and slip past a single-sum comparison.
    if (tensor_offset > buffer_size || n > buffer_size - tensor_offset)
    {
        throw out_of_range("read access past end of tensor");
    }
    if (n == 0)
    {
        // Nothing to copy; also avoids handing memcpy the null data pointer
        // of a zero-sized tensor.
        return;
    }
    const char* source = get_data_ptr();
    memcpy(target, &source[tensor_offset], n);
}
size_t runtime::cpu::CPUTensorView::get_size() const
{
return get_element_count();
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <string>
#include "ngraph/runtime/tensor_view.hpp"
#include "ngraph/types/element_type.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
class CPUTensorView : public ngraph::runtime::TensorView
{
public:
CPUTensorView(const ngraph::element::Type& element_type,
const Shape& shape,
const std::string& name = "external");
virtual ~CPUTensorView();
char* get_data_ptr();
const char* get_data_ptr() const;
size_t get_size() const;
const element::Type& get_element_type() const;
/// @brief Write bytes directly into the tensor
/// @param p Pointer to source of data
/// @param tensor_offset Offset into tensor storage to begin writing. Must be element-aligned.
/// @param n Number of bytes to write, must be integral number of elements.
void write(const void* p, size_t tensor_offset, size_t n) override;
/// @brief Read bytes directly from the tensor
/// @param p Pointer to destination for data
/// @param tensor_offset Offset into tensor storage to begin reading. Must be element-aligned.
/// @param n Number of bytes to read, must be integral number of elements.
void read(void* p, size_t tensor_offset, size_t n) const override;
private:
static const size_t BufferAlignment;
char* buffer;
char* aligned_buffer;
size_t buffer_size;
};
}
}
}
// ----------------------------------------------------------------------------
// Copyright 2018 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <fstream>
#include <map>
......
// ----------------------------------------------------------------------------
// Copyright 2018 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
......
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <typeindex>
#include <typeinfo>
#include <unordered_set>
#include "ngraph/node.hpp"
#include "ngraph/ops/avg_pool.hpp"
#include "ngraph/ops/convolution.hpp"
#include "ngraph/ops/max_pool.hpp"
#include "mkldnn_utils.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace mkldnn_utils
{
// Shorthand for the std::type_index of a type or expression.
#define TI(x) std::type_index(typeid(x))
// Op types that IsMKLDNNOp() reports as MKLDNN ops. Membership is tested by
// exact type_index, so only these types themselves are matched.
const std::unordered_set<std::type_index> s_op_registry{
TI(ngraph::op::AvgPool),
TI(ngraph::op::AvgPoolBackprop),
TI(ngraph::op::Convolution),
TI(ngraph::op::ConvolutionBackpropData),
TI(ngraph::op::ConvolutionBackpropFilters),
TI(ngraph::op::MaxPool)};
// Returns true when the type of `op`, as reported by typeid, is listed in
// s_op_registry.
bool IsMKLDNNOp(ngraph::Node& op)
{
    const std::type_index op_type(typeid(op));
    return s_op_registry.count(op_type) != 0;
}
// Picks the MKLDNN memory format that corresponds to the rank of the layout's
// shape: x for rank 1, nc for rank 2, nchw for rank 4. Every other rank maps
// to format_undef.
mkldnn::memory::format
    CreateNativeDataFormat(const ngraph::runtime::cpu::LayoutDescriptor& layout)
{
    const auto rank = layout.get_shape().size();
    if (rank == 1)
    {
        return mkldnn::memory::format::x;
    }
    if (rank == 2)
    {
        return mkldnn::memory::format::nc;
    }
    if (rank == 4)
    {
        return mkldnn::memory::format::nchw;
    }
    return mkldnn::memory::format::format_undef;
}
}
}
}
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <typeindex>
#include <typeinfo>
#include <unordered_set>
#include <mkldnn.hpp>
#include "ngraph/node.hpp"
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace mkldnn_utils
{
bool IsMKLDNNOp(ngraph::Node& op);
mkldnn::memory::format
CreateNativeDataFormat(const ngraph::runtime::cpu::LayoutDescriptor& layout);
}
}
}
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "ngraph/runtime/cpu/ops/convert_layout.hpp"
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
using namespace std;
using namespace ngraph;
// Convenience constructor: converts output 0 of `arg` to `layout` by
// delegating to the three-argument constructor.
runtime::cpu::op::ConvertLayout::ConvertLayout(
const shared_ptr<Node>& arg, const shared_ptr<runtime::cpu::LayoutDescriptor>& layout)
: ConvertLayout(arg, 0, layout)
{
}
// Builds a layout conversion of output `output_index` of `arg`. The op gets a
// single output whose element type, shape and layout are taken from `layout`.
//
// Throws ngraph_error when the selected input tensor view carries no layout,
// or when `layout` itself is null (previously a null `layout` was
// dereferenced).
runtime::cpu::op::ConvertLayout::ConvertLayout(
    const shared_ptr<Node>& arg,
    size_t output_index,
    const shared_ptr<runtime::cpu::LayoutDescriptor>& layout)
    : RequiresTensorViewArgs("ConvertLayout", {arg})
    , arg_output_index(output_index)
    , output_layout(layout)
{
    const auto& arg_tensor_view = arg->get_output_tensor_view(arg_output_index);
    const auto& arg_layout = arg_tensor_view->get_tensor_view_layout();

    if (!arg_layout)
    {
        throw ngraph_error("Layout conversion input tensor is missing layout information");
    }

    // Checked after the input layout so existing failures keep their message.
    if (!layout)
    {
        throw ngraph_error("Layout conversion is missing the target layout");
    }

    add_output(layout->get_element_type(), layout->get_shape());
    get_output_tensor_view()->set_tensor_view_layout(layout);
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include "ngraph/ops/op.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
class LayoutDescriptor;
namespace op
{
/// \brief Layout Conversion
///
/// Converts an input tensor to a tensor with the given layout descriptor
class ConvertLayout : public ngraph::op::RequiresTensorViewArgs
{
public:
ConvertLayout(
const std::shared_ptr<Node>& arg,
const std::shared_ptr<ngraph::runtime::cpu::LayoutDescriptor>& layout);
ConvertLayout(
const std::shared_ptr<Node>& arg,
size_t output_index,
const std::shared_ptr<ngraph::runtime::cpu::LayoutDescriptor>& layout);
virtual std::shared_ptr<Node> copy_with_new_args(
const std::vector<std::shared_ptr<Node>>& new_args) const override
{
if (new_args.size() != 1)
throw ngraph_error("Incorrect number of new arguments");
return std::make_shared<ConvertLayout>(new_args.at(0), output_layout);
}
protected:
size_t arg_output_index;
std::shared_ptr<ngraph::runtime::cpu::LayoutDescriptor> output_layout;
};
}
}
}
}
......@@ -97,7 +97,7 @@ static std::vector<T> apply_permutation(std::vector<T> input, ngraph::AxisVector
return output;
}
void ngraph::pass::CPUFusion::construct_gemm_pattern()
void ngraph::runtime::cpu::pass::CPUFusion::construct_gemm_pattern()
{
Shape shape_w{2, 4};
Shape shape_x{4, 1};
......
......@@ -20,13 +20,19 @@
// Forward declaration of the CPU fusion pass, which lives in
// ngraph::runtime::cpu::pass.
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace pass
{
class CPUFusion;
}
}
}
}
class ngraph::pass::CPUFusion : public ngraph::pass::GraphRewrite
class ngraph::runtime::cpu::pass::CPUFusion : public ngraph::pass::GraphRewrite
{
public:
CPUFusion()
......
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <algorithm>
#include <memory>
#include <mkldnn.hpp>
#include "cpu_layout.hpp"
#include "ngraph/descriptor/output.hpp"
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
//using namespace ngraph::runtime::cpu::pass;
using namespace ngraph;
bool runtime::cpu::pass::CPULayout::run_on_call_graph(const std::list<std::shared_ptr<Node>>& nodes)
{
for (const auto& node : nodes)
{
for (size_t i = 0; i < node->get_output_size(); ++i)
{
auto tv = node->get_output_tensor_view(i);
if (tv->get_tensor_view_layout())
{
continue;
}
auto tvt = tv->get_tensor_view_type();
auto& tensor = tv->get_tensor();
auto rank = tvt->get_shape().size();
auto native_axis_order =
ngraph::runtime::cpu::LayoutDescriptor::create_native_axis_order(rank);
auto layout =
std::make_shared<ngraph::runtime::cpu::LayoutDescriptor>(*tv, native_axis_order);
if (tensor.is_output() || tensor.is_input() || tensor.is_constant())
{
// Set the MKLDNN format to native row-major variants
layout->set_mkldnn_format(mkldnn_utils::CreateNativeDataFormat(*layout));
}
else
{
if (ngraph::runtime::cpu::mkldnn_utils::IsMKLDNNOp(*node))
{
// TODO(jmenon): get_inputs is marked as to-be-deprecated
// but get_input_ops isn't a suitable API so this needs to be
// reworked
for (const descriptor::Input& input : node->get_inputs())
{
const auto& output = input.get_output();
auto output_tv = output.get_tensor_view();
auto output_tvl = output_tv->get_tensor_view_layout();
// TODO(jmenon): Propagate layout based on inputs
// TODO(jmenon): Insert layout conversions when needed
}
}
else
{
layout->set_mkldnn_format(mkldnn::memory::format::format_undef);
}
}
tv->set_tensor_view_layout(layout);
}
}
return false;
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include "ngraph/pass/pass.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace pass
{
/// \brief Call-graph pass that assigns CPU layout descriptors
///
/// Visits the output tensor views of the given nodes and attaches a layout
/// to each one that does not have a layout yet.
class CPULayout : public ngraph::pass::CallGraphPass
{
public:
    /// \param nodes Nodes whose output tensor views are to be given layouts
    /// \return The current implementation always returns false
    bool run_on_call_graph(const std::list<std::shared_ptr<Node>>& nodes) override;
};
}
}
}
}
......@@ -21,6 +21,7 @@
#include "ngraph/descriptor/tensor_view.hpp"
#include "ngraph/shape.hpp"
#include "ngraph/types/element_type.hpp"
#include "ngraph/util.hpp"
namespace ngraph
......
......@@ -144,7 +144,7 @@ TEST(cpu_fusion, cpu_fusion_pass_basic)
auto add = dot + broadcast;
auto graph = make_shared<op::Abs>(add);
pass::Manager pass_manager;
pass_manager.register_pass<pass::CPUFusion>();
pass_manager.register_pass<runtime::cpu::pass::CPUFusion>();
auto func = make_shared<Function>(graph, op::Parameters{A, B, C});
pass_manager.run_passes(func);
ASSERT_NE(std::dynamic_pointer_cast<op::MatmulBias>(graph->get_input_op(0)), nullptr);
......@@ -157,7 +157,7 @@ TEST(cpu_fusion, gemm_mlp)
stringstream ss(json_string);
shared_ptr<Function> func = ngraph::deserialize(ss);
pass::Manager pass_manager;
pass_manager.register_pass<pass::CPUFusion>();
pass_manager.register_pass<runtime::cpu::pass::CPUFusion>();
pass_manager.run_passes(func);
size_t ccg = count_ops_of_type<op::MatmulBias>(func);
ASSERT_EQ(ccg, 3);
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment