Unverified commit 859d1102 authored by Jayaram Bobba, committed by GitHub

Merge pull request #536 from NervanaSystems/jbobba/batchnorm-layout

MKLDNN optimized layouts: convolution bprop, avg_pool fprop and bprop
parents b0648401 1c96c45c
This diff is collapsed.
@@ -107,8 +107,9 @@ void runtime::cpu::CPUTensorView::read(void* target, size_t tensor_offset, size_
auto tvl = this->get_tensor_view_layout();
auto cpu_tvl = dynamic_cast<runtime::cpu::LayoutDescriptor*>(tvl.get());
if (cpu_tvl && cpu_tvl->get_mkldnn_format() != memory::format::format_undef &&
-        cpu_tvl->get_mkldnn_format() !=
-            runtime::cpu::mkldnn_utils::CreateNativeDataFormat(*cpu_tvl))
+        !runtime::cpu::mkldnn_utils::compare_mkldnn_formats(
+            cpu_tvl->get_mkldnn_format(),
+            runtime::cpu::mkldnn_utils::CreateNativeDataFormat(*cpu_tvl)))
{
auto tensor_shape = this->get_shape();
auto input_format = cpu_tvl->get_mkldnn_format();
......
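
Editor's note: the rewritten guard converts on the way out only when the tensor's current mkldnn format differs physically from the native row-major format for its shape; compare_mkldnn_formats, added later in this diff, deliberately treats nchw and oihw as equal. A minimal standalone illustration of the resulting semantics (not part of the diff):

#include <cassert>
#include <mkldnn.hpp>
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"

void illustrate_read_guard()
{
    namespace utils = ngraph::runtime::cpu::mkldnn_utils;
    using mkldnn::memory;
    // nchw (data) and oihw (weights) share the same element order,
    // so read() can copy straight out without an mkldnn reorder.
    assert(utils::compare_mkldnn_formats(memory::format::nchw, memory::format::oihw));
    // nChw8c is a blocked layout: physically different, so a reorder is required.
    assert(!utils::compare_mkldnn_formats(memory::format::nchw, memory::format::nChw8c));
}
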
@@ -25,12 +25,15 @@
#include "ngraph/ops/convolution.hpp"
#include "ngraph/ops/max_pool.hpp"
#include "ngraph/ops/relu.hpp"
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
#include "ngraph/runtime/cpu/cpu_op_annotations.hpp"
#include "ngraph/types/element_type.hpp"
#include "mkldnn_utils.hpp"
using namespace mkldnn;
using namespace ngraph;
using namespace std;
#define TI(x) std::type_index(typeid(x))
@@ -148,3 +151,38 @@ const std::string& runtime::cpu::mkldnn_utils::get_mkldnn_format_string(memory::
std::to_string(fmt));
return it->second;
}
mkldnn::memory::format runtime::cpu::mkldnn_utils::get_input_mkldnn_format(const Node* node,
int index)
{
auto tvl = node->get_inputs()[index].get_output().get_tensor_view()->get_tensor_view_layout();
return dynamic_cast<runtime::cpu::LayoutDescriptor&>(*tvl).get_mkldnn_format();
}
mkldnn::memory::format runtime::cpu::mkldnn_utils::get_output_mkldnn_format(const Node* node,
int index)
{
auto tvl = node->get_output_tensor_view(index)->get_tensor_view_layout();
return dynamic_cast<runtime::cpu::LayoutDescriptor&>(*tvl).get_mkldnn_format();
}
bool runtime::cpu::mkldnn_utils::use_mkldnn_kernel(const ngraph::Node* node)
{
auto op_annotations = static_cast<const ngraph::op::Op*>(node)->get_op_annotations();
return (op_annotations &&
static_pointer_cast<ngraph::runtime::cpu::CPUOpAnnotations>(op_annotations)
->is_mkldnn_op());
}
bool runtime::cpu::mkldnn_utils::compare_mkldnn_formats(mkldnn::memory::format fmt1,
mkldnn::memory::format fmt2)
{
static const set<mkldnn::memory::format> similar_4d_formats{mkldnn::memory::format::nchw,
                                                            mkldnn::memory::format::oihw};
if ((fmt1 == fmt2) || (similar_4d_formats.find(fmt1) != similar_4d_formats.end() &&
similar_4d_formats.find(fmt2) != similar_4d_formats.end()))
{
return true;
}
return false;
}
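
Editor's note: nchw and oihw can be conflated because both denote plain row-major order over four dimensions; only the logical meaning of the axes (batch/channels vs. output/input channels) differs. A worked sketch of the shared offset formula (illustrative; D0..D3 are hypothetical dimension names):

#include <cstddef>

// For dims {D0, D1, D2, D3}, both nchw and oihw map index (a, b, c, d)
// to the same flat offset, so the bytes in memory are identical.
size_t row_major_offset(size_t a, size_t b, size_t c, size_t d,
                        size_t D1, size_t D2, size_t D3)
{
    return ((a * D1 + b) * D2 + c) * D3 + d;
}
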
@@ -38,6 +38,12 @@ namespace ngraph
const std::string& get_mkldnn_data_type_string(const ngraph::element::Type& type);
mkldnn::memory::data_type get_mkldnn_data_type(const ngraph::element::Type& type);
const std::string& get_mkldnn_format_string(mkldnn::memory::format fmt);
mkldnn::memory::format get_input_mkldnn_format(const Node* node, int index);
mkldnn::memory::format get_output_mkldnn_format(const Node* node, int index);
bool use_mkldnn_kernel(const ngraph::Node* node);
bool compare_mkldnn_formats(mkldnn::memory::format fmt1,
mkldnn::memory::format fmt2);
}
}
}
......
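
Editor's note: a hypothetical call site for the new helpers, e.g. in an emitter that holds a const Node* (the surrounding function and its use of the formats are illustrative, not from the diff):

#include "ngraph/runtime/cpu/mkldnn_utils.hpp"

void emit_with_layouts(const ngraph::Node* node)
{
    namespace utils = ngraph::runtime::cpu::mkldnn_utils;
    if (utils::use_mkldnn_kernel(node))
    {
        // Layouts chosen by the layout pass for input 0 and output 0.
        mkldnn::memory::format in_fmt = utils::get_input_mkldnn_format(node, 0);
        mkldnn::memory::format out_fmt = utils::get_output_mkldnn_format(node, 0);
        // ... build mkldnn memory descriptors / primitives from in_fmt, out_fmt ...
        (void)in_fmt;
        (void)out_fmt;
    }
}
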
@@ -25,7 +25,9 @@
#include <mkldnn.hpp>
#include "ngraph/descriptor/output.hpp"
#include "ngraph/ops/avg_pool.hpp"
#include "ngraph/ops/convolution.hpp"
#include "ngraph/ops/relu.hpp"
#include "ngraph/runtime/cpu/cpu_op_annotations.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
@@ -66,6 +68,134 @@ namespace ngraph
convolution->set_op_annotations(op_annotations);
}
}
template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::ConvolutionBackpropData)
{
auto convolution = static_cast<op::ConvolutionBackpropData*>(node);
auto arg0_shape = node->get_input_shape(0);
auto arg1_shape = node->get_input_shape(1);
auto result_shape = node->get_output_shape(0);
auto arg0_rank = arg0_shape.size();
auto arg1_rank = arg1_shape.size();
bool data_dilated = false;
for (size_t s : convolution->get_data_dilation_strides_forward())
{
data_dilated = data_dilated || (s != 1);
}
if (!data_dilated && arg0_rank == 4 && arg1_rank == 4 &&
node->get_input_element_type(0) == element::f32)
{
auto op_annotations =
std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
op_annotations->set_mkldnn_op(true);
convolution->set_op_annotations(op_annotations);
}
}
template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::ConvolutionBackpropFilters)
{
auto convolution = static_cast<op::ConvolutionBackpropFilters*>(node);
auto arg0_shape = node->get_input_shape(0);
auto arg1_shape = node->get_input_shape(1);
auto result_shape = node->get_output_shape(0);
auto arg0_rank = arg0_shape.size();
auto arg1_rank = arg1_shape.size();
bool data_dilated = false;
for (size_t s : convolution->get_data_dilation_strides_forward())
{
data_dilated = data_dilated || (s != 1);
}
if (!data_dilated && arg0_rank == 4 && arg1_rank == 4 &&
node->get_input_element_type(0) == element::f32)
{
auto op_annotations =
std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
op_annotations->set_mkldnn_op(true);
convolution->set_op_annotations(op_annotations);
}
}
template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::AvgPool)
{
auto avg_pool = static_cast<op::AvgPool*>(node);
auto arg0_shape = node->get_input_shape(0);
auto arg0_rank = arg0_shape.size();
auto result_shape = node->get_output_shape(0);
if (arg0_rank == 4 && avg_pool->get_window_shape().size() == 2 &&
node->get_input_element_type(0) == element::f32)
{
auto op_annotations =
std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
op_annotations->set_mkldnn_op(true);
avg_pool->set_op_annotations(op_annotations);
}
}
template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::AvgPoolBackprop)
{
auto avg_pool = static_cast<op::AvgPoolBackprop*>(node);
auto arg0_shape = node->get_input_shape(0);
auto arg0_rank = arg0_shape.size();
auto result_shape = node->get_output_shape(0);
if (arg0_rank == 4 && avg_pool->get_window_shape().size() == 2 &&
node->get_input_element_type(0) == element::f32)
{
auto op_annotations =
std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
op_annotations->set_mkldnn_op(true);
avg_pool->set_op_annotations(op_annotations);
}
}
template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::Relu)
{
auto relu = static_cast<op::Relu*>(node);
auto arg0_shape = node->get_input_shape(0);
auto arg0_rank = arg0_shape.size();
auto result_shape = node->get_output_shape(0);
if (arg0_rank == 4 && node->get_input_element_type(0) == element::f32)
{
auto op_annotations =
std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
op_annotations->set_mkldnn_op(true);
relu->set_op_annotations(op_annotations);
}
}
template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::ReluBackprop)
{
auto relu_bprop = static_cast<op::ReluBackprop*>(node);
auto arg0_shape = node->get_input_shape(0);
auto arg0_rank = arg0_shape.size();
auto result_shape = node->get_output_shape(0);
if (arg0_rank == 4 && node->get_input_element_type(0) == element::f32)
{
auto op_annotations =
std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
op_annotations->set_mkldnn_op(true);
relu_bprop->set_op_annotations(op_annotations);
}
}
}
}
}
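
Editor's note: all six specializations share one shape/type gate: 4D f32 tensors, plus no data dilation for the convolution variants and a 2D window for the pooling variants. A hypothetical helper capturing the convolution-side predicate (not in the diff; factoring it out would remove the copy-paste above):

static bool mkldnn_conv_assignable(const ngraph::Node* node,
                                   const ngraph::Strides& data_dilation)
{
    bool data_dilated = false;
    for (size_t s : data_dilation)
    {
        data_dilated = data_dilated || (s != 1);
    }
    // 4D f32 inputs with unit data dilation qualify for the mkldnn kernel.
    return !data_dilated && node->get_input_shape(0).size() == 4 &&
           node->get_input_shape(1).size() == 4 &&
           node->get_input_element_type(0) == ngraph::element::f32;
}
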
@@ -76,6 +206,16 @@ namespace ngraph
static const runtime::cpu::pass::AssignOpMap s_dispatcher{
{TI(ngraph::op::Convolution),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Convolution>},
{TI(ngraph::op::ConvolutionBackpropData),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::ConvolutionBackpropData>},
{TI(ngraph::op::ConvolutionBackpropFilters),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::ConvolutionBackpropFilters>},
{TI(ngraph::op::AvgPool), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::AvgPool>},
{TI(ngraph::op::AvgPoolBackprop),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::AvgPoolBackprop>},
{TI(ngraph::op::Relu), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Relu>},
{TI(ngraph::op::ReluBackprop),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::ReluBackprop>},
};
bool runtime::cpu::pass::CPUAssignment::run_on_call_graph(
......
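
Editor's note: the TI macro plus s_dispatcher implement dispatch on a node's dynamic type via std::type_index. A standalone model of the pattern (Node and Convolution here are stand-ins, not the ngraph classes, and the real handler signature may differ):

#include <functional>
#include <typeindex>
#include <unordered_map>

#define TI(x) std::type_index(typeid(x))

struct Node { virtual ~Node() = default; };
struct Convolution : Node {};

using AssignOpMap = std::unordered_map<std::type_index, std::function<void(Node*)>>;

void dispatch(const AssignOpMap& dispatcher, Node* node)
{
    // Look up the handler registered for the node's dynamic type.
    auto it = dispatcher.find(std::type_index(typeid(*node)));
    if (it != dispatcher.end())
    {
        it->second(node);
    }
}

int main()
{
    AssignOpMap dispatcher{{TI(Convolution), [](Node*) { /* assign layouts */ }}};
    Convolution conv;
    dispatch(dispatcher, &conv);
}
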
This diff is collapsed.
@@ -53,6 +53,13 @@ namespace ngraph
private:
std::shared_ptr<CPU_ExternalFunction> m_external_function;
static std::shared_ptr<Node> insert_input_conversions(
CPU_ExternalFunction* external_function,
std::shared_ptr<Node>& node,
const std::vector<mkldnn::memory::format>& required_formats);
static void set_output_layouts(
std::shared_ptr<Node>& node,
const std::vector<mkldnn::memory::format>& output_formats);
static void set_default_layouts(CPU_ExternalFunction* external_function,
std::shared_ptr<Node> node);
};
......
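
Editor's note: read together, these declarations sketch the layout-pass contract: a node that wants specific mkldnn input formats gets conversion ops spliced in front of it (insert_input_conversions returns the possibly-replaced node), its outputs are stamped via set_output_layouts, and everything else falls back to set_default_layouts. A conceptual sketch of that flow; choose_mkldnn_formats is a hypothetical stand-in for however the pass picks formats:

void assign_layouts(CPU_ExternalFunction* external_function, std::shared_ptr<Node> node)
{
    std::vector<mkldnn::memory::format> in_fmts, out_fmts;
    if (choose_mkldnn_formats(node, in_fmts, out_fmts)) // hypothetical query
    {
        // Splice layout-conversion ops between each producer and this node.
        node = insert_input_conversions(external_function, node, in_fmts);
        // Record the formats this node's outputs will carry downstream.
        set_output_layouts(node, out_fmts);
    }
    else
    {
        // Default: native row-major layouts everywhere.
        set_default_layouts(external_function, node);
    }
}
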
@@ -6100,8 +6100,9 @@ TEST(${BACKEND_NAME}, convolution_outlining)
EXPECT_EQ(vector<float>{expected_result}, read_vector<float>(result));
}
-TEST(${BACKEND_NAME}, convolution_layout)
+TEST(${BACKEND_NAME}, mkldnn_layouts)
{
+    SKIP_TEST_FOR("INTERPRETER", "${BACKEND_NAME}");
Shape shape_a{1, 16, 2, 2};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape shape_b{32, 16, 1, 1};
@@ -6114,7 +6115,9 @@ TEST(${BACKEND_NAME}, convolution_layout)
CoordinateDiff{0, 0},
CoordinateDiff{0, 0},
Strides{1, 1});
-    auto f = make_shared<Function>(conv1, op::Parameters{A, B});
+    Shape pool_shape{1, 1};
+    auto pool1 = make_shared<op::AvgPool>(conv1, pool_shape);
+    auto f = make_shared<Function>(pool1, op::Parameters{A, B});
auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto external = manager->compile(f);
......