Commit 6cd77ff2 authored by Jayaram Bobba's avatar Jayaram Bobba Committed by Scott Cyphers

Inplace convert for bitcasting ops (#2446)

* Inplace convert for bitcasting ops

* Don't pass through blocked layouts

* More checks for types not handled by mkldnn
parent 39fe4f24
......@@ -103,7 +103,10 @@ namespace ngraph
auto functor = [&, kernel, element_count](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
kernel(arg_tensor, out_tensor, element_count, ectx->arena);
if (arg_tensor != out_tensor)
{
kernel(arg_tensor, out_tensor, element_count, ectx->arena);
}
};
functors.emplace_back(functor);
}
......
......@@ -1386,6 +1386,8 @@ namespace ngraph
{
auto& result_element_type = out[0].get_element_type();
writer << "if ((void*)" << out[0].get_name() << " != (void*)" << args[0].get_name()
<< ") \n";
writer.block_begin();
writer << "#pragma omp parallel for\n";
writer << "for (size_t i = 0; i < " << out[0].get_size() << "; i++)\n";
......
......@@ -289,6 +289,17 @@ mkldnn::memory::desc runtime::cpu::mkldnn_utils::create_default_mkldnn_md(
return memory::desc(memory::dims(shape.begin(), shape.end()), et, format);
}
// Returns true iff the element type maps to a defined mkldnn data type,
// i.e. an mkldnn memory descriptor can be created for it.
bool runtime::cpu::mkldnn_utils::can_create_mkldnn_md(const ngraph::element::Type type)
{
    const auto& type_map = get_mkldnn_data_type_map();
    const auto entry = type_map.find(type);
    return entry != type_map.end() &&
           entry->second != mkldnn::memory::data_type::data_undef;
}
bool runtime::cpu::mkldnn_utils::can_create_mkldnn_md(const Shape& dims,
const Strides& strides,
const ngraph::element::Type type)
......
......@@ -51,6 +51,7 @@ namespace ngraph
bool is_output,
mkldnn::memory::format format);
bool is_perm_sorted(const Strides& a, const AxisVector& perm);
bool can_create_mkldnn_md(const ngraph::element::Type type);
bool can_create_mkldnn_md(const Shape& dims,
const Strides& strides,
const ngraph::element::Type type);
......
......@@ -30,6 +30,7 @@
#include "ngraph/op/batch_norm.hpp"
#include "ngraph/op/concat.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convert.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/dequantize.hpp"
#include "ngraph/op/experimental/quantized_avg_pool.hpp"
......@@ -670,39 +671,26 @@ namespace ngraph
template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::QuantizedConvolutionRelu)
{
    // Assign the mkldnn kernel unconditionally. The old u8/i8 input-type
    // guard was removed; keeping both the guarded call and the unguarded
    // call (as the text above did) would assign the kernel twice whenever
    // the guard held.
    runtime::cpu::mkldnn_utils::assign_mkldnn_kernel(node);
}
template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::QuantizedConvolutionBias)
{
    // Assign the mkldnn kernel unconditionally. The old u8/i8 input-type
    // guard was removed; retaining both the guarded and the unguarded call
    // (as the text above did) would assign the kernel twice whenever the
    // guard held.
    runtime::cpu::mkldnn_utils::assign_mkldnn_kernel(node);
}
template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::QuantizedConvolutionBiasAdd)
{
    auto quantized_conv_bias = static_cast<op::QuantizedConvolutionBiasAdd*>(node);
    // Build and attach the op annotations exactly once. The previous
    // type-guarded copy of this code duplicated the unconditional version
    // below, constructing and assigning the annotations twice when the
    // guard held.
    auto op_annotations =
        std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
    op_annotations->set_mkldnn_op(true);
    const int ADD_INPUT = 3;
    // Accumulates conv into the second input of the unfused add
    op_annotations->add_in_place_oi_pair({0, ADD_INPUT, true});
    quantized_conv_bias->set_op_annotations(op_annotations);
}
template <>
......@@ -710,18 +698,13 @@ namespace ngraph
{
auto quantized_conv_bias =
static_cast<op::QuantizedConvolutionBiasSignedAdd*>(node);
if (node->get_input_element_type(0) == element::u8 &&
node->get_input_element_type(1) == element::i8 &&
node->get_input_element_type(3) == element::i8)
{
auto op_annotations =
std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
op_annotations->set_mkldnn_op(true);
const int ADD_INPUT = 3;
// Accumulates conv into the second input of the unfused add
op_annotations->add_in_place_oi_pair({0, ADD_INPUT, true});
quantized_conv_bias->set_op_annotations(op_annotations);
}
auto op_annotations =
std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
op_annotations->set_mkldnn_op(true);
const int ADD_INPUT = 3;
// Accumulates conv into the second input of the unfused add
op_annotations->add_in_place_oi_pair({0, ADD_INPUT, true});
quantized_conv_bias->set_op_annotations(op_annotations);
}
template <>
......@@ -810,6 +793,22 @@ namespace ngraph
}
runtime::cpu::mkldnn_utils::assign_mkldnn_kernel(node);
}
template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::Convert)
{
    auto convert = static_cast<op::Convert*>(node);
    const auto& in_type = node->get_input_element_type(0);
    const auto& out_type = node->get_output_element_type(0);
    // An i8 <-> u8 conversion is a pure bitcast, so the output is allowed to
    // alias the input buffer (in-place pair {0, 0}, non-destructive).
    const bool is_bitcast = (in_type == element::i8 && out_type == element::u8) ||
                            (in_type == element::u8 && out_type == element::i8);
    if (is_bitcast)
    {
        auto op_annotations =
            std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
        op_annotations->add_in_place_oi_pair({0, 0, false});
        convert->set_op_annotations(op_annotations);
    }
}
}
}
}
......@@ -820,6 +819,7 @@ namespace ngraph
static const runtime::cpu::pass::AssignOpMap s_dispatcher{
{TI(ngraph::op::Add), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Add>},
{TI(ngraph::op::Concat), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Concat>},
{TI(ngraph::op::Convert), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Convert>},
{TI(ngraph::op::AvgPool), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::AvgPool>},
{TI(ngraph::op::AvgPoolBackprop),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::AvgPoolBackprop>},
......
......@@ -31,6 +31,7 @@
#include "ngraph/op/avg_pool.hpp"
#include "ngraph/op/batch_norm.hpp"
#include "ngraph/op/concat.hpp"
#include "ngraph/op/convert.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/experimental/quantized_avg_pool.hpp"
#include "ngraph/op/experimental/quantized_conv.hpp"
......@@ -1936,6 +1937,31 @@ namespace ngraph
set_native_layouts(external_function, node);
}
}
template <>
void CPULayout::LAYOUT_DECL(ngraph::op::Convert)
{
    auto input_md = mkldnn_utils::get_input_mkldnn_md(node.get(), 0);
    auto tv = node->get_output_tensor_ptr(0);
    // The input layout can only be propagated when it is a concrete,
    // non-blocked mkldnn format and the output element type is one mkldnn
    // can describe.
    const bool can_pass_through =
        input_md.data.format != mkldnn_blocked &&
        input_md.data.format != mkldnn_format_undef &&
        mkldnn_utils::can_create_mkldnn_md(tv->get_element_type());
    if (can_pass_through)
    {
        // Reuse the input's format for the output descriptor.
        vector<memory::desc> o_mds;
        o_mds.push_back(mkldnn_utils::create_default_mkldnn_md(
            node.get(), 0, true, static_cast<memory::format>(input_md.data.format)));
        set_output_layouts(node, o_mds);
    }
    else
    {
        // Cannot pass through layout information for blocked layouts at the moment
        set_native_layouts(external_function, node);
    }
}
}
}
}
......@@ -1945,6 +1971,7 @@ namespace ngraph
static const runtime::cpu::pass::LayoutOpMap s_dispatcher{
{TI(ngraph::op::Concat), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Concat>},
{TI(ngraph::op::Convert), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Convert>},
{TI(ngraph::op::AvgPool), &runtime::cpu::pass::CPULayout::layout<ngraph::op::AvgPool>},
{TI(ngraph::op::AvgPoolBackprop),
&runtime::cpu::pass::CPULayout::layout<ngraph::op::AvgPoolBackprop>},
......
......@@ -668,3 +668,24 @@ TEST(cpu_test, convolution_large_padding)
auto cpu_f = make_function();
compare_backends(int_f, cpu_f, "INTERPRETER", "CPU", 1e-4, 1e-4);
}
TEST(cpu_test, convert_inplace)
{
    // Exercise a u8 -> i8 Convert that the CPU backend may execute in place:
    // 254 + 1 wraps to 255 in u8, bitcasts to -1 in i8, and -1 - 1 == -2.
    Shape shape{2, 2};
    auto input = make_shared<op::Parameter>(element::u8, shape);
    auto ones_u8 = op::Constant::create(element::u8, shape, {1, 1, 1, 1});
    auto ones_i8 = op::Constant::create(element::i8, shape, {1, 1, 1, 1});
    auto f = make_shared<Function>(
        make_shared<op::Convert>(input + ones_u8, element::i8) - ones_i8,
        ParameterVector{input});

    auto backend = runtime::Backend::create("CPU");

    // Create some tensors for input/output
    auto a = backend->create_tensor(element::u8, shape);
    copy_data(a, vector<uint8_t>{1, 2, 3, 254});
    auto result = backend->create_tensor(element::i8, shape);

    auto handle = backend->compile(f);
    backend->call_with_validate(handle, {result}, {a});
    EXPECT_EQ((vector<int8_t>{1, 2, 3, -2}), read_vector<int8_t>(result));
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment