Commit a5dd9f6f authored by Ashok Emani, committed by Robert Kimball

handle channelwise scale mask (#2396)

* handle channelwise scale mask

* add Quantize op to cpu_layout

* enable Quantize to use input layout for output

* add comments for channelwise mask

* add Dequantize to cpu_layout

* resolve style-check

* fix unsupported layout issue

* fix s8s8 reorder issue
parent c7e47d1f
@@ -49,6 +49,12 @@ namespace ngraph
//becomes a copy
input_desc = result_desc;
}
else if ((input_desc.data.format == mkldnn_nchw ||
input_desc.data.format == mkldnn_nhwc) &&
result_desc.data.format == mkldnn_OIhw4i16o4i_s8s8)
{
input_desc.data.format = mkldnn_oihw;
}
else if (input_desc.data.format == mkldnn_nchw && input_desc.data.ndims == 4 &&
result_desc.data.ndims == 5 && node->get_users().size() == 1)
{
......
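A minimal sketch of the relabelling done in the hunk above, assuming (as the "fix s8s8 reorder issue" bullet in the commit message suggests) that the blocked OIhw4i16o4i_s8s8 destination only accepts a weights-tagged source; the helper name is illustrative, not from this commit:

#include <mkldnn.hpp>

// Relabel a plain activation descriptor as weights so mkldnn will build a
// reorder into the blocked signed-int8 weights format (OIhw4i16o4i_s8s8).
mkldnn::memory::desc as_weights_desc(mkldnn::memory::desc src)
{
    src.data.format = mkldnn_oihw; // same dims, now tagged as OIHW weights
    return src;
}

The hunk above applies the same relabelling in place on input_desc before building the reorder primitive descriptor.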
@@ -264,8 +264,13 @@ namespace ngraph
vector<float> dyn_scales;
dyn_scales.assign(static_cast<float*>(arg1_tensor),
static_cast<float*>(arg1_tensor) + scales_size);
dyn_scales[0] = 1.0 / dyn_scales[0];
attr.set_output_scales(0, dyn_scales);
for (size_t i = 0; i < scales_size; i++)
{
dyn_scales[i] = 1.0 / dyn_scales[i];
}
// quantize across first dim (mask=2^0) if dyn_scales is a vector
const int mask = scales_size == 1 ? 0 : 1;
attr.set_output_scales(mask, dyn_scales);
attr.set_int_output_round_mode(mkldnn::round_mode::round_nearest);
auto reorder_desc = mkldnn::reorder::primitive_desc(
{input_desc, executor::global_cpu_engine},
......
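The mask passed to set_output_scales is a bit-field: bit d set means the scales vary along dimension d of the destination, so a scalar scale gives mask 0, per-output-channel scales on oihw weights (dim 0) give mask 1, and per-channel scales on an nchw result (dim 1) give mask 2, which is what the convolution hunks below use. A minimal sketch of that convention (helper names are illustrative, not from this commit):

#include <mkldnn.hpp>
#include <vector>

// Bit d of the mask marks dimension d as carrying one scale per index.
int output_scales_mask(size_t scales_size, int channel_dim)
{
    return scales_size == 1 ? 0 : (1 << channel_dim);
}

void set_quantize_reorder_scales(mkldnn::primitive_attr& attr, std::vector<float> dyn_scales)
{
    // nGraph's Quantize divides by the scale while mkldnn output scales
    // multiply, hence the reciprocal taken in the hunk above.
    for (auto& s : dyn_scales)
        s = 1.0f / s;
    // Per-output-channel weight scales sit along dim 0 of an oihw weights
    // tensor, so a vector of scales selects mask 1 (2^0).
    attr.set_output_scales(output_scales_mask(dyn_scales.size(), 0), dyn_scales);
}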
@@ -65,7 +65,9 @@ namespace ngraph
vector<float> dyn_scales;
dyn_scales.assign(static_cast<float*>(arg2_tensor),
static_cast<float*>(arg2_tensor) + scales_size);
conv_attr.set_output_scales(0, dyn_scales);
// use conv channelwise (dim 1, mask=2^1) if dyn_scales is a vector
const int mask = scales_size == 1 ? 0 : 2;
conv_attr.set_output_scales(mask, dyn_scales);
mkldnn_emitter->convolution_forward<false>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
}
@@ -114,7 +116,9 @@ namespace ngraph
vector<float> dyn_scales;
dyn_scales.assign(static_cast<float*>(arg2_tensor),
static_cast<float*>(arg2_tensor) + scales_size);
conv_attr.set_output_scales(0, dyn_scales);
// use conv channelwise (dim 1, mask=2^1) if dyn_scales is a vector
const int mask = scales_size == 1 ? 0 : 2;
conv_attr.set_output_scales(mask, dyn_scales);
mkldnn_emitter->convolution_forward<false>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
}
@@ -165,7 +169,9 @@ namespace ngraph
vector<float> dyn_scales;
dyn_scales.assign(static_cast<float*>(arg3_tensor),
static_cast<float*>(arg3_tensor) + scales_size);
conv_attr.set_output_scales(0, dyn_scales);
// use conv channelwise (dim 1, mask=2^1) if dyn_scales is a vector
const int mask = scales_size == 1 ? 0 : 2;
conv_attr.set_output_scales(mask, dyn_scales);
mkldnn_emitter->convolution_forward<true>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
}
@@ -249,7 +255,9 @@ namespace ngraph
new_pops.append_sum(dyn_post_op_scales[0]);
}
}
conv_attr.set_output_scales(0, dyn_scales);
// use conv channelwise (dim 1, mask=2^1) if dyn_scales is a vector
const int mask = scales_size == 1 ? 0 : 2;
conv_attr.set_output_scales(mask, dyn_scales);
conv_attr.set_post_ops(new_pops);
mkldnn_emitter->convolution_forward<true>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
@@ -338,7 +346,9 @@ namespace ngraph
}
}
conv_attr.set_post_ops(new_pops);
conv_attr.set_output_scales(0, dyn_scales);
// use conv channelwise (dim 1, mask=2^1) if dyn_scales is a vector
const int mask = scales_size == 1 ? 0 : 2;
conv_attr.set_output_scales(mask, dyn_scales);
mkldnn_emitter->convolution_forward<true>(
conv_desc, conv_attr, executor::global_cpu_engine, conv_index);
}
......
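For the convolution cases above, the result tensor is nchw, so per-channel scales sit along dimension 1 and the mask is 2. A hypothetical call with made-up values:

#include <mkldnn.hpp>
#include <vector>

// Hypothetical per-output-channel scales for a quantized convolution result
// in nchw: channels are dim 1, so the mask is 1 << 1 == 2.
void set_conv_channelwise_scales(mkldnn::primitive_attr& conv_attr)
{
    std::vector<float> dyn_scales(8, 0.5f); // made-up: 8 output channels, one scale each
    conv_attr.set_output_scales(1 << 1, dyn_scales);
}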
@@ -159,9 +159,7 @@ std::map<memory::format, const std::string>&
{memory::format::tnc, "memory::format::tnc"},
{memory::format::ldsnc, "memory::format::ldsnc"},
{memory::format::ldigo, "memory::format::ldigo"},
{memory::format::ldigo_p, "memory::format::ldigo_p"},
{memory::format::ldgoi, "memory::format::ldgoi"},
{memory::format::ldgoi_p, "memory::format::ldgoi_p"},
{memory::format::ldgo, "memory::format::ldgo"},
{memory::format::wino_fmt, "memory::format::wino_fmt"},
{memory::format::format_last, "memory::format::format_last"},
......
@@ -33,6 +33,7 @@
#include "ngraph/op/concat.hpp"
#include "ngraph/op/convert.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/dequantize.hpp"
#include "ngraph/op/experimental/quantized_avg_pool.hpp"
#include "ngraph/op/experimental/quantized_conv.hpp"
#include "ngraph/op/experimental/quantized_conv_bias.hpp"
@@ -42,6 +43,7 @@
#include "ngraph/op/lrn.hpp"
#include "ngraph/op/max_pool.hpp"
#include "ngraph/op/op.hpp"
#include "ngraph/op/quantize.hpp"
#include "ngraph/op/relu.hpp"
#include "ngraph/op/reshape.hpp"
#include "ngraph/op/result.hpp"
@@ -1188,6 +1190,60 @@ namespace ngraph
}
}
template <>
void CPULayout::LAYOUT_DECL(ngraph::op::Quantize)
{
auto input_md = mkldnn_utils::get_input_mkldnn_md(node.get(), 0);
auto tv = node->get_output_tensor_ptr(0);
auto fmt = static_cast<mkldnn::memory::format>(input_md.data.format);
if (fmt == mkldnn_blocked || fmt == mkldnn_format_undef ||
!mkldnn_utils::can_create_mkldnn_md(tv->get_element_type()))
{
// Cannot pass through layout information for blocked layouts at the moment
set_native_layouts(external_function, node);
}
else
{
// mkldnn expects nhwc for int8, avoids reorder
if (fmt == mkldnn::memory::format::nchw ||
fmt == mkldnn::memory::format::nChw8c ||
fmt == mkldnn::memory::format::nChw16c)
{
fmt = mkldnn::memory::format::nhwc;
}
vector<memory::desc> o_mds;
o_mds.push_back(mkldnn_utils::create_default_mkldnn_md(
node.get(), 0, true, static_cast<memory::format>(fmt)));
set_output_layouts(node, o_mds);
}
}
template <>
void CPULayout::LAYOUT_DECL(ngraph::op::Dequantize)
{
auto input_md = mkldnn_utils::get_input_mkldnn_md(node.get(), 0);
auto tv = node->get_output_tensor_ptr(0);
auto fmt = static_cast<mkldnn::memory::format>(input_md.data.format);
if (fmt == mkldnn_blocked || fmt == mkldnn_format_undef ||
!mkldnn_utils::can_create_mkldnn_md(tv->get_element_type()))
{
// Cannot pass through layout information for blocked layouts at the moment
set_native_layouts(external_function, node);
}
else
{
// reorder as default nchw layout
if (fmt == mkldnn::memory::format::nhwc)
{
fmt = mkldnn::memory::format::nchw;
}
vector<memory::desc> o_mds;
o_mds.push_back(mkldnn_utils::create_default_mkldnn_md(
node.get(), 0, true, static_cast<memory::format>(fmt)));
set_output_layouts(node, o_mds);
}
}
template <>
void CPULayout::LAYOUT_DECL(ngraph::op::MaxPoolWithIndices)
{
@@ -1989,6 +2045,8 @@ static const runtime::cpu::pass::LayoutOpMap s_dispatcher{
&runtime::cpu::pass::CPULayout::layout<ngraph::op::QuantizedMaxPool>},
{TI(ngraph::op::QuantizedAvgPool),
&runtime::cpu::pass::CPULayout::layout<ngraph::op::QuantizedAvgPool>},
{TI(ngraph::op::Quantize), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Quantize>},
{TI(ngraph::op::Dequantize), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Dequantize>},
{TI(ngraph::op::MaxPoolWithIndices),
&runtime::cpu::pass::CPULayout::layout<ngraph::op::MaxPoolWithIndices>},
{TI(ngraph::op::MaxPoolBackprop),
......