Commit 1f40160d authored by Nishant Patel, committed by Scott Cyphers

QCBiasAdd and QCBiasSignedAdd for mkldnn (#2062)

* Quantize the bias to int32

* Bias scale fix

* mnist works

* Quantize Bias

* Introduce Quantize op in the graph to quantize bias & address review feedback

* Add QuantizedConvBiasAdd

* Comments and some refactoring

* Add test case with float bias and enable int32 as quantized type in ngraph

* Change shape of scale from Shape{} to Shape{1} in the backend

* Add QuantizedConvBiasSignedAdd

* Fix Layouts, clean up and a test case for QCBA

* Test case for QCBSA

* cleanup mkldnn_emitter.hpp

* fix build error

* Constant fold
parent 4034a0c2
@@ -230,5 +230,125 @@ namespace ngraph
return make_shared<op::QuantizedMaxPool>(
input, window_shape, window_movement_strides, padding_below, padding_above);
}
std::shared_ptr<Node>
ScaledQuantizedConvolutionBiasAdd(std::shared_ptr<Node> input,
std::shared_ptr<Node> filters,
std::shared_ptr<Node> bias,
std::shared_ptr<Node> sum_input,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
std::shared_ptr<Node> min_input,
std::shared_ptr<Node> max_input,
std::shared_ptr<Node> min_filter,
std::shared_ptr<Node> max_filter,
std::shared_ptr<Node> min_freezed_output_conv_1,
std::shared_ptr<Node> max_freezed_output_conv_1,
std::shared_ptr<Node> min_freezed_output_conv_2,
std::shared_ptr<Node> max_freezed_output_conv_2,
const bool with_relu)
{
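// With fused ReLU the result is non-negative, so it requantizes to u8;
// otherwise the signed i8 range is kept.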
auto output_et = with_relu ? element::u8 : element::i8;
auto requantization_scale = quantization_util::get_scale(min_input,
max_input,
min_filter,
max_filter,
min_freezed_output_conv_1,
max_freezed_output_conv_1,
output_et);
auto sum_scale = builder::quantization_util::get_sum_scale(min_freezed_output_conv_1,
max_freezed_output_conv_1,
min_freezed_output_conv_2,
max_freezed_output_conv_2);
if (bias->get_element_type() != element::i32)
{
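// A float bias is quantized to i32 with scale = input_scale * filter_scale
// (computed by get_bias_scale), matching the convolution's i32 accumulator.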
auto zero = make_constant(element::i32, min_input->get_shape(), 0);
AxisSet quantization_axes;
auto bias_scale =
quantization_util::get_bias_scale(min_input, max_input, min_filter, max_filter);
op::Quantize::RoundMode round_mode =
op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
bias = make_shared<op::Quantize>(
bias, bias_scale, zero, element::i32, quantization_axes, round_mode);
}
return make_shared<op::QuantizedConvolutionBiasAdd>(input,
filters,
bias,
sum_input,
window_movement_strides,
window_dilation_strides,
padding_below,
padding_above,
data_dilation_strides,
requantization_scale,
sum_scale,
with_relu);
}
std::shared_ptr<Node>
ScaledQuantizedConvolutionBiasSignedAdd(std::shared_ptr<Node> input,
std::shared_ptr<Node> filters,
std::shared_ptr<Node> bias,
std::shared_ptr<Node> sum_input,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
std::shared_ptr<Node> min_input,
std::shared_ptr<Node> max_input,
std::shared_ptr<Node> min_filter,
std::shared_ptr<Node> max_filter,
std::shared_ptr<Node> min_freezed_output_conv_1,
std::shared_ptr<Node> max_freezed_output_conv_1,
std::shared_ptr<Node> min_freezed_output_conv_2,
std::shared_ptr<Node> max_freezed_output_conv_2,
const bool with_relu)
{
auto output_et = with_relu ? element::u8 : element::i8;
auto requantization_scale = quantization_util::get_scale(min_input,
max_input,
min_filter,
max_filter,
min_freezed_output_conv_1,
max_freezed_output_conv_1,
output_et);
auto sum_scale = builder::quantization_util::get_sum_scale(min_freezed_output_conv_1,
max_freezed_output_conv_1,
min_freezed_output_conv_2,
max_freezed_output_conv_2);
if (bias->get_element_type() != element::i32)
{
auto zero = make_constant(element::i32, min_input->get_shape(), 0);
AxisSet quantization_axes;
auto bias_scale =
quantization_util::get_bias_scale(min_input, max_input, min_filter, max_filter);
op::Quantize::RoundMode round_mode =
op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
bias = make_shared<op::Quantize>(
bias, bias_scale, zero, element::i32, quantization_axes, round_mode);
}
return make_shared<op::QuantizedConvolutionBiasSignedAdd>(input,
filters,
bias,
sum_input,
window_movement_strides,
window_dilation_strides,
padding_below,
padding_above,
data_dilation_strides,
requantization_scale,
sum_scale,
with_relu);
}
}
}
@@ -105,5 +105,45 @@ namespace ngraph
const Shape& padding_above,
std::shared_ptr<Node> min,
std::shared_ptr<Node> max);
std::shared_ptr<Node>
ScaledQuantizedConvolutionBiasAdd(std::shared_ptr<Node> input,
std::shared_ptr<Node> filters,
std::shared_ptr<Node> bias,
std::shared_ptr<Node> sum_input,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
std::shared_ptr<Node> min_input,
std::shared_ptr<Node> max_input,
std::shared_ptr<Node> min_filter,
std::shared_ptr<Node> max_filter,
std::shared_ptr<Node> min_freezed_output_conv_1,
std::shared_ptr<Node> max_freezed_output_conv_1,
std::shared_ptr<Node> min_freezed_output_conv_2,
std::shared_ptr<Node> max_freezed_output_conv_2,
const bool with_relu);
std::shared_ptr<Node>
ScaledQuantizedConvolutionBiasSignedAdd(std::shared_ptr<Node> input,
std::shared_ptr<Node> filters,
std::shared_ptr<Node> bias,
std::shared_ptr<Node> sum_input,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
std::shared_ptr<Node> min_input,
std::shared_ptr<Node> max_input,
std::shared_ptr<Node> min_filter,
std::shared_ptr<Node> max_filter,
std::shared_ptr<Node> min_freezed_output_conv_1,
std::shared_ptr<Node> max_freezed_output_conv_1,
std::shared_ptr<Node> min_freezed_output_conv_2,
std::shared_ptr<Node> max_freezed_output_conv_2,
const bool with_relu);
}
}
@@ -177,6 +177,32 @@ namespace ngraph
return range / (max_abs_input_range * max_abs_filter_range);
}
std::shared_ptr<Node> get_sum_scale(std::shared_ptr<Node> min_freezed_output_conv_1,
std::shared_ptr<Node> max_freezed_output_conv_1,
std::shared_ptr<Node> min_freezed_output_conv_2,
std::shared_ptr<Node> max_freezed_output_conv_2)
{
auto type = min_freezed_output_conv_1->get_element_type();
if (type != max_freezed_output_conv_1->get_element_type() ||
type != min_freezed_output_conv_2->get_element_type() ||
type != max_freezed_output_conv_2->get_element_type())
{
throw ngraph_error("get_sum_scale: min and max must have same type");
}
auto shape = min_freezed_output_conv_1->get_shape();
if (shape != max_freezed_output_conv_1->get_shape() ||
shape != min_freezed_output_conv_2->get_shape() ||
shape != max_freezed_output_conv_2->get_shape())
{
throw ngraph_error("get_sum_scale: min and max must have same shape");
}
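// mkldnn's sum post-op accumulates the summand in the first convolution's
// output scale, so conv_2's output is rescaled by the ratio of the two ranges.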
auto max_abs_conv_1 = max_abs(min_freezed_output_conv_1, max_freezed_output_conv_1);
auto max_abs_conv_2 = max_abs(min_freezed_output_conv_2, max_freezed_output_conv_2);
return max_abs_conv_2 / max_abs_conv_1;
}
std::shared_ptr<Node> get_scale(std::shared_ptr<Node> input_min_range,
std::shared_ptr<Node> input_max_range,
const ngraph::element::Type& quant_type,
......
@@ -90,3 +90,146 @@ shared_ptr<Node> op::QuantizedConvolutionBias::copy_with_new_args(const NodeVect
new_args.at(3),
m_with_relu));
}
op::QuantizedConvolutionBiasAdd::QuantizedConvolutionBiasAdd(const shared_ptr<Node>& data_batch,
const shared_ptr<Node>& filters,
const shared_ptr<Node>& bias,
const shared_ptr<Node>& sum_input,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node> scale,
const std::shared_ptr<Node> sum_scale,
const bool with_relu)
: Op("QuantizedConvolutionBiasAdd",
check_single_output_args({data_batch, filters, bias, sum_input, scale, sum_scale}))
, m_window_movement_strides(window_movement_strides)
, m_window_dilation_strides(window_dilation_strides)
, m_padding_below(padding_below)
, m_padding_above(padding_above)
, m_data_dilation_strides(data_dilation_strides)
, m_with_relu(with_relu)
{
constructor_validate_and_infer_types();
auto& data_batch_shape = data_batch->get_shape();
auto& filters_shape = filters->get_shape();
// TODO: call ngraph util
// util::validate_convbias_shapes(data_batch_shape, filters_shape, bias->get_shape());
auto output_et = with_relu ? element::u8 : element::i8;
set_output_type(0,
output_et,
util::infer_convolution_output_shape(this,
data_batch_shape,
filters_shape,
window_movement_strides,
window_dilation_strides,
padding_below,
padding_above,
data_dilation_strides,
0, /* batch_axis_data, */
1, /* input_channel_axis_data, */
1, /* input_channel_axis_filters, */
0, /* output_channel_axis_filters, */
0, /* batch_axis_result, */
1 /* output_channel_axis_result, */
));
}
shared_ptr<Node>
op::QuantizedConvolutionBiasAdd::copy_with_new_args(const NodeVector& new_args) const
{
if (new_args.size() != 6)
{
throw ngraph_error("Incorrect number of new arguments");
}
return shared_ptr<Node>(new QuantizedConvolutionBiasAdd(new_args.at(0),
new_args.at(1),
new_args.at(2),
new_args.at(3),
get_window_movement_strides(),
get_window_dilation_strides(),
get_padding_below(),
get_padding_above(),
get_data_dilation_strides(),
new_args.at(4),
new_args.at(5),
m_with_relu));
}
op::QuantizedConvolutionBiasSignedAdd::QuantizedConvolutionBiasSignedAdd(
const shared_ptr<Node>& data_batch,
const shared_ptr<Node>& filters,
const shared_ptr<Node>& bias,
const shared_ptr<Node>& sum_input,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node> scale,
const std::shared_ptr<Node> sum_scale,
const bool with_relu)
: Op("QuantizedConvolutionBiasSignedAdd",
check_single_output_args({data_batch, filters, bias, sum_input, scale, sum_scale}))
, m_window_movement_strides(window_movement_strides)
, m_window_dilation_strides(window_dilation_strides)
, m_padding_below(padding_below)
, m_padding_above(padding_above)
, m_data_dilation_strides(data_dilation_strides)
, m_with_relu(with_relu)
{
constructor_validate_and_infer_types();
auto& data_batch_shape = data_batch->get_shape();
auto& filters_shape = filters->get_shape();
// TODO: call ngraph util
// util::validate_convbias_shapes(data_batch_shape, filters_shape, bias->get_shape());
auto output_et = with_relu ? element::u8 : element::i8;
set_output_type(0,
output_et,
util::infer_convolution_output_shape(this,
data_batch_shape,
filters_shape,
window_movement_strides,
window_dilation_strides,
padding_below,
padding_above,
data_dilation_strides,
0, /* batch_axis_data, */
1, /* input_channel_axis_data, */
1, /* input_channel_axis_filters, */
0, /* output_channel_axis_filters, */
0, /* batch_axis_result, */
1 /* output_channel_axis_result, */
));
}
shared_ptr<Node>
op::QuantizedConvolutionBiasSignedAdd::copy_with_new_args(const NodeVector& new_args) const
{
if (new_args.size() != 6)
{
throw ngraph_error("Incorrect number of new arguments");
}
return shared_ptr<Node>(new QuantizedConvolutionBiasSignedAdd(new_args.at(0),
new_args.at(1),
new_args.at(2),
new_args.at(3),
get_window_movement_strides(),
get_window_dilation_strides(),
get_padding_below(),
get_padding_above(),
get_data_dilation_strides(),
new_args.at(4),
new_args.at(5),
m_with_relu));
}
@@ -62,5 +62,79 @@ namespace ngraph
Strides m_data_dilation_strides;
bool m_with_relu;
};
class QuantizedConvolutionBiasAdd : public Op
{
public:
QuantizedConvolutionBiasAdd(const std::shared_ptr<Node>& data_batch,
const std::shared_ptr<Node>& filters,
const std::shared_ptr<Node>& bias,
const std::shared_ptr<Node>& sum_input,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node> scale,
const std::shared_ptr<Node> sum_scale,
const bool with_relu = false);
const Strides& get_window_movement_strides() const { return m_window_movement_strides; }
const Strides& get_window_dilation_strides() const { return m_window_dilation_strides; }
const CoordinateDiff& get_padding_below() const { return m_padding_below; }
const CoordinateDiff& get_padding_above() const { return m_padding_above; }
const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }
std::shared_ptr<Node> get_bias() { return get_argument(2); }
std::shared_ptr<Node> get_filters() { return get_argument(1); }
std::shared_ptr<Node> get_data_batch() { return get_argument(0); }
bool with_relu() const { return m_with_relu; }
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
protected:
Strides m_window_movement_strides;
Strides m_window_dilation_strides;
CoordinateDiff m_padding_below;
CoordinateDiff m_padding_above;
Strides m_data_dilation_strides;
bool m_with_relu;
};
class QuantizedConvolutionBiasSignedAdd : public Op
{
public:
QuantizedConvolutionBiasSignedAdd(const std::shared_ptr<Node>& data_batch,
const std::shared_ptr<Node>& filters,
const std::shared_ptr<Node>& bias,
const std::shared_ptr<Node>& sum_input,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node> scale,
const std::shared_ptr<Node> sum_scale,
const bool with_relu = false);
const Strides& get_window_movement_strides() const { return m_window_movement_strides; }
const Strides& get_window_dilation_strides() const { return m_window_dilation_strides; }
const CoordinateDiff& get_padding_below() const { return m_padding_below; }
const CoordinateDiff& get_padding_above() const { return m_padding_above; }
const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }
std::shared_ptr<Node> get_bias() { return get_argument(2); }
std::shared_ptr<Node> get_filters() { return get_argument(1); }
std::shared_ptr<Node> get_data_batch() { return get_argument(0); }
bool with_relu() const { return m_with_relu; }
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
protected:
Strides m_window_movement_strides;
Strides m_window_dilation_strides;
CoordinateDiff m_padding_below;
CoordinateDiff m_padding_above;
Strides m_data_dilation_strides;
bool m_with_relu;
};
}
}
@@ -130,9 +130,80 @@ namespace ngraph
}
}
template <>
void Builder::BUILDER_DECL(ngraph::op::QuantizedConvolutionBiasAdd)
{
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& functors = external_function->get_functors();
auto& arg0_tensor = external_function->get_tensor_data(args[0].get_name());
auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name());
auto& arg2_tensor = external_function->get_tensor_data(args[2].get_name());
auto& out0_tensor = external_function->get_tensor_data(out[0].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index =
mkldnn_emitter->build_convolution<ngraph::op::QuantizedConvolutionBiasAdd>(
node, args, out);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
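// deps[0..2] hold the data, weights, and bias primitives; deps[3] is the
// output, which also serves as the in-place accumulation target for the sum.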
auto functor = [&, conv_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], arg2_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[3], out0_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, conv_index);
};
functors.emplace_back(functor);
}
else
{
throw ngraph_error(
"unsupported parameters for QuantizedConvolutionBiasAdd via DEX");
}
}
template <>
void Builder::BUILDER_DECL(ngraph::op::QuantizedConvolutionBiasSignedAdd)
{
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& functors = external_function->get_functors();
auto& arg0_tensor = external_function->get_tensor_data(args[0].get_name());
auto& arg1_tensor = external_function->get_tensor_data(args[1].get_name());
auto& arg2_tensor = external_function->get_tensor_data(args[2].get_name());
auto& out0_tensor = external_function->get_tensor_data(out[0].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto conv_index =
mkldnn_emitter
->build_convolution<ngraph::op::QuantizedConvolutionBiasSignedAdd>(
node, args, out);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
auto functor = [&, conv_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], arg1_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[2], arg2_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[3], out0_tensor);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, conv_index);
};
functors.emplace_back(functor);
}
else
{
throw ngraph_error(
"unsupported parameters for QuantizedConvolutionBiasSignedAdd via DEX");
}
}
REGISTER_OP_BUILDER(QuantizedConvolution);
REGISTER_OP_BUILDER(QuantizedConvolutionRelu);
REGISTER_OP_BUILDER(QuantizedConvolutionBias);
REGISTER_OP_BUILDER(QuantizedConvolutionBiasAdd);
REGISTER_OP_BUILDER(QuantizedConvolutionBiasSignedAdd);
}
}
}
@@ -3108,6 +3108,67 @@ namespace ngraph
}
}
template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::QuantizedConvolutionBiasAdd)
{
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto qconv_index =
mkldnn_emitter->build_convolution<ngraph::op::QuantizedConvolutionBiasAdd>(
node, args, out);
auto& deps = mkldnn_emitter->get_primitive_deps(qconv_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
<< ", " << args[1].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
<< ", " << args[2].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[3])
<< ", " << out[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
<< to_string(qconv_index) << ");\n";
}
else
{
throw ngraph_error(
"QuantizedConvolutionBiasAdd is only supported with MKLDNN kernel.");
}
}
template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::QuantizedConvolutionBiasSignedAdd)
{
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto qconv_index =
mkldnn_emitter
->build_convolution<ngraph::op::QuantizedConvolutionBiasSignedAdd>(
node, args, out);
auto& deps = mkldnn_emitter->get_primitive_deps(qconv_index);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
<< ", " << args[1].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[2])
<< ", " << args[2].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[3])
<< ", " << out[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
<< to_string(qconv_index) << ");\n";
}
else
{
throw ngraph_error(
"QuantizedConvolutionBiasSignedAdd is only supported with MKLDNN kernel.");
}
}
template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::ConvolutionBias)
{
......
@@ -344,6 +344,10 @@ static const runtime::cpu::OpMap dispatcher{
{TI(ngraph::op::ConvolutionBias), &runtime::cpu::CPU_Emitter::emit<op::ConvolutionBias>},
{TI(ngraph::op::QuantizedConvolutionBias),
&runtime::cpu::CPU_Emitter::emit<op::QuantizedConvolutionBias>},
{TI(ngraph::op::QuantizedConvolutionBiasAdd),
&runtime::cpu::CPU_Emitter::emit<op::QuantizedConvolutionBiasAdd>},
{TI(ngraph::op::QuantizedConvolutionBiasSignedAdd),
&runtime::cpu::CPU_Emitter::emit<op::QuantizedConvolutionBiasSignedAdd>},
{TI(ngraph::op::ConvolutionRelu), &runtime::cpu::CPU_Emitter::emit<op::ConvolutionRelu>},
{TI(ngraph::op::QuantizedConvolution),
&runtime::cpu::CPU_Emitter::emit<op::QuantizedConvolution>},
......
@@ -131,6 +131,22 @@ namespace ngraph
const float scale,
const mkldnn::post_ops& pops = mkldnn::post_ops());
// TODO (nbpatel) Templatize the return type when we have double scales
template <typename OP>
std::vector<float> extract_scale_value(const ngraph::Node* node, int index)
{
auto qc = dynamic_cast<const OP*>(node);
auto scale_const_op =
std::dynamic_pointer_cast<ngraph::op::Constant>(qc->get_arguments()[index]);
if (scale_const_op == nullptr)
{
throw ngraph_error("Scale must be a Constant");
}
auto scale_val = scale_const_op->template get_vector<float>();
return scale_val;
}
template <typename OP>
size_t build_convolution(const ngraph::Node* node,
const std::vector<TensorViewWrapper>& args,
@@ -166,6 +182,21 @@ namespace ngraph
ops.append_sum(1.f);
}
if (std::is_same<OP, ngraph::op::QuantizedConvolutionBiasAdd>())
{
auto sum_scale_val =
extract_scale_value<ngraph::op::QuantizedConvolutionBiasAdd>(node, 5);
ops.append_sum(sum_scale_val[0]);
}
if (std::is_same<OP, ngraph::op::QuantizedConvolutionBiasSignedAdd>())
{
auto sum_scale_val =
extract_scale_value<ngraph::op::QuantizedConvolutionBiasSignedAdd>(node,
5);
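// The extra factor of 2 compensates for the signed summand: i8 covers a
// symmetric range in 127 steps versus u8's 255, so its per-step value is
// roughly twice as large.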
ops.append_sum(2.0 * sum_scale_val[0]);
}
auto add_relu = [&]() {
if (dynamic_cast<const ngraph::op::ConvolutionBias*>(node))
{
@@ -195,6 +226,19 @@ namespace ngraph
return (dynamic_cast<const ngraph::op::QuantizedConvolutionBias*>(node))
->with_relu();
}
if (dynamic_cast<const ngraph::op::QuantizedConvolutionBiasAdd*>(node))
{
return (dynamic_cast<const ngraph::op::QuantizedConvolutionBiasAdd*>(
node))
->with_relu();
}
if (dynamic_cast<const ngraph::op::QuantizedConvolutionBiasSignedAdd*>(
node))
{
return (dynamic_cast<
const ngraph::op::QuantizedConvolutionBiasSignedAdd*>(node))
->with_relu();
}
return false;
};
@@ -222,18 +266,10 @@ namespace ngraph
convolution->get_padding_above(),
ops);
}
else if (std::is_same<OP, ngraph::op::QuantizedConvolution>())
else if (std::is_same<OP, ngraph::op::QuantizedConvolution>() ||
std::is_same<OP, ngraph::op::QuantizedConvolutionRelu>())
{
auto qc = dynamic_cast<const ngraph::op::QuantizedConvolution*>(node);
auto scale_const_op =
std::dynamic_pointer_cast<ngraph::op::Constant>(qc->get_arguments()[2]);
if (scale_const_op == nullptr)
{
throw ngraph_error("QuantizedConvolution scale must be a Constant");
}
auto scale_val = scale_const_op->get_vector<float>();
auto scale_val = extract_scale_value<OP>(node, 2);
return build_quantized_convolution_forward(
data_desc,
weights_desc,
@@ -245,42 +281,13 @@ namespace ngraph
scale_val[0],
ops);
}
else if (std::is_same<OP, ngraph::op::QuantizedConvolutionRelu>())
else if (std::is_same<OP, ngraph::op::QuantizedConvolutionBias>() ||
std::is_same<OP, ngraph::op::QuantizedConvolutionBiasAdd>() ||
std::is_same<OP, ngraph::op::QuantizedConvolutionBiasSignedAdd>())
{
auto qcr = dynamic_cast<const ngraph::op::QuantizedConvolutionRelu*>(node);
auto scale_const_op = std::dynamic_pointer_cast<ngraph::op::Constant>(
qcr->get_arguments()[2]);
if (scale_const_op == nullptr)
{
throw ngraph_error("QuantizedConvolutionRelu scale must be a Constant");
}
auto scale_val = scale_const_op->get_vector<float>();
return build_quantized_convolution_forward(
data_desc,
weights_desc,
result_desc,
convolution->get_window_movement_strides(),
window_dilation_strides_adjusted,
convolution->get_padding_below(),
convolution->get_padding_above(),
scale_val[0],
ops);
}
else if (std::is_same<OP, ngraph::op::QuantizedConvolutionBias>())
{
auto qcb = dynamic_cast<const ngraph::op::QuantizedConvolutionBias*>(node);
auto scale_const_op = std::dynamic_pointer_cast<ngraph::op::Constant>(
qcb->get_arguments()[3]);
if (scale_const_op == nullptr)
{
throw ngraph_error("QuantizedConvolutionBias scale must be a Constant");
}
auto scale_val = scale_const_op->get_vector<float>();
// conv+bias = cvt_to_int8(scale*(dst + bias))
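// QuantizedConvolutionBias carries its scale at arg 3; the Add variants
// insert sum_input at arg 3, shifting scale to arg 4 (and sum_scale to 5).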
int index =
std::is_same<OP, ngraph::op::QuantizedConvolutionBias>() ? 3 : 4;
auto scale_val = extract_scale_value<OP>(node, index);
auto bias_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
return build_quantized_convolution_forward(
data_desc,
......
@@ -659,6 +659,43 @@ namespace ngraph
}
}
template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::QuantizedConvolutionBiasAdd)
{
auto quantized_conv_bias = static_cast<op::QuantizedConvolutionBiasAdd*>(node);
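// mkldnn's quantized convolution expects u8 data, i8 weights, and, for the
// unsigned Add variant, a u8 summand.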
if (node->get_input_element_type(0) == element::u8 &&
node->get_input_element_type(1) == element::i8 &&
node->get_input_element_type(3) == element::u8)
{
auto op_annotations =
std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
op_annotations->set_mkldnn_op(true);
const int ADD_INPUT = 3;
// Accumulates conv into the second input of the unfused add
op_annotations->add_in_place_oi_pair({0, ADD_INPUT, true});
quantized_conv_bias->set_op_annotations(op_annotations);
}
}
template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::QuantizedConvolutionBiasSignedAdd)
{
auto quantized_conv_bias =
static_cast<op::QuantizedConvolutionBiasSignedAdd*>(node);
if (node->get_input_element_type(0) == element::u8 &&
node->get_input_element_type(1) == element::i8 &&
node->get_input_element_type(3) == element::i8)
{
auto op_annotations =
std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
op_annotations->set_mkldnn_op(true);
const int ADD_INPUT = 3;
// Accumulates conv into the second input of the unfused add
op_annotations->add_in_place_oi_pair({0, ADD_INPUT, true});
quantized_conv_bias->set_op_annotations(op_annotations);
}
}
template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::Dequantize)
{
@@ -797,6 +834,10 @@ static const runtime::cpu::pass::AssignOpMap s_dispatcher{
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::QuantizedConvolutionRelu>},
{TI(ngraph::op::QuantizedConvolutionBias),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::QuantizedConvolutionBias>},
{TI(ngraph::op::QuantizedConvolutionBiasAdd),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::QuantizedConvolutionBiasAdd>},
{TI(ngraph::op::QuantizedConvolutionBiasSignedAdd),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::QuantizedConvolutionBiasSignedAdd>},
{TI(ngraph::op::GroupConvolutionBias),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::GroupConvolutionBias>},
{TI(ngraph::op::Quantize), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Quantize>},
......
@@ -516,6 +516,62 @@ namespace ngraph
}
}
template <>
void CPULayout::LAYOUT_DECL(ngraph::op::QuantizedConvolutionBiasAdd)
{
if (mkldnn_utils::use_mkldnn_kernel(node.get()))
{
vector<memory::desc> i_mds;
vector<memory::desc> o_mds;
ConvolutionLayout<ngraph::op::QuantizedConvolutionBiasAdd, true>(
node, i_mds, o_mds);
auto scale_input_md = mkldnn_utils::create_default_mkldnn_md(
node.get(), 4, false, memory::format::x);
auto sum_scale_input_md = mkldnn_utils::create_default_mkldnn_md(
node.get(), 5, false, memory::format::x);
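// The summand (input 3) must arrive in the convolution's output layout so
// the sum post-op can accumulate in place; reuse o_mds[0] for it.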
i_mds.push_back(o_mds[0]);
i_mds.push_back(scale_input_md);
i_mds.push_back(sum_scale_input_md);
node = insert_input_conversions(external_function, node, i_mds);
set_output_layouts(node, o_mds);
}
else
{
set_native_layouts(external_function, node);
}
}
template <>
void CPULayout::LAYOUT_DECL(ngraph::op::QuantizedConvolutionBiasSignedAdd)
{
if (mkldnn_utils::use_mkldnn_kernel(node.get()))
{
vector<memory::desc> i_mds;
vector<memory::desc> o_mds;
ConvolutionLayout<ngraph::op::QuantizedConvolutionBiasSignedAdd, true>(
node, i_mds, o_mds);
auto scale_input_md = mkldnn_utils::create_default_mkldnn_md(
node.get(), 4, false, memory::format::x);
auto sum_scale_input_md = mkldnn_utils::create_default_mkldnn_md(
node.get(), 5, false, memory::format::x);
i_mds.push_back(o_mds[0]);
i_mds.push_back(scale_input_md);
i_mds.push_back(sum_scale_input_md);
node = insert_input_conversions(external_function, node, i_mds);
set_output_layouts(node, o_mds);
}
else
{
set_native_layouts(external_function, node);
}
}
template <>
void CPULayout::LAYOUT_DECL(ngraph::op::ConvolutionRelu)
{
@@ -1996,6 +2052,10 @@ static const runtime::cpu::pass::LayoutOpMap s_dispatcher{
&runtime::cpu::pass::CPULayout::layout<ngraph::op::QuantizedConvolutionRelu>},
{TI(ngraph::op::QuantizedConvolutionBias),
&runtime::cpu::pass::CPULayout::layout<ngraph::op::QuantizedConvolutionBias>},
{TI(ngraph::op::QuantizedConvolutionBiasAdd),
&runtime::cpu::pass::CPULayout::layout<ngraph::op::QuantizedConvolutionBiasAdd>},
{TI(ngraph::op::QuantizedConvolutionBiasSignedAdd),
&runtime::cpu::pass::CPULayout::layout<ngraph::op::QuantizedConvolutionBiasSignedAdd>},
{TI(ngraph::op::GroupConvolutionBias),
&runtime::cpu::pass::CPULayout::layout<ngraph::op::GroupConvolutionBias>},
};
......
@@ -317,6 +317,121 @@ TEST(builder, scaled_QC_with_bias_and_relu)
EXPECT_EQ((vector<uint8_t>{0, 0, 0, 0, 0, 0, 191, 255, 234}), read_vector<uint8_t>(result));
}
TEST(builder, scaled_QC_with_bias_add_and_relu)
{
Shape shape_a{1, 1, 3, 4}; // input shape
Shape shape_b{1, 1, 3, 3}; // filter shape
Shape shape_r{1, 1, 3, 4}; // output shape
vector<uint8_t> a_data = {1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4};
vector<int8_t> b_data = {1, 2, 3, 4, 5, 0, 0, 1, 2};
vector<int32_t> c_data = {5};
vector<uint8_t> conv_2_data = {1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4};
auto A = make_shared<op::Parameter>(element::u8, shape_a);
auto B = make_shared<op::Parameter>(element::i8, shape_b);
auto Add = make_shared<op::Parameter>(element::u8, shape_a);
auto Bias = make_shared<op::Parameter>(element::i32, Shape{1});
auto C = op::Constant::create(element::f32, Shape{}, {0.0f});
auto D = op::Constant::create(element::f32, Shape{}, {255.0f});
auto E = op::Constant::create(element::f32, Shape{}, {-127.0f});
auto F = op::Constant::create(element::f32, Shape{}, {127.0f});
auto G = op::Constant::create(element::f32, Shape{}, {22.0f});
auto H = op::Constant::create(element::f32, Shape{}, {90.0f});
auto I = op::Constant::create(element::f32, Shape{}, {22.0f});
auto J = op::Constant::create(element::f32, Shape{}, {180.0f});
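// C/D: input range, E/F: filter range, G/H: requantization range of this
// conv, I/J: range of the summand produced by the other conv.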
auto CV = ngraph::builder::ScaledQuantizedConvolutionBiasAdd(A,
B,
Bias,
Add,
Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // below_pads
CoordinateDiff{1, 1}, // above_pads
Strides{1, 1}, // data_dilation
C,
D,
E,
F,
G,
H,
I,
J,
true);
auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, Bias, Add});
constant_fold(f);
auto backend = runtime::Backend::create("CPU");
// Create some tensors for input/output
auto a = backend->create_tensor(element::u8, shape_a);
copy_data(a, a_data);
auto b = backend->create_tensor(element::i8, shape_b);
copy_data(b, b_data);
auto c = backend->create_tensor(element::i32, Shape{1});
copy_data(c, c_data);
auto d = backend->create_tensor(element::u8, shape_a);
copy_data(d, conv_2_data);
auto result = backend->create_tensor(element::u8, shape_r);
backend->call_with_validate(f, {result}, {a, b, c, d});
EXPECT_EQ((vector<uint8_t>{78, 114, 105, 113, 132, 230, 255, 136, 110, 165, 142, 133}),
read_vector<uint8_t>(result));
}
TEST(builder, scaled_QC_with_bias_signed_add_and_relu)
{
Shape shape_a{1, 1, 3, 4}; // input shape
Shape shape_b{1, 1, 3, 3}; // filter shape
Shape shape_r{1, 1, 3, 4}; // output shape
vector<uint8_t> a_data = {1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4};
vector<int8_t> b_data = {1, 2, 3, 4, 5, 0, 0, 1, 2};
vector<int32_t> c_data = {5};
vector<int8_t> conv_2_data = {-1, -2, -3, -4, -5, -6, -10, 0, 1, 2, 3, 4};
auto A = make_shared<op::Parameter>(element::u8, shape_a);
auto B = make_shared<op::Parameter>(element::i8, shape_b);
auto Add = make_shared<op::Parameter>(element::i8, shape_a);
auto Bias = make_shared<op::Parameter>(element::i32, Shape{1});
auto C = op::Constant::create(element::f32, Shape{}, {0.0f});
auto D = op::Constant::create(element::f32, Shape{}, {255.0f});
auto E = op::Constant::create(element::f32, Shape{}, {-127.0f});
auto F = op::Constant::create(element::f32, Shape{}, {127.0f});
auto G = op::Constant::create(element::f32, Shape{}, {22.0f});
auto H = op::Constant::create(element::f32, Shape{}, {90.0f});
auto I = op::Constant::create(element::f32, Shape{}, {22.0f});
auto J = op::Constant::create(element::f32, Shape{}, {90.0f});
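// Same ranges as the unsigned test except J: the summand range is 90 rather
// than 180, which the 2x signed-sum factor in the emitter offsets, giving
// the same effective scale.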
auto CV =
ngraph::builder::ScaledQuantizedConvolutionBiasSignedAdd(A,
B,
Bias,
Add,
Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation
CoordinateDiff{1, 1}, // below_pads
CoordinateDiff{1, 1}, // above_pads
Strides{1, 1}, // data_dilation
C,
D,
E,
F,
G,
H,
I,
J,
true);
auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, Bias, Add});
constant_fold(f);
auto backend = runtime::Backend::create("CPU");
// Create some tensors for input/output
auto a = backend->create_tensor(element::u8, shape_a);
copy_data(a, a_data);
auto b = backend->create_tensor(element::i8, shape_b);
copy_data(b, b_data);
auto c = backend->create_tensor(element::i32, Shape{1});
copy_data(c, c_data);
auto d = backend->create_tensor(element::i8, shape_a);
copy_data(d, conv_2_data);
auto result = backend->create_tensor(element::u8, shape_r);
backend->call_with_validate(f, {result}, {a, b, c, d});
EXPECT_EQ((vector<uint8_t>{76, 110, 99, 105, 122, 218, 255, 136, 110, 165, 142, 133}),
read_vector<uint8_t>(result));
}
TEST(builder, scaled_QC_with_f32_bias_and_relu)
{
Shape shape_a{1, 1, 3, 3}; // input shape
......