Commit e6267708 authored by Nishant Patel, committed by Robert Kimball

Add support for Dequantize op via mkldnn for IA backend (codegen + DEX) (#1565)

* Add support for Dequantize op via mkldnn for IA backend (codegen + DEX)

* Remove unused variable

* Static cast target range
parent 00a76f3b
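For orientation before the diff: op::Dequantize takes a quantized i8/u8 tensor plus scalar min/max constants and produces an f32 tensor. A minimal usage sketch, assembled from the DequantizeTest helper added at the bottom of this commit (copy_data and read_vector are nGraph test utilities; the 255/100.0f/300.0f values are taken from the tests, not a canonical example):

Shape shape{1};
auto A = make_shared<op::Parameter>(element::u8, shape);
auto min = op::Constant::create(element::f32, Shape{}, {100.0f});
auto max = op::Constant::create(element::f32, Shape{}, {300.0f});
auto r = make_shared<op::Dequantize>(A, min, max, element::u8);
auto f = make_shared<Function>(r, op::ParameterVector{A});

auto backend = runtime::Backend::create("CPU");
auto a = backend->create_tensor(element::u8, shape);
copy_data(a, vector<uint8_t>{255});
auto result = backend->create_tensor(element::f32, shape);
backend->call(f, {result}, {a});
// read_vector<float>(result) == {300.0f}: scale = 300/255, and 255 * scale = 300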

@@ -36,6 +36,7 @@ set(SRC
    builder/convert.cpp
    builder/convert_layout.cpp
    builder/convolution.cpp
    builder/dequantize.cpp
    builder/dot.cpp
    builder/function_call.cpp
    builder/lstm.cpp
@@ -77,6 +78,7 @@ set(SRC
    op/conv_bias.cpp
    op/conv_relu.cpp
    op/convert_layout.cpp
    op/dequantize.cpp
    op/loop_kernel.cpp
    op/lstm.cpp
    op/matmul_bias.cpp
......
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "ngraph/runtime/cpu/op/dequantize.hpp"
#include <vector>
#include "ngraph/op/constant.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            template <>
            void Builder::BUILDER_DECL(ngraph::op::Dequantize)
            {
                if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
                {
                    auto& functors = external_function->get_functors();
                    auto& arg_tensor = external_function->get_tensor_data(args[0].get_name());
                    auto& out_tensor = external_function->get_tensor_data(out[0].get_name());

                    auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
                    auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
                    auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);

                    size_t dequantize_index =
                        mkldnn_emitter->build_dequantization(node, input_desc, result_desc);
                    auto& deps = mkldnn_emitter->get_primitive_deps(dequantize_index);

                    auto functor = [&, dequantize_index](CPURuntimeContext* ctx) {
                        cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor);
                        cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
                        cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, dequantize_index);
                    };
                    functors.emplace_back(functor);
                }
                else
                {
                    throw ngraph_error("unsupported parameters for DequantizeOp via DEX");
                }
            }

            REGISTER_OP_BUILDER(Dequantize);
        }
    }
}
@@ -102,6 +102,7 @@
#include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/runtime/cpu/op/convert_layout.hpp"
#include "ngraph/runtime/cpu/op/dequantize.hpp"
#include "ngraph/runtime/cpu/op/group_conv.hpp"
#include "ngraph/runtime/cpu/op/loop_kernel.hpp"
#include "ngraph/runtime/cpu/op/lstm.hpp"
@@ -2773,6 +2774,32 @@ namespace ngraph
                }
            }

            template <>
            void CPU_Emitter::EMITTER_DECL(ngraph::op::Dequantize)
            {
                if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
                {
                    auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
                    auto input_data_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
                    auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);

                    size_t dequantize_index =
                        mkldnn_emitter->build_dequantization(node, input_data_desc, result_desc);
                    auto& deps = mkldnn_emitter->get_primitive_deps(dequantize_index);

                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
                           << ", " << args[0].get_name() << ");\n";
                    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
                           << ", " << out[0].get_name() << ");\n";
                    writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
                           << to_string(dequantize_index) << ");\n";
                }
                else
                {
                    throw ngraph_error("unsupported parameters for DequantizeOp");
                }
            }
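For illustration, the three writer statements above emit C++ along these lines into the generated kernel (a sketch: the indices 0, 1, and 2 stand in for the runtime values of deps[0], deps[1], and dequantize_index, and arg0/out0 for the tensor names returned by get_name()):

cpu::mkldnn_utils::set_memory_ptr(ctx, 0, arg0);
cpu::mkldnn_utils::set_memory_ptr(ctx, 1, out0);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, 2);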

            template <>
            void CPU_Emitter::EMITTER_DECL(ngraph::op::ConvolutionBackpropFilters)
            {
......
@@ -142,6 +142,7 @@
#include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/runtime/cpu/op/convert_layout.hpp"
#include "ngraph/runtime/cpu/op/dequantize.hpp"
#include "ngraph/runtime/cpu/op/group_conv.hpp"
#include "ngraph/runtime/cpu/op/loop_kernel.hpp"
#include "ngraph/runtime/cpu/op/lstm.hpp"
@@ -292,6 +293,7 @@ static const runtime::cpu::OpMap dispatcher{
{TI(ngraph::op::Ceiling), &runtime::cpu::CPU_Emitter::emit<op::Ceiling>},
{TI(ngraph::op::Sqrt), &runtime::cpu::CPU_Emitter::emit<op::Sqrt>},
{TI(ngraph::op::Convolution), &runtime::cpu::CPU_Emitter::emit<op::Convolution>},
{TI(ngraph::op::Dequantize), &runtime::cpu::CPU_Emitter::emit<op::Dequantize>},
{TI(ngraph::op::ConvolutionBackpropFilters),
&runtime::cpu::CPU_Emitter::emit<op::ConvolutionBackpropFilters>},
{TI(ngraph::op::ConvolutionBackpropData),
......
@@ -19,10 +19,12 @@
#include "mkldnn_emitter.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
#include "ngraph/runtime/cpu/cpu_tensor_view_wrapper.hpp"
#include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/cpu/op/dequantize.hpp"
#include "ngraph/type/element_type.hpp"
using namespace ngraph::runtime::cpu;
@@ -119,6 +121,32 @@ size_t MKLDNNEmitter::build_memory_primitive(const mkldnn::memory::desc& desc)
    return index;
}

size_t MKLDNNEmitter::build_dequantization(const ngraph::Node* node,
                                           const mkldnn::memory::desc& input_desc,
                                           const mkldnn::memory::desc& result_desc)
{
    auto dequantize = static_cast<const ngraph::op::Dequantize*>(node);
    auto min_const_op =
        std::static_pointer_cast<ngraph::op::Constant>(dequantize->get_argument(1));
    auto max_const_op =
        std::static_pointer_cast<ngraph::op::Constant>(dequantize->get_argument(2));
    float min_range = *(static_cast<float const*>(min_const_op->get_data_ptr()));
    float max_range = *(static_cast<float const*>(max_const_op->get_data_ptr()));

    // Scale by the largest absolute range endpoint over the full range of the
    // quantized type: 2^7 - 1 for signed i8, 2^8 - 1 for unsigned u8.
    const float max_abs = std::max(std::abs(min_range), std::abs(max_range));
    bool is_signed = (dequantize->get_dequantize_et()).is_signed();
    const float target_range =
        static_cast<float>((is_signed ? std::pow(2, 7) : std::pow(2, 8)) - 1);
    const float scale_factor = max_abs / target_range;
    std::vector<float> scales;
    scales.push_back(scale_factor);

    // Dequantize is expressed as an MKLDNN reorder whose primitive_attr
    // carries the output scale and rounding mode.
    mkldnn::primitive_attr attr;
    attr.set_output_scales(0, scales);
    attr.set_int_output_round_mode(mkldnn::round_mode::round_nearest);
    return this->build_quantize_reorder(input_desc, result_desc, attr);
}
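To make the scale computation concrete with values from the tests added later in this commit: for an i8 input with min = -1.0f and max = 300.0f, max_abs = 300.0 and target_range = 2^7 - 1 = 127, so scale_factor = 300.0 / 127 ≈ 2.3622, and the quantized value 42 dequantizes to 42 * 2.3622 ≈ 99.2126.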

mkldnn::memory::format MKLDNNEmitter::query_convolution_forward_weight_format(
    const mkldnn::memory::desc& input_data_desc,
    const mkldnn::memory::desc& weights_desc_any,
@@ -638,6 +666,23 @@ size_t MKLDNNEmitter::build_reorder(const mkldnn::memory::desc& input_desc,
    return primitive_index;
}

size_t MKLDNNEmitter::build_quantize_reorder(const mkldnn::memory::desc& input_desc,
                                             const mkldnn::memory::desc& result_desc,
                                             const mkldnn::primitive_attr attr)
{
    size_t input_index = build_memory_primitive(input_desc);
    size_t result_index = build_memory_primitive(result_desc);
    auto reorder_desc =
        mkldnn::reorder::primitive_desc({input_desc, mkldnn_utils::global_cpu_engine},
                                        {result_desc, mkldnn_utils::global_cpu_engine},
                                        attr);

    size_t primitive_index = insert_primitive(new mkldnn::reorder(
        reorder_desc, *m_mkldnn_primitives[input_index], *m_mkldnn_primitives[result_index]));
    m_primitive_deps[primitive_index] = {input_index, result_index};
    return primitive_index;
}
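Outside the emitter machinery, the same pattern can be written directly against the MKLDNN 0.x API. A self-contained sketch (not part of this commit; src_ptr, dst_ptr, and the one-element shapes are hypothetical placeholders):

#include <mkldnn.hpp>
#include <vector>

// Dequantize one s8 value to f32: a plain reorder whose primitive_attr
// carries the output scale, mirroring build_quantize_reorder above.
void dequantize_reorder_sketch(void* src_ptr, void* dst_ptr, float scale)
{
    mkldnn::engine cpu_engine(mkldnn::engine::cpu, 0);
    mkldnn::memory::desc in_desc({1}, mkldnn::memory::data_type::s8,
                                 mkldnn::memory::format::x);
    mkldnn::memory::desc out_desc({1}, mkldnn::memory::data_type::f32,
                                  mkldnn::memory::format::x);
    mkldnn::memory src({in_desc, cpu_engine}, src_ptr);
    mkldnn::memory dst({out_desc, cpu_engine}, dst_ptr);

    // The output scale is what turns a layout reorder into a dequantization.
    mkldnn::primitive_attr attr;
    attr.set_output_scales(0, std::vector<float>{scale});
    attr.set_int_output_round_mode(mkldnn::round_mode::round_nearest);

    auto reorder_pd = mkldnn::reorder::primitive_desc(
        {in_desc, cpu_engine}, {out_desc, cpu_engine}, attr);
    mkldnn::reorder prim(reorder_pd, src, dst);
    mkldnn::stream(mkldnn::stream::kind::eager).submit({prim}).wait();
}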

size_t MKLDNNEmitter::build_lrn_forward(const mkldnn::memory::desc& input_desc,
                                        const mkldnn::memory::desc& result_desc,
                                        float alpha,
......
@@ -504,6 +504,14 @@ namespace ngraph
                                        const mkldnn::memory::desc& result_desc,
                                        float alpha);

                size_t build_quantize_reorder(const mkldnn::memory::desc& input_desc,
                                              const mkldnn::memory::desc& result_desc,
                                              const mkldnn::primitive_attr attr);

                size_t build_dequantization(const ngraph::Node* node,
                                            const mkldnn::memory::desc& input_desc,
                                            const mkldnn::memory::desc& result_desc);

            private:
                std::vector<mkldnn::primitive*> m_mkldnn_primitives;
                std::vector<mkldnn::stream> m_mkldnn_streams;
......
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "ngraph/runtime/cpu/op/dequantize.hpp"
#include "ngraph/op/constant.hpp"
ngraph::op::Dequantize::Dequantize(std::shared_ptr<Node> input,
                                   std::shared_ptr<Node> min,
                                   std::shared_ptr<Node> max,
                                   const element::Type& type)
    : Op("Dequantize", check_single_output_args({input, min, max}))
    , m_element_type(type)
{
    constructor_validate_and_infer_types();

    if (input->get_element_type() != element::u8 && input->get_element_type() != element::i8)
    {
        throw ngraph_error("Dequantization supported only for i8/u8!");
    }

    if (min->get_element_type() != max->get_element_type())
    {
        throw ngraph_error("Min's element type isn't equal to max's!");
    }

    if (min->get_shape().size() != 0)
    {
        throw ngraph_error("Min is not a scalar!");
    }

    if (max->get_shape().size() != 0)
    {
        throw ngraph_error("Max is not a scalar!");
    }

    if (!(std::dynamic_pointer_cast<op::Constant>(min) &&
          std::dynamic_pointer_cast<op::Constant>(max)))
    {
        throw ngraph_error("Min and max have to be constants!");
    }

    set_output_type(0, element::f32, input->get_shape());
}

std::shared_ptr<ngraph::Node>
    ngraph::op::Dequantize::copy_with_new_args(const NodeVector& new_args) const
{
    if (new_args.size() != 3)
    {
        throw ngraph_error("Incorrect number of new arguments");
    }
    return std::make_shared<Dequantize>(
        new_args.at(0), new_args.at(1), new_args.at(2), m_element_type);
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include "ngraph/node.hpp"
#include "ngraph/node_vector.hpp"
#include "ngraph/op/op.hpp"
namespace ngraph
{
    namespace op
    {
        class Dequantize : public Op
        {
        public:
            Dequantize(std::shared_ptr<Node> input,
                       std::shared_ptr<Node> min,
                       std::shared_ptr<Node> max,
                       const element::Type& type);

            const element::Type& get_dequantize_et() const { return m_element_type; }
            virtual std::shared_ptr<Node>
                copy_with_new_args(const NodeVector& new_args) const override;

        private:
            const element::Type m_element_type;
        };
    }
}
@@ -40,6 +40,7 @@
#include "ngraph/runtime/cpu/op/bounded_relu.hpp"
#include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/runtime/cpu/op/dequantize.hpp"
#include "ngraph/runtime/cpu/op/group_conv.hpp"
#include "ngraph/runtime/cpu/op/lstm.hpp"
#include "ngraph/runtime/cpu/op/max_pool_with_indices.hpp"
@@ -659,6 +660,20 @@ namespace ngraph
                        bounded_relu->set_op_annotations(op_annotations);
                    }
                }

                template <>
                void CPUAssignment::ASSIGN_DECL(ngraph::op::Dequantize)
                {
                    if (node->get_input_element_type(0) == element::u8 ||
                        node->get_input_element_type(0) == element::i8)
                    {
                        auto dequantize = static_cast<op::Dequantize*>(node);
                        auto op_annotations =
                            std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
                        op_annotations->set_mkldnn_op(true);
                        dequantize->set_op_annotations(op_annotations);
                    }
                }
            }
        }
    }
@@ -712,6 +727,8 @@ static const runtime::cpu::pass::AssignOpMap s_dispatcher{
{TI(ngraph::op::Lstm), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Lstm>},
{TI(ngraph::op::Rnn), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Rnn>},
{TI(ngraph::op::Softmax), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Softmax>},
{TI(ngraph::op::Dequantize),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Dequantize>},
};
bool runtime::cpu::pass::CPUAssignment::run_on_call_graph(
......
@@ -49,6 +49,7 @@
#include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/runtime/cpu/op/convert_layout.hpp"
#include "ngraph/runtime/cpu/op/dequantize.hpp"
#include "ngraph/runtime/cpu/op/group_conv.hpp"
#include "ngraph/runtime/cpu/op/lstm.hpp"
#include "ngraph/runtime/cpu/op/max_pool_with_indices.hpp"
@@ -1538,6 +1539,20 @@ namespace ngraph
                        set_native_layouts(external_function, node);
                    }
                }

                template <>
                void CPULayout::LAYOUT_DECL(ngraph::op::Dequantize)
                {
                    if (mkldnn_utils::use_mkldnn_kernel(node.get()))
                    {
                        // TODO: propagate the input layout
                        set_native_layouts(external_function, node);
                    }
                    else
                    {
                        throw ngraph_error("Dequantize op is only supported via MKLDNN for now.");
                    }
                }
            }
        }
    }
@@ -1593,6 +1608,7 @@ static const runtime::cpu::pass::LayoutOpMap s_dispatcher{
{TI(ngraph::op::Rnn), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Rnn>},
{TI(ngraph::op::Softmax), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Softmax>},
{TI(ngraph::op::BoundedRelu), &runtime::cpu::pass::CPULayout::layout<ngraph::op::BoundedRelu>},
{TI(ngraph::op::Dequantize), &runtime::cpu::pass::CPULayout::layout<ngraph::op::Dequantize>},
};
bool runtime::cpu::pass::CPULayout::run_on_call_graph(const std::list<std::shared_ptr<Node>>& nodes)
......
@@ -58,7 +58,7 @@ if (NGRAPH_INTERPRETER_ENABLE)
endif()
if (NGRAPH_CPU_ENABLE)
set(SRC ${SRC} core_fusion.cpp)
set(SRC ${SRC} core_fusion.cpp quantize_cpu.cpp)
endif()
add_subdirectory(models)
......
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <algorithm>
#include <cinttypes>
#include <cmath>
#include <cstdlib>
#include <string>
#include "gtest/gtest.h"
#include "ngraph/ngraph.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/get_output_element.hpp"
#include "ngraph/runtime/cpu/op/dequantize.hpp"
#include "util/all_close.hpp"
#include "util/all_close_f.hpp"
#include "util/ndarray.hpp"
#include "util/random.hpp"
#include "util/test_control.hpp"
#include "util/test_tools.hpp"
using namespace std;
using namespace ngraph;
template <typename T>
void DequantizeTest(int input, float min, float max, float expected_output)
{
    vector<T> a_data = {static_cast<T>(input)};
    Shape shape_a{1};
    auto A = make_shared<op::Parameter>(element::from<T>(), shape_a);
    auto B = op::Constant::create(element::f32, Shape{}, {min});
    auto C = op::Constant::create(element::f32, Shape{}, {max});
    auto r = make_shared<op::Dequantize>(A, B, C, element::from<T>());
    auto f = make_shared<Function>(r, op::ParameterVector{A});
    auto backend = runtime::Backend::create("CPU");

    // Create some tensors for input/output, matching the parameter shape
    auto a = backend->create_tensor(element::from<T>(), shape_a);
    copy_data(a, a_data);
    auto result = backend->create_tensor(element::f32, shape_a);

    backend->call(f, {result}, {a});
    EXPECT_EQ((vector<float>{expected_output}), read_vector<float>(result));
}

TEST(quantize_cpu, dequantize_from_uint8)
{
    DequantizeTest<uint8_t>(255, 100.0f, 300.0f, 300.0);
}

TEST(quantize_cpu, dequantize_from_uint8_smallrange)
{
    DequantizeTest<uint8_t>(255, -2.0f, 5.0f, 5.0);
}

TEST(quantize_cpu, dequantize_from_int8_smallrange)
{
    DequantizeTest<int8_t>(-127, -2.0f, 1.0f, -2.0);
}

TEST(quantize_cpu, dequantize_from_int8)
{
    DequantizeTest<int8_t>(42, -1.0f, 300.0f, static_cast<float>(99.212601));
}
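As a cross-check of the expected values above, a small standalone sketch mirroring the scale computation in MKLDNNEmitter::build_dequantization (values rounded by float arithmetic):

#include <cstdio>

int main()
{
    // u8 target range = 2^8 - 1 = 255
    std::printf("%g\n", 255 * (300.0f / 255));  // ~300  (dequantize_from_uint8)
    std::printf("%g\n", 255 * (5.0f / 255));    // ~5    (dequantize_from_uint8_smallrange)
    // i8 target range = 2^7 - 1 = 127
    std::printf("%g\n", -127 * (2.0f / 127));   // ~-2   (dequantize_from_int8_smallrange)
    std::printf("%.6f\n", 42 * (300.0f / 127)); // ~99.212601 (dequantize_from_int8)
    return 0;
}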