Commit bcfbf099 authored by Adam Straw, committed by Robert Kimball

run Quantize / Dequantize reference kernels on CPU backend (#1780)

* dex core quantize/de

* more types

* codegen

* remove comments

* remove more dead code
parent 7497ca7c
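
For orientation before the diff: the reference kernels this change wires up implement affine quantization and dequantization. The following is a minimal sketch (not part of the commit) of the per-element math, assuming a scalar scale and offset; the real kernels in ngraph/runtime/reference broadcast scale and offset across the axes returned by get_axes(), and the function names here are illustrative only.

// Sketch: per-element math behind the reference quantize/dequantize
// kernels, assuming scalar scale/offset and an int8 quantized type.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>

// real -> quantized: q = round(r / scale) + offset, clamped to the target type
inline int8_t quantize_one(float r, float scale, int8_t offset)
{
    float q = std::round(r / scale) + static_cast<float>(offset);
    q = std::max(q, static_cast<float>(std::numeric_limits<int8_t>::min()));
    q = std::min(q, static_cast<float>(std::numeric_limits<int8_t>::max()));
    return static_cast<int8_t>(q);
}

// quantized -> real: r = (q - offset) * scale
inline float dequantize_one(int8_t q, float scale, int8_t offset)
{
    return static_cast<float>(q - offset) * scale;
}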
@@ -70,6 +70,7 @@ set(SRC
     builder/softmax.cpp
     builder/sum.cpp
     builder/topk.cpp
+    builder/quantization.cpp
     kernel/eigen_thread_pool.cpp
     kernel/pad.cpp
     kernel/reduce_max.cpp
...
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <cstring>
#include "ngraph/op/dequantize.hpp"
#include "ngraph/op/quantize.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/reference/dequantize.hpp"
#include "ngraph/runtime/reference/quantize.hpp"
using namespace std;
using namespace ngraph;

namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            template <>
            void Builder::BUILDER_DECL(ngraph::op::Dequantize)
            {
                auto& functors = external_function->get_functors();
                auto& tensor_data = external_function->get_tensor_data();
                const ngraph::op::Dequantize* dequantize =
                    static_cast<const ngraph::op::Dequantize*>(node);

                function<void(CPURuntimeContext*)> functor;

                auto& arg0_tensor = tensor_data[args[0].get_name()];
                auto& arg1_tensor = tensor_data[args[1].get_name()];
                auto& arg2_tensor = tensor_data[args[2].get_name()];
                auto& out_tensor = tensor_data[out[0].get_name()];

                auto arg0_shape = args[0].get_shape();
                auto arg1_shape = args[1].get_shape();
                auto daxes = dequantize->get_axes();

                if (args[0].get_element_type() == element::i8)
                {
                    if (out[0].get_element_type() == element::f32)
                    {
                        functor = [&, arg0_shape, arg1_shape, daxes](CPURuntimeContext* ctx) {
                            ngraph::runtime::reference::dequantize<int8_t>(
                                static_cast<int8_t*>(arg0_tensor),
                                static_cast<float*>(arg1_tensor),
                                static_cast<int8_t*>(arg2_tensor),
                                static_cast<float*>(out_tensor),
                                arg0_shape,
                                arg1_shape,
                                daxes);
                        };
                    }
                    else if (out[0].get_element_type() == element::f64)
                    {
                        functor = [&, arg0_shape, arg1_shape, daxes](CPURuntimeContext* ctx) {
                            ngraph::runtime::reference::dequantize<int8_t>(
                                static_cast<int8_t*>(arg0_tensor),
                                static_cast<double*>(arg1_tensor),
                                static_cast<int8_t*>(arg2_tensor),
                                static_cast<double*>(out_tensor),
                                arg0_shape,
                                arg1_shape,
                                daxes);
                        };
                    }
                    else
                    {
                        throw ngraph_error("Unsupported dequantization element type");
                    }
                }
                else if (args[0].get_element_type() == element::u8)
                {
                    if (out[0].get_element_type() == element::f32)
                    {
                        functor = [&, arg0_shape, arg1_shape, daxes](CPURuntimeContext* ctx) {
                            ngraph::runtime::reference::dequantize<uint8_t>(
                                static_cast<uint8_t*>(arg0_tensor),
                                static_cast<float*>(arg1_tensor),
                                static_cast<uint8_t*>(arg2_tensor),
                                static_cast<float*>(out_tensor),
                                arg0_shape,
                                arg1_shape,
                                daxes);
                        };
                    }
                    else if (out[0].get_element_type() == element::f64)
                    {
                        functor = [&, arg0_shape, arg1_shape, daxes](CPURuntimeContext* ctx) {
                            ngraph::runtime::reference::dequantize<uint8_t>(
                                static_cast<uint8_t*>(arg0_tensor),
                                static_cast<double*>(arg1_tensor),
                                static_cast<uint8_t*>(arg2_tensor),
                                static_cast<double*>(out_tensor),
                                arg0_shape,
                                arg1_shape,
                                daxes);
                        };
                    }
                    else
                    {
                        throw ngraph_error("Unsupported dequantization element type");
                    }
                }
                else
                {
                    throw ngraph_error("Unsupported input element type");
                }

                functors.emplace_back(functor);
            }

            template <>
            void Builder::BUILDER_DECL(ngraph::op::Quantize)
            {
                auto& functors = external_function->get_functors();
                auto& tensor_data = external_function->get_tensor_data();
                const ngraph::op::Quantize* quantize =
                    static_cast<const ngraph::op::Quantize*>(node);

                function<void(CPURuntimeContext*)> functor;

                auto& arg0_tensor = tensor_data[args[0].get_name()];
                auto& arg1_tensor = tensor_data[args[1].get_name()];
                auto& arg2_tensor = tensor_data[args[2].get_name()];
                auto& out_tensor = tensor_data[out[0].get_name()];

                auto arg0_shape = args[0].get_shape();
                auto arg1_shape = args[1].get_shape();
                auto daxes = quantize->get_axes();

                if (args[0].get_element_type() == element::f32)
                {
                    if (out[0].get_element_type() == element::i8)
                    {
                        functor = [&, arg0_shape, arg1_shape, daxes](CPURuntimeContext* ctx) {
                            ngraph::runtime::reference::quantize<float>(
                                static_cast<float*>(arg0_tensor),
                                static_cast<float*>(arg1_tensor),
                                static_cast<int8_t*>(arg2_tensor),
                                static_cast<int8_t*>(out_tensor),
                                arg0_shape,
                                arg1_shape,
                                daxes);
                        };
                    }
                    else if (out[0].get_element_type() == element::u8)
                    {
                        functor = [&, arg0_shape, arg1_shape, daxes](CPURuntimeContext* ctx) {
                            ngraph::runtime::reference::quantize<float>(
                                static_cast<float*>(arg0_tensor),
                                static_cast<float*>(arg1_tensor),
                                static_cast<uint8_t*>(arg2_tensor),
                                static_cast<uint8_t*>(out_tensor),
                                arg0_shape,
                                arg1_shape,
                                daxes);
                        };
                    }
                    else
                    {
                        throw ngraph_error("Unsupported quantization element type");
                    }
                }
                else if (args[0].get_element_type() == element::f64)
                {
                    if (out[0].get_element_type() == element::i8)
                    {
                        functor = [&, arg0_shape, arg1_shape, daxes](CPURuntimeContext* ctx) {
                            ngraph::runtime::reference::quantize<double>(
                                static_cast<double*>(arg0_tensor),
                                static_cast<double*>(arg1_tensor),
                                static_cast<int8_t*>(arg2_tensor),
                                static_cast<int8_t*>(out_tensor),
                                arg0_shape,
                                arg1_shape,
                                daxes);
                        };
                    }
                    else if (out[0].get_element_type() == element::u8)
                    {
                        functor = [&, arg0_shape, arg1_shape, daxes](CPURuntimeContext* ctx) {
                            ngraph::runtime::reference::quantize<double>(
                                static_cast<double*>(arg0_tensor),
                                static_cast<double*>(arg1_tensor),
                                static_cast<uint8_t*>(arg2_tensor),
                                static_cast<uint8_t*>(out_tensor),
                                arg0_shape,
                                arg1_shape,
                                daxes);
                        };
                    }
                    else
                    {
                        throw ngraph_error("Unsupported quantization element type");
                    }
                }
                else
                {
                    throw ngraph_error("Unsupported input element type");
                }

                functors.emplace_back(functor);
            }

            REGISTER_OP_BUILDER(Dequantize);
            REGISTER_OP_BUILDER(Quantize);
        }
    }
}
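
A note on the lambdas above: the tensor slots (arg0_tensor and friends) are captured by reference because the runtime points them at live buffers only at execution time, while arg0_shape, arg1_shape, and daxes are builder-time locals that must be copied into the closure before the builder returns. A stripped-down, self-contained sketch of that pattern (all names hypothetical, not nGraph's actual API):

// Sketch: capture a late-bound tensor slot by reference, but copy the
// shape, which would dangle if captured by reference.
#include <cstddef>
#include <functional>
#include <vector>

using Functor = std::function<void()>;

Functor make_functor(std::vector<void*>& tensor_data,
                     size_t slot,
                     std::vector<size_t> shape) // taken by value on purpose
{
    auto& tensor = tensor_data[slot]; // slot is re-pointed later, so reference
    return [&tensor, shape]() {
        // a kernel would read `tensor` (now pointing at live data) and
        // iterate according to the copied `shape`
        (void)tensor;
        (void)shape;
    };
}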
@@ -42,6 +42,7 @@
 #include "ngraph/op/convolution.hpp"
 #include "ngraph/op/cos.hpp"
 #include "ngraph/op/cosh.hpp"
+#include "ngraph/op/dequantize.hpp"
 #include "ngraph/op/divide.hpp"
 #include "ngraph/op/dot.hpp"
 #include "ngraph/op/equal.hpp"
@@ -71,6 +72,7 @@
 #include "ngraph/op/parameter.hpp"
 #include "ngraph/op/power.hpp"
 #include "ngraph/op/product.hpp"
+#include "ngraph/op/quantize.hpp"
 #include "ngraph/op/reduce.hpp"
 #include "ngraph/op/reduce_window.hpp"
 #include "ngraph/op/relu.hpp"
@@ -4690,6 +4692,34 @@ namespace ngraph
                 writer.block_end();
             }
+            template <>
+            void CPU_Emitter::EMITTER_DECL(ngraph::op::Dequantize)
+            {
+                auto dequantize = static_cast<const ngraph::op::Dequantize*>(node);
+
+                writer << "reference::dequantize(" << args[0].get_name() << ",\n";
+                writer << "            " << args[1].get_name() << ",\n";
+                writer << "            " << args[2].get_name() << ",\n";
+                writer << "            " << out[0].get_name() << ",\n";
+                writer << "            {" << join(args[0].get_shape()) << "},\n";
+                writer << "            {" << join(args[1].get_shape()) << "},\n";
+                writer << "            {" << join(dequantize->get_axes()) << "});\n";
+            }
+
+            template <>
+            void CPU_Emitter::EMITTER_DECL(ngraph::op::Quantize)
+            {
+                auto quantize = static_cast<const ngraph::op::Quantize*>(node);
+
+                writer << "reference::quantize(" << args[0].get_name() << ",\n";
+                writer << "            " << args[1].get_name() << ",\n";
+                writer << "            " << args[2].get_name() << ",\n";
+                writer << "            " << out[0].get_name() << ",\n";
+                writer << "            {" << join(args[0].get_shape()) << "},\n";
+                writer << "            {" << join(args[1].get_shape()) << "},\n";
+                writer << "            {" << join(quantize->get_axes()) << "});\n";
+            }
 
 #undef TI
         }
     }
...
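
The emitters above splice a direct call to the reference kernel into the generated source. For a hypothetical 2x2 input with a scalar scale and offset (so the scale shape and axis set are empty), the Dequantize emitter would write roughly the following; arg0, arg1, arg2, and out0 stand in for the real generated tensor names:

reference::dequantize(arg0,
            arg1,
            arg2,
            out0,
            {2, 2},
            {},
            {});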
@@ -65,6 +65,7 @@
 #include "ngraph/op/convolution.hpp"
 #include "ngraph/op/cos.hpp"
 #include "ngraph/op/cosh.hpp"
+#include "ngraph/op/dequantize.hpp"
 #include "ngraph/op/divide.hpp"
 #include "ngraph/op/dot.hpp"
 #include "ngraph/op/equal.hpp"
@@ -94,6 +95,7 @@
 #include "ngraph/op/parameter.hpp"
 #include "ngraph/op/power.hpp"
 #include "ngraph/op/product.hpp"
+#include "ngraph/op/quantize.hpp"
 #include "ngraph/op/reduce.hpp"
 #include "ngraph/op/reduce_window.hpp"
 #include "ngraph/op/relu.hpp"
@@ -362,6 +364,8 @@ static const runtime::cpu::OpMap dispatcher{
      &runtime::cpu::CPU_Emitter::emit<runtime::cpu::op::LoopKernel>},
     {TI(ngraph::op::LRN), &runtime::cpu::CPU_Emitter::emit<ngraph::op::LRN>},
     {TI(ngraph::op::ConvolutionAdd), &runtime::cpu::CPU_Emitter::emit<op::ConvolutionAdd>},
+    {TI(ngraph::op::Quantize), &runtime::cpu::CPU_Emitter::emit<op::Quantize>},
+    {TI(ngraph::op::Dequantize), &runtime::cpu::CPU_Emitter::emit<op::Dequantize>},
 };
@@ -436,6 +440,7 @@ void runtime::cpu::CPU_ExternalFunction::compile()
 #include "ngraph/runtime/reference/broadcast.hpp"
 #include "ngraph/runtime/reference/concat.hpp"
 #include "ngraph/runtime/reference/convolution.hpp"
+#include "ngraph/runtime/reference/dequantize.hpp"
 #include "ngraph/runtime/reference/dot.hpp"
 #include "ngraph/runtime/reference/lrn.hpp"
 #include "ngraph/runtime/reference/max.hpp"
@@ -446,6 +451,7 @@ void runtime::cpu::CPU_ExternalFunction::compile()
 #include "ngraph/runtime/reference/or.hpp"
 #include "ngraph/runtime/reference/pad.hpp"
 #include "ngraph/runtime/reference/product.hpp"
+#include "ngraph/runtime/reference/quantize.hpp"
 #include "ngraph/runtime/reference/reduce.hpp"
 #include "ngraph/runtime/reference/reduce_window.hpp"
 #include "ngraph/runtime/reference/relu.hpp"
...
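
The dispatcher entries added above key each emitter off the node's concrete C++ type via the TI() macro. A self-contained sketch of that type-index dispatch pattern (the types and names here are hypothetical stand-ins, not nGraph's actual classes):

// Sketch: dispatch on a node's dynamic type with std::type_index.
#include <functional>
#include <typeindex>
#include <unordered_map>

struct Node { virtual ~Node() = default; };
struct Quantize : Node {};
struct Dequantize : Node {};

using Emitter = std::function<void(const Node*)>;
using OpMap = std::unordered_map<std::type_index, Emitter>;

#define TI(T) std::type_index(typeid(T))

int main()
{
    OpMap dispatcher{
        {TI(Quantize), [](const Node*) { /* emit quantize kernel call */ }},
        {TI(Dequantize), [](const Node*) { /* emit dequantize kernel call */ }},
    };

    Quantize q;
    // typeid on a polymorphic object yields its dynamic type
    dispatcher.at(std::type_index(typeid(q)))(&q);
}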
@@ -12,10 +12,3 @@ one_hot_vector_1_fp
 one_hot_vector_1_fp_nonint
 backwards_batch_norm_three_outputs
 backwards_maxpool_n2_c1_hw5_3x3_str2_max_pad1x2_2x3
-quantize
-quantize_axes
-quantize_int8
-quantize_clamp
-dequantize
-dequantize_axes
-dequantize_int8