Commit e568de2e authored by nishant.b.patel

Change the API to take input_axes, filter_axes & output_axes

parent c75f7db3
......@@ -36,52 +36,6 @@ namespace ngraph
{
namespace quantization
{
// TODO: this code is falling back to fp32 convolution
// need to make this the primary builder which means
// 1) add support for zero point in QuantizedConvolution op API
// 2) add QuantizedConvolution reference kernel, including zero point
//
// Builds a quantized linear convolution as dequantize -> fp32 Convolution ->
// quantize. The empty AxisSet means per-tensor (scalar) scales/zero points.
// The dequantized element type is taken from each scale node; the quantized
// output element type is taken from output_zero_point.
shared_ptr<Node> QuantizedLinearConvolution(const shared_ptr<Node>& input,
                                            const shared_ptr<Node>& filter,
                                            const Strides& window_movement_strides,
                                            const Strides& window_dilation_strides,
                                            const CoordinateDiff& padding_below,
                                            const CoordinateDiff& padding_above,
                                            const Strides& data_dilation_strides,
                                            const shared_ptr<Node>& input_scale,
                                            const shared_ptr<Node>& input_zero_point,
                                            const shared_ptr<Node>& filter_scale,
                                            const shared_ptr<Node>& filter_zero_point,
                                            const shared_ptr<Node>& output_scale,
                                            const shared_ptr<Node>& output_zero_point)
{
    // Empty axis set: scale and zero point broadcast over the whole tensor.
    AxisSet axes;

    auto dq_input = make_shared<op::Dequantize>(
        input, input_scale, input_zero_point, input_scale->get_element_type(), axes);
    auto dq_filter = make_shared<op::Dequantize>(filter,
                                                 filter_scale,
                                                 filter_zero_point,
                                                 filter_scale->get_element_type(),
                                                 axes);

    // Fall back to a plain fp32 convolution on the dequantized operands.
    auto convolution = make_shared<op::Convolution>(dq_input,
                                                    dq_filter,
                                                    window_movement_strides,
                                                    window_dilation_strides,
                                                    padding_below,
                                                    padding_above,
                                                    data_dilation_strides);

    // Re-quantize the fp32 result; round-half-to-even matches the reference
    // quantization semantics used elsewhere in these builders.
    auto q_convolution =
        make_shared<op::Quantize>(convolution,
                                  output_scale,
                                  output_zero_point,
                                  output_zero_point->get_element_type(),
                                  axes,
                                  op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
    // The explicit (and unqualified) `move(...)` was redundant: returning a
    // named local already performs an implicit move into shared_ptr<Node>.
    return q_convolution;
}
shared_ptr<Node> QuantizedLinearConvolutionBias(const shared_ptr<Node>& input,
const shared_ptr<Node>& filter,
const shared_ptr<Node>& bias,
......
......@@ -25,21 +25,6 @@ namespace ngraph
{
namespace quantization
{
/// \brief Builder for a quantized linear convolution.
///
/// Constructs a graph that dequantizes \p input and \p filter using the given
/// scale/zero-point pairs, convolves them, and quantizes the result with
/// \p output_scale / \p output_zero_point.
///
/// \param input              Quantized data batch node.
/// \param filter             Quantized filter node.
/// \param window_movement_strides  Convolution window strides.
/// \param window_dilation_strides  Filter dilation strides.
/// \param padding_below      Padding added below/before each spatial axis.
/// \param padding_above      Padding added above/after each spatial axis.
/// \param data_dilation_strides    Data dilation strides.
/// \param input_scale / input_zero_point    Quantization params of the input.
/// \param filter_scale / filter_zero_point  Quantization params of the filter.
/// \param output_scale / output_zero_point  Quantization params of the output.
/// \return Root node of the constructed quantized convolution subgraph.
std::shared_ptr<Node>
QuantizedLinearConvolution(const std::shared_ptr<Node>& input,
const std::shared_ptr<Node>& filter,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node>& input_scale,
const std::shared_ptr<Node>& input_zero_point,
const std::shared_ptr<Node>& filter_scale,
const std::shared_ptr<Node>& filter_zero_point,
const std::shared_ptr<Node>& output_scale,
const std::shared_ptr<Node>& output_zero_point);
std::shared_ptr<Node>
QuantizedLinearConvolutionBias(const std::shared_ptr<Node>& input,
const std::shared_ptr<Node>& filter,
......
......@@ -40,7 +40,9 @@ namespace ngraph
const shared_ptr<Node>& min_output,
const shared_ptr<Node>& max_output,
const ngraph::element::Type& output_type,
const ngraph::AxisSet& axes)
const ngraph::AxisSet& input_axes,
const ngraph::AxisSet& filter_axes,
const ngraph::AxisSet& output_axes)
{
auto input_scale =
quantization_scale::get_scale(min_input, max_input, input->get_element_type());
......@@ -69,7 +71,9 @@ namespace ngraph
output_scale,
filter_zero_point, // output type will be same as filter
output_type,
axes);
input_axes,
filter_axes,
output_axes);
}
}
}
......@@ -39,6 +39,8 @@ namespace ngraph
const std::shared_ptr<Node>& min_output,
const std::shared_ptr<Node>& max_output,
const ngraph::element::Type& output_type,
const ngraph::AxisSet& axes);
const ngraph::AxisSet& input_axes,
const ngraph::AxisSet& filter_axes,
const ngraph::AxisSet& output_axes);
}
}
......@@ -38,7 +38,9 @@ op::QuantizedConvolution::QuantizedConvolution(const shared_ptr<Node>& input,
const std::shared_ptr<Node>& output_scale,
const std::shared_ptr<Node>& output_zero_point,
const ngraph::element::Type& output_type,
const ngraph::AxisSet& axes)
const ngraph::AxisSet& input_axes,
const ngraph::AxisSet& filter_axes,
const ngraph::AxisSet& output_axes)
: Op("QuantizedConvolution",
check_single_output_args({input,
filters,
......@@ -54,7 +56,9 @@ op::QuantizedConvolution::QuantizedConvolution(const shared_ptr<Node>& input,
, m_padding_above(padding_above)
, m_data_dilation_strides(data_dilation_strides)
, m_output_type(output_type)
, m_axes(axes)
, m_input_axes(input_axes)
, m_filter_axes(filter_axes)
, m_output_axes(output_axes)
{
constructor_validate_and_infer_types();
}
......@@ -165,5 +169,7 @@ shared_ptr<Node> op::QuantizedConvolution::copy_with_new_args(const NodeVector&
new_args.at(6),
new_args.at(7),
m_output_type,
m_axes));
m_input_axes,
m_filter_axes,
m_output_axes));
}
......@@ -57,7 +57,9 @@ namespace ngraph
const std::shared_ptr<Node>& output_scale,
const std::shared_ptr<Node>& output_zero_point,
const ngraph::element::Type& output_type,
const ngraph::AxisSet& axes);
const ngraph::AxisSet& input_axes,
const ngraph::AxisSet& filter_axes,
const ngraph::AxisSet& output_axes);
const Strides& get_window_movement_strides() const { return m_window_movement_strides; }
const Strides& get_window_dilation_strides() const { return m_window_dilation_strides; }
const CoordinateDiff& get_padding_below() const { return m_padding_below; }
......@@ -66,7 +68,9 @@ namespace ngraph
std::shared_ptr<Node> get_filters() { return get_argument(1); }
std::shared_ptr<Node> get_data_batch() { return get_argument(0); }
const ngraph::element::Type& get_output_type() const { return m_output_type; }
const ngraph::AxisSet& get_axes() const { return m_axes; }
const ngraph::AxisSet& get_input_axes() const { return m_input_axes; }
const ngraph::AxisSet& get_filter_axes() const { return m_filter_axes; }
const ngraph::AxisSet& get_output_axes() const { return m_output_axes; }
void validate_and_infer_types() override;
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
......@@ -78,7 +82,9 @@ namespace ngraph
CoordinateDiff m_padding_above;
Strides m_data_dilation_strides;
ngraph::element::Type m_output_type;
ngraph::AxisSet m_axes;
ngraph::AxisSet m_input_axes;
ngraph::AxisSet m_filter_axes;
ngraph::AxisSet m_output_axes;
};
}
}
......@@ -1823,6 +1823,8 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconv_relu(bool with_
output_scale,
int8_zero,
element::i8,
AxisSet{},
AxisSet{},
AxisSet{});
}
auto dq =
......
......@@ -1350,7 +1350,9 @@ static shared_ptr<ngraph::Function>
auto padding_above = node_js.at("padding_above").get<vector<std::ptrdiff_t>>();
auto data_dilation_strides = node_js["data_dilation_strides"];
auto output_type = read_element_type(node_js.at("output_type"));
auto axes = node_js.at("axes").get<set<size_t>>();
auto input_axes = node_js.at("input_axes").get<set<size_t>>();
auto filter_axes = node_js.at("filter_axes").get<set<size_t>>();
auto output_axes = node_js.at("output_axes").get<set<size_t>>();
node = make_shared<op::QuantizedConvolution>(
args[0],
args[1],
......@@ -1366,7 +1368,9 @@ static shared_ptr<ngraph::Function>
args[6],
args[7],
output_type,
axes);
input_axes,
filter_axes,
output_axes);
break;
}
case OP_TYPEID::QuantizedDotBias: { break;
......@@ -2298,7 +2302,9 @@ static json write(const Node& n, bool binary_constant_data)
node["padding_above"] = tmp->get_padding_above();
node["data_dilation_strides"] = tmp->get_data_dilation_strides();
node["output_type"] = write_element_type(tmp->get_element_type());
node["axes"] = tmp->get_axes();
node["input_axes"] = tmp->get_input_axes();
node["filter_axes"] = tmp->get_filter_axes();
node["output_axes"] = tmp->get_output_axes();
break;
}
case OP_TYPEID::QuantizedDotBias: { break;
......
......@@ -7520,6 +7520,8 @@ NGRAPH_TEST(${BACKEND_NAME}, quantized_convolution)
G,
H,
element::i8,
AxisSet{},
AxisSet{},
AxisSet{});
auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B, C, D, E, F, G, H});
auto backend = runtime::Backend::create("${BACKEND_NAME}");
......@@ -7653,6 +7655,8 @@ NGRAPH_TEST(${BACKEND_NAME}, quantized_conv_non_zero_zero_point)
result_scale,
result_zero_point,
element::u8,
AxisSet{},
AxisSet{},
AxisSet{});
auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B});
// Create some tensors for input/output
......
......@@ -1294,61 +1294,6 @@ TEST(builder, dynamic_scaled_QD_with_bias)
read_vector<uint8_t>(f_requantize_relu_r));
}
TEST(builder, scaled_QC_u8u8)
{
    // u8 data convolved with a u8 filter on the CPU backend. All three scales
    // are 2 and all zero points are 0, so the quantized output is simply the
    // integer convolution result multiplied by 2.
    const Shape data_shape{1, 1, 3, 4};
    const Shape weights_shape{1, 1, 3, 3};
    const Shape out_shape{1, 1, 3, 4};
    const vector<uint8_t> data_values{1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4};
    const vector<uint8_t> weights_values{1, 2, 3, 4, 5, 0, 0, 1, 2};

    auto data_param = make_shared<op::Parameter>(element::u8, data_shape);
    auto weights_param = make_shared<op::Parameter>(element::u8, weights_shape);
    auto data_scale = op::Constant::create(element::f32, Shape{}, {2});
    auto weights_scale = op::Constant::create(element::f32, Shape{}, {2});
    auto out_scale = op::Constant::create(element::f32, Shape{}, {2});
    auto zero_point = op::Constant::create(element::u8, Shape{}, {0});

    auto qconv = make_shared<ngraph::op::QuantizedConvolution>(data_param,
                                                               weights_param,
                                                               Strides{1, 1},        // window movement
                                                               Strides{1, 1},        // filter dilation
                                                               CoordinateDiff{1, 1}, // padding below
                                                               CoordinateDiff{1, 1}, // padding above
                                                               Strides{1, 1},        // data dilation
                                                               data_scale,
                                                               zero_point,
                                                               weights_scale,
                                                               zero_point,
                                                               out_scale,
                                                               zero_point,
                                                               element::u8,
                                                               AxisSet{});
    auto func =
        make_shared<Function>(NodeVector{qconv}, ParameterVector{data_param, weights_param});
    constant_fold(func);

    auto backend = runtime::Backend::create("CPU");
    auto data_tensor = backend->create_tensor(element::u8, data_shape);
    copy_data(data_tensor, data_values);
    auto weights_tensor = backend->create_tensor(element::u8, weights_shape);
    copy_data(weights_tensor, weights_values);
    auto out_tensor = backend->create_tensor(element::u8, out_shape);

    auto handle = backend->compile(func);
    handle->call_with_validate({out_tensor}, {data_tensor, weights_tensor});

    // Integer convolution results, each doubled by the output scale.
    const vector<uint8_t> expected{22 * 2,
                                   34 * 2,
                                   30 * 2,
                                   32 * 2,
                                   38 * 2,
                                   72 * 2,
                                   90 * 2,
                                   43 * 2,
                                   33 * 2,
                                   52 * 2,
                                   43 * 2,
                                   39 * 2};
    EXPECT_EQ(expected, read_vector<uint8_t>(out_tensor));
}
TEST(builder, scaled_QDot_u8u8)
{
Shape shape_a{1, 2}; // input shape
......@@ -1386,129 +1331,3 @@ TEST(builder, scaled_QDot_u8u8)
handle->call_with_validate({result}, {a, b});
EXPECT_EQ((vector<uint8_t>{3, 13, 23}), read_vector<uint8_t>(result));
}
// Quantized convolution with NON-ZERO zero points. The fp32 input (X), filter
// (W) and expected result are each quantized to u8 with their own scale and
// zero point via op::Quantize graphs, then the quantized tensors are fed to
// op::QuantizedConvolution and its u8 output is compared element-wise against
// the independently quantized expected values.
// NOTE(review): constants look like the ONNX QLinearConv example — confirm.
TEST(builder, scaled_QC_non_zero_zero_point)
{
Shape shape_a{1, 1, 7, 7}; // input shape
Shape shape_b{1, 1, 1, 1}; // filter shape
Shape shape_r{1, 1, 7, 7};
// fp32 input data to be quantized.
vector<float> X = {0.45246148109436035f, 0.15498268604278564f, 0.11199361085891724f,
-0.39421093463897705f, 0.2626858949661255f, 0.13414543867111206f,
-0.27184486389160156f, -0.43028733134269714f, -0.26825493574142456f,
0.3893144130706787f, -0.13631996512413025f, -0.009590476751327515f,
-0.48771554231643677f, -0.25256502628326416f, -0.2812897562980652f,
0.4043201804161072f, 0.07795023918151855f, 0.326981782913208f,
0.13114392757415771f, -0.4416425824165344f, 0.12446999549865723f,
0.36739975214004517f, 0.1698915958404541f, 0.2008744478225708f,
0.23339951038360596f, 0.38613730669021606f, 0.11117297410964966f,
0.3877097964286804f, 0.20812749862670898f, -0.34297940135002136f,
-0.029246658086776733f, -0.20483523607254028f, -0.19244328141212463f,
-0.11104947328567505f, -0.32830488681793213f, -0.01800677180290222f,
0.3618946671485901f, -0.40949052572250366f, -0.18248388171195984f,
-0.3349453806877136f, -0.34091079235076904f, 0.006497859954833984f,
0.4537564516067505f, 0.08006560802459717f, -0.14788749814033508f,
0.034442365169525146f, -0.33322954177856445f, 0.06049239635467529f,
0.42619407176971436f};
// fp32 1x1 filter.
vector<float> W = {-0.4406261742115021f};
// fp32 reference convolution result (X * W); quantized below for comparison.
vector<float> expected_vals = {
-0.19936637580394745f, -0.06828942894935608f, -0.04934731498360634f,
0.17369966208934784f, -0.11574628204107285f, -0.05910799279808998f,
0.1197819635272026f, 0.18959586322307587f, 0.1182001456618309f,
-0.17154212296009064f, 0.06006614491343498f, 0.0042258151806890965f,
0.21490024030208588f, 0.11128675937652588f, 0.12394362688064575f,
-0.17815405130386353f, -0.034346915781497955f, -0.14407673478126526f,
-0.05778544768691063f, 0.19459928572177887f, -0.05484473705291748f,
-0.16188594698905945f, -0.07485868036746979f, -0.08851054310798645f,
-0.10284193605184555f, -0.17014220356941223f, -0.04898572340607643f,
-0.17083507776260376f, -0.09170642495155334f, 0.1511256992816925f,
0.012886842712759972f, 0.09025576710700989f, 0.08479554951190948f,
0.0489313043653965f, 0.14465972781181335f, 0.007934254594147205f,
-0.15946026146411896f, 0.1804322451353073f, 0.08040717244148254f,
0.1475857049226761f, 0.15021422505378723f, -0.0028631272725760937f,
-0.19993697106838226f, -0.03527900204062462f, 0.06516310572624207f,
-0.015176207758486271f, 0.14682966470718384f, -0.02665453404188156f,
-0.18779225647449493f};
auto lhs = make_shared<op::Parameter>(element::f32, shape_a);
auto rhs = make_shared<op::Parameter>(element::f32, shape_b);
auto result = make_shared<op::Parameter>(element::f32, shape_r);
// Per-tensor quantization (empty axis set), round-half-to-even.
AxisSet quantization_axes;
op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
// Scales and (non-zero) zero points for input (lhs), filter (rhs) and result.
auto lhs_scale = op::Constant::create(element::f32, Shape{}, {0.00369205});
auto lhs_zero_point = op::Constant::create(element::u8, Shape{}, {132});
auto rhs_scale = op::Constant::create(element::f32, Shape{}, {0.00172795});
auto rhs_zero_point = op::Constant::create(element::u8, Shape{}, {255});
auto result_scale = op::Constant::create(element::f32, Shape{}, {0.00162681});
auto result_zero_point = op::Constant::create(element::u8, Shape{}, {123});
// Three small graphs that quantize X, W and the expected result to u8.
auto quantize_lhs = make_shared<op::Quantize>(
lhs, lhs_scale, lhs_zero_point, element::u8, quantization_axes, round_mode);
auto quantize_rhs = make_shared<op::Quantize>(
rhs, rhs_scale, rhs_zero_point, element::u8, quantization_axes, round_mode);
auto quantize_result = make_shared<op::Quantize>(
result, result_scale, result_zero_point, element::u8, quantization_axes, round_mode);
auto lhs_f = make_shared<Function>(quantize_lhs, ParameterVector{lhs});
auto rhs_f = make_shared<Function>(quantize_rhs, ParameterVector{rhs});
auto result_f = make_shared<Function>(quantize_result, ParameterVector{result});
auto backend = runtime::Backend::create("CPU");
auto lhs_data = backend->create_tensor(element::f32, shape_a);
auto rhs_data = backend->create_tensor(element::f32, shape_b);
auto result_data = backend->create_tensor(element::f32, shape_r);
auto lhs_output = backend->create_tensor(element::u8, shape_a);
auto rhs_output = backend->create_tensor(element::u8, shape_b);
auto result_output = backend->create_tensor(element::u8, shape_r);
copy_data(lhs_data, X);
copy_data(rhs_data, W);
copy_data(result_data, expected_vals);
// Run the three quantization graphs to obtain the u8 tensors.
auto lhs_handle = backend->compile(lhs_f);
auto rhs_handle = backend->compile(rhs_f);
auto result_handle = backend->compile(result_f);
lhs_handle->call_with_validate({lhs_output}, {lhs_data});
rhs_handle->call_with_validate({rhs_output}, {rhs_data});
result_handle->call_with_validate({result_output}, {result_data});
// Build the QuantizedConvolution under test over the quantized u8 tensors.
auto A = make_shared<op::Parameter>(element::u8, shape_a);
auto B = make_shared<op::Parameter>(element::u8, shape_b);
auto CV = make_shared<ngraph::op::QuantizedConvolution>(A,
B,
Strides{1, 1}, // move_strides
Strides{1, 1}, // filter_dilation
CoordinateDiff{0, 0}, // below_pads
CoordinateDiff{0, 0}, // above_pads
Strides{1, 1}, // data_dilation
lhs_scale,
lhs_zero_point,
rhs_scale,
rhs_zero_point,
result_scale,
result_zero_point,
element::u8,
AxisSet{});
auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B});
constant_fold(f);
// Create some tensors for input/output
auto a = backend->create_tensor(element::u8, shape_a);
copy_data(a, read_vector<uint8_t>(lhs_output));
auto b = backend->create_tensor(element::u8, shape_b);
copy_data(b, read_vector<uint8_t>(rhs_output));
auto final_result = backend->create_tensor(element::u8, shape_r);
auto handle = backend->compile(f);
handle->call_with_validate({final_result}, {a, b});
// Compare the quantized convolution output against the quantized expected
// values, element by element (49 = 7x7 output elements).
for (int i = 0; i < 49; ++i)
{
EXPECT_EQ((read_vector<uint8_t>(result_output))[i], (read_vector<uint8_t>(final_result))[i])
<< "Vectors x and y differ at index " << i;
}
}
......@@ -3183,6 +3183,8 @@ TEST(cpu_quant_fusion, qconv_relu)
output_scale,
int8_zero,
element::i8,
AxisSet{},
AxisSet{},
AxisSet{});
auto dq = std::make_shared<op::Dequantize>(
conv, output_scale, int8_zero, element::f32, AxisSet{});
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment