Replicate the ONNX Runtime test case for quantized convolution with non-zero zero points

ec295e44 · nishant.b.patel · 86577af5 · ec295e44 · ec295e44 · ec295e44
Commit ec295e44 authored May 29, 2019 by nishant.b.patel
Showing with 149 additions and 13 deletions

convolution.hpp src/ngraph/runtime/cpu/kernel/convolution.hpp +3 -3

convolution.hpp src/ngraph/runtime/reference/convolution.hpp +10 -10

builder_quantization.cpp test/builder_quantization.cpp +136 -0

No files found.
--- a/src/ngraph/runtime/cpu/kernel/convolution.hpp
+++ b/src/ngraph/runtime/cpu/kernel/convolution.hpp
@@ -63,11 +63,11 @@ namespace ngraph
                        padding_above,
                        data_dilation_strides,
                        static_cast<const float*>(input_scale),
-                        static_cast<INPUT*>(input_zero_point),
+                        static_cast<const INPUT*>(input_zero_point),
                        static_cast<const float*>(filter_scale),
-                        static_cast<FILTER*>(filter_zero_point),
+                        static_cast<const FILTER*>(filter_zero_point),
                        static_cast<const float*>(output_scale),
-                        static_cast<OUTPUT*>(output_zero_point));
+                        static_cast<const OUTPUT*>(output_zero_point));
                }

                template <typename ElementType>

--- a/src/ngraph/runtime/reference/convolution.hpp
+++ b/src/ngraph/runtime/reference/convolution.hpp
@@ -74,11 +74,11 @@ namespace ngraph
                                     size_t out_batch_axis,
                                     size_t out_channel_axis,
                                     const float* input_scale = nullptr,
-                                     INPUT* input_zero_point = nullptr,
+                                     const INPUT* input_zero_point = nullptr,
                                     const float* filter_scale = nullptr,
-                                     FILTER* filter_zero_point = nullptr,
+                                     const FILTER* filter_zero_point = nullptr,
                                     const float* output_scale = nullptr,
-                                     OUTPUT* output_zero_point = nullptr)
+                                     const OUTPUT* output_zero_point = nullptr)
            {
                bool is_quantized = false;
                if (input_scale && input_zero_point && filter_scale && filter_zero_point &&
@@ -227,8 +227,8 @@ namespace ngraph
                                ACCUMULATION f_v = filter[filter_idx];
                                if (is_quantized)
                                {
-                                    in_v = in_v - *(input_zero_point);
-                                    f_v = f_v - *(filter_zero_point);
+                                    in_v = in_v - *input_zero_point;
+                                    f_v = f_v - *filter_zero_point;
                                }
                                result += in_v * f_v;
                                in_idx += in_channel_stride;
@@ -240,9 +240,9 @@ namespace ngraph
                    }
                    if (is_quantized)
                    {
-                        float scale = ((*(input_scale)) * (*(filter_scale))) / (*(output_scale));
+                        float scale = *input_scale * *filter_scale / *output_scale;
                        out[out_transform.index(out_coord)] =
-                            static_cast<OUTPUT>((result * scale) + *(output_zero_point));
+                            static_cast<OUTPUT>(result * scale + *output_zero_point);
                    }
                    else
                    {
@@ -268,11 +268,11 @@ namespace ngraph
                             const CoordinateDiff& in_pad_above,
                             const Strides& in_dilation,
                             const float* input_scale = nullptr,
-                             INPUT* input_zero_point = nullptr,
+                             const INPUT* input_zero_point = nullptr,
                             const float* filter_scale = nullptr,
-                             FILTER* filter_zero_point = nullptr,
+                             const FILTER* filter_zero_point = nullptr,
                             const float* output_scale = nullptr,
-                             OUTPUT* output_zero_point = nullptr)
+                             const OUTPUT* output_zero_point = nullptr)

            {
                general_convolution<INPUT, FILTER, OUTPUT, ACCUMULATION>(in,

--- a/test/builder_quantization.cpp
+++ b/test/builder_quantization.cpp
@@ -1523,3 +1523,139 @@ TEST(builder, scaled_QDot_u8u8)
    handle->call_with_validate({result}, {a, b});
    EXPECT_EQ((vector<uint8_t>{3, 13, 23}), read_vector<uint8_t>(result));
 }
+
+// Finds the smallest and largest values stored in `vec`.
+//
+// \param vec  Values to scan.
+// \param min  Receives the minimum element, or 0 when `vec` is empty.
+// \param max  Receives the maximum element, or 0 when `vec` is empty.
+void FindMinMax(const std::vector<float>& vec, float* min, float* max)
+{
+    // min_element/max_element return end() for an empty range; dereferencing
+    // that iterator is undefined behaviour, so report the 0 default instead.
+    if (vec.empty())
+    {
+        *min = *max = 0;
+        return;
+    }
+    *min = *std::min_element(vec.begin(), vec.end());
+    *max = *std::max_element(vec.begin(), vec.end());
+}
+
+// Derives an affine quantization scale and zero point for the value range
+// [min, max], targeting the unsigned 8-bit range 0-255.
+//
+// The range is first widened so that it always contains 0.
+//
+// \param min         Smallest real value to represent.
+// \param max         Largest real value to represent.
+// \param scale       Receives (max - min) / 255, or 1 when the widened range
+//                    has zero width.
+// \param zero_point  Receives the rounded, clamped quantized value of 0.
+void FindScaleAndZeroPoint(float min, float max, float* scale, uint8_t* zero_point)
+{
+    min = std::min(min, 0.f);
+    max = std::max(max, 0.f);
+    const float qmin = 0;
+    const float qmax = 255;
+
+    // A zero-width range (only possible when min == max == 0 after widening)
+    // would yield scale == 0 and a NaN zero point, and every later division by
+    // the scale would blow up. Use an identity mapping for that case.
+    if (max == min)
+    {
+        *scale = 1.f;
+        *zero_point = static_cast<uint8_t>(qmin);
+        return;
+    }
+
+    *scale = (max - min) / (qmax - qmin);
+    const auto initial_zero_point = qmin - min / *scale;
+    // Clamp before rounding so the cast to uint8_t is always in range.
+    *zero_point =
+        static_cast<uint8_t>(std::round(std::max(qmin, std::min(qmax, initial_zero_point))));
+}
+
+// Quantizes `input` to unsigned 8-bit values with an affine mapping.
+//
+// Each element is computed as round(input[i] / scale) + zero_point, clamped to
+// [0, 255], and stored at the same index of `*input_quantized`.
+//
+// \param scale            Divisor applied to every input value.
+// \param zero_point       Offset added after rounding.
+// \param input            Values to quantize.
+// \param input_quantized  Destination; grown to input.size() when it is
+//                         shorter. Elements past input.size() are untouched.
+void Quantize(float scale,
+              uint8_t zero_point,
+              const std::vector<float>& input,
+              std::vector<uint8_t>* input_quantized)
+{
+    // Indexing past the end with operator[] is undefined behaviour, so make
+    // sure the destination can hold every quantized element.
+    if (input_quantized->size() < input.size())
+    {
+        input_quantized->resize(input.size());
+    }
+    for (size_t i = 0; i < input.size(); i++)
+    {
+        const float clamped_val =
+            std::max(0.f, std::min(255.f, std::round(input[i] / scale) + zero_point));
+        (*input_quantized)[i] = static_cast<uint8_t>(clamped_val);
+    }
+}
+
+// Runs a u8 QuantizedConvolution with a 1x1x7x7 input and a 1x1x1x1 filter on
+// the CPU backend, using scales and zero points derived from the min/max of
+// the float data, and compares the output element-wise against the quantized
+// float reference values.
+TEST(builder, scaled_QC_non_zero_zero_point)
+{
+    Shape shape_a{1, 1, 7, 7}; // input shape
+    Shape shape_b{1, 1, 1, 1}; // filter shape
+    Shape shape_r{1, 1, 7, 7};
+    vector<float> X = {0.45246148109436035f,   0.15498268604278564f,  0.11199361085891724f,
+                       -0.39421093463897705f,  0.2626858949661255f,   0.13414543867111206f,
+                       -0.27184486389160156f,  -0.43028733134269714f, -0.26825493574142456f,
+                       0.3893144130706787f,    -0.13631996512413025f, -0.009590476751327515f,
+                       -0.48771554231643677f,  -0.25256502628326416f, -0.2812897562980652f,
+                       0.4043201804161072f,    0.07795023918151855f,  0.326981782913208f,
+                       0.13114392757415771f,   -0.4416425824165344f,  0.12446999549865723f,
+                       0.36739975214004517f,   0.1698915958404541f,   0.2008744478225708f,
+                       0.23339951038360596f,   0.38613730669021606f,  0.11117297410964966f,
+                       0.3877097964286804f,    0.20812749862670898f,  -0.34297940135002136f,
+                       -0.029246658086776733f, -0.20483523607254028f, -0.19244328141212463f,
+                       -0.11104947328567505f,  -0.32830488681793213f, -0.01800677180290222f,
+                       0.3618946671485901f,    -0.40949052572250366f, -0.18248388171195984f,
+                       -0.3349453806877136f,   -0.34091079235076904f, 0.006497859954833984f,
+                       0.4537564516067505f,    0.08006560802459717f,  -0.14788749814033508f,
+                       0.034442365169525146f,  -0.33322954177856445f, 0.06049239635467529f,
+                       0.42619407176971436f};
+    vector<float> W = {-0.4406261742115021f};
+    // Float reference output for X and W; stored as a vector like X and W so
+    // it is passed to the helpers below without a conversion per call.
+    vector<float> expected_vals = {
+        -0.19936637580394745f,  -0.06828942894935608f,  -0.04934731498360634f,
+        0.17369966208934784f,   -0.11574628204107285f,  -0.05910799279808998f,
+        0.1197819635272026f,    0.18959586322307587f,   0.1182001456618309f,
+        -0.17154212296009064f,  0.06006614491343498f,   0.0042258151806890965f,
+        0.21490024030208588f,   0.11128675937652588f,   0.12394362688064575f,
+        -0.17815405130386353f,  -0.034346915781497955f, -0.14407673478126526f,
+        -0.05778544768691063f,  0.19459928572177887f,   -0.05484473705291748f,
+        -0.16188594698905945f,  -0.07485868036746979f,  -0.08851054310798645f,
+        -0.10284193605184555f,  -0.17014220356941223f,  -0.04898572340607643f,
+        -0.17083507776260376f,  -0.09170642495155334f,  0.1511256992816925f,
+        0.012886842712759972f,  0.09025576710700989f,   0.08479554951190948f,
+        0.0489313043653965f,    0.14465972781181335f,   0.007934254594147205f,
+        -0.15946026146411896f,  0.1804322451353073f,    0.08040717244148254f,
+        0.1475857049226761f,    0.15021422505378723f,   -0.0028631272725760937f,
+        -0.19993697106838226f,  -0.03527900204062462f,  0.06516310572624207f,
+        -0.015176207758486271f, 0.14682966470718384f,   -0.02665453404188156f,
+        -0.18779225647449493f};
+
+    // Value ranges of input, filter and expected output.
+    float lhs_min, lhs_max, rhs_min, rhs_max, result_min, result_max;
+    FindMinMax(X, &lhs_min, &lhs_max);
+    FindMinMax(W, &rhs_min, &rhs_max);
+    FindMinMax(expected_vals, &result_min, &result_max);
+
+    // Per-tensor quantization parameters derived from those ranges.
+    float lhs_scale, rhs_scale, result_scale;
+    uint8_t lhs_zero_point, rhs_zero_point, result_zero_point;
+    FindScaleAndZeroPoint(lhs_min, lhs_max, &lhs_scale, &lhs_zero_point);
+    FindScaleAndZeroPoint(rhs_min, rhs_max, &rhs_scale, &rhs_zero_point);
+    FindScaleAndZeroPoint(result_min, result_max, &result_scale, &result_zero_point);
+
+    vector<uint8_t> x_quantized(X.size()), w_quantized(W.size()),
+        result_quantized(expected_vals.size());
+    Quantize(lhs_scale, lhs_zero_point, X, &x_quantized);
+    Quantize(rhs_scale, rhs_zero_point, W, &w_quantized);
+    Quantize(result_scale, result_zero_point, expected_vals, &result_quantized);
+
+    auto A = make_shared<op::Parameter>(element::u8, shape_a);
+    auto B = make_shared<op::Parameter>(element::u8, shape_b);
+
+    auto input_scale = op::Constant::create(element::f32, Shape{}, {lhs_scale});
+    auto filter_scale = op::Constant::create(element::f32, Shape{}, {rhs_scale});
+    auto output_scale = op::Constant::create(element::f32, Shape{}, {result_scale});
+    auto input_zero_point = op::Constant::create(element::u8, Shape{}, {lhs_zero_point});
+    auto filter_zero_point = op::Constant::create(element::u8, Shape{}, {rhs_zero_point});
+    auto output_zero_point = op::Constant::create(element::u8, Shape{}, {result_zero_point});
+    auto CV = make_shared<ngraph::op::QuantizedConvolution>(A,
+                                                            B,
+                                                            Strides{1, 1},        // move_strides
+                                                            Strides{1, 1},        // filter_dilation
+                                                            CoordinateDiff{0, 0}, // below_pads
+                                                            CoordinateDiff{0, 0}, // above_pads
+                                                            Strides{1, 1},        // data_dilation
+                                                            input_scale,
+                                                            input_zero_point,
+                                                            filter_scale,
+                                                            filter_zero_point,
+                                                            output_scale,
+                                                            output_zero_point,
+                                                            element::u8,
+                                                            AxisSet{});
+    auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B});
+    constant_fold(f);
+
+    auto backend = runtime::Backend::create("CPU");
+    // Create some tensors for input/output
+    auto a = backend->create_tensor(element::u8, shape_a);
+    copy_data(a, x_quantized);
+    auto b = backend->create_tensor(element::u8, shape_b);
+    copy_data(b, w_quantized);
+    auto result = backend->create_tensor(element::u8, shape_r);
+    auto handle = backend->compile(f);
+    handle->call_with_validate({result}, {a, b});
+
+    // Read the output tensor back once rather than once per compared element,
+    // and make sure the element counts agree before indexing.
+    const auto actual = read_vector<uint8_t>(result);
+    ASSERT_EQ(result_quantized.size(), actual.size());
+    for (size_t i = 0; i < result_quantized.size(); ++i)
+    {
+        EXPECT_EQ(result_quantized[i], actual[i]) << "Vectors x and y differ at index " << i;
+    }
+}