Commit 94d39716 authored by Nishant Patel, committed by Scott Cyphers

Generic Reference Convolution (#2840)

* Generalize types in general convolution

* type-o

* rounding

* Do prod wide

* templatize conv in cpu/kernel & add u8u8 support for Qconv

* Remove cast function

* Avoid compiler warning

* Merge problem
parent 8e798add
@@ -49,6 +49,11 @@ op::QuantizedConvolution::QuantizedConvolution(const shared_ptr<Node>& data_batch
     auto output_et = requantize ? element::i8 : element::i32;
+    if (data_batch->get_element_type() == element::u8 &&
+        filters->get_element_type() == element::u8)
+    {
+        output_et = element::u8;
+    }
     set_output_type(0,
                     output_et,
                     util::infer_convolution_output_shape(this,
......
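For context, the type rule the hunk above introduces can be read in isolation. A minimal standalone sketch (the helper name is hypothetical, not part of the commit):

// Hypothetical standalone form of the output-type rule above: requantized
// quantized convolutions emit i8 and unrequantized ones emit i32, except
// that u8 data convolved with u8 filters now yields a u8 output.
element::Type infer_qconv_output_et(const element::Type& data_et,
                                    const element::Type& filters_et,
                                    bool requantize)
{
    auto output_et = requantize ? element::i8 : element::i32;
    if (data_et == element::u8 && filters_et == element::u8)
    {
        output_et = element::u8;
    }
    return output_et;
}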
@@ -88,10 +88,10 @@
                 }
                 else
                 {
-                    std::function<decltype(runtime::cpu::kernel::convolution<float>)> kernel;
-                    SELECT_KERNEL(
-                        kernel, out[0].get_element_type(), runtime::cpu::kernel::convolution);
+                    std::function<decltype(runtime::cpu::kernel::convolution<float, float, float>)>
+                        kernel;
+                    kernel = runtime::cpu::kernel::convolution<float, float, float>;
                     auto window_movement_strides = convolution->get_window_movement_strides();
                     auto window_dilation_strides = convolution->get_window_dilation_strides();
@@ -123,7 +123,8 @@
                         window_dilation_strides,
                         padding_below,
                         padding_above,
-                        data_dilation_strides);
+                        data_dilation_strides,
+                        1.0f);
                 };
                 functors.emplace_back(functor);
             }
......
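The hunk above swaps the SELECT_KERNEL type dispatch for a direct instantiation. The pattern, reduced to its essentials (a sketch, not the builder's full code):

// decltype on a concrete instantiation yields the kernel's function type, so
// the std::function wrapper and the assignment stay in sync by construction.
std::function<decltype(runtime::cpu::kernel::convolution<float, float, float>)> kernel =
    runtime::cpu::kernel::convolution<float, float, float>;
// The new trailing argument (1.0f) is the requantization scale; for the fp32
// path it is the identity and leaves results unchanged.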
@@ -20,6 +20,7 @@
 #include "ngraph/op/experimental/quantized_conv_relu.hpp"
 #include "ngraph/runtime/cpu/cpu_builder.hpp"
 #include "ngraph/runtime/cpu/cpu_executor.hpp"
+#include "ngraph/runtime/cpu/kernel/convolution.hpp"
 #include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
 #include "ngraph/runtime/cpu/mkldnn_utils.hpp"
@@ -35,20 +36,25 @@
             template <>
             void Builder::BUILDER_DECL(ngraph::op::QuantizedConvolution)
             {
-                if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
-                {
-                    auto qconvolution = static_cast<const ngraph::op::QuantizedConvolution*>(node);
-                    auto& functors = external_function->get_functors();
-                    auto arg0_buffer_index =
-                        external_function->get_buffer_index(args[0].get_name());
-                    auto arg1_buffer_index =
-                        external_function->get_buffer_index(args[1].get_name());
-                    auto arg2_buffer_index =
-                        external_function->get_buffer_index(args[2].get_name());
-                    auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
+                auto qconvolution = static_cast<const ngraph::op::QuantizedConvolution*>(node);
+                auto& functors = external_function->get_functors();
+                auto arg0_shape = args[0].get_shape();
+                auto arg1_shape = args[1].get_shape();
+                auto result_shape = out[0].get_shape();
+                auto arg0_buffer_index = external_function->get_buffer_index(args[0].get_name());
+                auto arg1_buffer_index = external_function->get_buffer_index(args[1].get_name());
+                auto arg2_buffer_index = external_function->get_buffer_index(args[2].get_name());
+                auto out0_buffer_index = external_function->get_buffer_index(out[0].get_name());
+                auto scales_size = shape_size(args[2].get_shape());
+                if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
+                {
+                    auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
                     auto conv_desc =
                         mkldnn_emitter
                             ->get_convolution_forward_desc<ngraph::op::QuantizedConvolution>(node);
@@ -101,7 +107,51 @@
                 }
                 else
                 {
-                    throw ngraph_error("unsupported parameters for QuantizedConvolution via DEX");
+                    std::function<decltype(
+                        runtime::cpu::kernel::convolution<uint8_t, uint8_t, uint8_t, int32_t>)>
+                        kernel;
+                    kernel = runtime::cpu::kernel::convolution<uint8_t, uint8_t, uint8_t, int32_t>;
+
+                    auto window_movement_strides = qconvolution->get_window_movement_strides();
+                    auto window_dilation_strides = qconvolution->get_window_dilation_strides();
+                    auto padding_below = qconvolution->get_padding_below();
+                    auto padding_above = qconvolution->get_padding_above();
+                    auto data_dilation_strides = qconvolution->get_data_dilation_strides();
+
+                    auto functor = [&,
+                                    kernel,
+                                    arg0_shape,
+                                    arg1_shape,
+                                    arg0_buffer_index,
+                                    arg1_buffer_index,
+                                    arg2_buffer_index,
+                                    out0_buffer_index,
+                                    result_shape,
+                                    window_movement_strides,
+                                    window_dilation_strides,
+                                    padding_below,
+                                    padding_above,
+                                    data_dilation_strides,
+                                    scales_size](CPURuntimeContext* ctx,
+                                                 CPUExecutionContext* ectx) {
+                        vector<float> dyn_scales;
+                        dyn_scales.assign(static_cast<float*>(ctx->buffer_data[arg2_buffer_index]),
+                                          static_cast<float*>(ctx->buffer_data[arg2_buffer_index]) +
+                                              scales_size);
+                        kernel(ctx->buffer_data[arg0_buffer_index],
+                               ctx->buffer_data[arg1_buffer_index],
+                               ctx->buffer_data[out0_buffer_index],
+                               arg0_shape,
+                               arg1_shape,
+                               result_shape,
+                               window_movement_strides,
+                               window_dilation_strides,
+                               padding_below,
+                               padding_above,
+                               data_dilation_strides,
+                               dyn_scales[0]);
+                    };
+                    functors.emplace_back(functor);
                 }
             }
......
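The new fallback path above pulls the scale out of a type-erased buffer at run time and hands only its first element to the kernel. A minimal sketch of just that step, assuming a raw float buffer (the helper name is hypothetical):

#include <cstddef>
#include <vector>

// Copy the dynamic scale tensor out of an opaque runtime buffer, as the
// functor does with ctx->buffer_data[arg2_buffer_index]; the kernel then
// consumes only dyn_scales[0].
std::vector<float> read_dyn_scales(void* scales_buffer, size_t scales_size)
{
    std::vector<float> dyn_scales;
    dyn_scales.assign(static_cast<float*>(scales_buffer),
                      static_cast<float*>(scales_buffer) + scales_size);
    return dyn_scales;
}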
@@ -27,7 +27,11 @@
         {
             namespace kernel
             {
-                template <typename ElementType>
+                template <typename INPUT,
+                          typename FILTER,
+                          typename OUTPUT,
+                          typename ACCUMULATION =
+                              typename ngraph::runtime::reference::widen<OUTPUT>::type>
                 void convolution(void* input0,
                                  void* input1,
                                  void* output,
@@ -38,11 +42,13 @@
                                  const Strides& window_dilation_strides,
                                  const CoordinateDiff& padding_below,
                                  const CoordinateDiff& padding_above,
-                                 const Strides& data_dilation_strides)
+                                 const Strides& data_dilation_strides,
+                                 const float requant_scale)
                 {
-                    reference::convolution<ElementType>(static_cast<const ElementType*>(input0),
-                                                        static_cast<const ElementType*>(input1),
-                                                        static_cast<ElementType*>(output),
+                    reference::convolution<INPUT, FILTER, OUTPUT, ACCUMULATION>(
+                        static_cast<const INPUT*>(input0),
+                        static_cast<const FILTER*>(input1),
+                        static_cast<OUTPUT*>(output),
                         arg0_shape,
                         arg1_shape,
                         result_shape,
@@ -50,7 +56,8 @@
                         window_dilation_strides,
                         padding_below,
                         padding_above,
-                        data_dilation_strides);
+                        data_dilation_strides,
+                        requant_scale);
                 }

                 template <typename ElementType>
......
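One consequence of the defaulted ACCUMULATION parameter is worth spelling out. The assertions below are a sketch against the widen trait defined in the reference header further down (assuming that header is included), showing how the default resolves:

#include <cstdint>
#include <type_traits>

// widen<float> is specialized to double, so convolution<float, float, float>
// accumulates in double. widen<uint8_t> falls through to the primary template
// and stays uint8_t, which would overflow; that is why the quantized builder
// passes int32_t explicitly as the fourth template argument.
static_assert(std::is_same<ngraph::runtime::reference::widen<float>::type, double>::value,
              "float accumulates in double");
static_assert(std::is_same<ngraph::runtime::reference::widen<uint8_t>::type, uint8_t>::value,
              "uint8_t is not widened by default");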
@@ -16,7 +16,9 @@
 #pragma once

+#include <cfenv>
 #include <cmath>
+#include <functional>

 #include "ngraph/axis_vector.hpp"
 #include "ngraph/coordinate_transform.hpp"
@@ -29,13 +31,34 @@
     {
         namespace reference
         {
+            template <typename T>
+            struct widen
+            {
+                using type = T;
+            };
+
+            template <>
+            struct widen<float>
+            {
+                using type = double;
+            };
+
+            template <>
+            struct widen<double>
+            {
+                using type = long double;
+            };
+
             // in: NC_I...
             // filter: C_OC_I...
             // out: NC_O...
-            template <typename T>
-            void general_convolution(const T* in,
-                                     const T* filter,
-                                     T* out,
+            template <typename INPUT,
+                      typename FILTER,
+                      typename OUTPUT,
+                      typename ACCUMULATION = typename widen<OUTPUT>::type>
+            void general_convolution(const INPUT* in,
+                                     const FILTER* filter,
+                                     OUTPUT* out,
                                      const Shape& in_shape,
                                      const Shape& filter_shape,
                                      const Shape& out_shape,
@@ -49,8 +72,11 @@
                                      size_t filter_out_channel_axis,
                                      size_t filter_in_channel_axis,
                                      size_t out_batch_axis,
-                                     size_t out_channel_axis)
+                                     size_t out_channel_axis,
+                                     const float requant_scale = 1.0f)
             {
+                auto old_mode = std::fegetround();
+                std::fesetround(FE_TONEAREST);
                 // Comments throughout assume without loss of generality that:
                 //
                 // * batch axes for both in and out are 0
@@ -164,7 +190,7 @@
                     //
                     // out[O] += in[I] * filter[F].
-                    T result = 0;
+                    ACCUMULATION result = 0;

                     CoordinateTransform::Iterator in_it = in_transform.begin();
                     CoordinateTransform::Iterator filter_it = filter_transform.begin();
@@ -185,8 +211,8 @@
                         size_t filter_idx = filter_transform.index(filter_coord);
                         for (size_t in_channel = 0; in_channel < n_in_channels; ++in_channel)
                         {
-                            T in_v = in[in_idx];
-                            T f_v = filter[filter_idx];
+                            ACCUMULATION in_v = in[in_idx];
+                            ACCUMULATION f_v = filter[filter_idx];
                             result += in_v * f_v;
                             in_idx += in_channel_stride;
                             filter_idx += filter_in_channel_stride;
@@ -195,15 +221,19 @@
                         ++in_it;
                         ++filter_it;
                     }

-                    out[out_transform.index(out_coord)] = result;
+                    out[out_transform.index(out_coord)] =
+                        static_cast<OUTPUT>(result * requant_scale);
                 }
+                std::fesetround(old_mode);
             }

-            template <typename T>
-            void convolution(const T* in,
-                             const T* filter,
-                             T* out,
+            template <typename INPUT,
+                      typename FILTER,
+                      typename OUTPUT,
+                      typename ACCUMULATION = typename widen<OUTPUT>::type>
+            void convolution(const INPUT* in,
+                             const FILTER* filter,
+                             OUTPUT* out,
                              const Shape& in_shape,
                              const Shape& filter_shape,
                              const Shape& out_shape,
@@ -211,9 +241,11 @@
                              const Strides& filter_dilation,
                              const CoordinateDiff& in_pad_below,
                              const CoordinateDiff& in_pad_above,
-                             const Strides& in_dilation)
+                             const Strides& in_dilation,
+                             const float requant_scale = 1.0f)
             {
-                general_convolution(in,
+                general_convolution<INPUT, FILTER, OUTPUT, ACCUMULATION>(in,
                                     filter,
                                     out,
                                     in_shape,
@@ -229,13 +261,17 @@
                                     0,
                                     1,
                                     0,
-                                    1);
+                                    1,
+                                    requant_scale);
             }

-            template <typename T>
-            void convolution_backprop_filter(const T* in,
-                                             const T* delta_out,
-                                             T* delta_filter,
+            template <typename INPUT,
+                      typename OUTPUT,
+                      typename FILTER,
+                      typename ACCUMULATION = typename widen<FILTER>::type>
+            void convolution_backprop_filter(const INPUT* in,
+                                             const OUTPUT* delta_out,
+                                             FILTER* delta_filter,
                                              const Shape& in_shape,
                                              const Shape& out_shape,
                                              const Shape& filter_shape,
@@ -245,7 +281,7 @@
                                              const CoordinateDiff& backprop_in_pad_above,
                                              const Strides& in_dilation)
             {
-                general_convolution(in,
+                general_convolution<INPUT, OUTPUT, FILTER, ACCUMULATION>(in,
                                     delta_out,
                                     delta_filter,
                                     in_shape,
@@ -264,10 +300,13 @@
                                     0);
             }

-            template <typename T>
-            void convolution_backprop_in(const T* delta_out,
-                                         const T* filter,
-                                         T* delta_in,
+            template <typename OUTPUT,
+                      typename FILTER,
+                      typename INPUT,
+                      typename ACCUMULATION = typename widen<INPUT>::type>
+            void convolution_backprop_in(const OUTPUT* delta_out,
+                                         const FILTER* filter,
+                                         INPUT* delta_in,
                                          const Shape& out_shape,
                                          const Shape& filter_shape,
                                          const Shape& in_shape,
@@ -279,15 +318,16 @@
             {
                 // Note that we only reverse the spatial dimensions here (loop
                 // starts at 2)
-                std::vector<T> reversed(shape_size(filter_shape));
+                std::vector<INPUT> reversed(shape_size(filter_shape));
                 AxisSet reverse_axes;
                 for (size_t i = 2; i < filter_shape.size(); ++i)
                 {
                     reverse_axes.insert(i);
                 }
-                reverse<T>(filter, &reversed[0], filter_shape, filter_shape, reverse_axes);
+                reverse<FILTER>(filter, &reversed[0], filter_shape, filter_shape, reverse_axes);

-                general_convolution(delta_out,
+                general_convolution<OUTPUT, FILTER, INPUT, ACCUMULATION>(
+                    delta_out,
                     &reversed[0],
                     delta_in,
                     out_shape,
......
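To make the accumulate-then-requantize behavior concrete, here is a self-contained sketch of the inner-loop pattern above. The explicit std::nearbyint call is this sketch's choice for showing the FE_TONEAREST mode in action; the reference kernel itself casts the scaled accumulator directly:

#include <cfenv>
#include <cmath>
#include <cstdint>
#include <iostream>

int main()
{
    auto old_mode = std::fegetround();
    std::fesetround(FE_TONEAREST); // as general_convolution now does on entry

    // Values taken from the corner window of the scaled_QC_u8u8 test below:
    // four in-bounds taps once one pixel of zero padding is applied.
    const uint8_t in_v[4] = {1, 2, 5, 6};
    const uint8_t f_v[4] = {5, 0, 1, 2};

    int32_t result = 0; // ACCUMULATION wide enough for u8 x u8 products
    for (int i = 0; i < 4; ++i)
    {
        result += static_cast<int32_t>(in_v[i]) * f_v[i]; // 5 + 0 + 5 + 12 = 22
    }

    const float requant_scale = 2.0f;
    auto out = static_cast<uint8_t>(std::nearbyint(result * requant_scale));

    std::fesetround(old_mode); // restore the caller's rounding mode
    std::cout << static_cast<int>(out) << "\n"; // prints 44, matching 22 * 2
}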
@@ -1423,3 +1423,49 @@ TEST(builder, dynamic_scaled_QD_with_bias)
     EXPECT_EQ((vector<uint8_t>{178, 231, 255, 255, 0, 255, 255, 255, 255, 255, 0, 255}),
               read_vector<uint8_t>(f_requantize_relu_r));
 }
+
+TEST(builder, scaled_QC_u8u8)
+{
+    Shape shape_a{1, 1, 3, 4}; // input shape
+    Shape shape_b{1, 1, 3, 3}; // filter shape
+    Shape shape_r{1, 1, 3, 4}; // output shape
+    vector<uint8_t> a_data = {1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4};
+    vector<uint8_t> b_data = {1, 2, 3, 4, 5, 0, 0, 1, 2};
+    auto A = make_shared<op::Parameter>(element::u8, shape_a);
+    auto B = make_shared<op::Parameter>(element::u8, shape_b);
+    auto scale = op::Constant::create(element::f32, Shape{}, {2});
+    auto CV = make_shared<ngraph::op::QuantizedConvolution>(A,
+                                                            B,
+                                                            Strides{1, 1},        // move_strides
+                                                            Strides{1, 1},        // filter_dilation
+                                                            CoordinateDiff{1, 1}, // below_pads
+                                                            CoordinateDiff{1, 1}, // above_pads
+                                                            Strides{1, 1},        // data_dilation
+                                                            scale,
+                                                            false);
+    auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B});
+    constant_fold(f);
+
+    auto backend = runtime::Backend::create("CPU");
+    // Create some tensors for input/output
+    auto a = backend->create_tensor(element::u8, shape_a);
+    copy_data(a, a_data);
+    auto b = backend->create_tensor(element::u8, shape_b);
+    copy_data(b, b_data);
+    auto result = backend->create_tensor(element::u8, shape_r);
+
+    auto handle = backend->compile(f);
+    handle->call_with_validate({result}, {a, b});
+    EXPECT_EQ((vector<uint8_t>{22 * 2,
+                               34 * 2,
+                               30 * 2,
+                               32 * 2,
+                               38 * 2,
+                               72 * 2,
+                               90 * 2,
+                               43 * 2,
+                               33 * 2,
+                               52 * 2,
+                               43 * 2,
+                               39 * 2}),
+              read_vector<uint8_t>(result));
+}
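For reference, the first expected element can be checked by hand: with one pixel of zero padding, the window anchored at the top-left output position overlaps the input at four in-bounds taps, giving 5*1 + 0*2 + 1*5 + 2*6 = 22; the scale of 2 then requantizes this to 44, i.e. the 22 * 2 leading the expected vector.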