Generic Reference Convolution (#2840)

* Generalize types in general convolution
* Typo
* Rounding
* Do prod wide
* Templatize conv in cpu/kernel & add u8u8 support for Qconv
* Remove cast function
* Avoid compiler warning
* Merge problem

Generic Reference Convolution (#2840)
* Generalize types in general convolution
* Typo
* Rounding
* Do prod wide
* Templatize conv in cpu/kernel & add u8u8 support for Qconv
* Remove cast function
* Avoid compiler warning
* Merge problem
94d39716 · Nishant Patel · Scott Cyphers · 8e798add · 94d39716 · 94d39716
Commit 94d39716, authored May 01, 2019 by Nishant Patel; committed by Scott Cyphers on May 01, 2019.
6 changed files
--- a/src/ngraph/op/experimental/quantized_conv.cpp
+++ b/src/ngraph/op/experimental/quantized_conv.cpp
@@ -49,6 +49,11 @@ op::QuantizedConvolution::QuantizedConvolution(const shared_ptr<Node>& data_batc

    auto output_et = requantize ? element::i8 : element::i32;

+    if (data_batch->get_element_type() == element::u8 && filters->get_element_type() == element::u8)
+    {
+        output_et = element::u8;
+    }
+
    set_output_type(0,
                    output_et,
                    util::infer_convolution_output_shape(this,

--- a/src/ngraph/runtime/cpu/builder/convolution.cpp
+++ b/src/ngraph/runtime/cpu/builder/convolution.cpp
@@ -88,10 +88,10 @@ namespace ngraph
                }
                else
                {
-                    std::function<decltype(runtime::cpu::kernel::convolution<float>)> kernel;
+                    std::function<decltype(runtime::cpu::kernel::convolution<float, float, float>)>
+                        kernel;

-                    SELECT_KERNEL(
-                        kernel, out[0].get_element_type(), runtime::cpu::kernel::convolution);
+                    kernel = runtime::cpu::kernel::convolution<float, float, float>;

                    auto window_movement_strides = convolution->get_window_movement_strides();
                    auto window_dilation_strides = convolution->get_window_dilation_strides();
@@ -123,7 +123,8 @@ namespace ngraph
                               window_dilation_strides,
                               padding_below,
                               padding_above,
-                               data_dilation_strides);
+                               data_dilation_strides,
+                               1.0f);
                    };
                    functors.emplace_back(functor);
                }

--- a/src/ngraph/runtime/cpu/builder/quantized_conv.cpp
+++ b/src/ngraph/runtime/cpu/builder/quantized_conv.cpp
@@ -20,6 +20,7 @@
 #include "ngraph/op/experimental/quantized_conv_relu.hpp"
 #include "ngraph/runtime/cpu/cpu_builder.hpp"
 #include "ngraph/runtime/cpu/cpu_executor.hpp"
+#include "ngraph/runtime/cpu/kernel/convolution.hpp"
 #include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
 #include "ngraph/runtime/cpu/mkldnn_utils.hpp"

@@ -35,19 +36,24 @@ namespace ngraph
            template <>
            void Builder::BUILDER_DECL(ngraph::op::QuantizedConvolution)
            {
+                auto qconvolution = static_cast<const ngraph::op::QuantizedConvolution*>(node);
+
+                auto& functors = external_function->get_functors();
+
+                auto arg0_shape = args[0].get_shape();
+                auto arg1_shape = args[1].get_shape();
+                auto result_shape = out[0].get_shape();
+
+                auto arg0_buffer_index = external_function->get_buffer_index(args[0].get_name());
+                auto arg1_buffer_index = external_function->get_buffer_index(args[1].get_name());
+                auto arg2_buffer_index = external_function->get_buffer_index(args[2].get_name());
+                auto out0_buffer_index = external_function->get_buffer_index(out[0].get_name());
+
+                auto scales_size = shape_size(args[2].get_shape());
+
                if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
                {
-                    auto& functors = external_function->get_functors();
-                    auto arg0_buffer_index =
-                        external_function->get_buffer_index(args[0].get_name());
-                    auto arg1_buffer_index =
-                        external_function->get_buffer_index(args[1].get_name());
-                    auto arg2_buffer_index =
-                        external_function->get_buffer_index(args[2].get_name());
-                    auto out0_buffer_index = external_function->get_buffer_index(out[0].get_name());
-
                    auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
-                    auto scales_size = shape_size(args[2].get_shape());

                    auto conv_desc =
                        mkldnn_emitter
@@ -101,7 +107,51 @@ namespace ngraph
                }
                else
                {
-                    throw ngraph_error("unsupported parameters for QuantizedConvolution via DEX");
+                    std::function<decltype(
+                        runtime::cpu::kernel::convolution<uint8_t, uint8_t, uint8_t, int32_t>)>
+                        kernel;
+                    kernel = runtime::cpu::kernel::convolution<uint8_t, uint8_t, uint8_t, int32_t>;
+
+                    auto window_movement_strides = qconvolution->get_window_movement_strides();
+                    auto window_dilation_strides = qconvolution->get_window_dilation_strides();
+                    auto padding_below = qconvolution->get_padding_below();
+                    auto padding_above = qconvolution->get_padding_above();
+                    auto data_dilation_strides = qconvolution->get_data_dilation_strides();
+
+                    auto functor = [&,
+                                    kernel,
+                                    arg0_shape,
+                                    arg1_shape,
+                                    arg0_buffer_index,
+                                    arg1_buffer_index,
+                                    arg2_buffer_index,
+                                    out0_buffer_index,
+                                    result_shape,
+                                    window_movement_strides,
+                                    window_dilation_strides,
+                                    padding_below,
+                                    padding_above,
+                                    data_dilation_strides,
+                                    scales_size](CPURuntimeContext* ctx,
+                                                 CPUExecutionContext* ectx) {
+                        vector<float> dyn_scales;
+                        dyn_scales.assign(static_cast<float*>(ctx->buffer_data[arg2_buffer_index]),
+                                          static_cast<float*>(ctx->buffer_data[arg2_buffer_index]) +
+                                              scales_size);
+                        kernel(ctx->buffer_data[arg0_buffer_index],
+                               ctx->buffer_data[arg1_buffer_index],
+                               ctx->buffer_data[out0_buffer_index],
+                               arg0_shape,
+                               arg1_shape,
+                               result_shape,
+                               window_movement_strides,
+                               window_dilation_strides,
+                               padding_below,
+                               padding_above,
+                               data_dilation_strides,
+                               dyn_scales[0]);
+                    };
+                    functors.emplace_back(functor);
                }
            }


--- a/src/ngraph/runtime/cpu/kernel/convolution.hpp
+++ b/src/ngraph/runtime/cpu/kernel/convolution.hpp
@@ -27,7 +27,11 @@ namespace ngraph
        {
            namespace kernel
            {
-                template <typename ElementType>
+                template <typename INPUT,
+                          typename FILTER,
+                          typename OUTPUT,
+                          typename ACCUMULATION =
+                              typename ngraph::runtime::reference::widen<OUTPUT>::type>
                void convolution(void* input0,
                                 void* input1,
                                 void* output,
@@ -38,19 +42,22 @@ namespace ngraph
                                 const Strides& window_dilation_strides,
                                 const CoordinateDiff& padding_below,
                                 const CoordinateDiff& padding_above,
-                                 const Strides& data_dilation_strides)
+                                 const Strides& data_dilation_strides,
+                                 const float requant_scale)
                {
-                    reference::convolution<ElementType>(static_cast<const ElementType*>(input0),
-                                                        static_cast<const ElementType*>(input1),
-                                                        static_cast<ElementType*>(output),
-                                                        arg0_shape,
-                                                        arg1_shape,
-                                                        result_shape,
-                                                        window_movement_strides,
-                                                        window_dilation_strides,
-                                                        padding_below,
-                                                        padding_above,
-                                                        data_dilation_strides);
+                    reference::convolution<INPUT, FILTER, OUTPUT, ACCUMULATION>(
+                        static_cast<const INPUT*>(input0),
+                        static_cast<const FILTER*>(input1),
+                        static_cast<OUTPUT*>(output),
+                        arg0_shape,
+                        arg1_shape,
+                        result_shape,
+                        window_movement_strides,
+                        window_dilation_strides,
+                        padding_below,
+                        padding_above,
+                        data_dilation_strides,
+                        requant_scale);
                }

                template <typename ElementType>

--- a/src/ngraph/runtime/reference/convolution.hpp
+++ b/src/ngraph/runtime/reference/convolution.hpp
--- a/test/builder_quantization.cpp
+++ b/test/builder_quantization.cpp
@@ -1423,3 +1423,49 @@ TEST(builder, dynamic_scaled_QD_with_bias)
    EXPECT_EQ((vector<uint8_t>{178, 231, 255, 255, 0, 255, 255, 255, 255, 255, 0, 255}),
              read_vector<uint8_t>(f_requantize_relu_r));
 }
+
+TEST(builder, scaled_QC_u8u8) // QuantizedConvolution on u8 data and u8 filters, run on the CPU backend
+{
+    Shape shape_a{1, 1, 3, 4};                                     // input shape
+    Shape shape_b{1, 1, 3, 3};                                     // filter shape
+    Shape shape_r{1, 1, 3, 4};                                     // output shape
+    vector<uint8_t> a_data = {1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4}; // 12 input values, one per element of shape_a
+    vector<uint8_t> b_data = {1, 2, 3, 4, 5, 0, 0, 1, 2};          // 9 filter values, one per element of shape_b
+    auto A = make_shared<op::Parameter>(element::u8, shape_a);
+    auto B = make_shared<op::Parameter>(element::u8, shape_b);
+    auto scale = op::Constant::create(element::f32, Shape{}, {2}); // scalar f32 constant with value 2, passed to the op as its scale
+    auto CV = make_shared<ngraph::op::QuantizedConvolution>(A,
+                                                            B,
+                                                            Strides{1, 1},        // move_strides
+                                                            Strides{1, 1},        // filter_dilation
+                                                            CoordinateDiff{1, 1}, // below_pads
+                                                            CoordinateDiff{1, 1}, // above_pads
+                                                            Strides{1, 1},        // data_dilation
+                                                            scale,
+                                                            false); // NOTE(review): presumably the requantize flag - confirm against the constructor
+    auto f = make_shared<Function>(NodeVector{CV}, ParameterVector{A, B});
+    constant_fold(f);
+
+    auto backend = runtime::Backend::create("CPU");
+    // Create the u8 input tensors (filled from a_data / b_data) and the u8 result tensor
+    auto a = backend->create_tensor(element::u8, shape_a);
+    copy_data(a, a_data);
+    auto b = backend->create_tensor(element::u8, shape_b);
+    copy_data(b, b_data);
+    auto result = backend->create_tensor(element::u8, shape_r);
+    auto handle = backend->compile(f);
+    handle->call_with_validate({result}, {a, b});
+    EXPECT_EQ((vector<uint8_t>{22 * 2,
+                               34 * 2,
+                               30 * 2,
+                               32 * 2,
+                               38 * 2,
+                               72 * 2,
+                               90 * 2,
+                               43 * 2,
+                               33 * 2,
+                               52 * 2,
+                               43 * 2,
+                               39 * 2} /* 12 expected outputs, each written as <value> * 2; presumably the 2 is the scale constant above - confirm */),
+              read_vector<uint8_t>(result));
+}