IntelGPU backend: Kahan summation algorithm in Sum operation implemented (#2422)

a275bfcf · Sergey Shalnov · Scott Cyphers · adfe479a · a275bfcf · a275bfcf
Commit a275bfcf authored Feb 12, 2019 by Sergey Shalnov; committed by Scott Cyphers Feb 12, 2019
Hide whitespace changes
Inline Side-by-side

Showing 2 changed files with 9 additions and 7 deletions

intelgpu_op_broadcast.cpp src/ngraph/runtime/intelgpu/intelgpu_op_broadcast.cpp +9 -6

unit_test.manifest src/ngraph/runtime/intelgpu/unit_test.manifest +0 -1

No files found.
--- a/src/ngraph/runtime/intelgpu/intelgpu_op_broadcast.cpp
+++ b/src/ngraph/runtime/intelgpu/intelgpu_op_broadcast.cpp
@@ -150,17 +150,20 @@ void runtime::intelgpu::do_bcast_sum_operation(cldnn::topology& topology,
                                                  axis);
            }

+            const string opencl_type_name = get_opencl_type_name(output_type);
+            const string reduction_init_acc = opencl_type_name + " result = 0.0f;\n" +
+                                              opencl_type_name + " compensation = 0.0f;\n";
            const string reduction_str =
                "output" + access_dims(input_shape, "i", axis) + " = result;\n";

            // Generate loops related to input order with GWS
-            gws = generate_loops_w_axes(writer,
-                                        input_shape,
-                                        true,
-                                        axis,
-                                        get_opencl_type_name(output_type) + " result = 0.0f;\n");
+            gws = generate_loops_w_axes(writer, input_shape, true, axis, reduction_init_acc);

-            writer << "result += input0" << access_dims(input_shape) << ";\n";
+            writer << opencl_type_name << " y = input0" << access_dims(input_shape)
+                   << " - compensation;\n"
+                   << opencl_type_name << " t = result + y;\n"
+                   << "compensation = (t - result) - y;\n"
+                   << "result = t;\n";

            // Close brackets related to input order with reduction
            generate_loops_w_axes(writer, input_shape, false, axis, reduction_str);

--- a/src/ngraph/runtime/intelgpu/unit_test.manifest
+++ b/src/ngraph/runtime/intelgpu/unit_test.manifest
@@ -70,7 +70,6 @@ shape_of_matrix
 shape_of_scalar
 shape_of_vector
 softmax_axis_3d_double
-sum_stable_acc
 sum_stable_acc_double
 sum_stable_simple_double
 sum_trivial_in_double