Optimized eigen kernel for spatial mean (#1094)

* Optimized eigen kernel for 2D reduction on a 4D tensor used for spatial mean
* revert change to serializer

Optimized eigen kernel for spatial mean (#1094)
* Optimized eigen kernel for 2D reduction on a 4D tensor used for spatial mean
* revert change to serializer
0b95efa6 · Jayaram Bobba · Scott Cyphers · abb68627 · 0b95efa6 · 0b95efa6
Commit 0b95efa6 authored Jun 08, 2018 by Jayaram Bobba Committed by Scott Cyphers Jun 08, 2018
Showing with 25 additions and 0 deletions

cpu_emitter.cpp src/ngraph/runtime/cpu/cpu_emitter.cpp +10 -0

cpu_kernels.hpp src/ngraph/runtime/cpu/cpu_kernels.hpp +6 -0

reduce_sum.cpp src/ngraph/runtime/cpu/kernel/reduce_sum.cpp +9 -0

No files found.
--- a/src/ngraph/runtime/cpu/cpu_emitter.cpp
+++ b/src/ngraph/runtime/cpu/cpu_emitter.cpp
@@ -2095,6 +2095,16 @@ namespace ngraph
                           << "{" << join(sum->get_reduction_axes()) << "}"
                           << ");\n";
                }
+                else if (args[0].get_element_type() == element::f32 &&
+                         args[0].get_shape().size() == 4 && sum->get_reduction_axes().size() == 2)
+                {
+                    writer << "cpu::kernel::reduce_sum_4d_2rd_float32(" << args[0].get_name()
+                           << ", " << out[0].get_name() << ", "
+                           << "{" << join(args[0].get_shape()) << "}, "
+                           << "{" << join(out[0].get_shape()) << "}, "
+                           << "{" << join(sum->get_reduction_axes()) << "}"
+                           << ");\n";
+                }
                else if (args[0].get_element_type() == element::f32 &&
                         args[0].get_shape().size() == 4 && sum->get_reduction_axes().size() == 4)
                {

--- a/src/ngraph/runtime/cpu/cpu_kernels.hpp
+++ b/src/ngraph/runtime/cpu/cpu_kernels.hpp
@@ -158,6 +158,12 @@ namespace ngraph
                                               const Shape& output_shape,
                                               const AxisSet& reduction_axes);

+                void reduce_sum_4d_2rd_float32(float* input, // float32 kernel the emitter calls for a rank-4 input with exactly two reduction axes
+                                               float* output,
+                                               const Shape& input_shape,
+                                               const Shape& output_shape,
+                                               const AxisSet& reduction_axes);
+
                void reduce_sum_all_4d_float32(float* input,
                                               float* output,
                                               const Shape& input_shape,

--- a/src/ngraph/runtime/cpu/kernel/reduce_sum.cpp
+++ b/src/ngraph/runtime/cpu/kernel/reduce_sum.cpp
@@ -57,6 +57,15 @@ namespace ngraph
                {
                    reduce_sum_all<float, 4>(input, output, input_shape, output_shape);
                }
+                void reduce_sum_4d_2rd_float32(float* input, // non-template entry point; emitted for f32 inputs of rank 4 with two reduction axes
+                                               float* output,
+                                               const Shape& input_shape,
+                                               const Shape& output_shape,
+                                               const AxisSet& reduction_axes)
+                {
+                    reduce_sum<float, 4, 2>( // forwards every argument unchanged; template args presumably <element type, input rank, reduced-axis count> - confirm against the reduce_sum template
+                        input, output, input_shape, output_shape, reduction_axes);
+                }
            }
        }
    }