CPU: Optimize 2D Max reductions with a single reduction axis (#823)

577d5c6c · Jaikrishnan Menon · Scott Cyphers · 25a0f622 · 577d5c6c · 577d5c6c
Commit 577d5c6c authored Apr 06, 2018 by Jaikrishnan Menon Committed by Scott Cyphers Apr 06, 2018
5 changed files
--- a/src/ngraph/CMakeLists.txt
+++ b/src/ngraph/CMakeLists.txt
@@ -205,6 +205,7 @@ if (NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR AND
        runtime/cpu/mkldnn_utils.cpp
        runtime/cpu/kernel/eigen_thread_pool.cpp
        runtime/cpu/kernel/pad.cpp
+        runtime/cpu/kernel/reduce_max.cpp
        runtime/cpu/kernel/reduce_sum.cpp
        runtime/cpu/op/conv_bias.cpp
        runtime/cpu/op/conv_relu.cpp

--- a/src/ngraph/runtime/cpu/cpu_emitter.cpp
+++ b/src/ngraph/runtime/cpu/cpu_emitter.cpp
@@ -3105,14 +3105,26 @@ namespace ngraph
                           << "});\n";
                }
 #else
-                // TODO: add an emitter akin to the emit_sum
+                if (args[0].get_element_type() == element::f32 && args[0].get_shape().size() == 2 &&
-                writer << "reference::max<" << out[0].get_type() << ">(" << args[0].get_name()
+                    max->get_reduction_axes().size() == 1)
-                       << ",\n";
+                {
-                writer << "                         " << out[0].get_name() << ",\n";
+                    writer << "cpu::kernel::reduce_max_2d_1rd_float32(" << args[0].get_name()
-                writer << "                         {" << join(args[0].get_shape()) << "},\n";
+                           << ", " << out[0].get_name() << ", "
-                writer << "                         {" << join(out[0].get_shape()) << "},\n";
+                           << "{" << join(args[0].get_shape()) << "}, "
-                writer << "                         {" << join(max->get_reduction_axes())
+                           << "{" << join(out[0].get_shape()) << "}, "
-                       << "});\n";
+                           << "{" << join(max->get_reduction_axes()) << "}"
+                           << ");\n";
+                }
+                else
+                {
+                    writer << "reference::max<" << out[0].get_type() << ">(" << args[0].get_name()
+                           << ",\n";
+                    writer << "                         " << out[0].get_name() << ",\n";
+                    writer << "                         {" << join(args[0].get_shape()) << "},\n";
+                    writer << "                         {" << join(out[0].get_shape()) << "},\n";
+                    writer << "                         {" << join(max->get_reduction_axes())
+                           << "});\n";
+                }
 #endif
                writer.block_end();
            }

--- a/src/ngraph/runtime/cpu/cpu_kernels.hpp
+++ b/src/ngraph/runtime/cpu/cpu_kernels.hpp
@@ -144,6 +144,12 @@ namespace ngraph
                                               float* output,
                                               const Shape& input_shape,
                                               const Shape& output_shape);
+                void reduce_max_2d_1rd_float32(float* input, // Max over one axis of a 2D float tensor
+                                               float* output, // receives the reduced values
+                                               const Shape& input_shape,
+                                               const Shape& output_shape,
+                                               const AxisSet& reduction_axes); // axes to reduce over
            }
        }
    }

--- a/src/ngraph/runtime/cpu/kernel/reduce_max.cpp
+++ b/src/ngraph/runtime/cpu/kernel/reduce_max.cpp
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+#include "reduce_max.hpp"
+namespace ngraph
+{
+    namespace runtime
+    {
+        namespace cpu
+        {
+            namespace kernel
+            {
+                void reduce_max_all_1d_float32(float* input, // Non-template entry: max of a rank-1 tensor
+                                               float* output, // receives the single maximum value
+                                               const Shape& input_shape,
+                                               const Shape& output_shape)
+                {
+                    reduce_max_all<float, 1>(input, output, input_shape, output_shape); // Rank = 1
+                }
+                void reduce_max_all_2d_float32(float* input, // Non-template entry: max of a rank-2 tensor
+                                               float* output, // receives the single maximum value
+                                               const Shape& input_shape,
+                                               const Shape& output_shape)
+                {
+                    reduce_max_all<float, 2>(input, output, input_shape, output_shape); // Rank = 2
+                }
+                void reduce_max_2d_1rd_float32(float* input, // Non-template entry: 2D max, one axis reduced
+                                               float* output, // mapped by reduce_max as a rank-1 tensor
+                                               const Shape& input_shape,
+                                               const Shape& output_shape,
+                                               const AxisSet& reduction_axes) // axes to reduce over
+                {
+                    reduce_max<float, 2, 1>( // ElementType = float, Rank = 2, ReductionDims = 1
+                        input, output, input_shape, output_shape, reduction_axes);
+                }
+                void reduce_max_all_4d_float32(float* input, // Non-template entry: max of a rank-4 tensor
+                                               float* output, // receives the single maximum value
+                                               const Shape& input_shape,
+                                               const Shape& output_shape)
+                {
+                    reduce_max_all<float, 4>(input, output, input_shape, output_shape); // Rank = 4
+                }
+            }
+        }
+    }
+}
--- a/src/ngraph/runtime/cpu/kernel/reduce_max.hpp
+++ b/src/ngraph/runtime/cpu/kernel/reduce_max.hpp
+/*******************************************************************************
+* Copyright 2018 Intel Corporation
+*
+* Licensed under the Apache License, Version 2.0 (the "License");
+* you may not use this file except in compliance with the License.
+* You may obtain a copy of the License at
+*
+*     http://www.apache.org/licenses/LICENSE-2.0
+*
+* Unless required by applicable law or agreed to in writing, software
+* distributed under the License is distributed on an "AS IS" BASIS,
+* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+* See the License for the specific language governing permissions and
+* limitations under the License.
+*******************************************************************************/
+#pragma once
+#define EIGEN_USE_THREADS
+#include <stdexcept>
+#include <unsupported/Eigen/CXX11/Tensor>
+#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
+#include "ngraph/shape.hpp"
+namespace ngraph
+{
+    namespace runtime
+    {
+        namespace cpu
+        {
+            namespace kernel
+            {
+                /// \brief Reduces an entire row-major tensor to its maximum element.
+                ///
+                /// \tparam ElementType Scalar type of the tensor elements.
+                /// \tparam Rank        Number of dimensions of the input tensor.
+                /// \param input        Buffer mapped as a Rank-dimensional row-major tensor.
+                /// \param output       Buffer mapped as a rank-0 tensor; receives the maximum.
+                /// \param input_shape  Extents of the input; the first Rank entries are read.
+                /// \param output_shape Not read by this kernel: a rank-0 result has no extents.
+                template <typename ElementType, unsigned int Rank>
+                void reduce_max_all(ElementType* input,
+                                    ElementType* output,
+                                    const Shape& input_shape,
+                                    const Shape& output_shape)
+                {
+                    // Mark the parameter as intentionally unused.
+                    static_cast<void>(output_shape);
+                    Eigen::array<Eigen::Index, Rank> in_dims;
+                    Eigen::array<Eigen::Index, 0> out_dims;
+                    // Rank is unsigned, so the counter is unsigned too; this avoids a
+                    // signed/unsigned comparison in the loop condition.
+                    for (unsigned int i = 0; i < Rank; i++)
+                    {
+                        in_dims[i] = static_cast<Eigen::Index>(input_shape[i]);
+                    }
+                    Eigen::TensorMap<Eigen::Tensor<ElementType, 0, Eigen::RowMajor>> out(output,
+                                                                                         out_dims);
+                    Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> in(input,
+                                                                                           in_dims);
+                    // Evaluate the full reduction on the shared Eigen thread pool device.
+                    out.device(eigen::global_thread_pool_device) = in.maximum();
+                }
+                /// \brief Takes the maximum of a row-major tensor along a fixed number of axes.
+                ///
+                /// \tparam ElementType   Scalar type of the tensor elements.
+                /// \tparam Rank          Number of dimensions of the input tensor.
+                /// \tparam ReductionDims Number of axes reduced away; the output is mapped with
+                ///                       rank Rank - ReductionDims.
+                /// \param input          Buffer mapped as a Rank-dimensional row-major tensor.
+                /// \param output         Buffer mapped as the reduced row-major tensor.
+                /// \param input_shape    Extents of the input; the first Rank entries are read.
+                /// \param output_shape   Extents of the output; the first Rank - ReductionDims
+                ///                       entries are read.
+                /// \param reduction_axes Axes to reduce over; must hold exactly ReductionDims axes.
+                /// \throws std::invalid_argument if reduction_axes does not hold exactly
+                ///         ReductionDims axes.
+                template <typename ElementType, unsigned int Rank, unsigned int ReductionDims>
+                void reduce_max(ElementType* input,
+                                ElementType* output,
+                                const Shape& input_shape,
+                                const Shape& output_shape,
+                                const AxisSet& reduction_axes)
+                {
+                    // Both template arguments are unsigned; reject instantiations where the
+                    // subtraction below would wrap around.
+                    static_assert(ReductionDims <= Rank,
+                                  "reduce_max: cannot reduce more axes than the tensor has");
+                    constexpr unsigned int out_rank = Rank - ReductionDims;
+                    // reduction_dims has a fixed size: too many axes would write past its end,
+                    // too few would leave entries uninitialized.
+                    if (reduction_axes.size() != ReductionDims)
+                    {
+                        throw std::invalid_argument(
+                            "reduce_max: reduction_axes size does not match ReductionDims");
+                    }
+                    Eigen::array<Eigen::Index, Rank> in_dims;
+                    Eigen::array<Eigen::Index, out_rank> out_dims;
+                    Eigen::array<Eigen::Index, ReductionDims> reduction_dims;
+                    for (unsigned int i = 0; i < Rank; i++)
+                    {
+                        in_dims[i] = static_cast<Eigen::Index>(input_shape[i]);
+                    }
+                    for (unsigned int i = 0; i < out_rank; i++)
+                    {
+                        out_dims[i] = static_cast<Eigen::Index>(output_shape[i]);
+                    }
+                    unsigned int next = 0;
+                    for (auto axis : reduction_axes)
+                    {
+                        reduction_dims[next++] = static_cast<Eigen::Index>(axis);
+                    }
+                    Eigen::TensorMap<Eigen::Tensor<ElementType, out_rank, Eigen::RowMajor>> out(
+                        output, out_dims);
+                    Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> in(input,
+                                                                                           in_dims);
+                    // Evaluate the partial reduction on the shared Eigen thread pool device.
+                    out.device(eigen::global_thread_pool_device) = in.maximum(reduction_dims);
+                }
+            }
+        }
+    }
+}