Commit 577d5c6c authored by Jaikrishnan Menon, committed by Scott Cyphers

CPU: Optimize 2D Max reductions with a single reduction axis (#823)

parent 25a0f622
......@@ -205,6 +205,7 @@ if (NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR AND
runtime/cpu/mkldnn_utils.cpp
runtime/cpu/kernel/eigen_thread_pool.cpp
runtime/cpu/kernel/pad.cpp
runtime/cpu/kernel/reduce_max.cpp
runtime/cpu/kernel/reduce_sum.cpp
runtime/cpu/op/conv_bias.cpp
runtime/cpu/op/conv_relu.cpp
......
......@@ -3105,14 +3105,26 @@ namespace ngraph
<< "});\n";
}
#else
// TODO: add an emitter akin to the emit_sum
writer << "reference::max<" << out[0].get_type() << ">(" << args[0].get_name()
<< ",\n";
writer << " " << out[0].get_name() << ",\n";
writer << " {" << join(args[0].get_shape()) << "},\n";
writer << " {" << join(out[0].get_shape()) << "},\n";
writer << " {" << join(max->get_reduction_axes())
<< "});\n";
if (args[0].get_element_type() == element::f32 && args[0].get_shape().size() == 2 &&
max->get_reduction_axes().size() == 1)
{
writer << "cpu::kernel::reduce_max_2d_1rd_float32(" << args[0].get_name()
<< ", " << out[0].get_name() << ", "
<< "{" << join(args[0].get_shape()) << "}, "
<< "{" << join(out[0].get_shape()) << "}, "
<< "{" << join(max->get_reduction_axes()) << "}"
<< ");\n";
}
else
{
writer << "reference::max<" << out[0].get_type() << ">(" << args[0].get_name()
<< ",\n";
writer << " " << out[0].get_name() << ",\n";
writer << " {" << join(args[0].get_shape()) << "},\n";
writer << " {" << join(out[0].get_shape()) << "},\n";
writer << " {" << join(max->get_reduction_axes())
<< "});\n";
}
#endif
writer.block_end();
}
......
......@@ -144,6 +144,12 @@ namespace ngraph
float* output,
const Shape& input_shape,
const Shape& output_shape);
// Max-reduction kernel entry point for float tensors: takes raw input and
// output buffers, their shapes, and the set of axes to reduce.
// NOTE(review): the name suggests rank-2 input with exactly one reduction
// axis ("2d_1rd") -- confirm against the definition and the emitter's guard.
void reduce_max_2d_1rd_float32(float* input,
float* output,
const Shape& input_shape,
const Shape& output_shape,
const AxisSet& reduction_axes);
}
}
}
......
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "reduce_max.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
/// Non-template entry point: reduces a rank-1 float tensor to its maximum
/// by forwarding to reduce_max_all<float, 1>.
void reduce_max_all_1d_float32(float* input,
                               float* output,
                               const Shape& input_shape,
                               const Shape& output_shape)
{
    constexpr unsigned int input_rank = 1;
    reduce_max_all<float, input_rank>(input, output, input_shape, output_shape);
}
/// Non-template entry point: reduces a rank-2 float tensor to its maximum
/// by forwarding to reduce_max_all<float, 2>.
void reduce_max_all_2d_float32(float* input,
                               float* output,
                               const Shape& input_shape,
                               const Shape& output_shape)
{
    constexpr unsigned int input_rank = 2;
    reduce_max_all<float, input_rank>(input, output, input_shape, output_shape);
}
/// Non-template entry point: max-reduces a rank-2 float tensor along one
/// axis by forwarding to reduce_max<float, 2, 1>.
void reduce_max_2d_1rd_float32(float* input,
                               float* output,
                               const Shape& input_shape,
                               const Shape& output_shape,
                               const AxisSet& reduction_axes)
{
    constexpr unsigned int input_rank = 2;
    constexpr unsigned int reduced_axis_count = 1;
    reduce_max<float, input_rank, reduced_axis_count>(
        input, output, input_shape, output_shape, reduction_axes);
}
/// Non-template entry point: reduces a rank-4 float tensor to its maximum
/// by forwarding to reduce_max_all<float, 4>.
void reduce_max_all_4d_float32(float* input,
                               float* output,
                               const Shape& input_shape,
                               const Shape& output_shape)
{
    constexpr unsigned int input_rank = 4;
    reduce_max_all<float, input_rank>(input, output, input_shape, output_shape);
}
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <stdexcept>
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
#include "ngraph/shape.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType, unsigned int Rank>
void reduce_max_all(ElementType* input,
ElementType* output,
const Shape& input_shape,
const Shape& output_shape)
{
Eigen::array<Eigen::Index, Rank> in_dims;
Eigen::array<Eigen::Index, 0> out_dims;
for (int i = 0; i < Rank; i++)
{
in_dims[i] = input_shape[i];
}
Eigen::TensorMap<Eigen::Tensor<ElementType, 0, Eigen::RowMajor>> out(output,
out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> in(input,
in_dims);
out.device(eigen::global_thread_pool_device) = in.maximum();
}
template <typename ElementType, unsigned int Rank, unsigned int ReductionDims>
void reduce_max(ElementType* input,
ElementType* output,
const Shape& input_shape,
const Shape& output_shape,
const AxisSet& reduction_axes)
{
Eigen::array<Eigen::Index, Rank> in_dims;
Eigen::array<Eigen::Index, Rank - ReductionDims> out_dims;
Eigen::array<Eigen::Index, ReductionDims> reduction_dims;
for (int i = 0; i < Rank; i++)
{
in_dims[i] = input_shape[i];
}
for (int i = 0; i < Rank - ReductionDims; i++)
{
out_dims[i] = output_shape[i];
}
int i = 0;
for (auto axis : reduction_axes)
{
reduction_dims[i++] = axis;
}
Eigen::TensorMap<
Eigen::Tensor<ElementType, Rank - ReductionDims, Eigen::RowMajor>>
out(output, out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> in(input,
in_dims);
out.device(eigen::global_thread_pool_device) = in.maximum(reduction_dims);
}
}
}
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment