Commit 60ae9e8c authored by Jaikrishnan Menon

CPU Direct Execution: Implement common reduction builder and Max

Also modify existing kernel so it works within the builder framework
parent 2d543ee4
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <cstring>
#include "ngraph/op/max.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/kernel/reduce_max.hpp"
#include "reduction.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
namespace runtime
{
namespace cpu
{
// CPU direct-execution builder specialization for ngraph::op::Max.
// The whole body is produced by BUILD_REDUCTION_FUNCTOR, invoked with the op
// class name `Max` (used as ngraph::op::Max) and the kernel name stem `max`
// (token-pasted into kernel::reduce_max_* and used directly as kernel::max).
// NOTE(review): the parameter list comes from BUILDER_DECL, which is defined
// outside this file; the macro body expects `external_function`, `node`,
// `args` and `out` to be in scope -- confirm against cpu_builder.hpp.
template <>
void Builder::BUILDER_DECL(ngraph::op::Max)
{
BUILD_REDUCTION_FUNCTOR(Max, max);
}
// Registers the Max builder; REGISTER_OP_BUILDER is defined elsewhere
// (presumably it adds the specialization to the builder dispatch table -- verify).
REGISTER_OP_BUILDER(Max);
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
/*
 * BUILD_REDUCTION_FUNCTOR(OP, K)
 *
 * Expands to the body of a CPU builder for a reduction op and appends exactly
 * one functor to external_function->get_functors().
 *
 *   OP - op class name; the node is cast to `const ngraph::op::OP*` and its
 *        get_reduction_axes() is queried.
 *   K  - kernel name stem; token-pasted to form
 *        runtime::cpu::kernel::reduce_K_all / reduce_K_1rd / reduce_K_3d_2rd /
 *        reduce_K_4d_2rd / reduce_K_5d_2rd, and used as-is for the fallback
 *        runtime::cpu::kernel::K.
 *
 * Names that must be in scope at the expansion site: `external_function`,
 * `node`, `args`, `out`, plus the SELECT_KERNEL and SELECT_KERNEL_BY_RANK
 * macros. The expansion contains bare `return;` statements, so it must be used
 * inside a function returning void.
 *
 * Kernel selection, in order:
 *   1. no reduction axes            -> memcpy of the input to the output
 *                                      (out[0].get_size() * element size bytes)
 *   2. every axis reduced           -> reduce_K_all, chosen by arg rank
 *   3. exactly one axis reduced     -> reduce_K_1rd, chosen by arg rank
 *   4. two axes, arg rank 3, 4 or 5 -> reduce_K_{3,4,5}d_2rd
 *   5. anything else                -> kernel::K
 *
 * Every functor uses a `[&, ...]` capture: `arg_tensor` and `out_tensor` are
 * captured by reference (they are references into get_tensor_data()), while
 * the kernel, shapes and reduction axes are captured by value.
 * NOTE(review): the by-reference capture presumably exists so the functor sees
 * tensor pointers that are filled in or changed after build time -- confirm.
 * The `ctx` parameter of each functor is not used by any of these lambdas.
 */
#define BUILD_REDUCTION_FUNCTOR(OP, K) \
auto& functors = external_function->get_functors(); \
auto& tensor_data = external_function->get_tensor_data(); \
\
auto& arg_tensor = tensor_data[args[0].get_name()]; \
auto& out_tensor = tensor_data[out[0].get_name()]; \
\
auto op = static_cast<const ngraph::op::OP*>(node); \
\
auto arg_shape = args[0].get_shape(); \
auto arg_rank = arg_shape.size(); \
\
auto result_shape = out[0].get_shape(); \
auto& result_element_type = out[0].get_element_type(); \
\
auto reduction_axes = op->get_reduction_axes(); \
\
if (reduction_axes.empty()) \
{ \
size_t size = out[0].get_size() * out[0].get_element_type().size(); \
auto functor = [&, size](CPURuntimeContext* ctx) { \
memcpy(out_tensor, arg_tensor, size); \
}; \
functors.emplace_back(functor); \
return; \
} \
\
if (reduction_axes.size() == arg_rank) \
{ \
std::function<decltype(runtime::cpu::kernel::reduce_##K##_all<float, 2>)> kernel; \
SELECT_KERNEL_BY_RANK( \
kernel, result_element_type, arg_rank, runtime::cpu::kernel::reduce_##K##_all); \
auto functor = [&, kernel, arg_shape, result_shape](CPURuntimeContext* ctx) { \
kernel(arg_tensor, out_tensor, arg_shape, result_shape); \
}; \
functors.emplace_back(functor); \
return; \
} \
\
if (reduction_axes.size() == 1) \
{ \
std::function<decltype(runtime::cpu::kernel::reduce_##K##_1rd<float, 2>)> kernel; \
SELECT_KERNEL_BY_RANK( \
kernel, result_element_type, arg_rank, runtime::cpu::kernel::reduce_##K##_1rd); \
auto functor = \
[&, kernel, arg_shape, result_shape, reduction_axes](CPURuntimeContext* ctx) { \
kernel(arg_tensor, out_tensor, arg_shape, result_shape, reduction_axes); \
}; \
functors.emplace_back(functor); \
return; \
} \
\
if (reduction_axes.size() == 2 && arg_rank == 3) \
{ \
std::function<decltype(runtime::cpu::kernel::reduce_##K##_3d_2rd<float>)> kernel; \
SELECT_KERNEL(kernel, result_element_type, runtime::cpu::kernel::reduce_##K##_3d_2rd); \
auto functor = \
[&, kernel, arg_shape, result_shape, reduction_axes](CPURuntimeContext* ctx) { \
kernel(arg_tensor, out_tensor, arg_shape, result_shape, reduction_axes); \
}; \
functors.emplace_back(functor); \
return; \
} \
\
if (reduction_axes.size() == 2 && arg_rank == 4) \
{ \
std::function<decltype(runtime::cpu::kernel::reduce_##K##_4d_2rd<float>)> kernel; \
SELECT_KERNEL(kernel, result_element_type, runtime::cpu::kernel::reduce_##K##_4d_2rd); \
auto functor = \
[&, kernel, arg_shape, result_shape, reduction_axes](CPURuntimeContext* ctx) { \
kernel(arg_tensor, out_tensor, arg_shape, result_shape, reduction_axes); \
}; \
functors.emplace_back(functor); \
return; \
} \
\
if (reduction_axes.size() == 2 && arg_rank == 5) \
{ \
std::function<decltype(runtime::cpu::kernel::reduce_##K##_5d_2rd<float>)> kernel; \
SELECT_KERNEL(kernel, result_element_type, runtime::cpu::kernel::reduce_##K##_5d_2rd); \
auto functor = \
[&, kernel, arg_shape, result_shape, reduction_axes](CPURuntimeContext* ctx) { \
kernel(arg_tensor, out_tensor, arg_shape, result_shape, reduction_axes); \
}; \
functors.emplace_back(functor); \
return; \
} \
\
std::function<decltype(runtime::cpu::kernel::K<float>)> ref_kernel; \
\
SELECT_KERNEL(ref_kernel, result_element_type, runtime::cpu::kernel::K); \
\
auto functor = \
[&, ref_kernel, arg_shape, result_shape, reduction_axes](CPURuntimeContext* ctx) { \
ref_kernel(arg_tensor, out_tensor, arg_shape, result_shape, reduction_axes); \
}; \
functors.emplace_back(functor);
......@@ -20,6 +20,7 @@
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
#include "ngraph/runtime/reference/max.hpp"
#include "ngraph/shape.hpp"
namespace ngraph
......@@ -31,8 +32,8 @@ namespace ngraph
namespace kernel
{
template <typename ElementType, unsigned int Rank>
void reduce_max_all(ElementType* input,
ElementType* output,
void reduce_max_all(void* input,
void* output,
const Shape& input_shape,
const Shape& output_shape)
{
......@@ -44,16 +45,16 @@ namespace ngraph
in_dims[i] = input_shape[i];
}
Eigen::TensorMap<Eigen::Tensor<ElementType, 0, Eigen::RowMajor>> out(output,
out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> in(input,
in_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 0, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> in(
static_cast<ElementType*>(input), in_dims);
out.device(eigen::global_thread_pool_device) = in.maximum();
}
template <typename ElementType, unsigned int Rank, unsigned int ReductionDims>
void reduce_max(ElementType* input,
ElementType* output,
void reduce_max(void* input,
void* output,
const Shape& input_shape,
const Shape& output_shape,
const AxisSet& reduction_axes)
......@@ -80,11 +81,69 @@ namespace ngraph
Eigen::TensorMap<
Eigen::Tensor<ElementType, Rank - ReductionDims, Eigen::RowMajor>>
out(output, out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> in(input,
in_dims);
out(static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> in(
static_cast<ElementType*>(input), in_dims);
out.device(eigen::global_thread_pool_device) = in.maximum(reduction_dims);
}
// Max-reduction over a single axis of a Rank-dimensional input.
// Forwards to reduce_max with the reduction-dimension count fixed at 1, so
// that callers only have to dispatch on element type and rank.
template <typename ElementType, unsigned int Rank>
void reduce_max_1rd(void* input,
                    void* output,
                    const Shape& input_shape,
                    const Shape& output_shape,
                    const AxisSet& reduction_axes)
{
    constexpr unsigned int reduced_dim_count = 1;
    reduce_max<ElementType, Rank, reduced_dim_count>(
        input, output, input_shape, output_shape, reduction_axes);
}
// Max-reduction over two axes of a rank-3 input.
// Forwards to reduce_max with Rank = 3 and ReductionDims = 2, leaving the
// element type as the only template parameter callers need to select.
template <typename ElementType>
void reduce_max_3d_2rd(void* input,
                       void* output,
                       const Shape& input_shape,
                       const Shape& output_shape,
                       const AxisSet& reduction_axes)
{
    constexpr unsigned int input_rank = 3;
    constexpr unsigned int reduced_dim_count = 2;
    reduce_max<ElementType, input_rank, reduced_dim_count>(
        input, output, input_shape, output_shape, reduction_axes);
}
// Max-reduction over two axes of a rank-4 input.
// Forwards to reduce_max with Rank = 4 and ReductionDims = 2, leaving the
// element type as the only template parameter callers need to select.
template <typename ElementType>
void reduce_max_4d_2rd(void* input,
                       void* output,
                       const Shape& input_shape,
                       const Shape& output_shape,
                       const AxisSet& reduction_axes)
{
    constexpr unsigned int input_rank = 4;
    constexpr unsigned int reduced_dim_count = 2;
    reduce_max<ElementType, input_rank, reduced_dim_count>(
        input, output, input_shape, output_shape, reduction_axes);
}
// Max-reduction over two axes of a rank-5 input.
// Forwards to reduce_max with Rank = 5 and ReductionDims = 2, leaving the
// element type as the only template parameter callers need to select.
template <typename ElementType>
void reduce_max_5d_2rd(void* input,
                       void* output,
                       const Shape& input_shape,
                       const Shape& output_shape,
                       const AxisSet& reduction_axes)
{
    constexpr unsigned int input_rank = 5;
    constexpr unsigned int reduced_dim_count = 2;
    reduce_max<ElementType, input_rank, reduced_dim_count>(
        input, output, input_shape, output_shape, reduction_axes);
}
// Type-erased adapter around reference::max.
// Accepts the tensors as void*, reinterprets them as ElementType buffers and
// hands them, together with the shapes and reduction axes, to reference::max.
template <typename ElementType>
void max(void* arg,
         void* out,
         const Shape& in_shape,
         const Shape& out_shape,
         const AxisSet& reduction_axes)
{
    ElementType* typed_arg = static_cast<ElementType*>(arg);
    ElementType* typed_out = static_cast<ElementType*>(out);
    reference::max(typed_arg, typed_out, in_shape, out_shape, reduction_axes);
}
}
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment