Commit d473eda9, authored by Jaikrishnan Menon and committed by Scott Cyphers

DEX: Enable packetized innermost dim reducers (#1431)

parent d742c501
......@@ -54,6 +54,21 @@
\
if (reduction_axes.size() == 1) \
{ \
if (*reduction_axes.begin() == arg_rank - 1) \
{ \
std::function<decltype(runtime::cpu::kernel::reduce_##K##_innermost_1rd<float, 2>)> \
kernel; \
SELECT_KERNEL_BY_RANK(kernel, \
result_element_type, \
arg_rank, \
runtime::cpu::kernel::reduce_##K##_innermost_1rd); \
auto functor = [&, kernel, arg_shape, result_shape](CPURuntimeContext* ctx) { \
kernel(arg_tensor, out_tensor, arg_shape, result_shape); \
}; \
functors.emplace_back(functor); \
return; \
} \
\
std::function<decltype(runtime::cpu::kernel::reduce_##K##_1rd<float, 2>)> kernel; \
SELECT_KERNEL_BY_RANK( \
kernel, result_element_type, arg_rank, runtime::cpu::kernel::reduce_##K##_1rd); \
......
......@@ -52,6 +52,33 @@ namespace ngraph
out.device(eigen::global_thread_pool_device) = in.maximum();
}
template <typename ElementType, unsigned int Rank>
void reduce_max_innermost_1rd(void* input,
void* output,
const Shape& input_shape,
const Shape& output_shape)
{
Eigen::array<Eigen::Index, Rank> in_dims;
Eigen::array<Eigen::Index, Rank - 1> out_dims;
Eigen::IndexList<Eigen::type2index<Rank - 1>> reduction_dim;
for (int i = 0; i < Rank; i++)
{
in_dims[i] = input_shape[i];
}
for (int i = 0; i < Rank - 1; i++)
{
out_dims[i] = output_shape[i];
}
Eigen::TensorMap<Eigen::Tensor<ElementType, Rank - 1, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> in(
static_cast<ElementType*>(input), in_dims);
out.device(eigen::global_thread_pool_device) = in.maximum(reduction_dim);
}
template <typename ElementType, unsigned int Rank, unsigned int ReductionDims>
void reduce_max(void* input,
void* output,
......
......@@ -52,6 +52,33 @@ namespace ngraph
out.device(eigen::global_thread_pool_device) = in.minimum();
}
template <typename ElementType, unsigned int Rank>
void reduce_min_innermost_1rd(void* input,
void* output,
const Shape& input_shape,
const Shape& output_shape)
{
Eigen::array<Eigen::Index, Rank> in_dims;
Eigen::array<Eigen::Index, Rank - 1> out_dims;
Eigen::IndexList<Eigen::type2index<Rank - 1>> reduction_dim;
for (int i = 0; i < Rank; i++)
{
in_dims[i] = input_shape[i];
}
for (int i = 0; i < Rank - 1; i++)
{
out_dims[i] = output_shape[i];
}
Eigen::TensorMap<Eigen::Tensor<ElementType, Rank - 1, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> in(
static_cast<ElementType*>(input), in_dims);
out.device(eigen::global_thread_pool_device) = in.minimum(reduction_dim);
}
template <typename ElementType, unsigned int Rank, unsigned int ReductionDims>
void reduce_min(void* input,
void* output,
......
......@@ -52,6 +52,33 @@ namespace ngraph
out.device(eigen::global_thread_pool_device) = in.prod();
}
template <typename ElementType, unsigned int Rank>
void reduce_product_innermost_1rd(void* input,
void* output,
const Shape& input_shape,
const Shape& output_shape)
{
Eigen::array<Eigen::Index, Rank> in_dims;
Eigen::array<Eigen::Index, Rank - 1> out_dims;
Eigen::IndexList<Eigen::type2index<Rank - 1>> reduction_dim;
for (int i = 0; i < Rank; i++)
{
in_dims[i] = input_shape[i];
}
for (int i = 0; i < Rank - 1; i++)
{
out_dims[i] = output_shape[i];
}
Eigen::TensorMap<Eigen::Tensor<ElementType, Rank - 1, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> in(
static_cast<ElementType*>(input), in_dims);
out.device(eigen::global_thread_pool_device) = in.prod(reduction_dim);
}
template <typename ElementType, unsigned int Rank, unsigned int ReductionDims>
void reduce_product(void* input,
void* output,
......
......@@ -52,6 +52,33 @@ namespace ngraph
out.device(eigen::global_thread_pool_device) = in.sum();
}
template <typename ElementType, unsigned int Rank>
void reduce_sum_innermost_1rd(void* input,
void* output,
const Shape& input_shape,
const Shape& output_shape)
{
Eigen::array<Eigen::Index, Rank> in_dims;
Eigen::array<Eigen::Index, Rank - 1> out_dims;
Eigen::IndexList<Eigen::type2index<Rank - 1>> reduction_dim;
for (int i = 0; i < Rank; i++)
{
in_dims[i] = input_shape[i];
}
for (int i = 0; i < Rank - 1; i++)
{
out_dims[i] = output_shape[i];
}
Eigen::TensorMap<Eigen::Tensor<ElementType, Rank - 1, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> in(
static_cast<ElementType*>(input), in_dims);
out.device(eigen::global_thread_pool_device) = in.sum(reduction_dim);
}
template <typename ElementType, unsigned int Rank, unsigned int ReductionDims>
void reduce_sum(void* input,
void* output,
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment