Unverified Commit 8c1aad8f authored by Jayaram Bobba's avatar Jayaram Bobba Committed by GitHub

Merge pull request #1249 from NervanaSystems/jmenon/dex4

CPU Direct Execution Part 4
parents 7a35cf81 0b20b1a7
...@@ -28,9 +28,25 @@ set(SRC ...@@ -28,9 +28,25 @@ set(SRC
cpu_tensor_view.cpp cpu_tensor_view.cpp
cpu_tracing.cpp cpu_tracing.cpp
builder/avg_pool.cpp builder/avg_pool.cpp
builder/batch_norm.cpp
builder/broadcast.cpp
builder/concat.cpp
builder/convert.cpp
builder/convert_layout.cpp builder/convert_layout.cpp
builder/convolution.cpp builder/convolution.cpp
builder/dot.cpp
builder/function_call.cpp
builder/matmul_bias.cpp
builder/max.cpp
builder/max_pool.cpp
builder/min.cpp
builder/reshape.cpp builder/reshape.cpp
builder/reverse.cpp
builder/reverse_sequence.cpp
builder/select.cpp
builder/select_and_scatter.cpp
builder/sigmoid.cpp
builder/sum.cpp
kernel/eigen_thread_pool.cpp kernel/eigen_thread_pool.cpp
kernel/pad.cpp kernel/pad.cpp
kernel/reduce_max.cpp kernel/reduce_max.cpp
......
...@@ -107,6 +107,8 @@ namespace ngraph ...@@ -107,6 +107,8 @@ namespace ngraph
functors.emplace_back(functor); functors.emplace_back(functor);
} }
} }
REGISTER_OP_BUILDER(AvgPool);
} }
} }
} }
This diff is collapsed.
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <cstring>
#include "ngraph/op/broadcast.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/kernel/broadcast.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            // Builds the execution functor for op::Broadcast and appends it to the
            // external function's functor list.
            //
            // * No broadcast axes: the output is a byte-for-byte copy of the input.
            // * Otherwise: the input shape is expanded to the output rank (extent 1 on
            //   every broadcast axis, the input extents elsewhere) and a kernel selected
            //   by input element type and output rank performs the broadcast.
            template <>
            void Builder::BUILDER_DECL(ngraph::op::Broadcast)
            {
                auto& functors = external_function->get_functors();
                auto& tensor_data = external_function->get_tensor_data();

                // These are references to the tensor_data slots; the functors capture
                // them by reference and read the stored pointer when they run.
                auto& arg_tensor = tensor_data[args[0].get_name()];
                auto& out_tensor = tensor_data[out[0].get_name()];

                auto broadcast = static_cast<const ngraph::op::Broadcast*>(node);
                const auto& broadcast_axes = broadcast->get_broadcast_axes();

                if (broadcast_axes.empty())
                {
                    // Nothing to broadcast: copy the whole output-sized buffer.
                    const size_t size = out[0].get_size() * out[0].get_element_type().size();
                    functors.emplace_back(
                        [&arg_tensor, &out_tensor, size](CPURuntimeContext* ctx) {
                            memcpy(out_tensor, arg_tensor, size);
                        });
                    return;
                }

                auto arg_shape = args[0].get_shape();
                auto out_shape = out[0].get_shape();
                const auto out_rank = out_shape.size();

                // A rank-0 input is handled as a one-element vector.
                if (arg_shape.empty())
                {
                    arg_shape = Shape{1};
                }

                // Start with extent 1 everywhere, then fill the non-broadcast axes with
                // the input extents in order.
                Shape new_shape(out_rank, 1);
                size_t arg_axis = 0;
                for (size_t out_axis = 0; out_axis < out_rank; ++out_axis)
                {
                    if (broadcast_axes.count(out_axis) == 0)
                    {
                        new_shape[out_axis] = arg_shape[arg_axis++];
                    }
                }

                std::function<decltype(runtime::cpu::kernel::broadcast<float, 2>)> kernel;
                SELECT_KERNEL_BY_RANK(
                    kernel, args[0].get_element_type(), out_rank, runtime::cpu::kernel::broadcast);

                functors.emplace_back(
                    [&arg_tensor, &out_tensor, kernel, new_shape, out_shape](
                        CPURuntimeContext* ctx) {
                        kernel(arg_tensor, out_tensor, new_shape, out_shape);
                    });
            }

            REGISTER_OP_BUILDER(Broadcast);
        }
    }
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "ngraph/op/concat.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/kernel/concat.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            // Builds the execution functor for op::Concat and appends it to the
            // external function's functor list.
            //
            // Inputs whose shape has zero elements are left out of the kernel call.
            // The kernel is selected by output element type and output rank.
            template <>
            void Builder::BUILDER_DECL(ngraph::op::Concat)
            {
                // static_cast, like the sibling builders: the previous dynamic_cast
                // result was dereferenced without a null check anyway.
                const auto axis =
                    static_cast<const ngraph::op::Concat*>(node)->get_concatenation_axis();

                auto& functors = external_function->get_functors();
                auto& tensor_data = external_function->get_tensor_data();

                std::function<decltype(runtime::cpu::kernel::concat<float, 1>)> kernel;
                SELECT_KERNEL_BY_RANK(kernel,
                                      out[0].get_element_type(),
                                      out[0].get_shape().size(),
                                      runtime::cpu::kernel::concat);

                // reference_wrapper keeps a reference to each tensor_data slot, so the
                // pointer stored in the slot is read when the functor runs.
                vector<reference_wrapper<void*>> arg_tensors;
                vector<Shape> arg_shapes;
                arg_tensors.reserve(args.size());
                arg_shapes.reserve(args.size());
                for (auto& arg : args)
                {
                    if (shape_size(arg.get_shape()))
                    {
                        arg_tensors.emplace_back(tensor_data[arg.get_name()]);
                        arg_shapes.emplace_back(arg.get_shape());
                    }
                }

                auto& out_tensor = tensor_data[out[0].get_name()];
                auto out_shape = out[0].get_shape();

                auto functor = [&out_tensor, kernel, arg_tensors, arg_shapes, out_shape, axis](
                    CPURuntimeContext* ctx) {
                    kernel(arg_tensors, arg_shapes, out_tensor, out_shape, axis);
                };
                functors.emplace_back(functor);
            }

            REGISTER_OP_BUILDER(Concat);
        }
    }
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "ngraph/op/convert.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/kernel/convert.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            // Builds the execution functor for op::Convert and appends it to the
            // external function's functor list.
            //
            // The kernel family is chosen by the OUTPUT element type; within that
            // family SELECT_KERNEL picks the instantiation for the INPUT element type.
            //
            // Throws ngraph_error when the output element type is not one of the
            // types handled below.
            template <>
            void Builder::BUILDER_DECL(ngraph::op::Convert)
            {
                auto& functors = external_function->get_functors();
                auto& tensor_data = external_function->get_tensor_data();

                auto& arg_tensor = tensor_data[args[0].get_name()];
                auto& out_tensor = tensor_data[out[0].get_name()];

                auto element_count = out[0].get_size();
                const auto& out_type = out[0].get_element_type();

                std::function<decltype(runtime::cpu::kernel::convert<float, int>)> kernel;

                // boolean output shares the i8 kernel family.
                // NOTE(review): this presumably stores the converted numeric value
                // rather than normalising to 0/1 - confirm against convert_to_i8.
                if (out_type == element::boolean || out_type == element::i8)
                {
                    SELECT_KERNEL(
                        kernel, args[0].get_element_type(), runtime::cpu::kernel::convert_to_i8);
                }
                else if (out_type == element::f32)
                {
                    SELECT_KERNEL(kernel,
                                  args[0].get_element_type(),
                                  runtime::cpu::kernel::convert_to_float32);
                }
                else if (out_type == element::f64)
                {
                    SELECT_KERNEL(kernel,
                                  args[0].get_element_type(),
                                  runtime::cpu::kernel::convert_to_float64);
                }
                else if (out_type == element::i16)
                {
                    SELECT_KERNEL(
                        kernel, args[0].get_element_type(), runtime::cpu::kernel::convert_to_i16);
                }
                else if (out_type == element::i32)
                {
                    SELECT_KERNEL(
                        kernel, args[0].get_element_type(), runtime::cpu::kernel::convert_to_i32);
                }
                else if (out_type == element::i64)
                {
                    SELECT_KERNEL(
                        kernel, args[0].get_element_type(), runtime::cpu::kernel::convert_to_i64);
                }
                else if (out_type == element::u8)
                {
                    SELECT_KERNEL(
                        kernel, args[0].get_element_type(), runtime::cpu::kernel::convert_to_u8);
                }
                else if (out_type == element::u16)
                {
                    SELECT_KERNEL(
                        kernel, args[0].get_element_type(), runtime::cpu::kernel::convert_to_u16);
                }
                else if (out_type == element::u32)
                {
                    SELECT_KERNEL(
                        kernel, args[0].get_element_type(), runtime::cpu::kernel::convert_to_u32);
                }
                else if (out_type == element::u64)
                {
                    SELECT_KERNEL(
                        kernel, args[0].get_element_type(), runtime::cpu::kernel::convert_to_u64);
                }
                else
                {
                    // This branch is reached for an unsupported OUTPUT type; the old
                    // message blamed the input type.
                    throw ngraph_error("Cannot convert to an invalid output element type");
                }

                auto functor = [&arg_tensor, &out_tensor, kernel, element_count](
                    CPURuntimeContext* ctx) {
                    kernel(arg_tensor, out_tensor, element_count);
                };
                functors.emplace_back(functor);
            }

            REGISTER_OP_BUILDER(Convert);
        }
    }
}
...@@ -403,6 +403,14 @@ namespace ngraph ...@@ -403,6 +403,14 @@ namespace ngraph
"ConvolutionBiasBackpropFiltersBias is only supported with MKLDNN kernel."); "ConvolutionBiasBackpropFiltersBias is only supported with MKLDNN kernel.");
} }
} }
REGISTER_OP_BUILDER(Convolution);
REGISTER_OP_BUILDER(ConvolutionRelu);
REGISTER_OP_BUILDER(ConvolutionBias);
REGISTER_OP_BUILDER(ConvolutionBiasAdd);
REGISTER_OP_BUILDER(ConvolutionBackpropData);
REGISTER_OP_BUILDER(ConvolutionBackpropFilters);
REGISTER_OP_BUILDER(ConvolutionBiasBackpropFiltersBias);
} }
} }
} }
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <cstring>
#include "ngraph/op/dot.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/kernel/dot.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            // Builds the execution functor for op::Dot and appends it to the external
            // function's functor list. Cases are tried in this order:
            //
            //   1. zero-element result            -> no-op functor
            //   2. zero-element operand           -> zero-fill the output
            //   3. one operand is a scalar        -> dot_scalar kernel
            //   4. 1d.1d / 2d.1d / 3d.3d / 3d.2d with one reduction axis
            //                                     -> dedicated kernels
            //   5. everything else                -> generic dot kernel
            //
            // Kernels are selected by the output element type.
            template <>
            void Builder::BUILDER_DECL(ngraph::op::Dot)
            {
                auto& functors = external_function->get_functors();
                auto& tensor_data = external_function->get_tensor_data();

                const auto arg0_shape = args[0].get_shape();
                const auto arg1_shape = args[1].get_shape();
                const auto result_shape = out[0].get_shape();

                // References to the tensor_data slots; the functors capture them by
                // reference and read the stored pointer when they run.
                auto& arg0_tensor = tensor_data[args[0].get_name()];
                auto& arg1_tensor = tensor_data[args[1].get_name()];
                auto& out_tensor = tensor_data[out[0].get_name()];

                const auto reduction_axes_count =
                    static_cast<const ngraph::op::Dot*>(node)->get_reduction_axes_count();

                // 1. Nothing to produce.
                if (shape_size(result_shape) == 0)
                {
                    functors.emplace_back([](CPURuntimeContext* ctx) {});
                    return;
                }

                // 2. A zero-element operand: the output is filled with zero bytes.
                if (shape_size(arg0_shape) == 0 || shape_size(arg1_shape) == 0)
                {
                    const auto size = shape_size(result_shape) * out[0].get_element_type().size();
                    functors.emplace_back([&out_tensor, size](CPURuntimeContext* ctx) {
                        memset(out_tensor, 0, size);
                    });
                    return;
                }

                // 3. Scalar times tensor. The scalar goes first; if both operands are
                //    scalars, arg0 is treated as the scalar.
                if (arg0_shape.empty() || arg1_shape.empty())
                {
                    const bool arg0_is_scalar = arg0_shape.empty();
                    auto& first_tensor = arg0_is_scalar ? arg0_tensor : arg1_tensor;
                    auto& second_tensor = arg0_is_scalar ? arg1_tensor : arg0_tensor;

                    std::function<decltype(runtime::cpu::kernel::dot_scalar<float>)> kernel;
                    SELECT_KERNEL(
                        kernel, out[0].get_element_type(), runtime::cpu::kernel::dot_scalar);

                    const auto element_count =
                        shape_size(arg0_is_scalar ? arg1_shape : arg0_shape);

                    functors.emplace_back(
                        [&first_tensor, &second_tensor, &out_tensor, kernel, element_count](
                            CPURuntimeContext* ctx) {
                            kernel(first_tensor, second_tensor, out_tensor, element_count);
                        });
                    return;
                }

                // 4. Rank-specific kernels, only used with exactly one reduction axis.
                const auto arg0_rank = arg0_shape.size();
                const auto arg1_rank = arg1_shape.size();
                const bool single_reduction_axis = (reduction_axes_count == 1);

                if (single_reduction_axis && arg0_rank == 1 && arg1_rank == 1)
                {
                    std::function<decltype(runtime::cpu::kernel::dot_1d_1d_1rd<float>)> kernel;
                    SELECT_KERNEL(
                        kernel, out[0].get_element_type(), runtime::cpu::kernel::dot_1d_1d_1rd);

                    functors.emplace_back([&arg0_tensor,
                                           &arg1_tensor,
                                           &out_tensor,
                                           kernel,
                                           arg0_shape,
                                           arg1_shape,
                                           result_shape](CPURuntimeContext* ctx) {
                        kernel(arg0_tensor,
                               arg1_tensor,
                               out_tensor,
                               arg0_shape,
                               arg1_shape,
                               result_shape);
                    });
                    return;
                }

                if (single_reduction_axis && arg0_rank == 2 && arg1_rank == 1)
                {
                    std::function<decltype(runtime::cpu::kernel::dot_2d_1d_1rd<float>)> kernel;
                    SELECT_KERNEL(
                        kernel, out[0].get_element_type(), runtime::cpu::kernel::dot_2d_1d_1rd);

                    functors.emplace_back([&arg0_tensor,
                                           &arg1_tensor,
                                           &out_tensor,
                                           kernel,
                                           arg0_shape,
                                           arg1_shape,
                                           result_shape](CPURuntimeContext* ctx) {
                        kernel(arg0_tensor,
                               arg1_tensor,
                               out_tensor,
                               arg0_shape,
                               arg1_shape,
                               result_shape);
                    });
                    return;
                }

                if (single_reduction_axis && arg0_rank == 3 && arg1_rank == 3)
                {
                    std::function<decltype(runtime::cpu::kernel::dot_3d_3d_1rd<float>)> kernel;
                    SELECT_KERNEL(
                        kernel, out[0].get_element_type(), runtime::cpu::kernel::dot_3d_3d_1rd);

                    functors.emplace_back([&arg0_tensor,
                                           &arg1_tensor,
                                           &out_tensor,
                                           kernel,
                                           arg0_shape,
                                           arg1_shape,
                                           result_shape](CPURuntimeContext* ctx) {
                        kernel(arg0_tensor,
                               arg1_tensor,
                               out_tensor,
                               arg0_shape,
                               arg1_shape,
                               result_shape);
                    });
                    return;
                }

                if (single_reduction_axis && arg0_rank == 3 && arg1_rank == 2)
                {
                    std::function<decltype(runtime::cpu::kernel::dot_3d_2d_1rd<float>)> kernel;
                    SELECT_KERNEL(
                        kernel, out[0].get_element_type(), runtime::cpu::kernel::dot_3d_2d_1rd);

                    functors.emplace_back([&arg0_tensor,
                                           &arg1_tensor,
                                           &out_tensor,
                                           kernel,
                                           arg0_shape,
                                           arg1_shape,
                                           result_shape](CPURuntimeContext* ctx) {
                        kernel(arg0_tensor,
                               arg1_tensor,
                               out_tensor,
                               arg0_shape,
                               arg1_shape,
                               result_shape);
                    });
                    return;
                }

                // 5. Generic fallback.
                std::function<decltype(runtime::cpu::kernel::dot<float>)> kernel;
                SELECT_KERNEL(kernel, out[0].get_element_type(), runtime::cpu::kernel::dot);

                functors.emplace_back([&arg0_tensor,
                                       &arg1_tensor,
                                       &out_tensor,
                                       kernel,
                                       arg0_shape,
                                       arg1_shape,
                                       result_shape,
                                       reduction_axes_count](CPURuntimeContext* ctx) {
                    kernel(arg0_tensor,
                           arg1_tensor,
                           out_tensor,
                           arg0_shape,
                           arg1_shape,
                           result_shape,
                           reduction_axes_count);
                });
            }

            REGISTER_OP_BUILDER(Dot);
        }
    }
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "ngraph/op/function_call.hpp"
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/tensor_view.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            // Builds the execution functor for op::FunctionCall and appends it to the
            // external function's functor list.
            //
            // At build time the callee's CPU_ExternalFunction is created (once per
            // function name) and stored in the caller's callee table. At run time the
            // functor wraps the current argument/result buffers in tensors, makes a
            // call frame for the callee and invokes it.
            template <>
            void Builder::BUILDER_DECL(ngraph::op::FunctionCall)
            {
                auto function_call = static_cast<const ngraph::op::FunctionCall*>(node);
                auto function = function_call->get_functions()[0];
                auto backend = runtime::Backend::create("CPU");

                auto& functors = external_function->get_functors();
                auto& tensor_data = external_function->get_tensor_data();
                auto& callees = external_function->get_callees();

                // Note: We bypass the completely broken ngraph "backend" API here
                // reference_wrapper keeps a reference to each tensor_data slot, so the
                // pointer stored in the slot is read when the functor runs.
                vector<reference_wrapper<void*>> arg_tensors, out_tensors;
                vector<Shape> arg_shapes, out_shapes;
                vector<element::Type> arg_types, out_types;

                for (const auto& arg : args)
                {
                    arg_shapes.emplace_back(arg.get_shape());
                    arg_types.emplace_back(arg.get_element_type());
                    arg_tensors.emplace_back(tensor_data[arg.get_name()]);
                }

                for (const auto& result : out)
                {
                    out_shapes.emplace_back(result.get_shape());
                    out_types.emplace_back(result.get_element_type());
                    out_tensors.emplace_back(tensor_data[result.get_name()]);
                }

                // Create the callee's external function only the first time this
                // function name is seen.
                const std::string callee_name = function->get_name();
                if (!callees.count(callee_name))
                {
                    callees[callee_name] = make_shared<CPU_ExternalFunction>(function);
                }
                auto& callee_external_function = callees[callee_name];

                auto functor = [&callee_external_function,
                                backend,
                                arg_shapes,
                                arg_types,
                                arg_tensors,
                                out_shapes,
                                out_types,
                                out_tensors](CPURuntimeContext* ctx) {
                    TensorViewPtrs inputs, outputs;
                    // size_t indices: the previous int counters were compared against
                    // the unsigned size() results.
                    for (size_t i = 0; i < arg_shapes.size(); i++)
                    {
                        inputs.emplace_back(
                            backend->create_tensor(arg_types[i], arg_shapes[i], arg_tensors[i]));
                    }
                    for (size_t i = 0; i < out_shapes.size(); i++)
                    {
                        outputs.emplace_back(
                            backend->create_tensor(out_types[i], out_shapes[i], out_tensors[i]));
                    }

                    auto call_frame = callee_external_function->make_call_frame();
                    call_frame->call(outputs, inputs);
                };
                functors.emplace_back(functor);
            }

            REGISTER_OP_BUILDER(FunctionCall);
        }
    }
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "ngraph/runtime/cpu/op/matmul_bias.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/cpu_kernels.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            // Builds the execution functor for op::MatmulBias and appends it to the
            // external function's functor list.
            //
            // The functor runs two steps:
            //   1. a row-major sgemm with beta = 0 that writes A*B into the output
            //      (either operand may be transposed);
            //   2. when a third argument (the bias) is present, a K = 1 sgemm with
            //      beta = 1 that adds the broadcast bias onto the output.
            //
            // All buffers are treated as float. Throws ngraph_error when the bias
            // broadcast covers neither one nor two axes.
            template <>
            void Builder::BUILDER_DECL(ngraph::op::MatmulBias)
            {
                auto& functors = external_function->get_functors();
                auto& tensor_data = external_function->get_tensor_data();

                // References to the tensor_data slots; the functors capture them by
                // reference and read the stored pointer when they run.
                auto& arg0_tensor = tensor_data[args[0].get_name()];
                auto& arg1_tensor = tensor_data[args[1].get_name()];
                auto& out0_tensor = tensor_data[out[0].get_name()];

                const ngraph::op::MatmulBias* mm =
                    static_cast<const ngraph::op::MatmulBias*>(node);

                const auto& arg0_shape = mm->get_arg0_shape();
                const auto& arg1_shape = mm->get_arg1_shape();
                // Shape of the node's result; also used as the bias broadcast target.
                const auto& arg2_shape = node->get_shape();

                const bool transpose_A = mm->get_is_arg0_transposed();
                const bool transpose_B = mm->get_is_arg1_transposed();

                // GEMM dimensions after applying the optional transposes.
                const auto m = transpose_A ? arg0_shape[1] : arg0_shape[0];
                const auto k = transpose_A ? arg0_shape[0] : arg0_shape[1];
                const auto n = transpose_B ? arg1_shape[0] : arg1_shape[1];

                // Leading dimensions come from the stored (untransposed) layouts.
                const auto lda = arg0_shape[1];
                const auto ldb = arg1_shape[1];

                const float beta = 0.0f;

                auto mm_functor = [&arg0_tensor,
                                   &arg1_tensor,
                                   &out0_tensor,
                                   transpose_A,
                                   transpose_B,
                                   m,
                                   n,
                                   k,
                                   lda,
                                   ldb,
                                   beta,
                                   arg2_shape](CPURuntimeContext* ctx) {
                    cblas::cblas_sgemm(
                        cblas::Layout::RowMajor,
                        transpose_A ? cblas::Transpose::Transpose : cblas::Transpose::None,
                        transpose_B ? cblas::Transpose::Transpose : cblas::Transpose::None,
                        m,
                        n,
                        k,
                        1.0f,
                        static_cast<float*>(arg0_tensor),
                        max(1UL, lda),
                        static_cast<float*>(arg1_tensor),
                        max(1UL, ldb),
                        beta,
                        static_cast<float*>(out0_tensor),
                        max(1UL, arg2_shape[1]));
                };

                // Stays a no-op when the op has no bias argument.
                function<void(CPURuntimeContext*)> bias_functor = [](CPURuntimeContext* ctx) {};

                if (args.size() > 2)
                {
                    auto& arg2_tensor = tensor_data[args[2].get_name()];
                    auto axes = mm->get_broadcast_axes();

                    if (axes.size() == 1 && *(axes.begin()) == 0)
                    {
                        // Broadcast along axis 0: ones column (rows x 1) times the
                        // bias read as a 1 x cols row.
                        vector<float> ones_row(arg2_shape[0], 1.0f);
                        bias_functor = [&arg2_tensor, &out0_tensor, ones_row, arg2_shape](
                            CPURuntimeContext* ctx) {
                            cblas::cblas_sgemm(cblas::Layout::RowMajor,
                                               cblas::Transpose::None,
                                               cblas::Transpose::None,
                                               arg2_shape[0],
                                               arg2_shape[1],
                                               1,
                                               1.0f,
                                               ones_row.data(),
                                               1UL,
                                               static_cast<float*>(arg2_tensor),
                                               max(1UL, arg2_shape[1]),
                                               1.0f,
                                               static_cast<float*>(out0_tensor),
                                               max(1UL, arg2_shape[1]));
                        };
                    }
                    else if (axes.size() == 1)
                    {
                        // Broadcast along the other axis: the bias read as a rows x 1
                        // column times a 1 x cols row of ones.
                        vector<float> ones_col(arg2_shape[1], 1.0f);
                        bias_functor = [&arg2_tensor, &out0_tensor, ones_col, arg2_shape](
                            CPURuntimeContext* ctx) {
                            cblas::cblas_sgemm(cblas::Layout::RowMajor,
                                               cblas::Transpose::None,
                                               cblas::Transpose::None,
                                               arg2_shape[0],
                                               arg2_shape[1],
                                               1,
                                               1.0f,
                                               static_cast<float*>(arg2_tensor),
                                               1UL,
                                               ones_col.data(),
                                               max(1UL, arg2_shape[1]),
                                               1.0f,
                                               static_cast<float*>(out0_tensor),
                                               max(1UL, arg2_shape[1]));
                        };
                    }
                    else
                    {
                        if (axes.size() != 2)
                        {
                            throw ngraph_error("unexpected broadcast rank");
                        }

                        // Broadcast along both axes: the bias is a single value, read
                        // at run time and replicated into a row before the update.
                        vector<float> ones_scalar(arg2_shape[0], 1.0f);
                        bias_functor = [&arg2_tensor, &out0_tensor, ones_scalar, arg2_shape](
                            CPURuntimeContext* ctx) {
                            vector<float> bias(arg2_shape[1], *static_cast<float*>(arg2_tensor));
                            cblas::cblas_sgemm(cblas::Layout::RowMajor,
                                               cblas::Transpose::None,
                                               cblas::Transpose::None,
                                               arg2_shape[0],
                                               arg2_shape[1],
                                               1,
                                               1.0f,
                                               ones_scalar.data(),
                                               1UL,
                                               bias.data(),
                                               max(1UL, arg2_shape[1]),
                                               1.0f,
                                               static_cast<float*>(out0_tensor),
                                               max(1UL, arg2_shape[1]));
                        };
                    }
                }

                // Product first, then the bias accumulation.
                functors.emplace_back([mm_functor, bias_functor](CPURuntimeContext* ctx) {
                    mm_functor(ctx);
                    bias_functor(ctx);
                });
            }

            REGISTER_OP_BUILDER(MatmulBias);
        }
    }
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <cstring>
#include "ngraph/op/max.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/kernel/reduce_max.hpp"
#include "reduction.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
namespace runtime
{
namespace cpu
{
// Builder specialization for op::Max.
// The body consists solely of the BUILD_REDUCTION_FUNCTOR macro invocation;
// the macro is presumably defined in the "reduction.hpp" header included by
// this file - confirm there for the exact functor it emits.
template <>
void Builder::BUILDER_DECL(ngraph::op::Max)
{
// Arguments: the op name (Max) and the kernel name stem (max).
// NOTE(review): `max` presumably maps onto the reduce_max kernel from
// kernel/reduce_max.hpp - verify against the macro definition.
BUILD_REDUCTION_FUNCTOR(Max, max);
}
// Registers the specialization above for the Max op.
REGISTER_OP_BUILDER(Max);
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "ngraph/runtime/cpu/kernel/max_pool.hpp"
#include "ngraph/op/max_pool.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            // Builds the execution functor for op::MaxPool and appends it to the
            // external function's functor list.
            //
            // When use_mkldnn_kernel(node) holds, a pooling_max forward primitive is
            // built through the MKLDNN emitter and the functor binds the buffers and
            // invokes it. Otherwise a max_pool kernel selected by the output element
            // type is called directly.
            template <>
            void Builder::BUILDER_DECL(ngraph::op::MaxPool)
            {
                auto& functors = external_function->get_functors();
                auto& tensor_data = external_function->get_tensor_data();

                // References to the tensor_data slots; the functors capture them by
                // reference and read the stored pointer when they run.
                auto& arg0_tensor = tensor_data[args[0].get_name()];
                auto& out_tensor = tensor_data[out[0].get_name()];

                const auto arg0_shape = args[0].get_shape();
                const auto out_shape = out[0].get_shape();

                auto max_pool = static_cast<const ngraph::op::MaxPool*>(node);
                const auto window_shape = max_pool->get_window_shape();
                const auto window_movement_strides = max_pool->get_window_movement_strides();
                const auto padding_below = max_pool->get_padding_below();
                const auto padding_above = max_pool->get_padding_above();

                if (!runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
                {
                    std::function<decltype(runtime::cpu::kernel::max_pool<float>)> kernel;
                    SELECT_KERNEL(
                        kernel, out[0].get_element_type(), runtime::cpu::kernel::max_pool);

                    functors.emplace_back([&arg0_tensor,
                                           &out_tensor,
                                           kernel,
                                           arg0_shape,
                                           out_shape,
                                           window_shape,
                                           window_movement_strides,
                                           padding_below,
                                           padding_above](CPURuntimeContext* ctx) {
                        kernel(arg0_tensor,
                               out_tensor,
                               arg0_shape,
                               out_shape,
                               window_shape,
                               window_movement_strides,
                               padding_below,
                               padding_above);
                    });
                    return;
                }

                // MKLDNN path: describe input and output, then build the primitive.
                auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
                auto input_desc = mkldnn_emitter->build_memory_descriptor(
                    args[0], runtime::cpu::mkldnn_utils::get_input_mkldnn_format(node, 0));
                auto result_desc = mkldnn_emitter->build_memory_descriptor(
                    out[0], runtime::cpu::mkldnn_utils::get_output_mkldnn_format(node, 0));

                size_t max_pool_index =
                    mkldnn_emitter->build_pooling_forward(mkldnn::algorithm::pooling_max,
                                                          input_desc,
                                                          result_desc,
                                                          window_movement_strides,
                                                          window_shape,
                                                          padding_below,
                                                          padding_above);

                // deps[0] is bound to the input buffer, deps[1] to the output buffer.
                auto& deps = mkldnn_emitter->get_primitive_deps(max_pool_index);

                functors.emplace_back(
                    [&deps, &arg0_tensor, &out_tensor, max_pool_index](CPURuntimeContext* ctx) {
                        cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg0_tensor);
                        cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
                        cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, max_pool_index);
                    });
            }

            REGISTER_OP_BUILDER(MaxPool);
        }
    }
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <cstring>
#include "ngraph/op/min.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/kernel/reduce_min.hpp"
#include "reduction.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
namespace runtime
{
namespace cpu
{
// Builder specialization for op::Min.
// The body consists solely of the BUILD_REDUCTION_FUNCTOR macro invocation;
// the macro is presumably defined in the "reduction.hpp" header included by
// this file - confirm there for the exact functor it emits.
template <>
void Builder::BUILDER_DECL(ngraph::op::Min)
{
// Arguments: the op name (Min) and the kernel name stem (min).
// NOTE(review): `min` presumably maps onto the reduce_min kernel from
// kernel/reduce_min.hpp - verify against the macro definition.
BUILD_REDUCTION_FUNCTOR(Min, min);
}
// Registers the specialization above for the Min op.
REGISTER_OP_BUILDER(Min);
}
}
}
This diff is collapsed.
...@@ -171,6 +171,8 @@ namespace ngraph ...@@ -171,6 +171,8 @@ namespace ngraph
functors.emplace_back(functor); functors.emplace_back(functor);
} }
} }
REGISTER_OP_BUILDER(Reshape);
} }
} }
} }
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "ngraph/op/reverse.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/kernel/reverse.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            // Builds the execution functor for op::Reverse and appends it to the
            // external function's functor list. The kernel is selected by the output
            // element type and receives the input/output shapes and the reversed axes.
            template <>
            void Builder::BUILDER_DECL(ngraph::op::Reverse)
            {
                auto& functors = external_function->get_functors();
                auto& tensor_data = external_function->get_tensor_data();

                // References to the tensor_data slots; the functor captures them by
                // reference and reads the stored pointer when it runs.
                auto& arg_tensor = tensor_data[args[0].get_name()];
                auto& out_tensor = tensor_data[out[0].get_name()];

                const auto arg_shape = args[0].get_shape();
                const auto result_shape = out[0].get_shape();
                const auto reversed_axes =
                    static_cast<const ngraph::op::Reverse*>(node)->get_reversed_axes();

                std::function<decltype(runtime::cpu::kernel::reverse<float>)> kernel;
                SELECT_KERNEL(kernel, out[0].get_element_type(), runtime::cpu::kernel::reverse);

                functors.emplace_back(
                    [&arg_tensor, &out_tensor, kernel, arg_shape, result_shape, reversed_axes](
                        CPURuntimeContext* ctx) {
                        kernel(arg_tensor, out_tensor, arg_shape, result_shape, reversed_axes);
                    });
            }

            REGISTER_OP_BUILDER(Reverse);
        }
    }
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "ngraph/op/reverse_sequence.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/kernel/reverse_sequence.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            // Builds the execution functor for op::ReverseSequence and appends it to
            // the external function's functor list.
            //
            // Only i32 sequence lengths (second argument) are supported; any other
            // type throws ngraph_error. The kernel is selected by the data element
            // type and the data rank.
            template <>
            void Builder::BUILDER_DECL(ngraph::op::ReverseSequence)
            {
                auto& functors = external_function->get_functors();
                auto& tensor_data = external_function->get_tensor_data();

                // References to the tensor_data slots; the functor captures them by
                // reference and reads the stored pointer when it runs.
                auto& arg_tensor = tensor_data[args[0].get_name()];
                auto& seq_len_tensor = tensor_data[args[1].get_name()];
                auto& out_tensor = tensor_data[out[0].get_name()];

                auto rev_seq = static_cast<const ngraph::op::ReverseSequence*>(node);
                const auto arg_shape = args[0].get_shape();
                const auto sequence_axis = rev_seq->get_sequence_axis();
                const auto batch_axis = rev_seq->get_batch_axis();

                // Reject unsupported sequence-length types before picking a kernel.
                if (!(args[1].get_element_type() == element::i32))
                {
                    throw ngraph_error("Unsupported sequence length type " +
                                       args[1].get_element_type().c_type_string() +
                                       " requires a kernel instantiation to handle this type");
                }

                std::function<decltype(runtime::cpu::kernel::reverse_sequence<int, int, 4>)>
                    kernel;
                SELECT_KERNEL_BY_RANK(kernel,
                                      args[0].get_element_type(),
                                      arg_shape.size(),
                                      runtime::cpu::kernel::reverse_sequence_sli32);

                functors.emplace_back([&arg_tensor,
                                       &out_tensor,
                                       &seq_len_tensor,
                                       kernel,
                                       arg_shape,
                                       batch_axis,
                                       sequence_axis](CPURuntimeContext* ctx) {
                    kernel(arg_tensor,
                           out_tensor,
                           arg_shape,
                           batch_axis,
                           sequence_axis,
                           seq_len_tensor);
                });
            }

            REGISTER_OP_BUILDER(ReverseSequence);
        }
    }
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "ngraph/op/select.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/kernel/select.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            // Builds the execution functor for op::Select and appends it to the
            // external function's functor list. The kernel is selected by the output
            // element type and is handed the three input buffers, the output buffer
            // and the element count of the first argument.
            template <>
            void Builder::BUILDER_DECL(ngraph::op::Select)
            {
                auto& functors = external_function->get_functors();
                auto& tensor_data = external_function->get_tensor_data();

                // References to the tensor_data slots; the functor captures them by
                // reference and reads the stored pointer when it runs.
                auto& arg0_tensor = tensor_data[args[0].get_name()];
                auto& arg1_tensor = tensor_data[args[1].get_name()];
                auto& arg2_tensor = tensor_data[args[2].get_name()];
                auto& out_tensor = tensor_data[out[0].get_name()];

                const auto element_count = args[0].get_size();

                std::function<decltype(runtime::cpu::kernel::select<float>)> kernel;
                SELECT_KERNEL(kernel, out[0].get_element_type(), runtime::cpu::kernel::select);

                functors.emplace_back(
                    [&arg0_tensor, &arg1_tensor, &arg2_tensor, &out_tensor, kernel, element_count](
                        CPURuntimeContext* ctx) {
                        kernel(arg0_tensor, arg1_tensor, arg2_tensor, out_tensor, element_count);
                    });
            }

            REGISTER_OP_BUILDER(Select);
        }
    }
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "ngraph/op/select_and_scatter.hpp"
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/reference/select_and_scatter.hpp"
#include "ngraph/runtime/tensor_view.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
namespace runtime
{
namespace cpu
{
// Builder specialization for op::SelectAndScatter.
//
// The node's two nested functions (index 0: selection, index 1: scatter) are
// each wrapped in a CPU_ExternalFunction that is cached in the parent's
// callee table under the function's name. The emitted functor runs
// reference::select_and_scatter<float>, which calls back into those external
// functions through the `select` and `scatter` lambdas on scalar tensors.
//
// Throws ngraph_error when the node's output element type is not f32.
template <>
void Builder::BUILDER_DECL(ngraph::op::SelectAndScatter)
{
    auto select_and_scatter = static_cast<const ngraph::op::SelectAndScatter*>(node);
    auto select_function = select_and_scatter->get_functions()[0];
    auto scatter_function = select_and_scatter->get_functions()[1];

    auto backend = runtime::Backend::create("CPU");

    auto& functors = external_function->get_functors();
    auto& tensor_data = external_function->get_tensor_data();
    auto& callees = external_function->get_callees();

    // Note: We bypass the completely broken ngraph "backend" API here
    auto element_type = node->get_output_element_type(0);
    if (element_type != element::f32)
    {
        throw ngraph_error(
            "CPU direct execution mode does not support non-float inputs, use compiled "
            "mode instead");
    }

    auto arg0_shape = args[0].get_shape();
    auto& arg0_tensor = tensor_data[args[0].get_name()];
    auto arg1_shape = args[1].get_shape();
    auto& arg1_tensor = tensor_data[args[1].get_name()];
    auto& arg2_tensor = tensor_data[args[2].get_name()];
    auto out_shape = out[0].get_shape();
    auto& out_tensor = tensor_data[out[0].get_name()];

    auto window_shape = select_and_scatter->get_window_shape();
    auto window_movement_strides = select_and_scatter->get_window_movement_strides();

    // Create each callee only once per parent function; later nodes that
    // reference the same nested function reuse the cached entry.
    if (!callees.count(select_function->get_name()))
    {
        callees[select_function->get_name()] =
            make_shared<CPU_ExternalFunction>(select_function);
    }
    if (!callees.count(scatter_function->get_name()))
    {
        callees[scatter_function->get_name()] =
            make_shared<CPU_ExternalFunction>(scatter_function);
    }

    auto& select_external_function = callees[select_function->get_name()];
    auto& scatter_external_function = callees[scatter_function->get_name()];

    // Runs the selection function on two f32 scalars and returns its result.
    auto select = [&, backend](float x, float y) {
        TensorViewPtrs inputs, outputs;
        char output;
        inputs.emplace_back(backend->create_tensor(element::f32, Shape{}, &x));
        inputs.emplace_back(backend->create_tensor(element::f32, Shape{}, &y));
        // `output` is a single byte, so the tensor wrapping it must use a
        // one-byte element type. Declaring it as f32 would let the callee
        // write four bytes into this one-byte stack slot.
        outputs.emplace_back(backend->create_tensor(element::boolean, Shape{}, &output));
        select_external_function->make_call_frame()->call(outputs, inputs);
        return output;
    };

    // Runs the scatter function on two f32 scalars and returns the f32 result.
    auto scatter = [&, backend](float x, float y) {
        TensorViewPtrs inputs, outputs;
        float output;
        inputs.emplace_back(backend->create_tensor(element::f32, Shape{}, &x));
        inputs.emplace_back(backend->create_tensor(element::f32, Shape{}, &y));
        outputs.emplace_back(backend->create_tensor(element::f32, Shape{}, &output));
        scatter_external_function->make_call_frame()->call(outputs, inputs);
        return output;
    };

    // Shapes, strides and the two callbacks are copied into the closure;
    // the tensor slots are captured by reference. `backend` is not captured
    // here because each callback already holds its own copy.
    auto functor = [&,
                    select,
                    scatter,
                    arg0_shape,
                    arg1_shape,
                    out_shape,
                    window_shape,
                    window_movement_strides](CPURuntimeContext* ctx) {
        reference::select_and_scatter<float>(static_cast<float*>(arg0_tensor),
                                             static_cast<float*>(arg1_tensor),
                                             static_cast<float*>(arg2_tensor),
                                             static_cast<float*>(out_tensor),
                                             arg0_shape,
                                             arg1_shape,
                                             out_shape,
                                             select,
                                             scatter,
                                             window_shape,
                                             window_movement_strides);
    };
    functors.emplace_back(functor);
}
REGISTER_OP_BUILDER(SelectAndScatter);
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
//#include "ngraph/runtime/cpu/kernel/avg_pool.hpp"
#include "ngraph/op/sigmoid.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
namespace runtime
{
namespace cpu
{
// Builder specialization for op::Sigmoid. Input and output are described to
// MKL-DNN as flat 1-D memories (format::x) whose lengths are the element
// counts of the respective shapes; the emitted functor binds the two buffers
// to the primitive's dependencies and invokes it.
template <>
void Builder::BUILDER_DECL(ngraph::op::Sigmoid)
{
    auto& tensor_data = external_function->get_tensor_data();
    auto& functors = external_function->get_functors();

    auto& src_buffer = tensor_data[args[0].get_name()];
    auto& dst_buffer = tensor_data[out[0].get_name()];

    // Element counts are narrowed to int before being used as the single dimension.
    auto src_count = static_cast<int>(shape_size(args[0].get_shape()));
    auto dst_count = static_cast<int>(shape_size(out[0].get_shape()));

    auto& emitter = external_function->get_mkldnn_emitter();

    auto src_desc =
        mkldnn::memory::desc({src_count},
                             mkldnn_utils::get_mkldnn_data_type(args[0].get_element_type()),
                             mkldnn::memory::format::x);
    auto dst_desc =
        mkldnn::memory::desc({dst_count},
                             mkldnn_utils::get_mkldnn_data_type(out[0].get_element_type()),
                             mkldnn::memory::format::x);

    auto primitive_index = emitter->build_sigmoid_forward(src_desc, dst_desc);
    auto& primitive_deps = emitter->get_primitive_deps(primitive_index);

    // deps[0] receives the input buffer, deps[1] the output buffer.
    auto run_sigmoid = [&, primitive_index](CPURuntimeContext* ctx) {
        cpu::mkldnn_utils::set_memory_ptr(ctx, primitive_deps[0], src_buffer);
        cpu::mkldnn_utils::set_memory_ptr(ctx, primitive_deps[1], dst_buffer);
        cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, primitive_index);
    };
    functors.emplace_back(run_sigmoid);
}
// Builder specialization for op::SigmoidBackprop. Both arguments and the
// output are described to MKL-DNN as flat 1-D memories (format::x); the
// emitted functor binds the three buffers to the primitive's dependencies
// and invokes it.
template <>
void Builder::BUILDER_DECL(ngraph::op::SigmoidBackprop)
{
    auto& tensor_data = external_function->get_tensor_data();
    auto& functors = external_function->get_functors();

    auto& in0_buffer = tensor_data[args[0].get_name()];
    auto& in1_buffer = tensor_data[args[1].get_name()];
    auto& result_buffer = tensor_data[out[0].get_name()];

    // Element counts are narrowed to int before being used as the single dimension.
    int in0_count = static_cast<int>(shape_size(args[0].get_shape()));
    int in1_count = static_cast<int>(shape_size(args[1].get_shape()));
    int result_count = static_cast<int>(shape_size(out[0].get_shape()));

    auto& emitter = external_function->get_mkldnn_emitter();

    auto in0_desc =
        mkldnn::memory::desc({in0_count},
                             mkldnn_utils::get_mkldnn_data_type(args[0].get_element_type()),
                             mkldnn::memory::format::x);
    auto in1_desc =
        mkldnn::memory::desc({in1_count},
                             mkldnn_utils::get_mkldnn_data_type(args[1].get_element_type()),
                             mkldnn::memory::format::x);
    auto result_desc =
        mkldnn::memory::desc({result_count},
                             mkldnn_utils::get_mkldnn_data_type(out[0].get_element_type()),
                             mkldnn::memory::format::x);

    size_t primitive_index = emitter->build_sigmoid_backward(in0_desc, in1_desc, result_desc);
    auto& primitive_deps = emitter->get_primitive_deps(primitive_index);

    // deps[0] and deps[1] receive the two argument buffers, deps[2] the output buffer.
    auto run_sigmoid_backprop = [&, primitive_index](CPURuntimeContext* ctx) {
        cpu::mkldnn_utils::set_memory_ptr(ctx, primitive_deps[0], in0_buffer);
        cpu::mkldnn_utils::set_memory_ptr(ctx, primitive_deps[1], in1_buffer);
        cpu::mkldnn_utils::set_memory_ptr(ctx, primitive_deps[2], result_buffer);
        cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, primitive_index);
    };
    functors.emplace_back(run_sigmoid_backprop);
}
// Static registration: per the REGISTER_OP_BUILDER macro, each line defines a
// static object whose constructor inserts the matching
// Builder::build<ngraph::op::...> specialization into build_dispatcher.
REGISTER_OP_BUILDER(Sigmoid);
REGISTER_OP_BUILDER(SigmoidBackprop);
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <cstring>
#include "ngraph/op/sum.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/kernel/reduce_sum.hpp"
#include "reduction.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
namespace runtime
{
namespace cpu
{
// Builder specialization for op::Sum. The entire body is produced by the
// BUILD_REDUCTION_FUNCTOR macro, instantiated with the op name `Sum` and the
// kernel name `sum`. The macro is not defined in this file; presumably it
// comes from "reduction.hpp" (confirm).
template <>
void Builder::BUILDER_DECL(ngraph::op::Sum)
{
BUILD_REDUCTION_FUNCTOR(Sum, sum);
}
// Adds this specialization to build_dispatcher via a static registrar object.
REGISTER_OP_BUILDER(Sum);
}
}
}
This diff is collapsed.
...@@ -157,6 +157,49 @@ ...@@ -157,6 +157,49 @@
SELECT_RANK(KV, uint64_t, R, K); \ SELECT_RANK(KV, uint64_t, R, K); \
} }
// Expands to the full body of a builder for a one-input element-wise op.
//
// OP is the kernel name handed to SELECT_KERNEL, which picks the
// instantiation matching the element type of args[0]. The expansion then
// appends a functor that calls kernel(arg0, out0, element_count), where
// element_count is out[0].get_size().
//
// Requirements on the expansion site: `external_function`, `args` and `out`
// must be in scope. The macro declares the locals `functors`, `tensor_data`,
// `kernel`, `element_count`, `arg0_tensor`, `out0_tensor` and `functor`, so
// it can be expanded at most once per scope. The tensor slots are captured
// by reference; `kernel` and `element_count` are captured by copy.
#define BUILD_UNARY_ELEMWISE_FUNCTOR(OP) \
auto& functors = external_function->get_functors(); \
auto& tensor_data = external_function->get_tensor_data(); \
std::function<void(void*, void*, size_t)> kernel; \
\
SELECT_KERNEL(kernel, args[0].get_element_type(), OP); \
\
auto element_count = out[0].get_size(); \
auto& arg0_tensor = tensor_data[args[0].get_name()]; \
auto& out0_tensor = tensor_data[out[0].get_name()]; \
\
auto functor = [&, kernel, element_count](CPURuntimeContext* ctx) { \
kernel(arg0_tensor, out0_tensor, element_count); \
}; \
functors.emplace_back(functor);
// Expands to the full body of a builder for a two-input element-wise op.
//
// OP is the kernel name handed to SELECT_KERNEL, which picks the
// instantiation matching the element type of args[0] (the type of args[1] is
// not consulted). The expansion then appends a functor that calls
// kernel(arg0, arg1, out0, element_count), where element_count is
// out[0].get_size().
//
// Requirements on the expansion site: `external_function`, `args` and `out`
// must be in scope. The macro declares the locals `functors`, `tensor_data`,
// `kernel`, `element_count`, `arg0_tensor`, `arg1_tensor`, `out0_tensor` and
// `functor`, so it can be expanded at most once per scope. The tensor slots
// are captured by reference; `kernel` and `element_count` are captured by copy.
#define BUILD_BINARY_ELEMWISE_FUNCTOR(OP) \
auto& functors = external_function->get_functors(); \
auto& tensor_data = external_function->get_tensor_data(); \
std::function<void(void*, void*, void*, size_t)> kernel; \
\
SELECT_KERNEL(kernel, args[0].get_element_type(), OP); \
\
auto element_count = out[0].get_size(); \
auto& arg0_tensor = tensor_data[args[0].get_name()]; \
auto& arg1_tensor = tensor_data[args[1].get_name()]; \
auto& out0_tensor = tensor_data[out[0].get_name()]; \
\
auto functor = [&, kernel, element_count](CPURuntimeContext* ctx) { \
kernel(arg0_tensor, arg1_tensor, out0_tensor, element_count); \
}; \
functors.emplace_back(functor);
// Registers the builder for ngraph::op::OP.
//
// Expands to a static object of a one-off struct type whose constructor
// inserts {type_index(typeid(ngraph::op::OP)), &Builder::build<ngraph::op::OP>}
// into build_dispatcher. `type_index` is unqualified, so the expansion site
// needs std::type_index visible (e.g. through `using namespace std`).
//
// NOTE(review): the generated identifiers begin with a double underscore,
// which the C++ standard reserves for the implementation.
// NOTE(review): the insert runs during static initialization of the
// expanding translation unit; this presumably relies on build_dispatcher
// having been constructed already - confirm the initialization order.
#define REGISTER_OP_BUILDER(OP) \
static struct __register_##OP##_builder \
{ \
__register_##OP##_builder() \
{ \
build_dispatcher.insert({type_index(typeid(ngraph::op::OP)), \
&runtime::cpu::Builder::build<ngraph::op::OP>}); \
} \
} __register_##OP##_builder_instance;
namespace ngraph namespace ngraph
{ {
namespace runtime namespace runtime
...@@ -171,7 +214,7 @@ namespace ngraph ...@@ -171,7 +214,7 @@ namespace ngraph
using BuildOpMap = std::unordered_map<std::type_index, BuildOpFunction>; using BuildOpMap = std::unordered_map<std::type_index, BuildOpFunction>;
extern const BuildOpMap build_dispatcher; extern BuildOpMap build_dispatcher;
class Builder class Builder
{ {
......
...@@ -1043,7 +1043,7 @@ void runtime::cpu::CPU_ExternalFunction::build() ...@@ -1043,7 +1043,7 @@ void runtime::cpu::CPU_ExternalFunction::build()
pass_manager.register_pass<ngraph::pass::GetOutputElementElimination>(); pass_manager.register_pass<ngraph::pass::GetOutputElementElimination>();
pass_manager.register_pass<ngraph::pass::Liveness>(); pass_manager.register_pass<ngraph::pass::Liveness>();
pass_manager.register_pass<ngraph::pass::MemoryLayout>(s_memory_pool_alignment, true); pass_manager.register_pass<ngraph::pass::MemoryLayout>(s_memory_pool_alignment, true);
pass_manager.run_passes(m_function); pass_manager.run_passes(m_function, false);
// Store layouts assigned for arguments // Store layouts assigned for arguments
for (const auto& parameter : m_function->get_parameters()) for (const auto& parameter : m_function->get_parameters())
...@@ -1166,6 +1166,8 @@ void runtime::cpu::CPU_ExternalFunction::build() ...@@ -1166,6 +1166,8 @@ void runtime::cpu::CPU_ExternalFunction::build()
out_names.push_back(tv->get_tensor().get_name()); out_names.push_back(tv->get_tensor().get_name());
} }
m_op_attrs.emplace_back(node->description(), out_names, in_names);
size_t functor_count = functors.size(); size_t functor_count = functors.size();
handler->second(this, node.get(), in, out); handler->second(this, node.get(), in, out);
...@@ -1191,6 +1193,9 @@ void runtime::cpu::CPU_ExternalFunction::build() ...@@ -1191,6 +1193,9 @@ void runtime::cpu::CPU_ExternalFunction::build()
executor = [&](CPURuntimeContext* ctx, vector<void*>& inputs, vector<void*>& outputs) { executor = [&](CPURuntimeContext* ctx, vector<void*>& inputs, vector<void*>& outputs) {
static bool first_iteration = true; static bool first_iteration = true;
cpu::Timestamp start_ts;
int profiler_count = 0;
for (auto& p : intermediates_offsets) for (auto& p : intermediates_offsets)
{ {
tensor_data[p.first] = tensor_data[p.first] =
...@@ -1215,16 +1220,40 @@ void runtime::cpu::CPU_ExternalFunction::build() ...@@ -1215,16 +1220,40 @@ void runtime::cpu::CPU_ExternalFunction::build()
{ {
for (size_t j = 0; j < p.second; j++) for (size_t j = 0; j < p.second; j++)
{ {
if (runtime::cpu::IsTracingEnabled())
{
start_ts = cpu::Clock::now();
}
(*functor)(ctx); (*functor)(ctx);
if (runtime::cpu::IsTracingEnabled())
{
ctx->op_durations[profiler_count++] =
(std::chrono::duration_cast<cpu::Timescale>(cpu::Clock::now() -
start_ts))
.count();
}
std::advance(functor, 1); std::advance(functor, 1);
} }
} }
else else
{ {
if (runtime::cpu::IsTracingEnabled())
{
for (size_t j = 0; j < p.second; j++)
{
ctx->op_durations[profiler_count++] = 0;
}
}
std::advance(functor, p.second); std::advance(functor, p.second);
} }
} }
first_iteration = false; first_iteration = false;
if (runtime::cpu::IsTracingEnabled())
{
assert(m_op_attrs.size() == profiler_count);
}
}; };
m_is_built = true; m_is_built = true;
......
...@@ -105,6 +105,11 @@ namespace ngraph ...@@ -105,6 +105,11 @@ namespace ngraph
{ {
return executor; return executor;
} }
std::unordered_map<std::string, std::shared_ptr<CPU_ExternalFunction>>&
get_callees()
{
return callees;
}
bool is_direct_execution() const { return m_direct_execution; } bool is_direct_execution() const { return m_direct_execution; }
protected: protected:
void build(); void build();
...@@ -167,6 +172,7 @@ namespace ngraph ...@@ -167,6 +172,7 @@ namespace ngraph
std::unordered_map<std::string, bool> tensor_stale; std::unordered_map<std::string, bool> tensor_stale;
std::unordered_map<std::string, size_t> intermediates_offsets; std::unordered_map<std::string, size_t> intermediates_offsets;
std::unordered_map<std::string, size_t> function_input_index, function_output_index; std::unordered_map<std::string, size_t> function_input_index, function_output_index;
std::unordered_map<std::string, std::shared_ptr<CPU_ExternalFunction>> callees;
bool m_is_built; bool m_is_built;
bool m_direct_execution; bool m_direct_execution;
}; };
......
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
// Element-wise logical AND of two buffers of `count` char elements.
//
// input0, input1: operand buffers, read as char.
// output:         destination buffer, written as char.
// count:          number of elements in each buffer.
//
// The expression is evaluated on eigen::global_thread_pool_device.
//
// Marked inline because this is a non-template function defined in a header:
// without it, every translation unit that includes the header emits its own
// external definition and the program fails to link (ODR violation).
inline void logical_and(void* input0, void* input1, void* output, size_t count)
{
    Eigen::array<Eigen::Index, 1> out_dims, in_dims;
    out_dims[0] = in_dims[0] = count;

    Eigen::TensorMap<Eigen::Tensor<char, 1, Eigen::RowMajor>> out(
        static_cast<char*>(output), out_dims);
    Eigen::TensorMap<Eigen::Tensor<char, 1, Eigen::RowMajor>> in0(
        static_cast<char*>(input0), in_dims);
    Eigen::TensorMap<Eigen::Tensor<char, 1, Eigen::RowMajor>> in1(
        static_cast<char*>(input1), in_dims);

    // The && expression is cast back to char so it matches the output map's scalar type.
    out.device(eigen::global_thread_pool_device) =
        (in0 && in1).template cast<char>();
}
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include "ngraph/runtime/reference/batch_norm.hpp"
#include "ngraph/shape.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
// Type-erased entry point for reference::batch_norm_three_outputs: recovers
// ElementType pointers from the void* buffers and forwards everything
// unchanged.
template <typename ElementType>
void batch_norm_three_outputs(double eps,
                              const void* arg0,
                              const void* arg1,
                              const void* arg2,
                              void* out0,
                              void* out1,
                              void* out2,
                              const Shape& arg2_shape)
{
    const ElementType* typed_arg0 = static_cast<const ElementType*>(arg0);
    const ElementType* typed_arg1 = static_cast<const ElementType*>(arg1);
    const ElementType* typed_arg2 = static_cast<const ElementType*>(arg2);
    ElementType* typed_out0 = static_cast<ElementType*>(out0);
    ElementType* typed_out1 = static_cast<ElementType*>(out1);
    ElementType* typed_out2 = static_cast<ElementType*>(out2);

    reference::batch_norm_three_outputs(
        eps, typed_arg0, typed_arg1, typed_arg2, typed_out0, typed_out1, typed_out2, arg2_shape);
}
// Type-erased entry point for reference::batch_norm_one_output: recovers
// ElementType pointers from the void* buffers and forwards everything
// unchanged.
template <typename ElementType>
void batch_norm_one_output(double eps,
                           const void* arg0,
                           const void* arg1,
                           const void* arg2,
                           const void* arg3,
                           const void* arg4,
                           void* out0,
                           const Shape& arg2_shape)
{
    const ElementType* typed_arg0 = static_cast<const ElementType*>(arg0);
    const ElementType* typed_arg1 = static_cast<const ElementType*>(arg1);
    const ElementType* typed_arg2 = static_cast<const ElementType*>(arg2);
    const ElementType* typed_arg3 = static_cast<const ElementType*>(arg3);
    const ElementType* typed_arg4 = static_cast<const ElementType*>(arg4);
    ElementType* typed_out0 = static_cast<ElementType*>(out0);

    reference::batch_norm_one_output(
        eps, typed_arg0, typed_arg1, typed_arg2, typed_arg3, typed_arg4, typed_out0, arg2_shape);
}
}
}
}
}
...@@ -19,8 +19,10 @@ ...@@ -19,8 +19,10 @@
#define EIGEN_USE_THREADS #define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor> #include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/axis_set.hpp"
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp" #include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
#include "ngraph/runtime/reference/broadcast.hpp" #include "ngraph/runtime/reference/broadcast.hpp"
#include "ngraph/shape.hpp"
namespace ngraph namespace ngraph
{ {
...@@ -30,18 +32,33 @@ namespace ngraph ...@@ -30,18 +32,33 @@ namespace ngraph
{ {
namespace kernel namespace kernel
{ {
template <typename ElementType> template <typename ElementType, unsigned int Rank>
void broadcast(void* input0, void broadcast(void* input,
void* output, void* output,
const Shape& arg0_shape, const Shape& input_shape,
const Shape& result_shape, const Shape& output_shape)
const AxisSet& broadcast_axes)
{ {
reference::broadcast<ElementType>(static_cast<const ElementType*>(input0), Eigen::array<Eigen::Index, Rank> out_dims;
static_cast<ElementType*>(output), Eigen::array<Eigen::Index, Rank> in_dims;
arg0_shape,
result_shape, for (int i = 0; i < Rank; i++)
broadcast_axes); {
out_dims[i] = output_shape[i];
in_dims[i] = input_shape[i];
}
Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> in(
static_cast<ElementType*>(input), in_dims);
Eigen::array<ptrdiff_t, Rank> factors;
for (int i = 0; i < Rank; i++)
{
factors[i] = output_shape[i] / input_shape[i];
}
out.device(eigen::global_thread_pool_device) = in.broadcast(factors);
} }
} }
} }
......
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <functional>
#include <iostream>
#include <vector>
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
#include "ngraph/shape.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType, int Rank>
void concat(std::vector<std::reference_wrapper<void*>> inputs,
std::vector<Shape> input_shapes,
void* output,
Shape output_shape,
size_t axis)
{
Eigen::array<Eigen::Index, Rank> out_dims;
for (int i = 0; i < Rank; i++)
{
out_dims[i] = output_shape[i];
}
Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::array<Eigen::Index, Rank> in_dims, concat_pos;
concat_pos.fill(0);
for (int i = 0; i < input_shapes.size(); i++)
{
for (int j = 0; j < Rank; j++)
{
in_dims[j] = input_shapes[i][j];
}
Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> in(
static_cast<ElementType*>(inputs[i].get()), in_dims);
out.slice(concat_pos, in_dims).device(eigen::global_thread_pool_device) =
in;
concat_pos[axis] += in_dims[axis];
}
}
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename InputElementType, typename OutputElementType>
void convert(void* input, void* output, size_t count)
{
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
Eigen::TensorMap<Eigen::Tensor<OutputElementType, 1, Eigen::RowMajor>> out(
static_cast<OutputElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<InputElementType, 1, Eigen::RowMajor>> in(
static_cast<InputElementType*>(input), in_dims);
out.device(eigen::global_thread_pool_device) =
in.template cast<OutputElementType>();
}
// Fixed-output-type forwarders for convert<>. Each one pins the output
// element type and leaves only the input element type as a template
// parameter, so all of them share the signature
// void(void* input, void* output, size_t count).

// Output type: float.
template <typename InputElementType>
void convert_to_float32(void* input, void* output, size_t count)
{
convert<InputElementType, float>(input, output, count);
}
// Output type: double.
template <typename InputElementType>
void convert_to_float64(void* input, void* output, size_t count)
{
convert<InputElementType, double>(input, output, count);
}
// Output type: int8_t.
template <typename InputElementType>
void convert_to_i8(void* input, void* output, size_t count)
{
convert<InputElementType, int8_t>(input, output, count);
}
// Output type: int16_t.
template <typename InputElementType>
void convert_to_i16(void* input, void* output, size_t count)
{
convert<InputElementType, int16_t>(input, output, count);
}
// Output type: int32_t.
template <typename InputElementType>
void convert_to_i32(void* input, void* output, size_t count)
{
convert<InputElementType, int32_t>(input, output, count);
}
// Output type: int64_t.
template <typename InputElementType>
void convert_to_i64(void* input, void* output, size_t count)
{
convert<InputElementType, int64_t>(input, output, count);
}
// Output type: uint8_t.
template <typename InputElementType>
void convert_to_u8(void* input, void* output, size_t count)
{
convert<InputElementType, uint8_t>(input, output, count);
}
// Output type: uint16_t.
template <typename InputElementType>
void convert_to_u16(void* input, void* output, size_t count)
{
convert<InputElementType, uint16_t>(input, output, count);
}
// Output type: uint32_t.
template <typename InputElementType>
void convert_to_u32(void* input, void* output, size_t count)
{
convert<InputElementType, uint32_t>(input, output, count);
}
// Output type: uint64_t.
template <typename InputElementType>
void convert_to_u64(void* input, void* output, size_t count)
{
convert<InputElementType, uint64_t>(input, output, count);
}
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType>
void cwise_pow(void* input0, void* input1, void* output, size_t count)
{
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in0(
static_cast<ElementType*>(input0), in_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in1(
static_cast<ElementType*>(input1), in_dims);
out.device(eigen::global_thread_pool_device) = in0.binaryExpr(
in1, Eigen::internal::scalar_pow_op<ElementType, ElementType>());
}
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType>
void divide(void* input0, void* input1, void* output, size_t count)
{
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in0(
static_cast<ElementType*>(input0), in_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in1(
static_cast<ElementType*>(input1), in_dims);
out.device(eigen::global_thread_pool_device) = in0 / in1;
}
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
#include "ngraph/runtime/reference/dot.hpp"
#include "ngraph/shape.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType,
unsigned int Input0Rank,
unsigned int Input1Rank,
unsigned int DotDims>
void dot(void* input0,
void* input1,
void* output,
const Shape& input0_shape,
const Shape& input1_shape,
const Shape& output_shape)
{
constexpr unsigned int OutRank = Input0Rank + Input1Rank - 2 * DotDims;
Eigen::array<Eigen::Index, OutRank> out_dims;
Eigen::array<Eigen::Index, Input0Rank> in0_dims;
Eigen::array<Eigen::Index, Input1Rank> in1_dims;
Eigen::array<Eigen::IndexPair<Eigen::Index>, DotDims> dot_dims;
for (int i = 0; i < OutRank; i++)
{
out_dims[i] = output_shape[i];
}
for (int i = 0; i < Input0Rank; i++)
{
in0_dims[i] = input0_shape[i];
}
for (int i = 0; i < Input1Rank; i++)
{
in1_dims[i] = input1_shape[i];
}
for (int i = 0; i < DotDims; i++)
{
dot_dims[i].first = Input0Rank - DotDims + i;
dot_dims[i].second = i;
}
Eigen::TensorMap<Eigen::Tensor<ElementType, OutRank, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, Input0Rank, Eigen::RowMajor>> in0(
static_cast<ElementType*>(input0), in0_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, Input1Rank, Eigen::RowMajor>> in1(
static_cast<ElementType*>(input1), in1_dims);
out.device(eigen::global_thread_pool_device) = in0.contract(in1, dot_dims);
}
template <typename ElementType>
void dot_scalar(void* input0, void* input1, void* output, size_t element_count)
{
Eigen::array<Eigen::Index, 1> out_dims;
Eigen::array<Eigen::Index, 1> in1_dims;
out_dims[0] = element_count;
in1_dims[0] = element_count;
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
auto in0 = static_cast<ElementType*>(input0);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in1(
static_cast<ElementType*>(input1), in1_dims);
out.device(eigen::global_thread_pool_device) = in0[0] * in1;
}
template <typename ElementType>
void dot_1d_1d_1rd(void* input0,
void* input1,
void* output,
const Shape& input0_shape,
const Shape& input1_shape,
const Shape& output_shape)
{
dot<ElementType, 1, 1, 1>(
input0, input1, output, input0_shape, input1_shape, output_shape);
}
// Fixed-arity entry point for the rank-templated dot kernel: rank-2 first
// operand, rank-1 second operand, one contracted dimension. All arguments are
// forwarded unchanged.
template <typename ElementType>
void dot_2d_1d_1rd(void* input0,
                   void* input1,
                   void* output,
                   const Shape& input0_shape,
                   const Shape& input1_shape,
                   const Shape& output_shape)
{
    constexpr unsigned int lhs_rank = 2;
    constexpr unsigned int rhs_rank = 1;
    constexpr unsigned int contracted = 1;

    dot<ElementType, lhs_rank, rhs_rank, contracted>(
        input0, input1, output, input0_shape, input1_shape, output_shape);
}
// Fixed-arity entry point for the rank-templated dot kernel: rank-3 first
// operand, rank-3 second operand, one contracted dimension. All arguments are
// forwarded unchanged.
template <typename ElementType>
void dot_3d_3d_1rd(void* input0,
                   void* input1,
                   void* output,
                   const Shape& input0_shape,
                   const Shape& input1_shape,
                   const Shape& output_shape)
{
    constexpr unsigned int lhs_rank = 3;
    constexpr unsigned int rhs_rank = 3;
    constexpr unsigned int contracted = 1;

    dot<ElementType, lhs_rank, rhs_rank, contracted>(
        input0, input1, output, input0_shape, input1_shape, output_shape);
}
// Fixed-arity entry point for the rank-templated dot kernel: rank-3 first
// operand, rank-2 second operand, one contracted dimension. All arguments are
// forwarded unchanged.
template <typename ElementType>
void dot_3d_2d_1rd(void* input0,
                   void* input1,
                   void* output,
                   const Shape& input0_shape,
                   const Shape& input1_shape,
                   const Shape& output_shape)
{
    constexpr unsigned int lhs_rank = 3;
    constexpr unsigned int rhs_rank = 2;
    constexpr unsigned int contracted = 1;

    dot<ElementType, lhs_rank, rhs_rank, contracted>(
        input0, input1, output, input0_shape, input1_shape, output_shape);
}
// Type-erased adapter around reference::dot: the raw buffers are reinterpreted
// as ElementType arrays (inputs read-only, output writable) and every other
// argument, including the runtime `reduction_axes_count`, is passed through
// untouched.
template <typename ElementType>
void dot(void* arg0,
         void* arg1,
         void* out,
         const Shape& arg0_shape,
         const Shape& arg1_shape,
         const Shape& out_shape,
         size_t reduction_axes_count)
{
    const ElementType* lhs = static_cast<const ElementType*>(arg0);
    const ElementType* rhs = static_cast<const ElementType*>(arg1);
    ElementType* result = static_cast<ElementType*>(out);

    reference::dot(
        lhs, rhs, result, arg0_shape, arg1_shape, out_shape, reduction_axes_count);
}
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType>
void equal(void* input0, void* input1, void* output, size_t count)
{
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
Eigen::TensorMap<Eigen::Tensor<char, 1, Eigen::RowMajor>> out(
static_cast<char*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in0(
static_cast<ElementType*>(input0), in_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in1(
static_cast<ElementType*>(input1), in_dims);
out.device(eigen::global_thread_pool_device) =
(in0 == in1).template cast<char>();
}
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType>
void exp(void* input0, void* output, size_t count)
{
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in0(
static_cast<ElementType*>(input0), in_dims);
out.device(eigen::global_thread_pool_device) = in0.exp();
}
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType>
void floor(void* input0, void* output, size_t count)
{
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in0(
static_cast<ElementType*>(input0), in_dims);
out.device(eigen::global_thread_pool_device) = in0.floor();
}
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType>
void greater(void* input0, void* input1, void* output, size_t count)
{
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
Eigen::TensorMap<Eigen::Tensor<char, 1, Eigen::RowMajor>> out(
static_cast<char*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in0(
static_cast<ElementType*>(input0), in_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in1(
static_cast<ElementType*>(input1), in_dims);
out.device(eigen::global_thread_pool_device) =
(in0 > in1).template cast<char>();
}
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType>
void greater_eq(void* input0, void* input1, void* output, size_t count)
{
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
Eigen::TensorMap<Eigen::Tensor<char, 1, Eigen::RowMajor>> out(
static_cast<char*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in0(
static_cast<ElementType*>(input0), in_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in1(
static_cast<ElementType*>(input1), in_dims);
out.device(eigen::global_thread_pool_device) =
(in0 >= in1).template cast<char>();
}
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType>
void less(void* input0, void* input1, void* output, size_t count)
{
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
Eigen::TensorMap<Eigen::Tensor<char, 1, Eigen::RowMajor>> out(
static_cast<char*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in0(
static_cast<ElementType*>(input0), in_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in1(
static_cast<ElementType*>(input1), in_dims);
out.device(eigen::global_thread_pool_device) =
(in0 < in1).template cast<char>();
}
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType>
void less_eq(void* input0, void* input1, void* output, size_t count)
{
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
Eigen::TensorMap<Eigen::Tensor<char, 1, Eigen::RowMajor>> out(
static_cast<char*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in0(
static_cast<ElementType*>(input0), in_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in1(
static_cast<ElementType*>(input1), in_dims);
out.device(eigen::global_thread_pool_device) =
(in0 <= in1).template cast<char>();
}
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType>
void log(void* input0, void* output, size_t count)
{
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in0(
static_cast<ElementType*>(input0), in_dims);
out.device(eigen::global_thread_pool_device) = in0.log();
}
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include "ngraph/runtime/reference/max_pool.hpp"
#include "ngraph/shape.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
// Type-erased adapter around reference::max_pool<ElementType>: `arg` is
// reinterpreted as a read-only ElementType buffer and `out` as a writable one;
// the shape, window, stride and padding arguments are passed through untouched.
template <typename ElementType>
void max_pool(void* arg,
              void* out,
              const Shape& arg_shape,
              const Shape& out_shape,
              const Shape& window_shape,
              const Strides& window_movement_strides,
              const Shape& padding_below,
              const Shape& padding_above)
{
    const ElementType* source = static_cast<const ElementType*>(arg);
    ElementType* pooled = static_cast<ElementType*>(out);

    reference::max_pool<ElementType>(source,
                                     pooled,
                                     arg_shape,
                                     out_shape,
                                     window_shape,
                                     window_movement_strides,
                                     padding_below,
                                     padding_above);
}
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType>
void maximum(void* input0, void* input1, void* output, size_t count)
{
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in0(
static_cast<ElementType*>(input0), in_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in1(
static_cast<ElementType*>(input1), in_dims);
out.device(eigen::global_thread_pool_device) = in0.cwiseMax(in1);
}
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType>
void minimum(void* input0, void* input1, void* output, size_t count)
{
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in0(
static_cast<ElementType*>(input0), in_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in1(
static_cast<ElementType*>(input1), in_dims);
out.device(eigen::global_thread_pool_device) = in0.cwiseMin(in1);
}
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType>
void negative(void* input0, void* output, size_t count)
{
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in0(
static_cast<ElementType*>(input0), in_dims);
out.device(eigen::global_thread_pool_device) = -in0;
}
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType>
void logical_not(void* input0, void* output, size_t count)
{
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
Eigen::TensorMap<Eigen::Tensor<char, 1, Eigen::RowMajor>> out(
static_cast<char*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in0(
static_cast<ElementType*>(input0), in_dims);
out.device(eigen::global_thread_pool_device) =
(in0 == ElementType(0)).template cast<char>();
}
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType>
void not_equal(void* input0, void* input1, void* output, size_t count)
{
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
Eigen::TensorMap<Eigen::Tensor<char, 1, Eigen::RowMajor>> out(
static_cast<char*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in0(
static_cast<ElementType*>(input0), in_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in1(
static_cast<ElementType*>(input1), in_dims);
out.device(eigen::global_thread_pool_device) =
(in0 != in1).template cast<char>();
}
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
void logical_or(void* input0, void* input1, void* output, size_t count)
{
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
Eigen::TensorMap<Eigen::Tensor<char, 1, Eigen::RowMajor>> out(
static_cast<char*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<char, 1, Eigen::RowMajor>> in0(
static_cast<char*>(input0), in_dims);
Eigen::TensorMap<Eigen::Tensor<char, 1, Eigen::RowMajor>> in1(
static_cast<char*>(input1), in_dims);
out.device(eigen::global_thread_pool_device) =
(in0 || in1).template cast<char>();
}
}
}
}
}
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include <unsupported/Eigen/CXX11/Tensor> #include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp" #include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
#include "ngraph/runtime/reference/max.hpp"
#include "ngraph/shape.hpp" #include "ngraph/shape.hpp"
namespace ngraph namespace ngraph
...@@ -31,8 +32,8 @@ namespace ngraph ...@@ -31,8 +32,8 @@ namespace ngraph
namespace kernel namespace kernel
{ {
template <typename ElementType, unsigned int Rank> template <typename ElementType, unsigned int Rank>
void reduce_max_all(ElementType* input, void reduce_max_all(void* input,
ElementType* output, void* output,
const Shape& input_shape, const Shape& input_shape,
const Shape& output_shape) const Shape& output_shape)
{ {
...@@ -44,16 +45,16 @@ namespace ngraph ...@@ -44,16 +45,16 @@ namespace ngraph
in_dims[i] = input_shape[i]; in_dims[i] = input_shape[i];
} }
Eigen::TensorMap<Eigen::Tensor<ElementType, 0, Eigen::RowMajor>> out(output, Eigen::TensorMap<Eigen::Tensor<ElementType, 0, Eigen::RowMajor>> out(
out_dims); static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> in(input, Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> in(
in_dims); static_cast<ElementType*>(input), in_dims);
out.device(eigen::global_thread_pool_device) = in.maximum(); out.device(eigen::global_thread_pool_device) = in.maximum();
} }
template <typename ElementType, unsigned int Rank, unsigned int ReductionDims> template <typename ElementType, unsigned int Rank, unsigned int ReductionDims>
void reduce_max(ElementType* input, void reduce_max(void* input,
ElementType* output, void* output,
const Shape& input_shape, const Shape& input_shape,
const Shape& output_shape, const Shape& output_shape,
const AxisSet& reduction_axes) const AxisSet& reduction_axes)
...@@ -80,11 +81,69 @@ namespace ngraph ...@@ -80,11 +81,69 @@ namespace ngraph
Eigen::TensorMap< Eigen::TensorMap<
Eigen::Tensor<ElementType, Rank - ReductionDims, Eigen::RowMajor>> Eigen::Tensor<ElementType, Rank - ReductionDims, Eigen::RowMajor>>
out(output, out_dims); out(static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> in(input, Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> in(
in_dims); static_cast<ElementType*>(input), in_dims);
out.device(eigen::global_thread_pool_device) = in.maximum(reduction_dims); out.device(eigen::global_thread_pool_device) = in.maximum(reduction_dims);
} }
// Convenience entry point for reduce_max with exactly one reduced dimension;
// the input rank stays a template parameter. All arguments are forwarded
// unchanged.
template <typename ElementType, unsigned int Rank>
void reduce_max_1rd(void* input,
                    void* output,
                    const Shape& input_shape,
                    const Shape& output_shape,
                    const AxisSet& reduction_axes)
{
    constexpr unsigned int reduced_dims = 1;

    reduce_max<ElementType, Rank, reduced_dims>(
        input, output, input_shape, output_shape, reduction_axes);
}
// Convenience entry point for reduce_max on a rank-3 input with two reduced
// dimensions. All arguments are forwarded unchanged.
template <typename ElementType>
void reduce_max_3d_2rd(void* input,
                       void* output,
                       const Shape& input_shape,
                       const Shape& output_shape,
                       const AxisSet& reduction_axes)
{
    constexpr unsigned int input_rank = 3;
    constexpr unsigned int reduced_dims = 2;

    reduce_max<ElementType, input_rank, reduced_dims>(
        input, output, input_shape, output_shape, reduction_axes);
}
// Convenience entry point for reduce_max on a rank-4 input with two reduced
// dimensions. All arguments are forwarded unchanged.
template <typename ElementType>
void reduce_max_4d_2rd(void* input,
                       void* output,
                       const Shape& input_shape,
                       const Shape& output_shape,
                       const AxisSet& reduction_axes)
{
    constexpr unsigned int input_rank = 4;
    constexpr unsigned int reduced_dims = 2;

    reduce_max<ElementType, input_rank, reduced_dims>(
        input, output, input_shape, output_shape, reduction_axes);
}
// Convenience entry point for reduce_max on a rank-5 input with two reduced
// dimensions. All arguments are forwarded unchanged.
template <typename ElementType>
void reduce_max_5d_2rd(void* input,
                       void* output,
                       const Shape& input_shape,
                       const Shape& output_shape,
                       const AxisSet& reduction_axes)
{
    constexpr unsigned int input_rank = 5;
    constexpr unsigned int reduced_dims = 2;

    reduce_max<ElementType, input_rank, reduced_dims>(
        input, output, input_shape, output_shape, reduction_axes);
}
// Type-erased adapter around reference::max: both raw buffers are
// reinterpreted as ElementType arrays and the shapes and reduction axes are
// passed through untouched.
template <typename ElementType>
void max(void* arg,
         void* out,
         const Shape& in_shape,
         const Shape& out_shape,
         const AxisSet& reduction_axes)
{
    ElementType* source = static_cast<ElementType*>(arg);
    ElementType* reduced = static_cast<ElementType*>(out);

    reference::max(source, reduced, in_shape, out_shape, reduction_axes);
}
} }
} }
} }
......
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
#include "ngraph/runtime/reference/min.hpp"
#include "ngraph/shape.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType, unsigned int Rank>
void reduce_min_all(void* input,
void* output,
const Shape& input_shape,
const Shape& output_shape)
{
Eigen::array<Eigen::Index, Rank> in_dims;
Eigen::array<Eigen::Index, 0> out_dims;
for (int i = 0; i < Rank; i++)
{
in_dims[i] = input_shape[i];
}
Eigen::TensorMap<Eigen::Tensor<ElementType, 0, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> in(
static_cast<ElementType*>(input), in_dims);
out.device(eigen::global_thread_pool_device) = in.minimum();
}
template <typename ElementType, unsigned int Rank, unsigned int ReductionDims>
void reduce_min(void* input,
void* output,
const Shape& input_shape,
const Shape& output_shape,
const AxisSet& reduction_axes)
{
Eigen::array<Eigen::Index, Rank> in_dims;
Eigen::array<Eigen::Index, Rank - ReductionDims> out_dims;
Eigen::array<Eigen::Index, ReductionDims> reduction_dims;
for (int i = 0; i < Rank; i++)
{
in_dims[i] = input_shape[i];
}
for (int i = 0; i < Rank - ReductionDims; i++)
{
out_dims[i] = output_shape[i];
}
int i = 0;
for (auto axis : reduction_axes)
{
reduction_dims[i++] = axis;
}
Eigen::TensorMap<
Eigen::Tensor<ElementType, Rank - ReductionDims, Eigen::RowMajor>>
out(static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> in(
static_cast<ElementType*>(input), in_dims);
out.device(eigen::global_thread_pool_device) = in.minimum(reduction_dims);
}
template <typename ElementType, unsigned int Rank>
void reduce_min_1rd(void* input,
void* output,
const Shape& input_shape,
const Shape& output_shape,
const AxisSet& reduction_axes)
{
reduce_min<ElementType, Rank, 1>(
input, output, input_shape, output_shape, reduction_axes);
}
template <typename ElementType>
void reduce_min_3d_2rd(void* input,
void* output,
const Shape& input_shape,
const Shape& output_shape,
const AxisSet& reduction_axes)
{
reduce_min<ElementType, 3, 2>(
input, output, input_shape, output_shape, reduction_axes);
}
template <typename ElementType>
void reduce_min_4d_2rd(void* input,
void* output,
const Shape& input_shape,
const Shape& output_shape,
const AxisSet& reduction_axes)
{
reduce_min<ElementType, 4, 2>(
input, output, input_shape, output_shape, reduction_axes);
}
template <typename ElementType>
void reduce_min_5d_2rd(void* input,
void* output,
const Shape& input_shape,
const Shape& output_shape,
const AxisSet& reduction_axes)
{
reduce_min<ElementType, 5, 2>(
input, output, input_shape, output_shape, reduction_axes);
}
// Type-erased adapter around reference::min: both raw buffers are
// reinterpreted as ElementType arrays and the shapes and reduction axes are
// passed through untouched.
template <typename ElementType>
void min(void* arg,
         void* out,
         const Shape& in_shape,
         const Shape& out_shape,
         const AxisSet& reduction_axes)
{
    ElementType* source = static_cast<ElementType*>(arg);
    ElementType* reduced = static_cast<ElementType*>(out);

    reference::min(source, reduced, in_shape, out_shape, reduction_axes);
}
}
}
}
}
...@@ -20,6 +20,7 @@ ...@@ -20,6 +20,7 @@
#include <unsupported/Eigen/CXX11/Tensor> #include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp" #include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
#include "ngraph/runtime/reference/sum.hpp"
#include "ngraph/shape.hpp" #include "ngraph/shape.hpp"
namespace ngraph namespace ngraph
...@@ -31,8 +32,8 @@ namespace ngraph ...@@ -31,8 +32,8 @@ namespace ngraph
namespace kernel namespace kernel
{ {
template <typename ElementType, unsigned int Rank> template <typename ElementType, unsigned int Rank>
void reduce_sum_all(ElementType* input, void reduce_sum_all(void* input,
ElementType* output, void* output,
const Shape& input_shape, const Shape& input_shape,
const Shape& output_shape) const Shape& output_shape)
{ {
...@@ -44,16 +45,16 @@ namespace ngraph ...@@ -44,16 +45,16 @@ namespace ngraph
in_dims[i] = input_shape[i]; in_dims[i] = input_shape[i];
} }
Eigen::TensorMap<Eigen::Tensor<ElementType, 0, Eigen::RowMajor>> out(output, Eigen::TensorMap<Eigen::Tensor<ElementType, 0, Eigen::RowMajor>> out(
out_dims); static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> in(input, Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> in(
in_dims); static_cast<ElementType*>(input), in_dims);
out.device(eigen::global_thread_pool_device) = in.sum(); out.device(eigen::global_thread_pool_device) = in.sum();
} }
template <typename ElementType, unsigned int Rank, unsigned int ReductionDims> template <typename ElementType, unsigned int Rank, unsigned int ReductionDims>
void reduce_sum(ElementType* input, void reduce_sum(void* input,
ElementType* output, void* output,
const Shape& input_shape, const Shape& input_shape,
const Shape& output_shape, const Shape& output_shape,
const AxisSet& reduction_axes) const AxisSet& reduction_axes)
...@@ -80,11 +81,69 @@ namespace ngraph ...@@ -80,11 +81,69 @@ namespace ngraph
Eigen::TensorMap< Eigen::TensorMap<
Eigen::Tensor<ElementType, Rank - ReductionDims, Eigen::RowMajor>> Eigen::Tensor<ElementType, Rank - ReductionDims, Eigen::RowMajor>>
out(output, out_dims); out(static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> in(input, Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> in(
in_dims); static_cast<ElementType*>(input), in_dims);
out.device(eigen::global_thread_pool_device) = in.sum(reduction_dims); out.device(eigen::global_thread_pool_device) = in.sum(reduction_dims);
} }
// Sum-reduction entry point that forwards to reduce_sum with the
// ReductionDims template argument pinned to 1, leaving only the element type
// and tensor rank for the caller to choose.
template <typename ElementType, unsigned int Rank>
void reduce_sum_1rd(void* input,
                    void* output,
                    const Shape& input_shape,
                    const Shape& output_shape,
                    const AxisSet& reduction_axes)
{
    constexpr unsigned int reduced_axis_count = 1;
    reduce_sum<ElementType, Rank, reduced_axis_count>(
        input, output, input_shape, output_shape, reduction_axes);
}
// Sum-reduction entry point that forwards to reduce_sum instantiated with
// Rank = 3 and ReductionDims = 2.
template <typename ElementType>
void reduce_sum_3d_2rd(void* input,
                       void* output,
                       const Shape& input_shape,
                       const Shape& output_shape,
                       const AxisSet& reduction_axes)
{
    constexpr unsigned int tensor_rank = 3;
    constexpr unsigned int reduced_axis_count = 2;
    reduce_sum<ElementType, tensor_rank, reduced_axis_count>(
        input, output, input_shape, output_shape, reduction_axes);
}
// Sum-reduction entry point that forwards to reduce_sum instantiated with
// Rank = 4 and ReductionDims = 2.
template <typename ElementType>
void reduce_sum_4d_2rd(void* input,
                       void* output,
                       const Shape& input_shape,
                       const Shape& output_shape,
                       const AxisSet& reduction_axes)
{
    constexpr unsigned int tensor_rank = 4;
    constexpr unsigned int reduced_axis_count = 2;
    reduce_sum<ElementType, tensor_rank, reduced_axis_count>(
        input, output, input_shape, output_shape, reduction_axes);
}
// Sum-reduction entry point that forwards to reduce_sum instantiated with
// Rank = 5 and ReductionDims = 2.
template <typename ElementType>
void reduce_sum_5d_2rd(void* input,
                       void* output,
                       const Shape& input_shape,
                       const Shape& output_shape,
                       const AxisSet& reduction_axes)
{
    constexpr unsigned int tensor_rank = 5;
    constexpr unsigned int reduced_axis_count = 2;
    reduce_sum<ElementType, tensor_rank, reduced_axis_count>(
        input, output, input_shape, output_shape, reduction_axes);
}
// Type-erased adapter around reference::sum: the untyped buffers are cast to
// ElementType* and every other argument is passed through unchanged.
template <typename ElementType>
void sum(void* arg,
         void* out,
         const Shape& in_shape,
         const Shape& out_shape,
         const AxisSet& reduction_axes)
{
    ElementType* typed_arg = static_cast<ElementType*>(arg);
    ElementType* typed_out = static_cast<ElementType*>(out);
    reference::sum(typed_arg, typed_out, in_shape, out_shape, reduction_axes);
}
} }
} }
} }
......
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include "ngraph/runtime/reference/reverse.hpp"
#include "ngraph/shape.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
// Type-erased adapter around reference::reverse: `arg` is viewed as
// const ElementType*, `out` as ElementType*, and the shapes and axis set are
// handed over untouched.
template <typename ElementType>
void reverse(const void* arg,
             void* out,
             const Shape& arg_shape,
             const Shape& out_shape,
             const AxisSet& reversed_axes)
{
    const ElementType* typed_arg = static_cast<const ElementType*>(arg);
    ElementType* typed_out = static_cast<ElementType*>(out);
    reference::reverse(typed_arg, typed_out, arg_shape, out_shape, reversed_axes);
}
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <cstdint>
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
#include "ngraph/shape.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename InputElementType, typename SeqLenType, unsigned int Rank>
void reverse_sequence(void* input,
void* output,
const Shape& input_shape,
size_t batch_axis,
size_t sequence_axis,
void* sequence_lengths)
{
Eigen::array<Eigen::Index, Rank> in_dims;
for (int i = 0; i < Rank; i++)
{
in_dims[i] = input_shape[i];
}
Eigen::TensorMap<Eigen::Tensor<InputElementType, Rank, Eigen::RowMajor>> out(
static_cast<InputElementType*>(output), in_dims);
Eigen::TensorMap<Eigen::Tensor<InputElementType, Rank, Eigen::RowMajor>> in(
static_cast<InputElementType*>(input), in_dims);
auto slv = static_cast<SeqLenType*>(sequence_lengths);
auto generator = [&](const Eigen::array<Eigen::DenseIndex, Rank>& i) {
Eigen::array<Eigen::DenseIndex, Rank> k = i;
if (i[sequence_axis] < slv[i[batch_axis]])
{
k[sequence_axis] = slv[i[batch_axis]] - i[sequence_axis] - 1;
}
return in(k);
};
out.device(eigen::global_thread_pool_device) = in.generate(generator);
}
// Variant of reverse_sequence with the sequence-length element type fixed to
// int32_t; all arguments are forwarded as-is.
template <typename InputElementType, unsigned int Rank>
void reverse_sequence_sli32(void* input,
                            void* output,
                            const Shape& input_shape,
                            size_t batch_axis,
                            size_t sequence_axis,
                            void* sequence_lengths)
{
    using LengthType = int32_t;
    reverse_sequence<InputElementType, LengthType, Rank>(
        input, output, input_shape, batch_axis, sequence_axis, sequence_lengths);
}
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType>
void select(void* input0, void* input1, void* input2, void* output, size_t count)
{
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<char, 1, Eigen::RowMajor>> in0(
static_cast<char*>(input0), in_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in1(
static_cast<ElementType*>(input1), in_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in2(
static_cast<ElementType*>(input2), in_dims);
out.device(eigen::global_thread_pool_device) = in0.select(in1, in2);
}
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType>
void sign(void* input0, void* output, size_t count)
{
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in0(
static_cast<ElementType*>(input0), in_dims);
out.device(eigen::global_thread_pool_device) = in0.sign();
}
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType>
void sqrt(void* input, void* output, size_t count)
{
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in(
static_cast<ElementType*>(input), in_dims);
out.device(eigen::global_thread_pool_device) = in.sqrt();
}
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType>
void subtract(void* input0, void* input1, void* output, size_t count)
{
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in0(
static_cast<ElementType*>(input0), in_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in1(
static_cast<ElementType*>(input1), in_dims);
out.device(eigen::global_thread_pool_device) = in0 - in1;
}
}
}
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment