Commit b33fc6a2 authored by Jaikrishnan Menon

CPU Direct Execution: Implement Relu

parent 9d0a6998
...@@ -95,6 +95,7 @@ ...@@ -95,6 +95,7 @@
#include "ngraph/runtime/cpu/kernel/abs.hpp" #include "ngraph/runtime/cpu/kernel/abs.hpp"
#include "ngraph/runtime/cpu/kernel/add.hpp" #include "ngraph/runtime/cpu/kernel/add.hpp"
#include "ngraph/runtime/cpu/kernel/multiply.hpp" #include "ngraph/runtime/cpu/kernel/multiply.hpp"
#include "ngraph/runtime/cpu/kernel/relu.hpp"
#include "ngraph/runtime/cpu/kernel/result.hpp" #include "ngraph/runtime/cpu/kernel/result.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp" #include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/cpu/op/batch_norm_relu.hpp" #include "ngraph/runtime/cpu/op/batch_norm_relu.hpp"
...@@ -229,6 +230,25 @@ namespace ngraph ...@@ -229,6 +230,25 @@ namespace ngraph
functors.emplace_back(functor); functors.emplace_back(functor);
} }
// Direct-execution builder for ngraph::op::Relu.
//
// Picks the relu kernel instantiation that matches the element type of the
// first output, then appends a functor to the external function's functor
// list. When invoked, the functor runs the kernel over the first argument's
// tensor entry and writes into the first output's tensor entry.
template <>
void Builder::BUILDER_DECL(ngraph::op::Relu)
{
    auto& functor_list = external_function->get_functors();
    auto& tensor_table = external_function->get_tensor_data();

    // Resolve the type-specialized kernel once, at build time.
    std::function<void(void*, void*, size_t)> relu_kernel;
    SELECT_KERNEL(relu_kernel, out[0].get_element_type(), runtime::cpu::kernel::relu);

    const auto num_elements = out[0].get_size();

    // The table entries are held by reference, so the functor reads whatever
    // value each entry holds at the time it is called, not at build time.
    auto& input_entry = tensor_table[args[0].get_name()];
    auto& output_entry = tensor_table[out[0].get_name()];

    // The runtime context parameter is part of the functor signature but is
    // not used by this op.
    functor_list.emplace_back(
        [&input_entry, &output_entry, relu_kernel, num_elements](CPURuntimeContext* ctx) {
            relu_kernel(input_entry, output_entry, num_elements);
        });
}
template <> template <>
void Builder::BUILDER_DECL(ngraph::op::Result) void Builder::BUILDER_DECL(ngraph::op::Result)
{ {
...@@ -426,6 +446,7 @@ namespace ngraph ...@@ -426,6 +446,7 @@ namespace ngraph
{TI(ngraph::op::Multiply), &runtime::cpu::Builder::build<ngraph::op::Multiply>}, {TI(ngraph::op::Multiply), &runtime::cpu::Builder::build<ngraph::op::Multiply>},
{TI(ngraph::op::Parameter), &runtime::cpu::Builder::nop}, {TI(ngraph::op::Parameter), &runtime::cpu::Builder::nop},
{TI(ngraph::op::Abs), &runtime::cpu::Builder::build<ngraph::op::Abs>}, {TI(ngraph::op::Abs), &runtime::cpu::Builder::build<ngraph::op::Abs>},
{TI(ngraph::op::Relu), &runtime::cpu::Builder::build<ngraph::op::Relu>},
{TI(ngraph::op::Result), &runtime::cpu::Builder::build<ngraph::op::Result>}, {TI(ngraph::op::Result), &runtime::cpu::Builder::build<ngraph::op::Result>},
{TI(ngraph::op::MatmulBias), &runtime::cpu::Builder::build<ngraph::op::MatmulBias>}, {TI(ngraph::op::MatmulBias), &runtime::cpu::Builder::build<ngraph::op::MatmulBias>},
{TI(ngraph::op::Constant), &runtime::cpu::Builder::build<ngraph::op::Constant>}}; {TI(ngraph::op::Constant), &runtime::cpu::Builder::build<ngraph::op::Constant>}};
......
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType>
void relu(void* input0, void* output, size_t count)
{
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in0(
static_cast<ElementType*>(input0), in_dims);
out.device(eigen::global_thread_pool_device) = in0.cwiseMax(ElementType(0));
}
}
}
}
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment