Commit 4efcb76e authored by Jaikrishnan Menon, committed by Scott Cyphers

CPU Direct Execution: Implement ReduceWindow (#1351)

parent c2e98505
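
This commit wires ngraph::op::ReduceWindow into the CPU backend's direct-execution path: the CMakeLists hunk below adds a new builder source, builder/reduce_function_window.cpp registers a functor for the op, and kernel/reduce_function_window.hpp wraps the op's nested reduction Function and forwards to reference::reduce_window. For orientation, here is a minimal, self-contained sketch of the sliding-window reduction those pieces compute. It is a simplification (2-D only, dense row-major data, and it assumes the init value seeds each window's accumulation, as with op::Reduce); reduce_window_2d is an illustrative name, not an nGraph API.

```cpp
// Minimal sketch of sliding-window reduction semantics (2-D case only).
// reduce_window_2d is an illustrative stand-in for reference::reduce_window;
// the real kernel is rank-generic and driven by Shape/Strides.
#include <cstddef>
#include <functional>
#include <iostream>
#include <vector>

template <typename T>
std::vector<T> reduce_window_2d(const std::vector<T>& in,
                                size_t in_rows,
                                size_t in_cols,
                                T init,
                                const std::function<T(T, T)>& reducer,
                                size_t win_rows,
                                size_t win_cols,
                                size_t stride_r,
                                size_t stride_c)
{
    // Output extent per axis: (input - window) / stride + 1.
    size_t out_rows = (in_rows - win_rows) / stride_r + 1;
    size_t out_cols = (in_cols - win_cols) / stride_c + 1;
    std::vector<T> out(out_rows * out_cols);

    for (size_t i = 0; i < out_rows; ++i)
    {
        for (size_t j = 0; j < out_cols; ++j)
        {
            // Each output element reduces one window, seeded with the init value.
            T acc = init;
            for (size_t wi = 0; wi < win_rows; ++wi)
            {
                for (size_t wj = 0; wj < win_cols; ++wj)
                {
                    acc = reducer(acc, in[(i * stride_r + wi) * in_cols + (j * stride_c + wj)]);
                }
            }
            out[i * out_cols + j] = acc;
        }
    }
    return out;
}

int main()
{
    // 4x4 input, 2x2 window, stride 2, max reduction -> 2x2 output.
    std::vector<float> in = {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16};
    auto out = reduce_window_2d<float>(
        in, 4, 4, -1e30f, [](float a, float b) { return a > b ? a : b; }, 2, 2, 2, 2);
    for (float v : out)
    {
        std::cout << v << " "; // prints: 6 8 14 16
    }
    std::cout << "\n";
    return 0;
}
```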
@@ -44,6 +44,7 @@ set(SRC
    builder/relu.cpp
    builder/pad.cpp
    builder/product.cpp
+   builder/reduce_function_window.cpp
    builder/reshape.cpp
    builder/reverse.cpp
    builder/reverse_sequence.cpp
/*******************************************************************************
 * Copyright 2018 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/

#include "ngraph/runtime/cpu/kernel/reduce_function_window.hpp"
#include "ngraph/op/reduce_window.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/cpu_external_function.hpp"
#include "ngraph/runtime/tensor_view.hpp"

using namespace std;
using namespace ngraph;

namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            template <>
            void Builder::BUILDER_DECL(ngraph::op::ReduceWindow)
            {
                auto reduce_window = static_cast<const ngraph::op::ReduceWindow*>(node);
                auto function = reduce_window->get_functions()[0];

                auto& functors = external_function->get_functors();
                auto& tensor_data = external_function->get_tensor_data();
                auto& callees = external_function->get_callees();

                // Wrap the nested reduction Function in a CPU_ExternalFunction,
                // cached by name so it is only built once.
                if (!callees.count(function->get_name()))
                {
                    callees[function->get_name()] = make_shared<CPU_ExternalFunction>(function);
                }
                auto& reducer_external_function = callees[function->get_name()];

                auto& arg0_tensor = tensor_data[args[0].get_name()];
                auto& arg1_tensor = tensor_data[args[1].get_name()];
                auto& out_tensor = tensor_data[out[0].get_name()];

                auto arg0_shape = args[0].get_shape();
                auto out_shape = out[0].get_shape();
                auto window_shape = reduce_window->get_window_shape();
                auto window_movement_strides = reduce_window->get_window_movement_strides();

                // Select the reduce_function_window instantiation that matches the
                // input element type.
                std::function<decltype(runtime::cpu::kernel::reduce_function_window<float>)> kernel;
                SELECT_KERNEL(kernel,
                              args[0].get_element_type(),
                              runtime::cpu::kernel::reduce_function_window);

                // Shapes and the selected kernel are captured by value; the tensor data
                // entries are captured by reference so the functor sees the buffers that
                // are bound at execution time.
                auto functor =
                    [&, kernel, arg0_shape, out_shape, window_shape, window_movement_strides](
                        CPURuntimeContext* ctx) {
                        kernel(arg0_tensor,
                               arg1_tensor,
                               out_tensor,
                               arg0_shape,
                               out_shape,
                               window_shape,
                               window_movement_strides,
                               reducer_external_function);
                    };
                functors.emplace_back(functor);
            }

            REGISTER_OP_BUILDER(ReduceWindow);
        }
    }
}
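
BUILDER_DECL, SELECT_KERNEL, and REGISTER_OP_BUILDER come from cpu_builder.hpp and are not part of this diff. The sketch below illustrates the kind of element-type dispatch SELECT_KERNEL is assumed to perform, i.e. picking the template instantiation of the kernel that matches the tensor's runtime element type; ElementType, dummy_kernel, and pick_kernel are stand-ins, not nGraph types, and the real macro covers the full set of element types. The header that follows defines the templated reduce_function_window kernel that the selected instantiation binds to.

```cpp
// Illustrative only: a hand-rolled equivalent of dispatching a templated kernel
// on a runtime element type, in the spirit of what SELECT_KERNEL is assumed to do.
#include <cstdint>
#include <functional>
#include <stdexcept>

enum class ElementType { f32, f64, i32 }; // stand-in for ngraph::element::Type

// Stand-in for a templated kernel such as reduce_function_window<T>.
template <typename T>
void dummy_kernel(const void* in, void* out)
{
    *static_cast<T*>(out) = *static_cast<const T*>(in);
}

using KernelFn = std::function<void(const void*, void*)>;

// Pick the instantiation that matches the tensor's runtime element type.
KernelFn pick_kernel(ElementType et)
{
    switch (et)
    {
    case ElementType::f32: return dummy_kernel<float>;
    case ElementType::f64: return dummy_kernel<double>;
    case ElementType::i32: return dummy_kernel<int32_t>;
    }
    throw std::runtime_error("unsupported element type");
}
```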
/*******************************************************************************
 * Copyright 2018 Intel Corporation
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *******************************************************************************/

#pragma once

#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/cpu/cpu_external_function.hpp"
#include "ngraph/runtime/reference/reduce_window.hpp"

namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            namespace kernel
            {
                template <typename ElementType>
                void reduce_function_window(
                    void* input0,
                    void* input1,
                    void* output,
                    const Shape& input_shape,
                    const Shape& output_shape,
                    const Shape& window_shape,
                    const Strides& window_movement_strides,
                    const std::shared_ptr<CPU_ExternalFunction>& external_function)
                {
                    auto backend = runtime::Backend::create("CPU");

                    // Wrap the nested reduction Function in a scalar binary reducer:
                    // each pair of elements is packed into zero-dimensional tensors and
                    // evaluated through a call frame obtained from the reducer's
                    // CPU_ExternalFunction.
                    auto reducer = [&](ElementType a, ElementType b) {
                        TensorViewPtrs inputs, outputs;
                        ElementType p __attribute__((aligned(64))) = a;
                        ElementType q __attribute__((aligned(64))) = b;
                        ElementType r __attribute__((aligned(64)));
                        inputs.emplace_back(backend->create_tensor(
                            ngraph::element::from<ElementType>(), Shape{}, &p));
                        inputs.emplace_back(backend->create_tensor(
                            ngraph::element::from<ElementType>(), Shape{}, &q));
                        outputs.emplace_back(backend->create_tensor(
                            ngraph::element::from<ElementType>(), Shape{}, &r));
                        auto call_frame = external_function->make_call_frame();
                        call_frame->call(outputs, inputs);
                        return r;
                    };

                    reference::reduce_window<ElementType>(
                        static_cast<const ElementType*>(input0),
                        static_cast<const ElementType*>(input1),
                        static_cast<ElementType*>(output),
                        input_shape,
                        output_shape,
                        reducer,
                        window_shape,
                        window_movement_strides);
                }
            }
        }
    }
}
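
Taken together, the builder contributes one entry to the external function's functor list, and direct execution runs that list in order with a CPURuntimeContext. A minimal sketch of that pattern follows, under the assumption that the functor signature is simply void(CPURuntimeContext*) as the lambda above suggests; the bare CPURuntimeContext and run loop here are illustrative, not nGraph's actual executor.

```cpp
// Minimal sketch of the direct-execution pattern: builders append functors,
// and executing the compiled function runs them in graph order.
// Names here are illustrative, not the actual nGraph CPU executor.
#include <functional>
#include <iostream>
#include <vector>

struct CPURuntimeContext
{
    // In nGraph this carries per-invocation state (buffers, scratch memory, ...).
};

using Functor = std::function<void(CPURuntimeContext*)>;

int main()
{
    std::vector<Functor> functors;

    // A builder (like the ReduceWindow builder above) captures its shapes and
    // tensor pointers, then appends a functor that invokes the selected kernel.
    functors.emplace_back([](CPURuntimeContext*) { std::cout << "reduce window kernel\n"; });

    // Direct execution: run every registered functor with the runtime context.
    CPURuntimeContext ctx;
    for (const auto& f : functors)
    {
        f(&ctx);
    }
    return 0;
}
```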