Commit abb68627 authored by Jaikrishnan Menon, committed by Scott Cyphers

Jmenon/dexec (#1092)

* CPU: Direct Execution
Part 1, with the bare-minimum infrastructure

* Refactor: Move build-related functionality to a separate TU
and an external-function method

* Add TU back after merge

* Remove an assert

* Remove commented-out code
parent 79dd92d3
@@ -16,6 +16,7 @@
set(SRC
cpu_backend.cpp
cpu_builder.cpp
cpu_call_frame.cpp
cpu_emitter.cpp
cpu_external_function.cpp
......
This diff is collapsed.
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <functional>
#include <string>
#include <typeindex>
#include <unordered_map>
#include <vector>
#include "ngraph/node.hpp"
#include "ngraph/runtime/cpu/cpu_external_function.hpp"
#include "ngraph/runtime/cpu/cpu_tensor_view_wrapper.hpp"
#define BUILDER_DECL(op_name) \
build<op_name>(CPU_ExternalFunction * external_function, \
const ngraph::Node* node, \
const std::vector<TensorViewWrapper>& args, \
const std::vector<TensorViewWrapper>& out)
namespace ngraph
{
namespace runtime
{
namespace cpu
{
using BuildOpFunction =
std::function<void(CPU_ExternalFunction* external_function,
const ngraph::Node*,
const std::vector<TensorViewWrapper>& inputs,
const std::vector<TensorViewWrapper>& outputs)>;
using BuildOpMap = std::unordered_map<std::type_index, BuildOpFunction>;
extern const BuildOpMap build_dispatcher;
class Builder
{
public:
// Fallback: any op without a specialized builder fails at build time.
template <typename OP>
static void build(CPU_ExternalFunction* external_function,
const ngraph::Node* node,
const std::vector<TensorViewWrapper>& args,
const std::vector<TensorViewWrapper>& out)
{
throw std::runtime_error("Unimplemented op in CPU builder");
}
static void nop(CPU_ExternalFunction* external_function,
const ngraph::Node* node,
const std::vector<TensorViewWrapper>& args,
const std::vector<TensorViewWrapper>& out)
{
}
static void buildBatchNorm(CPU_ExternalFunction* external_function,
const ngraph::Node* node,
const std::vector<TensorViewWrapper>& args,
const std::vector<TensorViewWrapper>& out,
bool append_relu = false);
};
}
}
}
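To make the dispatch concrete, here is a hypothetical builder specialization for ngraph::op::Add, sketched under a few assumptions: the real registrations live in the collapsed cpu_builder.cpp diff above, the kernel header path is guessed, and element-type dispatch is elided (float is hard-coded). The builder resolves tensor slots by name once at build time and appends a functor that runs at call time:

#include "ngraph/op/add.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/kernel/add.hpp" // assumed path for the add kernel shown below

namespace ngraph { namespace runtime { namespace cpu {

// Hypothetical sketch, not the actual cpu_builder.cpp contents.
template <>
void Builder::BUILDER_DECL(ngraph::op::Add)
{
    auto& functors = external_function->get_functors();
    auto& tensor_data = external_function->get_tensor_data();

    // Bind to the name -> pointer slots now; the executor patches the
    // slots with real buffer addresses before each call.
    void*& arg0 = tensor_data[args[0].get_name()];
    void*& arg1 = tensor_data[args[1].get_name()];
    void*& out0 = tensor_data[out[0].get_name()];
    size_t count = out[0].get_size();

    // float is hard-coded for brevity; a real builder would dispatch
    // on args[0].get_element_type().
    functors.emplace_back([&arg0, &arg1, &out0, count](CPURuntimeContext* ctx) {
        kernel::add<float>(arg0, arg1, out0, count);
    });
}

}}}

build_dispatcher would then map std::type_index(typeid(ngraph::op::Add)) to this function, which is exactly the shape BuildOpMap declares above.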
@@ -63,7 +63,14 @@ void runtime::cpu::CPU_CallFrame::call(
}
// Invoke the compiled computation or the direct-execution functor pipeline
if (!m_external_function->is_direct_execution())
{
m_compiled_function(inputs.data(), outputs.data(), ctx);
}
else
{
m_external_function->get_executor()(ctx, inputs, outputs);
}
if (runtime::cpu::IsTracingEnabled())
{
......
@@ -89,7 +89,6 @@
#include "ngraph/op/sum.hpp"
#include "ngraph/op/tan.hpp"
#include "ngraph/op/tanh.hpp"
#include "ngraph/runtime/cpu/cpu_emitter.hpp"
#include "ngraph/runtime/cpu/cpu_kernel_emitters.hpp"
#include "ngraph/runtime/cpu/cpu_op_annotations.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
......
@@ -17,6 +17,7 @@
#pragma once
#include <functional>
#include <list>
#include <map>
#include <memory>
#include <string>
@@ -94,7 +95,19 @@ namespace ngraph
// Temporary Memory Pool alignment
static const size_t s_memory_pool_alignment;
std::list<std::function<void(CPURuntimeContext*)>>& get_functors()
{
return functors;
}
std::unordered_map<std::string, void*>& get_tensor_data() { return tensor_data; }
std::function<void(CPURuntimeContext*, std::vector<void*>&, std::vector<void*>&)>&
get_executor()
{
return executor;
}
bool is_direct_execution() const { return m_direct_execution; }
protected:
void build();
void compile();
private:
@@ -126,6 +139,7 @@
std::unique_ptr<codegen::ExecutionEngine> m_execution_engine;
bool m_emit_timing;
bool m_use_tbb;
std::unordered_map<std::string, std::string> m_variable_name_map;
std::map<std::string, size_t> m_name_index_map;
@@ -142,6 +156,15 @@ namespace ngraph
std::unique_ptr<MKLDNNEmitter> m_mkldnn_emitter;
std::string m_function_name;
std::list<std::function<void(CPURuntimeContext*)>> functors;
std::function<void(CPURuntimeContext*, std::vector<void*>&, std::vector<void*>&)>
executor;
std::unordered_map<std::string, void*> tensor_data;
std::unordered_map<std::string, size_t> intermediates_offsets;
std::unordered_map<std::string, size_t> function_input_index, function_output_index;
bool m_is_built;
bool m_direct_execution;
};
}
}
......
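Putting the new members together: build() presumably constructs the functor list and installs an executor that patches each call's input and output buffers into tensor_data before running the functors in order. A minimal sketch of that wiring, assuming function_input_index and function_output_index map tensor names to argument positions (the actual implementation is in cpu_external_function.cpp, not shown here):

// Assumed wiring inside CPU_ExternalFunction::build(), for illustration only.
executor = [this](CPURuntimeContext* ctx,
                  std::vector<void*>& inputs,
                  std::vector<void*>& outputs) {
    // Patch the name -> pointer table with this call's buffers.
    for (const auto& p : function_input_index)
    {
        tensor_data[p.first] = inputs[p.second];
    }
    for (const auto& p : function_output_index)
    {
        tensor_data[p.first] = outputs[p.second];
    }
    // Run the pre-built op functors in graph order.
    for (const auto& functor : functors)
    {
        functor(ctx);
    }
};

This is what lets CPU_CallFrame::call dispatch through get_executor() without any generated code.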
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType>
void abs(void* input0, void* output, size_t count)
{
// View the raw buffers as 1-D Eigen tensors of length `count`.
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in0(
static_cast<ElementType*>(input0), in_dims);
// Evaluate elementwise |x| on the shared Eigen thread-pool device.
out.device(eigen::global_thread_pool_device) = in0.abs();
}
}
}
}
}
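The kernels take type-erased pointers plus an element count, so calling one directly is straightforward. A standalone usage sketch (the header path is assumed; the add and multiply kernels below follow the same pattern):

#include <vector>
#include "ngraph/runtime/cpu/kernel/abs.hpp" // assumed path for the header above

int main()
{
    std::vector<float> in{-1.5f, 2.0f, -3.25f};
    std::vector<float> out(in.size());

    // Instantiate for float and pass raw, type-erased buffers.
    ngraph::runtime::cpu::kernel::abs<float>(in.data(), out.data(), in.size());
    // out now holds {1.5, 2.0, 3.25}
    return 0;
}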
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType>
void add(void* input0, void* input1, void* output, size_t count)
{
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in0(
static_cast<ElementType*>(input0), in_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in1(
static_cast<ElementType*>(input1), in_dims);
out.device(eigen::global_thread_pool_device) = in0 + in1;
}
}
}
}
}
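These kernels are templates over ElementType, but a builder only sees the element type at runtime, so it needs a bridge between the two. A hedged sketch of such a helper (select_add_kernel is a hypothetical name, covering f32/f64/i32 only; nGraph's builders do this selection themselves):

#include <cstddef>
#include <cstdint>
#include <functional>
#include <stdexcept>
#include "ngraph/type/element_type.hpp"
#include "ngraph/runtime/cpu/kernel/add.hpp" // assumed path for the kernel above

// Map a runtime element::Type to the matching template instantiation.
inline std::function<void(void*, void*, void*, size_t)>
    select_add_kernel(const ngraph::element::Type& et)
{
    if (et == ngraph::element::f32)
    {
        return ngraph::runtime::cpu::kernel::add<float>;
    }
    if (et == ngraph::element::f64)
    {
        return ngraph::runtime::cpu::kernel::add<double>;
    }
    if (et == ngraph::element::i32)
    {
        return ngraph::runtime::cpu::kernel::add<int32_t>;
    }
    throw std::runtime_error("select_add_kernel: unsupported element type");
}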
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernel/eigen_thread_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType>
void multiply(void* input0, void* input1, void* output, size_t count)
{
Eigen::array<Eigen::Index, 1> out_dims, in_dims;
out_dims[0] = in_dims[0] = count;
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> out(
static_cast<ElementType*>(output), out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in0(
static_cast<ElementType*>(input0), in_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, 1, Eigen::RowMajor>> in1(
static_cast<ElementType*>(input1), in_dims);
out.device(eigen::global_thread_pool_device) = in0 * in1;
}
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <cstddef>
#include <cstring>
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType>
void result(const void* arg, void* out, size_t count)
{
// A Result op is a plain copy of the computed value into the output buffer.
memcpy(out, arg, sizeof(ElementType) * count);
}
}
}
}
}