Commit b0de2d3e authored by Robert Kimball

input tensors defined

parent 073adbfb
......@@ -112,6 +112,7 @@ if (NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR AND
runtime/cpu/cpu_kernels.cpp
runtime/cpu/emitter.cpp
runtime/cpu/external_function.cpp
runtime/cpu/tensor_view.cpp
)
# LLVM binary builds are typically built without RTTI
# The built-in headers are in a version-specific directory
......
......@@ -59,7 +59,7 @@ public:
size_t size() const;
void set_pool_offset(size_t);
size_t get_pool_offset() const;
const element::Type& get_element_type() const { return m_element_type; }
static std::string make_tensor_name(const Node* node, size_t value_index);
protected:
......
......@@ -26,9 +26,14 @@ using namespace std;
using namespace ngraph;
using namespace ngraph::descriptor;
// Construct the memory-layout pass. `alignment` (in bytes) is stored and later
// handed to the MemoryManager that assigns pool offsets in run_on_call_graph.
pass::MemoryLayout::MemoryLayout(size_t alignment)
: m_alignment(alignment)
{
}
bool pass::MemoryLayout::run_on_call_graph(std::list<std::shared_ptr<Node>>& node_list)
{
MemoryManager mm;
MemoryManager mm(m_alignment);
for (shared_ptr<Node> node : node_list)
{
for (Tensor* tensor : node->liveness_new_list)
......
......@@ -33,9 +33,11 @@ namespace ngraph
class ngraph::pass::MemoryLayout : public CallGraphPass
{
public:
MemoryLayout(size_t alignment = 1);
virtual bool run_on_call_graph(std::list<std::shared_ptr<Node>>&) override;
private:
size_t m_alignment;
};
class ngraph::pass::MemoryManager
......
......@@ -24,6 +24,7 @@ using namespace ngraph::runtime;
std::shared_ptr<TensorView>
Backend::make_primary_tensor_view(const ngraph::element::Type& element_type, const Shape& shape)
{
NGRAPH_INFO;
return element_type.make_primary_tensor_view(shape);
}
......
......@@ -17,6 +17,7 @@
#include <memory>
#include "ngraph/common.hpp"
#include "ngraph/log.hpp"
#include "ngraph/runtime/ndarray.hpp"
namespace ngraph
......@@ -59,6 +60,7 @@ namespace ngraph
std::shared_ptr<ngraph::runtime::ParameterizedTensorView<ET>>
make_parameterized_tensor_view(const Shape& shape)
{
NGRAPH_INFO;
return std::dynamic_pointer_cast<ngraph::runtime::ParameterizedTensorView<ET>>(
make_primary_tensor_view(ET::element_type(), shape));
}
......@@ -67,6 +69,7 @@ namespace ngraph
std::shared_ptr<ngraph::runtime::ParameterizedTensorView<ET>>
make_parameterized_tensor_view(const NDArrayBase<typename ET::type>& ndarray)
{
NGRAPH_INFO;
auto result =
std::dynamic_pointer_cast<ngraph::runtime::ParameterizedTensorView<ET>>(
make_primary_tensor_view(ET::element_type(), ndarray.get_shape()));
......
......@@ -15,53 +15,68 @@
#include <algorithm>
#include "call_frame.hpp"
#include "ngraph/runtime/cpu/tensor_view.hpp"
using namespace std;
using namespace ngraph::runtime::cpu;
CallFrame::CallFrame(EntryPoint compiled_function,
size_t n_outputs,
size_t n_inputs,
const TensorViewPtrs& temps,
const std::vector<std::shared_ptr<CallFrame>>& callees)
: m_n_outputs(n_outputs)
, m_n_inputs(n_inputs)
, m_tensor_views(n_outputs + n_inputs + temps.size())
, m_compiled_function(compiled_function)
: m_compiled_function(compiled_function)
, m_callees(callees)
{
copy(temps.begin(), temps.end(), m_tensor_views.begin() + m_n_outputs + m_n_inputs);
}
void CallFrame::tensor_call(
const std::vector<std::shared_ptr<ngraph::runtime::TensorView>>& inputs,
const std::vector<std::shared_ptr<ngraph::runtime::TensorView>>& outputs)
const std::vector<std::shared_ptr<ngraph::runtime::TensorView>>& input_tvs,
const std::vector<std::shared_ptr<ngraph::runtime::TensorView>>& output_tvs)
{
copy(outputs.begin(), outputs.end(), m_tensor_views.begin());
copy(inputs.begin(), inputs.end(), m_tensor_views.begin() + m_n_outputs);
m_inputs.clear();
m_outputs.clear();
for (size_t i = 0; i < input_tvs.size(); i++)
{
shared_ptr<runtime::cpu::CPUTensorView> tv =
static_pointer_cast<runtime::cpu::CPUTensorView>(input_tvs[i]);
m_inputs.push_back(tv->get_data_ptr());
}
for (size_t i = 0; i < output_tvs.size(); i++)
{
shared_ptr<runtime::cpu::CPUTensorView> tv =
static_pointer_cast<runtime::cpu::CPUTensorView>(output_tvs[i]);
m_outputs.push_back(tv->get_data_ptr());
}
// Invoke compiled computation
m_compiled_function(this, m_tensor_views, m_callees);
// Don't hold onto inputs/outputs
fill_n(m_tensor_views.begin(), m_n_outputs + m_n_inputs, nullptr);
m_compiled_function(this);
}
void CallFrame::operator()(const std::vector<std::shared_ptr<ngraph::runtime::Value>>& arguments,
const std::vector<std::shared_ptr<ngraph::runtime::Value>>& results)
{
// TODO: Check types of args and result
std::vector<std::shared_ptr<ngraph::runtime::TensorView>> inputs;
for (auto argument : arguments)
vector<shared_ptr<ngraph::runtime::TensorView>> inputs;
for (shared_ptr<ngraph::runtime::Value> argument : arguments)
{
argument->collect_tensor_views(inputs, argument);
}
std::vector<std::shared_ptr<ngraph::runtime::TensorView>> outputs;
for (auto result : results)
vector<shared_ptr<ngraph::runtime::TensorView>> outputs;
for (shared_ptr<ngraph::runtime::Value> result : results)
{
result->collect_tensor_views(outputs, result);
}
tensor_call(inputs, outputs);
}
/// Raw pointer to the data buffer of input tensor `index`, as captured by the
/// most recent tensor_call.
/// @throws std::out_of_range when `index` is not a valid input slot.
void* CallFrame::get_input_data(size_t index)
{
    return m_inputs.at(index);
}
/// Raw pointer to the data buffer of output tensor `index`, as captured by the
/// most recent tensor_call.
/// @throws std::out_of_range when `index` is not a valid output slot.
void* CallFrame::get_output_data(size_t index)
{
    return m_outputs.at(index);
}
......@@ -32,18 +32,15 @@ namespace ngraph
{
class CallFrame;
using EntryPoint = std::function<void(ngraph::runtime::cpu::CallFrame*,
ngraph::runtime::TensorViewPtrs&,
const std::vector<std::shared_ptr<CallFrame>>&)>;
using EntryPoint_t = void(ngraph::runtime::cpu::CallFrame* call_frame);
using EntryPoint = std::function<EntryPoint_t>;
// Compile and execute graphs
class CallFrame : public ngraph::runtime::CallFrame
{
public:
CallFrame(EntryPoint compiled_function,
size_t n_outputs,
size_t n_inputs,
const TensorViewPtrs& temps,
const std::vector<std::shared_ptr<CallFrame>>& callees);
/// @brief Invoke the function with values matching the signature of the function.
......@@ -53,30 +50,25 @@ namespace ngraph
operator()(const std::vector<std::shared_ptr<ngraph::runtime::Value>>& inputs,
const std::vector<std::shared_ptr<ngraph::runtime::Value>>& outputs);
/// @brief Invoke the function with tuples pre-expanded to their underlying tensor views.
void tensor_call(const TensorViewPtrs& inputs, const TensorViewPtrs& outputs);
/// @brief Invoke the function with tuples pre-expanded to their underlying
/// tensor views.
void tensor_call(const std::vector<std::shared_ptr<TensorView>>& inputs,
const std::vector<std::shared_ptr<TensorView>>& outputs);
void set_return() { m_return = true; }
std::shared_ptr<TensorView> get_tensor_view(size_t i) { return m_tensor_views[i]; }
template <typename ET>
ParameterizedTensorView<ET>* get_parameterized_tensor_view(size_t i)
{
return m_tensor_views[i]->get_parameterized_tensor_view<ET>();
}
// const std::vector<std::shared_ptr<ngraph::runtime::Value>>& get_inputs();
// const std::vector<std::shared_ptr<ngraph::runtime::Value>>& get_outputs();
template <typename ET>
typename ET::type* get_tensor_view_data(size_t i)
{
return &get_parameterized_tensor_view<ET>(i)->get_vector()[0];
}
void* get_input_data(size_t index);
void* get_output_data(size_t index);
protected:
size_t m_n_outputs;
size_t m_n_inputs;
TensorViewPtrs m_tensor_views;
bool m_return;
EntryPoint m_compiled_function;
std::vector<std::shared_ptr<CallFrame>> m_callees;
std::vector<void*> m_inputs;
std::vector<void*> m_outputs;
};
}
}
......
......@@ -13,12 +13,25 @@
// ----------------------------------------------------------------------------
#include "ngraph/runtime/cpu/cpu_backend.hpp"
#include "ngraph/runtime/cpu/tensor_view.hpp"
#include "ngraph/runtime/external_function.hpp"
using namespace ngraph::runtime::cpu;
using namespace ngraph;
using namespace std;
std::shared_ptr<ngraph::runtime::CallFrame>
CPUBackend::make_call_frame(const std::shared_ptr<ExternalFunction>& external_function)
extern "C" void
allocate_aligned_buffer(size_t size, size_t alignment, char** allocated, char** aligned_ptr);
std::shared_ptr<ngraph::runtime::CallFrame> runtime::cpu::CPUBackend::make_call_frame(
const std::shared_ptr<ExternalFunction>& external_function)
{
return external_function->make_call_frame();
}
std::shared_ptr<ngraph::runtime::TensorView>
runtime::cpu::CPUBackend::make_primary_tensor_view(const ngraph::element::Type& element_type,
const Shape& shape)
{
auto rc = make_shared<runtime::cpu::CPUTensorView>(element_type, shape);
return dynamic_pointer_cast<runtime::TensorView>(rc);
}
......@@ -22,11 +22,18 @@ namespace ngraph
{
namespace cpu
{
class CPUBackend : public Backend
static size_t alignment = 64;
class CPUBackend : public runtime::Backend
{
public:
virtual std::shared_ptr<ngraph::runtime::CallFrame> make_call_frame(
const std::shared_ptr<ngraph::runtime::ExternalFunction>& external_function);
std::shared_ptr<ngraph::runtime::CallFrame> make_call_frame(
const std::shared_ptr<ngraph::runtime::ExternalFunction>& external_function)
override;
std::shared_ptr<ngraph::runtime::TensorView>
make_primary_tensor_view(const ngraph::element::Type& element_type,
const Shape& shape) override;
};
}
}
......
......@@ -37,25 +37,24 @@ namespace ngraph
using DynamicStrides = Eigen::Stride<Eigen::Dynamic, Eigen::Dynamic>;
using VectorStrides = Eigen::Stride<Eigen::Dynamic, 1>;
template <typename ET>
using DynamicArray =
Eigen::Array<typename ET::type, Eigen::Dynamic, Eigen::Dynamic>;
template <typename T>
using DynamicArray = Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic>;
template <typename ET>
using EigenArrayBase = Eigen::Map<DynamicArray<ET>, 0, DynamicStrides>;
template <typename T>
using EigenArrayBase = Eigen::Map<DynamicArray<T>, 0, DynamicStrides>;
template <typename ET>
using DynamicMatrix = Eigen::
Matrix<typename ET::type, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
template <typename T>
using DynamicMatrix =
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>;
template <typename ET>
using EigenMatrixBase = Eigen::Map<DynamicMatrix<ET>, 0, DynamicStrides>;
template <typename T>
using EigenMatrixBase = Eigen::Map<DynamicMatrix<T>, 0, DynamicStrides>;
template <typename ET>
using DynamicVector = Eigen::Matrix<typename ET::type, Eigen::Dynamic, 1>;
template <typename T>
using DynamicVector = Eigen::Matrix<T, Eigen::Dynamic, 1>;
template <typename ET>
using EigenVectorBase = Eigen::Map<DynamicVector<ET>, 0, VectorStrides>;
template <typename T>
using EigenVectorBase = Eigen::Map<DynamicVector<T>, 0, VectorStrides>;
namespace fmt
{
......@@ -117,7 +116,7 @@ namespace ngraph
// ET element type
// FMT array format (fmt::V for vector, etc.)
// BASE select array/matrix
template <typename ET,
template <typename T,
typename FMT,
typename BASE,
typename STRIDES = DynamicStrides>
......@@ -126,26 +125,19 @@ namespace ngraph
using base = BASE;
public:
EigenWrapper(typename ET::type* t, const FMT& fmt)
EigenWrapper(T* t, const FMT& fmt)
: base(t, fmt.l0, fmt.l1, STRIDES(fmt.s0, fmt.s1))
{
}
EigenWrapper(
typename ET::type* t,
T* t,
const std::shared_ptr<ngraph::descriptor::layout::DenseTensorViewLayout>&
layout)
: base(t, layout->get_size(), 1, DynamicStrides(1, 1))
{
}
EigenWrapper(CallFrame* call_frame, const TensorViewInfo& tensor_view_info)
: EigenWrapper(
call_frame->get_tensor_view_data<ET>(tensor_view_info.get_index()),
FMT(tensor_view_info))
{
}
template <typename U>
EigenWrapper& operator=(const U& other)
{
......@@ -154,17 +146,17 @@ namespace ngraph
}
};
template <typename ET, typename FMT = fmt::V>
using EigenArray1d = EigenWrapper<ET, FMT, EigenArrayBase<ET>>;
template <typename T, typename FMT = fmt::V>
using EigenArray1d = EigenWrapper<T, FMT, EigenArrayBase<T>>;
template <typename ET, typename FMT = fmt::M>
using EigenArray2d = EigenWrapper<ET, FMT, EigenArrayBase<ET>>;
template <typename T, typename FMT = fmt::M>
using EigenArray2d = EigenWrapper<T, FMT, EigenArrayBase<T>>;
template <typename ET, typename FMT = fmt::M>
using EigenMatrix = EigenWrapper<ET, FMT, EigenMatrixBase<ET>>;
template <typename T, typename FMT = fmt::M>
using EigenMatrix = EigenWrapper<T, FMT, EigenMatrixBase<T>>;
template <typename ET, typename FMT = fmt::V>
using EigenVector = EigenWrapper<ET, FMT, EigenVectorBase<ET>, VectorStrides>;
template <typename T, typename FMT = fmt::V>
using EigenVector = EigenWrapper<T, FMT, EigenVectorBase<T>, VectorStrides>;
}
}
}
......
......@@ -34,8 +34,10 @@
#include "ngraph/runtime/cpu/emitter.hpp"
#include "ngraph/runtime/cpu/external_function.hpp"
#include "ngraph/runtime/tensor_view_info.hpp"
#include "ngraph/util.hpp"
using namespace std;
using namespace ngraph;
using namespace ngraph::runtime::cpu;
using ngraph::descriptor::layout::DenseTensorViewLayout;
......@@ -54,34 +56,18 @@ static unordered_map<type_index, string> element_type_names = {
#define EIGEN_VECTOR_FORMAT(x) "fmt::V{" + to_string(x) + "}"
string eigen_vector_format(const runtime::TensorViewInfo& info)
{
stringstream ss;
ss << "fmt::V{" << info.get_layout<DenseTensorViewLayout>()->get_size() << "}";
return ss.str();
}
static std::string EIGEN_MATRIX_FORMAT(const ngraph::Shape& shape, const ngraph::Strides& strides)
{
std::string I;
for (size_t i = 0; i < shape.size(); i++)
{
if (!i)
{
I += "fmt::M{{" + to_string(shape[i]);
}
else
{
I += ", " + to_string(shape[i]);
}
}
I += "}, ";
for (size_t i = 0; i < strides.size(); i++)
{
if (!i)
{
I += "{" + to_string(strides[i]);
}
else
{
I += ", " + to_string(strides[i]);
}
}
I += "}}";
return I;
stringstream ss;
ss << "fmt::M{{" << join(shape) << "}, {" << join(strides) << "}}";
return ss.str();
}
void Emitter::EMITTER_DECL(EmitNop)
......@@ -93,18 +79,21 @@ void Emitter::EMITTER_DECL(EmitAdd)
const element::Type& et =
(dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type()))
->get_element_type();
TU += " {\n"
" auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" + to_string(inputs[0].get_index()) + ");\n"
" auto arg1 = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" + to_string(inputs[1].get_index()) + ");\n"
" auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" + to_string(outputs[0].get_index()) + ");\n"
" EigenArray1d<" + element_type_names[TI(et)] + ">(out, "
EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
" EigenArray1d<" + element_type_names[TI(et)] + ">(arg0, "
EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") +\n"
" EigenArray1d<" + element_type_names[TI(et)] + ">(arg1, "
EIGEN_VECTOR_FORMAT(inputs[1].get_layout<DenseTensorViewLayout>()->get_size()) ");\n"
" }\n";
string type = et.c_type_string();
TU.indent++;
TU << "{ // " << n->get_name() << "\n";
TU.indent++;
TU << "EigenArray1d<" << type << ">(" << outputs[0].get_tensor().get_name() << ", "
<< eigen_vector_format(outputs[0]) << ") =\n";
TU.indent++;
TU << "EigenArray1d<" << type << ">(" << inputs[0].get_tensor().get_name() << ", "
<< eigen_vector_format(inputs[0]) << ") +\n";
TU << "EigenArray1d<" << type << ">(" << inputs[1].get_tensor().get_name() << ", "
<< eigen_vector_format(inputs[1]) << ");\n";
TU.indent -= 2;
TU << "}\n";
TU.indent--;
}
void Emitter::EMITTER_DECL(EmitDot)
......@@ -248,16 +237,14 @@ void Emitter::EMITTER_DECL(EmitMultiply)
const element::Type& et =
(dynamic_pointer_cast<const TensorViewType>(n->get_arguments().at(0)->get_value_type()))
->get_element_type();
string type = et.c_type_string();
TU += " {\n"
" auto arg0 = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" + to_string(inputs[0].get_index()) + ");\n"
" auto arg1 = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" + to_string(inputs[1].get_index()) + ");\n"
" auto out = call_frame->get_tensor_view_data<" + element_type_names[TI(et)] + ">(" + to_string(outputs[0].get_index()) + ");\n"
" EigenArray1d<" + element_type_names[TI(et)] + ">(out, "
TU += " { // " + n->get_name() + "\n"
" EigenArray1d<" + type + ">(" + outputs[0].get_tensor().get_name() + ", "
EIGEN_VECTOR_FORMAT(outputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") =\n"
" EigenArray1d<" + element_type_names[TI(et)] + ">(arg0, "
" EigenArray1d<" + type + ">(" + inputs[0].get_tensor().get_name() + ", "
EIGEN_VECTOR_FORMAT(inputs[0].get_layout<DenseTensorViewLayout>()->get_size()) ") *\n"
" EigenArray1d<" + element_type_names[TI(et)] + ">(arg1, "
" EigenArray1d<" + type + ">(" + inputs[1].get_tensor().get_name() + ", "
EIGEN_VECTOR_FORMAT(inputs[1].get_layout<DenseTensorViewLayout>()->get_size()) ");\n"
" }\n";
}
......@@ -1058,6 +1045,7 @@ void Emitter::EMITTER_DECL(EmitReshape)
void Emitter::EMITTER_DECL(EmitFunctionCall)
{
NGRAPH_INFO;
auto function_call = static_cast<const op::FunctionCall*>(n);
auto function = function_call->get_function();
......@@ -1073,6 +1061,7 @@ void Emitter::EMITTER_DECL(EmitFunctionCall)
function_map.insert({function, external});
}
NGRAPH_INFO;
std::shared_ptr<CallFrame> cf =
std::dynamic_pointer_cast<CallFrame>(external->make_call_frame());
......@@ -1110,6 +1099,7 @@ void Emitter::EMITTER_DECL(EmitFunctionCall)
void Emitter::EMITTER_DECL(EmitReduce)
{
NGRAPH_INFO;
auto reduce = static_cast<const op::Reduce*>(n);
auto reduction_function = reduce->get_reduction_function();
......
......@@ -162,12 +162,20 @@ static const OpMap dispatcher{
{TI(ngraph::op::Atan), &Emitter::EmitAtan},
};
#undef TI
static unordered_map<type_index, string> element_type_names = {
{TI(ngraph::element::Bool), "Bool"},
{TI(ngraph::element::Float32), "Float32"},
{TI(ngraph::element::Int8), "Int8"},
{TI(ngraph::element::Int32), "Int32"},
{TI(ngraph::element::Int64), "Int64"},
{TI(ngraph::element::UInt8), "UInt8"},
{TI(ngraph::element::UInt32), "UInt32"},
{TI(ngraph::element::UInt64), "UInt64"}};
ExternalFunction::ExternalFunction(const std::shared_ptr<ngraph::Function>& function,
bool release_function)
: ngraph::runtime::ExternalFunction(function, release_function)
, compiled_function(nullptr)
, m_compiled_function(nullptr)
{
}
......@@ -183,7 +191,7 @@ void ExternalFunction::compile(FunctionMap& function_map)
// For now, just make everyone row-major.
pass_manager.register_pass<pass::AssignLayout<DenseTensorViewLayout>>();
pass_manager.register_pass<pass::Liveness>();
pass_manager.register_pass<pass::MemoryLayout>();
pass_manager.register_pass<pass::MemoryLayout>(64);
pass_manager.run_passes(m_function);
// Determine tensor requirements for the call frame
......@@ -228,7 +236,10 @@ void ExternalFunction::compile(FunctionMap& function_map)
// Now we build the TU
Emitter emitter;
codegen::CodeWriter& TU = emitter.get_code_writer();
TU += R"(// Generated by the NGraph CPU backend
string function_name = m_function->get_name() + "_entrypoint";
TU +=
R"(// Generated by the NGraph CPU backend
#include <algorithm>
#include <cmath>
#include <memory>
......@@ -246,16 +257,20 @@ using namespace ngraph::element;
using namespace ngraph::runtime;
using namespace ngraph::runtime::cpu::eigen;
extern "C" void allocate_aligned_buffer(size_t size, size_t alignment, char** allocated, char** aligned_ptr);
extern "C" void free_aligned_buffer(void* allocated);
extern "C" void allocate_aligned_buffer(
size_t size,
size_t alignment,
char** allocated,
char** aligned_ptr);
extern "C" void free_aligned_buffer(void* allocated);
extern "C" void __entrypoint(ngraph::runtime::cpu::CallFrame* call_frame,
ngraph::runtime::TensorViewPtrs& tensor_views,
const std::vector<std::shared_ptr<ngraph::runtime::cpu::CallFrame>>& callees)
{
)";
TU << "extern \"C\" void " << function_name << "(\n";
TU << " ngraph::runtime::cpu::CallFrame * call_frame)\n";
TU << "{\n";
TU.indent++;
TU << "// Allocate the memory pool\n";
size_t temp_pool_size = pass_manager.get_state().get_temporary_pool_size();
......@@ -265,19 +280,50 @@ extern "C" void __entrypoint(ngraph::runtime::cpu::CallFrame* call_frame,
<< ", &allocated_buffer_pool, &aligned_buffer_pool);\n";
TU << "\n";
TU << "// Define tensors\n";
// for (shared_ptr<Node> node : m_function->get_ordered_ops())
// {
// NGRAPH_INFO << *node;
// for (descriptor::Tensor* tensor : node->liveness_new_list)
// {
// NGRAPH_INFO << *tensor;
// }
// }
TU << "// Define temporary tensors\n";
for (shared_ptr<Node> node : m_function->get_ordered_ops())
{
for (descriptor::Tensor* tensor : node->liveness_new_list)
{
TU << tensor->get_element_type() << "* " << tensor->get_name() << " = ("
<< tensor->get_element_type() << "*)(aligned_buffer_pool + "
<< tensor->get_pool_offset() << ");\n";
}
}
TU << "\n";
TU << "// Define inputs\n";
size_t arg_index = 0;
for (shared_ptr<op::Parameter> param : m_function->get_parameters())
{
for (const descriptor::Output& output : param->get_outputs())
{
shared_ptr<descriptor::TensorView> tv = output.get_tensor_view();
const element::Type& et = tv->get_tensor_view_type()->get_element_type();
string type = et.c_type_string();
TU << "" << type << "* " << tv->get_tensor().get_name() << " = static_cast<" << type
<< "*>(call_frame->get_input_data(" << arg_index << "));\n";
arg_index++;
}
}
TU << "\n";
TU << "// Define outputs\n";
size_t output_index = 0;
for (const descriptor::Output& output : m_function->get_result()->get_outputs())
{
shared_ptr<descriptor::TensorView> tv = output.get_tensor_view();
const element::Type& et = tv->get_tensor_view_type()->get_element_type();
string type = et.c_type_string();
TU << type << "* " << tv->get_tensor().get_name() << " = static_cast<" << type
<< "*>(call_frame->get_output_data(" << output_index << "));\n";
output_index++;
}
TU << "\n";
TU << "// Define tensor views\n";
TU << "\n";
TU.indent--;
for (shared_ptr<Node> node : m_function->get_ordered_ops())
......@@ -328,11 +374,8 @@ extern "C" void __entrypoint(ngraph::runtime::cpu::CallFrame* call_frame,
assert(llvm_module);
estate.add_module(llvm_module);
estate.finalize();
compiled_function =
estate.find_function<void(ngraph::runtime::cpu::CallFrame*,
ngraph::runtime::TensorViewPtrs&,
const std::vector<std::shared_ptr<CallFrame>>&)>("__entrypoint");
assert(compiled_function);
m_compiled_function = estate.find_function<EntryPoint_t>(function_name);
assert(m_compiled_function);
m_is_compiled = true;
if (m_release_function)
......@@ -341,53 +384,6 @@ extern "C" void __entrypoint(ngraph::runtime::cpu::CallFrame* call_frame,
}
}
// Suppress Clang's complaints about the ,##__VA_ARGS__ token-pasting hack, which is a GNU extension
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wgnu-zero-variadic-macro-arguments"
#define DO_ON_ELEMENT_TYPE(et, err_msg, macro, ...) \
{ \
if (et == element::Bool::element_type()) \
{ \
macro(element::Bool, ##__VA_ARGS__); \
} \
else if (et == element::Float32::element_type()) \
{ \
macro(element::Float32, ##__VA_ARGS__); \
} \
else if (et == element::Int8::element_type()) \
{ \
macro(element::Int8, ##__VA_ARGS__); \
} \
else if (et == element::Int32::element_type()) \
{ \
macro(element::Int32, ##__VA_ARGS__); \
} \
else if (et == element::Int64::element_type()) \
{ \
macro(element::Int64, ##__VA_ARGS__); \
} \
else if (et == element::UInt8::element_type()) \
{ \
macro(element::UInt8, ##__VA_ARGS__); \
} \
else if (et == element::UInt32::element_type()) \
{ \
macro(element::UInt32, ##__VA_ARGS__); \
} \
else if (et == element::UInt64::element_type()) \
{ \
macro(element::UInt64, ##__VA_ARGS__); \
} \
else \
{ \
throw ngraph_error(err_msg); \
} \
}
// Turn off complaint suppression (see above)
#pragma clang diagnostic pop
shared_ptr<ngraph::runtime::CallFrame> ExternalFunction::make_call_frame()
{
FunctionMap function_map;
......@@ -397,17 +393,5 @@ shared_ptr<ngraph::runtime::CallFrame> ExternalFunction::make_call_frame()
compile(function_map);
}
std::vector<std::shared_ptr<ngraph::runtime::TensorView>> temps;
for (auto tv : m_temp_views)
{
auto& et = tv->get_tensor_view_type()->get_element_type();
auto shape = tv->get_tensor_view_type()->get_shape();
#define M(T) temps.push_back(ngraph::runtime::make_tensor<T>(shape));
DO_ON_ELEMENT_TYPE(
et, "Internal error: tried to create temporary for unhandled element type", M);
#undef M
}
return make_shared<ngraph::runtime::cpu::CallFrame>(
compiled_function, m_n_outputs, m_n_inputs, temps, callees);
return make_shared<ngraph::runtime::cpu::CallFrame>(m_compiled_function, callees);
}
......@@ -22,6 +22,7 @@
#include "ngraph/codegen/compiler.hpp"
#include "ngraph/function.hpp"
#include "ngraph/runtime/cpu/call_frame.hpp"
#include "ngraph/runtime/external_function.hpp"
#include "ngraph/runtime/tensor_view_info.hpp"
......@@ -47,11 +48,6 @@ namespace ngraph
using OpMap = std::unordered_map<std::type_index, OpFunction>;
using EntryPoint = std::function<void(
ngraph::runtime::cpu::CallFrame*,
ngraph::runtime::TensorViewPtrs&,
const std::vector<std::shared_ptr<ngraph::runtime::cpu::CallFrame>>&)>;
class ExternalFunction : public ngraph::runtime::ExternalFunction
{
public:
......@@ -65,7 +61,7 @@ namespace ngraph
size_t m_n_inputs;
size_t m_n_outputs;
ngraph::descriptor::TensorViewPtrs m_temp_views;
EntryPoint compiled_function;
EntryPoint m_compiled_function;
std::vector<std::shared_ptr<CallFrame>> callees;
};
}
......
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
#include <memory>
#include "cpu_backend.hpp"
#include "tensor_view.hpp"
using namespace ngraph;
using namespace std;
extern "C" void
allocate_aligned_buffer(size_t size, size_t alignment, char** allocated, char** aligned_ptr);
extern "C" void free_aligned_buffer(void* allocated);
// Construct a CPU tensor view backed by a freshly allocated, aligned host
// buffer. The descriptor is given a dense layout, and the buffer is sized as
// element_count * element_size bytes.
// (Fix: removed leftover NGRAPH_INFO debug trace of the buffer size.)
runtime::cpu::CPUTensorView::CPUTensorView(const ngraph::element::Type& element_type,
                                           const Shape& shape)
    : runtime::TensorView(std::make_shared<ngraph::descriptor::PrimaryTensorView>(
          std::make_shared<ngraph::TensorViewType>(element_type, shape), "external", true, true))
{
    m_descriptor->set_tensor_view_layout(
        std::make_shared<ngraph::descriptor::layout::DenseTensorViewLayout>(*m_descriptor));
    m_buffer_size = m_descriptor->get_tensor_view_layout()->get_size() * element_type.size();
    // allocate_aligned_buffer yields the raw allocation (m_allocated, released in
    // the destructor) and an aligned pointer used for element data (m_buffer).
    allocate_aligned_buffer(m_buffer_size, runtime::cpu::alignment, &m_allocated, &m_buffer);
}
// Release the backing allocation. Only m_allocated (the raw pointer returned by
// allocate_aligned_buffer) may be freed; m_buffer points into that allocation.
// (Fix: removed two leftover NGRAPH_INFO debug statements that logged on every
// tensor destruction.)
runtime::cpu::CPUTensorView::~CPUTensorView()
{
    free_aligned_buffer(m_allocated);
}
// Mutable pointer to the aligned element storage of this tensor.
char* runtime::cpu::CPUTensorView::get_data_ptr()
{
return m_buffer;
}
// Read-only pointer to the aligned element storage of this tensor.
const char* runtime::cpu::CPUTensorView::get_data_ptr() const
{
return m_buffer;
}
// Copy `n` bytes from `source` into tensor storage starting at byte offset
// `tensor_offset`.
// @throws std::out_of_range if [tensor_offset, tensor_offset + n) does not fit
//         inside the tensor's buffer.
// (Fix: the old check `tensor_offset + n > m_buffer_size` could wrap around for
// very large `n` and silently pass; removed leftover NGRAPH_INFO debug logging.)
void runtime::cpu::CPUTensorView::write(const void* source, size_t tensor_offset, size_t n)
{
    // Overflow-safe equivalent of `tensor_offset + n > m_buffer_size`.
    if (tensor_offset > m_buffer_size || n > m_buffer_size - tensor_offset)
    {
        throw out_of_range("write access past end of tensor");
    }
    char* target = get_data_ptr();
    memcpy(&target[tensor_offset], source, n);
}
// Copy `n` bytes out of tensor storage, starting at byte offset
// `tensor_offset`, into `target`.
// @throws std::out_of_range if [tensor_offset, tensor_offset + n) does not fit
//         inside the tensor's buffer.
// (Fix: the old check `tensor_offset + n > m_buffer_size` could wrap around for
// very large `n` and silently pass; removed leftover NGRAPH_INFO debug logging.)
void runtime::cpu::CPUTensorView::read(void* target, size_t tensor_offset, size_t n) const
{
    // Overflow-safe equivalent of `tensor_offset + n > m_buffer_size`.
    if (tensor_offset > m_buffer_size || n > m_buffer_size - tensor_offset)
    {
        throw out_of_range("read access past end of tensor");
    }
    const char* source = get_data_ptr();
    memcpy(target, &source[tensor_offset], n);
}
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------
#pragma once
#include <memory>
#include "ngraph/runtime/tensor_view.hpp"
#include "ngraph/types/element_type.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
class CPUTensorView;
}
}
}
// Concrete TensorView for the CPU backend: owns a contiguous, aligned host
// buffer holding the tensor's elements (see tensor_view.cpp for allocation).
class ngraph::runtime::cpu::CPUTensorView : public ngraph::runtime::TensorView
{
public:
// Allocates an aligned buffer sized for `shape` elements of `element_type`.
CPUTensorView(const ngraph::element::Type& element_type, const Shape& shape);
// Frees the buffer allocated by the constructor.
virtual ~CPUTensorView();
// Raw access to the aligned element storage.
char* get_data_ptr();
const char* get_data_ptr() const;
/// @brief Write bytes directly into the tensor
/// @param p Pointer to source of data
/// @param tensor_offset Offset into tensor storage to begin writing. Must be element-aligned.
/// @param n Number of bytes to write, must be integral number of elements.
void write(const void* p, size_t tensor_offset, size_t n) override;
/// @brief Read bytes directly from the tensor
/// @param p Pointer to destination for data
/// @param tensor_offset Offset into tensor storage to begin reading. Must be element-aligned.
/// @param n Number of bytes to read, must be integral number of elements.
void read(void* p, size_t tensor_offset, size_t n) const override;
private:
char* m_allocated; // raw allocation returned by allocate_aligned_buffer; this is what gets freed
char* m_buffer; // aligned pointer into m_allocated; element data lives here
size_t m_buffer_size; // size of the element-data region, in bytes
};
......@@ -179,6 +179,8 @@ namespace ngraph
const vtype get_vector() const { return m_elements; }
operator const vtype() const { return m_elements; }
operator vtype() { return m_elements; }
void* data() { return m_elements.data(); }
const void* data() const { return m_elements.data(); }
bool operator==(const NDArrayBase<T>& other) const
{
return m_shape == other.m_shape && m_elements == other.m_elements;
......
......@@ -18,8 +18,11 @@
#include <vector>
#include "ngraph/descriptor/tensor_view.hpp"
#include "ngraph/log.hpp"
#include "ngraph/runtime/ndarray.hpp"
#include "ngraph/runtime/value.hpp"
#include "ngraph/shape.hpp"
#include "ngraph/util.hpp"
namespace ngraph
{
......@@ -42,7 +45,6 @@ namespace ngraph
}
public:
TensorView() {}
virtual ~TensorView() {}
template <typename ET>
ParameterizedTensorView<ET>* get_parameterized_tensor_view()
......@@ -75,6 +77,29 @@ namespace ngraph
/// @param n Number of bytes to read, must be integral number of elements.
virtual void read(void* p, size_t tensor_offset, size_t n) const = 0;
// This is for unit test only
// Unit-test convenience: element-wise equality against an NDArray.
// True iff the shapes match and the tensor's contents, read back through
// read(), compare equal to the NDArray's elements.
// NOTE(review): assumes T matches the tensor's stored element type
// (byte-for-byte), since the comparison is done via a raw read — confirm at
// call sites.
template <typename T>
bool operator==(const NDArrayBase<T>& ndarray) const
{
bool rc = false;
if (get_shape() == ndarray.get_shape())
{
std::vector<T> lhs(ndarray.get_vector().size());
read(lhs.data(), 0, ndarray.get_vector().size() * sizeof(T));
rc = (lhs == ndarray.get_vector());
}
return rc;
}
// Copy the full tensor contents out as a std::vector<T>.
// NOTE(review): assumes T matches the tensor's stored element type — the
// byte count is computed as shape_size * sizeof(T); confirm at call sites.
template <typename T>
std::vector<T> get_vector()
{
size_t element_count = shape_size(get_shape());
size_t size = element_count * sizeof(T);
std::vector<T> rc(element_count);
read(rc.data(), 0, size);
return rc;
}
protected:
std::shared_ptr<ngraph::descriptor::TensorView> m_descriptor;
};
......
......@@ -27,9 +27,10 @@ namespace ngraph
{
public:
TensorViewInfo(size_t index,
const std::shared_ptr<const ngraph::descriptor::TensorView>& descriptor)
std::shared_ptr<const ngraph::descriptor::TensorView> descriptor)
: m_index(index)
, m_layout(descriptor->get_tensor_view_layout())
, m_tensor_view(descriptor)
{
}
......@@ -46,9 +47,20 @@ namespace ngraph
return std::static_pointer_cast<LT>(m_layout);
}
// The tensor-view descriptor this info object was constructed from.
std::shared_ptr<const ngraph::descriptor::TensorView> get_tensor_view() const
{
return m_tensor_view;
}
// The underlying descriptor Tensor (e.g. for its generated name).
const ngraph::descriptor::Tensor& get_tensor() const
{
return m_tensor_view->get_tensor();
}
protected:
size_t m_index;
std::shared_ptr<ngraph::descriptor::layout::TensorViewLayout> m_layout;
std::shared_ptr<const ngraph::descriptor::TensorView> m_tensor_view;
};
}
}
This source diff could not be displayed because it is too large. You can view the blob instead.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment