Commit 0dbeb06e authored by Robert Kimball's avatar Robert Kimball

wip

parent f2a93568
...@@ -123,3 +123,49 @@ void runtime::Executable::save(std::ostream& output_stream) ...@@ -123,3 +123,49 @@ void runtime::Executable::save(std::ostream& output_stream)
{ {
throw runtime_error("save opertion unimplemented."); throw runtime_error("save opertion unimplemented.");
} }
// Work-in-progress stub for creating the tensors that back one input of this executable.
// As written it ignores both `input_number` and `pipeline_depth` and always returns an
// empty vector.
//
// The commented-out body below is a sketch of the intended logic, kept for reference. It
// cannot be re-enabled as-is: it refers to `index`, `memory_pointer` and `tensor`, none of
// which are declared in this function (the parameters here are `input_number` and
// `pipeline_depth`, and the local is `tensors`).
// NOTE(review): presumably `pipeline_depth` is meant to be the number of tensors returned
// -- confirm before implementing.
vector<shared_ptr<runtime::Tensor>> runtime::Executable::create_input_tensor(size_t input_number,
size_t pipeline_depth)
{
// Result container; nothing is ever added to it in the current stub.
vector<shared_ptr<runtime::Tensor>> tensors;
// if (m_backend)
// {
// const ParameterVector& parameters = get_parameters();
// if (index >= parameters.size())
// {
// throw runtime_error("create_tensor for input out of bounds");
// }
// shared_ptr<op::Parameter> parameter = parameters[index];
// tensor = m_backend->create_tensor(
// parameter->get_element_type(), parameter->get_shape(), memory_pointer);
// tensor->m_source_node = parameter;
// }
// else
// {
// throw runtime_error("Backend does not support Executable::create_tensor");
// }
return tensors;
}
// Work-in-progress stub for creating the tensors that back one output of this executable.
// As written it ignores both parameters and always returns an empty vector.
//
// NOTE(review): the first parameter is named `input_number` although this function deals
// with outputs -- looks like a copy/paste from create_input_tensor; confirm the intended
// name.
//
// The commented-out body below is a sketch of the intended logic, kept for reference. It
// cannot be re-enabled as-is: it refers to `index`, `memory_pointer` and `tensor`, none of
// which are declared in this function. Its bounds-check message also says "input" where
// this function handles results.
vector<shared_ptr<runtime::Tensor>> runtime::Executable::create_output_tensor(size_t input_number,
size_t pipeline_depth)
{
// Result container; nothing is ever added to it in the current stub.
vector<shared_ptr<runtime::Tensor>> tensors;
// if (m_backend)
// {
// const ResultVector& results = get_results();
// if (index >= results.size())
// {
// throw runtime_error("create_tensor for input out of bounds");
// }
// shared_ptr<op::Result> result = results[index];
// tensor = m_backend->create_tensor(
// result->get_element_type(), result->get_shape(), memory_pointer);
// tensor->m_source_node = result;
// }
// else
// {
// throw runtime_error("Backend does not support Executable::create_tensor");
// }
return tensors;
}
...@@ -73,6 +73,11 @@ public: ...@@ -73,6 +73,11 @@ public:
/// Saved stream may be read with Backend::load /// Saved stream may be read with Backend::load
virtual void save(std::ostream& output_stream); virtual void save(std::ostream& output_stream);
/// \brief Create tensors for one input of this executable.
/// \param input_number Selects the input. Presumably an index into get_parameters();
///     TODO confirm once the implementation is filled in.
/// \param pipeline_depth Defaults to 1. Presumably the number of tensors to create;
///     TODO confirm.
/// \returns A vector of tensors.
/// NOTE(review): a default argument on a virtual function is bound by the static type at
/// the call site, so overrides should repeat the same default.
virtual std::vector<std::shared_ptr<runtime::Tensor>> create_input_tensor(size_t input_number,
size_t pipeline_depth = 1);
/// \brief Create tensors for one output of this executable.
/// \param input_number Selects the output. NOTE(review): the name says "input" although
///     this function is for outputs; presumably an index into get_results() -- confirm.
/// \param pipeline_depth Defaults to 1. Presumably the number of tensors to create;
///     TODO confirm.
/// \returns A vector of tensors.
/// NOTE(review): a default argument on a virtual function is bound by the static type at
/// the call site, so overrides should repeat the same default.
virtual std::vector<std::shared_ptr<runtime::Tensor>> create_output_tensor(size_t input_number,
size_t pipeline_depth = 1);
protected: protected:
/// \brief Called at the end of compile to the values to be returned by get_parameters /// \brief Called at the end of compile to the values to be returned by get_parameters
/// and get_results /// and get_results
......
...@@ -42,68 +42,68 @@ void set_denormals_flush_to_zero() ...@@ -42,68 +42,68 @@ void set_denormals_flush_to_zero()
} }
template <typename T> template <typename T>
void init_int_tensor(shared_ptr<runtime::Tensor> tensor, T min, T max) void init_int_tv(shared_ptr<runtime::Tensor> tv, T min, T max)
{ {
size_t size = tensor->get_element_count(); size_t size = tv->get_element_count();
uniform_int_distribution<T> dist(min, max); uniform_int_distribution<T> dist(min, max);
vector<T> vec(size); vector<T> vec(size);
for (T& element : vec) for (T& element : vec)
{ {
element = dist(s_random_engine); element = dist(s_random_engine);
} }
tensor->write(vec.data(), vec.size() * sizeof(T)); tv->write(vec.data(), vec.size() * sizeof(T));
} }
template <> template <>
void init_int_tensor<char>(shared_ptr<runtime::Tensor> tensor, char min, char max) void init_int_tv<char>(shared_ptr<runtime::Tensor> tv, char min, char max)
{ {
size_t size = tensor->get_element_count(); size_t size = tv->get_element_count();
uniform_int_distribution<int16_t> dist(static_cast<short>(min), static_cast<short>(max)); uniform_int_distribution<int16_t> dist(static_cast<short>(min), static_cast<short>(max));
vector<char> vec(size); vector<char> vec(size);
for (char& element : vec) for (char& element : vec)
{ {
element = static_cast<char>(dist(s_random_engine)); element = static_cast<char>(dist(s_random_engine));
} }
tensor->write(vec.data(), vec.size() * sizeof(char)); tv->write(vec.data(), vec.size() * sizeof(char));
} }
template <> template <>
void init_int_tensor<int8_t>(shared_ptr<runtime::Tensor> tensor, int8_t min, int8_t max) void init_int_tv<int8_t>(shared_ptr<runtime::Tensor> tv, int8_t min, int8_t max)
{ {
size_t size = tensor->get_element_count(); size_t size = tv->get_element_count();
uniform_int_distribution<int16_t> dist(static_cast<short>(min), static_cast<short>(max)); uniform_int_distribution<int16_t> dist(static_cast<short>(min), static_cast<short>(max));
vector<int8_t> vec(size); vector<int8_t> vec(size);
for (int8_t& element : vec) for (int8_t& element : vec)
{ {
element = static_cast<int8_t>(dist(s_random_engine)); element = static_cast<int8_t>(dist(s_random_engine));
} }
tensor->write(vec.data(), vec.size() * sizeof(int8_t)); tv->write(vec.data(), vec.size() * sizeof(int8_t));
} }
template <> template <>
void init_int_tensor<uint8_t>(shared_ptr<runtime::Tensor> tensor, uint8_t min, uint8_t max) void init_int_tv<uint8_t>(shared_ptr<runtime::Tensor> tv, uint8_t min, uint8_t max)
{ {
size_t size = tensor->get_element_count(); size_t size = tv->get_element_count();
uniform_int_distribution<int16_t> dist(static_cast<short>(min), static_cast<short>(max)); uniform_int_distribution<int16_t> dist(static_cast<short>(min), static_cast<short>(max));
vector<uint8_t> vec(size); vector<uint8_t> vec(size);
for (uint8_t& element : vec) for (uint8_t& element : vec)
{ {
element = static_cast<uint8_t>(dist(s_random_engine)); element = static_cast<uint8_t>(dist(s_random_engine));
} }
tensor->write(vec.data(), vec.size() * sizeof(uint8_t)); tv->write(vec.data(), vec.size() * sizeof(uint8_t));
} }
template <typename T> template <typename T>
void init_real_tensor(shared_ptr<runtime::Tensor> tensor, T min, T max) void init_real_tv(shared_ptr<runtime::Tensor> tv, T min, T max)
{ {
size_t size = tensor->get_element_count(); size_t size = tv->get_element_count();
uniform_real_distribution<T> dist(min, max); uniform_real_distribution<T> dist(min, max);
vector<T> vec(size); vector<T> vec(size);
for (T& element : vec) for (T& element : vec)
{ {
element = dist(s_random_engine); element = dist(s_random_engine);
} }
tensor->write(vec.data(), vec.size() * sizeof(T)); tv->write(vec.data(), vec.size() * sizeof(T));
} }
static void random_init(shared_ptr<runtime::Tensor> tensor) static void random_init(shared_ptr<runtime::Tensor> tensor)
...@@ -116,17 +116,17 @@ static void random_init(shared_ptr<runtime::Tensor> tensor) ...@@ -116,17 +116,17 @@ static void random_init(shared_ptr<runtime::Tensor> tensor)
#endif #endif
switch (et.get_type_enum()) switch (et.get_type_enum())
{ {
case element::Type_t::boolean: init_int_tensor<char>(tensor, 0, 1); break; case element::Type_t::boolean: init_int_tv<char>(tensor, 0, 1); break;
case element::Type_t::f32: init_real_tensor<float>(tensor, -1, 1); break; case element::Type_t::f32: init_real_tv<float>(tensor, -1, 1); break;
case element::Type_t::f64: init_real_tensor<double>(tensor, -1, 1); break; case element::Type_t::f64: init_real_tv<double>(tensor, -1, 1); break;
case element::Type_t::i8: init_int_tensor<int8_t>(tensor, -1, 1); break; case element::Type_t::i8: init_int_tv<int8_t>(tensor, -1, 1); break;
case element::Type_t::i16: init_int_tensor<int16_t>(tensor, -1, 1); break; case element::Type_t::i16: init_int_tv<int16_t>(tensor, -1, 1); break;
case element::Type_t::i32: init_int_tensor<int32_t>(tensor, 0, 1); break; case element::Type_t::i32: init_int_tv<int32_t>(tensor, 0, 1); break;
case element::Type_t::i64: init_int_tensor<int64_t>(tensor, 0, 1); break; case element::Type_t::i64: init_int_tv<int64_t>(tensor, 0, 1); break;
case element::Type_t::u8: init_int_tensor<uint8_t>(tensor, 0, 1); break; case element::Type_t::u8: init_int_tv<uint8_t>(tensor, 0, 1); break;
case element::Type_t::u16: init_int_tensor<uint16_t>(tensor, 0, 1); break; case element::Type_t::u16: init_int_tv<uint16_t>(tensor, 0, 1); break;
case element::Type_t::u32: init_int_tensor<uint32_t>(tensor, 0, 1); break; case element::Type_t::u32: init_int_tv<uint32_t>(tensor, 0, 1); break;
case element::Type_t::u64: init_int_tensor<uint64_t>(tensor, 0, 1); break; case element::Type_t::u64: init_int_tv<uint64_t>(tensor, 0, 1); break;
case element::Type_t::undefined: case element::Type_t::undefined:
case element::Type_t::dynamic: case element::Type_t::dynamic:
case element::Type_t::bf16: case element::Type_t::bf16:
...@@ -245,99 +245,99 @@ vector<runtime::PerformanceCounter> run_benchmark_double_buffered(shared_ptr<Fun ...@@ -245,99 +245,99 @@ vector<runtime::PerformanceCounter> run_benchmark_double_buffered(shared_ptr<Fun
cout << "compile time: " << timer.get_milliseconds() << "ms" << endl; cout << "compile time: " << timer.get_milliseconds() << "ms" << endl;
set_denormals_flush_to_zero(); set_denormals_flush_to_zero();
array<vector<shared_ptr<runtime::HostTensor>>, 2> args_data_set; // array<vector<shared_ptr<runtime::HostTensor>>, 2> args_data_set;
array<vector<shared_ptr<runtime::Tensor>>, 2> args_set; // array<vector<shared_ptr<runtime::Tensor>>, 2> args_set;
array<vector<shared_ptr<runtime::HostTensor>>, 2> results_data_set; // array<vector<shared_ptr<runtime::HostTensor>>, 2> results_data_set;
array<vector<shared_ptr<runtime::Tensor>>, 2> results_set; // array<vector<shared_ptr<runtime::Tensor>>, 2> results_set;
for (size_t i = 0; i < 2; i++) // for (size_t i = 0; i < 2; i++)
{ // {
vector<shared_ptr<runtime::HostTensor>> args_data; // vector<shared_ptr<runtime::HostTensor>> args_data;
vector<shared_ptr<runtime::Tensor>> args; // vector<shared_ptr<runtime::Tensor>> args;
for (shared_ptr<op::Parameter> param : f->get_parameters()) // for (shared_ptr<op::Parameter> param : f->get_parameters())
{ // {
auto tensor = backend->create_tensor(param->get_element_type(), param->get_shape()); // auto tensor = backend->create_tensor(param->get_element_type(), param->get_shape());
auto tensor_data = // auto tensor_data =
make_shared<runtime::HostTensor>(param->get_element_type(), param->get_shape()); // make_shared<runtime::HostTensor>(param->get_element_type(), param->get_shape());
random_init(tensor_data); // random_init(tensor_data);
tensor->write(tensor_data->get_data_ptr(), // tensor->write(tensor_data->get_data_ptr(),
tensor_data->get_element_count() * // tensor_data->get_element_count() *
tensor_data->get_element_type().size()); // tensor_data->get_element_type().size());
args.push_back(tensor); // args.push_back(tensor);
args_data.push_back(tensor_data); // args_data.push_back(tensor_data);
} // }
args_set[i] = args; // args_set[i] = args;
args_data_set[i] = args_data; // args_data_set[i] = args_data;
vector<shared_ptr<runtime::Tensor>> results; // vector<shared_ptr<runtime::Tensor>> results;
vector<shared_ptr<runtime::HostTensor>> results_data; // vector<shared_ptr<runtime::HostTensor>> results_data;
for (shared_ptr<Node> out : f->get_results()) // for (shared_ptr<Node> out : f->get_results())
{ // {
auto result = backend->create_tensor(out->get_element_type(), out->get_shape()); // auto result = backend->create_tensor(out->get_element_type(), out->get_shape());
auto result_data = // auto result_data =
make_shared<runtime::HostTensor>(out->get_element_type(), out->get_shape()); // make_shared<runtime::HostTensor>(out->get_element_type(), out->get_shape());
results.push_back(result); // results.push_back(result);
results_data.push_back(result_data); // results_data.push_back(result_data);
} // }
results_set[i] = results; // results_set[i] = results;
results_data_set[i] = results_data; // results_data_set[i] = results_data;
} // }
stopwatch t1; // stopwatch t1;
// Before we start we write the first iteration's data // // Before we start we write the first iteration's data
size_t buffer_number = 0; // size_t buffer_number = 0;
auto args = args_set[buffer_number]; // auto args = args_set[buffer_number];
auto args_data = args_data_set[buffer_number]; // auto args_data = args_data_set[buffer_number];
for (size_t arg_index = 0; arg_index < args.size(); arg_index++) // for (size_t arg_index = 0; arg_index < args.size(); arg_index++)
{ // {
const shared_ptr<runtime::Tensor>& arg = args[arg_index]; // const shared_ptr<runtime::Tensor>& arg = args[arg_index];
const shared_ptr<runtime::HostTensor>& data = args_data[arg_index]; // const shared_ptr<runtime::HostTensor>& data = args_data[arg_index];
arg->begin_write(data->get_data_ptr(), // arg->begin_write(data->get_data_ptr(),
data->get_element_count() * data->get_element_type().size(), // data->get_element_count() * data->get_element_type().size(),
buffer_number); // buffer_number);
} // }
const vector<shared_ptr<runtime::Tensor>>& results = results_set[buffer_number]; // const vector<shared_ptr<runtime::Tensor>>& results = results_set[buffer_number];
const vector<shared_ptr<runtime::HostTensor>>& results_data = results_data_set[buffer_number]; // const vector<shared_ptr<runtime::HostTensor>>& results_data = results_data_set[buffer_number];
for (size_t i = 0; i < iterations + warmup_iterations; i++) // for (size_t i = 0; i < iterations + warmup_iterations; i++)
{ // {
if (i == warmup_iterations) // if (i == warmup_iterations)
{ // {
t1.start(); // t1.start();
} // }
future<void> exec_future = compiled_func->begin_execute(results, args); // future<void> exec_future = compiled_func->begin_execute(results, args);
if (i > 0) // if (i > 0)
{ // {
for (size_t result_index = 0; result_index < results.size(); result_index++) // for (size_t result_index = 0; result_index < results.size(); result_index++)
{ // {
const shared_ptr<runtime::HostTensor>& data = results_data[result_index]; // const shared_ptr<runtime::HostTensor>& data = results_data[result_index];
const shared_ptr<runtime::Tensor>& result = results[result_index]; // const shared_ptr<runtime::Tensor>& result = results[result_index];
result->begin_read(data->get_data_ptr(), // result->begin_read(data->get_data_ptr(),
data->get_element_count() * data->get_element_type().size(), // data->get_element_count() * data->get_element_type().size(),
(buffer_number - 1) & 1); // (buffer_number - 1) & 1);
} // }
} // }
buffer_number = (buffer_number + 1) & 1; // buffer_number = (buffer_number + 1) & 1;
for (size_t arg_index = 0; arg_index < args.size(); arg_index++) // for (size_t arg_index = 0; arg_index < args.size(); arg_index++)
{ // {
const shared_ptr<runtime::Tensor>& arg = args[arg_index]; // const shared_ptr<runtime::Tensor>& arg = args[arg_index];
const shared_ptr<runtime::HostTensor>& data = args_data[arg_index]; // const shared_ptr<runtime::HostTensor>& data = args_data[arg_index];
arg->begin_write(data->get_data_ptr(), // arg->begin_write(data->get_data_ptr(),
data->get_element_count() * data->get_element_type().size(), // data->get_element_count() * data->get_element_type().size(),
buffer_number); // buffer_number);
} // }
exec_future.get(); // exec_future.get();
} // }
for (size_t result_index = 0; result_index < results.size(); result_index++) // for (size_t result_index = 0; result_index < results.size(); result_index++)
{ // {
const shared_ptr<runtime::HostTensor>& data = results_data[result_index]; // const shared_ptr<runtime::HostTensor>& data = results_data[result_index];
const shared_ptr<runtime::Tensor>& result = results[result_index]; // const shared_ptr<runtime::Tensor>& result = results[result_index];
result->begin_read(data->get_data_ptr(), // result->begin_read(data->get_data_ptr(),
data->get_element_count() * data->get_element_type().size(), // data->get_element_count() * data->get_element_type().size(),
(buffer_number - 1) & 1); // (buffer_number - 1) & 1);
} // }
t1.stop(); // t1.stop();
float time = t1.get_milliseconds(); // float time = t1.get_milliseconds();
cout << time / iterations << "ms per iteration" << endl; // cout << time / iterations << "ms per iteration" << endl;
vector<runtime::PerformanceCounter> perf_data = compiled_func->get_performance_data(); vector<runtime::PerformanceCounter> perf_data = compiled_func->get_performance_data();
return perf_data; return perf_data;
......
...@@ -95,7 +95,6 @@ set_source_files_properties(includes.cpp PROPERTIES COMPILE_DEFINITIONS ...@@ -95,7 +95,6 @@ set_source_files_properties(includes.cpp PROPERTIES COMPILE_DEFINITIONS
if (NGRAPH_INTERPRETER_ENABLE) if (NGRAPH_INTERPRETER_ENABLE)
list(APPEND SRC list(APPEND SRC
async.cpp
backend_debug_api.cpp backend_debug_api.cpp
builder.cpp builder.cpp
backend_api.cpp) backend_api.cpp)
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <gtest/gtest.h>
#include "ngraph/op/add.hpp"
#include "ngraph/runtime/backend.hpp"
#include "ngraph/util.hpp"
#include "util/test_tools.hpp"
using namespace ngraph;
using namespace std;
// Builds a function that adds two f32 parameters, runs it on the INTERPRETER backend
// through begin_execute, waits on the returned future, and checks that every element of
// the output buffer equals 4 (2 + 2).
TEST(async, execute)
{
    Shape shape{100000};

    auto lhs = make_shared<op::Parameter>(element::f32, shape);
    auto rhs = make_shared<op::Parameter>(element::f32, shape);
    auto function =
        make_shared<Function>(make_shared<op::Add>(lhs, rhs), ParameterVector{lhs, rhs});

    auto backend = runtime::Backend::create("INTERPRETER");

    // Host buffers handed to create_tensor below; the output buffer is inspected directly
    // once execution has finished.
    vector<float> input_values(shape_size(shape), 2);
    vector<float> output_values(shape_size(shape), 0);

    // Both inputs are created over the same host buffer.
    shared_ptr<runtime::Tensor> a =
        backend->create_tensor(element::f32, shape, input_values.data());
    shared_ptr<runtime::Tensor> b =
        backend->create_tensor(element::f32, shape, input_values.data());
    shared_ptr<runtime::Tensor> r =
        backend->create_tensor(element::f32, shape, output_values.data());

    auto executable = backend->compile(function);

    auto pending = executable->begin_execute({r}, {a, b});
    ASSERT_TRUE(pending.valid());
    // Block until the asynchronous execution has completed.
    pending.get();

    for (size_t i = 0; i < output_values.size(); ++i)
    {
        ASSERT_EQ(output_values[i], 4);
    }
}
// Exercises the asynchronous tensor API on the INTERPRETER backend: the inputs are
// uploaded with begin_write, the Add function is run with begin_execute, and the result is
// fetched with begin_read. Every returned future is checked for validity, and each
// transfer is expected to become ready within `timeout`.
TEST(async, tensor_read_write)
{
    // Upper bound on how long each transfer future may take to become ready.
    // (This local used to be called `ten_ms` although it holds 100 ms.)
    const chrono::milliseconds timeout(100);

    Shape shape{100000};
    auto A = make_shared<op::Parameter>(element::f32, shape);
    auto B = make_shared<op::Parameter>(element::f32, shape);
    auto f = make_shared<Function>(make_shared<op::Add>(A, B), ParameterVector{A, B});

    auto backend = runtime::Backend::create("INTERPRETER");
    auto handle = backend->compile(f);

    vector<float> data(shape_size(shape), 2);
    vector<float> data_r(shape_size(shape), 0);

    // Create some tensors for input/output
    shared_ptr<runtime::Tensor> a = backend->create_tensor(element::f32, shape);
    shared_ptr<runtime::Tensor> b = backend->create_tensor(element::f32, shape);
    shared_ptr<runtime::Tensor> r = backend->create_tensor(element::f32, shape);

    // Start both uploads from the same host buffer.
    auto future_a = a->begin_write(data.data(), data.size() * sizeof(float), 0);
    auto future_b = b->begin_write(data.data(), data.size() * sizeof(float), 0);
    ASSERT_TRUE(future_a.valid());
    ASSERT_TRUE(future_b.valid());

    auto future = handle->begin_execute({r}, {a, b});

    // get() waits for the result to be ready
    future.get();

    auto future_r = r->begin_read(data_r.data(), data_r.size() * sizeof(float), 0);
    ASSERT_TRUE(future_r.valid());

    EXPECT_EQ(future_a.wait_for(timeout), future_status::ready);
    EXPECT_EQ(future_b.wait_for(timeout), future_status::ready);
    // The read must have finished before data_r is inspected. Use a fatal assertion so a
    // timeout stops the test here instead of checking a buffer that may still be in the
    // process of being filled.
    ASSERT_EQ(future_r.wait_for(timeout), future_status::ready);

    for (float x : data_r)
    {
        ASSERT_EQ(x, 4);
    }
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment