Unverified Commit fdab16db authored by Adam Procter, committed by GitHub

Codegen for >2D concat following ref kernel pattern (#296)

parent c89b1a84
......@@ -151,6 +151,8 @@ if (NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR AND
runtime/cpu/cpu_backend.cpp
runtime/cpu/cpu_manager.cpp
runtime/cpu/cpu_kernels.cpp
runtime/cpu/cpu_kernel_emitters.cpp
runtime/cpu/cpu_kernel_utils.cpp
runtime/cpu/cpu_emitter.cpp
runtime/cpu/cpu_external_function.cpp
runtime/cpu/cpu_tensor_view.cpp
......
......@@ -20,6 +20,7 @@ using namespace ngraph;
codegen::CodeWriter::CodeWriter()
: indent(0)
, m_pending_indent(true)
, m_temporary_name_count(0)
{
}
......@@ -32,3 +33,13 @@ void codegen::CodeWriter::operator+=(const std::string& s)
{
*this << s;
}
std::string codegen::CodeWriter::generate_temporary_name(std::string prefix)
{
std::stringstream ss;
ss << prefix << "__" << m_temporary_name_count;
m_temporary_name_count++;
return ss.str();
}
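// Illustrative usage (a sketch, not part of this change): the counter is
// shared across prefixes, so successive calls always yield distinct names.
//
//   codegen::CodeWriter writer;
//   writer.generate_temporary_name();    // "tempvar__0"
//   writer.generate_temporary_name("i"); // "i__1"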
......@@ -66,7 +66,10 @@ public:
return out;
}
std::string generate_temporary_name(std::string prefix = "tempvar");
private:
std::stringstream m_ss;
bool m_pending_indent;
size_t m_temporary_name_count;
};
......@@ -30,18 +30,18 @@ inline T ceil_div(T x, T y)
return (x == 0 ? 0 : (1 + (x - 1) / y));
}
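// e.g. ceil_div(7, 2) == 4 and ceil_div(0, 2) == 0; the x == 0 guard avoids
// unsigned underflow in (x - 1).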
CoordinateTransform::CoordinateTransform(const Shape& source_space_shape,
CoordinateTransform::CoordinateTransform(const Shape& source_shape,
const Coordinate& source_start_corner,
const Coordinate& source_end_corner,
const Strides& source_strides,
const AxisVector& source_axis_order)
: m_source_space_shape(source_space_shape)
: m_source_shape(source_shape)
, m_source_start_corner(source_start_corner)
, m_source_end_corner(source_end_corner)
, m_source_strides(source_strides)
, m_source_axis_order(source_axis_order)
{
m_n_axes = source_space_shape.size();
m_n_axes = source_shape.size();
if (m_n_axes != source_start_corner.size())
{
......@@ -80,8 +80,8 @@ CoordinateTransform::CoordinateTransform(const Shape& source_space_shape,
for (size_t i = 0; i < m_n_axes; i++)
{
if (source_start_corner[i] >= source_space_shape[i] &&
!(source_start_corner[i] == 0 && source_space_shape[i] == 0))
if (source_start_corner[i] >= source_shape[i] &&
!(source_start_corner[i] == 0 && source_shape[i] == 0))
{
std::stringstream ss;
......@@ -92,7 +92,7 @@ CoordinateTransform::CoordinateTransform(const Shape& source_space_shape,
for (size_t i = 0; i < m_n_axes; i++)
{
if (source_end_corner[i] > source_space_shape[i])
if (source_end_corner[i] > source_shape[i])
{
std::stringstream ss;
......@@ -120,7 +120,7 @@ CoordinateTransform::CoordinateTransform(const Shape& source_space_shape,
}
}
AxisVector default_axis_order(size_t n_axes)
static AxisVector default_axis_order(size_t n_axes)
{
AxisVector result(n_axes);
size_t n = 0;
......@@ -129,50 +129,50 @@ AxisVector default_axis_order(size_t n_axes)
return result;
}
CoordinateTransform::CoordinateTransform(const Shape& source_space_shape,
CoordinateTransform::CoordinateTransform(const Shape& source_shape,
const Coordinate& source_start_corner,
const Coordinate& source_end_corner,
const Strides& source_strides)
: CoordinateTransform(source_space_shape,
: CoordinateTransform(source_shape,
source_start_corner,
source_end_corner,
source_strides,
default_axis_order(source_space_shape.size()))
default_axis_order(source_shape.size()))
{
}
Strides default_source_strides(size_t n_axes)
static Strides default_source_strides(size_t n_axes)
{
return Strides(n_axes, 1);
}
CoordinateTransform::CoordinateTransform(const Shape& source_space_shape,
CoordinateTransform::CoordinateTransform(const Shape& source_shape,
const Coordinate& source_start_corner,
const Coordinate& source_end_corner)
: CoordinateTransform(source_space_shape,
: CoordinateTransform(source_shape,
source_start_corner,
source_end_corner,
default_source_strides(source_space_shape.size()),
default_axis_order(source_space_shape.size()))
default_source_strides(source_shape.size()),
default_axis_order(source_shape.size()))
{
}
Coordinate default_source_start_corner(size_t n_axes)
static Coordinate default_source_start_corner(size_t n_axes)
{
return Coordinate(n_axes, 0);
}
Coordinate default_source_end_corner(const Shape& source_space_shape)
static Coordinate default_source_end_corner(const Shape& source_shape)
{
return source_space_shape;
return source_shape;
}
CoordinateTransform::CoordinateTransform(const Shape& source_space_shape)
: CoordinateTransform(source_space_shape,
default_source_start_corner(source_space_shape.size()),
default_source_end_corner(source_space_shape),
default_source_strides(source_space_shape.size()),
default_axis_order(source_space_shape.size()))
CoordinateTransform::CoordinateTransform(const Shape& source_shape)
: CoordinateTransform(source_shape,
default_source_start_corner(source_shape.size()),
default_source_end_corner(source_shape),
default_source_strides(source_shape.size()),
default_axis_order(source_shape.size()))
{
}
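// Illustrative sketch of the defaulted constructors (derived from the
// default_* helpers above): with all arguments omitted, the transform is the
// identity over the whole tensor.
//
//   CoordinateTransform t(Shape{2, 3});
//   // behaves like:
//   //   CoordinateTransform(Shape{2, 3},
//   //                       Coordinate{0, 0},  // default_source_start_corner
//   //                       Coordinate{2, 3},  // default_source_end_corner
//   //                       Strides{1, 1},     // default_source_strides
//   //                       AxisVector{0, 1}); // default_axis_order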
......@@ -185,7 +185,7 @@ size_t CoordinateTransform::index_source(const Coordinate& c) const
for (size_t axis = m_n_axes; axis-- > 0;)
{
index += c[axis] * stride;
stride *= m_source_space_shape[axis];
stride *= m_source_shape[axis];
}
return index;
......
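// Worked example for index_source (illustrative): with m_source_shape =
// (2, 3, 4), the accumulated row-major strides are (12, 4, 1), so the
// coordinate (1, 2, 3) maps to 1*12 + 2*4 + 3*1 = 23.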
......@@ -41,12 +41,17 @@ namespace ngraph
const Coordinate& source_start_corner,
const Coordinate& source_end_corner);
CoordinateTransform(const Shape& source_space_shape);
CoordinateTransform(const Shape& source_shape);
size_t index(const Coordinate& c) const;
bool in_bounds(const Coordinate& c) const;
Coordinate get_target_shape() const;
Shape get_source_shape() { return m_source_shape; }
Coordinate get_source_start_corner() { return m_source_start_corner; }
Coordinate get_source_end_corner() { return m_source_end_corner; }
Strides get_source_strides() { return m_source_strides; }
AxisVector get_source_axis_order() { return m_source_axis_order; }
class Iterator
{
public:
......@@ -73,7 +78,7 @@ namespace ngraph
Coordinate to_source_coordinate(const Coordinate& c) const;
size_t index_source(const Coordinate& c) const;
Shape m_source_space_shape;
Shape m_source_shape;
Shape m_source_start_corner;
Shape m_source_end_corner;
Strides m_source_strides;
......
......@@ -34,6 +34,7 @@
#include "ngraph/ops/slice.hpp"
#include "ngraph/ops/sum.hpp"
#include "ngraph/runtime/cpu/cpu_emitter.hpp"
#include "ngraph/runtime/cpu/cpu_kernel_emitters.hpp"
#include "ngraph/util.hpp"
using namespace std;
......@@ -250,6 +251,49 @@ void runtime::cpu::CPU_Emitter::EmitConcat(const ngraph::Node* n,
m_out.indent--;
m_out << "}\n";
}
else
{
if (m_use_ref_kernels)
{
auto axis = (dynamic_cast<const op::Concat*>(n))->get_concatenation_axis();
std::vector<std::string> arg_names;
std::vector<std::string> arg_shape_strings;
for (auto arg : args)
{
arg_names.push_back(arg.get_name());
arg_shape_strings.push_back("{" + join(arg.get_shape()) + "}");
}
m_out << "kernel::concat<" << out[0].get_type() << ">({" << join(arg_names) << "},\n";
m_out << " " << out[0].get_name() << ",\n";
m_out << " {" << join(arg_shape_strings) << "},\n";
m_out << " {" << join(result_shape) << "},\n";
m_out << " " << axis << ");\n";
}
else
{
auto axis = (dynamic_cast<const op::Concat*>(n))->get_concatenation_axis();
std::vector<std::string> arg_names;
std::vector<Shape> arg_shapes;
for (auto arg : args)
{
arg_names.push_back(arg.get_name());
arg_shapes.push_back(arg.get_shape());
}
kernels::emit_concat(m_out,
args[0].get_element_type().c_type_string(),
arg_names,
out[0].get_name(),
arg_shapes,
result_shape,
axis);
}
}
}
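// For reference, a sketch of what the ref-kernel branch above emits for two
// hypothetical float inputs "arg0"/"arg1" of shape {2, 2} concatenated along
// axis 0 (all names here are illustrative only):
//
//   kernel::concat<float>({arg0, arg1},
//                         out0,
//                         {{2, 2}, {2, 2}},
//                         {4, 2},
//                         0);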
void runtime::cpu::CPU_Emitter::EmitDivide(const ngraph::Node* n,
......
......@@ -37,10 +37,12 @@ namespace ngraph
{
protected:
codegen::CodeWriter m_out;
bool m_use_ref_kernels;
public:
CPU_Emitter()
: m_out()
, m_use_ref_kernels(std::getenv("NGRAPH_CPU_USE_REF_KERNELS") != nullptr)
{
}
std::string get_code() { return m_out.get_code(); }
......
......@@ -207,6 +207,7 @@ void runtime::cpu::CPU_ExternalFunction::compile()
#include "ngraph/runtime/cpu/cpu_eigen_utils.hpp"
#include "ngraph/runtime/cpu/cpu_kernels.hpp"
#include "ngraph/runtime/kernel/broadcast.hpp"
#include "ngraph/runtime/kernel/concat.hpp"
#include "ngraph/runtime/kernel/dot.hpp"
#include "ngraph/runtime/kernel/one_hot.hpp"
#include "ngraph/runtime/kernel/reduce.hpp"
......
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------------------------------------------------------
#include "ngraph/runtime/cpu/cpu_kernel_emitters.hpp"
#include "ngraph/codegen/code_writer.hpp"
#include "ngraph/runtime/cpu/cpu_kernel_utils.hpp"
using namespace ngraph;
using namespace ngraph::runtime::cpu::kernels;
//
// For the reference kernel this is based on, see ngraph/runtime/kernel/concat.hpp.
//
void ngraph::runtime::cpu::kernels::emit_concat(codegen::CodeWriter& writer,
std::string element_type,
const std::vector<std::string> args,
std::string out,
const std::vector<Shape>& in_shapes,
const Shape& out_shape,
size_t concatenation_axis)
{
size_t concatenation_pos = 0;
for (size_t i = 0; i < args.size(); i++)
{
Coordinate out_start_coord = Coordinate(out_shape.size(), 0);
out_start_coord[concatenation_axis] = concatenation_pos;
Coordinate out_end_coord = out_shape;
out_end_coord[concatenation_axis] = concatenation_pos + in_shapes[i][concatenation_axis];
CoordinateTransform input_transform(in_shapes[i]);
CoordinateTransform output_chunk_transform(out_shape, out_start_coord, out_end_coord);
emit_pointwise_copy(
writer, element_type, args[i], out, input_transform, output_chunk_transform);
concatenation_pos += in_shapes[i][concatenation_axis];
}
}
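// Worked example of the decomposition above (a sketch): concatenating shapes
// {2, 2} and {3, 2} along axis 0 into a {5, 2} output emits two pointwise
// copies, one targeting the output chunk with rows [0, 2) and one targeting
// rows [2, 5), as concatenation_pos advances 0 -> 2 -> 5.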
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------------------------------------------------------
#pragma once
#include "ngraph/codegen/code_writer.hpp"
#include "ngraph/common.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernels
{
void emit_concat(codegen::CodeWriter& writer,
std::string element_type,
const std::vector<std::string> args,
std::string out,
const std::vector<Shape>& in_shapes,
const Shape& out_shape,
size_t concatenation_axis);
}
}
}
}
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------------------------------------------------------
#include "ngraph/runtime/cpu/cpu_kernel_utils.hpp"
#include "ngraph/codegen/code_writer.hpp"
#include "ngraph/common.hpp"
#include "ngraph/coordinate_transform.hpp"
#include "ngraph/util.hpp"
using namespace ngraph;
using namespace ngraph::runtime::cpu::kernels;
//
// Given a coordinate transform and a vector of index expressions relative to
// the target coordinate space, produces the strings needed to index into the
// source coordinate space if it is represented as a multidimensional array.
//
// For example,
//
// trans has stride (2,2,2), axis order (2,0,1), and start offsets (3,4,5)
//
// index_vars are "i", "j", "k"
//
// this will produce:
//
// {"((k) * 2 + 5)", "((i) * 2 + 3)", "((j) * 2 + 4)"}
//
//
std::vector<std::string>
ngraph::runtime::cpu::kernels::emit_multi_indices(CoordinateTransform trans,
std::vector<std::string> index_vars)
{
std::vector<std::string> result;
for (size_t i = 0; i < index_vars.size(); i++)
{
std::string index_var = index_vars[trans.get_source_axis_order()[i]];
size_t source_stride = trans.get_source_strides()[i];
size_t source_start = trans.get_source_start_corner()[i];
std::stringstream ss;
if (source_stride == 1 && source_start == 0)
{
ss << index_var;
}
else if (source_stride == 1)
{
ss << "((" << index_var << ") + " << source_start << ")";
}
else if (source_start == 0)
{
ss << "(" << source_stride << " * (" << index_var << "))";
}
else
{
ss << "(" << source_stride << " * (" << index_var << ") + " << source_start << ")";
}
result.push_back(ss.str());
}
return result;
}
//
// Given a coordinate transform and a vector of index expressions relative to
// the target coordinate space, produces the string needed to index into the
// source coordinate space if it is represented as a flat (one-dimensional)
// array.
//
// For example,
//
//    trans has source shape (2,2,2), stride (2,2,2), axis order (2,0,1),
//    and start offsets (3,4,5)
//
//    index_vars are "i", "j", "k"
//
// this will produce:
//
//    "((4 * (2 * (k) + 3)) + (2 * (2 * (i) + 4)) + (2 * (j) + 5))"
//
//
std::string ngraph::runtime::cpu::kernels::emit_linear_index(CoordinateTransform trans,
std::vector<std::string> index_vars)
{
std::vector<std::string> multi_indices = emit_multi_indices(trans, index_vars);
size_t stride = 1;
for (size_t i = index_vars.size(); i-- > 0;)
{
// No need to do this (multiply by stride) if it's 1, though it wouldn't hurt anything.
if (stride != 1)
{
std::stringstream ss;
ss << "(" << stride << " * " << multi_indices[i] << ")";
multi_indices[i] = ss.str();
}
stride *= trans.get_source_shape()[i];
}
std::stringstream ss;
ss << "(" << join(multi_indices, " + ") << ")";
return ss.str();
}
//
// Begins an indexing loop (just a for-loop) with index_var as the index
// variable, starting at start, continuing while [index_var] < [end].
//
// Optionally emits an OpenMP parallel pragma, if "omp" is true.
//
std::string ngraph::runtime::cpu::kernels::start_index_loop(std::string index_var,
size_t start,
size_t end,
bool omp)
{
std::stringstream ss;
if (omp)
{
ss << "#pragma omp parallel for\n";
}
ss << "for(size_t " << index_var << " = " << start << "; " << index_var << " < " << end << "; "
<< index_var << "++)\n"
<< "{\n";
return ss.str();
}
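// For example (illustrative), start_index_loop("i", 0, 32, true) returns:
//
//   #pragma omp parallel for
//   for(size_t i = 0; i < 32; i++)
//   {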
//
// Ends an indexing loop on the index variable [index_var].
//
std::string ngraph::runtime::cpu::kernels::end_index_loop(std::string index_var)
{
std::stringstream ss;
ss << "} // end for(" << index_var << ")\n";
return ss.str();
}
std::string ngraph::runtime::cpu::kernels::emit_nd_sizes(CoordinateTransform trans)
{
std::stringstream ss;
for (size_t s : trans.get_source_shape())
{
ss << "[" << s << "]";
}
return ss.str();
}
std::string ngraph::runtime::cpu::kernels::emit_nd_index(CoordinateTransform trans,
std::vector<std::string> index_vars)
{
std::stringstream ss;
for (std::string index : emit_multi_indices(trans, index_vars))
{
ss << "[" << index << "]";
}
return ss.str();
}
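// For example (illustrative): given a transform over source shape (2,3,4)
// with unit strides, zero start corner, and identity axis order,
// emit_nd_sizes yields "[2][3][4]" and emit_nd_index with index_vars
// {"i", "j", "k"} yields "[i][j][k]".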
//
// Emits a pointwise copy from source_buffer, mediated by source_trans, to
// dest_buffer, mediated by dest_trans.
//
void ngraph::runtime::cpu::kernels::emit_pointwise_copy(codegen::CodeWriter& writer,
std::string element_type,
std::string source_buffer,
std::string dest_buffer,
CoordinateTransform source_trans,
CoordinateTransform dest_trans)
{
std::vector<std::string> index_vars;
Shape source_start_corner = source_trans.get_source_start_corner();
Shape source_end_corner = source_trans.get_source_end_corner();
size_t n_axes = source_start_corner.size();
std::string source_nd_name = writer.generate_temporary_name("source_nd");
std::string dest_nd_name = writer.generate_temporary_name("dest_nd");
writer << element_type << "(&" << source_nd_name << ")" << emit_nd_sizes(source_trans)
<< " = *reinterpret_cast<" << element_type << "(*)" << emit_nd_sizes(source_trans)
<< ">(" << source_buffer << ");\n";
writer << element_type << "(&" << dest_nd_name << ")" << emit_nd_sizes(dest_trans)
<< " = *reinterpret_cast<" << element_type << "(*)" << emit_nd_sizes(dest_trans) << ">("
<< dest_buffer << ");\n";
for (size_t i = 0; i < n_axes; i++)
{
std::string index_var = writer.generate_temporary_name("i");
writer << start_index_loop(index_var, source_start_corner[i], source_end_corner[i], i == 0);
writer.indent++;
index_vars.push_back(index_var);
}
writer << dest_nd_name << emit_nd_index(dest_trans, index_vars) << " = " << source_nd_name
<< emit_nd_index(source_trans, index_vars) << ";\n";
for (size_t i = n_axes; i-- > 0;)
{
writer.indent--;
writer << end_index_loop(index_vars[i]);
}
}
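// A sketch of the code the function above generates when copying a {2, 3}
// float source into a matching destination chunk (temporary names are
// produced by generate_temporary_name; everything here is illustrative):
//
//   float(&source_nd__0)[2][3] = *reinterpret_cast<float(*)[2][3]>(arg0);
//   float(&dest_nd__1)[2][3] = *reinterpret_cast<float(*)[2][3]>(out0);
//   #pragma omp parallel for
//   for(size_t i__2 = 0; i__2 < 2; i__2++)
//   {
//       for(size_t i__3 = 0; i__3 < 3; i__3++)
//       {
//           dest_nd__1[i__2][i__3] = source_nd__0[i__2][i__3];
//       } // end for(i__3)
//   } // end for(i__2)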
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------------------------------------------------------
#pragma once
#include "ngraph/codegen/code_writer.hpp"
#include "ngraph/common.hpp"
#include "ngraph/coordinate_transform.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernels
{
std::vector<std::string> emit_multi_indices(CoordinateTransform trans,
std::vector<std::string> index_vars);
std::string emit_linear_index(CoordinateTransform trans,
std::vector<std::string> index_vars);
std::string
start_index_loop(std::string index_var, size_t start, size_t end, bool omp);
std::string end_index_loop(std::string index_var);
std::string emit_nd_sizes(CoordinateTransform trans);
std::string emit_nd_index(CoordinateTransform trans,
std::vector<std::string> index_vars);
void emit_pointwise_copy(codegen::CodeWriter& writer,
std::string element_type,
std::string source_buffer,
std::string dest_buffer,
CoordinateTransform source_trans,
CoordinateTransform dest_trans);
}
}
}
}
......@@ -22,7 +22,6 @@ include_directories(
)
set (SRC
backend_performance.cpp
builder.cpp
builder_autobroadcast.cpp
build_graph.cpp
......@@ -69,7 +68,7 @@ endif()
if(NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR)
include_directories(SYSTEM ${LLVM_INCLUDE_DIR})
link_directories(${LLVM_LIB_DIR})
set(SRC ${SRC} codegen.cpp)
set(SRC ${SRC} backend_performance.cpp codegen.cpp)
set(BACKEND_NAMES ${BACKEND_NAMES} "CPU")
endif()
......
......@@ -22,6 +22,7 @@
#include "ngraph/codegen/execution_engine.hpp"
#include "ngraph/file_util.hpp"
#include "ngraph/log.hpp"
#include "ngraph/ops/concatenate.hpp"
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/call_frame.hpp"
#include "ngraph/runtime/cpu/cpu_call_frame.hpp"
......@@ -33,6 +34,13 @@
using namespace std;
using namespace ngraph;
template <typename T>
static void copy_data(shared_ptr<runtime::TensorView> tv, const vector<T>& data)
{
size_t data_size = data.size() * sizeof(T);
tv->write(data.data(), 0, data_size);
}
// Starting point CPU: 1.2ms/iteration
shared_ptr<runtime::TensorView> make_tensor(runtime::Backend& backend, const ValueType& value)
......@@ -124,3 +132,115 @@ TEST(benchmark, mxnet_10_bucket_lstm)
NGRAPH_INFO << p.name() << ", " << p.total_microseconds();
}
}
//
// Benchmarks a graph that concatenates six 32x1x200 arrays along the middle axis.
//
TEST(benchmark, concat_32x1x200_axis1_6)
{
const size_t n_arrays = 6;
Shape shape_of_each_array = Shape{32, 1, 200};
size_t concatenation_axis = 1;
Shape result_shape;
result_shape = shape_of_each_array;
result_shape[concatenation_axis] *= n_arrays;
size_t elements_per_array = 1;
for (size_t d : shape_of_each_array)
{
elements_per_array *= d;
}
vector<vector<float>> data_arrays(n_arrays);
for (size_t i = 0; i < n_arrays; i++)
{
data_arrays[i] = vector<float>(elements_per_array);
for (size_t j = 0; j < elements_per_array; j++)
{
data_arrays[i][j] = float(j + 1);
}
}
bool using_ref_kernels = (std::getenv("NGRAPH_CPU_USE_REF_KERNELS") != nullptr);
vector<std::string> backend_names{"INTERPRETER", "NGVM", "CPU"};
vector<int> n_runs{200, 200, using_ref_kernels ? 200 : 200000}; // one for each backend
vector<std::function<void()>> test_callbacks; // one for each backend
vector<std::shared_ptr<runtime::TensorView>> result_tvs; // one for each backend
for (std::string backend_name : backend_names)
{
vector<std::shared_ptr<op::Parameter>> params(n_arrays);
vector<std::shared_ptr<Node>> params_as_nodes(n_arrays);
for (size_t i = 0; i < n_arrays; i++)
{
auto param = make_shared<op::Parameter>(
make_shared<TensorViewType>(element::Float32::element_type(), shape_of_each_array));
params[i] = param;
params_as_nodes[i] = param;
}
auto concat = make_shared<op::Concat>(params_as_nodes, concatenation_axis);
auto f = make_shared<Function>(concat, params);
auto manager = runtime::Manager::get(backend_name);
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
vector<shared_ptr<runtime::Value>> input_vals;
for (size_t i = 0; i < n_arrays; i++)
{
auto tv = backend->make_primary_tensor_view(element::Float32::element_type(),
shape_of_each_array);
copy_data(tv, data_arrays[i]);
input_vals.push_back(tv);
}
auto result_tv =
backend->make_primary_tensor_view(element::Float32::element_type(), result_shape);
result_tvs.push_back(result_tv);
std::function<void()> cb = [input_vals, result_tv, cf]() {
cf->call(input_vals, {result_tv});
};
test_callbacks.push_back(cb);
}
for (size_t i = 0; i < backend_names.size(); i++)
{
std::cout << backend_names[i] << ": " << n_runs[i] << " tests in " << std::flush;
stopwatch sw;
std::function<void()> cb = test_callbacks[i];
sw.start();
for (int j = 0; j < n_runs[i]; j++)
{
cb();
}
sw.stop();
std::cout << sw.get_milliseconds() << "ms (" << (sw.get_microseconds() / n_runs[i])
<< " us/test)" << std::endl;
}
for (size_t i = 1; i < backend_names.size(); i++)
{
std::cout << "Verifying " << backend_names[i] << " result against " << backend_names[0]
<< "..." << std::flush;
if (result_tvs[i]->get_vector<float>() == result_tvs[0]->get_vector<float>())
{
std::cout << " OK" << std::endl;
}
else
{
std::cout << " FAILED" << std::endl;
ADD_FAILURE();
}
}
}
......@@ -21,6 +21,7 @@
#include "ngraph/log.hpp"
#include "ngraph/ngraph.hpp"
#include "ngraph/serializer.hpp"
#include "util/all_close.hpp"
#include "util/ndarray.hpp"
using namespace std;
......@@ -441,6 +442,134 @@ TEST(${BACKEND_NAME}, concat_vector)
result->get_vector<float>());
}
// from numpy import *
// a=linspace(1,2*3*4*3*2,2*3*4*3*2)
// b=linspace(1000+1,1000+2*3*3*3*2,2*3*3*3*2)
// c=linspace(2000+1,2000+2*3*2*3*2,2*3*2*3*2)
// a.shape=(2,3,4,3,2)
// b.shape=(2,3,3,3,2)
// c.shape=(2,3,2,3,2)
// z=concatenate((a,b,c),axis=2)
// z.shape=(2*3*(4+3+2)*3*2)
// set_printoptions(suppress=True)
// print(z)
//
// [ 1. 2. 3. 4. 5. 6. 7. 8. 9. 10.
// 11. 12. 13. 14. 15. 16. 17. 18. 19. 20.
// 21. 22. 23. 24. 1001. 1002. 1003. 1004. 1005. 1006.
// 1007. 1008. 1009. 1010. 1011. 1012. 1013. 1014. 1015. 1016.
// 1017. 1018. 2001. 2002. 2003. 2004. 2005. 2006. 2007. 2008.
// 2009. 2010. 2011. 2012. 25. 26. 27. 28. 29. 30.
// 31. 32. 33. 34. 35. 36. 37. 38. 39. 40.
// 41. 42. 43. 44. 45. 46. 47. 48. 1019. 1020.
// 1021. 1022. 1023. 1024. 1025. 1026. 1027. 1028. 1029. 1030.
// 1031. 1032. 1033. 1034. 1035. 1036. 2013. 2014. 2015. 2016.
// 2017. 2018. 2019. 2020. 2021. 2022. 2023. 2024. 49. 50.
// 51. 52. 53. 54. 55. 56. 57. 58. 59. 60.
// 61. 62. 63. 64. 65. 66. 67. 68. 69. 70.
// 71. 72. 1037. 1038. 1039. 1040. 1041. 1042. 1043. 1044.
// 1045. 1046. 1047. 1048. 1049. 1050. 1051. 1052. 1053. 1054.
// 2025. 2026. 2027. 2028. 2029. 2030. 2031. 2032. 2033. 2034.
// 2035. 2036. 73. 74. 75. 76. 77. 78. 79. 80.
// 81. 82. 83. 84. 85. 86. 87. 88. 89. 90.
// 91. 92. 93. 94. 95. 96. 1055. 1056. 1057. 1058.
// 1059. 1060. 1061. 1062. 1063. 1064. 1065. 1066. 1067. 1068.
// 1069. 1070. 1071. 1072. 2037. 2038. 2039. 2040. 2041. 2042.
// 2043. 2044. 2045. 2046. 2047. 2048. 97. 98. 99. 100.
// 101. 102. 103. 104. 105. 106. 107. 108. 109. 110.
// 111. 112. 113. 114. 115. 116. 117. 118. 119. 120.
// 1073. 1074. 1075. 1076. 1077. 1078. 1079. 1080. 1081. 1082.
// 1083. 1084. 1085. 1086. 1087. 1088. 1089. 1090. 2049. 2050.
// 2051. 2052. 2053. 2054. 2055. 2056. 2057. 2058. 2059. 2060.
// 121. 122. 123. 124. 125. 126. 127. 128. 129. 130.
// 131. 132. 133. 134. 135. 136. 137. 138. 139. 140.
// 141. 142. 143. 144. 1091. 1092. 1093. 1094. 1095. 1096.
// 1097. 1098. 1099. 1100. 1101. 1102. 1103. 1104. 1105. 1106.
// 1107. 1108. 2061. 2062. 2063. 2064. 2065. 2066. 2067. 2068.
// 2069. 2070. 2071. 2072.]
TEST(${BACKEND_NAME}, concat_5d)
{
vector<float> a_data(2 * 3 * 4 * 3 * 2);
for (int i = 0; i < 2 * 3 * 4 * 3 * 2; i++)
{
a_data[i] = float(i + 1);
}
vector<float> b_data(2 * 3 * 3 * 3 * 2);
for (int i = 0; i < 2 * 3 * 3 * 3 * 2; i++)
{
b_data[i] = 1000 + float(i + 1);
}
vector<float> c_data(2 * 3 * 2 * 3 * 2);
for (int i = 0; i < 2 * 3 * 2 * 3 * 2; i++)
{
c_data[i] = 2000 + float(i + 1);
}
auto shape_a = Shape{2, 3, 4, 3, 2};
auto A = make_shared<op::Parameter>(
make_shared<TensorViewType>(element::Float32::element_type(), shape_a));
auto shape_b = Shape{2, 3, 3, 3, 2};
auto B = make_shared<op::Parameter>(
make_shared<TensorViewType>(element::Float32::element_type(), shape_b));
auto shape_c = Shape{2, 3, 2, 3, 2};
auto C = make_shared<op::Parameter>(
make_shared<TensorViewType>(element::Float32::element_type(), shape_c));
auto shape_r = Shape{2, 3, 9, 3, 2};
auto rt = make_shared<TensorViewType>(element::Float32::element_type(), shape_r);
auto r = make_shared<op::Concat>(Nodes{A, B, C}, 2);
auto f = make_shared<Function>(r, rt, op::Parameters{A, B, C});
auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
// Create some tensors for input/output
auto a = backend->make_primary_tensor_view(element::Float32::element_type(), shape_a);
copy_data(a, a_data);
auto b = backend->make_primary_tensor_view(element::Float32::element_type(), shape_b);
copy_data(b, b_data);
auto c = backend->make_primary_tensor_view(element::Float32::element_type(), shape_c);
copy_data(c, c_data);
auto result = backend->make_primary_tensor_view(element::Float32::element_type(), shape_r);
cf->call({a, b, c}, {result});
EXPECT_EQ(
(vector<float>{
1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.,
13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24.,
1001., 1002., 1003., 1004., 1005., 1006., 1007., 1008., 1009., 1010., 1011., 1012.,
1013., 1014., 1015., 1016., 1017., 1018., 2001., 2002., 2003., 2004., 2005., 2006.,
2007., 2008., 2009., 2010., 2011., 2012., 25., 26., 27., 28., 29., 30.,
31., 32., 33., 34., 35., 36., 37., 38., 39., 40., 41., 42.,
43., 44., 45., 46., 47., 48., 1019., 1020., 1021., 1022., 1023., 1024.,
1025., 1026., 1027., 1028., 1029., 1030., 1031., 1032., 1033., 1034., 1035., 1036.,
2013., 2014., 2015., 2016., 2017., 2018., 2019., 2020., 2021., 2022., 2023., 2024.,
49., 50., 51., 52., 53., 54., 55., 56., 57., 58., 59., 60.,
61., 62., 63., 64., 65., 66., 67., 68., 69., 70., 71., 72.,
1037., 1038., 1039., 1040., 1041., 1042., 1043., 1044., 1045., 1046., 1047., 1048.,
1049., 1050., 1051., 1052., 1053., 1054., 2025., 2026., 2027., 2028., 2029., 2030.,
2031., 2032., 2033., 2034., 2035., 2036., 73., 74., 75., 76., 77., 78.,
79., 80., 81., 82., 83., 84., 85., 86., 87., 88., 89., 90.,
91., 92., 93., 94., 95., 96., 1055., 1056., 1057., 1058., 1059., 1060.,
1061., 1062., 1063., 1064., 1065., 1066., 1067., 1068., 1069., 1070., 1071., 1072.,
2037., 2038., 2039., 2040., 2041., 2042., 2043., 2044., 2045., 2046., 2047., 2048.,
97., 98., 99., 100., 101., 102., 103., 104., 105., 106., 107., 108.,
109., 110., 111., 112., 113., 114., 115., 116., 117., 118., 119., 120.,
1073., 1074., 1075., 1076., 1077., 1078., 1079., 1080., 1081., 1082., 1083., 1084.,
1085., 1086., 1087., 1088., 1089., 1090., 2049., 2050., 2051., 2052., 2053., 2054.,
2055., 2056., 2057., 2058., 2059., 2060., 121., 122., 123., 124., 125., 126.,
127., 128., 129., 130., 131., 132., 133., 134., 135., 136., 137., 138.,
139., 140., 141., 142., 143., 144., 1091., 1092., 1093., 1094., 1095., 1096.,
1097., 1098., 1099., 1100., 1101., 1102., 1103., 1104., 1105., 1106., 1107., 1108.,
2061., 2062., 2063., 2064., 2065., 2066., 2067., 2068., 2069., 2070., 2071., 2072.}),
result->get_vector<float>());
}
TEST(${BACKEND_NAME}, divide)
{
auto manager = runtime::Manager::get("${BACKEND_NAME}");
......@@ -2594,7 +2723,7 @@ TEST(DISABLED_${BACKEND_NAME}, reshape_6d)
auto result = backend->make_primary_tensor_view(element::Float32::element_type(), shape_r);
cf->call({a}, {result});
ASSERT_EQ(
EXPECT_EQ(
(vector<float>{
1., 73., 9., 81., 17., 89., 2., 74., 10., 82., 18., 90., 3., 75.,
11., 83., 19., 91., 4., 76., 12., 84., 20., 92., 145., 217., 153., 225.,
......@@ -4190,7 +4319,7 @@ TEST(DISABLED_${BACKEND_NAME}, dot_3d_multi_axis)
auto result = backend->make_primary_tensor_view(element::Float32::element_type(), shape_r);
cf->call({a, b}, {result});
ASSERT_EQ((vector<float>{2938., 3016., 3094., 3172., 3250., 7042., 7264., 7486., 7708., 7930.}),
EXPECT_EQ((vector<float>{2938., 3016., 3094., 3172., 3250., 7042., 7264., 7486., 7708., 7930.}),
result->get_vector<float>());
}
......@@ -4247,7 +4376,7 @@ TEST(DISABLED_${BACKEND_NAME}, dot_3d_one_axis_arbitrary)
auto result = backend->make_primary_tensor_view(element::Float32::element_type(), shape_r);
cf->call({a, b}, {result});
ASSERT_EQ((vector<float>{483, 189, 331, 86, 85, 1262, 2155, 354, 83, 18, 58, 543, 77,
EXPECT_EQ((vector<float>{483, 189, 331, 86, 85, 1262, 2155, 354, 83, 18, 58, 543, 77,
241, 325, 286, 859, 144, 438, 1025, 317, 973, 1041, 2930, 163, 69,
117, 50, 29, 472, 819, 62, 785, 236, 476, 235, 175, 1521, 2387,
1402, 97, 29, 69, 412, 63, 286, 429, 218, 45, 11, 29, 162,
......@@ -4321,7 +4450,7 @@ TEST(DISABLED_${BACKEND_NAME}, dot_4d_5d_multi_axis)
auto result = backend->make_primary_tensor_view(element::Float32::element_type(), shape_r);
cf->call({a, b}, {result});
ASSERT_EQ(
EXPECT_EQ(
(vector<float>{6942., 7020., 7098., 7176., 7254., 7332., 7410., 7488., 7566.,
7644., 7722., 7800., 16590., 16812., 17034., 17256., 17478., 17700.,
17922., 18144., 18366., 18588., 18810., 19032., 26238., 26604., 26970.,
......@@ -4388,7 +4517,7 @@ TEST(DISABLED_${BACKEND_NAME}, dot_4d_5d_multi_axis_more)
auto result = backend->make_primary_tensor_view(element::Float32::element_type(), shape_r);
cf->call({a, b}, {result});
ASSERT_EQ((vector<float>{251412., 254040.}), result->get_vector<float>());
EXPECT_EQ((vector<float>{251412., 254040.}), result->get_vector<float>());
}
//
......@@ -4456,14 +4585,14 @@ TEST(DISABLED_${BACKEND_NAME}, dot_4d_5d_multi_axis_big_fp64_VERY_SLOW)
auto result = backend->make_primary_tensor_view(element::Float64::element_type(), shape_r);
cf->call({a, b}, {result});
ASSERT_EQ(
(vector<double>{
EXPECT_TRUE(test::all_close(
vector<double>{
2.48832025919525478400e+18, 2.48832051839533977600e+18, 2.48832077759658444800e+18,
2.48832103679413504000e+18, 2.48832129599669350400e+18, 2.48832155519793971200e+18,
2.48832181439802265600e+18, 2.48832207359808000000e+18, 2.48832233279813580800e+18,
2.48832259199822028800e+18, 2.48832285119946496000e+18, 2.48832311040043008000e+18,
2.48832336959957401600e+18, 2.48832362880081817600e+18, 2.48832388800090368000e+18,
2.48832414720096000000e+18, 2.48832440640101478400e+18, 2.48832466560109772800e+18,
2.48832492480234188800e+18, 2.48832518400031897600e+18}),
result->get_vector<double>());
2.48832492480234188800e+18, 2.48832518400031897600e+18},
result->get_vector<double>()));
}