Unverified Commit fdab16db authored by Adam Procter's avatar Adam Procter Committed by GitHub

Codegen for >2D concat following ref kernel pattern (#296)

parent c89b1a84
......@@ -151,6 +151,8 @@ if (NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR AND
runtime/cpu/cpu_backend.cpp
runtime/cpu/cpu_manager.cpp
runtime/cpu/cpu_kernels.cpp
runtime/cpu/cpu_kernel_emitters.cpp
runtime/cpu/cpu_kernel_utils.cpp
runtime/cpu/cpu_emitter.cpp
runtime/cpu/cpu_external_function.cpp
runtime/cpu/cpu_tensor_view.cpp
......
......@@ -20,6 +20,7 @@ using namespace ngraph;
// Initializes the writer state: indent level 0, the pending-indent flag set,
// and the temporary-name counter (used by generate_temporary_name) at 0.
codegen::CodeWriter::CodeWriter()
: indent(0)
, m_pending_indent(true)
, m_temporary_name_count(0)
{
}
......@@ -32,3 +33,13 @@ void codegen::CodeWriter::operator+=(const std::string& s)
{
*this << s;
}
//
// Returns an identifier of the form "<prefix>__<N>", where N is the current
// value of this writer's temporary-name counter. The counter is incremented
// on every call, so successive calls on the same writer yield distinct
// suffixes even when the prefix is reused.
//
std::string codegen::CodeWriter::generate_temporary_name(std::string prefix)
{
    // std::to_string always produces plain decimal digits. A stringstream
    // formats integers through the global locale, which may insert digit
    // grouping characters and would then yield an invalid identifier.
    return prefix + "__" + std::to_string(m_temporary_name_count++);
}
......@@ -66,7 +66,10 @@ public:
return out;
}
std::string generate_temporary_name(std::string prefix = "tempvar");
private:
std::stringstream m_ss;
bool m_pending_indent;
size_t m_temporary_name_count;
};
......@@ -30,18 +30,18 @@ inline T ceil_div(T x, T y)
return (x == 0 ? 0 : (1 + (x - 1) / y));
}
CoordinateTransform::CoordinateTransform(const Shape& source_space_shape,
CoordinateTransform::CoordinateTransform(const Shape& source_shape,
const Coordinate& source_start_corner,
const Coordinate& source_end_corner,
const Strides& source_strides,
const AxisVector& source_axis_order)
: m_source_space_shape(source_space_shape)
: m_source_shape(source_shape)
, m_source_start_corner(source_start_corner)
, m_source_end_corner(source_end_corner)
, m_source_strides(source_strides)
, m_source_axis_order(source_axis_order)
{
m_n_axes = source_space_shape.size();
m_n_axes = source_shape.size();
if (m_n_axes != source_start_corner.size())
{
......@@ -80,8 +80,8 @@ CoordinateTransform::CoordinateTransform(const Shape& source_space_shape,
for (size_t i = 0; i < m_n_axes; i++)
{
if (source_start_corner[i] >= source_space_shape[i] &&
!(source_start_corner[i] == 0 && source_space_shape[i] == 0))
if (source_start_corner[i] >= source_shape[i] &&
!(source_start_corner[i] == 0 && source_shape[i] == 0))
{
std::stringstream ss;
......@@ -92,7 +92,7 @@ CoordinateTransform::CoordinateTransform(const Shape& source_space_shape,
for (size_t i = 0; i < m_n_axes; i++)
{
if (source_end_corner[i] > source_space_shape[i])
if (source_end_corner[i] > source_shape[i])
{
std::stringstream ss;
......@@ -120,7 +120,7 @@ CoordinateTransform::CoordinateTransform(const Shape& source_space_shape,
}
}
AxisVector default_axis_order(size_t n_axes)
static AxisVector default_axis_order(size_t n_axes)
{
AxisVector result(n_axes);
size_t n = 0;
......@@ -129,50 +129,50 @@ AxisVector default_axis_order(size_t n_axes)
return result;
}
CoordinateTransform::CoordinateTransform(const Shape& source_space_shape,
CoordinateTransform::CoordinateTransform(const Shape& source_shape,
const Coordinate& source_start_corner,
const Coordinate& source_end_corner,
const Strides& source_strides)
: CoordinateTransform(source_space_shape,
: CoordinateTransform(source_shape,
source_start_corner,
source_end_corner,
source_strides,
default_axis_order(source_space_shape.size()))
default_axis_order(source_shape.size()))
{
}
Strides default_source_strides(size_t n_axes)
// Builds the default source strides: a stride of 1 along each of n_axes axes.
static Strides default_source_strides(size_t n_axes)
{
    // Construct the declared return type directly rather than an AxisVector
    // that is then converted; the value (n_axes ones) is unchanged.
    return Strides(n_axes, 1);
}
CoordinateTransform::CoordinateTransform(const Shape& source_space_shape,
CoordinateTransform::CoordinateTransform(const Shape& source_shape,
const Coordinate& source_start_corner,
const Coordinate& source_end_corner)
: CoordinateTransform(source_space_shape,
: CoordinateTransform(source_shape,
source_start_corner,
source_end_corner,
default_source_strides(source_space_shape.size()),
default_axis_order(source_space_shape.size()))
default_source_strides(source_shape.size()),
default_axis_order(source_shape.size()))
{
}
Coordinate default_source_start_corner(size_t n_axes)
// Default start corner: a coordinate with n_axes entries, all zero.
static Coordinate default_source_start_corner(size_t n_axes)
{
    Coordinate origin(n_axes, 0);
    return origin;
}
Coordinate default_source_end_corner(const Shape& source_space_shape)
static Coordinate default_source_end_corner(const Shape& source_shape)
{
return source_space_shape;
return source_shape;
}
CoordinateTransform::CoordinateTransform(const Shape& source_space_shape)
: CoordinateTransform(source_space_shape,
default_source_start_corner(source_space_shape.size()),
default_source_end_corner(source_space_shape),
default_source_strides(source_space_shape.size()),
default_axis_order(source_space_shape.size()))
CoordinateTransform::CoordinateTransform(const Shape& source_shape)
: CoordinateTransform(source_shape,
default_source_start_corner(source_shape.size()),
default_source_end_corner(source_shape),
default_source_strides(source_shape.size()),
default_axis_order(source_shape.size()))
{
}
......@@ -185,7 +185,7 @@ size_t CoordinateTransform::index_source(const Coordinate& c) const
for (size_t axis = m_n_axes; axis-- > 0;)
{
index += c[axis] * stride;
stride *= m_source_space_shape[axis];
stride *= m_source_shape[axis];
}
return index;
......
......@@ -41,12 +41,17 @@ namespace ngraph
const Coordinate& source_start_corner,
const Coordinate& source_end_corner);
CoordinateTransform(const Shape& source_space_shape);
CoordinateTransform(const Shape& source_shape);
size_t index(const Coordinate& c) const;
bool in_bounds(const Coordinate& c) const;
Coordinate get_target_shape() const;
// Read-only accessors for the source-space parameters stored in this
// transform. Each returns a copy of the corresponding member. They are
// const-qualified, like index(), in_bounds() and get_target_shape(), so they
// can also be called through a const CoordinateTransform.
Shape get_source_shape() const { return m_source_shape; }
Coordinate get_source_start_corner() const { return m_source_start_corner; }
Coordinate get_source_end_corner() const { return m_source_end_corner; }
Strides get_source_strides() const { return m_source_strides; }
AxisVector get_source_axis_order() const { return m_source_axis_order; }
class Iterator
{
public:
......@@ -73,7 +78,7 @@ namespace ngraph
Coordinate to_source_coordinate(const Coordinate& c) const;
size_t index_source(const Coordinate& c) const;
Shape m_source_space_shape;
Shape m_source_shape;
Shape m_source_start_corner;
Shape m_source_end_corner;
Strides m_source_strides;
......
......@@ -34,6 +34,7 @@
#include "ngraph/ops/slice.hpp"
#include "ngraph/ops/sum.hpp"
#include "ngraph/runtime/cpu/cpu_emitter.hpp"
#include "ngraph/runtime/cpu/cpu_kernel_emitters.hpp"
#include "ngraph/util.hpp"
using namespace std;
......@@ -250,6 +251,49 @@ void runtime::cpu::CPU_Emitter::EmitConcat(const ngraph::Node* n,
m_out.indent--;
m_out << "}\n";
}
else
{
if (m_use_ref_kernels)
{
auto axis = (dynamic_cast<const op::Concat*>(n))->get_concatenation_axis();
std::vector<std::string> arg_names;
std::vector<std::string> arg_shape_strings;
for (auto arg : args)
{
arg_names.push_back(arg.get_name());
arg_shape_strings.push_back("{" + join(arg.get_shape()) + "}");
}
m_out << "kernel::concat<" << out[0].get_type() << ">({" << join(arg_names) << "},\n";
m_out << " " << out[0].get_name() << ",\n";
m_out << " {" << join(arg_shape_strings) << "},\n";
m_out << " {" << join(result_shape) << "},\n";
m_out << " " << axis << ");\n";
}
else
{
auto axis = (dynamic_cast<const op::Concat*>(n))->get_concatenation_axis();
std::vector<std::string> arg_names;
std::vector<Shape> arg_shapes;
for (auto arg : args)
{
arg_names.push_back(arg.get_name());
arg_shapes.push_back(arg.get_shape());
}
kernels::emit_concat(m_out,
args[0].get_element_type().c_type_string(),
arg_names,
out[0].get_name(),
arg_shapes,
result_shape,
axis);
}
}
}
void runtime::cpu::CPU_Emitter::EmitDivide(const ngraph::Node* n,
......
......@@ -37,10 +37,12 @@ namespace ngraph
{
protected:
codegen::CodeWriter m_out;
bool m_use_ref_kernels;
public:
// Starts with an empty code writer. m_use_ref_kernels is true when the
// NGRAPH_CPU_USE_REF_KERNELS environment variable is present; only its
// presence is tested, its value is never inspected.
CPU_Emitter()
: m_out()
, m_use_ref_kernels(std::getenv("NGRAPH_CPU_USE_REF_KERNELS") != nullptr)
{
}
// Returns the code text held by this emitter's CodeWriter (m_out).
std::string get_code() { return m_out.get_code(); }
......
......@@ -207,6 +207,7 @@ void runtime::cpu::CPU_ExternalFunction::compile()
#include "ngraph/runtime/cpu/cpu_eigen_utils.hpp"
#include "ngraph/runtime/cpu/cpu_kernels.hpp"
#include "ngraph/runtime/kernel/broadcast.hpp"
#include "ngraph/runtime/kernel/concat.hpp"
#include "ngraph/runtime/kernel/dot.hpp"
#include "ngraph/runtime/kernel/one_hot.hpp"
#include "ngraph/runtime/kernel/reduce.hpp"
......
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------------------------------------------------------
#include "ngraph/runtime/cpu/cpu_kernel_emitters.hpp"
#include "ngraph/codegen/code_writer.hpp"
#include "ngraph/runtime/cpu/cpu_kernel_utils.hpp"
using namespace ngraph;
using namespace ngraph::runtime::cpu::kernels;
//
// Emits code that concatenates the buffers named in args into the buffer
// named out, along concatenation_axis. One pointwise copy is emitted per
// argument: argument i (of shape in_shapes[i]) is copied into the region of
// the output that starts, along the concatenation axis, where the previous
// argument's region ended, and spans the full output extent on every other
// axis.
//
// For the reference kernel this is based on, see ngraph/runtime/kernel/concat.hpp.
//
void ngraph::runtime::cpu::kernels::emit_concat(codegen::CodeWriter& writer,
                                                std::string element_type,
                                                const std::vector<std::string> args,
                                                std::string out,
                                                const std::vector<Shape>& in_shapes,
                                                const Shape& out_shape,
                                                size_t concatenation_axis)
{
    // Position along the concatenation axis where the next chunk begins.
    size_t chunk_begin = 0;

    for (size_t arg_index = 0; arg_index < args.size(); arg_index++)
    {
        const Shape& arg_shape = in_shapes[arg_index];
        const size_t chunk_end = chunk_begin + arg_shape[concatenation_axis];

        // Corners of this argument's region within the output.
        Coordinate chunk_lower(out_shape.size(), 0);
        chunk_lower[concatenation_axis] = chunk_begin;

        Coordinate chunk_upper = out_shape;
        chunk_upper[concatenation_axis] = chunk_end;

        CoordinateTransform whole_input(arg_shape);
        CoordinateTransform output_chunk(out_shape, chunk_lower, chunk_upper);

        emit_pointwise_copy(
            writer, element_type, args[arg_index], out, whole_input, output_chunk);

        chunk_begin = chunk_end;
    }
}
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------------------------------------------------------
#pragma once
#include "ngraph/codegen/code_writer.hpp"
#include "ngraph/common.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernels
{
// Writes, to `writer`, code that concatenates the buffers named in `args`
// (with shapes `in_shapes`, in the same order) into the buffer named `out`
// (of shape `out_shape`) along `concatenation_axis`. `element_type` is the
// C++ element type name used in the emitted code.
void emit_concat(codegen::CodeWriter& writer,
std::string element_type,
const std::vector<std::string> args,
std::string out,
const std::vector<Shape>& in_shapes,
const Shape& out_shape,
size_t concatenation_axis);
}
}
}
}
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------------------------------------------------------
#include "ngraph/runtime/cpu/cpu_kernel_utils.hpp"
#include "ngraph/codegen/code_writer.hpp"
#include "ngraph/common.hpp"
#include "ngraph/coordinate_transform.hpp"
#include "ngraph/util.hpp"
using namespace ngraph;
using namespace ngraph::runtime::cpu::kernels;
//
// Given a coordinate transform and a vector of index expressions relative to
// the target coordinate space, produces the strings needed to index into the
// source coordinate space if it is represented as a multidimensional array.
//
// Entry p of the result is built from index_vars[axis_order[p]], strides[p]
// and start_corner[p]. A multiplication by a stride of 1 and an addition of a
// zero start offset are omitted from the emitted expression.
//
// For example,
//
//    trans has stride (2,2,2), axis order (2,0,1), and start offsets (3,4,5)
//
//    index_vars are "i", "j", "k"
//
// this will produce:
//
//    {"(2 * (k) + 3)", "(2 * (i) + 4)", "(2 * (j) + 5)"}
//
// NOTE(review): an earlier version of this comment paired each variable with
// the stride/offset of its permuted axis (e.g. k with offset 5). The code
// pairs by result position as shown above; confirm which is intended before
// using this with a non-default axis order.
//
std::vector<std::string>
    ngraph::runtime::cpu::kernels::emit_multi_indices(CoordinateTransform trans,
                                                      std::vector<std::string> index_vars)
{
    // The getters return by value, so fetch each vector once up front instead
    // of copying all three on every loop iteration.
    const AxisVector axis_order = trans.get_source_axis_order();
    const Strides strides = trans.get_source_strides();
    const Coordinate start_corner = trans.get_source_start_corner();

    std::vector<std::string> result;
    result.reserve(index_vars.size());

    for (size_t i = 0; i < index_vars.size(); i++)
    {
        const std::string& index_var = index_vars[axis_order[i]];
        const size_t source_stride = strides[i];
        const size_t source_start = start_corner[i];

        std::stringstream ss;

        if (source_stride == 1 && source_start == 0)
        {
            ss << index_var;
        }
        else if (source_stride == 1)
        {
            ss << "((" << index_var << ") + " << source_start << ")";
        }
        else if (source_start == 0)
        {
            ss << "(" << source_stride << " * (" << index_var << "))";
        }
        else
        {
            ss << "(" << source_stride << " * (" << index_var << ") + " << source_start << ")";
        }

        result.push_back(ss.str());
    }

    return result;
}
//
// Given a coordinate transform and a vector of index expressions relative to
// the target coordinate space, produces a single expression string that
// indexes into the source coordinate space if it is represented as a flat
// array. The per-axis expressions from emit_multi_indices are each scaled by
// the product of the source-shape extents of all later axes (so the last axis
// has multiplier 1) and then summed.
//
// For example,
//
//    trans has source shape (2,2,2), stride (2,2,2), axis order (2,0,1),
//    and start offsets (3,4,5)
//
//    index_vars are "i", "j", "k"
//
// this will produce (assuming join() places " + " between the elements):
//
//    "((4 * (2 * (k) + 3)) + (2 * (2 * (i) + 4)) + (2 * (j) + 5))"
//
std::string ngraph::runtime::cpu::kernels::emit_linear_index(CoordinateTransform trans,
                                                             std::vector<std::string> index_vars)
{
    std::vector<std::string> multi_indices = emit_multi_indices(trans, index_vars);

    // get_source_shape() returns by value; copy it once rather than once per axis.
    const Shape source_shape = trans.get_source_shape();

    size_t stride = 1;

    // Walk the axes from last to first, accumulating the linear multiplier.
    for (size_t i = index_vars.size(); i-- > 0;)
    {
        // No need to do this (multiply by stride) if it's 1, though it wouldn't hurt anything.
        if (stride != 1)
        {
            std::stringstream ss;
            ss << "(" << stride << " * " << multi_indices[i] << ")";
            multi_indices[i] = ss.str();
        }

        stride *= source_shape[i];
    }

    std::stringstream ss;
    ss << "(" << join(multi_indices, " + ") << ")";

    return ss.str();
}
//
// Produces the opening of an indexing for-loop: the loop header plus the
// opening brace. The loop declares a size_t variable named [index_var],
// initialized to [start], running while [index_var] < [end], incremented by
// one each iteration.
//
// If "omp" is true, the loop header is preceded by an OpenMP
// "parallel for" pragma line.
//
std::string ngraph::runtime::cpu::kernels::start_index_loop(std::string index_var,
                                                            size_t start,
                                                            size_t end,
                                                            bool omp)
{
    std::stringstream loop_open;

    if (omp)
    {
        loop_open << "#pragma omp parallel for\n";
    }

    loop_open << "for(size_t " << index_var << " = " << start << "; ";
    loop_open << index_var << " < " << end << "; ";
    loop_open << index_var << "++)\n";
    loop_open << "{\n";

    return loop_open.str();
}
//
// Produces the closing brace of an indexing loop, followed by a trailing
// comment naming the loop's index variable [index_var].
//
std::string ngraph::runtime::cpu::kernels::end_index_loop(std::string index_var)
{
    return "} // end for(" + index_var + ")\n";
}
//
// Produces the array-extent suffix for the transform's source shape: one
// bracketed extent per axis, e.g. "[32][1][200]" for source shape (32,1,200).
// An empty source shape yields an empty string.
//
std::string ngraph::runtime::cpu::kernels::emit_nd_sizes(CoordinateTransform trans)
{
    const Shape source_shape = trans.get_source_shape();

    std::stringstream extents;
    for (size_t axis = 0; axis < source_shape.size(); axis++)
    {
        extents << "[" << source_shape[axis] << "]";
    }

    return extents.str();
}
//
// Produces the subscript suffix for indexing the source space as a
// multidimensional array: each expression returned by emit_multi_indices,
// wrapped in square brackets and concatenated in order.
//
std::string ngraph::runtime::cpu::kernels::emit_nd_index(CoordinateTransform trans,
                                                         std::vector<std::string> index_vars)
{
    const std::vector<std::string> subscripts = emit_multi_indices(trans, index_vars);

    std::string suffix;
    for (const std::string& subscript : subscripts)
    {
        suffix += "[" + subscript + "]";
    }

    return suffix;
}
//
// Emits a pointwise copy from source_buffer mediated by source_trans, to
// dest_buffer mediated by dest_trans.
//
// The emitted code:
//   1. binds each buffer to a reference-to-multidimensional-array whose
//      extents are the respective transform's source shape;
//   2. opens one nested for-loop per axis, with the OpenMP pragma requested
//      on the outermost loop only;
//   3. assigns one element from the source array to the destination array,
//      each indexed through its own transform;
//   4. closes the loops innermost-first.
//
// NOTE(review): the loop bounds are source_trans's start/end corners, while
// the subscripts produced by emit_nd_index apply source_trans's start offset
// and stride to the loop variables again. The two agree when source_trans has
// a zero start corner and unit strides (as in emit_concat); confirm the
// intended bounds before passing any other source transform.
//
void ngraph::runtime::cpu::kernels::emit_pointwise_copy(codegen::CodeWriter& writer,
                                                        std::string element_type,
                                                        std::string source_buffer,
                                                        std::string dest_buffer,
                                                        CoordinateTransform source_trans,
                                                        CoordinateTransform dest_trans)
{
    const Shape loop_begin = source_trans.get_source_start_corner();
    const Shape loop_end = source_trans.get_source_end_corner();
    const size_t rank = loop_begin.size();

    // Temporary names are requested in a fixed order (source, dest, then loop
    // variables) because each request advances the writer's name counter.
    const std::string source_view = writer.generate_temporary_name("source_nd");
    const std::string dest_view = writer.generate_temporary_name("dest_nd");

    const std::string source_extents = emit_nd_sizes(source_trans);
    const std::string dest_extents = emit_nd_sizes(dest_trans);

    writer << element_type << "(&" << source_view << ")" << source_extents
           << " = *reinterpret_cast<" << element_type << "(*)" << source_extents << ">("
           << source_buffer << ");\n";
    writer << element_type << "(&" << dest_view << ")" << dest_extents
           << " = *reinterpret_cast<" << element_type << "(*)" << dest_extents << ">("
           << dest_buffer << ");\n";

    std::vector<std::string> loop_vars;
    for (size_t axis = 0; axis < rank; axis++)
    {
        loop_vars.push_back(writer.generate_temporary_name("i"));

        const bool outermost = (axis == 0);
        writer << start_index_loop(loop_vars.back(), loop_begin[axis], loop_end[axis], outermost);
        writer.indent++;
    }

    writer << dest_view << emit_nd_index(dest_trans, loop_vars) << " = " << source_view
           << emit_nd_index(source_trans, loop_vars) << ";\n";

    for (size_t remaining = rank; remaining > 0; remaining--)
    {
        writer.indent--;
        writer << end_index_loop(loop_vars[remaining - 1]);
    }
}
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------------------------------------------------------
#pragma once
#include "ngraph/codegen/code_writer.hpp"
#include "ngraph/common.hpp"
#include "ngraph/coordinate_transform.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernels
{
// Returns one source-space subscript expression per entry of index_vars,
// built from the transform's axis order, strides and start corner.
std::vector<std::string> emit_multi_indices(CoordinateTransform trans,
std::vector<std::string> index_vars);
// Returns a single expression combining the per-axis subscripts into a
// linear offset, using the transform's source shape for the multipliers.
std::string emit_linear_index(CoordinateTransform trans,
std::vector<std::string> index_vars);
// Returns the header and opening brace of a for-loop over
// [start, end) on a size_t variable named index_var; when omp is true an
// OpenMP "parallel for" pragma line is placed before the loop.
std::string
start_index_loop(std::string index_var, size_t start, size_t end, bool omp);
// Returns the closing brace of a loop, with a comment naming index_var.
std::string end_index_loop(std::string index_var);
// Returns the transform's source shape as bracketed extents, e.g. "[2][3]".
std::string emit_nd_sizes(CoordinateTransform trans);
// Returns the subscripts from emit_multi_indices, each wrapped in brackets.
std::string emit_nd_index(CoordinateTransform trans,
std::vector<std::string> index_vars);
// Writes, to writer, nested loops that copy elements from source_buffer to
// dest_buffer, indexing each buffer through its own transform.
void emit_pointwise_copy(codegen::CodeWriter& writer,
std::string element_type,
std::string source_buffer,
std::string dest_buffer,
CoordinateTransform source_trans,
CoordinateTransform dest_trans);
}
}
}
}
......@@ -22,7 +22,6 @@ include_directories(
)
set (SRC
backend_performance.cpp
builder.cpp
builder_autobroadcast.cpp
build_graph.cpp
......@@ -69,7 +68,7 @@ endif()
if(NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR)
include_directories(SYSTEM ${LLVM_INCLUDE_DIR})
link_directories(${LLVM_LIB_DIR})
set(SRC ${SRC} codegen.cpp)
set(SRC ${SRC} backend_performance.cpp codegen.cpp)
set(BACKEND_NAMES ${BACKEND_NAMES} "CPU")
endif()
......
......@@ -22,6 +22,7 @@
#include "ngraph/codegen/execution_engine.hpp"
#include "ngraph/file_util.hpp"
#include "ngraph/log.hpp"
#include "ngraph/ops/concatenate.hpp"
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/call_frame.hpp"
#include "ngraph/runtime/cpu/cpu_call_frame.hpp"
......@@ -33,6 +34,13 @@
using namespace std;
using namespace ngraph;
// Writes the entire contents of `data` into the tensor view `tv` via
// TensorView::write, passing 0 as the second argument (presumably the
// destination offset) and the vector's size in bytes as the third.
template <typename T>
static void copy_data(shared_ptr<runtime::TensorView> tv, const vector<T>& data)
{
    tv->write(data.data(), 0, data.size() * sizeof(T));
}
// Starting point CPU: 1.2ms/iteration
shared_ptr<runtime::TensorView> make_tensor(runtime::Backend& backend, const ValueType& value)
......@@ -124,3 +132,115 @@ TEST(benchmark, mxnet_10_bucket_lstm)
NGRAPH_INFO << p.name() << ", " << p.total_microseconds();
}
}
//
// Benchmarks a graph that concatenates six 32x1x200 arrays along the middle axis.
//
// For each backend in backend_names the test builds the same Concat function,
// times n_runs[i] repeated calls of it, and finally checks that every
// backend after the first produced the same result vector as the first.
//
TEST(benchmark, concat_32x1x200_axis1_6)
{
const size_t n_arrays = 6;
Shape shape_of_each_array = Shape{32, 1, 200};
size_t concatenation_axis = 1;
// The result has the input shape, except that the concatenation axis is
// n_arrays times as long.
Shape result_shape;
result_shape = shape_of_each_array;
result_shape[concatenation_axis] *= n_arrays;
// Total element count of one input array (product of its extents).
size_t elements_per_array = 1;
for (size_t d : shape_of_each_array)
{
elements_per_array *= d;
}
// Every input array receives the same contents: 1, 2, ..., elements_per_array.
vector<vector<float>> data_arrays(n_arrays);
for (size_t i = 0; i < n_arrays; i++)
{
data_arrays[i] = vector<float>(elements_per_array);
for (size_t j = 0; j < elements_per_array; j++)
{
data_arrays[i][j] = float(j + 1);
}
}
// When NGRAPH_CPU_USE_REF_KERNELS is set, the CPU backend gets the same
// run count as the other backends instead of the much larger one.
bool using_ref_kernels = (std::getenv("NGRAPH_CPU_USE_REF_KERNELS") != nullptr);
vector<std::string> backend_names{"INTERPRETER", "NGVM", "CPU"};
vector<int> n_runs{200, 200, using_ref_kernels ? 200 : 200000}; // one for each backend
vector<std::function<void()>> test_callbacks; // one for each backend
vector<std::shared_ptr<runtime::TensorView>> result_tvs; // one for each backend
// Setup phase: build and compile the graph on each backend, and capture a
// callback that performs one call so the timing loop below measures only calls.
for (std::string backend_name : backend_names)
{
vector<std::shared_ptr<op::Parameter>> params(n_arrays);
vector<std::shared_ptr<Node>> params_as_nodes(n_arrays);
for (size_t i = 0; i < n_arrays; i++)
{
auto param = make_shared<op::Parameter>(
make_shared<TensorViewType>(element::Float32::element_type(), shape_of_each_array));
params[i] = param;
params_as_nodes[i] = param;
}
auto concat = make_shared<op::Concat>(params_as_nodes, concatenation_axis);
auto f = make_shared<Function>(concat, params);
auto manager = runtime::Manager::get(backend_name);
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
// One input tensor view per parameter, filled from data_arrays.
vector<shared_ptr<runtime::Value>> input_vals;
for (size_t i = 0; i < n_arrays; i++)
{
auto tv = backend->make_primary_tensor_view(element::Float32::element_type(),
shape_of_each_array);
copy_data(tv, data_arrays[i]);
input_vals.push_back(tv);
}
auto result_tv =
backend->make_primary_tensor_view(element::Float32::element_type(), result_shape);
result_tvs.push_back(result_tv);
// Captures by value so the callback keeps the inputs, result view and
// call frame alive after this loop iteration ends.
std::function<void()> cb = [input_vals, result_tv, cf]() {
cf->call(input_vals, {result_tv});
};
test_callbacks.push_back(cb);
}
// Timing phase: run each backend's callback n_runs[i] times and report the
// total and the per-call time.
for (size_t i = 0; i < backend_names.size(); i++)
{
std::cout << backend_names[i] << ": " << n_runs[i] << " tests in " << std::flush;
stopwatch sw;
std::function<void()> cb = test_callbacks[i];
sw.start();
for (int j = 0; j < n_runs[i]; j++)
{
cb();
}
sw.stop();
std::cout << sw.get_milliseconds() << "ms (" << (sw.get_microseconds() / n_runs[i])
<< " us/test)" << std::endl;
}
// Verification phase: backend 0 is the baseline, so start comparing at 1.
for (size_t i = 1; i < backend_names.size(); i++)
{
std::cout << "Verifying " << backend_names[i] << " result against " << backend_names[0]
<< "..." << std::flush;
if (result_tvs[i]->get_vector<float>() == result_tvs[0]->get_vector<float>())
{
std::cout << " OK" << std::endl;
}
else
{
std::cout << " FAILED" << std::endl;
// Record a non-fatal failure so the remaining backends are still checked.
ADD_FAILURE();
}
}
}
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment