Unverified commit fdab16db authored by Adam Procter, committed by GitHub

Codegen for >2D concat following ref kernel pattern (#296)

parent c89b1a84
@@ -151,6 +151,8 @@ if (NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR AND
    runtime/cpu/cpu_backend.cpp
    runtime/cpu/cpu_manager.cpp
    runtime/cpu/cpu_kernels.cpp
+   runtime/cpu/cpu_kernel_emitters.cpp
+   runtime/cpu/cpu_kernel_utils.cpp
    runtime/cpu/cpu_emitter.cpp
    runtime/cpu/cpu_external_function.cpp
    runtime/cpu/cpu_tensor_view.cpp
...
@@ -20,6 +20,7 @@ using namespace ngraph;
codegen::CodeWriter::CodeWriter()
    : indent(0)
    , m_pending_indent(true)
+   , m_temporary_name_count(0)
{
}

@@ -32,3 +33,13 @@ void codegen::CodeWriter::operator+=(const std::string& s)
{
    *this << s;
}
+std::string codegen::CodeWriter::generate_temporary_name(std::string prefix)
+{
+    std::stringstream ss;
+    ss << prefix << "__" << m_temporary_name_count;
+    m_temporary_name_count++;
+    return ss.str();
+}
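A minimal usage sketch (the writer and loop here are illustrative, not part of this commit): each call returns a fresh name of the form prefix__N, so independently emitted helpers never collide on variable names.

    codegen::CodeWriter writer;
    std::string i0 = writer.generate_temporary_name("i"); // "i__0"
    std::string i1 = writer.generate_temporary_name("i"); // "i__1"
    writer << "for (size_t " << i0 << " = 0; " << i0 << " < 4; " << i0 << "++)\n";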
@@ -66,7 +66,10 @@ public:
        return out;
    }
+   std::string generate_temporary_name(std::string prefix = "tempvar");
private:
    std::stringstream m_ss;
    bool m_pending_indent;
+   size_t m_temporary_name_count;
};
@@ -30,18 +30,18 @@ inline T ceil_div(T x, T y)
    return (x == 0 ? 0 : (1 + (x - 1) / y));
}
-CoordinateTransform::CoordinateTransform(const Shape& source_space_shape,
+CoordinateTransform::CoordinateTransform(const Shape& source_shape,
                                          const Coordinate& source_start_corner,
                                          const Coordinate& source_end_corner,
                                          const Strides& source_strides,
                                          const AxisVector& source_axis_order)
-    : m_source_space_shape(source_space_shape)
+    : m_source_shape(source_shape)
     , m_source_start_corner(source_start_corner)
     , m_source_end_corner(source_end_corner)
     , m_source_strides(source_strides)
     , m_source_axis_order(source_axis_order)
{
-    m_n_axes = source_space_shape.size();
+    m_n_axes = source_shape.size();
    if (m_n_axes != source_start_corner.size())
    {

@@ -80,8 +80,8 @@ CoordinateTransform::CoordinateTransform(const Shape& source_space_shape,
    for (size_t i = 0; i < m_n_axes; i++)
    {
-        if (source_start_corner[i] >= source_space_shape[i] &&
-            !(source_start_corner[i] == 0 && source_space_shape[i] == 0))
+        if (source_start_corner[i] >= source_shape[i] &&
+            !(source_start_corner[i] == 0 && source_shape[i] == 0))
        {
            std::stringstream ss;

@@ -92,7 +92,7 @@ CoordinateTransform::CoordinateTransform(const Shape& source_space_shape,
    for (size_t i = 0; i < m_n_axes; i++)
    {
-        if (source_end_corner[i] > source_space_shape[i])
+        if (source_end_corner[i] > source_shape[i])
        {
            std::stringstream ss;

@@ -120,7 +120,7 @@ CoordinateTransform::CoordinateTransform(const Shape& source_space_shape,
    }
}
-AxisVector default_axis_order(size_t n_axes)
+static AxisVector default_axis_order(size_t n_axes)
{
    AxisVector result(n_axes);
    size_t n = 0;

@@ -129,50 +129,50 @@ AxisVector default_axis_order(size_t n_axes)
    return result;
}
-CoordinateTransform::CoordinateTransform(const Shape& source_space_shape,
+CoordinateTransform::CoordinateTransform(const Shape& source_shape,
                                          const Coordinate& source_start_corner,
                                          const Coordinate& source_end_corner,
                                          const Strides& source_strides)
-    : CoordinateTransform(source_space_shape,
+    : CoordinateTransform(source_shape,
                           source_start_corner,
                           source_end_corner,
                           source_strides,
-                          default_axis_order(source_space_shape.size()))
+                          default_axis_order(source_shape.size()))
{
}
-Strides default_source_strides(size_t n_axes)
+static Strides default_source_strides(size_t n_axes)
{
    return AxisVector(n_axes, 1);
}
-CoordinateTransform::CoordinateTransform(const Shape& source_space_shape,
+CoordinateTransform::CoordinateTransform(const Shape& source_shape,
                                          const Coordinate& source_start_corner,
                                          const Coordinate& source_end_corner)
-    : CoordinateTransform(source_space_shape,
+    : CoordinateTransform(source_shape,
                           source_start_corner,
                           source_end_corner,
-                          default_source_strides(source_space_shape.size()),
-                          default_axis_order(source_space_shape.size()))
+                          default_source_strides(source_shape.size()),
+                          default_axis_order(source_shape.size()))
{
}
-Coordinate default_source_start_corner(size_t n_axes)
+static Coordinate default_source_start_corner(size_t n_axes)
{
    return Coordinate(n_axes, 0);
}
-Coordinate default_source_end_corner(const Shape& source_space_shape)
+static Coordinate default_source_end_corner(const Shape& source_shape)
{
-    return source_space_shape;
+    return source_shape;
}
-CoordinateTransform::CoordinateTransform(const Shape& source_space_shape)
-    : CoordinateTransform(source_space_shape,
-                          default_source_start_corner(source_space_shape.size()),
-                          default_source_end_corner(source_space_shape),
-                          default_source_strides(source_space_shape.size()),
-                          default_axis_order(source_space_shape.size()))
+CoordinateTransform::CoordinateTransform(const Shape& source_shape)
+    : CoordinateTransform(source_shape,
+                          default_source_start_corner(source_shape.size()),
+                          default_source_end_corner(source_shape),
+                          default_source_strides(source_shape.size()),
+                          default_axis_order(source_shape.size()))
{
}

@@ -185,7 +185,7 @@ size_t CoordinateTransform::index_source(const Coordinate& c) const
    for (size_t axis = m_n_axes; axis-- > 0;)
    {
        index += c[axis] * stride;
-        stride *= m_source_space_shape[axis];
+        stride *= m_source_shape[axis];
    }
    return index;
...
@@ -41,12 +41,17 @@ namespace ngraph
                const Coordinate& source_start_corner,
                const Coordinate& source_end_corner);
-            CoordinateTransform(const Shape& source_space_shape);
+            CoordinateTransform(const Shape& source_shape);
            size_t index(const Coordinate& c) const;
            bool in_bounds(const Coordinate& c) const;
            Coordinate get_target_shape() const;
+            Shape get_source_shape() { return m_source_shape; }
+            Coordinate get_source_start_corner() { return m_source_start_corner; }
+            Coordinate get_source_end_corner() { return m_source_end_corner; }
+            Strides get_source_strides() { return m_source_strides; }
+            AxisVector get_source_axis_order() { return m_source_axis_order; }
            class Iterator
            {
            public:

@@ -73,7 +78,7 @@ namespace ngraph
            Coordinate to_source_coordinate(const Coordinate& c) const;
            size_t index_source(const Coordinate& c) const;
-            Shape m_source_space_shape;
+            Shape m_source_shape;
            Shape m_source_start_corner;
            Shape m_source_end_corner;
            Strides m_source_strides;
...
@@ -34,6 +34,7 @@
#include "ngraph/ops/slice.hpp"
#include "ngraph/ops/sum.hpp"
#include "ngraph/runtime/cpu/cpu_emitter.hpp"
+#include "ngraph/runtime/cpu/cpu_kernel_emitters.hpp"
#include "ngraph/util.hpp"
using namespace std;

@@ -250,6 +251,49 @@ void runtime::cpu::CPU_Emitter::EmitConcat(const ngraph::Node* n,
        m_out.indent--;
        m_out << "}\n";
    }
+    else
+    {
+        if (m_use_ref_kernels)
+        {
+            auto axis = (dynamic_cast<const op::Concat*>(n))->get_concatenation_axis();
+            std::vector<std::string> arg_names;
+            std::vector<std::string> arg_shape_strings;
+            for (auto arg : args)
+            {
+                arg_names.push_back(arg.get_name());
+                arg_shape_strings.push_back("{" + join(arg.get_shape()) + "}");
+            }
+            m_out << "kernel::concat<" << out[0].get_type() << ">({" << join(arg_names) << "},\n";
+            m_out << " " << out[0].get_name() << ",\n";
+            m_out << " {" << join(arg_shape_strings) << "},\n";
+            m_out << " {" << join(result_shape) << "},\n";
+            m_out << " " << axis << ");\n";
+        }
+        else
+        {
+            auto axis = (dynamic_cast<const op::Concat*>(n))->get_concatenation_axis();
+            std::vector<std::string> arg_names;
+            std::vector<Shape> arg_shapes;
+            for (auto arg : args)
+            {
+                arg_names.push_back(arg.get_name());
+                arg_shapes.push_back(arg.get_shape());
+            }
+            kernels::emit_concat(m_out,
+                                 args[0].get_element_type().c_type_string(),
+                                 arg_names,
+                                 out[0].get_name(),
+                                 arg_shapes,
+                                 result_shape,
+                                 axis);
+        }
+    }
}
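For concreteness (tensor names and shapes here are illustrative): for two float arguments TV_0 and TV_1 of shape {2, 2, 2} concatenated along axis 1 into TV_2, the ref-kernel branch above would emit a call of roughly this form:

    kernel::concat<float>({TV_0, TV_1},
     TV_2,
     {{2, 2, 2}, {2, 2, 2}},
     {2, 4, 2},
     1);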
void runtime::cpu::CPU_Emitter::EmitDivide(const ngraph::Node* n,
...
@@ -37,10 +37,12 @@ namespace ngraph
            {
            protected:
                codegen::CodeWriter m_out;
+                bool m_use_ref_kernels;
            public:
                CPU_Emitter()
                    : m_out()
+                    , m_use_ref_kernels(std::getenv("NGRAPH_CPU_USE_REF_KERNELS") != nullptr)
                {
                }
                std::string get_code() { return m_out.get_code(); }
...
@@ -207,6 +207,7 @@ void runtime::cpu::CPU_ExternalFunction::compile()
#include "ngraph/runtime/cpu/cpu_eigen_utils.hpp"
#include "ngraph/runtime/cpu/cpu_kernels.hpp"
#include "ngraph/runtime/kernel/broadcast.hpp"
+#include "ngraph/runtime/kernel/concat.hpp"
#include "ngraph/runtime/kernel/dot.hpp"
#include "ngraph/runtime/kernel/one_hot.hpp"
#include "ngraph/runtime/kernel/reduce.hpp"
...
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------------------------------------------------------
#include "ngraph/runtime/cpu/cpu_kernel_emitters.hpp"
#include "ngraph/codegen/code_writer.hpp"
#include "ngraph/runtime/cpu/cpu_kernel_utils.hpp"
using namespace ngraph;
using namespace ngraph::runtime::cpu::kernels;
//
// For the reference kernel this is based on, see ngraph/runtime/kernel/concat.hpp.
//
void ngraph::runtime::cpu::kernels::emit_concat(codegen::CodeWriter& writer,
                                                std::string element_type,
                                                const std::vector<std::string> args,
                                                std::string out,
                                                const std::vector<Shape>& in_shapes,
                                                const Shape& out_shape,
                                                size_t concatenation_axis)
{
    size_t concatenation_pos = 0;
    for (size_t i = 0; i < args.size(); i++)
    {
        Coordinate out_start_coord = Coordinate(out_shape.size(), 0);
        out_start_coord[concatenation_axis] = concatenation_pos;
        Coordinate out_end_coord = out_shape;
        out_end_coord[concatenation_axis] = concatenation_pos + in_shapes[i][concatenation_axis];
        CoordinateTransform input_transform(in_shapes[i]);
        CoordinateTransform output_chunk_transform(out_shape, out_start_coord, out_end_coord);
        emit_pointwise_copy(
            writer, element_type, args[i], out, input_transform, output_chunk_transform);
        concatenation_pos += in_shapes[i][concatenation_axis];
    }
}
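To make the codegen path concrete, here is roughly what emit_concat writes for two float inputs of shape {2, 3} (buffer names arg0, arg1, and out are hypothetical) concatenated along axis 1 into a {2, 6} output: one pointwise copy per input, each targeting its chunk of the output, with the temporary names drawn from CodeWriter::generate_temporary_name.

    float(&source_nd__0)[2][3] = *reinterpret_cast<float(*)[2][3]>(arg0);
    float(&dest_nd__1)[2][6] = *reinterpret_cast<float(*)[2][6]>(out);
    #pragma omp parallel for
    for(size_t i__2 = 0; i__2 < 2; i__2++)
    {
        for(size_t i__3 = 0; i__3 < 3; i__3++)
        {
            dest_nd__1[i__2][i__3] = source_nd__0[i__2][i__3];
        }
    }
    // ...then the same pattern for arg1, writing
    // dest_nd__5[i__6][((i__7) + 3)] = source_nd__4[i__6][i__7];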
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------------------------------------------------------
#pragma once
#include "ngraph/codegen/code_writer.hpp"
#include "ngraph/common.hpp"
namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            namespace kernels
            {
                void emit_concat(codegen::CodeWriter& writer,
                                 std::string element_type,
                                 const std::vector<std::string> args,
                                 std::string out,
                                 const std::vector<Shape>& in_shapes,
                                 const Shape& out_shape,
                                 size_t concatenation_axis);
            }
        }
    }
}
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------------------------------------------------------
#include "ngraph/runtime/cpu/cpu_kernel_utils.hpp"
#include "ngraph/codegen/code_writer.hpp"
#include "ngraph/common.hpp"
#include "ngraph/coordinate_transform.hpp"
#include "ngraph/util.hpp"
using namespace ngraph;
using namespace ngraph::runtime::cpu::kernels;
//
// Given a coordinate transform and a vector of index expressions relative to
// the target coordinate space, produces the strings needed to index into the
// source coordinate space if it is represented as a multidimensional array.
//
// For example, if
//
//    trans has strides (2,2,2), axis order (2,0,1), and start offsets (3,4,5)
//
//    index_vars are "i", "j", "k"
//
// this will produce (one string per source axis):
//
//    {"(2 * (k) + 3)", "(2 * (i) + 4)", "(2 * (j) + 5)"}
//
std::vector<std::string>
    ngraph::runtime::cpu::kernels::emit_multi_indices(CoordinateTransform trans,
                                                      std::vector<std::string> index_vars)
{
    std::vector<std::string> result;
    for (size_t i = 0; i < index_vars.size(); i++)
    {
        std::string index_var = index_vars[trans.get_source_axis_order()[i]];
        size_t source_stride = trans.get_source_strides()[i];
        size_t source_start = trans.get_source_start_corner()[i];
        std::stringstream ss;
        if (source_stride == 1 && source_start == 0)
        {
            ss << index_var;
        }
        else if (source_stride == 1)
        {
            ss << "((" << index_var << ") + " << source_start << ")";
        }
        else if (source_start == 0)
        {
            ss << "(" << source_stride << " * (" << index_var << "))";
        }
        else
        {
            ss << "(" << source_stride << " * (" << index_var << ") + " << source_start << ")";
        }
        result.push_back(ss.str());
    }
    return result;
}
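As a quick check of the example in the comment above (the shape {8, 8, 8} is an arbitrary stand-in large enough for the start offsets):

    CoordinateTransform trans(Shape{8, 8, 8},
                              Coordinate{3, 4, 5},
                              Coordinate{8, 8, 8},
                              Strides{2, 2, 2},
                              AxisVector{2, 0, 1});
    auto indices = emit_multi_indices(trans, {"i", "j", "k"});
    // indices == {"(2 * (k) + 3)", "(2 * (i) + 4)", "(2 * (j) + 5)"}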
//
// Given a coordinate transform and a vector of index expressions relative to
// the target coordinate space, produces the string needed to index into the
// source coordinate space if it is flattened into a one-dimensional array.
//
// For example, if
//
//    trans has source shape (2,2,2), strides (2,2,2), axis order (2,0,1),
//    and start offsets (3,4,5)
//
//    index_vars are "i", "j", "k"
//
// this will produce:
//
//    "((4 * (2 * (k) + 3)) + (2 * (2 * (i) + 4)) + (2 * (j) + 5))"
//
std::string ngraph::runtime::cpu::kernels::emit_linear_index(CoordinateTransform trans,
                                                             std::vector<std::string> index_vars)
{
    std::vector<std::string> multi_indices = emit_multi_indices(trans, index_vars);
    size_t stride = 1;
    for (size_t i = index_vars.size(); i-- > 0;)
    {
        // No need to do this (multiply by stride) if it's 1, though it wouldn't hurt anything.
        if (stride != 1)
        {
            std::stringstream ss;
            ss << "(" << stride << " * " << multi_indices[i] << ")";
            multi_indices[i] = ss.str();
        }
        stride *= trans.get_source_shape()[i];
    }
    std::stringstream ss;
    ss << "(" << join(multi_indices, " + ") << ")";
    return ss.str();
}
//
// Begins an indexing loop (just a for-loop) with index_var as the index
// variable, starting at start, continuing while [index_var] < [end].
//
// Optionally emits an OpenMP parallel pragma, if "omp" is true.
//
std::string ngraph::runtime::cpu::kernels::start_index_loop(std::string index_var,
                                                            size_t start,
                                                            size_t end,
                                                            bool omp)
{
    std::stringstream ss;
    if (omp)
    {
        ss << "#pragma omp parallel for\n";
    }
    ss << "for(size_t " << index_var << " = " << start << "; " << index_var << " < " << end << "; "
       << index_var << "++)\n"
       << "{\n";
    return ss.str();
}
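For example, start_index_loop("i__0", 0, 32, true) returns the text:

    #pragma omp parallel for
    for(size_t i__0 = 0; i__0 < 32; i__0++)
    {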
//
// Ends an indexing loop on the index variable [index_var].
//
std::string ngraph::runtime::cpu::kernels::end_index_loop(std::string index_var)
{
    std::stringstream ss;
    ss << "} // end for(" << index_var << ")\n";
    return ss.str();
}
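//
// Emits the bracketed dimension list for declaring (or casting to) a
// multidimensional array over the transform's source shape; e.g. a source
// shape of (2,4,2) produces "[2][4][2]".
//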
std::string ngraph::runtime::cpu::kernels::emit_nd_sizes(CoordinateTransform trans)
{
    std::stringstream ss;
    for (size_t s : trans.get_source_shape())
    {
        ss << "[" << s << "]";
    }
    return ss.str();
}
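//
// Emits the bracketed subscript list for indexing into a multidimensional
// array, one "[...]" per emit_multi_indices entry; e.g. with unit strides and
// zero start offsets, index_vars "i", "j" produce "[i][j]".
//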
std::string ngraph::runtime::cpu::kernels::emit_nd_index(CoordinateTransform trans,
                                                         std::vector<std::string> index_vars)
{
    std::stringstream ss;
    for (std::string index : emit_multi_indices(trans, index_vars))
    {
        ss << "[" << index << "]";
    }
    return ss.str();
}
//
// Emits a pointwise copy from source_buffer, mediated by source_trans, to
// dest_buffer, mediated by dest_trans.
//
void ngraph::runtime::cpu::kernels::emit_pointwise_copy(codegen::CodeWriter& writer,
                                                        std::string element_type,
                                                        std::string source_buffer,
                                                        std::string dest_buffer,
                                                        CoordinateTransform source_trans,
                                                        CoordinateTransform dest_trans)
{
    std::vector<std::string> index_vars;
    Shape source_start_corner = source_trans.get_source_start_corner();
    Shape source_end_corner = source_trans.get_source_end_corner();
    size_t n_axes = source_start_corner.size();
    std::string source_nd_name = writer.generate_temporary_name("source_nd");
    std::string dest_nd_name = writer.generate_temporary_name("dest_nd");
    // Bind multidimensional array references over the flat source and
    // destination buffers, so the copy below can use plain subscripting.
    writer << element_type << "(&" << source_nd_name << ")" << emit_nd_sizes(source_trans)
           << " = *reinterpret_cast<" << element_type << "(*)" << emit_nd_sizes(source_trans)
           << ">(" << source_buffer << ");\n";
    writer << element_type << "(&" << dest_nd_name << ")" << emit_nd_sizes(dest_trans)
           << " = *reinterpret_cast<" << element_type << "(*)" << emit_nd_sizes(dest_trans) << ">("
           << dest_buffer << ");\n";
    for (size_t i = 0; i < n_axes; i++)
    {
        std::string index_var = writer.generate_temporary_name("i");
        // Only the outermost loop (i == 0) gets the OpenMP pragma.
        writer << start_index_loop(index_var, source_start_corner[i], source_end_corner[i], i == 0);
        writer.indent++;
        index_vars.push_back(index_var);
    }
    writer << dest_nd_name << emit_nd_index(dest_trans, index_vars) << " = " << source_nd_name
           << emit_nd_index(source_trans, index_vars) << ";\n";
    for (size_t i = n_axes; i-- > 0;)
    {
        writer.indent--;
        writer << end_index_loop(index_vars[i]);
    }
}
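A usage sketch (buffer names and shapes hypothetical): to copy a whole 2x3 source into the middle of a 4x6 destination at offset (1, 2), the source transform is an identity over the input shape while the destination transform carries the start offsets, exactly as emit_concat arranges above.

    codegen::CodeWriter writer;
    CoordinateTransform input_transform(Shape{2, 3});
    CoordinateTransform output_chunk_transform(
        Shape{4, 6}, Coordinate{1, 2}, Coordinate{3, 5});
    emit_pointwise_copy(
        writer, "float", "src_buf", "dst_buf", input_transform, output_chunk_transform);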
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------------------------------------------------------
#pragma once
#include "ngraph/codegen/code_writer.hpp"
#include "ngraph/common.hpp"
#include "ngraph/coordinate_transform.hpp"
namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            namespace kernels
            {
                std::vector<std::string> emit_multi_indices(CoordinateTransform trans,
                                                            std::vector<std::string> index_vars);
                std::string emit_linear_index(CoordinateTransform trans,
                                              std::vector<std::string> index_vars);
                std::string
                    start_index_loop(std::string index_var, size_t start, size_t end, bool omp);
                std::string end_index_loop(std::string index_var);
                std::string emit_nd_sizes(CoordinateTransform trans);
                std::string emit_nd_index(CoordinateTransform trans,
                                          std::vector<std::string> index_vars);
                void emit_pointwise_copy(codegen::CodeWriter& writer,
                                         std::string element_type,
                                         std::string source_buffer,
                                         std::string dest_buffer,
                                         CoordinateTransform source_trans,
                                         CoordinateTransform dest_trans);
            }
        }
    }
}
@@ -22,7 +22,6 @@ include_directories(
)
set (SRC
-    backend_performance.cpp
    builder.cpp
    builder_autobroadcast.cpp
    build_graph.cpp

@@ -69,7 +68,7 @@ endif()
if(NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR)
    include_directories(SYSTEM ${LLVM_INCLUDE_DIR})
    link_directories(${LLVM_LIB_DIR})
-    set(SRC ${SRC} codegen.cpp)
+    set(SRC ${SRC} backend_performance.cpp codegen.cpp)
    set(BACKEND_NAMES ${BACKEND_NAMES} "CPU")
endif()
...
@@ -22,6 +22,7 @@
#include "ngraph/codegen/execution_engine.hpp"
#include "ngraph/file_util.hpp"
#include "ngraph/log.hpp"
+#include "ngraph/ops/concatenate.hpp"
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/call_frame.hpp"
#include "ngraph/runtime/cpu/cpu_call_frame.hpp"

@@ -33,6 +34,13 @@
using namespace std;
using namespace ngraph;
+template <typename T>
+static void copy_data(shared_ptr<runtime::TensorView> tv, const vector<T>& data)
+{
+    size_t data_size = data.size() * sizeof(T);
+    tv->write(data.data(), 0, data_size);
+}
// Starting point CPU: 1.2ms/iteration
shared_ptr<runtime::TensorView> make_tensor(runtime::Backend& backend, const ValueType& value)

@@ -124,3 +132,115 @@ TEST(benchmark, mxnet_10_bucket_lstm)
        NGRAPH_INFO << p.name() << ", " << p.total_microseconds();
    }
}
+//
+// Benchmarks a graph that concatenates six 32x1x200 arrays along the middle axis.
+//
+TEST(benchmark, concat_32x1x200_axis1_6)
+{
+    const size_t n_arrays = 6;
+    Shape shape_of_each_array = Shape{32, 1, 200};
+    size_t concatenation_axis = 1;
+    Shape result_shape;
+    result_shape = shape_of_each_array;
+    result_shape[concatenation_axis] *= n_arrays;
+    size_t elements_per_array = 1;
+    for (size_t d : shape_of_each_array)
+    {
+        elements_per_array *= d;
+    }
+    vector<vector<float>> data_arrays(n_arrays);
+    for (size_t i = 0; i < n_arrays; i++)
+    {
+        data_arrays[i] = vector<float>(elements_per_array);
+        for (size_t j = 0; j < elements_per_array; j++)
+        {
+            data_arrays[i][j] = float(j + 1);
+        }
+    }
+    bool using_ref_kernels = (std::getenv("NGRAPH_CPU_USE_REF_KERNELS") != nullptr);
+    vector<std::string> backend_names{"INTERPRETER", "NGVM", "CPU"};
+    vector<int> n_runs{200, 200, using_ref_kernels ? 200 : 200000}; // one for each backend
+    vector<std::function<void()>> test_callbacks;                   // one for each backend
+    vector<std::shared_ptr<runtime::TensorView>> result_tvs;        // one for each backend
+    for (std::string backend_name : backend_names)
+    {
+        vector<std::shared_ptr<op::Parameter>> params(n_arrays);
+        vector<std::shared_ptr<Node>> params_as_nodes(n_arrays);
+        for (size_t i = 0; i < n_arrays; i++)
+        {
+            auto param = make_shared<op::Parameter>(make_shared<TensorViewType>(
+                element::Float32::element_type(), shape_of_each_array));
+            params[i] = param;
+            params_as_nodes[i] = param;
+        }
+        auto concat = make_shared<op::Concat>(params_as_nodes, concatenation_axis);
+        auto f = make_shared<Function>(concat, params);
+        auto manager = runtime::Manager::get(backend_name);
+        auto external = manager->compile(f);
+        auto backend = manager->allocate_backend();
+        auto cf = backend->make_call_frame(external);
+        vector<shared_ptr<runtime::Value>> input_vals;
+        for (size_t i = 0; i < n_arrays; i++)
+        {
+            auto tv = backend->make_primary_tensor_view(element::Float32::element_type(),
+                                                        shape_of_each_array);
+            copy_data(tv, data_arrays[i]);
+            input_vals.push_back(tv);
+        }
+        auto result_tv =
+            backend->make_primary_tensor_view(element::Float32::element_type(), result_shape);
+        result_tvs.push_back(result_tv);
+        std::function<void()> cb = [input_vals, result_tv, cf]() {
+            cf->call(input_vals, {result_tv});
+        };
+        test_callbacks.push_back(cb);
+    }
+    for (size_t i = 0; i < backend_names.size(); i++)
+    {
+        std::cout << backend_names[i] << ": " << n_runs[i] << " tests in " << std::flush;
+        stopwatch sw;
+        std::function<void()> cb = test_callbacks[i];
+        sw.start();
+        for (int j = 0; j < n_runs[i]; j++)
+        {
+            cb();
+        }
+        sw.stop();
+        std::cout << sw.get_milliseconds() << "ms (" << (sw.get_microseconds() / n_runs[i])
+                  << " us/test)" << std::endl;
+    }
+    for (size_t i = 1; i < backend_names.size(); i++)
+    {
+        std::cout << "Verifying " << backend_names[i] << " result against " << backend_names[0]
+                  << "..." << std::flush;
+        if (result_tvs[i]->get_vector<float>() == result_tvs[0]->get_vector<float>())
+        {
+            std::cout << " OK" << std::endl;
+        }
+        else
+        {
+            std::cout << " FAILED" << std::endl;
+            ADD_FAILURE();
+        }
+    }
+}