Unverified Commit 9ede40cc authored by Robert Kimball, committed by GitHub

Merge branch 'master' into tfl/gpu_framework_codegen

parents ba9a2a25 3380928c
......@@ -58,6 +58,7 @@ nervana_aeon.egg-info/
# vim
*.swp
*.swo
tags
build/
scripts/
......
......@@ -18,3 +18,4 @@ set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DIN_NGRAPH_LIBRARY")
add_subdirectory(resource)
add_subdirectory(ngraph)
add_subdirectory(tools)
......@@ -76,6 +76,7 @@ set (SRC
ops/sum.cpp
ops/tan.cpp
ops/tanh.cpp
ops/util/arithmetic_reduction.cpp
ops/util/binary_elementwise_arithmetic.cpp
ops/util/binary_elementwise_comparison.cpp
ops/util/binary_elementwise.cpp
......@@ -175,6 +176,8 @@ if (NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR AND
runtime/cpu/cpu_tensor_view_wrapper.cpp
runtime/cpu/cpu_layout_descriptor.cpp
runtime/cpu/cpu_tracing.cpp
runtime/cpu/mkldnn_emitter.cpp
runtime/cpu/mkldnn_invoke.cpp
runtime/cpu/mkldnn_utils.cpp
runtime/cpu/ops/convert_layout.cpp
runtime/cpu/ops/matmul_bias.cpp
......
......@@ -90,8 +90,10 @@
#include "ngraph/ops/less.hpp"
#include "ngraph/ops/less_eq.hpp"
#include "ngraph/ops/log.hpp"
#include "ngraph/ops/max.hpp"
#include "ngraph/ops/max_pool.hpp"
#include "ngraph/ops/maximum.hpp"
#include "ngraph/ops/min.hpp"
#include "ngraph/ops/minimum.hpp"
#include "ngraph/ops/multiply.hpp"
#include "ngraph/ops/negative.hpp"
......@@ -102,6 +104,7 @@
#include "ngraph/ops/pad.hpp"
#include "ngraph/ops/parameter.hpp"
#include "ngraph/ops/power.hpp"
#include "ngraph/ops/product.hpp"
#include "ngraph/ops/reduce.hpp"
#include "ngraph/ops/reduce_window.hpp"
#include "ngraph/ops/remainder.hpp"
......
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include "ngraph/ops/util/arithmetic_reduction.hpp"
namespace ngraph
{
namespace op
{
/// \brief Max-reduction operation.
///
/// Reduces the tensor, eliminating the specified reduction axes by taking the maximum element.
///
/// This is equivalent to Reduce where `arg_init` = -inf and `reduction_function` is \f$f(x,y) = max(x,y)\f$.
///
/// ## Parameters
///
/// | | Description |
/// | -------------------- | -------------------------------------------- |
/// | `reduction_axes` | The axes to eliminate through max-reduction. |
///
/// ## Inputs
///
/// | | Type | Description |
/// | ----- | --------------------------------- | ------------------------------------------------------ |
/// | `arg` | \f$N[d_1,\dots,d_n]~(n \geq 0)\f$ | An input tensor of any shape and numeric element type. |
///
/// ## Output
///
/// | Type | Description |
/// | ----------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------- |
/// | \f$N[\textit{delete}(A,d_1,\dots,d_n)]\f$ | The tensor \f$T\f$, where \f$T\f$ is the input tensor with the `reduction_axes` \f$A\f$ eliminated by taking the maximum element. |
class Max : public util::ArithmeticReduction
{
public:
/// \brief Constructs a max-reduction operation.
///
/// \param arg The tensor view to be reduced.
/// \param reduction_axes The axis positions (0-based) to be eliminated.
Max(const std::shared_ptr<Node>& arg, const AxisSet& reduction_axes)
: ArithmeticReduction("Max", arg, reduction_axes)
{
}
virtual std::shared_ptr<Node> copy_with_new_args(
const std::vector<std::shared_ptr<Node>>& new_args) const override
{
if (new_args.size() != 1)
{
throw ngraph_error("Incorrect number of new arguments");
}
return std::make_shared<Max>(new_args.at(0), m_reduction_axes);
}
};
}
}
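// Editor's sketch (not part of this change): minimal use of the new Max op,
// mirroring the style of the type_prop tests later in this diff.
#include <memory>
#include "ngraph/ops/max.hpp"
#include "ngraph/ops/parameter.hpp"
void max_usage_sketch()
{
    // A 3x2 f32 input; reducing over axis 0 eliminates the rows.
    auto arg = std::make_shared<ngraph::op::Parameter>(ngraph::element::f32, ngraph::Shape{3, 2});
    auto max = std::make_shared<ngraph::op::Max>(arg, ngraph::AxisSet{0});
    // max->get_element_type() == ngraph::element::f32
    // max->get_shape() == ngraph::Shape{2}
}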
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include "ngraph/ops/util/arithmetic_reduction.hpp"
namespace ngraph
{
namespace op
{
/// \brief Min-reduction operation.
///
/// Reduces the tensor, eliminating the specified reduction axes by taking the minimum element.
///
/// This is equivalent to Reduce where `arg_init` = +inf and `reduction_function` is \f$f(x,y) = min(x,y)\f$.
///
/// ## Parameters
///
/// | | Description |
/// | -------------------- | -------------------------------------------- |
/// | `reduction_axes` | The axes to eliminate through min-reduction. |
///
/// ## Inputs
///
/// | | Type | Description |
/// | ----- | --------------------------------- | ------------------------------------------------------ |
/// | `arg` | \f$N[d_1,\dots,d_n]~(n \geq 0)\f$ | An input tensor of any shape and numeric element type. |
///
/// ## Output
///
/// | Type | Description |
/// | ----------------------------------------- | --------------------------------------------------------------------------------------------------------------------------------- |
/// | \f$N[\textit{delete}(A,d_1,\dots,d_n)]\f$ | The tensor \f$T\f$, where \f$T\f$ is the input tensor with the `reduction_axes` \f$A\f$ eliminated by taking the minimum element. |
class Min : public util::ArithmeticReduction
{
public:
/// \brief Constructs a min-reduction operation.
///
/// \param arg The tensor view to be reduced.
/// \param reduction_axes The axis positions (0-based) to be eliminated.
Min(const std::shared_ptr<Node>& arg, const AxisSet& reduction_axes)
: ArithmeticReduction("Min", arg, reduction_axes)
{
}
virtual std::shared_ptr<Node> copy_with_new_args(
const std::vector<std::shared_ptr<Node>>& new_args) const override
{
if (new_args.size() != 1)
{
throw ngraph_error("Incorrect number of new arguments");
}
return std::make_shared<Min>(new_args.at(0), m_reduction_axes);
}
};
}
}
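// Editor's sketch (not part of this change): Min (like Max and Product)
// inherits ArithmeticReduction's bounds check, so an out-of-range axis throws.
#include <memory>
#include "ngraph/ops/min.hpp"
#include "ngraph/ops/parameter.hpp"
void min_bounds_sketch()
{
    auto arg = std::make_shared<ngraph::op::Parameter>(ngraph::element::f32, ngraph::Shape{2, 4});
    try
    {
        // Axis 2 is out of bounds for a rank-2 input.
        auto bad = std::make_shared<ngraph::op::Min>(arg, ngraph::AxisSet{0, 2});
    }
    catch (const ngraph::ngraph_error& e)
    {
        // "Reduction axis for arithmetic reduction operator is out of bounds"
    }
}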
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include "ngraph/ops/util/arithmetic_reduction.hpp"
namespace ngraph
{
namespace op
{
/// \brief Product reduction operation.
///
/// Reduces the tensor, eliminating the specified reduction axes by taking the product.
///
/// \f[
/// \mathit{product}\left(\{0\},
/// \left[ \begin{array}{ccc}
/// 1 & 2 \\
/// 3 & 4 \\
/// 5 & 6 \end{array} \right]\right) =
/// \left[ (1 * 3 * 5), (2 * 4 * 6) \right] =
/// \left[ 15, 48 \right]~~~\text{(dimension 0 (rows) is eliminated)}
/// \f]
///
/// \f[
/// \mathit{product}\left(\{1\},
/// \left[ \begin{array}{ccc}
/// 1 & 2 \\
/// 3 & 4 \\
/// 5 & 6 \end{array} \right]\right) =
/// \left[ (1 * 2), (3 * 4), (5 * 6) \right] =
/// \left[ 2, 12, 30 \right]~~~\text{(dimension 1 (columns) is eliminated)}
/// \f]
///
/// \f[
/// \mathit{product}\left(\{0,1\},
/// \left[ \begin{array}{ccc}
/// 1 & 2 \\
/// 3 & 4 \\
/// 5 & 6 \end{array} \right]\right) =
/// (1 * 2) * (3 * 4) * (5 * 6) =
/// 720~~~\text{(both dimensions (rows and columns) are eliminated)}
/// \f]
///
/// This is equivalent to Reduce where `arg_init` = 1 and `reduction_function` is \f$f(x,y) = x*y\f$.
///
/// ## Parameters
///
/// | | Description |
/// | -------------------- | -------------------------------------- |
/// | `reduction_axes` | The axes to eliminate through product. |
///
/// ## Inputs
///
/// | | Type | Description |
/// | ----- | --------------------------------- | ------------------------------------------------------ |
/// | `arg` | \f$N[d_1,\dots,d_n]~(n \geq 0)\f$ | An input tensor of any shape and numeric element type. |
///
/// ## Output
///
/// | Type | Description |
/// | ----------------------------------------- | -------------------------------------------------------------------------------------------------------------- |
/// | \f$N[\textit{delete}(A,d_1,\dots,d_n)]\f$ | The tensor \f$T\f$, where \f$T\f$ is the input tensor with the `reduction_axes` \f$A\f$ eliminated by product. |
class Product : public util::ArithmeticReduction
{
public:
/// \brief Constructs a product reduction operation.
///
/// \param arg The tensor view to be reduced.
/// \param reduction_axes The axis positions (0-based) to be eliminated.
Product(const std::shared_ptr<Node>& arg, const AxisSet& reduction_axes)
: ArithmeticReduction("Product", arg, reduction_axes)
{
}
virtual std::shared_ptr<Node> copy_with_new_args(
const std::vector<std::shared_ptr<Node>>& new_args) const override
{
if (new_args.size() != 1)
{
throw ngraph_error("Incorrect number of new arguments");
}
return std::make_shared<Product>(new_args.at(0), m_reduction_axes);
}
};
}
}
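// Editor's sketch (not part of this change): the worked examples above, in code.
#include <memory>
#include "ngraph/ops/parameter.hpp"
#include "ngraph/ops/product.hpp"
void product_usage_sketch()
{
    // For input [[1, 2], [3, 4], [5, 6]], reducing over axis 0 yields [15, 48].
    auto arg = std::make_shared<ngraph::op::Parameter>(ngraph::element::i32, ngraph::Shape{3, 2});
    auto prod = std::make_shared<ngraph::op::Product>(arg, ngraph::AxisSet{0});
    // prod->get_shape() == ngraph::Shape{2}; reducing over {0, 1} instead
    // would yield a scalar (Shape{}) holding 720.
}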
......@@ -15,46 +15,11 @@
*******************************************************************************/
#include "ngraph/ops/sum.hpp"
#include "ngraph/function.hpp"
#include "ngraph/ops/broadcast.hpp"
using namespace std;
using namespace ngraph;
op::Sum::Sum(const std::shared_ptr<Node>& arg, const AxisSet& reduction_axes)
: RequiresTensorViewArgs("Sum", {arg})
, m_reduction_axes(reduction_axes)
{
auto& input = get_inputs().at(0);
auto& input_element_type = input.get_element_type();
if (input_element_type == element::boolean)
{
throw ngraph_error("Argument for sum must have numeric element type");
}
auto input_shape = input.get_shape();
for (auto axis : m_reduction_axes)
{
if (axis >= input_shape.size())
{
throw ngraph_error("Reduction axis for sum is out of bounds");
}
}
Shape result_shape;
for (size_t i = 0; i < input_shape.size(); i++)
{
if (m_reduction_axes.count(i) == 0)
{
result_shape.push_back(input_shape.at(i));
}
}
set_value_type_checked(input.get_element_type(), result_shape);
}
void op::Sum::generate_adjoints(autodiff::Adjoints& adjoints, const std::shared_ptr<Node>& delta)
{
auto x = get_inputs().at(0).get_output().get_node();
......
......@@ -16,7 +16,7 @@
#pragma once
#include "ngraph/ops/util/requires_tensor_view_args.hpp"
#include "ngraph/ops/util/arithmetic_reduction.hpp"
namespace ngraph
{
......@@ -76,14 +76,17 @@ namespace ngraph
/// | Type | Description |
/// | ----------------------------------------- | ---------------------------------------------------------------------------------------------------------------- |
/// | \f$N[\textit{delete}(A,d_1,\dots,d_n)]\f$ | The tensor \f$T\f$, where \f$T\f$ is the input tensor with the `reduction_axes` \f$A\f$ eliminated by summation. |
class Sum : public util::RequiresTensorViewArgs
class Sum : public util::ArithmeticReduction
{
public:
/// \brief Constructs a summation operation.
///
/// \param arg The tensor view to be summed.
/// \param reduction_axes The axis positions (0-based) to be eliminated.
Sum(const std::shared_ptr<Node>& arg, const AxisSet& reduction_axes);
Sum(const std::shared_ptr<Node>& arg, const AxisSet& reduction_axes)
: ArithmeticReduction("Sum", arg, reduction_axes)
{
}
virtual std::shared_ptr<Node> copy_with_new_args(
const std::vector<std::shared_ptr<Node>>& new_args) const override
......@@ -95,13 +98,9 @@ namespace ngraph
return std::make_shared<Sum>(new_args.at(0), m_reduction_axes);
}
/// \return The axis positions (0-based) to be eliminated through summation.
const AxisSet& get_reduction_axes() const { return m_reduction_axes; }
protected:
virtual void generate_adjoints(autodiff::Adjoints& adjoints,
const std::shared_ptr<Node>& delta) override;
AxisSet m_reduction_axes;
};
}
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "ngraph/ops/util/arithmetic_reduction.hpp"
using namespace std;
using namespace ngraph;
op::util::ArithmeticReduction::ArithmeticReduction(const std::string& node_type,
const std::shared_ptr<Node>& arg,
const AxisSet& reduction_axes)
: RequiresTensorViewArgs(node_type, {arg})
, m_reduction_axes(reduction_axes)
{
auto& input = get_inputs().at(0);
auto input_shape = input.get_shape();
for (auto axis : m_reduction_axes)
{
if (axis >= input_shape.size())
{
throw ngraph_error("Reduction axis for arithmetic reduction operator is out of bounds");
}
}
Shape result_shape;
for (size_t i = 0; i < input_shape.size(); i++)
{
if (m_reduction_axes.count(i) == 0)
{
result_shape.push_back(input_shape.at(i));
}
}
set_value_type_checked(input.get_element_type(), result_shape);
}
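// Editor's note: a hand-worked example of the shape rule implemented above
// (illustration only):
//   input_shape = {2, 4, 6}, reduction_axes = {0, 2}  ->  result_shape = {4}
//   input_shape = {2, 4, 6}, reduction_axes = {}      ->  result_shape = {2, 4, 6}
// The element type is carried through unchanged.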
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include "ngraph/ops/util/requires_tensor_view_args.hpp"
namespace ngraph
{
namespace op
{
namespace util
{
/// \brief Abstract base class for arithmetic reduction operations, i.e., operations where chosen axes of the input tensors
/// are eliminated (reduced out) by repeated application of a particular binary arithmetic operation.
class ArithmeticReduction : public RequiresTensorViewArgs
{
public:
/// \brief Constructs an arithmetic reduction operation.
///
/// \param node_type The node type (operation name) of the concrete reduction op.
/// \param arg Node that produces the input tensor to be reduced.
/// \param reduction_axes The axis positions (0-based) to be eliminated.
ArithmeticReduction(const std::string& node_type,
const std::shared_ptr<Node>& arg,
const AxisSet& reduction_axes);
/// \return The axis positions (0-based) to be eliminated through reduction.
const AxisSet& get_reduction_axes() const { return m_reduction_axes; }
protected:
AxisSet m_reduction_axes;
};
}
}
}
......@@ -142,6 +142,8 @@ void runtime::cpu::CPU_CallFrame::setup_runtime_context()
{
ctx->op_durations = new int64_t[m_external_function->get_op_attrs().size()];
}
const auto& mkldnn_emitter = m_external_function->get_mkldnn_emitter();
ctx->mkldnn_primitives = mkldnn_emitter->get_mkldnn_primitives().data();
}
void runtime::cpu::CPU_CallFrame::cleanup_runtime_context()
......
This diff is collapsed.
......@@ -61,8 +61,10 @@
#include "ngraph/ops/less.hpp"
#include "ngraph/ops/less_eq.hpp"
#include "ngraph/ops/log.hpp"
#include "ngraph/ops/max.hpp"
#include "ngraph/ops/max_pool.hpp"
#include "ngraph/ops/maximum.hpp"
#include "ngraph/ops/min.hpp"
#include "ngraph/ops/minimum.hpp"
#include "ngraph/ops/multiply.hpp"
#include "ngraph/ops/negative.hpp"
......@@ -73,6 +75,7 @@
#include "ngraph/ops/pad.hpp"
#include "ngraph/ops/parameter.hpp"
#include "ngraph/ops/power.hpp"
#include "ngraph/ops/product.hpp"
#include "ngraph/ops/reduce.hpp"
#include "ngraph/ops/reduce_window.hpp"
#include "ngraph/ops/remainder.hpp"
......@@ -223,6 +226,9 @@ static const runtime::cpu::OpMap dispatcher{
{TI(ngraph::op::Pad), &runtime::cpu::CPU_Emitter::emit<op::Pad>},
{TI(ngraph::op::BatchNorm), &runtime::cpu::CPU_Emitter::emit<op::BatchNorm>},
{TI(ngraph::op::MaxPoolBackprop), &runtime::cpu::CPU_Emitter::emit<op::MaxPoolBackprop>},
{TI(ngraph::op::Product), &runtime::cpu::CPU_Emitter::emit<op::Product>},
{TI(ngraph::op::Max), &runtime::cpu::CPU_Emitter::emit<op::Max>},
{TI(ngraph::op::Min), &runtime::cpu::CPU_Emitter::emit<op::Min>},
};
runtime::cpu::CPU_ExternalFunction::CPU_ExternalFunction(
......@@ -243,6 +249,8 @@ void runtime::cpu::CPU_ExternalFunction::compile()
string function_name = m_function->get_name();
m_mkldnn_emitter.reset(new MKLDNNEmitter(shared_from_this()));
ngraph::pass::Manager pass_manager;
pass_manager.register_pass<runtime::cpu::pass::CPUFusion>();
......@@ -278,15 +286,19 @@ void runtime::cpu::CPU_ExternalFunction::compile()
#include "ngraph/runtime/cpu/cpu_eigen_utils.hpp"
#include "ngraph/runtime/cpu/cpu_kernels.hpp"
#include "ngraph/runtime/cpu/cpu_runtime_context.hpp"
#include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
#include "ngraph/runtime/kernel/avg_pool.hpp"
#include "ngraph/runtime/kernel/broadcast.hpp"
#include "ngraph/runtime/kernel/concat.hpp"
#include "ngraph/runtime/kernel/convolution.hpp"
#include "ngraph/runtime/kernel/dot.hpp"
#include "ngraph/runtime/kernel/max.hpp"
#include "ngraph/runtime/kernel/max_pool.hpp"
#include "ngraph/runtime/kernel/min.hpp"
#include "ngraph/runtime/kernel/not.hpp"
#include "ngraph/runtime/kernel/one_hot.hpp"
#include "ngraph/runtime/kernel/pad.hpp"
#include "ngraph/runtime/kernel/product.hpp"
#include "ngraph/runtime/kernel/reduce.hpp"
#include "ngraph/runtime/kernel/reduce_window.hpp"
#include "ngraph/runtime/kernel/replace_slice.hpp"
......
......@@ -31,6 +31,7 @@
#include "ngraph/runtime/cpu/cpu_call_frame.hpp"
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
#include "ngraph/runtime/cpu/cpu_tensor_view_wrapper.hpp"
#include "ngraph/runtime/cpu/mkldnn_emitter.hpp"
#include "ngraph/runtime/external_function.hpp"
namespace ngraph
......@@ -80,6 +81,11 @@ namespace ngraph
const LayoutDescriptorPtrs& get_result_layout_descriptors();
const std::vector<OpAttributes>& get_op_attrs() const { return m_op_attrs; }
const std::unique_ptr<MKLDNNEmitter>& get_mkldnn_emitter() const
{
return m_mkldnn_emitter;
}
protected:
void compile();
......@@ -115,6 +121,8 @@ namespace ngraph
LayoutDescriptorPtrs parameter_layout_descriptors;
LayoutDescriptorPtrs result_layout_descriptors;
std::vector<OpAttributes> m_op_attrs;
std::unique_ptr<MKLDNNEmitter> m_mkldnn_emitter;
};
}
}
......
......@@ -17,6 +17,11 @@
#include <chrono>
#include <cstdint>
namespace mkldnn
{
class primitive;
}
namespace ngraph
{
namespace runtime
......@@ -31,6 +36,7 @@ namespace ngraph
struct CPURuntimeContext
{
int64_t* op_durations;
mkldnn::primitive* const* mkldnn_primitives;
};
}
}
......
......@@ -56,7 +56,16 @@ runtime::cpu::CPUTensorView::CPUTensorView(const ngraph::element::Type& element_
throw ngraph_error("Error allocating CPU Tensor View memory");
}
buffer = static_cast<char*>(ptr);
// GCC major versions below 5 do not implement C++11 std::align
#if !defined(__GNUC__) || __GNUC__ >= 5
std::align(BufferAlignment, buffer_size, ptr, allocation_size);
#else
ptr = static_cast<char*>(ptr) + (BufferAlignment - 1);
ptr = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(ptr) &
~(uintptr_t(BufferAlignment - 1)));
#endif
aligned_buffer = static_cast<char*>(ptr);
}
}
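// Editor's sketch (not part of this change): the fallback branch above rounds
// ptr up to the next multiple of BufferAlignment by hand. The same bit trick
// as a standalone helper (assumes alignment is a power of two; needs <cstdint>):
static inline void* align_up(void* p, std::uintptr_t alignment)
{
    auto v = reinterpret_cast<std::uintptr_t>(p);
    return reinterpret_cast<void*>((v + alignment - 1) & ~(alignment - 1));
}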
......
......@@ -69,3 +69,9 @@ bool runtime::cpu::TensorViewWrapper::is_output() const
{
return m_tensor_view->get_tensor().is_output();
}
const std::shared_ptr<descriptor::TensorView>
runtime::cpu::TensorViewWrapper::get_tensor_view() const
{
return m_tensor_view;
}
......@@ -45,6 +45,7 @@ public:
const std::string& get_name() const;
const std::string& get_type() const;
bool is_output() const;
const std::shared_ptr<descriptor::TensorView> get_tensor_view() const;
private:
std::shared_ptr<descriptor::TensorView> m_tensor_view;
......
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <memory>
#include "mkldnn_emitter.hpp"
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
#include "ngraph/runtime/cpu/cpu_tensor_view_wrapper.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
using namespace ngraph::runtime::cpu;
const std::vector<mkldnn::primitive*>& MKLDNNEmitter::get_mkldnn_primitives() const
{
return mkldnn_primitives;
}
size_t MKLDNNEmitter::insert_primitive(mkldnn::primitive* primitive)
{
mkldnn_primitives.emplace_back(primitive);
return (mkldnn_primitives.size() - 1);
}
const std::vector<size_t>& MKLDNNEmitter::get_primitive_deps(size_t index) const
{
return primitive_deps.at(index);
}
mkldnn::memory::desc MKLDNNEmitter::build_memory_descriptor(const TensorViewWrapper& tvw,
mkldnn::memory::format fmt) const
{
return mkldnn::memory::desc(
mkldnn::memory::dims(tvw.get_shape().begin(), tvw.get_shape().end()),
mkldnn_utils::GetDataType(tvw.get_element_type()),
fmt);
}
mkldnn::memory::desc MKLDNNEmitter::build_memory_descriptor(const TensorViewWrapper& tvw) const
{
auto layout =
std::static_pointer_cast<LayoutDescriptor>(tvw.get_tensor_view()->get_tensor_view_layout());
return build_memory_descriptor(tvw, layout->get_mkldnn_format());
}
mkldnn::memory MKLDNNEmitter::build_memory_primitive(const TensorViewWrapper& tvw) const
{
return mkldnn::memory({build_memory_descriptor(tvw), mkldnn_utils::global_cpu_engine}, nullptr);
}
size_t MKLDNNEmitter::build_memory_primitive(const mkldnn::memory::desc& desc)
{
// The MKL-DNN C++ API forces proper initialization of a memory primitive
// with a non-null pointer (unlike the C API). Primitives are initialized
// at runtime, so a known-invalid address is used here to bypass that check;
// the real data handle is bound later via mkldnn_utils::set_memory_ptr.
return insert_primitive(
new mkldnn::memory({desc, mkldnn_utils::global_cpu_engine}, reinterpret_cast<void*>(0x42)));
}
size_t MKLDNNEmitter::build_convolution_forward(const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& result_desc,
const ngraph::Strides& strides,
const ngraph::CoordinateDiff& padding_below,
const ngraph::CoordinateDiff& padding_above)
{
size_t input_data_index = build_memory_primitive(input_data_desc);
size_t weights_index = build_memory_primitive(weights_desc);
size_t result_index = build_memory_primitive(result_desc);
size_t conv_index = insert_primitive(new mkldnn::convolution_forward(
{{mkldnn::prop_kind::forward,
mkldnn::algorithm::convolution_direct,
input_data_desc,
weights_desc,
result_desc,
mkldnn::memory::dims(strides.begin(), strides.end()),
mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
mkldnn::padding_kind::zero},
mkldnn_utils::global_cpu_engine},
*mkldnn_primitives[input_data_index],
*mkldnn_primitives[weights_index],
*mkldnn_primitives[result_index]));
primitive_deps[conv_index] = {input_data_index, weights_index, result_index};
return conv_index;
}
size_t MKLDNNEmitter::build_convolution_forward(const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& result_desc,
const ngraph::Strides& strides,
const ngraph::Strides& dilation_strides,
const ngraph::CoordinateDiff& padding_below,
const ngraph::CoordinateDiff& padding_above)
{
size_t input_data_index = build_memory_primitive(input_data_desc);
size_t weights_index = build_memory_primitive(weights_desc);
size_t result_index = build_memory_primitive(result_desc);
size_t conv_index = insert_primitive(new mkldnn::convolution_forward(
{{mkldnn::prop_kind::forward,
mkldnn::algorithm::convolution_direct,
input_data_desc,
weights_desc,
result_desc,
mkldnn::memory::dims(strides.begin(), strides.end()),
mkldnn::memory::dims(dilation_strides.begin(), dilation_strides.end()),
mkldnn::memory::dims(padding_below.begin(), padding_below.end()),
mkldnn::memory::dims(padding_above.begin(), padding_above.end()),
mkldnn::padding_kind::zero},
mkldnn_utils::global_cpu_engine},
*mkldnn_primitives[input_data_index],
*mkldnn_primitives[weights_index],
*mkldnn_primitives[result_index]));
primitive_deps[conv_index] = {input_data_index, weights_index, result_index};
return conv_index;
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <memory>
#include <unordered_map>
#include <vector>
#include <mkldnn.hpp>
#include "ngraph/common.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
class CPU_ExternalFunction;
class TensorViewWrapper;
class MKLDNNEmitter
{
public:
MKLDNNEmitter(std::shared_ptr<CPU_ExternalFunction> ef)
: external_function(ef)
{
}
const std::vector<mkldnn::primitive*>& get_mkldnn_primitives() const;
size_t insert_primitive(mkldnn::primitive* primitive);
const std::vector<size_t>& get_primitive_deps(size_t index) const;
// TODO(jmenon): Get rid of TensorViewWrappers at some point
mkldnn::memory::desc build_memory_descriptor(const TensorViewWrapper& tvw,
mkldnn::memory::format fmt) const;
mkldnn::memory::desc build_memory_descriptor(const TensorViewWrapper& tvw) const;
mkldnn::memory build_memory_primitive(const TensorViewWrapper& tvw) const;
size_t build_memory_primitive(const mkldnn::memory::desc& desc);
size_t build_convolution_forward(const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& result_desc,
const ngraph::Strides& strides,
const ngraph::CoordinateDiff& padding_below,
const ngraph::CoordinateDiff& padding_above);
size_t build_convolution_forward(const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc,
const mkldnn::memory::desc& result_desc,
const ngraph::Strides& strides,
const ngraph::Strides& dilation_strides,
const ngraph::CoordinateDiff& padding_below,
const ngraph::CoordinateDiff& padding_above);
private:
std::shared_ptr<CPU_ExternalFunction> external_function;
std::vector<mkldnn::primitive*> mkldnn_primitives;
std::vector<mkldnn::stream> mkldnn_streams;
std::unordered_map<size_t, std::vector<size_t>> primitive_deps;
};
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <mkldnn.hpp>
#include "mkldnn_invoke.hpp"
#include "ngraph/runtime/cpu/cpu_runtime_context.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
mkldnn::engine ngraph::runtime::cpu::mkldnn_utils::global_cpu_engine(mkldnn::engine::cpu, 0);
extern "C" void ngraph::runtime::cpu::mkldnn_utils::set_memory_ptr(CPURuntimeContext* ctx,
size_t primitive_index,
void* ptr)
{
auto primitive = static_cast<mkldnn::memory*>(ctx->mkldnn_primitives[primitive_index]);
primitive->set_data_handle(ptr);
}
extern "C" void ngraph::runtime::cpu::mkldnn_utils::mkldnn_invoke_primitive(CPURuntimeContext* ctx,
size_t primitive_index)
{
mkldnn::stream s(mkldnn::stream::kind::eager);
s.submit({*ctx->mkldnn_primitives[primitive_index]}).wait();
}
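// Editor's sketch (not part of this change): the calling sequence the CPU
// emitter is expected to generate against these hooks. For a convolution
// built by MKLDNNEmitter::build_convolution_forward, the recorded deps are
// {input_index, weights_index, result_index}; the placeholder data handles
// from build_memory_primitive are bound first, then the op is executed:
//
//     mkldnn_utils::set_memory_ptr(ctx, input_index, input_ptr);
//     mkldnn_utils::set_memory_ptr(ctx, weights_index, weights_ptr);
//     mkldnn_utils::set_memory_ptr(ctx, result_index, result_ptr);
//     mkldnn_utils::mkldnn_invoke_primitive(ctx, conv_index);
//
// input_ptr, weights_ptr, and result_ptr are hypothetical tensor buffers.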
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <cstddef>
namespace ngraph
{
namespace runtime
{
namespace cpu
{
struct CPURuntimeContext;
namespace mkldnn_utils
{
extern "C" void
set_memory_ptr(CPURuntimeContext* ctx, size_t primitive_index, void* ptr);
extern "C" void mkldnn_invoke_primitive(CPURuntimeContext* ctx,
size_t primitive_index);
}
}
}
}
......@@ -17,6 +17,7 @@
#include <string>
#include <typeindex>
#include <typeinfo>
#include <unordered_map>
#include <unordered_set>
#include "ngraph/node.hpp"
......@@ -37,14 +38,37 @@ namespace ngraph
{
#define TI(x) std::type_index(typeid(x))
const std::unordered_set<std::type_index> s_op_registry{
static const std::unordered_set<std::type_index> s_op_registry{
TI(ngraph::op::AvgPool),
TI(ngraph::op::AvgPoolBackprop),
TI(ngraph::op::BatchNorm),
TI(ngraph::op::Convolution),
TI(ngraph::op::ConvolutionBackpropData),
TI(ngraph::op::ConvolutionBackpropFilters),
TI(ngraph::op::MaxPool),
TI(ngraph::op::BatchNorm)};
TI(ngraph::op::MaxPoolBackprop)};
static const std::unordered_map<std::string, const mkldnn::memory::data_type>
s_data_type_map{{"char", mkldnn::memory::data_type::s8},
{"float", mkldnn::memory::data_type::f32},
{"double", mkldnn::memory::data_type::data_undef},
{"int8_t", mkldnn::memory::data_type::s8},
{"int16_t", mkldnn::memory::data_type::s16},
{"int32_t", mkldnn::memory::data_type::s32},
{"int64_t", mkldnn::memory::data_type::data_undef},
{"uint8_t", mkldnn::memory::data_type::u8},
{"uint16_t", mkldnn::memory::data_type::data_undef},
{"uint32_t", mkldnn::memory::data_type::data_undef},
{"uint64_t", mkldnn::memory::data_type::data_undef}};
mkldnn::memory::data_type GetDataType(const ngraph::element::Type& et)
{
auto it = s_data_type_map.find(et.c_type_string());
if (it == s_data_type_map.end() ||
it->second == mkldnn::memory::data_type::data_undef)
throw ngraph_error("No MKLDNN data type exists for the given element type");
return it->second;
}
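// Editor's note: illustrative calls (the mappings follow s_data_type_map
// above; this is a sketch, not code from this change):
//   GetDataType(ngraph::element::f32) -> mkldnn::memory::data_type::f32
//   GetDataType(ngraph::element::f64) -> throws ngraph_error, since "double"
//   maps to data_undef in the table.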
bool IsMKLDNNOp(ngraph::Node& op)
{
......
......@@ -16,15 +16,11 @@
#pragma once
#include <string>
#include <typeindex>
#include <typeinfo>
#include <unordered_set>
#include <mkldnn.hpp>
#include "ngraph/node.hpp"
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
#include "ngraph/types/element_type.hpp"
namespace ngraph
{
......@@ -34,7 +30,12 @@ namespace ngraph
{
namespace mkldnn_utils
{
extern mkldnn::engine global_cpu_engine;
mkldnn::memory::data_type GetDataType(const ngraph::element::Type& et);
bool IsMKLDNNOp(ngraph::Node& op);
mkldnn::memory::format
CreateNativeDataFormat(const ngraph::runtime::cpu::LayoutDescriptor& layout);
}
......
......@@ -29,9 +29,12 @@
#include "ngraph/ops/constant.hpp"
#include "ngraph/ops/convolution.hpp"
#include "ngraph/ops/dot.hpp"
#include "ngraph/ops/max.hpp"
#include "ngraph/ops/max_pool.hpp"
#include "ngraph/ops/min.hpp"
#include "ngraph/ops/one_hot.hpp"
#include "ngraph/ops/pad.hpp"
#include "ngraph/ops/product.hpp"
#include "ngraph/ops/reduce.hpp"
#include "ngraph/ops/reduce_window.hpp"
#include "ngraph/ops/replace_slice.hpp"
......@@ -67,8 +70,10 @@
#include "ngraph/runtime/kernel/less.hpp"
#include "ngraph/runtime/kernel/less_eq.hpp"
#include "ngraph/runtime/kernel/log.hpp"
#include "ngraph/runtime/kernel/max.hpp"
#include "ngraph/runtime/kernel/max_pool.hpp"
#include "ngraph/runtime/kernel/maximum.hpp"
#include "ngraph/runtime/kernel/min.hpp"
#include "ngraph/runtime/kernel/minimum.hpp"
#include "ngraph/runtime/kernel/multiply.hpp"
#include "ngraph/runtime/kernel/negate.hpp"
......@@ -77,6 +82,7 @@
#include "ngraph/runtime/kernel/one_hot.hpp"
#include "ngraph/runtime/kernel/pad.hpp"
#include "ngraph/runtime/kernel/power.hpp"
#include "ngraph/runtime/kernel/product.hpp"
#include "ngraph/runtime/kernel/reduce.hpp"
#include "ngraph/runtime/kernel/reduce_window.hpp"
#include "ngraph/runtime/kernel/replace_slice.hpp"
......@@ -489,6 +495,15 @@ private:
reinterpret_cast<T*>(out[0]->get_data_ptr()),
out[0]->get_element_count());
}
else if (node_op == "Max")
{
const op::Max* max = static_cast<const op::Max*>(&node);
kernel::max<T>(reinterpret_cast<T*>(args[0]->get_data_ptr()),
reinterpret_cast<T*>(out[0]->get_data_ptr()),
args[0]->get_shape(),
out[0]->get_shape(),
max->get_reduction_axes());
}
else if (node_op == "Maximum")
{
kernel::maximum<T>(reinterpret_cast<T*>(args[0]->get_data_ptr()),
......@@ -524,6 +539,15 @@ private:
max_pool_backprop->get_padding_below(),
max_pool_backprop->get_padding_above());
}
else if (node_op == "Min")
{
const op::Min* min = static_cast<const op::Min*>(&node);
kernel::min<T>(reinterpret_cast<T*>(args[0]->get_data_ptr()),
reinterpret_cast<T*>(out[0]->get_data_ptr()),
args[0]->get_shape(),
out[0]->get_shape(),
min->get_reduction_axes());
}
else if (node_op == "Minimum")
{
kernel::minimum<T>(reinterpret_cast<T*>(args[0]->get_data_ptr()),
......@@ -589,6 +613,15 @@ private:
reinterpret_cast<T*>(out[0]->get_data_ptr()),
out[0]->get_element_count());
}
else if (node_op == "Product")
{
const op::Product* product = static_cast<const op::Product*>(&node);
kernel::product<T>(reinterpret_cast<T*>(args[0]->get_data_ptr()),
reinterpret_cast<T*>(out[0]->get_data_ptr()),
args[0]->get_shape(),
out[0]->get_shape(),
product->get_reduction_axes());
}
else if (node_op == "Reduce")
{
ngraph::op::Reduce* reduce = dynamic_cast<ngraph::op::Reduce*>(&node);
......
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <cmath>
#include <limits>
#include "ngraph/common.hpp"
#include "ngraph/coordinate_transform.hpp"
namespace ngraph
{
namespace runtime
{
namespace kernel
{
template <typename T>
void max(T* arg,
T* out,
const Shape& in_shape,
const Shape& out_shape,
const AxisSet& reduction_axes)
{
// Identity for max: -infinity when T has one, otherwise the smallest
// representable value of T.
T minval = std::numeric_limits<T>::has_infinity
? -std::numeric_limits<T>::infinity()
: std::numeric_limits<T>::min();
CoordinateTransform output_transform(out_shape);
for (const Coordinate& output_coord : output_transform)
{
out[output_transform.index(output_coord)] = minval;
}
CoordinateTransform input_transform(in_shape);
for (const Coordinate& input_coord : input_transform)
{
Coordinate output_coord = project_coordinate(input_coord, reduction_axes);
T x = arg[input_transform.index(input_coord)];
T max = out[output_transform.index(output_coord)];
if (x > max)
{
out[output_transform.index(output_coord)] = x;
}
}
}
}
}
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <cmath>
#include <limits>
#include "ngraph/common.hpp"
#include "ngraph/coordinate_transform.hpp"
namespace ngraph
{
namespace runtime
{
namespace kernel
{
template <typename T>
void min(T* arg,
T* out,
const Shape& in_shape,
const Shape& out_shape,
const AxisSet& reduction_axes)
{
// Identity for min: +infinity when T has one, otherwise the largest
// representable value of T.
T minval = std::numeric_limits<T>::has_infinity ? std::numeric_limits<T>::infinity()
: std::numeric_limits<T>::max();
CoordinateTransform output_transform(out_shape);
for (const Coordinate& output_coord : output_transform)
{
out[output_transform.index(output_coord)] = minval;
}
CoordinateTransform input_transform(in_shape);
for (const Coordinate& input_coord : input_transform)
{
Coordinate output_coord = project_coordinate(input_coord, reduction_axes);
T x = arg[input_transform.index(input_coord)];
T min = out[output_transform.index(output_coord)];
if (x < min)
{
out[output_transform.index(output_coord)] = x;
}
}
}
}
}
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include <cmath>
#include "ngraph/common.hpp"
#include "ngraph/coordinate_transform.hpp"
namespace ngraph
{
namespace runtime
{
namespace kernel
{
template <typename T>
void product(T* arg,
T* out,
const Shape& in_shape,
const Shape& out_shape,
const AxisSet& reduction_axes)
{
CoordinateTransform output_transform(out_shape);
for (const Coordinate& output_coord : output_transform)
{
out[output_transform.index(output_coord)] = 1;
}
CoordinateTransform input_transform(in_shape);
for (const Coordinate& input_coord : input_transform)
{
Coordinate output_coord = project_coordinate(input_coord, reduction_axes);
out[output_transform.index(output_coord)] *=
arg[input_transform.index(input_coord)];
}
}
}
}
}
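// Editor's note: all three kernels above share one pattern. Each output
// element is first set to the reduction identity (max: -infinity or the
// type's minimum, min: +infinity or the type's maximum, product: 1), then
// every input element is folded into the output coordinate obtained by
// deleting the reduction axes. Illustration of project_coordinate
// (not code from this change):
//   in_shape = {2, 3}, reduction_axes = {1}
//   input coords (0,0), (0,1), (0,2) all project to output coord (0)
//   input coords (1,0), (1,1), (1,2) all project to output coord (1)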
......@@ -43,8 +43,10 @@
#include "ngraph/ops/less.hpp"
#include "ngraph/ops/less_eq.hpp"
#include "ngraph/ops/log.hpp"
#include "ngraph/ops/max.hpp"
#include "ngraph/ops/max_pool.hpp"
#include "ngraph/ops/maximum.hpp"
#include "ngraph/ops/min.hpp"
#include "ngraph/ops/minimum.hpp"
#include "ngraph/ops/multiply.hpp"
#include "ngraph/ops/negative.hpp"
......@@ -53,6 +55,7 @@
#include "ngraph/ops/one_hot.hpp"
#include "ngraph/ops/pad.hpp"
#include "ngraph/ops/power.hpp"
#include "ngraph/ops/product.hpp"
#include "ngraph/ops/reduce.hpp"
#include "ngraph/ops/reduce_window.hpp"
#include "ngraph/ops/remainder.hpp"
......@@ -552,6 +555,11 @@ static shared_ptr<ngraph::Function>
{
node = make_shared<op::Log>(args[0]);
}
else if (node_op == "Max")
{
auto reduction_axes = node_js.at("reduction_axes").get<set<size_t>>();
node = make_shared<op::Max>(args[0], reduction_axes);
}
else if (node_op == "MaxPool")
{
auto window_shape = node_js.at("window_shape").get<vector<size_t>>();
......@@ -601,6 +609,11 @@ static shared_ptr<ngraph::Function>
{
node = make_shared<op::Maximum>(args[0], args[1]);
}
else if (node_op == "Min")
{
auto reduction_axes = node_js.at("reduction_axes").get<set<size_t>>();
node = make_shared<op::Min>(args[0], reduction_axes);
}
else if (node_op == "Minimum")
{
node = make_shared<op::Minimum>(args[0], args[1]);
......@@ -647,6 +660,11 @@ static shared_ptr<ngraph::Function>
{
node = make_shared<op::Power>(args[0], args[1]);
}
else if (node_op == "Product")
{
auto reduction_axes = node_js.at("reduction_axes").get<set<size_t>>();
node = make_shared<op::Product>(args[0], reduction_axes);
}
else if (node_op == "Reduce")
{
auto reduction_axes = node_js.at("reduction_axes").get<set<size_t>>();
......@@ -951,6 +969,11 @@ static json write(const Node& n)
else if (node_op == "Log")
{
}
else if (node_op == "Max")
{
auto tmp = dynamic_cast<const op::Max*>(&n);
node["reduction_axes"] = tmp->get_reduction_axes();
}
else if (node_op == "MaxPool")
{
auto tmp = dynamic_cast<const op::MaxPool*>(&n);
......@@ -970,6 +993,11 @@ static json write(const Node& n)
else if (node_op == "Maximum")
{
}
else if (node_op == "Min")
{
auto tmp = dynamic_cast<const op::Min*>(&n);
node["reduction_axes"] = tmp->get_reduction_axes();
}
else if (node_op == "Minimum")
{
}
......@@ -1004,6 +1032,11 @@ static json write(const Node& n)
node["shape"] = tmp->get_shape();
node["element_type"] = write_element_type(tmp->get_element_type());
}
else if (node_op == "Product")
{
auto tmp = dynamic_cast<const op::Product*>(&n);
node["reduction_axes"] = tmp->get_reduction_axes();
}
else if (node_op == "Power")
{
}
......
# ******************************************************************************
# Copyright 2017-2018 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
if(MKLDNN_INCLUDE_DIR)
link_directories(${MKLDNN_LIB_DIR})
endif()
if (NGRAPH_CPU_ENABLE)
set (SRC
nbench.cpp
${PROJECT_SOURCE_DIR}/test/util/benchmark.cpp
)
add_executable(nbench ${SRC})
add_dependencies(nbench ngraph)
set(HEADER_SEARCH_DEFINES
"NGRAPH_HEADERS_PATH=\"${NGRAPH_INCLUDE_PATH}\""
)
target_link_libraries(nbench ngraph)
set_source_files_properties(nbench.cpp PROPERTIES COMPILE_DEFINITIONS "${HEADER_SEARCH_DEFINES}")
endif()
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
// Tool to benchmark any ngraph JSON model with a given backend.
// compile and run with:
// g++ ./nbench.cpp -std=c++11 -I$HOME/ngraph_dist/include -L$HOME/ngraph_dist/lib -lngraph -o nbench
// env LD_LIBRARY_PATH=$HOME/ngraph_dist/lib env NGRAPH_INTERPRETER_EMIT_TIMING=1 ./nbench
// sample models are under ../../test/models
#include <fstream>
#include <ngraph/runtime/backend.hpp>
#include <ngraph/runtime/call_frame.hpp>
#include <ngraph/runtime/manager.hpp>
#include "../../test/util/benchmark.hpp"
#include "../../test/util/test_tools.hpp"
using namespace std;
int main(int argc, char** argv)
{
string model = "model.json";
string backend = "INTERPRETER";
int iter = 10;
bool failed = false;
for (int i = 1; i < argc; i++)
{
if (string(argv[i]) == "-f")
{
model = argv[++i];
}
else if (string(argv[i]) == "-b")
{
backend = argv[++i];
}
else if (string(argv[i]) == "-i")
{
try
{
iter = stoi(argv[++i]);
}
catch (...)
{
cout << "Invalid Argument\n";
failed = true;
}
}
}
if (!static_cast<bool>(ifstream(model)))
{
cout << "File " << model << " not found\n";
failed = true;
}
if (failed)
{
cout << R"###(
DESCRIPTION
Benchmark an ngraph JSON model with the given backend.
SYNOPSIS
nbench [-f <filename>] [-b <backend>] [-i <iterations>]
OPTIONS
-f Model JSON file to use (default: model.json)
-b Backend to use (default: INTERPRETER)
-i Iterations (default: 10)
)###";
return 1;
}
cout << "Benchmarking " << model << ", " << backend << " backend, " << iter << " iterations.\n";
run_benchmark(model, backend, iter);
}
......@@ -50,6 +50,7 @@ set (SRC
type_prop.cpp
util/autodiff/backprop_function.cpp
util/test_tools.cpp
util/benchmark.cpp
util.cpp
uuid.cpp
)
......
......@@ -1455,3 +1455,88 @@ TEST(${BACKEND_NAME}, backwards_reverse_3d_02)
};
EXPECT_TRUE(autodiff_numeric_compare<float>(manager, backend, make_graph, {x}, .01f, .01f));
}
TEST(${BACKEND_NAME}, backwards_maxpool_n4c1h4w4_kh2kw2_sh1sw1)
{
auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto backend = manager->allocate_backend();
Shape shape_a{4, 1, 4, 4}; // in NCHW
Shape maxpool_shape{4, 1, 3, 3};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape window_shape{2, 2};
auto window_movement_strides = Strides{1, 1};
auto maxpool = make_shared<op::MaxPool>(A, window_shape, window_movement_strides);
auto f = make_shared<Function>(maxpool, op::Parameters{A});
shared_ptr<runtime::TensorView> ep =
backend->make_primary_tensor_view(element::f32, maxpool_shape);
vector<float> dataEp(shape_size(maxpool_shape), 4);
shared_ptr<runtime::TensorView> input =
backend->make_primary_tensor_view(element::f32, shape_a);
shared_ptr<runtime::TensorView> output =
backend->make_primary_tensor_view(element::f32, shape_a);
vector<float> dataInput{11, 65, 44, 28, 31, 33, 21, 66, 40, 49, 69, 57, 47, 30, 24, 27,
13, 56, 46, 60, 61, 41, 25, 42, 48, 53, 51, 43, 59, 58, 29, 71,
17, 22, 72, 18, 39, 35, 15, 38, 64, 52, 73, 67, 62, 50, 10, 68,
45, 63, 16, 14, 55, 54, 37, 20, 36, 12, 70, 34, 19, 26, 32, 23};
vector<float> expected{//delta
0, 8, 0, 0, 0, 0, 0, 4, 0, 8, 16, 0, 0, 0, 0, 0, 0, 4, 0, 4, 8, 0,
0, 0, 0, 4, 4, 0, 4, 4, 0, 4, 0, 0, 8, 0, 4, 0, 0, 0, 8, 0, 16, 0,
0, 0, 0, 0, 0, 8, 0, 0, 4, 0, 4, 0, 4, 0, 16, 0, 0, 0, 0, 0};
copy_data(ep, dataEp);
copy_data(input, dataInput);
auto C = make_shared<op::Parameter>(element::f32, maxpool_shape);
auto df = autodiff::backprop_function(f);
auto external = manager->compile(df);
auto cf = backend->make_call_frame(external);
cf->tensor_call({input, ep}, {output});
ASSERT_TRUE(read_vector<float>(output) == expected);
}
TEST(${BACKEND_NAME}, backwards_maxpool_n2c1h5w5_kh3kw3_sh2sw2)
{
auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto backend = manager->allocate_backend();
Shape shape_a{1, 2, 5, 5}; // in NCHW
Shape maxpool_shape{1, 2, 2, 2};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape window_shape{3, 3};
auto window_movement_strides = Strides{2, 2};
auto maxpool = make_shared<op::MaxPool>(A, window_shape, window_movement_strides);
auto f = make_shared<Function>(maxpool, op::Parameters{A});
shared_ptr<runtime::TensorView> ep =
backend->make_primary_tensor_view(element::f32, maxpool_shape);
vector<float> dataEp(shape_size(maxpool_shape), 4);
shared_ptr<runtime::TensorView> input =
backend->make_primary_tensor_view(element::f32, shape_a);
shared_ptr<runtime::TensorView> output =
backend->make_primary_tensor_view(element::f32, shape_a);
vector<float> dataInput{58, 15, 51, 35, 18, 47, 31, 32, 52, 21, 36, 38, 57, 54, 25, 45, 23,
30, 16, 27, 48, 20, 41, 37, 43, 39, 22, 28, 33, 29, 12, 17, 44, 42,
19, 40, 10, 46, 34, 53, 26, 55, 50, 13, 24, 14, 49, 56, 59, 11};
vector<float> expected{//delta
4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0};
copy_data(ep, dataEp);
copy_data(input, dataInput);
auto C = make_shared<op::Parameter>(element::f32, maxpool_shape);
auto df = autodiff::backprop_function(f);
auto external = manager->compile(df);
auto cf = backend->make_call_frame(external);
cf->tensor_call({input, ep}, {output});
ASSERT_TRUE(read_vector<float>(output) == expected);
}
\ No newline at end of file
......@@ -31,92 +31,13 @@
#include "ngraph/runtime/manager.hpp"
#include "ngraph/serializer.hpp"
#include "ngraph/util.hpp"
#include "util/benchmark.hpp"
#include "util/random.hpp"
#include "util/test_tools.hpp"
using namespace std;
using namespace ngraph;
static multimap<size_t, string>
aggregate_timing(const vector<runtime::PerformanceCounter>& perf_data)
{
unordered_map<string, size_t> timing;
for (const runtime::PerformanceCounter& p : perf_data)
{
string op = p.name().substr(0, p.name().find('_'));
timing[op] += p.microseconds();
}
multimap<size_t, string> rc;
for (const pair<string, size_t>& t : timing)
{
rc.insert({t.second, t.first});
}
return rc;
}
void run_benchmark(const string& json_path, const string& backend_name, size_t iterations)
{
string env_var_name = "NGRAPH_" + backend_name + "_EMIT_TIMING";
bool emit_timing = (std::getenv(env_var_name.c_str()) != nullptr);
if (!emit_timing)
{
cout << "To get per-op timing set the environment variable " << env_var_name << "\n";
}
test::Uniform<float> rng{-1, 1, 0};
const string json_string = file_util::read_file_to_string(json_path);
stringstream ss(json_string);
shared_ptr<Function> f = ngraph::deserialize(ss);
stopwatch build_time;
build_time.start();
auto manager = runtime::Manager::get(backend_name);
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
build_time.stop();
cout << "build_time " << build_time.get_milliseconds() << "ms" << endl;
vector<shared_ptr<runtime::TensorView>> args;
for (shared_ptr<op::Parameter> param : f->get_parameters())
{
auto tensor =
backend->make_primary_tensor_view(param->get_element_type(), param->get_shape());
rng.initialize(tensor);
args.push_back(tensor);
}
vector<shared_ptr<runtime::TensorView>> results;
for (shared_ptr<Node> out : f->get_results())
{
auto result = backend->make_primary_tensor_view(out->get_element_type(), out->get_shape());
results.push_back(result);
}
stopwatch t1;
t1.start();
for (size_t i = 0; i < static_cast<size_t>(iterations); i++)
{
cf->tensor_call(args, results);
}
t1.stop();
float time = t1.get_milliseconds();
cout << time / iterations << "ms per iteration" << endl;
vector<runtime::PerformanceCounter> perf_data = cf->get_performance_data();
sort(perf_data.begin(),
perf_data.end(),
[](const runtime::PerformanceCounter& p1, const runtime::PerformanceCounter& p2) {
return p1.total_microseconds() > p2.total_microseconds();
});
multimap<size_t, string> timing = aggregate_timing(perf_data);
for (auto it = timing.rbegin(); it != timing.rend(); it++)
{
cout.imbue(locale(""));
cout << setw(15) << left << it->second << " " << setw(10) << right << it->first << "us\n";
}
}
TEST(benchmark, mxnet_mnist_mlp_forward)
{
const string json_path = file_util::path_join(SERIALIZED_ZOO, "mxnet/mnist_mlp_forward.json");
......
This diff is collapsed.
......@@ -546,4 +546,4 @@ TEST(pattern, variance)
auto var_graph = construct_variance_graph();
ASSERT_TRUE(n.match(var_graph, variance));
ASSERT_EQ(n.get_pattern_map()[var_graph], variance);
}
\ No newline at end of file
}
......@@ -631,7 +631,7 @@ TEST(type_prop, reduce_nonscalar)
{
auto r0 = make_shared<op::Reduce>(param_0, param_1, f, AxisSet{0});
// Should have thrown, so fail if it didn't
FAIL() << "Did not detect incorrect element types for arithmetic operator";
FAIL() << "Did not detect non-scalar initial value for reduce";
}
catch (const ngraph_error& error)
{
......@@ -656,7 +656,7 @@ TEST(type_prop, reduce_elem_type_mismatch)
{
auto r0 = make_shared<op::Reduce>(param_0, param_1, f, AxisSet{0});
// Should have thrown, so fail if it didn't
FAIL() << "Did not detect incorrect element types for arithmetic operator";
FAIL() << "Did not detect element type mismatch for reduce";
}
catch (const ngraph_error& error)
{
......@@ -816,7 +816,7 @@ TEST(type_prop, reduce_axis_oob)
{
auto r = make_shared<op::Reduce>(param_0, param_1, f, AxisSet{0, 2, 1});
// Should have thrown, so fail if it didn't
FAIL() << "Did not detect incorrect element types for arithmetic operator";
FAIL() << "Did not detect out-of-bound axis for reduce";
}
catch (const ngraph_error& error)
{
......@@ -6048,3 +6048,45 @@ TEST(type_prop, pad_deduce_interior_padding_wrong_rank)
FAIL() << "Deduced type check failed for unexpected reason";
}
}
TEST(type_prop, sum_deduce)
{
auto param_0 = make_shared<op::Parameter>(element::f32, Shape{2, 4});
auto r0 = make_shared<op::Sum>(param_0, AxisSet{0});
ASSERT_EQ(r0->get_element_type(), element::f32);
ASSERT_EQ(r0->get_shape(), (Shape{4}));
auto r1 = make_shared<op::Sum>(param_0, AxisSet{1});
ASSERT_EQ(r1->get_element_type(), element::f32);
ASSERT_EQ(r1->get_shape(), (Shape{2}));
auto r01 = make_shared<op::Sum>(param_0, AxisSet{0, 1});
ASSERT_EQ(r01->get_element_type(), element::f32);
ASSERT_EQ(r01->get_shape(), (Shape{}));
auto r_none = make_shared<op::Sum>(param_0, AxisSet{});
ASSERT_EQ(r_none->get_element_type(), element::f32);
ASSERT_EQ(r_none->get_shape(), (Shape{2, 4}));
}
TEST(type_prop, sum_axis_oob)
{
auto param_0 = make_shared<op::Parameter>(element::f32, Shape{2, 4});
try
{
auto r = make_shared<op::Sum>(param_0, AxisSet{0, 2, 1});
// Should have thrown, so fail if it didn't
FAIL() << "Did not detect out-of-bound axis for sum";
}
catch (const ngraph_error& error)
{
EXPECT_EQ(error.what(),
std::string("Reduction axis for arithmetic reduction operator is out of bounds"));
}
catch (...)
{
FAIL() << "Deduced type check failed for unexpected reason";
}
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "benchmark.hpp"
#include "ngraph/runtime/backend.hpp"
#include "ngraph/runtime/call_frame.hpp"
#include "ngraph/runtime/manager.hpp"
#include "ngraph/runtime/tensor_view.hpp"
#include "ngraph/serializer.hpp"
#include "random.hpp"
std::multimap<size_t, std::string>
aggregate_timing(const std::vector<ngraph::runtime::PerformanceCounter>& perf_data)
{
std::unordered_map<std::string, size_t> timing;
for (const ngraph::runtime::PerformanceCounter& p : perf_data)
{
std::string op = p.name().substr(0, p.name().find('_'));
timing[op] += p.microseconds();
}
std::multimap<size_t, std::string> rc;
for (const std::pair<std::string, size_t>& t : timing)
{
rc.insert({t.second, t.first});
}
return rc;
}
void run_benchmark(const std::string& json_path, const std::string& backend_name, size_t iterations)
{
using namespace std;
using namespace ngraph;
string env_var_name = "NGRAPH_" + backend_name + "_EMIT_TIMING";
bool emit_timing = (std::getenv(env_var_name.c_str()) != nullptr);
if (!emit_timing)
{
cout << "To get per-op timing set the environment variable " << env_var_name << "\n";
}
ngraph::test::Uniform<float> rng{-1, 1, 0};
const string json_string = file_util::read_file_to_string(json_path);
stringstream ss(json_string);
shared_ptr<Function> f = deserialize(ss);
stopwatch build_time;
build_time.start();
auto manager = runtime::Manager::get(backend_name);
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
build_time.stop();
cout << "build_time " << build_time.get_milliseconds() << "ms" << endl;
vector<shared_ptr<runtime::TensorView>> args;
for (shared_ptr<op::Parameter> param : f->get_parameters())
{
auto tensor =
backend->make_primary_tensor_view(param->get_element_type(), param->get_shape());
rng.initialize(tensor);
args.push_back(tensor);
}
vector<shared_ptr<runtime::TensorView>> results;
for (shared_ptr<Node> out : f->get_results())
{
auto result = backend->make_primary_tensor_view(out->get_element_type(), out->get_shape());
results.push_back(result);
}
stopwatch t1;
t1.start();
for (size_t i = 0; i < static_cast<size_t>(iterations); i++)
{
cf->tensor_call(args, results);
}
t1.stop();
float time = t1.get_milliseconds();
cout << time / iterations << "ms per iteration" << endl;
vector<runtime::PerformanceCounter> perf_data = cf->get_performance_data();
sort(perf_data.begin(),
perf_data.end(),
[](const runtime::PerformanceCounter& p1, const runtime::PerformanceCounter& p2) {
return p1.total_microseconds() > p2.total_microseconds();
});
multimap<size_t, string> timing = aggregate_timing(perf_data);
for (auto it = timing.rbegin(); it != timing.rend(); it++)
{
cout.imbue(locale(""));
cout << setw(15) << left << it->second << " " << setw(10) << right << it->first << "us\n";
}
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <ngraph/runtime/call_frame.hpp>
#include "test_tools.hpp"
/// performance test utilities
std::multimap<size_t, std::string>
aggregate_timing(const std::vector<ngraph::runtime::PerformanceCounter>& perf_data);
void run_benchmark(const std::string& json_path,
const std::string& backend_name,
size_t iterations);
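// Editor's note: typical invocation, matching how nbench drives this utility
// above (illustration only):
//
//     run_benchmark("model.json", "INTERPRETER", 10);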
......@@ -21,7 +21,9 @@
#include <memory>
#include "ngraph/descriptor/layout/tensor_view_layout.hpp"
#include "ngraph/file_util.hpp"
#include "ngraph/runtime/tensor_view.hpp"
#include "ngraph/serializer.hpp"
#define SKIP_TEST_FOR(backend_to_skip, current_backend) \
if (backend_to_skip == current_backend) \
......@@ -80,3 +82,4 @@ size_t count_ops_of_type(std::shared_ptr<ngraph::Function> f)
return count;
}