Commit 48c52da7 authored by nikolay.korovaiko

Merge remote-tracking branch 'origin/master' into krovatkin/rs_concat

parents 02a6b07c a534274f
......@@ -105,6 +105,7 @@ option(NGRAPH_GPU_ENABLE "Control the building of the GPU backend" FALSE)
option(NGRAPH_INTERPRETER_ENABLE "Control the building of the INTERPRETER backend" TRUE)
option(NGRAPH_NOP_ENABLE "Control the building of the NOP backend" TRUE)
option(NGRAPH_GPUH_ENABLE "Control the building of the Hybrid GPU backend" FALSE)
option(NGRAPH_GENERIC_CPU_ENABLE "Control the building of the generic CPU backend" FALSE)
option(NGRAPH_DISTRIBUTED_ENABLE "Add distributed mode to the CPU backend" FALSE)
option(NGRAPH_DEBUG_ENABLE "Enable output for NGRAPH_DEBUG statements" FALSE)
option(NGRAPH_ONNX_IMPORT_ENABLE "Enable ONNX importer" FALSE)
......@@ -125,6 +126,7 @@ message(STATUS "NGRAPH_GPU_ENABLE: ${NGRAPH_GPU_ENABLE}")
message(STATUS "NGRAPH_INTERPRETER_ENABLE: ${NGRAPH_INTERPRETER_ENABLE}")
message(STATUS "NGRAPH_NOP_ENABLE: ${NGRAPH_NOP_ENABLE}")
message(STATUS "NGRAPH_GPUH_ENABLE: ${NGRAPH_GPUH_ENABLE}")
message(STATUS "NGRAPH_GENERIC_CPU_ENABLE: ${NGRAPH_GENERIC_CPU_ENABLE}")
message(STATUS "NGRAPH_DISTRIBUTED_ENABLE: ${NGRAPH_DISTRIBUTED_ENABLE}")
message(STATUS "NGRAPH_DEBUG_ENABLE: ${NGRAPH_DEBUG_ENABLE}")
message(STATUS "NGRAPH_ONNX_IMPORT_ENABLE: ${NGRAPH_ONNX_IMPORT_ENABLE}")
......
......@@ -6,11 +6,11 @@
/cmake/ @rkimballn1 @silee2
/.ci/ @aslepko @crlishka
/.ci/travis/ @postrational
/.ci/onnx/ @postrational
/contrib/docker/ @aslepko @crlishka
/.travis.yml @postrational
/.ci/ @aslepko
/.ci/travis/ @aslepko @postrational
/.ci/onnx/ @aslepko @postrational
/contrib/docker/ @aslepko
/.travis.yml @aslepko @postrational
/.clang-format @rkimballn1
/.gitattributes @rkimballn1
......
......@@ -22,7 +22,7 @@ include(ExternalProject)
#------------------------------------------------------------------------------
set(CLDNN_GIT_REPO_URL https://github.com/intel/clDNN.git)
set(CLDNN_GIT_LABEL 02add7c4ce2baa81e2a32fa02d733dcc4f013108)
set(CLDNN_GIT_LABEL f91d7d83d8f121e4e159776b108e316f2f08bdf5)
set(BOOST_VERSION 1.64.0)
set(OUT_DIR ${EXTERNAL_PROJECTS_ROOT}/cldnn/out)
......@@ -66,5 +66,5 @@ else()
ExternalProject_Get_Property(ext_cldnn SOURCE_DIR BINARY_DIR)
add_dependencies(libcldnn ext_cldnn)
target_include_directories(libcldnn SYSTEM INTERFACE ${SOURCE_DIR}/api)
target_link_libraries(libcldnn INTERFACE ${SOURCE_DIR}/build/out/Linux64/Release/libclDNN64.so)
target_link_libraries(libcldnn INTERFACE ${SOURCE_DIR}/build/out/Linux64/${CMAKE_BUILD_TYPE}/${CMAKE_SHARED_LIBRARY_PREFIX}clDNN64${CMAKE_SHARED_LIBRARY_SUFFIX})
endif()
......@@ -42,3 +42,7 @@ add_custom_command(
)
add_custom_target(python_wheel DEPENDS ngraph ${CMAKE_BINARY_DIR}/python/dist/)
if (NGRAPH_CPU_ENABLE)
add_dependencies(python_wheel ext_mkldnn)
endif()
......@@ -374,6 +374,10 @@ class BuildExt(build_ext):
build_ext.build_extensions(self)
if sys.platform == 'darwin':
# This turns out to be needed when building using Anaconda python on macOS.
os.environ['MACOSX_DEPLOYMENT_TARGET'] = '10.9'
with open(os.path.join(PYNGRAPH_ROOT_DIR, 'requirements.txt')) as req:
requirements = req.read().splitlines()
......
......@@ -147,6 +147,7 @@ set (SRC
pass/nop_elimination.cpp
pass/pass.cpp
pass/pass_config.cpp
pass/prefix_reshape_elimination.cpp
pass/propagate_cacheability.cpp
pass/reshape_elimination.cpp
pass/reshape_sinking.cpp
......
......@@ -99,6 +99,14 @@ namespace ngraph
return op_list;
}
bool is_operator_supported(const std::string& op_name,
std::int64_t version,
const std::string& domain)
{
return OperatorsBridge::is_operator_registered(
op_name, version, domain == "ai.onnx" ? "" : domain);
}
} // namespace onnx_import
} // namespace ngraph
......@@ -52,6 +52,18 @@ namespace ngraph
std::set<std::string> get_supported_operators(std::int64_t version,
const std::string& domain);
/// \brief Determines whether an ONNX operator is supported.
///
/// \param[in] op_name The ONNX operator name.
/// \param[in] version The ONNX operator set version.
/// \param[in] domain The domain the ONNX operator is registered to.
///
/// \return true if the operator is supported, false otherwise.
///
bool is_operator_supported(const std::string& op_name,
std::int64_t version,
const std::string& domain = "ai.onnx");
/// \brief Convert an ONNX model to an nGraph function.
/// The function translates a serialized ONNX model into an nGraph function. The serialized
/// ONNX model is read from an input stream.
......
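A minimal usage sketch of the query added above (the include path is assumed from this repository's layout and is not part of the patch):

#include <iostream>
#include "ngraph/frontend/onnx_import/onnx.hpp" // assumed public header path

int main()
{
    // Domain defaults to "ai.onnx"; the version falls back to the nearest
    // registered opset at or below the requested one.
    const bool ok = ngraph::onnx_import::is_operator_supported("Add", 7);
    std::cout << (ok ? "supported" : "unsupported") << std::endl;
    return 0;
}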
......@@ -21,6 +21,7 @@
#include <unordered_map>
#include "core/attribute.hpp"
#include "ngraph/log.hpp"
#include "op/abs.hpp"
#include "op/acos.hpp"
#include "op/add.hpp"
......@@ -102,20 +103,19 @@ namespace ngraph
{
namespace detail
{
const Operator& find(const std::string& name,
std::int64_t version,
const std::string& domain,
const std::map<std::int64_t, Operator>& map)
const std::map<std::int64_t, Operator>::const_iterator
find(std::int64_t version, const std::map<std::int64_t, Operator>& map)
{
std::map<std::int64_t, Operator>::const_iterator it{};
while (version > 0)
{
const auto it = map.find(version--);
it = map.find(version--);
if (it != std::end(map))
{
return it->second;
return it;
}
}
throw error::UnsupportedVersion{name, version, domain};
return it;
}
}
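The reworked lookup above resolves a requested opset version to the nearest registered version at or below it, and signals a miss by returning the end iterator. A standalone approximation of that behaviour, using plain standard-library types rather than the bridge's real ones:

#include <cassert>
#include <cstdint>
#include <map>

int main()
{
    // Hypothetical registrations at opset versions 1 and 7.
    std::map<std::int64_t, int> registered{{1, 100}, {7, 700}};
    auto find_fallback = [&](std::int64_t version) {
        auto it = registered.end();
        while (version > 0)
        {
            it = registered.find(version--);
            if (it != registered.end())
            {
                break;
            }
        }
        return it;
    };
    assert(find_fallback(9)->second == 700);          // falls back from 9 to 7
    assert(find_fallback(5)->second == 100);          // falls back from 5 to 1
    assert(find_fallback(0) == registered.end());     // nothing registered at or below 0
    return 0;
}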
......@@ -136,13 +136,51 @@ namespace ngraph
{
throw error::UnknownDomain{domain};
}
if (version > OperatorsBridge::LATEST_SUPPORTED_OPSET_VERSION)
{
NGRAPH_WARN << "Currently operator set version: " << version << " is unsupported."
<< " Falling back to: "
<< OperatorsBridge::LATEST_SUPPORTED_OPSET_VERSION;
}
for (const auto& op : dm->second)
{
result.emplace(op.first, detail::find(op.first, version, domain, op.second));
const auto& it = detail::find(version, op.second);
if (it == std::end(op.second))
{
throw error::UnsupportedVersion{op.first, version, domain};
}
result.emplace(op.first, it->second);
}
return result;
}
bool OperatorsBridge::_is_operator_registered(const std::string& name,
std::int64_t version,
const std::string& domain)
{
// search for domain
auto dm_map = m_map.find(domain);
if (dm_map == std::end(m_map))
{
return false;
}
// search for name
auto op_map = dm_map->second.find(name);
if (op_map == std::end(dm_map->second))
{
return false;
}
if (detail::find(version, op_map->second) != std::end(op_map->second))
{
return true;
}
else
{
return false;
}
}
#define REGISTER_OPERATOR(name_, ver_, fn_) \
m_map[""][name_].emplace(ver_, std::bind(op::set_##ver_::fn_, std::placeholders::_1))
......
......@@ -62,6 +62,8 @@ namespace ngraph
class OperatorsBridge
{
public:
static constexpr const int LATEST_SUPPORTED_OPSET_VERSION = ONNX_OPSET_VERSION;
OperatorsBridge(const OperatorsBridge&) = delete;
OperatorsBridge& operator=(const OperatorsBridge&) = delete;
OperatorsBridge(OperatorsBridge&&) = delete;
......@@ -80,6 +82,13 @@ namespace ngraph
instance()._register_operator(name, version, domain, std::move(fn));
}
static bool is_operator_registered(const std::string& name,
std::int64_t version,
const std::string& domain)
{
return instance()._is_operator_registered(name, version, domain);
}
private:
std::unordered_map<std::string,
std::unordered_map<std::string, std::map<std::int64_t, Operator>>>
......@@ -98,6 +107,9 @@ namespace ngraph
const std::string& domain,
Operator fn);
OperatorSet _get_operator_set(std::int64_t version, const std::string& domain);
bool _is_operator_registered(const std::string& name,
std::int64_t version,
const std::string& domain);
};
} // namespace onnx_import
......
......@@ -30,7 +30,7 @@ using namespace ngraph;
void ngraph::default_logger_handler_func(const string& s)
{
cout << s << endl;
cout << s + "\n";
}
LogHelper::LogHelper(LOG_TYPE type,
......
......@@ -14,7 +14,7 @@
// limitations under the License.
//*****************************************************************************
#include "ngraph/runtime/plaidml/plaidml_pass_reshape_elision.hpp"
#include "ngraph/pass/prefix_reshape_elimination.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/op/reshape.hpp"
#include "ngraph/op/util/binary_elementwise_arithmetic.hpp"
......@@ -24,7 +24,7 @@
#include "ngraph/pattern/op/any_of.hpp"
#include "ngraph/pattern/op/label.hpp"
ngraph::runtime::plaidml::pass::ReshapeElision::ReshapeElision()
ngraph::pass::PrefixReshapeElimination::PrefixReshapeElimination()
{
auto src_op = std::make_shared<pattern::op::Label>(
element::i8, Shape{}, [](std::shared_ptr<Node>) { return true; });
......@@ -39,13 +39,10 @@ ngraph::runtime::plaidml::pass::ReshapeElision::ReshapeElision()
}
// Validate that this isn't a reordering-reshape.
for (std::size_t idx = 0; idx < reshape->get_input_order().size(); ++idx)
{
if (idx != reshape->get_input_order().at(idx))
if (reshape->get_is_transpose())
{
return false;
}
}
// Make sure that logical dimension sizes match.
const Shape& src_shape = reshape->get_input_shape(0);
......
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/pass/graph_rewrite.hpp"
namespace ngraph
{
namespace pass
{
class PrefixReshapeElimination;
}
}
// A pass to eliminate reshapes whose output shapes are the same as
// their input shape modulo leading size-1 axes.
//
// N.B. This pass MUST only be used by backends that can handle the
// omission of leading size-1 axes, e.g. backends that implement
// NumPy-style broadcast semantics.
class ngraph::pass::PrefixReshapeElimination final : public ngraph::pass::GraphRewrite
{
public:
PrefixReshapeElimination();
};
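For illustration (hypothetical shapes, not taken from the patch): a Reshape producing Shape{1, 1, 3, 4} from an input of Shape{3, 4} only prepends size-1 axes and is a candidate for elimination, while a Reshape whose input order permutes axes (get_is_transpose()) is left untouched. A registration sketch, assuming the usual nGraph pass-manager flow; the helper name is made up, and in this patch the pass is actually registered by the PlaidML backend during compilation:

#include <memory>
#include "ngraph/function.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/pass/prefix_reshape_elimination.hpp"

// Hypothetical helper shown only to illustrate how the pass plugs in.
void eliminate_prefix_reshapes(const std::shared_ptr<ngraph::Function>& f)
{
    ngraph::pass::Manager pass_manager;
    pass_manager.register_pass<ngraph::pass::PrefixReshapeElimination>();
    pass_manager.run_passes(f);
}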
......@@ -37,6 +37,10 @@ if (NGRAPH_GPUH_ENABLE)
add_subdirectory(gpuh)
endif()
if (NGRAPH_GENERIC_CPU_ENABLE)
add_subdirectory(generic_cpu)
endif()
if (NGRAPH_PLAIDML_ENABLE)
add_subdirectory(plaidml)
endif()
......@@ -145,3 +145,8 @@ vector<runtime::PerformanceCounter>
}
return rc;
}
bool runtime::cpu::CPU_Backend::is_supported(const Node& op) const
{
return true;
}
......@@ -58,6 +58,8 @@ namespace ngraph
std::vector<PerformanceCounter>
get_performance_data(std::shared_ptr<Function> func) const override;
bool is_supported(const Node& node) const override;
private:
class FunctionInstance
{
......
......@@ -1982,10 +1982,10 @@ void runtime::cpu::CPU_ExternalFunction::build()
file_util::path_join(s_debug_dir, m_function_name + "_debug.txt");
std::stringstream ss;
ss << "EXECUTION PLAN:\n";
ss << "\nEXECUTION PLAN:\n";
for (size_t i = 0; i < functors.size(); i++)
{
ss << op_names.at(i) << "will be executed with the following inputs:\n";
ss << op_names.at(i) << " will be executed with the following inputs:\n";
for (auto is : this->m_op_attrs.at(i).Inputs)
{
ss << "\t" << is << " = " << this->get_tensor_data(is) << std::endl;
......
# ******************************************************************************
# Copyright 2017-2018 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
if (NGRAPH_GENERIC_CPU_ENABLE)
find_package(OpenMP)
if (OPENMP_FOUND)
set(CMAKE_C_FLAGS "${CMAKE_C_FLAGS} ${OpenMP_C_FLAGS}")
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} ${OpenMP_CXX_FLAGS}")
endif()
add_library(gcpu_backend SHARED gcpu_backend.cpp node_wrapper.cpp)
if(NGRAPH_LIB_VERSIONING_ENABLE)
set_target_properties(gcpu_backend PROPERTIES
VERSION ${NGRAPH_VERSION}
SOVERSION ${NGRAPH_API_VERSION})
endif()
target_link_libraries(gcpu_backend PRIVATE ngraph libeigen hybrid_base interpreter_backend)
target_compile_options(gcpu_backend PUBLIC -fopenmp)
set_target_properties(gcpu_backend PROPERTIES LIBRARY_OUTPUT_DIRECTORY ${NGRAPH_BUILD_DIR})
install(TARGETS gcpu_backend
LIBRARY DESTINATION "${NGRAPH_INSTALL_LIB}"
ARCHIVE DESTINATION "${NGRAPH_INSTALL_LIB}"
)
endif()
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <Eigen/Dense>
#include <cmath>
#include <omp.h>
#include <utility>
#include "ngraph/runtime/reference/broadcast.hpp"
#include "ngraph/shape_util.hpp"
#include "ngraph/util.hpp"
namespace ngraph
{
namespace runtime
{
namespace gcpu
{
namespace kernel
{
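// Splits [0, size) into one contiguous chunk per OpenMP thread; outside a
// parallel region there is a single thread, so the whole range is returned.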
std::tuple<size_t, size_t> get_start_finish(size_t size)
{
const size_t nthreads = omp_get_num_threads();
const size_t ithread = omp_get_thread_num();
const size_t start = ithread * size / nthreads;
const size_t finish = (ithread + 1) * size / nthreads;
return std::make_tuple(start, finish);
}
template <typename T>
void broadcast_2d(const T* in,
T* out,
const Shape& in_shape,
const Shape& out_shape,
const AxisSet& broadcast_axes)
{
size_t index[2];
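// in_shape has rank 1 here, so exactly one output axis is not a broadcast
// axis; out_index aliases that axis's loop counter and serves as the index
// into the 1-D input.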
size_t* out_index =
(broadcast_axes.find(0) == broadcast_axes.end() ? &index[0] : &index[1]);
for (index[0] = 0; index[0] < out_shape[0]; ++index[0])
{
for (index[1] = 0; index[1] < out_shape[1]; ++index[1])
{
out[index[0] * out_shape[1] + index[1]] = in[*out_index];
}
}
}
// #define PARALLEL
template <typename T>
void broadcast_3d(const T* in,
T* out,
const Shape& in_shape,
const Shape& out_shape,
const AxisSet& broadcast_axes)
{
#ifdef PARALLEL
#pragma omp parallel
#endif
{
size_t start;
size_t finish;
#ifdef PARALLEL
std::tie(start, finish) = get_start_finish(out_shape[0]);
#else
start = 0;
finish = out_shape[0];
#endif
size_t index[3];
size_t* out_index = 0;
for (size_t i = 0; i < 3; i++)
{
if (broadcast_axes.count(i) == 0)
{
out_index = &index[i];
break;
}
}
for (index[0] = start; index[0] < finish; ++index[0])
{
for (index[1] = 0; index[1] < out_shape[1]; ++index[1])
{
for (index[2] = 0; index[2] < out_shape[2]; ++index[2])
{
out[index[0] * out_shape[1] * out_shape[2] +
index[1] * out_shape[2] + index[2]] = in[*out_index];
}
}
}
}
}
template <typename T>
void broadcast_4d(const T* in,
T* out,
const Shape& in_shape,
const Shape& out_shape,
const AxisSet& broadcast_axes)
{
size_t index[4];
size_t* out_index = 0;
for (size_t i = 0; i < 4; i++)
{
if (broadcast_axes.count(i) == 0)
{
out_index = &index[i];
break;
}
}
for (index[0] = 0; index[0] < out_shape[0]; ++index[0])
{
for (index[1] = 0; index[1] < out_shape[1]; ++index[1])
{
for (index[2] = 0; index[2] < out_shape[2]; ++index[2])
{
for (index[3] = 0; index[3] < out_shape[3]; ++index[3])
{
out[index[0] * out_shape[1] * out_shape[2] * out_shape[3] +
index[1] * out_shape[2] * out_shape[3] +
index[2] * out_shape[3] + index[3]] = in[*out_index];
}
}
}
}
}
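// Dispatch: a rank-0 input is splatted to every output element, a rank-1
// input broadcast to a rank-2/3/4 output uses the specialized kernels above,
// and inputs of rank 2 or higher fall back to the generic reference
// implementation.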
template <typename T>
void broadcast(const T* in,
T* out,
const Shape& in_shape,
const Shape& out_shape,
const AxisSet& broadcast_axes)
{
if (in_shape.size() == 0)
{
for (size_t i = 0; i < shape_size(out_shape); ++i)
{
out[i] = in[0];
}
}
else if (in_shape.size() == 1)
{
switch (out_shape.size())
{
case 2:
broadcast_2d<T>(in, out, in_shape, out_shape, broadcast_axes);
break;
case 3:
broadcast_3d<T>(in, out, in_shape, out_shape, broadcast_axes);
break;
case 4:
broadcast_4d<T>(in, out, in_shape, out_shape, broadcast_axes);
break;
}
}
else
{
runtime::reference::broadcast<T>(
in, out, in_shape, out_shape, broadcast_axes);
}
}
}
}
}
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <Eigen/Dense>
#include <cmath>
#include <omp.h>
#include <utility>
#include "ngraph/coordinate_transform.hpp"
#include "ngraph/shape_util.hpp"
namespace ngraph
{
namespace runtime
{
namespace gcpu
{
namespace kernel
{
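// The 2-D x 2-D case (a plain matrix product) takes the Eigen fast path;
// every other rank combination uses the generic coordinate-transform loop.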
template <typename T>
void dot(const T* arg0,
const T* arg1,
T* out,
const Shape& arg0_shape,
const Shape& arg1_shape,
const Shape& out_shape,
size_t reduction_axes_count)
{
if (arg0_shape.size() == 2 && arg1_shape.size() == 2 && out_shape.size() == 2)
{
Eigen::Map<
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
a0(const_cast<T*>(arg0), arg0_shape[0], arg0_shape[1]);
Eigen::Map<
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
a1(const_cast<T*>(arg1), arg1_shape[0], arg1_shape[1]);
Eigen::Map<
Eigen::Matrix<T, Eigen::Dynamic, Eigen::Dynamic, Eigen::RowMajor>>
o(const_cast<T*>(out), out_shape[0], out_shape[1]);
o = a0 * a1;
}
else
{
// Get the sizes of the dot axes. It's easiest to pull them from arg1 because they're
// right up front.
Shape dot_axis_sizes(reduction_axes_count);
std::copy(arg1_shape.begin(),
arg1_shape.begin() + reduction_axes_count,
dot_axis_sizes.begin());
CoordinateTransform arg0_transform(arg0_shape);
CoordinateTransform arg1_transform(arg1_shape);
CoordinateTransform output_transform(out_shape);
// Create coordinate transforms for arg0 and arg1 that throw away the dotted axes.
size_t arg0_projected_rank = arg0_shape.size() - reduction_axes_count;
size_t arg1_projected_rank = arg1_shape.size() - reduction_axes_count;
Shape arg0_projected_shape(arg0_projected_rank);
std::copy(arg0_shape.begin(),
arg0_shape.begin() + arg0_projected_rank,
arg0_projected_shape.begin());
Shape arg1_projected_shape(arg1_projected_rank);
std::copy(arg1_shape.begin() + reduction_axes_count,
arg1_shape.end(),
arg1_projected_shape.begin());
CoordinateTransform arg0_projected_transform(arg0_projected_shape);
CoordinateTransform arg1_projected_transform(arg1_projected_shape);
// Create a coordinate transform that allows us to iterate over all possible values
// for the dotted axes.
CoordinateTransform dot_axes_transform(dot_axis_sizes);
for (const Coordinate& arg0_projected_coord : arg0_projected_transform)
{
for (const Coordinate& arg1_projected_coord : arg1_projected_transform)
{
// The output coordinate is just the concatenation of the projected coordinates.
Coordinate out_coord(arg0_projected_coord.size() +
arg1_projected_coord.size());
auto out_coord_it = std::copy(arg0_projected_coord.begin(),
arg0_projected_coord.end(),
out_coord.begin());
std::copy(arg1_projected_coord.begin(),
arg1_projected_coord.end(),
out_coord_it);
// Zero out to start the sum.
T sum = 0;
size_t out_index = output_transform.index(out_coord);
// Walk along the dotted axes.
Coordinate arg0_coord(arg0_shape.size());
Coordinate arg1_coord(arg1_shape.size());
auto arg0_it = std::copy(arg0_projected_coord.begin(),
arg0_projected_coord.end(),
arg0_coord.begin());
for (const Coordinate& dot_axis_positions : dot_axes_transform)
{
// In order to find the points to multiply together, we need to inject our current
// positions along the dotted axes back into the projected arg0 and arg1 coordinates.
std::copy(dot_axis_positions.begin(),
dot_axis_positions.end(),
arg0_it);
auto arg1_it = std::copy(dot_axis_positions.begin(),
dot_axis_positions.end(),
arg1_coord.begin());
std::copy(arg1_projected_coord.begin(),
arg1_projected_coord.end(),
arg1_it);
// Multiply and add to the sum.
sum += arg0[arg0_transform.index(arg0_coord)] *
arg1[arg1_transform.index(arg1_coord)];
}
// Write the sum back.
out[out_index] = sum;
}
}
}
}
}
}
}
}
......@@ -16,27 +16,26 @@
#pragma once
#include "ngraph/pass/graph_rewrite.hpp"
#include <algorithm>
#include <cmath>
#include <numeric>
#include <vector>
#include "ngraph/shape.hpp"
namespace ngraph
{
namespace runtime
{
namespace plaidml
namespace gcpu
{
namespace pass
namespace kernel
{
class ReshapeElision;
template <typename T>
void result(const T* arg, T* out, size_t count)
{
memcpy(out, arg, sizeof(T) * count);
}
}
}
}
}
// A minor pass to elide unnecessary reshapes. A reshape is
// considered unnecessary if its output shape is the same as its input
// shape, modulo leading size-1 axes.
class ngraph::runtime::plaidml::pass::ReshapeElision final : public ngraph::pass::GraphRewrite
{
public:
ReshapeElision();
};
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/runtime/generic_cpu/node_wrapper.hpp"
using namespace ngraph;
using namespace std;
runtime::gcpu::NodeWrapper::NodeWrapper(const shared_ptr<const Node>& node)
: m_node{node}
{
// This expands the op list in op_tbl.hpp into a list of enumerations that look like this:
// {"Abs", runtime::gcpu::OP_TYPEID::Abs},
// {"Acos", runtime::gcpu::OP_TYPEID::Acos},
// ...
#define NGRAPH_OP(a, b) {#a, runtime::gcpu::OP_TYPEID::a},
static unordered_map<string, runtime::gcpu::OP_TYPEID> typeid_map{
#include "ngraph/op/op_tbl.hpp"
};
#undef NGRAPH_OP
auto it = typeid_map.find(m_node->description());
if (it != typeid_map.end())
{
m_typeid = it->second;
}
else
{
throw unsupported_op("Unsupported op '" + m_node->description() + "'");
}
}
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <memory>
#include "ngraph/node.hpp"
namespace ngraph
{
namespace runtime
{
namespace gcpu
{
enum class OP_TYPEID;
class NodeWrapper;
}
}
}
// This expands the op list in op_tbl.hpp into a list of enumerations that look like this:
// Abs,
// Acos,
// ...
#define NGRAPH_OP(a, b) a,
enum class ngraph::runtime::gcpu::OP_TYPEID
{
#include "ngraph/op/op_tbl.hpp"
};
#undef NGRAPH_OP
/// \brief This class allows adding an enum typeid to each Node. This makes dealing with
/// collections of Nodes a little easier and faster as we can use switch() instead of
/// if/else statements
class ngraph::runtime::gcpu::NodeWrapper
{
public:
NodeWrapper(const std::shared_ptr<const ngraph::Node>& node);
const Node& get_node() const { return *m_node; }
ngraph::runtime::gcpu::OP_TYPEID get_typeid() const { return m_typeid; }
private:
std::shared_ptr<const ngraph::Node> m_node;
OP_TYPEID m_typeid;
};
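A typical consumer switches on the wrapped type id; a sketch with one illustrative case (real backends cover every entry of op_tbl.hpp, and the dispatcher name here is made up):

#include "ngraph/runtime/generic_cpu/node_wrapper.hpp"

// Hypothetical dispatcher shown only to illustrate the enum-based switch.
void dispatch(const ngraph::runtime::gcpu::NodeWrapper& wrapper)
{
    switch (wrapper.get_typeid())
    {
    case ngraph::runtime::gcpu::OP_TYPEID::Abs:
        // handle Abs here
        break;
    default:
        // everything else is left to the real backend
        break;
    }
}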
......@@ -34,6 +34,7 @@
#include "ngraph/node.hpp"
#include "ngraph/pass/algebraic_simplification.hpp"
#include "ngraph/pass/any_all_replacement.hpp"
#include "ngraph/pass/get_output_element_elimination.hpp"
#include "ngraph/pass/like_replacement.hpp"
#include "ngraph/runtime/gpu/gpu_backend.hpp"
......@@ -173,6 +174,7 @@ void runtime::gpu::GPUCompiledFunction::compile()
pass_manager.register_pass<ngraph::pass::LikeReplacement>();
pass_manager.register_pass<runtime::gpu::pass::GPULayout>(this);
pass_manager.register_pass<ngraph::pass::AssignLayout<descriptor::layout::DenseTensorLayout>>();
pass_manager.register_pass<ngraph::pass::GetOutputElementElimination>();
pass_manager.register_pass<ngraph::pass::Liveness>();
pass_manager.register_pass<ngraph::pass::MemoryLayout>(get_memory_alignment());
pass_manager.register_pass<runtime::gpu::pass::TensorMemoryReservation>(
......
......@@ -18,13 +18,10 @@
#include "ngraph/graph_util.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/pass/visualize_tree.hpp"
#include "ngraph/runtime/gpu/gpu_backend.hpp"
#include "ngraph/runtime/gpu/gpu_tensor.hpp"
#include "ngraph/runtime/host_tensor.hpp"
#include "ngraph/runtime/hybrid/hybrid_util.hpp"
#include "ngraph/runtime/hybrid/pass/assign_placement.hpp"
#include "ngraph/runtime/hybrid/pass/fix_get_output_element.hpp"
#include "ngraph/runtime/interpreter/int_backend.hpp"
#include "ngraph/runtime/tensor.hpp"
using namespace ngraph;
......@@ -205,32 +202,6 @@ bool runtime::hybrid::HybridBackend::is_supported(const Node& node) const
return true;
}
string runtime::hybrid::HybridBackend::get_placement_name(const runtime::Tensor* t)
{
string rc;
if (dynamic_cast<const runtime::HostTensor*>(t) != nullptr)
{
rc = "HostTensor";
}
else if (dynamic_cast<const runtime::gpu::GPUTensor*>(t) != nullptr)
{
rc = "GPUTensor";
}
return rc;
}
string runtime::hybrid::HybridBackend::get_placement_name(const runtime::Backend* t)
{
string rc;
if (dynamic_cast<const runtime::interpreter::INTBackend*>(t) != nullptr)
{
rc = "INTBackend";
}
else if (dynamic_cast<const runtime::gpu::GPU_Backend*>(t) != nullptr)
{
rc = "GPU_Backend";
}
return rc;
}
size_t runtime::hybrid::HybridBackend::get_placement(const runtime::Tensor* t)
{
size_t index = 0;
......
......@@ -70,7 +70,5 @@ private:
std::map<std::shared_ptr<ngraph::Function>, FunctionInstance> m_function_map;
std::vector<std::shared_ptr<runtime::Backend>> m_backend_list;
std::string get_placement_name(const runtime::Tensor* t);
std::string get_placement_name(const runtime::Backend* t);
size_t get_placement(const runtime::Tensor* t);
};
......@@ -194,6 +194,8 @@ static void do_eltwise_operation(cldnn::topology& topology,
{
arguments_check(op, 2, 1);
// Leave it here for some time
#if USE_INTELGPU_CUSTOM_KERNELS
if ((get_input_type(op) == element::i32 || get_input_type(op) == element::i64) &&
(mode == cldnn::eltwise_mode::min || mode == cldnn::eltwise_mode::max))
{
......@@ -229,6 +231,12 @@ static void do_eltwise_operation(cldnn::topology& topology,
get_output_name(op), {get_input_name(op, 0), get_input_name(op, 1)}, mode);
topology.add(op_add);
}
#else
const cldnn::eltwise op_eltwise(
get_output_name(op), {get_input_name(op, 0), get_input_name(op, 1)}, mode);
topology.add(op_eltwise);
#endif
}
static void do_unary_operation(cldnn::topology& topology,
......@@ -382,7 +390,14 @@ runtime::intelgpu::IntelGPUBackend::IntelGPUBackend()
m_function_cache_disabled = true;
}
cldnn::engine_configuration cldnn_configuration(profiling);
cldnn::engine_configuration cldnn_configuration(profiling,
false,
m_cldnn_dump_enable,
string(),
string(),
true,
string(),
m_cldnn_dump_dir);
ocl_engine = make_shared<cldnn::engine>(cldnn_configuration);
}
......@@ -411,6 +426,14 @@ runtime::Handle runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function>
set<cldnn::primitive_id> func_output_names;
cldnn::topology topology;
stopwatch timer_compile;
double mem_before_compile = 0.0;
if (m_profile_enable)
{
mem_before_compile = get_max_memory_rss();
timer_compile.start();
}
if (m_dump_graph_enable)
{
......@@ -1800,6 +1823,13 @@ runtime::Handle runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function>
instance.ocl_network =
make_shared<cldnn::network>(*ocl_engine, topology, network_build_options);
if (m_profile_enable)
{
timer_compile.stop();
instance.m_compilation_time = timer_compile.get_milliseconds();
instance.m_consumed_memory = get_max_memory_rss() - mem_before_compile;
}
return func;
}
......@@ -1807,17 +1837,8 @@ bool runtime::intelgpu::IntelGPUBackend::call(shared_ptr<Function> func,
const vector<shared_ptr<runtime::Tensor>>& outputs,
const vector<shared_ptr<runtime::Tensor>>& inputs)
{
double mem_before_call = 0.0f;
double mem_after_compilation = 0.0f;
double mem_after_call = 0.0f;
double mem_call_consumed = 0.0f;
stopwatch timer_call;
stopwatch timer_compile;
if (m_profile_enable)
{
mem_before_call = get_max_memory_rss();
timer_compile.start();
}
FunctionInstance& instance = ocl_networks[func];
if (instance.ocl_network == nullptr)
......@@ -1827,8 +1848,7 @@ bool runtime::intelgpu::IntelGPUBackend::call(shared_ptr<Function> func,
if (m_profile_enable)
{
timer_compile.stop();
mem_after_compilation = get_max_memory_rss();
mem_call_consumed = get_max_memory_rss();
timer_call.start();
}
......@@ -1876,15 +1896,18 @@ bool runtime::intelgpu::IntelGPUBackend::call(shared_ptr<Function> func,
if (m_profile_enable)
{
timer_call.stop();
mem_after_call = get_max_memory_rss();
mem_call_consumed = get_max_memory_rss() - mem_call_consumed;
print_call_performance(network,
func,
timer_compile.get_milliseconds(),
instance.m_compilation_time,
timer_call.get_milliseconds(),
mem_before_call,
mem_after_compilation,
mem_after_call);
instance.m_consumed_memory,
mem_call_consumed,
get_max_memory_rss());
// Output compile time only once
instance.m_compilation_time = 0.0;
}
if (m_function_cache_disabled)
......@@ -1987,11 +2010,11 @@ static Node* get_node_by_name(const shared_ptr<Function> func, const string& nam
void runtime::intelgpu::IntelGPUBackend::print_call_performance(
const shared_ptr<cldnn::network> network,
const shared_ptr<Function> func,
size_t time_compile,
size_t time_call,
double mem_before_call,
double mem_after_compilation,
double mem_after_call) const
double time_compile,
double time_call,
double mem_compilation_consumed,
double mem_call_consumed,
double mem_current) const
{
struct data_item
{
......@@ -2102,10 +2125,10 @@ void runtime::intelgpu::IntelGPUBackend::print_call_performance(
}
// Print time and memory consumed in ::call function
cout << func_name << delim << " Backend compilation(ms)" << delim << time_compile << " call(ms)"
<< delim << time_call << delim << "memory before call(B)" << delim << mem_before_call
<< delim << "after compilation(B)" << delim << mem_after_compilation << delim
<< "after call(B)" << delim << mem_after_call << endl;
cout << func_name << delim << " Backend compilation(ms)" << delim << time_compile << delim
<< "call(ms)" << delim << time_call << delim << "memory consumption compile(B)" << delim
<< mem_compilation_consumed << delim << "call(B)" << delim << mem_call_consumed << delim
<< "RSS(B)" << delim << mem_current << endl;
cout.flags(saved_stream_flags); // Restore stream configuration to leave it in original state
}
......@@ -64,6 +64,8 @@ private:
public:
std::shared_ptr<cldnn::network> ocl_network = nullptr;
bool m_performance_counters_enabled = false;
double m_compilation_time = 0.0;
double m_consumed_memory = 0.0;
};
std::map<std::shared_ptr<Function>, FunctionInstance> ocl_networks;
......@@ -74,11 +76,11 @@ private:
// Statistic related things
void print_call_performance(const std::shared_ptr<cldnn::network> network,
const std::shared_ptr<Function> func,
size_t time_compile,
size_t time_call,
double mem_before_call,
double mem_after_compilation,
double mem_after_call) const;
double time_compile,
double time_call,
double mem_compilation_consumed,
double mem_call_consumed,
double mem_current) const;
bool m_profile_enable = false;
long m_profile_lines_limit_count = 10;
......
......@@ -20,6 +20,7 @@ batch_norm_inference_f64
batch_norm_training_0eps_f64
batch_norm_one_output
batch_norm_three_outputs
batch_norm_bprop_n4c3h2w2
dequantize
dequantize_axes
dequantize_int32
......
......@@ -54,7 +54,6 @@ set(SRC
plaidml_pass_lower_convolutions.cpp
plaidml_pass_replicate_combination.cpp
plaidml_pass_replicate_elision.cpp
plaidml_pass_reshape_elision.cpp
plaidml_pass_winograd.cpp
plaidml_tensor.cpp
plaidml_translate.cpp
......
......@@ -31,14 +31,15 @@ ngraph::runtime::plaidml::PlaidML_Backend::PlaidML_Backend(const char* configura
std::shared_ptr<ngraph::runtime::Tensor> ngraph::runtime::plaidml::PlaidML_Backend::create_tensor(
const ngraph::element::Type& element_type, const ngraph::Shape& shape)
{
return std::make_shared<PlaidML_Tensor>(&m_config, element_type, shape, "direct_data", nullptr);
return std::make_shared<PlaidML_Tensor>(
this, &m_config, element_type, shape, "direct_data", nullptr);
}
std::shared_ptr<ngraph::runtime::Tensor> ngraph::runtime::plaidml::PlaidML_Backend::create_tensor(
const ngraph::element::Type& element_type, const Shape& shape, void* memory_pointer)
{
return std::make_shared<PlaidML_Tensor>(
&m_config, element_type, shape, "direct_data", memory_pointer);
this, &m_config, element_type, shape, "direct_data", memory_pointer);
}
std::shared_ptr<ngraph::Function>
......
......@@ -26,6 +26,7 @@
#include "ngraph/pass/liveness.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/pass/nop_elimination.hpp"
#include "ngraph/pass/prefix_reshape_elimination.hpp"
#include "ngraph/pass/visualize_tree.hpp"
#include "ngraph/pass/zero_dim_tensor_elimination.hpp"
#include "ngraph/runtime/plaidml/plaidml_impl.hpp"
......@@ -36,7 +37,6 @@
#include "ngraph/runtime/plaidml/plaidml_pass_lower_convolutions.hpp"
#include "ngraph/runtime/plaidml/plaidml_pass_replicate_combination.hpp"
#include "ngraph/runtime/plaidml/plaidml_pass_replicate_elision.hpp"
#include "ngraph/runtime/plaidml/plaidml_pass_reshape_elision.hpp"
#include "ngraph/runtime/plaidml/plaidml_pass_winograd.hpp"
namespace
......@@ -101,9 +101,9 @@ std::shared_ptr<ngraph::runtime::plaidml::CompiledFunction>
pass_manager.register_pass<ngraph::runtime::plaidml::pass::ReplicateElision>();
pass_manager.register_pass<ngraph::runtime::plaidml::pass::ReplicateCombination>();
pass_manager.register_pass<ngraph::runtime::plaidml::pass::ImplicitBroadcast>();
pass_manager.register_pass<ngraph::runtime::plaidml::pass::ReshapeElision>();
pass_manager.register_pass<ngraph::pass::PrefixReshapeElimination>();
pass_manager.register_pass<ngraph::runtime::plaidml::pass::LowerConvolutions>();
if (m_config->winograd)
if (pass_manager.get_pass_config().get_pass_enable("Winograd"))
{
pass_manager.register_pass<ngraph::runtime::plaidml::pass::Winograd>();
}
......
......@@ -77,7 +77,6 @@ ngraph::runtime::plaidml::Config
bool help = false;
bool list = false;
bool debug = false;
bool winograd = false;
std::size_t device_idx = 0;
std::string eventlog_config;
std::string graphviz;
......@@ -242,14 +241,6 @@ ngraph::runtime::plaidml::Config
continue;
}
// Check for Winograd. (Winograd is sometimes a performance
// boost, but not always, so we make it optional.)
if (is_opt("winograd"))
{
winograd = true;
continue;
}
// Reject unknown options
err = true;
}
......@@ -257,7 +248,7 @@ ngraph::runtime::plaidml::Config
constexpr char help_text[] =
"PlaidML Backend Specification: \""
"PlaidML[:[device_index][,debug][,help][,list_devices][,"
"eventlog=<filename>][,graphviz=<filename>][,winograd]]\". For example: \"PlaidML\", \""
"eventlog=<filename>][,graphviz=<filename>]]\". For example: \"PlaidML\", \""
"PlaidML:0,list_devices\"";
if (err)
{
......@@ -292,7 +283,5 @@ ngraph::runtime::plaidml::Config
result.graphviz = graphviz;
result.winograd = winograd;
return result;
}
......@@ -39,6 +39,5 @@ struct ngraph::runtime::plaidml::Config
std::shared_ptr<vertexai::ctx> ctx;
std::shared_ptr<vertexai::plaidml::device> dev;
bool debug;
bool winograd;
std::string graphviz;
};
......@@ -166,7 +166,7 @@ namespace ngraph
{
Impl impl;
impl.set_build(build);
impl.set_op(dynamic_cast<const typename Impl::Op*>(op));
impl.set_op(static_cast<const typename Impl::Op*>(op));
impl.Apply();
}
};
......
......@@ -33,7 +33,7 @@ ngraph::runtime::plaidml::pass::ConcatElision::ConcatElision()
});
pattern::graph_rewrite_callback callback = [](pattern::Matcher& m) {
auto concat = std::dynamic_pointer_cast<ngraph::op::Concat>(m.get_match_root());
auto concat = std::static_pointer_cast<ngraph::op::Concat>(m.get_match_root());
auto args = concat->get_arguments();
// Elide one-argument concats.
......
......@@ -45,7 +45,7 @@ ngraph::runtime::plaidml::pass::ImplicitBroadcast::ImplicitBroadcast()
// for the broadcast axes.
auto src = m.get_matched_nodes().at(2);
Shape src_shape = src->get_shape();
auto broadcast = std::dynamic_pointer_cast<op::Broadcast>(m.get_matched_nodes().at(1));
auto broadcast = std::static_pointer_cast<op::Broadcast>(m.get_matched_nodes().at(1));
AxisVector reshape_order;
Shape reshape_shape;
......
......@@ -53,9 +53,7 @@ ngraph::runtime::plaidml::pass::LowerConvolutions::LowerConvolutions()
{
return reshape->get_input_order();
}
AxisVector result(node->get_shape().size());
std::iota(result.begin(), result.end(), 0);
return result;
return get_default_order(node->get_shape());
};
std::shared_ptr<Node> node = m.get_match_root();
......
......@@ -37,8 +37,8 @@ ngraph::runtime::plaidml::pass::ReplicateCombination::ReplicateCombination()
pattern::graph_rewrite_callback callback = [](pattern::Matcher& m) {
auto nodes = m.get_matched_nodes();
auto lower = std::dynamic_pointer_cast<plaidml::op::Replicate>(nodes.at(0));
auto upper = std::dynamic_pointer_cast<plaidml::op::Replicate>(nodes.at(1));
auto lower = std::static_pointer_cast<plaidml::op::Replicate>(nodes.at(0));
auto upper = std::static_pointer_cast<plaidml::op::Replicate>(nodes.at(1));
std::vector<size_t> axes = lower->get_replication_axes();
const std::vector<size_t>& upper_axes = upper->get_replication_axes();
auto uit = upper_axes.begin();
......
......@@ -51,7 +51,7 @@ ngraph::runtime::plaidml::pass::ReplicateElision::ReplicateElision()
for (auto nit = nodes.begin() + 1; nit != nodes.end(); ++nit)
{
auto replicate = std::dynamic_pointer_cast<plaidml::op::Replicate>(*nit);
auto replicate = std::static_pointer_cast<plaidml::op::Replicate>(*nit);
const auto& replicate_axes = replicate->get_replication_axes();
bool elidable = true;
for (std::size_t idx = 0; idx < dim_limit; ++idx)
......
......@@ -112,7 +112,7 @@ ngraph::runtime::plaidml::pass::Winograd::Winograd()
});
pattern::graph_rewrite_callback callback = [](pattern::Matcher& m) {
auto conv = std::dynamic_pointer_cast<plaidml::op::Convolution>(m.get_match_root());
auto conv = std::static_pointer_cast<plaidml::op::Convolution>(m.get_match_root());
NodeVector args = conv->get_arguments();
std::shared_ptr<ngraph::op::Constant> a;
std::shared_ptr<ngraph::op::Constant> b;
......
......@@ -22,12 +22,13 @@
namespace vp = vertexai::plaidml;
ngraph::runtime::plaidml::PlaidML_Tensor::PlaidML_Tensor(Config* config,
ngraph::runtime::plaidml::PlaidML_Tensor::PlaidML_Tensor(Backend* parent,
Config* config,
const ngraph::element::Type& element_type,
const ngraph::Shape& shape,
const std::string& name,
void* memory)
: Tensor{std::make_shared<ngraph::descriptor::Tensor>(element_type, shape, name)}
: Tensor{std::make_shared<ngraph::descriptor::Tensor>(element_type, shape, name), parent}
, m_tensor{config->dev->allocate(
to_plaidml(config->ctx, element_type, shape, ConversionUse::FOR_IO))}
, m_memory{memory}
......
......@@ -35,7 +35,8 @@ namespace ngraph
class ngraph::runtime::plaidml::PlaidML_Tensor final : public ngraph::runtime::Tensor
{
public:
PlaidML_Tensor(Config* config,
PlaidML_Tensor(Backend* parent,
Config* config,
const ngraph::element::Type& element_type,
const ngraph::Shape& shape,
const std::string& name,
......
......@@ -45,6 +45,9 @@ endif()
if (NGRAPH_PLAIDML_ENABLE)
target_link_libraries(nbench plaidml_backend)
endif()
if (NGRAPH_GENERIC_CPU_ENABLE)
target_link_libraries(nbench gcpu_backend)
endif()
if (NGRAPH_DISTRIBUTED_ENABLE)
target_compile_definitions(nbench PRIVATE NGRAPH_DISTRIBUTED)
......
......@@ -15,7 +15,9 @@
//*****************************************************************************
#include <random>
#if defined(__x86_64__) || defined(__amd64__)
#include <xmmintrin.h>
#endif
#include "benchmark.hpp"
#include "ngraph/file_util.hpp"
......@@ -107,53 +109,20 @@ void init_real_tv(shared_ptr<runtime::Tensor> tv, T min, T max)
static void random_init(shared_ptr<runtime::Tensor> tv)
{
element::Type et = tv->get_element_type();
if (et == element::boolean)
{
init_int_tv<char>(tv, 0, 1);
}
else if (et == element::f32)
{
init_real_tv<float>(tv, -1, 1);
}
else if (et == element::f64)
{
init_real_tv<double>(tv, -1, 1);
}
else if (et == element::i8)
{
init_int_tv<int8_t>(tv, -1, 1);
}
else if (et == element::i16)
{
init_int_tv<int16_t>(tv, -1, 1);
}
else if (et == element::i32)
{
init_int_tv<int32_t>(tv, 0, 1);
}
else if (et == element::i64)
{
init_int_tv<int64_t>(tv, -1, 1);
}
else if (et == element::u8)
{
init_int_tv<uint8_t>(tv, 0, 1);
}
else if (et == element::u16)
{
init_int_tv<uint16_t>(tv, 0, 1);
}
else if (et == element::u32)
{
init_int_tv<uint32_t>(tv, 0, 1);
}
else if (et == element::u64)
{
init_int_tv<uint64_t>(tv, 0, 1);
}
else
{
throw runtime_error("unsupported type");
switch (et.get_type_enum())
{
case element::Type_t::boolean: init_int_tv<char>(tv, 0, 1); break;
case element::Type_t::f32: init_real_tv<float>(tv, -1, 1); break;
case element::Type_t::f64: init_real_tv<double>(tv, -1, 1); break;
case element::Type_t::i8: init_int_tv<int8_t>(tv, -1, 1); break;
case element::Type_t::i16: init_int_tv<int16_t>(tv, -1, 1); break;
case element::Type_t::i32: init_int_tv<int32_t>(tv, 0, 1); break;
case element::Type_t::i64: init_int_tv<int64_t>(tv, -1, 1); break;
case element::Type_t::u8: init_int_tv<uint8_t>(tv, 0, 1); break;
case element::Type_t::u16: init_int_tv<uint16_t>(tv, 0, 1); break;
case element::Type_t::u32: init_int_tv<uint32_t>(tv, 0, 1); break;
case element::Type_t::u64: init_int_tv<uint64_t>(tv, 0, 1); break;
default: throw runtime_error("unsupported type");
}
}
......
......@@ -106,6 +106,10 @@ if (NGRAPH_PLAIDML_ENABLE)
set(ACTIVE_BACKEND_LIST ${ACTIVE_BACKEND_LIST} PlaidML)
endif()
if (NGRAPH_GENERIC_CPU_ENABLE)
set(ACTIVE_BACKEND_LIST ${ACTIVE_BACKEND_LIST} GCPU)
endif()
add_subdirectory(models)
add_subdirectory(files)
add_subdirectory(util)
......
......@@ -1660,7 +1660,7 @@ TEST(onnx, model_argmax_int32)
std::vector<std::int64_t>{1, 1, 1, 1, 1, 1}};
std::vector<std::vector<std::int64_t>> outputs{
execute<std::int32_t, std::int64_t>(function, inputs, "CPU")};
execute<std::int32_t, std::int64_t>(function, inputs, "INTERPRETER")};
EXPECT_TRUE(test::all_close(expected_output.front(), outputs.front()));
}
......@@ -1675,6 +1675,37 @@ TEST(onnx, model_argmin_int32)
std::vector<std::vector<std::int64_t>> expected_output{std::vector<std::int64_t>{0, 0, 0, 0}};
std::vector<std::vector<std::int64_t>> outputs{
execute<std::int32_t, std::int64_t>(function, inputs, "CPU")};
execute<std::int32_t, std::int64_t>(function, inputs, "INTERPRETER")};
EXPECT_TRUE(test::all_close(expected_output.front(), outputs.front()));
}
TEST(onnx, model_is_op_supported)
{
// Simple case
EXPECT_TRUE(onnx_import::is_operator_supported("Sum", 1, "ai.onnx"));
// With fallback
EXPECT_TRUE(onnx_import::is_operator_supported("Sum", 100, "ai.onnx"));
// Different opset versions
EXPECT_TRUE(onnx_import::is_operator_supported("Add", 1, "ai.onnx"));
EXPECT_TRUE(onnx_import::is_operator_supported("Add", 7, "ai.onnx"));
// Default domain name
EXPECT_TRUE(onnx_import::is_operator_supported("Sum", 1));
// Unregistered operator
EXPECT_FALSE(onnx_import::is_operator_supported("DummyOp", 1));
EXPECT_FALSE(onnx_import::is_operator_supported("DummyOp", 1, "ai.onnx"));
EXPECT_FALSE(onnx_import::is_operator_supported("DummyOp", 10, "ai.onnx"));
// Operator with bad domain name
EXPECT_FALSE(onnx_import::is_operator_supported("Sum", 1, "bad.domain"));
// Registered custom operator
onnx_import::register_operator(
"AddQ", 1, "com.intel.ai", [](const onnx_import::Node& node) -> NodeVector {
NodeVector ng_inputs{node.get_ng_inputs()};
return {std::make_shared<ngraph::op::Add>(ng_inputs.at(0), ng_inputs.at(1))};
});
EXPECT_TRUE(onnx_import::is_operator_supported("AddQ", 1, "com.intel.ai"));
}