Unverified commit b6197967 authored by Matthew Brookhart, committed by GitHub

Merge branch 'master' into ayzhuang/in-place-concat

parents ba848e20 5a32dfe4
@@ -128,6 +128,7 @@ def Notify() {
     String notifyPeople = "$ghprbPullAuthorEmail, $ghprbActualCommitAuthorEmail"
     Closure notifyMethod = { configMap ->
         if(currentBuild.result == "FAILURE") {
+            blue_ocean = "https://crackerjack.intel.com/blue/organizations/jenkins/onnx%2Fngraph_onnx_integration_ci/detail/ngraph_onnx_integration_ci/${BUILD_NUMBER}/pipeline"
             emailext (
                 subject: "NGraph-Onnx CI: NGraph PR $ghprbPullId $currentBuild.result!",
                 body: """
@@ -135,11 +136,9 @@ def Notify() {
                     <tr><td>Status:</td> <td>${currentBuild.result}</td></tr>
                     <tr><td>Repository</td> <td>$ghprbGhRepository</td></tr>
                     <tr><td>Branch:</td> <td>$ghprbSourceBranch</td></tr>
-                    <tr><td>Jenkins Job No:</td> <td>$BUILD_NUMBER</td></tr>
-                    <tr><td>Jenkins Job Link:</td> <td>$BUILD_URL</td></tr>
-                    <tr><td>Pull Request:</td> <td>$ghprbPullId</td></tr>
+                    <tr><td>Jenkins Build:</td> <td> <a href=$blue_ocean> ${BUILD_NUMBER} </a> </td></tr>
+                    <tr><td>Pull Request:</td> <td><a href=$ghprbPullLink>$ghprbPullId</a> </td></tr>
                     <tr><td>Commit SHA:</td> <td>$ghprbActualCommit</td></tr>
-                    <tr><td>Link:</td> <td>$ghprbPullLink</td></tr>
                     <tr><td>nGraph-ONNX Branch:</td> <td>${ONNX_BRANCH}</td></tr>
                 </table>
             """,
...
@@ -65,19 +65,19 @@ function build_ngraph() {
     mkdir -p ./build
     cd ./build
     cmake ../ -DNGRAPH_TOOLS_ENABLE=FALSE -DNGRAPH_UNIT_TEST_ENABLE=FALSE -DNGRAPH_USE_PREBUILT_LLVM=TRUE -DNGRAPH_ONNX_IMPORT_ENABLE=TRUE -DCMAKE_INSTALL_PREFIX="${ngraph_directory}/ngraph_dist" || return 1
-    rm -f "${ngraph_directory}"/ngraph/python/dist/ngraph*.whl
     make -j $(lscpu --parse=CORE | grep -v '#' | sort | uniq | wc -l) || return 1
     make install || return 1
     cd "${ngraph_directory}/ngraph/python"
     if [ ! -d ./pybind11 ]; then
         git clone --recursive https://github.com/pybind/pybind11.git
     fi
+    rm -f "${ngraph_directory}"/ngraph/python/dist/ngraph*.whl
+    rm -rf "${ngraph_directory}/ngraph/python/*.so ${ngraph_directory}/ngraph/python/build"
     export PYBIND_HEADERS_PATH="${ngraph_directory}/ngraph/python/pybind11"
     export NGRAPH_CPP_BUILD_PATH="${ngraph_directory}/ngraph_dist"
     export NGRAPH_ONNX_IMPORT_ENABLE="TRUE"
     python3 setup.py bdist_wheel
     # Clean build artifacts
-    rm -rf "${ngraph_directory}/ngraph/python/_pyngraph.cpython* ${ngraph_directory}/ngraph/python/build"
     rm -rf "${ngraph_directory}/ngraph_dist"
     return 0
 }
...
 .. ops/index.rst

-Core Ops
-========
+About Core Ops
+==============

 An ``Op``'s primary role is to function as a node in a directed acyclic graph
 dependency computation graph.
@@ -40,7 +40,7 @@ that must be performed are:

 Alphabetical list of Core ``ops``
----------------------------------
+=================================

 Not currently a comprehensive list.
@@ -163,5 +163,3 @@ Not currently a comprehensive list.
    sqrt.rst
    tan.rst
    tanh.rst
@@ -155,6 +155,7 @@ set (SRC
     strides.cpp
     type/element_type.cpp
     util.cpp
+    validation_util.cpp
     graph_util.cpp
     placement.cpp
     cpio.cpp
...
@@ -59,6 +59,10 @@ add_library(onnx_import STATIC
     op/floor.hpp
     op/gemm.cpp
     op/gemm.hpp
+    op/global_average_pool.cpp
+    op/global_average_pool.hpp
+    op/global_max_pool.cpp
+    op/global_max_pool.hpp
     op/greater.hpp
     op/hard_sigmoid.cpp
     op/hard_sigmoid.hpp
...
@@ -180,7 +180,7 @@ namespace ngraph
            }

            template <>
-           inline const std::string& get_value(const onnx::AttributeProto& attribute)
+           inline std::string get_value(const onnx::AttributeProto& attribute)
            {
                if (unlikely(attribute.type() != onnx::AttributeProto_AttributeType_STRING))
                {
...
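The specialization above swaps a const-reference return for a by-value return, most likely to avoid handing back a reference to a temporary when the attribute's string has to be materialized inside the function. A minimal sketch of the hazard, using a hypothetical accessor rather than the nGraph API:

    #include <string>

    // Hypothetical accessor: the std::string built in the body is a temporary,
    // so a const-reference return would dangle the moment the call ends:
    //     inline const std::string& bad_get(const char* s) { return std::string{s}; } // UB
    //
    // Returning by value gives the caller its own copy (or moved-from temporary).
    inline std::string good_get(const char* s)
    {
        return std::string{s};
    }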
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/node.hpp"
#include "ngraph/node_vector.hpp"
#include "ngraph/op/avg_pool.hpp"
#include "utils/convpool.hpp"
namespace ngraph
{
namespace onnx_import
{
namespace op
{
namespace set_1
{
NodeVector global_average_pool(const Node& node)
{
return convpool::make_ng_pool<ngraph::op::AvgPool>(node);
}
} // namespace set_1
} // namespace op
} // namespace onnx_import
} // namespace ngraph
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once

#include "ngraph/node_vector.hpp"

#include "core/node.hpp"

namespace ngraph
{
    namespace onnx_import
    {
        namespace op
        {
            namespace set_1
            {
                /// \brief Convert an ONNX GlobalAveragePool operation to an nGraph node.
                ///
                /// \param node The ONNX node object representing this operation.
                ///
                /// \return The vector containing nGraph nodes producing the output of the
                ///         ONNX GlobalAveragePool operation.
                NodeVector global_average_pool(const Node& node);

            } // namespace set_1
        } // namespace op
    } // namespace onnx_import
} // namespace ngraph
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/node.hpp"
#include "ngraph/node_vector.hpp"
#include "ngraph/op/max_pool.hpp"
#include "utils/convpool.hpp"
namespace ngraph
{
namespace onnx_import
{
namespace op
{
namespace set_1
{
NodeVector global_max_pool(const Node& node)
{
return convpool::make_ng_pool<ngraph::op::MaxPool>(node);
}
} // namespace set_1
} // namespace op
} // namespace onnx_import
} // namespace ngraph
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once

#include "ngraph/node_vector.hpp"

#include "core/node.hpp"

namespace ngraph
{
    namespace onnx_import
    {
        namespace op
        {
            namespace set_1
            {
                /// \brief Convert an ONNX GlobalMaxPool operation to an nGraph node.
                ///
                /// \param node The ONNX node object representing this operation.
                ///
                /// \return The vector containing nGraph nodes producing the output of the
                ///         ONNX GlobalMaxPool operation.
                NodeVector global_max_pool(const Node& node);

            } // namespace set_1
        } // namespace op
    } // namespace onnx_import
} // namespace ngraph
@@ -37,6 +37,8 @@
 #include "op/flatten.hpp"
 #include "op/floor.hpp"
 #include "op/gemm.hpp"
+#include "op/global_average_pool.hpp"
+#include "op/global_max_pool.hpp"
 #include "op/greater.hpp"
 #include "op/hard_sigmoid.hpp"
 #include "op/identity.hpp"
@@ -148,6 +150,8 @@ namespace ngraph
         REGISTER_OPERATOR("Flatten", 1, flatten);
         REGISTER_OPERATOR("Floor", 1, floor);
         REGISTER_OPERATOR("Gemm", 1, gemm);
+        REGISTER_OPERATOR("GlobalAveragePool", 1, global_average_pool);
+        REGISTER_OPERATOR("GlobalMaxPool", 1, global_max_pool);
         REGISTER_OPERATOR("Greater", 1, greater);
         REGISTER_OPERATOR("HardSigmoid", 1, hard_sigmoid);
         REGISTER_OPERATOR("Identity", 1, identity);
...
@@ -31,7 +31,9 @@ namespace ngraph
        {
            Shape get_kernel_shape(const Node& node)
            {
-               return node.get_attribute_value<std::vector<std::size_t>>("kernel_shape", {1, 1});
+               std::size_t input_spacial_dims = node.get_ng_inputs()[0]->get_shape().size() - 2;
+               return node.get_attribute_value<std::vector<std::size_t>>(
+                   "kernel_shape", std::vector<std::size_t>(input_spacial_dims, 1UL));
            }

            namespace detail
@@ -121,7 +123,7 @@ namespace ngraph
                    pads = CoordinateDiff(static_cast<std::ptrdiff_t>(kernel_shape.size()), 0UL);
                }

-               if (pads.size() <= 3)
+               if (pads.size() != kernel_shape.size() * 2)
                {
                    // Paddings specified in (H, W, C) format.
                    return {pads, pads};
...
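A note on the get_kernel_shape change above: the default kernel now matches the input's spatial rank instead of the hard-coded {1, 1}, so 3-D pooling correctly defaults to {1, 1, 1}. A rough sketch of the intent, with plain vectors standing in for the importer's types:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Sketch: one default kernel entry per spatial axis, assuming the
    // [N, C, spatial...] layout used by the ONNX importer.
    std::vector<std::size_t> default_kernel_shape(const std::vector<std::size_t>& input_shape)
    {
        assert(input_shape.size() >= 3); // need N, C, and at least one spatial dim
        const std::size_t spatial_dims = input_shape.size() - 2; // drop N and C
        return std::vector<std::size_t>(spatial_dims, 1);        // e.g. {1, 1, 1} for 5-D input
    }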
@@ -16,7 +16,10 @@
 #pragma once

+#include <string>
+
 #include "ngraph/coordinate_diff.hpp"
+#include "ngraph/op/avg_pool.hpp"
 #include "ngraph/shape.hpp"

 #include "core/attribute.hpp"
@@ -84,13 +87,11 @@ namespace ngraph
                return get_pads(node, get_kernel_shape(node));
            }

-           /**
-            * @brief Create an nGraph pooling operation based on an ONNX pooling op.
-            *
-            * @tparam T Class of an nGraph pooling operation (e.g. AveragePool, MaxPool)
-            * @param node incoming ONNX operation
-            * @return nGraph node equivalent of the ONNX operation
-            */
+           /// \brief Create an nGraph pooling operation based on an ONNX pooling op.
+           ///
+           /// \param T Class of an nGraph pooling operation (e.g. AveragePool, MaxPool)
+           /// \param node incoming ONNX operation
+           /// \return nGraph node equivalent of the ONNX operation
            template <class T>
            inline NodeVector make_ng_pool(const Node& node)
            {
@@ -98,19 +99,44 @@ namespace ngraph
                auto data = node.get_ng_inputs().at(0);

                // Parse ONNX op attributes
-               Shape kernel_shape = convpool::get_kernel_shape(node);
+               Shape kernel_shape;
+               if (node.op_type().find("Global") != std::string::npos)
+               {
+                   kernel_shape = node.get_ng_inputs()[0]->get_shape();
+                   // Remove N and C dimensions and leave only spatial dims.
+                   kernel_shape.erase(std::begin(kernel_shape),
+                                      std::next(std::begin(kernel_shape), 2));
+               }
+               else
+               {
+                   kernel_shape = convpool::get_kernel_shape(node);
+               }
                auto strides = convpool::get_strides(node);
                auto dilations = convpool::get_dilations(node);
                auto paddings = convpool::get_pads(node);
+               bool count_include_pad = node.get_attribute_value<int64_t>("count_include_pad", 0);

                // Convert padding from CoordinateDiff to Shape objects
                const CoordinateDiff& padding_above{paddings.first};
                const CoordinateDiff& padding_below{paddings.second};
                Shape padding_below_shape{std::begin(padding_below), std::end(padding_below)};
                Shape padding_above_shape{std::begin(padding_above), std::end(padding_above)};

-               return {std::make_shared<T>(
-                   data, kernel_shape, strides, padding_below_shape, padding_above_shape)};
+               if (count_include_pad)
+               {
+                   return {std::make_shared<ngraph::op::AvgPool>(data,
+                                                                 kernel_shape,
+                                                                 strides,
+                                                                 padding_below_shape,
+                                                                 padding_above_shape,
+                                                                 count_include_pad)};
+               }
+               else
+               {
+                   return {std::make_shared<T>(
+                       data, kernel_shape, strides, padding_below_shape, padding_above_shape)};
+               }
            }
        } // namespace convpool
...
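For the Global* variants, make_ng_pool now derives the kernel from the input itself: the kernel spans every spatial axis, so each channel reduces to a single value. A self-contained sketch of that derivation (plain vectors standing in for ngraph::Shape):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Sketch: a global pool's kernel covers the whole spatial extent, so
    // GlobalAveragePool is just AvgPool with kernel_shape == feature-map size.
    std::vector<std::size_t> global_pool_kernel(const std::vector<std::size_t>& input_shape)
    {
        assert(input_shape.size() >= 3); // need [N, C] plus at least one spatial dim
        return {input_shape.begin() + 2, input_shape.end()}; // drop N and C
    }

    // e.g. input {1, 3, 224, 224} -> kernel {224, 224}

The count_include_pad branch constructs ngraph::op::AvgPool explicitly rather than the template parameter T, since only the average-pool constructor takes that flag; with it set, the zero padding is counted in the averaging window.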
@@ -356,6 +356,9 @@ namespace ngraph
    namespace util
    {
+       // This is a legacy function, retained because the CPU backend uses it for now.
+       // TODO: Update CPU backend to use the new stuff in validation_util.hpp, and remove
+       // this function.
        Shape infer_convolution_output_shape(const Node* node,
                                             const Shape& data_batch_shape,
                                             const Shape& filters_shape,
...
@@ -54,7 +54,7 @@ public:
    /// \brief Create a tensor specific to this backend
    /// \param element_type The type of the tensor element
    /// \param shape The shape of the tensor
-   /// \returns shared_ptr to a new backend specific tensor
+   /// \returns shared_ptr to a new backend-specific tensor
    virtual std::shared_ptr<ngraph::runtime::Tensor>
        create_tensor(const ngraph::element::Type& element_type, const Shape& shape) = 0;
@@ -64,7 +64,7 @@ public:
    /// \param memory_pointer A pointer to a buffer used for this tensor. The size of the buffer
    ///     must be sufficient to contain the tensor. The lifetime of the buffer is the
    ///     responsibility of the caller.
-   /// \returns shared_ptr to a new backend specific tensor
+   /// \returns shared_ptr to a new backend-specific tensor
    virtual std::shared_ptr<ngraph::runtime::Tensor> create_tensor(
        const ngraph::element::Type& element_type, const Shape& shape, void* memory_pointer) = 0;
@@ -107,8 +107,8 @@ public:
    /// \param func The function to execute
    virtual void remove_compiled_function(std::shared_ptr<Function> func);

-   /// \brief Enable the collection of per op performance information on a specified Function.
-   /// Data is collection via the `get_performance_data` method.
+   /// \brief Enable the collection of per-op performance information on a specified Function.
+   /// Data collection is via the `get_performance_data` method.
    /// \param func The function to collect performance data on.
    /// \param enable Set to true to enable or false to disable data collection
    virtual void enable_performance_data(std::shared_ptr<Function> func, bool enable) {}
...
@@ -26,7 +26,7 @@ constexpr const uint32_t initial_buffer_size = 10 * 1024 * 1024;

 runtime::gpu::GPUMemoryManager::GPUMemoryManager(GPUPrimitiveEmitter* emitter)
    : m_buffer_offset(0)
-   , m_buffered_mem(initial_buffer_size)
+   , m_buffered_mem(initial_buffer_size, 0)
    , m_workspace_manager(new pass::MemoryManager(runtime::gpu::GPUMemoryManager::alignment))
    , m_argspace_mem(1, {nullptr, 0})
    , m_workspace_mem(1, {nullptr, 0})
@@ -80,6 +80,8 @@ void runtime::gpu::GPUMemoryManager::allocate()
        m_argspace_mem.back().ptr, m_buffered_mem.data(), m_buffer_offset);
    // add an empty node to the end of the list and zero offset
    m_argspace_mem.push_back({nullptr, 0});
+   m_buffered_mem.clear();
+   m_buffered_mem.resize(initial_buffer_size, 0);
    m_buffer_offset = 0;
 }
@@ -97,7 +99,9 @@ void runtime::gpu::GPUMemoryManager::allocate()
 size_t runtime::gpu::GPUMemoryManager::queue_for_transfer(const void* data, size_t size)
 {
    // if the current allocation will overflow the host buffer
-   size_t new_size = m_buffer_offset + size;
+   size_t aligned_size =
+       ngraph::pass::MemoryManager::align(size, runtime::gpu::GPUMemoryManager::alignment);
+   size_t new_size = m_buffer_offset + aligned_size;
    size_t buffer_size = m_buffered_mem.size();
    bool need_resize = false;
    while (buffer_size < new_size)
@@ -109,12 +113,12 @@ size_t runtime::gpu::GPUMemoryManager::queue_for_transfer(const void* data, size
    if (need_resize)
    {
-       m_buffered_mem.resize(buffer_size);
+       m_buffered_mem.resize(buffer_size, 0);
    }

    size_t offset = m_buffer_offset;
    std::memcpy(m_buffered_mem.data() + offset, data, size);
-   m_buffer_offset += size;
+   m_buffer_offset += aligned_size;
    return offset;
 }
@@ -133,7 +137,6 @@ runtime::gpu::GPUAllocator::GPUAllocator(const GPUAllocator& g)
 size_t runtime::gpu::GPUAllocator::reserve_argspace(const void* data, size_t size)
 {
    // add parameter data to host buffer that will be transferred to device
-   size = ngraph::pass::MemoryManager::align(size, runtime::gpu::GPUMemoryManager::alignment);
    size_t offset = m_manager->queue_for_transfer(data, size);
    auto local = std::prev(m_manager->m_argspace_mem.end());
    // return a lambda that will yield the gpu memory address. this
...
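The memory-manager change moves alignment from reserve_argspace into queue_for_transfer: the write offset advances by the aligned size, but memcpy copies only the caller's size bytes, so the transfer never reads past the end of the source buffer and the alignment padding stays zero-initialized. A minimal sketch of the corrected pattern (illustrative names, not the nGraph API):

    #include <cstddef>
    #include <cstring>
    #include <vector>

    // Copy exactly `size` bytes, but advance the write cursor by the aligned
    // size so every queued entry stays alignment-padded with zeros.
    std::size_t queue_aligned(std::vector<char>& buffer, std::size_t& offset,
                              const void* data, std::size_t size, std::size_t alignment)
    {
        const std::size_t aligned = (size + alignment - 1) / alignment * alignment;
        if (buffer.size() < offset + aligned)
        {
            buffer.resize(offset + aligned, 0); // zero fill keeps padding deterministic
        }
        const std::size_t entry = offset;
        std::memcpy(buffer.data() + entry, data, size); // never read past the source
        offset += aligned;
        return entry;
    }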
@@ -1565,14 +1565,13 @@ void runtime::intelgpu::do_arg_max_min_operation(cldnn::topology& topology,
    {
        gws = generate_loops(writer, output_shape, true);

-       writer << get_opencl_type_name(output_type) << " " << var_name << " = " << infinity
-              << ";\n";
-       writer << "uint index = -1;\n";
+       writer << get_opencl_type_name(input_type) << " " << var_name << " = " << infinity << ";\n";
+       writer << get_opencl_type_name(output_type) << " index = 0;\n";

        writer << "for (uint i = 0; i < " << input_shape.at(reduction_axis) << "; ++i)\n";
        writer.block_begin();
        {
-           writer << "if(i == 0 || input0" << dims_buffer << operation_sign << var_name << ")\n";
+           writer << "if (input0" << dims_buffer << operation_sign << var_name << ")\n";
            writer.block_begin();
            {
                writer << var_name << " = input0" << dims_buffer << ";\n";
...
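The generated argmax/argmin kernel no longer needs the `i == 0` escape hatch: seeding the running value with the comparison identity (±infinity) lets the first element win the comparison naturally, and the value and index now carry the input and output element types respectively. A host-side C++ analog of the generated loop, shown for the argmin case:

    #include <cstddef>
    #include <limits>
    #include <vector>

    // Host-side analog of the generated OpenCL loop (argmin): seeding `best`
    // with +infinity removes the need for an `i == 0` special case.
    std::size_t arg_min(const std::vector<float>& input)
    {
        float best = std::numeric_limits<float>::infinity(); // input element type
        std::size_t index = 0;                               // output index type
        for (std::size_t i = 0; i < input.size(); ++i)
        {
            if (input[i] < best)
            {
                best = input[i];
                index = i;
            }
        }
        return index;
    }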
@@ -7,6 +7,7 @@ backwards_dot_scalar_tensor
 backwards_dot_tensor3_tensor3
 backwards_dot_tensor_scalar
 backwards_dot_tensor_vector
+backwards_exp
 backwards_maxpool_n2_c1_hw5_3x3_str2_max
 backwards_maxpool_n4_c1_hw4_2x2_max
 backwards_replace_slice
@@ -20,6 +21,9 @@ dequantize
 dequantize_axes
 dequantize_int8
 divide_by_zero_int32
+dot_3d_multi_axis
+dot_4d_5d_multi_axis
+dot_4d_5d_multi_axis_more
 function_call
 max_pool_3d
 numeric_double_inf
...
@@ -78,12 +78,12 @@ namespace ngraph
        /// \param layout Layout to set
        void set_tensor_layout(const std::shared_ptr<descriptor::layout::TensorLayout>& layout);

-       /// \brief Get the stale value of the tensor. A tensor is stale if it's data is
+       /// \brief Get the stale value of the tensor. A tensor is stale if its data is
        ///     changed.
        /// \return true if there is new data in this tensor
        bool get_stale() const;

-       /// \brief Set the stale value of the tensor. A tensor is stale if it's data is
+       /// \brief Set the stale value of the tensor. A tensor is stale if its data is
        ///     changed.
        void set_stale(bool val);
...
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once

#include <tuple>

#include "ngraph/coordinate_diff.hpp"
#include "ngraph/op/op.hpp"

namespace ngraph
{
    Shape infer_windowed_reduction_output_shape(const Node* node,
                                                const Shape& data_shape,
                                                const Strides& data_dilation,
                                                const CoordinateDiff& data_padding_below,
                                                const CoordinateDiff& data_padding_above,
                                                const Shape& window_shape,
                                                const Strides& window_strides,
                                                const Strides& window_dilation,
                                                bool is_window_all_in_padding_allowed);

    std::tuple<element::Type, Shape>
        infer_convolution_forward(const Node* node,
                                  element::Type et_batch,
                                  element::Type et_filters,
                                  const Shape& data_batch_shape,
                                  const Strides& data_dilation,
                                  const CoordinateDiff& data_padding_below,
                                  const CoordinateDiff& data_padding_above,
                                  const Shape& filters_shape,
                                  const Strides& filter_strides,
                                  const Strides& filter_dilation);

    Shape infer_batched_pooling_forward(const Node* node,
                                        const Shape& data_batch_shape,
                                        const CoordinateDiff& data_padding_below,
                                        const CoordinateDiff& data_padding_above,
                                        const Shape& window_shape,
                                        const Strides& window_strides,
                                        bool is_window_all_in_padding_allowed);
}
@@ -83,6 +83,30 @@ TEST(gpu_test, memory_manager_extract_arguments)
    EXPECT_EQ(host, fp32_args);
 }

+// This test was added to catch a potential bug in the allocator. Previously the
+// allocator copied extra data: for example, with an 8-byte alignment, reserving
+// 4 bytes of space still copied 8 bytes from input_args. This led to two
+// potential bugs:
+// 1. extra data was copied instead of the alignment padding being zeroed;
+// 2. out-of-bounds reads of input_args, which is undefined behavior.
+TEST(gpu_test, memory_manager_argspace_alignment)
+{
+    size_t alignment = 8;
+    std::vector<char> input_args = {0, 1, 2, 3, 4, 5, 6, 7};
+    std::vector<char> ref_args = {0, 1, 2, 3, 0, 0, 0, 0};
+    std::vector<char> result_args(alignment, 0);
+    size_t idx;
+    runtime::gpu::GPUPrimitiveEmitter emitter;
+    {
+        auto allocator = emitter.get_memory_allocator();
+        idx = allocator.reserve_argspace(input_args.data(), 4 * sizeof(char));
+    }
+    emitter.allocate_primitive_memory();
+    runtime::gpu::memory_primitive& mem_primitive = emitter.get_memory_primitives()[idx];
+    runtime::gpu::cuda_memcpyDtH(result_args.data(), mem_primitive(), alignment * sizeof(char));
+    EXPECT_EQ(result_args, ref_args);
+}
+
 TEST(gpu_test, memory_manager_argspace_size)
 {
    std::vector<float> fp32_args = {2112.0f, 2112.0f};
...