Unverified commit b6197967 authored by Matthew Brookhart, committed by GitHub

Merge branch 'master' into ayzhuang/in-place-concat

parents ba848e20 5a32dfe4
@@ -128,6 +128,7 @@ def Notify() {
     String notifyPeople = "$ghprbPullAuthorEmail, $ghprbActualCommitAuthorEmail"
     Closure notifyMethod = { configMap ->
         if(currentBuild.result == "FAILURE") {
+            blue_ocean = "https://crackerjack.intel.com/blue/organizations/jenkins/onnx%2Fngraph_onnx_integration_ci/detail/ngraph_onnx_integration_ci/${BUILD_NUMBER}/pipeline"
             emailext (
                 subject: "NGraph-Onnx CI: NGraph PR $ghprbPullId $currentBuild.result!",
                 body: """
@@ -135,11 +136,9 @@ def Notify() {
                     <tr><td>Status:</td> <td>${currentBuild.result}</td></tr>
                     <tr><td>Repository</td> <td>$ghprbGhRepository</td></tr>
                     <tr><td>Branch:</td> <td>$ghprbSourceBranch</td></tr>
-                    <tr><td>Jenkins Job No:</td> <td>$BUILD_NUMBER</td></tr>
-                    <tr><td>Jenkins Job Link:</td> <td>$BUILD_URL</td></tr>
-                    <tr><td>Pull Request:</td> <td>$ghprbPullId</td></tr>
+                    <tr><td>Jenkins Build:</td> <td> <a href=$blue_ocean> ${BUILD_NUMBER} </a> </td></tr>
+                    <tr><td>Pull Request:</td> <td><a href=$ghprbPullLink>$ghprbPullId</a> </td></tr>
                     <tr><td>Commit SHA:</td> <td>$ghprbActualCommit</td></tr>
-                    <tr><td>Link:</td> <td>$ghprbPullLink</td></tr>
                     <tr><td>nGraph-ONNX Branch:</td> <td>${ONNX_BRANCH}</td></tr>
                 </table>
             """,
...
@@ -65,19 +65,19 @@ function build_ngraph() {
     mkdir -p ./build
     cd ./build
     cmake ../ -DNGRAPH_TOOLS_ENABLE=FALSE -DNGRAPH_UNIT_TEST_ENABLE=FALSE -DNGRAPH_USE_PREBUILT_LLVM=TRUE -DNGRAPH_ONNX_IMPORT_ENABLE=TRUE -DCMAKE_INSTALL_PREFIX="${ngraph_directory}/ngraph_dist" || return 1
-    rm -f "${ngraph_directory}"/ngraph/python/dist/ngraph*.whl
     make -j $(lscpu --parse=CORE | grep -v '#' | sort | uniq | wc -l) || return 1
     make install || return 1
     cd "${ngraph_directory}/ngraph/python"
     if [ ! -d ./pybind11 ]; then
         git clone --recursive https://github.com/pybind/pybind11.git
     fi
+    rm -f "${ngraph_directory}"/ngraph/python/dist/ngraph*.whl
+    rm -rf "${ngraph_directory}/ngraph/python/*.so ${ngraph_directory}/ngraph/python/build"
     export PYBIND_HEADERS_PATH="${ngraph_directory}/ngraph/python/pybind11"
     export NGRAPH_CPP_BUILD_PATH="${ngraph_directory}/ngraph_dist"
     export NGRAPH_ONNX_IMPORT_ENABLE="TRUE"
     python3 setup.py bdist_wheel
     # Clean build artifacts
-    rm -rf "${ngraph_directory}/ngraph/python/_pyngraph.cpython* ${ngraph_directory}/ngraph/python/build"
     rm -rf "${ngraph_directory}/ngraph_dist"
     return 0
 }
...
 .. ops/index.rst

-Core Ops
-========
+About Core Ops
+==============

 An ``Op``'s primary role is to function as a node in a directed acyclic graph
 dependency computation graph.
@@ -40,7 +40,7 @@ that must be performed are:

 Alphabetical list of Core ``ops``
----------------------------------
+=================================

 Not currently a comprehensive list.
@@ -163,5 +163,3 @@ Not currently a comprehensive list.
    sqrt.rst
    tan.rst
    tanh.rst
@@ -155,6 +155,7 @@ set (SRC
     strides.cpp
     type/element_type.cpp
     util.cpp
+    validation_util.cpp
     graph_util.cpp
     placement.cpp
     cpio.cpp
...
@@ -59,6 +59,10 @@ add_library(onnx_import STATIC
     op/floor.hpp
     op/gemm.cpp
     op/gemm.hpp
+    op/global_average_pool.cpp
+    op/global_average_pool.hpp
+    op/global_max_pool.cpp
+    op/global_max_pool.hpp
     op/greater.hpp
     op/hard_sigmoid.cpp
     op/hard_sigmoid.hpp
...
@@ -180,7 +180,7 @@ namespace ngraph
            }

            template <>
-           inline const std::string& get_value(const onnx::AttributeProto& attribute)
+           inline std::string get_value(const onnx::AttributeProto& attribute)
            {
                if (unlikely(attribute.type() != onnx::AttributeProto_AttributeType_STRING))
                {
...
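The specialization above swaps a const-reference return for a by-value return, most likely to avoid handing back a reference to a temporary when the attribute's string has to be materialized inside the function. A minimal sketch of the hazard, using a hypothetical accessor rather than the nGraph API:

    #include <string>

    // Hypothetical accessor: the std::string built in the body is a temporary,
    // so a const-reference return would dangle the moment the call ends:
    //     inline const std::string& bad_get(const char* s) { return std::string{s}; } // UB
    //
    // Returning by value gives the caller its own copy (or moved-from temporary).
    inline std::string good_get(const char* s)
    {
        return std::string{s};
    }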
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/node.hpp"
#include "ngraph/node_vector.hpp"
#include "ngraph/op/avg_pool.hpp"
#include "utils/convpool.hpp"
namespace ngraph
{
namespace onnx_import
{
namespace op
{
namespace set_1
{
NodeVector global_average_pool(const Node& node)
{
return convpool::make_ng_pool<ngraph::op::AvgPool>(node);
}
} // namespace set_1
} // namespace op
} // namespace onnx_import
} // namespace ngraph
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once

#include "ngraph/node_vector.hpp"

#include "core/node.hpp"

namespace ngraph
{
    namespace onnx_import
    {
        namespace op
        {
            namespace set_1
            {
                /// \brief Convert an ONNX GlobalAveragePool operation to an nGraph node.
                ///
                /// \param node The ONNX node object representing this operation.
                ///
                /// \return The vector containing nGraph nodes producing the output of the
                ///         ONNX GlobalAveragePool operation.
                NodeVector global_average_pool(const Node& node);

            } // namespace set_1
        } // namespace op
    } // namespace onnx_import
} // namespace ngraph
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/node.hpp"
#include "ngraph/node_vector.hpp"
#include "ngraph/op/max_pool.hpp"
#include "utils/convpool.hpp"
namespace ngraph
{
namespace onnx_import
{
namespace op
{
namespace set_1
{
NodeVector global_max_pool(const Node& node)
{
return convpool::make_ng_pool<ngraph::op::MaxPool>(node);
}
} // namespace set_1
} // namespace op
} // namespace onnx_import
} // namespace ngraph
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once

#include "ngraph/node_vector.hpp"

#include "core/node.hpp"

namespace ngraph
{
    namespace onnx_import
    {
        namespace op
        {
            namespace set_1
            {
                /// \brief Convert an ONNX GlobalMaxPool operation to an nGraph node.
                ///
                /// \param node The ONNX node object representing this operation.
                ///
                /// \return The vector containing nGraph nodes producing the output of the
                ///         ONNX GlobalMaxPool operation.
                NodeVector global_max_pool(const Node& node);

            } // namespace set_1
        } // namespace op
    } // namespace onnx_import
} // namespace ngraph
@@ -37,6 +37,8 @@
 #include "op/flatten.hpp"
 #include "op/floor.hpp"
 #include "op/gemm.hpp"
+#include "op/global_average_pool.hpp"
+#include "op/global_max_pool.hpp"
 #include "op/greater.hpp"
 #include "op/hard_sigmoid.hpp"
 #include "op/identity.hpp"
@@ -148,6 +150,8 @@ namespace ngraph
         REGISTER_OPERATOR("Flatten", 1, flatten);
         REGISTER_OPERATOR("Floor", 1, floor);
         REGISTER_OPERATOR("Gemm", 1, gemm);
+        REGISTER_OPERATOR("GlobalAveragePool", 1, global_average_pool);
+        REGISTER_OPERATOR("GlobalMaxPool", 1, global_max_pool);
         REGISTER_OPERATOR("Greater", 1, greater);
         REGISTER_OPERATOR("HardSigmoid", 1, hard_sigmoid);
         REGISTER_OPERATOR("Identity", 1, identity);
...
@@ -31,7 +31,9 @@ namespace ngraph
        {
            Shape get_kernel_shape(const Node& node)
            {
-               return node.get_attribute_value<std::vector<std::size_t>>("kernel_shape", {1, 1});
+               std::size_t input_spacial_dims = node.get_ng_inputs()[0]->get_shape().size() - 2;
+               return node.get_attribute_value<std::vector<std::size_t>>(
+                   "kernel_shape", std::vector<std::size_t>(input_spacial_dims, 1UL));
            }

            namespace detail
@@ -121,7 +123,7 @@ namespace ngraph
                    pads = CoordinateDiff(static_cast<std::ptrdiff_t>(kernel_shape.size()), 0UL);
                }

-               if (pads.size() <= 3)
+               if (pads.size() != kernel_shape.size() * 2)
                {
                    // Paddings specified in (H, W, C) format.
                    return {pads, pads};
...
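A note on the get_kernel_shape change above: the default kernel now matches the input's spatial rank instead of the hard-coded {1, 1}, so 3-D pooling correctly defaults to {1, 1, 1}. A rough sketch of the intent, with plain vectors standing in for the importer's types:

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Sketch: one default kernel entry per spatial axis, assuming the
    // [N, C, spatial...] layout used by the ONNX importer.
    std::vector<std::size_t> default_kernel_shape(const std::vector<std::size_t>& input_shape)
    {
        assert(input_shape.size() >= 3); // need N, C, and at least one spatial dim
        const std::size_t spatial_dims = input_shape.size() - 2; // drop N and C
        return std::vector<std::size_t>(spatial_dims, 1);        // e.g. {1, 1, 1} for 5-D input
    }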
@@ -16,7 +16,10 @@
 #pragma once

+#include <string>
+
 #include "ngraph/coordinate_diff.hpp"
+#include "ngraph/op/avg_pool.hpp"
 #include "ngraph/shape.hpp"

 #include "core/attribute.hpp"
@@ -84,13 +87,11 @@ namespace ngraph
                return get_pads(node, get_kernel_shape(node));
            }

-           /**
-            * @brief Create an nGraph pooling operation based on an ONNX pooling op.
-            *
-            * @tparam T Class of an nGraph pooling operation (e.g. AveragePool, MaxPool)
-            * @param node incoming ONNX operation
-            * @return nGraph node equivalent of the ONNX operation
-            */
+           /// \brief Create an nGraph pooling operation based on an ONNX pooling op.
+           ///
+           /// \param T Class of an nGraph pooling operation (e.g. AveragePool, MaxPool)
+           /// \param node incoming ONNX operation
+           /// \return nGraph node equivalent of the ONNX operation
            template <class T>
            inline NodeVector make_ng_pool(const Node& node)
            {
@@ -98,19 +99,44 @@ namespace ngraph
                auto data = node.get_ng_inputs().at(0);

                // Parse ONNX op attributes
-               Shape kernel_shape = convpool::get_kernel_shape(node);
+               Shape kernel_shape;
+               if (node.op_type().find("Global") != std::string::npos)
+               {
+                   kernel_shape = node.get_ng_inputs()[0]->get_shape();
+                   // Remove N and C dimensions and leave only spatial dims.
+                   kernel_shape.erase(std::begin(kernel_shape),
+                                      std::next(std::begin(kernel_shape), 2));
+               }
+               else
+               {
+                   kernel_shape = convpool::get_kernel_shape(node);
+               }
                auto strides = convpool::get_strides(node);
                auto dilations = convpool::get_dilations(node);
                auto paddings = convpool::get_pads(node);
+               bool count_include_pad = node.get_attribute_value<int64_t>("count_include_pad", 0);

                // Convert padding from CoordinateDiff to Shape objects
                const CoordinateDiff& padding_above{paddings.first};
                const CoordinateDiff& padding_below{paddings.second};
                Shape padding_below_shape{std::begin(padding_below), std::end(padding_below)};
                Shape padding_above_shape{std::begin(padding_above), std::end(padding_above)};

-               return {std::make_shared<T>(
-                   data, kernel_shape, strides, padding_below_shape, padding_above_shape)};
+               if (count_include_pad)
+               {
+                   return {std::make_shared<ngraph::op::AvgPool>(data,
+                                                                 kernel_shape,
+                                                                 strides,
+                                                                 padding_below_shape,
+                                                                 padding_above_shape,
+                                                                 count_include_pad)};
+               }
+               else
+               {
+                   return {std::make_shared<T>(
+                       data, kernel_shape, strides, padding_below_shape, padding_above_shape)};
+               }
            }
        } // namespace convpool
...
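For the Global* variants, make_ng_pool now derives the kernel from the input itself: the kernel spans every spatial axis, so each channel reduces to a single value. A self-contained sketch of that derivation (plain vectors standing in for ngraph::Shape):

    #include <cassert>
    #include <cstddef>
    #include <vector>

    // Sketch: a global pool's kernel covers the whole spatial extent, so
    // GlobalAveragePool is just AvgPool with kernel_shape == feature-map size.
    std::vector<std::size_t> global_pool_kernel(const std::vector<std::size_t>& input_shape)
    {
        assert(input_shape.size() >= 3); // need [N, C] plus at least one spatial dim
        return {input_shape.begin() + 2, input_shape.end()}; // drop N and C
    }

    // e.g. input {1, 3, 224, 224} -> kernel {224, 224}

The count_include_pad branch constructs ngraph::op::AvgPool explicitly rather than the template parameter T, since only the average-pool constructor takes that flag; with it set, the zero padding is counted in the averaging window.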
@@ -356,6 +356,9 @@ namespace ngraph
    namespace util
    {
+       // This is a legacy function, retained because the CPU backend uses it for now.
+       // TODO: Update CPU backend to use the new stuff in validation_util.hpp, and remove
+       // this function.
        Shape infer_convolution_output_shape(const Node* node,
                                             const Shape& data_batch_shape,
                                             const Shape& filters_shape,
...
@@ -54,7 +54,7 @@ public:
    /// \brief Create a tensor specific to this backend
    /// \param element_type The type of the tensor element
    /// \param shape The shape of the tensor
-   /// \returns shared_ptr to a new backend specific tensor
+   /// \returns shared_ptr to a new backend-specific tensor
    virtual std::shared_ptr<ngraph::runtime::Tensor>
        create_tensor(const ngraph::element::Type& element_type, const Shape& shape) = 0;
@@ -64,7 +64,7 @@ public:
    /// \param memory_pointer A pointer to a buffer used for this tensor. The size of the buffer
    ///     must be sufficient to contain the tensor. The lifetime of the buffer is the
    ///     responsibility of the caller.
-   /// \returns shared_ptr to a new backend specific tensor
+   /// \returns shared_ptr to a new backend-specific tensor
    virtual std::shared_ptr<ngraph::runtime::Tensor> create_tensor(
        const ngraph::element::Type& element_type, const Shape& shape, void* memory_pointer) = 0;
@@ -107,8 +107,8 @@ public:
    /// \param func The function to execute
    virtual void remove_compiled_function(std::shared_ptr<Function> func);

-   /// \brief Enable the collection of per op performance information on a specified Function.
-   /// Data is collection via the `get_performance_data` method.
+   /// \brief Enable the collection of per-op performance information on a specified Function.
+   /// Data collection is via the `get_performance_data` method.
    /// \param func The function to collect performance data on.
    /// \param enable Set to true to enable or false to disable data collection
    virtual void enable_performance_data(std::shared_ptr<Function> func, bool enable) {}
...
@@ -26,7 +26,7 @@ constexpr const uint32_t initial_buffer_size = 10 * 1024 * 1024;

 runtime::gpu::GPUMemoryManager::GPUMemoryManager(GPUPrimitiveEmitter* emitter)
    : m_buffer_offset(0)
-   , m_buffered_mem(initial_buffer_size)
+   , m_buffered_mem(initial_buffer_size, 0)
    , m_workspace_manager(new pass::MemoryManager(runtime::gpu::GPUMemoryManager::alignment))
    , m_argspace_mem(1, {nullptr, 0})
    , m_workspace_mem(1, {nullptr, 0})
@@ -80,6 +80,8 @@ void runtime::gpu::GPUMemoryManager::allocate()
        m_argspace_mem.back().ptr, m_buffered_mem.data(), m_buffer_offset);
    // add an empty node to the end of the list and zero offset
    m_argspace_mem.push_back({nullptr, 0});
+   m_buffered_mem.clear();
+   m_buffered_mem.resize(initial_buffer_size, 0);
    m_buffer_offset = 0;
 }
@@ -97,7 +99,9 @@ void runtime::gpu::GPUMemoryManager::allocate()
 size_t runtime::gpu::GPUMemoryManager::queue_for_transfer(const void* data, size_t size)
 {
    // if the current allocation will overflow the host buffer
-   size_t new_size = m_buffer_offset + size;
+   size_t aligned_size =
+       ngraph::pass::MemoryManager::align(size, runtime::gpu::GPUMemoryManager::alignment);
+   size_t new_size = m_buffer_offset + aligned_size;
    size_t buffer_size = m_buffered_mem.size();
    bool need_resize = false;
    while (buffer_size < new_size)
@@ -109,12 +113,12 @@ size_t runtime::gpu::GPUMemoryManager::queue_for_transfer(const void* data, size
    if (need_resize)
    {
-       m_buffered_mem.resize(buffer_size);
+       m_buffered_mem.resize(buffer_size, 0);
    }

    size_t offset = m_buffer_offset;
    std::memcpy(m_buffered_mem.data() + offset, data, size);
-   m_buffer_offset += size;
+   m_buffer_offset += aligned_size;
    return offset;
 }
@@ -133,7 +137,6 @@ runtime::gpu::GPUAllocator::GPUAllocator(const GPUAllocator& g)
 size_t runtime::gpu::GPUAllocator::reserve_argspace(const void* data, size_t size)
 {
    // add parameter data to host buffer that will be transferred to device
-   size = ngraph::pass::MemoryManager::align(size, runtime::gpu::GPUMemoryManager::alignment);
    size_t offset = m_manager->queue_for_transfer(data, size);
    auto local = std::prev(m_manager->m_argspace_mem.end());
    // return a lambda that will yield the gpu memory address. this
...
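The memory-manager change moves alignment from reserve_argspace into queue_for_transfer: the write offset advances by the aligned size, but memcpy copies only the caller's size bytes, so the transfer never reads past the end of the source buffer and the alignment padding stays zero-initialized. A minimal sketch of the corrected pattern (illustrative names, not the nGraph API):

    #include <cstddef>
    #include <cstring>
    #include <vector>

    // Copy exactly `size` bytes, but advance the write cursor by the aligned
    // size so every queued entry stays alignment-padded with zeros.
    std::size_t queue_aligned(std::vector<char>& buffer, std::size_t& offset,
                              const void* data, std::size_t size, std::size_t alignment)
    {
        const std::size_t aligned = (size + alignment - 1) / alignment * alignment;
        if (buffer.size() < offset + aligned)
        {
            buffer.resize(offset + aligned, 0); // zero fill keeps padding deterministic
        }
        const std::size_t entry = offset;
        std::memcpy(buffer.data() + entry, data, size); // never read past the source
        offset += aligned;
        return entry;
    }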
@@ -1565,14 +1565,13 @@ void runtime::intelgpu::do_arg_max_min_operation(cldnn::topology& topology,
    {
        gws = generate_loops(writer, output_shape, true);

-       writer << get_opencl_type_name(output_type) << " " << var_name << " = " << infinity
-              << ";\n";
-       writer << "uint index = -1;\n";
+       writer << get_opencl_type_name(input_type) << " " << var_name << " = " << infinity << ";\n";
+       writer << get_opencl_type_name(output_type) << " index = 0;\n";

        writer << "for (uint i = 0; i < " << input_shape.at(reduction_axis) << "; ++i)\n";
        writer.block_begin();
        {
-           writer << "if(i == 0 || input0" << dims_buffer << operation_sign << var_name << ")\n";
+           writer << "if (input0" << dims_buffer << operation_sign << var_name << ")\n";
            writer.block_begin();
            {
                writer << var_name << " = input0" << dims_buffer << ";\n";
...
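The generated argmax/argmin kernel no longer needs the `i == 0` escape hatch: seeding the running value with the comparison identity (±infinity) lets the first element win the comparison naturally, and the value and index now carry the input and output element types respectively. A host-side C++ analog of the generated loop, shown for the argmin case:

    #include <cstddef>
    #include <limits>
    #include <vector>

    // Host-side analog of the generated OpenCL loop (argmin): seeding `best`
    // with +infinity removes the need for an `i == 0` special case.
    std::size_t arg_min(const std::vector<float>& input)
    {
        float best = std::numeric_limits<float>::infinity(); // input element type
        std::size_t index = 0;                               // output index type
        for (std::size_t i = 0; i < input.size(); ++i)
        {
            if (input[i] < best)
            {
                best = input[i];
                index = i;
            }
        }
        return index;
    }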
@@ -7,6 +7,7 @@ backwards_dot_scalar_tensor
 backwards_dot_tensor3_tensor3
 backwards_dot_tensor_scalar
 backwards_dot_tensor_vector
+backwards_exp
 backwards_maxpool_n2_c1_hw5_3x3_str2_max
 backwards_maxpool_n4_c1_hw4_2x2_max
 backwards_replace_slice
@@ -20,6 +21,9 @@ dequantize
 dequantize_axes
 dequantize_int8
 divide_by_zero_int32
+dot_3d_multi_axis
+dot_4d_5d_multi_axis
+dot_4d_5d_multi_axis_more
 function_call
 max_pool_3d
 numeric_double_inf
...
@@ -78,12 +78,12 @@ namespace ngraph
        /// \param layout Layout to set
        void set_tensor_layout(const std::shared_ptr<descriptor::layout::TensorLayout>& layout);

-       /// \brief Get the stale value of the tensor. A tensor is stale if it's data is
+       /// \brief Get the stale value of the tensor. A tensor is stale if its data is
        ///     changed.
        /// \return true if there is new data in this tensor
        bool get_stale() const;

-       /// \brief Set the stale value of the tensor. A tensor is stale if it's data is
+       /// \brief Set the stale value of the tensor. A tensor is stale if its data is
        ///     changed.
        void set_stale(bool val);
...
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once

#include <tuple>

#include "ngraph/coordinate_diff.hpp"
#include "ngraph/op/op.hpp"

namespace ngraph
{
    Shape infer_windowed_reduction_output_shape(const Node* node,
                                                const Shape& data_shape,
                                                const Strides& data_dilation,
                                                const CoordinateDiff& data_padding_below,
                                                const CoordinateDiff& data_padding_above,
                                                const Shape& window_shape,
                                                const Strides& window_strides,
                                                const Strides& window_dilation,
                                                bool is_window_all_in_padding_allowed);

    std::tuple<element::Type, Shape>
        infer_convolution_forward(const Node* node,
                                  element::Type et_batch,
                                  element::Type et_filters,
                                  const Shape& data_batch_shape,
                                  const Strides& data_dilation,
                                  const CoordinateDiff& data_padding_below,
                                  const CoordinateDiff& data_padding_above,
                                  const Shape& filters_shape,
                                  const Strides& filter_strides,
                                  const Strides& filter_dilation);

    Shape infer_batched_pooling_forward(const Node* node,
                                        const Shape& data_batch_shape,
                                        const CoordinateDiff& data_padding_below,
                                        const CoordinateDiff& data_padding_above,
                                        const Shape& window_shape,
                                        const Strides& window_strides,
                                        bool is_window_all_in_padding_allowed);
}
@@ -83,6 +83,30 @@ TEST(gpu_test, memory_manager_extract_arguments)
    EXPECT_EQ(host, fp32_args);
 }

+// This test was added to catch a potential bug in the allocator. Previously the
+// allocator copied extra data: for example, with an 8-byte alignment, reserving
+// 4 bytes of space still copied 8 bytes from input_args. This led to two
+// potential bugs:
+// 1. extra data was copied instead of the alignment padding being zeroed;
+// 2. out-of-bounds reads of input_args, which is undefined behavior.
+TEST(gpu_test, memory_manager_argspace_alignment)
+{
+    size_t alignment = 8;
+    std::vector<char> input_args = {0, 1, 2, 3, 4, 5, 6, 7};
+    std::vector<char> ref_args = {0, 1, 2, 3, 0, 0, 0, 0};
+    std::vector<char> result_args(alignment, 0);
+    size_t idx;
+    runtime::gpu::GPUPrimitiveEmitter emitter;
+    {
+        auto allocator = emitter.get_memory_allocator();
+        idx = allocator.reserve_argspace(input_args.data(), 4 * sizeof(char));
+    }
+    emitter.allocate_primitive_memory();
+    runtime::gpu::memory_primitive& mem_primitive = emitter.get_memory_primitives()[idx];
+    runtime::gpu::cuda_memcpyDtH(result_args.data(), mem_primitive(), alignment * sizeof(char));
+    EXPECT_EQ(result_args, ref_args);
+}
+
 TEST(gpu_test, memory_manager_argspace_size)
 {
    std::vector<float> fp32_args = {2112.0f, 2112.0f};
...