Unverified Commit 956e8b3a authored by Robert Kimball, committed by GitHub

Merge branch 'master' into nmostafa/recompile

parents a2b9c6b8 5d3456e4
......@@ -61,7 +61,7 @@ project/doc-contributor-README.rst @indie
/src/ngraph/type/ @diyessi
/src/ngraph/serializer.*pp @rkimballn1
/test/distributed.in.cpp @wenzhe-nrv @diyessi @jianyinglang
/test/backend/distributed.in.cpp @wenzhe-nrv @diyessi @jianyinglang
# Putting this last so it's not overridden by directory rules
CMakeLists.txt @rkimballn1 @silee2
......
......@@ -14,6 +14,9 @@
// limitations under the License.
//*****************************************************************************
// NOTE: This file follows nGraph format style and naming convention since it
// exposes a public API to the rest of the nGraph codebase.
#include "compiler.hpp"
#include "dialect/dialect.hpp"
......@@ -423,6 +426,12 @@ namespace ngraph
compiler.m_builder->getI64IntegerAttr(ng_node_gather->get_axis()));
return op;
}
template <>
mlir::Operation* MLIRCompiler::COMPILE_OP_DECL(ngraph::op::Relu)
{
return compiler.create_generic_op<mlir::NGReluOp>(ng_node);
}
}
}
}
......
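For illustration only (not part of this diff): the Relu hunk above follows the pattern used for the other ops in this file, where each nGraph op gets a COMPILE_OP_DECL template specialization that builds the matching dialect op, and ops with no extra attributes can use the generic builder. A hypothetical specialization for another attribute-free op would presumably look like the following; the dialect op name NGNegOp is a guess used purely for illustration.

template <>
mlir::Operation* MLIRCompiler::COMPILE_OP_DECL(ngraph::op::Negative)
{
    // No attributes to attach, so the generic builder is sufficient.
    return compiler.create_generic_op<mlir::NGNegOp>(ng_node);
}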
......@@ -14,6 +14,9 @@
// limitations under the License.
//*****************************************************************************
// NOTE: This file follows nGraph format style and naming convention since it
// exposes a public API to the rest of the nGraph codebase.
#pragma once
#include "memory_manager.hpp"
......
......@@ -14,6 +14,9 @@
// limitations under the License.
//*****************************************************************************
// NOTE: This file follows nGraph format style and MLIR naming convention since it does
// not expose a public API to the rest of the nGraph codebase and heavily depends on the MLIR API.
#include "dialect.hpp"
#include "ngraph/check.hpp"
#include "ops.hpp"
......@@ -41,12 +44,12 @@ void NGraphOpsDialect::printType(mlir::Type type, raw_ostream& os) const
case NG_TENSOR_TYPE_ID:
{
os << "tensor<";
auto tensor_ty = type.cast<NGTensorType>();
for (auto dim : tensor_ty.getShape())
auto tensorTy = type.cast<NGTensorType>();
for (auto dim : tensorTy.getShape())
{
os << dim << 'x';
}
os << tensor_ty.getElementType() << '>';
os << tensorTy.getElementType() << '>';
return;
}
case NG_I8_TYPE_ID:
......@@ -58,8 +61,8 @@ void NGraphOpsDialect::printType(mlir::Type type, raw_ostream& os) const
case NG_U32_TYPE_ID:
case NG_U64_TYPE_ID:
{
auto int_ty = type.cast<NGIntegerType>();
os << "i" << int_ty.getWidth();
auto intTy = type.cast<NGIntegerType>();
os << "i" << intTy.getWidth();
return;
}
case NG_BOOL_TYPE_ID:
......
......@@ -13,6 +13,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
// NOTE: This file follows nGraph format style and MLIR naming convention since it does
// not expose a public API to the rest of the nGraph codebase and heavily depends on the MLIR API.
#pragma once
#include "mlir/IR/Dialect.h"
......@@ -23,6 +27,7 @@
#include "mlir/IR/TypeSupport.h"
#include "mlir/IR/Types.h"
#include "ngraph/check.hpp"
namespace mlir
{
class NGraphOpsDialect : public mlir::Dialect
......
......@@ -13,6 +13,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
// NOTE: This file follows nGraph format style and MLIR naming convention since it does
// not expose a public API to the rest of the nGraph codebase and heavily depends on the MLIR API.
#include "ops.hpp"
#include "assertion.hpp"
#include "llvm/Support/ErrorHandling.h"
......
......@@ -13,6 +13,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
// NOTE: This file follows nGraph format style and MLIR naming convention since it does
// not expose a public API to the rest of the nGraph codebase and heavily depends on the MLIR API.
#pragma once
#include <cstdarg>
......
......@@ -18,6 +18,9 @@
//
//===----------------------------------------------------------------------===//
// NOTE: This file follows nGraph format style and MLIR naming convention since it does
// not expose a public API to the rest of the nGraph codebase and heavily depends on the MLIR API.
include "mlir/IR/OpBase.td"
// nGraph Dialect operations definitions
......
......@@ -12,6 +12,10 @@
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
// NOTE: This file follows nGraph format style and MLIR naming convention since it does
// not expose a public API to the rest of the nGraph codebase and heavily depends on the MLIR API.
#include "type.hpp"
......
......@@ -13,6 +13,10 @@
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
// NOTE: This file follows nGraph format style and MLIR naming convention since it does
// not expose a public API to the rest of the nGraph codebase and heavily depends on the MLIR API.
#pragma once
#include "mlir/IR/Dialect.h"
......@@ -198,19 +202,19 @@ namespace mlir
return new (storage) NGTensorTypeStorage(eltType, shape);
}
Shape getShape() const { return m_shape; }
int64_t getRank() const { return m_shape.size(); }
EltType getElementType() const { return m_eltType; }
Shape getShape() const { return shape; }
int64_t getRank() const { return shape.size(); }
EltType getElementType() const { return eltType; }
private:
NGTensorTypeStorage(EltType eltType, Shape shape)
: m_eltType(eltType)
, m_shape(shape)
: eltType(eltType)
, shape(shape)
{
}
private:
EltType m_eltType;
Shape m_shape;
EltType eltType;
Shape shape;
};
/// NGraph Tensor Type
......
......@@ -14,6 +14,9 @@
// limitations under the License.
//*****************************************************************************
// NOTE: This file follows nGraph format style and MLIR naming convention since it does
// not expose a public API to the rest of the nGraph codebase and heavily depends on the MLIR API.
#pragma once
#include "contrib/mlir/compiler.hpp"
......
......@@ -14,6 +14,9 @@
// limitations under the License.
//*****************************************************************************
// NOTE: This file follows nGraph format style and MLIR naming convention since it does
// not expose a public API to the rest of the nGraph codebase and heavily depends on the MLIR API.
#include "memory_manager.hpp"
#include <memory>
#include "ngraph/ngraph_visibility.hpp"
......@@ -21,9 +24,9 @@
using namespace ngraph::runtime::ngmlir;
/// Callback to allocate memory for temps from JIT'ed code
extern "C" NGRAPH_API void* __mlir_allocate(MLIRMemMgr* mem_mgr, size_t size)
extern "C" NGRAPH_API void* __mlir_allocate(MLIRMemMgr* memMgr, size_t size)
{
return mem_mgr->allocate(size);
return memMgr->allocate(size);
}
void* MLIRMemMgr::allocate(size_t size)
......
......@@ -14,11 +14,15 @@
// limitations under the License.
//*****************************************************************************
// NOTE: This file follows nGraph format style and MLIR naming convention since it does
// not expose a public API to the rest of the nGraph codebase and heavily depends on the MLIR API.
#pragma once
#include <stdint.h>
#include <stdlib.h>
#include <vector>
namespace ngraph
{
namespace runtime
......
......@@ -14,6 +14,9 @@
// limitations under the License.
//*****************************************************************************
// NOTE: This file follows nGraph format style and naming convention since it
// exposes a public API to the rest of the nGraph codebase.
#include "mlir_subgraph_extraction.hpp"
#include "ngraph/assertion.hpp"
#include "ngraph/graph_util.hpp"
......
......@@ -14,6 +14,9 @@
// limitations under the License.
//*****************************************************************************
// NOTE: This file follows nGraph format style and naming convention since it
// exposes a public API to the rest of the nGraph codebase.
#pragma once
#include <mutex>
......
......@@ -36,25 +36,25 @@ namespace ngraph
{
namespace quantization
{
shared_ptr<Node> QuantizedLinearConvolutionBias(const shared_ptr<Node>& input,
const shared_ptr<Node>& filter,
const shared_ptr<Node>& bias,
shared_ptr<Node> QuantizedLinearConvolutionBias(const Output<Node>& input,
const Output<Node>& filter,
const Output<Node>& bias,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const shared_ptr<Node>& input_scale,
const shared_ptr<Node>& filter_scale,
const shared_ptr<Node>& output_scale)
const Output<Node>& input_scale,
const Output<Node>& filter_scale,
const Output<Node>& output_scale)
{
// TODO: need to establish cross-nGraph view of scale (mult or div)
auto requantization_scale = (input_scale * filter_scale) / output_scale;
auto mybias = bias;
if (bias->get_element_type() != element::i32)
if (bias.get_element_type() != element::i32)
{
const auto zero = make_constant(element::i32, input_scale->get_shape(), 0);
const auto zero = make_constant(element::i32, input_scale.get_shape(), 0);
const AxisSet quantization_axes;
const auto bias_scale = input_scale * filter_scale;
op::Quantize::RoundMode round_mode =
......
......@@ -26,17 +26,17 @@ namespace ngraph
namespace quantization
{
std::shared_ptr<Node>
QuantizedLinearConvolutionBias(const std::shared_ptr<Node>& input,
const std::shared_ptr<Node>& filter,
const std::shared_ptr<Node>& bias,
QuantizedLinearConvolutionBias(const Output<Node>& input,
const Output<Node>& filter,
const Output<Node>& bias,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node>& input_scale,
const std::shared_ptr<Node>& filter_scale,
const std::shared_ptr<Node>& output_scale);
const Output<Node>& input_scale,
const Output<Node>& filter_scale,
const Output<Node>& output_scale);
}
}
}
......@@ -39,14 +39,14 @@ namespace ngraph
{
// TODO: this code is falling back to fp32 dot
// 1) add support in reference kernel for zero point
shared_ptr<Node> QuantizedLinearMatmul(const shared_ptr<Node>& input0,
const shared_ptr<Node>& input1,
const shared_ptr<Node>& input0_scale,
const shared_ptr<Node>& input0_zero_point,
const shared_ptr<Node>& input1_scale,
const shared_ptr<Node>& input1_zero_point,
const shared_ptr<Node>& output_scale,
const shared_ptr<Node>& output_zero_point)
shared_ptr<Node> QuantizedLinearMatmul(const Output<Node>& input0,
const Output<Node>& input1,
const Output<Node>& input0_scale,
const Output<Node>& input0_zero_point,
const Output<Node>& input1_scale,
const Output<Node>& input1_zero_point,
const Output<Node>& output_scale,
const Output<Node>& output_zero_point)
{
// Check if zero point is constant and zero
if (ngraph::is_zero(input0_zero_point) && ngraph::is_zero(input1_zero_point) &&
......@@ -62,13 +62,13 @@ namespace ngraph
auto dq_input0 = make_shared<op::Dequantize>(input0,
input0_scale,
input0_zero_point,
input0_scale->get_element_type(),
input0_scale.get_element_type(),
axes);
auto dq_input1 = make_shared<op::Dequantize>(input1,
input1_scale,
input1_zero_point,
input1_scale->get_element_type(),
input1_scale.get_element_type(),
axes);
auto dot = make_shared<op::Dot>(dq_input0, dq_input1, 1);
......@@ -76,24 +76,23 @@ namespace ngraph
dot,
output_scale,
output_zero_point,
output_zero_point->get_element_type(),
output_zero_point.get_element_type(),
axes,
op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
}
}
shared_ptr<Node> QuantizedLinearMatmulInteger(const shared_ptr<Node>& input0,
const shared_ptr<Node>& input1)
shared_ptr<Node> QuantizedLinearMatmulInteger(const Output<Node>& input0,
const Output<Node>& input1)
{
auto output_scale = make_constant(element::f32, Shape{}, 1);
return make_shared<op::QuantizedDot>(input0, input1, output_scale, false, false);
}
shared_ptr<Node>
QuantizedLinearMatmulInteger(const std::shared_ptr<Node>& input0,
const std::shared_ptr<Node>& input1,
const std::shared_ptr<Node>& input0_zero_point,
const std::shared_ptr<Node>& input1_zero_point)
shared_ptr<Node> QuantizedLinearMatmulInteger(const Output<Node>& input0,
const Output<Node>& input1,
const Output<Node>& input0_zero_point,
const Output<Node>& input1_zero_point)
{
// Check if zero points are constant and zero
if (ngraph::is_zero(input0_zero_point) && ngraph::is_zero(input1_zero_point))
......
......@@ -25,24 +25,23 @@ namespace ngraph
{
namespace quantization
{
std::shared_ptr<Node>
QuantizedLinearMatmul(const std::shared_ptr<Node>& input0,
const std::shared_ptr<Node>& input1,
const std::shared_ptr<Node>& input0_scale,
const std::shared_ptr<Node>& input0_zero_point,
const std::shared_ptr<Node>& input1_scale,
const std::shared_ptr<Node>& input1_zero_point,
const std::shared_ptr<Node>& output_scale,
const std::shared_ptr<Node>& output_zero_point);
std::shared_ptr<Node> QuantizedLinearMatmul(const Output<Node>& input0,
const Output<Node>& input1,
const Output<Node>& input0_scale,
const Output<Node>& input0_zero_point,
const Output<Node>& input1_scale,
const Output<Node>& input1_zero_point,
const Output<Node>& output_scale,
const Output<Node>& output_zero_point);
std::shared_ptr<Node> QuantizedLinearMatmulInteger(const std::shared_ptr<Node>& input0,
const std::shared_ptr<Node>& input1);
std::shared_ptr<Node> QuantizedLinearMatmulInteger(const Output<Node>& input0,
const Output<Node>& input1);
std::shared_ptr<Node>
QuantizedLinearMatmulInteger(const std::shared_ptr<Node>& input0,
const std::shared_ptr<Node>& input1,
const std::shared_ptr<Node>& input0_zero_point,
const std::shared_ptr<Node>& input1_zero_point);
QuantizedLinearMatmulInteger(const Output<Node>& input0,
const Output<Node>& input1,
const Output<Node>& input0_zero_point,
const Output<Node>& input1_zero_point);
}
}
}
......@@ -22,26 +22,26 @@ namespace ngraph
{
namespace quantization_utils
{
std::shared_ptr<Node> max_abs(std::shared_ptr<Node> a, std::shared_ptr<Node> b)
std::shared_ptr<Node> max_abs(const Output<Node>& a, const Output<Node>& b)
{
auto abs_a = std::make_shared<op::Abs>(a);
auto abs_b = std::make_shared<op::Abs>(b);
return std::make_shared<op::Maximum>(abs_a, abs_b);
}
std::shared_ptr<Node> get_scale(std::shared_ptr<Node> input_min_range,
std::shared_ptr<Node> input_max_range,
std::shared_ptr<Node> get_scale(const Output<Node>& input_min_range,
const Output<Node>& input_max_range,
const ngraph::element::Type& quant_type,
bool bump_by_eps)
{
auto type = input_min_range->get_element_type();
if (type != input_max_range->get_element_type())
auto type = input_min_range.get_element_type();
if (type != input_max_range.get_element_type())
{
throw ngraph_error("get_scale: min and max must have same type");
}
auto shape = input_min_range->get_shape();
if (shape != input_max_range->get_shape())
auto shape = input_min_range.get_shape();
if (shape != input_max_range.get_shape())
{
throw ngraph_error("get_scale: min and max must have same shape");
}
......
......@@ -37,10 +37,10 @@ namespace ngraph
{
namespace quantization_utils
{
std::shared_ptr<Node> max_abs(std::shared_ptr<Node> a, std::shared_ptr<Node> b);
std::shared_ptr<Node> max_abs(const Output<Node>& a, const Output<Node>& b);
std::shared_ptr<Node> get_scale(std::shared_ptr<Node> input_min_range,
std::shared_ptr<Node> input_max_range,
std::shared_ptr<Node> get_scale(const Output<Node>& input_min_range,
const Output<Node>& input_max_range,
const ngraph::element::Type& quant_type,
bool bump_by_eps = false);
}
......
......@@ -26,35 +26,34 @@ namespace ngraph
{
namespace builder
{
shared_ptr<Node> QuantizedConvolutionBuilder(const shared_ptr<Node>& input,
const shared_ptr<Node>& filters,
shared_ptr<Node> QuantizedConvolutionBuilder(const Output<Node>& input,
const Output<Node>& filters,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const shared_ptr<Node>& min_input,
const shared_ptr<Node>& max_input,
const shared_ptr<Node>& min_filter,
const shared_ptr<Node>& max_filter,
const shared_ptr<Node>& min_output,
const shared_ptr<Node>& max_output,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const ngraph::element::Type& output_type,
const ngraph::AxisSet& input_axes,
const ngraph::AxisSet& filter_axes,
const ngraph::AxisSet& output_axes)
{
auto input_scale =
quantization_utils::get_scale(min_input, max_input, input->get_element_type());
quantization_utils::get_scale(min_input, max_input, input.get_element_type());
auto filter_scale =
quantization_utils::get_scale(min_filter, max_filter, filters->get_element_type());
quantization_utils::get_scale(min_filter, max_filter, filters.get_element_type());
auto output_scale = quantization_utils::get_scale(min_output, max_output, output_type);
// TODO: Check for this later
// For Builders the zero point is assumed to be zero (for now)
auto input_zero_point = op::Constant::create(input->get_element_type(), Shape{}, {0});
auto filter_zero_point =
op::Constant::create(filters->get_element_type(), Shape{}, {0});
auto input_zero_point = op::Constant::create(input.get_element_type(), Shape{}, {0});
auto filter_zero_point = op::Constant::create(filters.get_element_type(), Shape{}, {0});
return make_shared<op::QuantizedConvolution>(
input,
......
......@@ -26,19 +26,19 @@ namespace ngraph
namespace builder
{
std::shared_ptr<Node>
QuantizedConvolutionBuilder(const std::shared_ptr<Node>& input,
const std::shared_ptr<Node>& filters,
QuantizedConvolutionBuilder(const Output<Node>& input,
const Output<Node>& filters,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node>& min_input,
const std::shared_ptr<Node>& max_input,
const std::shared_ptr<Node>& min_filter,
const std::shared_ptr<Node>& max_filter,
const std::shared_ptr<Node>& min_output,
const std::shared_ptr<Node>& max_output,
const Output<Node>& min_input,
const Output<Node>& max_input,
const Output<Node>& min_filter,
const Output<Node>& max_filter,
const Output<Node>& min_output,
const Output<Node>& max_output,
const ngraph::element::Type& output_type,
const ngraph::AxisSet& input_axes = ngraph::AxisSet{},
const ngraph::AxisSet& filter_axes = ngraph::AxisSet{},
......
......@@ -293,9 +293,9 @@ std::shared_ptr<ngraph::Function> ngraph::clone_function(const ngraph::Function&
return std::make_shared<ngraph::Function>(cloned_results, cloned_params);
}
bool ngraph::is_equal_to_const_value(std::string const_value, std::shared_ptr<Node> reduce_constant)
bool ngraph::is_equal_to_const_value(std::string const_value, const Output<Node>& reduce_constant)
{
if (auto rc = dynamic_pointer_cast<ngraph::op::Constant>(reduce_constant))
if (auto rc = dynamic_pointer_cast<ngraph::op::Constant>(reduce_constant.get_node_shared_ptr()))
{
auto cshape = rc->get_shape();
size_t n = shape_size(cshape);
......@@ -454,7 +454,7 @@ std::shared_ptr<Node> ngraph::make_constant_from_string(std::string val,
return std::make_shared<op::Constant>(element_type, shape, cvals);
}
bool ngraph::is_zero(std::shared_ptr<Node> reduce_constant)
bool ngraph::is_zero(const Output<Node>& reduce_constant)
{
auto result_bool = is_equal_to_const_value("0", reduce_constant);
return result_bool;
......
......@@ -349,7 +349,7 @@ namespace ngraph
// Check if all paths from X to a result go through Y
bool is_post_dominated(Node* X, Node* Y);
bool is_equal_to_const_value(std::string const_value, std::shared_ptr<Node> reduce_constant);
bool is_equal_to_const_value(std::string const_value, const Output<Node>& reduce_constant);
// input nodes are cloned and returned
// NodeMap input may contain default node mapping i.e. pre-cloned nodes
......@@ -383,7 +383,7 @@ namespace ngraph
const element::Type& element_type,
const Shape& shape);
bool is_zero(std::shared_ptr<Node> reduce_constant);
bool is_zero(const Output<Node>& reduce_constant);
NodeVector get_subgraph_outputs(const NodeVector& nodes,
const NodeVector& exclusions,
......
......@@ -86,6 +86,18 @@ std::shared_ptr<Node> Node::copy_with_new_inputs(const OutputVector& inputs) con
return copy_with_new_inputs(inputs, get_control_dependencies());
}
std::shared_ptr<Node> Node::get_output_as_single_output_node(size_t i)
{
for (auto in : output(i).get_target_inputs())
{
if (in.get_node()->description() == op::GetOutputElement::type_name)
{
return in.get_node()->shared_from_this();
}
}
return get_output_element(output(i), true);
}
std::shared_ptr<Node>
Node::copy_with_new_inputs(const OutputVector& inputs,
const std::vector<std::shared_ptr<Node>>& control_dependencies) const
......
......@@ -279,6 +279,8 @@ namespace ngraph
/// Returns the partial shape for output i
const PartialShape& get_output_partial_shape(size_t i) const;
std::shared_ptr<Node> get_output_as_single_output_node(size_t i);
/// Checks that there is exactly one output and returns its shape
// TODO: deprecate in favor of node->output(0).get_shape() with a suitable check in the
// calling code, or updates to the calling code if it is making an invalid assumption of
......@@ -554,6 +556,13 @@ namespace ngraph
///
/// TODO: Make a plan to deprecate this.
std::shared_ptr<NodeType> get_node_shared_ptr() const { return m_node; }
/// \return A useable shared pointer to this output. If index 0, the node,
/// otherwise find or create a GOE.
std::shared_ptr<Node> as_single_output_node() const NGRAPH_DEPRECATED("Transitional.")
{
return m_node->get_output_as_single_output_node(m_index);
}
/// \return The index of the output referred to by this output handle.
size_t get_index() const { return m_index; }
/// \return A reference to the tensor descriptor for this output.
......
......@@ -21,13 +21,15 @@
using namespace std;
using namespace ngraph;
op::QuantizedAvgPool::QuantizedAvgPool(const shared_ptr<Node>& arg,
const string op::QuantizedAvgPool::type_name{"QuantizedAvgPool"};
op::QuantizedAvgPool::QuantizedAvgPool(const Output<Node>& arg,
const Shape& window_shape,
const Strides& window_movement_strides,
const Shape& padding_below,
const Shape& padding_above,
bool include_padding_in_avg_computation)
: Op("QuantizedAvgPool", check_single_output_args({arg}))
: Op({arg})
, m_window_shape(window_shape)
, m_window_movement_strides(window_movement_strides)
, m_padding_below(padding_below)
......@@ -35,15 +37,16 @@ op::QuantizedAvgPool::QuantizedAvgPool(const shared_ptr<Node>& arg,
, m_include_padding_in_avg_computation(include_padding_in_avg_computation)
{
constructor_validate_and_infer_types();
}
if (arg->get_element_type() != element::u8 && arg->get_element_type() != element::i8)
void op::QuantizedAvgPool::validate_and_infer_types()
{
auto arg(input(0).get_source_output());
if (arg.get_element_type() != element::u8 && arg.get_element_type() != element::i8)
{
throw ngraph_error("QuantizedAvgPool supported only for i8/u8!");
}
}
void op::QuantizedAvgPool::validate_and_infer_types()
{
auto& arg_shape = get_input_shape(0);
if (0 == m_window_movement_strides.size() && arg_shape.size() > 2)
......
......@@ -28,6 +28,11 @@ namespace ngraph
class QuantizedAvgPool : public Op
{
public:
NGRAPH_API
static const std::string type_name;
const std::string& description() const override { return type_name; }
/// \brief Constructs a batched average pooling operation.
QuantizedAvgPool() = default;
/// \brief Constructs a batched average pooling operation.
///
/// \param arg The node producing the input data batch tensor.<br>
......@@ -43,7 +48,7 @@ namespace ngraph
/// \param include_padding_in_avg_computation If true then averages include padding
/// elements, each treated as the number zero. If false, padding elements are entirely
/// ignored when computing averages.
QuantizedAvgPool(const std::shared_ptr<Node>& arg,
QuantizedAvgPool(const Output<Node>& arg,
const Shape& window_shape,
const Strides& window_movement_strides,
const Shape& padding_below,
......
......@@ -27,17 +27,19 @@
using namespace std;
using namespace ngraph;
op::QuantizedConvolutionBias::QuantizedConvolutionBias(const shared_ptr<Node>& data_batch,
const shared_ptr<Node>& filters,
const shared_ptr<Node>& bias,
const string op::QuantizedConvolutionBias::type_name{"QuantizedConvolutionBias"};
op::QuantizedConvolutionBias::QuantizedConvolutionBias(const Output<Node>& data_batch,
const Output<Node>& filters,
const Output<Node>& bias,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const shared_ptr<Node>& scale,
const Output<Node>& scale,
const bool with_relu)
: Op("QuantizedConvolutionBias", check_single_output_args({data_batch, filters, bias, scale}))
: Op({data_batch, filters, bias, scale})
, m_window_movement_strides(window_movement_strides)
, m_window_dilation_strides(window_dilation_strides)
, m_padding_below(padding_below)
......@@ -47,8 +49,8 @@ op::QuantizedConvolutionBias::QuantizedConvolutionBias(const shared_ptr<Node>& d
{
constructor_validate_and_infer_types();
auto& data_batch_shape = data_batch->get_shape();
auto& filters_shape = filters->get_shape();
auto& data_batch_shape = data_batch.get_shape();
auto& filters_shape = filters.get_shape();
// TODO: call ngraph util
// util::validate_convbias_shapes(data_batch_shape, filters_shape, bias->get_shape());
......@@ -92,20 +94,21 @@ shared_ptr<Node> op::QuantizedConvolutionBias::copy_with_new_args(const NodeVect
m_with_relu));
}
op::QuantizedConvolutionBiasAdd::QuantizedConvolutionBiasAdd(const shared_ptr<Node>& data_batch,
const shared_ptr<Node>& filters,
const shared_ptr<Node>& bias,
const shared_ptr<Node>& sum_input,
const string op::QuantizedConvolutionBiasAdd::type_name{"QuantizedConvolutionBiasAdd"};
op::QuantizedConvolutionBiasAdd::QuantizedConvolutionBiasAdd(const Output<Node>& data_batch,
const Output<Node>& filters,
const Output<Node>& bias,
const Output<Node>& sum_input,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const shared_ptr<Node>& scale,
const shared_ptr<Node>& sum_scale,
const Output<Node>& scale,
const Output<Node>& sum_scale,
const bool with_relu)
: Op("QuantizedConvolutionBiasAdd",
check_single_output_args({data_batch, filters, bias, sum_input, scale, sum_scale}))
: Op({data_batch, filters, bias, sum_input, scale, sum_scale})
, m_window_movement_strides(window_movement_strides)
, m_window_dilation_strides(window_dilation_strides)
, m_padding_below(padding_below)
......@@ -115,8 +118,8 @@ op::QuantizedConvolutionBiasAdd::QuantizedConvolutionBiasAdd(const shared_ptr<No
{
constructor_validate_and_infer_types();
auto& data_batch_shape = data_batch->get_shape();
auto& filters_shape = filters->get_shape();
auto& data_batch_shape = data_batch.get_shape();
auto& filters_shape = filters.get_shape();
// TODO: call ngraph util
// util::validate_convbias_shapes(data_batch_shape, filters_shape, bias->get_shape());
......@@ -163,21 +166,22 @@ shared_ptr<Node>
m_with_relu));
}
const string op::QuantizedConvolutionBiasSignedAdd::type_name{"QuantizedConvolutionBiasSignedAdd"};
op::QuantizedConvolutionBiasSignedAdd::QuantizedConvolutionBiasSignedAdd(
const shared_ptr<Node>& data_batch,
const shared_ptr<Node>& filters,
const shared_ptr<Node>& bias,
const shared_ptr<Node>& sum_input,
const Output<Node>& data_batch,
const Output<Node>& filters,
const Output<Node>& bias,
const Output<Node>& sum_input,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const shared_ptr<Node>& scale,
const shared_ptr<Node>& sum_scale,
const Output<Node>& scale,
const Output<Node>& sum_scale,
const bool with_relu)
: Op("QuantizedConvolutionBiasSignedAdd",
check_single_output_args({data_batch, filters, bias, sum_input, scale, sum_scale}))
: Op({data_batch, filters, bias, sum_input, scale, sum_scale})
, m_window_movement_strides(window_movement_strides)
, m_window_dilation_strides(window_dilation_strides)
, m_padding_below(padding_below)
......@@ -187,8 +191,8 @@ op::QuantizedConvolutionBiasSignedAdd::QuantizedConvolutionBiasSignedAdd(
{
constructor_validate_and_infer_types();
auto& data_batch_shape = data_batch->get_shape();
auto& filters_shape = filters->get_shape();
auto& data_batch_shape = data_batch.get_shape();
auto& filters_shape = filters.get_shape();
// TODO: call ngraph util
// util::validate_convbias_shapes(data_batch_shape, filters_shape, bias->get_shape());
......
......@@ -27,15 +27,18 @@ namespace ngraph
class QuantizedConvolutionBias : public Op
{
public:
QuantizedConvolutionBias(const std::shared_ptr<Node>& data_batch,
const std::shared_ptr<Node>& filters,
const std::shared_ptr<Node>& bias,
NGRAPH_API
static const std::string type_name;
const std::string& description() const override { return type_name; }
QuantizedConvolutionBias(const Output<Node>& data_batch,
const Output<Node>& filters,
const Output<Node>& bias,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node>& scale,
const Output<Node>& scale,
const bool with_relu = false);
const Strides& get_window_movement_strides() const { return m_window_movement_strides; }
......@@ -43,9 +46,9 @@ namespace ngraph
const CoordinateDiff& get_padding_below() const { return m_padding_below; }
const CoordinateDiff& get_padding_above() const { return m_padding_above; }
const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }
std::shared_ptr<Node> get_bias() { return get_argument(2); }
std::shared_ptr<Node> get_filters() { return get_argument(1); }
std::shared_ptr<Node> get_data_batch() { return get_argument(0); }
Output<Node> get_bias() { return input(2).get_source_output(); }
Output<Node> get_filters() { return input(1).get_source_output(); }
Output<Node> get_data_batch() { return input(0).get_source_output(); }
bool with_relu() const { return m_with_relu; }
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
......@@ -62,17 +65,20 @@ namespace ngraph
class QuantizedConvolutionBiasAdd : public Op
{
public:
QuantizedConvolutionBiasAdd(const std::shared_ptr<Node>& data_batch,
const std::shared_ptr<Node>& filters,
const std::shared_ptr<Node>& bias,
const std::shared_ptr<Node>& sum_input,
NGRAPH_API
static const std::string type_name;
const std::string& description() const override { return type_name; }
QuantizedConvolutionBiasAdd(const Output<Node>& data_batch,
const Output<Node>& filters,
const Output<Node>& bias,
const Output<Node>& sum_input,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node>& scale,
const std::shared_ptr<Node>& sum_scale,
const Output<Node>& scale,
const Output<Node>& sum_scale,
const bool with_relu = false);
const Strides& get_window_movement_strides() const { return m_window_movement_strides; }
......@@ -80,9 +86,9 @@ namespace ngraph
const CoordinateDiff& get_padding_below() const { return m_padding_below; }
const CoordinateDiff& get_padding_above() const { return m_padding_above; }
const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }
std::shared_ptr<Node> get_bias() { return get_argument(2); }
std::shared_ptr<Node> get_filters() { return get_argument(1); }
std::shared_ptr<Node> get_data_batch() { return get_argument(0); }
Output<Node> get_bias() { return input(2).get_source_output(); }
Output<Node> get_filters() { return input(1).get_source_output(); }
Output<Node> get_data_batch() { return input(0).get_source_output(); }
bool with_relu() const { return m_with_relu; }
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
......@@ -99,17 +105,20 @@ namespace ngraph
class QuantizedConvolutionBiasSignedAdd : public Op
{
public:
QuantizedConvolutionBiasSignedAdd(const std::shared_ptr<Node>& data_batch,
const std::shared_ptr<Node>& filters,
const std::shared_ptr<Node>& bias,
const std::shared_ptr<Node>& sum_input,
NGRAPH_API
static const std::string type_name;
const std::string& description() const override { return type_name; }
QuantizedConvolutionBiasSignedAdd(const Output<Node>& data_batch,
const Output<Node>& filters,
const Output<Node>& bias,
const Output<Node>& sum_input,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node>& scale,
const std::shared_ptr<Node>& sum_scale,
const Output<Node>& scale,
const Output<Node>& sum_scale,
const bool with_relu = false);
const Strides& get_window_movement_strides() const { return m_window_movement_strides; }
......@@ -117,9 +126,9 @@ namespace ngraph
const CoordinateDiff& get_padding_below() const { return m_padding_below; }
const CoordinateDiff& get_padding_above() const { return m_padding_above; }
const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }
std::shared_ptr<Node> get_bias() { return get_argument(2); }
std::shared_ptr<Node> get_filters() { return get_argument(1); }
std::shared_ptr<Node> get_data_batch() { return get_argument(0); }
Output<Node> get_bias() { return input(2).get_source_output(); }
Output<Node> get_filters() { return input(1).get_source_output(); }
Output<Node> get_data_batch() { return input(0).get_source_output(); }
bool with_relu() const { return m_with_relu; }
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
......
......@@ -26,15 +26,17 @@
using namespace std;
using namespace ngraph;
op::QuantizedConvolutionRelu::QuantizedConvolutionRelu(const shared_ptr<Node>& data_batch,
const shared_ptr<Node>& filters,
const string op::QuantizedConvolutionRelu::type_name{"QuantizedConvolutionRelu"};
op::QuantizedConvolutionRelu::QuantizedConvolutionRelu(const Output<Node>& data_batch,
const Output<Node>& filters,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const shared_ptr<Node>& scale)
: Op("QuantizedConvolutionRelu", check_single_output_args({data_batch, filters, scale}))
const Output<Node>& scale)
: Op({data_batch, filters, scale})
, m_window_movement_strides(window_movement_strides)
, m_window_dilation_strides(window_dilation_strides)
, m_padding_below(padding_below)
......@@ -43,8 +45,8 @@ op::QuantizedConvolutionRelu::QuantizedConvolutionRelu(const shared_ptr<Node>& d
{
constructor_validate_and_infer_types();
auto& data_batch_shape = data_batch->get_shape();
auto& filters_shape = filters->get_shape();
auto& data_batch_shape = data_batch.get_shape();
auto& filters_shape = filters.get_shape();
set_output_type(0,
element::u8,
......
......@@ -27,22 +27,25 @@ namespace ngraph
class QuantizedConvolutionRelu : public Op
{
public:
QuantizedConvolutionRelu(const std::shared_ptr<Node>& data_batch,
const std::shared_ptr<Node>& filters,
NGRAPH_API
static const std::string type_name;
const std::string& description() const override { return type_name; }
QuantizedConvolutionRelu(const Output<Node>& data_batch,
const Output<Node>& filters,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node>& scale);
const Output<Node>& scale);
const Strides& get_window_movement_strides() const { return m_window_movement_strides; }
const Strides& get_window_dilation_strides() const { return m_window_dilation_strides; }
const CoordinateDiff& get_padding_below() const { return m_padding_below; }
const CoordinateDiff& get_padding_above() const { return m_padding_above; }
const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }
std::shared_ptr<Node> get_filters() { return get_argument(1); }
std::shared_ptr<Node> get_data_batch() { return get_argument(0); }
Output<Node> get_filters() { return input(1).get_source_output(); }
Output<Node> get_data_batch() { return input(0).get_source_output(); }
bool with_relu() const { return true; }
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
......
......@@ -24,19 +24,21 @@
using namespace std;
using namespace ngraph;
op::QuantizedDot::QuantizedDot(const shared_ptr<Node>& data,
const shared_ptr<Node>& weights,
const shared_ptr<Node>& scale,
const string op::QuantizedDot::type_name{"QuantizedDot"};
op::QuantizedDot::QuantizedDot(const Output<Node>& data,
const Output<Node>& weights,
const Output<Node>& scale,
bool requantize,
bool with_relu)
: Op("QuantizedDot", check_single_output_args({data, weights, scale}))
: Op({data, weights, scale})
, m_requantize(requantize)
, m_with_relu(with_relu)
{
constructor_validate_and_infer_types();
auto& data_shape = data->get_shape();
auto& weights_shape = weights->get_shape();
auto& data_shape = data.get_shape();
auto& weights_shape = weights.get_shape();
// QuantizedDot does [m, n] * [n, k] = [m, k]
NODE_VALIDATION_CHECK(this,
data_shape.size() == 2 && weights_shape.size() == 2 &&
......@@ -47,7 +49,7 @@ op::QuantizedDot::QuantizedDot(const shared_ptr<Node>& data,
weights_shape);
auto output_et = requantize ? (with_relu ? element::u8 : element::i8) : element::i32;
if (data->get_element_type() == element::u8 && weights->get_element_type() == element::u8)
if (data.get_element_type() == element::u8 && weights.get_element_type() == element::u8)
{
output_et = element::u8;
}
......
......@@ -27,9 +27,12 @@ namespace ngraph
class QuantizedDot : public Op
{
public:
QuantizedDot(const std::shared_ptr<Node>& data,
const std::shared_ptr<Node>& weights,
const std::shared_ptr<Node>& scale,
NGRAPH_API
static const std::string type_name;
const std::string& description() const override { return type_name; }
QuantizedDot(const Output<Node>& data,
const Output<Node>& weights,
const Output<Node>& scale,
bool requantize = true,
bool with_relu = false);
......
......@@ -24,21 +24,23 @@
using namespace std;
using namespace ngraph;
op::QuantizedDotBias::QuantizedDotBias(const shared_ptr<Node>& data,
const shared_ptr<Node>& weights,
const shared_ptr<Node>& bias,
const shared_ptr<Node>& scale,
const string op::QuantizedDotBias::type_name{"QuantizedDotBias"};
op::QuantizedDotBias::QuantizedDotBias(const Output<Node>& data,
const Output<Node>& weights,
const Output<Node>& bias,
const Output<Node>& scale,
bool requantize,
bool with_relu)
: Op("QuantizedDotBias", check_single_output_args({data, weights, bias, scale}))
: Op({data, weights, bias, scale})
, m_requantize(requantize)
, m_with_relu(with_relu)
{
constructor_validate_and_infer_types();
auto& data_shape = data->get_shape();
auto& weights_shape = weights->get_shape();
auto& bias_shape = bias->get_shape();
auto& data_shape = data.get_shape();
auto& weights_shape = weights.get_shape();
auto& bias_shape = bias.get_shape();
NODE_VALIDATION_CHECK(this,
data_shape.size() == 2 && weights_shape.size() == 2 &&
data_shape[1] == weights_shape[1],
......
......@@ -27,10 +27,13 @@ namespace ngraph
class QuantizedDotBias : public Op
{
public:
QuantizedDotBias(const std::shared_ptr<Node>& data,
const std::shared_ptr<Node>& weights,
const std::shared_ptr<Node>& bias,
const std::shared_ptr<Node>& scale,
NGRAPH_API
static const std::string type_name;
const std::string& description() const override { return type_name; }
QuantizedDotBias(const Output<Node>& data,
const Output<Node>& weights,
const Output<Node>& bias,
const Output<Node>& scale,
bool requantize = true,
bool with_relu = false);
......
......@@ -22,12 +22,14 @@
using namespace std;
using namespace ngraph;
op::QuantizedMaxPool::QuantizedMaxPool(const shared_ptr<Node>& arg,
const string op::QuantizedMaxPool::type_name{"QuantizedMaxPool"};
op::QuantizedMaxPool::QuantizedMaxPool(const Output<Node>& arg,
const Shape& window_shape,
const Strides& window_movement_strides,
const Shape& padding_below,
const Shape& padding_above)
: Op("QuantizedMaxPool", check_single_output_args({arg}))
: Op({arg})
, m_window_shape(window_shape)
, m_window_movement_strides(window_movement_strides)
, m_padding_below(padding_below)
......@@ -35,7 +37,7 @@ op::QuantizedMaxPool::QuantizedMaxPool(const shared_ptr<Node>& arg,
{
constructor_validate_and_infer_types();
if (arg->get_element_type() != element::u8 && arg->get_element_type() != element::i8)
if (arg.get_element_type() != element::u8 && arg.get_element_type() != element::i8)
{
throw ngraph_error("QuantizedMaxPool supported only for i8/u8!");
}
......
......@@ -26,6 +26,9 @@ namespace ngraph
class QuantizedMaxPool : public Op
{
public:
NGRAPH_API
static const std::string type_name;
const std::string& description() const override { return type_name; }
/// \brief Constructs a batched max pooling operation.
///
/// \param arg The node producing the input data batch tensor.
......@@ -33,7 +36,7 @@ namespace ngraph
/// \param window_movement_strides The window movement strides.
/// \param padding_below The below-padding shape.
/// \param padding_above The above-padding shape.
QuantizedMaxPool(const std::shared_ptr<Node>& arg,
QuantizedMaxPool(const Output<Node>& arg,
const Shape& window_shape,
const Strides& window_movement_strides,
const Shape& padding_below,
......
......@@ -71,20 +71,7 @@ NodeVector op::get_output_elements(const shared_ptr<Node>& mon)
NodeVector goes(mon->get_output_size());
for (auto o : mon->outputs())
{
shared_ptr<Node> goe;
for (auto in : o.get_target_inputs())
{
if (in.get_node()->description() == op::GetOutputElement::type_name)
{
goe = in.get_node()->shared_from_this();
break;
}
}
if (goe == nullptr)
{
goe = get_output_element(o, true);
}
goes.at(o.get_index()) = goe;
goes.at(o.get_index()) = o.as_single_output_node();
}
return goes;
}
......@@ -20,14 +20,16 @@
using namespace std;
using namespace ngraph;
op::Quantize::Quantize(const shared_ptr<Node>& input,
const shared_ptr<Node>& scale,
const shared_ptr<Node>& zero_point,
const string op::Quantize::type_name{"Quantize"};
op::Quantize::Quantize(const Output<Node>& input,
const Output<Node>& scale,
const Output<Node>& zero_point,
const element::Type& type,
const AxisSet& axes,
RoundMode round_mode)
: Op("Quantize", check_single_output_args({input, scale, zero_point}))
: Op({input, scale, zero_point})
, m_type(type)
, m_axes(axes)
, m_round_mode(round_mode)
......
......@@ -30,6 +30,9 @@ namespace ngraph
class Quantize : public ngraph::op::Op
{
public:
NGRAPH_API
static const std::string type_name;
const std::string& description() const override { return type_name; }
enum class RoundMode
{
// round to nearest integer
......@@ -82,9 +85,9 @@ namespace ngraph
/// \param type output element type
/// \param axes axis positions on which `scale` and `zero_point` are specified
/// \param round_mode describes how to perform ROUND function (see above)
Quantize(const std::shared_ptr<Node>& input,
const std::shared_ptr<Node>& scale,
const std::shared_ptr<Node>& zero_point,
Quantize(const Output<Node>& input,
const Output<Node>& scale,
const Output<Node>& zero_point,
const ngraph::element::Type& type,
const ngraph::AxisSet& axes,
RoundMode round_mode);
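// Illustration only (not part of this change); `input` here stands for any existing
// node producing an f32 tensor. A per-tensor quantization to u8 could be built roughly as:
//
//   auto scale = op::Constant::create(element::f32, Shape{}, {0.5f});
//   auto zero_point = op::Constant::create(element::u8, Shape{}, {0});
//   auto q = std::make_shared<op::Quantize>(
//       input, scale, zero_point, element::u8, AxisSet{},
//       op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);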
......
......@@ -24,32 +24,33 @@
using namespace std;
using namespace ngraph;
op::QuantizedConvolution::QuantizedConvolution(const shared_ptr<Node>& input,
const shared_ptr<Node>& filters,
const string op::QuantizedConvolution::type_name{"QuantizedConvolution"};
op::QuantizedConvolution::QuantizedConvolution(const Output<Node>& input,
const Output<Node>& filters,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const shared_ptr<Node>& input_scale,
const shared_ptr<Node>& input_zero_point,
const shared_ptr<Node>& filter_scale,
const shared_ptr<Node>& filter_zero_point,
const shared_ptr<Node>& output_scale,
const shared_ptr<Node>& output_zero_point,
const Output<Node>& input_scale,
const Output<Node>& input_zero_point,
const Output<Node>& filter_scale,
const Output<Node>& filter_zero_point,
const Output<Node>& output_scale,
const Output<Node>& output_zero_point,
const element::Type& output_type,
const AxisSet& input_axes,
const AxisSet& filter_axes,
const AxisSet& output_axes)
: Op("QuantizedConvolution",
check_single_output_args({input,
filters,
input_scale,
input_zero_point,
filter_scale,
filter_zero_point,
output_scale,
output_zero_point}))
: Op({input,
filters,
input_scale,
input_zero_point,
filter_scale,
filter_zero_point,
output_scale,
output_zero_point})
, m_window_movement_strides(window_movement_strides)
, m_window_dilation_strides(window_dilation_strides)
, m_padding_below(padding_below)
......
......@@ -26,6 +26,9 @@ namespace ngraph
class QuantizedConvolution : public Op
{
public:
NGRAPH_API
static const std::string type_name;
const std::string& description() const override { return type_name; }
/// \brief Constructs a quantized convolution operation.
///
/// \param input The node producing the input data batch tensor.
......@@ -45,19 +48,19 @@ namespace ngraph
/// \param input_axes Input axes set for channel wise quantization
/// \param filter_axes Filter axes set for channel wise quantization
/// \param output_axes Output axes set for channel wise quantization
QuantizedConvolution(const std::shared_ptr<Node>& input,
const std::shared_ptr<Node>& filters,
QuantizedConvolution(const Output<Node>& input,
const Output<Node>& filters,
const Strides& window_movement_strides,
const Strides& window_dilation_strides,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
const Strides& data_dilation_strides,
const std::shared_ptr<Node>& input_scale,
const std::shared_ptr<Node>& input_zero_point,
const std::shared_ptr<Node>& filter_scale,
const std::shared_ptr<Node>& filter_zero_point,
const std::shared_ptr<Node>& output_scale,
const std::shared_ptr<Node>& output_zero_point,
const Output<Node>& input_scale,
const Output<Node>& input_zero_point,
const Output<Node>& filter_scale,
const Output<Node>& filter_zero_point,
const Output<Node>& output_scale,
const Output<Node>& output_zero_point,
const ngraph::element::Type& output_type,
const ngraph::AxisSet& input_axes = ngraph::AxisSet{},
const ngraph::AxisSet& filter_axes = ngraph::AxisSet{},
......
......@@ -26,10 +26,10 @@
using namespace std;
using namespace ngraph;
op::Select::Select(const shared_ptr<Node>& arg0,
const shared_ptr<Node>& arg1,
const shared_ptr<Node>& arg2)
: Op("Select", check_single_output_args({arg0, arg1, arg2}))
const string op::Select::type_name{"Select"};
op::Select::Select(const Output<Node>& arg0, const Output<Node>& arg1, const Output<Node>& arg2)
: Op({arg0, arg1, arg2})
{
constructor_validate_and_infer_types();
}
......
......@@ -40,14 +40,17 @@ namespace ngraph
class Select : public Op
{
public:
NGRAPH_API
static const std::string type_name;
const std::string& description() const override { return type_name; }
/// \brief Constructs a selection operation.
Select() = default;
/// \brief Constructs a selection operation.
///
/// \param arg0 Node that produces the first input tensor.
/// \param arg1 Node that produces the second input tensor.
/// \param arg2 Node that produces the third input tensor.
Select(const std::shared_ptr<Node>& arg0,
const std::shared_ptr<Node>& arg1,
const std::shared_ptr<Node>& arg2);
Select(const Output<Node>& arg0, const Output<Node>& arg1, const Output<Node>& arg2);
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
......
......@@ -20,10 +20,12 @@
using namespace std;
using namespace ngraph;
op::Subtract::Subtract(const shared_ptr<Node>& arg0,
const shared_ptr<Node>& arg1,
const string op::Subtract::type_name{"Subtract"};
op::Subtract::Subtract(const Output<Node>& arg0,
const Output<Node>& arg1,
const AutoBroadcastSpec& autob)
: BinaryElementwiseArithmetic("Subtract", arg0, arg1, autob)
: BinaryElementwiseArithmetic(arg0, arg1, autob)
{
constructor_validate_and_infer_types();
}
......@@ -50,8 +52,7 @@ void op::Subtract::generate_adjoints(autodiff::Adjoints& adjoints, const NodeVec
adjoints.add_delta(y, -delta);
}
shared_ptr<ngraph::Node> ngraph::operator-(const shared_ptr<ngraph::Node> arg0,
const shared_ptr<ngraph::Node> arg1)
shared_ptr<ngraph::Node> ngraph::operator-(const Output<Node> arg0, const Output<Node> arg1)
{
return make_shared<ngraph::op::Subtract>(arg0, arg1);
}
......@@ -26,13 +26,17 @@ namespace ngraph
class Subtract : public util::BinaryElementwiseArithmetic
{
public:
NGRAPH_API
static const std::string type_name;
const std::string& description() const override { return type_name; }
Subtract() = default;
/// \brief Constructs a subtraction operation.
///
/// \param arg0 Node that produces the first input tensor.
/// \param arg1 Node that produces the second input tensor.
/// \param autob Auto broadcast specification
Subtract(const std::shared_ptr<Node>& arg0,
const std::shared_ptr<Node>& arg1,
Subtract(const Output<Node>& arg0,
const Output<Node>& arg1,
const AutoBroadcastSpec& autob = AutoBroadcastSpec());
virtual std::shared_ptr<Node>
......@@ -42,6 +46,6 @@ namespace ngraph
const NodeVector& deltas) override;
};
}
std::shared_ptr<ngraph::Node> operator-(const std::shared_ptr<ngraph::Node> arg0,
const std::shared_ptr<ngraph::Node> arg1);
std::shared_ptr<ngraph::Node> operator-(const Output<ngraph::Node> arg0,
const Output<ngraph::Node> arg1);
}
......@@ -42,7 +42,9 @@ public:
CONVERT,
SHAPE_OF,
REVERSE,
PRODUCT
PRODUCT,
SUM,
CONCAT
};
ConstantFolding(const ngraph::BuildNodeExecutorMap& cfmap = ngraph::BuildNodeExecutorMap())
......@@ -60,6 +62,8 @@ public:
construct_constant_shape_of();
construct_constant_reverse();
construct_constant_product();
construct_constant_sum();
construct_constant_concat();
}
//this allows to specify the order in which matchers will be run
......@@ -84,6 +88,8 @@ public:
case CFTransformations::SHAPE_OF: construct_constant_shape_of(); break;
case CFTransformations::REVERSE: construct_constant_reverse(); break;
case CFTransformations::PRODUCT: construct_constant_product(); break;
case CFTransformations::SUM: construct_constant_sum(); break;
case CFTransformations::CONCAT: construct_constant_concat(); break;
}
}
}
......@@ -100,6 +106,8 @@ private:
void construct_constant_shape_of();
void construct_constant_reverse();
void construct_constant_product();
void construct_constant_sum();
void construct_constant_concat();
ngraph::BuildNodeExecutorMap m_cfmap;
};
......@@ -51,6 +51,6 @@ public:
ngraph::runtime::Allocator* ngraph::runtime::get_default_allocator()
{
static std::unique_ptr<DefaultAllocator> allocator(new DefaultAllocator());
return allocator.get();
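// Presumed rationale for the raw pointer below: the allocator is created once and
// intentionally never destroyed, so it remains valid during static destruction at
// shutdown, at the cost of a deliberate one-time leak.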
static DefaultAllocator* allocator = new DefaultAllocator();
return allocator;
}
......@@ -146,7 +146,7 @@ public:
virtual Allocator* get_host_memory_allocator() { return nullptr; }
/// \brief Set the host memory allocator to be used by the backend
/// \param allocator is pointer to host memory allocator object
virtual void set_host_memory_allocator(std::unique_ptr<Allocator> allocator) {}
virtual void set_host_memory_allocator(Allocator* allocator) {}
/// \brief Returns memory allocator used by backend for device allocations
virtual Allocator* get_device_memory_allocator()
{
......
......@@ -187,11 +187,10 @@ runtime::Allocator* runtime::cpu::CPU_Backend::get_host_memory_allocator()
{
return runtime::get_default_allocator();
}
return m_allocator.get();
return m_allocator;
}
void runtime::cpu::CPU_Backend::set_host_memory_allocator(
std::unique_ptr<runtime::Allocator> allocator)
void runtime::cpu::CPU_Backend::set_host_memory_allocator(Allocator* allocator)
{
if (m_allocator)
{
......@@ -200,7 +199,7 @@ void runtime::cpu::CPU_Backend::set_host_memory_allocator(
throw ngraph_error(
"Allocator already exists. Changing allocators mid-execution is not permitted.");
}
m_allocator = std::move(allocator);
m_allocator = allocator;
}
vector<runtime::PerformanceCounter> runtime::cpu::CPU_Executable::get_performance_data() const
......
......@@ -65,7 +65,7 @@ namespace ngraph
void remove_compiled_function(std::shared_ptr<Executable> exec) override;
Allocator* get_host_memory_allocator() override;
void set_host_memory_allocator(std::unique_ptr<Allocator> allocator) override;
void set_host_memory_allocator(Allocator* allocator) override;
bool is_supported(const Node& node) const override;
bool is_supported_property(const Property prop) const override;
......@@ -76,7 +76,7 @@ namespace ngraph
std::mutex m_exec_map_mutex;
std::unordered_map<std::shared_ptr<Function>, std::shared_ptr<Executable>>
m_exec_map;
std::unique_ptr<Allocator> m_allocator;
Allocator* m_allocator;
};
class CPU_BACKEND_API CPU_Executable : public runtime::Executable
......
......@@ -470,6 +470,12 @@ namespace ngraph
BUILD_UNARY_ELEMWISE_CF_FUNCTOR(runtime::cpu::kernel::sqrt);
}
template <>
NodeExecutorTy Builder::BUILDER_CF_DECL(ngraph::op::Sign)
{
BUILD_UNARY_ELEMWISE_CF_FUNCTOR(runtime::cpu::kernel::sign);
}
#define TI(x) type_index(typeid(x))
BuildOpMap& GetGlobalBuildDispatcher()
......@@ -536,6 +542,7 @@ namespace ngraph
REGISTER_CF_BUILDER(Negative);
REGISTER_CF_BUILDER(Relu);
REGISTER_CF_BUILDER(Sqrt);
REGISTER_CF_BUILDER(Sign);
}
}
}
......@@ -98,7 +98,7 @@ all_2x2x3_eliminate_dims_0_1
all_2x2x3_eliminate_dims_0_2
all_2x2x3_eliminate_dims_1_2
all_2x2x3_eliminate_dims_0_1_2
dynamic_GPU.all
all_dynamic
# GPU backend uses floats to implement these ops for int32
floor_int32
......
......@@ -30,7 +30,7 @@ namespace ngraph
for (size_t i = 0; i < count; i++)
{
// TODO: generic "abs" doesn't work here for some reason.
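// The explicit T(...) casts in the updated line are presumably needed so the comparison
// and both ternary branches stay in the element type T (e.g. for types that do not mix
// implicitly with int literals), rather than promoting to int/float.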
out[i] = (arg[i] < 0 ? -arg[i] : arg[i]);
out[i] = (arg[i] < T(0) ? T(-arg[i]) : arg[i]);
}
}
}
......
......@@ -29,7 +29,7 @@ namespace ngraph
{
for (size_t i = 0; i < count; i++)
{
out[i] = (arg[i] < 0 ? -1 : (arg[i] > 0 ? 1 : 0));
out[i] = (arg[i] < T(0) ? T(-1) : (arg[i] > T(0) ? T(1) : T(0)));
}
}
}
......
......@@ -96,6 +96,10 @@ static void
result->read(data->get_data_ptr(),
data->get_element_count() * data->get_element_type().size());
}
if (current_iteration == (s_iterations + s_warmup_iterations - 1))
{
s_timer.stop();
}
}
}
}
......@@ -177,7 +181,6 @@ vector<runtime::PerformanceCounter> run_benchmark_pipelined(shared_ptr<Function>
{
threads[i].join();
}
s_timer.stop();
float time = s_timer.get_milliseconds();
cout << time / iterations << "ms per iteration" << endl;
......
......@@ -207,20 +207,20 @@ add_subdirectory(util)
# backend-specific test files must meet the following requirements:
# 1) They must be named <name>.in.cpp
# 2) They must be in the test directory
# 2) They must be in the `test/backend` directory
# 3) Include "util/test_control.hpp" in your cpp file
# 4) add the line `static string s_manifest = "${MANIFEST}";` to your cpp file
# 5) Use the `NGRAPH_TEST` macro in place of `TEST`.
# All such files are configured via cmake which replaces all instances of cmake variables
# such as ${BACKEND_NAME} with their values, such as CPU, GPU, or INTERPRETER.
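# As an illustrative sketch only (not part of this change; the test name below is
# hypothetical), a minimal backend test file following rules 1-5 would look roughly like:
#
#   #include "util/test_control.hpp"
#   static string s_manifest = "${MANIFEST}";
#
#   NGRAPH_TEST(${BACKEND_NAME}, my_example_test)
#   {
#       auto backend = runtime::Backend::create("${BACKEND_NAME}");
#       // ... build a Function f, compile it with backend->compile(f), and check results
#   }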
set(MULTI_TEST_SRC
autodiff.in.cpp
backend/abc.in.cpp
backend/aliased_output.in.cpp
backend/all.in.cpp
backend/any.in.cpp
backend/api.in.cpp
backend/arg_reduce.in.cpp
backend/autodiff.in.cpp
backend/batch_mat_mul.in.cpp
backend/batch_norm.in.cpp
backend/binary_elementwise.in.cpp
......@@ -231,7 +231,13 @@ set(MULTI_TEST_SRC
backend/constant.in.cpp
backend/convert.in.cpp
backend/convolution.in.cpp
backend/convolution_reference.in.cpp
backend/dot.in.cpp
backend/dyn_broadcast.in.cpp
backend/dyn_replace_slice_reference.in.cpp
backend/dyn_reshape.in.cpp
backend/dyn_slice_reference.in.cpp
backend/dynamic.in.cpp
backend/embedding_lookup.in.cpp
backend/function_name.in.cpp
backend/fused_op.in.cpp
......@@ -253,6 +259,7 @@ set(MULTI_TEST_SRC
backend/product.in.cpp
backend/quantize_dequantize.in.cpp
backend/quantized_convolution.in.cpp
backend/range.in.cpp
backend/relu.in.cpp
backend/replace_slice.in.cpp
backend/reshape.in.cpp
......@@ -267,14 +274,11 @@ set(MULTI_TEST_SRC
backend/sum.in.cpp
backend/tensorview_custom_mem.in.cpp
backend/topk.in.cpp
backend/transpose.in.cpp
backend/unhandled_op.in.cpp
backend/unary_elementwise.in.cpp
backend/validate_call.in.cpp
backend/zero_sized.in.cpp
convolution_test.in.cpp
dyn_replace_slice_test.in.cpp
dyn_slice_test.in.cpp
dynamic.in.cpp
)
if (NGRAPH_MLIR_ENABLE)
......@@ -282,11 +286,11 @@ if (NGRAPH_MLIR_ENABLE)
endif()
if(NGRAPH_DISTRIBUTED_ENABLE)
list(APPEND MULTI_TEST_SRC distributed.in.cpp)
list(APPEND MULTI_TEST_SRC backend/distributed.in.cpp)
endif()
if (NGRAPH_CPU_ENABLE)
list(APPEND MULTI_TEST_SRC backend_graph_comparison.in.cpp)
list(APPEND MULTI_TEST_SRC backend/graph_comparison.in.cpp)
endif()
if (NGRAPH_ONNX_IMPORT_ENABLE)
......
......@@ -316,3 +316,53 @@ NGRAPH_TEST(${BACKEND_NAME}, all_change_axis)
handle->call_with_validate({result}, {a});
EXPECT_EQ((vector<char>{1, 0, 1}), read_vector<char>(result));
}
NGRAPH_TEST(${BACKEND_NAME}, all_dynamic)
{
// Create a graph for f(x,axes:int32) = All(x,Convert<int64>(axes)).
auto x = make_shared<op::Parameter>(element::boolean, PartialShape::dynamic());
auto axes = make_shared<op::Parameter>(element::i32, PartialShape{Dimension::dynamic()});
auto axes_i64 = make_shared<op::Convert>(axes, element::i64);
auto all = make_shared<op::All>(x, axes_i64);
ASSERT_TRUE(all->get_output_partial_shape(0).rank().is_dynamic());
auto f = make_shared<Function>(NodeVector{all}, ParameterVector{x, axes});
auto backend = runtime::Backend::create("${BACKEND_NAME}", true);
auto ex = backend->compile(f);
auto t_r = backend->create_dynamic_tensor(element::boolean, PartialShape::dynamic());
std::vector<Shape> x_shapes{
Shape{2, 3}, Shape{2, 3}, Shape{2, 3}, Shape{2, 3}, Shape{5}, Shape{5}};
std::vector<std::vector<int32_t>> axeses{{}, {0}, {1}, {0, 1}, {}, {0}};
std::vector<std::vector<char>> inputs{{1, 0, 1, 0, 1, 0},
{1, 0, 1, 0, 0, 1},
{1, 0, 1, 1, 1, 1},
{1, 0, 1, 0, 1, 0},
{1, 0, 1, 0, 1},
{1, 0, 1, 0, 1}};
std::vector<Shape> expected_result_shapes{
Shape{2, 3}, Shape{3}, Shape{2}, Shape{}, Shape{5}, Shape{}};
std::vector<std::vector<char>> expected_results{
{1, 0, 1, 0, 1, 0}, {0, 0, 1}, {0, 1}, {0}, {1, 0, 1, 0, 1}, {0}};
for (size_t i = 0; i < x_shapes.size(); i++)
{
auto t_x = backend->create_tensor(element::boolean, x_shapes[i]);
auto t_axes = backend->create_tensor(element::i32, Shape{axeses[i].size()});
copy_data(t_x, inputs[i]);
copy_data(t_axes, axeses[i]);
ex->call_with_validate({t_r}, {t_x, t_axes});
ASSERT_EQ(t_r->get_shape(), expected_result_shapes[i]);
auto results = read_vector<char>(t_r);
ASSERT_EQ(results, expected_results[i]);
}
}
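This test and the other dynamic-shape tests added in this diff (dyn_broadcast, dyn_reshape, range, sum_dynamic, transpose) all follow one skeleton: parameters are declared with PartialShape::dynamic(), the backend is created with a second argument of true — which, judging from its pairing with create_dynamic_tensor here, requests dynamic-shape execution support — the function is compiled once, and a single dynamic result tensor is reused while the concrete input shapes change on every call. A stripped-down, hedged version of that skeleton (op::Abs is only a placeholder):

// Hedged sketch of the shared dynamic-shape test skeleton.
auto x = make_shared<op::Parameter>(element::f32, PartialShape::dynamic());
auto f = make_shared<Function>(make_shared<op::Abs>(x), ParameterVector{x});

auto backend = runtime::Backend::create("${BACKEND_NAME}", true); // 'true': dynamic shapes
auto ex = backend->compile(f);
auto t_r = backend->create_dynamic_tensor(element::f32, PartialShape::dynamic());

for (const Shape& s : {Shape{2, 3}, Shape{5}})
{
    auto t_x = backend->create_tensor(element::f32, s);
    copy_data(t_x, vector<float>(shape_size(s), -1.0f));
    ex->call_with_validate({t_r}, {t_x});
    ASSERT_EQ(t_r->get_shape(), s); // the result shape only becomes concrete after the call
}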
......@@ -89,7 +89,8 @@ NGRAPH_TEST(${BACKEND_NAME}, batch_mat_mul_forward)
auto backend_results = execute(backend_f, batchmatmul_args, "${BACKEND_NAME}");
for (size_t i = 0; i < ref_results.size(); i++)
{
EXPECT_TRUE(test::all_close(ref_results.at(i), backend_results.at(i), 1.0e-4f, 1.0e-4f));
EXPECT_TRUE(test::all_close_f(
ref_results.at(i), backend_results.at(i), DEFAULT_FLOAT_TOLERANCE_BITS + 3));
}
}
#endif
......@@ -55,16 +55,19 @@ NGRAPH_TEST(${BACKEND_NAME}, computation_reuse)
vector<float> weights(512, 0.5f);
vector<float> rv(128);
auto a = backend->create_tensor(element::f32, shape_a, input.data());
auto b = backend->create_tensor(element::f32, shape_b, weights.data());
auto result = backend->create_tensor(element::f32, shape_r, rv.data());
auto a = backend->create_tensor(element::f32, shape_a);
auto b = backend->create_tensor(element::f32, shape_b);
auto result = backend->create_tensor(element::f32, shape_r);
auto handle = backend->compile(f);
handle->call_with_validate({result}, {a, b});
copy_data(a, input);
copy_data(b, weights);
vector<float> rv_saved(rv);
auto exec = backend->compile(f);
exec->call_with_validate({result}, {a, b});
vector<float> rv_saved(read_vector<float>(result));
b->set_stale(false);
handle->call_with_validate({result}, {a, b});
EXPECT_TRUE(test::all_close_f(rv_saved, rv));
exec->call_with_validate({result}, {a, b});
EXPECT_TRUE(test::all_close_f(rv_saved, read_vector<float>(result)));
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "gtest/gtest.h"
#include "ngraph/ngraph.hpp"
#include "util/all_close_f.hpp"
#include "util/test_control.hpp"
#include "util/test_tools.hpp"
using namespace std;
using namespace ngraph;
static string s_manifest = "${MANIFEST}";
NGRAPH_TEST(${BACKEND_NAME}, dyn_broadcast)
{
// Create a graph for
// f(x,shape:i32,axes:i32) = Broadcast(x,Convert<i64>(shape),Convert<i64>(axes)).
auto x = make_shared<op::Parameter>(element::f32, PartialShape::dynamic());
auto shape = make_shared<op::Parameter>(element::i32, PartialShape{Dimension::dynamic()});
auto axes = make_shared<op::Parameter>(element::i32, PartialShape{Dimension::dynamic()});
auto shape_i64 = make_shared<op::Convert>(shape, element::i64);
auto axes_i64 = make_shared<op::Convert>(axes, element::i64);
auto bc = make_shared<op::DynBroadcast>(x, shape_i64, axes_i64);
auto f = make_shared<Function>(NodeVector{bc}, ParameterVector{x, shape, axes});
auto backend = runtime::Backend::create("${BACKEND_NAME}", true);
auto ex = backend->compile(f);
auto t_r = backend->create_dynamic_tensor(element::f32, PartialShape::dynamic());
std::vector<Shape> x_shapes{Shape{}, Shape{}, Shape{2}, Shape{2}};
std::vector<std::vector<int32_t>> shapes{{2, 2}, {2, 2, 2}, {3, 2}, {2, 3}};
std::vector<std::vector<int32_t>> axeses{{0, 1}, {0, 1, 2}, {0}, {1}};
std::vector<std::vector<float>> inputs{{6}, {7}, {10, 11}, {10, 11}};
std::vector<Shape> expected_result_shapes{
Shape{2, 2}, Shape{2, 2, 2}, Shape{3, 2}, Shape{2, 3}};
std::vector<std::vector<float>> expected_results{
{6, 6, 6, 6}, {7, 7, 7, 7, 7, 7, 7, 7}, {10, 11, 10, 11, 10, 11}, {10, 10, 10, 11, 11, 11}};
for (size_t i = 0; i < x_shapes.size(); i++)
{
auto t_x = backend->create_tensor(element::f32, x_shapes[i]);
auto t_shape = backend->create_tensor(element::i32, Shape{shapes[i].size()});
auto t_axes = backend->create_tensor(element::i32, Shape{axeses[i].size()});
copy_data(t_x, inputs[i]);
copy_data(t_shape, shapes[i]);
copy_data(t_axes, axeses[i]);
ex->call_with_validate({t_r}, {t_x, t_shape, t_axes});
ASSERT_EQ(t_r->get_shape(), expected_result_shapes[i]);
auto results = read_vector<float>(t_r);
ASSERT_TRUE(test::all_close_f(results, expected_results[i], MIN_FLOAT_TOLERANCE_BITS));
}
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "gtest/gtest.h"
#include "ngraph/ngraph.hpp"
#include "util/all_close_f.hpp"
#include "util/test_control.hpp"
#include "util/test_tools.hpp"
using namespace std;
using namespace ngraph;
static string s_manifest = "${MANIFEST}";
NGRAPH_TEST(${BACKEND_NAME}, dyn_reshape)
{
auto backend = runtime::Backend::create("${BACKEND_NAME}", true);
auto build_graph = [&backend](bool zero_flag) {
// Create a graph for f(x,shape) = DynReshape(x,shape,zero_flag=zero_flag).
auto x = make_shared<op::Parameter>(element::i32, PartialShape::dynamic());
auto shape = make_shared<op::Parameter>(element::i64, PartialShape::dynamic(1));
auto dyn_reshape = make_shared<op::DynReshape>(x, shape, zero_flag);
EXPECT_TRUE(dyn_reshape->get_output_partial_shape(0).same_scheme(PartialShape::dynamic()));
auto f = make_shared<Function>(NodeVector{dyn_reshape}, ParameterVector{x, shape});
auto ex = backend->compile(f);
return ex;
};
auto t_r = backend->create_dynamic_tensor(element::i32, PartialShape::dynamic());
auto ex_flag_off = build_graph(false);
auto ex_flag_on = build_graph(true);
std::vector<std::tuple<bool, Shape, std::vector<int32_t>, std::vector<int64_t>, Shape>> tests;
tests.emplace_back(make_tuple(
false, Shape{2, 3}, vector<int32_t>{1, 2, 3, 4, 5, 6}, vector<int64_t>{6}, Shape{6}));
tests.emplace_back(make_tuple(
true, Shape{2, 3}, vector<int32_t>{1, 2, 3, 4, 5, 6}, vector<int64_t>{6}, Shape{6}));
tests.emplace_back(make_tuple(
false, Shape{2, 3}, vector<int32_t>{1, 2, 3, 4, 5, 6}, vector<int64_t>{-1}, Shape{6}));
tests.emplace_back(make_tuple(false,
Shape{2, 3},
vector<int32_t>{1, 2, 3, 4, 5, 6},
vector<int64_t>{2, -1},
Shape{2, 3}));
tests.emplace_back(make_tuple(false,
Shape{2, 3},
vector<int32_t>{1, 2, 3, 4, 5, 6},
vector<int64_t>{3, -1},
Shape{3, 2}));
tests.emplace_back(make_tuple(false,
Shape{2, 3},
vector<int32_t>{1, 2, 3, 4, 5, 6},
vector<int64_t>{3, 2, -1},
Shape{3, 2, 1}));
tests.emplace_back(make_tuple(true,
Shape{2, 3},
vector<int32_t>{1, 2, 3, 4, 5, 6},
vector<int64_t>{3, 2, -1},
Shape{3, 2, 1}));
tests.emplace_back(make_tuple(true,
Shape{2, 3},
vector<int32_t>{1, 2, 3, 4, 5, 6},
vector<int64_t>{0, 0, -1},
Shape{2, 3, 1}));
tests.emplace_back(make_tuple(true,
Shape{2, 3},
vector<int32_t>{1, 2, 3, 4, 5, 6},
vector<int64_t>{2, 0, -1},
Shape{2, 3, 1}));
tests.emplace_back(make_tuple(
true, Shape{0, 3, 4}, vector<int32_t>{}, vector<int64_t>{3, -1, 2}, Shape{3, 0, 2}));
for (auto& test : tests)
{
bool zero_flag = get<0>(test);
const Shape& in_shape = get<1>(test);
const std::vector<int32_t>& data = get<2>(test);
const std::vector<int64_t>& dims = get<3>(test);
const Shape& out_shape = get<4>(test);
auto t_x = backend->create_tensor(element::i32, in_shape);
auto t_shape = backend->create_tensor(element::i64, Shape{dims.size()});
copy_data(t_x, data);
copy_data(t_shape, dims);
auto ex = zero_flag ? ex_flag_on : ex_flag_off;
ex->call_with_validate({t_r}, {t_x, t_shape});
ASSERT_EQ(t_r->get_element_type(), element::i32);
ASSERT_EQ(t_r->get_shape(), out_shape);
auto results = read_vector<int32_t>(t_r);
ASSERT_EQ(results, data);
}
}
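The tuples above exercise the two special values DynReshape accepts in its output-shape argument: -1 marks one dimension to be inferred from the element count, and 0 (when zero_flag is true) copies the corresponding input dimension. A hedged helper reproducing the rule the expected shapes follow — an illustration consistent with the rows above, not the nGraph implementation, and it assumes every 0 position indexes a valid input dimension:

// Hedged sketch of the dimension rules checked by the dyn_reshape test above.
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<std::size_t> infer_output_shape(const std::vector<std::size_t>& in_shape,
                                            const std::vector<std::int64_t>& spec,
                                            bool zero_flag)
{
    std::size_t in_elems = 1;
    for (std::size_t d : in_shape)
    {
        in_elems *= d;
    }
    std::vector<std::size_t> out(spec.size(), 0);
    std::size_t known = 1;
    int infer_at = -1;
    for (std::size_t i = 0; i < spec.size(); i++)
    {
        if (spec[i] == -1)
        {
            infer_at = static_cast<int>(i); // at most one -1 expected
            continue;
        }
        out[i] = (zero_flag && spec[i] == 0) ? in_shape[i] : static_cast<std::size_t>(spec[i]);
        known *= out[i];
    }
    if (infer_at >= 0)
    {
        out[infer_at] = (known == 0) ? 0 : in_elems / known;
    }
    return out;
}
// infer_output_shape({2, 3}, {0, 0, -1}, true)    -> {2, 3, 1}
// infer_output_shape({0, 3, 4}, {3, -1, 2}, true) -> {3, 0, 2}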
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "gtest/gtest.h"
#include "ngraph/ngraph.hpp"
#include "util/all_close_f.hpp"
#include "util/test_control.hpp"
#include "util/test_tools.hpp"
using namespace std;
using namespace ngraph;
static string s_manifest = "${MANIFEST}";
template <typename T>
struct RangeTest
{
T start;
T stop;
T step;
Shape expected_result_shape;
std::vector<T> expected_result;
};
// TODO(amprocte): We should test this with more than just int32, but there is a bug in the
// handling of element type-changing that is currently blocking doing that easily.
NGRAPH_TEST(${BACKEND_NAME}, range)
{
// Create a graph for f(start,stop,step) = Range(start,stop,step).
auto start = make_shared<op::Parameter>(element::i32, Shape{});
auto stop = make_shared<op::Parameter>(element::i32, Shape{});
auto step = make_shared<op::Parameter>(element::i32, Shape{});
auto range = make_shared<op::Range>(start, stop, step);
ASSERT_TRUE(range->get_output_partial_shape(0).same_scheme(PartialShape::dynamic(1)));
auto f = make_shared<Function>(NodeVector{range}, ParameterVector{start, stop, step});
auto backend = runtime::Backend::create("${BACKEND_NAME}", true);
auto ex = backend->compile(f);
auto t_r = backend->create_dynamic_tensor(element::i32, PartialShape::dynamic());
std::vector<RangeTest<int32_t>> int32_tests = {
RangeTest<int32_t>{0, 10, 1, Shape{10}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}},
RangeTest<int32_t>{-5, 6, 3, Shape{4}, {-5, -2, 1, 4}},
RangeTest<int32_t>{10, 0, 1, Shape{0}, {}},
RangeTest<int32_t>{10, 5, -3, Shape{2}, {10, 7}}};
for (auto& test : int32_tests)
{
auto t_start = backend->create_tensor(element::i32, Shape{});
auto t_stop = backend->create_tensor(element::i32, Shape{});
auto t_step = backend->create_tensor(element::i32, Shape{});
copy_data(t_start, std::vector<int32_t>{test.start});
copy_data(t_stop, std::vector<int32_t>{test.stop});
copy_data(t_step, std::vector<int32_t>{test.step});
ex->call_with_validate({t_r}, {t_start, t_stop, t_step});
ASSERT_EQ(t_r->get_element_type(), element::i32);
ASSERT_EQ(t_r->get_shape(), test.expected_result_shape);
auto results = read_vector<int32_t>(t_r);
ASSERT_EQ(results, test.expected_result);
}
}
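All four expected shapes above are consistent with the usual Range sizing rule: the element count is max(ceil((stop - start) / step), 0). For instance the last row gives ceil((5 - 10) / -3) = ceil(5 / 3) = 2, hence Shape{2} and values {10, 7}. A hedged helper capturing that rule:

// Hedged sketch: element count implied by the expected_result_shape values above.
#include <cmath>
#include <cstddef>

std::size_t range_element_count(double start, double stop, double step)
{
    double n = std::ceil((stop - start) / step);
    return n > 0.0 ? static_cast<std::size_t>(n) : 0;
}
// range_element_count(0, 10, 1)  == 10
// range_element_count(-5, 6, 3)  == 4    (ceil(11 / 3))
// range_element_count(10, 0, 1)  == 0
// range_element_count(10, 5, -3) == 2    (ceil(5 / 3))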
......@@ -690,3 +690,53 @@ NGRAPH_TEST(${BACKEND_NAME}, sum_stable_simple_double)
#endif
#endif
NGRAPH_TEST(${BACKEND_NAME}, sum_dynamic)
{
// Create a graph for f(x,axes:int32) = Sum(x,Convert<int64>(axes)).
auto x = make_shared<op::Parameter>(element::f32, PartialShape::dynamic());
auto axes = make_shared<op::Parameter>(element::i32, PartialShape{Dimension::dynamic()});
auto axes_i64 = make_shared<op::Convert>(axes, element::i64);
auto sum = make_shared<op::Sum>(x, axes_i64);
ASSERT_TRUE(sum->get_output_partial_shape(0).rank().is_dynamic());
auto f = make_shared<Function>(NodeVector{sum}, ParameterVector{x, axes});
auto backend = runtime::Backend::create("${BACKEND_NAME}", true);
auto ex = backend->compile(f);
auto t_r = backend->create_dynamic_tensor(element::f32, PartialShape::dynamic());
std::vector<Shape> x_shapes{
Shape{2, 3}, Shape{2, 3}, Shape{2, 3}, Shape{2, 3}, Shape{5}, Shape{5}};
std::vector<std::vector<int32_t>> axeses{{}, {0}, {1}, {0, 1}, {}, {0}};
std::vector<std::vector<float>> inputs{{1, 2, 3, 4, 5, 6},
{1, 2, 3, 4, 5, 6},
{1, 2, 3, 4, 5, 6},
{1, 2, 3, 4, 5, 6},
{1, 2, 3, 4, 5},
{1, 2, 3, 4, 5}};
std::vector<Shape> expected_result_shapes{
Shape{2, 3}, Shape{3}, Shape{2}, Shape{}, Shape{5}, Shape{}};
std::vector<std::vector<float>> expected_results{
{1, 2, 3, 4, 5, 6}, {5, 7, 9}, {6, 15}, {21}, {1, 2, 3, 4, 5}, {15}};
for (size_t i = 0; i < x_shapes.size(); i++)
{
auto t_x = backend->create_tensor(element::f32, x_shapes[i]);
auto t_axes = backend->create_tensor(element::i32, Shape{axeses[i].size()});
copy_data(t_x, inputs[i]);
copy_data(t_axes, axeses[i]);
ex->call_with_validate({t_r}, {t_x, t_axes});
ASSERT_EQ(t_r->get_shape(), expected_result_shapes[i]);
auto results = read_vector<float>(t_r);
ASSERT_TRUE(test::all_close_f(results, expected_results[i], MIN_FLOAT_TOLERANCE_BITS));
}
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "gtest/gtest.h"
#include "ngraph/ngraph.hpp"
#include "util/all_close_f.hpp"
#include "util/test_control.hpp"
#include "util/test_tools.hpp"
using namespace std;
using namespace ngraph;
static string s_manifest = "${MANIFEST}";
NGRAPH_TEST(${BACKEND_NAME}, transpose)
{
//
// Create a graph for f(x,perm) = Transpose(x,Convert<i64>(perm)). We'll do the permutation in
// i32 and cast it to i64, just for fun (and to mirror the TensorFlow test I am porting here).
//
auto x = make_shared<op::Parameter>(element::f32, PartialShape::dynamic());
auto perm = make_shared<op::Parameter>(element::i32, PartialShape{Dimension::dynamic()});
auto perm_i64 = make_shared<op::Convert>(perm, element::i64);
auto x_transpose = make_shared<op::Transpose>(x, perm_i64);
auto f = make_shared<Function>(NodeVector{x_transpose}, ParameterVector{x, perm});
auto backend = runtime::Backend::create("${BACKEND_NAME}", true);
auto ex = backend->compile(f);
auto t_r = backend->create_dynamic_tensor(element::f32, PartialShape::dynamic());
std::vector<Shape> x_shapes{Shape{2, 3}, Shape{2, 3}, Shape{2, 2, 3}};
std::vector<std::vector<int32_t>> perms{{0, 1}, {1, 0}, {2, 1, 0}};
std::vector<std::vector<float>> inputs{
{1, 2, 3, 4, 5, 6}, {1, 2, 3, 4, 5, 6}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12}};
std::vector<Shape> expected_result_shapes{Shape{2, 3}, Shape{3, 2}, {3, 2, 2}};
// Generated with numpy, so don't worry. :)
std::vector<std::vector<float>> expected_results{
{1, 2, 3, 4, 5, 6}, {1, 4, 2, 5, 3, 6}, {1, 7, 4, 10, 2, 8, 5, 11, 3, 9, 6, 12}};
for (size_t i = 0; i < x_shapes.size(); i++)
{
auto t_x = backend->create_tensor(element::f32, x_shapes[i]);
auto t_perm = backend->create_tensor(element::i32, Shape{perms[i].size()});
copy_data(t_x, inputs[i]);
copy_data(t_perm, perms[i]);
ex->call_with_validate({t_r}, {t_x, t_perm});
ASSERT_EQ(t_r->get_shape(), expected_result_shapes[i]);
auto results = read_vector<float>(t_r);
ASSERT_TRUE(test::all_close_f(results, expected_results[i], MIN_FLOAT_TOLERANCE_BITS));
}
}
......@@ -408,6 +408,57 @@ TEST(constant_folding, const_product)
ASSERT_EQ(values_expected, values_out);
}
TEST(constant_folding, const_sum)
{
Shape input_shape{3, 3};
vector<int32_t> values_in{1, 2, 3, 4, 5, 6, 7, 8, 9};
auto constant = op::Constant::create(element::i32, input_shape, values_in);
auto convert = make_shared<op::Sum>(constant, AxisSet{1});
auto f = make_shared<Function>(convert, ParameterVector{});
pass::Manager pass_manager;
pass_manager.register_pass<pass::ConstantFolding>();
pass_manager.run_passes(f);
ASSERT_EQ(count_ops_of_type<op::Sum>(f), 0);
ASSERT_EQ(count_ops_of_type<op::Constant>(f), 1);
auto new_const =
std::dynamic_pointer_cast<op::Constant>(f->get_results().at(0)->get_argument(0));
ASSERT_TRUE(new_const);
auto values_out = new_const->get_vector<int32_t>();
vector<int32_t> values_expected{6, 15, 24};
ASSERT_EQ(values_expected, values_out);
}
TEST(constant_folding, const_concat)
{
auto constant0 =
op::Constant::create(element::i32, Shape{2, 3}, vector<int32_t>{1, 2, 3, 4, 5, 6});
auto constant1 = op::Constant::create(element::i32, Shape{2, 1}, vector<int32_t>{7, 8});
auto concat = make_shared<op::Concat>(NodeVector{constant0, constant1}, 1);
auto f = make_shared<Function>(concat, ParameterVector{});
pass::Manager pass_manager;
pass_manager.register_pass<pass::ConstantFolding>();
pass_manager.run_passes(f);
ASSERT_EQ(count_ops_of_type<op::Concat>(f), 0);
ASSERT_EQ(count_ops_of_type<op::Constant>(f), 1);
auto new_const =
std::dynamic_pointer_cast<op::Constant>(f->get_results().at(0)->get_argument(0));
ASSERT_TRUE(new_const);
auto values_out = new_const->get_vector<int32_t>();
vector<int32_t> values_expected{1, 2, 3, 7, 4, 5, 6, 8};
ASSERT_EQ(values_expected, values_out);
}
TEST(constant_folding, pass_property)
{
auto pass = std::make_shared<ngraph::pass::ConstantFolding>();
......
......@@ -15,4 +15,4 @@
# limitations under the License.
# ******************************************************************************
declare THIS_SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
python ${THIS_SCRIPT_DIR}/ref_generators/generate_convolution_ref.py ${THIS_SCRIPT_DIR}/convolution_test.in.cpp
python ${THIS_SCRIPT_DIR}/ref_generators/generate_convolution_ref.py ${THIS_SCRIPT_DIR}/backend/convolution_reference.in.cpp
......@@ -15,4 +15,4 @@
# limitations under the License.
# ******************************************************************************
declare THIS_SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
python ${THIS_SCRIPT_DIR}/ref_generators/generate_dyn_replace_slice_ref.py ${THIS_SCRIPT_DIR}/dyn_replace_slice_test.in.cpp
python ${THIS_SCRIPT_DIR}/ref_generators/generate_dyn_replace_slice_ref.py ${THIS_SCRIPT_DIR}/backend/dyn_replace_slice_reference.in.cpp
......@@ -15,4 +15,4 @@
# limitations under the License.
# ******************************************************************************
declare THIS_SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"
python ${THIS_SCRIPT_DIR}/ref_generators/generate_dyn_slice_ref.py ${THIS_SCRIPT_DIR}/dyn_slice_test.in.cpp
python ${THIS_SCRIPT_DIR}/ref_generators/generate_dyn_slice_ref.py ${THIS_SCRIPT_DIR}/backend/dyn_slice_reference.in.cpp