Unverified Commit ba1141e0 authored by Ewa Tusień, committed by GitHub

Merge branch 'master' into etusien/depth_to_space

parents cad5a6b7 31fae943
......@@ -9,11 +9,11 @@
<dt>{{ _('Recent Versions') }}</dt>
<dd><!-- Until our https://docs.ngraph.ai/ publishing is set up, we link to GitHub -->
<ul>
<li><a href="https://github.com/NervanaSystems/ngraph/releases/tag/v0.24.0">0.24.0</a></li>
<li><a href="https://github.com/NervanaSystems/ngraph/releases/tag/v0.23.0">0.23.0</a></li>
<li><a href="https://github.com/NervanaSystems/ngraph/releases/tag/v0.22.0">0.22.0</a></li>
<li><a href="https://github.com/NervanaSystems/ngraph/releases/tag/v0.21.0">0.21.0</a></li>
<li><a href="https://github.com/NervanaSystems/ngraph/releases/tag/v0.20.0">0.20.0</a></li>
<li><a href="https://github.com/NervanaSystems/ngraph/releases/tag/v0.19.0">0.19.0</a></li>
</ul></dd>
</dl>
<dl>
......
......@@ -73,11 +73,11 @@ author = 'Intel Corporation'
# built documents.
#
# The short X.Y version.
version = '0.24'
version = '0.25'
# The full documentation version, including alpha/beta/rc tags. Some features
# available in the latest code will not necessarily be documented first.
release = '0.24.0'
release = '0.25.0'
# The language for content autogenerated by Sphinx. Refer to documentation
# for a list of supported languages.
......
......@@ -16,14 +16,11 @@ We are pleased to announce the release of version |version|-doc.
Core updates for |version|
~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ More ONNX ops
+ Elementwise divide defaults to Python semantics
+ GenerateMask seed optional
+ Graph visualization improvements
+ Preserve control dependencies in more places
+ GetOutputElement has single input
+ Better PlaidML support
+ Double-buffering support
+ Constant folding
+ Support for static linking
+ Additional ops
.. Latest doc updates
.. ~~~~~~~~~~~~~~~~~~
......@@ -37,6 +34,19 @@ Core updates for |version|
Changelog on Previous Releases
==============================
0.24
----
+ Fixes reshape sink/swim issue
+ More ONNX ops
+ Elementwise divide defaults to Python semantics
+ GenerateMask seed optional
+ Graph visualization improvements
+ Preserve control dependencies in more places
+ GetOutputElement has single input
.. + Add instructions on how to build the ``NGRAPH_PLAIDML`` backend.
0.23
----
......
......@@ -23,12 +23,12 @@ using namespace ngraph;
const string op::Pad::type_name{"Pad"};
op::Pad::Pad(const shared_ptr<Node>& arg,
const shared_ptr<Node>& arg_pad_value,
op::Pad::Pad(const Output<Node>& arg,
const Output<Node>& arg_pad_value,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
PadMode pad_mode)
: Op(check_single_output_args({arg, arg_pad_value}))
: Op({arg, arg_pad_value})
, m_padding_below(padding_below)
, m_padding_above(padding_above)
, m_padding_interior_fake(padding_below.size())
......
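The Pad change above shows the pattern repeated throughout this diff: constructors now take const Output<Node>& instead of const shared_ptr<Node>&, and the Op base-class call drops check_single_output_args because an Output already names one specific tensor of its producer. A minimal call-site sketch under the post-change headers; the names data, pad_value, and build_pad are illustrative, not from this diff:

#include "ngraph/ngraph.hpp"

using namespace ngraph;

std::shared_ptr<Node> build_pad()
{
    auto data = std::make_shared<op::Parameter>(element::f32, Shape{2, 3});
    auto pad_value = op::Constant::create(element::f32, Shape{}, std::vector<float>{0});
    // A shared_ptr<Node> still converts to Output<Node> (output index 0), so
    // existing call sites that passed nodes keep compiling after this change.
    return std::make_shared<op::Pad>(
        data, pad_value, CoordinateDiff{1, 1}, CoordinateDiff{1, 1}, op::PadMode::CONSTANT);
}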
......@@ -32,14 +32,16 @@ namespace ngraph
static const std::string type_name;
const std::string& description() const override { return type_name; }
/// \brief Constructs a generic padding operation.
Pad() = default;
/// \brief Constructs a generic padding operation.
///
/// \param arg The node producing the input tensor to be padded.
/// \param arg_pad_value The node producing the scalar value to be inserted for padding.
/// \param padding_below The padding-below widths.
/// \param padding_above The padding-above widths.
/// \param pad_mode The padding mode: CONSTANT (default), EDGE, REFLECT, or SYMMETRIC.
Pad(const std::shared_ptr<Node>& arg,
const std::shared_ptr<Node>& arg_pad_value,
Pad(const Output<Node>& arg,
const Output<Node>& arg_pad_value,
const CoordinateDiff& padding_below,
const CoordinateDiff& padding_above,
PadMode pad_mode = PadMode::CONSTANT);
......@@ -49,14 +51,24 @@ namespace ngraph
void validate_and_infer_types() override;
/// \return The padding-below sizes.
const CoordinateDiff& get_padding_below() const { return m_padding_below; }
void set_padding_below(const CoordinateDiff& padding_below)
{
m_padding_below = padding_below;
}
/// \return The padding-above sizes.
const CoordinateDiff& get_padding_above() const { return m_padding_above; }
void set_padding_above(const CoordinateDiff& padding_above)
{
m_padding_above = padding_above;
}
/// \brief DEPRECATED. This is just a stub for backends that used to implement the
/// interior padding feature, which is no longer supported.
/// \return Returns a shape full of zeros, with the same rank as get_padding_below().
const Shape& get_padding_interior() const { return m_padding_interior_fake; }
/// \return The padding mode.
PadMode get_pad_mode() const { return m_pad_mode; }
void set_pad_mode(PadMode pad_mode) { m_pad_mode = pad_mode; }
/// \return The default value for Pad.
virtual std::shared_ptr<Node> get_default_value() const override;
......
......@@ -38,6 +38,7 @@ namespace ngraph
NGRAPH_API
static const std::string type_name;
const std::string& description() const override { return type_name; }
Parameter() = default;
/// \brief Constructs a tensor-typed parameter node.
///
/// \param element_type The element type of the parameter.
......
......@@ -38,6 +38,21 @@ ngraph::op::Passthrough::Passthrough(const std::string& logical_type,
constructor_validate_and_infer_types();
}
ngraph::op::Passthrough::Passthrough(const std::string& logical_type,
const std::string& language,
const std::string& function,
const OutputVector& args,
std::vector<std::tuple<element::Type, PartialShape>> outputs)
: Op{args}
, m_logical_type{logical_type}
, m_language{language}
, m_function{function}
, m_output_shapes{std::move(outputs)}
{
set_output_size(m_output_shapes.size());
constructor_validate_and_infer_types();
}
void ngraph::op::Passthrough::validate_and_infer_types()
{
// N.B. It would be useful to have the backend deduce the output
......
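A sketch of constructing a Passthrough through the new OutputVector overload; the header path, the helper name, and the three strings are placeholders for whatever a backend actually consumes, not values from this diff:

#include <tuple>
#include <vector>
#include "ngraph/op/passthrough.hpp"

using namespace ngraph;

std::shared_ptr<op::Passthrough> make_passthrough(const Output<Node>& a, const Output<Node>& b)
{
    return std::make_shared<op::Passthrough>(
        "MyFusedOp",          // logical_type: what the operation is doing
        "Tile",               // the language the implementation is written in
        "function body here", // the operation implementation
        OutputVector{a, b},   // the new overload takes outputs, not whole nodes
        std::vector<std::tuple<element::Type, PartialShape>>{
            std::make_tuple(element::f32, PartialShape{2, 2})});
}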
......@@ -41,12 +41,19 @@ public:
NGRAPH_API
static const std::string type_name;
const std::string& description() const override { return type_name; }
Passthrough() = default;
Passthrough(const std::string& logical_type, // aka "What this operation is doing"
const std::string& language, // The language the implementation is written in
const std::string& function, // The operation implementation
const NodeVector& args,
std::vector<std::tuple<element::Type, PartialShape>> outputs);
Passthrough(const std::string& logical_type, // aka "What this operation is doing"
const std::string& language, // The language the implementation is written in
const std::string& function, // The operation implementation
const OutputVector& args,
std::vector<std::tuple<element::Type, PartialShape>> outputs);
void validate_and_infer_types() final;
std::shared_ptr<Node> copy_with_new_args(const NodeVector& new_args) const final;
......
......@@ -24,9 +24,7 @@ using namespace ngraph;
const string op::Power::type_name{"Power"};
op::Power::Power(const shared_ptr<Node>& arg0,
const shared_ptr<Node>& arg1,
const AutoBroadcastSpec& autob)
op::Power::Power(const Output<Node>& arg0, const Output<Node>& arg1, const AutoBroadcastSpec& autob)
: BinaryElementwiseArithmetic(arg0, arg1, autob)
{
constructor_validate_and_infer_types();
......
......@@ -42,13 +42,14 @@ namespace ngraph
NGRAPH_API
static const std::string type_name;
const std::string& description() const override { return type_name; }
Power() = default;
/// \brief Constructs an exponentiation operation.
///
/// \param arg0 Node that produces the first input tensor.
/// \param arg1 Node that produces the second input tensor.
/// \param autob Auto broadcast specification
Power(const std::shared_ptr<Node>& arg0,
const std::shared_ptr<Node>& arg1,
Power(const Output<Node>& arg0,
const Output<Node>& arg1,
const AutoBroadcastSpec& autob = AutoBroadcastSpec());
virtual std::shared_ptr<Node>
......
......@@ -21,10 +21,6 @@ using namespace ngraph;
const string op::Product::type_name{"Product"};
op::Product::Product()
{
}
op::Product::Product(const Output<Node>& arg, const AxisSet& reduction_axes)
: ArithmeticReduction(arg, reduction_axes)
{
......
......@@ -33,7 +33,7 @@ namespace ngraph
static const std::string type_name;
const std::string& description() const override { return type_name; }
/// \brief Constructs a product reduction operation.
Product();
Product() = default;
/// \brief Constructs a product reduction operation.
///
/// \param arg The tensor to be reduced.
......
......@@ -92,6 +92,8 @@ namespace ngraph
const ngraph::AxisSet& axes,
RoundMode round_mode);
Quantize() = default;
void validate_and_infer_types() override;
virtual std::shared_ptr<Node>
......
......@@ -66,6 +66,8 @@ namespace ngraph
const ngraph::AxisSet& filter_axes = ngraph::AxisSet{},
const ngraph::AxisSet& output_axes = ngraph::AxisSet{});
QuantizedConvolution() = default;
const Strides& get_window_movement_strides() const { return m_window_movement_strides; }
const Strides& get_window_dilation_strides() const { return m_window_dilation_strides; }
const CoordinateDiff& get_padding_below() const { return m_padding_below; }
......
......@@ -23,7 +23,7 @@ using namespace ngraph;
const string op::Relu::type_name{"Relu"};
const string op::ReluBackprop::type_name{"ReluBackprop"};
op::Relu::Relu(shared_ptr<Node> arg)
op::Relu::Relu(const Output<Node>& arg)
: UnaryElementwiseArithmetic(arg)
{
constructor_validate_and_infer_types();
......
......@@ -36,10 +36,11 @@ namespace ngraph
NGRAPH_API
static const std::string type_name;
const std::string& description() const override { return type_name; }
Relu() = default;
/// \brief Constructs a Relu operation.
///
/// \param arg Node that produces the input tensor.
Relu(std::shared_ptr<ngraph::Node> arg);
Relu(const Output<ngraph::Node>& arg);
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
......
......@@ -53,6 +53,7 @@ namespace ngraph
NGRAPH_API
static const std::string type_name;
const std::string& description() const override { return type_name; }
ReplaceSlice() = default;
/// \brief Constructs a tensor slice replacement operation.
///
/// \param arg0 The tensor to overwrite into.
......@@ -85,10 +86,16 @@ namespace ngraph
/// \return The inclusive lower-bound coordinates.
const Coordinate& get_lower_bounds() const { return m_lower_bounds; }
void set_lower_bounds(const Coordinate& lower_bounds) { m_lower_bounds = lower_bounds; }
/// \return The exclusive upper-bound coordinates.
const Coordinate& get_upper_bounds() const { return m_upper_bounds; }
void set_upper_bounds(const Coordinate& upper_bounds)
{
m_upper_bounds = upper_bounds;
}
/// \return The slicing strides.
const Strides& get_strides() const { return m_strides; }
void set_strides(const Strides& strides) { m_strides = strides; }
protected:
virtual void generate_adjoints(autodiff::Adjoints& adjoints,
const NodeVector& deltas) override;
......
......@@ -25,8 +25,8 @@ using namespace ngraph;
const string op::Reverse::type_name{"Reverse"};
op::Reverse::Reverse(const shared_ptr<Node>& arg, const AxisSet& reversed_axes)
: Op(check_single_output_args({arg}))
op::Reverse::Reverse(const Output<Node>& arg, const AxisSet& reversed_axes)
: Op({arg})
, m_reversed_axes(reversed_axes)
{
constructor_validate_and_infer_types();
......@@ -65,7 +65,7 @@ void op::Reverse::generate_adjoints(autodiff::Adjoints& adjoints, const NodeVect
{
auto delta = deltas.at(0);
auto x = get_argument(0);
auto x = input(0).get_source_output();
adjoints.add_delta(x, make_shared<op::Reverse>(delta, m_reversed_axes));
}
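The generate_adjoints bodies follow the second recurring pattern in this diff: get_argument(i), a shared_ptr<Node> to the producer, is replaced by input(i).get_source_output(), an Output<Node> naming the exact producer output, which is what adjoints.add_delta is keyed on. A hedged sketch of the two styles side by side; inspect_first_input is a hypothetical helper:

#include <memory>
#include "ngraph/ngraph.hpp"

using namespace ngraph;

void inspect_first_input(const std::shared_ptr<Node>& node)
{
    // Old style: the producer as a node (assumes a single-output producer).
    std::shared_ptr<Node> x_node = node->get_argument(0);

    // New style: the specific output of the producer feeding input 0.
    Output<Node> x = node->input(0).get_source_output();
    std::shared_ptr<Node> producer = x.get_node_shared_ptr(); // back to the node
    (void)x_node;
    (void)producer;
}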
......@@ -49,11 +49,12 @@ namespace ngraph
NGRAPH_API
static const std::string type_name;
const std::string& description() const override { return type_name; }
Reverse() = default;
/// \brief Constructs a reverse operation.
///
/// \param arg The input tensor, some of whose axes are to be reversed.
/// \param reversed_axes The axes to reverse.
Reverse(const std::shared_ptr<Node>& arg, const AxisSet& reversed_axes);
Reverse(const Output<Node>& arg, const AxisSet& reversed_axes);
void validate_and_infer_types() override;
......@@ -62,11 +63,16 @@ namespace ngraph
/// \return The set of axes to reverse.
const AxisSet& get_reversed_axes() const { return m_reversed_axes; }
void set_reversed_axes(const AxisSet& reversed_axes)
{
m_reversed_axes = reversed_axes;
}
protected:
virtual void generate_adjoints(autodiff::Adjoints& adjoints,
const NodeVector& deltas) override;
const AxisSet m_reversed_axes;
AxisSet m_reversed_axes;
};
}
}
......@@ -27,11 +27,11 @@ using namespace ngraph;
const string op::ReverseSequence::type_name{"ReverseSequence"};
op::ReverseSequence::ReverseSequence(const std::shared_ptr<Node> arg,
const std::shared_ptr<Node> seq_indices,
op::ReverseSequence::ReverseSequence(const Output<Node>& arg,
const Output<Node>& seq_indices,
size_t batch_axis,
size_t seq_axis)
: Op(check_single_output_args({arg, seq_indices}))
: Op({arg, seq_indices})
, m_batch_axis(batch_axis)
, m_seq_axis(seq_axis)
{
......@@ -104,8 +104,8 @@ shared_ptr<Node> op::ReverseSequence::copy_with_new_args(const NodeVector& new_a
void op::ReverseSequence::generate_adjoints(autodiff::Adjoints& adjoints, const NodeVector& deltas)
{
auto x = get_argument(0);
auto rs_delta =
make_shared<ReverseSequence>(deltas.at(0), get_argument(1), m_batch_axis, m_seq_axis);
auto x = input(0).get_source_output();
auto rs_delta = make_shared<ReverseSequence>(
deltas.at(0), input(1).get_source_output(), m_batch_axis, m_seq_axis);
adjoints.add_delta(x, rs_delta);
}
......@@ -28,11 +28,12 @@ namespace ngraph
NGRAPH_API
static const std::string type_name;
const std::string& description() const override { return type_name; }
ReverseSequence() = default;
/// \brief Constructs a ReverseSequence operation.
///
/// \param arg Node that produces the input tensor.
ReverseSequence(const std::shared_ptr<Node> arg,
const std::shared_ptr<Node> seq_lengths,
ReverseSequence(const Output<Node>& arg,
const Output<Node>& seq_lengths,
size_t batch_axis,
size_t seq_axis);
......@@ -42,7 +43,9 @@ namespace ngraph
copy_with_new_args(const NodeVector& new_args) const override;
size_t get_batch_axis() const { return m_batch_axis; }
void set_batch_axis(size_t batch_axis) { m_batch_axis = batch_axis; }
size_t get_sequence_axis() const { return m_seq_axis; }
void set_sequence_axis(size_t sequence_axis) { m_seq_axis = sequence_axis; }
protected:
virtual void generate_adjoints(autodiff::Adjoints& adjoints,
const NodeVector& deltas) override;
......
......@@ -29,13 +29,14 @@ namespace ngraph
NGRAPH_API
static const std::string type_name;
const std::string& description() const override { return type_name; }
ScatterAdd() = default;
/// \param inputs Tensor
/// \param indices Index tensor: Data type must be `element::i32` or `element::i64`
/// \param updates Tensor: Must have same type as inputs
ScatterAdd(const std::shared_ptr<Node>& inputs,
const std::shared_ptr<Node>& indices,
const std::shared_ptr<Node>& updates)
: Op(check_single_output_args({inputs, indices, updates}))
ScatterAdd(const Output<Node>& inputs,
const Output<Node>& indices,
const Output<Node>& updates)
: Op({inputs, indices, updates})
{
constructor_validate_and_infer_types();
}
......
......@@ -29,13 +29,14 @@ namespace ngraph
NGRAPH_API
static const std::string type_name;
const std::string& description() const override { return type_name; }
ScatterNDAdd() = default;
/// \param inputs Tensor
/// \param indices Index tensor: Data type must be `element::i32` or `element::i64`
/// \param updates Tensor: Must have same type as inputs
ScatterNDAdd(const std::shared_ptr<Node>& inputs,
const std::shared_ptr<Node>& indices,
const std::shared_ptr<Node>& updates)
: Op(check_single_output_args({inputs, indices, updates}))
ScatterNDAdd(const Output<Node>& inputs,
const Output<Node>& indices,
const Output<Node>& updates)
: Op({inputs, indices, updates})
{
constructor_validate_and_infer_types();
}
......
......@@ -72,12 +72,12 @@ void op::Select::generate_adjoints(autodiff::Adjoints& adjoints, const NodeVecto
{
auto delta = deltas.at(0);
auto p = get_argument(0);
auto x = get_argument(1);
auto y = get_argument(2);
auto p = input(0).get_source_output();
auto x = input(1).get_source_output();
auto y = input(2).get_source_output();
auto p_as_x_type = make_shared<op::Convert>(p, x->get_element_type());
auto not_p_as_y_type = make_shared<op::Convert>(make_shared<op::Not>(p), y->get_element_type());
auto p_as_x_type = make_shared<op::Convert>(p, x.get_element_type());
auto not_p_as_y_type = make_shared<op::Convert>(make_shared<op::Not>(p), y.get_element_type());
adjoints.add_delta(x, delta * p_as_x_type);
adjoints.add_delta(y, delta * not_p_as_y_type);
......
......@@ -30,13 +30,13 @@ shared_ptr<Node> op::Sigmoid::copy_with_new_args(const NodeVector& new_args) con
return make_shared<Sigmoid>(new_args.at(0));
}
op::Sigmoid::Sigmoid(shared_ptr<Node> arg)
op::Sigmoid::Sigmoid(const Output<Node>& arg)
: UnaryElementwiseArithmetic(arg)
{
constructor_validate_and_infer_types();
}
op::SigmoidBackprop::SigmoidBackprop(shared_ptr<Node> arg, shared_ptr<Node> delta)
op::SigmoidBackprop::SigmoidBackprop(const Output<Node>& arg, const Output<Node>& delta)
: BinaryElementwiseArithmetic(arg, delta)
{
constructor_validate_and_infer_types();
......@@ -52,6 +52,6 @@ void op::Sigmoid::generate_adjoints(autodiff::Adjoints& adjoints, const NodeVect
{
auto delta = deltas.at(0);
auto backprop = make_shared<op::SigmoidBackprop>(get_argument(0), delta);
adjoints.add_delta(get_argument(0), backprop);
auto backprop = make_shared<op::SigmoidBackprop>(input(0).get_source_output(), delta);
adjoints.add_delta(input(0).get_source_output(), backprop);
}
......@@ -31,7 +31,8 @@ namespace ngraph
NGRAPH_API
static const std::string type_name;
const std::string& description() const override { return type_name; }
Sigmoid(std::shared_ptr<Node> arg);
Sigmoid(const Output<Node>& arg);
Sigmoid() = default;
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
virtual void generate_adjoints(autodiff::Adjoints& adjoints,
......@@ -46,10 +47,11 @@ namespace ngraph
NGRAPH_API
static const std::string type_name;
const std::string& description() const override { return type_name; }
SigmoidBackprop() = default;
/// \brief Constructs a SigmoidBackprop operation.
///
/// \param arg Node that produces the Sigmoid forward input tensor.
SigmoidBackprop(std::shared_ptr<ngraph::Node> arg, std::shared_ptr<ngraph::Node> delta);
SigmoidBackprop(const Output<Node>& arg, const Output<Node>& delta);
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
......
......@@ -21,7 +21,7 @@ using namespace ngraph;
const string op::Sign::type_name{"Sign"};
op::Sign::Sign(const shared_ptr<Node>& arg)
op::Sign::Sign(const Output<Node>& arg)
: UnaryElementwiseArithmetic(arg)
{
constructor_validate_and_infer_types();
......
......@@ -30,10 +30,11 @@ namespace ngraph
NGRAPH_API
static const std::string type_name;
const std::string& description() const override { return type_name; }
Sign() = default;
/// \brief Constructs an elementwise sign operation.
///
/// \param arg Node that produces the input tensor.
Sign(const std::shared_ptr<Node>& arg);
Sign(const Output<Node>& arg);
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
......
......@@ -23,7 +23,7 @@ using namespace ngraph;
const string op::Sin::type_name{"Sin"};
op::Sin::Sin(const shared_ptr<Node>& arg)
op::Sin::Sin(const Output<Node>& arg)
: UnaryElementwiseArithmetic(arg)
{
constructor_validate_and_infer_types();
......@@ -39,7 +39,7 @@ void op::Sin::generate_adjoints(autodiff::Adjoints& adjoints, const NodeVector&
{
auto delta = deltas.at(0);
auto x = get_argument(0);
auto x = input(0).get_source_output();
adjoints.add_delta(x, delta * (make_shared<op::Cos>(x)));
}
......@@ -44,7 +44,8 @@ namespace ngraph
/// \brief Constructs a sine operation.
///
/// \param arg Node that produces the input tensor.
Sin(const std::shared_ptr<Node>& arg);
Sin(const Output<Node>& arg);
Sin() = default;
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
......
......@@ -23,7 +23,7 @@ using namespace ngraph;
const string op::Sinh::type_name{"Sinh"};
op::Sinh::Sinh(const shared_ptr<Node>& arg)
op::Sinh::Sinh(const Output<Node>& arg)
: UnaryElementwiseArithmetic(arg)
{
constructor_validate_and_infer_types();
......
......@@ -32,7 +32,8 @@ namespace ngraph
/// \brief Constructs a hyperbolic sine operation.
///
/// \param arg Node that produces the input tensor.
Sinh(const std::shared_ptr<Node>& arg);
Sinh(const Output<Node>& arg);
Sinh() = default;
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
......
......@@ -21,10 +21,6 @@ using namespace ngraph;
const string op::Slice::type_name{"Slice"};
op::Slice::Slice()
{
}
op::Slice::Slice(const Output<Node>& arg,
const Coordinate& lower_bounds,
const Coordinate& upper_bounds,
......@@ -139,7 +135,7 @@ void op::Slice::generate_adjoints(autodiff::Adjoints& adjoints, const NodeVector
{
auto delta = deltas.at(0);
auto x = get_argument(0);
auto x = input(0).get_source_output();
adjoints.add_delta_to_slice(x, delta, m_lower_bounds, m_upper_bounds, m_strides);
}
......@@ -32,7 +32,7 @@ namespace ngraph
static const std::string type_name;
const std::string& description() const override { return type_name; }
/// \brief Constructs a tensor slice operation
Slice();
Slice() = default;
/// \brief Constructs a tensor slice operation.
///
/// \param arg The tensor to be sliced.
......
......@@ -31,7 +31,7 @@ using namespace ngraph;
const string op::Softmax::type_name{"Softmax"};
op::Softmax::Softmax(const shared_ptr<Node>& arg, const AxisSet& axes)
op::Softmax::Softmax(const Output<Node>& arg, const AxisSet& axes)
: UnaryElementwiseArithmetic(arg)
, m_axes(axes)
{
......@@ -88,6 +88,6 @@ void op::Softmax::generate_adjoints(autodiff::Adjoints& adjoints, const NodeVect
auto adjoint = z - builder::make_with_numpy_broadcast<op::Multiply>(output(0), zreshape);
auto x = get_argument(0);
auto x = input(0).get_source_output();
adjoints.add_delta(x, adjoint);
}
......@@ -30,6 +30,7 @@ namespace ngraph
NGRAPH_API
static const std::string type_name;
const std::string& description() const override { return type_name; }
Softmax() = default;
/// \brief Constructs a softmax operation.
///
/// \param arg Node that produces the first input tensor.<br>
......@@ -38,12 +39,13 @@ namespace ngraph
///
/// Output `[d0, ...]`
///
Softmax(const std::shared_ptr<Node>& arg, const AxisSet& axes);
Softmax(const Output<Node>& arg, const AxisSet& axes);
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
const AxisSet& get_axes() const { return m_axes; }
void set_axes(const AxisSet& axes) { m_axes = axes; }
protected:
virtual void generate_adjoints(autodiff::Adjoints& adjoints,
const NodeVector& deltas) override;
......
......@@ -23,7 +23,7 @@ using namespace ngraph;
const string op::Sqrt::type_name{"Sqrt"};
op::Sqrt::Sqrt(const shared_ptr<Node>& arg)
op::Sqrt::Sqrt(const Output<Node>& arg)
: UnaryElementwiseArithmetic(arg)
{
constructor_validate_and_infer_types();
......@@ -39,7 +39,7 @@ void op::Sqrt::generate_adjoints(autodiff::Adjoints& adjoints, const NodeVector&
{
auto delta = deltas.at(0);
auto x = get_argument(0);
auto x = input(0).get_source_output();
adjoints.add_delta(x, delta / (shared_from_this() + shared_from_this()));
}
......@@ -44,7 +44,8 @@ namespace ngraph
/// \brief Constructs a square root operation.
///
/// \param arg Node that produces the input tensor.
Sqrt(const std::shared_ptr<Node>& arg);
Sqrt(const Output<Node>& arg);
Sqrt() = default;
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
......
......@@ -23,7 +23,7 @@ using namespace ngraph;
const string op::StopGradient::type_name{"StopGradient"};
op::StopGradient::StopGradient(const shared_ptr<Node>& arg)
op::StopGradient::StopGradient(const Output<Node>& arg)
: UnaryElementwiseArithmetic(arg)
{
constructor_validate_and_infer_types();
......
......@@ -32,7 +32,8 @@ namespace ngraph
/// \brief Constructs StopGradient
///
/// \param arg Node that produces the input tensor.
StopGradient(const std::shared_ptr<Node>& arg);
StopGradient(const Output<Node>& arg);
StopGradient() = default;
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
......
......@@ -45,8 +45,8 @@ void op::Subtract::generate_adjoints(autodiff::Adjoints& adjoints, const NodeVec
auto delta = deltas.at(0);
auto x = get_argument(0);
auto y = get_argument(1);
auto x = input(0).get_source_output();
auto y = input(1).get_source_output();
adjoints.add_delta(x, delta);
adjoints.add_delta(y, -delta);
......
......@@ -22,10 +22,6 @@ using namespace ngraph;
const string op::Sum::type_name{"Sum"};
op::Sum::Sum()
{
}
op::Sum::Sum(const Output<Node>& arg, const AxisSet& reduction_axes)
: ArithmeticReduction(arg, reduction_axes)
{
......
......@@ -78,7 +78,7 @@ namespace ngraph
static const std::string type_name;
const std::string& description() const override { return type_name; }
/// \brief Constructs a summation operation.
Sum();
Sum() = default;
/// \brief Constructs a summation operation.
///
/// \param arg The tensor to be summed.
......
......@@ -24,7 +24,7 @@ using namespace ngraph;
const string op::Tan::type_name{"Tan"};
op::Tan::Tan(const shared_ptr<Node>& arg)
op::Tan::Tan(const Output<Node>& arg)
: UnaryElementwiseArithmetic(arg)
{
constructor_validate_and_infer_types();
......@@ -40,7 +40,7 @@ void op::Tan::generate_adjoints(autodiff::Adjoints& adjoints, const NodeVector&
{
auto delta = deltas.at(0);
auto x = get_argument(0);
auto x = input(0).get_source_output();
auto c = make_shared<op::Cos>(x);
......
......@@ -44,7 +44,8 @@ namespace ngraph
/// \brief Constructs a tangent operation.
///
/// \param arg Node that produces the input tensor.
Tan(const std::shared_ptr<Node>& arg);
Tan(const Output<Node>& arg);
Tan() = default;
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
......
......@@ -23,7 +23,7 @@ using namespace ngraph;
const string op::Tanh::type_name{"Tanh"};
op::Tanh::Tanh(const shared_ptr<Node>& arg)
op::Tanh::Tanh(const Output<Node>& arg)
: UnaryElementwiseArithmetic(arg)
{
constructor_validate_and_infer_types();
......@@ -39,7 +39,7 @@ void op::Tanh::generate_adjoints(autodiff::Adjoints& adjoints, const NodeVector&
{
auto delta = deltas.at(0);
auto x = get_argument(0);
auto x = input(0).get_source_output();
adjoints.add_delta(x, delta - (delta * (shared_from_this() * shared_from_this())));
}
......@@ -32,7 +32,8 @@ namespace ngraph
/// \brief Constructs a hyperbolic tangent operation.
///
/// \param arg Node that produces the input tensor.
Tanh(const std::shared_ptr<Node>& arg);
Tanh(const Output<Node>& arg);
Tanh() = default;
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
......
......@@ -26,10 +26,6 @@ using namespace ngraph;
const string op::TopK::type_name{"TopK"};
op::TopK::TopK()
{
}
op::TopK::TopK(const Output<Node>& arg,
size_t top_k_axis,
const element::Type& index_element_type,
......@@ -63,7 +59,8 @@ op::TopK::TopK(const Output<Node>& arg,
size_t op::TopK::get_k() const
{
size_t k = 0;
if (auto const_op = dynamic_pointer_cast<op::Constant>(get_argument(1)))
if (auto const_op =
dynamic_pointer_cast<op::Constant>(input(1).get_source_output().get_node_shared_ptr()))
{
k = const_op->get_vector<int64_t>()[0];
}
......
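get_k above also shows how reaching a constant input changes: get_source_output() yields the Output<Node>, and get_node_shared_ptr() recovers the producer node for the dynamic_pointer_cast. The same pattern in isolation, as a hypothetical helper that is not part of this diff:

#include <cstdint>
#include <memory>
#include "ngraph/ngraph.hpp"

using namespace ngraph;

// Returns the scalar behind input 1 if a Constant produces it, else fallback.
int64_t read_scalar_input(const std::shared_ptr<Node>& node, int64_t fallback)
{
    auto producer = node->input(1).get_source_output().get_node_shared_ptr();
    if (auto const_op = std::dynamic_pointer_cast<op::Constant>(producer))
    {
        return const_op->get_vector<int64_t>()[0];
    }
    return fallback;
}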
......@@ -44,7 +44,7 @@ namespace ngraph
static const std::string type_name;
const std::string& description() const override { return type_name; }
/// \brief Constructs a TopK operation
TopK();
TopK() = default;
/// \brief Constructs a TopK operation.
///
/// \param arg The input tensor
......
......@@ -24,7 +24,6 @@
#include "ngraph/pass/assign_layout.hpp"
#include "ngraph/pass/core_fusion.hpp"
#include "ngraph/pass/fused_op_decomposition.hpp"
#include "ngraph/pass/implicit_broadcast_elimination.hpp"
#include "ngraph/pass/like_replacement.hpp"
#include "ngraph/pass/liveness.hpp"
#include "ngraph/pass/manager.hpp"
......@@ -48,7 +47,6 @@ runtime::interpreter::INTExecutable::INTExecutable(const shared_ptr<Function>& f
pass::Manager pass_manager;
pass_manager.register_pass<pass::LikeReplacement>();
pass_manager.register_pass<pass::FusedOpDecomposition>();
pass_manager.register_pass<pass::ImplicitBroadcastElimination>();
pass_manager.register_pass<pass::AssignLayout<DenseTensorLayout>>();
pass_manager.register_pass<pass::Liveness>();
pass_manager.run_passes(m_function);
......
......@@ -18,6 +18,11 @@
#include <cstddef>
#include "ngraph/coordinate_transform.hpp"
#include "ngraph/op/util/attr_types.hpp"
#include "ngraph/runtime/reference/autobroadcast_binop.hpp"
#include "ngraph/shape_util.hpp"
namespace ngraph
{
namespace runtime
......@@ -32,6 +37,20 @@ namespace ngraph
out[i] = arg0[i] + arg1[i];
}
}
template <typename T>
void add(const T* arg0,
const T* arg1,
T* out,
const Shape& arg0_shape,
const Shape& arg1_shape,
const op::AutoBroadcastSpec& broadcast_spec)
{
autobroadcast_binop(
arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> T {
return x + y;
});
}
}
}
}
......@@ -32,6 +32,20 @@ namespace ngraph
out[i] = static_cast<T>(arg0[i] && arg1[i]);
}
}
template <typename T>
void logical_and(const T* arg0,
const T* arg1,
T* out,
const Shape& arg0_shape,
const Shape& arg1_shape,
const op::AutoBroadcastSpec& broadcast_spec)
{
autobroadcast_binop(
arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> T {
return static_cast<T>(x && y);
});
}
}
}
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <cstddef>
#include "ngraph/coordinate_transform.hpp"
#include "ngraph/op/util/attr_types.hpp"
#include "ngraph/shape_util.hpp"
namespace ngraph
{
namespace runtime
{
namespace reference
{
/// \brief Helper function to implement autobroadcasting elementwise binop references.
///
/// \tparam T Element type of the input tensors.
/// \tparam U Element type of the output tensor.
/// \tparam Functor Type of the functor for the elementwise operation. Must support
/// operator()(T,T), and operator()(T,T) must return a value of type
/// U.
///
/// \param arg0 Pointer to the buffer for left operand input tensor.
/// \param arg1 Pointer to the buffer for right operand input tensor.
/// \param out Pointer to the buffer for output tensor. This must be pre-allocated by
/// the caller, and must be large enough to hold a tensor of the correct
/// shape.
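/// \param arg0_shape The shape of the left operand input tensor.
/// \param arg1_shape The shape of the right operand input tensor.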
/// \param broadcast_spec Specification of the auto-broadcasting scheme.
/// \param elementwise_functor Functor implementing the elementwise operation to be
/// applied across the input tensors. Must accept two
/// arguments of type T, and return a value of type U.
template <typename T, typename U, typename Functor>
void autobroadcast_binop(const T* arg0,
const T* arg1,
U* out,
const Shape& arg0_shape,
const Shape& arg1_shape,
const op::AutoBroadcastSpec& broadcast_spec,
Functor elementwise_functor)
{
switch (broadcast_spec.m_type)
{
case op::AutoBroadcastType::NONE:
for (size_t i = 0; i < shape_size(arg0_shape); i++)
{
out[i] = elementwise_functor(arg0[i], arg1[i]);
}
break;
case op::AutoBroadcastType::NUMPY:
// We'll be using CoordinateTransform to handle the broadcasting. The general
// procedure is as follows:
//
// (1) Left pad the shorter of the two shapes with ones.
// (2) Squeeze (remove ones from) both shapes, and record the squeezed axis
// indices.
// (3) Using CoordinateTransform, broadcast both args to the final output
// shape. The "broadcasted axes" will be those that were squeezed in step
// 2.
//
// Example:
//
// Input shape->Padded shape->Squeezed Shape/Squeezed Axes
// ----------- ------------ ----------------------------
// a: [ 3, 2, 1] [ 3, 2, 1] [ 3, 2 ] {2}
// b: [ 1, 6] [ 1, 1, 6] [ 6] {0,1}
// | | |
// v v v
// Output shape
// ------------
// [ 3, 2, 6]
Shape arg0_padded_shape = arg0_shape;
Shape arg1_padded_shape = arg1_shape;
while (arg0_padded_shape.size() < arg1_padded_shape.size())
{
arg0_padded_shape.insert(arg0_padded_shape.begin(), 1);
}
while (arg1_padded_shape.size() < arg0_padded_shape.size())
{
arg1_padded_shape.insert(arg1_padded_shape.begin(), 1);
}
Shape arg0_squeezed_shape;
Shape arg1_squeezed_shape;
AxisSet arg0_squeezed_axes;
AxisSet arg1_squeezed_axes;
Shape output_shape;
for (size_t i = 0; i < arg0_padded_shape.size(); i++)
{
if (arg0_padded_shape[i] == 1)
{
arg0_squeezed_axes.insert(i);
}
else
{
arg0_squeezed_shape.push_back(arg0_padded_shape[i]);
}
if (arg1_padded_shape[i] == 1)
{
arg1_squeezed_axes.insert(i);
}
else
{
arg1_squeezed_shape.push_back(arg1_padded_shape[i]);
}
output_shape.push_back(arg0_padded_shape[i] == 1 ? arg1_padded_shape[i]
: arg0_padded_shape[i]);
}
CoordinateTransform arg0_transform(arg0_squeezed_shape);
CoordinateTransform arg1_transform(arg1_squeezed_shape);
CoordinateTransform output_transform(output_shape);
for (const Coordinate& output_coord : output_transform)
{
Coordinate arg0_coord = reduce(output_coord, arg0_squeezed_axes);
Coordinate arg1_coord = reduce(output_coord, arg1_squeezed_axes);
out[output_transform.index(output_coord)] =
elementwise_functor(arg0[arg0_transform.index(arg0_coord)],
arg1[arg1_transform.index(arg1_coord)]);
}
}
}
}
}
}
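A usage sketch for the helper above, mirroring the NumPy walkthrough in the comment on a smaller case ({2, 2} plus {2} broadcasting to {2, 2}); only this header is assumed, and broadcast_add_example is an illustrative name:

#include <cassert>
#include <vector>
#include "ngraph/runtime/reference/autobroadcast_binop.hpp"

using namespace ngraph;

void broadcast_add_example()
{
    std::vector<float> a{1, 2, 3, 4}; // shape {2, 2}: [[1, 2], [3, 4]]
    std::vector<float> b{10, 20};     // shape {2}, broadcast across the rows
    std::vector<float> out(4);

    runtime::reference::autobroadcast_binop(
        a.data(), b.data(), out.data(), Shape{2, 2}, Shape{2},
        op::AutoBroadcastSpec(op::AutoBroadcastType::NUMPY),
        [](float x, float y) -> float { return x + y; });

    // b is added to each row of a.
    assert((out == std::vector<float>{11, 22, 13, 24}));
}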
......@@ -67,6 +67,47 @@ namespace ngraph
}
}
template <typename T>
typename std::enable_if<std::is_integral<T>::value>::type
divide(const T* arg0,
const T* arg1,
T* out,
const Shape& arg0_shape,
const Shape& arg1_shape,
const op::AutoBroadcastSpec& broadcast_spec,
bool pythondiv)
{
auto functor = [pythondiv](T x, T y) -> T {
if (pythondiv)
{
if (y == 0)
{
throw std::domain_error("integer division by zero");
}
T quot = x / y;
T rem = x % y;
if ((rem != 0) && ((x < 0) != (y < 0)))
{
return quot - 1;
}
else
{
return quot;
}
}
else
{
if (y == 0)
{
throw std::domain_error("integer division by zero");
}
return x / y;
}
};
autobroadcast_binop(
arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, functor);
}
// In English: return type is void and T must be a standard floating point type, or
// bfloat16, or float16.
template <typename T>
......@@ -83,6 +124,25 @@ namespace ngraph
out[i] = arg0[i] / arg1[i];
}
}
template <typename T>
typename std::enable_if<std::is_floating_point<T>::value ||
std::is_same<T, bfloat16>::value ||
std::is_same<T, float16>::value>::type
divide(const T* arg0,
const T* arg1,
T* out,
const Shape& arg0_shape,
const Shape& arg1_shape,
const op::AutoBroadcastSpec& broadcast_spec,
bool pythondiv)
{
(void)pythondiv;
autobroadcast_binop(
arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> T {
return x / y;
});
}
}
}
}
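The pythondiv functor above implements floor division: the truncated quotient is decremented when the remainder is nonzero and the operands differ in sign. A small demonstration under the same assumptions (integral element type, nonzero divisors); the header path and pythondiv_example name are assumed:

#include <cstdint>
#include <vector>
#include "ngraph/runtime/reference/divide.hpp"

using namespace ngraph;

void pythondiv_example()
{
    std::vector<int32_t> x{-7, 7, -7, 7};
    std::vector<int32_t> y{2, 2, -2, -2};
    std::vector<int32_t> out(4);

    runtime::reference::divide(x.data(), y.data(), out.data(), Shape{4}, Shape{4},
                               op::AutoBroadcastSpec(op::AutoBroadcastType::NONE),
                               /*pythondiv=*/true);
    // out == {-4, 3, 3, -4}: quotients floor toward negative infinity, matching
    // Python's // operator; C++ '/' would truncate toward zero: {-3, 3, 3, -3}.
}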
......@@ -40,6 +40,20 @@ namespace ngraph
out[i] = arg0[i] == arg1[i];
}
}
template <typename T>
void equal(const T* arg0,
const T* arg1,
char* out,
const Shape& arg0_shape,
const Shape& arg1_shape,
const op::AutoBroadcastSpec& broadcast_spec)
{
autobroadcast_binop(
arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> char {
return x == y;
});
}
}
}
}
......
......@@ -35,6 +35,20 @@ namespace ngraph
out[i] = arg0[i] > arg1[i];
}
}
template <typename T>
void greater(const T* arg0,
const T* arg1,
char* out,
const Shape& arg0_shape,
const Shape& arg1_shape,
const op::AutoBroadcastSpec& broadcast_spec)
{
autobroadcast_binop(
arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> char {
return x > y;
});
}
}
}
}
......@@ -35,6 +35,20 @@ namespace ngraph
out[i] = arg0[i] >= arg1[i];
}
}
template <typename T>
void greater_eq(const T* arg0,
const T* arg1,
char* out,
const Shape& arg0_shape,
const Shape& arg1_shape,
const op::AutoBroadcastSpec& broadcast_spec)
{
autobroadcast_binop(
arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> char {
return x >= y;
});
}
}
}
}
......@@ -35,6 +35,20 @@ namespace ngraph
out[i] = arg0[i] < arg1[i];
}
}
template <typename T>
void less(const T* arg0,
const T* arg1,
char* out,
const Shape& arg0_shape,
const Shape& arg1_shape,
const op::AutoBroadcastSpec& broadcast_spec)
{
autobroadcast_binop(
arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> char {
return x < y;
});
}
}
}
}
......@@ -35,6 +35,20 @@ namespace ngraph
out[i] = arg0[i] <= arg1[i];
}
}
template <typename T>
void less_eq(const T* arg0,
const T* arg1,
char* out,
const Shape& arg0_shape,
const Shape& arg1_shape,
const op::AutoBroadcastSpec& broadcast_spec)
{
autobroadcast_binop(
arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> char {
return x <= y;
});
}
}
}
}
......@@ -32,6 +32,20 @@ namespace ngraph
out[i] = arg0[i] > arg1[i] ? arg0[i] : arg1[i];
}
}
template <typename T>
void maximum(const T* arg0,
const T* arg1,
T* out,
const Shape& arg0_shape,
const Shape& arg1_shape,
const op::AutoBroadcastSpec& broadcast_spec)
{
autobroadcast_binop(
arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> T {
return x > y ? x : y;
});
}
}
}
}
......@@ -32,6 +32,20 @@ namespace ngraph
out[i] = arg0[i] < arg1[i] ? arg0[i] : arg1[i];
}
}
template <typename T>
void minimum(const T* arg0,
const T* arg1,
T* out,
const Shape& arg0_shape,
const Shape& arg1_shape,
const op::AutoBroadcastSpec& broadcast_spec)
{
autobroadcast_binop(
arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> T {
return x < y ? x : y;
});
}
}
}
}
......@@ -32,6 +32,20 @@ namespace ngraph
out[i] = arg0[i] * arg1[i];
}
}
template <typename T>
void multiply(const T* arg0,
const T* arg1,
T* out,
const Shape& arg0_shape,
const Shape& arg1_shape,
const op::AutoBroadcastSpec& broadcast_spec)
{
autobroadcast_binop(
arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> T {
return x * y;
});
}
}
}
}
......@@ -40,6 +40,20 @@ namespace ngraph
out[i] = arg0[i] != arg1[i];
}
}
template <typename T>
void not_equal(const T* arg0,
const T* arg1,
char* out,
const Shape& arg0_shape,
const Shape& arg1_shape,
const op::AutoBroadcastSpec& broadcast_spec)
{
autobroadcast_binop(
arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> char {
return x != y;
});
}
}
}
}
......
......@@ -32,6 +32,20 @@ namespace ngraph
out[i] = static_cast<T>(arg0[i] || arg1[i]);
}
}
template <typename T>
void logical_or(const T* arg0,
const T* arg1,
T* out,
const Shape& arg0_shape,
const Shape& arg1_shape,
const op::AutoBroadcastSpec& broadcast_spec)
{
autobroadcast_binop(
arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> T {
return static_cast<T>(x || y);
});
}
}
}
}
......@@ -33,6 +33,20 @@ namespace ngraph
out[i] = std::pow(arg0[i], arg1[i]);
}
}
template <typename T>
void power(const T* arg0,
const T* arg1,
T* out,
const Shape& arg0_shape,
const Shape& arg1_shape,
const op::AutoBroadcastSpec& broadcast_spec)
{
autobroadcast_binop(
arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> T {
return std::pow(x, y);
});
}
}
}
}
......@@ -32,6 +32,20 @@ namespace ngraph
out[i] = arg0[i] - arg1[i];
}
}
template <typename T>
void subtract(const T* arg0,
const T* arg1,
T* out,
const Shape& arg0_shape,
const Shape& arg1_shape,
const op::AutoBroadcastSpec& broadcast_spec)
{
autobroadcast_binop(
arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> T {
return x - y;
});
}
}
}
}
......@@ -32,6 +32,20 @@ namespace ngraph
out[i] = static_cast<T>((arg0[i] || arg1[i]) && !(arg0[i] && arg1[i]));
}
}
template <typename T>
void logical_xor(const T* arg0,
const T* arg1,
T* out,
const Shape& arg0_shape,
const Shape& arg1_shape,
const op::AutoBroadcastSpec& broadcast_spec)
{
autobroadcast_binop(
arg0, arg1, out, arg0_shape, arg1_shape, broadcast_spec, [](T x, T y) -> T {
return static_cast<T>((x || y) && !(x && y));
});
}
}
}
}
......@@ -1576,7 +1576,7 @@ shared_ptr<Node> JSONDeserializer::deserialize_node(json node_js)
node = make_shared<op::Passthrough>(node_js.at("logical_type"),
node_js.at("language"),
node_js.at("function"),
args,
static_cast<OutputVector>(args),
std::move(outputs));
break;
}
......
......@@ -168,15 +168,24 @@ static std::vector<T> get_result_constant(std::shared_ptr<Function> f, size_t po
TEST(constant_folding, constant_unary_binary)
{
Shape shape_in{4};
vector<int> values_a{1, 2, 3, 4};
vector<int> values_b{1, 2, 3, 4};
vector<int> values_c{-1, -1, -1, -1};
vector<int> values_d{1, 4, 9, 16};
auto a = make_shared<op::Constant>(element::i32, shape_in, values_a);
auto b = make_shared<op::Constant>(element::i32, shape_in, values_b);
auto c = make_shared<op::Constant>(element::i32, shape_in, values_c);
auto d = make_shared<op::Constant>(element::i32, shape_in, values_d);
vector<int> values_e{5, 6};
vector<int> values_f{0, 10};
vector<int> values_g{1, 4};
vector<char> values_h{0, 0, 1, 1};
vector<char> values_i{0, 1};
auto a = make_shared<op::Constant>(element::i32, Shape{2, 2}, values_a);
auto b = make_shared<op::Constant>(element::i32, Shape{2, 2}, values_b);
auto c = make_shared<op::Constant>(element::i32, Shape{2, 2}, values_c);
auto d = make_shared<op::Constant>(element::i32, Shape{2, 2}, values_d);
auto e = make_shared<op::Constant>(element::i32, Shape{2}, values_e);
auto f = make_shared<op::Constant>(element::i32, Shape{2}, values_f);
auto g = make_shared<op::Constant>(element::i32, Shape{2}, values_g);
auto h = make_shared<op::Constant>(element::boolean, Shape{2, 2}, values_h);
auto i = make_shared<op::Constant>(element::boolean, Shape{2}, values_i);
auto add = a + b;
auto sub = a - b;
......@@ -187,15 +196,54 @@ TEST(constant_folding, constant_unary_binary)
auto absn = make_shared<op::Abs>(c);
auto neg = make_shared<op::Negative>(c);
auto sqrt = make_shared<op::Sqrt>(d);
auto add_autob_numpy = make_shared<op::Add>(a, e, op::AutoBroadcastType::NUMPY);
auto sub_autob_numpy = make_shared<op::Subtract>(a, e, op::AutoBroadcastType::NUMPY);
auto mul_autob_numpy = make_shared<op::Multiply>(a, e, op::AutoBroadcastType::NUMPY);
auto div_autob_numpy = make_shared<op::Divide>(a, g, op::AutoBroadcastType::NUMPY);
auto min_autob_numpy = make_shared<op::Minimum>(a, f, op::AutoBroadcastType::NUMPY);
auto max_autob_numpy = make_shared<op::Maximum>(a, f, op::AutoBroadcastType::NUMPY);
auto equal_autob_numpy = make_shared<op::Equal>(a, g, op::AutoBroadcastType::NUMPY);
auto not_equal_autob_numpy = make_shared<op::NotEqual>(a, g, op::AutoBroadcastType::NUMPY);
auto greater_autob_numpy = make_shared<op::Greater>(a, g, op::AutoBroadcastType::NUMPY);
auto greater_eq_autob_numpy = make_shared<op::GreaterEq>(a, g, op::AutoBroadcastType::NUMPY);
auto less_autob_numpy = make_shared<op::Less>(a, g, op::AutoBroadcastType::NUMPY);
auto less_eq_autob_numpy = make_shared<op::LessEq>(a, g, op::AutoBroadcastType::NUMPY);
auto logical_and_autob_numpy = make_shared<op::And>(h, i, op::AutoBroadcastType::NUMPY);
auto logical_or_autob_numpy = make_shared<op::Or>(h, i, op::AutoBroadcastType::NUMPY);
auto logical_xor_autob_numpy = make_shared<op::Xor>(h, i, op::AutoBroadcastType::NUMPY);
auto neg_sqrt = make_shared<op::Sqrt>(c);
auto f = make_shared<Function>(NodeVector{add, sub, mul, divn, min, max, absn, neg, sqrt},
ParameterVector{});
auto f_error = make_shared<Function>(NodeVector{neg_sqrt}, ParameterVector{});
auto func = make_shared<Function>(NodeVector{add,
sub,
mul,
divn,
min,
max,
absn,
neg,
sqrt,
add_autob_numpy,
sub_autob_numpy,
mul_autob_numpy,
div_autob_numpy,
min_autob_numpy,
max_autob_numpy,
equal_autob_numpy,
not_equal_autob_numpy,
greater_autob_numpy,
greater_eq_autob_numpy,
less_autob_numpy,
less_eq_autob_numpy,
logical_and_autob_numpy,
logical_or_autob_numpy,
logical_xor_autob_numpy},
ParameterVector{});
auto func_error = make_shared<Function>(NodeVector{neg_sqrt}, ParameterVector{});
pass::Manager pass_manager;
pass_manager.register_pass<pass::ConstantFolding>();
pass_manager.run_passes(f);
pass_manager.run_passes(func);
// expected values
vector<int> add_expected{2, 4, 6, 8};
......@@ -206,17 +254,47 @@ TEST(constant_folding, constant_unary_binary)
vector<int> max_expected{1, 2, 3, 4};
vector<int> abs_neg_expected{1, 1, 1, 1};
vector<int> sqrt_expected{1, 2, 3, 4};
ASSERT_EQ(get_result_constant<int>(f, 0), add_expected);
ASSERT_EQ(get_result_constant<int>(f, 1), sub_expected);
ASSERT_EQ(get_result_constant<int>(f, 2), mul_expected);
ASSERT_EQ(get_result_constant<int>(f, 3), div_expected);
ASSERT_EQ(get_result_constant<int>(f, 4), min_expected);
ASSERT_EQ(get_result_constant<int>(f, 5), max_expected);
ASSERT_EQ(get_result_constant<int>(f, 6), abs_neg_expected);
ASSERT_EQ(get_result_constant<int>(f, 7), abs_neg_expected);
ASSERT_EQ(get_result_constant<int>(f, 8), sqrt_expected);
ASSERT_ANY_THROW(pass_manager.run_passes(f_error));
vector<int> add_autob_numpy_expected{6, 8, 8, 10};
vector<int> sub_autob_numpy_expected{-4, -4, -2, -2};
vector<int> mul_autob_numpy_expected{5, 12, 15, 24};
vector<int> div_autob_numpy_expected{1, 0, 3, 1};
vector<int> min_autob_numpy_expected{0, 2, 0, 4};
vector<int> max_autob_numpy_expected{1, 10, 3, 10};
vector<char> equal_autob_numpy_expected{1, 0, 0, 1};
vector<char> not_equal_autob_numpy_expected{0, 1, 1, 0};
vector<char> greater_autob_numpy_expected{0, 0, 1, 0};
vector<char> greater_eq_autob_numpy_expected{1, 0, 1, 1};
vector<char> less_autob_numpy_expected{0, 1, 0, 0};
vector<char> less_eq_autob_numpy_expected{1, 1, 0, 1};
vector<char> logical_and_autob_numpy_expected{0, 0, 0, 1};
vector<char> logical_or_autob_numpy_expected{0, 1, 1, 1};
vector<char> logical_xor_autob_numpy_expected{0, 1, 1, 0};
ASSERT_EQ(get_result_constant<int>(func, 0), add_expected);
ASSERT_EQ(get_result_constant<int>(func, 1), sub_expected);
ASSERT_EQ(get_result_constant<int>(func, 2), mul_expected);
ASSERT_EQ(get_result_constant<int>(func, 3), div_expected);
ASSERT_EQ(get_result_constant<int>(func, 4), min_expected);
ASSERT_EQ(get_result_constant<int>(func, 5), max_expected);
ASSERT_EQ(get_result_constant<int>(func, 6), abs_neg_expected);
ASSERT_EQ(get_result_constant<int>(func, 7), abs_neg_expected);
ASSERT_EQ(get_result_constant<int>(func, 8), sqrt_expected);
ASSERT_EQ(get_result_constant<int>(func, 9), add_autob_numpy_expected);
ASSERT_EQ(get_result_constant<int>(func, 10), sub_autob_numpy_expected);
ASSERT_EQ(get_result_constant<int>(func, 11), mul_autob_numpy_expected);
ASSERT_EQ(get_result_constant<int>(func, 12), div_autob_numpy_expected);
ASSERT_EQ(get_result_constant<int>(func, 13), min_autob_numpy_expected);
ASSERT_EQ(get_result_constant<int>(func, 14), max_autob_numpy_expected);
ASSERT_EQ(get_result_constant<char>(func, 15), equal_autob_numpy_expected);
ASSERT_EQ(get_result_constant<char>(func, 16), not_equal_autob_numpy_expected);
ASSERT_EQ(get_result_constant<char>(func, 17), greater_autob_numpy_expected);
ASSERT_EQ(get_result_constant<char>(func, 18), greater_eq_autob_numpy_expected);
ASSERT_EQ(get_result_constant<char>(func, 19), less_autob_numpy_expected);
ASSERT_EQ(get_result_constant<char>(func, 20), less_eq_autob_numpy_expected);
ASSERT_EQ(get_result_constant<char>(func, 21), logical_and_autob_numpy_expected);
ASSERT_EQ(get_result_constant<char>(func, 22), logical_or_autob_numpy_expected);
ASSERT_EQ(get_result_constant<char>(func, 23), logical_xor_autob_numpy_expected);
ASSERT_ANY_THROW(pass_manager.run_passes(func_error));
}
TEST(constant_folding, const_dequantize)
......
......@@ -1172,7 +1172,7 @@ static std::vector<T> get_result_constant(std::shared_ptr<Function> f, size_t po
TEST(cpu_test, constant_unary_binary)
{
Shape shape_in{4};
Shape shape_in{2, 2};
vector<int> values_a{1, 2, 3, 4};
vector<int> values_b{1, 2, 3, 4};
vector<int> values_c{-1, -1, -1, -1};
......@@ -1184,6 +1184,7 @@ TEST(cpu_test, constant_unary_binary)
vector<char> values_i{0, 0, 1, 1};
vector<char> values_j{0, 1, 0, 1};
vector<float> values_k{-0.1f, 0.0f, -1.5f, 2.6f};
vector<int> values_l{1, 2};
auto a = make_shared<op::Constant>(element::i32, shape_in, values_a);
auto b = make_shared<op::Constant>(element::i32, shape_in, values_b);
auto c = make_shared<op::Constant>(element::i32, shape_in, values_c);
......@@ -1195,6 +1196,7 @@ TEST(cpu_test, constant_unary_binary)
auto i = make_shared<op::Constant>(element::boolean, shape_in, values_i);
auto j = make_shared<op::Constant>(element::boolean, shape_in, values_j);
auto k = make_shared<op::Constant>(element::f32, shape_in, values_k);
auto l = make_shared<op::Constant>(element::i32, Shape{2}, values_l);
auto add = a + b;
auto sub = a - b;
......@@ -1220,12 +1222,17 @@ TEST(cpu_test, constant_unary_binary)
auto ceil = make_shared<op::Ceiling>(k);
auto floor = make_shared<op::Floor>(k);
auto logical_not = make_shared<op::Not>(j);
// Note: The CPU functors do not actually support autobroadcast yet; instead the pass itself
// falls back if autobroadcasting is in use. Putting this check here just to make sure the
// fallback works as expected, but if direct support for autobroadcast is added to the CPU
// folders we should add more comprehensive tests here. --amprocte
auto add_autob_numpy = make_shared<op::Add>(a, l, op::AutoBroadcastType::NUMPY);
auto func = make_shared<Function>(
NodeVector{add, sub, mul, divn, min, max,
absn, neg, sqrt, relu, sign, equal,
not_equal, greater, greater_eq, less, less_eq, logical_and,
logical_or, logical_xor, ceil, floor, logical_not},
NodeVector{add, sub, mul, divn, min, max,
absn, neg, sqrt, relu, sign, equal,
not_equal, greater, greater_eq, less, less_eq, logical_and,
logical_or, logical_xor, ceil, floor, logical_not, add_autob_numpy},
ParameterVector{});
auto func_error = make_shared<Function>(NodeVector{neg_sqrt}, ParameterVector{});
......@@ -1282,6 +1289,7 @@ TEST(cpu_test, constant_unary_binary)
vector<float> ceil_expected{0.0f, 0.0f, -1.0f, 3.0f};
vector<float> floor_expected{-1.0f, 0.0f, -2.0f, 2.0f};
vector<char> not_expected{1, 0, 1, 0};
vector<int> add_autob_numpy_expected{2, 4, 4, 6};
ASSERT_EQ(get_result_constant<int>(func, 0), add_expected);
ASSERT_EQ(get_result_constant<int>(func, 1), sub_expected);
......@@ -1308,6 +1316,7 @@ TEST(cpu_test, constant_unary_binary)
ASSERT_TRUE(test::all_close_f(
get_result_constant<float>(func, 21), floor_expected, MIN_FLOAT_TOLERANCE_BITS));
ASSERT_EQ(get_result_constant<char>(func, 22), not_expected);
ASSERT_EQ(get_result_constant<int>(func, 23), add_autob_numpy_expected);
ASSERT_ANY_THROW(pass_manager.run_passes(func_error));
}
......