Commit e7799ae2 authored by Adam Procter, committed by GitHub

Implement reduce operator through VM (#181)

parent aa3d8338
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------------------------------------------------------

#pragma once

#include "ngraph/runtime/external_function.hpp"
#include "ngraph/runtime/ngvm/call_frame.hpp"
#include "ngraph/runtime/ngvm/eigen/utils.hpp"
#include "ngraph/runtime/ngvm/instruction.hpp"
#include "ngraph/runtime/tensor_view.hpp"

namespace ngraph
{
    namespace runtime
    {
        namespace ngvm
        {
            namespace eigen
            {
                // Reduces a matrix to a vector by folding the user-supplied
                // reduction function down each column (Eigen's colwise().redux).
                template <typename ET>
                class ReduceMatrixColumnsInstruction : public Instruction
                {
                public:
                    ReduceMatrixColumnsInstruction(std::shared_ptr<ExternalFunction> ef,
                                                   const TensorViewInfo& arg0,
                                                   const TensorViewInfo& arg1,
                                                   const TensorViewInfo& out)
                        : m_external_function(ef)
                        , m_arg0(arg0)
                        , m_arg1(arg1)
                        , m_out(out)
                    {
                    }

                    virtual void execute(CallFrame& call_frame) const override
                    {
                        auto ef = m_external_function;
                        // Wrap the compiled reduction function as a scalar binary
                        // functor. Note that this builds a fresh call frame and three
                        // scalar tensors for every pair of elements it combines.
                        auto f = [ef](typename ET::type x, typename ET::type y) -> typename ET::type
                        {
                            std::shared_ptr<CallFrame> cf =
                                std::dynamic_pointer_cast<CallFrame>(ef->make_call_frame());
                            auto tx = ngraph::runtime::make_tensor<ET>(Shape{});
                            *tx = std::vector<typename ET::type>({x});
                            auto ty = ngraph::runtime::make_tensor<ET>(Shape{});
                            *ty = std::vector<typename ET::type>({y});
                            auto tr = ngraph::runtime::make_tensor<ET>(Shape{});
                            (*cf)({tx, ty}, {tr});
                            return tr->get_vector()[0];
                        };
                        EigenVector<ET>(call_frame, m_out) =
                            EigenMatrix<ET>(call_frame, m_arg0).colwise().redux(f);
                    }

                protected:
                    std::shared_ptr<ExternalFunction> m_external_function;
                    TensorViewInfo m_arg0;
                    // Base element; unused at execute time (zero-size axes are handled
                    // at compile time with copy/broadcast instructions instead).
                    TensorViewInfo m_arg1;
                    TensorViewInfo m_out;
                };
            }
        }
    }
}
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------------------------------------------------------

#pragma once

#include "ngraph/runtime/external_function.hpp"
#include "ngraph/runtime/ngvm/call_frame.hpp"
#include "ngraph/runtime/ngvm/eigen/utils.hpp"
#include "ngraph/runtime/ngvm/instruction.hpp"
#include "ngraph/runtime/tensor_view.hpp"

namespace ngraph
{
    namespace runtime
    {
        namespace ngvm
        {
            namespace eigen
            {
                // Reduces a matrix to a vector by folding the user-supplied
                // reduction function across each row (Eigen's rowwise().redux).
                template <typename ET>
                class ReduceMatrixRowsInstruction : public Instruction
                {
                public:
                    ReduceMatrixRowsInstruction(std::shared_ptr<ExternalFunction> ef,
                                                const TensorViewInfo& arg0,
                                                const TensorViewInfo& arg1,
                                                const TensorViewInfo& out)
                        : m_external_function(ef)
                        , m_arg0(arg0)
                        , m_arg1(arg1)
                        , m_out(out)
                    {
                    }

                    virtual void execute(CallFrame& call_frame) const override
                    {
                        auto ef = m_external_function;
                        // Wrap the compiled reduction function as a scalar binary
                        // functor (a fresh call frame per pair of elements).
                        auto f = [ef](typename ET::type x, typename ET::type y) -> typename ET::type
                        {
                            std::shared_ptr<CallFrame> cf =
                                std::dynamic_pointer_cast<CallFrame>(ef->make_call_frame());
                            auto tx = ngraph::runtime::make_tensor<ET>(Shape{});
                            *tx = std::vector<typename ET::type>({x});
                            auto ty = ngraph::runtime::make_tensor<ET>(Shape{});
                            *ty = std::vector<typename ET::type>({y});
                            auto tr = ngraph::runtime::make_tensor<ET>(Shape{});
                            (*cf)({tx, ty}, {tr});
                            return tr->get_vector()[0];
                        };
                        EigenVector<ET>(call_frame, m_out) =
                            EigenMatrix<ET>(call_frame, m_arg0).rowwise().redux(f);
                    }

                protected:
                    std::shared_ptr<ExternalFunction> m_external_function;
                    TensorViewInfo m_arg0;
                    TensorViewInfo m_arg1;
                    TensorViewInfo m_out;
                };
            }
        }
    }
}
// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------------------------------------------------------

#pragma once

#include "ngraph/runtime/external_function.hpp"
#include "ngraph/runtime/ngvm/call_frame.hpp"
#include "ngraph/runtime/ngvm/eigen/utils.hpp"
#include "ngraph/runtime/ngvm/instruction.hpp"
#include "ngraph/runtime/tensor_view.hpp"

namespace ngraph
{
    namespace runtime
    {
        namespace ngvm
        {
            namespace eigen
            {
                // Reduces all elements of a tensor view to a single scalar by
                // folding the user-supplied reduction function across them
                // (Eigen's redux).
                template <typename ET>
                class ReduceToScalarInstruction : public Instruction
                {
                public:
                    ReduceToScalarInstruction(std::shared_ptr<ExternalFunction> ef,
                                              const TensorViewInfo& arg0,
                                              const TensorViewInfo& arg1,
                                              const TensorViewInfo& out)
                        : m_external_function(ef)
                        , m_arg0(arg0)
                        , m_arg1(arg1)
                        , m_out(out)
                    {
                    }

                    virtual void execute(CallFrame& call_frame) const override
                    {
                        auto ef = m_external_function;
                        // Wrap the compiled reduction function as a scalar binary
                        // functor (a fresh call frame per pair of elements).
                        auto f = [ef](typename ET::type x, typename ET::type y) -> typename ET::type
                        {
                            std::shared_ptr<CallFrame> cf =
                                std::dynamic_pointer_cast<CallFrame>(ef->make_call_frame());
                            auto tx = ngraph::runtime::make_tensor<ET>(Shape{});
                            *tx = std::vector<typename ET::type>({x});
                            auto ty = ngraph::runtime::make_tensor<ET>(Shape{});
                            *ty = std::vector<typename ET::type>({y});
                            auto tr = ngraph::runtime::make_tensor<ET>(Shape{});
                            (*cf)({tx, ty}, {tr});
                            return tr->get_vector()[0];
                        };
                        EigenArray1d<ET>(call_frame, m_out) =
                            EigenArray1d<ET>(call_frame, m_arg0).redux(f);
                    }

                protected:
                    std::shared_ptr<ExternalFunction> m_external_function;
                    TensorViewInfo m_arg0;
                    TensorViewInfo m_arg1;
                    TensorViewInfo m_out;
                };
            }
        }
    }
}
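
All three instruction classes above use the same mechanism: wrap the compiled reduction
function in a scalar binary functor and hand it to one of Eigen's redux forms. Below is a
minimal standalone sketch of just the Eigen idioms involved, assuming nothing but Eigen
itself; plain addition stands in for the compiled reduction function, and the matrix values
mirror the reduce_matrix_columns/reduce_matrix_rows tests further down.

    #include <Eigen/Dense>
    #include <iostream>

    int main()
    {
        Eigen::MatrixXf m(3, 2);
        m << 1, 2,
             3, 4,
             5, 6;

        // Stand-in for the compiled reduction function.
        auto f = [](float x, float y) { return x + y; };

        // ReduceMatrixColumnsInstruction (reduction axes {0}):
        // fold down each column, producing one value per column.
        Eigen::RowVectorXf by_cols = m.colwise().redux(f); // 9 12

        // ReduceMatrixRowsInstruction (reduction axes {1}):
        // fold across each row, producing one value per row.
        Eigen::VectorXf by_rows = m.rowwise().redux(f); // 3 7 11

        // ReduceToScalarInstruction (all axes): fold everything to one value.
        float total = m.redux(f); // 21

        std::cout << by_cols << "\n" << by_rows.transpose() << "\n" << total << "\n";
    }

Worth noting as a design trade-off: the instructions pay for their generality, since each
application of f builds a fresh call frame and three scalar tensors, so a fused instruction
would be the obvious optimization if this path ever became hot.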
@@ -76,6 +76,9 @@
#include "ngraph/runtime/ngvm/eigen/multiply.hpp"
#include "ngraph/runtime/ngvm/eigen/negate.hpp"
#include "ngraph/runtime/ngvm/eigen/not_equal.hpp"
#include "ngraph/runtime/ngvm/eigen/reduce_matrix_columns.hpp"
#include "ngraph/runtime/ngvm/eigen/reduce_matrix_rows.hpp"
#include "ngraph/runtime/ngvm/eigen/reduce_to_scalar.hpp"
#include "ngraph/runtime/ngvm/eigen/return.hpp"
#include "ngraph/runtime/ngvm/eigen/scalar_tensor_product.hpp"
#include "ngraph/runtime/ngvm/eigen/select.hpp"
@@ -624,7 +627,7 @@ ExternalFunction::OpMap& ExternalFunction::get_op_map()
        }
        catch (const std::out_of_range&)
        {
-           external = make_shared<ExternalFunction>(function_call->get_function());
+           external = make_shared<ExternalFunction>(function);
            function_map.insert({function, external});
        }
@@ -632,7 +635,148 @@ ExternalFunction::OpMap& ExternalFunction::get_op_map()
            make_shared<eigen::CallInstruction>(external, in, out));
    };
-   REGISTER_TO_OP_MAP(op::Reduce) { throw ngraph_error("op::Reduce not implemented yet"); };

    REGISTER_TO_OP_MAP(op::Reduce)
    {
        auto reduce = static_cast<const op::Reduce*>(n);
        auto reduction_function = reduce->get_reduction_function();

        std::shared_ptr<ExternalFunction> external;

        try
        {
            external = function_map.at(reduction_function);
        }
        catch (const std::out_of_range&)
        {
            external = make_shared<ExternalFunction>(reduction_function);
            function_map.insert({reduction_function, external});
        }

        auto reductee_type = reduce->get_arguments().at(0)->get_value_type();
        auto reductee_tensor_view_type =
            dynamic_pointer_cast<const TensorViewType>(reductee_type);
        assert(nullptr != reductee_tensor_view_type);
        auto reductee_shape = reductee_tensor_view_type->get_shape();

        auto f_result_type = reduction_function->get_result_type();
        auto f_result_tensor_view_type =
            dynamic_pointer_cast<const TensorViewType>(f_result_type);
        assert(nullptr != f_result_tensor_view_type);
        auto& f_result_element_type = f_result_tensor_view_type->get_element_type();

        auto result_type = reduce->get_value_type();
        auto result_tensor_view_type = dynamic_pointer_cast<const TensorViewType>(result_type);
        assert(nullptr != result_tensor_view_type);
        auto result_shape = result_tensor_view_type->get_shape();

        auto& reduction_axes = reduce->get_reduction_axes();

        // Trivial case: no reduction axes (this includes the scalar-reductee case).
        if (reduction_axes.empty())
        {
            PUSH_POLYMORPHIC_INSTRUCTION(f_result_element_type,
                                         "Reduce has unhandled element type",
                                         runtime::ngvm::eigen::CopyInstruction,
                                         in.at(0).get_index(),
                                         out.at(0).get_index());
        }
        // Behavior for zero-size axes bears some explanation here. XLA's reduce
        // operator provides a "base" element (usually, but not necessarily,
        // an identity element) that it apparently *may* choose to insert anywhere
        // in the reduction any number of times. For example, given:
        //
        //   reduce({1,2,3},b,+)
        //
        // any of the following are valid reductions (I think!):
        //
        //   b+(b+1+2)+3
        //   b+(1+(2+3))
        //   (1+2)+3 (I think!)
        //
        // etc. Here we will choose never to instantiate the base element, which
        // works well with Eigen's default behavior for non-zero-length axes. The
        // exceptional case is when we reduce on a zero-length axis. In this case,
        // Eigen's default behavior is to put a zero in the output, which is not
        // what we want, so we detect that case here and override with a copy
        // instruction (for reduce-to-scalar) or a broadcast (for reduce-to-vector)
        // from the base element. For example, reducing float32[3,0] along axis 1
        // with base b yields {b,b,b} (see the reduce_matrix_rows_zero test below).
        //
        // What I'm actually not sure about is whether the identity element is
        // required to appear at least once. If so, this will need to be reworked,
        // assuming we actually want to mimic XLA's semantics that closely, which
        // we may not.
        else if ((reductee_shape.size() == 1 && reduction_axes == AxisSet{0}) ||
                 (reductee_shape.size() == 2 && reduction_axes == AxisSet{0, 1}))
        {
            if (reductee_shape.at(0) == 0 ||
                (reductee_shape.size() == 2 && reductee_shape.at(1) == 0))
            {
                PUSH_POLYMORPHIC_INSTRUCTION(f_result_element_type,
                                             "Reduce has unhandled element type",
                                             runtime::ngvm::eigen::CopyInstruction,
                                             in.at(1).get_index(),
                                             out.at(0).get_index());
            }
            else
            {
                PUSH_POLYMORPHIC_INSTRUCTION(f_result_element_type,
                                             "Reduce has unhandled element type",
                                             runtime::ngvm::eigen::ReduceToScalarInstruction,
                                             external,
                                             in[0],
                                             in[1],
                                             out[0]);
            }
        }
        else if (reductee_shape.size() == 2 && reduction_axes == AxisSet{1})
        {
            if (reductee_shape.at(1) == 0)
            {
                PUSH_POLYMORPHIC_INSTRUCTION(f_result_element_type,
                                             "Reduce has unhandled element type",
                                             runtime::ngvm::eigen::BroadcastScalarInstruction,
                                             in[1],
                                             out[0]);
            }
            else
            {
                PUSH_POLYMORPHIC_INSTRUCTION(f_result_element_type,
                                             "Reduce has unhandled element type",
                                             runtime::ngvm::eigen::ReduceMatrixRowsInstruction,
                                             external,
                                             in[0],
                                             in[1],
                                             out[0]);
            }
        }
        else if (reductee_shape.size() == 2 && reduction_axes == AxisSet{0})
        {
            if (reductee_shape.at(0) == 0)
            {
                PUSH_POLYMORPHIC_INSTRUCTION(f_result_element_type,
                                             "Reduce has unhandled element type",
                                             runtime::ngvm::eigen::BroadcastScalarInstruction,
                                             in[1],
                                             out[0]);
            }
            else
            {
                PUSH_POLYMORPHIC_INSTRUCTION(
                    f_result_element_type,
                    "Reduce has unhandled element type",
                    runtime::ngvm::eigen::ReduceMatrixColumnsInstruction,
                    external,
                    in[0],
                    in[1],
                    out[0]);
            }
        }
        else
        {
            throw ngraph_error("Reduce: only vectors and matrices are currently supported");
        }
    };

        initialized = true;
    }
    return op_map;
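
To make the case analysis above easier to scan, here is the same dispatch condensed into a
self-contained sketch. ReduceKind and classify_reduce are illustrative names invented for
this summary, not part of ngraph; Shape and AxisSet are approximated with standard
containers.

    #include <cstddef>
    #include <set>
    #include <vector>

    using Shape = std::vector<std::size_t>;  // stand-in for ngraph::Shape
    using AxisSet = std::set<std::size_t>;   // stand-in for ngraph::AxisSet

    enum class ReduceKind
    {
        Copy,           // no reduction axes: copy the reductee through unchanged
        CopyBase,       // reduce-to-scalar over a zero-size reductee: copy the base element
        BroadcastBase,  // zero-length reduced axis: broadcast the base element
        ToScalar,       // ReduceToScalarInstruction
        MatrixRows,     // ReduceMatrixRowsInstruction (axes {1})
        MatrixColumns,  // ReduceMatrixColumnsInstruction (axes {0})
        Unsupported     // rank > 2 or any other axis combination
    };

    ReduceKind classify_reduce(const Shape& s, const AxisSet& axes)
    {
        if (axes.empty())
            return ReduceKind::Copy;

        if ((s.size() == 1 && axes == AxisSet{0}) ||
            (s.size() == 2 && axes == AxisSet{0, 1}))
        {
            bool has_zero = s.at(0) == 0 || (s.size() == 2 && s.at(1) == 0);
            return has_zero ? ReduceKind::CopyBase : ReduceKind::ToScalar;
        }

        if (s.size() == 2 && axes == AxisSet{1})
            return s.at(1) == 0 ? ReduceKind::BroadcastBase : ReduceKind::MatrixRows;

        if (s.size() == 2 && axes == AxisSet{0})
            return s.at(0) == 0 ? ReduceKind::BroadcastBase : ReduceKind::MatrixColumns;

        return ReduceKind::Unsupported;
    }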
@@ -1322,3 +1322,301 @@ TEST(execute, convert_float32_bool)
    (*cf)({a}, {result});
    ASSERT_EQ((vector<element::Bool::type>{1, 2, 3, 4}), result->get_vector());
}

// Trivial case with no reduction axes.
TEST(execute, reduce_trivial)
{
    // First, the reduction function (f(x:float32[],y:float32[]) = x+y).
    auto f_A = make_shared<op::Parameter>(element::Float32::element_type(), Shape{});
    auto f_B = make_shared<op::Parameter>(element::Float32::element_type(), Shape{});
    auto f_rt = make_shared<TensorViewType>(element::Float32::element_type(), Shape{});
    auto f = make_shared<Function>(make_shared<op::Add>(f_A, f_B), f_rt, op::Parameters{f_A, f_B});

    // Now the reduction (g(x:float32[2,2],y:float32[]) = reduce(x,y,f,axes={})).
    auto shape = Shape{2, 2};
    auto g_A = make_shared<op::Parameter>(element::Float32::element_type(), shape);
    auto g_B = make_shared<op::Parameter>(element::Float32::element_type(), Shape{});
    auto g_rt = make_shared<TensorViewType>(element::Float32::element_type(), shape);
    auto g = make_shared<Function>(
        make_shared<op::Reduce>(g_A, g_B, f, AxisSet{}), g_rt, op::Parameters{g_A, g_B});

    auto manager = runtime::Manager::get("NGVM");
    auto external = manager->compile(g);
    auto backend = manager->allocate_backend();
    auto cf = backend->make_call_frame(external);

    // Create some tensors for input/output
    auto a = ngraph::runtime::make_tensor<element::Float32>(shape);
    *a = vector<float>{1, 2, 3, 4};
    auto b = ngraph::runtime::make_tensor<element::Float32>(Shape{});
    *b = vector<float>{0};
    auto result = ngraph::runtime::make_tensor<element::Float32>(shape);

    (*cf)({a, b}, {result});
    ASSERT_EQ((vector<float>{1, 2, 3, 4}), result->get_vector());
}

TEST(execute, reduce_to_scalar)
{
    // First, the reduction function (f(x:float32[],y:float32[]) = x+y).
    auto f_A = make_shared<op::Parameter>(element::Float32::element_type(), Shape{});
    auto f_B = make_shared<op::Parameter>(element::Float32::element_type(), Shape{});
    auto f_rt = make_shared<TensorViewType>(element::Float32::element_type(), Shape{});
    auto f = make_shared<Function>(make_shared<op::Add>(f_A, f_B), f_rt, op::Parameters{f_A, f_B});

    // Now the reduction (g(x:float32[2,2],y:float32[]) = reduce(x,y,f,axes={0,1})).
    auto shape = Shape{2, 2};
    auto g_A = make_shared<op::Parameter>(element::Float32::element_type(), shape);
    auto g_B = make_shared<op::Parameter>(element::Float32::element_type(), Shape{});
    auto g_rt = make_shared<TensorViewType>(element::Float32::element_type(), Shape{});
    auto g = make_shared<Function>(
        make_shared<op::Reduce>(g_A, g_B, f, AxisSet{0, 1}), g_rt, op::Parameters{g_A, g_B});

    auto manager = runtime::Manager::get("NGVM");
    auto external = manager->compile(g);
    auto backend = manager->allocate_backend();
    auto cf = backend->make_call_frame(external);

    // Create some tensors for input/output
    auto a = ngraph::runtime::make_tensor<element::Float32>(shape);
    *a = vector<float>{1, 2, 3, 4};
    auto b = ngraph::runtime::make_tensor<element::Float32>(Shape{});
    *b = vector<float>{0};
    auto result = ngraph::runtime::make_tensor<element::Float32>(Shape{});

    (*cf)({a, b}, {result});
    ASSERT_EQ((vector<float>{10}), result->get_vector());

    // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the
    // input tensors, so let's do this too.
    ASSERT_EQ((vector<float>{1, 2, 3, 4}), a->get_vector());
    ASSERT_EQ((vector<float>{0}), b->get_vector());
}

TEST(execute, reduce_matrix_columns)
{
    // First, the reduction function (f(x:float32[],y:float32[]) = x+y).
    auto f_A = make_shared<op::Parameter>(element::Float32::element_type(), Shape{});
    auto f_B = make_shared<op::Parameter>(element::Float32::element_type(), Shape{});
    auto f_rt = make_shared<TensorViewType>(element::Float32::element_type(), Shape{});
    auto f = make_shared<Function>(make_shared<op::Add>(f_A, f_B), f_rt, op::Parameters{f_A, f_B});

    // Now the reduction (g(x:float32[3,2],y:float32[]) = reduce(x,y,f,axes={0})).
    auto shape_a = Shape{3, 2};
    auto g_A = make_shared<op::Parameter>(element::Float32::element_type(), shape_a);
    auto g_B = make_shared<op::Parameter>(element::Float32::element_type(), Shape{});
    auto shape_rt = Shape{2};
    auto g_rt = make_shared<TensorViewType>(element::Float32::element_type(), shape_rt);
    auto g = make_shared<Function>(
        make_shared<op::Reduce>(g_A, g_B, f, AxisSet{0}), g_rt, op::Parameters{g_A, g_B});

    auto manager = runtime::Manager::get("NGVM");
    auto external = manager->compile(g);
    auto backend = manager->allocate_backend();
    auto cf = backend->make_call_frame(external);

    // Create some tensors for input/output
    auto a = ngraph::runtime::make_tensor<element::Float32>(shape_a);
    *a = vector<float>{1, 2, 3, 4, 5, 6};
    auto b = ngraph::runtime::make_tensor<element::Float32>(Shape{});
    *b = vector<float>{0};
    auto result = ngraph::runtime::make_tensor<element::Float32>(shape_rt);

    (*cf)({a, b}, {result});
    ASSERT_EQ((vector<float>{9, 12}), result->get_vector());

    // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the
    // input tensors, so let's do this too.
    ASSERT_EQ((vector<float>{1, 2, 3, 4, 5, 6}), a->get_vector());
    ASSERT_EQ((vector<float>{0}), b->get_vector());
}

TEST(execute, reduce_matrix_rows)
{
    // First, the reduction function (f(x:float32[],y:float32[]) = x+y).
    auto f_A = make_shared<op::Parameter>(element::Float32::element_type(), Shape{});
    auto f_B = make_shared<op::Parameter>(element::Float32::element_type(), Shape{});
    auto f_rt = make_shared<TensorViewType>(element::Float32::element_type(), Shape{});
    auto f = make_shared<Function>(make_shared<op::Add>(f_A, f_B), f_rt, op::Parameters{f_A, f_B});

    // Now the reduction (g(x:float32[3,2],y:float32[]) = reduce(x,y,f,axes={1})).
    auto shape_a = Shape{3, 2};
    auto g_A = make_shared<op::Parameter>(element::Float32::element_type(), shape_a);
    auto g_B = make_shared<op::Parameter>(element::Float32::element_type(), Shape{});
    auto shape_rt = Shape{3};
    auto g_rt = make_shared<TensorViewType>(element::Float32::element_type(), shape_rt);
    auto g = make_shared<Function>(
        make_shared<op::Reduce>(g_A, g_B, f, AxisSet{1}), g_rt, op::Parameters{g_A, g_B});

    auto manager = runtime::Manager::get("NGVM");
    auto external = manager->compile(g);
    auto backend = manager->allocate_backend();
    auto cf = backend->make_call_frame(external);

    // Create some tensors for input/output
    auto a = ngraph::runtime::make_tensor<element::Float32>(shape_a);
    *a = vector<float>{1, 2, 3, 4, 5, 6};
    auto b = ngraph::runtime::make_tensor<element::Float32>(Shape{});
    *b = vector<float>{0};
    auto result = ngraph::runtime::make_tensor<element::Float32>(shape_rt);

    (*cf)({a, b}, {result});
    ASSERT_EQ((vector<float>{3, 7, 11}), result->get_vector());

    // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the
    // input tensors, so let's do this too.
    ASSERT_EQ((vector<float>{1, 2, 3, 4, 5, 6}), a->get_vector());
    ASSERT_EQ((vector<float>{0}), b->get_vector());
}

TEST(execute, reduce_matrix_rows_zero)
{
    // First, the reduction function (f(x:float32[],y:float32[]) = x+y).
    auto f_A = make_shared<op::Parameter>(element::Float32::element_type(), Shape{});
    auto f_B = make_shared<op::Parameter>(element::Float32::element_type(), Shape{});
    auto f_rt = make_shared<TensorViewType>(element::Float32::element_type(), Shape{});
    auto f = make_shared<Function>(make_shared<op::Add>(f_A, f_B), f_rt, op::Parameters{f_A, f_B});

    // Now the reduction (g(x:float32[3,0],y:float32[]) = reduce(x,y,f,axes={1})).
    auto shape_a = Shape{3, 0};
    auto g_A = make_shared<op::Parameter>(element::Float32::element_type(), shape_a);
    auto g_B = make_shared<op::Parameter>(element::Float32::element_type(), Shape{});
    auto shape_rt = Shape{3};
    auto g_rt = make_shared<TensorViewType>(element::Float32::element_type(), shape_rt);
    auto g = make_shared<Function>(
        make_shared<op::Reduce>(g_A, g_B, f, AxisSet{1}), g_rt, op::Parameters{g_A, g_B});

    auto manager = runtime::Manager::get("NGVM");
    auto external = manager->compile(g);
    auto backend = manager->allocate_backend();
    auto cf = backend->make_call_frame(external);

    // Create some tensors for input/output
    auto a = ngraph::runtime::make_tensor<element::Float32>(shape_a);
    *a = vector<float>{};
    auto b = ngraph::runtime::make_tensor<element::Float32>(Shape{});
    *b = vector<float>{66};
    auto result = ngraph::runtime::make_tensor<element::Float32>(shape_rt);

    (*cf)({a, b}, {result});
    ASSERT_EQ((vector<float>{66, 66, 66}), result->get_vector());

    // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the
    // input tensors, so let's do this too.
    ASSERT_EQ((vector<float>{}), a->get_vector());
    ASSERT_EQ((vector<float>{66}), b->get_vector());
}

TEST(execute, reduce_matrix_cols_zero)
{
    // First, the reduction function (f(x:float32[],y:float32[]) = x+y).
    auto f_A = make_shared<op::Parameter>(element::Float32::element_type(), Shape{});
    auto f_B = make_shared<op::Parameter>(element::Float32::element_type(), Shape{});
    auto f_rt = make_shared<TensorViewType>(element::Float32::element_type(), Shape{});
    auto f = make_shared<Function>(make_shared<op::Add>(f_A, f_B), f_rt, op::Parameters{f_A, f_B});

    // Now the reduction (g(x:float32[0,2],y:float32[]) = reduce(x,y,f,axes={0})).
    auto shape_a = Shape{0, 2};
    auto g_A = make_shared<op::Parameter>(element::Float32::element_type(), shape_a);
    auto g_B = make_shared<op::Parameter>(element::Float32::element_type(), Shape{});
    auto shape_rt = Shape{2};
    auto g_rt = make_shared<TensorViewType>(element::Float32::element_type(), shape_rt);
    auto g = make_shared<Function>(
        make_shared<op::Reduce>(g_A, g_B, f, AxisSet{0}), g_rt, op::Parameters{g_A, g_B});

    auto manager = runtime::Manager::get("NGVM");
    auto external = manager->compile(g);
    auto backend = manager->allocate_backend();
    auto cf = backend->make_call_frame(external);

    // Create some tensors for input/output
    auto a = ngraph::runtime::make_tensor<element::Float32>(shape_a);
    *a = vector<float>{};
    auto b = ngraph::runtime::make_tensor<element::Float32>(Shape{});
    *b = vector<float>{77};
    auto result = ngraph::runtime::make_tensor<element::Float32>(shape_rt);

    (*cf)({a, b}, {result});
    ASSERT_EQ((vector<float>{77, 77}), result->get_vector());

    // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the
    // input tensors, so let's do this too.
    ASSERT_EQ((vector<float>{}), a->get_vector());
    ASSERT_EQ((vector<float>{77}), b->get_vector());
}

TEST(execute, reduce_vector_zero)
{
    // First, the reduction function (f(x:float32[],y:float32[]) = x+y).
    auto f_A = make_shared<op::Parameter>(element::Float32::element_type(), Shape{});
    auto f_B = make_shared<op::Parameter>(element::Float32::element_type(), Shape{});
    auto f_rt = make_shared<TensorViewType>(element::Float32::element_type(), Shape{});
    auto f = make_shared<Function>(make_shared<op::Add>(f_A, f_B), f_rt, op::Parameters{f_A, f_B});

    // Now the reduction (g(x:float32[0],y:float32[]) = reduce(x,y,f,axes={0})).
    auto shape_a = Shape{0};
    auto g_A = make_shared<op::Parameter>(element::Float32::element_type(), shape_a);
    auto g_B = make_shared<op::Parameter>(element::Float32::element_type(), Shape{});
    auto shape_rt = Shape{};
    auto g_rt = make_shared<TensorViewType>(element::Float32::element_type(), shape_rt);
    auto g = make_shared<Function>(
        make_shared<op::Reduce>(g_A, g_B, f, AxisSet{0}), g_rt, op::Parameters{g_A, g_B});

    auto manager = runtime::Manager::get("NGVM");
    auto external = manager->compile(g);
    auto backend = manager->allocate_backend();
    auto cf = backend->make_call_frame(external);

    // Create some tensors for input/output
    auto a = ngraph::runtime::make_tensor<element::Float32>(shape_a);
    *a = vector<float>{};
    auto b = ngraph::runtime::make_tensor<element::Float32>(Shape{});
    *b = vector<float>{88};
    auto result = ngraph::runtime::make_tensor<element::Float32>(shape_rt);

    (*cf)({a, b}, {result});
    ASSERT_EQ((vector<float>{88}), result->get_vector());

    // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the
    // input tensors, so let's do this too.
    ASSERT_EQ((vector<float>{}), a->get_vector());
    ASSERT_EQ((vector<float>{88}), b->get_vector());
}

TEST(execute, reduce_matrix_to_scalar_zero_by_zero)
{
    // First, the reduction function (f(x:float32[],y:float32[]) = x+y).
    auto f_A = make_shared<op::Parameter>(element::Float32::element_type(), Shape{});
    auto f_B = make_shared<op::Parameter>(element::Float32::element_type(), Shape{});
    auto f_rt = make_shared<TensorViewType>(element::Float32::element_type(), Shape{});
    auto f = make_shared<Function>(make_shared<op::Add>(f_A, f_B), f_rt, op::Parameters{f_A, f_B});

    // Now the reduction (g(x:float32[0,0],y:float32[]) = reduce(x,y,f,axes={0,1})).
    auto shape_a = Shape{0, 0};
    auto g_A = make_shared<op::Parameter>(element::Float32::element_type(), shape_a);
    auto g_B = make_shared<op::Parameter>(element::Float32::element_type(), Shape{});
    auto shape_rt = Shape{};
    auto g_rt = make_shared<TensorViewType>(element::Float32::element_type(), shape_rt);
    auto g = make_shared<Function>(
        make_shared<op::Reduce>(g_A, g_B, f, AxisSet{0, 1}), g_rt, op::Parameters{g_A, g_B});

    auto manager = runtime::Manager::get("NGVM");
    auto external = manager->compile(g);
    auto backend = manager->allocate_backend();
    auto cf = backend->make_call_frame(external);

    // Create some tensors for input/output
    auto a = ngraph::runtime::make_tensor<element::Float32>(shape_a);
    *a = vector<float>{};
    auto b = ngraph::runtime::make_tensor<element::Float32>(Shape{});
    *b = vector<float>{99};
    auto result = ngraph::runtime::make_tensor<element::Float32>(shape_rt);

    (*cf)({a, b}, {result});
    ASSERT_EQ((vector<float>{99}), result->get_vector());

    // For some reason I'm feeling extra paranoid about making sure reduction doesn't clobber the
    // input tensors, so let's do this too.
    ASSERT_EQ((vector<float>{}), a->get_vector());
    ASSERT_EQ((vector<float>{99}), b->get_vector());
}
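
For quick reference, the reduce tests above exercise the following cases. Shapes, base
values, and expected outputs are taken directly from the tests:

    test                                  reductee  axes   base  expected result
    reduce_trivial                        [2,2]     {}     0     {1,2,3,4}
    reduce_to_scalar                      [2,2]     {0,1}  0     {10}
    reduce_matrix_columns                 [3,2]     {0}    0     {9,12}
    reduce_matrix_rows                    [3,2]     {1}    0     {3,7,11}
    reduce_matrix_rows_zero               [3,0]     {1}    66    {66,66,66}
    reduce_matrix_cols_zero               [0,2]     {0}    77    {77,77}
    reduce_vector_zero                    [0]       {0}    88    {88}
    reduce_matrix_to_scalar_zero_by_zero  [0,0]     {0,1}  99    {99}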