Commit 12876342 authored by Yixing Lao, committed by adstraw

CallFrame order (#702)

Adjust CallFrame argument order to match Function
parent 6b0b64b4
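
The change is mechanical but wide-reaching: every CallFrame::call and tensor_call now takes its output tensors first and its input tensors second, matching the results-before-parameters order of ngraph::Function construction (e.g. Function(ResultVector{...}, op::ParameterVector{...}), as seen in the graph_partition tests below). A minimal caller-side sketch, using hypothetical tensor views a, b, and result:

    // Before this commit: inputs first, then outputs.
    cf->call({a, b}, {result});
    // After this commit: outputs first, then inputs,
    // mirroring Function(results, parameters).
    cf->call({result}, {a, b});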
@@ -184,10 +184,10 @@ namespace
 }
 void xla::call(shared_ptr<runtime::CallFrame> call_frame,
-               const runtime::TensorViewPtrs& inputs,
-               const runtime::TensorViewPtrs& outputs)
+               const runtime::TensorViewPtrs& outputs,
+               const runtime::TensorViewPtrs& inputs)
 {
-    runtime::TensorViewPtrs flat_inputs(flatten(inputs));
     runtime::TensorViewPtrs flat_outputs(flatten(outputs));
-    call_frame->tensor_call(flat_inputs, flat_outputs);
+    runtime::TensorViewPtrs flat_inputs(flatten(inputs));
+    call_frame->tensor_call(flat_outputs, flat_inputs);
 }
@@ -100,7 +100,7 @@ namespace ngraph
     /// Invoke a call frame where some arguments might be XLATuples
     void call(std::shared_ptr<runtime::CallFrame> call_frame,
-              const ngraph::runtime::TensorViewPtrs& inputs,
-              const ngraph::runtime::TensorViewPtrs& outputs);
+              const ngraph::runtime::TensorViewPtrs& outputs,
+              const ngraph::runtime::TensorViewPtrs& inputs);
 }
 }
@@ -54,12 +54,12 @@ namespace ngraph
     virtual ~CallFrame() {}
     /// @brief Invoke the function with values matching the signature of the function.
     ///
-    virtual void call(const std::vector<std::shared_ptr<runtime::TensorView>>& inputs,
-                      const std::vector<std::shared_ptr<runtime::TensorView>>& outputs) = 0;
+    virtual void call(const std::vector<std::shared_ptr<runtime::TensorView>>& outputs,
+                      const std::vector<std::shared_ptr<runtime::TensorView>>& inputs) = 0;
     /// @brief Invoke the function
-    virtual void tensor_call(const TensorViewPtrs& inputs,
-                             const TensorViewPtrs& outputs) = 0;
+    virtual void tensor_call(const TensorViewPtrs& outputs,
+                             const TensorViewPtrs& inputs) = 0;
     virtual std::vector<PerformanceCounter> get_performance_data() const
     {
......
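
For backend authors, the base-class hunk above defines the contract every backend must now satisfy. A minimal sketch of a hypothetical subclass (MyCallFrame and its empty body are illustrative, not part of this commit), assuming the ngraph headers are available:

    class MyCallFrame : public ngraph::runtime::CallFrame
    {
    public:
        // Outputs precede inputs, per the new pure-virtual declarations above.
        void call(const std::vector<std::shared_ptr<ngraph::runtime::TensorView>>& outputs,
                  const std::vector<std::shared_ptr<ngraph::runtime::TensorView>>& inputs) override
        {
            tensor_call(outputs, inputs); // forward in the same outputs-first order
        }
        void tensor_call(const ngraph::runtime::TensorViewPtrs& outputs,
                         const ngraph::runtime::TensorViewPtrs& inputs) override
        {
            // Backend-specific execution of the compiled function would go here.
        }
    };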
@@ -38,8 +38,8 @@ runtime::cpu::CPU_CallFrame::~CPU_CallFrame()
 }
 void runtime::cpu::CPU_CallFrame::tensor_call(
-    const std::vector<std::shared_ptr<ngraph::runtime::TensorView>>& input_tvs,
-    const std::vector<std::shared_ptr<ngraph::runtime::TensorView>>& output_tvs)
+    const std::vector<std::shared_ptr<ngraph::runtime::TensorView>>& output_tvs,
+    const std::vector<std::shared_ptr<ngraph::runtime::TensorView>>& input_tvs)
 {
     vector<void*> inputs;
     vector<void*> outputs;
@@ -72,8 +72,8 @@ void runtime::cpu::CPU_CallFrame::tensor_call(
 }
 void runtime::cpu::CPU_CallFrame::call(
-    const std::vector<std::shared_ptr<runtime::TensorView>>& arguments,
-    const std::vector<std::shared_ptr<runtime::TensorView>>& results)
+    const std::vector<std::shared_ptr<runtime::TensorView>>& results,
+    const std::vector<std::shared_ptr<runtime::TensorView>>& arguments)
 {
     // TODO: Check types of args and result
     vector<shared_ptr<runtime::TensorView>> inputs;
@@ -88,7 +88,7 @@ void runtime::cpu::CPU_CallFrame::call(
         result->collect_tensor_views(outputs, result);
     }
-    tensor_call(inputs, outputs);
+    tensor_call(outputs, inputs);
 }
 void runtime::cpu::CPU_CallFrame::propagate_layouts(
......
@@ -52,14 +52,13 @@ namespace ngraph
     /// @brief Invoke the function with values matching the signature of the function.
     ///
     /// Tuples will be expanded into their tensor views to build the call frame.
-    void
-        call(const std::vector<std::shared_ptr<runtime::TensorView>>& inputs,
-             const std::vector<std::shared_ptr<runtime::TensorView>>& outputs) override;
+    void call(const std::vector<std::shared_ptr<runtime::TensorView>>& outputs,
+              const std::vector<std::shared_ptr<runtime::TensorView>>& inputs) override;
     /// @brief Invoke the function with tuples pre-expanded to their underlying
     /// tensor views.
-    void tensor_call(const std::vector<std::shared_ptr<TensorView>>& inputs,
-                     const std::vector<std::shared_ptr<TensorView>>& outputs) override;
+    void tensor_call(const std::vector<std::shared_ptr<TensorView>>& outputs,
+                     const std::vector<std::shared_ptr<TensorView>>& inputs) override;
     void propagate_layouts(const std::vector<std::shared_ptr<runtime::TensorView>>& tvs,
                            const LayoutDescriptorPtrs& layouts) const;
......
@@ -57,8 +57,8 @@ runtime::gpu::GPU_CallFrame::~GPU_CallFrame()
 }
 void runtime::gpu::GPU_CallFrame::tensor_call(
-    const std::vector<std::shared_ptr<ngraph::runtime::TensorView>>& input_tvs,
-    const std::vector<std::shared_ptr<ngraph::runtime::TensorView>>& output_tvs)
+    const std::vector<std::shared_ptr<ngraph::runtime::TensorView>>& output_tvs,
+    const std::vector<std::shared_ptr<ngraph::runtime::TensorView>>& input_tvs)
 {
     //Device tensors
     vector<void*> inputs;
@@ -81,8 +81,8 @@ void runtime::gpu::GPU_CallFrame::tensor_call(
 }
 void runtime::gpu::GPU_CallFrame::call(
-    const std::vector<std::shared_ptr<runtime::TensorView>>& arguments,
-    const std::vector<std::shared_ptr<runtime::TensorView>>& results)
+    const std::vector<std::shared_ptr<runtime::TensorView>>& results,
+    const std::vector<std::shared_ptr<runtime::TensorView>>& arguments)
 {
     // TODO: Check types of args and result
     vector<shared_ptr<runtime::TensorView>> inputs;
@@ -97,5 +97,5 @@ void runtime::gpu::GPU_CallFrame::call(
         result->collect_tensor_views(outputs, result);
     }
-    tensor_call(inputs, outputs);
+    tensor_call(outputs, inputs);
 }
@@ -57,14 +57,13 @@ namespace ngraph
     /// @brief Invoke the function with values matching the signature of the function.
     ///
     /// Tuples will be expanded into their tensor views to build the call frame.
-    void
-        call(const std::vector<std::shared_ptr<runtime::TensorView>>& inputs,
-             const std::vector<std::shared_ptr<runtime::TensorView>>& outputs) override;
+    void call(const std::vector<std::shared_ptr<runtime::TensorView>>& outputs,
+              const std::vector<std::shared_ptr<runtime::TensorView>>& inputs) override;
     /// @brief Invoke the function with tuples pre-expanded to their underlying
     /// tensor views.
-    void tensor_call(const std::vector<std::shared_ptr<TensorView>>& inputs,
-                     const std::vector<std::shared_ptr<TensorView>>& outputs) override;
+    void tensor_call(const std::vector<std::shared_ptr<TensorView>>& outputs,
+                     const std::vector<std::shared_ptr<TensorView>>& inputs) override;
 protected:
     std::shared_ptr<GPU_ExternalFunction> m_external_function;
......
@@ -36,8 +36,8 @@ runtime::interpreter::INT_CallFrame::INT_CallFrame(shared_ptr<ExternalFunction>
 void runtime::interpreter::INT_CallFrame::call(
     std::shared_ptr<Function> function,
-    const vector<shared_ptr<runtime::HostTensorView>>& input_tvs,
-    const vector<shared_ptr<runtime::HostTensorView>>& output_tvs)
+    const vector<shared_ptr<runtime::HostTensorView>>& output_tvs,
+    const vector<shared_ptr<runtime::HostTensorView>>& input_tvs)
 {
     if (m_nan_check)
     {
@@ -211,15 +211,15 @@ void runtime::interpreter::INT_CallFrame::generate_calls(
 }
 void runtime::interpreter::INT_CallFrame::tensor_call(
-    const vector<shared_ptr<runtime::HostTensorView>>& input_tvs,
-    const vector<shared_ptr<runtime::HostTensorView>>& output_tvs)
+    const vector<shared_ptr<runtime::HostTensorView>>& output_tvs,
+    const vector<shared_ptr<runtime::HostTensorView>>& input_tvs)
 {
-    call(m_function, input_tvs, output_tvs);
+    call(m_function, output_tvs, input_tvs);
 }
 void runtime::interpreter::INT_CallFrame::tensor_call(
-    const vector<shared_ptr<runtime::TensorView>>& input_tvs,
-    const vector<shared_ptr<runtime::TensorView>>& output_tvs)
+    const vector<shared_ptr<runtime::TensorView>>& output_tvs,
+    const vector<shared_ptr<runtime::TensorView>>& input_tvs)
 {
     vector<shared_ptr<runtime::HostTensorView>> args;
     vector<shared_ptr<runtime::HostTensorView>> out;
@@ -231,12 +231,12 @@ void runtime::interpreter::INT_CallFrame::tensor_call(
     {
         out.push_back(static_pointer_cast<runtime::HostTensorView>(tv));
     }
-    tensor_call(args, out);
+    tensor_call(out, args);
 }
 void runtime::interpreter::INT_CallFrame::call(
-    const vector<shared_ptr<runtime::TensorView>>& arguments,
-    const vector<shared_ptr<runtime::TensorView>>& results)
+    const vector<shared_ptr<runtime::TensorView>>& results,
+    const vector<shared_ptr<runtime::TensorView>>& arguments)
 {
     vector<shared_ptr<runtime::TensorView>> inputs;
     for (shared_ptr<runtime::TensorView> argument : arguments)
@@ -250,7 +250,7 @@ void runtime::interpreter::INT_CallFrame::call(
         result->collect_tensor_views(outputs, result);
     }
-    tensor_call(inputs, outputs);
+    tensor_call(outputs, inputs);
 }
 vector<runtime::PerformanceCounter>
......
@@ -135,8 +135,8 @@ public:
     /// @brief Invoke the function with values matching the signature of the function.
     ///
     /// Tuples will be expanded into their tensor views to build the call frame.
-    void call(const std::vector<std::shared_ptr<runtime::TensorView>>& inputs,
-              const std::vector<std::shared_ptr<runtime::TensorView>>& outputs) override;
+    void call(const std::vector<std::shared_ptr<runtime::TensorView>>& outputs,
+              const std::vector<std::shared_ptr<runtime::TensorView>>& inputs) override;
     std::vector<runtime::PerformanceCounter> get_performance_data() const override;
     void set_nan_check(bool);
@@ -144,13 +144,13 @@ public:
 private:
     /// @brief Invoke the function with tuples pre-expanded to their underlying
     /// tensor views.
-    void tensor_call(const std::vector<std::shared_ptr<TensorView>>& inputs,
-                     const std::vector<std::shared_ptr<TensorView>>& outputs) override;
-    void tensor_call(const std::vector<std::shared_ptr<HostTensorView>>& inputs,
-                     const std::vector<std::shared_ptr<HostTensorView>>& outputs);
+    void tensor_call(const std::vector<std::shared_ptr<TensorView>>& outputs,
+                     const std::vector<std::shared_ptr<TensorView>>& inputs) override;
+    void tensor_call(const std::vector<std::shared_ptr<HostTensorView>>& outputs,
+                     const std::vector<std::shared_ptr<HostTensorView>>& inputs);
     void call(std::shared_ptr<Function> function,
-              const std::vector<std::shared_ptr<runtime::HostTensorView>>& input_tvs,
-              const std::vector<std::shared_ptr<runtime::HostTensorView>>& output_tvs);
+              const std::vector<std::shared_ptr<runtime::HostTensorView>>& output_tvs,
+              const std::vector<std::shared_ptr<runtime::HostTensorView>>& input_tvs);
     static void perform_nan_check(const std::vector<std::shared_ptr<HostTensorView>>&,
                                   const Node* op = nullptr);
@@ -466,7 +466,7 @@ private:
     else if (node_op == "FunctionCall")
     {
         std::shared_ptr<Function> function = node.get_functions()[0];
-        call(function, args, out);
+        call(function, out, args);
     }
     else if (node_op == "Greater")
     {
@@ -643,7 +643,7 @@ private:
         node.get_output_element_type(0), Shape{}, "reduce_temp_r");
     *(reinterpret_cast<T*>(tx->get_data_ptr())) = x;
     *(reinterpret_cast<T*>(ty->get_data_ptr())) = y;
-    call(reduction_function, {tx, ty}, {tr});
+    call(reduction_function, {tr}, {tx, ty});
     return *(reinterpret_cast<T*>(tr->get_data_ptr()));
 };
@@ -671,7 +671,7 @@ private:
         node.get_output_element_type(0), Shape{}, "reduce_window_temp_r");
     *(reinterpret_cast<T*>(tx->get_data_ptr())) = x;
     *(reinterpret_cast<T*>(ty->get_data_ptr())) = y;
-    call(reduction_function, {tx, ty}, {tr});
+    call(reduction_function, {tr}, {tx, ty});
     return *(reinterpret_cast<T*>(tr->get_data_ptr()));
 };
@@ -763,7 +763,7 @@ private:
         element::boolean, Shape{}, "selection_temp_r");
     *(reinterpret_cast<T*>(tx->get_data_ptr())) = x;
     *(reinterpret_cast<T*>(ty->get_data_ptr())) = y;
-    call(selection_function, {tx, ty}, {tr});
+    call(selection_function, {tr}, {tx, ty});
     return *(reinterpret_cast<char*>(tr->get_data_ptr()));
 };
@@ -778,7 +778,7 @@ private:
         node.get_output_element_type(0), Shape{}, "scatter_temp_r");
     *(reinterpret_cast<T*>(tx->get_data_ptr())) = x;
     *(reinterpret_cast<T*>(ty->get_data_ptr())) = y;
-    call(scatter_function, {tx, ty}, {tr});
+    call(scatter_function, {tr}, {tx, ty});
     return *(reinterpret_cast<T*>(tr->get_data_ptr()));
 };
......
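
Note how the interpreter hunks above thread the new order through nested invocations: compound ops (FunctionCall, Reduce, ReduceWindow, select-and-scatter) evaluate a nested scalar Function once per element pair, so those inner call sites flip as well. A hedged sketch of the per-element pattern after this change, reusing the scalar temporaries tx, ty, tr from the hunks above (the lambda wrapper is illustrative, not the exact source):

    auto binary_op = [&](T x, T y) -> T {
        *(reinterpret_cast<T*>(tx->get_data_ptr())) = x;
        *(reinterpret_cast<T*>(ty->get_data_ptr())) = y;
        call(reduction_function, {tr}, {tx, ty}); // result tensor first, then the two scalar args
        return *(reinterpret_cast<T*>(tr->get_data_ptr()));
    };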
@@ -74,7 +74,7 @@ TEST(${BACKEND_NAME}, backwards_maxpool_n4_c1_hw4_2x2_max)
     auto df = autodiff::backprop_function(f);
     auto external = manager->compile(df);
     auto cf = backend->make_call_frame(external);
-    cf->tensor_call({input, ep}, {output});
+    cf->tensor_call({output}, {input, ep});
     ASSERT_TRUE(read_vector<int>(output) == expected);
 }
@@ -119,7 +119,7 @@ TEST(${BACKEND_NAME}, backwards_maxpool_n2_c1_hw5_3x3_str2_max)
     auto df = autodiff::backprop_function(f);
     auto external = manager->compile(df);
     auto cf = backend->make_call_frame(external);
-    cf->tensor_call({input, ep}, {output});
+    cf->tensor_call({output}, {input, ep});
     ASSERT_TRUE(read_vector<int>(output) == expected);
 }
@@ -162,7 +162,7 @@ TEST(${BACKEND_NAME}, backwards_avgpool_n1_c1_hw2x2)
     auto df = autodiff::backprop_function(f);
     auto external = manager->compile(df);
     auto cf = backend->make_call_frame(external);
-    cf->tensor_call({input, ep}, {output});
+    cf->tensor_call({output}, {input, ep});
     ASSERT_TRUE(read_vector<int>(output) == dataEp);
 }
@@ -202,7 +202,7 @@ TEST(${BACKEND_NAME}, backwards_avgpool_n1_c1_hw4x4)
     auto df = autodiff::backprop_function(f);
     auto external = manager->compile(df);
     auto cf = backend->make_call_frame(external);
-    cf->tensor_call({input, ep}, {output});
+    cf->tensor_call({output}, {input, ep});
     ASSERT_TRUE(read_vector<int>(output) == expected);
 }
@@ -308,7 +308,7 @@ TEST(${BACKEND_NAME}, backwards_avgpool_n2_c2_hw4x4)
     auto df = autodiff::backprop_function(f);
     auto external = manager->compile(df);
     auto cf = backend->make_call_frame(external);
-    cf->tensor_call({input, ep}, {output});
+    cf->tensor_call({output}, {input, ep});
     ASSERT_TRUE(read_vector<int>(output) == expected);
 }
@@ -1633,7 +1633,7 @@ TEST(${BACKEND_NAME}, backwards_maxpool_n4c1h4w4_kh2kw2_sh1sw1)
     auto df = autodiff::backprop_function(f);
     auto external = manager->compile(df);
     auto cf = backend->make_call_frame(external);
-    cf->tensor_call({input, ep}, {output});
+    cf->tensor_call({output}, {input, ep});
     ASSERT_TRUE(read_vector<float>(output) == expected);
 }
@@ -1677,6 +1677,6 @@ TEST(${BACKEND_NAME}, backwards_maxpool_n2c1h5w5_kh3kw3_sh2sw2)
     auto df = autodiff::backprop_function(f);
     auto external = manager->compile(df);
     auto cf = backend->make_call_frame(external);
-    cf->tensor_call({input, ep}, {output});
+    cf->tensor_call({output}, {input, ep});
     ASSERT_TRUE(read_vector<float>(output) == expected);
 }
@@ -51,7 +51,7 @@ TEST(INTERPRETER, nan_check_input)
     auto result = backend->make_primary_tensor_view(element::f32, shape);
     icf->set_nan_check(true);
-    EXPECT_ANY_THROW(icf->call({a, b}, {result}));
+    EXPECT_ANY_THROW(icf->call({result}, {a, b}));
 }
 TEST(INTERPRETER, nan_check_output)
@@ -77,5 +77,5 @@ TEST(INTERPRETER, nan_check_output)
     auto result = backend->make_primary_tensor_view(element::f32, shape);
     icf->set_nan_check(true);
-    EXPECT_ANY_THROW(icf->call({a, b}, {result}));
+    EXPECT_ANY_THROW(icf->call({result}, {a, b}));
 }
@@ -162,7 +162,7 @@ TEST(benchmark, concat_32x1x200_axis1_6)
     result_tvs.push_back(result_tv);
     std::function<void()> cb = [input_vals, result_tv, cf]() {
-        cf->call(input_vals, {result_tv});
+        cf->call({result_tv}, input_vals);
     };
     test_callbacks.push_back(cb);
......
[one file's diff omitted: too large to display]
@@ -37,7 +37,7 @@ shared_ptr<runtime::TensorView>
     auto a = backend->make_primary_tensor_view(element::f32, shape_a);
     copy_data(a, vector<float>{1, 2, 3, 4, 5, 6});
     auto result = backend->make_primary_tensor_view(element::f32, shape_rt);
-    cf->call({a}, {result});
+    cf->call({result}, {a});
     return result;
 }
@@ -57,7 +57,7 @@ shared_ptr<runtime::TensorView> make_reduce_result_true(
     auto a = backend->make_primary_tensor_view(element::f32, shape_a);
     copy_data(a, vector<float>{1, 2, 3, 4, 5, 6});
     auto result = backend->make_primary_tensor_view(element::f32, shape_rt);
-    cf->call({a}, {result});
+    cf->call({result}, {a});
     return result;
 }
@@ -77,7 +77,7 @@ shared_ptr<runtime::TensorView> make_reduce_result_false(
     auto a = backend->make_primary_tensor_view(element::f32, shape_a);
     copy_data(a, vector<float>{1, 2, 3, 4, 5, 6});
     auto result = backend->make_primary_tensor_view(element::f32, shape_rt);
-    cf->call({a}, {result});
+    cf->call({result}, {a});
     return result;
 }
......
@@ -59,13 +59,13 @@ TEST(builder_xla, simple)
     auto result = backend->make_primary_tensor_view(element::f32, shape);
     auto result_tuple = xla::make_tuple({result});
-    xla::call(cf, {abc}, {result_tuple});
+    xla::call(cf, {result_tuple}, {abc});
     EXPECT_EQ((vector<float>{54, 80, 110, 144}), read_vector<float>(result));
-    xla::call(cf, {bac}, {result_tuple});
+    xla::call(cf, {result_tuple}, {bac});
     EXPECT_EQ((vector<float>{54, 80, 110, 144}), read_vector<float>(result));
-    xla::call(cf, {acb}, {result_tuple});
+    xla::call(cf, {result_tuple}, {acb});
     EXPECT_EQ((vector<float>{50, 72, 98, 128}), read_vector<float>(result));
 }
......
[collapsed diff not shown]
@@ -133,7 +133,7 @@ TEST(cpu_fusion, gemm_cpu_broadcast_row)
     copy_data(a, dataA);
     copy_data(b, dataB);
-    cf->call({a, b}, {result});
+    cf->call({result}, {a, b});
     vector<float> expected{11, 30, 38, 111};
     EXPECT_EQ(read_vector<float>(result), expected);
 }
@@ -168,7 +168,7 @@ TEST(cpu_fusion, gemm_cpu_broadcast_column)
     copy_data(a, dataA);
     copy_data(b, dataB);
-    cf->call({a, b}, {result});
+    cf->call({result}, {a, b});
     vector<float> expected{11, 29, 39, 111};
     EXPECT_EQ(read_vector<float>(result), expected);
 }
@@ -207,7 +207,7 @@ TEST(cpu_fusion, gemm_cpu_broadcast_matrix)
     copy_data(a, dataA);
     copy_data(b, dataB);
-    cf->call({a, b}, {result});
+    cf->call({result}, {a, b});
     vector<float> expected{10, 28, 37, 109};
     ASSERT_TRUE(read_vector<float>(result) == expected);
 }
@@ -243,7 +243,7 @@ TEST(cpu_fusion, gemm_cpu_no_bias)
     copy_data(a, dataA);
     copy_data(b, dataB);
-    cf->call({a, b}, {result});
+    cf->call({result}, {a, b});
     vector<float> expected{9, 27, 36, 108};
     ASSERT_TRUE(read_vector<float>(result) == expected);
 }
@@ -384,7 +384,7 @@ TEST(cpu_fusion, batchnorm_fprop_b1c2h2w2)
     vector<float> expected_mean{0.602912f, 0.599727f};
     vector<float> expected_variance{0.00472505f, 0.0361782f};
-    cf->call({_input, _gamma, _beta}, {bn_output, result_mean, result_variance});
+    cf->call({bn_output, result_mean, result_variance}, {_input, _gamma, _beta});
     EXPECT_TRUE(test::all_close(expected_result, read_vector<float>(bn_output)));
     EXPECT_TRUE(test::all_close(expected_mean, read_vector<float>(result_mean)));
@@ -439,7 +439,7 @@ TEST(cpu_fusion, batchnorm_fprop_b2c2h2w1)
         -0.30327f, 1.1561f, -0.0963782f, -0.434702f, -1.4011f, 0.548275f, -1.06187f, 1.59295f};
     vector<float> expected_mean{0.583388f, 0.619252f};
     vector<float> expected_variance{0.0119972f, 0.0282681f};
-    cf->call({_input, _gamma, _beta}, {bn_output, result_mean, result_variance});
+    cf->call({bn_output, result_mean, result_variance}, {_input, _gamma, _beta});
     EXPECT_TRUE(test::all_close(expected_result, read_vector<float>(bn_output)));
     EXPECT_TRUE(test::all_close(expected_mean, read_vector<float>(result_mean)));
@@ -558,7 +558,7 @@ TEST(cpu_fusion, bn_bprop_n4c3h2w2)
     shared_ptr<runtime::TensorView> _dbeta =
         backend->make_primary_tensor_view(element::f32, beta_shape);
-    cf->call({_mean, _var, _input, _gamma, _beta, _delta}, {_dinput, _dgamma, _dbeta});
+    cf->call({_dinput, _dgamma, _dbeta}, {_mean, _var, _input, _gamma, _beta, _delta});
     vector<float> expected_input{
         8.17051607e-06f, 4.77576657e-06f, 1.02257760e-05f, 1.20387525e-06f, -1.73868522e-06f,
@@ -812,8 +812,8 @@ TEST(cpu_fusion, conv_bias_fprop_n1c1h3w3)
     auto external = manager->compile(f);
     auto cf = backend->make_call_frame(external);
-    cf->call({conv_test.data_val, conv_test.weights_val, conv_test.bias_val},
-             {conv_test.result_val});
+    cf->call({conv_test.result_val},
+             {conv_test.data_val, conv_test.weights_val, conv_test.bias_val});
     auto result_vec = read_vector<float>(conv_test.result_val);
     EXPECT_TRUE(
@@ -845,8 +845,8 @@ TEST(cpu_fusion, conv_bias_bprop_n1c1h3w3)
     auto external = manager->compile(df);
     auto cf = backend->make_call_frame(external);
-    cf->call({conv_test.data_val, conv_test.weights_val, conv_test.bias_val, conv_test.delta_val},
-             {conv_test.d_data_val, conv_test.d_weights_val, conv_test.d_bias_val});
+    cf->call({conv_test.d_data_val, conv_test.d_weights_val, conv_test.d_bias_val},
+             {conv_test.data_val, conv_test.weights_val, conv_test.bias_val, conv_test.delta_val});
     EXPECT_TRUE(
         test::all_close(conv_test.expected_d_data_val, read_vector<float>(conv_test.d_data_val)));
@@ -888,7 +888,7 @@ TEST(cpu_fusion, sigmoid_n1c1h2w2)
     vector<float> dataA{1.0f, 4.0f, 1.0f, 4.0f};
     copy_data(a, dataA);
-    cf->call({a}, {result});
+    cf->call({result}, {a});
     vector<float> expected{0.73105858f, 0.98201379f, 0.73105858f, 0.98201379f};
     ASSERT_TRUE(read_vector<float>(result) == expected);
 }
@@ -912,7 +912,7 @@ TEST(cpu_fusion, sigmoid_n1c1h4)
     vector<float> dataA{1.0f, 4.0f, 1.0f, 4.0f};
     copy_data(a, dataA);
-    cf->call({a}, {result});
+    cf->call({result}, {a});
     vector<float> expected{0.73105858f, 0.98201379f, 0.73105858f, 0.98201379f};
     ASSERT_TRUE(read_vector<float>(result) == expected);
 }
@@ -955,7 +955,7 @@ TEST(cpu_fusion, sigmoid_bprop_n1c1h4)
     copy_data(a, dataA);
     copy_data(b, dataB);
-    cf->call({a, b}, {result});
+    cf->call({result}, {a, b});
     vector<float> expected{0.196612f, 0.0176627f, 0.196612f, 0.0176627f};
     EXPECT_TRUE(test::all_close(expected, read_vector<float>(result)));
......
@@ -53,6 +53,6 @@ TEST(distributed_${BACKEND_NAME}, allreduce)
     std::transform(
         v.begin(), v.end(), v.begin(), std::bind1st(std::multiplies<float>(), comm_size));
-    cf->call({a}, {result});
+    cf->call({result}, {a});
     EXPECT_EQ(v, read_vector<float>(result));
 }
@@ -72,8 +72,8 @@ public:
     {
     }
-    void call(const vector<shared_ptr<runtime::TensorView>>& inputs,
-              const vector<shared_ptr<runtime::TensorView>>& outputs)
+    void call(const vector<shared_ptr<runtime::TensorView>>& outputs,
+              const vector<shared_ptr<runtime::TensorView>>& inputs)
     {
         // Every parameter and result node in every sub_function maps to one TensorView
         unordered_map<shared_ptr<Node>, shared_ptr<runtime::TensorView>> map_node_to_tensor_view;
@@ -142,7 +142,7 @@ public:
         }
         // Call
-        call_frame->call(parameter_tvs, result_tvs);
+        call_frame->call(result_tvs, parameter_tvs);
     }
 }
@@ -336,7 +336,7 @@ TEST(graph_partition, hybrid_abc_manual)
     auto f0 = make_shared<Function>(ResultVector{R0, R1}, op::ParameterVector{A, B, C});
     auto f0_external = int_manager->compile(f0);
     auto f0_call_frame = int_backend->make_call_frame(f0_external);
-    f0_call_frame->call({a, b, c}, {r0, r1});
+    f0_call_frame->call({r0, r1}, {a, b, c});
     // f1 on CPU
     auto p0 = cpu_backend->make_primary_tensor_view(element::f32, shape);
@@ -348,7 +348,7 @@ TEST(graph_partition, hybrid_abc_manual)
     auto f1 = make_shared<Function>(ResultVector{R2}, op::ParameterVector{P0, P1});
     auto f1_external = cpu_manager->compile(f1);
     auto f1_call_frame = cpu_backend->make_call_frame(f1_external);
-    f1_call_frame->call({p0, p1}, {r2});
+    f1_call_frame->call({r2}, {p0, p1});
     // f2 on INT
     auto p2 = int_backend->make_primary_tensor_view(element::f32, shape);
@@ -358,7 +358,7 @@ TEST(graph_partition, hybrid_abc_manual)
     auto f2 = make_shared<Function>(ResultVector{R}, op::ParameterVector{P2});
     auto f2_external = int_manager->compile(f2);
     auto f2_call_frame = int_backend->make_call_frame(f2_external);
-    f2_call_frame->call({p2}, {r});
+    f2_call_frame->call({r}, {p2});
     // Check final result on INT
     EXPECT_EQ(read_vector<float>(r),
@@ -410,7 +410,7 @@ TEST(graph_partition, hybrid_abc)
     copy_data(b, test::NDArray<float, 2>({{5, 6}, {7, 8}}).get_vector());
     copy_data(c, test::NDArray<float, 2>({{9, 10}, {11, 12}}).get_vector());
-    cf->call({a, b, c}, {r});
+    cf->call({r}, {a, b, c});
     EXPECT_EQ(read_vector<float>(r),
               (test::NDArray<float, 2>({{54, 80}, {110, 144}})).get_vector());
 }
@@ -453,7 +453,7 @@ TEST(graph_partition, hybrid_abcd)
     copy_data(c, test::NDArray<float, 2>({{9, 10}, {11, 12}}).get_vector());
     copy_data(d, test::NDArray<float, 2>({{13, 14}, {15, 16}}).get_vector());
-    cf->call({a, b, c, d}, {r});
+    cf->call({r}, {a, b, c, d});
     EXPECT_EQ(read_vector<float>(r), (test::NDArray<float, 2>({{32, 48}, {68, 92}})).get_vector());
 }
@@ -491,7 +491,7 @@ TEST(graph_partition, hybrid_back_and_forth)
     copy_data(b, test::NDArray<float, 2>({{5, 6}, {7, 8}}).get_vector());
     copy_data(c, test::NDArray<float, 2>({{9, 10}, {11, 12}}).get_vector());
-    cf->call({a, b, c}, {r});
+    cf->call({r}, {a, b, c});
     EXPECT_EQ(read_vector<float>(r),
               (test::NDArray<float, 2>({{90, 180}, {308, 480}})).get_vector());
 }
@@ -532,7 +532,7 @@ TEST(graph_partition, hybrid_multi_middle_nodes)
     copy_data(b, test::NDArray<float, 2>({{5, 6}, {7, 8}}).get_vector());
     copy_data(c, test::NDArray<float, 2>({{9, 10}, {11, 12}}).get_vector());
-    cf->call({a, b, c}, {r});
+    cf->call({r}, {a, b, c});
     EXPECT_EQ(read_vector<float>(r),
               (test::NDArray<float, 2>({{210, 288}, {378, 480}})).get_vector());
 }
@@ -562,6 +562,6 @@ TEST(graph_partition, hybrid_no_split)
     copy_data(a, test::NDArray<float, 2>({{1, 2}, {3, 4}}).get_vector());
     copy_data(b, test::NDArray<float, 2>({{5, 6}, {7, 8}}).get_vector());
-    cf->call({a, b}, {c});
+    cf->call({c}, {a, b});
     EXPECT_EQ(read_vector<float>(c), (test::NDArray<float, 2>({{6, 8}, {10, 12}})).get_vector());
 }
@@ -241,7 +241,7 @@ TEST (${BACKEND_NAME}, %s)
     vector<float> expected_result{%s};
-    cf->call({a, b}, {result});
+    cf->call({result}, {a, b});
     EXPECT_TRUE(all_close<float>(vector<float>{expected_result}, read_vector<float>(result)));
     // only test backprop for certain cases as it takes significant compute resources
     if(%s) {
......
[collapsed diff not shown]
@@ -96,13 +96,13 @@ TEST(serialize, main)
     copy_data(z, vector<float>{9, 10, 11, 12});
     auto result = backend->make_primary_tensor_view(element::f32, shape);
-    cf->call({x, y, z}, {result});
+    cf->call({result}, {x, y, z});
     EXPECT_EQ((vector<float>{216, 320, 440, 576}), read_vector<float>(result));
-    cf->call({y, x, z}, {result});
+    cf->call({result}, {y, x, z});
     EXPECT_EQ((vector<float>{216, 320, 440, 576}), read_vector<float>(result));
-    cf->call({x, z, y}, {result});
+    cf->call({result}, {x, z, y});
     EXPECT_EQ((vector<float>{200, 288, 392, 512}), read_vector<float>(result));
 }
......
@@ -99,7 +99,7 @@ namespace ngraph
     write_vector(c_arg, c_vec);
     // call modified df/dX* = f'(c, cached)
-    cf->tensor_call(df_input_args, df_output_args);
+    cf->tensor_call(df_output_args, df_input_args);
     // reset the adjoint element
     c_vec[i] = 0;
@@ -192,7 +192,7 @@ namespace ngraph
     auto clone_fwd = clone_function(fprop_cache.fprop, nm1);
     auto cache_fwd = manager->compile(clone_fwd);
     auto cache_fwd_cf = backend->make_call_frame(cache_fwd);
-    cache_fwd_cf->tensor_call(f_input_args, mod_f_output_args);
+    cache_fwd_cf->tensor_call(mod_f_output_args, f_input_args);
     // call modfied f'(c, cached) to get df/dX*
     NodeMap nm2;
......
@@ -63,7 +63,7 @@ namespace ngraph
     // ref_y is the function evaluated at the args
     auto ref_y = backend->make_primary_tensor_view<T>(y_shape);
-    cf->tensor_call(args, std::vector<std::shared_ptr<ngraph::runtime::TensorView>>{ref_y});
+    cf->tensor_call(std::vector<std::shared_ptr<ngraph::runtime::TensorView>>{ref_y}, args);
     auto ref_vec = read_vector<T>(ref_y);
     // inc_y will hold f(x+dx) values
@@ -88,7 +88,7 @@ namespace ngraph
     auto old_val = vec[j];
     vec[j] += delta;
     write_vector(arg, vec);
-    cf->tensor_call(args, {inc_y});
+    cf->tensor_call({inc_y}, args);
     auto inc_vec = read_vector<T>(inc_y);
     vec[j] = old_val;
     write_vector(arg, vec);
......
@@ -158,7 +158,7 @@ void run_benchmark(shared_ptr<Function> f,
     t1.start();
     for (size_t i = 0; i < static_cast<size_t>(iterations); i++)
     {
-        cf->tensor_call(args, results);
+        cf->tensor_call(results, args);
     }
     t1.stop();
     float time = t1.get_milliseconds();
......