Commit f75b8006 authored by Pruthvi's avatar Pruthvi Committed by Adam Procter

RNN fusion across layers (#1085)

* - Added a graph pass for fusing the RNN op across layers
- Added a test case comparing INTERPRETER vs CPU results to verify the layer-fused RNN
- Added more sanity checks in the RNN fusion graph pass
- Added support to replace the recurrent cell state correctly in the fused RNN op

* Fixed multi layer rnn fusion unit test failure

* Addressed PR comments
parent 7c8e9250
......@@ -344,9 +344,10 @@ void runtime::cpu::CPU_ExternalFunction::compile()
pass_manager.register_pass<ngraph::pass::NopElimination>();
pass_manager.register_pass<runtime::cpu::pass::LSTMFusion>();
pass_manager.register_pass<runtime::cpu::pass::RNNFusion>();
pass_manager.register_pass<runtime::cpu::pass::CPUBatchFusion>();
pass_manager.register_pass<runtime::cpu::pass::ConcatInputs>();
pass_manager.register_pass<ngraph::pass::AlgebraicSimplification>();
pass_manager.register_pass<runtime::cpu::pass::MultiLayerRNNFusion>();
pass_manager.register_pass<runtime::cpu::pass::ConcatInputs>();
pass_manager.register_pass<runtime::cpu::pass::CPUBatchFusion>();
pass_manager.register_pass<ngraph::pass::CommonSubexpressionElimination>();
pass_manager.register_pass<ngraph::pass::CoreFusion>();
pass_manager.register_pass<runtime::cpu::pass::CPUFusion>();
......
......@@ -242,10 +242,12 @@ void ngraph::runtime::cpu::pass::LSTMFusion::construct_lstm_fprop()
auto ht_output = std::make_shared<op::GetOutputElement>(lstm, 0);
auto ct_output = std::make_shared<op::GetOutputElement>(lstm, 1);
if (lstm->get_outputs().at(0).get_inputs().size() != 2)
{
throw ngraph_error("Lstm node doesnt have two outputs");
}
// Now identify the nodes which consumes the output of LSTM nodes
// and replace them accordingly
std::vector<std::shared_ptr<Node>> new_args;
// find the user's for {ht|ct} and replace them with lstm_goe_1
for (auto node : pattern_map[ct_label]->get_users())
{
......@@ -280,8 +282,15 @@ static std::shared_ptr<ngraph::Node>
if (concat_all)
{
auto node_labels = m.get_bound_nodes_for_pattern(rnn_labels[0]);
std::reverse(node_labels.begin(), node_labels.end());
return std::make_shared<op::Concat>(node_labels, 0);
if (node_labels.size() > 1)
{
std::reverse(node_labels.begin(), node_labels.end());
return std::make_shared<op::Concat>(node_labels, 0);
}
else
{
return node_labels[0];
}
}
// src_iter -> concatenate ht_1|ct_1 of the first LSTM cells belonging to same RNN layer
......@@ -437,7 +446,15 @@ void ngraph::runtime::cpu::pass::RNNFusion::construct_rnn_lstm_fprop()
NGRAPH_DEBUG << "batch_size: " << batch_size;
NGRAPH_DEBUG << "feature_size: " << feature_size;
if ((src_layer->get_arguments().size()) != sequence_len)
if ((src_layer->get_arguments().size()) != sequence_len &&
!std::dynamic_pointer_cast<op::Parameter>(src_layer))
{
throw ngraph_error(
"number of lstm inputs captured in the RNN fusion is not equal to "
"src_sequence_length");
}
if (std::dynamic_pointer_cast<op::Parameter>(src_layer) && sequence_len != 1)
{
throw ngraph_error(
"number of lstm inputs captured in the RNN fusion is not equal to "
......@@ -491,7 +508,7 @@ void ngraph::runtime::cpu::pass::RNNFusion::construct_rnn_lstm_fprop()
std::vector<std::shared_ptr<op::Slice>> ht_slice_per_timestep(num_of_lstm_matched, nullptr);
auto rnn_ht_out = std::make_shared<op::GetOutputElement>(rnn, 0);
auto rnn_ct_out = std::make_shared<op::GetOutputElement>(rnn, 1);
auto rnn_ht_ct_out = std::make_shared<op::GetOutputElement>(rnn, 1);
//slice the rnn ht's
size_t start_index = 0;
......@@ -547,24 +564,43 @@ void ngraph::runtime::cpu::pass::RNNFusion::construct_rnn_lstm_fprop()
if (goe_node->get_n() == 0)
{
goe_0 = goes->get_node();
for (auto goe0_user : goe_0->get_users())
{
if (std::find(lstm_nodes.begin(), lstm_nodes.end(), goe0_user) ==
lstm_nodes.end() &&
!is_unreachable(goe0_user))
{
lstm_goe0_user.insert(goe0_user);
map_goe_to_lstm_slices[goe_0] = ht_slice_per_timestep[index];
NGRAPH_DEBUG << "ht_slice: " << ht_slice_per_timestep[index]->get_name()
<< " goe0_user " << goe0_user->get_name() << " ";
}
}
}
}
for (auto goe0_user : goe_0->get_users())
{
if (std::find(lstm_nodes.begin(), lstm_nodes.end(), goe0_user) ==
lstm_nodes.end() &&
!is_unreachable(goe0_user))
// we need to only check the last LSTM cell Ct user and replace if needed.
if ((index == 0) && (goe_node->get_n() == 1))
{
lstm_goe0_user.insert(goe0_user);
map_goe_to_lstm_slices[goe_0] = ht_slice_per_timestep[index];
NGRAPH_DEBUG << "ht_slice: " << ht_slice_per_timestep[index]->get_name()
<< " goe0_user " << goe0_user->get_name() << " ";
// dst_iter of lstm mkldnn output holds the results of both recurrent state
// tensor outputs. we need to slice the ct.
auto ht_slice = std::make_shared<op::Slice>(
rnn_ht_ct_out,
Coordinate{0, 0},
Coordinate{static_cast<unsigned long>(batch_size),
static_cast<unsigned long>(feature_size)});
auto ct_slice = std::make_shared<op::Slice>(
rnn_ht_ct_out,
Coordinate{static_cast<unsigned long>(batch_size), 0},
Coordinate{static_cast<unsigned long>(2 * batch_size),
static_cast<unsigned long>(feature_size)});
// check if the last LSTM cell has any consumers
auto n_time_step_lstm_ct_goe = goes->get_node();
ngraph::replace_node(n_time_step_lstm_ct_goe, ct_slice);
}
}
}
//now go through the lstm consumers and replace them with the slice
//now go through the lstm goe_0 consumers and replace them with the slice
for (auto& node : lstm_goe0_user)
{
for (size_t i = 0; i < node->get_input_size(); i++)
......@@ -577,6 +613,7 @@ void ngraph::runtime::cpu::pass::RNNFusion::construct_rnn_lstm_fprop()
}
}
}
NGRAPH_DEBUG << "End of recurrent fusion call back "
<< "matched_node: " << m.get_match_root()->get_name();
return true;
......@@ -588,3 +625,213 @@ void ngraph::runtime::cpu::pass::RNNFusion::construct_rnn_lstm_fprop()
lstm_node_label, rpattern_ct_1, empty_correlated_matches, callback);
this->add_matcher(m);
}
// Stack (concatenate along axis 0) the nodes bound to `rnn_label` across all
// recurrent matches, producing one combined input tensor (src_iter, weights,
// or bias) for the multi-layer fused RNN op.
//
// The bound-node vector is reversed before concatenation so the layers come
// out in the intended order (the matcher records them in the opposite order;
// the single-layer RNN fusion helper does the same). If only one node is
// bound there is nothing to stack, so it is returned as-is instead of being
// wrapped in a one-input Concat — mirroring the single-element handling in
// the RNNFusion input helper.
static std::shared_ptr<Node>
    compute_multi_layer_rnn_inputs(const std::shared_ptr<pattern::op::Label>& rnn_label,
                                   pattern::RecurrentMatcher& m)
{
    auto node_labels = m.get_bound_nodes_for_pattern(rnn_label);
    if (node_labels.size() > 1)
    {
        std::reverse(node_labels.begin(), node_labels.end());
        return std::make_shared<op::Concat>(node_labels, 0);
    }
    return node_labels[0];
}
// Registers a recurrent matcher that fuses a stack of single-layer fused RNN
// ops (produced by RNNFusion) into one multi-layer op::Rnn node. The
// recurrence being followed is: GOE(0) (the ht output) of one layer's RNN
// feeds the src_layer input of the next layer's RNN.
void ngraph::runtime::cpu::pass::MultiLayerRNNFusion::construct_multi_layer_rnn_fusion_fprop()
{
    // Pattern for one RNN layer. The src_layer input may arrive through a
    // Slice, so a Skip node lets the matcher see through it. The shapes and
    // the ref_* attribute values below only serve to construct a valid
    // pattern node; they are not compared against the matched graph.
    auto src_layer_label = std::make_shared<pattern::op::Label>(element::f32, Shape{30, 100});
    auto slice_pred = [](std::shared_ptr<Node> n) {
        return static_cast<bool>(std::dynamic_pointer_cast<op::Slice>(n));
    };
    auto src_slice = std::make_shared<pattern::op::Skip>(src_layer_label, slice_pred);

    auto src_iter_label = std::make_shared<pattern::op::Label>(element::f32, Shape{20, 100});
    auto weights_layer_label = std::make_shared<pattern::op::Label>(element::f32, Shape{400, 100});
    auto weights_iter_label = std::make_shared<pattern::op::Label>(element::f32, Shape{400, 100});
    auto bias_label = std::make_shared<pattern::op::Label>(element::f32, Shape{400});

    // Placeholder attributes for the reference RNN pattern node.
    size_t ref_number_of_timesteps = 3;
    size_t ref_number_of_gates_per_cell = 4;
    size_t ref_src_seq_length = 3;
    size_t ref_src_layer_feature_size = 100;
    size_t ref_feature_size = 100;
    size_t ref_num_rnn_cell_states = 2;
    size_t ref_rnn_direction = 1;
    size_t ref_num_of_rnn_fused_layer = 1;

    auto ref_rnn_node = std::make_shared<op::Rnn>(src_slice,
                                                  src_iter_label,
                                                  weights_layer_label,
                                                  weights_iter_label,
                                                  bias_label,
                                                  ref_number_of_timesteps,
                                                  ref_number_of_gates_per_cell,
                                                  ref_src_seq_length,
                                                  ref_src_layer_feature_size,
                                                  ref_feature_size,
                                                  ref_num_rnn_cell_states,
                                                  ref_rnn_direction,
                                                  ref_num_of_rnn_fused_layer);

    NodeVector ht_slice_per_timestep;
    // GOE(0) = ht output; this is the recurrent link the matcher follows
    // from one layer into the next.
    auto rnn_ht_out = std::make_shared<op::GetOutputElement>(ref_rnn_node, 0);
    auto rnn_ht_label =
        std::make_shared<pattern::op::Label>(rnn_ht_out, nullptr, NodeVector{rnn_ht_out});
    auto rnn_ct_out = std::make_shared<op::GetOutputElement>(ref_rnn_node, 1);

    pattern::recurrent_graph_rewrite_callback callback = [src_layer_label,
                                                          src_iter_label,
                                                          weights_layer_label,
                                                          weights_iter_label,
                                                          bias_label,
                                                          rnn_ht_label](
        pattern::RecurrentMatcher& m) {
        // Fusing across layers only makes sense with at least two matches.
        if (m.get_number_of_recurrent_matches() <= 1)
        {
            return false;
        }

        auto src_nodes = m.get_bound_nodes_for_pattern(src_layer_label);
        auto rnn_ht_out_nodes = m.get_bound_nodes_for_pattern(rnn_ht_label);
        auto number_of_rnn_cell_matched = m.get_number_of_recurrent_matches();
        NGRAPH_DEBUG << " In Recurrent multi layer RNN fusion callback ";
        NGRAPH_DEBUG << "Number of RNN's Matched: " << number_of_rnn_cell_matched;
        NGRAPH_DEBUG << "matched_root: " << m.get_match_root()->get_name();
        NGRAPH_DEBUG << "src_layer_node: " << src_nodes[0]->get_name();

        // we can fuse across different RNN layers only if SLC == DLC
        // (i.e. each layer's input feature size equals the previous layer's
        // output feature size — compare dim 1 of xt vs ht_1).
        for (size_t i = 0; i < number_of_rnn_cell_matched; i++)
        {
            if (src_nodes[i]->get_shape()[1] != rnn_ht_out_nodes[i]->get_shape()[1])
            {
                NGRAPH_DEBUG << "Not fusing since the feature sizes for xt and ht_1 dont match";
                return false;
            }
        }

        // we just need to capture the input symbols {x0 | x1.....| xt} of the first lstm layer
        // the intermediate inputs for the next layer will be computed by the MKLDNN
        // (the last bound node is the bottom-most layer's input).
        auto src_layer_nodes = m.get_bound_nodes_for_pattern(src_layer_label);
        auto src_layer = src_layer_nodes[src_layer_nodes.size() - 1];

        // Stack the per-layer states, weights and biases along axis 0.
        auto src_iter = compute_multi_layer_rnn_inputs(src_iter_label, m);
        auto weights_layer = compute_multi_layer_rnn_inputs(weights_layer_label, m);
        auto weights_iter = compute_multi_layer_rnn_inputs(weights_iter_label, m);
        auto bias = compute_multi_layer_rnn_inputs(bias_label, m);

        // Recover the matched op::Rnn node (the GOE bound to rnn_ht_label
        // must be fed by an RNN op); its attributes seed the fused op.
        std::shared_ptr<op::Rnn> rnn_node = nullptr;
        for (auto& rnn_goe_input : m.get_bound_nodes_for_pattern(rnn_ht_label)[0]->get_arguments())
        {
            if (std::dynamic_pointer_cast<op::Rnn>(rnn_goe_input))
            {
                rnn_node = std::dynamic_pointer_cast<op::Rnn>(rnn_goe_input);
            }
            else
            {
                throw ngraph_error("Input for RNN output GetOuputElement Op should be RNN");
            }
        }

        size_t num_time_steps = rnn_node->get_num_timesteps();
        size_t num_gates_in_lstm = rnn_node->get_gates_per_cell();
        size_t batch_size = rnn_node->get_batch_size();
        size_t sequence_len = rnn_node->get_src_sequence_length();
        size_t src_layer_feature_size = rnn_node->get_src_layer_feature_size();
        size_t feature_size = rnn_node->get_src_iter_feature_size();
        size_t num_rnn_cell_states = rnn_node->get_num_cell_states();
        size_t rnn_direction = rnn_node->get_direction();
        size_t num_fused_rnn_layers = m.get_number_of_recurrent_matches();

        NGRAPH_DEBUG << "src_layer: " << join(src_layer->get_shape());
        NGRAPH_DEBUG << "src_iter: " << join(src_iter->get_shape());
        NGRAPH_DEBUG << "weights_layer: " << join(weights_layer->get_shape());
        NGRAPH_DEBUG << "weights_iter: " << join(weights_iter->get_shape());
        NGRAPH_DEBUG << "bias: " << join(bias->get_shape());
        NGRAPH_DEBUG << "src_seq_len: " << sequence_len;
        NGRAPH_DEBUG << "batch_size: " << batch_size;
        NGRAPH_DEBUG << "feature_size: " << feature_size;

        // Sanity checks: the captured first-layer input must either be a
        // concat of per-timestep symbols, or (for a single timestep) a
        // Parameter directly.
        if ((src_layer->get_arguments().size()) != rnn_node->get_num_timesteps() &&
            !std::dynamic_pointer_cast<op::Parameter>(src_layer))
        {
            throw ngraph_error(
                " input symbols for the layer fused RNN op, should be captured only for the first "
                "layer");
        }
        if (std::dynamic_pointer_cast<op::Parameter>(src_layer) &&
            rnn_node->get_num_timesteps() != 1)
        {
            throw ngraph_error(
                " input symbols for the layer fused RNN op, should be captured only for the first "
                "layer");
        }
        // Each stacked input must contribute exactly one argument per layer.
        if ((src_iter->get_arguments().size()) != num_fused_rnn_layers)
        {
            throw ngraph_error(
                "number of states(ht_1|ct_1) for RNN op in the layer fusion is not equal to num of "
                "fused_rnn_layers");
        }
        if ((weights_layer->get_arguments().size()) != num_fused_rnn_layers)
        {
            throw ngraph_error(
                "weights w.r.to input symbols of RNN op in the layer fusion is not equal to num of "
                "fused_rnn_layers");
        }
        if ((weights_iter->get_arguments().size()) != num_fused_rnn_layers)
        {
            throw ngraph_error(
                "weights w.r.to cell states of RNN op in the layer fusion is not equal to num of "
                "fused_rnn_layers");
        }
        if ((bias->get_arguments().size()) != num_fused_rnn_layers)
        {
            throw ngraph_error(
                "bias of RNN op in the layer fusion is not equal to num of fused_rnn_layers");
        }

        // Build the single multi-layer RNN op with the stacked inputs.
        auto rnn = std::make_shared<op::Rnn>(src_layer,
                                             src_iter,
                                             weights_layer,
                                             weights_iter,
                                             bias,
                                             num_time_steps,
                                             num_gates_in_lstm,
                                             sequence_len,
                                             src_layer_feature_size,
                                             feature_size,
                                             num_rnn_cell_states,
                                             rnn_direction,
                                             num_fused_rnn_layers);

        auto layer_rnn_ht = std::make_shared<op::GetOutputElement>(rnn, 0);
        auto layer_rnn_ht_ct = std::make_shared<op::GetOutputElement>(rnn, 1);

        // find the last RNN cell GOE's and replace them with the layer fused RNN GOE.
        // (rnn_node here is the topmost matched RNN, whose outputs the rest
        // of the graph consumes.)
        for (auto& rnn_goes : rnn_node->get_users())
        {
            NGRAPH_DEBUG << "rnn_goes: " << rnn_goes->get_name();
            if (auto rnn_goe_node = std::dynamic_pointer_cast<op::GetOutputElement>(rnn_goes))
            {
                if (rnn_goe_node->get_n() == 0)
                {
                    ngraph::replace_node(rnn_goes, layer_rnn_ht);
                }
                else if (rnn_goe_node->get_n() == 1)
                {
                    ngraph::replace_node(rnn_goes, layer_rnn_ht_ct);
                }
            }
        }
        return true;
    };

    std::set<std::shared_ptr<pattern::op::Label>> empty_correlated_matches;
    auto m = std::make_shared<pattern::RecurrentMatcher>(
        rnn_ht_label, src_layer_label, empty_correlated_matches, callback);
    this->add_matcher(m);
}
......@@ -29,6 +29,7 @@ namespace ngraph
{
class LSTMFusion;
class RNNFusion;
class MultiLayerRNNFusion;
}
}
}
......@@ -61,3 +62,16 @@ public:
private:
void construct_rnn_lstm_fprop();
};
// Graph pass that fuses a stack of single-layer fused RNN ops (the output of
// RNNFusion) into one multi-layer op::Rnn node, so the whole stack can be
// handled as a single op (intermediate per-layer inputs are then computed by
// MKLDNN rather than materialized in the graph).
class ngraph::runtime::cpu::pass::MultiLayerRNNFusion : public ngraph::pass::RecurrentGraphRewrite
{
public:
    MultiLayerRNNFusion()
        : RecurrentGraphRewrite()
    {
        // Register the recurrent matcher for the cross-layer fusion pattern.
        construct_multi_layer_rnn_fusion_fprop();
    }

private:
    // Builds the pattern + callback and adds the recurrent matcher.
    void construct_multi_layer_rnn_fusion_fprop();
};
......@@ -35,6 +35,7 @@
#include "ngraph/op/relu.hpp"
#include "ngraph/op/sum.hpp"
#include "ngraph/op/tanh.hpp"
#include "ngraph/pass/algebraic_simplification.hpp"
#include "ngraph/pass/graph_rewrite.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/pass/reshape_elimination.hpp"
......@@ -2197,3 +2198,45 @@ TEST(cpu_fusion, fuse_batch_dot_forward)
EXPECT_TRUE(test::all_close(cpu_results.at(i), int_results.at(i), 1.0e-4f, 1.0e-4f));
}
}
// After running LSTM fusion -> single-layer RNN fusion -> algebraic
// simplification -> multi-layer RNN fusion, the two stacked RNN layers in
// the serialized model should collapse into exactly one op::Rnn node.
TEST(cpu_fusion, fuse_rnn_across_layer)
{
    const string model_path =
        file_util::path_join(SERIALIZED_ZOO, "mxnet/2rnn_layer_1timestep.json");
    stringstream model_stream(file_util::read_file_to_string(model_path));
    shared_ptr<Function> fused_func = ngraph::deserialize(model_stream);

    pass::Manager passes;
    passes.register_pass<runtime::cpu::pass::LSTMFusion>();
    passes.register_pass<runtime::cpu::pass::RNNFusion>();
    passes.register_pass<ngraph::pass::AlgebraicSimplification>();
    passes.register_pass<runtime::cpu::pass::MultiLayerRNNFusion>();
    passes.run_passes(fused_func);

    const size_t expected_rnn_count = 1;
    EXPECT_EQ(expected_rnn_count, count_ops_of_type<op::Rnn>(fused_func));
}
// Numerical check: the layer-fused RNN executed on the CPU backend must
// match the unfused reference executed on the INTERPRETER backend for every
// output of the 2-layer / 1-timestep model.
TEST(cpu_fusion, fuse_rnn_across_2layer_1timestep)
{
    const std::string file_name("mxnet/2rnn_layer_1timestep.json");
    auto cpu_f = make_function(file_name);
    auto int_f = make_function(file_name);
    test::Uniform<float> rng(0.0f, 1.0f);
    vector<vector<float>> args;
    // Fill every parameter with random values shared by both backends.
    for (shared_ptr<op::Parameter> param : int_f->get_parameters())
    {
        vector<float> tensor_val(shape_size(param->get_shape()));
        rng.initialize(tensor_val);
        args.push_back(tensor_val);
    }
    auto int_results = execute(int_f, args, "INTERPRETER");
    auto cpu_results = execute(cpu_f, args, "CPU");
    // The CPU graph must contain exactly one (multi-layer fused) RNN op.
    EXPECT_EQ(1, count_ops_of_type<op::Rnn>(cpu_f));
    for (size_t i = 0; i < cpu_results.size(); i++)
    {
        // Bug fix: compare result i on both backends — the original compared
        // cpu_results.at(1) with int_results.at(1) on every iteration,
        // leaving all other outputs unchecked.
        EXPECT_TRUE(test::all_close(cpu_results.at(i), int_results.at(i), 1.0e-4f, 1.0e-4f));
    }
}
[
{
"name": "Function_0",
"ops": [
{
"element_type": "float",
"inputs": [],
"name": "Parameter_55",
"op": "Parameter",
"outputs": [
"Parameter_55_0"
],
"shape": [
400
]
},
{
"element_type": "float",
"inputs": [],
"name": "Parameter_54",
"op": "Parameter",
"outputs": [
"Parameter_54_0"
],
"shape": [
400,
100
]
},
{
"element_type": "float",
"inputs": [],
"name": "Parameter_47",
"op": "Parameter",
"outputs": [
"Parameter_47_0"
],
"shape": [
400
]
},
{
"element_type": "float",
"inputs": [],
"name": "Parameter_46",
"op": "Parameter",
"outputs": [
"Parameter_46_0"
],
"shape": [
400,
100
]
},
{
"element_type": "float",
"inputs": [],
"name": "Parameter_10",
"op": "Parameter",
"outputs": [
"Parameter_10_0"
],
"shape": [
400
]
},
{
"element_type": "float",
"inputs": [],
"name": "Parameter_9",
"op": "Parameter",
"outputs": [
"Parameter_9_0"
],
"shape": [
400,
100
]
},
{
"element_type": "float",
"inputs": [],
"name": "Parameter_2",
"op": "Parameter",
"outputs": [
"Parameter_2_0"
],
"shape": [
400
]
},
{
"element_type": "float",
"inputs": [],
"name": "Parameter_1",
"op": "Parameter",
"outputs": [
"Parameter_1_0"
],
"shape": [
400,
100
]
},
{
"element_type": "float",
"inputs": [],
"name": "Parameter_0",
"op": "Parameter",
"outputs": [
"Parameter_0_0"
],
"shape": [
10,
100
]
},
{
"element_type": "float",
"inputs": [],
"name": "Constant_52",
"op": "Constant",
"outputs": [
"Constant_52_0"
],
"shape": [],
"value": [
"0"
]
},
{
"element_type": "float",
"inputs": [],
"name": "Constant_7",
"op": "Constant",
"outputs": [
"Constant_7_0"
],
"shape": [],
"value": [
"0"
]
},
{
"element_type": "float",
"inputs": [],
"name": "Constant_34",
"op": "Constant",
"outputs": [
"Constant_34_0"
],
"shape": [],
"value": [
"1"
]
},
{
"element_type": "float",
"inputs": [],
"name": "Constant_30",
"op": "Constant",
"outputs": [
"Constant_30_0"
],
"shape": [],
"value": [
"0"
]
},
{
"element_type": "float",
"inputs": [],
"name": "Constant_24",
"op": "Constant",
"outputs": [
"Constant_24_0"
],
"shape": [],
"value": [
"1"
]
},
{
"element_type": "float",
"inputs": [],
"name": "Constant_17",
"op": "Constant",
"outputs": [
"Constant_17_0"
],
"shape": [],
"value": [
"1"
]
},
{
"element_type": "float",
"inputs": [],
"name": "Constant_79",
"op": "Constant",
"outputs": [
"Constant_79_0"
],
"shape": [],
"value": [
"1"
]
},
{
"element_type": "float",
"inputs": [],
"name": "Constant_75",
"op": "Constant",
"outputs": [
"Constant_75_0"
],
"shape": [],
"value": [
"0"
]
},
{
"element_type": "float",
"inputs": [],
"name": "Constant_69",
"op": "Constant",
"outputs": [
"Constant_69_0"
],
"shape": [],
"value": [
"1"
]
},
{
"element_type": "float",
"inputs": [],
"name": "Constant_62",
"op": "Constant",
"outputs": [
"Constant_62_0"
],
"shape": [],
"value": [
"1"
]
},
{
"axes": [
0
],
"inputs": [
"Parameter_55"
],
"name": "Broadcast_58",
"op": "Broadcast",
"outputs": [
"Broadcast_58_0"
],
"shape": [
10,
400
]
},
{
"input_order": [
1,
0
],
"inputs": [
"Parameter_54"
],
"name": "Reshape_56",
"op": "Reshape",
"output_shape": [
100,
400
],
"outputs": [
"Reshape_56_0"
]
},
{
"axes": [
0
],
"inputs": [
"Parameter_47"
],
"name": "Broadcast_50",
"op": "Broadcast",
"outputs": [
"Broadcast_50_0"
],
"shape": [
10,
400
]
},
{
"input_order": [
1,
0
],
"inputs": [
"Parameter_46"
],
"name": "Reshape_48",
"op": "Reshape",
"output_shape": [
100,
400
],
"outputs": [
"Reshape_48_0"
]
},
{
"axes": [
0
],
"inputs": [
"Parameter_10"
],
"name": "Broadcast_13",
"op": "Broadcast",
"outputs": [
"Broadcast_13_0"
],
"shape": [
10,
400
]
},
{
"input_order": [
1,
0
],
"inputs": [
"Parameter_9"
],
"name": "Reshape_11",
"op": "Reshape",
"output_shape": [
100,
400
],
"outputs": [
"Reshape_11_0"
]
},
{
"axes": [
0
],
"inputs": [
"Parameter_2"
],
"name": "Broadcast_5",
"op": "Broadcast",
"outputs": [
"Broadcast_5_0"
],
"shape": [
10,
400
]
},
{
"input_order": [
1,
0
],
"inputs": [
"Parameter_1"
],
"name": "Reshape_3",
"op": "Reshape",
"output_shape": [
100,
400
],
"outputs": [
"Reshape_3_0"
]
},
{
"axes": [
0,
1
],
"inputs": [
"Constant_52"
],
"name": "Broadcast_53",
"op": "Broadcast",
"outputs": [
"Broadcast_53_0"
],
"shape": [
10,
100
]
},
{
"axes": [
0,
1
],
"inputs": [
"Constant_7"
],
"name": "Broadcast_8",
"op": "Broadcast",
"outputs": [
"Broadcast_8_0"
],
"shape": [
10,
100
]
},
{
"axes": [
0,
1
],
"inputs": [
"Constant_34"
],
"name": "Broadcast_35",
"op": "Broadcast",
"outputs": [
"Broadcast_35_0"
],
"shape": [
10,
100
]
},
{
"axes": [
0,
1
],
"inputs": [
"Constant_30"
],
"name": "Broadcast_31",
"op": "Broadcast",
"outputs": [
"Broadcast_31_0"
],
"shape": [
10,
100
]
},
{
"axes": [
0,
1
],
"inputs": [
"Constant_24"
],
"name": "Broadcast_25",
"op": "Broadcast",
"outputs": [
"Broadcast_25_0"
],
"shape": [
10,
100
]
},
{
"axes": [
0,
1
],
"inputs": [
"Constant_17"
],
"name": "Broadcast_18",
"op": "Broadcast",
"outputs": [
"Broadcast_18_0"
],
"shape": [
10,
100
]
},
{
"axes": [
0,
1
],
"inputs": [
"Constant_79"
],
"name": "Broadcast_80",
"op": "Broadcast",
"outputs": [
"Broadcast_80_0"
],
"shape": [
10,
100
]
},
{
"axes": [
0,
1
],
"inputs": [
"Constant_75"
],
"name": "Broadcast_76",
"op": "Broadcast",
"outputs": [
"Broadcast_76_0"
],
"shape": [
10,
100
]
},
{
"axes": [
0,
1
],
"inputs": [
"Constant_69"
],
"name": "Broadcast_70",
"op": "Broadcast",
"outputs": [
"Broadcast_70_0"
],
"shape": [
10,
100
]
},
{
"axes": [
0,
1
],
"inputs": [
"Constant_62"
],
"name": "Broadcast_63",
"op": "Broadcast",
"outputs": [
"Broadcast_63_0"
],
"shape": [
10,
100
]
},
{
"inputs": [
"Parameter_0",
"Reshape_3"
],
"name": "Dot_4",
"op": "Dot",
"outputs": [
"Dot_4_0"
],
"reduction_axes_count": 1
},
{
"inputs": [
"Broadcast_53",
"Reshape_56"
],
"name": "Dot_57",
"op": "Dot",
"outputs": [
"Dot_57_0"
],
"reduction_axes_count": 1
},
{
"inputs": [
"Broadcast_8",
"Reshape_11"
],
"name": "Dot_12",
"op": "Dot",
"outputs": [
"Dot_12_0"
],
"reduction_axes_count": 1
},
{
"inputs": [
"Dot_4",
"Broadcast_5"
],
"name": "Add_6",
"op": "Add",
"outputs": [
"Add_6_0"
]
},
{
"inputs": [
"Dot_57",
"Broadcast_58"
],
"name": "Add_59",
"op": "Add",
"outputs": [
"Add_59_0"
]
},
{
"inputs": [
"Dot_12",
"Broadcast_13"
],
"name": "Add_14",
"op": "Add",
"outputs": [
"Add_14_0"
]
},
{
"inputs": [
"Add_6",
"Add_14"
],
"name": "Add_15",
"op": "Add",
"outputs": [
"Add_15_0"
]
},
{
"inputs": [
"Add_15"
],
"lower_bounds": [
0,
300
],
"name": "Slice_16",
"op": "Slice",
"outputs": [
"Slice_16_0"
],
"strides": [
1,
1
],
"upper_bounds": [
10,
400
]
},
{
"inputs": [
"Add_15"
],
"lower_bounds": [
0,
100
],
"name": "Slice_23",
"op": "Slice",
"outputs": [
"Slice_23_0"
],
"strides": [
1,
1
],
"upper_bounds": [
10,
200
]
},
{
"inputs": [
"Add_15"
],
"lower_bounds": [
0,
0
],
"name": "Slice_33",
"op": "Slice",
"outputs": [
"Slice_33_0"
],
"strides": [
1,
1
],
"upper_bounds": [
10,
100
]
},
{
"inputs": [
"Add_15"
],
"lower_bounds": [
0,
200
],
"name": "Slice_40",
"op": "Slice",
"outputs": [
"Slice_40_0"
],
"strides": [
1,
1
],
"upper_bounds": [
10,
300
]
},
{
"inputs": [
"Slice_16"
],
"name": "Negative_19",
"op": "Negative",
"outputs": [
"Negative_19_0"
]
},
{
"inputs": [
"Slice_23"
],
"name": "Negative_26",
"op": "Negative",
"outputs": [
"Negative_26_0"
]
},
{
"inputs": [
"Slice_33"
],
"name": "Negative_36",
"op": "Negative",
"outputs": [
"Negative_36_0"
]
},
{
"inputs": [
"Slice_40"
],
"name": "Tanh_41",
"op": "Tanh",
"outputs": [
"Tanh_41_0"
]
},
{
"inputs": [
"Negative_19"
],
"name": "Exp_20",
"op": "Exp",
"outputs": [
"Exp_20_0"
]
},
{
"inputs": [
"Negative_26"
],
"name": "Exp_27",
"op": "Exp",
"outputs": [
"Exp_27_0"
]
},
{
"inputs": [
"Negative_36"
],
"name": "Exp_37",
"op": "Exp",
"outputs": [
"Exp_37_0"
]
},
{
"inputs": [
"Broadcast_18",
"Exp_20"
],
"name": "Add_21",
"op": "Add",
"outputs": [
"Add_21_0"
]
},
{
"inputs": [
"Broadcast_25",
"Exp_27"
],
"name": "Add_28",
"op": "Add",
"outputs": [
"Add_28_0"
]
},
{
"inputs": [
"Broadcast_35",
"Exp_37"
],
"name": "Add_38",
"op": "Add",
"outputs": [
"Add_38_0"
]
},
{
"inputs": [
"Broadcast_18",
"Add_21"
],
"name": "Divide_22",
"op": "Divide",
"outputs": [
"Divide_22_0"
]
},
{
"inputs": [
"Broadcast_25",
"Add_28"
],
"name": "Divide_29",
"op": "Divide",
"outputs": [
"Divide_29_0"
]
},
{
"inputs": [
"Broadcast_35",
"Add_38"
],
"name": "Divide_39",
"op": "Divide",
"outputs": [
"Divide_39_0"
]
},
{
"inputs": [
"Divide_29",
"Broadcast_31"
],
"name": "Multiply_32",
"op": "Multiply",
"outputs": [
"Multiply_32_0"
]
},
{
"inputs": [
"Divide_39",
"Tanh_41"
],
"name": "Multiply_42",
"op": "Multiply",
"outputs": [
"Multiply_42_0"
]
},
{
"inputs": [
"Multiply_32",
"Multiply_42"
],
"name": "Add_43",
"op": "Add",
"outputs": [
"Add_43_0"
]
},
{
"inputs": [
"Add_43"
],
"name": "Tanh_44",
"op": "Tanh",
"outputs": [
"Tanh_44_0"
]
},
{
"inputs": [
"Divide_22",
"Tanh_44"
],
"name": "Multiply_45",
"op": "Multiply",
"outputs": [
"Multiply_45_0"
]
},
{
"inputs": [
"Multiply_45",
"Reshape_48"
],
"name": "Dot_49",
"op": "Dot",
"outputs": [
"Dot_49_0"
],
"reduction_axes_count": 1
},
{
"inputs": [
"Dot_49",
"Broadcast_50"
],
"name": "Add_51",
"op": "Add",
"outputs": [
"Add_51_0"
]
},
{
"inputs": [
"Add_51",
"Add_59"
],
"name": "Add_60",
"op": "Add",
"outputs": [
"Add_60_0"
]
},
{
"inputs": [
"Add_60"
],
"lower_bounds": [
0,
300
],
"name": "Slice_61",
"op": "Slice",
"outputs": [
"Slice_61_0"
],
"strides": [
1,
1
],
"upper_bounds": [
10,
400
]
},
{
"inputs": [
"Add_60"
],
"lower_bounds": [
0,
100
],
"name": "Slice_68",
"op": "Slice",
"outputs": [
"Slice_68_0"
],
"strides": [
1,
1
],
"upper_bounds": [
10,
200
]
},
{
"inputs": [
"Add_60"
],
"lower_bounds": [
0,
0
],
"name": "Slice_78",
"op": "Slice",
"outputs": [
"Slice_78_0"
],
"strides": [
1,
1
],
"upper_bounds": [
10,
100
]
},
{
"inputs": [
"Add_60"
],
"lower_bounds": [
0,
200
],
"name": "Slice_85",
"op": "Slice",
"outputs": [
"Slice_85_0"
],
"strides": [
1,
1
],
"upper_bounds": [
10,
300
]
},
{
"inputs": [
"Slice_61"
],
"name": "Negative_64",
"op": "Negative",
"outputs": [
"Negative_64_0"
]
},
{
"inputs": [
"Slice_68"
],
"name": "Negative_71",
"op": "Negative",
"outputs": [
"Negative_71_0"
]
},
{
"inputs": [
"Slice_78"
],
"name": "Negative_81",
"op": "Negative",
"outputs": [
"Negative_81_0"
]
},
{
"inputs": [
"Slice_85"
],
"name": "Tanh_86",
"op": "Tanh",
"outputs": [
"Tanh_86_0"
]
},
{
"inputs": [
"Negative_64"
],
"name": "Exp_65",
"op": "Exp",
"outputs": [
"Exp_65_0"
]
},
{
"inputs": [
"Negative_71"
],
"name": "Exp_72",
"op": "Exp",
"outputs": [
"Exp_72_0"
]
},
{
"inputs": [
"Negative_81"
],
"name": "Exp_82",
"op": "Exp",
"outputs": [
"Exp_82_0"
]
},
{
"inputs": [
"Broadcast_63",
"Exp_65"
],
"name": "Add_66",
"op": "Add",
"outputs": [
"Add_66_0"
]
},
{
"inputs": [
"Broadcast_70",
"Exp_72"
],
"name": "Add_73",
"op": "Add",
"outputs": [
"Add_73_0"
]
},
{
"inputs": [
"Broadcast_80",
"Exp_82"
],
"name": "Add_83",
"op": "Add",
"outputs": [
"Add_83_0"
]
},
{
"inputs": [
"Broadcast_63",
"Add_66"
],
"name": "Divide_67",
"op": "Divide",
"outputs": [
"Divide_67_0"
]
},
{
"inputs": [
"Broadcast_70",
"Add_73"
],
"name": "Divide_74",
"op": "Divide",
"outputs": [
"Divide_74_0"
]
},
{
"inputs": [
"Broadcast_80",
"Add_83"
],
"name": "Divide_84",
"op": "Divide",
"outputs": [
"Divide_84_0"
]
},
{
"inputs": [
"Divide_74",
"Broadcast_76"
],
"name": "Multiply_77",
"op": "Multiply",
"outputs": [
"Multiply_77_0"
]
},
{
"inputs": [
"Divide_84",
"Tanh_86"
],
"name": "Multiply_87",
"op": "Multiply",
"outputs": [
"Multiply_87_0"
]
},
{
"inputs": [
"Multiply_77",
"Multiply_87"
],
"name": "Add_88",
"op": "Add",
"outputs": [
"Add_88_0"
]
},
{
"inputs": [
"Add_88"
],
"name": "Tanh_89",
"op": "Tanh",
"outputs": [
"Tanh_89_0"
]
},
{
"inputs": [
"Add_88"
],
"name": "Result_94",
"op": "Result",
"outputs": [
"Result_94_0"
]
},
{
"inputs": [
"Divide_67",
"Tanh_89"
],
"name": "Multiply_90",
"op": "Multiply",
"outputs": [
"Multiply_90_0"
]
},
{
"inputs": [
"Multiply_90"
],
"name": "Result_93",
"op": "Result",
"outputs": [
"Result_93_0"
]
}
],
"parameters": [
"Parameter_0",
"Parameter_1",
"Parameter_2",
"Parameter_9",
"Parameter_10",
"Parameter_46",
"Parameter_47",
"Parameter_54",
"Parameter_55"
],
"result": [
"Result_93",
"Result_94"
]
}
]
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment