//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************

#include <algorithm>
#include <cstdio>
#include <iostream>
#include <list>
#include <memory>

#include "gtest/gtest.h"

#include "ngraph/file_util.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/log.hpp"
#include "ngraph/ngraph.hpp"
#include "ngraph/op/fused/group_conv.hpp"
#include "ngraph/op/relu.hpp"
#include "ngraph/op/reshape.hpp"
#include "ngraph/op/softmax.hpp"
#include "ngraph/pass/batch_fusion.hpp"
#include "ngraph/pass/core_fusion.hpp"
#include "ngraph/pass/graph_rewrite.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/pattern/matcher.hpp"
#include "ngraph/pattern/op/label.hpp"
#include "ngraph/pattern/op/skip.hpp"
#include "ngraph/serializer.hpp"
#include "ngraph/util.hpp"
#include "util/all_close.hpp"
#include "util/autodiff/backprop_function.hpp"
#include "util/matcher.hpp"
#include "util/random.hpp"
#include "util/test_tools.hpp"

using namespace ngraph;
using namespace std;

TEST(core_fusion, core_fusion_pass_basic)
{
    auto shape_a = Shape{1, 5};
    auto A = op::Constant::create(element::f32, shape_a, {0, 0, 0, 0, 0});
    auto B = make_shared<op::Parameter>(element::f32, shape_a);
    auto max = make_shared<op::Maximum>(A, B);
    auto graph = make_shared<op::Abs>(max);
    pass::Manager pass_manager;
    pass_manager.register_pass<pass::CoreFusion>();
    auto func = make_shared<Function>(graph, ParameterVector{B});
    pass_manager.run_passes(func);
    // Maximum with an all-zero constant is equivalent to Relu, so the pass
    // should have replaced it
    ASSERT_NE(as_type_ptr<op::Relu>(graph->get_argument(0)), nullptr);
}

#ifndef NGRAPH_JSON_DISABLE
TEST(core_fusion, sigmoid_fprop_fusion)
{
    pass::Manager pass_manager;
    pass_manager.register_pass<pass::CoreFusion>();
    const string json_path =
        file_util::path_join(SERIALIZED_ZOO, "mxnet/Graph_fprop_sigmoid.json");
    const string json_string = file_util::read_file_to_string(json_path);
    stringstream ss(json_string);
    shared_ptr<Function> func = ngraph::deserialize(ss);
    pass_manager.run_passes(func);
    size_t ccg = count_ops_of_type<op::Sigmoid>(func);
    ASSERT_EQ(ccg, 1);
}

TEST(core_fusion, sigmoid_bprop_fusion)
{
    const string json_path =
        file_util::path_join(SERIALIZED_ZOO, "mxnet/Graph_fprop_sigmoid.json");
    const string json_string = file_util::read_file_to_string(json_path);
    stringstream ss(json_string);
    shared_ptr<Function> func = ngraph::deserialize(ss);
    auto df = autodiff::backprop_function(func);
    auto backend = runtime::Backend::create("CPU");
    backend->compile(df);
    size_t ccg = count_ops_of_type<op::SigmoidBackprop>(df);
    ASSERT_EQ(ccg, 1);
}
#endif

TEST(core_fusion, sigmoid_fprop_fusion_no_broadcast)
{
    auto make_function = []() {
        auto input = std::make_shared<op::Parameter>(element::f32, Shape{3, 4});
        auto neg_input = std::make_shared<op::Negative>(input);
        auto exp_neg_input = std::make_shared<op::Exp>(neg_input);
        auto constant = op::Constant::create(
            element::f32, Shape{3, 4}, {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1});
        auto add_exp = std::make_shared<op::Add>(exp_neg_input, constant);
        auto divide_1_over_exp = std::make_shared<op::Divide>(constant, add_exp);
        return make_shared<Function>(NodeVector{divide_1_over_exp}, ParameterVector{input});
    };
    auto func = make_function();

    // Check fusion happens
    pass::Manager pass_manager;
    pass_manager.register_pass<pass::CoreFusion>();
    pass_manager.run_passes(func);
    size_t ccg = count_ops_of_type<op::Sigmoid>(func);
    ASSERT_EQ(ccg, 1);
}
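
// The sigmoid tests above and below target the explicit expression
//     sigmoid(x) = 1 / (1 + exp(-x)),
// built here as Negative -> Exp -> Add -> Divide. The match should only fire
// when the constant operand is an all-ones tensor; no_broadcast2 below feeds a
// mixed 0/1 constant and expects the graph to be left unchanged.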

TEST(core_fusion, sigmoid_fprop_fusion_no_broadcast2)
{
    auto make_function = []() {
        auto input = std::make_shared<op::Parameter>(element::f32, Shape{3, 4});
        auto neg_input = std::make_shared<op::Negative>(input);
        auto exp_neg_input = std::make_shared<op::Exp>(neg_input);
        auto constant = op::Constant::create(
            element::f32, Shape{3, 4}, {1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1});
        auto add_exp = std::make_shared<op::Add>(exp_neg_input, constant);
        auto divide_1_over_exp = std::make_shared<op::Divide>(constant, add_exp);
        return make_shared<Function>(NodeVector{divide_1_over_exp}, ParameterVector{input});
    };
    auto func = make_function();

    pass::Manager pass_manager;
    pass_manager.register_pass<pass::CoreFusion>();
    pass_manager.run_passes(func);
    size_t ccg = count_ops_of_type<op::Sigmoid>(func);
    ASSERT_EQ(ccg, 0);
}

TEST(core_fusion, reshape_broadcast)
{
    auto generate_func = []() {
        auto input = make_shared<op::Parameter>(element::f32, Shape{10});
        auto reshape1 = make_shared<op::Reshape>(input, AxisVector{0}, Shape{1, 10, 1});
        auto broadcast =
            make_shared<op::Broadcast>(reshape1, Shape{1, 5, 10, 8, 1, 20}, AxisSet{1, 3, 5});
        auto f = make_shared<Function>(broadcast, ParameterVector{input});
        return f;
    };

    auto baseline_f = generate_func();
    auto optimized_f = generate_func();
    auto baseline_input_shape = baseline_f->get_parameters().at(0)->get_shape();

    pass::Manager pass_manager;
    pass_manager.register_pass<pass::CoreFusion>();
    pass_manager.run_passes(optimized_f);

    test::Uniform<float> rng(0.0f, 100.0f);
    vector<vector<float>> args;
    vector<float> tensor_val(shape_size(baseline_input_shape));
    rng.initialize(tensor_val);
    args.push_back(tensor_val);

    auto baseline_results = execute(baseline_f, args, "INTERPRETER");
    auto optimized_results = execute(optimized_f, args, "INTERPRETER");

    EXPECT_TRUE(test::all_close(baseline_results.at(0), optimized_results.at(0)));
}

TEST(core_fusion, reshape_broadcast_graph_optimized)
{
    auto input = make_shared<op::Parameter>(element::f32, Shape{10});
    auto reshape1 = make_shared<op::Reshape>(input, AxisVector{0}, Shape{1, 10, 1});
    auto broadcast =
        make_shared<op::Broadcast>(reshape1, Shape{1, 5, 10, 8, 1, 20}, AxisSet{1, 3, 5});
    auto optimized_f = make_shared<Function>(broadcast, ParameterVector{input});

    pass::Manager pass_manager;
    pass_manager.register_pass<pass::CoreFusion>();
    pass_manager.run_passes(optimized_f);

    auto new_broadcast =
        as_type_ptr<op::Broadcast>(optimized_f->get_results().at(0)->get_argument(0));
    EXPECT_EQ(new_broadcast->get_argument(0), input);
    EXPECT_EQ(new_broadcast->get_broadcast_axes(), (AxisSet{0, 1, 3, 4, 5}));
}
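
// The two tests above fold Reshape{10 -> 1x10x1} + Broadcast into a single
// Broadcast of the original input (hence the rewritten axes {0, 1, 3, 4, 5}).
// The next two tests pin down cases where the fold must not fire: a broadcast
// whose target shape carries a size-1 axis of its own (which the pass
// conservatively leaves alone), and a reshape that actually rearranges
// elements rather than just inserting size-1 axes.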

TEST(core_fusion, reshape_broadcast_adds_one)
{
    auto input = make_shared<op::Parameter>(element::f32, Shape{10});
    auto reshape1 = make_shared<op::Reshape>(input, AxisVector{0}, Shape{1, 10, 1});
    auto broadcast = make_shared<op::Broadcast>(
        reshape1, Shape{1, 5, 10, 8, 1, 20, 1}, AxisSet{1, 3, 5, 6});
    auto optimized_f = make_shared<Function>(broadcast, ParameterVector{input});

    pass::Manager pass_manager;
    pass_manager.register_pass<pass::CoreFusion>();
    pass_manager.run_passes(optimized_f);

    auto new_broadcast =
        as_type_ptr<op::Broadcast>(optimized_f->get_results().at(0)->get_argument(0));
    EXPECT_EQ(new_broadcast, broadcast);
    EXPECT_EQ(new_broadcast->get_argument(0), reshape1);
}

TEST(core_fusion, reshape_broadcast_wrong_reshape)
{
    auto input = make_shared<op::Parameter>(element::f32, Shape{10});
    auto reshape1 = make_shared<op::Reshape>(input, AxisVector{0}, Shape{1, 5, 2});
    auto broadcast =
        make_shared<op::Broadcast>(reshape1, Shape{1, 5, 5, 8, 2, 20}, AxisSet{1, 3, 5});
    auto optimized_f = make_shared<Function>(broadcast, ParameterVector{input});

    pass::Manager pass_manager;
    pass_manager.register_pass<pass::CoreFusion>();
    pass_manager.run_passes(optimized_f);

    auto new_broadcast =
        as_type_ptr<op::Broadcast>(optimized_f->get_results().at(0)->get_argument(0));
    EXPECT_EQ(new_broadcast, broadcast);
    EXPECT_EQ(new_broadcast->get_argument(0), reshape1);
}

TEST(core_fusion, sparsity_opt_56x56)
{
    Shape win_size_3{1, 1, 3, 3};
    Shape win_size_1{1, 1, 1, 1};
    Strides stride_2{2, 2};
    Strides stride_1{1, 1};
    CoordinateDiff pad_0{0, 0};
    CoordinateDiff pad_1{1, 1};

    auto data_stride3 = std::make_shared<op::Parameter>(element::f32, Shape{1, 64, 56, 56});
    auto weights_stride3 = std::make_shared<op::Parameter>(element::f32, Shape{64, 64, 3, 3});

    auto conv_stride3 = std::make_shared<op::Convolution>(
        data_stride3, weights_stride3, stride_1, stride_1, pad_1, pad_1);

    auto param_broadcast_w3 = std::make_shared<op::Parameter>(element::f32, Shape{64});
    auto broadcast_w3 = std::make_shared<op::Broadcast>(
        param_broadcast_w3, Shape{1, 64, 56, 56}, AxisSet{0, 2, 3});
    auto add_w3 = std::make_shared<op::Add>(conv_stride3, broadcast_w3);
    auto relu_w3 = std::make_shared<op::Relu>(add_w3);
    ///
    auto weights_stride1 = std::make_shared<op::Parameter>(element::f32, Shape{256, 64, 1, 1});
    auto conv_stride1 = std::make_shared<op::Convolution>(relu_w3, weights_stride1);
    auto param_broadcast_w1 = std::make_shared<op::Parameter>(element::f32, Shape{256});
    auto broadcast_w1 = std::make_shared<op::Broadcast>(
        param_broadcast_w1, Shape{1, 256, 56, 56}, AxisSet{0, 2, 3});
    auto add_w1 = std::make_shared<op::Add>(conv_stride1, broadcast_w1);
    ////
    auto other_arg = std::make_shared<op::Parameter>(element::f32, Shape{1, 256, 56, 56});
    auto add_two_convs = std::make_shared<op::Add>(add_w1, other_arg);
    auto relu_two_convs = std::make_shared<op::Relu>(add_two_convs);
    ///
    auto weights_conv_s2 = std::make_shared<op::Parameter>(element::f32, Shape{512, 256, 1, 1});
    auto conv_s2_1 = std::make_shared<op::Convolution>(relu_two_convs, weights_conv_s2, stride_2);
    auto conv_s2_2 = std::make_shared<op::Convolution>(relu_two_convs, weights_conv_s2, stride_2);

    pass::Manager pass_manager;
    pass_manager.register_pass<pass::CoreFusion>();
    auto params = ParameterVector{data_stride3,
                                  weights_stride3,
                                  param_broadcast_w3,
                                  weights_stride1,
                                  param_broadcast_w1,
                                  other_arg,
                                  weights_conv_s2};
    auto func = make_shared<Function>(NodeVector{conv_s2_1, conv_s2_2}, params);
    pass_manager.run_passes(func);
    auto results = func->get_results();
    auto t_eltwise_conv1 = as_type_ptr<op::Convolution>(results.at(0)->get_argument(0));
    auto t_eltwise_conv2 = as_type_ptr<op::Convolution>(results.at(1)->get_argument(0));
    ASSERT_TRUE(t_eltwise_conv1);
    ASSERT_TRUE(t_eltwise_conv2);
    ASSERT_EQ(t_eltwise_conv1->get_window_movement_strides(), stride_1);
    ASSERT_EQ(t_eltwise_conv2->get_window_movement_strides(), stride_1);
}
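
// Softmax taken over every non-batch axis commutes with a pure transpose: for
// the NCHW <-> NHWC reshapes built below,
//     reshape_to_nchw(softmax(reshape_to_nhwc(x))) == softmax(x),
// so the pass can drop both reshapes. The test checks the rewritten graph
// against an unmodified baseline on the interpreter backend.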

static std::shared_ptr<Function> generate_reshape_softmax_reshape()
{
    Shape shape_nchw{10, 20, 30, 40};
    Shape shape_nhwc{10, 30, 40, 20};
    AxisVector to_nhwc{0, 2, 3, 1};
    AxisVector to_nchw{0, 3, 1, 2};
    auto input = make_shared<op::Parameter>(element::f32, shape_nchw);
    auto reshape1 = make_shared<op::Reshape>(input, to_nhwc, shape_nhwc);
    auto softmax = make_shared<op::Softmax>(reshape1, AxisSet{1, 2, 3});
    auto reshape2 = make_shared<op::Reshape>(softmax, to_nchw, shape_nchw);
    auto f = make_shared<Function>(reshape2, ParameterVector{input});
    return f;
}

TEST(core_fusion, reshape_softmax_reshape)
{
    auto baseline_f = generate_reshape_softmax_reshape();
    auto optimized_f = generate_reshape_softmax_reshape();
    auto baseline_input = baseline_f->get_parameters().at(0);

    pass::Manager pass_manager;
    pass_manager.register_pass<pass::CoreFusion>();
    pass_manager.run_passes(optimized_f);

    test::Uniform<float> rng(0.0f, 100.0f);
    vector<vector<float>> args;
    vector<float> tensor_val(shape_size(baseline_input->get_shape()));
    rng.initialize(tensor_val);
    args.push_back(tensor_val);

    auto baseline_results = execute(baseline_f, args, "INTERPRETER");
    auto optimized_results = execute(optimized_f, args, "INTERPRETER");

    EXPECT_TRUE(test::all_close(baseline_results.at(0), optimized_results.at(0)));
}
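
// The next four tests cover folding an explicit op::Pad into the convolution's
// own above/below padding. Compiling for the CPU backend runs the fusion
// passes; the Pad op should disappear only when the pad value is exactly zero,
// and must survive when it is not (non_zero_padded_conv pads with 1.0f).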

TEST(core_fusion, zero_padded_reshaped_conv)
{
    auto X = make_shared<op::Parameter>(element::f32, Shape{1, 2, 2, 1});
    auto F = make_shared<op::Parameter>(element::f32, Shape{1, 1, 1, 1});

    auto pad_value = op::Constant::create<float>(element::f32, Shape{}, std::vector<float>{0.0f});

    auto pad = make_shared<op::Pad>(
        X, pad_value, CoordinateDiff{0, 1, 0, 0}, CoordinateDiff{0, 0, 1, 0});

    auto reshape = make_shared<op::Reshape>(pad, AxisVector{0, 3, 1, 2}, Shape{1, 1, 3, 3});

    auto conv = make_shared<op::Convolution>(reshape,
                                             F,
                                             Strides{1, 1},
                                             Strides{1, 1},
                                             CoordinateDiff{0, 0},
                                             CoordinateDiff{0, 0},
                                             Strides{1, 1});

    auto func = make_shared<Function>(conv, ParameterVector{X, F});

    ASSERT_EQ(count_ops_of_type<op::Pad>(func), 1);

    auto backend = runtime::Backend::create("CPU");
    backend->compile(func);

    ASSERT_EQ(count_ops_of_type<op::Pad>(func), 0);
}

TEST(core_fusion, zero_padded_conv)
{
    auto X = make_shared<op::Parameter>(element::f32, Shape{1, 1, 2, 2});
    auto F = make_shared<op::Parameter>(element::f32, Shape{1, 1, 1, 1});

    auto pad_value = op::Constant::create<float>(element::f32, Shape{}, std::vector<float>{0.0f});

    auto pad = make_shared<op::Pad>(
        X, pad_value, CoordinateDiff{0, 0, 0, 1}, CoordinateDiff{0, 0, 1, 0});

    auto conv = make_shared<op::Convolution>(pad,
                                             F,
                                             Strides{1, 1},
                                             Strides{1, 1},
                                             CoordinateDiff{0, 0},
                                             CoordinateDiff{0, 0},
                                             Strides{1, 1});

    auto func = make_shared<Function>(conv, ParameterVector{X, F});

    ASSERT_EQ(count_ops_of_type<op::Pad>(func), 1);

    auto backend = runtime::Backend::create("CPU");
    backend->compile(func);

    ASSERT_EQ(count_ops_of_type<op::Pad>(func), 0);
}

TEST(core_fusion, non_zero_padded_conv)
{
    auto X = make_shared<op::Parameter>(element::f32, Shape{1, 1, 2, 2});
    auto F = make_shared<op::Parameter>(element::f32, Shape{1, 1, 1, 1});

    auto pad_value = op::Constant::create<float>(element::f32, Shape{}, std::vector<float>{1.0f});

    auto pad = make_shared<op::Pad>(
        X, pad_value, CoordinateDiff{0, 0, 0, 1}, CoordinateDiff{0, 0, 1, 0});

    auto conv = make_shared<op::Convolution>(pad,
                                             F,
                                             Strides{1, 1},
                                             Strides{1, 1},
                                             CoordinateDiff{0, 0},
                                             CoordinateDiff{0, 0},
                                             Strides{1, 1});

    auto func = make_shared<Function>(conv, ParameterVector{X, F});

    ASSERT_EQ(count_ops_of_type<op::Pad>(func), 1);

    auto backend = runtime::Backend::create("CPU");
    backend->compile(func);

    ASSERT_EQ(count_ops_of_type<op::Pad>(func), 1);
}

TEST(core_fusion, zero_padded_conv_backprop_filters)
{
    auto X = make_shared<op::Parameter>(element::f32, Shape{1, 1, 2, 2});
    auto F = make_shared<op::Parameter>(element::f32, Shape{1, 1, 2, 2});

    auto pad_value = op::Constant::create<float>(element::f32, Shape{}, std::vector<float>{0.0f});

    auto pad = make_shared<op::Pad>(
        X, pad_value, CoordinateDiff{0, 0, 0, 1}, CoordinateDiff{0, 0, 1, 0});

    auto conv = make_shared<op::ConvolutionBackpropFilters>(pad,
                                                            Shape{1, 1, 2, 2},
                                                            F,
                                                            Strides{1, 1},
                                                            Strides{1, 1},
                                                            CoordinateDiff{0, 0},
                                                            CoordinateDiff{0, 0},
                                                            Strides{1, 1});

    auto func = make_shared<Function>(conv, ParameterVector{X, F});

    ASSERT_EQ(count_ops_of_type<op::Pad>(func), 1);

    auto backend = runtime::Backend::create("CPU");
    backend->compile(func);

    ASSERT_EQ(count_ops_of_type<op::Pad>(func), 0);
}

TEST(core_fusion, conv_bias)
{
    auto gen_f = [](bool with_fused_op) {
        auto data = make_shared<op::Parameter>(element::f32, Shape{2, 3, 4, 5});
        auto weights = make_shared<op::Parameter>(element::f32, Shape{4, 3, 2, 2});
        auto bias = make_shared<op::Parameter>(element::f32, Shape{4});
        if (with_fused_op)
        {
            return make_shared<Function>(make_shared<op::ConvolutionBias>(data, weights, bias),
                                         ParameterVector{data, weights, bias});
        }
        else
        {
            auto conv = make_shared<op::Convolution>(data, weights);
            auto conv_bias =
                conv + make_shared<op::Broadcast>(bias, conv->get_shape(), AxisSet{0, 2, 3});
            return make_shared<Function>(conv_bias, ParameterVector{data, weights, bias});
        }
    };

    auto fused_f = gen_f(true);
    auto decomp_f1 = gen_f(false);
    auto decomp_f2 = gen_f(false);

    pass::Manager pass_manager;
    pass_manager.register_pass<pass::CoreFusion>(ngraph::pass::FusionType::ALL_FUSIONS);
    pass_manager.run_passes(decomp_f1);
    ASSERT_EQ(count_ops_of_type<op::ConvolutionBias>(decomp_f1), 1);

    test::Uniform<float> rng(0.0f, 1.0f);
    vector<vector<float>> args;
    for (shared_ptr<op::Parameter> param : fused_f->get_parameters())
    {
        vector<float> tensor_val(shape_size(param->get_shape()));
        rng.initialize(tensor_val);
        args.push_back(tensor_val);
    }

    auto fused_r = execute(fused_f, args, "INTERPRETER");
    auto decomp_r1 = execute(decomp_f1, args, "INTERPRETER");
    auto decomp_r2 = execute(decomp_f2, args, "INTERPRETER");

    for (size_t i = 0; i < fused_r.size(); i++)
    {
        EXPECT_TRUE(test::all_close(fused_r.at(i), decomp_r1.at(i)));
        EXPECT_TRUE(test::all_close(fused_r.at(i), decomp_r2.at(i)));
    }
}
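
// The PaddlePaddle frontend emits the bias addend as a Broadcast to {2, 4, 12}
// followed by a Reshape back to the convolution's output shape, instead of a
// direct Broadcast; the test below checks that this variant still fuses into
// ConvolutionBias.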

TEST(core_fusion, conv_bias_bcast_reshape)
{
    // PaddlePaddle pattern
    auto gen_f = [](bool with_fused_op) {
        auto data = make_shared<op::Parameter>(element::f32, Shape{2, 3, 4, 5});
        auto weights = make_shared<op::Parameter>(element::f32, Shape{4, 3, 2, 2});
        auto bias = make_shared<op::Parameter>(element::f32, Shape{4});
        if (with_fused_op)
        {
            return make_shared<Function>(make_shared<op::ConvolutionBias>(data, weights, bias),
                                         ParameterVector{data, weights, bias});
        }
        else
        {
            auto conv = make_shared<op::Convolution>(data, weights);
            auto bias_bcast = make_shared<op::Broadcast>(bias, Shape{2, 4, 12}, AxisSet{0, 2});
            auto conv_bias = conv + make_shared<op::Reshape>(
                                        bias_bcast, AxisVector{0, 1, 2}, conv->get_shape());
            return make_shared<Function>(conv_bias, ParameterVector{data, weights, bias});
        }
    };

    auto fused_f = gen_f(true);
    auto decomp_f1 = gen_f(false);
    auto decomp_f2 = gen_f(false);

    pass::Manager pass_manager;
    pass_manager.register_pass<pass::CoreFusion>(ngraph::pass::FusionType::ALL_FUSIONS);
    pass_manager.run_passes(decomp_f1);
    ASSERT_EQ(count_ops_of_type<op::ConvolutionBias>(decomp_f1), 1);

    test::Uniform<float> rng(0.0f, 1.0f);
    vector<vector<float>> args;
    for (shared_ptr<op::Parameter> param : fused_f->get_parameters())
    {
        vector<float> tensor_val(shape_size(param->get_shape()));
        rng.initialize(tensor_val);
        args.push_back(tensor_val);
    }

    auto fused_r = execute(fused_f, args, "INTERPRETER");
    auto decomp_r1 = execute(decomp_f1, args, "INTERPRETER");
    auto decomp_r2 = execute(decomp_f2, args, "INTERPRETER");

    for (size_t i = 0; i < fused_r.size(); i++)
    {
        EXPECT_TRUE(test::all_close(fused_r.at(i), decomp_r1.at(i)));
        EXPECT_TRUE(test::all_close(fused_r.at(i), decomp_r2.at(i)));
    }
}

TEST(core_fusion, conv_bias_add)
{
    auto gen_f = [](bool with_fused_op) {
        auto data = make_shared<op::Parameter>(element::f32, Shape{2, 3, 4, 5});
        auto weights = make_shared<op::Parameter>(element::f32, Shape{4, 3, 2, 2});
        auto bias = make_shared<op::Parameter>(element::f32, Shape{4});
        auto add = make_shared<op::Parameter>(element::f32, Shape{2, 4, 3, 4});
        if (with_fused_op)
        {
            auto conv_bias = make_shared<op::ConvolutionBias>(data, weights, bias);
            return make_shared<Function>(make_shared<op::ConvolutionBiasAdd>(conv_bias, add),
                                         ParameterVector{data, weights, bias, add});
        }
        else
        {
            auto conv = make_shared<op::Convolution>(data, weights);
            auto conv_bias =
                conv + make_shared<op::Broadcast>(bias, conv->get_shape(), AxisSet{0, 2, 3});
            return make_shared<Function>(conv_bias + add,
                                         ParameterVector{data, weights, bias, add});
        }
    };

    auto fused_f = gen_f(true);
    auto decomp_f1 = gen_f(false);
    auto decomp_f2 = gen_f(false);

    pass::Manager pass_manager;
    pass_manager.register_pass<pass::CoreFusion>(ngraph::pass::FusionType::ALL_FUSIONS);
    pass_manager.run_passes(decomp_f1);
    ASSERT_EQ(count_ops_of_type<op::ConvolutionBiasAdd>(decomp_f1), 1);

    test::Uniform<float> rng(0.0f, 1.0f);
    vector<vector<float>> args;
    for (shared_ptr<op::Parameter> param : fused_f->get_parameters())
    {
        vector<float> tensor_val(shape_size(param->get_shape()));
        rng.initialize(tensor_val);
        args.push_back(tensor_val);
    }

    auto fused_r = execute(fused_f, args, "INTERPRETER");
    auto decomp_r1 = execute(decomp_f1, args, "INTERPRETER");
    auto decomp_r2 = execute(decomp_f2, args, "INTERPRETER");

    for (size_t i = 0; i < fused_r.size(); i++)
    {
        EXPECT_TRUE(test::all_close(fused_r.at(i), decomp_r1.at(i)));
        EXPECT_TRUE(test::all_close(fused_r.at(i), decomp_r2.at(i)));
    }
}
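
// ConvolutionBiasBackpropFiltersBias computes the filters gradient and the
// bias gradient (the Sum of delta over axes {0, 2, 3}) as two outputs of a
// single op; the fused branch below extracts them with GetOutputElement.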

// TODO: Enable once fusion is moved to core
TEST(core_fusion, DISABLED_conv_bias_bprop)
{
    auto gen_f = [](bool with_fused_op) {
        auto data = make_shared<op::Parameter>(element::f32, Shape{2, 3, 4, 5});
        auto weights = make_shared<op::Parameter>(element::f32, Shape{4, 3, 2, 2});
        auto bias = make_shared<op::Parameter>(element::f32, Shape{4});
        auto delta = make_shared<op::Parameter>(element::f32, Shape{2, 4, 3, 4});
        if (with_fused_op)
        {
            auto conv_bprop =
                make_shared<op::ConvolutionBiasBackpropFiltersBias>(data,
                                                                    weights->get_shape(),
                                                                    bias->get_shape(),
                                                                    delta,
                                                                    Strides{1, 1},
                                                                    Strides{1, 1},
                                                                    CoordinateDiff{0, 0},
                                                                    CoordinateDiff{0, 0},
                                                                    Strides{1, 1});
            auto goe0 = make_shared<op::GetOutputElement>(conv_bprop, 0);
            auto goe1 = make_shared<op::GetOutputElement>(conv_bprop, 1);
            return make_shared<Function>(NodeVector{goe0, goe1}, ParameterVector{data, delta});
        }
        else
        {
            auto conv_bprop = make_shared<op::ConvolutionBackpropFilters>(data,
                                                                          weights->get_shape(),
                                                                          delta,
                                                                          Strides{1, 1},
                                                                          Strides{1, 1},
                                                                          CoordinateDiff{0, 0},
                                                                          CoordinateDiff{0, 0},
                                                                          Strides{1, 1});
            auto bias_bprop = make_shared<op::Sum>(delta, AxisSet{0, 2, 3});
            return make_shared<Function>(NodeVector{conv_bprop, bias_bprop},
                                         ParameterVector{data, delta});
        }
    };

    auto fused_f = gen_f(true);
    auto decomp_f1 = gen_f(false);
    auto decomp_f2 = gen_f(false);

    pass::Manager pass_manager;
    pass_manager.register_pass<pass::CoreFusion>(ngraph::pass::FusionType::ALL_FUSIONS);
    pass_manager.run_passes(decomp_f1);
    ASSERT_EQ(count_ops_of_type<op::ConvolutionBiasBackpropFiltersBias>(decomp_f1), 1);

    test::Uniform<float> rng(0.0f, 1.0f);
    vector<vector<float>> args;
    for (shared_ptr<op::Parameter> param : fused_f->get_parameters())
    {
        vector<float> tensor_val(shape_size(param->get_shape()));
        rng.initialize(tensor_val);
        args.push_back(tensor_val);
    }

    auto fused_r = execute(fused_f, args, "INTERPRETER");
    auto decomp_r1 = execute(decomp_f1, args, "INTERPRETER");
    auto decomp_r2 = execute(decomp_f2, args, "INTERPRETER");

    for (size_t i = 0; i < fused_r.size(); i++)
    {
        EXPECT_TRUE(test::all_close(fused_r.at(i), decomp_r1.at(i)));
        EXPECT_TRUE(test::all_close(fused_r.at(i), decomp_r2.at(i)));
    }
}

TEST(batch_fusion, group_convolution_fusion)
{
    Shape shape_a{1, 32, 2, 2};
    auto A = make_shared<op::Parameter>(element::f32, shape_a);
    Shape shape_b{2, 16, 1, 1};
    auto B = make_shared<op::Parameter>(element::f32, shape_b);
    Shape shape_r{1, 2, 2, 2};

    auto a_slice0 =
        std::make_shared<op::Slice>(A, Coordinate{0, 0, 0, 0}, Coordinate{1, 16, 2, 2});
    auto a_slice1 =
        std::make_shared<op::Slice>(A, Coordinate{0, 16, 0, 0}, Coordinate{1, 32, 2, 2});

    auto b_slice0 =
        std::make_shared<op::Slice>(B, Coordinate{0, 0, 0, 0}, Coordinate{1, 16, 1, 1});
    auto b_slice1 =
        std::make_shared<op::Slice>(B, Coordinate{1, 0, 0, 0}, Coordinate{2, 16, 1, 1});

    auto conv_lower = make_shared<op::Convolution>(a_slice0,
                                                   b_slice0,
                                                   Strides{1, 1},
                                                   Strides{1, 1},
                                                   CoordinateDiff{0, 0},
                                                   CoordinateDiff{0, 0},
                                                   Strides{1, 1});

    auto conv_upper = make_shared<op::Convolution>(a_slice1,
                                                   b_slice1,
                                                   Strides{1, 1},
                                                   Strides{1, 1},
                                                   CoordinateDiff{0, 0},
                                                   CoordinateDiff{0, 0},
                                                   Strides{1, 1});

    auto concat = make_shared<op::Concat>(NodeVector{conv_lower, conv_upper}, 1);

    auto f = make_shared<Function>(NodeVector{concat}, ParameterVector{A, B});
    pass::Manager pass_manager;
    pass_manager.register_pass<pass::BatchFusion>();
    pass_manager.run_passes(f);
    auto gc = as_type_ptr<op::GroupConvolution>(f->get_results().at(0)->get_argument(0));
    ASSERT_TRUE(gc);
}

TEST(core_fusion, pass_property)
{
    auto pass = std::make_shared<ngraph::pass::CoreFusion>();
    ASSERT_FALSE(pass->get_property(pass::PassProperty::REQUIRE_STATIC_SHAPE));
    ASSERT_FALSE(pass->get_property(pass::PassProperty::CHANGE_DYNAMIC_STATE));
}

TEST(batch_fusion, pass_property)
{
    auto pass = std::make_shared<ngraph::pass::BatchFusion>();
    ASSERT_TRUE(pass->get_property(pass::PassProperty::REQUIRE_STATIC_SHAPE));
    ASSERT_FALSE(pass->get_property(pass::PassProperty::CHANGE_DYNAMIC_STATE));
}
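
// The softmax-crossentropy fusions below rewrite
//     loss = -sum(labels * log(softmax(input)))
// into the numerically stable form
//     loss = -sum(labels * (input - max(input) - log(sum(exp(input - max(input))))))
// which contains no standalone Softmax op.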

#ifndef NGRAPH_JSON_DISABLE
TEST(core_fusion, softmax_crossentropy_fprop_1)
{
    const std::string file_name("paddlepaddle/ngraph-paddlepaddle-function3.json");
    auto cpu_f = make_function_from_file(file_name);
    auto int_f = make_function_from_file(file_name);
    test::Uniform<double> rng(-1.0, 1.0);
    vector<vector<double>> args;

    for (shared_ptr<op::Parameter> param : int_f->get_parameters())
    {
        vector<double> tensor_val(shape_size(param->get_shape()));
        rng.initialize(tensor_val);
        args.push_back(tensor_val);
    }
    auto int_results = execute(int_f, args, "INTERPRETER");
    auto cpu_results = execute(cpu_f, args, "CPU");
    for (size_t i = 0; i < cpu_results.size(); i++)
    {
        EXPECT_TRUE(test::all_close(cpu_results.at(i), int_results.at(i)));
    }
    // the numerically stable rewrite described above replaces the explicit
    // softmax, so the Softmax op count should be zero if the fusion succeeded
    size_t softmax = count_ops_of_type<op::Softmax>(cpu_f);
    ASSERT_EQ(softmax, 0);
}

TEST(core_fusion, softmax_crossentropy_fprop_2)
{
    const std::string file_name("paddlepaddle/ngraph-paddlepaddle-function1.json");
    auto cpu_f = make_function_from_file(file_name);
    auto int_f = make_function_from_file(file_name);
    test::Uniform<double> rng(-1.0, 1.0);
    vector<vector<double>> args;

    for (shared_ptr<op::Parameter> param : int_f->get_parameters())
    {
        vector<double> tensor_val(shape_size(param->get_shape()));
        rng.initialize(tensor_val);
        args.push_back(tensor_val);
    }
    auto int_results = execute(int_f, args, "INTERPRETER");
    auto cpu_results = execute(cpu_f, args, "CPU");
    for (size_t i = 0; i < cpu_results.size(); i++)
    {
        EXPECT_TRUE(test::all_close(cpu_results.at(i), int_results.at(i)));
    }
    // as in fprop_1, a successful fusion leaves no standalone Softmax ops
    size_t softmax = count_ops_of_type<op::Softmax>(cpu_f);
    ASSERT_EQ(softmax, 0);
}

TEST(core_fusion, softmax_crossentropy_bprop_with_soft_labels)
{
    const std::string file_name("paddlepaddle/ngraph-paddlepaddle-bprop0.json");
    auto cpu_f = make_function_from_file(file_name);
    auto int_f = make_function_from_file(file_name);
    test::Uniform<double> rng(-1.0, 1.0);
    vector<vector<double>> args;

    for (shared_ptr<op::Parameter> param : int_f->get_parameters())
    {
        vector<double> tensor_val(shape_size(param->get_shape()));
        rng.initialize(tensor_val);
        args.push_back(tensor_val);
    }
    auto int_results = execute(int_f, args, "INTERPRETER");
    auto cpu_results = execute(cpu_f, args, "CPU");
    for (size_t i = 0; i < cpu_results.size(); i++)
    {
        EXPECT_TRUE(test::all_close(cpu_results.at(i), int_results.at(i)));
    }
    // the fusion cancels the (softmax / softmax) term for numeric stability,
    // so cpu_f should contain no Divide ops if the fusion is valid
    size_t divide = count_ops_of_type<op::Divide>(cpu_f);
    ASSERT_EQ(divide, 0);
}

TEST(core_fusion, softmax_crossentropy_bprop_with_ignore_mask)
{
    const std::string file_name("paddlepaddle/ngraph-paddlepaddle-bprop1.json");
    auto cpu_f = make_function_from_file(file_name);
    auto int_f = make_function_from_file(file_name);
    test::Uniform<double> rng(-1.0, 1.0);
    vector<vector<double>> args;

    for (shared_ptr<op::Parameter> param : int_f->get_parameters())
    {
        vector<double> tensor_val(shape_size(param->get_shape()));
        rng.initialize(tensor_val);
        args.push_back(tensor_val);
    }
    auto int_results = execute(int_f, args, "INTERPRETER");
    auto cpu_results = execute(cpu_f, args, "CPU");
    for (size_t i = 0; i < cpu_results.size(); i++)
    {
        EXPECT_TRUE(test::all_close(cpu_results.at(i), int_results.at(i)));
    }
    // as above, a valid fusion eliminates the (softmax / softmax) division
    size_t divide = count_ops_of_type<op::Divide>(cpu_f);
    ASSERT_EQ(divide, 0);
}
#endif
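
// Builds an op::SoftmaxCrossEntropy graph directly and compiles it for the CPU
// backend, then inspects the decomposition: hard labels (soft_label == false)
// should introduce exactly one OneHot op, and a non-negative ignore_index
// should additionally introduce a NotEqual mask.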

void test_softmax_crossentropy(Shape input_shape,
                               Shape label_shape,
                               bool soft_label,
                               int64_t ignore_index)
{
    auto input = std::make_shared<op::Parameter>(element::f64, input_shape);
    auto labels = std::make_shared<op::Parameter>(element::i64, label_shape);
    auto sm_ce =
        std::make_shared<op::SoftmaxCrossEntropy>(input, labels, soft_label, ignore_index);
    auto cpu_f = make_shared<Function>(sm_ce, ParameterVector{input, labels});

    test::Uniform<double> rng(-1.0, 1.0);
    vector<vector<double>> args;
    for (shared_ptr<op::Parameter> param : cpu_f->get_parameters())
    {
        vector<double> tensor_val(shape_size(param->get_shape()));
        rng.initialize(tensor_val);
        args.push_back(tensor_val);
    }
    auto cpu_results = execute(cpu_f, args, "CPU");
    // if soft_label is false, the lowering introduces a one-hot encoding of the labels
    if (!soft_label)
    {
        size_t onehot = count_ops_of_type<op::OneHot>(cpu_f);
        ASSERT_EQ(onehot, 1);
    }
    if (ignore_index >= 0 && !soft_label) // check for the mask
    {
        size_t not_equal = count_ops_of_type<op::NotEqual>(cpu_f);
        ASSERT_EQ(not_equal, 1);
    }
}

TEST(core_fusion, softmax_crossentropy)
{
    test_softmax_crossentropy(Shape{41, 37}, Shape{41, 37}, true, -1);
    test_softmax_crossentropy(Shape{41, 37}, Shape{41, 1}, false, 5);
}