/******************************************************************************* * Copyright 2017-2018 Intel Corporation * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at * * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * See the License for the specific language governing permissions and * limitations under the License. *******************************************************************************/ #include <algorithm> #include <cstdio> #include <iostream> #include <list> #include <memory> #include "gtest/gtest.h" #include "ngraph/graph_util.hpp" #include "ngraph/log.hpp" #include "ngraph/ngraph.hpp" #include "ngraph/ops/batch_norm.hpp" #include "ngraph/ops/sum.hpp" #include "ngraph/pass/graph_rewrite.hpp" #include "ngraph/pass/manager.hpp" #include "ngraph/pattern/matcher.hpp" #include "ngraph/pattern/op/any.hpp" #include "ngraph/pattern/op/label.hpp" // #include "ngraph/file_util.hpp" #include "ngraph/pass/reshape_elimination.hpp" #include "ngraph/pass/visualize_tree.hpp" #include "ngraph/runtime/cpu/ops/matmul_bias.hpp" #include "ngraph/runtime/cpu/pass/cpu_fusion.hpp" #include "ngraph/serializer.hpp" #include "ngraph/util.hpp" #include "nlohmann/json.hpp" #include "util/all_close.hpp" #include "util/matcher.hpp" #include "util/test_tools.hpp" using namespace ngraph; using namespace std; TEST(cpu_fusion, gemm_pattern) { Shape shape_w{2, 4}; Shape shape_x{4, 1}; Shape shape_b{1}; auto A = make_shared<op::Parameter>(element::f32, shape_w); auto B = make_shared<op::Parameter>(element::f32, shape_x); auto C = make_shared<op::Parameter>(element::f32, shape_b); auto dot = make_shared<op::Dot>(A, B); auto broadcast = make_shared<op::Broadcast>(C, dot->get_shape(), AxisSet{0}); auto add = dot + broadcast; auto W = std::make_shared<pattern::op::Label>(A); auto x = std::make_shared<pattern::op::Label>(B); auto reshape_pred = [](std::shared_ptr<Node> n) { return static_cast<bool>(std::dynamic_pointer_cast<op::Reshape>(n)); }; auto skip_w = std::make_shared<pattern::op::Any>(W, reshape_pred); auto skip_x = std::make_shared<pattern::op::Any>(x, reshape_pred); auto pdot = make_shared<op::Dot>(skip_w, skip_x); auto b = std::make_shared<pattern::op::Label>(C); auto pbroadcast = make_shared<op::Broadcast>(b, dot->get_shape(), AxisSet{0}); auto padd = pdot + pbroadcast; TestMatcher n(nullptr); ASSERT_TRUE(n.match(padd, add)); ASSERT_EQ(n.get_pattern_map()[W], A); ASSERT_EQ(n.get_pattern_map()[x], B); ASSERT_EQ(n.get_pattern_map()[b], C); auto reshape_w = make_shared<op::Reshape>(A, AxisVector{1, 0}, W->get_shape()); auto reshape_x = make_shared<op::Reshape>(B, AxisVector{1, 0}, x->get_shape()); auto re_dot = make_shared<op::Dot>(reshape_w, reshape_x); auto re_add = re_dot + broadcast; ASSERT_TRUE(n.match(padd, re_add)); ASSERT_EQ(n.get_pattern_map()[W], A); ASSERT_EQ(n.get_pattern_map()[x], B); ASSERT_EQ(n.get_pattern_map()[b], C); auto cg = make_shared<op::MatmulBias>(W, x, broadcast, W->get_shape(), x->get_shape(), false, false); } TEST(cpu_fusion, gemm_cpu) { Shape shapeA{3, 2}; Shape shapeB{2, 3}; Shape shapeC{2, 2}; auto A = make_shared<op::Parameter>(element::f32, shapeA); auto B = make_shared<op::Parameter>(element::f32, shapeB); auto reshape_w = make_shared<op::Reshape>(A, AxisVector{1, 0}, Shape{2, 3}); auto reshape_x = make_shared<op::Reshape>(B, AxisVector{1, 0}, Shape{3, 2}); auto one = op::Constant::create<float>(element::f32, Shape{}, std::vector<float>{1.0f}); auto broadcast = make_shared<op::Broadcast>(one, shapeC, AxisSet{0, 1}); auto cg = make_shared<op::MatmulBias>(A, B, broadcast, A->get_shape(), B->get_shape(), true, true); auto f = make_shared<Function>(cg, op::ParameterVector{A, B}); auto manager = runtime::Manager::get("CPU"); auto external = manager->compile(f); auto backend = manager->allocate_backend(); auto cf = backend->make_call_frame(external); shared_ptr<runtime::TensorView> a = backend->make_primary_tensor_view(element::f32, shapeA); shared_ptr<runtime::TensorView> b = backend->make_primary_tensor_view(element::f32, shapeB); shared_ptr<runtime::TensorView> result = backend->make_primary_tensor_view(element::f32, shapeC); vector<float> dataA{1.0f, 4.0f, 1.0f, 4.0f, 1.0f, 4.0f}; vector<float> dataB{3.0f, 3.0f, 3.0f, 9.0f, 9.0f, 9.0f}; copy_data(a, dataA); copy_data(b, dataB); cf->call({a, b}, {result}); vector<float> expected{10, 28, 37, 109}; ASSERT_TRUE(read_vector<float>(result) == expected); } TEST(cpu_fusion, cpu_fusion_pass_basic) { Shape shape{}; Shape shape_w{2, 4}; Shape shape_x{4, 1}; Shape shape_b{1}; auto A = make_shared<op::Parameter>(element::f32, shape_w); auto B = make_shared<op::Parameter>(element::f32, shape_x); auto C = make_shared<op::Parameter>(element::f32, shape_b); auto dot = make_shared<op::Dot>(A, B); auto broadcast = make_shared<op::Broadcast>(C, dot->get_shape(), AxisSet{0}); auto add = dot + broadcast; auto graph = make_shared<op::Abs>(add); pass::Manager pass_manager; pass_manager.register_pass<runtime::cpu::pass::CPUFusion>(); auto func = make_shared<Function>(graph, op::ParameterVector{A, B, C}); pass_manager.run_passes(func); ASSERT_NE(std::dynamic_pointer_cast<op::MatmulBias>(graph->get_input_op(0)), nullptr); } TEST(cpu_fusion, gemm_mlp) { const string json_path = file_util::path_join(SERIALIZED_ZOO, "mxnet/mnist_mlp_forward.json"); const string json_string = file_util::read_file_to_string(json_path); stringstream ss(json_string); shared_ptr<Function> func = ngraph::deserialize(ss); pass::Manager pass_manager; pass_manager.register_pass<runtime::cpu::pass::CPUFusion>(); pass_manager.run_passes(func); size_t ccg = count_ops_of_type<op::MatmulBias>(func); ASSERT_EQ(ccg, 3); } //TODO: Move this test to backend_test.in.cpp once we have the INTERPRETER // implementation for batchnorm TEST(cpu_fusion, batchnorm_fprop_b1c2h2w2) { auto input_shape = Shape{1, 2, 2, 2}; auto input = make_shared<op::Parameter>(element::f32, input_shape); auto mean_shape = Shape{2}; auto mean = make_shared<op::Parameter>(element::f32, mean_shape); auto var_shape = Shape{2}; auto var = make_shared<op::Parameter>(element::f32, var_shape); auto gamma_shape = Shape{2}; auto gamma = make_shared<op::Parameter>(element::f32, gamma_shape); auto beta_shape = Shape{2}; auto beta = make_shared<op::Parameter>(element::f32, beta_shape); double eps = 0.001; auto shape_r = Shape{1, 2, 2, 2}; auto bn = make_shared<op::BatchNorm>(eps, gamma, beta, input, mean, var); auto f = make_shared<Function>(bn, op::ParameterVector{mean, var, input, gamma, beta}); auto manager = runtime::Manager::get("CPU"); auto external = manager->compile(f); auto backend = manager->allocate_backend(); auto cf = backend->make_call_frame(external); // Create some tensors for input/output auto _input = backend->make_primary_tensor_view(element::f32, Shape{1, 2, 2, 2}); copy_data(_input, vector<float>{0.54881352f, 0.71518934f, 0.60276335f, 0.54488319f, 0.42365479f, 0.64589411f, 0.4375872f, 0.89177299f}); auto _mean = backend->make_primary_tensor_view(element::f32, mean_shape); copy_data(_mean, vector<float>{0.60291237f, 0.59972727f}); auto _var = backend->make_primary_tensor_view(element::f32, var_shape); copy_data(_var, vector<float>{0.00472505f, 0.03617825f}); auto _gamma = backend->make_primary_tensor_view(element::f32, gamma_shape); copy_data(_gamma, vector<float>{1.0f, 1.0f}); auto _beta = backend->make_primary_tensor_view(element::f32, beta_shape); copy_data(_beta, vector<float>{0.0f, 0.0f}); auto result = backend->make_primary_tensor_view(element::f32, shape_r); vector<float> expected_result{-0.71498716f, 1.48388731f, -0.00196938f, -0.76693159f, -0.91316032f, 0.23943391f, -0.84090298f, 1.51462936f}; cf->call({_mean, _var, _input, _gamma, _beta}, {result}); EXPECT_TRUE(test::all_close(expected_result, read_vector<float>(result))); } TEST(cpu_fusion, batchnorm_fprop_b2c2h2w1) { auto input_shape = Shape{2, 2, 2, 1}; auto input = make_shared<op::Parameter>(element::f32, input_shape); auto mean_shape = Shape{2}; auto mean = make_shared<op::Parameter>(element::f32, mean_shape); auto var_shape = Shape{2}; auto var = make_shared<op::Parameter>(element::f32, var_shape); auto gamma_shape = Shape{2}; auto gamma = make_shared<op::Parameter>(element::f32, gamma_shape); auto beta_shape = Shape{2}; auto beta = make_shared<op::Parameter>(element::f32, beta_shape); double eps = 0.001; auto shape_r = Shape{2, 2, 2, 1}; auto bn = make_shared<op::BatchNorm>(eps, gamma, beta, input, mean, var); auto f = make_shared<Function>(bn, op::ParameterVector{mean, var, input, gamma, beta}); auto manager = runtime::Manager::get("CPU"); auto external = manager->compile(f); auto backend = manager->allocate_backend(); auto cf = backend->make_call_frame(external); // Create some tensors for input/output auto _input = backend->make_primary_tensor_view(element::f32, Shape{2, 2, 2, 1}); copy_data(_input, vector<float>{0.54881352f, 0.71518934f, 0.60276335f, 0.54488319f, 0.42365479f, 0.64589411f, 0.4375872f, 0.89177299f}); auto _mean = backend->make_primary_tensor_view(element::f32, mean_shape); copy_data(_mean, vector<float>{0.60291237f, 0.59972727f}); auto _var = backend->make_primary_tensor_view(element::f32, var_shape); copy_data(_var, vector<float>{0.00472505f, 0.03617825f}); auto _gamma = backend->make_primary_tensor_view(element::f32, gamma_shape); copy_data(_gamma, vector<float>{1.0f, 1.0f}); auto _beta = backend->make_primary_tensor_view(element::f32, beta_shape); copy_data(_beta, vector<float>{0.0f, 0.0f}); auto result = backend->make_primary_tensor_view(element::f32, shape_r); vector<float> expected_result{ -0.714987f, 1.48389f, 0.015746f, -0.284436f, -2.36912f, 0.56806f, -0.840903f, 1.51463f}; cf->call({_mean, _var, _input, _gamma, _beta}, {result}); EXPECT_TRUE(test::all_close(expected_result, read_vector<float>(result))); } TEST(cpu_fusion, fuse_fprop_bn) { pass::Manager pass_manager; pass_manager.register_pass<pass::VisualizeTree>("bn_fprop_before_fusion.png"); pass_manager.register_pass<ngraph::pass::ReshapeElimination>(); pass_manager.register_pass<runtime::cpu::pass::CPUFusion>(); pass_manager.register_pass<pass::VisualizeTree>("bn_fprop_after_fusion.png"); const string json_path = file_util::path_join(SERIALIZED_ZOO, "mxnet/bn_fprop_b2c3h2w2.json"); const string json_string = file_util::read_file_to_string(json_path); stringstream ss(json_string); shared_ptr<Function> func = ngraph::deserialize(ss); pass_manager.run_passes(func); size_t ccg = count_ops_of_type<op::BatchNorm>(func); ASSERT_EQ(ccg, 1); } class UnhandledOp : public ngraph::op::Abs { public: UnhandledOp(const std::shared_ptr<Node>& arg) : Abs(arg) { } }; TEST(cpu_fusion, unhandled_op) { auto A = make_shared<op::Parameter>(element::f32, Shape{}); auto unhandled = make_shared<UnhandledOp>(A); auto f = make_shared<Function>(unhandled, op::ParameterVector{A}); auto manager = runtime::Manager::get("CPU"); auto backend = manager->allocate_backend(); auto external = manager->compile(f); ASSERT_THROW(backend->make_call_frame(external), ngraph_error); }