//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************

#include <fstream>
#include <sstream>

#include "gtest/gtest.h"

#include "ngraph/distributed.hpp"
#include "ngraph/file_util.hpp"
#include "ngraph/ngraph.hpp"
#include "ngraph/serializer.hpp"
#include "util/all_close_f.hpp"
#include "util/random.hpp"
#include "util/test_control.hpp"

using namespace std;
using namespace ngraph;

static string s_manifest = "${MANIFEST}";

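// Runs an AllReduce of the given reduction type on a 2x2 tensor and checks that
// every rank ends up with the expected element-wise result.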
static void test_allreduce_common(reduction::Type reduce_type)
{
    auto comm_size = get_distributed_interface()->get_size();
    if (comm_size > 1)
    {
        auto shape = Shape{2, 2};
        auto A = make_shared<op::Parameter>(element::f32, shape);
        auto f =
            make_shared<Function>(make_shared<op::AllReduce>(A, reduce_type), ParameterVector{A});

        auto backend = runtime::Backend::create("${BACKEND_NAME}");

        auto v = vector<float>{1, 2, 3, 4};
        auto a = backend->create_tensor(element::f32, shape);
        auto result = backend->create_tensor(element::f32, shape);

#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
#pragma GCC diagnostic push
#pragma GCC diagnostic error "-Wswitch"
#pragma GCC diagnostic error "-Wswitch-enum"
#endif
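        // Each case fills the input tensor and rewrites v with the values expected
        // after the reduction, so v can be compared against the result below.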
        switch (reduce_type)
        {
        case reduction::Type::SUM:
            copy_data(a, v);
            // Every rank contributes the same values, so the expected sum is comm_size * elm.
            std::transform(v.begin(), v.end(), v.begin(), [&](float elm) -> float {
                return elm * static_cast<float>(comm_size);
            });
            break;
        case reduction::Type::PROD:
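            // Every rank contributes the same values, so the expected product is elm^comm_size.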
            copy_data(a, v);
            std::transform(v.begin(), v.end(), v.begin(), [&](float elm) -> float {
                return pow(elm, comm_size);
            });
            break;
        case reduction::Type::MIN:
        case reduction::Type::MAX:
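            // Give each rank a different rotation of v so that the element-wise
            // MIN/MAX across ranks is known in advance.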
            auto shift = get_distributed_interface()->get_rank();
            std::rotate(v.begin(), v.begin() + shift % v.size(), v.end());
            copy_data(a, v);
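            // Rewrite v with the expected element-wise MIN/MAX over all ranks' rotated copies.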
            if (reduce_type == reduction::Type::MIN)
            {
                std::fill(v.begin(), v.end(), 1);
                for (int i = 1; i < static_cast<int>(v.size()) - comm_size + 1; i++)
                    v[i] = i + 1;
            }
            else
            {
                std::fill(v.begin(), v.end(), v.size());
                for (int i = 0; i < static_cast<int>(v.size()) - comm_size; i++)
                    v[i] = i + 2;
            }
        }
#if defined(__GNUC__) && !(__GNUC__ == 4 && __GNUC_MINOR__ == 8)
#pragma GCC diagnostic pop
#endif

        auto handle = backend->compile(f);
        handle->call_with_validate({result}, {a});
        EXPECT_TRUE(test::all_close_f(v, read_vector<float>(result)));
    }
}

NGRAPH_TEST(${BACKEND_NAME}, allreduce_sum)
{
    test_allreduce_common(reduction::Type::SUM);
}

NGRAPH_TEST(${BACKEND_NAME}, allreduce_min)
{
    test_allreduce_common(reduction::Type::MIN);
}

NGRAPH_TEST(${BACKEND_NAME}, allreduce_max)
{
    test_allreduce_common(reduction::Type::MAX);
}

#if !defined(NGRAPH_DISTRIBUTED_MLSL_ENABLE)
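// This allreduce_prod test is excluded when building with MLSL.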
NGRAPH_TEST(${BACKEND_NAME}, allreduce_prod)
{
    test_allreduce_common(reduction::Type::PROD);
}
#endif

NGRAPH_TEST(${BACKEND_NAME}, broadcastdistributed)
{
    auto shape = Shape{2, 2};
    auto A = make_shared<op::Parameter>(element::f32, shape);
    auto comm_size = get_distributed_interface()->get_size();
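    // Broadcast from every possible root in turn; only the root seeds the data,
    // and every rank must end up with it.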
    for (int root_id = 0; root_id < comm_size; ++root_id)
    {
        auto f = make_shared<Function>(make_shared<op::BroadcastDistributed>(A, root_id),
                                       ParameterVector{A});

        auto backend = runtime::Backend::create("${BACKEND_NAME}");

        auto v = vector<float>{1, 2, 3, 4};
        auto result = backend->create_tensor(element::f32, shape);
        copy_data(result, vector<float>(4, 0));

        auto rank = get_distributed_interface()->get_rank();
        if (rank == root_id)
        {
            copy_data(result, v);
        }

        auto handle = backend->compile(f);
        handle->call_with_validate({result}, {result});
        EXPECT_EQ(v, read_vector<float>(result));
    }
}

// MLSL does not support send/recv.
#if !defined(NGRAPH_DISTRIBUTED_MLSL_ENABLE)
NGRAPH_TEST(${BACKEND_NAME}, send_recv)
{
    auto shape = Shape{2, 2};
    auto A = make_shared<op::Parameter>(element::f32, shape);
    auto comm_size = get_distributed_interface()->get_size();
    // This test only works with exactly two ranks.
    if (comm_size != 2)
    {
        return;
    }
    auto rank = get_distributed_interface()->get_rank();
    std::shared_ptr<Function> f;
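    // Rank 0 sends the tensor to rank 1; rank 1 receives it into the same buffer.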
    if (rank == 0)
    {
        f = make_shared<Function>(make_shared<op::Send>(A, 1), ParameterVector{A});
    }
    else
    {
        f = make_shared<Function>(make_shared<op::Recv>(A, 0), ParameterVector{A});
    }
    auto backend = runtime::Backend::create("${BACKEND_NAME}");
    auto v = vector<float>{1, 2, 3, 4};
    auto result = backend->create_tensor(element::f32, shape);
    copy_data(result, vector<float>(4, 0));

    if (rank == 0)
    {
        copy_data(result, v);
    }

    auto handle = backend->compile(f);
    handle->call_with_validate({result}, {result});
    EXPECT_EQ(v, read_vector<float>(result));
}
#endif

// MLSL does not support send/recv.
#if !defined(NGRAPH_DISTRIBUTED_MLSL_ENABLE)
NGRAPH_TEST(${BACKEND_NAME}, send_recv_ring)
{
    auto shape = Shape{2, 2};
    auto A = make_shared<op::Parameter>(element::f32, shape);
    auto comm_size = get_distributed_interface()->get_size();
    // This test requires at least two ranks.
    if (comm_size < 2)
    {
        return;
    }

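    // Pass the tensor around a ring: rank 0 seeds the data, every other rank receives
    // it from rank - 1, and each rank then forwards it to (rank + 1) % comm_size.
    // Finally rank 0 receives it back from the last rank to close the ring.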
    auto rank = get_distributed_interface()->get_rank();
    std::shared_ptr<Function> f_send;
    std::shared_ptr<Function> f_recv;
    auto backend = runtime::Backend::create("${BACKEND_NAME}");
    auto v = vector<float>{1, 2, 3, 4};
    auto result = backend->create_tensor(element::f32, shape);
    copy_data(result, vector<float>(4, 0));

    if (rank != 0)
    {
        f_recv = make_shared<Function>(make_shared<op::Recv>(A, rank - 1), ParameterVector{A});
        auto handle = backend->compile(f_recv);
        handle->call_with_validate({result}, {result});
        EXPECT_EQ(v, read_vector<float>(result));
    }
    else
    {
        copy_data(result, v);
    }

    f_send =
        make_shared<Function>(make_shared<op::Send>(A, (rank + 1) % comm_size), ParameterVector{A});
    backend->compile(f_send)->call_with_validate({result}, {result});

    if (rank == 0)
    {
        // Rank 0 closes the ring by receiving the tensor back from the last rank.
        f_recv = make_shared<Function>(make_shared<op::Recv>(A, comm_size - 1), ParameterVector{A});
        auto handle = backend->compile(f_recv);
        copy_data(result, vector<float>(4, 0));
        handle->call_with_validate({result}, {result});
        EXPECT_EQ(v, read_vector<float>(result));
    }
}
#endif