Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
N
ngraph
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ngraph
Commits
dbf3703a
Commit
dbf3703a
authored
Dec 13, 2018
by
Aleksey Marchuk
Committed by
Scott Cyphers
Dec 13, 2018
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Integration of MLSL library (#1520)
parent
18034315
Show whitespace changes
Inline
Side-by-side
Showing
26 changed files
with
199 additions
and
89 deletions
+199
-89
CMakeLists.txt
CMakeLists.txt
+4
-0
external_mlsl.cmake
cmake/external_mlsl.cmake
+70
-0
CMakeLists.txt
doc/examples/mnist_mlp/CMakeLists.txt
+3
-2
dist_mnist_mlp.cpp
doc/examples/mnist_mlp/dist_mnist_mlp.cpp
+1
-1
mnist_loader.cpp
doc/examples/mnist_mlp/mnist_loader.cpp
+2
-2
index.rst
doc/sphinx/source/distr/index.rst
+2
-2
distribute-train.rst
doc/sphinx/source/howto/distribute-train.rst
+6
-5
CMakeLists.txt
src/ngraph/CMakeLists.txt
+3
-3
CMakeLists.txt
src/ngraph/codegen/CMakeLists.txt
+3
-3
compiler.cpp
src/ngraph/codegen/compiler.cpp
+1
-1
distributed.cpp
src/ngraph/distributed.cpp
+12
-14
distributed.hpp
src/ngraph/distributed.hpp
+4
-2
CMakeLists.txt
src/ngraph/runtime/cpu/CMakeLists.txt
+3
-7
allreduce.cpp
src/ngraph/runtime/cpu/builder/allreduce.cpp
+8
-6
cpu_builder.cpp
src/ngraph/runtime/cpu/cpu_builder.cpp
+0
-5
cpu_call_frame.cpp
src/ngraph/runtime/cpu/cpu_call_frame.cpp
+16
-0
cpu_emitter.cpp
src/ngraph/runtime/cpu/cpu_emitter.cpp
+9
-7
cpu_external_function.cpp
src/ngraph/runtime/cpu/cpu_external_function.cpp
+5
-4
cpu_runtime_context.hpp
src/ngraph/runtime/cpu/cpu_runtime_context.hpp
+8
-0
CMakeLists.txt
src/ngraph/runtime/interpreter/CMakeLists.txt
+6
-10
int_backend.hpp
src/ngraph/runtime/interpreter/int_backend.hpp
+1
-1
allreduce.hpp
src/ngraph/runtime/reference/allreduce.hpp
+16
-6
CMakeLists.txt
src/tools/nbench/CMakeLists.txt
+5
-0
nbench.cpp
src/tools/nbench/nbench.cpp
+8
-0
CMakeLists.txt
test/CMakeLists.txt
+1
-4
distributed.in.cpp
test/distributed.in.cpp
+2
-4
No files found.
CMakeLists.txt
View file @
dbf3703a
...
...
@@ -290,6 +290,10 @@ else()
endif
()
include
(
cmake/external_tbb.cmake
)
if
(
NGRAPH_DISTRIBUTED_ENABLE
)
include
(
cmake/external_mlsl.cmake
)
endif
()
if
(
NGRAPH_HALIDE
)
message
(
WARNING
"Halide build system integration is currently using an older LLVM release \
...
...
cmake/external_mlsl.cmake
0 → 100755
View file @
dbf3703a
# ******************************************************************************
# Copyright 2017-2018 Intel Corporation
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ******************************************************************************
# Enable ExternalProject CMake module
include
(
ExternalProject
)
#------------------------------------------------------------------------------
# Download MLSL
#------------------------------------------------------------------------------
set
(
MLSL_GIT_URL https://github.com/intel/MLSL
)
set
(
MLSL_GIT_TAG d1bcc74cccdd86cae8841dab67723c811ddbd592
)
find_program
(
MAKE_EXE NAMES gmake nmake make
)
ExternalProject_Add
(
MLSL
PREFIX MLSL
GIT_REPOSITORY
${
MLSL_GIT_URL
}
GIT_TAG
${
MLSL_GIT_TAG
}
UPDATE_COMMAND
""
CONFIGURE_COMMAND
""
BUILD_COMMAND
${
MAKE_EXE
}
-j 1 ENABLE_INTERNAL_ENV_UPDATE=1
INSTALL_COMMAND
${
MAKE_EXE
}
install PREFIX=
${
EXTERNAL_PROJECTS_ROOT
}
/MLSL/install
BUILD_IN_SOURCE TRUE
TMP_DIR
"
${
EXTERNAL_PROJECTS_ROOT
}
/MLSL/tmp"
STAMP_DIR
"
${
EXTERNAL_PROJECTS_ROOT
}
/MLSL/stamp"
SOURCE_DIR
"
${
EXTERNAL_PROJECTS_ROOT
}
/MLSL/src"
INSTALL_DIR
"
${
EXTERNAL_PROJECTS_ROOT
}
/MLSL/install"
EXCLUDE_FROM_ALL TRUE
)
ExternalProject_Get_Property
(
MLSL SOURCE_DIR
)
ExternalProject_Get_Property
(
MLSL INSTALL_DIR
)
add_library
(
libmlsl INTERFACE
)
target_include_directories
(
libmlsl SYSTEM INTERFACE
${
SOURCE_DIR
}
/include
)
target_link_libraries
(
libmlsl INTERFACE
${
INSTALL_DIR
}
/intel64/lib/thread/libmlsl.so
)
link_directories
(
${
INSTALL_DIR
}
/intel64/lib/thread
)
add_dependencies
(
libmlsl MLSL
)
#installation
#mlsl & mpi & fabric libraries
install
(
DIRECTORY
"
${
INSTALL_DIR
}
/intel64/lib/thread/"
DESTINATION
${
NGRAPH_INSTALL_LIB
}
)
#install mpi binaries
install
(
DIRECTORY
"
${
INSTALL_DIR
}
/intel64/bin/thread/"
USE_SOURCE_PERMISSIONS
DESTINATION
${
NGRAPH_INSTALL_BIN
}
)
#install mpi tuning data
install
(
DIRECTORY
"
${
INSTALL_DIR
}
/intel64/etc/"
DESTINATION
${
CMAKE_INSTALL_PREFIX
}
/etc
)
#mlsl header
install
(
FILES
${
SOURCE_DIR
}
/include/mlsl.hpp
DESTINATION
${
NGRAPH_INSTALL_INCLUDE
}
/ngraph
)
doc/examples/mnist_mlp/CMakeLists.txt
View file @
dbf3703a
...
...
@@ -19,6 +19,7 @@ add_dependencies(mnist_mlp ngraph cpu_backend)
target_link_libraries
(
mnist_mlp ngraph cpu_backend
)
if
(
NGRAPH_DISTRIBUTED_ENABLE
)
add_executable
(
dist_mnist_mlp mnist_loader.cpp dist_mnist_mlp.cpp
)
add_dependencies
(
dist_mnist_mlp ngraph cpu_backend
)
target_link_libraries
(
dist_mnist_mlp ngraph cpu_backend
)
target_compile_definitions
(
dist_mnist_mlp PRIVATE NGRAPH_DISTRIBUTED
)
target_include_directories
(
dist_mnist_mlp SYSTEM PRIVATE libmlsl
)
target_link_libraries
(
dist_mnist_mlp ngraph cpu_backend libmlsl
)
endif
()
doc/examples/mnist_mlp/dist_mnist_mlp.cpp
View file @
dbf3703a
...
...
@@ -106,7 +106,7 @@ float test_accuracy(MNistDataLoader& loader,
static_cast
<
float
>
(
sample_count
);
}
int
main
(
int
argc
,
c
onst
c
har
*
argv
[])
int
main
(
int
argc
,
char
*
argv
[])
{
ngraph
::
Distributed
dist
;
...
...
doc/examples/mnist_mlp/mnist_loader.cpp
View file @
dbf3703a
...
...
@@ -83,8 +83,8 @@ void MNistLoader::open()
m_file
=
fopen
(
m_filename
.
c_str
(),
"rb"
);
if
(
m_file
==
nullptr
)
{
throw
std
::
runtime_error
(
std
::
string
(
"File"
)
+
m_filename
+
"couldn't be opened. Make sure the file "
throw
std
::
runtime_error
(
std
::
string
(
"File
"
)
+
m_filename
+
"
couldn't be opened. Make sure the file "
"exists in the current directory"
);
}
read_header
();
...
...
doc/sphinx/source/distr/index.rst
View file @
dbf3703a
...
...
@@ -28,7 +28,7 @@ How? (Generic frameworks)
To synchronize gradients across all workers, the essential operation for data
parallel training, due to its simplicity and scalability over parameter servers,
is
“allreduce”
. The AllReduce op is one of the nGraph Library’s core ops. To
is
``allreduce``
. The AllReduce op is one of the nGraph Library’s core ops. To
enable gradient synchronization for a network, we simply inject the AllReduce op
into the computation graph, connecting the graph for the autodiff computation
and optimizer update (which then becomes part of the nGraph graph). The
...
...
@@ -64,7 +64,7 @@ MXNet
We implemented a KVStore in MXNet\* (KVStore is unique to MXNet) to modify
the SGD update op so the nGraph graph will contain the allreduce op and generate
corresponding collective communication kernels for different backends. We are
using
OpenMPI for CPU backends and plan to integrate `Intel MLSL`_ in future.
using
`Intel MLSL`_ for CPU backends.
The figure below shows a bar chart with preliminary results from a Resnet-50
I1K training in MXNet 1, 2, 4, (and 8 if available) nodes, x-axis is the number
...
...
doc/sphinx/source/howto/distribute-train.rst
View file @
dbf3703a
...
...
@@ -8,10 +8,10 @@ In the :doc:`previous section <../howto/derive-for-training>`, we described the
steps needed to create a "trainable" nGraph model. Here we demonstrate how to
train a data parallel model by distributing the graph across devices.
To use this mode of training, first install a supported version of `OpenMPI`_
(1.10 or newer).
Next, create an nGraph build with the cmake flag ``-DNGRAPH_DISTRIBUTED_ENABLE=TRUE``.
To use this mode of training, create an nGraph build with the cmake flag
``-DNGRAPH_DISTRIBUTED_ENABLE=TRUE``.
To deploy data-parallel training on backends supported by nGraph API, the
``AllReduce`` op should be added after the steps needed to complete the
...
...
@@ -25,7 +25,8 @@ To deploy data-parallel training on backends supported by nGraph API, the
We need to initialize and finalize distributed training with a ``Distributed`` object;
see the `full raw code`_.
Finally, to run the training using two nGraph devices, invoke :command:`mpirun`.
Finally, to run the training using two nGraph devices, invoke :command:`mpirun`, which is distributed with the
`Intel MLSL`_ library.
This will launch two nGraph CPU backends.
...
...
@@ -34,5 +35,5 @@ This will launch two nGraph CPU backends.
$ mpirun -np 2 dist_mnist_mlp
.. _
OpenMPI: https://www.open-mpi.org/software/ompi/v3.1
.. _
Intel MLSL: https://github.com/intel/MLSL/releases
.. _full raw code: https://github.com/NervanaSystems/ngraph/blob/master/doc/examples/mnist_mlp/dist_mnist_mlp.cpp
src/ngraph/CMakeLists.txt
View file @
dbf3703a
...
...
@@ -181,10 +181,10 @@ endif()
add_library
(
ngraph SHARED
${
SRC
}
)
if
(
NGRAPH_DISTRIBUTED_ENABLE
)
find_package
(
MPI REQUIRED
)
target_sources
(
ngraph PRIVATE distributed.cpp
)
target_compile_definitions
(
ngraph PRIVATE NGRAPH_DISTRIBUTED
)
target_include_directories
(
ngraph SYSTEM PRIVATE
${
MPI_C_INCLUDE_PATH
}
${
MPI_CXX_INCLUDE_PATH
}
)
target_link_libraries
(
ngraph PRIVATE
${
MPI_C_LIBRARIES
}
${
MPI_CXX_LIBRARIES
}
)
target_include_directories
(
ngraph SYSTEM PRIVATE
libmlsl
)
target_link_libraries
(
ngraph PRIVATE
libmlsl
)
endif
()
add_subdirectory
(
frontend
)
...
...
src/ngraph/codegen/CMakeLists.txt
View file @
dbf3703a
...
...
@@ -48,9 +48,9 @@ if (NGRAPH_GPU_ENABLE OR (NGRAPH_CPU_ENABLE AND NOT NGRAPH_DEX_ONLY))
list
(
APPEND HEADER_SEARCH_DEFINES NGRAPH_HEADERS_PATH=
"
${
NGRAPH_INCLUDE_PATH
}
"
)
if
(
NGRAPH_DISTRIBUTED_ENABLE
)
find_package
(
MPI REQUIRED
)
target_compile_definitions
(
codegen PRIVATE NGRAPH_DISTRIBUTED
)
add_definitions
(
-D
MPI_HEADER_PATH=
"
${
MPI_PATH
}
"
)
get_target_property
(
MLSL_INCLUDE_DIR libmlsl INTERFACE_INCLUDE_DIRECTORIES
)
list
(
APPEND HEADER_SEARCH_DEFINES MLSL_HEADER_PATH=
"
${
MLSL_INCLUDE_DIR
}
"
)
add_definitions
(
-D
NGRAPH_DISTRIBUTED
)
endif
()
if
(
NGRAPH_GPU_ENABLE
)
...
...
src/ngraph/codegen/compiler.cpp
View file @
dbf3703a
...
...
@@ -473,7 +473,7 @@ void codegen::CompilerCore::configure_search_path()
#endif
#ifdef NGRAPH_DISTRIBUTED
add_header_search_path
(
M
PI
_HEADER_PATH
);
add_header_search_path
(
M
LSL
_HEADER_PATH
);
#endif
}
...
...
src/ngraph/distributed.cpp
View file @
dbf3703a
...
...
@@ -16,37 +16,35 @@
#ifdef NGRAPH_DISTRIBUTED
#include <mlsl.hpp>
#include "ngraph/distributed.hpp"
#include <mpi.h>
using
namespace
ngraph
;
ngraph
::
Distributed
::
Distributed
()
{
int
flag
=
0
;
MPI_Initialized
(
&
flag
);
if
(
!
flag
)
if
(
!
MLSL
::
Environment
::
GetEnv
().
IsInitialized
())
{
M
PI_Init
(
NULL
,
NULL
);
M
LSL
::
Environment
::
GetEnv
().
Init
(
nullptr
,
nullptr
);
}
}
ngraph
::
Distributed
::~
Distributed
()
{
MPI_Finalize
();
if
(
MLSL
::
Environment
::
GetEnv
().
IsInitialized
())
{
MLSL
::
Environment
::
GetEnv
().
Finalize
();
}
}
in
t
ngraph
::
Distributed
::
get_size
()
const
size_
t
ngraph
::
Distributed
::
get_size
()
const
{
int
size
;
MPI_Comm_size
(
MPI_COMM_WORLD
,
&
size
);
return
size
;
return
MLSL
::
Environment
::
GetEnv
().
GetProcessCount
();
}
in
t
ngraph
::
Distributed
::
get_rank
()
const
size_
t
ngraph
::
Distributed
::
get_rank
()
const
{
int
rank
;
MPI_Comm_rank
(
MPI_COMM_WORLD
,
&
rank
);
return
rank
;
return
MLSL
::
Environment
::
GetEnv
().
GetProcessIdx
();
}
#endif
src/ngraph/distributed.hpp
View file @
dbf3703a
...
...
@@ -16,6 +16,8 @@
#pragma once
#include <cstddef>
namespace
ngraph
{
class
Distributed
...
...
@@ -23,7 +25,7 @@ namespace ngraph
public
:
Distributed
();
~
Distributed
();
in
t
get_size
()
const
;
in
t
get_rank
()
const
;
size_
t
get_size
()
const
;
size_
t
get_rank
()
const
;
};
}
src/ngraph/runtime/cpu/CMakeLists.txt
View file @
dbf3703a
...
...
@@ -190,13 +190,9 @@ if (NGRAPH_CPU_ENABLE)
endif
()
if
(
NGRAPH_DISTRIBUTED_ENABLE
)
find_package
(
MPI REQUIRED
)
target_compile_definitions
(
cpu_backend
PRIVATE NGRAPH_DISTRIBUTED
)
target_include_directories
(
cpu_backend
SYSTEM PRIVATE
${
MPI_C_INCLUDE_PATH
}
${
MPI_CXX_INCLUDE_PATH
}
)
target_link_libraries
(
cpu_backend
PRIVATE
${
MPI_C_LIBRARIES
}
${
MPI_CXX_LIBRARIES
}
)
target_compile_definitions
(
cpu_backend PRIVATE NGRAPH_DISTRIBUTED
)
target_include_directories
(
cpu_backend SYSTEM PRIVATE libmlsl
)
target_link_libraries
(
cpu_backend PRIVATE libmlsl
)
endif
()
add_dependencies
(
cpu_backend ext_mkldnn ext_eigen
)
...
...
src/ngraph/runtime/cpu/builder/allreduce.cpp
View file @
dbf3703a
...
...
@@ -15,8 +15,9 @@
//*****************************************************************************
#ifdef NGRAPH_DISTRIBUTED
#include <mlsl.hpp>
#include "ngraph/op/allreduce.hpp"
#include <mpi.h>
#include "ngraph/runtime/cpu/cpu_builder.hpp"
using
namespace
std
;
...
...
@@ -36,21 +37,22 @@ namespace ngraph
auto
&
arg_tensor
=
external_function
->
get_tensor_data
(
args
[
0
].
get_name
());
auto
&
out_tensor
=
external_function
->
get_tensor_data
(
out
[
0
].
get_name
());
auto
count
=
static_cast
<
int
>
(
out
[
0
].
get_size
());
auto
data_type
=
M
PI
_FLOAT
;
auto
data_type
=
M
LSL
::
DT
_FLOAT
;
if
(
args
[
0
].
get_element_type
()
==
element
::
f32
)
{
data_type
=
M
PI
_FLOAT
;
data_type
=
M
LSL
::
DT
_FLOAT
;
}
else
if
(
args
[
0
].
get_element_type
()
==
element
::
f64
)
{
data_type
=
M
PI
_DOUBLE
;
data_type
=
M
LSL
::
DT
_DOUBLE
;
}
auto
functor
=
[
&
,
count
,
data_type
](
CPURuntimeContext
*
ctx
,
CPUExecutionContext
*
ectx
)
{
MPI_Allreduce
(
arg_tensor
,
out_tensor
,
count
,
data_type
,
MPI_SUM
,
MPI_COMM_WORLD
);
MLSL
::
CommReq
*
req
=
ctx
->
mlsl_dist
->
AllReduce
(
arg_tensor
,
out_tensor
,
count
,
data_type
,
MLSL
::
RT_SUM
,
MLSL
::
GT_DATA
);
ctx
->
mlsl_env
->
Wait
(
req
);
};
functors
.
emplace_back
(
functor
);
...
...
src/ngraph/runtime/cpu/cpu_builder.cpp
View file @
dbf3703a
...
...
@@ -103,11 +103,6 @@
#include "ngraph/type/element_type.hpp"
#include "ngraph/util.hpp"
#ifdef NGRAPH_DISTRIBUTED
#include <mpi.h>
#include "ngraph/op/allreduce.hpp"
#endif
using
namespace
std
;
using
namespace
ngraph
;
...
...
src/ngraph/runtime/cpu/cpu_call_frame.cpp
View file @
dbf3703a
...
...
@@ -22,6 +22,10 @@
#include "ngraph/runtime/cpu/cpu_tensor_view.hpp"
#include "ngraph/runtime/cpu/cpu_tracing.hpp"
#ifdef NGRAPH_DISTRIBUTED
#include <mlsl.hpp>
#endif
using
namespace
std
;
using
namespace
ngraph
;
...
...
@@ -139,6 +143,12 @@ void runtime::cpu::CPU_CallFrame::setup_runtime_context()
const
auto
parallelism
=
envParallelism
==
nullptr
?
1
:
std
::
atoi
(
envParallelism
);
ctx
->
c
=
new
tbb
::
global_control
(
tbb
::
global_control
::
max_allowed_parallelism
,
parallelism
);
}
#ifdef NGRAPH_DISTRIBUTED
NGRAPH_ASSERT
(
MLSL
::
Environment
::
GetEnv
().
IsInitialized
());
ctx
->
mlsl_env
=
&
MLSL
::
Environment
::
GetEnv
();
ctx
->
mlsl_dist
=
ctx
->
mlsl_env
->
CreateDistribution
(
ctx
->
mlsl_env
->
GetProcessCount
(),
1
);
#endif
}
void
runtime
::
cpu
::
CPU_CallFrame
::
cleanup_runtime_context
()
...
...
@@ -165,5 +175,11 @@ void runtime::cpu::CPU_CallFrame::cleanup_runtime_context()
}
delete
ctx
->
c
;
}
#ifdef NGRAPH_DISTRIBUTED
if
(
MLSL
::
Environment
::
GetEnv
().
IsInitialized
()
&&
ctx
->
mlsl_dist
!=
nullptr
)
{
ctx
->
mlsl_env
->
DeleteDistribution
(
ctx
->
mlsl_dist
);
}
#endif
delete
ctx
;
}
src/ngraph/runtime/cpu/cpu_emitter.cpp
View file @
dbf3703a
...
...
@@ -126,7 +126,8 @@
#include "ngraph/util.hpp"
#ifdef NGRAPH_DISTRIBUTED
#include <mpi.h>
#include <mlsl.hpp>
#include "ngraph/op/allreduce.hpp"
#endif
...
...
@@ -221,21 +222,22 @@ namespace ngraph
void
CPU_Emitter
::
EMITTER_DECL
(
ngraph
::
op
::
AllReduce
)
{
const
element
::
Type
&
element_type
=
args
[
0
].
get_element_type
();
auto
data_type
=
"M
PI
_FLOAT"
;
auto
data_type
=
"M
LSL::DT
_FLOAT"
;
if
(
element_type
==
element
::
f32
)
{
data_type
=
"M
PI
_FLOAT"
;
data_type
=
"M
LSL::DT
_FLOAT"
;
}
else
if
(
element_type
==
element
::
f64
)
{
data_type
=
"M
PI
_DOUBLE"
;
data_type
=
"M
LSL::DT
_DOUBLE"
;
}
writer
.
block_begin
();
writer
<<
"MPI_Allreduce("
<<
args
[
0
].
get_name
()
<<
", "
<<
out
[
0
].
get_name
()
<<
", "
<<
out
[
0
].
get_size
()
<<
", "
<<
data_type
<<
", MPI_SUM, MPI_COMM_WORLD);
\n
"
;
writer
<<
"MLSL::CommReq* req = ctx->mlsl_dist->AllReduce("
<<
args
[
0
].
get_name
()
<<
", "
<<
out
[
0
].
get_name
()
<<
", "
<<
out
[
0
].
get_size
()
<<
", "
<<
data_type
<<
", MLSL::RT_SUM, MLSL::GT_DATA);
\n
"
;
writer
<<
"ctx->mlsl_env->Wait(req);
\n
"
;
writer
.
block_end
();
}
#endif
...
...
src/ngraph/runtime/cpu/cpu_external_function.cpp
View file @
dbf3703a
...
...
@@ -476,6 +476,11 @@ void runtime::cpu::CPU_ExternalFunction::compile()
writer
<<
"#include <tbb/flow_graph.h>"
;
}
#ifdef NGRAPH_DISTRIBUTED
writer
<<
"#include <mlsl.hpp>
\n
"
;
writer
<<
"#define NGRAPH_DISTRIBUTED
\n
"
;
#endif
writer
+=
R"(
#include <cmath>
...
...
@@ -529,10 +534,6 @@ using namespace ngraph::runtime;
)"
;
#ifdef NGRAPH_DISTRIBUTED
writer
<<
"#include <mpi.h>
\n\n
"
;
#endif
string
pch_header_source
=
writer
.
get_code
();
// The "dso_handle" symbol is required by __cxa_atexit()
...
...
src/ngraph/runtime/cpu/cpu_runtime_context.hpp
View file @
dbf3703a
...
...
@@ -26,6 +26,10 @@
#include <tbb/global_control.h>
#include <tbb/task_scheduler_init.h>
#ifdef NGRAPH_DISTRIBUTED
#include <mlsl.hpp>
#endif
namespace
mkldnn
{
class
primitive
;
...
...
@@ -65,6 +69,10 @@ namespace ngraph
State
*
const
*
states
;
std
::
set
<
size_t
>
breakpoints
;
size_t
pc
;
#ifdef NGRAPH_DISTRIBUTED
MLSL
::
Environment
*
mlsl_env
;
MLSL
::
Distribution
*
mlsl_dist
;
#endif
};
}
...
...
src/ngraph/runtime/interpreter/CMakeLists.txt
View file @
dbf3703a
...
...
@@ -24,18 +24,14 @@ if (NGRAPH_INTERPRETER_ENABLE)
target_link_libraries
(
interpreter_backend PUBLIC ngraph
)
set_target_properties
(
interpreter_backend PROPERTIES LIBRARY_OUTPUT_DIRECTORY
${
NGRAPH_BUILD_DIR
}
)
if
(
NGRAPH_DISTRIBUTED_ENABLE
)
target_compile_definitions
(
interpreter_backend PRIVATE NGRAPH_DISTRIBUTED
)
target_include_directories
(
interpreter_backend SYSTEM PRIVATE libmlsl
)
target_link_libraries
(
interpreter_backend PRIVATE libmlsl
)
endif
()
install
(
TARGETS interpreter_backend
LIBRARY DESTINATION
"
${
NGRAPH_INSTALL_LIB
}
"
ARCHIVE DESTINATION
"
${
NGRAPH_INSTALL_LIB
}
"
)
if
(
NGRAPH_DISTRIBUTED_ENABLE
)
find_package
(
MPI REQUIRED
)
target_compile_definitions
(
interpreter_backend
PRIVATE NGRAPH_DISTRIBUTED
)
target_include_directories
(
interpreter_backend
SYSTEM PRIVATE
${
MPI_C_INCLUDE_PATH
}
${
MPI_CXX_INCLUDE_PATH
}
)
target_link_libraries
(
interpreter_backend
PRIVATE
${
MPI_C_LIBRARIES
}
${
MPI_CXX_LIBRARIES
}
)
endif
()
endif
()
src/ngraph/runtime/interpreter/int_backend.hpp
View file @
dbf3703a
...
...
@@ -254,7 +254,7 @@ private:
}
case
OP_TYPEID
:
:
AllReduce
:
{
#ifdef NGRAPH_DISTRIBUTED
reference
::
allreduce
<
T
>
(
static_cast
<
const
T
*>
(
args
[
0
]
),
reference
::
allreduce
<
T
>
(
static_cast
<
T
*>
(
const_cast
<
void
*>
(
args
[
0
])
),
static_cast
<
T
*>
(
out
[
0
]),
node
.
get_input_element_type
(
0
),
static_cast
<
int
>
(
shape_size
(
node
.
get_input_shape
(
0
))));
...
...
src/ngraph/runtime/reference/allreduce.hpp
View file @
dbf3703a
...
...
@@ -18,7 +18,8 @@
#ifdef NGRAPH_DISTRIBUTED
#include <mpi.h>
#include <mlsl.hpp>
#include "ngraph/type/element_type.hpp"
namespace
ngraph
...
...
@@ -28,20 +29,29 @@ namespace ngraph
namespace
reference
{
template
<
typename
T
>
void
allreduce
(
const
T
*
arg
,
T
*
out
,
const
element
::
Type
element_type
,
int
count
)
void
allreduce
(
T
*
arg
,
T
*
out
,
const
element
::
Type
element_type
,
int
count
)
{
auto
data_type
=
M
PI
_FLOAT
;
auto
data_type
=
M
LSL
::
DT
_FLOAT
;
if
(
element_type
==
element
::
f32
)
{
data_type
=
M
PI
_FLOAT
;
data_type
=
M
LSL
::
DT
_FLOAT
;
}
else
if
(
element_type
==
element
::
f64
)
{
data_type
=
MPI_DOUBLE
;
data_type
=
MLSL
::
DT_DOUBLE
;
}
else
{
throw
std
::
runtime_error
(
"AllReduce op supports only f32 and f64 types"
);
}
MPI_Allreduce
(
arg
,
out
,
count
,
data_type
,
MPI_SUM
,
MPI_COMM_WORLD
);
MLSL
::
Environment
&
env
=
MLSL
::
Environment
::
GetEnv
();
MLSL
::
Distribution
*
distribution
=
env
.
CreateDistribution
(
env
.
GetProcessCount
(),
1
);
MLSL
::
CommReq
*
req
=
distribution
->
AllReduce
(
arg
,
out
,
count
,
data_type
,
MLSL
::
RT_SUM
,
MLSL
::
GT_DATA
);
env
.
Wait
(
req
);
env
.
DeleteDistribution
(
distribution
);
}
}
}
...
...
src/tools/nbench/CMakeLists.txt
View file @
dbf3703a
...
...
@@ -46,4 +46,9 @@ if (NGRAPH_PLAIDML_ENABLE)
target_link_libraries
(
nbench plaidml_backend
)
endif
()
if
(
NGRAPH_DISTRIBUTED_ENABLE
)
target_compile_definitions
(
nbench PRIVATE NGRAPH_DISTRIBUTED
)
target_link_libraries
(
nbench libmlsl
)
endif
()
install
(
TARGETS nbench RUNTIME DESTINATION
${
NGRAPH_INSTALL_BIN
}
)
src/tools/nbench/nbench.cpp
View file @
dbf3703a
...
...
@@ -33,6 +33,10 @@
#include "ngraph/serializer.hpp"
#include "ngraph/util.hpp"
#ifdef NGRAPH_DISTRIBUTED
#include "ngraph/distributed.hpp"
#endif
using
namespace
std
;
using
namespace
ngraph
;
...
...
@@ -294,6 +298,10 @@ OPTIONS
return
1
;
}
#ifdef NGRAPH_DISTRIBUTED
ngraph
::
Distributed
dist
;
#endif
vector
<
string
>
models
;
if
(
!
directory
.
empty
())
{
...
...
test/CMakeLists.txt
View file @
dbf3703a
...
...
@@ -178,11 +178,8 @@ if(NGRAPH_ADDRESS_SANITIZER)
endif
()
if
(
NGRAPH_DISTRIBUTED_ENABLE
)
find_package
(
MPI REQUIRED
)
target_compile_definitions
(
unit-test PRIVATE NGRAPH_DISTRIBUTED
)
target_include_directories
(
unit-test
SYSTEM PRIVATE
${
MPI_C_INCLUDE_PATH
}
${
MPI_CXX_INCLUDE_PATH
}
)
target_link_libraries
(
unit-test PRIVATE
${
MPI_C_LIBRARIES
}
${
MPI_CXX_LIBRARIES
}
)
target_link_libraries
(
unit-test PRIVATE libmlsl
)
endif
()
target_link_libraries
(
unit-test PRIVATE ngraph_test_util
)
...
...
test/distributed.in.cpp
View file @
dbf3703a
...
...
@@ -17,7 +17,7 @@
#include <fstream>
#include <sstream>
#include <m
pi.h
>
#include <m
lsl.hpp
>
#include "gtest/gtest.h"
...
...
@@ -36,9 +36,7 @@ TEST(distributed_${BACKEND_NAME}, allreduce)
auto
f
=
make_shared
<
Function
>
(
make_shared
<
op
::
AllReduce
>
(
A
),
ParameterVector
{
A
});
auto
backend
=
runtime
::
Backend
::
create
(
"${BACKEND_NAME}"
);
int
comm_size
;
MPI_Comm_size
(
MPI_COMM_WORLD
,
&
comm_size
);
auto
comm_size
=
MLSL
::
Environment
::
GetEnv
().
GetProcessCount
();
auto
v
=
vector
<
float
>
{
1
,
2
,
3
,
4
};
auto
a
=
backend
->
create_tensor
(
element
::
f32
,
shape
);
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment