From e2e60722d8aca11168b82b7ee6307a1b48df4a1c Mon Sep 17 00:00:00 2001 From: Sandeep <sandeep.aswath.narayana@intel.com> Date: Thu, 7 Mar 2019 13:02:43 -0800 Subject: [PATCH] limit distributed user option to one flag (#2512) * limit user option to one flag * add space for if * check if intel cpu only backend * cosmetic * make it clear * Align to look pretty * docs update --- CMakeLists.txt | 45 ++++++++++++------- doc/sphinx/source/buildlb.rst | 6 +-- .../constructing-graphs/distribute-train.rst | 4 +- 3 files changed, 32 insertions(+), 23 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 3b415d53e..891824cf9 100755 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -144,23 +144,37 @@ option(NGRAPH_DEX_ONLY "Build CPU DEX without codegen" FALSE) option(NGRAPH_CODE_COVERAGE_ENABLE "Enable code coverage data collection" FALSE) option(NGRAPH_LIB_VERSIONING_ENABLE "Enable shared library versioning" FALSE) option(NGRAPH_PYTHON_BUILD_ENABLE "Enable build nGraph python package wheel" FALSE) -option(NGRAPH_DISTRIBUTED_MLSL_ENABLE "Add distributed MLSL mode for CPU only backend" FALSE) -option(NGRAPH_DISTRIBUTED_OMPI_ENABLE "Add distributed Open-MPI mode for all backend" FALSE) option(NGRAPH_PLAIDML_ENABLE "Enable the PlaidML backend" ${PLAIDML_FOUND}) +option(NGRAPH_DISTRIBUTED_ENABLE "Enable distributed training using MLSL/OpenMPI" OFF) -if (NGRAPH_GPUH_ENABLE) - set(NGRAPH_GPU_ENABLE TRUE) +if (NGRAPH_CPU_ENABLE + AND + ((NOT NGRAPH_GPU_ENABLE) AND (NOT NGRAPH_GPUH_ENABLE) + AND (NOT NGRAPH_GENERIC_CPU_ENABLE) AND (NOT NGRAPH_INTELGPU_ENABLE)) + ) + set(NGRAPH_INTEL_CPU_ONLY_ENABLE ON) endif() -if (NGRAPH_DISTRIBUTED_MLSL_ENABLE AND NGRAPH_DISTRIBUTED_OMPI_ENABLE) - message(FATAL_ERROR - "Does not support the use of two distributed libraries simultaneously.\n" - "If CPU only backend recommend Intel MLSL by setting NGRAPH_DISTRIBUTED_MLSL_ENABLE flag to true.\n" - "For all other backends use OpenMPI by setting NGRAPH_DISTRIBUTED_OMPI_ENABLE flag to true.\n") 
-elseif(NGRAPH_DISTRIBUTED_MLSL_ENABLE OR NGRAPH_DISTRIBUTED_OMPI_ENABLE) - set(NGRAPH_DISTRIBUTED_ENABLE TRUE) -else() - set(NGRAPH_DISTRIBUTED_ENABLE FALSE) +if (NGRAPH_DISTRIBUTED_ENABLE) + if ("${NGRAPH_DISTRIBUTED_ENABLE}" STREQUAL "MLSL") + if (NGRAPH_INTEL_CPU_ONLY_ENABLE) + set(NGRAPH_DISTRIBUTED_MLSL_ENABLE TRUE) + else() + message(FATAL_ERROR + "-DNGRAPH_DISTRIBUTED_ENABLE=MLSL to be used, if Intel CPU is the only backend enabled.\n" + "Use -DNGRAPH_DISTRIBUTED_ENABLE=OMPI for all other situations.\n") + endif() + elseif("${NGRAPH_DISTRIBUTED_ENABLE}" STREQUAL "OMPI") + set(NGRAPH_DISTRIBUTED_OMPI_ENABLE TRUE) + else() + message(FATAL_ERROR + "Invalid arguments passed to NGRAPH_DISTRIBUTED_ENABLE, must select one of MLSL, OMPI or OFF.\n" + "If using Intel CPU only backend, recommend Intel MLSL by setting -DNGRAPH_DISTRIBUTED_ENABLE=MLSL .\n") + endif() +endif() + +if (NGRAPH_GPUH_ENABLE) + set(NGRAPH_GPU_ENABLE TRUE) endif() if (NGRAPH_ONNX_IMPORT_ENABLE) @@ -185,8 +199,6 @@ NORMALIZE_BOOL(NGRAPH_INTERPRETER_ENABLE) NORMALIZE_BOOL(NGRAPH_NOP_ENABLE) NORMALIZE_BOOL(NGRAPH_GPUH_ENABLE) NORMALIZE_BOOL(NGRAPH_GENERIC_CPU_ENABLE) -NORMALIZE_BOOL(NGRAPH_DISTRIBUTED_MLSL_ENABLE) -NORMALIZE_BOOL(NGRAPH_DISTRIBUTED_OMPI_ENABLE) NORMALIZE_BOOL(NGRAPH_DEBUG_ENABLE) NORMALIZE_BOOL(NGRAPH_ONNX_IMPORT_ENABLE) NORMALIZE_BOOL(NGRAPH_DEX_ONLY) @@ -205,8 +217,6 @@ message(STATUS "NGRAPH_INTERPRETER_ENABLE: ${NGRAPH_INTERPRETER_ENABLE}") message(STATUS "NGRAPH_NOP_ENABLE: ${NGRAPH_NOP_ENABLE}") message(STATUS "NGRAPH_GPUH_ENABLE: ${NGRAPH_GPUH_ENABLE}") message(STATUS "NGRAPH_GENERIC_CPU_ENABLE: ${NGRAPH_GENERIC_CPU_ENABLE}") -message(STATUS "NGRAPH_DISTRIBUTED_MLSL_ENABLE: ${NGRAPH_DISTRIBUTED_MLSL_ENABLE}") -message(STATUS "NGRAPH_DISTRIBUTED_OMPI_ENABLE: ${NGRAPH_DISTRIBUTED_OMPI_ENABLE}") message(STATUS "NGRAPH_DEBUG_ENABLE: ${NGRAPH_DEBUG_ENABLE}") message(STATUS "NGRAPH_ONNX_IMPORT_ENABLE: ${NGRAPH_ONNX_IMPORT_ENABLE}") message(STATUS "NGRAPH_DEX_ONLY: 
${NGRAPH_DEX_ONLY}") @@ -215,6 +225,7 @@ message(STATUS "NGRAPH_LIB_VERSIONING_ENABLE: ${NGRAPH_LIB_VERSIONING_ENABLE}" message(STATUS "NGRAPH_PYTHON_BUILD_ENABLE: ${NGRAPH_PYTHON_BUILD_ENABLE}") message(STATUS "NGRAPH_USE_PREBUILT_LLVM: ${NGRAPH_USE_PREBUILT_LLVM}") message(STATUS "NGRAPH_PLAIDML_ENABLE: ${NGRAPH_PLAIDML_ENABLE}") +message(STATUS "NGRAPH_DISTRIBUTED_ENABLE: ${NGRAPH_DISTRIBUTED_ENABLE}") #----------------------------------------------------------------------------------------------- # Installation logic... diff --git a/doc/sphinx/source/buildlb.rst b/doc/sphinx/source/buildlb.rst index 8f71c3066..b750e301a 100644 --- a/doc/sphinx/source/buildlb.rst +++ b/doc/sphinx/source/buildlb.rst @@ -54,8 +54,6 @@ as needed: -- NGRAPH_NOP_ENABLE: ON -- NGRAPH_GPUH_ENABLE: OFF -- NGRAPH_GENERIC_CPU_ENABLE: OFF - -- NGRAPH_DISTRIBUTED_MLSL_ENABLE: OFF - -- NGRAPH_DISTRIBUTED_OMPI_ENABLE: OFF -- NGRAPH_DEBUG_ENABLE: OFF -- NGRAPH_ONNX_IMPORT_ENABLE: OFF -- NGRAPH_DEX_ONLY: OFF @@ -64,7 +62,7 @@ as needed: -- NGRAPH_PYTHON_BUILD_ENABLE: OFF -- NGRAPH_USE_PREBUILT_LLVM: OFF -- NGRAPH_PLAIDML_ENABLE: OFF - + -- NGRAPH_DISTRIBUTED_ENABLE: OFF .. important:: The default :program:`cmake` procedure (no build flags) will install ``ngraph_dist`` to an OS-level location like ``/usr/bin/ngraph_dist`` @@ -268,4 +266,4 @@ be updated frequently in the coming months. Stay tuned! .. _NervanaSystems: https://github.com/NervanaSystems/ngraph/blob/master/README.md .. _ONNX: http://onnx.ai .. _website docs: http://ngraph.nervanasys.com/docs/latest/ -.. _googletest framework: https://github.com/google/googletest.git \ No newline at end of file +.. 
_googletest framework: https://github.com/google/googletest.git diff --git a/doc/sphinx/source/core/constructing-graphs/distribute-train.rst b/doc/sphinx/source/core/constructing-graphs/distribute-train.rst index 6dc4f02fe..96d6dc0fd 100644 --- a/doc/sphinx/source/core/constructing-graphs/distribute-train.rst +++ b/doc/sphinx/source/core/constructing-graphs/distribute-train.rst @@ -16,12 +16,12 @@ more than one device. Frameworks can implement distributed training with nGraph versions prior to `0.13`: -* Use ``-DNGRAPH_DISTRIBUTED_OMPI_ENABLE=TRUE`` to enable distributed training +* Use ``-DNGRAPH_DISTRIBUTED_ENABLE=OMPI`` to enable distributed training with OpenMPI. Use of this flag requires that OpenMPI be a pre-existing library in the system. If it's not present on the system, install `OpenMPI`_ version ``2.1.1`` or later before running the compile. -* Use ``-DNGRAPH_DISTRIBUTED_MLSL_ENABLE=TRUE`` to enable the option for +* Use ``-DNGRAPH_DISTRIBUTED_ENABLE=MLSL`` to enable the option for :abbr:`Intel® Machine Learning Scaling Library (MLSL)` for Linux* OS: .. note:: The Intel® MLSL option applies to Intel® Architecture CPUs -- 2.18.0