Commit f820195b authored by Scott Cyphers

Merge branch 'r0.25' into cyphers/25tomaster

parents 32b54cc4 9937f8b5
@@ -398,6 +398,10 @@ if (NGRAPH_MLIR_ENABLE)
set(NGRAPH_MLIR_SOURCE_DIR ${CMAKE_SOURCE_DIR}/src/contrib/mlir)
endif()
+if (NGRAPH_STATIC_LIB_ENABLE)
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNGRAPH_STATIC_LIB_ENABLE")
+endif()
if (NGRAPH_PLAIDML_ENABLE)
find_package(PlaidML CONFIG)
if (NOT PLAIDML_FOUND)
......
@@ -55,7 +55,7 @@ project/doc-contributor-README.rst @indie
/src/ngraph/runtime/hybrid/ @rkimballn1
/src/ngraph/runtime/intelgpu/ @dmyershov
/src/ngraph/runtime/interpreter/ @rkimballn1
-/src/ngraph/runtime/plaidml/ @earhart
+/src/ngraph/runtime/plaidml/ @earhart @dgkutnic
/src/ngraph/runtime/reference/ @aprocter
/src/ngraph/runtime/reference/allreduce.*pp @wenzhe-nrv @aprocter
/src/ngraph/type/ @diyessi
......
@@ -519,6 +519,14 @@ if(NOT NGRAPH_JSON_ENABLE)
target_compile_definitions(ngraph PUBLIC NGRAPH_JSON_DISABLE)
endif()
+if(NGRAPH_INTERPRETER_STATIC_LIB_ENABLE)
+    target_compile_definitions(ngraph PUBLIC NGRAPH_INTERPRETER_STATIC_LIB_ENABLE)
+endif()
+if(NGRAPH_CPU_STATIC_LIB_ENABLE)
+    target_compile_definitions(ngraph PUBLIC NGRAPH_CPU_STATIC_LIB_ENABLE)
+endif()
if(NGRAPH_DISTRIBUTED_ENABLE)
if(NGRAPH_DISTRIBUTED_MLSL_ENABLE)
target_include_directories(ngraph SYSTEM PRIVATE libmlsl)
......
@@ -37,6 +37,9 @@ std::string runtime::Backend::s_backend_shared_library_search_directory;
// This finds the full path of the containing shared library
static string find_my_pathname()
{
+#ifdef NGRAPH_STATIC_LIB_ENABLE
+    return "";
+#else
#ifdef _WIN32
HMODULE hModule = GetModuleHandleW(L"ngraph.dll");
WCHAR wpath[MAX_PATH];
@@ -52,6 +55,7 @@ static string find_my_pathname()
dladdr(reinterpret_cast<void*>(find_my_pathname), &dl_info);
return dl_info.dli_fname;
#endif
+#endif
}
runtime::Backend::~Backend()
......
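Under NGRAPH_STATIC_LIB_ENABLE, find_my_pathname() now short-circuits to an empty string: in a static build there is no separate ngraph shared library whose path could be discovered. The POSIX branch it skips locates the library with dladdr(); a minimal self-contained sketch of that idiom (illustrative code, not the nGraph sources):

    #include <dlfcn.h>
    #include <string>

    // Any address inside the library will do; the function's own address is convenient.
    static std::string find_my_pathname_sketch()
    {
        Dl_info dl_info;
        if (dladdr(reinterpret_cast<void*>(&find_my_pathname_sketch), &dl_info) == 0)
            return ""; // lookup failed
        return dl_info.dli_fname ? dl_info.dli_fname : "";
    }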
@@ -32,12 +32,18 @@
using namespace std;
using namespace ngraph;
+#ifdef NGRAPH_STATIC_LIB_ENABLE
+#define DLERROR() ""
+#else
#ifdef _WIN32
#define CLOSE_LIBRARY(a) FreeLibrary(a)
#define DLSYM(a, b) GetProcAddress(a, b)
+#define DLERROR() ""
#else
#define CLOSE_LIBRARY(a) dlclose(a)
#define DLSYM(a, b) dlsym(a, b)
+#define DLERROR() dlerror()
#endif
+#endif
unordered_map<string, runtime::BackendConstructor*>& runtime::BackendManager::get_registry()
@@ -101,19 +107,19 @@ shared_ptr<runtime::Backend> runtime::BackendManager::create_backend(const std::
}
else
{
+#ifndef NGRAPH_STATIC_LIB_ENABLE
DL_HANDLE handle = open_shared_library(type);
if (!handle)
{
stringstream ss;
ss << "Backend '" << type << "' not registered. Error:";
#ifndef _WIN32
-ss << dlerror();
+ss << DLERROR();
#endif
throw runtime_error(ss.str());
}
#ifndef _WIN32
-dlerror(); // Clear any pending errors
+DLERROR(); // Clear any pending errors
#endif
function<runtime::BackendConstructor*()> get_backend_constructor_pointer =
reinterpret_cast<runtime::BackendConstructor* (*)()>(
@@ -127,7 +133,7 @@ shared_ptr<runtime::Backend> runtime::BackendManager::create_backend(const std::
{
string error;
#ifndef _WIN32
-const char* err = dlerror();
+const char* err = DLERROR();
error = (err ? err : "");
#endif
CLOSE_LIBRARY(handle);
@@ -136,6 +142,7 @@ shared_ptr<runtime::Backend> runtime::BackendManager::create_backend(const std::
"library.\nError='" +
error + "'");
}
+#endif
}
return backend;
}
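The hunk above guards create_backend()'s dynamic-loading path with #ifndef NGRAPH_STATIC_LIB_ENABLE; when that path is compiled in, the function resolves a backend-constructor accessor out of the freshly opened library via DLSYM. A condensed sketch of that dlopen/dlsym factory pattern, assuming POSIX and using illustrative names (load_backend and factory_symbol are not nGraph APIs):

    #include <dlfcn.h>
    #include <memory>
    #include <stdexcept>
    #include <string>

    struct Backend { virtual ~Backend() = default; };
    using BackendFactory = Backend* (*)();

    std::shared_ptr<Backend> load_backend(const std::string& lib_path, const char* factory_symbol)
    {
        void* handle = dlopen(lib_path.c_str(), RTLD_NOW | RTLD_GLOBAL);
        if (!handle)
            throw std::runtime_error(dlerror());
        dlerror(); // clear any pending loader error before dlsym, as the real code does
        auto factory = reinterpret_cast<BackendFactory>(dlsym(handle, factory_symbol));
        if (!factory)
        {
            dlclose(handle);
            throw std::runtime_error("backend factory symbol not found");
        }
        return std::shared_ptr<Backend>(factory());
    }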
@@ -146,6 +153,7 @@ DL_HANDLE runtime::BackendManager::open_shared_library(string type)
string lib_suffix = SHARED_LIB_SUFFIX;
DL_HANDLE handle = nullptr;
+#ifndef NGRAPH_STATIC_LIB_ENABLE
// strip off attributes, IE:CPU becomes IE
auto colon = type.find(":");
@@ -163,9 +171,9 @@ DL_HANDLE runtime::BackendManager::open_shared_library(string type)
SetDllDirectory((LPCSTR)my_directory.c_str());
handle = LoadLibrary(library_path.c_str());
#else
-dlerror(); // Clear any pending errors
+DLERROR(); // Clear any pending errors
handle = dlopen(library_path.c_str(), RTLD_NOW | RTLD_GLOBAL);
-const char* err = dlerror();
+const char* err = DLERROR();
error = (err ? err : "");
#endif
if (!handle)
@@ -175,12 +183,14 @@ DL_HANDLE runtime::BackendManager::open_shared_library(string type)
ss << "\nOpen error message '" << error << "'";
throw runtime_error(ss.str());
}
+#endif
return handle;
}
map<string, string> runtime::BackendManager::get_registered_device_map()
{
map<string, string> rc;
+#ifndef NGRAPH_STATIC_LIB_ENABLE
string my_directory =
file_util::get_directory(Backend::get_backend_shared_library_search_directory());
vector<string> backend_list;
@@ -197,6 +207,7 @@ map<string, string> runtime::BackendManager::get_registered_device_map()
}
};
file_util::iterate_files(my_directory, f, false, true);
+#endif
return rc;
}
......
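The DLERROR() macro introduced at the top of backend_manager.cpp is what lets the same call sites compile in all three configurations; a condensed sketch of the pattern, with STATIC_BUILD standing in for NGRAPH_STATIC_LIB_ENABLE:

    #ifdef STATIC_BUILD          // stand-in for NGRAPH_STATIC_LIB_ENABLE
    #define DL_ERROR() ""        // static build: no dynamic loader in play
    #elif defined(_WIN32)
    #include <windows.h>
    #define DL_ERROR() ""        // Windows reports loader failures via GetLastError()
    #else
    #include <dlfcn.h>
    #define DL_ERROR() dlerror() // POSIX loader error string
    #endif

In this commit the macro's main payoff is the static configuration, where dlerror() may not be available at all; the existing #ifndef _WIN32 guards around its call sites are left in place.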
@@ -95,6 +95,9 @@ namespace ngraph
}
}
REGISTER_OP_BUILDER(QuantizedConcat);
+#ifdef NGRAPH_CPU_STATIC_LIB_ENABLE
+void register_builders_quantized_concat_cpp() {}
+#endif
}
}
}
@@ -69,6 +69,9 @@ namespace ngraph
}
}
REGISTER_OP_BUILDER(QuantizedMaxPool);
+#ifdef NGRAPH_CPU_STATIC_LIB_ENABLE
+void register_builders_quantized_max_pool_cpp() {}
+#endif
}
}
}
@@ -77,6 +77,8 @@ namespace ngraph
register_builders_tile_cpp();
register_builders_topk_cpp();
register_builders_update_slice_cpp();
+register_builders_quantized_concat_cpp();
+register_builders_quantized_max_pool_cpp();
}
}
}
......
@@ -76,6 +76,8 @@ namespace ngraph
void register_builders_tile_cpp();
void register_builders_topk_cpp();
void register_builders_update_slice_cpp();
+void register_builders_quantized_concat_cpp();
+void register_builders_quantized_max_pool_cpp();
}
}
}
......
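The empty register_builders_*_cpp() functions added above are linker anchors. REGISTER_OP_BUILDER registers each builder from the constructor of a file-scope static object; when the CPU backend is archived into a static library, the linker drops any object file that nothing references, and those registrations silently vanish. Giving each builder file a trivially callable function, and calling it from the central registry, forces the object file to be kept. A sketch of the idiom with illustrative names (builder_registry, Registrar, and my_op are not nGraph symbols):

    #include <functional>
    #include <map>
    #include <string>

    // Central registry keyed by op name.
    static std::map<std::string, std::function<void()>>& builder_registry()
    {
        static std::map<std::string, std::function<void()>> registry;
        return registry;
    }

    // A file-scope static instance registers a builder at program start,
    // playing the role of REGISTER_OP_BUILDER.
    struct Registrar
    {
        Registrar(const std::string& name, std::function<void()> fn)
        {
            builder_registry().emplace(name, std::move(fn));
        }
    };

    static Registrar s_my_op_registrar{"MyOp", [] { /* construct the kernel */ }};

    // Empty anchor: referencing it from the registry translation unit keeps
    // this object file (and s_my_op_registrar with it) in a static link.
    void register_builders_my_op_cpp() {}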
@@ -10,3 +10,6 @@ max_3d_to_scalar_int32
# Not implemented
send_recv
send_recv_ring
+# param not supported in CPU backend
+group_conv_data_dilation
@@ -33,6 +33,7 @@ set(SRC
plaidml_ops_convolution.cpp
plaidml_ops_dot.cpp
plaidml_ops_general.cpp
+plaidml_ops_group_convolution.cpp
plaidml_ops_implicit_broadcast.cpp
plaidml_ops_index_reduction.cpp
plaidml_ops_io.cpp
......
@@ -17,6 +17,7 @@
#include "ngraph/runtime/plaidml/plaidml_compiler.hpp"
#include "ngraph/graph_util.hpp"
#include "ngraph/log.hpp"
+#include "ngraph/op/fused/group_conv.hpp"
#include "ngraph/pass/algebraic_simplification.hpp"
#include "ngraph/pass/core_fusion.hpp"
#include "ngraph/pass/cse.hpp"
@@ -66,7 +67,7 @@ namespace
PLAIDML_DEBUG << "Retire tensor: " << t;
}
}
-}
+} // namespace
ngraph::runtime::plaidml::Compiler::Compiler(Config* config)
: m_config{config}
@@ -87,7 +88,11 @@ std::shared_ptr<ngraph::runtime::plaidml::PlaidML_Executable>
pass_manager.set_per_pass_validation(false);
// We apply the same general-purpose passes as the CPU backend.
-pass_manager.register_pass<ngraph::pass::FusedOpDecomposition>();
+pass_manager.register_pass<ngraph::pass::FusedOpDecomposition>([](const Node& node) -> bool {
+    if (node.description() == ngraph::op::GroupConvolution().description())
+        return true;
+    return false;
+});
pass_manager.register_pass<ngraph::pass::LikeReplacement>();
pass_manager.register_pass<ngraph::pass::NopElimination>();
pass_manager.register_pass<ngraph::pass::ZeroDimTensorElimination>();
......
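On my reading of FusedOpDecomposition, the callback passed to register_pass is a "backend handles this natively" query: returning true for a node makes the pass leave that fused op intact instead of expanding it into primitive ops. Since this commit adds a native PlaidML GroupConvolution implementation (the new file below), the lambda keeps GroupConvolution nodes whole. A stripped-down sketch of the idea, with simplified stand-in types (Node here is not nGraph's):

    #include <functional>
    #include <string>
    #include <vector>

    struct Node { std::string description; };
    using SupportedQuery = std::function<bool(const Node&)>;

    // Decompose every fused op except the ones the backend claims natively.
    void decompose_fused_ops(std::vector<Node>& graph, const SupportedQuery& supported)
    {
        for (Node& n : graph)
        {
            if (supported && supported(n))
                continue; // backend implements this fused op; keep it whole
            // ...otherwise replace n with its expansion into core ops...
        }
    }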
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/except.hpp"
#include "ngraph/log.hpp"
#include "ngraph/op/fused/group_conv.hpp"
#include "ngraph/op/slice.hpp"
#include "ngraph/runtime/plaidml/plaidml_impl.hpp"
namespace ngraph
{
namespace runtime
{
namespace plaidml
{
NGRAPH_PLAIDML_OP_CLASS(ImplGroupConvolution, OpImpl<::ngraph::op::GroupConvolution>);
}
} // namespace runtime
} // namespace ngraph
// GroupConvolution implements a grouped convolution, with optional striding, padding, and dilation.
void ngraph::runtime::plaidml::ImplGroupConvolution::Apply()
{
this->check_inputs(2);
this->check_outputs(1);
const auto& image = op_input(0);
const auto& filter = op_input(1);
auto rank = op().get_input_shape(0).size() - 2;
const auto& groups = op().get_groups();
const auto& padding_above = op().get_padding_above();
const auto& padding_below = op().get_padding_below();
const auto& strides = op().get_window_movement_strides();
const auto& filter_dilation = op().get_window_dilation_strides();
const auto& data_dilation = op().get_data_dilation_strides();
const auto& grps =
static_cast<::vertexai::plaidml::variable>(static_cast<const int64_t>(groups));
const auto& dd0 =
static_cast<::vertexai::plaidml::variable>(static_cast<const int64_t>(data_dilation[0]));
const auto& dd1 =
static_cast<::vertexai::plaidml::variable>(static_cast<const int64_t>(data_dilation[1]));
const auto& fd0 =
static_cast<::vertexai::plaidml::variable>(static_cast<const int64_t>(filter_dilation[0]));
const auto& fd1 =
static_cast<::vertexai::plaidml::variable>(static_cast<const int64_t>(filter_dilation[1]));
const auto& pxb =
static_cast<::vertexai::plaidml::variable>(static_cast<const int64_t>(padding_below[0]));
const auto& pyb =
static_cast<::vertexai::plaidml::variable>(static_cast<const int64_t>(padding_below[1]));
const auto& pxa =
static_cast<::vertexai::plaidml::variable>(static_cast<const int64_t>(padding_above[0]));
const auto& pya =
static_cast<::vertexai::plaidml::variable>(static_cast<const int64_t>(padding_above[1]));
const auto& sx =
static_cast<::vertexai::plaidml::variable>(static_cast<const int64_t>(strides[0]));
const auto& sy =
static_cast<::vertexai::plaidml::variable>(static_cast<const int64_t>(strides[1]));
this->set_output(::vertexai::plaidml::function{R"(
function (I[N, CI, XI0, XI1], F[CO, FCI, XF0, XF1], DD0, DD1, FD0, FD1, G, PXB, PYB, PXA, PYA, SX, SY) -> (O) {
O[n, (CO/G) * g + co, x, y: N, CO, ((DD0 * (XI0 - 1) + PXA + PXB) - (FD0 * (XF0 - 1)) + SX) / SX, ((DD1 * (XI1 - 1) + PYA + PYB) - (FD1 * (XF1 - 1)) + SY) / SY] =
+(I[n, (CI/G) * g + ci, (x + FD0 * xf0 - PXB)/DD0, (y + FD1 * xf1 - PYB)/DD1] * F[(CO/G) * g + co, ci, xf0, xf1]), co < CO/G;
})"}(image, filter, dd0, dd1, fd0, fd1, grps, pxb, pyb, pxa, pya, sx, sy));
}
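The index bounds in the Tile contraction encode the usual convolution output size, extended with data dilation; written out in plain C++ for one spatial dimension (a sketch with illustrative names, mirroring the expression in the kernel above):

    #include <cstdint>

    // Matches the Tile bound ((DD*(XI-1) + PXA + PXB) - (FD*(XF-1)) + SX) / SX:
    // one output point, plus one more for every full stride the dilated filter
    // can shift within the padded, dilated input.
    int64_t conv_output_extent(int64_t input,            // XI: input extent
                               int64_t filter,           // XF: filter extent
                               int64_t stride,           // SX
                               int64_t pad_below,        // PXB
                               int64_t pad_above,        // PXA
                               int64_t data_dilation,    // DD
                               int64_t filter_dilation)  // FD
    {
        int64_t dilated_input = data_dilation * (input - 1) + pad_below + pad_above;
        int64_t dilated_filter = filter_dilation * (filter - 1);
        return (dilated_input - dilated_filter + stride) / stride;
    }

The grouping itself is pure index arithmetic: the (CI/G)*g + ci and (CO/G)*g + co terms in the kernel split the input and output channels into G independent slices, and the co < CO/G constraint bounds the per-group output channels.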
@@ -178,7 +178,6 @@ conv_bias_2d
conv_bias_3d
conv_bias_bprop_2d
conv_bias_add_2d
-group_conv
space_to_depth
depth_to_space
normalize_across_chw_scalar_scale_4d
@@ -278,8 +277,6 @@ lstm_cell_no_bias_no_peepholes
lstm_cell_bias_peepholes
lstm_cell_bias_peepholes_clip_input_forget
lstm_cell_activaction_functions
-group_conv_transpose
-group_conv_transpose_output_shape
divide_python_rounding_int32
backwards_batchmatmul_tensor2_tensor2
......