Commit e26d602a authored by Amy Zhuang, committed by Scott Cyphers

Use mkl-dnn v1.0 or v0.x depending on compilation flag. (#3227)

* Use mkl-dnn v1.0 or v0.x depending on compilation flag.

* Change cpu builder files.

* Modify cmake files.

Use mkldnn-v1.0 for DEX if NGRAPH_USE_MKLDNN_V1 is set to true; otherwise use mkldnn-v0.x.

CODEGEN only builds with mkldnn-v1.0.

* Implement mkldnn utility functions for mkldnn-v1.0.

User mode scratchpad management for mkldnn-v1.0.

* Query scratchpad size and allocate a buffer of max scratchpad size.

* Do not create mkldnn::memory when query scratchpad size of Reorder.

Modify mkldnn utility functions.

Fix convolution_forward_init and inner_product_forward_init.

Modify CPURuntimeContextCG.

* Add user mode scratchpad to CODEGEN.

* mkldnn-v1.0 splits LSTM states. Update Rnn/Lstm Op accordingly.

* Address PR feedback: use MKLDNN_MAJOR_VERSION.

* Modify cpu rnn fusion pass and related unit tests.

* Change Rnn/Lstm arg types to Output.

* Fix Lstm for CODEGEN.

* Set native layout for Slice when input format is blocked.

* Do not print scratchpad size.

* Change external_mkldnn_v1.cmake.

Fix a typo.

* Add mkldnn_v1.patch for mkldnn-v1.0.

* Address PR feedback.

* Define MKLDNN_ERROR_MESSAGE.

* Address PR feedback: change to NGRAPH_USE_LEGACY_MKLDNN.

* Fix a bug.

* Remove unused variable.

* Fix compiler warnings.

* Fix a bug for CODEGEN.

* Move variable only needed for mkldnn-v0.20 inside #if.

* Remove unused variables.

* No in place Reshape rotation for blocked data layout with mkldnn-v1.0.

* Modify mkldnn_v1.patch to force mkldnn to link to libiomp.

* Fix style.

* Change path for find_library and find_file.

* Do not insert ConvertLayout before/after Quantize/DeQuantize for blocked data layout.

* Write strides information to visualized graph.

* Move variables only needed for mkldnn-v0 under #if.

* Move more variables in rnn fusion.

* Fix ConvertLayout constant folding for mkldnn-v1.0.
parent cc1daca8
......@@ -168,6 +168,7 @@ ngraph_var(NGRAPH_USE_PREBUILT_LLVM DEFAULT "FALSE")
option(NGRAPH_UNIT_TEST_ENABLE "Control the building of unit tests" TRUE)
option(NGRAPH_TOOLS_ENABLE "Control the building of tool" TRUE)
option(NGRAPH_CPU_ENABLE "Control the building of the CPU backend" TRUE)
option(NGRAPH_USE_LEGACY_MKLDNN "Use legacy MKLDNN" TRUE)
option(NGRAPH_MLIR_ENABLE "Control the building of MLIR backend" FALSE)
option(NGRAPH_INTELGPU_ENABLE "Control the building of the Intel GPU backend with clDNN" FALSE)
option(NGRAPH_INTERPRETER_ENABLE "Control the building of the INTERPRETER backend" TRUE)
......@@ -241,6 +242,7 @@ endmacro()
NORMALIZE_BOOL(NGRAPH_UNIT_TEST_ENABLE)
NORMALIZE_BOOL(NGRAPH_TOOLS_ENABLE)
NORMALIZE_BOOL(NGRAPH_CPU_ENABLE)
NORMALIZE_BOOL(NGRAPH_USE_LEGACY_MKLDNN)
NORMALIZE_BOOL(NGRAPH_MLIR_ENABLE)
NORMALIZE_BOOL(NGRAPH_INTELGPU_ENABLE)
NORMALIZE_BOOL(NGRAPH_INTERPRETER_ENABLE)
......@@ -257,6 +259,7 @@ NORMALIZE_BOOL(NGRAPH_PYTHON_BUILD_ENABLE)
NORMALIZE_BOOL(NGRAPH_USE_PREBUILT_LLVM)
NORMALIZE_BOOL(NGRAPH_PLAIDML_ENABLE)
NORMALIZE_BOOL(NGRAPH_JSON_ENABLE)
NORMALIZE_BOOL(NGRAPH_STATIC_LIB_ENABLE)
NORMALIZE_BOOL(NGRAPH_INTERPRETER_STATIC_LIB_ENABLE)
NORMALIZE_BOOL(NGRAPH_CPU_STATIC_LIB_ENABLE)
......@@ -266,6 +269,7 @@ message(STATUS "NGRAPH_CXX_STANDARD: ${NGRAPH_CXX_STANDARD}")
message(STATUS "NGRAPH_UNIT_TEST_ENABLE: ${NGRAPH_UNIT_TEST_ENABLE}")
message(STATUS "NGRAPH_TOOLS_ENABLE: ${NGRAPH_TOOLS_ENABLE}")
message(STATUS "NGRAPH_CPU_ENABLE: ${NGRAPH_CPU_ENABLE}")
message(STATUS "NGRAPH_USE_LEGACY_MKLDNN: ${NGRAPH_USE_LEGACY_MKLDNN}")
message(STATUS "NGRAPH_MLIR_ENABLE: ${NGRAPH_MLIR_ENABLE}")
message(STATUS "NGRAPH_INTELGPU_ENABLE: ${NGRAPH_INTELGPU_ENABLE}")
message(STATUS "NGRAPH_INTERPRETER_ENABLE: ${NGRAPH_INTERPRETER_ENABLE}")
......@@ -391,6 +395,10 @@ if (NGRAPH_CPU_ENABLE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNGRAPH_CPU_ENABLE")
endif()
if (NGRAPH_USE_LEGACY_MKLDNN)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNGRAPH_USE_LEGACY_MKLDNN")
endif()
if (NGRAPH_MLIR_ENABLE)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DNGRAPH_MLIR_ENABLE")
set(NGRAPH_MLIR_SOURCE_DIR ${CMAKE_SOURCE_DIR}/src/contrib/mlir)
......@@ -534,7 +542,12 @@ if(NGRAPH_CPU_ENABLE OR NGRAPH_GENERIC_CPU_ENABLE)
include(cmake/external_eigen.cmake)
endif()
if(NGRAPH_CPU_ENABLE)
include(cmake/external_mkldnn.cmake)
if(NGRAPH_USE_LEGACY_MKLDNN)
include(cmake/external_mkldnn.cmake)
set(NGRAPH_DEX_ONLY TRUE)
else()
include(cmake/external_mkldnn_v1.cmake)
endif()
endif()
if (NGRAPH_MLIR_ENABLE)
include(cmake/external_mlir.cmake)
......
This diff is collapsed.
diff --git a/cmake/OpenMP.cmake b/cmake/OpenMP.cmake
index 99970659..ef88a0a7 100644
--- a/cmake/OpenMP.cmake
+++ b/cmake/OpenMP.cmake
@@ -28,7 +28,7 @@ if (APPLE AND CMAKE_CXX_COMPILER_ID STREQUAL "Clang")
# But we still want to build the library.
set(_omp_severity "WARNING")
else()
- set(_omp_severity "FATAL_ERROR")
+ set(_omp_severity "WARNING")
endif()
macro(forbid_link_compiler_omp_rt)
@@ -45,6 +45,42 @@ macro(forbid_link_compiler_omp_rt)
endif()
endmacro()
+macro(use_intel_omp_rt)
+ # fast return
+ if (CMAKE_CXX_COMPILER_ID STREQUAL "Intel")
+ set(MKLDNN_USES_INTEL_OPENMP TRUE)
+ return()
+ endif()
+
+ # Do not link with compiler-native OpenMP library if Intel MKL is present.
+ # Rationale: Intel MKL comes with Intel OpenMP library which is compatible
+ # with all libraries shipped with compilers that Intel MKL-DNN supports.
+ find_library(IOMP5LIB
+ NAMES "iomp5" "iomp5md" "libiomp5" "libiomp5md"
+ PATHS
+ ${CMAKE_CURRENT_SOURCE_DIR}/external/mkl/lib
+ NO_DEFAULT_PATH)
+ if(IOMP5LIB)
+ forbid_link_compiler_omp_rt()
+ if (WIN32)
+ find_file(IOMP5DLL
+ NAMES "libiomp5.dll" "libiomp5md.dll"
+ PATHS
+ ${CMAKE_CURRENT_SOURCE_DIR}/external/mkl/lib
+ NO_DEFAULT_PATH)
+ endif()
+ list(APPEND EXTRA_SHARED_LIBS ${IOMP5LIB})
+ else()
+ if (MKLDNN_THREADING STREQUAL "OMP:INTEL")
+ message(${_omp_severity} "Intel OpenMP runtime could not be found. "
+ "Please either use OpenMP runtime that comes with the compiler "
+ "(via -DMKLDNN_THREADING={OMP,OMP:COMP}), or "
+ "explicitely provide the path to libiomp with the "
+ "-DCMAKE_LIBRARY_PATH option")
+ endif()
+ endif()
+endmacro()
+
if(WIN32 AND ${CMAKE_CXX_COMPILER_ID} STREQUAL MSVC)
add_definitions(/Qpar)
add_definitions(/openmp)
@@ -78,6 +114,7 @@ if (MKLDNN_CPU_RUNTIME MATCHES "OMP")
message(${_omp_severity} "OpenMP library could not be found. "
"Proceeding might lead to highly sub-optimal performance.")
endif()
+ use_intel_omp_rt()
else()
# Compilation happens with OpenMP to enable `#pragma omp simd`
# but during linkage OpenMP dependency should be avoided
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 60bb0c94..cc3fc9d6 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -73,8 +73,10 @@ endif()
add_library(${LIB_NAME}
${MKLDNN_LIBRARY_TYPE} ${HEADERS} ${${LIB_NAME}_SUB_OBJS})
-set_property(TARGET ${LIB_NAME} PROPERTY VERSION "${PROJECT_VERSION}.0")
-set_property(TARGET ${LIB_NAME} PROPERTY SOVERSION "0")
+if(MKLDNN_LIB_VERSIONING_ENABLE)
+ set_property(TARGET ${LIB_NAME} PROPERTY VERSION "${PROJECT_VERSION}.0")
+ set_property(TARGET ${LIB_NAME} PROPERTY SOVERSION "0")
+endif()
set_property(TARGET ${LIB_NAME} PROPERTY PUBLIC_HEADER ${HEADERS})
target_include_directories(${LIB_NAME} PUBLIC
......@@ -190,6 +190,10 @@ void codegen::CompilerCore::initialize()
// Prevent Eigen from using any LGPL3 code
args.push_back("-DEIGEN_MPL2_ONLY");
#if defined(NGRAPH_USE_LEGACY_MKLDNN)
args.push_back("-DNGRAPH_USE_LEGACY_MKLDNN");
#endif
// Prepare DiagnosticEngine
IntrusiveRefCntPtr<DiagnosticOptions> diag_options = new DiagnosticOptions();
diag_options->ErrorLimit = 20;
......
......@@ -124,7 +124,6 @@ set(SRC
pass/cpu_mat_fusion.cpp
pass/cpu_memory_assignment.cpp
pass/cpu_memory_optimization.cpp
pass/cpu_mkldnn_primitive_build.cpp
pass/cpu_post_layout_optimizations.cpp
pass/cpu_rnn_fusion.cpp
pass/cpu_workspace_insertion.cpp
......@@ -136,6 +135,7 @@ if (NOT NGRAPH_DEX_ONLY)
cpu_emitter.cpp
cpu_kernel_emitters.cpp
cpu_kernel_utils.cpp
pass/cpu_mkldnn_primitive_build.cpp
)
endif()
......@@ -168,6 +168,9 @@ if (NGRAPH_CPU_ENABLE)
VERSION ${NGRAPH_VERSION}
SOVERSION ${NGRAPH_API_VERSION})
endif()
if (NGRAPH_USE_LEGACY_MKLDNN)
target_compile_definitions(cpu_backend PRIVATE "NGRAPH_USE_LEGACY_MKLDNN")
endif()
if (NGRAPH_DEX_ONLY)
target_compile_definitions(cpu_backend PRIVATE "NGRAPH_DEX_ONLY")
endif()
......@@ -207,7 +210,8 @@ if (NGRAPH_CPU_ENABLE)
target_compile_definitions(cpu_backend PRIVATE CPU_BACKEND_DLL_EXPORTS)
add_dependencies(cpu_backend libmkldnn ext_eigen)
target_link_libraries(cpu_backend PUBLIC ngraph libmkldnn libmkl libeigen libtbb)
target_link_libraries(cpu_backend PUBLIC ngraph libmkldnn libmkl libeigen libtbb)
if (NGRAPH_JSON_ENABLE)
target_link_libraries(cpu_backend PUBLIC libjson)
endif()
......
......@@ -40,6 +40,8 @@ namespace ngraph
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto sum_pd = mkldnn_emitter->get_elementwise_add_desc(node);
QUERY_SCRATCHPAD(sum, sum_pd);
// Add needs 4 primitives: input0, input1, result, and sum.
size_t add_index = mkldnn_emitter->reserve_primitive_space(4);
auto& deps = mkldnn_emitter->get_primitive_deps(add_index);
......@@ -59,8 +61,12 @@ namespace ngraph
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_elementwise_add(
ctx->mkldnn_primitives, sum_pd, deps, add_index);
mkldnn_emitter->build_elementwise_add(ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
sum_pd,
deps,
add_index);
}
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[0], ctx->buffer_data[arg0_buffer_index]);
......@@ -68,7 +74,9 @@ namespace ngraph
ctx, deps[1], ctx->buffer_data[arg1_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[2], ctx->buffer_data[out_buffer_index]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, add_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx, add_index, deps, cpu::mkldnn_utils::OpType::ADD);
};
functors.emplace_back(functor);
}
......
......@@ -55,6 +55,8 @@ namespace ngraph
auto avg_pool_desc =
mkldnn_emitter->get_avg_pooling_forward_desc<ngraph::op::AvgPool>(node,
false);
QUERY_SCRATCHPAD(pooling_forward, avg_pool_desc);
// AvgPool needs 3 primitives: input, result, and pooling_forward.
size_t avg_pool_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(avg_pool_index);
......@@ -64,14 +66,20 @@ namespace ngraph
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_pooling_forward(
ctx->mkldnn_primitives, avg_pool_desc, deps, avg_pool_index);
mkldnn_emitter->build_pooling_forward(ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
avg_pool_desc,
deps,
avg_pool_index);
}
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[0], ctx->buffer_data[arg0_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[1], ctx->buffer_data[out_buffer_index]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, avg_pool_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx, avg_pool_index, deps, cpu::mkldnn_utils::OpType::AVGPOOL);
};
functors.emplace_back(functor);
}
......@@ -137,6 +145,8 @@ namespace ngraph
auto avg_pool_desc =
mkldnn_emitter->get_avg_pooling_backward_desc<ngraph::op::AvgPoolBackprop>(
node);
QUERY_SCRATCHPAD_2ARGS(avg_pooling_backward, avg_pool_fwd_desc, avg_pool_desc);
// AvgPoolBackprop needs 3 primitives: input, result, and pooling_backward.
size_t avg_pool_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(avg_pool_index);
......@@ -150,7 +160,9 @@ namespace ngraph
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_pooling_backward(ctx->mkldnn_primitives,
mkldnn_emitter->build_pooling_backward(ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
avg_pool_desc,
avg_pool_fwd_desc,
deps,
......@@ -160,7 +172,9 @@ namespace ngraph
ctx, deps[0], ctx->buffer_data[delta_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[1], ctx->buffer_data[out_buffer_index]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, avg_pool_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx, avg_pool_index, deps, cpu::mkldnn_utils::OpType::AVGPOOLBACKPROP);
};
functors.emplace_back(functor);
}
......
......@@ -84,10 +84,11 @@ namespace ngraph
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto batchnorm_desc =
mkldnn_emitter->get_batchnorm_forward_desc<OP>(node, true);
QUERY_SCRATCHPAD_2ARGS(batchnorm_forward, batchnorm_desc, ops);
auto weights_shape = Shape{2, args[0].get_size()};
auto weights_desc = mkldnn_emitter->build_memory_descriptor(
weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc);
weights_shape, args[0].get_element_type(), mkldnn::memory::FORMAT::nc);
// batchnorm forward needs 6 primitives: input, weights, result, mean,
// variance, and batch_normalization_forward.
......@@ -111,7 +112,9 @@ namespace ngraph
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_batchnorm_forward(ctx->mkldnn_primitives,
mkldnn_emitter->build_batchnorm_forward(ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
batchnorm_desc,
weights_desc,
training,
......@@ -136,7 +139,8 @@ namespace ngraph
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[4], ctx->buffer_data[out2_buffer_index]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, batchnorm_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx, batchnorm_index, deps, cpu::mkldnn_utils::OpType::BATCHNORM3ARGS);
};
functors.emplace_back(functor);
}
......@@ -151,9 +155,11 @@ namespace ngraph
auto batchnorm_desc =
mkldnn_emitter->get_batchnorm_forward_desc<OP>(node, false);
QUERY_SCRATCHPAD_2ARGS(batchnorm_forward, batchnorm_desc, ops);
auto weights_shape = Shape{2, args[0].get_size()};
auto weights_desc = mkldnn_emitter->build_memory_descriptor(
weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc);
weights_shape, args[0].get_element_type(), mkldnn::memory::FORMAT::nc);
// batchnorm forward needs 6 primitives: input, weights, result, mean,
// variance, and batch_normalization_forward.
......@@ -177,7 +183,9 @@ namespace ngraph
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_batchnorm_forward(ctx->mkldnn_primitives,
mkldnn_emitter->build_batchnorm_forward(ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
batchnorm_desc,
weights_desc,
training,
......@@ -202,7 +210,8 @@ namespace ngraph
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[4], ctx->buffer_data[out0_buffer_index]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, batchnorm_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx, batchnorm_index, deps, cpu::mkldnn_utils::OpType::BATCHNORM5ARGS);
};
functors.emplace_back(functor);
}
......@@ -421,17 +430,24 @@ namespace ngraph
auto batchnorm_desc = mkldnn_emitter->get_batchnorm_backward_desc(node);
auto weights_shape = Shape{2, args[0].get_size()};
auto weights_desc = mkldnn_emitter->build_memory_descriptor(
weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc);
weights_shape, args[0].get_element_type(), mkldnn::memory::FORMAT::nc);
auto dweights_desc = mkldnn_emitter->build_memory_descriptor(
weights_shape, args[0].get_element_type(), mkldnn::memory::format::nc);
weights_shape, args[0].get_element_type(), mkldnn::memory::FORMAT::nc);
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 2);
// batchnorm backward needs 8 primitives: weights, input, mean, variance,
// dinput, dweights, and batch_normalization_backward.
auto batchnorm_index = mkldnn_emitter->reserve_primitive_space(8);
auto& deps = mkldnn_emitter->get_primitive_deps(batchnorm_index);
const ngraph::op::BatchNormTrainingBackprop* batchnorm =
static_cast<const ngraph::op::BatchNormTrainingBackprop*>(node);
auto eps = batchnorm->get_eps_value();
QUERY_SCRATCHPAD_3ARGS(batchnorm_backward, batchnorm_desc, input_desc, eps);
auto functor = [&,
batchnorm_desc,
input_desc,
weights_desc,
dweights_desc,
batchnorm_index,
......@@ -450,10 +466,14 @@ namespace ngraph
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_batchnorm_backward(ctx->mkldnn_primitives,
mkldnn_emitter->build_batchnorm_backward(ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
batchnorm_desc,
input_desc,
weights_desc,
dweights_desc,
eps,
deps,
batchnorm_index);
}
......@@ -477,7 +497,8 @@ namespace ngraph
ctx, deps[5], ctx->buffer_data[out0_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[6], stacked_dweights.get());
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, batchnorm_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx, batchnorm_index, deps, cpu::mkldnn_utils::OpType::BATCHNORMBACKPROP);
memcpy(ctx->buffer_data[out1_buffer_index],
stacked_dweights.get(),
......
......@@ -44,6 +44,8 @@ namespace ngraph
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto bounded_relu_desc = mkldnn_emitter->get_bounded_relu_desc(node);
QUERY_SCRATCHPAD(eltwise_forward, bounded_relu_desc);
// BoundedRelu needs 3 primitives: input, result, and eltwise_forward.
auto bounded_relu_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(bounded_relu_index);
......@@ -56,7 +58,9 @@ namespace ngraph
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_bounded_relu(ctx->mkldnn_primitives,
mkldnn_emitter->build_bounded_relu(ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
bounded_relu_desc,
deps,
bounded_relu_index);
......@@ -65,7 +69,9 @@ namespace ngraph
ctx, deps[0], ctx->buffer_data[input_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[1], ctx->buffer_data[out_buffer_index]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, bounded_relu_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx, bounded_relu_index, deps, cpu::mkldnn_utils::OpType::BOUNDEDRELU);
};
functors.emplace_back(functor);
}
......
......@@ -101,6 +101,8 @@ namespace ngraph
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto concat_pd =
mkldnn_emitter->get_concat_desc<ngraph::op::Concat>(node, nargs);
QUERY_SCRATCHPAD(concat, concat_pd);
std::vector<mkldnn::memory::desc> inputs_data_desc;
for (size_t i = 0; i < nargs; i++)
{
......@@ -121,7 +123,9 @@ namespace ngraph
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_concat(ctx->mkldnn_primitives,
mkldnn_emitter->build_concat(ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
concat_pd,
inputs_data_desc,
deps,
......@@ -134,7 +138,9 @@ namespace ngraph
}
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[nargs], ctx->buffer_data[out_buffer_index]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, concat_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx, concat_index, deps, cpu::mkldnn_utils::OpType::CONCAT);
};
functors.emplace_back(functor);
......
......@@ -43,6 +43,7 @@ namespace ngraph
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
#if MKLDNN_VERSION_MAJOR < 1
if (input_desc.data.format == mkldnn_nchw &&
result_desc.data.format == mkldnn_goihw)
{
......@@ -80,7 +81,58 @@ namespace ngraph
mkldnn_utils::get_mkldnn_data_type(args[0].get_element_type()),
mkldnn::memory::format::goihw);
}
#else
bool input_format_is_nchw = mkldnn_utils::mkldnn_md_matches_format_tag(
input_desc.data, mkldnn::memory::format_tag::nchw);
if (input_format_is_nchw &&
mkldnn_utils::mkldnn_md_matches_format_tag(result_desc.data,
mkldnn::memory::format_tag::goihw))
{
// becomes a copy
input_desc = result_desc;
}
else if ((input_format_is_nchw ||
mkldnn_utils::mkldnn_md_matches_format_tag(
input_desc.data, mkldnn::memory::format_tag::nhwc)) &&
(mkldnn_utils::mkldnn_md_matches_format_tag(
result_desc.data, mkldnn::memory::format_tag::OIhw4i16o4i) &&
// check if compensation is conv_s8s8(1U)
result_desc.data.extra.flags & 0x1U))
{
auto arg0_shape = args[0].get_shape();
input_desc = mkldnn::memory::desc(
mkldnn::memory::dims(arg0_shape.begin(), arg0_shape.end()),
mkldnn_utils::get_mkldnn_data_type(args[0].get_element_type()),
mkldnn::memory::format_tag::oihw);
}
else if (input_format_is_nchw && input_desc.data.ndims == 4 &&
result_desc.data.ndims == 5 && node->get_users().size() == 1)
{
Shape weights_shape_groups;
if (auto gconv = std::dynamic_pointer_cast<ngraph::op::GroupConvolution>(
node->get_users()[0]))
{
weights_shape_groups = gconv->get_weights_dimensions();
}
else if (auto gconvb =
std::dynamic_pointer_cast<ngraph::op::GroupConvolutionBias>(
node->get_users()[0]))
{
weights_shape_groups = gconvb->get_weights_dimensions();
}
else
{
throw ngraph_error("Incompatible input/output shape in ConvertLayout op");
}
input_desc = mkldnn::memory::desc(
mkldnn::memory::dims(weights_shape_groups.begin(),
weights_shape_groups.end()),
mkldnn_utils::get_mkldnn_data_type(args[0].get_element_type()),
mkldnn::memory::format_tag::goihw);
}
mkldnn_emitter->query_scratchpad_reorder(input_desc, result_desc);
#endif
// ConvertLayout needs 3 primitives: input, result, and reorder.
size_t reorder_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(reorder_index);
......@@ -89,7 +141,9 @@ namespace ngraph
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_reorder(ctx->mkldnn_primitives,
mkldnn_emitter->build_reorder(ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
input_desc,
result_desc,
deps,
......@@ -99,7 +153,9 @@ namespace ngraph
ctx, deps[0], ctx->buffer_data[arg_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[1], ctx->buffer_data[out_buffer_index]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, reorder_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx, reorder_index, deps, cpu::mkldnn_utils::OpType::CONVERTLAYOUT);
};
functors.emplace_back(functor);
}
......
......@@ -44,6 +44,8 @@ namespace ngraph
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto leaky_relu_desc = mkldnn_emitter->get_leaky_relu_desc(node);
QUERY_SCRATCHPAD(eltwise_forward, leaky_relu_desc);
// CPULeakyRelu needs 3 primitives: input, result, and eltwise_forward.
auto leaky_relu_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(leaky_relu_index);
......@@ -56,14 +58,20 @@ namespace ngraph
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_leaky_relu(
ctx->mkldnn_primitives, leaky_relu_desc, deps, leaky_relu_index);
mkldnn_emitter->build_leaky_relu(ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
leaky_relu_desc,
deps,
leaky_relu_index);
}
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[0], ctx->buffer_data[input_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[1], ctx->buffer_data[out_buffer_index]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, leaky_relu_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx, leaky_relu_index, deps, cpu::mkldnn_utils::OpType::LEAKYRELU);
};
functors.emplace_back(functor);
}
......
......@@ -44,6 +44,8 @@ namespace ngraph
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto lrn_desc = mkldnn_emitter->get_lrn_forward_desc(node);
QUERY_SCRATCHPAD(lrn_forward, lrn_desc);
// LRN needs 3 primitives: input, result, and lrn_forward.
auto lrn_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(lrn_index);
......@@ -52,14 +54,20 @@ namespace ngraph
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_lrn_forward(
ctx->mkldnn_primitives, lrn_desc, deps, lrn_index);
mkldnn_emitter->build_lrn_forward(ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
lrn_desc,
deps,
lrn_index);
}
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[0], ctx->buffer_data[arg_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[1], ctx->buffer_data[out_buffer_index]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, lrn_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx, lrn_index, deps, cpu::mkldnn_utils::OpType::LRN);
};
}
else
......
......@@ -37,23 +37,27 @@ namespace ngraph
"Lstm is supported only through MKLDNN and doesnt have reference "
"INTERPRETER implementation");
}
#if MKLDNN_VERSION_MAJOR < 1
if (args.size() != 5)
{
throw ngraph_error(
"Lstm op doesnt have the required number of inputs to create MKLDNN "
"kernel");
}
#else
if (args.size() != 6)
{
throw ngraph_error(
"Lstm op doesnt have the required number of inputs to create MKLDNN "
"kernel");
}
#endif
auto& functors = external_function->get_functors();
auto src_layer_buffer_index =
external_function->get_buffer_index(args[0].get_name());
auto src_iter_buffer_index =
external_function->get_buffer_index(args[1].get_name());
auto weights_layer_buffer_index =
external_function->get_buffer_index(args[2].get_name());
auto weights_iter_buffer_index =
external_function->get_buffer_index(args[3].get_name());
auto bias_buffer_index = external_function->get_buffer_index(args[4].get_name());
auto dst_layer_buffer_index =
external_function->get_buffer_index(out[0].get_name());
auto dst_iter_buffer_index = external_function->get_buffer_index(out[1].get_name());
......@@ -61,6 +65,14 @@ namespace ngraph
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto lstm_desc =
mkldnn_emitter->get_rnn_forward_desc<ngraph::op::Lstm>(node, args, out);
#if MKLDNN_VERSION_MAJOR < 1
auto weights_layer_buffer_index =
external_function->get_buffer_index(args[2].get_name());
auto weights_iter_buffer_index =
external_function->get_buffer_index(args[3].get_name());
auto bias_buffer_index = external_function->get_buffer_index(args[4].get_name());
// Lstm needs 9 primitives: src_layer, src_iter, weights_layer, weights_iter, bias,
// dst_layer, dst_iter, workspace, and rnn_forward.
// It needs a new workspace.
......@@ -81,7 +93,9 @@ namespace ngraph
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_rnn_forward(ctx->mkldnn_primitives,
mkldnn_emitter->build_rnn_forward(ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
ctx->mkldnn_workspaces,
lstm_desc,
deps,
......@@ -103,9 +117,81 @@ namespace ngraph
ctx, deps[6], ctx->buffer_data[dst_iter_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[7], ctx->mkldnn_workspaces[deps[8]]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, lstm_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx, lstm_index, deps, cpu::mkldnn_utils::OpType::LSTM);
};
functors.emplace_back(functor);
#else
mkldnn_emitter->query_scratchpad_rnn_forward(lstm_desc);
auto src_iter_c_buffer_index =
external_function->get_buffer_index(args[2].get_name());
auto weights_layer_buffer_index =
external_function->get_buffer_index(args[3].get_name());
auto weights_iter_buffer_index =
external_function->get_buffer_index(args[4].get_name());
auto bias_buffer_index = external_function->get_buffer_index(args[5].get_name());
auto dst_iter_c_buffer_index =
external_function->get_buffer_index(out[2].get_name());
// Lstm needs 11 primitives: src_layer, src_iter, src_iter_c, weights_layer,
// weights_iter, bias,
// dst_layer, dst_iter, dst_iter_c, workspace, and lstm_forward.
// It needs a new workspace.
auto lstm_index =
mkldnn_emitter->reserve_primitive_space(11, true /* new workspace */);
auto& deps = mkldnn_emitter->get_primitive_deps(lstm_index);
auto functor = [&,
lstm_desc,
lstm_index,
src_layer_buffer_index,
src_iter_buffer_index,
src_iter_c_buffer_index,
weights_layer_buffer_index,
weights_iter_buffer_index,
bias_buffer_index,
dst_layer_buffer_index,
dst_iter_buffer_index,
dst_iter_c_buffer_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_rnn_forward(ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
ctx->mkldnn_workspaces,
lstm_desc,
deps,
lstm_index);
}
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[0], ctx->buffer_data[src_layer_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[1], ctx->buffer_data[src_iter_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[2], ctx->buffer_data[src_iter_c_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[3], ctx->buffer_data[weights_layer_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[4], ctx->buffer_data[weights_iter_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[5], ctx->buffer_data[bias_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[6], ctx->buffer_data[dst_layer_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[7], ctx->buffer_data[dst_iter_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[8], ctx->buffer_data[dst_iter_c_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[9], ctx->mkldnn_workspaces[deps[10]]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx, lstm_index, deps, cpu::mkldnn_utils::OpType::LSTM);
};
functors.emplace_back(functor);
#endif
}
void register_builders_lstm_cpp() { REGISTER_OP_BUILDER(Lstm); }
......
......@@ -54,6 +54,8 @@ namespace ngraph
auto max_pool_desc =
mkldnn_emitter->get_max_pooling_forward_desc<ngraph::op::MaxPool>(node,
false);
QUERY_SCRATCHPAD(pooling_forward, max_pool_desc);
// MaxPool needs 3 primitives: input, result, and pooling_forward.
size_t max_pool_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(max_pool_index);
......@@ -63,14 +65,20 @@ namespace ngraph
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_pooling_forward(
ctx->mkldnn_primitives, max_pool_desc, deps, max_pool_index);
mkldnn_emitter->build_pooling_forward(ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
max_pool_desc,
deps,
max_pool_index);
}
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[0], ctx->buffer_data[arg0_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[1], ctx->buffer_data[out_buffer_index]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, max_pool_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx, max_pool_index, deps, cpu::mkldnn_utils::OpType::MAXPOOL);
};
functors.emplace_back(functor);
}
......@@ -134,6 +142,7 @@ namespace ngraph
mkldnn_emitter->get_max_pooling_backward_desc<ngraph::op::MaxPoolBackprop>(
node);
auto fprop_src_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
QUERY_SCRATCHPAD_2ARGS(max_pooling_backward, fwd_pool_desc, bwd_pool_desc);
// MaxPoolBackprop forward needs 4 primitives: fprop_src, diff_src, workspace,
// and pooling_forward.
......@@ -151,7 +160,11 @@ namespace ngraph
ctx, fdeps[1], ctx->buffer_data[out_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, fdeps[2], ctx->mkldnn_workspaces[fdeps[3]]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, fwd_pool_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx,
fwd_pool_index,
fdeps,
cpu::mkldnn_utils::OpType::MAXPOOLBACKPROPFORWARD);
};
// MaxPoolBackprop backward needs 4 primitives: diff_dst, workspace, diff_src,
......@@ -168,7 +181,11 @@ namespace ngraph
ctx, bdeps[1], ctx->mkldnn_workspaces[bdeps[3]]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, bdeps[2], ctx->buffer_data[out_buffer_index]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, bwd_pool_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx,
bwd_pool_index,
bdeps,
cpu::mkldnn_utils::OpType::MAXPOOLBACKPROPBACKWARD);
};
auto functor = [&,
bwd_pool_desc,
......@@ -181,7 +198,9 @@ namespace ngraph
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_max_pooling_backward(ctx->mkldnn_primitives,
mkldnn_emitter->build_max_pooling_backward(ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
ctx->mkldnn_workspaces,
bwd_pool_desc,
fwd_pool_desc,
......@@ -249,6 +268,7 @@ namespace ngraph
mkldnn_emitter
->get_max_pooling_with_indices_forward_desc<ngraph::op::MaxPoolWithIndices>(
node);
QUERY_SCRATCHPAD(pooling_forward, max_pool_desc);
// MaxPoolWithIndices needs 4 primitives: src, dst, workspace, and pooling_forward.
size_t max_pool_index = mkldnn_emitter->reserve_primitive_space(4);
......@@ -264,7 +284,12 @@ namespace ngraph
if (ctx->first_iteration)
{
mkldnn_emitter->build_max_pooling_with_indices_forward(
ctx->mkldnn_primitives, max_pool_desc, deps, max_pool_index);
ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
max_pool_desc,
deps,
max_pool_index);
}
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[0], ctx->buffer_data[arg0_buffer_index]);
......@@ -272,7 +297,9 @@ namespace ngraph
ctx, deps[1], ctx->buffer_data[out0_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[2], ctx->buffer_data[out1_buffer_index]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, max_pool_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx, max_pool_index, deps, cpu::mkldnn_utils::OpType::MAXPOOLWITHINDICES);
};
functors.emplace_back(functor);
}
......@@ -300,6 +327,9 @@ namespace ngraph
mkldnn_emitter
->get_max_pooling_backward_desc<ngraph::op::MaxPoolWithIndicesBackprop>(
node);
QUERY_SCRATCHPAD_2ARGS(
max_pooling_with_indices_backward, fwd_pool_desc, bwd_pool_desc);
// MaxPoolWithIndicesBackprop needs 4 primitives: diff_dst, fprop_workspace,
// diff_src, and pooling_backward.
size_t max_pool_index = mkldnn_emitter->reserve_primitive_space(4);
......@@ -316,7 +346,9 @@ namespace ngraph
if (ctx->first_iteration)
{
mkldnn_emitter->build_max_pooling_with_indices_backward(
ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
bwd_pool_desc,
fwd_pool_desc,
deps,
......@@ -328,7 +360,12 @@ namespace ngraph
ctx, deps[1], ctx->buffer_data[arg2_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[2], ctx->buffer_data[out_buffer_index]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, max_pool_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx,
max_pool_index,
deps,
cpu::mkldnn_utils::OpType::MAXPOOLWITHINDICESBACKPROP);
};
functors.emplace_back(functor);
}
......
......@@ -53,8 +53,11 @@ namespace ngraph
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
QUERY_SCRATCHPAD_2ARGS(reorder, input_desc, result_desc);
auto scale_const_op = std::dynamic_pointer_cast<ngraph::op::Constant>(
dequantize->get_argument(1));
if (scale_const_op == nullptr)
{
auto arg1_buffer_index =
......@@ -83,7 +86,9 @@ namespace ngraph
static_cast<float*>(ctx->buffer_data[arg1_buffer_index]),
static_cast<float*>(ctx->buffer_data[arg1_buffer_index]) +
scales_size);
mkldnn_emitter->build_quantize_reorder(ctx->mkldnn_primitives,
mkldnn_emitter->build_quantize_reorder(ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
input_desc,
result_desc,
dyn_scales,
......@@ -94,7 +99,9 @@ namespace ngraph
ctx, deps[0], ctx->buffer_data[arg0_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[1], ctx->buffer_data[out_buffer_index]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, dequantize_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx, dequantize_index, deps, cpu::mkldnn_utils::OpType::DEQUANTIZE);
};
functors.emplace_back(functor);
}
......@@ -116,7 +123,9 @@ namespace ngraph
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_quantize_reorder(ctx->mkldnn_primitives,
mkldnn_emitter->build_quantize_reorder(ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
input_desc,
result_desc,
scales,
......@@ -127,7 +136,9 @@ namespace ngraph
ctx, deps[0], ctx->buffer_data[arg0_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[1], ctx->buffer_data[out_buffer_index]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, dequantize_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx, dequantize_index, deps, cpu::mkldnn_utils::OpType::DEQUANTIZE);
};
functors.emplace_back(functor);
}
......@@ -314,6 +325,7 @@ namespace ngraph
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
QUERY_SCRATCHPAD_2ARGS(reorder, input_desc, result_desc);
auto scale_const_op =
std::dynamic_pointer_cast<ngraph::op::Constant>(quantize->get_argument(1));
......@@ -351,7 +363,9 @@ namespace ngraph
}
// quantize across first dim (mask=2^0) if dyn_scales is a vector
const int mask = scales_size == 1 ? 0 : 1;
mkldnn_emitter->build_quantize_reorder(ctx->mkldnn_primitives,
mkldnn_emitter->build_quantize_reorder(ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
input_desc,
result_desc,
dyn_scales,
......@@ -363,7 +377,9 @@ namespace ngraph
ctx, deps[0], ctx->buffer_data[arg0_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[1], ctx->buffer_data[out_buffer_index]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, quantize_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx, quantize_index, deps, cpu::mkldnn_utils::OpType::QUANTIZE);
};
functors.emplace_back(functor);
}
......@@ -385,7 +401,9 @@ namespace ngraph
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_quantize_reorder(ctx->mkldnn_primitives,
mkldnn_emitter->build_quantize_reorder(ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
input_desc,
result_desc,
scales,
......@@ -396,7 +414,9 @@ namespace ngraph
ctx, deps[0], ctx->buffer_data[arg0_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[1], ctx->buffer_data[out_buffer_index]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, quantize_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx, quantize_index, deps, cpu::mkldnn_utils::OpType::QUANTIZE);
};
functors.emplace_back(functor);
}
......
......@@ -66,6 +66,8 @@ namespace ngraph
auto conv_attr =
mkldnn_emitter
->get_convolution_forward_attr<ngraph::op::QuantizedConvolution>(node);
QUERY_SCRATCHPAD_2ARGS(convolution_forward, conv_desc, conv_attr);
size_t conv_index = mkldnn_emitter->convolution_forward_init();
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
......@@ -94,7 +96,9 @@ namespace ngraph
// use conv channelwise (dim 1, mask=2^1) if dyn_scales is a vector
conv_attr.set_output_scales(0, dyn_scales);
mkldnn_emitter->build_convolution_forward<false>(
ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
conv_desc,
conv_attr,
executor::global_cpu_engine,
......@@ -107,7 +111,9 @@ namespace ngraph
ctx, deps[1], ctx->buffer_data[arg1_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[2], ctx->buffer_data[out0_buffer_index]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, conv_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx, conv_index, deps, cpu::mkldnn_utils::OpType::QUANTIZEDCONVOLUTION);
};
functors.emplace_back(functor);
}
......@@ -339,6 +345,8 @@ namespace ngraph
mkldnn_emitter
->get_convolution_forward_attr<ngraph::op::QuantizedConvolutionRelu>(
node);
QUERY_SCRATCHPAD_2ARGS(convolution_forward, conv_desc, conv_attr);
size_t conv_index = mkldnn_emitter->convolution_forward_init();
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
......@@ -364,7 +372,9 @@ namespace ngraph
const int mask = scales_size == 1 ? 0 : 2;
conv_attr.set_output_scales(mask, dyn_scales);
mkldnn_emitter->build_convolution_forward<false>(
ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
conv_desc,
conv_attr,
executor::global_cpu_engine,
......@@ -377,7 +387,12 @@ namespace ngraph
ctx, deps[1], ctx->buffer_data[arg1_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[2], ctx->buffer_data[out0_buffer_index]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, conv_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx,
conv_index,
deps,
cpu::mkldnn_utils::OpType::QUANTIZEDCONVOLUTIONRELU);
};
functors.emplace_back(functor);
}
......@@ -415,6 +430,8 @@ namespace ngraph
mkldnn_emitter
->get_convolution_forward_attr<ngraph::op::QuantizedConvolutionBias>(
node);
QUERY_SCRATCHPAD_2ARGS(convolution_forward, conv_desc, conv_attr);
size_t conv_index = mkldnn_emitter->convolution_forward_init(true);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
......@@ -441,7 +458,9 @@ namespace ngraph
const int mask = scales_size == 1 ? 0 : 2;
conv_attr.set_output_scales(mask, dyn_scales);
mkldnn_emitter->build_convolution_forward<true>(
ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
conv_desc,
conv_attr,
executor::global_cpu_engine,
......@@ -456,7 +475,12 @@ namespace ngraph
ctx, deps[2], ctx->buffer_data[arg2_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[3], ctx->buffer_data[out0_buffer_index]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, conv_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx,
conv_index,
deps,
cpu::mkldnn_utils::OpType::QUANTIZEDCONVOLUTIONBIAS);
};
functors.emplace_back(functor);
}
......@@ -501,6 +525,8 @@ namespace ngraph
mkldnn_emitter
->get_convolution_forward_attr<ngraph::op::QuantizedConvolutionBiasAdd>(
node);
QUERY_SCRATCHPAD_2ARGS(convolution_forward, conv_desc, conv_attr);
size_t conv_index = mkldnn_emitter->convolution_forward_init(true);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
......@@ -553,7 +579,9 @@ namespace ngraph
conv_attr.set_output_scales(mask, dyn_scales);
conv_attr.set_post_ops(new_pops);
mkldnn_emitter->build_convolution_forward<true>(
ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
conv_desc,
conv_attr,
executor::global_cpu_engine,
......@@ -576,7 +604,12 @@ namespace ngraph
ctx, deps[2], ctx->buffer_data[arg2_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[3], ctx->buffer_data[out0_buffer_index]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, conv_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx,
conv_index,
deps,
cpu::mkldnn_utils::OpType::QUANTIZEDCONVOLUTIONBIASADD);
};
functors.emplace_back(functor);
}
......@@ -617,6 +650,8 @@ namespace ngraph
ngraph::op::QuantizedConvolutionBiasSignedAdd>(node);
auto conv_attr = mkldnn_emitter->get_convolution_forward_attr<
ngraph::op::QuantizedConvolutionBiasSignedAdd>(node);
QUERY_SCRATCHPAD_2ARGS(convolution_forward, conv_desc, conv_attr);
size_t conv_index = mkldnn_emitter->convolution_forward_init(true);
auto& deps = mkldnn_emitter->get_primitive_deps(conv_index);
......@@ -669,7 +704,9 @@ namespace ngraph
const int mask = scales_size == 1 ? 0 : 2;
conv_attr.set_output_scales(mask, dyn_scales);
mkldnn_emitter->build_convolution_forward<true>(
ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
conv_desc,
conv_attr,
executor::global_cpu_engine,
......@@ -692,7 +729,12 @@ namespace ngraph
ctx, deps[2], ctx->buffer_data[arg2_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[3], ctx->buffer_data[out0_buffer_index]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, conv_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx,
conv_index,
deps,
cpu::mkldnn_utils::OpType::QUANTIZEDCONVOLUTIONBIASSIGNEDADD);
};
functors.emplace_back(functor);
}
......
......@@ -63,6 +63,8 @@ namespace ngraph
auto ip_attr =
mkldnn_emitter
->get_inner_product_forward_attr<ngraph::op::QuantizedDotBias>(node);
QUERY_SCRATCHPAD_2ARGS(ip_forward, ip_desc, ip_attr);
size_t ip_index = mkldnn_emitter->inner_product_forward_init(true);
auto& deps = mkldnn_emitter->get_primitive_deps(ip_index);
......@@ -87,7 +89,9 @@ namespace ngraph
scales_size);
ip_attr.set_output_scales(0, dyn_scales);
mkldnn_emitter->build_inner_product_forward<true>(
ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
ip_desc,
ip_attr,
executor::global_cpu_engine,
......@@ -102,7 +106,9 @@ namespace ngraph
ctx, deps[2], ctx->buffer_data[arg2_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[3], ctx->buffer_data[out0_buffer_index]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, ip_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx, ip_index, deps, cpu::mkldnn_utils::OpType::QUANTIZEDDOTBIAS);
};
functors.emplace_back(functor);
}
......
......@@ -56,6 +56,8 @@ namespace ngraph
auto ip_attr =
mkldnn_emitter->get_inner_product_forward_attr<ngraph::op::QuantizedMatmul>(
node);
QUERY_SCRATCHPAD_2ARGS(ip_forward, ip_desc, ip_attr);
size_t ip_index = mkldnn_emitter->inner_product_forward_init(false);
auto& deps = mkldnn_emitter->get_primitive_deps(ip_index);
......@@ -76,7 +78,9 @@ namespace ngraph
*(static_cast<float*>(ctx->buffer_data[arg2_buffer_index])));
ip_attr.set_output_scales(0, dyn_scales);
mkldnn_emitter->build_inner_product_forward<false>(
ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
ip_desc,
ip_attr,
executor::global_cpu_engine,
......@@ -89,7 +93,9 @@ namespace ngraph
ctx, deps[1], ctx->buffer_data[arg1_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[2], ctx->buffer_data[out0_buffer_index]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, ip_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx, ip_index, deps, cpu::mkldnn_utils::OpType::QUANTIZEDMATMUL);
};
functors.emplace_back(functor);
}
......
......@@ -41,6 +41,8 @@ namespace ngraph
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto relu_desc = mkldnn_emitter->get_relu_forward_desc(node);
QUERY_SCRATCHPAD(eltwise_forward, relu_desc);
// Relu needs 3 primitives: input, result, and eltwise_forward.
size_t relu_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(relu_index);
......@@ -49,14 +51,20 @@ namespace ngraph
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_relu_forward(
ctx->mkldnn_primitives, relu_desc, deps, relu_index);
mkldnn_emitter->build_relu_forward(ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
relu_desc,
deps,
relu_index);
}
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[0], ctx->buffer_data[arg_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[1], ctx->buffer_data[out_buffer_index]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, relu_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx, relu_index, deps, cpu::mkldnn_utils::OpType::RELU);
};
functors.emplace_back(functor);
}
......@@ -81,6 +89,8 @@ namespace ngraph
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto bwd_desc = mkldnn_emitter->get_relu_backward_desc(node);
auto fwd_desc = mkldnn_emitter->get_relu_forward_desc(node);
QUERY_SCRATCHPAD_2ARGS(eltwise_backward, fwd_desc, bwd_desc);
// ReluBackprop needs 4 primitives: input, delta, result, and eltwise_backward.
size_t relu_index = mkldnn_emitter->reserve_primitive_space(4);
auto& deps = mkldnn_emitter->get_primitive_deps(relu_index);
......@@ -95,8 +105,13 @@ namespace ngraph
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_relu_backward(
ctx->mkldnn_primitives, bwd_desc, fwd_desc, deps, relu_index);
mkldnn_emitter->build_relu_backward(ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
bwd_desc,
fwd_desc,
deps,
relu_index);
}
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[0], ctx->buffer_data[arg_fwd_buffer_index]);
......@@ -104,7 +119,9 @@ namespace ngraph
ctx, deps[1], ctx->buffer_data[delta_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[2], ctx->buffer_data[out_buffer_index]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, relu_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx, relu_index, deps, cpu::mkldnn_utils::OpType::RELUBACKPROP);
};
functors.emplace_back(functor);
}
......
......@@ -44,11 +44,6 @@ namespace ngraph
external_function->get_buffer_index(args[0].get_name());
auto src_iter_buffer_index =
external_function->get_buffer_index(args[1].get_name());
auto weights_layer_buffer_index =
external_function->get_buffer_index(args[2].get_name());
auto weights_iter_buffer_index =
external_function->get_buffer_index(args[3].get_name());
auto bias_buffer_index = external_function->get_buffer_index(args[4].get_name());
auto dst_layer_buffer_index =
external_function->get_buffer_index(out[0].get_name());
auto dst_iter_buffer_index = external_function->get_buffer_index(out[1].get_name());
......@@ -56,6 +51,14 @@ namespace ngraph
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto rnn_desc =
mkldnn_emitter->get_rnn_forward_desc<ngraph::op::Rnn>(node, args, out);
#if MKLDNN_VERSION_MAJOR < 1
auto weights_layer_buffer_index =
external_function->get_buffer_index(args[2].get_name());
auto weights_iter_buffer_index =
external_function->get_buffer_index(args[3].get_name());
auto bias_buffer_index = external_function->get_buffer_index(args[4].get_name());
// Rnn needs 9 primitives: src_layer, src_iter, weights_layer, weights_iter, bias,
// dst_layer, dst_iter, workspace, and rnn_forward.
// It needs a new workspace.
......@@ -76,7 +79,9 @@ namespace ngraph
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_rnn_forward(ctx->mkldnn_primitives,
mkldnn_emitter->build_rnn_forward(ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
ctx->mkldnn_workspaces,
rnn_desc,
deps,
......@@ -98,9 +103,81 @@ namespace ngraph
ctx, deps[6], ctx->buffer_data[dst_iter_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[7], ctx->mkldnn_workspaces[deps[8]]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, rnn_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx, rnn_index, deps, cpu::mkldnn_utils::OpType::RNN);
};
functors.emplace_back(functor);
#else
mkldnn_emitter->query_scratchpad_rnn_forward(rnn_desc);
auto src_iter_c_buffer_index =
external_function->get_buffer_index(args[2].get_name());
auto weights_layer_buffer_index =
external_function->get_buffer_index(args[3].get_name());
auto weights_iter_buffer_index =
external_function->get_buffer_index(args[4].get_name());
auto bias_buffer_index = external_function->get_buffer_index(args[5].get_name());
auto dst_iter_c_buffer_index =
external_function->get_buffer_index(out[2].get_name());
// Rnn needs 11 primitives: src_layer, src_iter, src_iter_c, weights_layer,
// weights_iter, bias,
// dst_layer, dst_iter, dst_iter_c, workspace, and lstm_forward.
// It needs a new workspace.
auto rnn_index =
mkldnn_emitter->reserve_primitive_space(11, true /* new workspace */);
auto& deps = mkldnn_emitter->get_primitive_deps(rnn_index);
auto functor = [&,
rnn_desc,
rnn_index,
src_layer_buffer_index,
src_iter_buffer_index,
src_iter_c_buffer_index,
weights_layer_buffer_index,
weights_iter_buffer_index,
bias_buffer_index,
dst_layer_buffer_index,
dst_iter_buffer_index,
dst_iter_c_buffer_index](CPURuntimeContext* ctx,
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_rnn_forward(ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
ctx->mkldnn_workspaces,
rnn_desc,
deps,
rnn_index);
}
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[0], ctx->buffer_data[src_layer_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[1], ctx->buffer_data[src_iter_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[2], ctx->buffer_data[src_iter_c_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[3], ctx->buffer_data[weights_layer_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[4], ctx->buffer_data[weights_iter_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[5], ctx->buffer_data[bias_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[6], ctx->buffer_data[dst_layer_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[7], ctx->buffer_data[dst_iter_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[8], ctx->buffer_data[dst_iter_c_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[9], ctx->mkldnn_workspaces[deps[10]]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx, rnn_index, deps, cpu::mkldnn_utils::OpType::RNN);
};
functors.emplace_back(functor);
#endif
}
// Registers the Rnn op with the CPU (DEX) builder registry so the
// functor-emitting builder above is dispatched for ngraph::op::Rnn nodes.
void register_builders_rnn_cpp() { REGISTER_OP_BUILDER(Rnn); }
......
......@@ -43,6 +43,8 @@ namespace ngraph
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto sigmoid_desc = mkldnn_emitter->get_sigmoid_forward_desc(node, false);
QUERY_SCRATCHPAD(eltwise_forward, sigmoid_desc);
// Sigmoid needs 3 primitives: input, result, and eltwise_forward.
auto sigmoid_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(sigmoid_index);
......@@ -52,14 +54,20 @@ namespace ngraph
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_sigmoid_forward(
ctx->mkldnn_primitives, sigmoid_desc, deps, sigmoid_index);
mkldnn_emitter->build_sigmoid_forward(ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
sigmoid_desc,
deps,
sigmoid_index);
}
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[0], ctx->buffer_data[arg0_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[1], ctx->buffer_data[out_buffer_index]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, sigmoid_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx, sigmoid_index, deps, cpu::mkldnn_utils::OpType::SIGMOID);
};
functors.emplace_back(functor);
}
......@@ -80,6 +88,8 @@ namespace ngraph
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto fwd_desc = mkldnn_emitter->get_sigmoid_forward_desc(node, true);
auto bwd_desc = mkldnn_emitter->get_sigmoid_backward_desc(node);
QUERY_SCRATCHPAD_2ARGS(eltwise_backward, fwd_desc, bwd_desc);
// SigmoidBackprop needs 4 primitives: input, delta, result, and eltwise_backward.
size_t sigmoid_index = mkldnn_emitter->reserve_primitive_space(4);
auto& deps = mkldnn_emitter->get_primitive_deps(sigmoid_index);
......@@ -94,8 +104,13 @@ namespace ngraph
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_sigmoid_backward(
ctx->mkldnn_primitives, bwd_desc, fwd_desc, deps, sigmoid_index);
mkldnn_emitter->build_sigmoid_backward(ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
bwd_desc,
fwd_desc,
deps,
sigmoid_index);
}
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[0], ctx->buffer_data[arg0_buffer_index]);
......@@ -103,7 +118,9 @@ namespace ngraph
ctx, deps[1], ctx->buffer_data[arg1_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[2], ctx->buffer_data[out_buffer_index]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, sigmoid_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx, sigmoid_index, deps, cpu::mkldnn_utils::OpType::SIGMOIDBACKPROP);
};
functors.emplace_back(functor);
}
......
......@@ -93,6 +93,8 @@ namespace ngraph
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
QUERY_SCRATCHPAD_4ARGS(slice, input_desc, result_desc, lower_bounds, out_shape);
// Slice needs 3 primitives: input, result, and reorder.
auto slice_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(slice_index);
......@@ -108,7 +110,9 @@ namespace ngraph
CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_slice(ctx->mkldnn_primitives,
mkldnn_emitter->build_slice(ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
input_desc,
result_desc,
lower_bounds,
......@@ -120,7 +124,9 @@ namespace ngraph
ctx, deps[0], ctx->buffer_data[arg_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[1], ctx->buffer_data[out_buffer_index]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, slice_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx, slice_index, deps, cpu::mkldnn_utils::OpType::SLICE);
};
functors.emplace_back(functor);
......
......@@ -48,6 +48,8 @@ namespace ngraph
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
auto softmax_desc = mkldnn_emitter->get_softmax_forward_desc(node);
QUERY_SCRATCHPAD(softmax_forward, softmax_desc);
// Softmax needs 3 primitives: input, result, and softmax_forward.
size_t softmax_index = mkldnn_emitter->reserve_primitive_space(3);
auto& deps = mkldnn_emitter->get_primitive_deps(softmax_index);
......@@ -57,14 +59,20 @@ namespace ngraph
CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
if (ctx->first_iteration)
{
mkldnn_emitter->build_softmax_forward(
ctx->mkldnn_primitives, softmax_desc, deps, softmax_index);
mkldnn_emitter->build_softmax_forward(ctx->mkldnn_memories,
ctx->mkldnn_primitives,
ctx->mkldnn_scratchpad_mds,
softmax_desc,
deps,
softmax_index);
}
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[0], ctx->buffer_data[arg_buffer_index]);
cpu::mkldnn_utils::set_memory_ptr(
ctx, deps[1], ctx->buffer_data[out_buffer_index]);
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, softmax_index);
cpu::mkldnn_utils::mkldnn_invoke_primitive(
ctx, softmax_index, deps, cpu::mkldnn_utils::OpType::SOFTMAX);
};
functors.emplace_back(functor);
}
......
......@@ -207,11 +207,17 @@ void runtime::cpu::CPU_CallFrame::setup_runtime_context(Allocator* allocator)
ctx->memory_buffers.push_back(buffer);
}
const auto& mkldnn_emitter = m_external_function->get_mkldnn_emitter();
// Create scratchpad
auto scratchpad_size = mkldnn_emitter->get_max_scratchpad_size();
if (m_external_function->is_direct_execution())
{
ctx->mkldnn_primitives =
std::vector<mkldnn::primitive*>(mkldnn_emitter->get_mkldnn_primitives().size());
ctx->mkldnn_memories =
std::vector<mkldnn::memory*>(mkldnn_emitter->get_mkldnn_memories().size());
ctx->mkldnn_scratchpad_mds = std::vector<mkldnn::memory::desc*>(
mkldnn_emitter->get_mkldnn_scratchpad_mds().size());
ctx->scratchpad_buffer = new AlignedBuffer(scratchpad_size, alignment);
}
else
{
......@@ -249,10 +255,23 @@ void runtime::cpu::CPU_CallFrame::cleanup_runtime_context()
{
delete p;
}
for (auto m : ctx->mkldnn_memories)
{
delete m;
}
for (auto buffer : ctx->memory_buffers)
{
delete buffer;
}
for (auto s : ctx->mkldnn_scratchpad_mds)
{
delete s;
}
if (m_external_function->is_direct_execution())
{
delete ctx->scratchpad_buffer;
}
if (m_external_function->is_direct_execution() &&
std::getenv("NGRAPH_CPU_USE_TBB") != nullptr)
{
......
This diff is collapsed.
......@@ -135,8 +135,11 @@ namespace ngraph
static CPUExecutor cpu_executor(num_thread_pools < 1 ? 1 : num_thread_pools);
return cpu_executor;
}
// Process-wide mkldnn engine shared by all CPU primitives.
// mkldnn v1.0 moved the engine-kind enumerators under engine::kind
// (engine::kind::cpu), whereas v0.x exposed them directly on engine
// (engine::cpu); the constructed engine is equivalent in both branches.
#if MKLDNN_VERSION_MAJOR < 1
mkldnn::engine global_cpu_engine(mkldnn::engine::cpu, 0);
#else
mkldnn::engine global_cpu_engine(mkldnn::engine::kind::cpu, 0);
#endif
}
}
}
......
......@@ -679,7 +679,14 @@ using namespace ngraph::runtime;
writer << "void inline CPURuntimeContextCG::init_mkldnn_primitives()\n";
writer.block_begin();
writer << "mkldnn_primitives = std::vector<mkldnn::primitive*>("
<< to_string(m_mkldnn_emitter->get_mkldnn_primitives_cg().size()) << ");\n";
<< to_string(m_mkldnn_emitter->get_mkldnn_primitives().size()) << ");\n";
writer << "mkldnn_memories = std::vector<mkldnn::memory*>("
<< to_string(m_mkldnn_emitter->get_mkldnn_memories().size()) << ");\n";
writer << "mkldnn_scratchpad_mds = std::vector<mkldnn::memory::desc*>("
<< to_string(m_mkldnn_emitter->get_mkldnn_scratchpad_mds().size()) << ");\n";
writer << "size_t scratchpad_size = " << m_mkldnn_emitter->get_max_scratchpad_size() << ";\n";
writer << "size_t alignment = 4096;\n";
writer << "scratchpad_buffer = new AlignedBuffer(scratchpad_size, alignment);\n";
writer.block_end();
writer << "\n";
......@@ -742,9 +749,8 @@ using namespace ngraph::runtime;
writer.block_begin();
writer << "// read in memory descriptors and build mkldnn primitives\n";
writer << "std::ifstream desc_file (\"" << m_desc_filename << "\", std::ios::binary);\n";
writer << "deserialize_memory_descs_and_build_memory_primitives(" << m_desc_filename
<< ", cg_ctx, " << to_string(m_mkldnn_emitter->get_mkldnn_descriptors_size())
<< ");\n";
writer << "deserialize_memory_descs_and_build_memory(" << m_desc_filename << ", cg_ctx, "
<< to_string(m_mkldnn_emitter->get_mkldnn_descriptors_size()) << ");\n";
writer.block_end();
}
......
......@@ -21,6 +21,14 @@
#include "ngraph/runtime/cpu/cpu_executor.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
// Version-compatibility aliases: mkldnn v1.0 renamed the "undefined format"
// enumerator (format_undef -> undef) and moved f32 under data_type.
// UNDEF and F32 let the code below spell both spellings uniformly.
#if MKLDNN_VERSION_MAJOR < 1
#define UNDEF format_undef
#define F32 f32
#else
#define UNDEF undef
#define F32 data_type::f32
#endif
namespace ngraph
{
namespace runtime
......@@ -29,8 +37,8 @@ namespace ngraph
{
const mkldnn::memory::desc
LayoutDescriptor::DummyDesc(mkldnn::memory::dims(TENSOR_MAX_DIMS),
mkldnn::memory::f32,
mkldnn::memory::format::format_undef);
mkldnn::memory::F32,
mkldnn::memory::FORMAT::UNDEF);
LayoutDescriptor::LayoutDescriptor(const ngraph::descriptor::Tensor& tv)
: TensorLayout(tv)
......@@ -109,15 +117,18 @@ namespace ngraph
// http://intel.github.io/mkl-dnn/understanding_memory_formats.html
try
{
#if MKLDNN_VERSION_MAJOR < 1
auto mem_prim_desc =
mkldnn::memory::primitive_desc(md, executor::global_cpu_engine);
m_buffer_size = mem_prim_desc.get_size();
#else
m_buffer_size = md.get_size();
#endif
}
catch (const mkldnn::error& e)
{
throw ngraph_error(
"error in computing mkldnn memory size from memory primitive desc: " +
e.message);
throw ngraph_error("error in computing mkldnn memory size from memory desc: " +
MKLDNN_ERROR_MESSAGE);
}
}
......
......@@ -50,7 +50,12 @@ namespace ngraph
void set_mkldnn_md(const mkldnn::memory::desc& md);
// True when this tensor layout carries an mkldnn memory descriptor with a
// defined format, i.e. mkldnn owns the physical layout (possibly blocked).
bool is_mkldnn_layout() const
{
#if MKLDNN_VERSION_MAJOR < 1
// v0.x: the format enum lives directly in md.data.format.
return m_mkldnn_md.data.format != mkldnn::memory::format::format_undef;
#else
// v1.0: format was replaced by format_kind; the C field is the raw
// C enum, hence the cast to the C++ format_kind before comparing.
return static_cast<mkldnn::memory::format_kind>(m_mkldnn_md.data.format_kind) !=
mkldnn::memory::format_kind::undef;
#endif
}
bool is_row_major_layout();
......
......@@ -63,8 +63,11 @@ namespace ngraph
bool first_iteration;
// stores tensor pointers
std::vector<void*> buffer_data;
std::vector<mkldnn::memory*> mkldnn_memories;
std::vector<mkldnn::primitive*> mkldnn_primitives;
std::vector<AlignedBuffer*> memory_buffers;
std::vector<mkldnn::memory::desc*> mkldnn_scratchpad_mds;
AlignedBuffer* scratchpad_buffer;
std::vector<char*> mkldnn_workspaces;
tbb::flow::graph* G;
tbb::global_control* c;
......
......@@ -142,11 +142,20 @@ void runtime::cpu::CPUTensorView::read(void* target, size_t n) const
auto output_desc = mkldnn_utils::create_blocked_mkldnn_md(
this->get_shape(), cpu_tvl->get_strides(), this->get_element_type());
#if MKLDNN_VERSION_MAJOR < 1
memory input{{input_desc, executor::global_cpu_engine}, aligned_buffer};
memory output{{output_desc, executor::global_cpu_engine}, target};
reorder prim{input, output};
mkldnn::stream s(mkldnn::stream::kind::eager);
s.submit({prim}).wait();
#else
memory input{input_desc, executor::global_cpu_engine, aligned_buffer};
memory output{output_desc, executor::global_cpu_engine, target};
reorder prim{input, output};
mkldnn::stream s(executor::global_cpu_engine);
prim.execute(s, {{MKLDNN_ARG_SRC, input}, {MKLDNN_ARG_DST, output}});
s.wait();
#endif
}
else
{
......
......@@ -15,6 +15,7 @@
//*****************************************************************************
#include "cpu_visualize_tree.hpp"
#include <string>
#include "ngraph/op/reshape.hpp"
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
#include "ngraph/runtime/cpu/cpu_op_annotations.hpp"
......@@ -49,12 +50,29 @@ static void visualize_layout_format(const Node& node, ostream& ss)
{
ss << "\ninput_order=" << reshape->get_input_order();
}
ss << "\nin="
<< runtime::cpu::mkldnn_utils::get_mkldnn_format_string(
static_cast<mkldnn::memory::format>(in_tvl->get_mkldnn_md().data.format));
ss << " out="
<< runtime::cpu::mkldnn_utils::get_mkldnn_format_string(
static_cast<mkldnn::memory::format>(out_tvl->get_mkldnn_md().data.format));
#if MKLDNN_VERSION_MAJOR >= 1
auto in_md = in_tvl->get_mkldnn_md();
auto out_md = out_tvl->get_mkldnn_md();
ss << "\nin strides={";
for (auto i = 0; i < in_md.data.ndims - 1; i++)
{
ss << in_md.data.format_desc.blocking.strides[i] << ",";
}
ss << in_md.data.format_desc.blocking.strides[in_md.data.ndims - 1] << "}";
ss << "\nout strides={";
for (auto i = 0; i < out_md.data.ndims - 1; i++)
{
ss << out_md.data.format_desc.blocking.strides[i] << ",";
}
ss << out_md.data.format_desc.blocking.strides[out_md.data.ndims - 1] << "}";
#else
ss << "\nin=" << runtime::cpu::mkldnn_utils::get_mkldnn_format_string(
static_cast<mkldnn::memory::FORMAT_KIND>(
in_tvl->get_mkldnn_md().data.FORMAT_KIND));
ss << " out=" << runtime::cpu::mkldnn_utils::get_mkldnn_format_string(
static_cast<mkldnn::memory::FORMAT_KIND>(
out_tvl->get_mkldnn_md().data.FORMAT_KIND));
#endif
ss << " ";
}
catch (...)
......
This diff is collapsed.
This diff is collapsed.
......@@ -19,20 +19,22 @@
#include <mkldnn.hpp>
#include "mkldnn_invoke.hpp"
#include "ngraph/runtime/aligned_buffer.hpp"
#include "ngraph/runtime/cpu/cpu_executor.hpp"
#include "ngraph/runtime/cpu/cpu_runtime_context.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#if MKLDNN_VERSION_MAJOR < 1
extern "C" void ngraph::runtime::cpu::mkldnn_utils::set_memory_ptr(CPURuntimeContext* ctx,
size_t primitive_index,
size_t index,
void* ptr)
{
auto primitive = static_cast<mkldnn::memory*>(ctx->mkldnn_primitives[primitive_index]);
auto primitive = static_cast<mkldnn::memory*>(ctx->mkldnn_primitives[index]);
primitive->set_data_handle(ptr);
}
extern "C" void ngraph::runtime::cpu::mkldnn_utils::mkldnn_invoke_primitive(CPURuntimeContext* ctx,
size_t primitive_index)
extern "C" void ngraph::runtime::cpu::mkldnn_utils::mkldnn_invoke_primitive(
CPURuntimeContext* ctx, size_t primitive_index, std::vector<size_t>& deps, OpType type)
{
mkldnn::stream s(mkldnn::stream::kind::eager);
try
......@@ -41,6 +43,175 @@ extern "C" void ngraph::runtime::cpu::mkldnn_utils::mkldnn_invoke_primitive(CPUR
}
catch (const mkldnn::error& e)
{
throw ngraph_error("Could not run mkdnn primitive " + e.message);
throw ngraph_error("Could not run mkdnn primitive " + MKLDNN_ERROR_MESSAGE);
}
}
#else
// Rebind the mkldnn (v1.x) memory object in slot `index` of the runtime
// context to a new data buffer. Called before primitive execution so the
// cached memory objects point at the current tensor storage.
extern "C" void ngraph::runtime::cpu::mkldnn_utils::set_memory_ptr(CPURuntimeContext* ctx,
                                                                   size_t index,
                                                                   void* ptr)
{
    ctx->mkldnn_memories[index]->set_data_handle(ptr);
}
// Execute the mkldnn (v1.x) primitive stored at `primitive_index`.
//
// mkldnn v1.0 primitives receive their memories at execute() time rather
// than at construction time, so `deps` carries indices into
// ctx->mkldnn_memories and `type` selects how those indices map onto the
// primitive's MKLDNN_ARG_* execution arguments.
//
// A user-mode scratchpad is always attached: primitives are built with
// scratchpad_mode::user, and ctx->scratchpad_buffer is presumably sized to
// the maximum scratchpad requirement queried at primitive-build time
// (see QUERY_SCRATCHPAD / GET_SIZE in mkldnn_utils) — confirm against the
// emitter if reusing this outside the CPU backend.
//
// Fix: the exception message previously misspelled the library name as
// "mkdnn".
extern "C" void ngraph::runtime::cpu::mkldnn_utils::mkldnn_invoke_primitive(
    CPURuntimeContext* ctx, size_t primitive_index, std::vector<size_t>& deps, OpType type)
{
    std::unordered_map<int, mkldnn::memory> exec_args;
    size_t nargs;
    switch (type)
    {
    case OpType::ADD:
        exec_args = {{MKLDNN_ARG_MULTIPLE_SRC, *ctx->mkldnn_memories[deps[0]]},
                     {MKLDNN_ARG_MULTIPLE_SRC + 1, *ctx->mkldnn_memories[deps[1]]},
                     {MKLDNN_ARG_DST, *ctx->mkldnn_memories[deps[2]]}};
        break;
    // Single-input/single-output ops: deps = {src, dst}.
    case OpType::AVGPOOL:
    case OpType::BOUNDEDRELU:
    case OpType::CONVERTLAYOUT:
    case OpType::LEAKYRELU:
    case OpType::LRN:
    case OpType::MAXPOOL:
    case OpType::QUANTIZE:
    case OpType::DEQUANTIZE:
    case OpType::QUANTIZEDAVGPOOL:
    case OpType::QUANTIZEDMAXPOOL:
    case OpType::RELU:
    case OpType::SIGMOID:
    case OpType::SLICE:
    case OpType::SOFTMAX:
        exec_args = {{MKLDNN_ARG_SRC, *ctx->mkldnn_memories[deps[0]]},
                     {MKLDNN_ARG_DST, *ctx->mkldnn_memories[deps[1]]}};
        break;
    case OpType::AVGPOOLBACKPROP:
        exec_args = {{MKLDNN_ARG_DIFF_DST, *ctx->mkldnn_memories[deps[0]]},
                     {MKLDNN_ARG_DIFF_SRC, *ctx->mkldnn_memories[deps[1]]}};
        break;
    case OpType::BATCHNORM3ARGS:
        exec_args = {{MKLDNN_ARG_SRC, *ctx->mkldnn_memories[deps[0]]},
                     {MKLDNN_ARG_WEIGHTS, *ctx->mkldnn_memories[deps[1]]},
                     {MKLDNN_ARG_DST, *ctx->mkldnn_memories[deps[2]]},
                     {MKLDNN_ARG_MEAN, *ctx->mkldnn_memories[deps[3]]},
                     {MKLDNN_ARG_VARIANCE, *ctx->mkldnn_memories[deps[4]]}};
        break;
    case OpType::BATCHNORM5ARGS:
        exec_args = {{MKLDNN_ARG_SRC, *ctx->mkldnn_memories[deps[0]]},
                     {MKLDNN_ARG_MEAN, *ctx->mkldnn_memories[deps[1]]},
                     {MKLDNN_ARG_VARIANCE, *ctx->mkldnn_memories[deps[2]]},
                     {MKLDNN_ARG_WEIGHTS, *ctx->mkldnn_memories[deps[3]]},
                     {MKLDNN_ARG_DST, *ctx->mkldnn_memories[deps[4]]}};
        break;
    case OpType::BATCHNORMBACKPROP:
        exec_args = {{MKLDNN_ARG_WEIGHTS, *ctx->mkldnn_memories[deps[0]]},
                     {MKLDNN_ARG_SRC, *ctx->mkldnn_memories[deps[1]]},
                     {MKLDNN_ARG_MEAN, *ctx->mkldnn_memories[deps[2]]},
                     {MKLDNN_ARG_VARIANCE, *ctx->mkldnn_memories[deps[3]]},
                     {MKLDNN_ARG_DIFF_DST, *ctx->mkldnn_memories[deps[4]]},
                     {MKLDNN_ARG_DIFF_SRC, *ctx->mkldnn_memories[deps[5]]},
                     {MKLDNN_ARG_DIFF_WEIGHTS, *ctx->mkldnn_memories[deps[6]]}};
        break;
    // Variadic-input ops: deps = {src_0 ... src_{n-1}, dst}.
    case OpType::CONCAT:
    case OpType::QUANTIZEDCONCAT:
        nargs = deps.size() - 1;
        for (size_t i = 0; i < nargs; i++)
        {
            exec_args.insert({MKLDNN_ARG_MULTIPLE_SRC + i, *ctx->mkldnn_memories[deps[i]]});
        }
        exec_args.insert({MKLDNN_ARG_DST, *ctx->mkldnn_memories[deps[nargs]]});
        break;
    case OpType::CONVOLUTION:
    case OpType::CONVOLUTIONRELU:
    case OpType::CONVOLUTIONADD:
    case OpType::GROUPCONVOLUTION:
    case OpType::QUANTIZEDMATMUL:
    case OpType::QUANTIZEDCONVOLUTION:
    case OpType::QUANTIZEDCONVOLUTIONRELU:
        exec_args = {{MKLDNN_ARG_SRC, *ctx->mkldnn_memories[deps[0]]},
                     {MKLDNN_ARG_WEIGHTS, *ctx->mkldnn_memories[deps[1]]},
                     {MKLDNN_ARG_DST, *ctx->mkldnn_memories[deps[2]]}};
        break;
    case OpType::CONVOLUTIONBIAS:
    case OpType::CONVOLUTIONBIASADD:
    case OpType::GROUPCONVOLUTIONBIAS:
    case OpType::QUANTIZEDDOTBIAS:
    case OpType::QUANTIZEDCONVOLUTIONBIAS:
    case OpType::QUANTIZEDCONVOLUTIONBIASADD:
    case OpType::QUANTIZEDCONVOLUTIONBIASSIGNEDADD:
        exec_args = {{MKLDNN_ARG_SRC, *ctx->mkldnn_memories[deps[0]]},
                     {MKLDNN_ARG_WEIGHTS, *ctx->mkldnn_memories[deps[1]]},
                     {MKLDNN_ARG_BIAS, *ctx->mkldnn_memories[deps[2]]},
                     {MKLDNN_ARG_DST, *ctx->mkldnn_memories[deps[3]]}};
        break;
    // Note: deps order here is {weights, diff_dst, diff_src}.
    case OpType::CONVOLUTIONBACKPROPDATA:
        exec_args = {{MKLDNN_ARG_DIFF_DST, *ctx->mkldnn_memories[deps[1]]},
                     {MKLDNN_ARG_WEIGHTS, *ctx->mkldnn_memories[deps[0]]},
                     {MKLDNN_ARG_DIFF_SRC, *ctx->mkldnn_memories[deps[2]]}};
        break;
    case OpType::CONVOLUTIONBACKPROPWEIGHTS:
        exec_args = {{MKLDNN_ARG_SRC, *ctx->mkldnn_memories[deps[0]]},
                     {MKLDNN_ARG_DIFF_DST, *ctx->mkldnn_memories[deps[1]]},
                     {MKLDNN_ARG_DIFF_WEIGHTS, *ctx->mkldnn_memories[deps[2]]}};
        break;
    case OpType::CONVOLUTIONBACKPROPWEIGHTSBIAS:
        exec_args = {{MKLDNN_ARG_SRC, *ctx->mkldnn_memories[deps[0]]},
                     {MKLDNN_ARG_DIFF_DST, *ctx->mkldnn_memories[deps[1]]},
                     {MKLDNN_ARG_DIFF_WEIGHTS, *ctx->mkldnn_memories[deps[2]]},
                     {MKLDNN_ARG_DIFF_BIAS, *ctx->mkldnn_memories[deps[3]]}};
        break;
    case OpType::DECONVOLUTIONBIAS:
        exec_args = {{MKLDNN_ARG_WEIGHTS, *ctx->mkldnn_memories[deps[0]]},
                     {MKLDNN_ARG_SRC, *ctx->mkldnn_memories[deps[1]]},
                     {MKLDNN_ARG_BIAS, *ctx->mkldnn_memories[deps[2]]},
                     {MKLDNN_ARG_DST, *ctx->mkldnn_memories[deps[3]]}};
        break;
    // mkldnn v1.0 splits the recurrent state into separate h and c tensors
    // (SRC_ITER / SRC_ITER_C, DST_ITER / DST_ITER_C).
    case OpType::LSTM:
    case OpType::RNN:
        exec_args = {{MKLDNN_ARG_SRC_LAYER, *ctx->mkldnn_memories[deps[0]]},
                     {MKLDNN_ARG_SRC_ITER, *ctx->mkldnn_memories[deps[1]]},
                     {MKLDNN_ARG_SRC_ITER_C, *ctx->mkldnn_memories[deps[2]]},
                     {MKLDNN_ARG_WEIGHTS_LAYER, *ctx->mkldnn_memories[deps[3]]},
                     {MKLDNN_ARG_WEIGHTS_ITER, *ctx->mkldnn_memories[deps[4]]},
                     {MKLDNN_ARG_BIAS, *ctx->mkldnn_memories[deps[5]]},
                     {MKLDNN_ARG_DST_LAYER, *ctx->mkldnn_memories[deps[6]]},
                     {MKLDNN_ARG_DST_ITER, *ctx->mkldnn_memories[deps[7]]},
                     {MKLDNN_ARG_DST_ITER_C, *ctx->mkldnn_memories[deps[8]]},
                     {MKLDNN_ARG_WORKSPACE, *ctx->mkldnn_memories[deps[9]]}};
        break;
    case OpType::MAXPOOLBACKPROPFORWARD:
    case OpType::MAXPOOLWITHINDICES:
        exec_args = {{MKLDNN_ARG_SRC, *ctx->mkldnn_memories[deps[0]]},
                     {MKLDNN_ARG_WORKSPACE, *ctx->mkldnn_memories[deps[2]]},
                     {MKLDNN_ARG_DST, *ctx->mkldnn_memories[deps[1]]}};
        break;
    case OpType::MAXPOOLBACKPROPBACKWARD:
    case OpType::MAXPOOLWITHINDICESBACKPROP:
        exec_args = {{MKLDNN_ARG_DIFF_DST, *ctx->mkldnn_memories[deps[0]]},
                     {MKLDNN_ARG_WORKSPACE, *ctx->mkldnn_memories[deps[1]]},
                     {MKLDNN_ARG_DIFF_SRC, *ctx->mkldnn_memories[deps[2]]}};
        break;
    case OpType::RELUBACKPROP:
    case OpType::SIGMOIDBACKPROP:
        exec_args = {{MKLDNN_ARG_SRC, *ctx->mkldnn_memories[deps[0]]},
                     {MKLDNN_ARG_DIFF_DST, *ctx->mkldnn_memories[deps[1]]},
                     {MKLDNN_ARG_DIFF_SRC, *ctx->mkldnn_memories[deps[2]]}};
        break;
    }

    // Attach the user-mode scratchpad: a memory object over the shared,
    // preallocated scratchpad buffer, described by this primitive's
    // scratchpad memory descriptor.
    mkldnn::memory scratchpad(*ctx->mkldnn_scratchpad_mds[primitive_index],
                              executor::global_cpu_engine,
                              ctx->scratchpad_buffer->get_ptr());
    exec_args.insert({MKLDNN_ARG_SCRATCHPAD, scratchpad});

    mkldnn::stream s(executor::global_cpu_engine);
    try
    {
        (*ctx->mkldnn_primitives[primitive_index]).execute(s, exec_args);
        s.wait();
    }
    catch (const mkldnn::error& e)
    {
        throw ngraph_error("Could not run mkldnn primitive " + MKLDNN_ERROR_MESSAGE);
    }
}
#endif
......@@ -28,10 +28,61 @@ namespace ngraph
namespace mkldnn_utils
{
extern "C" void
set_memory_ptr(CPURuntimeContext* ctx, size_t primitive_index, void* ptr);
// Op kinds recognized by mkldnn_invoke_primitive (mkldnn v1.x path).
// The value tells the invoker how the `deps` memory indices map onto the
// primitive's MKLDNN_ARG_* execution arguments.
// NOTE: do not reorder — enumerator values are implicit and may be baked
// into generated/serialized code elsewhere; confirm before changing order.
enum class OpType
{
    ADD,
    AVGPOOL,
    AVGPOOLBACKPROP,
    BATCHNORM3ARGS,
    BATCHNORM5ARGS,
    BATCHNORMBACKPROP,
    BOUNDEDRELU,
    CONCAT,
    CONVERTLAYOUT,
    CONVOLUTION,
    CONVOLUTIONRELU,
    CONVOLUTIONADD,
    CONVOLUTIONBIAS,
    CONVOLUTIONBIASADD,
    CONVOLUTIONBACKPROPDATA,
    CONVOLUTIONBACKPROPWEIGHTS,
    CONVOLUTIONBACKPROPWEIGHTSBIAS,
    GROUPCONVOLUTION,
    GROUPCONVOLUTIONBIAS,
    DECONVOLUTIONBIAS,
    LEAKYRELU,
    LRN,
    LSTM,
    MAXPOOL,
    MAXPOOLBACKPROPFORWARD,
    MAXPOOLBACKPROPBACKWARD,
    MAXPOOLWITHINDICES,
    MAXPOOLWITHINDICESBACKPROP,
    QUANTIZE,
    DEQUANTIZE,
    QUANTIZEDAVGPOOL,
    QUANTIZEDMAXPOOL,
    QUANTIZEDCONCAT,
    QUANTIZEDDOTBIAS,
    QUANTIZEDMATMUL,
    QUANTIZEDCONVOLUTION,
    QUANTIZEDCONVOLUTIONBIAS,
    QUANTIZEDCONVOLUTIONBIASADD,
    QUANTIZEDCONVOLUTIONBIASSIGNEDADD,
    QUANTIZEDCONVOLUTIONRELU,
    RELU,
    RELUBACKPROP,
    RNN,
    SIGMOID,
    SIGMOIDBACKPROP,
    SLICE,
    SOFTMAX
};
extern "C" void set_memory_ptr(CPURuntimeContext* ctx, size_t index, void* ptr);
extern "C" void mkldnn_invoke_primitive(CPURuntimeContext* ctx,
size_t primitive_index);
size_t primitive_index,
std::vector<size_t>& deps,
OpType type);
}
}
}
......
This diff is collapsed.
......@@ -24,6 +24,73 @@
#include "ngraph/runtime/cpu/op/batch_norm_relu.hpp"
#include "ngraph/type/element_type.hpp"
// Compatibility shims so the same backend source compiles against mkldnn
// v0.x and v1.x, whose memory-format / attribute APIs differ.
#if MKLDNN_VERSION_MAJOR < 1
// v0.x: a single `format` enum covers both the format kind and the tag.
#define FORMAT format
#define FORMAT_KIND format
// Fix: was misspelled `mkdnn_format_undef`; the v0.x C enum value is
// `mkldnn_format_undef`.
#define FORMAT_KIND_UNDEF mkldnn_format_undef
#define FORMAT_ANY mkldnn_any
// NOTE(review): verify `mkldnn_undef` resolves in the v0.x headers — the
// undefined memory format there is usually spelled `mkldnn_format_undef`.
#define FORMAT_UNDEF mkldnn_undef
#define DATA_UNDEF data_undef
// v0.x reports 4d/5d weight formats as nchw/ncdhw; rewrite them to the
// weight-oriented oihw/oidhw tags before use.
#define CHANGE_FORMAT \
    if (weights_desc.data.format == mkldnn_nchw) \
    { \
        weights_desc.data.format = mkldnn_oihw; \
    } \
    if (weights_desc.data.format == mkldnn_ncdhw) \
    { \
        weights_desc.data.format = mkldnn_oidhw; \
    }
#define BN_FLAG_CLASS batch_normalization_flag
// v0.x primitive descriptors take an explicit padding kind.
#define PADDING , mkldnn::padding_kind::zero
#define SET_ROUND_MODE attr.set_int_output_round_mode(mkldnn::round_mode::round_nearest);
// Scratchpad queries are a v1.x concept; no-ops on v0.x.
#define QUERY_SCRATCHPAD(op_name, x)
#define QUERY_SCRATCHPAD_2ARGS(op_name, x, y)
#define QUERY_SCRATCHPAD_3ARGS(op_name, x, y, z)
#define QUERY_SCRATCHPAD_4ARGS(op_name, x, y, z, u)
#define MKLDNN_ERROR_MESSAGE e.message
#else
#define TENSOR_MAX_DIMS MKLDNN_MAX_NDIMS
// v1.x: format kind and format tag are distinct enums.
#define FORMAT format_tag
#define FORMAT_KIND format_kind
#define FORMAT_KIND_UNDEF format_kind::undef
#define FORMAT_ANY static_cast<mkldnn_format_kind_t>(mkldnn::memory::format_kind::any)
#define FORMAT_UNDEF format_tag::undef
#define DATA_UNDEF undef
// v1.x already reports weight-oriented tags; nothing to rewrite.
#define CHANGE_FORMAT
#define BN_FLAG_CLASS normalization_flags
#define PADDING
#define SET_ROUND_MODE
// Query per-primitive scratchpad sizes so a single max-size user buffer
// can be shared by all primitives at execution time.
#define QUERY_SCRATCHPAD(op_name, x) mkldnn_emitter->query_scratchpad_##op_name(x)
#define QUERY_SCRATCHPAD_2ARGS(op_name, x, y) mkldnn_emitter->query_scratchpad_##op_name(x, y)
#define QUERY_SCRATCHPAD_3ARGS(op_name, x, y, z) mkldnn_emitter->query_scratchpad_##op_name(x, y, z)
#define QUERY_SCRATCHPAD_4ARGS(op_name, x, y, z, u) \
    mkldnn_emitter->query_scratchpad_##op_name(x, y, z, u)
// Request user-mode scratchpad on a primitive_attr.
#define ATTR_S \
    mkldnn::primitive_attr attr; \
    attr.set_scratchpad_mode(mkldnn::scratchpad_mode::user);
// Track the largest scratchpad requirement seen across all primitives.
#define GET_SIZE \
    mkldnn::memory::desc scratchpad_md = pd.scratchpad_desc(); \
    size_t size = scratchpad_md.get_size(); \
    m_max_scratchpad_size = size > m_max_scratchpad_size ? size : m_max_scratchpad_size;
#define MKLDNN_ERROR_MESSAGE std::string(e.message)
#endif
namespace ngraph
{
namespace runtime
......@@ -36,12 +103,12 @@ namespace ngraph
#ifndef _WIN32
extern "C" void mkl_serv_free_buffers();
#endif
mkldnn::memory::format
mkldnn::memory::FORMAT
CreateNativeDataFormat(const ngraph::runtime::cpu::LayoutDescriptor& layout);
mkldnn::memory::format CreateNativeDataFormat(const Shape& shape);
mkldnn::memory::FORMAT CreateNativeDataFormat(const Shape& shape);
const std::string& get_mkldnn_data_type_string(const ngraph::element::Type& type);
mkldnn::memory::data_type get_mkldnn_data_type(const ngraph::element::Type& type);
const std::string& get_mkldnn_format_string(mkldnn::memory::format fmt);
const std::string& get_mkldnn_format_string(mkldnn::memory::FORMAT fmt);
const mkldnn::memory::desc& get_input_mkldnn_md(const Node* node, size_t index);
const mkldnn::memory::desc& get_output_mkldnn_md(const Node* node, size_t index);
......@@ -49,7 +116,7 @@ namespace ngraph
mkldnn::memory::desc create_default_mkldnn_md(const Node* node,
size_t index,
bool is_output,
mkldnn::memory::format format);
mkldnn::memory::FORMAT format);
bool is_perm_sorted(const Strides& a, const AxisVector& perm);
bool can_create_mkldnn_md(const ngraph::element::Type type);
bool can_create_mkldnn_md(const Shape& dims,
......@@ -58,6 +125,11 @@ namespace ngraph
mkldnn::memory::desc create_blocked_mkldnn_md(const Shape& dims,
const Strides& strides,
const ngraph::element::Type type);
mkldnn::memory::desc
create_blocked_mkldnn_md_helper(const mkldnn::memory::dims& dim,
const Strides& strides,
const mkldnn::memory::dims& stride,
const mkldnn::memory::data_type dtype);
mkldnn::memory::desc try_get_named_md(const mkldnn_memory_desc_t& md);
mkldnn::memory::desc rotate_blocked_md(const mkldnn::memory::desc& in,
const AxisVector& axis_order);
......@@ -66,13 +138,13 @@ namespace ngraph
mkldnn::memory::desc expand_blocked_md(const mkldnn::memory::desc& in,
AxisVector& axis_list);
bool compare_mkldnn_formats(mkldnn::memory::format lhs, mkldnn::memory::format rhs);
bool compare_mkldnn_formats(mkldnn::memory::FORMAT lhs, mkldnn::memory::FORMAT rhs);
bool compare_mkldnn_mds(const mkldnn::memory::desc& lhs,
const mkldnn::memory::desc& rhs);
bool is_mkldnn_padded_layout(const mkldnn::memory::desc& in,
const AxisVector& axis_list);
bool is_mkldnn_filter_format(mkldnn::memory::format fmt);
bool is_mkldnn_blocked_data_format(mkldnn::memory::format fmt);
bool is_mkldnn_filter_format(mkldnn::memory::FORMAT fmt);
bool is_mkldnn_blocked_data_format(mkldnn::memory::FORMAT fmt);
bool can_use_mkldnn_batchnorm_fprop(const ngraph::Node* node);
bool can_use_mkldnn_batchnorm_bprop(const ngraph::Node* node);
......@@ -95,8 +167,8 @@ namespace ngraph
std::map<element::Type, const mkldnn::memory::data_type>&
get_mkldnn_data_type_map();
std::map<element::Type, const std::string>& get_mkldnn_data_type_string_map();
std::map<mkldnn::memory::format, const std::string>& get_mkldnn_format_string_map();
std::set<mkldnn::memory::format>& get_filter_formats();
std::map<mkldnn::memory::FORMAT, const std::string>& get_mkldnn_format_string_map();
std::set<mkldnn::memory::FORMAT>& get_filter_formats();
template <typename T>
bool can_use_mkldnn_conv(ngraph::Node* node)
{
......@@ -152,6 +224,26 @@ namespace ngraph
}
return true;
}
#if MKLDNN_VERSION_MAJOR >= 1
std::map<mkldnn::memory::format_kind, const std::string>&
get_mkldnn_format_kind_string_map();
const std::string&
get_mkldnn_format_kind_string(mkldnn::memory::format_kind fmt_kind);
bool inline compare_mkldnn_dims(mkldnn_dims_t& arr1,
mkldnn_dims_t& arr2,
size_t size);
bool compare_mkldnn_strides_order(mkldnn_dims_t& stride1,
mkldnn_dims_t& stride2,
size_t size);
bool compare_mkldnn_md_formats(const mkldnn::memory::desc& lhs,
const mkldnn::memory::desc& rhs);
bool mkldnn_md_matches_format_tag(const mkldnn::memory::desc&,
const mkldnn::memory::format_tag&);
mkldnn::memory::desc create_default_mkldnn_md_with_strides(
const Node* node, size_t index, mkldnn::memory::dims& strides, bool is_output);
bool is_mkldnn_desc_blocked_data_format(const mkldnn::memory::desc& desc);
#endif
}
}
}
......
This diff is collapsed.
......@@ -16,6 +16,7 @@
#pragma once
#include <mkldnn.hpp>
#include "ngraph/op/op.hpp"
#include "ngraph/runtime/cpu/op/rnn_utils.hpp"
#include "ngraph/util.hpp"
......@@ -29,22 +30,23 @@ namespace ngraph
public:
static const std::string type_name;
const std::string& description() const override { return type_name; }
// INPUTS:
// [0] - {Xt} input tensor of layout TNC, Shape{sequence length*batch_size,
// feature_size}
// [1] - recurrent state tensors {ht_1 | ct_1} of Shape{sequence length*batch_size,
// feature_size}
// [2] - initializer for the input weights matrix, used for the linear transformation of
// the inputs.
// [3] - initializer for the recurrent weights matrix, used for the linear
// transformation of the recurrent state.
// [4] - Initializer for the bias vector w.r.to inputs + hidden state (ibh_bias +
// hbh_bias)
// INPUTS:
// [0] - {Xt} input tensor of layout TNC, Shape{sequence length*batch_size,
// feature_size}
// [1] - recurrent state tensors {ht_1 | ct_1} of Shape{sequence length*batch_size,
// feature_size}
// [2] - initializer for the input weights matrix, used for the linear transformation of
// the inputs.
// [3] - initializer for the recurrent weights matrix, used for the linear
// transformation of the recurrent state.
// [4] - Initializer for the bias vector w.r.to inputs + hidden state (ibh_bias +
// hbh_bias)
// OUTPUT VALUE: A tuple with the following structure:
// [0] - ht, output tensor with shape (sequence_length*batch_size, num_hidden) .
// [1] - {ht | ct} output recurrent state tensor with the same shape as states
// OUTPUT VALUE: A tuple with the following structure:
// [0] - ht, output tensor with shape (sequence_length*batch_size, num_hidden) .
// [1] - {ht | ct} output recurrent state tensor with the same shape as states
#if MKLDNN_VERSION_MAJOR < 1
// This version of the LSTM op supports MKLDNN emitter code, this can be used standalone
// for computing RNN
// without fusing RNN cell (LSTM)'s across time steps.
......@@ -54,6 +56,15 @@ namespace ngraph
const Output<Node>& weights_iter,
const Output<Node>& bias,
ngraph::runtime::cpu::rnn_utils::rnntype rnn_type);
#else
Lstm(const Output<Node>& src_layer,
const Output<Node>& src_iter,
const Output<Node>& src_iter_c,
const Output<Node>& weights_layer,
const Output<Node>& weights_iter,
const Output<Node>& bias,
ngraph::runtime::cpu::rnn_utils::rnntype rnn_type);
#endif
Shape get_output_tensor_shape() const { return m_output_tensor_shape; }
Shape get_output_cell_shape() const { return m_output_cell_shape; }
ngraph::runtime::cpu::rnn_utils::rnntype get_rnn_type() const { return m_rnntype; }
......
This diff is collapsed.
This diff is collapsed.
......@@ -1961,6 +1961,8 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_deconvolution_affine_foldi
prelu, "CPUFusion.deconvolution_affine_folding_relu");
this->add_matcher(m, callback);
}
#if MKLDNN_VERSION_MAJOR < 1
void ngraph::runtime::cpu::pass::CPUFusion::construct_fuse_lstm_recurrent_state()
{
auto src_layer_label = std::make_shared<pattern::op::Label>(element::f32, Shape{30, 100});
......@@ -2009,6 +2011,7 @@ void ngraph::runtime::cpu::pass::CPUFusion::construct_fuse_lstm_recurrent_state(
"CPUFusion.fuse_lstm_recurrent_state");
this->add_matcher(m, callback);
}
#endif
void ngraph::runtime::cpu::pass::CPUFusion::construct_update_slice()
{
......
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
This diff is collapsed.
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment