Commit e8b5d11b authored by Amy Zhuang, committed by Robert Kimball

Create mkldnn primitives at first iteration for codegen - part1. (#2806)

* Create mkldnn primitives at first iteration for CODEGEN.

 OPs: add, lstm, and rnn.

*  OPs: batchnorm.

*  OPs: concat and lrn.

Remove dead code.

* Skip in place concat, relu, reshape, and slice when building node_primitive_string_deps_index map.

* Change NGRAPH_ASSERT to NGRAPH_CHECK.

* Ops: Qconv

* Ops: Convs

* Address PR Feedback.

* Dynamic scale support for qconvs

* updating to amy's recent change

* GroupConv and Cleaning dead code

* Address PR Feedback.

* Remove unused variable.

* Fix a bug.

* Fix style error.
parent d77ace68
...@@ -62,7 +62,7 @@ namespace ngraph ...@@ -62,7 +62,7 @@ namespace ngraph
auto lstm_desc = auto lstm_desc =
mkldnn_emitter->get_rnn_forward_desc<ngraph::op::Lstm>(node, args, out); mkldnn_emitter->get_rnn_forward_desc<ngraph::op::Lstm>(node, args, out);
// Lstm needs 9 primitives: src_layer, src_iter, weights_layer, weights_iter, bias, // Lstm needs 9 primitives: src_layer, src_iter, weights_layer, weights_iter, bias,
// dst_layer, dst_iter, and rnn_forward. // dst_layer, dst_iter, workspace, and rnn_forward.
// It needs a new workspace. // It needs a new workspace.
auto lstm_index = auto lstm_index =
mkldnn_emitter->reserve_primitive_space(9, true /* new workspace */); mkldnn_emitter->reserve_primitive_space(9, true /* new workspace */);
......
...@@ -57,7 +57,7 @@ namespace ngraph ...@@ -57,7 +57,7 @@ namespace ngraph
auto rnn_desc = auto rnn_desc =
mkldnn_emitter->get_rnn_forward_desc<ngraph::op::Rnn>(node, args, out); mkldnn_emitter->get_rnn_forward_desc<ngraph::op::Rnn>(node, args, out);
// Rnn needs 9 primitives: src_layer, src_iter, weights_layer, weights_iter, bias, // Rnn needs 9 primitives: src_layer, src_iter, weights_layer, weights_iter, bias,
// dst_layer, dst_iter, and rnn_forward. // dst_layer, dst_iter, workspace, and rnn_forward.
// It needs a new workspace. // It needs a new workspace.
auto rnn_index = auto rnn_index =
mkldnn_emitter->reserve_primitive_space(9, true /* new workspace */); mkldnn_emitter->reserve_primitive_space(9, true /* new workspace */);
......
This diff is collapsed.
...@@ -155,6 +155,7 @@ ...@@ -155,6 +155,7 @@
#include "ngraph/runtime/cpu/cpu_tensor_view.hpp" #include "ngraph/runtime/cpu/cpu_tensor_view.hpp"
#include "ngraph/runtime/cpu/cpu_tracing.hpp" #include "ngraph/runtime/cpu/cpu_tracing.hpp"
#include "ngraph/runtime/cpu/cpu_visualize_tree.hpp" #include "ngraph/runtime/cpu/cpu_visualize_tree.hpp"
#include "ngraph/runtime/cpu/mkldnn_emitter.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp" #include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/cpu/op/batch_mat_mul_transpose.hpp" #include "ngraph/runtime/cpu/op/batch_mat_mul_transpose.hpp"
#include "ngraph/runtime/cpu/op/batch_norm_relu.hpp" #include "ngraph/runtime/cpu/op/batch_norm_relu.hpp"
...@@ -473,7 +474,10 @@ void runtime::cpu::CPU_ExternalFunction::compile(ngraph::pass::PassConfig& pass_ ...@@ -473,7 +474,10 @@ void runtime::cpu::CPU_ExternalFunction::compile(ngraph::pass::PassConfig& pass_
// Build mkldnn primitives for codegen. // Build mkldnn primitives for codegen.
pass_manager.register_pass<runtime::cpu::pass::MKLDNNPrimitiveBuildPass>( pass_manager.register_pass<runtime::cpu::pass::MKLDNNPrimitiveBuildPass>(
*m_mkldnn_emitter, m_node_primitive_idx_map); m_desc_filename,
*m_mkldnn_emitter,
m_node_primitive_idx_map,
m_node_primitive_string_deps_index_map);
unordered_map<Node*, Node*> node_function_map; unordered_map<Node*, Node*> node_function_map;
string common_function_string; string common_function_string;
...@@ -510,13 +514,17 @@ void runtime::cpu::CPU_ExternalFunction::compile(ngraph::pass::PassConfig& pass_ ...@@ -510,13 +514,17 @@ void runtime::cpu::CPU_ExternalFunction::compile(ngraph::pass::PassConfig& pass_
writer += writer +=
R"( R"(
#include <cmath> #include <cmath>
#include <fstream>
#include <mkldnn.hpp>
#include "ngraph/distributed.hpp" #include "ngraph/distributed.hpp"
#include "ngraph/except.hpp" #include "ngraph/except.hpp"
#include "ngraph/runtime/aligned_buffer.hpp" #include "ngraph/runtime/aligned_buffer.hpp"
#include "ngraph/runtime/cpu/cpu_eigen_utils.hpp" #include "ngraph/runtime/cpu/cpu_eigen_utils.hpp"
#include "ngraph/runtime/cpu/cpu_executor.hpp"
#include "ngraph/runtime/cpu/cpu_kernels.hpp" #include "ngraph/runtime/cpu/cpu_kernels.hpp"
#include "ngraph/runtime/cpu/cpu_runtime_context.hpp" #include "ngraph/runtime/cpu/cpu_runtime_context.hpp"
#include "ngraph/runtime/cpu/mkldnn_invoke.hpp" #include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/reference/all.hpp" #include "ngraph/runtime/reference/all.hpp"
#include "ngraph/runtime/reference/and.hpp" #include "ngraph/runtime/reference/and.hpp"
#include "ngraph/runtime/reference/any.hpp" #include "ngraph/runtime/reference/any.hpp"
...@@ -668,6 +676,14 @@ using namespace ngraph::runtime; ...@@ -668,6 +676,14 @@ using namespace ngraph::runtime;
writer << common_function_string << "\n"; writer << common_function_string << "\n";
//initiate mkldnn_primitives for CPURuntimeContextCG
writer << "void inline CPURuntimeContextCG::init_mkldnn_primitives()\n";
writer.block_begin();
writer << "mkldnn_primitives = std::vector<mkldnn::primitive*>("
<< to_string(m_mkldnn_emitter->get_mkldnn_primitives_cg().size()) << ");\n";
writer.block_end();
writer << "\n";
for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions()) for (shared_ptr<Function> current_function : pass_manager.get_state().get_functions())
{ {
auto ordered_ops = function_ordered_ops.at(current_function); auto ordered_ops = function_ordered_ops.at(current_function);
...@@ -722,6 +738,16 @@ using namespace ngraph::runtime; ...@@ -722,6 +738,16 @@ using namespace ngraph::runtime;
writer << "extern \"C\" void " << current_function->get_name() << func_params << "\n"; writer << "extern \"C\" void " << current_function->get_name() << func_params << "\n";
writer << "{\n"; writer << "{\n";
writer.indent++; writer.indent++;
writer << "std::ifstream desc_file (\"" << m_desc_filename << "\", std::ios::binary);\n";
//deserialize and build mkldnn primitives
writer << "if (ctx->first_iteration)\n";
writer.block_begin();
writer << "// read in memory descriptors and build mkldnn primitives\n";
writer << "deserialize_memory_descs_and_build_memory_primitives(" << m_desc_filename
<< ", cg_ctx, " << to_string(m_mkldnn_emitter->get_mkldnn_descriptors_size())
<< ");\n";
writer.block_end();
// Execution tracing support // Execution tracing support
if (runtime::cpu::IsTracingEnabled() && current_function->get_name() == m_function_name) if (runtime::cpu::IsTracingEnabled() && current_function->get_name() == m_function_name)
......
...@@ -125,6 +125,18 @@ namespace ngraph ...@@ -125,6 +125,18 @@ namespace ngraph
return it->second; return it->second;
} }
// Look up the CODEGEN build information recorded for `node` in
// m_node_primitive_string_deps_index_map and return it by const reference.
// The tuple holds, in order: the string used to create the mkldnn primitive,
// the deps, and the mkldnn primitive index.
// If `node` has no entry, the NGRAPH_CHECK below fails with a message that
// includes node->description().
const std::tuple<std::string, std::vector<size_t>, size_t>&
get_primitive_build_tuple(const Node* node) const
{
auto it = m_node_primitive_string_deps_index_map.find(node);
NGRAPH_CHECK(it != m_node_primitive_string_deps_index_map.end(),
"Primitive build tuple not found for node ",
node->description());
return it->second;
}
size_t add_state(ngraph::State* state) size_t add_state(ngraph::State* state)
{ {
m_states.push_back(state); m_states.push_back(state);
...@@ -318,6 +330,11 @@ namespace ngraph ...@@ -318,6 +330,11 @@ namespace ngraph
/// Map each node with mkldnn implementation to its mkldnn primitive index. /// Map each node with mkldnn implementation to its mkldnn primitive index.
std::unordered_map<const Node*, size_t> m_node_primitive_idx_map; std::unordered_map<const Node*, size_t> m_node_primitive_idx_map;
/// Map each node with mkldnn implementation to its mkldnn primitive creating string, deps, and mkldnn primitive index.
std::map<const Node*, std::tuple<std::string, std::vector<size_t>, size_t>>
m_node_primitive_string_deps_index_map;
/// Name of the file to store descriptors for mkldnn_primitives
const std::string m_desc_filename = "desc_file";
}; };
} }
} }
......
This diff is collapsed.
This diff is collapsed.
...@@ -18,6 +18,7 @@ ...@@ -18,6 +18,7 @@
#include "ngraph/pass/pass.hpp" #include "ngraph/pass/pass.hpp"
#include <fstream>
#include <functional> #include <functional>
#include <typeindex> #include <typeindex>
#include <unordered_map> #include <unordered_map>
...@@ -26,6 +27,15 @@ ...@@ -26,6 +27,15 @@
build_primitive<op_name>(ngraph::runtime::cpu::MKLDNNEmitter & mkldnn_emitter, \ build_primitive<op_name>(ngraph::runtime::cpu::MKLDNNEmitter & mkldnn_emitter, \
ngraph::Node * node) ngraph::Node * node)
// Expands to the name and parameter list of construct_primitive_build_string<op_name>:
// the emitter, the node, three output references (construct string, deps, index) and
// the descriptor output file stream. The return type and any `template <>` prefix are
// not part of the expansion and must be supplied at the use site.
#define CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL(op_name) \
construct_primitive_build_string<op_name>(ngraph::runtime::cpu::MKLDNNEmitter & \
mkldnn_emitter, \
ngraph::Node * node, \
std::string & construct_string, \
std::vector<size_t> & deps, \
size_t & index, \
std::ofstream & desc_file)
namespace mkldnn namespace mkldnn
{ {
class primitive; class primitive;
...@@ -48,23 +58,46 @@ namespace ngraph ...@@ -48,23 +58,46 @@ namespace ngraph
using PrimitiveBuildOpMap = using PrimitiveBuildOpMap =
std::unordered_map<std::type_index, PrimitiveBuildFunction>; std::unordered_map<std::type_index, PrimitiveBuildFunction>;
/// Callable signature matching construct_primitive_build_string<OP>: takes the emitter
/// and the node, fills in a std::string, a std::vector<size_t> and a size_t through
/// references, and receives the std::ofstream for the descriptor file.
using PrimitiveBuildStringConstructFunction =
std::function<void(ngraph::runtime::cpu::MKLDNNEmitter&,
ngraph::Node*,
std::string&,
std::vector<size_t>&,
size_t&,
std::ofstream&)>;
/// Dispatch table from std::type_index to the build-string constructor
/// (presumably keyed by the op's type -- confirm against the map's initializer).
using PrimitiveBuildStringConstructOpMap =
std::unordered_map<std::type_index, PrimitiveBuildStringConstructFunction>;
/// This pass traverses the call graph and creates MKLDNN primitives for those ops /// This pass traverses the call graph and creates MKLDNN primitives for those ops
/// that have been assigned to MKLDNN. /// that have been assigned to MKLDNN.
class MKLDNNPrimitiveBuildPass : public ngraph::pass::CallGraphPass class MKLDNNPrimitiveBuildPass : public ngraph::pass::CallGraphPass
{ {
private: private:
std::string m_desc_filename;
ngraph::runtime::cpu::MKLDNNEmitter& m_mkldnn_emitter; ngraph::runtime::cpu::MKLDNNEmitter& m_mkldnn_emitter;
/// External map to store each node with mkldnn implementation and its mkldnn /// External map to store each node with mkldnn implementation and its mkldnn
/// associated primitive index. /// associated primitive index.
std::unordered_map<const Node*, size_t>& m_node_primitive_idx_map; std::unordered_map<const Node*, size_t>& m_node_primitive_idx_map;
/// External map to store each node with mkldnn implementation and its mkldnn
/// creation string, deps, and mkldnn primitive index.
std::map<const Node*, std::tuple<std::string, std::vector<size_t>, size_t>>&
m_node_primitive_string_deps_index_map;
public: public:
MKLDNNPrimitiveBuildPass( MKLDNNPrimitiveBuildPass(
std::string filename,
ngraph::runtime::cpu::MKLDNNEmitter& mkldnn_emitter, ngraph::runtime::cpu::MKLDNNEmitter& mkldnn_emitter,
std::unordered_map<const Node*, size_t>& node_primitive_idx_map) std::unordered_map<const Node*, size_t>& node_primitive_idx_map,
: m_mkldnn_emitter(mkldnn_emitter) std::map<const Node*, std::tuple<std::string, std::vector<size_t>, size_t>>&
node_primitive_string_deps_index_map)
: m_desc_filename(filename)
, m_mkldnn_emitter(mkldnn_emitter)
, m_node_primitive_idx_map(node_primitive_idx_map) , m_node_primitive_idx_map(node_primitive_idx_map)
, m_node_primitive_string_deps_index_map(
node_primitive_string_deps_index_map)
{ {
} }
...@@ -78,6 +111,19 @@ namespace ngraph ...@@ -78,6 +111,19 @@ namespace ngraph
throw std::runtime_error("Unimplemented op '" + node->description() + throw std::runtime_error("Unimplemented op '" + node->description() +
"' in MKLDNNPrimitiveBuildPass"); "' in MKLDNNPrimitiveBuildPass");
} }
/// Generic version of the build-string constructor. It does no work: it always throws
/// std::runtime_error naming the op via node->description(). None of the other
/// parameters are touched.
/// (Presumably supported ops provide their own specializations, declared with
/// CONSTRUCT_PRIMITIVE_BUILD_STRING_DECL -- confirm in the implementation file.)
template <typename OP>
static void construct_primitive_build_string(
    ngraph::runtime::cpu::MKLDNNEmitter& mkldnn_emitter,
    ngraph::Node* node,
    std::string& construct_string,
    std::vector<size_t>& deps,
    size_t& index,
    std::ofstream& desc_file)
{
    std::string message = "Unimplemented op '";
    message += node->description();
    message += "' in MKLDNNPrimitiveBuildPass";
    throw std::runtime_error(message);
}
}; };
} }
} }
......
...@@ -26,8 +26,35 @@ struct CPURuntimeContextCG ...@@ -26,8 +26,35 @@ struct CPURuntimeContextCG
std::unique_ptr<tbb::flow::graph> tbb_graph; std::unique_ptr<tbb::flow::graph> tbb_graph;
std::unique_ptr<tbb::global_control> tbb_gcontrol; std::unique_ptr<tbb::global_control> tbb_gcontrol;
CPURuntimeContextCG() { init_tbb(); } CPURuntimeContextCG() { init_tbb(); init_mkldnn_primitives();}
~CPURuntimeContextCG() { cleanup_tbb(); } ~CPURuntimeContextCG() { cleanup_tbb(); cleanup_mkldnn_primitives();}
std::vector<mkldnn::primitive*> mkldnn_primitives;
std::vector<char*> mkldnn_workspaces;
std::vector<mkldnn::memory::desc*> mkldnn_descriptors;
mkldnn::engine global_cpu_engine = mkldnn::engine(mkldnn::engine::cpu, 0);
void set_memory_ptr(size_t primitive_index,
void* ptr)
{
auto primitive = static_cast<mkldnn::memory*>(mkldnn_primitives[primitive_index]);
primitive->set_data_handle(ptr);
}
void mkldnn_invoke_primitive(size_t primitive_index)
{
mkldnn::stream s(mkldnn::stream::kind::eager);
try
{
s.submit({*mkldnn_primitives[primitive_index]}).wait();
}
catch (const mkldnn::error& e)
{
throw std::runtime_error("Could not run mkldnn primitive " + e.message);
}
}
private: private:
inline void init_tbb() inline void init_tbb()
...@@ -59,6 +86,35 @@ private: ...@@ -59,6 +86,35 @@ private:
} }
} }
} }
void init_mkldnn_primitives();
inline void cleanup_mkldnn_primitives()
{
for (auto p : mkldnn_primitives)
{
delete p;
}
#ifndef _WIN32
//To avoid memory leak in mkldnn, release any buffers that are not free'd yet.
//https://software.intel.com/en-us/mkl-linux-developer-guide-avoiding-memory-leaks-in-intel-mkl
//mkl_free_buffers() is not exposed at this point, hence using mkl_serv_free_buffers()
ngraph::runtime::cpu::mkldnn_utils::mkl_serv_free_buffers();
#endif
for (auto w : mkldnn_workspaces)
{
free(w);
}
}
inline void cleanup_mkldnn_descriptors()
{
for (auto d : mkldnn_descriptors)
{
free(d);
}
}
}; };
extern "C" CPURuntimeContextCG* init_cg_ctx() extern "C" CPURuntimeContextCG* init_cg_ctx()
...@@ -70,4 +126,25 @@ extern "C" void destroy_cg_ctx(CPURuntimeContextCG* cg_ctx) ...@@ -70,4 +126,25 @@ extern "C" void destroy_cg_ctx(CPURuntimeContextCG* cg_ctx)
{ {
delete cg_ctx; delete cg_ctx;
} }
static void
deserialize_memory_descs_and_build_memory_primitives(std::ifstream& desc_file,
CPURuntimeContextCG* cg_ctx,
size_t descs_count)
{
cg_ctx->mkldnn_descriptors = std::vector<mkldnn::memory::desc*>(descs_count);
for (auto i = 0; i < descs_count; i++)
{
size_t primitive_index;
desc_file >> primitive_index;
auto desc = (mkldnn::memory::desc*)malloc(sizeof(mkldnn::memory::desc));
if (!desc)
{
throw std::bad_alloc();
}
desc_file.read(reinterpret_cast<char*>(desc), sizeof(mkldnn::memory::desc));
cg_ctx->mkldnn_descriptors[i] = desc;
cg_ctx->mkldnn_primitives[primitive_index] = new mkldnn::memory({*cg_ctx->mkldnn_descriptors[i], cg_ctx->global_cpu_engine}, nullptr);
}
};
)" )"
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment