Commit f029ab3e authored by pthoreho

WIP elementwise mkldnn kernel selection based on cost factor

parent eabfebe5
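Note on the selection heuristic: the assignment pass in this diff routes an elementwise Add to MKLDNN only when the input has more than 64000 f32 elements (64000 × 4 bytes ≈ 256 KB), so the fixed cost of creating and dispatching an MKLDNN primitive is paid only where the kernel speedup can amortize it. A minimal standalone sketch of that cost check (the `should_use_mkldnn` helper and threshold name are illustrative, not nGraph API):

```cpp
#include <cstddef>
#include <vector>

// Illustrative threshold: the diff below uses 64000 f32 elements as the
// break-even point below which MKLDNN overhead outweighs its kernel speedup.
constexpr std::size_t k_min_mkldnn_elements = 64000;

// Hypothetical helper (not part of nGraph): decide whether an elementwise
// op on a tensor of the given shape should be assigned to MKLDNN.
bool should_use_mkldnn(const std::vector<std::size_t>& shape)
{
    std::size_t element_count = 1;
    for (std::size_t dim : shape)
    {
        element_count *= dim; // total element count across all axes
    }
    return element_count > k_min_mkldnn_elements;
}
```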
@@ -150,8 +150,11 @@ namespace ngraph
            << args[1].get_name() << ");\n";
     writer << "out = arg0 + arg1;\n";
 #else
-    if (args[0].get_element_type() == element::f32 &&
-        args[1].get_element_type() == element::f32)
+    auto op_annotations =
+        static_cast<const ngraph::op::Op*>(node)->get_op_annotations();
+    if (op_annotations &&
+        static_pointer_cast<ngraph::runtime::cpu::CPUOpAnnotations>(op_annotations)
+            ->is_mkldnn_op())
     {
         auto input0_size_1d = 1;
         auto input1_size_1d = 1;
@@ -164,8 +167,9 @@ namespace ngraph
         input1_size_1d *= args[1].get_shape()[i];
         result_size_1d *= out[0].get_shape()[i];
     }
-    const string& et =
-        get_mkldnn_data_type(args[0].get_element_type().c_type_string());
+    // get input element type
+    const string& et = runtime::cpu::mkldnn_utils::get_mkldnn_data_type_string(
+        args[1].get_element_type());
     // Bind to CPU engine
     writer << "engine cpu_engine = engine(engine::cpu, 0);\n";
......
@@ -136,3 +136,24 @@ size_t MKLDNNEmitter::build_convolution_forward(const mkldnn::memory::desc& inpu
     primitive_deps[conv_index] = {input_data_index, weights_index, result_index};
     return conv_index;
 }
+
+size_t MKLDNNEmitter::build_elementwise_add(
+    const mkldnn::memory::desc& input0_data_desc,
+    const mkldnn::memory::desc& input1_data_desc,
+    const mkldnn::memory::desc& result_desc,
+    const std::vector<float>& scale_vector,
+    const std::vector<mkldnn::memory::primitive_desc>& inputs_pd)
+{
+    size_t input0_data_index = build_memory_primitive(input0_data_desc);
+    size_t input1_data_index = build_memory_primitive(input1_data_desc);
+    size_t result_index = build_memory_primitive(result_desc);
+
+    std::vector<mkldnn::memory::primitive::at> inputs_primitive;
+    inputs_primitive.push_back(*mkldnn_primitives[input0_data_index]);
+    inputs_primitive.push_back(*mkldnn_primitives[input1_data_index]);
+
+    // elementwise sum primitive descriptor
+    sum::primitive_desc sum_pd = sum::primitive_desc(result_desc, scale_vector, inputs_pd);
+    // sum primitive
+    size_t add_index = insert_primitive(
+        new mkldnn::sum(sum_pd, inputs_primitive, *mkldnn_primitives[result_index]));
+
+    primitive_deps[add_index] = {input0_data_index, input1_data_index, result_index};
+    return add_index;
+}
\ No newline at end of file
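For context, `build_elementwise_add` above wraps the MKL-DNN v0.x `sum` primitive, which computes `out = scale[0]*in0 + scale[1]*in1`; with a scale vector of {1.0, 1.0} this is a plain elementwise add. A self-contained sketch of the same primitive used outside the emitter (a sketch assuming the v0.x C++ API and a 1-D f32 tensor; the function name is illustrative):

```cpp
#include <mkldnn.hpp>
#include <vector>

using namespace mkldnn;

// Elementwise add via the mkldnn v0.x sum primitive: out = 1*in0 + 1*in1.
void elementwise_add(float* in0, float* in1, float* out, int n)
{
    engine cpu_engine(engine::cpu, 0);
    memory::desc desc({n}, memory::data_type::f32, memory::format::x);
    memory a({desc, cpu_engine}, in0);
    memory b({desc, cpu_engine}, in1);
    memory c({desc, cpu_engine}, out);

    // Sum primitive descriptor: output desc, per-input scales, input pds.
    std::vector<float> scale_vector{1.0f, 1.0f};
    std::vector<memory::primitive_desc> inputs_pd{a.get_primitive_desc(),
                                                  b.get_primitive_desc()};
    sum::primitive_desc sum_pd(desc, scale_vector, inputs_pd);

    // The primitive itself takes its inputs as primitive::at handles.
    std::vector<primitive::at> inputs_primitive{a, b};
    sum add(sum_pd, inputs_primitive, c);

    // Execute eagerly on a CPU stream.
    stream(stream::kind::eager).submit({add}).wait();
}
```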
@@ -19,7 +19,6 @@
 #include <typeinfo>
 #include <unordered_set>
-#include "ngraph/types/element_type.hpp"
 #include "ngraph/node.hpp"
 #include "ngraph/ops/add.hpp"
 #include "ngraph/ops/avg_pool.hpp"
@@ -27,6 +26,7 @@
 #include "ngraph/ops/convolution.hpp"
 #include "ngraph/ops/max_pool.hpp"
 #include "ngraph/ops/relu.hpp"
+#include "ngraph/types/element_type.hpp"
 #include "mkldnn_utils.hpp"
@@ -122,7 +122,8 @@ mkldnn::memory::format runtime::cpu::mkldnn_utils::CreateNativeDataFormat(
     }
 }
 
-const std::string& runtime::cpu::mkldnn_utils::get_mkldnn_data_type_string(const ngraph::element::Type& type)
+const std::string&
+    runtime::cpu::mkldnn_utils::get_mkldnn_data_type_string(const ngraph::element::Type& type)
 {
     auto it = s_mkldnn_data_type_string_map.find(type);
     if (it == s_mkldnn_data_type_string_map.end() || it->second.empty())
@@ -130,7 +131,8 @@ const std::string& runtime::cpu::mkldnn_utils::get_mkldnn_data_type_string(const
     return it->second;
 }
 
-mkldnn::memory::data_type runtime::cpu::mkldnn_utils::get_mkldnn_data_type(const ngraph::element::Type& type)
+mkldnn::memory::data_type
+    runtime::cpu::mkldnn_utils::get_mkldnn_data_type(const ngraph::element::Type& type)
 {
     auto it = s_mkldnn_data_type_map.find(type);
     if (it == s_mkldnn_data_type_map.end() || it->second == memory::data_type::data_undef)
......
@@ -25,6 +25,7 @@
 #include <mkldnn.hpp>
 #include "ngraph/descriptor/output.hpp"
+#include "ngraph/ops/add.hpp"
 #include "ngraph/ops/convolution.hpp"
 #include "ngraph/runtime/cpu/cpu_op_annotations.hpp"
 #include "ngraph/runtime/cpu/mkldnn_utils.hpp"
@@ -66,6 +67,29 @@ namespace ngraph
                     convolution->set_op_annotations(op_annotations);
                 }
             }
+
+            template <>
+            void CPUAssignment::ASSIGN_DECL(ngraph::op::Add)
+            {
+                auto add = static_cast<op::Add*>(node);
+                auto arg0_shape = node->get_input_shape(0);
+                size_t src_size = 1;
+                for (size_t i = 0; i < arg0_shape.size(); i++)
+                {
+                    src_size *= arg0_shape[i];
+                }
+                // Insert Add as an MKLDNN op only if src_size is large; this avoids
+                // MKLDNN primitive overhead for smaller tensor sizes.
+                if (node->get_input_element_type(0) == element::f32 &&
+                    node->get_input_element_type(1) == element::f32 && src_size > 64000)
+                {
+                    auto op_annotations =
+                        std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
+                    op_annotations->set_mkldnn_op(true);
+                    add->set_op_annotations(op_annotations);
+                }
+            }
         }
     }
 }
@@ -76,6 +100,7 @@ namespace ngraph
 static const runtime::cpu::pass::AssignOpMap s_dispatcher{
     {TI(ngraph::op::Convolution),
      &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Convolution>},
+    {TI(ngraph::op::Add), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Add>},
 };
 
 bool runtime::cpu::pass::CPUAssignment::run_on_call_graph(
......