Commit 5d3d8f53 authored by Jaikrishnan Menon's avatar Jaikrishnan Menon

Merge branch 'master' into mkldnn-compile

parents 4e1d1ac7 5280405e
......@@ -2681,22 +2681,98 @@ namespace ngraph
void CPU_Emitter::EMITTER_DECL(ngraph::op::MaxPoolBackprop)
{
auto mpb = static_cast<const ngraph::op::MaxPoolBackprop*>(node);
auto max_pool_fprop_op = mpb->get_forward_op();
auto delta_shape = args[1].get_shape();
auto delta_rank = delta_shape.size();
auto out_shape = out[0].get_shape();
writer << "kernel::max_pool_backprop<" << out[0].get_type() << ">("
<< args[0].get_name() << ",\n";
writer << " " << args[1].get_name() << ",\n";
writer << " " << out[0].get_name() << ",\n";
writer << " {" << join(delta_shape) << "},\n";
writer << " {" << join(out_shape) << "},\n";
writer << " {" << join(mpb->get_window_shape()) << "},\n";
writer << " {" << join(mpb->get_window_movement_strides())
<< "},\n";
writer << " {" << join(mpb->get_padding_below()) << "},\n";
writer << " {" << join(mpb->get_padding_above()) << "}\n";
writer << " );\n";
if (delta_rank == 4 && mpb->get_window_shape().size() == 2 &&
args[0].get_element_type() == element::f32 && max_pool_fprop_op != nullptr)
{
const string& et =
get_mkldnn_data_type(args[1].get_element_type().c_type_string());
writer << "{\n";
writer.indent++;
writer << "engine cpu_engine = engine(engine::cpu, 0);\n";
writer << "memory::desc input_data_desc = memory::desc({" << join(delta_shape)
<< "}, " << et << ", memory::format::nchw);\n";
writer << "memory::desc result_desc = memory::desc({" << join(out_shape)
<< "}, " << et << ", memory::format::nchw);\n";
writer << "memory input_data = memory({input_data_desc, cpu_engine}, "
<< args[1].get_name() << ");\n";
writer << "memory result = memory({result_desc, cpu_engine}, "
<< out[0].get_name() << ");\n";
//----------------------------------------------------------------------------------------------
// create a forward primitive_desc, use this to query the workspace
// TODO: (pruthvi) this is a workaround, till we maintain a global context to refer to the corrosponding
// MKLDNN fprop kernel. this impacts performance
writer << "memory::desc max_pool_input_desc = memory::desc({"
<< join(args[0].get_shape()) << "}, " << et
<< ", memory::format::nchw);\n";
writer << "memory::desc max_pool_result_desc = memory::desc({"
<< join(args[1].get_shape()) << "}, " << et
<< ", memory::format::nchw);\n";
writer
<< "memory maxpool_input_data = memory({max_pool_input_desc, cpu_engine}, "
<< args[0].get_name() << ");\n";
writer << "memory maxpool_result = memory({max_pool_result_desc, cpu_engine}, "
<< out[0].get_name() << ");\n";
writer << "pooling_forward::primitive_desc pool_fwd_pd = "
"pooling_forward::primitive_desc("
<< "{prop_kind::forward, algorithm::pooling_max, "
<< "max_pool_input_desc, max_pool_result_desc, {"
<< join(max_pool_fprop_op->get_window_movement_strides()) << "}, {"
<< join(max_pool_fprop_op->get_window_shape()) << "}, "
<< "{" << join(max_pool_fprop_op->get_padding_below()) << "}, "
<< "{" << join(max_pool_fprop_op->get_padding_above()) << "}, "
<< "padding_kind::zero}, cpu_engine);\n";
// query the workspace from the forward primitive desc and allocates memory
writer << "auto max_pool_workspace_memory = "
"memory(pool_fwd_pd.workspace_primitive_desc());\n";
//run fprop with this workspace attached
writer << "pooling_forward max_pooling_fwd = pooling_forward("
<< "pool_fwd_pd, maxpool_input_data, maxpool_result, "
"max_pool_workspace_memory);\n";
writer << "stream s_fprop = stream(stream::kind::eager);\n"
<< "s_fprop.submit({max_pooling_fwd}).wait();\n";
//---------------------------------------------------------------------------------------------
writer << "auto max_pooling_bwd = "
"pooling_backward(pooling_backward::primitive_desc("
<< "pooling_backward::desc(algorithm::pooling_max, "
<< "result_desc, input_data_desc, {"
<< join(mpb->get_window_movement_strides()) << "}, {"
<< join(mpb->get_window_shape()) << "}, "
<< "{" << join(mpb->get_padding_below()) << "}, "
<< "{" << join(mpb->get_padding_above()) << "}, "
<< "padding_kind::zero), cpu_engine, pool_fwd_pd), "
<< "input_data, max_pool_workspace_memory, result);\n";
writer << "auto s_bwd = stream(stream::kind::eager);\n"
<< "s_bwd.submit({max_pooling_bwd}).wait();\n";
writer.indent--;
writer << "}\n";
}
else
{
writer << "kernel::max_pool_backprop<" << out[0].get_type() << ">("
<< args[0].get_name() << ",\n";
writer << " " << args[1].get_name() << ",\n";
writer << " " << out[0].get_name() << ",\n";
writer << " {" << join(delta_shape) << "},\n";
writer << " {" << join(out_shape) << "},\n";
writer << " {" << join(mpb->get_window_shape()) << "},\n";
writer << " {" << join(mpb->get_window_movement_strides())
<< "},\n";
writer << " {" << join(mpb->get_padding_below()) << "},\n";
writer << " {" << join(mpb->get_padding_above()) << "}\n";
writer << " );\n";
}
}
}
}
......
......@@ -56,7 +56,16 @@ runtime::cpu::CPUTensorView::CPUTensorView(const ngraph::element::Type& element_
throw ngraph_error("Error allocating CPU Tensor View memory");
}
buffer = static_cast<char*>(ptr);
// GCC major versions below 5 do not implement C++11 std::align
#if !defined(__GNUC__) || __GNUC__ >= 5
std::align(BufferAlignment, buffer_size, ptr, allocation_size);
#else
ptr = static_cast<char*>(ptr) + (BufferAlignment - 1);
ptr = reinterpret_cast<void*>(reinterpret_cast<uintptr_t>(ptr) &
~(uintptr_t(BufferAlignment - 1)));
#endif
aligned_buffer = static_cast<char*>(ptr);
}
}
......
......@@ -41,11 +41,12 @@ namespace ngraph
static const std::unordered_set<std::type_index> s_op_registry{
TI(ngraph::op::AvgPool),
TI(ngraph::op::AvgPoolBackprop),
TI(ngraph::op::BatchNorm),
TI(ngraph::op::Convolution),
TI(ngraph::op::ConvolutionBackpropData),
TI(ngraph::op::ConvolutionBackpropFilters),
TI(ngraph::op::MaxPool),
TI(ngraph::op::BatchNorm)};
TI(ngraph::op::MaxPoolBackprop)};
static const std::unordered_map<std::string, const mkldnn::memory::data_type>
s_data_type_map{{"char", mkldnn::memory::data_type::s8},
......
......@@ -1405,3 +1405,88 @@ TEST(${BACKEND_NAME}, backwards_reverse_3d_02)
};
EXPECT_TRUE(autodiff_numeric_compare<float>(manager, backend, make_graph, {x}, .01f, .01f));
}
TEST(${BACKEND_NAME}, backwards_maxpool_n4c1h4w4_kh2kw2_sh1sw1)
{
auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto backend = manager->allocate_backend();
Shape shape_a{4, 1, 4, 4}; //in NCHW
Shape maxpool_shape{4, 1, 3, 3};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape window_shape{2, 2};
auto window_movement_strides = Strides{1, 1};
auto maxpool = make_shared<op::MaxPool>(A, window_shape, window_movement_strides);
auto f = make_shared<Function>(maxpool, op::Parameters{A});
shared_ptr<runtime::TensorView> ep =
backend->make_primary_tensor_view(element::f32, maxpool_shape);
vector<float> dataEp(shape_size(maxpool_shape), 4);
shared_ptr<runtime::TensorView> input =
backend->make_primary_tensor_view(element::f32, shape_a);
shared_ptr<runtime::TensorView> output =
backend->make_primary_tensor_view(element::f32, shape_a);
vector<float> dataInput{11, 65, 44, 28, 31, 33, 21, 66, 40, 49, 69, 57, 47, 30, 24, 27,
13, 56, 46, 60, 61, 41, 25, 42, 48, 53, 51, 43, 59, 58, 29, 71,
17, 22, 72, 18, 39, 35, 15, 38, 64, 52, 73, 67, 62, 50, 10, 68,
45, 63, 16, 14, 55, 54, 37, 20, 36, 12, 70, 34, 19, 26, 32, 23};
vector<float> expected{//delta
0, 8, 0, 0, 0, 0, 0, 4, 0, 8, 16, 0, 0, 0, 0, 0, 0, 4, 0, 4, 8, 0,
0, 0, 0, 4, 4, 0, 4, 4, 0, 4, 0, 0, 8, 0, 4, 0, 0, 0, 8, 0, 16, 0,
0, 0, 0, 0, 0, 8, 0, 0, 4, 0, 4, 0, 4, 0, 16, 0, 0, 0, 0, 0};
copy_data(ep, dataEp);
copy_data(input, dataInput);
auto C = make_shared<op::Parameter>(element::f32, maxpool_shape);
auto df = autodiff::backprop_function(f);
auto external = manager->compile(df);
auto cf = backend->make_call_frame(external);
cf->tensor_call({input, ep}, {output});
ASSERT_TRUE(read_vector<float>(output) == expected);
}
TEST(${BACKEND_NAME}, backwards_maxpool_n2c1h5w5_kh3kw3_sh2sw2)
{
auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto backend = manager->allocate_backend();
Shape shape_a{1, 2, 5, 5}; //in NCHW
Shape maxpool_shape{1, 2, 2, 2};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape window_shape{3, 3};
auto window_movement_strides = Strides{2, 2};
auto maxpool = make_shared<op::MaxPool>(A, window_shape, window_movement_strides);
auto f = make_shared<Function>(maxpool, op::Parameters{A});
shared_ptr<runtime::TensorView> ep =
backend->make_primary_tensor_view(element::f32, maxpool_shape);
vector<float> dataEp(shape_size(maxpool_shape), 4);
shared_ptr<runtime::TensorView> input =
backend->make_primary_tensor_view(element::f32, shape_a);
shared_ptr<runtime::TensorView> output =
backend->make_primary_tensor_view(element::f32, shape_a);
vector<float> dataInput{58, 15, 51, 35, 18, 47, 31, 32, 52, 21, 36, 38, 57, 54, 25, 45, 23,
30, 16, 27, 48, 20, 41, 37, 43, 39, 22, 28, 33, 29, 12, 17, 44, 42,
19, 40, 10, 46, 34, 53, 26, 55, 50, 13, 24, 14, 49, 56, 59, 11};
vector<float> expected{//delta
4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0};
copy_data(ep, dataEp);
copy_data(input, dataInput);
auto C = make_shared<op::Parameter>(element::f32, maxpool_shape);
auto df = autodiff::backprop_function(f);
auto external = manager->compile(df);
auto cf = backend->make_call_frame(external);
cf->tensor_call({input, ep}, {output});
ASSERT_TRUE(read_vector<float>(output) == expected);
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment