Unverified Commit 5280405e authored by Jayaram Bobba's avatar Jayaram Bobba Committed by GitHub

Merge pull request #512 from NervanaSystems/pruthvi/max_pooling

mkldnn MaxPoolBprop implementation in CPU emitter
parents 67c97f6a 94fe470c
......@@ -2717,22 +2717,98 @@ namespace ngraph
// Emits the generated-code fragment for a MaxPoolBackprop node.
//
// args[0] is used as the forward max-pool input, args[1] as the incoming
// delta, and out[0] as the destination of the back-propagated delta.
//
// Exactly one implementation is emitted:
//   * an MKLDNN pooling_backward primitive when the delta is rank 4, the
//     window is 2-D, the element type is f32 and the forward op is known;
//   * otherwise a call to the reference kernel::max_pool_backprop.
void CPU_Emitter::EMITTER_DECL(ngraph::op::MaxPoolBackprop)
{
    auto mpb = static_cast<const ngraph::op::MaxPoolBackprop*>(node);
    auto max_pool_fprop_op = mpb->get_forward_op();

    auto delta_shape = args[1].get_shape();
    auto delta_rank = delta_shape.size();
    auto out_shape = out[0].get_shape();

    if (delta_rank == 4 && mpb->get_window_shape().size() == 2 &&
        args[0].get_element_type() == element::f32 && max_pool_fprop_op != nullptr)
    {
        const string& et = get_mkldnn_data_type(args[1].get_element_type().c_type_string());

        writer << "{\n";
        writer.indent++;

        writer << "engine cpu_engine = engine(engine::cpu, 0);\n";
        writer << "memory::desc input_data_desc = memory::desc({" << join(delta_shape)
               << "}, " << et << ", memory::format::nchw);\n";
        writer << "memory::desc result_desc = memory::desc({" << join(out_shape)
               << "}, " << et << ", memory::format::nchw);\n";
        writer << "memory input_data = memory({input_data_desc, cpu_engine}, "
               << args[1].get_name() << ");\n";
        writer << "memory result = memory({result_desc, cpu_engine}, "
               << out[0].get_name() << ");\n";

        //----------------------------------------------------------------------------------------------
        // create a forward primitive_desc, use this to query the workspace
        // TODO: (pruthvi) this is a workaround, till we maintain a global context to refer to the corresponding
        // MKLDNN fprop kernel. this impacts performance
        writer << "memory::desc max_pool_input_desc = memory::desc({"
               << join(args[0].get_shape()) << "}, " << et
               << ", memory::format::nchw);\n";
        writer << "memory::desc max_pool_result_desc = memory::desc({"
               << join(args[1].get_shape()) << "}, " << et
               << ", memory::format::nchw);\n";
        writer << "memory maxpool_input_data = memory({max_pool_input_desc, cpu_engine}, "
               << args[0].get_name() << ");\n";
        // The recomputed forward output is written into out[0]'s buffer, which the
        // backward primitive below then overwrites with the real result.
        // NOTE(review): assumes out[0]'s buffer is at least as large as the forward
        // output (delta shape) - TODO confirm this holds for heavily padded pooling.
        writer << "memory maxpool_result = memory({max_pool_result_desc, cpu_engine}, "
               << out[0].get_name() << ");\n";
        writer << "pooling_forward::primitive_desc pool_fwd_pd = "
                  "pooling_forward::primitive_desc("
               << "{prop_kind::forward, algorithm::pooling_max, "
               << "max_pool_input_desc, max_pool_result_desc, {"
               << join(max_pool_fprop_op->get_window_movement_strides()) << "}, {"
               << join(max_pool_fprop_op->get_window_shape()) << "}, "
               << "{" << join(max_pool_fprop_op->get_padding_below()) << "}, "
               << "{" << join(max_pool_fprop_op->get_padding_above()) << "}, "
               << "padding_kind::zero}, cpu_engine);\n";

        // query the workspace from the forward primitive desc and allocates memory
        writer << "auto max_pool_workspace_memory = "
                  "memory(pool_fwd_pd.workspace_primitive_desc());\n";

        // run fprop with this workspace attached
        writer << "pooling_forward max_pooling_fwd = pooling_forward("
               << "pool_fwd_pd, maxpool_input_data, maxpool_result, "
                  "max_pool_workspace_memory);\n";
        writer << "stream s_fprop = stream(stream::kind::eager);\n"
               << "s_fprop.submit({max_pooling_fwd}).wait();\n";
        //---------------------------------------------------------------------------------------------

        // backward primitive: consumes the delta plus the workspace filled by the
        // forward pass above and writes into out[0]
        writer << "auto max_pooling_bwd = "
                  "pooling_backward(pooling_backward::primitive_desc("
               << "pooling_backward::desc(algorithm::pooling_max, "
               << "result_desc, input_data_desc, {"
               << join(mpb->get_window_movement_strides()) << "}, {"
               << join(mpb->get_window_shape()) << "}, "
               << "{" << join(mpb->get_padding_below()) << "}, "
               << "{" << join(mpb->get_padding_above()) << "}, "
               << "padding_kind::zero), cpu_engine, pool_fwd_pd), "
               << "input_data, max_pool_workspace_memory, result);\n";
        writer << "auto s_bwd = stream(stream::kind::eager);\n"
               << "s_bwd.submit({max_pooling_bwd}).wait();\n";

        writer.indent--;
        writer << "}\n";
    }
    else
    {
        // Fallback: reference kernel for every case the MKLDNN path does not cover.
        writer << "kernel::max_pool_backprop<" << out[0].get_type() << ">("
               << args[0].get_name() << ",\n";
        writer << " " << args[1].get_name() << ",\n";
        writer << " " << out[0].get_name() << ",\n";
        writer << " {" << join(delta_shape) << "},\n";
        writer << " {" << join(out_shape) << "},\n";
        writer << " {" << join(mpb->get_window_shape()) << "},\n";
        writer << " {" << join(mpb->get_window_movement_strides())
               << "},\n";
        writer << " {" << join(mpb->get_padding_below()) << "},\n";
        writer << " {" << join(mpb->get_padding_above()) << "}\n";
        writer << " );\n";
    }
}
}
}
......
......@@ -40,11 +40,12 @@ namespace ngraph
// Registry of op type indices, kept in alphabetical order.
// NOTE(review): presumably the set of ops that have an MKLDNN implementation,
// consulted by IsMKLDNNOp (its body is not visible here) - confirm.
const std::unordered_set<std::type_index> s_op_registry{
    TI(ngraph::op::AvgPool),
    TI(ngraph::op::AvgPoolBackprop),
    TI(ngraph::op::BatchNorm),
    TI(ngraph::op::Convolution),
    TI(ngraph::op::ConvolutionBackpropData),
    TI(ngraph::op::ConvolutionBackpropFilters),
    TI(ngraph::op::MaxPool),
    TI(ngraph::op::MaxPoolBackprop)};
bool IsMKLDNNOp(ngraph::Node& op)
{
......
......@@ -1405,3 +1405,88 @@ TEST(${BACKEND_NAME}, backwards_reverse_3d_02)
};
EXPECT_TRUE(autodiff_numeric_compare<float>(manager, backend, make_graph, {x}, .01f, .01f));
}
// MaxPool backprop on a {4, 1, 4, 4} NCHW input with a 2x2 window and stride 1.
// The incoming delta is filled with 4, so every input element is expected to
// receive 4 times the number of windows in which it is the maximum.
TEST(${BACKEND_NAME}, backwards_maxpool_n4c1h4w4_kh2kw2_sh1sw1)
{
    auto manager = runtime::Manager::get("${BACKEND_NAME}");
    auto backend = manager->allocate_backend();

    Shape shape_a{4, 1, 4, 4}; //in NCHW
    Shape maxpool_shape{4, 1, 3, 3};

    auto A = make_shared<op::Parameter>(element::f32, shape_a);
    Shape window_shape{2, 2};
    auto window_movement_strides = Strides{1, 1};
    auto maxpool = make_shared<op::MaxPool>(A, window_shape, window_movement_strides);
    auto f = make_shared<Function>(maxpool, op::Parameters{A});

    // ep: delta flowing in from the pooled output
    shared_ptr<runtime::TensorView> ep =
        backend->make_primary_tensor_view(element::f32, maxpool_shape);
    vector<float> dataEp(shape_size(maxpool_shape), 4);

    shared_ptr<runtime::TensorView> input =
        backend->make_primary_tensor_view(element::f32, shape_a);
    shared_ptr<runtime::TensorView> output =
        backend->make_primary_tensor_view(element::f32, shape_a);

    vector<float> dataInput{11, 65, 44, 28, 31, 33, 21, 66, 40, 49, 69, 57, 47, 30, 24, 27,
                            13, 56, 46, 60, 61, 41, 25, 42, 48, 53, 51, 43, 59, 58, 29, 71,
                            17, 22, 72, 18, 39, 35, 15, 38, 64, 52, 73, 67, 62, 50, 10, 68,
                            45, 63, 16, 14, 55, 54, 37, 20, 36, 12, 70, 34, 19, 26, 32, 23};

    vector<float> expected{//delta
                           0, 8, 0, 0, 0, 0, 0, 4, 0, 8, 16, 0, 0, 0, 0, 0, 0, 4, 0, 4, 8, 0,
                           0, 0, 0, 4, 4, 0, 4, 4, 0, 4, 0, 0, 8, 0, 4, 0, 0, 0, 8, 0, 16, 0,
                           0, 0, 0, 0, 0, 8, 0, 0, 4, 0, 4, 0, 4, 0, 16, 0, 0, 0, 0, 0};

    copy_data(ep, dataEp);
    copy_data(input, dataInput);

    auto df = autodiff::backprop_function(f);
    auto external = manager->compile(df);
    auto cf = backend->make_call_frame(external);
    cf->tensor_call({input, ep}, {output});

    // ASSERT_EQ prints both vectors on mismatch, unlike ASSERT_TRUE(a == b).
    ASSERT_EQ(expected, read_vector<float>(output));
}
// MaxPool backprop on a {1, 2, 5, 5} NCHW input with a 3x3 window and stride 2.
// The incoming delta is filled with 4.
// NOTE(review): the test name says n2c1, but the NCHW shape below is N=1, C=2;
// the name is left unchanged so existing test filters keep working.
TEST(${BACKEND_NAME}, backwards_maxpool_n2c1h5w5_kh3kw3_sh2sw2)
{
    auto manager = runtime::Manager::get("${BACKEND_NAME}");
    auto backend = manager->allocate_backend();

    Shape shape_a{1, 2, 5, 5}; //in NCHW
    Shape maxpool_shape{1, 2, 2, 2};

    auto A = make_shared<op::Parameter>(element::f32, shape_a);
    Shape window_shape{3, 3};
    auto window_movement_strides = Strides{2, 2};
    auto maxpool = make_shared<op::MaxPool>(A, window_shape, window_movement_strides);
    auto f = make_shared<Function>(maxpool, op::Parameters{A});

    // ep: delta flowing in from the pooled output
    shared_ptr<runtime::TensorView> ep =
        backend->make_primary_tensor_view(element::f32, maxpool_shape);
    vector<float> dataEp(shape_size(maxpool_shape), 4);

    shared_ptr<runtime::TensorView> input =
        backend->make_primary_tensor_view(element::f32, shape_a);
    shared_ptr<runtime::TensorView> output =
        backend->make_primary_tensor_view(element::f32, shape_a);

    vector<float> dataInput{58, 15, 51, 35, 18, 47, 31, 32, 52, 21, 36, 38, 57, 54, 25, 45, 23,
                            30, 16, 27, 48, 20, 41, 37, 43, 39, 22, 28, 33, 29, 12, 17, 44, 42,
                            19, 40, 10, 46, 34, 53, 26, 55, 50, 13, 24, 14, 49, 56, 59, 11};

    vector<float> expected{//delta
                           4, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 12, 0, 0, 0, 0,
                           0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                           0, 0, 0, 4, 0, 4, 0, 0, 0, 0, 0, 0, 0, 4, 4, 0};

    copy_data(ep, dataEp);
    copy_data(input, dataInput);

    auto df = autodiff::backprop_function(f);
    auto external = manager->compile(df);
    auto cf = backend->make_call_frame(external);
    cf->tensor_call({input, ep}, {output});

    // ASSERT_EQ prints both vectors on mismatch, unlike ASSERT_TRUE(a == b).
    ASSERT_EQ(expected, read_vector<float>(output));
}
\ No newline at end of file
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment