Commit e14c0565 authored by Jai Menon, committed by Scott Cyphers

Optimized Pad (#658)

* CPU: Eigen-based Pad kernel

* CPU: Create a global Eigen thread pool and use it for padding

* Formatting fixes
parent 41a4d55f
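
Editor's summary: when a Pad op is 4-D float32 with no interior padding, the CPU backend now emits a call to an Eigen-based kernel that evaluates the pad as a tensor expression on a shared thread pool, instead of the generic scalar reference kernel. A minimal standalone sketch of the underlying Eigen technique follows; the thread count, shapes, and values are illustrative, not taken from this commit:

#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include <iostream>
#include <vector>

int main()
{
    // Shared pool + device, as the commit sets up globally.
    Eigen::ThreadPool pool(4); // thread count is illustrative
    Eigen::ThreadPoolDevice device(&pool, 4);

    // 1x2x2x2 zero input padded to 1x2x4x4, mirroring the new unit test.
    std::vector<float> in_buf(1 * 2 * 2 * 2, 0.0f);
    std::vector<float> out_buf(1 * 2 * 4 * 4);
    Eigen::TensorMap<Eigen::Tensor<float, 4, Eigen::RowMajor>> in(in_buf.data(), 1, 2, 2, 2);
    Eigen::TensorMap<Eigen::Tensor<float, 4, Eigen::RowMajor>> out(out_buf.data(), 1, 2, 4, 4);

    // One (below, above) pair per dimension; pad the last two dims by 1.
    Eigen::array<Eigen::IndexPair<Eigen::Index>, 4> padding;
    padding[0] = {0, 0};
    padding[1] = {0, 0};
    padding[2] = {1, 1};
    padding[3] = {1, 1};

    // Evaluate the pad expression in parallel on the pool.
    out.device(device) = in.pad(padding, 42.0f);

    std::cout << out_buf[0] << "\n"; // prints 42 (a border element)
}
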
@@ -188,6 +188,8 @@ if (NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR AND
runtime/cpu/mkldnn_emitter.cpp
runtime/cpu/mkldnn_invoke.cpp
runtime/cpu/mkldnn_utils.cpp
runtime/cpu/kernels/eigen_thread_pool.cpp
runtime/cpu/kernels/pad.cpp
runtime/cpu/ops/conv_bias.cpp
runtime/cpu/ops/convert_layout.cpp
runtime/cpu/ops/sigmoid.cpp
......
@@ -2766,15 +2766,31 @@ namespace ngraph
auto arg0_shape = args[0].get_shape();
auto result_shape = out[0].get_shape();
writer << "kernel::pad<" << out[0].get_type() << ">(" << args[0].get_name()
<< ",\n";
writer << " " << args[1].get_name() << ",\n";
writer << " " << out[0].get_name() << ",\n";
writer << " {" << join(arg0_shape) << "},\n";
writer << " {" << join(result_shape) << "},\n";
writer << " {" << join(pad->get_padding_below()) << "},\n";
writer << " {" << join(pad->get_padding_above()) << "},\n";
writer << " {" << join(pad->get_padding_interior()) << "});\n";
if (arg0_shape.size() == 4 && args[0].get_element_type() == element::f32 &&
pad->get_padding_interior() == Shape(arg0_shape.size()))
{
writer << "cpu::kernel::pad_4d_float32(" << args[0].get_name() << ",\n"
<< " " << out[0].get_name() << ",\n"
<< " *(" << args[1].get_name() << "),\n"
<< " {" << join(arg0_shape) << "},\n"
<< " {" << join(result_shape) << "},\n"
<< " {" << join(pad->get_padding_below())
<< "},\n"
<< " {" << join(pad->get_padding_above())
<< "});\n";
}
else
{
writer << "kernel::pad<" << out[0].get_type() << ">(" << args[0].get_name()
<< ",\n";
writer << " " << args[1].get_name() << ",\n";
writer << " " << out[0].get_name() << ",\n";
writer << " {" << join(arg0_shape) << "},\n";
writer << " {" << join(result_shape) << "},\n";
writer << " {" << join(pad->get_padding_below()) << "},\n";
writer << " {" << join(pad->get_padding_above()) << "},\n";
writer << " {" << join(pad->get_padding_interior()) << "});\n";
}
}
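
To illustrate, for the 1x2x2x2 case exercised by the new test below, the fast path emits roughly the following into the generated source; arg0, arg1, and out0 are stand-ins for the generated tensor names:

cpu::kernel::pad_4d_float32(arg0,
                            out0,
                            *(arg1),
                            {1, 2, 2, 2},
                            {1, 2, 4, 4},
                            {0, 0, 1, 1},
                            {0, 0, 1, 1});
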
template <>
......
@@ -104,3 +104,25 @@ namespace mkl
size_t ldb);
}
}
namespace ngraph
{
class Shape;
namespace runtime
{
namespace cpu
{
namespace kernel
{
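// Fast-path entry point emitted by the CPU backend for rank-4
// float32 pads with no interior padding.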
void pad_4d_float32(float* input,
float* output,
float pad_value,
const Shape& input_shape,
const Shape& output_shape,
const Shape& padding_below,
const Shape& padding_above);
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "eigen_thread_pool.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace eigen
{
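// Process-wide pool sized by Eigen's default thread count;
// Eigen-backed CPU kernels share it via global_thread_pool_device
// rather than constructing a pool per kernel invocation.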
Eigen::ThreadPool global_thread_pool(Eigen::nbThreads());
Eigen::ThreadPoolDevice global_thread_pool_device(&global_thread_pool,
Eigen::nbThreads());
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace eigen
{
extern Eigen::ThreadPool global_thread_pool;
extern Eigen::ThreadPoolDevice global_thread_pool_device;
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "pad.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
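// Non-templated wrapper around pad<float, 4>, presumably so callers
// (notably the generated code) can link against a plain symbol
// without pulling Eigen's tensor headers into their translation unit.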
void pad_4d_float32(float* input,
float* output,
float pad_value,
const Shape& input_shape,
const Shape& output_shape,
const Shape& padding_below,
const Shape& padding_above)
{
pad<float, 4>(input,
output,
pad_value,
input_shape,
output_shape,
padding_below,
padding_above);
}
}
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#define EIGEN_USE_THREADS
#include <unsupported/Eigen/CXX11/Tensor>
#include "ngraph/runtime/cpu/kernels/eigen_thread_pool.hpp"
#include "ngraph/shape.hpp"
namespace ngraph
{
namespace runtime
{
namespace cpu
{
namespace kernel
{
template <typename ElementType, unsigned int Rank>
void pad(ElementType* input,
ElementType* output,
ElementType pad_value,
const Shape& input_shape,
const Shape& output_shape,
const Shape& padding_below,
const Shape& padding_above)
{
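// Build per-dimension sizes and (below, above) pad pairs, wrap the
// raw buffers in TensorMaps, and let Eigen evaluate the pad()
// expression in parallel on the shared thread pool device.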
Eigen::array<Eigen::Index, Rank> out_dims, in_dims;
Eigen::array<Eigen::IndexPair<size_t>, Rank> padding;
for (int i = 0; i < Rank; i++)
{
out_dims[i] = output_shape[i];
in_dims[i] = input_shape[i];
padding[i] = {padding_below[i], padding_above[i]};
}
Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> out(
output, out_dims);
Eigen::TensorMap<Eigen::Tensor<ElementType, Rank, Eigen::RowMajor>> in(input,
in_dims);
out.device(eigen::global_thread_pool_device) = in.pad(padding, pad_value);
}
}
}
}
}
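
For reference, a direct call to the templated kernel mirroring the new unit test (a minimal sketch; shapes must satisfy output_shape[i] == padding_below[i] + input_shape[i] + padding_above[i], since this path assumes no interior padding):

#include <vector>
#include "ngraph/runtime/cpu/kernels/pad.hpp"

int main()
{
    using ngraph::Shape;
    std::vector<float> in(1 * 2 * 2 * 2, 0.0f); // zero-filled input
    std::vector<float> out(1 * 2 * 4 * 4);      // receives the padded result
    ngraph::runtime::cpu::kernel::pad<float, 4>(in.data(),
                                                out.data(),
                                                42.0f,
                                                Shape{1, 2, 2, 2},
                                                Shape{1, 2, 4, 4},
                                                Shape{0, 0, 1, 1},
                                                Shape{0, 0, 1, 1});
}
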
@@ -7118,6 +7118,74 @@ TEST(${BACKEND_NAME}, pad_exterior_2d_3x0)
read_vector<float>(result));
}
TEST(${BACKEND_NAME}, pad_exterior_4d_1x2x2x2)
{
SKIP_TEST_FOR("GPU", "${BACKEND_NAME}");
SKIP_TEST_FOR("ARGON", "${BACKEND_NAME}");
Shape shape_a{1, 2, 2, 2};
auto A = make_shared<op::Parameter>(element::f32, shape_a);
Shape shape_b{};
auto B = make_shared<op::Parameter>(element::f32, shape_b);
Shape shape_r{1, 2, 4, 4};
Shape padding_below{0, 0, 1, 1};
Shape padding_above{0, 0, 1, 1};
Shape padding_interior{0, 0, 0, 0};
auto f = make_shared<Function>(
make_shared<op::Pad>(A, B, padding_below, padding_above, padding_interior),
op::ParameterVector{A, B});
auto manager = runtime::Manager::get("${BACKEND_NAME}");
auto external = manager->compile(f);
auto backend = manager->allocate_backend();
auto cf = backend->make_call_frame(external);
// Create some tensors for input/output
auto a = backend->make_primary_tensor_view(element::f32, shape_a);
// clang-format off
copy_data(a, test::NDArray<float, 4>(
{
{
{
{0.0f, 0.0f},
{0.0f, 0.0f}
},
{
{0.0f, 0.0f},
{0.0f, 0.0f}
}
}
}).get_vector());
// clang-format on
auto b = backend->make_primary_tensor_view(element::f32, shape_b);
copy_data(b, vector<float>{42});
auto result = backend->make_primary_tensor_view(element::f32, shape_r);
cf->call({a, b}, {result});
// clang-format off
EXPECT_EQ((test::NDArray<float, 4>(
{
{
{
{42.0f, 42.0f, 42.0f, 42.0f},
{42.0f, 0.0f, 0.0f, 42.0f},
{42.0f, 0.0f, 0.0f, 42.0f},
{42.0f, 42.0f, 42.0f, 42.0f}
},
{
{42.0f, 42.0f, 42.0f, 42.0f},
{42.0f, 0.0f, 0.0f, 42.0f},
{42.0f, 0.0f, 0.0f, 42.0f},
{42.0f, 42.0f, 42.0f, 42.0f}
}
}
}).get_vector()),
read_vector<float>(result));
// clang-format on
}
// This is a regression test for one of TF's unit tests, which was failing.
// The problem was inappropriate handling of the shape computation for a
// zero-length axis with interior padding. Rather than subtract 1 from the
......