Commit 20c2325c authored by Nishant Patel, committed by Robert Kimball

Add support for Quantized Pooling (Max + Avg) ops via MKL-DNN for the IA backend (codegen + DEX) (#1571)

* Add support for Quantized Pooling (Max + Avg) ops via MKL-DNN for the IA backend (codegen + DEX)

* Add checks for min and max

* Extract the common code shared by codegen and DEX

* Use call_with_validate
parent 1cdae06e
@@ -52,6 +52,8 @@ set(SRC
builder/reduce_function.cpp
builder/reduce_function_window.cpp
builder/replace_slice.cpp
builder/quantized_max_pool.cpp
builder/quantized_avg_pool.cpp
builder/reshape.cpp
builder/reverse.cpp
builder/reverse_sequence.cpp
@@ -83,6 +85,8 @@ set(SRC
op/lstm.cpp
op/matmul_bias.cpp
op/max_pool_with_indices.cpp
op/quantized_max_pool.cpp
op/quantized_avg_pool.cpp
op/rnn.cpp
op/sigmoid_mul.cpp
op/conv_add.cpp
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "ngraph/runtime/cpu/op/quantized_avg_pool.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
namespace runtime
{
namespace cpu
{
template <>
void Builder::BUILDER_DECL(ngraph::op::QuantizedAvgPool)
{
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& functors = external_function->get_functors();
auto& arg_tensor = external_function->get_tensor_data(args[0].get_name());
auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
auto& out1_tensor = external_function->get_tensor_data(out[1].get_name());
auto& out2_tensor = external_function->get_tensor_data(out[2].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
vector<float> quant_util;
mkldnn_emitter->build_quantized_avg_pool(node, quant_util);
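// build_quantized_avg_pool fills quant_util as {min, max, primitive_index}:
// indices 0 and 1 feed the min/max outputs, index 2 keys the MKL-DNN primitive.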
auto& deps = mkldnn_emitter->get_primitive_deps(quant_util[2]);
auto functor = [&, quant_util](CPURuntimeContext* ctx) {
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
*(static_cast<float*>(out1_tensor)) = quant_util[0];
*(static_cast<float*>(out2_tensor)) = quant_util[1];
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, quant_util[2]);
};
functors.emplace_back(functor);
}
else
{
throw ngraph_error("unsupported parameters for QuantizedAvgPool via DEX");
}
}
REGISTER_OP_BUILDER(QuantizedAvgPool);
}
}
}
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "ngraph/runtime/cpu/op/quantized_max_pool.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
namespace runtime
{
namespace cpu
{
template <>
void Builder::BUILDER_DECL(ngraph::op::QuantizedMaxPool)
{
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& functors = external_function->get_functors();
auto& arg_tensor = external_function->get_tensor_data(args[0].get_name());
auto& out_tensor = external_function->get_tensor_data(out[0].get_name());
auto& out1_tensor = external_function->get_tensor_data(out[1].get_name());
auto& out2_tensor = external_function->get_tensor_data(out[2].get_name());
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
vector<float> quant_util;
mkldnn_emitter->build_quantized_max_pool(node, quant_util);
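// quant_util follows the same {min, max, primitive_index} layout as in the
// QuantizedAvgPool builder above.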
auto& deps = mkldnn_emitter->get_primitive_deps(quant_util[2]);
auto functor = [&, quant_util](CPURuntimeContext* ctx) {
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[0], arg_tensor);
cpu::mkldnn_utils::set_memory_ptr(ctx, deps[1], out_tensor);
*(static_cast<float*>(out1_tensor)) = quant_util[0];
*(static_cast<float*>(out2_tensor)) = quant_util[1];
cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, quant_util[2]);
};
functors.emplace_back(functor);
}
else
{
throw ngraph_error("unsupported parameters for QuantizedMaxPool via DEX");
}
}
REGISTER_OP_BUILDER(QuantizedMaxPool);
}
}
}
@@ -109,6 +109,8 @@
#include "ngraph/runtime/cpu/op/lstm.hpp"
#include "ngraph/runtime/cpu/op/matmul_bias.hpp"
#include "ngraph/runtime/cpu/op/max_pool_with_indices.hpp"
#include "ngraph/runtime/cpu/op/quantized_avg_pool.hpp"
#include "ngraph/runtime/cpu/op/quantized_max_pool.hpp"
#include "ngraph/runtime/cpu/op/rnn.hpp"
#include "ngraph/runtime/cpu/op/sigmoid.hpp"
#include "ngraph/runtime/cpu/op/sigmoid_mul.hpp"
@@ -3072,6 +3074,54 @@ namespace ngraph
}
}
template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::QuantizedMaxPool)
{
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
vector<float> quant_util;
mkldnn_emitter->build_quantized_max_pool(node, quant_util);
auto& deps = mkldnn_emitter->get_primitive_deps(quant_util[2]);
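// The min/max values are known at compile time, so they are emitted into
// the generated code as literals rather than read at runtime.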
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
<< ", " << out[0].get_name() << ");\n";
writer << "*(" << out[1].get_name() << ") = " << quant_util[0] << ";\n";
writer << "*(" << out[2].get_name() << ") = " << quant_util[1] << ";\n";
writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
<< to_string(quant_util[2]) << ");\n";
}
else
{
throw ngraph_error("unsupported parameters for QuantizedMaxPool");
}
}
template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::QuantizedAvgPool)
{
if (runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
{
auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
vector<float> quant_util;
mkldnn_emitter->build_quantized_avg_pool(node, quant_util);
auto& deps = mkldnn_emitter->get_primitive_deps(quant_util[2]);
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[0])
<< ", " << args[0].get_name() << ");\n";
writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(deps[1])
<< ", " << out[0].get_name() << ");\n";
writer << "*(" << out[1].get_name() << ") = " << quant_util[0] << ";\n";
writer << "*(" << out[2].get_name() << ") = " << quant_util[1] << ";\n";
writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
<< to_string(quant_util[2]) << ");\n";
}
else
{
throw ngraph_error("unsupported parameters for QuantizedAvgPool");
}
}
template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::MaxPoolWithIndices)
{
@@ -149,6 +149,8 @@
#include "ngraph/runtime/cpu/op/lstm.hpp"
#include "ngraph/runtime/cpu/op/matmul_bias.hpp"
#include "ngraph/runtime/cpu/op/max_pool_with_indices.hpp"
#include "ngraph/runtime/cpu/op/quantized_avg_pool.hpp"
#include "ngraph/runtime/cpu/op/quantized_max_pool.hpp"
#include "ngraph/runtime/cpu/op/rnn.hpp"
#include "ngraph/runtime/cpu/op/sigmoid.hpp"
#include "ngraph/runtime/cpu/op/sigmoid_mul.hpp"
@@ -310,6 +312,8 @@ static const runtime::cpu::OpMap dispatcher{
&runtime::cpu::CPU_Emitter::emit<runtime::cpu::op::ConvertLayout>},
{TI(ngraph::op::Not), &runtime::cpu::CPU_Emitter::emit<op::Not>},
{TI(ngraph::op::MaxPool), &runtime::cpu::CPU_Emitter::emit<op::MaxPool>},
{TI(ngraph::op::QuantizedMaxPool), &runtime::cpu::CPU_Emitter::emit<op::QuantizedMaxPool>},
{TI(ngraph::op::QuantizedAvgPool), &runtime::cpu::CPU_Emitter::emit<op::QuantizedAvgPool>},
{TI(ngraph::op::MaxPoolWithIndices), &runtime::cpu::CPU_Emitter::emit<op::MaxPoolWithIndices>},
{TI(ngraph::op::Reverse), &runtime::cpu::CPU_Emitter::emit<op::Reverse>},
{TI(ngraph::op::ReverseSequence), &runtime::cpu::CPU_Emitter::emit<op::ReverseSequence>},
@@ -25,6 +25,8 @@
#include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/cpu/op/dequantize.hpp"
#include "ngraph/runtime/cpu/op/quantized_avg_pool.hpp"
#include "ngraph/runtime/cpu/op/quantized_max_pool.hpp"
#include "ngraph/type/element_type.hpp"
using namespace ngraph::runtime::cpu;
@@ -147,6 +149,53 @@ size_t MKLDNNEmitter::build_dequantization(const ngraph::Node* node,
return dequantize_index;
}
void MKLDNNEmitter::build_quantized_max_pool(const ngraph::Node* node,
std::vector<float>& quant_util)
{
auto qmax_pool = static_cast<const ngraph::op::QuantizedMaxPool*>(node);
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
size_t qmax_pool_index = this->build_pooling_forward(mkldnn::algorithm::pooling_max,
input_desc,
result_desc,
qmax_pool->get_window_movement_strides(),
qmax_pool->get_window_shape(),
qmax_pool->get_padding_below(),
qmax_pool->get_padding_above());
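// The min and max inputs are required to be Constants (enforced by the op's
// constructor), so their values can be read here at primitive-build time.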
auto min_const_op = std::static_pointer_cast<ngraph::op::Constant>(qmax_pool->get_argument(1));
auto max_const_op = std::static_pointer_cast<ngraph::op::Constant>(qmax_pool->get_argument(2));
float min = *(static_cast<float const*>(min_const_op->get_data_ptr()));
float max = *(static_cast<float const*>(max_const_op->get_data_ptr()));
quant_util.push_back(min);
quant_util.push_back(max);
quant_util.push_back(qmax_pool_index);
}
void MKLDNNEmitter::build_quantized_avg_pool(const ngraph::Node* node,
std::vector<float>& quant_util)
{
auto qavg_pool = static_cast<const ngraph::op::QuantizedAvgPool*>(node);
auto input_desc = mkldnn_utils::get_input_mkldnn_md(node, 0);
auto result_desc = mkldnn_utils::get_output_mkldnn_md(node, 0);
size_t qavg_pool_index =
this->build_pooling_forward((qavg_pool->get_include_padding_in_avg_computation()
? mkldnn::algorithm::pooling_avg_include_padding
: mkldnn::algorithm::pooling_avg_exclude_padding),
input_desc,
result_desc,
qavg_pool->get_window_movement_strides(),
qavg_pool->get_window_shape(),
qavg_pool->get_padding_below(),
qavg_pool->get_padding_above());
auto min_const_op = std::static_pointer_cast<ngraph::op::Constant>(qavg_pool->get_argument(1));
auto max_const_op = std::static_pointer_cast<ngraph::op::Constant>(qavg_pool->get_argument(2));
float min = *(static_cast<float const*>(min_const_op->get_data_ptr()));
float max = *(static_cast<float const*>(max_const_op->get_data_ptr()));
quant_util.push_back(min);
quant_util.push_back(max);
quant_util.push_back(qavg_pool_index);
}
mkldnn::memory::format MKLDNNEmitter::query_convolution_forward_weight_format(
const mkldnn::memory::desc& input_data_desc,
const mkldnn::memory::desc& weights_desc_any,
@@ -519,6 +519,12 @@ namespace ngraph
const mkldnn::memory::desc& input_desc,
const mkldnn::memory::desc& result_desc);
void build_quantized_max_pool(const ngraph::Node* node,
std::vector<float>& quant_util);
void build_quantized_avg_pool(const ngraph::Node* node,
std::vector<float>& quant_util);
private:
std::vector<mkldnn::primitive*> m_mkldnn_primitives;
std::vector<mkldnn::stream> m_mkldnn_streams;
@@ -235,12 +235,12 @@ mkldnn::memory::desc runtime::cpu::mkldnn_utils::create_default_mkldnn_md(
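// Bugfix: query the element type of the requested port rather than always port 0.
// Quantized pooling ops have mixed output types (u8/i8 pooled data plus f32 min/max),
// so port 0's type is not valid for every output.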
if (output)
{
shape = node->get_output_shape(index);
- et = runtime::cpu::mkldnn_utils::get_mkldnn_data_type(node->get_output_element_type(0));
+ et = runtime::cpu::mkldnn_utils::get_mkldnn_data_type(node->get_output_element_type(index));
}
else
{
shape = node->get_input_shape(index);
- et = runtime::cpu::mkldnn_utils::get_mkldnn_data_type(node->get_input_element_type(0));
+ et = runtime::cpu::mkldnn_utils::get_mkldnn_data_type(node->get_input_element_type(index));
}
return memory::desc(memory::dims(shape.begin(), shape.end()), et, format);
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include "ngraph/graph_util.hpp"
#include "ngraph/op/op.hpp"
namespace ngraph
{
namespace op
{
/// \brief Batched average pooling operation, with optional padding and window stride.
///
class QuantizedAvgPool : public Op
{
public:
/// \brief Constructs a batched average pooling operation.
///
/// \param arg The node producing the input data batch tensor.<br>
/// `[d1, ..., dn]`
/// \param window_shape The window shape.<br>
/// `[n]`
/// \param window_movement_strides The window movement strides.<br>
/// `[n]`
/// \param padding_below The below-padding shape.<br>
/// `[n]`
/// \param padding_above The above-padding shape.<br>
/// `[n]`
/// \param include_padding_in_avg_computation If true then averages include padding
/// elements, each treated as the number zero. If false, padding elements are entirely
/// ignored when computing averages.
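/// \param min The node producing the minimum of the quantized input range.
/// Expected to be a constant; its value is forwarded to output 1.
/// \param max The node producing the maximum of the quantized input range.
/// Expected to be a constant; its value is forwarded to output 2.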
QuantizedAvgPool(const std::shared_ptr<Node>& arg,
const Shape& window_shape,
const Strides& window_movement_strides,
const Shape& padding_below,
const Shape& padding_above,
bool include_padding_in_avg_computation,
const std::shared_ptr<Node> min,
const std::shared_ptr<Node> max);
void validate_and_infer_types() override;
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
/// \return The window shape.
const Shape& get_window_shape() const { return m_window_shape; }
/// \return The window movement strides.
const Strides& get_window_movement_strides() const { return m_window_movement_strides; }
/// \return The below-padding shape.
const Shape& get_padding_below() const { return m_padding_below; }
/// \return The above-padding shape.
const Shape& get_padding_above() const { return m_padding_above; }
bool get_include_padding_in_avg_computation() const
{
return m_include_padding_in_avg_computation;
}
protected:
Shape m_window_shape;
Strides m_window_movement_strides;
Shape m_padding_below;
Shape m_padding_above;
bool m_include_padding_in_avg_computation;
};
}
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "quantized_max_pool.hpp"
#include "ngraph/function.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/util.hpp"
using namespace std;
using namespace ngraph;
op::QuantizedMaxPool::QuantizedMaxPool(const shared_ptr<Node>& arg,
const Shape& window_shape,
const Strides& window_movement_strides,
const Shape& padding_below,
const Shape& padding_above,
const shared_ptr<Node> min,
const shared_ptr<Node> max)
: Op("QuantizedMaxPool", check_single_output_args({arg, min, max}))
, m_window_shape(window_shape)
, m_window_movement_strides(window_movement_strides)
, m_padding_below(padding_below)
, m_padding_above(padding_above)
{
constructor_validate_and_infer_types();
if (arg->get_element_type() != element::u8 && arg->get_element_type() != element::i8)
{
throw ngraph_error("Dequantization supported only for i8/u8!");
}
if (min->get_element_type() != max->get_element_type())
{
throw ngraph_error("Min's element type isn't equal to max's!");
}
if (!(std::dynamic_pointer_cast<op::Constant>(min) &&
std::dynamic_pointer_cast<op::Constant>(max)))
{
throw ngraph_error("Min and max have to be constants!");
}
}
void op::QuantizedMaxPool::validate_and_infer_types()
{
auto& arg_shape = get_input_shape(0);
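// Default any unspecified strides/padding to 1 and 0 over each spatial dimension.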
if (0 == m_window_movement_strides.size() && arg_shape.size() > 2)
{
m_window_movement_strides = Strides(arg_shape.size() - 2, 1);
}
if (0 == m_padding_below.size() && arg_shape.size() > 2)
{
m_padding_below = Shape(arg_shape.size() - 2, 0);
}
if (0 == m_padding_above.size() && arg_shape.size() > 2)
{
m_padding_above = Shape(arg_shape.size() - 2, 0);
}
//
// Make sure batch size and channel count are not zero, and that we have at least one spatial
// dimension (in other words, that arg has shape NCDi for some Di of rank>0, N != 0, C != 0).
//
NODE_VALIDATION_ASSERT(this, arg_shape.size() >= 3)
<< "Data input shape does not have rank of at least 3 (data input shape: " << arg_shape
<< ").";
size_t batch_size = arg_shape[0];
NODE_VALIDATION_ASSERT(this, batch_size != 0)
<< "Data batch size is zero (data input shape: " << arg_shape << ").";
size_t channel_count = arg_shape[1];
NODE_VALIDATION_ASSERT(this, channel_count != 0)
<< "Channel count is zero (data input shape: " << arg_shape << ").";
size_t spatial_dimension_count = arg_shape.size() - 2;
//
// Make sure window shape, window movement strides, and padding have same rank as Di.
//
NODE_VALIDATION_ASSERT(this, m_window_shape.size() == spatial_dimension_count)
<< "Window shape rank does not match number of spatial dimensions (window shape: "
<< m_window_shape << ", data input shape: " << arg_shape << ").";
NODE_VALIDATION_ASSERT(this, m_window_movement_strides.size() == spatial_dimension_count)
<< "Window movement stride rank does not match number of spatial dimensions (window "
"movement strides: "
<< m_window_movement_strides << ", data input shape: " << arg_shape << ").";
NODE_VALIDATION_ASSERT(this, m_padding_below.size() == spatial_dimension_count)
<< "Below-padding rank does not match number of spatial dimensions (padding below: "
<< m_padding_below << ", data input shape: " << arg_shape << ").";
NODE_VALIDATION_ASSERT(this, m_padding_above.size() == spatial_dimension_count)
<< "Above-padding rank does not match number of spatial dimensions (padding above: "
<< m_padding_above << ", data input shape: " << arg_shape << ").";
//
// Extract input item shape Di and make sure all dimensions are larger than 0.
//
Shape input_item_virtual_shape;
for (size_t i = 0; i < spatial_dimension_count; i++)
{
size_t dim_size = arg_shape[1 + 1 + i];
size_t virtual_dim_size = m_padding_below[i] + dim_size + m_padding_above[i];
input_item_virtual_shape.push_back(virtual_dim_size);
}
for (size_t i = 0; i < spatial_dimension_count; i++)
{
NODE_VALIDATION_ASSERT(this, input_item_virtual_shape[i] != 0)
<< "Data input spatial dimension " << i
<< " has zero length even after padding (virtual shape of input item: "
<< input_item_virtual_shape << ").";
}
//
// Make sure window shape dimensions are all larger than 0.
//
for (size_t i = 0; i < spatial_dimension_count; i++)
{
NODE_VALIDATION_ASSERT(this, m_window_shape[i] != 0)
<< "Window shape dimension " << i
<< " has zero length (window shape: " << m_window_shape << ").";
}
//
// Make sure the pooling window fits within the spatial dimensions.
//
for (size_t i = 0; i < spatial_dimension_count; i++)
{
NODE_VALIDATION_ASSERT(this, m_window_shape[i] <= input_item_virtual_shape[i])
<< "Window shape after padding is larger than the spatial dimensions (window shape: "
<< m_window_shape << ", virtual shape of input item: " << input_item_virtual_shape
<< ").";
}
//
// Compute output item shape Do, checking at the same time that all window movement strides are larger than 0.
//
Shape output_item_shape;
for (size_t i = 0; i < spatial_dimension_count; i++)
{
NODE_VALIDATION_ASSERT(this, m_window_movement_strides[i] != 0)
<< "Window movement strides dimension " << i
<< " has zero length (window movement strides: " << m_window_movement_strides << ").";
output_item_shape.push_back(ceil_div(input_item_virtual_shape[i] - m_window_shape[i] + 1,
m_window_movement_strides[i]));
}
//
// Construct result shape: NCDo.
//
Shape result_shape(1 + 1 + spatial_dimension_count);
result_shape[0] = batch_size;
result_shape[1] = channel_count;
copy(output_item_shape.begin(), output_item_shape.end(), result_shape.begin() + 2);
set_output_size(3);
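// Output 0 is the pooled data; outputs 1 and 2 forward the (min, max) range as f32 scalars.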
set_output_type(0, get_input_element_type(0), result_shape);
//TODO(nbpatel): Change to Shape{} once the mkldnn version is updated.
set_output_type(1, element::f32, Shape{1});
set_output_type(2, element::f32, Shape{1});
}
shared_ptr<Node> op::QuantizedMaxPool::copy_with_new_args(const NodeVector& new_args) const
{
check_new_args_count(this, new_args);
return make_shared<QuantizedMaxPool>(new_args.at(0),
m_window_shape,
m_window_movement_strides,
m_padding_below,
m_padding_above,
new_args.at(1),
new_args.at(2));
}
/*******************************************************************************
* Copyright 2017-2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#pragma once
#include "ngraph/graph_util.hpp"
#include "ngraph/op/op.hpp"
namespace ngraph
{
namespace op
{
class QuantizedMaxPool : public Op
{
public:
/// \brief Constructs a batched max pooling operation.
///
/// \param arg The node producing the input data batch tensor.
/// \param window_shape The window shape.
/// \param window_movement_strides The window movement strides.
/// \param padding_below The below-padding shape.
/// \param padding_above The above-padding shape.
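/// \param min The node producing the minimum of the quantized input range (must be a Constant; forwarded to output 1).
/// \param max The node producing the maximum of the quantized input range (must be a Constant; forwarded to output 2).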
QuantizedMaxPool(const std::shared_ptr<Node>& arg,
const Shape& window_shape,
const Strides& window_movement_strides,
const Shape& padding_below,
const Shape& padding_above,
const std::shared_ptr<Node> min,
const std::shared_ptr<Node> max);
void validate_and_infer_types() override;
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override;
const Shape& get_window_shape() const { return m_window_shape; }
const Strides& get_window_movement_strides() const { return m_window_movement_strides; }
const Shape& get_padding_below() const { return m_padding_below; }
const Shape& get_padding_above() const { return m_padding_above; }
protected:
Shape m_window_shape;
Strides m_window_movement_strides;
Shape m_padding_below;
Shape m_padding_above;
};
}
}
@@ -45,6 +45,8 @@
#include "ngraph/runtime/cpu/op/group_conv.hpp"
#include "ngraph/runtime/cpu/op/lstm.hpp"
#include "ngraph/runtime/cpu/op/max_pool_with_indices.hpp"
#include "ngraph/runtime/cpu/op/quantized_avg_pool.hpp"
#include "ngraph/runtime/cpu/op/quantized_max_pool.hpp"
#include "ngraph/runtime/cpu/op/rnn.hpp"
#include "ngraph/runtime/cpu/op/sigmoid.hpp"
@@ -670,6 +672,33 @@ namespace ngraph
}
}
template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::QuantizedMaxPool)
{
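// MKL-DNN quantized pooling requires u8/i8 input; nodes that do not match
// are left unassigned and rejected later by the builders/emitters.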
if (node->get_input_element_type(0) == element::u8 ||
node->get_input_element_type(0) == element::i8)
{
auto quantized_mp = static_cast<op::QuantizedMaxPool*>(node);
auto op_annotations =
std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
op_annotations->set_mkldnn_op(true);
quantized_mp->set_op_annotations(op_annotations);
}
}
template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::QuantizedAvgPool)
{
if (node->get_input_element_type(0) == element::u8 ||
node->get_input_element_type(0) == element::i8)
{
auto quantized_ap = static_cast<op::QuantizedAvgPool*>(node);
auto op_annotations =
std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
op_annotations->set_mkldnn_op(true);
quantized_ap->set_op_annotations(op_annotations);
}
}
template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::BoundedRelu)
{
@@ -754,6 +783,10 @@ static const runtime::cpu::pass::AssignOpMap s_dispatcher{
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::SigmoidBackprop>},
{TI(ngraph::op::Lstm), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Lstm>},
{TI(ngraph::op::Rnn), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Rnn>},
{TI(ngraph::op::QuantizedMaxPool),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::QuantizedMaxPool>},
{TI(ngraph::op::QuantizedAvgPool),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::QuantizedAvgPool>},
{TI(ngraph::op::Softmax), &runtime::cpu::pass::CPUAssignment::assign<ngraph::op::Softmax>},
{TI(ngraph::op::ConvolutionAdd),
&runtime::cpu::pass::CPUAssignment::assign<ngraph::op::ConvolutionAdd>},
@@ -24,6 +24,8 @@
#include "ngraph/op/constant.hpp"
#include "ngraph/op/get_output_element.hpp"
#include "ngraph/runtime/cpu/op/dequantize.hpp"
#include "ngraph/runtime/cpu/op/quantized_avg_pool.hpp"
#include "ngraph/runtime/cpu/op/quantized_max_pool.hpp"
#include "util/all_close.hpp"
#include "util/all_close_f.hpp"
#include "util/ndarray.hpp"
@@ -34,6 +36,134 @@
using namespace std;
using namespace ngraph;
TEST(quantize_cpu, quantize_max_pool_2d_unsigned)
{
vector<uint8_t> a_data = {0, 1, 0, 2, 1, 0, 3, 2, 0, 0, 2, 0, 0, 0, 1};
Shape shape_a{1, 1, 3, 5};
Shape window_shape{2, 3};
auto window_movement_strides = Strides{1, 1};
Shape padding_below{0, 0};
Shape padding_above{0, 0};
auto A = make_shared<op::Parameter>(element::u8, shape_a);
Shape shape_r{1, 1, 2, 3};
auto B = op::Constant::create(element::f32, Shape{1}, {0.0f});
auto C = op::Constant::create(element::f32, Shape{1}, {255.0f});
auto QMP = make_shared<op::QuantizedMaxPool>(
A, window_shape, window_movement_strides, padding_below, padding_above, B, C);
auto output_data = std::make_shared<op::GetOutputElement>(QMP, 0);
auto output_min = std::make_shared<op::GetOutputElement>(QMP, 1);
auto output_max = std::make_shared<op::GetOutputElement>(QMP, 2);
auto f = make_shared<Function>(NodeVector{output_data, output_min, output_max},
op::ParameterVector{A});
auto backend = runtime::Backend::create("CPU");
// Create some tensors for input/output
auto a = backend->create_tensor(element::u8, shape_a);
copy_data(a, a_data);
auto result = backend->create_tensor(element::u8, shape_r);
auto result_min = backend->create_tensor(element::f32, Shape{1});
auto result_max = backend->create_tensor(element::f32, Shape{1});
backend->call_with_validate(f, {result, result_min, result_max}, {a});
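// Max over each 2x3 window of the 3x5 input gives {3, 3, 2, 3, 3, 2};
// the {0, 255} range constants pass through to outputs 1 and 2.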
EXPECT_EQ((vector<uint8_t>{3, 3, 2, 3, 3, 2}), read_vector<uint8_t>(result));
EXPECT_EQ((vector<float>{0.0}), read_vector<float>(result_min));
EXPECT_EQ((vector<float>{255.0}), read_vector<float>(result_max));
}
TEST(quantize_cpu, quantize_max_pool_2d_signed)
{
vector<int8_t> a_data = {0, 1, 0, -2, 1, 0, -3, 2, 0, 0, 2, 0, 0, 0, 1};
Shape shape_a{1, 1, 3, 5};
Shape window_shape{2, 3};
auto window_movement_strides = Strides{1, 1};
Shape padding_below{0, 0};
Shape padding_above{0, 0};
auto A = make_shared<op::Parameter>(element::i8, shape_a);
Shape shape_r{1, 1, 2, 3};
auto B = op::Constant::create(element::f32, Shape{1}, {0.0f});
auto C = op::Constant::create(element::f32, Shape{1}, {127.0f});
auto QMP = make_shared<op::QuantizedMaxPool>(
A, window_shape, window_movement_strides, padding_below, padding_above, B, C);
auto output_data = std::make_shared<op::GetOutputElement>(QMP, 0);
auto output_min = std::make_shared<op::GetOutputElement>(QMP, 1);
auto output_max = std::make_shared<op::GetOutputElement>(QMP, 2);
auto f = make_shared<Function>(NodeVector{output_data, output_min, output_max},
op::ParameterVector{A});
auto backend = runtime::Backend::create("CPU");
// Create some tensors for input/output
auto a = backend->create_tensor(element::i8, shape_a);
copy_data(a, a_data);
auto result = backend->create_tensor(element::i8, shape_r);
auto result_min = backend->create_tensor(element::f32, Shape{1});
auto result_max = backend->create_tensor(element::f32, Shape{1});
backend->call_with_validate(f, {result, result_min, result_max}, {a});
EXPECT_EQ((vector<int8_t>{2, 2, 2, 2, 2, 2}), read_vector<int8_t>(result));
EXPECT_EQ((vector<float>{0.0}), read_vector<float>(result_min));
EXPECT_EQ((vector<float>{127.0}), read_vector<float>(result_max));
}
TEST(quantize_cpu, quantize_avg_pool_2d_unsigned)
{
vector<uint8_t> a_data = {0, 1, 0, 2, 1, 0, 3, 2, 0, 0, 2, 0, 0, 0, 1};
Shape shape_a{1, 1, 3, 5};
Shape window_shape{2, 3};
auto window_movement_strides = Strides{1, 1};
Shape padding_below{0, 0};
Shape padding_above{0, 0};
auto A = make_shared<op::Parameter>(element::u8, shape_a);
Shape shape_r{1, 1, 2, 3};
auto B = op::Constant::create(element::f32, Shape{1}, {0.0f});
auto C = op::Constant::create(element::f32, Shape{1}, {255.0f});
auto QAP = make_shared<op::QuantizedAvgPool>(
A, window_shape, window_movement_strides, padding_below, padding_above, false, B, C);
auto output_data = std::make_shared<op::GetOutputElement>(QAP, 0);
auto output_min = std::make_shared<op::GetOutputElement>(QAP, 1);
auto output_max = std::make_shared<op::GetOutputElement>(QAP, 2);
auto f = make_shared<Function>(NodeVector{output_data, output_min, output_max},
op::ParameterVector{A});
auto backend = runtime::Backend::create("CPU");
// Create some tensors for input/output
auto a = backend->create_tensor(element::u8, shape_a);
copy_data(a, a_data);
auto result = backend->create_tensor(element::u8, shape_r);
auto result_min = backend->create_tensor(element::f32, Shape{1});
auto result_max = backend->create_tensor(element::f32, Shape{1});
backend->call_with_validate(f, {result, result_min, result_max}, {a});
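// Each 2x3 window sums to 6, 8, 5, 7, 5, 3; MKL-DNN rounds the six-element
// averages to {1, 1, 1, 1, 1, 0}.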
EXPECT_EQ((vector<uint8_t>{1, 1, 1, 1, 1, 0}), read_vector<uint8_t>(result));
EXPECT_EQ((vector<float>{0.0}), read_vector<float>(result_min));
EXPECT_EQ((vector<float>{255.0}), read_vector<float>(result_max));
}
TEST(quantize_cpu, quantize_avg_pool_2d_signed)
{
vector<int8_t> a_data = {10, 1, 0, -2, 1, 0, -3, 4, 0, 0, 2, 0, 0, 0, 1};
Shape shape_a{1, 1, 3, 5};
Shape window_shape{2, 3};
auto window_movement_strides = Strides{1, 1};
Shape padding_below{0, 0};
Shape padding_above{0, 0};
auto A = make_shared<op::Parameter>(element::i8, shape_a);
Shape shape_r{1, 1, 2, 3};
auto B = op::Constant::create(element::f32, Shape{1}, {0.0f});
auto C = op::Constant::create(element::f32, Shape{1}, {127.0f});
auto QAP = make_shared<op::QuantizedAvgPool>(
A, window_shape, window_movement_strides, padding_below, padding_above, false, B, C);
auto output_data = std::make_shared<op::GetOutputElement>(QAP, 0);
auto output_min = std::make_shared<op::GetOutputElement>(QAP, 1);
auto output_max = std::make_shared<op::GetOutputElement>(QAP, 2);
auto f = make_shared<Function>(NodeVector{output_data, output_min, output_max},
op::ParameterVector{A});
auto backend = runtime::Backend::create("CPU");
// Create some tensors for input/output
auto a = backend->create_tensor(element::i8, shape_a);
copy_data(a, a_data);
auto result = backend->create_tensor(element::i8, shape_r);
auto result_min = backend->create_tensor(element::f32, Shape{1});
auto result_max = backend->create_tensor(element::f32, Shape{1});
backend->call_with_validate(f, {result, result_min, result_max}, {a});
EXPECT_EQ((vector<int8_t>{2, 0, 0, 0, 0, 1}), read_vector<int8_t>(result));
EXPECT_EQ((vector<float>{0.0}), read_vector<float>(result_min));
EXPECT_EQ((vector<float>{127.0}), read_vector<float>(result_max));
}
template <typename T>
void DequantizeTest(int input, float min, float max, float expected_output)
{