Commit bcfbf099 authored by Adam Straw, committed by Robert Kimball

run Quantize / Dequantize reference kernels on CPU backend (#1780)

* dex core quantize/de

* more types

* codegen

* remove comments

* remove more dead code
parent 7497ca7c
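
For orientation before the diff: the reference kernels this change wires up implement affine quantization and dequantization. The following is a minimal sketch (not part of the commit) of the per-element math, assuming a scalar scale and offset; the real kernels in ngraph/runtime/reference broadcast scale and offset across the axes returned by get_axes(), and the function names here are illustrative only.

// Sketch: per-element math behind the reference quantize/dequantize
// kernels, assuming scalar scale/offset and an int8 quantized type.
#include <algorithm>
#include <cmath>
#include <cstdint>
#include <limits>

// real -> quantized: q = round(r / scale) + offset, clamped to the target type
inline int8_t quantize_one(float r, float scale, int8_t offset)
{
    float q = std::round(r / scale) + static_cast<float>(offset);
    q = std::max(q, static_cast<float>(std::numeric_limits<int8_t>::min()));
    q = std::min(q, static_cast<float>(std::numeric_limits<int8_t>::max()));
    return static_cast<int8_t>(q);
}

// quantized -> real: r = (q - offset) * scale
inline float dequantize_one(int8_t q, float scale, int8_t offset)
{
    return static_cast<float>(q - offset) * scale;
}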
@@ -70,6 +70,7 @@ set(SRC
     builder/softmax.cpp
     builder/sum.cpp
     builder/topk.cpp
+    builder/quantization.cpp
     kernel/eigen_thread_pool.cpp
     kernel/pad.cpp
     kernel/reduce_max.cpp
...
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <cstring>
#include "ngraph/op/dequantize.hpp"
#include "ngraph/op/quantize.hpp"
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/reference/dequantize.hpp"
#include "ngraph/runtime/reference/quantize.hpp"
using namespace std;
using namespace ngraph;

namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            template <>
            void Builder::BUILDER_DECL(ngraph::op::Dequantize)
            {
                auto& functors = external_function->get_functors();
                auto& tensor_data = external_function->get_tensor_data();
                const ngraph::op::Dequantize* dequantize =
                    static_cast<const ngraph::op::Dequantize*>(node);

                function<void(CPURuntimeContext*)> functor;

                auto& arg0_tensor = tensor_data[args[0].get_name()];
                auto& arg1_tensor = tensor_data[args[1].get_name()];
                auto& arg2_tensor = tensor_data[args[2].get_name()];
                auto& out_tensor = tensor_data[out[0].get_name()];

                auto arg0_shape = args[0].get_shape();
                auto arg1_shape = args[1].get_shape();
                auto daxes = dequantize->get_axes();

                if (args[0].get_element_type() == element::i8)
                {
                    if (out[0].get_element_type() == element::f32)
                    {
                        functor = [&, arg0_shape, arg1_shape, daxes](CPURuntimeContext* ctx) {
                            ngraph::runtime::reference::dequantize<int8_t>(
                                static_cast<int8_t*>(arg0_tensor),
                                static_cast<float*>(arg1_tensor),
                                static_cast<int8_t*>(arg2_tensor),
                                static_cast<float*>(out_tensor),
                                arg0_shape,
                                arg1_shape,
                                daxes);
                        };
                    }
                    else if (out[0].get_element_type() == element::f64)
                    {
                        functor = [&, arg0_shape, arg1_shape, daxes](CPURuntimeContext* ctx) {
                            ngraph::runtime::reference::dequantize<int8_t>(
                                static_cast<int8_t*>(arg0_tensor),
                                static_cast<double*>(arg1_tensor),
                                static_cast<int8_t*>(arg2_tensor),
                                static_cast<double*>(out_tensor),
                                arg0_shape,
                                arg1_shape,
                                daxes);
                        };
                    }
                    else
                    {
                        throw ngraph_error("Unsupported dequantization element type");
                    }
                }
                else if (args[0].get_element_type() == element::u8)
                {
                    if (out[0].get_element_type() == element::f32)
                    {
                        functor = [&, arg0_shape, arg1_shape, daxes](CPURuntimeContext* ctx) {
                            ngraph::runtime::reference::dequantize<uint8_t>(
                                static_cast<uint8_t*>(arg0_tensor),
                                static_cast<float*>(arg1_tensor),
                                static_cast<uint8_t*>(arg2_tensor),
                                static_cast<float*>(out_tensor),
                                arg0_shape,
                                arg1_shape,
                                daxes);
                        };
                    }
                    else if (out[0].get_element_type() == element::f64)
                    {
                        functor = [&, arg0_shape, arg1_shape, daxes](CPURuntimeContext* ctx) {
                            ngraph::runtime::reference::dequantize<uint8_t>(
                                static_cast<uint8_t*>(arg0_tensor),
                                static_cast<double*>(arg1_tensor),
                                static_cast<uint8_t*>(arg2_tensor),
                                static_cast<double*>(out_tensor),
                                arg0_shape,
                                arg1_shape,
                                daxes);
                        };
                    }
                    else
                    {
                        throw ngraph_error("Unsupported dequantization element type");
                    }
                }
                else
                {
                    throw ngraph_error("Unsupported input element type");
                }

                functors.emplace_back(functor);
            }

            template <>
            void Builder::BUILDER_DECL(ngraph::op::Quantize)
            {
                auto& functors = external_function->get_functors();
                auto& tensor_data = external_function->get_tensor_data();
                const ngraph::op::Quantize* quantize =
                    static_cast<const ngraph::op::Quantize*>(node);

                function<void(CPURuntimeContext*)> functor;

                auto& arg0_tensor = tensor_data[args[0].get_name()];
                auto& arg1_tensor = tensor_data[args[1].get_name()];
                auto& arg2_tensor = tensor_data[args[2].get_name()];
                auto& out_tensor = tensor_data[out[0].get_name()];

                auto arg0_shape = args[0].get_shape();
                auto arg1_shape = args[1].get_shape();
                auto daxes = quantize->get_axes();

                if (args[0].get_element_type() == element::f32)
                {
                    if (out[0].get_element_type() == element::i8)
                    {
                        functor = [&, arg0_shape, arg1_shape, daxes](CPURuntimeContext* ctx) {
                            ngraph::runtime::reference::quantize<float>(
                                static_cast<float*>(arg0_tensor),
                                static_cast<float*>(arg1_tensor),
                                static_cast<int8_t*>(arg2_tensor),
                                static_cast<int8_t*>(out_tensor),
                                arg0_shape,
                                arg1_shape,
                                daxes);
                        };
                    }
                    else if (out[0].get_element_type() == element::u8)
                    {
                        functor = [&, arg0_shape, arg1_shape, daxes](CPURuntimeContext* ctx) {
                            ngraph::runtime::reference::quantize<float>(
                                static_cast<float*>(arg0_tensor),
                                static_cast<float*>(arg1_tensor),
                                static_cast<uint8_t*>(arg2_tensor),
                                static_cast<uint8_t*>(out_tensor),
                                arg0_shape,
                                arg1_shape,
                                daxes);
                        };
                    }
                    else
                    {
                        throw ngraph_error("Unsupported quantization element type");
                    }
                }
                else if (args[0].get_element_type() == element::f64)
                {
                    if (out[0].get_element_type() == element::i8)
                    {
                        functor = [&, arg0_shape, arg1_shape, daxes](CPURuntimeContext* ctx) {
                            ngraph::runtime::reference::quantize<double>(
                                static_cast<double*>(arg0_tensor),
                                static_cast<double*>(arg1_tensor),
                                static_cast<int8_t*>(arg2_tensor),
                                static_cast<int8_t*>(out_tensor),
                                arg0_shape,
                                arg1_shape,
                                daxes);
                        };
                    }
                    else if (out[0].get_element_type() == element::u8)
                    {
                        functor = [&, arg0_shape, arg1_shape, daxes](CPURuntimeContext* ctx) {
                            ngraph::runtime::reference::quantize<double>(
                                static_cast<double*>(arg0_tensor),
                                static_cast<double*>(arg1_tensor),
                                static_cast<uint8_t*>(arg2_tensor),
                                static_cast<uint8_t*>(out_tensor),
                                arg0_shape,
                                arg1_shape,
                                daxes);
                        };
                    }
                    else
                    {
                        throw ngraph_error("Unsupported quantization element type");
                    }
                }
                else
                {
                    throw ngraph_error("Unsupported input element type");
                }

                functors.emplace_back(functor);
            }

            REGISTER_OP_BUILDER(Dequantize);
            REGISTER_OP_BUILDER(Quantize);
        }
    }
}
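
A note on the lambdas above: the tensor slots (arg0_tensor and friends) are captured by reference because the runtime points them at live buffers only at execution time, while arg0_shape, arg1_shape, and daxes are builder-time locals that must be copied into the closure before the builder returns. A stripped-down, self-contained sketch of that pattern (all names hypothetical, not nGraph's actual API):

// Sketch: capture a late-bound tensor slot by reference, but copy the
// shape, which would dangle if captured by reference.
#include <cstddef>
#include <functional>
#include <vector>

using Functor = std::function<void()>;

Functor make_functor(std::vector<void*>& tensor_data,
                     size_t slot,
                     std::vector<size_t> shape) // taken by value on purpose
{
    auto& tensor = tensor_data[slot]; // slot is re-pointed later, so reference
    return [&tensor, shape]() {
        // a kernel would read `tensor` (now pointing at live data) and
        // iterate according to the copied `shape`
        (void)tensor;
        (void)shape;
    };
}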
@@ -42,6 +42,7 @@
 #include "ngraph/op/convolution.hpp"
 #include "ngraph/op/cos.hpp"
 #include "ngraph/op/cosh.hpp"
+#include "ngraph/op/dequantize.hpp"
 #include "ngraph/op/divide.hpp"
 #include "ngraph/op/dot.hpp"
 #include "ngraph/op/equal.hpp"
@@ -71,6 +72,7 @@
 #include "ngraph/op/parameter.hpp"
 #include "ngraph/op/power.hpp"
 #include "ngraph/op/product.hpp"
+#include "ngraph/op/quantize.hpp"
 #include "ngraph/op/reduce.hpp"
 #include "ngraph/op/reduce_window.hpp"
 #include "ngraph/op/relu.hpp"
@@ -4690,6 +4692,34 @@ namespace ngraph
                 writer.block_end();
             }
+            template <>
+            void CPU_Emitter::EMITTER_DECL(ngraph::op::Dequantize)
+            {
+                auto dequantize = static_cast<const ngraph::op::Dequantize*>(node);
+
+                writer << "reference::dequantize(" << args[0].get_name() << ",\n";
+                writer << "            " << args[1].get_name() << ",\n";
+                writer << "            " << args[2].get_name() << ",\n";
+                writer << "            " << out[0].get_name() << ",\n";
+                writer << "            {" << join(args[0].get_shape()) << "},\n";
+                writer << "            {" << join(args[1].get_shape()) << "},\n";
+                writer << "            {" << join(dequantize->get_axes()) << "});\n";
+            }
+
+            template <>
+            void CPU_Emitter::EMITTER_DECL(ngraph::op::Quantize)
+            {
+                auto quantize = static_cast<const ngraph::op::Quantize*>(node);
+
+                writer << "reference::quantize(" << args[0].get_name() << ",\n";
+                writer << "            " << args[1].get_name() << ",\n";
+                writer << "            " << args[2].get_name() << ",\n";
+                writer << "            " << out[0].get_name() << ",\n";
+                writer << "            {" << join(args[0].get_shape()) << "},\n";
+                writer << "            {" << join(args[1].get_shape()) << "},\n";
+                writer << "            {" << join(quantize->get_axes()) << "});\n";
+            }
 
 #undef TI
         }
     }
...
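
The emitters above splice a direct call to the reference kernel into the generated source. For a hypothetical 2x2 input with a scalar scale and offset (so the scale shape and axis set are empty), the Dequantize emitter would write roughly the following; arg0, arg1, arg2, and out0 stand in for the real generated tensor names:

reference::dequantize(arg0,
            arg1,
            arg2,
            out0,
            {2, 2},
            {},
            {});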
@@ -65,6 +65,7 @@
 #include "ngraph/op/convolution.hpp"
 #include "ngraph/op/cos.hpp"
 #include "ngraph/op/cosh.hpp"
+#include "ngraph/op/dequantize.hpp"
 #include "ngraph/op/divide.hpp"
 #include "ngraph/op/dot.hpp"
 #include "ngraph/op/equal.hpp"
@@ -94,6 +95,7 @@
 #include "ngraph/op/parameter.hpp"
 #include "ngraph/op/power.hpp"
 #include "ngraph/op/product.hpp"
+#include "ngraph/op/quantize.hpp"
 #include "ngraph/op/reduce.hpp"
 #include "ngraph/op/reduce_window.hpp"
 #include "ngraph/op/relu.hpp"
@@ -362,6 +364,8 @@ static const runtime::cpu::OpMap dispatcher{
      &runtime::cpu::CPU_Emitter::emit<runtime::cpu::op::LoopKernel>},
     {TI(ngraph::op::LRN), &runtime::cpu::CPU_Emitter::emit<ngraph::op::LRN>},
     {TI(ngraph::op::ConvolutionAdd), &runtime::cpu::CPU_Emitter::emit<op::ConvolutionAdd>},
+    {TI(ngraph::op::Quantize), &runtime::cpu::CPU_Emitter::emit<op::Quantize>},
+    {TI(ngraph::op::Dequantize), &runtime::cpu::CPU_Emitter::emit<op::Dequantize>},
 };
@@ -436,6 +440,7 @@ void runtime::cpu::CPU_ExternalFunction::compile()
 #include "ngraph/runtime/reference/broadcast.hpp"
 #include "ngraph/runtime/reference/concat.hpp"
 #include "ngraph/runtime/reference/convolution.hpp"
+#include "ngraph/runtime/reference/dequantize.hpp"
 #include "ngraph/runtime/reference/dot.hpp"
 #include "ngraph/runtime/reference/lrn.hpp"
 #include "ngraph/runtime/reference/max.hpp"
@@ -446,6 +451,7 @@ void runtime::cpu::CPU_ExternalFunction::compile()
 #include "ngraph/runtime/reference/or.hpp"
 #include "ngraph/runtime/reference/pad.hpp"
 #include "ngraph/runtime/reference/product.hpp"
+#include "ngraph/runtime/reference/quantize.hpp"
 #include "ngraph/runtime/reference/reduce.hpp"
 #include "ngraph/runtime/reference/reduce_window.hpp"
 #include "ngraph/runtime/reference/relu.hpp"
...
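
The dispatcher entries added above key each emitter off the node's concrete C++ type via the TI() macro. A self-contained sketch of that type-index dispatch pattern (the types and names here are hypothetical stand-ins, not nGraph's actual classes):

// Sketch: dispatch on a node's dynamic type with std::type_index.
#include <functional>
#include <typeindex>
#include <unordered_map>

struct Node { virtual ~Node() = default; };
struct Quantize : Node {};
struct Dequantize : Node {};

using Emitter = std::function<void(const Node*)>;
using OpMap = std::unordered_map<std::type_index, Emitter>;

#define TI(T) std::type_index(typeid(T))

int main()
{
    OpMap dispatcher{
        {TI(Quantize), [](const Node*) { /* emit quantize kernel call */ }},
        {TI(Dequantize), [](const Node*) { /* emit dequantize kernel call */ }},
    };

    Quantize q;
    // typeid on a polymorphic object yields its dynamic type
    dispatcher.at(std::type_index(typeid(q)))(&q);
}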
@@ -12,10 +12,3 @@ one_hot_vector_1_fp
 one_hot_vector_1_fp_nonint
 backwards_batch_norm_three_outputs
 backwards_maxpool_n2_c1_hw5_3x3_str2_max_pad1x2_2x3
-quantize
-quantize_axes
-quantize_int8
-quantize_clamp
-dequantize
-dequantize_axes
-dequantize_int8