Commit 9ece1688 authored by Nishant Patel's avatar Nishant Patel Committed by Scott Cyphers

Delete redundant files (#3490)

parent 8673ecad
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include "ngraph/builder/quantization/quantized_linear_matmul.hpp"
#include "ngraph/axis_set.hpp"
#include "ngraph/builder/make_constant.hpp"
#include "ngraph/builder/quantization.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/dequantize.hpp"
#include "ngraph/op/divide.hpp"
#include "ngraph/op/dot.hpp"
#include "ngraph/op/experimental/quantized_dot.hpp"
#include "ngraph/op/multiply.hpp"
#include "ngraph/op/quantize.hpp"
#include "ngraph/op/reshape.hpp"
#include "ngraph/type/element_type.hpp"
using namespace std;
using namespace ngraph;
namespace ngraph
{
namespace builder
{
namespace quantization
{
// TODO: this code is falling back to fp32 dot
// 1) add support in reference kernel for zero point
// Builds a quantized matmul. When every zero point is a constant zero the
// operands can be fed directly to the quantized kernel; otherwise we fall
// back to dequantize -> fp32 dot -> quantize (the reference kernel does not
// yet understand non-zero zero points).
shared_ptr<Node> QuantizedLinearMatmul(const Output<Node>& input0,
                                       const Output<Node>& input1,
                                       const Output<Node>& input0_scale,
                                       const Output<Node>& input0_zero_point,
                                       const Output<Node>& input1_scale,
                                       const Output<Node>& input1_zero_point,
                                       const Output<Node>& output_scale,
                                       const Output<Node>& output_zero_point)
{
    const bool zero_points_are_zero = ngraph::is_zero(input0_zero_point) &&
                                      ngraph::is_zero(input1_zero_point) &&
                                      ngraph::is_zero(output_zero_point);

    if (!zero_points_are_zero)
    {
        // Fallback path: widen both operands to floating point, do the dot
        // there, and requantize the result with the caller's output scale.
        const AxisSet no_axes;
        auto fp_lhs = make_shared<op::Dequantize>(input0,
                                                  input0_scale,
                                                  input0_zero_point,
                                                  input0_scale.get_element_type(),
                                                  no_axes);
        auto fp_rhs = make_shared<op::Dequantize>(input1,
                                                  input1_scale,
                                                  input1_zero_point,
                                                  input1_scale.get_element_type(),
                                                  no_axes);
        auto fp_dot = make_shared<op::Dot>(fp_lhs, fp_rhs, 1);
        return make_shared<op::Quantize>(
            fp_dot,
            output_scale,
            output_zero_point,
            output_zero_point.get_element_type(),
            no_axes,
            op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
    }

    // Fast path: combine the three scales into one requantization factor and
    // hand everything to the fused quantized kernel.
    auto requantization_scale = (input0_scale * input1_scale) / output_scale;
    return make_shared<op::QuantizedDot>(input0, input1, requantization_scale);
}
// Integer matmul with implicit zero points of zero: a unit output scale is
// supplied and both requantize and relu are disabled so the kernel emits the
// raw accumulator values.
shared_ptr<Node> QuantizedLinearMatmulInteger(const Output<Node>& input0,
                                              const Output<Node>& input1)
{
    const auto unit_scale = make_constant(element::f32, Shape{}, 1);
    return make_shared<op::QuantizedDot>(input0, input1, unit_scale, false, false);
}
// Integer matmul with explicit zero points. Constant-zero zero points reduce
// to the two-argument overload; anything else falls back to a dequantized
// fp32 dot followed by requantization with unit scales.
shared_ptr<Node> QuantizedLinearMatmulInteger(const Output<Node>& input0,
                                              const Output<Node>& input1,
                                              const Output<Node>& input0_zero_point,
                                              const Output<Node>& input1_zero_point)
{
    if (ngraph::is_zero(input0_zero_point) && ngraph::is_zero(input1_zero_point))
    {
        // Zero points are provably zero — use the fused quantized kernel.
        return QuantizedLinearMatmulInteger(input0, input1);
    }

    // Fall back to performing matmul on dequantized floating-point values.
    // Scales of 1 and an output zero point of 0 keep the round-trip lossless
    // apart from the zero-point subtraction itself.
    const auto unit_scale0 = make_constant(element::f32, Shape{}, 1);
    const auto unit_scale1 = make_constant(element::f32, Shape{}, 1);
    const auto unit_out_scale = make_constant(element::f32, Shape{}, 1);
    const auto zero_out_zp = make_constant(element::i32, Shape{}, 0);
    const AxisSet no_axes;

    const auto fp_lhs = make_shared<op::Dequantize>(input0,
                                                    unit_scale0,
                                                    input0_zero_point,
                                                    unit_scale0->get_element_type(),
                                                    no_axes);
    const auto fp_rhs = make_shared<op::Dequantize>(input1,
                                                    unit_scale1,
                                                    input1_zero_point,
                                                    unit_scale1->get_element_type(),
                                                    no_axes);
    const auto fp_dot = make_shared<op::Dot>(fp_lhs, fp_rhs, 1);
    return make_shared<op::Quantize>(
        fp_dot,
        unit_out_scale,
        zero_out_zp,
        zero_out_zp->get_element_type(),
        no_axes,
        op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
}
}
}
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp"
namespace ngraph
{
namespace builder
{
namespace quantization
{
/// \brief Quantized matmul with per-operand scales and zero points.
///        Uses the fused quantized kernel when all zero points are
///        constant zero; otherwise falls back to a dequantize/dot/quantize
///        sequence (see the .cpp for details).
std::shared_ptr<Node> QuantizedLinearMatmul(const Output<Node>& input0,
const Output<Node>& input1,
const Output<Node>& input0_scale,
const Output<Node>& input0_zero_point,
const Output<Node>& input1_scale,
const Output<Node>& input1_zero_point,
const Output<Node>& output_scale,
const Output<Node>& output_zero_point);
/// \brief Integer matmul assuming zero points of zero; emits raw
///        accumulator values (no requantize, no relu).
std::shared_ptr<Node> QuantizedLinearMatmulInteger(const Output<Node>& input0,
const Output<Node>& input1);
/// \brief Integer matmul with explicit zero points. Reduces to the
///        two-argument overload when both zero points are constant zero.
std::shared_ptr<Node>
QuantizedLinearMatmulInteger(const Output<Node>& input0,
const Output<Node>& input1,
const Output<Node>& input0_zero_point,
const Output<Node>& input1_zero_point);
}
}
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <functional>
#include "ngraph/shape.hpp"
#include "quantized_dot.hpp"
using namespace std;
using namespace ngraph;
const string op::QuantizedDot::type_name{"QuantizedDot"};

// Constructs a quantized dot node: validates that both operands are rank-2
// matrices with matching inner dimensions, then infers the output element
// type from the input types and the requantize/relu flags.
op::QuantizedDot::QuantizedDot(const Output<Node>& data,
                               const Output<Node>& weights,
                               const Output<Node>& scale,
                               bool requantize,
                               bool with_relu)
    : Op({data, weights, scale})
    , m_requantize(requantize)
    , m_with_relu(with_relu)
{
    constructor_validate_and_infer_types();

    const auto& lhs_shape = data.get_shape();
    const auto& rhs_shape = weights.get_shape();

    // QuantizedDot computes [m, n] * [n, k] = [m, k]; only rank-2 operands
    // are supported, and the inner dimensions must agree.
    NODE_VALIDATION_CHECK(this,
                          lhs_shape.size() == 2 && rhs_shape.size() == 2 &&
                              lhs_shape[1] == rhs_shape[0],
                          "only valid tensors of rank 2 supported. data shape ",
                          lhs_shape,
                          " weights shape ",
                          rhs_shape);

    // u8 x u8 always produces u8; otherwise requantization narrows to
    // u8/i8 (relu picks unsigned) and the raw accumulator is i32.
    element::Type output_et;
    if (data.get_element_type() == element::u8 && weights.get_element_type() == element::u8)
    {
        output_et = element::u8;
    }
    else if (requantize)
    {
        output_et = with_relu ? element::u8 : element::i8;
    }
    else
    {
        output_et = element::i32;
    }
    set_output_type(0, output_et, Shape{lhs_shape[0], rhs_shape[1]});
}
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <utility>
#include "ngraph/op/op.hpp"
namespace ngraph
{
namespace op
{
/// \brief Quantized matrix multiply: [m, n] * [n, k] = [m, k] over
///        quantized integer operands with a requantization scale input.
class QuantizedDot : public Op
{
public:
NGRAPH_API
static const std::string type_name;
const std::string& description() const override { return type_name; }
QuantizedDot() = default;
/// \brief Constructs a quantized dot node.
///
/// \param data       Left-hand operand; rank-2 tensor [m, n].
/// \param weights    Right-hand operand; rank-2 tensor [n, k].
/// \param scale      Requantization scale applied to the accumulator.
/// \param requantize Narrow the i32 accumulator back to u8/i8 output.
/// \param with_relu  Fuse a relu; with requantize, selects u8 output.
QuantizedDot(const Output<Node>& data,
const Output<Node>& weights,
const Output<Node>& scale,
bool requantize = true,
bool with_relu = false);
virtual std::shared_ptr<Node>
copy_with_new_args(const NodeVector& new_args) const override
{
check_new_args_count(this, new_args);
return std::make_shared<QuantizedDot>(
new_args.at(0), new_args.at(1), new_args.at(2), m_requantize, m_with_relu);
}
bool with_relu() const { return m_with_relu; }
bool requantize() const { return m_requantize; }
protected:
bool m_requantize;
bool m_with_relu;
};
} // namespace op
} // namespace ngraph
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment