Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
N
ngraph
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ngraph
Commits
f3b9389c
Commit
f3b9389c
authored
Aug 23, 2019
by
Nishant Patel
Committed by
Scott Cyphers
Aug 23, 2019
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Segregate the quant builders op wise (#3501)
* Segregate builders op wise * Style * Update ngraph.hpp
parent
98205845
Show whitespace changes
Inline
Side-by-side
Showing
19 changed files
with
814 additions
and
844 deletions
+814
-844
CMakeLists.txt
src/ngraph/CMakeLists.txt
+6
-3
dequantize_builder.cpp
src/ngraph/builder/dequantize_builder.cpp
+57
-0
dequantize_builder.hpp
src/ngraph/builder/dequantize_builder.hpp
+35
-0
quantization.cpp
src/ngraph/builder/quantization.cpp
+0
-359
quantization.hpp
src/ngraph/builder/quantization.hpp
+0
-132
quantized_linear_convolution.cpp
...aph/builder/quantization/quantized_linear_convolution.cpp
+0
-1
quantization_util.hpp
src/ngraph/builder/quantization_util.hpp
+0
-319
quantization_utils.cpp
src/ngraph/builder/quantization_utils.cpp
+122
-0
quantization_utils.hpp
src/ngraph/builder/quantization_utils.hpp
+24
-0
quantize_builder.cpp
src/ngraph/builder/quantize_builder.cpp
+58
-0
quantize_builder.hpp
src/ngraph/builder/quantize_builder.hpp
+36
-0
quantized_concat_builder.cpp
src/ngraph/builder/quantized_concat_builder.cpp
+65
-0
quantized_concat_builder.hpp
src/ngraph/builder/quantized_concat_builder.hpp
+39
-0
quantized_conv_builder.cpp
src/ngraph/builder/quantized_conv_builder.cpp
+204
-1
quantized_conv_builder.hpp
src/ngraph/builder/quantized_conv_builder.hpp
+77
-0
quantized_dot_builder.cpp
src/ngraph/builder/quantized_dot_builder.cpp
+40
-1
quantized_dot_builder.hpp
src/ngraph/builder/quantized_dot_builder.hpp
+15
-0
ngraph.hpp
src/ngraph/ngraph.hpp
+4
-0
builder_quantization.cpp
test/builder_quantization.cpp
+32
-28
No files found.
src/ngraph/CMakeLists.txt
View file @
f3b9389c
...
...
@@ -24,20 +24,23 @@ set (SRC
axis_vector.hpp
builder/autobroadcast.cpp
builder/autobroadcast.hpp
builder/dequantize_builder.cpp
builder/dequantize_builder.hpp
builder/make_constant.hpp
builder/norm.cpp
builder/norm.hpp
builder/numpy_transpose.cpp
builder/numpy_transpose.hpp
builder/quantization.cpp
builder/quantization.hpp
builder/quantize_builder.cpp
builder/quantize_builder.hpp
builder/quantized_concat_builder.cpp
builder/quantized_concat_builder.hpp
builder/quantized_conv_builder.cpp
builder/quantized_conv_builder.hpp
builder/quantized_dot_builder.cpp
builder/quantized_dot_builder.hpp
builder/quantization/quantized_linear_convolution.cpp
builder/quantization/quantized_linear_convolution.hpp
builder/quantization_util.hpp
builder/quantization_utils.hpp
builder/quantization_utils.cpp
builder/reduce_ops.cpp
...
...
src/ngraph/builder/dequantize_builder.cpp
0 → 100644
View file @
f3b9389c
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <memory>
#include "ngraph/builder/dequantize_builder.hpp"
using
namespace
std
;
using
namespace
ngraph
;
namespace
ngraph
{
namespace
builder
{
// Creates an op::Dequantize node for `input`, deriving the scale from [min, max].
//
// input     - quantized value; its element type is taken as the quantized type
// min, max  - range bounds; both must have element type `real_type` and equal shapes
// real_type - element type requested for the dequantized result
// axes      - quantization axes, forwarded unchanged to op::Dequantize
//
// Throws ngraph_error when min/max have the wrong element type or differing shapes.
shared_ptr<Node> DequantizeBuilder(const Output<Node>& input,
                                   const Output<Node>& min,
                                   const Output<Node>& max,
                                   const ngraph::element::Type& real_type,
                                   const ngraph::AxisSet& axes)
{
    auto quant_type = input.get_element_type();

    // The bounds are validated against real_type (the dequantized output type),
    // not against the quantized input type, so the messages name the output type.
    if (min.get_element_type() != real_type)
    {
        throw ngraph_error("DequantizeBuilder: min must match output type");
    }
    if (max.get_element_type() != real_type)
    {
        throw ngraph_error("DequantizeBuilder: max must match output type");
    }

    auto shape = min.get_shape();
    if (shape != max.get_shape())
    {
        throw ngraph_error("DequantizeBuilder: min and max must have same shape");
    }

    // Zero offset of the quantized type, shaped like the range bounds.
    auto zero = make_constant(quant_type, shape, 0);
    auto scale = quantization_utils::get_scale(min, max, quant_type);
    return make_shared<op::Dequantize>(input, scale, zero, real_type, axes);
}
}
}
src/ngraph/builder/dequantize_builder.hpp
0 → 100644
View file @
f3b9389c
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/builder/make_constant.hpp"
#include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/dequantize.hpp"
#include "quantization_utils.hpp"
namespace
ngraph
{
namespace
builder
{
// Builds an op::Dequantize node that converts the quantized `input` to `real_type`.
// The scale is computed from the [min, max] range; min and max must both have
// element type `real_type` and identical shapes, otherwise ngraph_error is thrown.
// `axes` is forwarded to op::Dequantize as the quantization axes.
std::shared_ptr<Node> DequantizeBuilder(const Output<Node>& input,
                                        const Output<Node>& min,
                                        const Output<Node>& max,
                                        const element::Type& real_type,
                                        const AxisSet& axes);
}
}
src/ngraph/builder/quantization.cpp
deleted
100644 → 0
View file @
98205845
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <memory>
#include "ngraph/builder/make_constant.hpp"
#include "ngraph/builder/quantization.hpp"
#include "ngraph/op/concat.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convert.hpp"
#include "ngraph/op/max.hpp"
#include "ngraph/op/min.hpp"
#include "ngraph/op/reshape.hpp"
#include "quantization_util.hpp"
using
namespace
std
;
using
namespace
ngraph
;
namespace
ngraph
{
namespace
builder
{
// Quantizes `input` to `quant_type` with a scale derived from the [min, max] range.
// min and max must share the input's element type and have equal shapes;
// an ngraph_error is thrown otherwise.
shared_ptr<Node> ScaledQuantize(const Output<Node>& input,
                                const Output<Node>& min,
                                const Output<Node>& max,
                                const ngraph::element::Type& quant_type,
                                const ngraph::AxisSet& axes,
                                op::Quantize::RoundMode round_mode)
{
    // The real (unquantized) type is whatever the input carries.
    const auto& input_et = input.get_element_type();
    if (min.get_element_type() != input_et)
    {
        throw ngraph_error("ScaledQuantize: min must match input type");
    }
    if (max.get_element_type() != input_et)
    {
        throw ngraph_error("ScaledQuantize: max must match input type");
    }

    const auto& range_shape = min.get_shape();
    if (range_shape != max.get_shape())
    {
        throw ngraph_error("ScaledQuantize: min and max must have same shape");
    }

    // Zero offset first, then the scale (requested with the bump_by_eps flag set).
    auto zero_point = make_constant(quant_type, range_shape, 0);
    auto quant_scale = quantization_util::get_scale(min, max, quant_type, true);

    return make_shared<op::Quantize>(
        input, quant_scale, zero_point, quant_type, axes, round_mode);
}
// Dequantizes `input` to `real_type` with a scale derived from the [min, max] range.
// min and max must have element type `real_type` and equal shapes;
// an ngraph_error is thrown otherwise.
shared_ptr<Node> ScaledDequantize(const Output<Node>& input,
                                  const Output<Node>& min,
                                  const Output<Node>& max,
                                  const ngraph::element::Type& real_type,
                                  const ngraph::AxisSet& axes)
{
    // The quantized type is whatever the input carries.
    const auto& input_et = input.get_element_type();

    if (min.get_element_type() != real_type)
    {
        throw ngraph_error("ScaledDequantize: min must match output type");
    }
    if (max.get_element_type() != real_type)
    {
        throw ngraph_error("ScaledDequantize: max must match output type");
    }

    const auto& range_shape = min.get_shape();
    if (range_shape != max.get_shape())
    {
        throw ngraph_error("ScaledDequantize: min and max must have same shape");
    }

    auto zero_point = make_constant(input_et, range_shape, 0);
    auto quant_scale = quantization_util::get_scale(min, max, input_et);

    return make_shared<op::Dequantize>(input, quant_scale, zero_point, real_type, axes);
}
// Concatenates quantized tensors that may each use a different [min, max] range.
// Every argument is dequantized to f32 with its own scale and re-quantized with a
// common output scale before the final op::Concat along `concatenation_axis`.
shared_ptr<Node> ScaledQuantizedConcat(const NodeVector& args,
                                       size_t concatenation_axis,
                                       const NodeVector& mins,
                                       const NodeVector& maxs)
{
    quantization_util::check_concat(args, mins, maxs);
    const auto output_et = args[0]->get_element_type();

    // Common output range: reduce the concatenated mins/maxs over axis 0.
    auto range_min =
        make_shared<op::Min>(make_shared<op::Concat>(mins, 0), ngraph::AxisSet{0});
    auto range_max =
        make_shared<op::Max>(make_shared<op::Concat>(maxs, 0), ngraph::AxisSet{0});
    auto output_scale = quantization_util::get_scale(range_min, range_max, output_et);

    NodeVector requantized;
    requantized.reserve(args.size());
    for (size_t idx = 0; idx < args.size(); ++idx)
    {
        const auto arg_et = args[idx]->get_element_type();

        // Per-argument scale, reshaped to a scalar.
        auto input_scale = make_shared<ngraph::op::Reshape>(
            quantization_util::get_scale(mins[idx], maxs[idx], arg_et),
            AxisVector{0},
            Shape{});
        auto zero_point = make_constant(arg_et, input_scale->get_shape(), 0);

        shared_ptr<Node> as_real = make_shared<op::Dequantize>(
            args[idx], input_scale, zero_point, element::f32, AxisSet{});
        requantized.push_back(
            make_shared<op::Quantize>(as_real,
                                      output_scale,
                                      zero_point,
                                      arg_et,
                                      AxisSet{},
                                      op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN));
    }

    return make_shared<op::Concat>(requantized, concatenation_axis);
}
// Builds an op::QuantizedConvolutionBias node. The requantization scale is derived
// from the input/filter/output ranges; the output type used for that scale is u8
// when `with_relu` is set and i8 otherwise. A bias that is not already i32 is
// quantized to i32 first.
shared_ptr<Node> ScaledQuantizedConvolutionBias(const Output<Node>& input,
                                                const Output<Node>& filters,
                                                const Output<Node>& bias,
                                                const Strides& window_movement_strides,
                                                const Strides& window_dilation_strides,
                                                const CoordinateDiff& padding_below,
                                                const CoordinateDiff& padding_above,
                                                const Strides& data_dilation_strides,
                                                const Output<Node>& min_input,
                                                const Output<Node>& max_input,
                                                const Output<Node>& min_filter,
                                                const Output<Node>& max_filter,
                                                const Output<Node>& min_output,
                                                const Output<Node>& max_output,
                                                const bool with_relu)
{
    const element::Type result_et = with_relu ? element::u8 : element::i8;
    auto requant_scale = quantization_util::get_scale(
        min_input, max_input, min_filter, max_filter, min_output, max_output, result_et);

    Output<Node> conv_bias = bias;
    if (bias.get_element_type() != element::i32)
    {
        // Quantize the bias to i32 using the combined input/filter scale.
        auto zero_point = make_constant(element::i32, min_input.get_shape(), 0);
        auto bias_scale =
            quantization_util::get_bias_scale(min_input, max_input, min_filter, max_filter);
        conv_bias =
            make_shared<op::Quantize>(bias,
                                      bias_scale,
                                      zero_point,
                                      element::i32,
                                      AxisSet{},
                                      op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
    }

    return make_shared<op::QuantizedConvolutionBias>(input,
                                                     filters,
                                                     conv_bias,
                                                     window_movement_strides,
                                                     window_dilation_strides,
                                                     padding_below,
                                                     padding_above,
                                                     data_dilation_strides,
                                                     requant_scale,
                                                     with_relu);
}
// Builds an op::QuantizedConvolutionRelu node whose requantization scale is derived
// from the input/filter/output ranges with u8 as the output type.
shared_ptr<Node> ScaledQuantizedConvolutionRelu(const Output<Node>& input,
                                                const Output<Node>& filters,
                                                const Strides& window_movement_strides,
                                                const Strides& window_dilation_strides,
                                                const CoordinateDiff& padding_below,
                                                const CoordinateDiff& padding_above,
                                                const Strides& data_dilation_strides,
                                                const Output<Node>& min_input,
                                                const Output<Node>& max_input,
                                                const Output<Node>& min_filter,
                                                const Output<Node>& max_filter,
                                                const Output<Node>& min_output,
                                                const Output<Node>& max_output)
{
    auto requant_scale = quantization_util::get_scale(
        min_input, max_input, min_filter, max_filter, min_output, max_output, element::u8);

    return make_shared<op::QuantizedConvolutionRelu>(input,
                                                     filters,
                                                     window_movement_strides,
                                                     window_dilation_strides,
                                                     padding_below,
                                                     padding_above,
                                                     data_dilation_strides,
                                                     requant_scale);
}
// Builds an op::QuantizedConvolutionBiasAdd node. Besides the requantization scale
// (u8 output type when `with_relu`, i8 otherwise) a sum scale is derived from the
// output range and the sum-input range. A bias that is not already i32 is
// quantized to i32 first.
shared_ptr<Node> ScaledQuantizedConvolutionBiasAdd(const Output<Node>& input,
                                                   const Output<Node>& filters,
                                                   const Output<Node>& bias,
                                                   const Output<Node>& sum_input,
                                                   const Strides& window_movement_strides,
                                                   const Strides& window_dilation_strides,
                                                   const CoordinateDiff& padding_below,
                                                   const CoordinateDiff& padding_above,
                                                   const Strides& data_dilation_strides,
                                                   const Output<Node>& min_input,
                                                   const Output<Node>& max_input,
                                                   const Output<Node>& min_filter,
                                                   const Output<Node>& max_filter,
                                                   const Output<Node>& min_output,
                                                   const Output<Node>& max_output,
                                                   const Output<Node>& min_sum_input,
                                                   const Output<Node>& max_sum_input,
                                                   const bool with_relu)
{
    const element::Type result_et = with_relu ? element::u8 : element::i8;
    auto requant_scale = quantization_util::get_scale(
        min_input, max_input, min_filter, max_filter, min_output, max_output, result_et);
    auto add_scale = builder::quantization_util::get_sum_scale(
        min_output, max_output, min_sum_input, max_sum_input);

    Output<Node> conv_bias = bias;
    if (bias.get_element_type() != element::i32)
    {
        // Quantize the bias to i32 using the combined input/filter scale.
        auto zero_point = make_constant(element::i32, min_input.get_shape(), 0);
        auto bias_scale =
            quantization_util::get_bias_scale(min_input, max_input, min_filter, max_filter);
        conv_bias =
            make_shared<op::Quantize>(bias,
                                      bias_scale,
                                      zero_point,
                                      element::i32,
                                      AxisSet{},
                                      op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
    }

    return make_shared<op::QuantizedConvolutionBiasAdd>(input,
                                                        filters,
                                                        conv_bias,
                                                        sum_input,
                                                        window_movement_strides,
                                                        window_dilation_strides,
                                                        padding_below,
                                                        padding_above,
                                                        data_dilation_strides,
                                                        requant_scale,
                                                        add_scale,
                                                        with_relu);
}
// Builds an op::QuantizedConvolutionBiasSignedAdd node and converts its result to
// u8. The sum scale is doubled when the requantization output type is u8
// (i.e. when `with_relu` is set). A bias that is not already i32 is quantized to
// i32 first.
shared_ptr<Node>
    ScaledQuantizedConvolutionBiasSignedAdd(const Output<Node>& input,
                                            const Output<Node>& filters,
                                            const Output<Node>& bias,
                                            const Output<Node>& sum_input,
                                            const Strides& window_movement_strides,
                                            const Strides& window_dilation_strides,
                                            const CoordinateDiff& padding_below,
                                            const CoordinateDiff& padding_above,
                                            const Strides& data_dilation_strides,
                                            const Output<Node>& min_input,
                                            const Output<Node>& max_input,
                                            const Output<Node>& min_filter,
                                            const Output<Node>& max_filter,
                                            const Output<Node>& min_output,
                                            const Output<Node>& max_output,
                                            const Output<Node>& min_sum_input,
                                            const Output<Node>& max_sum_input,
                                            const bool with_relu)
{
    const element::Type result_et = with_relu ? element::u8 : element::i8;
    auto requant_scale = quantization_util::get_scale(
        min_input, max_input, min_filter, max_filter, min_output, max_output, result_et);
    auto add_scale = builder::quantization_util::get_sum_scale(
        min_output, max_output, min_sum_input, max_sum_input);

    if (result_et == element::u8)
    {
        // Need to multiply by two to account for u8 requantization_scale
        auto factor = make_constant(element::f32, add_scale->get_shape(), 2.0f);
        add_scale = factor * add_scale;
    }

    Output<Node> conv_bias = bias;
    if (bias.get_element_type() != element::i32)
    {
        // Quantize the bias to i32 using the combined input/filter scale.
        auto zero_point = make_constant(element::i32, min_input.get_shape(), 0);
        auto bias_scale =
            quantization_util::get_bias_scale(min_input, max_input, min_filter, max_filter);
        conv_bias =
            make_shared<op::Quantize>(bias,
                                      bias_scale,
                                      zero_point,
                                      element::i32,
                                      AxisSet{},
                                      op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
    }

    auto signed_add_conv =
        make_shared<op::QuantizedConvolutionBiasSignedAdd>(input,
                                                           filters,
                                                           conv_bias,
                                                           sum_input,
                                                           window_movement_strides,
                                                           window_dilation_strides,
                                                           padding_below,
                                                           padding_above,
                                                           data_dilation_strides,
                                                           requant_scale,
                                                           add_scale,
                                                           with_relu);
    return make_shared<op::Convert>(signed_add_conv, element::u8);
}
// Builds an op::QuantizedDotBias node. The requantization scale comes from
// get_dot_scale using the input's element type and an output type of u8 when
// `with_relu` is set, i8 otherwise. A bias that is not already i32 is quantized
// to i32 first.
shared_ptr<Node> ScaledQuantizedDotBias(const Output<Node>& input,
                                        const Output<Node>& filters,
                                        const Output<Node>& bias,
                                        const Output<Node>& min_input,
                                        const Output<Node>& max_input,
                                        const Output<Node>& min_filter,
                                        const Output<Node>& max_filter,
                                        const Output<Node>& min_output,
                                        const Output<Node>& max_output,
                                        const bool requantize,
                                        const bool with_relu)
{
    auto requant_scale =
        quantization_util::get_dot_scale(min_input,
                                         max_input,
                                         min_filter,
                                         max_filter,
                                         min_output,
                                         max_output,
                                         input.get_element_type(),
                                         with_relu ? element::u8 : element::i8,
                                         requantize);

    Output<Node> dot_bias = bias;
    if (bias.get_element_type() != element::i32)
    {
        // Quantize the bias to i32 using the combined input/filter scale.
        auto zero_point = make_constant(element::i32, min_input.get_shape(), 0);
        auto bias_scale =
            quantization_util::get_bias_scale(min_input, max_input, min_filter, max_filter);
        dot_bias =
            make_shared<op::Quantize>(bias,
                                      bias_scale,
                                      zero_point,
                                      element::i32,
                                      AxisSet{},
                                      op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
    }

    return make_shared<op::QuantizedDotBias>(
        input, filters, dot_bias, requant_scale, requantize, with_relu);
}
}
// namespace builder
}
// namespace ngraph
src/ngraph/builder/quantization.hpp
deleted
100644 → 0
View file @
98205845
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/dequantize.hpp"
#include "ngraph/op/experimental/quantized_conv_bias.hpp"
#include "ngraph/op/experimental/quantized_conv_relu.hpp"
#include "ngraph/op/experimental/quantized_dot_bias.hpp"
#include "ngraph/op/quantize.hpp"
namespace
ngraph
{
namespace
builder
{
// Quantizes `input` to element type `type`, deriving the scale from [min, max].
// min and max must match the input's element type and have equal shapes.
std::shared_ptr<Node> ScaledQuantize(const Output<Node>& input,
                                     const Output<Node>& min,
                                     const Output<Node>& max,
                                     const element::Type& type,
                                     const AxisSet& axes,
                                     op::Quantize::RoundMode round_mode);
// Dequantizes `input` to element type `type`, deriving the scale from [min, max].
// min and max must have element type `type` and equal shapes.
std::shared_ptr<Node> ScaledDequantize(const Output<Node>& input,
                                       const Output<Node>& min,
                                       const Output<Node>& max,
                                       const element::Type& type,
                                       const AxisSet& axes);
// Concatenates quantized `args` along `concatenation_axis`, rescaling each one
// from its own [mins[i], maxes[i]] range to a common output range.
std::shared_ptr<Node> ScaledQuantizedConcat(const NodeVector& args,
                                            size_t concatenation_axis,
                                            const NodeVector& mins,
                                            const NodeVector& maxes);
// Quantized convolution with bias. The min/max arguments give the ranges of the
// input, filter and output from which the requantization scale is derived.
std::shared_ptr<Node> ScaledQuantizedConvolutionBias(const Output<Node>& input,
                                                     const Output<Node>& filters,
                                                     const Output<Node>& bias,
                                                     const Strides& window_movement_strides,
                                                     const Strides& window_dilation_strides,
                                                     const CoordinateDiff& padding_below,
                                                     const CoordinateDiff& padding_above,
                                                     const Strides& data_dilation_strides,
                                                     const Output<Node>& min_input,
                                                     const Output<Node>& max_input,
                                                     const Output<Node>& min_filter,
                                                     const Output<Node>& max_filter,
                                                     const Output<Node>& min_output,
                                                     const Output<Node>& max_output,
                                                     bool with_relu = false);
// Quantized convolution fused with relu. The min/max arguments give the ranges of
// the input, filter and output from which the requantization scale is derived.
std::shared_ptr<Node> ScaledQuantizedConvolutionRelu(const Output<Node>& input,
                                                     const Output<Node>& filters,
                                                     const Strides& window_movement_strides,
                                                     const Strides& window_dilation_strides,
                                                     const CoordinateDiff& padding_below,
                                                     const CoordinateDiff& padding_above,
                                                     const Strides& data_dilation_strides,
                                                     const Output<Node>& min_input,
                                                     const Output<Node>& max_input,
                                                     const Output<Node>& min_filter,
                                                     const Output<Node>& max_filter,
                                                     const Output<Node>& min_output,
                                                     const Output<Node>& max_output);
// Quantized convolution with bias and an added `sum_input`. The min/max arguments
// give the ranges used to derive the requantization scale and the sum scale.
std::shared_ptr<Node> ScaledQuantizedConvolutionBiasAdd(const Output<Node>& input,
                                                        const Output<Node>& filters,
                                                        const Output<Node>& bias,
                                                        const Output<Node>& sum_input,
                                                        const Strides& window_movement_strides,
                                                        const Strides& window_dilation_strides,
                                                        const CoordinateDiff& padding_below,
                                                        const CoordinateDiff& padding_above,
                                                        const Strides& data_dilation_strides,
                                                        const Output<Node>& min_input,
                                                        const Output<Node>& max_input,
                                                        const Output<Node>& min_filter,
                                                        const Output<Node>& max_filter,
                                                        const Output<Node>& min_output,
                                                        const Output<Node>& max_output,
                                                        const Output<Node>& min_sum_input,
                                                        const Output<Node>& max_sum_input,
                                                        bool with_relu = false);
// Quantized convolution with bias and a signed add of `sum_input`; the result is
// converted to u8. The min/max arguments give the ranges used to derive the
// requantization scale and the sum scale.
std::shared_ptr<Node>
    ScaledQuantizedConvolutionBiasSignedAdd(const Output<Node>& input,
                                            const Output<Node>& filters,
                                            const Output<Node>& bias,
                                            const Output<Node>& sum_input,
                                            const Strides& window_movement_strides,
                                            const Strides& window_dilation_strides,
                                            const CoordinateDiff& padding_below,
                                            const CoordinateDiff& padding_above,
                                            const Strides& data_dilation_strides,
                                            const Output<Node>& min_input,
                                            const Output<Node>& max_input,
                                            const Output<Node>& min_filter,
                                            const Output<Node>& max_filter,
                                            const Output<Node>& min_output,
                                            const Output<Node>& max_output,
                                            const Output<Node>& min_sum_input,
                                            const Output<Node>& max_sum_input,
                                            bool with_relu = false);
// Quantized dot product with bias. The min/max arguments give the ranges of the
// input, filter and output from which the requantization scale is derived.
std::shared_ptr<Node> ScaledQuantizedDotBias(const Output<Node>& input,
                                             const Output<Node>& filters,
                                             const Output<Node>& bias,
                                             const Output<Node>& min_input,
                                             const Output<Node>& max_input,
                                             const Output<Node>& min_filter,
                                             const Output<Node>& max_filter,
                                             const Output<Node>& min_output,
                                             const Output<Node>& max_output,
                                             bool requantize = true,
                                             bool with_relu = false);
}
// namespace builder
}
// namespace ngraph
src/ngraph/builder/quantization/quantized_linear_convolution.cpp
View file @
f3b9389c
...
...
@@ -17,7 +17,6 @@
#include "ngraph/builder/quantization/quantized_linear_convolution.hpp"
#include "ngraph/axis_set.hpp"
#include "ngraph/builder/make_constant.hpp"
#include "ngraph/builder/quantization.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convolution.hpp"
#include "ngraph/op/dequantize.hpp"
...
...
src/ngraph/builder/quantization_util.hpp
deleted
100644 → 0
View file @
98205845
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <limits>
#include <vector>
#include "ngraph/builder/make_constant.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/abs.hpp"
#include "ngraph/op/add.hpp"
#include "ngraph/op/broadcast.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/divide.hpp"
#include "ngraph/op/maximum.hpp"
#include "ngraph/op/minimum.hpp"
#include "ngraph/op/multiply.hpp"
#include "ngraph/op/subtract.hpp"
#include "ngraph/util.hpp"
namespace
ngraph
{
namespace
builder
{
namespace
quantization_util
{
// Returns a node computing the element-wise maximum of |a| and |b|.
//
// Declared inline because this definition lives in a header: without it, every
// translation unit that includes the header emits its own external definition,
// which violates the one-definition rule and fails at link time.
inline std::shared_ptr<Node> max_abs(Output<Node> a, Output<Node> b)
{
    auto abs_a = std::make_shared<op::Abs>(a);
    auto abs_b = std::make_shared<op::Abs>(b);
    return std::make_shared<op::Maximum>(abs_a, abs_b);
}
// Computes the (min, max) range of a product accumulated in int32, given the
// ranges of its two operands. One quantization level of `a` is measured against
// the uint8 value range and one level of `b` against the int8 value range; the
// product of the two level sizes is then scaled by the int32 limits.
//
// All four bounds must share one element type and one shape, otherwise
// ngraph_error is thrown.
//
// Declared inline because this definition lives in a header: a non-inline
// definition would be emitted by every including translation unit and violate
// the one-definition rule.
inline std::pair<std::shared_ptr<Node>, std::shared_ptr<Node>>
    quantization_range_for_multiplication(Output<Node> min_a,
                                          Output<Node> max_a,
                                          Output<Node> min_b,
                                          Output<Node> max_b)
{
    auto type = min_a.get_element_type();
    if (type != max_a.get_element_type() || type != min_b.get_element_type() ||
        type != max_b.get_element_type())
    {
        throw ngraph_error(
            "quantization_range_for_multiplication: min and max must have same type");
    }

    auto shape = min_a.get_shape();
    if (shape != max_a.get_shape() || shape != min_b.get_shape() ||
        shape != max_b.get_shape())
    {
        throw ngraph_error(
            "quantization_range_for_multiplication: min and max must have same shape");
    }

    // Number of representable steps in the uint8 and int8 value ranges.
    auto u8_range = make_constant(type,
                                  shape,
                                  std::numeric_limits<uint8_t>::max() -
                                      std::numeric_limits<uint8_t>::min());
    auto i8_range = make_constant(type,
                                  shape,
                                  std::numeric_limits<int8_t>::max() -
                                      std::numeric_limits<int8_t>::min());

    // Real-valued size of one quantization level for each operand and the product.
    auto a_one_quant_level = (max_a - min_a) / u8_range;
    auto b_one_quant_level = (max_b - min_b) / i8_range;
    auto c_one_quant_level = a_one_quant_level * b_one_quant_level;

    auto i32_min = make_constant(type, shape, std::numeric_limits<int32_t>::min());
    auto i32_max = make_constant(type, shape, std::numeric_limits<int32_t>::max());
    auto min_c = c_one_quant_level * i32_min;
    auto max_c = c_one_quant_level * i32_max;
    return std::pair<std::shared_ptr<Node>, std::shared_ptr<Node>>(min_c, max_c);
}
// Computes the requantization scale that maps the int32 accumulator of a
// quantized multiplication back to `output_type`, from the ranges of the input,
// the filter and the (frozen) output.
//
// All six bounds must share one element type and one shape, otherwise
// ngraph_error is thrown.
//
// Declared inline because this definition lives in a header: a non-inline
// definition would be emitted by every including translation unit and violate
// the one-definition rule.
inline std::shared_ptr<Node> get_scale(Output<Node> min_input,
                                       Output<Node> max_input,
                                       Output<Node> min_filter,
                                       Output<Node> max_filter,
                                       Output<Node> min_freezed_output,
                                       Output<Node> max_freezed_output,
                                       const ngraph::element::Type& output_type)
{
    auto type = min_input.get_element_type();
    if (type != max_input.get_element_type() || type != min_filter.get_element_type() ||
        type != max_filter.get_element_type() ||
        type != min_freezed_output.get_element_type() ||
        type != max_freezed_output.get_element_type())
    {
        throw ngraph_error("get_scale: min and max must have same type");
    }

    auto shape = min_input.get_shape();
    if (shape != max_input.get_shape() || shape != min_filter.get_shape() ||
        shape != max_filter.get_shape() || shape != min_freezed_output.get_shape() ||
        shape != max_freezed_output.get_shape())
    {
        throw ngraph_error("get_scale: min and max must have same shape");
    }

    auto ranges =
        quantization_range_for_multiplication(min_input, max_input, min_filter, max_filter);
    auto min_out_value = ranges.first;
    auto max_out_value = ranges.second;

    auto max_abs32 = max_abs(min_out_value, max_out_value);
    auto max_abs8 = max_abs(min_freezed_output, max_freezed_output);

    // The output of int8 convolution is accumulated in int32.
    // Mkldnn needs a scale to requantize the output back to {u}int8 based on
    // if relu is fused or not.
    // Equation to go from f32 to s32. std::pow(2, 31)/ max_abs32 can be thought of
    // as the scale used for the quantization.
    // 1. s32 = f32 * std::pow(2, 31)/ max_abs32;
    // Equation to go from f32 to u8.
    // 2. u8 = f32 * std::pow(2, 8)/ max_abs8;
    // Equation to go from f32 to s8.
    // 3. s8 = f32 * std::pow(2, 7)/ max_abs8;
    // Replacing f32 from eq 1 in eq 2.
    // 4. u8 = s32 * std::pow(2, -23) * max_abs32 / max_abs8;
    // Replacing f32 from eq 1 in eq 3.
    // 5. s8 = s32 * std::pow(2, -24) * max_abs32 / max_abs8;
    return make_constant(
               type, shape, std::pow(2, (output_type == element::i8) ? -24 : -23)) *
           (max_abs32 / max_abs8);
}
// Computes the scale used to quantize a bias:
// (max|input range| * max|filter range|) / (uint8 max * int8 max).
//
// All four bounds must share one element type and one shape, otherwise
// ngraph_error is thrown.
//
// Declared inline because this definition lives in a header: a non-inline
// definition would be emitted by every including translation unit and violate
// the one-definition rule.
inline std::shared_ptr<Node> get_bias_scale(Output<Node> min_input,
                                            Output<Node> max_input,
                                            Output<Node> min_filter,
                                            Output<Node> max_filter)
{
    auto type = min_input.get_element_type();
    if (type != max_input.get_element_type() || type != min_filter.get_element_type() ||
        type != max_filter.get_element_type())
    {
        throw ngraph_error("get_bias_scale: min and max must have same type");
    }

    auto shape = min_input.get_shape();
    if (shape != max_input.get_shape() || shape != min_filter.get_shape() ||
        shape != max_filter.get_shape())
    {
        throw ngraph_error("get_bias_scale: min and max must have same shape");
    }

    auto max_abs_input_range = max_abs(min_input, max_input);
    auto max_abs_filter_range = max_abs(min_filter, max_filter);
    auto range = make_constant(type,
                               shape,
                               std::numeric_limits<uint8_t>::max() *
                                   std::numeric_limits<int8_t>::max());

    // Inverting the scale calculation here as the Quantize op passes scale as 1/scale.
    return (max_abs_input_range * max_abs_filter_range) / range;
}
/// \brief Builds the graph that computes the scale applied to the sum input.
///
/// The returned node evaluates
/// max_abs(min/max of conv_2) / max_abs(min/max of conv_1).
///
/// \param min_freezed_output_conv_1 Lower bound of the first output range.
/// \param max_freezed_output_conv_1 Upper bound of the first output range.
/// \param min_freezed_output_conv_2 Lower bound of the second output range.
/// \param max_freezed_output_conv_2 Upper bound of the second output range.
/// \return Node computing the ratio of the two ranges.
/// \throws ngraph_error if the four range values differ in element type or in shape.
std::shared_ptr<Node> get_sum_scale(Output<Node> min_freezed_output_conv_1,
                                    Output<Node> max_freezed_output_conv_1,
                                    Output<Node> min_freezed_output_conv_2,
                                    Output<Node> max_freezed_output_conv_2)
{
    auto et = min_freezed_output_conv_1.get_element_type();
    const bool types_agree = et == max_freezed_output_conv_1.get_element_type() &&
                             et == min_freezed_output_conv_2.get_element_type() &&
                             et == max_freezed_output_conv_2.get_element_type();
    if (!types_agree)
    {
        throw ngraph_error("get_sum_scale: min and max must have same type");
    }

    const auto dims = min_freezed_output_conv_1.get_shape();
    const bool shapes_agree = dims == max_freezed_output_conv_1.get_shape() &&
                              dims == min_freezed_output_conv_2.get_shape() &&
                              dims == max_freezed_output_conv_2.get_shape();
    if (!shapes_agree)
    {
        throw ngraph_error("get_sum_scale: min and max must have same shape");
    }

    auto first_magnitude = max_abs(min_freezed_output_conv_1, max_freezed_output_conv_1);
    auto second_magnitude = max_abs(min_freezed_output_conv_2, max_freezed_output_conv_2);
    return second_magnitude / first_magnitude;
}
/// \brief Builds the graph that computes a quantization scale from a [min, max] range.
///
/// The returned node evaluates max_abs(min, max) / target, where target is
/// 2^(bitwidth - 1) - 1 for a signed quant_type and 2^bitwidth - 1 otherwise.
///
/// \param input_min_range Lower bound of the range.
/// \param input_max_range Upper bound of the range.
/// \param quant_type      Quantized element type; supplies bit width and signedness.
/// \param bump_by_eps     When true the range is adjusted before the scale is computed:
///                        min becomes min(0, min) and max becomes
///                        max(0, max(max, min + epsilon)) with
///                        epsilon = max(1, max_abs(min, max)) / 100.
/// \return Node computing the scale.
/// \throws ngraph_error if min and max differ in element type or in shape.
std::shared_ptr<Node> get_scale(Output<Node> input_min_range,
                                Output<Node> input_max_range,
                                const ngraph::element::Type& quant_type,
                                bool bump_by_eps = false)
{
    auto et = input_min_range.get_element_type();
    if (!(et == input_max_range.get_element_type()))
    {
        throw ngraph_error("get_scale: min and max must have same type");
    }

    const auto dims = input_min_range.get_shape();
    if (!(dims == input_max_range.get_shape()))
    {
        throw ngraph_error("get_scale: min and max must have same shape");
    }

    Output<Node> lower = input_min_range;
    Output<Node> upper = input_max_range;

    if (bump_by_eps)
    {
        auto zero = make_constant(et, dims, 0);
        lower = std::make_shared<op::Minimum>(zero, input_min_range);

        auto magnitude = max_abs(input_min_range, input_max_range);
        auto one = make_constant(et, dims, 1);
        auto hundred = make_constant(et, dims, 100);

        // epsilon = max(1, max_abs(min, max)) / 100
        auto at_least_one = std::make_shared<op::Maximum>(one, magnitude);
        auto epsilon = at_least_one / hundred;

        // upper = max(0, max(input_max, lower + epsilon))
        auto widened = std::make_shared<op::Maximum>(input_max_range, lower + epsilon);
        upper = std::make_shared<op::Maximum>(zero, widened);
    }

    // Largest value of the target type: one bit fewer for signed types.
    const size_t bit_count = quant_type.bitwidth();
    const size_t exponent = quant_type.is_signed() ? bit_count - 1 : bit_count;
    const float levels = static_cast<float>(std::pow(2, exponent) - 1);

    auto range_magnitude = max_abs(lower, upper);
    auto target = make_constant(et, dims, levels);
    return range_magnitude / target;
}
void
check_concat
(
const
NodeVector
&
args
,
const
NodeVector
&
mins
,
const
NodeVector
&
maxs
)
{
auto
size
=
args
.
size
();
if
(
size
!=
mins
.
size
()
||
size
!=
maxs
.
size
())
{
throw
ngraph_error
(
"Min and Max node vectors must be of same length"
);
}
for
(
size_t
i
=
0
;
i
<
size
;
i
++
)
{
auto
min
=
mins
[
i
];
auto
max
=
maxs
[
i
];
auto
type
=
min
->
get_element_type
();
if
(
type
!=
max
->
get_element_type
())
{
throw
ngraph_error
(
"check_concat: min and max must have same type"
);
}
if
(
min
->
get_shape
()
!=
Shape
{
1
}
||
max
->
get_shape
()
!=
Shape
{
1
})
{
throw
ngraph_error
(
"check_concat: min/max shape not Shape{1}: "
+
vector_to_string
(
min
->
get_shape
())
+
vector_to_string
(
max
->
get_shape
()));
}
}
}
/// \brief Builds the graph that computes the scale for a quantized dot.
///
/// data_scale, weight_scale and out_scale are obtained from get_scale() for the
/// input range (with input_type), the filter range (with element::i8) and the output
/// range (with output_type). The result is data_scale * weight_scale / out_scale when
/// requantize is true and data_scale * weight_scale otherwise.
///
/// \param min_input          Lower bound of the input range.
/// \param max_input          Upper bound of the input range.
/// \param min_filter         Lower bound of the filter range.
/// \param max_filter         Upper bound of the filter range.
/// \param min_freezed_output Lower bound of the output range.
/// \param max_freezed_output Upper bound of the output range.
/// \param input_type         Quantized element type of the input.
/// \param output_type        Quantized element type of the output.
/// \param requantize         Whether the output scale is divided out of the result.
/// \return Node computing the scale.
/// \throws ngraph_error if the six range values differ in element type or in shape.
std::shared_ptr<Node> get_dot_scale(Output<Node> min_input,
                                    Output<Node> max_input,
                                    Output<Node> min_filter,
                                    Output<Node> max_filter,
                                    Output<Node> min_freezed_output,
                                    Output<Node> max_freezed_output,
                                    const ngraph::element::Type& input_type,
                                    const ngraph::element::Type& output_type,
                                    const bool requantize = true)
{
    auto et = min_input.get_element_type();
    const auto type_differs = [&et](const Output<Node>& range) {
        return et != range.get_element_type();
    };
    if (type_differs(max_input) || type_differs(min_filter) || type_differs(max_filter) ||
        type_differs(min_freezed_output) || type_differs(max_freezed_output))
    {
        throw ngraph_error("get_dot_scale: min and max must have same type");
    }

    const auto dims = min_input.get_shape();
    const auto shape_differs = [&dims](const Output<Node>& range) {
        return dims != range.get_shape();
    };
    if (shape_differs(max_input) || shape_differs(min_filter) || shape_differs(max_filter) ||
        shape_differs(min_freezed_output) || shape_differs(max_freezed_output))
    {
        throw ngraph_error("get_dot_scale: min and max must have same shape");
    }

    auto data_scale = get_scale(min_input, max_input, input_type);
    auto weight_scale = get_scale(min_filter, max_filter, element::i8);
    // Built in both modes, exactly as before, even though it is only used when requantizing.
    auto out_scale = get_scale(min_freezed_output, max_freezed_output, output_type);

    auto combined = data_scale * weight_scale;
    if (!requantize)
    {
        return combined;
    }
    return combined / out_scale;
}
}
// namespace quantization_util
}
// namespace builder
}
// namespace ngraph
src/ngraph/builder/quantization_utils.cpp
View file @
f3b9389c
...
...
@@ -74,6 +74,128 @@ namespace ngraph
return
max_abs_range
/
target_range
;
}
/// \brief Builds the graph that computes the scale used when quantizing a bias.
///
/// The returned node evaluates
/// (max_abs(min_input, max_input) * max_abs(min_filter, max_filter)) / (255 * 127),
/// where 255 and 127 are the uint8_t and int8_t maxima.
///
/// \return Node computing the bias scale.
/// \throws ngraph_error if the four range values differ in element type or in shape.
std::shared_ptr<Node> get_bias_scale(Output<Node> min_input,
                                     Output<Node> max_input,
                                     Output<Node> min_filter,
                                     Output<Node> max_filter)
{
    auto elem_type = min_input.get_element_type();
    const auto type_mismatch = [&elem_type](const Output<Node>& other) {
        return elem_type != other.get_element_type();
    };
    if (type_mismatch(max_input) || type_mismatch(min_filter) || type_mismatch(max_filter))
    {
        throw ngraph_error("get_bias_scale: min and max must have same type");
    }

    const auto range_shape = min_input.get_shape();
    const auto shape_mismatch = [&range_shape](const Output<Node>& other) {
        return range_shape != other.get_shape();
    };
    if (shape_mismatch(max_input) || shape_mismatch(min_filter) || shape_mismatch(max_filter))
    {
        throw ngraph_error("get_bias_scale: min and max must have same shape");
    }

    auto input_extent = max_abs(min_input, max_input);
    auto filter_extent = max_abs(min_filter, max_filter);

    // Product of the uint8_t and int8_t maxima, broadcast to the shape of the ranges.
    auto denominator =
        make_constant(elem_type,
                      range_shape,
                      std::numeric_limits<uint8_t>::max() * std::numeric_limits<int8_t>::max());

    // Inverting the scale calculation here as the Quantize op passes scale as 1/scale.
    return (input_extent * filter_extent) / denominator;
}
/// \brief Builds the graph that computes the scale applied to the sum input.
///
/// The returned node evaluates
/// max_abs(min/max of conv_2) / max_abs(min/max of conv_1).
///
/// \return Node computing the ratio of the two ranges.
/// \throws ngraph_error if the four range values differ in element type or in shape.
std::shared_ptr<Node> get_sum_scale(Output<Node> min_freezed_output_conv_1,
                                    Output<Node> max_freezed_output_conv_1,
                                    Output<Node> min_freezed_output_conv_2,
                                    Output<Node> max_freezed_output_conv_2)
{
    auto elem_type = min_freezed_output_conv_1.get_element_type();
    const auto type_mismatch = [&elem_type](const Output<Node>& other) {
        return elem_type != other.get_element_type();
    };
    if (type_mismatch(max_freezed_output_conv_1) || type_mismatch(min_freezed_output_conv_2) ||
        type_mismatch(max_freezed_output_conv_2))
    {
        throw ngraph_error("get_sum_scale: min and max must have same type");
    }

    const auto range_shape = min_freezed_output_conv_1.get_shape();
    const auto shape_mismatch = [&range_shape](const Output<Node>& other) {
        return range_shape != other.get_shape();
    };
    if (shape_mismatch(max_freezed_output_conv_1) ||
        shape_mismatch(min_freezed_output_conv_2) || shape_mismatch(max_freezed_output_conv_2))
    {
        throw ngraph_error("get_sum_scale: min and max must have same shape");
    }

    auto conv_1_extent = max_abs(min_freezed_output_conv_1, max_freezed_output_conv_1);
    auto conv_2_extent = max_abs(min_freezed_output_conv_2, max_freezed_output_conv_2);
    return conv_2_extent / conv_1_extent;
}
/// \brief Builds the graph that computes the scale for a quantized dot.
///
/// data_scale, weight_scale and out_scale are obtained from get_scale() for the
/// input range (with input_type), the filter range (with element::i8) and the output
/// range (with output_type). The result is data_scale * weight_scale / out_scale when
/// requantize is true and data_scale * weight_scale otherwise.
///
/// \return Node computing the scale.
/// \throws ngraph_error if the six range values differ in element type or in shape.
std::shared_ptr<Node> get_dot_scale(Output<Node> min_input,
                                    Output<Node> max_input,
                                    Output<Node> min_filter,
                                    Output<Node> max_filter,
                                    Output<Node> min_freezed_output,
                                    Output<Node> max_freezed_output,
                                    const ngraph::element::Type& input_type,
                                    const ngraph::element::Type& output_type,
                                    const bool requantize)
{
    auto elem_type = min_input.get_element_type();
    const bool types_agree = elem_type == max_input.get_element_type() &&
                             elem_type == min_filter.get_element_type() &&
                             elem_type == max_filter.get_element_type() &&
                             elem_type == min_freezed_output.get_element_type() &&
                             elem_type == max_freezed_output.get_element_type();
    if (!types_agree)
    {
        throw ngraph_error("get_dot_scale: min and max must have same type");
    }

    const auto range_shape = min_input.get_shape();
    const bool shapes_agree = range_shape == max_input.get_shape() &&
                              range_shape == min_filter.get_shape() &&
                              range_shape == max_filter.get_shape() &&
                              range_shape == min_freezed_output.get_shape() &&
                              range_shape == max_freezed_output.get_shape();
    if (!shapes_agree)
    {
        throw ngraph_error("get_dot_scale: min and max must have same shape");
    }

    auto data_scale = get_scale(min_input, max_input, input_type);
    auto weight_scale = get_scale(min_filter, max_filter, element::i8);
    // Built in both modes, exactly as before, even though it is only used when requantizing.
    auto out_scale = get_scale(min_freezed_output, max_freezed_output, output_type);

    auto product = data_scale * weight_scale;
    if (!requantize)
    {
        return product;
    }
    return product / out_scale;
}
void
check_concat
(
const
NodeVector
&
args
,
const
NodeVector
&
mins
,
const
NodeVector
&
maxs
)
{
auto
size
=
args
.
size
();
if
(
size
!=
mins
.
size
()
||
size
!=
maxs
.
size
())
{
throw
ngraph_error
(
"Min and Max node vectors must be of same length"
);
}
for
(
size_t
i
=
0
;
i
<
size
;
i
++
)
{
auto
min
=
mins
[
i
];
auto
max
=
maxs
[
i
];
auto
type
=
min
->
get_element_type
();
if
(
type
!=
max
->
get_element_type
())
{
throw
ngraph_error
(
"check_concat: min and max must have same type"
);
}
if
(
min
->
get_shape
()
!=
Shape
{
1
}
||
max
->
get_shape
()
!=
Shape
{
1
})
{
throw
ngraph_error
(
"check_concat: min/max shape not Shape{1}: "
+
vector_to_string
(
min
->
get_shape
())
+
vector_to_string
(
max
->
get_shape
()));
}
}
}
}
}
}
src/ngraph/builder/quantization_utils.hpp
View file @
f3b9389c
...
...
@@ -43,6 +43,30 @@ namespace ngraph
const
Output
<
Node
>&
input_max_range
,
const
ngraph
::
element
::
Type
&
quant_type
,
bool
bump_by_eps
=
false
);
/// \brief Builds the graph computing
///        (max_abs(input range) * max_abs(filter range)) / (255 * 127).
/// \throws ngraph_error if the four range values differ in element type or in shape.
std::shared_ptr<Node> get_bias_scale(Output<Node> min_input,
                                     Output<Node> max_input,
                                     Output<Node> min_filter,
                                     Output<Node> max_filter);
/// \brief Builds the graph computing
///        max_abs(conv_2 output range) / max_abs(conv_1 output range).
/// \throws ngraph_error if the four range values differ in element type or in shape.
std::shared_ptr<Node> get_sum_scale(Output<Node> min_freezed_output_conv_1,
                                    Output<Node> max_freezed_output_conv_1,
                                    Output<Node> min_freezed_output_conv_2,
                                    Output<Node> max_freezed_output_conv_2);
/// \brief Builds the graph computing the scale for a quantized dot.
///
/// The result is data_scale * weight_scale / out_scale when requantize is true and
/// data_scale * weight_scale otherwise; the three scales come from get_scale() applied
/// to the input, filter (as element::i8) and output ranges.
/// \throws ngraph_error if the six range values differ in element type or in shape.
std::shared_ptr<Node> get_dot_scale(Output<Node> min_input,
                                    Output<Node> max_input,
                                    Output<Node> min_filter,
                                    Output<Node> max_filter,
                                    Output<Node> min_freezed_output,
                                    Output<Node> max_freezed_output,
                                    const ngraph::element::Type& input_type,
                                    const ngraph::element::Type& output_type,
                                    const bool requantize = true);
/// \brief Validates the min/max vectors passed alongside the arguments of a quantized
///        concat.
/// \throws ngraph_error if the vectors differ in length, if a min/max pair differs in
///         element type, or if a min or max does not have shape Shape{1}.
void check_concat(const NodeVector& args, const NodeVector& mins, const NodeVector& maxs);
}
}
}
src/ngraph/builder/quantize_builder.cpp
0 → 100644
View file @
f3b9389c
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <memory>
#include "ngraph/builder/quantize_builder.hpp"
using
namespace
std
;
using
namespace
ngraph
;
namespace ngraph
{
    namespace builder
    {
        /// \brief Creates an op::Quantize node whose scale is derived from a [min, max] range.
        ///
        /// The scale comes from quantization_utils::get_scale() with bump_by_eps enabled,
        /// and the zero point is a constant 0 of quant_type with the shape of min.
        ///
        /// \param input      Value to quantize.
        /// \param min        Lower bound of the range; must have the element type of input.
        /// \param max        Upper bound of the range; must have the element type of input
        ///                   and the shape of min.
        /// \param quant_type Element type of the quantized result.
        /// \param axes       Quantization axes forwarded to op::Quantize.
        /// \param round_mode Rounding mode forwarded to op::Quantize.
        /// \return The op::Quantize node.
        /// \throws ngraph_error if min or max does not match the element type of input, or
        ///         if min and max differ in shape.
        shared_ptr<Node> QuantizeBuilder(const Output<Node>& input,
                                         const Output<Node>& min,
                                         const Output<Node>& max,
                                         const ngraph::element::Type& quant_type,
                                         const ngraph::AxisSet& axes,
                                         op::Quantize::RoundMode round_mode)
        {
            auto input_et = input.get_element_type();
            if (!(min.get_element_type() == input_et))
            {
                throw ngraph_error("QuantizeBuilder: min must match input type");
            }
            if (!(max.get_element_type() == input_et))
            {
                throw ngraph_error("QuantizeBuilder: max must match input type");
            }

            const auto range_shape = min.get_shape();
            if (!(range_shape == max.get_shape()))
            {
                throw ngraph_error("QuantizeBuilder: min and max must have same shape");
            }

            auto zero_point = make_constant(quant_type, range_shape, 0);
            auto quant_scale = quantization_utils::get_scale(min, max, quant_type, true);
            return make_shared<op::Quantize>(
                input, quant_scale, zero_point, quant_type, axes, round_mode);
        }
    }
}
src/ngraph/builder/quantize_builder.hpp
0 → 100644
View file @
f3b9389c
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/builder/make_constant.hpp"
#include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/quantize.hpp"
#include "quantization_utils.hpp"
namespace ngraph
{
    namespace builder
    {
        /// \brief Creates an op::Quantize node whose scale is derived from a [min, max] range.
        /// \param input      Value to quantize.
        /// \param min        Lower bound of the range.
        /// \param max        Upper bound of the range.
        /// \param quant_type Element type of the quantized result.
        /// \param axes       Quantization axes.
        /// \param round_mode Rounding mode used by the quantization.
        std::shared_ptr<Node> QuantizeBuilder(const Output<Node>& input,
                                              const Output<Node>& min,
                                              const Output<Node>& max,
                                              const ngraph::element::Type& quant_type,
                                              const ngraph::AxisSet& axes,
                                              op::Quantize::RoundMode round_mode);
    }
}
src/ngraph/builder/quantized_concat_builder.cpp
0 → 100644
View file @
f3b9389c
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#include <memory>
#include "ngraph/builder/quantized_concat_builder.hpp"
using
namespace
std
;
using
namespace
ngraph
;
namespace ngraph
{
    namespace builder
    {
        /// \brief Concatenates quantized tensors after bringing them to one common scale.
        ///
        /// A single output scale is derived from op::Min over the concatenated mins and
        /// op::Max over the concatenated maxs. Every argument is dequantized to f32 with
        /// the scale of its own [mins[i], maxs[i]] range and quantized again with the
        /// output scale; the rescaled arguments are then concatenated.
        ///
        /// \param args               Quantized nodes to concatenate; must not be empty.
        /// \param concatenation_axis Axis along which the arguments are concatenated.
        /// \param mins               One range minimum per argument.
        /// \param maxs               One range maximum per argument.
        /// \return The op::Concat node over the rescaled arguments.
        /// \throws ngraph_error if args is empty or if check_concat() rejects the inputs.
        shared_ptr<Node> QuantizedConcatBuilder(const NodeVector& args,
                                                size_t concatenation_axis,
                                                const NodeVector& mins,
                                                const NodeVector& maxs)
        {
            quantization_utils::check_concat(args, mins, maxs);

            // Three empty vectors have matching lengths, so they pass the check above;
            // reject them here instead of reading args[0] out of bounds below.
            if (args.empty())
            {
                throw ngraph_error("QuantizedConcatBuilder: args must not be empty");
            }

            auto quant_type = args[0]->get_element_type();

            // output scale
            auto min = make_shared<op::Min>(make_shared<op::Concat>(mins, 0), ngraph::AxisSet{0});
            auto max = make_shared<op::Max>(make_shared<op::Concat>(maxs, 0), ngraph::AxisSet{0});
            auto out_scale = quantization_utils::get_scale(min, max, quant_type);

            NodeVector rescaled_args(args.size());
            for (size_t i = 0; i < args.size(); ++i)
            {
                auto q_type = args[i]->get_element_type();

                // The per-argument scale is reshaped from Shape{1} to a scalar.
                auto in_scale = make_shared<ngraph::op::Reshape>(
                    quantization_utils::get_scale(mins[i], maxs[i], q_type),
                    AxisVector{0},
                    Shape{});
                auto zero = make_constant(q_type, in_scale->get_shape(), 0);

                // Dequantize with the argument's own scale, then quantize with the
                // common output scale.
                rescaled_args[i] = make_shared<op::Dequantize>(
                    args[i], in_scale, zero, element::f32, AxisSet{});
                rescaled_args[i] =
                    make_shared<op::Quantize>(rescaled_args[i],
                                              out_scale,
                                              zero,
                                              q_type,
                                              AxisSet{},
                                              op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
            }

            return make_shared<op::Concat>(rescaled_args, concatenation_axis);
        }
    }
}
src/ngraph/builder/quantized_concat_builder.hpp
0 → 100644
View file @
f3b9389c
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/builder/make_constant.hpp"
#include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/concat.hpp"
#include "ngraph/op/dequantize.hpp"
#include "ngraph/op/max.hpp"
#include "ngraph/op/min.hpp"
#include "ngraph/op/quantize.hpp"
#include "ngraph/op/reshape.hpp"
#include "quantization_utils.hpp"
namespace ngraph
{
    namespace builder
    {
        /// \brief Concatenates quantized tensors after bringing them to one common scale.
        /// \param args               Quantized nodes to concatenate.
        /// \param concatenation_axis Axis along which the arguments are concatenated.
        /// \param mins               One range minimum per argument.
        /// \param maxs               One range maximum per argument.
        std::shared_ptr<Node> QuantizedConcatBuilder(const NodeVector& args,
                                                     size_t concatenation_axis,
                                                     const NodeVector& mins,
                                                     const NodeVector& maxs);
    }
}
src/ngraph/builder/quantized_conv_builder.cpp
View file @
f3b9389c
...
...
@@ -17,7 +17,6 @@
#include <memory>
#include "ngraph/builder/quantized_conv_builder.hpp"
#include "ngraph/op/constant.hpp"
using
namespace
std
;
using
namespace
ngraph
;
...
...
@@ -74,5 +73,209 @@ namespace ngraph
filter_axes
,
output_axes
);
}
/// \brief Creates an op::QuantizedConvolutionBias node from min/max ranges.
///
/// The requantization scale is input_scale * filter_scale / output_scale, each obtained
/// from quantization_utils::get_scale(). The output scale uses u8 when with_relu is set
/// and i8 otherwise. A bias whose element type is not i32 is first quantized to i32 with
/// the scale from quantization_utils::get_bias_scale().
///
/// \param input, filters, bias      Operands of the convolution.
/// \param window_movement_strides, window_dilation_strides, padding_below,
///        padding_above, data_dilation_strides
///                                  Convolution attributes, forwarded unchanged.
/// \param min_input, max_input      Range of the input.
/// \param min_filter, max_filter    Range of the filters.
/// \param min_output, max_output    Range of the output.
/// \param with_relu                 Forwarded to the op; also selects the output type.
/// \return The op::QuantizedConvolutionBias node.
shared_ptr<Node> QuantizedConvolutionBiasBuilder(const Output<Node>& input,
                                                 const Output<Node>& filters,
                                                 const Output<Node>& bias,
                                                 const Strides& window_movement_strides,
                                                 const Strides& window_dilation_strides,
                                                 const CoordinateDiff& padding_below,
                                                 const CoordinateDiff& padding_above,
                                                 const Strides& data_dilation_strides,
                                                 const Output<Node>& min_input,
                                                 const Output<Node>& max_input,
                                                 const Output<Node>& min_filter,
                                                 const Output<Node>& max_filter,
                                                 const Output<Node>& min_output,
                                                 const Output<Node>& max_output,
                                                 const bool with_relu)
{
    auto result_et = with_relu ? element::u8 : element::i8;

    auto in_scale =
        quantization_utils::get_scale(min_input, max_input, input.get_element_type());
    auto flt_scale =
        quantization_utils::get_scale(min_filter, max_filter, filters.get_element_type());
    auto out_scale = quantization_utils::get_scale(min_output, max_output, result_et);
    auto requant_scale = in_scale * flt_scale / out_scale;

    // Only a bias that is not already i32 needs to be quantized.
    Output<Node> i32_bias = bias;
    if (bias.get_element_type() != element::i32)
    {
        auto zero_point = make_constant(element::i32, min_input.get_shape(), 0);
        auto bias_scale =
            quantization_utils::get_bias_scale(min_input, max_input, min_filter, max_filter);
        i32_bias = make_shared<op::Quantize>(bias,
                                             bias_scale,
                                             zero_point,
                                             element::i32,
                                             AxisSet{},
                                             op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
    }

    return make_shared<op::QuantizedConvolutionBias>(input,
                                                     filters,
                                                     i32_bias,
                                                     window_movement_strides,
                                                     window_dilation_strides,
                                                     padding_below,
                                                     padding_above,
                                                     data_dilation_strides,
                                                     requant_scale,
                                                     with_relu);
}
/// \brief Creates an op::QuantizedConvolutionRelu node from min/max ranges.
///
/// The requantization scale is input_scale * filter_scale / output_scale, each obtained
/// from quantization_utils::get_scale(); the output scale is always computed for u8.
///
/// \param input, filters            Operands of the convolution.
/// \param window_movement_strides, window_dilation_strides, padding_below,
///        padding_above, data_dilation_strides
///                                  Convolution attributes, forwarded unchanged.
/// \param min_input, max_input      Range of the input.
/// \param min_filter, max_filter    Range of the filters.
/// \param min_output, max_output    Range of the output.
/// \return The op::QuantizedConvolutionRelu node.
shared_ptr<Node> QuantizedConvolutionReluBuilder(const Output<Node>& input,
                                                 const Output<Node>& filters,
                                                 const Strides& window_movement_strides,
                                                 const Strides& window_dilation_strides,
                                                 const CoordinateDiff& padding_below,
                                                 const CoordinateDiff& padding_above,
                                                 const Strides& data_dilation_strides,
                                                 const Output<Node>& min_input,
                                                 const Output<Node>& max_input,
                                                 const Output<Node>& min_filter,
                                                 const Output<Node>& max_filter,
                                                 const Output<Node>& min_output,
                                                 const Output<Node>& max_output)
{
    auto in_scale =
        quantization_utils::get_scale(min_input, max_input, input.get_element_type());
    auto flt_scale =
        quantization_utils::get_scale(min_filter, max_filter, filters.get_element_type());
    auto out_scale = quantization_utils::get_scale(min_output, max_output, element::u8);
    auto requant_scale = in_scale * flt_scale / out_scale;

    return make_shared<op::QuantizedConvolutionRelu>(input,
                                                     filters,
                                                     window_movement_strides,
                                                     window_dilation_strides,
                                                     padding_below,
                                                     padding_above,
                                                     data_dilation_strides,
                                                     requant_scale);
}
/// \brief Creates an op::QuantizedConvolutionBiasAdd node from min/max ranges.
///
/// The requantization scale is input_scale * filter_scale / output_scale (output type u8
/// when with_relu is set, i8 otherwise). The sum scale comes from
/// quantization_utils::get_sum_scale() over the output and sum-input ranges. A bias
/// whose element type is not i32 is first quantized to i32 with the scale from
/// quantization_utils::get_bias_scale().
///
/// \param input, filters, bias, sum_input   Operands of the fused op.
/// \param window_movement_strides, window_dilation_strides, padding_below,
///        padding_above, data_dilation_strides
///                                          Convolution attributes, forwarded unchanged.
/// \param min_input, max_input              Range of the input.
/// \param min_filter, max_filter            Range of the filters.
/// \param min_output, max_output            Range of the output.
/// \param min_sum_input, max_sum_input      Range of the sum input.
/// \param with_relu                         Forwarded to the op; also selects the output type.
/// \return The op::QuantizedConvolutionBiasAdd node.
shared_ptr<Node> QuantizedConvolutionBiasAddBuilder(const Output<Node>& input,
                                                    const Output<Node>& filters,
                                                    const Output<Node>& bias,
                                                    const Output<Node>& sum_input,
                                                    const Strides& window_movement_strides,
                                                    const Strides& window_dilation_strides,
                                                    const CoordinateDiff& padding_below,
                                                    const CoordinateDiff& padding_above,
                                                    const Strides& data_dilation_strides,
                                                    const Output<Node>& min_input,
                                                    const Output<Node>& max_input,
                                                    const Output<Node>& min_filter,
                                                    const Output<Node>& max_filter,
                                                    const Output<Node>& min_output,
                                                    const Output<Node>& max_output,
                                                    const Output<Node>& min_sum_input,
                                                    const Output<Node>& max_sum_input,
                                                    const bool with_relu)
{
    auto result_et = with_relu ? element::u8 : element::i8;

    auto in_scale =
        quantization_utils::get_scale(min_input, max_input, input.get_element_type());
    auto flt_scale =
        quantization_utils::get_scale(min_filter, max_filter, filters.get_element_type());
    auto out_scale = quantization_utils::get_scale(min_output, max_output, result_et);
    auto requant_scale = in_scale * flt_scale / out_scale;

    auto add_scale = builder::quantization_utils::get_sum_scale(
        min_output, max_output, min_sum_input, max_sum_input);

    // Only a bias that is not already i32 needs to be quantized.
    Output<Node> i32_bias = bias;
    if (bias.get_element_type() != element::i32)
    {
        auto zero_point = make_constant(element::i32, min_input.get_shape(), 0);
        auto bias_scale =
            quantization_utils::get_bias_scale(min_input, max_input, min_filter, max_filter);
        i32_bias = make_shared<op::Quantize>(bias,
                                             bias_scale,
                                             zero_point,
                                             element::i32,
                                             AxisSet{},
                                             op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
    }

    return make_shared<op::QuantizedConvolutionBiasAdd>(input,
                                                        filters,
                                                        i32_bias,
                                                        sum_input,
                                                        window_movement_strides,
                                                        window_dilation_strides,
                                                        padding_below,
                                                        padding_above,
                                                        data_dilation_strides,
                                                        requant_scale,
                                                        add_scale,
                                                        with_relu);
}
/// \brief Creates an op::QuantizedConvolutionBiasSignedAdd node from min/max ranges and
///        converts its result to u8.
///
/// The requantization scale is input_scale * filter_scale / output_scale (output type u8
/// when with_relu is set, i8 otherwise). The sum scale comes from
/// quantization_utils::get_sum_scale() and is doubled when the output type is u8. A bias
/// whose element type is not i32 is first quantized to i32 with the scale from
/// quantization_utils::get_bias_scale().
///
/// \param input, filters, bias, sum_input   Operands of the fused op.
/// \param window_movement_strides, window_dilation_strides, padding_below,
///        padding_above, data_dilation_strides
///                                          Convolution attributes, forwarded unchanged.
/// \param min_input, max_input              Range of the input.
/// \param min_filter, max_filter            Range of the filters.
/// \param min_output, max_output            Range of the output.
/// \param min_sum_input, max_sum_input      Range of the sum input.
/// \param with_relu                         Forwarded to the op; also selects the output type.
/// \return An op::Convert node producing the u8 result of the fused op.
shared_ptr<Node> QuantizedConvolutionBiasSignedAddBuilder(const Output<Node>& input,
                                                          const Output<Node>& filters,
                                                          const Output<Node>& bias,
                                                          const Output<Node>& sum_input,
                                                          const Strides& window_movement_strides,
                                                          const Strides& window_dilation_strides,
                                                          const CoordinateDiff& padding_below,
                                                          const CoordinateDiff& padding_above,
                                                          const Strides& data_dilation_strides,
                                                          const Output<Node>& min_input,
                                                          const Output<Node>& max_input,
                                                          const Output<Node>& min_filter,
                                                          const Output<Node>& max_filter,
                                                          const Output<Node>& min_output,
                                                          const Output<Node>& max_output,
                                                          const Output<Node>& min_sum_input,
                                                          const Output<Node>& max_sum_input,
                                                          const bool with_relu)
{
    auto output_et = with_relu ? element::u8 : element::i8;

    auto input_scale =
        quantization_utils::get_scale(min_input, max_input, input.get_element_type());
    auto filter_scale =
        quantization_utils::get_scale(min_filter, max_filter, filters.get_element_type());
    auto output_scale = quantization_utils::get_scale(min_output, max_output, output_et);
    auto requantization_scale = input_scale * filter_scale / output_scale;

    auto sum_scale = builder::quantization_utils::get_sum_scale(
        min_output, max_output, min_sum_input, max_sum_input);
    if (output_et == element::u8)
    {
        // Need to multiply by two to account for u8 requantization_scale.
        // The constant takes sum_scale's own element type (rather than a fixed f32) so
        // the multiplication stays type-consistent for any range element type.
        auto two =
            make_constant(sum_scale->get_element_type(), sum_scale->get_shape(), 2.0f);
        sum_scale = two * sum_scale;
    }

    // Only a bias that is not already i32 needs to be quantized.
    auto mybias = bias;
    if (bias.get_element_type() != element::i32)
    {
        auto zero = make_constant(element::i32, min_input.get_shape(), 0);
        AxisSet quantization_axes;
        auto bias_scale =
            quantization_utils::get_bias_scale(min_input, max_input, min_filter, max_filter);
        op::Quantize::RoundMode round_mode =
            op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;

        mybias = make_shared<op::Quantize>(
            bias, bias_scale, zero, element::i32, quantization_axes, round_mode);
    }

    auto qconv = make_shared<op::QuantizedConvolutionBiasSignedAdd>(input,
                                                                    filters,
                                                                    mybias,
                                                                    sum_input,
                                                                    window_movement_strides,
                                                                    window_dilation_strides,
                                                                    padding_below,
                                                                    padding_above,
                                                                    data_dilation_strides,
                                                                    requantization_scale,
                                                                    sum_scale,
                                                                    with_relu);

    // The result is converted to u8 regardless of with_relu.
    return make_shared<op::Convert>(qconv, element::u8);
}
}
}
src/ngraph/builder/quantized_conv_builder.hpp
View file @
f3b9389c
...
...
@@ -18,6 +18,11 @@
#include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/convert.hpp"
#include "ngraph/op/experimental/quantized_conv_bias.hpp"
#include "ngraph/op/experimental/quantized_conv_relu.hpp"
#include "ngraph/op/quantize.hpp"
#include "ngraph/op/quantized_convolution.hpp"
#include "quantization_utils.hpp"
...
...
@@ -43,5 +48,77 @@ namespace ngraph
const
ngraph
::
AxisSet
&
input_axes
=
ngraph
::
AxisSet
{},
const
ngraph
::
AxisSet
&
filter_axes
=
ngraph
::
AxisSet
{},
const
ngraph
::
AxisSet
&
output_axes
=
ngraph
::
AxisSet
{});
/// \brief Creates a quantized convolution-with-bias node from min/max ranges.
/// \param input, filters, bias    Operands of the convolution.
/// \param window_movement_strides, window_dilation_strides, padding_below,
///        padding_above, data_dilation_strides
///                                Convolution attributes.
/// \param min_input, max_input    Range of the input.
/// \param min_filter, max_filter  Range of the filters.
/// \param min_output, max_output  Range of the output.
/// \param with_relu               Whether the relu variant is requested (default false).
std::shared_ptr<Node> QuantizedConvolutionBiasBuilder(const Output<Node>& input,
                                                      const Output<Node>& filters,
                                                      const Output<Node>& bias,
                                                      const Strides& window_movement_strides,
                                                      const Strides& window_dilation_strides,
                                                      const CoordinateDiff& padding_below,
                                                      const CoordinateDiff& padding_above,
                                                      const Strides& data_dilation_strides,
                                                      const Output<Node>& min_input,
                                                      const Output<Node>& max_input,
                                                      const Output<Node>& min_filter,
                                                      const Output<Node>& max_filter,
                                                      const Output<Node>& min_output,
                                                      const Output<Node>& max_output,
                                                      const bool with_relu = false);
/// \brief Creates a quantized convolution-with-relu node from min/max ranges.
/// \param input, filters          Operands of the convolution.
/// \param window_movement_strides, window_dilation_strides, padding_below,
///        padding_above, data_dilation_strides
///                                Convolution attributes.
/// \param min_input, max_input    Range of the input.
/// \param min_filter, max_filter  Range of the filters.
/// \param min_output, max_output  Range of the output.
std::shared_ptr<Node> QuantizedConvolutionReluBuilder(const Output<Node>& input,
                                                      const Output<Node>& filters,
                                                      const Strides& window_movement_strides,
                                                      const Strides& window_dilation_strides,
                                                      const CoordinateDiff& padding_below,
                                                      const CoordinateDiff& padding_above,
                                                      const Strides& data_dilation_strides,
                                                      const Output<Node>& min_input,
                                                      const Output<Node>& max_input,
                                                      const Output<Node>& min_filter,
                                                      const Output<Node>& max_filter,
                                                      const Output<Node>& min_output,
                                                      const Output<Node>& max_output);
/// \brief Creates a quantized convolution-with-bias-and-add node from min/max ranges.
/// \param input, filters, bias, sum_input  Operands of the fused op.
/// \param window_movement_strides, window_dilation_strides, padding_below,
///        padding_above, data_dilation_strides
///                                         Convolution attributes.
/// \param min_input, max_input             Range of the input.
/// \param min_filter, max_filter           Range of the filters.
/// \param min_output, max_output           Range of the output.
/// \param min_sum_input, max_sum_input     Range of the sum input.
/// \param with_relu                        Whether the relu variant is requested
///                                         (default false).
std::shared_ptr<Node> QuantizedConvolutionBiasAddBuilder(const Output<Node>& input,
                                                         const Output<Node>& filters,
                                                         const Output<Node>& bias,
                                                         const Output<Node>& sum_input,
                                                         const Strides& window_movement_strides,
                                                         const Strides& window_dilation_strides,
                                                         const CoordinateDiff& padding_below,
                                                         const CoordinateDiff& padding_above,
                                                         const Strides& data_dilation_strides,
                                                         const Output<Node>& min_input,
                                                         const Output<Node>& max_input,
                                                         const Output<Node>& min_filter,
                                                         const Output<Node>& max_filter,
                                                         const Output<Node>& min_output,
                                                         const Output<Node>& max_output,
                                                         const Output<Node>& min_sum_input,
                                                         const Output<Node>& max_sum_input,
                                                         const bool with_relu = false);
/// \brief Creates a quantized convolution-with-bias-and-signed-add node from min/max
///        ranges.
/// \param input, filters, bias, sum_input  Operands of the fused op.
/// \param window_movement_strides, window_dilation_strides, padding_below,
///        padding_above, data_dilation_strides
///                                         Convolution attributes.
/// \param min_input, max_input             Range of the input.
/// \param min_filter, max_filter           Range of the filters.
/// \param min_output, max_output           Range of the output.
/// \param min_sum_input, max_sum_input     Range of the sum input.
/// \param with_relu                        Whether the relu variant is requested
///                                         (default false).
std::shared_ptr<Node>
    QuantizedConvolutionBiasSignedAddBuilder(const Output<Node>& input,
                                             const Output<Node>& filters,
                                             const Output<Node>& bias,
                                             const Output<Node>& sum_input,
                                             const Strides& window_movement_strides,
                                             const Strides& window_dilation_strides,
                                             const CoordinateDiff& padding_below,
                                             const CoordinateDiff& padding_above,
                                             const Strides& data_dilation_strides,
                                             const Output<Node>& min_input,
                                             const Output<Node>& max_input,
                                             const Output<Node>& min_filter,
                                             const Output<Node>& max_filter,
                                             const Output<Node>& min_output,
                                             const Output<Node>& max_output,
                                             const Output<Node>& min_sum_input,
                                             const Output<Node>& max_sum_input,
                                             const bool with_relu = false);
}
}
src/ngraph/builder/quantized_dot_builder.cpp
View file @
f3b9389c
...
...
@@ -17,7 +17,6 @@
#include <memory>
#include "ngraph/builder/quantized_dot_builder.hpp"
#include "ngraph/op/constant.hpp"
using
namespace
std
;
using
namespace
ngraph
;
...
...
@@ -65,5 +64,45 @@ namespace ngraph
input1_axes
,
output_axes
);
}
/// \brief Creates an op::QuantizedDotBias node from min/max ranges.
///
/// The scale comes from quantization_utils::get_dot_scale(), with the output type taken
/// as u8 when with_relu is set and i8 otherwise. A bias whose element type is not i32 is
/// first quantized to i32 with the scale from quantization_utils::get_bias_scale().
///
/// \param input, filters, bias    Operands of the dot.
/// \param min_input, max_input    Range of the input.
/// \param min_filter, max_filter  Range of the filters.
/// \param min_output, max_output  Range of the output.
/// \param requantize              Forwarded to get_dot_scale() and to the op.
/// \param with_relu               Forwarded to the op; also selects the output type.
/// \return The op::QuantizedDotBias node.
shared_ptr<Node> QuantizedDotBiasBuilder(const Output<Node>& input,
                                         const Output<Node>& filters,
                                         const Output<Node>& bias,
                                         const Output<Node>& min_input,
                                         const Output<Node>& max_input,
                                         const Output<Node>& min_filter,
                                         const Output<Node>& max_filter,
                                         const Output<Node>& min_output,
                                         const Output<Node>& max_output,
                                         const bool requantize,
                                         const bool with_relu)
{
    auto dot_scale =
        quantization_utils::get_dot_scale(min_input,
                                          max_input,
                                          min_filter,
                                          max_filter,
                                          min_output,
                                          max_output,
                                          input.get_element_type(),
                                          with_relu ? element::u8 : element::i8,
                                          requantize);

    // Only a bias that is not already i32 needs to be quantized.
    Output<Node> i32_bias = bias;
    if (bias.get_element_type() != element::i32)
    {
        auto zero_point = make_constant(element::i32, min_input.get_shape(), 0);
        auto bias_scale =
            quantization_utils::get_bias_scale(min_input, max_input, min_filter, max_filter);
        i32_bias = make_shared<op::Quantize>(bias,
                                             bias_scale,
                                             zero_point,
                                             element::i32,
                                             AxisSet{},
                                             op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN);
    }

    return make_shared<op::QuantizedDotBias>(
        input, filters, i32_bias, dot_scale, requantize, with_relu);
}
}
}
src/ngraph/builder/quantized_dot_builder.hpp
View file @
f3b9389c
...
...
@@ -18,6 +18,9 @@
#include "ngraph/coordinate_diff.hpp"
#include "ngraph/node.hpp"
#include "ngraph/op/constant.hpp"
#include "ngraph/op/experimental/quantized_dot_bias.hpp"
#include "ngraph/op/quantize.hpp"
#include "ngraph/op/quantized_dot.hpp"
#include "quantization_utils.hpp"
...
...
@@ -38,5 +41,17 @@ namespace ngraph
const
ngraph
::
AxisSet
&
input0_axes
,
const
ngraph
::
AxisSet
&
input1_axes
,
const
ngraph
::
AxisSet
&
output_axes
);
/// \brief Creates a quantized dot-with-bias node from min/max ranges.
/// \param input, filters, bias    Operands of the dot.
/// \param min_input, max_input    Range of the input.
/// \param min_filter, max_filter  Range of the filters.
/// \param min_output, max_output  Range of the output.
/// \param requantize              Whether the result is requantized (default true).
/// \param with_relu               Whether the relu variant is requested (default false).
std::shared_ptr<Node> QuantizedDotBiasBuilder(const Output<Node>& input,
                                              const Output<Node>& filters,
                                              const Output<Node>& bias,
                                              const Output<Node>& min_input,
                                              const Output<Node>& max_input,
                                              const Output<Node>& min_filter,
                                              const Output<Node>& max_filter,
                                              const Output<Node>& min_output,
                                              const Output<Node>& max_output,
                                              const bool requantize = true,
                                              const bool with_relu = false);
}
}
src/ngraph/ngraph.hpp
View file @
f3b9389c
...
...
@@ -62,8 +62,12 @@ namespace ngraph
/// recipes, for example auto-broadcast.
#include "ngraph/builder/autobroadcast.hpp"
#include "ngraph/builder/dequantize_builder.hpp"
#include "ngraph/builder/numpy_transpose.hpp"
#include "ngraph/builder/quantize_builder.hpp"
#include "ngraph/builder/quantized_concat_builder.hpp"
#include "ngraph/builder/quantized_conv_builder.hpp"
#include "ngraph/builder/quantized_dot_builder.hpp"
#include "ngraph/builder/reduce_ops.hpp"
#include "ngraph/builder/reshape.hpp"
#include "ngraph/builder/tensor_mask.hpp"
...
...
test/builder_quantization.cpp
View file @
f3b9389c
...
...
@@ -21,8 +21,10 @@
#include <string>
#include "gtest/gtest.h"
#include "ngraph/builder/
quantization
.hpp"
#include "ngraph/builder/
dequantize_builder
.hpp"
#include "ngraph/builder/quantization/quantized_linear_convolution.hpp"
#include "ngraph/builder/quantize_builder.hpp"
#include "ngraph/builder/quantized_concat_builder.hpp"
#include "ngraph/builder/quantized_conv_builder.hpp"
#include "ngraph/builder/quantized_dot_builder.hpp"
#include "ngraph/ngraph.hpp"
...
...
@@ -61,7 +63,7 @@ TEST(builder, scaled_QC_with_relu)
auto
F
=
op
::
Constant
::
create
(
element
::
f32
,
Shape
{
1
},
{
127.0
f
});
auto
G
=
op
::
Constant
::
create
(
element
::
f32
,
Shape
{
1
},
{
20.0
f
});
auto
H
=
op
::
Constant
::
create
(
element
::
f32
,
Shape
{
1
},
{
-
24.0
f
});
auto
CV
=
ngraph
::
builder
::
ScaledQuantizedConvolutionRelu
(
A
,
auto
CV
=
ngraph
::
builder
::
QuantizedConvolutionReluBuilder
(
A
,
B
,
Strides
{
1
,
1
},
// move_strides
Strides
{
1
,
1
},
// filter_dilation
...
...
@@ -103,7 +105,7 @@ TEST(builder, dynamic_scaled_QC_with_relu)
auto
F
=
make_shared
<
op
::
Parameter
>
(
element
::
f32
,
Shape
{
1
});
auto
G
=
make_shared
<
op
::
Parameter
>
(
element
::
f32
,
Shape
{
1
});
auto
H
=
make_shared
<
op
::
Parameter
>
(
element
::
f32
,
Shape
{
1
});
auto
CV
=
ngraph
::
builder
::
ScaledQuantizedConvolutionRelu
(
A
,
auto
CV
=
ngraph
::
builder
::
QuantizedConvolutionReluBuilder
(
A
,
B
,
Strides
{
1
,
1
},
// move_strides
Strides
{
1
,
1
},
// filter_dilation
...
...
@@ -158,7 +160,7 @@ TEST(builder, scaled_QC_with_bias)
auto
F
=
op
::
Constant
::
create
(
element
::
f32
,
Shape
{
1
},
{
127.0
f
});
auto
G
=
op
::
Constant
::
create
(
element
::
f32
,
Shape
{
1
},
{
22.0
f
});
auto
H
=
op
::
Constant
::
create
(
element
::
f32
,
Shape
{
1
},
{
90.0
f
});
auto
CV
=
ngraph
::
builder
::
ScaledQuantizedConvolutionBias
(
A
,
auto
CV
=
ngraph
::
builder
::
QuantizedConvolutionBiasBuilder
(
A
,
B
,
Bias
,
Strides
{
1
,
1
},
// move_strides
...
...
@@ -185,7 +187,7 @@ TEST(builder, scaled_QC_with_bias)
auto
result
=
backend
->
create_tensor
(
element
::
i8
,
shape_r
);
auto
handle
=
backend
->
compile
(
f
);
handle
->
call_with_validate
({
result
},
{
a
,
b
,
c
});
EXPECT_EQ
((
vector
<
int8_t
>
{
38
,
55
,
50
,
52
,
61
,
109
,
127
,
68
,
54
,
81
,
68
,
62
}),
EXPECT_EQ
((
vector
<
int8_t
>
{
38
,
55
,
49
,
52
,
61
,
109
,
127
,
68
,
54
,
80
,
68
,
62
}),
read_vector
<
int8_t
>
(
result
));
}
...
...
@@ -206,7 +208,7 @@ TEST(builder, dynamic_scaled_QC_with_bias)
auto
F
=
make_shared
<
op
::
Parameter
>
(
element
::
f32
,
Shape
{
1
});
auto
G
=
make_shared
<
op
::
Parameter
>
(
element
::
f32
,
Shape
{
1
});
auto
H
=
make_shared
<
op
::
Parameter
>
(
element
::
f32
,
Shape
{
1
});
auto
CV
=
ngraph
::
builder
::
ScaledQuantizedConvolutionBias
(
A
,
auto
CV
=
ngraph
::
builder
::
QuantizedConvolutionBiasBuilder
(
A
,
B
,
Bias
,
Strides
{
1
,
1
},
// move_strides
...
...
@@ -244,7 +246,7 @@ TEST(builder, dynamic_scaled_QC_with_bias)
auto
result
=
backend
->
create_tensor
(
element
::
i8
,
shape_r
);
auto
handle
=
backend
->
compile
(
f
);
handle
->
call_with_validate
({
result
},
{
a
,
b
,
c
,
d
,
e
,
e_a
,
g
,
h
,
i
});
EXPECT_EQ
((
vector
<
int8_t
>
{
38
,
55
,
50
,
52
,
61
,
109
,
127
,
68
,
54
,
81
,
68
,
62
}),
EXPECT_EQ
((
vector
<
int8_t
>
{
38
,
55
,
49
,
52
,
61
,
109
,
127
,
68
,
54
,
80
,
68
,
62
}),
read_vector
<
int8_t
>
(
result
));
}
...
...
@@ -265,7 +267,7 @@ TEST(builder, scaled_QC_with_bias_and_relu)
auto
F
=
op
::
Constant
::
create
(
element
::
f32
,
Shape
{
1
},
{
127.0
f
});
auto
G
=
op
::
Constant
::
create
(
element
::
f32
,
Shape
{
1
},
{
20.0
f
});
auto
H
=
op
::
Constant
::
create
(
element
::
f32
,
Shape
{
1
},
{
-
24.0
f
});
auto
CV
=
ngraph
::
builder
::
ScaledQuantizedConvolutionBias
(
A
,
auto
CV
=
ngraph
::
builder
::
QuantizedConvolutionBiasBuilder
(
A
,
B
,
Bias
,
Strides
{
1
,
1
},
// move_strides
...
...
@@ -317,7 +319,8 @@ TEST(builder, scaled_QC_with_bias_add_and_relu)
auto
H
=
op
::
Constant
::
create
(
element
::
f32
,
Shape
{},
{
90.0
f
});
auto
I
=
op
::
Constant
::
create
(
element
::
f32
,
Shape
{},
{
22.0
f
});
auto
J
=
op
::
Constant
::
create
(
element
::
f32
,
Shape
{},
{
180.0
f
});
auto
CV
=
ngraph
::
builder
::
ScaledQuantizedConvolutionBiasAdd
(
A
,
auto
CV
=
ngraph
::
builder
::
QuantizedConvolutionBiasAddBuilder
(
A
,
B
,
Bias
,
Add
,
...
...
@@ -350,7 +353,7 @@ TEST(builder, scaled_QC_with_bias_add_and_relu)
auto
result
=
backend
->
create_tensor
(
element
::
u8
,
shape_r
);
auto
handle
=
backend
->
compile
(
f
);
handle
->
call_with_validate
({
result
},
{
a
,
b
,
c
,
d
});
EXPECT_EQ
((
vector
<
uint8_t
>
{
78
,
114
,
105
,
113
,
132
,
230
,
255
,
136
,
110
,
16
5
,
142
,
133
}),
EXPECT_EQ
((
vector
<
uint8_t
>
{
78
,
114
,
105
,
113
,
132
,
230
,
255
,
136
,
110
,
16
6
,
142
,
133
}),
read_vector
<
uint8_t
>
(
result
));
}
...
...
@@ -375,7 +378,8 @@ TEST(builder, dynamic_scaled_QC_with_bias_add_and_relu)
auto
H
=
make_shared
<
op
::
Parameter
>
(
element
::
f32
,
Shape
{
1
});
auto
I
=
make_shared
<
op
::
Parameter
>
(
element
::
f32
,
Shape
{
1
});
auto
J
=
make_shared
<
op
::
Parameter
>
(
element
::
f32
,
Shape
{
1
});
auto
CV
=
ngraph
::
builder
::
ScaledQuantizedConvolutionBiasAdd
(
A
,
auto
CV
=
ngraph
::
builder
::
QuantizedConvolutionBiasAddBuilder
(
A
,
B
,
Bias
,
Add
,
...
...
@@ -424,7 +428,7 @@ TEST(builder, dynamic_scaled_QC_with_bias_add_and_relu)
auto
result
=
backend
->
create_tensor
(
element
::
u8
,
shape_r
);
auto
handle
=
backend
->
compile
(
f
);
handle
->
call_with_validate
({
result
},
{
a
,
b
,
c
,
d
,
e
,
e_a
,
g
,
h
,
i
,
j
,
k
,
l
});
EXPECT_EQ
((
vector
<
uint8_t
>
{
78
,
114
,
105
,
113
,
132
,
230
,
255
,
136
,
110
,
16
5
,
142
,
133
}),
EXPECT_EQ
((
vector
<
uint8_t
>
{
78
,
114
,
105
,
113
,
132
,
230
,
255
,
136
,
110
,
16
6
,
142
,
133
}),
read_vector
<
uint8_t
>
(
result
));
}
...
...
@@ -449,8 +453,8 @@ TEST(builder, scaled_QC_with_bias_signed_add_and_relu)
auto
H
=
op
::
Constant
::
create
(
element
::
f32
,
Shape
{},
{
90.0
f
});
auto
I
=
op
::
Constant
::
create
(
element
::
f32
,
Shape
{},
{
22.0
f
});
auto
J
=
op
::
Constant
::
create
(
element
::
f32
,
Shape
{},
{
90.0
f
});
auto
CV
=
ngraph
::
builder
::
ScaledQuantizedConvolutionBiasSignedAdd
(
A
,
auto
CV
=
ngraph
::
builder
::
QuantizedConvolutionBiasSignedAddBuilder
(
A
,
B
,
Bias
,
Add
,
...
...
@@ -511,8 +515,8 @@ TEST(builder, scaled_QC_with_bias_signed_add_and_relu_nhwc)
auto
H
=
op
::
Constant
::
create
(
element
::
f32
,
Shape
{},
{
90.0
f
});
auto
I
=
op
::
Constant
::
create
(
element
::
f32
,
Shape
{},
{
22.0
f
});
auto
J
=
op
::
Constant
::
create
(
element
::
f32
,
Shape
{},
{
90.0
f
});
auto
CV
=
ngraph
::
builder
::
ScaledQuantizedConvolutionBiasSignedAdd
(
A_reshape
,
auto
CV
=
ngraph
::
builder
::
QuantizedConvolutionBiasSignedAddBuilder
(
A_reshape
,
B_reshape
,
Bias
,
Add_reshape
,
...
...
@@ -570,8 +574,8 @@ TEST(builder, dynamic_scaled_QC_with_bias_signed_add_and_relu)
auto
H
=
make_shared
<
op
::
Parameter
>
(
element
::
f32
,
Shape
{
1
});
auto
I
=
make_shared
<
op
::
Parameter
>
(
element
::
f32
,
Shape
{
1
});
auto
J
=
make_shared
<
op
::
Parameter
>
(
element
::
f32
,
Shape
{
1
});
auto
CV
=
ngraph
::
builder
::
ScaledQuantizedConvolutionBiasSignedAdd
(
A
,
auto
CV
=
ngraph
::
builder
::
QuantizedConvolutionBiasSignedAddBuilder
(
A
,
B
,
Bias
,
Add
,
...
...
@@ -641,7 +645,7 @@ TEST(builder, scaled_QC_with_f32_bias_and_relu)
auto
F
=
op
::
Constant
::
create
(
element
::
f32
,
Shape
{},
{
127.0
f
});
auto
G
=
op
::
Constant
::
create
(
element
::
f32
,
Shape
{},
{
20.0
f
});
auto
H
=
op
::
Constant
::
create
(
element
::
f32
,
Shape
{},
{
-
24.0
f
});
auto
CV
=
ngraph
::
builder
::
ScaledQuantizedConvolutionBias
(
A
,
auto
CV
=
ngraph
::
builder
::
QuantizedConvolutionBiasBuilder
(
A
,
B
,
Bias
,
Strides
{
1
,
1
},
// move_strides
...
...
@@ -681,7 +685,7 @@ TEST(builder, scaled_Q_unsigned)
auto
A
=
make_shared
<
op
::
Parameter
>
(
element
::
f32
,
shape_a
);
auto
B
=
op
::
Constant
::
create
(
element
::
f32
,
Shape
{},
{
-
255.0
f
});
auto
C
=
op
::
Constant
::
create
(
element
::
f32
,
Shape
{},
{
127.0
f
});
auto
QT
=
ngraph
::
builder
::
ScaledQuantize
(
A
,
B
,
C
,
element
::
u8
,
quantization_axes
,
round_mode
);
auto
QT
=
ngraph
::
builder
::
QuantizeBuilder
(
A
,
B
,
C
,
element
::
u8
,
quantization_axes
,
round_mode
);
auto
f
=
make_shared
<
Function
>
(
NodeVector
{
QT
},
ParameterVector
{
A
});
constant_fold
(
f
);
auto
backend
=
runtime
::
Backend
::
create
(
"CPU"
);
...
...
@@ -706,7 +710,7 @@ TEST(builder, dynamic_scaled_Q)
auto
A
=
make_shared
<
op
::
Parameter
>
(
element
::
f32
,
in_shape
);
auto
B
=
make_shared
<
op
::
Parameter
>
(
element
::
f32
,
Shape
{});
auto
C
=
make_shared
<
op
::
Parameter
>
(
element
::
f32
,
Shape
{});
auto
QT
=
ngraph
::
builder
::
ScaledQuantize
(
A
,
B
,
C
,
type
,
AxisSet
{},
mode
);
auto
QT
=
ngraph
::
builder
::
QuantizeBuilder
(
A
,
B
,
C
,
type
,
AxisSet
{},
mode
);
auto
f
=
make_shared
<
Function
>
(
NodeVector
{
QT
},
ParameterVector
{
A
,
B
,
C
});
// Create some tensors for input/output
auto
a
=
backend
->
create_tensor
(
element
::
f32
,
in_shape
);
...
...
@@ -772,7 +776,7 @@ TEST(builder, scaled_Q_signed)
auto
A
=
make_shared
<
op
::
Parameter
>
(
element
::
f32
,
shape_a
);
auto
B
=
op
::
Constant
::
create
(
element
::
f32
,
Shape
{},
{
-
127.0
f
});
auto
C
=
op
::
Constant
::
create
(
element
::
f32
,
Shape
{},
{
127.0
f
});
auto
QT
=
ngraph
::
builder
::
ScaledQuantize
(
A
,
B
,
C
,
element
::
i8
,
quantization_axes
,
round_mode
);
auto
QT
=
ngraph
::
builder
::
QuantizeBuilder
(
A
,
B
,
C
,
element
::
i8
,
quantization_axes
,
round_mode
);
auto
f
=
make_shared
<
Function
>
(
NodeVector
{
QT
},
ParameterVector
{
A
});
constant_fold
(
f
);
auto
backend
=
runtime
::
Backend
::
create
(
"CPU"
);
...
...
@@ -792,7 +796,7 @@ TEST(builder, scaled_DQ_signed)
auto
A
=
make_shared
<
op
::
Parameter
>
(
element
::
i8
,
Shape
{
1
});
auto
B
=
op
::
Constant
::
create
(
element
::
f32
,
Shape
{},
{
-
1.0
f
});
auto
C
=
op
::
Constant
::
create
(
element
::
f32
,
Shape
{},
{
300.0
f
});
auto
r
=
ngraph
::
builder
::
ScaledDequantize
(
A
,
B
,
C
,
element
::
f32
,
quantization_axes
);
auto
r
=
ngraph
::
builder
::
DequantizeBuilder
(
A
,
B
,
C
,
element
::
f32
,
quantization_axes
);
auto
f
=
make_shared
<
Function
>
(
r
,
ParameterVector
{
A
});
constant_fold
(
f
);
auto
backend
=
runtime
::
Backend
::
create
(
"CPU"
);
...
...
@@ -816,7 +820,7 @@ shared_ptr<runtime::Tensor> call_SDQ(shared_ptr<runtime::Backend>& backend,
auto
A
=
make_shared
<
op
::
Parameter
>
(
type
,
in_shape
);
auto
B
=
make_shared
<
op
::
Parameter
>
(
element
::
f32
,
Shape
{});
auto
C
=
make_shared
<
op
::
Parameter
>
(
element
::
f32
,
Shape
{});
auto
DQT
=
ngraph
::
builder
::
ScaledDequantize
(
A
,
B
,
C
,
element
::
f32
,
AxisSet
{});
auto
DQT
=
ngraph
::
builder
::
DequantizeBuilder
(
A
,
B
,
C
,
element
::
f32
,
AxisSet
{});
auto
f
=
make_shared
<
Function
>
(
NodeVector
{
DQT
},
ParameterVector
{
A
,
B
,
C
});
// Create some tensors for input/output
auto
a
=
backend
->
create_tensor
(
type
,
in_shape
);
...
...
@@ -857,7 +861,7 @@ TEST(builder, scaled_quantize_concat_unsigned)
auto
Cn
=
make_shared
<
op
::
Parameter
>
(
element
::
f32
,
Shape
{
1
});
auto
Cx
=
make_shared
<
op
::
Parameter
>
(
element
::
f32
,
Shape
{
1
});
Shape
shape_r
{
8
,
2
};
auto
QConcat
=
ngraph
::
builder
::
ScaledQuantizedConcat
(
auto
QConcat
=
ngraph
::
builder
::
QuantizedConcatBuilder
(
NodeVector
{
A
,
B
,
C
},
0
,
NodeVector
{
An
,
Bn
,
Cn
},
NodeVector
{
Ax
,
Bx
,
Cx
});
auto
f
=
make_shared
<
Function
>
(
NodeVector
{
QConcat
},
ParameterVector
{
A
,
B
,
C
,
An
,
Bn
,
Cn
,
Ax
,
Bx
,
Cx
});
...
...
@@ -906,7 +910,7 @@ TEST(builder, scaled_quantize_concat_signed)
auto
Cx
=
make_shared
<
op
::
Parameter
>
(
element
::
f32
,
Shape
{
1
});
Shape
shape_r
{
8
,
2
};
auto
QConcat
=
ngraph
::
builder
::
ScaledQuantizedConcat
(
auto
QConcat
=
ngraph
::
builder
::
QuantizedConcatBuilder
(
NodeVector
{
A
,
B
,
C
},
0
,
NodeVector
{
An
,
Bn
,
Cn
},
NodeVector
{
Ax
,
Bx
,
Cx
});
auto
f
=
make_shared
<
Function
>
(
NodeVector
{
QConcat
},
ParameterVector
{
A
,
B
,
C
,
An
,
Bn
,
Cn
,
Ax
,
Bx
,
Cx
});
...
...
@@ -954,7 +958,7 @@ TEST(builder, scaled_quantize_concat_unsigned_varying)
auto
Cn
=
make_shared
<
op
::
Parameter
>
(
element
::
f32
,
Shape
{
1
});
auto
Cx
=
make_shared
<
op
::
Parameter
>
(
element
::
f32
,
Shape
{
1
});
Shape
shape_r
{
2
,
9
};
auto
QConcat
=
ngraph
::
builder
::
ScaledQuantizedConcat
(
auto
QConcat
=
ngraph
::
builder
::
QuantizedConcatBuilder
(
NodeVector
{
A
,
B
,
C
},
1
,
NodeVector
{
An
,
Bn
,
Cn
},
NodeVector
{
Ax
,
Bx
,
Cx
});
auto
f
=
make_shared
<
Function
>
(
NodeVector
{
QConcat
},
ParameterVector
{
A
,
B
,
C
,
An
,
Bn
,
Cn
,
Ax
,
Bx
,
Cx
});
...
...
@@ -1009,7 +1013,7 @@ TEST(builder, dynamic_scaled_QD_with_bias)
auto
F
=
make_shared
<
op
::
Parameter
>
(
element
::
f32
,
Shape
{
1
});
auto
G
=
make_shared
<
op
::
Parameter
>
(
element
::
f32
,
Shape
{
1
});
auto
H
=
make_shared
<
op
::
Parameter
>
(
element
::
f32
,
Shape
{
1
});
auto
CV
=
ngraph
::
builder
::
ScaledQuantizedDotBias
(
auto
CV
=
ngraph
::
builder
::
QuantizedDotBiasBuilder
(
A
,
B
,
Bias
,
C
,
D
,
E
,
F
,
G
,
H
,
requantize
,
with_relu
);
return
make_shared
<
Function
>
(
NodeVector
{
CV
},
ParameterVector
{
A
,
B
,
Bias
,
C
,
D
,
E
,
F
,
G
,
H
});
};
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment