Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
N
ngraph
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ngraph
Commits
34499001
Unverified
Commit
34499001
authored
Jul 27, 2019
by
Scott Cyphers
Committed by
GitHub
Jul 27, 2019
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Quantization conversion from nodes to outputs (#3316)
parent
8eb63379
Show whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
304 additions
and
312 deletions
+304
-312
quantization.cpp
src/ngraph/builder/quantization.cpp
+90
-90
quantization.hpp
src/ngraph/builder/quantization.hpp
+70
-72
quantized_linear_convolution.cpp
...aph/builder/quantization/quantized_linear_convolution.cpp
+8
-8
quantized_linear_convolution.hpp
...aph/builder/quantization/quantized_linear_convolution.hpp
+6
-6
quantized_linear_matmul.cpp
src/ngraph/builder/quantization/quantized_linear_matmul.cpp
+17
-18
quantized_linear_matmul.hpp
src/ngraph/builder/quantization/quantized_linear_matmul.hpp
+14
-15
quantization_util.hpp
src/ngraph/builder/quantization_util.hpp
+69
-72
quantization_utils.cpp
src/ngraph/builder/quantization_utils.cpp
+7
-7
quantization_utils.hpp
src/ngraph/builder/quantization_utils.hpp
+3
-3
quantized_conv_builder.cpp
src/ngraph/builder/quantized_conv_builder.cpp
+12
-13
quantized_conv_builder.hpp
src/ngraph/builder/quantized_conv_builder.hpp
+8
-8
No files found.
src/ngraph/builder/quantization.cpp
View file @
34499001
...
...
@@ -33,27 +33,27 @@ namespace ngraph
{
namespace
builder
{
shared_ptr
<
Node
>
ScaledQuantize
(
const
shared_ptr
<
Node
>&
input
,
const
shared_ptr
<
Node
>&
min
,
const
shared_ptr
<
Node
>&
max
,
shared_ptr
<
Node
>
ScaledQuantize
(
const
Output
<
Node
>&
input
,
const
Output
<
Node
>&
min
,
const
Output
<
Node
>&
max
,
const
ngraph
::
element
::
Type
&
quant_type
,
const
ngraph
::
AxisSet
&
axes
,
op
::
Quantize
::
RoundMode
round_mode
)
{
auto
real_type
=
input
->
get_element_type
();
auto
real_type
=
input
.
get_element_type
();
if
(
min
->
get_element_type
()
!=
real_type
)
if
(
min
.
get_element_type
()
!=
real_type
)
{
throw
ngraph_error
(
"ScaledQuantize: min must match input type"
);
}
if
(
max
->
get_element_type
()
!=
real_type
)
if
(
max
.
get_element_type
()
!=
real_type
)
{
throw
ngraph_error
(
"ScaledQuantize: max must match input type"
);
}
auto
shape
=
min
->
get_shape
();
if
(
shape
!=
max
->
get_shape
())
auto
shape
=
min
.
get_shape
();
if
(
shape
!=
max
.
get_shape
())
{
throw
ngraph_error
(
"ScaledQuantize: min and max must have same shape"
);
}
...
...
@@ -63,26 +63,26 @@ namespace ngraph
return
make_shared
<
op
::
Quantize
>
(
input
,
scale
,
zero
,
quant_type
,
axes
,
round_mode
);
}
shared_ptr
<
Node
>
ScaledDequantize
(
const
shared_ptr
<
Node
>&
input
,
const
shared_ptr
<
Node
>&
min
,
const
shared_ptr
<
Node
>&
max
,
shared_ptr
<
Node
>
ScaledDequantize
(
const
Output
<
Node
>&
input
,
const
Output
<
Node
>&
min
,
const
Output
<
Node
>&
max
,
const
ngraph
::
element
::
Type
&
real_type
,
const
ngraph
::
AxisSet
&
axes
)
{
auto
quant_type
=
input
->
get_element_type
();
auto
quant_type
=
input
.
get_element_type
();
if
(
min
->
get_element_type
()
!=
real_type
)
if
(
min
.
get_element_type
()
!=
real_type
)
{
throw
ngraph_error
(
"ScaledDequantize: min must match output type"
);
}
if
(
max
->
get_element_type
()
!=
real_type
)
if
(
max
.
get_element_type
()
!=
real_type
)
{
throw
ngraph_error
(
"ScaledDequantize: max must match output type"
);
}
auto
shape
=
min
->
get_shape
();
if
(
shape
!=
max
->
get_shape
())
auto
shape
=
min
.
get_shape
();
if
(
shape
!=
max
.
get_shape
())
{
throw
ngraph_error
(
"ScaledDequantize: min and max must have same shape"
);
}
...
...
@@ -127,14 +127,14 @@ namespace ngraph
return
make_shared
<
op
::
QuantizedConcat
>
(
rescaled_args
,
concatenation_axis
);
}
shared_ptr
<
Node
>
ScaledQuantizedAvgPool
(
const
shared_ptr
<
Node
>&
input
,
shared_ptr
<
Node
>
ScaledQuantizedAvgPool
(
const
Output
<
Node
>&
input
,
const
Shape
&
window_shape
,
const
Strides
&
window_movement_strides
,
const
Shape
&
padding_below
,
const
Shape
&
padding_above
,
bool
include_padding_in_avg_computation
,
const
shared_ptr
<
Node
>&
min
,
const
shared_ptr
<
Node
>&
max
)
const
Output
<
Node
>&
min
,
const
Output
<
Node
>&
max
)
{
return
make_shared
<
op
::
QuantizedAvgPool
>
(
input
,
window_shape
,
...
...
@@ -144,20 +144,20 @@ namespace ngraph
include_padding_in_avg_computation
);
}
shared_ptr
<
Node
>
ScaledQuantizedConvolutionBias
(
const
shared_ptr
<
Node
>&
input
,
const
shared_ptr
<
Node
>&
filters
,
const
shared_ptr
<
Node
>&
bias
,
shared_ptr
<
Node
>
ScaledQuantizedConvolutionBias
(
const
Output
<
Node
>&
input
,
const
Output
<
Node
>&
filters
,
const
Output
<
Node
>&
bias
,
const
Strides
&
window_movement_strides
,
const
Strides
&
window_dilation_strides
,
const
CoordinateDiff
&
padding_below
,
const
CoordinateDiff
&
padding_above
,
const
Strides
&
data_dilation_strides
,
const
shared_ptr
<
Node
>&
min_input
,
const
shared_ptr
<
Node
>&
max_input
,
const
shared_ptr
<
Node
>&
min_filter
,
const
shared_ptr
<
Node
>&
max_filter
,
const
shared_ptr
<
Node
>&
min_output
,
const
shared_ptr
<
Node
>&
max_output
,
const
Output
<
Node
>&
min_input
,
const
Output
<
Node
>&
max_input
,
const
Output
<
Node
>&
min_filter
,
const
Output
<
Node
>&
max_filter
,
const
Output
<
Node
>&
min_output
,
const
Output
<
Node
>&
max_output
,
const
bool
with_relu
)
{
auto
output_et
=
with_relu
?
element
::
u8
:
element
::
i8
;
...
...
@@ -165,9 +165,9 @@ namespace ngraph
min_input
,
max_input
,
min_filter
,
max_filter
,
min_output
,
max_output
,
output_et
);
auto
mybias
=
bias
;
if
(
bias
->
get_element_type
()
!=
element
::
i32
)
if
(
bias
.
get_element_type
()
!=
element
::
i32
)
{
auto
zero
=
make_constant
(
element
::
i32
,
min_input
->
get_shape
(),
0
);
auto
zero
=
make_constant
(
element
::
i32
,
min_input
.
get_shape
(),
0
);
AxisSet
quantization_axes
;
auto
bias_scale
=
quantization_util
::
get_bias_scale
(
min_input
,
max_input
,
min_filter
,
max_filter
);
...
...
@@ -190,19 +190,19 @@ namespace ngraph
with_relu
);
}
shared_ptr
<
Node
>
ScaledQuantizedConvolutionRelu
(
const
shared_ptr
<
Node
>&
input
,
const
shared_ptr
<
Node
>&
filters
,
shared_ptr
<
Node
>
ScaledQuantizedConvolutionRelu
(
const
Output
<
Node
>&
input
,
const
Output
<
Node
>&
filters
,
const
Strides
&
window_movement_strides
,
const
Strides
&
window_dilation_strides
,
const
CoordinateDiff
&
padding_below
,
const
CoordinateDiff
&
padding_above
,
const
Strides
&
data_dilation_strides
,
const
shared_ptr
<
Node
>&
min_input
,
const
shared_ptr
<
Node
>&
max_input
,
const
shared_ptr
<
Node
>&
min_filter
,
const
shared_ptr
<
Node
>&
max_filter
,
const
shared_ptr
<
Node
>&
min_output
,
const
shared_ptr
<
Node
>&
max_output
)
const
Output
<
Node
>&
min_input
,
const
Output
<
Node
>&
max_input
,
const
Output
<
Node
>&
min_filter
,
const
Output
<
Node
>&
max_filter
,
const
Output
<
Node
>&
min_output
,
const
Output
<
Node
>&
max_output
)
{
auto
requantization_scale
=
quantization_util
::
get_scale
(
min_input
,
max_input
,
min_filter
,
max_filter
,
min_output
,
max_output
,
element
::
u8
);
...
...
@@ -217,35 +217,35 @@ namespace ngraph
requantization_scale
);
}
shared_ptr
<
Node
>
ScaledQuantizedMaxPool
(
const
shared_ptr
<
Node
>&
input
,
shared_ptr
<
Node
>
ScaledQuantizedMaxPool
(
const
Output
<
Node
>&
input
,
const
Shape
&
window_shape
,
const
Strides
&
window_movement_strides
,
const
Shape
&
padding_below
,
const
Shape
&
padding_above
,
const
shared_ptr
<
Node
>&
min
,
const
shared_ptr
<
Node
>&
max
)
const
Output
<
Node
>&
min
,
const
Output
<
Node
>&
max
)
{
return
make_shared
<
op
::
QuantizedMaxPool
>
(
input
,
window_shape
,
window_movement_strides
,
padding_below
,
padding_above
);
}
shared_ptr
<
Node
>
ScaledQuantizedConvolutionBiasAdd
(
const
shared_ptr
<
Node
>&
input
,
const
shared_ptr
<
Node
>&
filters
,
const
shared_ptr
<
Node
>&
bias
,
const
shared_ptr
<
Node
>&
sum_input
,
shared_ptr
<
Node
>
ScaledQuantizedConvolutionBiasAdd
(
const
Output
<
Node
>&
input
,
const
Output
<
Node
>&
filters
,
const
Output
<
Node
>&
bias
,
const
Output
<
Node
>&
sum_input
,
const
Strides
&
window_movement_strides
,
const
Strides
&
window_dilation_strides
,
const
CoordinateDiff
&
padding_below
,
const
CoordinateDiff
&
padding_above
,
const
Strides
&
data_dilation_strides
,
const
shared_ptr
<
Node
>&
min_input
,
const
shared_ptr
<
Node
>&
max_input
,
const
shared_ptr
<
Node
>&
min_filter
,
const
shared_ptr
<
Node
>&
max_filter
,
const
shared_ptr
<
Node
>&
min_output
,
const
shared_ptr
<
Node
>&
max_output
,
const
shared_ptr
<
Node
>&
min_sum_input
,
const
shared_ptr
<
Node
>&
max_sum_input
,
const
Output
<
Node
>&
min_input
,
const
Output
<
Node
>&
max_input
,
const
Output
<
Node
>&
min_filter
,
const
Output
<
Node
>&
max_filter
,
const
Output
<
Node
>&
min_output
,
const
Output
<
Node
>&
max_output
,
const
Output
<
Node
>&
min_sum_input
,
const
Output
<
Node
>&
max_sum_input
,
const
bool
with_relu
)
{
auto
output_et
=
with_relu
?
element
::
u8
:
element
::
i8
;
...
...
@@ -256,9 +256,9 @@ namespace ngraph
min_output
,
max_output
,
min_sum_input
,
max_sum_input
);
auto
mybias
=
bias
;
if
(
bias
->
get_element_type
()
!=
element
::
i32
)
if
(
bias
.
get_element_type
()
!=
element
::
i32
)
{
auto
zero
=
make_constant
(
element
::
i32
,
min_input
->
get_shape
(),
0
);
auto
zero
=
make_constant
(
element
::
i32
,
min_input
.
get_shape
(),
0
);
AxisSet
quantization_axes
;
auto
bias_scale
=
quantization_util
::
get_bias_scale
(
min_input
,
max_input
,
min_filter
,
max_filter
);
...
...
@@ -284,23 +284,23 @@ namespace ngraph
}
shared_ptr
<
Node
>
ScaledQuantizedConvolutionBiasSignedAdd
(
const
shared_ptr
<
Node
>&
input
,
const
shared_ptr
<
Node
>&
filters
,
const
shared_ptr
<
Node
>&
bias
,
const
shared_ptr
<
Node
>&
sum_input
,
ScaledQuantizedConvolutionBiasSignedAdd
(
const
Output
<
Node
>&
input
,
const
Output
<
Node
>&
filters
,
const
Output
<
Node
>&
bias
,
const
Output
<
Node
>&
sum_input
,
const
Strides
&
window_movement_strides
,
const
Strides
&
window_dilation_strides
,
const
CoordinateDiff
&
padding_below
,
const
CoordinateDiff
&
padding_above
,
const
Strides
&
data_dilation_strides
,
const
shared_ptr
<
Node
>&
min_input
,
const
shared_ptr
<
Node
>&
max_input
,
const
shared_ptr
<
Node
>&
min_filter
,
const
shared_ptr
<
Node
>&
max_filter
,
const
shared_ptr
<
Node
>&
min_output
,
const
shared_ptr
<
Node
>&
max_output
,
const
shared_ptr
<
Node
>&
min_sum_input
,
const
shared_ptr
<
Node
>&
max_sum_input
,
const
Output
<
Node
>&
min_input
,
const
Output
<
Node
>&
max_input
,
const
Output
<
Node
>&
min_filter
,
const
Output
<
Node
>&
max_filter
,
const
Output
<
Node
>&
min_output
,
const
Output
<
Node
>&
max_output
,
const
Output
<
Node
>&
min_sum_input
,
const
Output
<
Node
>&
max_sum_input
,
const
bool
with_relu
)
{
auto
output_et
=
with_relu
?
element
::
u8
:
element
::
i8
;
...
...
@@ -317,9 +317,9 @@ namespace ngraph
}
auto
mybias
=
bias
;
if
(
bias
->
get_element_type
()
!=
element
::
i32
)
if
(
bias
.
get_element_type
()
!=
element
::
i32
)
{
auto
zero
=
make_constant
(
element
::
i32
,
min_input
->
get_shape
(),
0
);
auto
zero
=
make_constant
(
element
::
i32
,
min_input
.
get_shape
(),
0
);
AxisSet
quantization_axes
;
auto
bias_scale
=
quantization_util
::
get_bias_scale
(
min_input
,
max_input
,
min_filter
,
max_filter
);
...
...
@@ -344,15 +344,15 @@ namespace ngraph
return
make_shared
<
op
::
Convert
>
(
qconv
,
element
::
u8
);
}
shared_ptr
<
Node
>
ScaledQuantizedDotBias
(
const
shared_ptr
<
Node
>&
input
,
const
shared_ptr
<
Node
>&
filters
,
const
shared_ptr
<
Node
>&
bias
,
const
shared_ptr
<
Node
>&
min_input
,
const
shared_ptr
<
Node
>&
max_input
,
const
shared_ptr
<
Node
>&
min_filter
,
const
shared_ptr
<
Node
>&
max_filter
,
const
shared_ptr
<
Node
>&
min_output
,
const
shared_ptr
<
Node
>&
max_output
,
shared_ptr
<
Node
>
ScaledQuantizedDotBias
(
const
Output
<
Node
>&
input
,
const
Output
<
Node
>&
filters
,
const
Output
<
Node
>&
bias
,
const
Output
<
Node
>&
min_input
,
const
Output
<
Node
>&
max_input
,
const
Output
<
Node
>&
min_filter
,
const
Output
<
Node
>&
max_filter
,
const
Output
<
Node
>&
min_output
,
const
Output
<
Node
>&
max_output
,
const
bool
requantize
,
const
bool
with_relu
)
{
...
...
@@ -363,14 +363,14 @@ namespace ngraph
max_filter
,
min_output
,
max_output
,
input
->
get_element_type
(),
input
.
get_element_type
(),
with_relu
?
element
::
u8
:
element
::
i8
,
requantize
);
auto
mybias
=
bias
;
if
(
bias
->
get_element_type
()
!=
element
::
i32
)
if
(
bias
.
get_element_type
()
!=
element
::
i32
)
{
auto
zero
=
make_constant
(
element
::
i32
,
min_input
->
get_shape
(),
0
);
auto
zero
=
make_constant
(
element
::
i32
,
min_input
.
get_shape
(),
0
);
AxisSet
quantization_axes
;
auto
bias_scale
=
quantization_util
::
get_bias_scale
(
min_input
,
max_input
,
min_filter
,
max_filter
);
...
...
@@ -384,14 +384,14 @@ namespace ngraph
input
,
filters
,
mybias
,
requantization_scale
,
requantize
,
with_relu
);
}
shared_ptr
<
Node
>
ScaledQuantizedDot
(
const
shared_ptr
<
Node
>&
input
,
const
shared_ptr
<
Node
>&
filters
,
const
shared_ptr
<
Node
>&
min_input
,
const
shared_ptr
<
Node
>&
max_input
,
const
shared_ptr
<
Node
>&
min_filter
,
const
shared_ptr
<
Node
>&
max_filter
,
const
shared_ptr
<
Node
>&
min_output
,
const
shared_ptr
<
Node
>&
max_output
,
shared_ptr
<
Node
>
ScaledQuantizedDot
(
const
Output
<
Node
>&
input
,
const
Output
<
Node
>&
filters
,
const
Output
<
Node
>&
min_input
,
const
Output
<
Node
>&
max_input
,
const
Output
<
Node
>&
min_filter
,
const
Output
<
Node
>&
max_filter
,
const
Output
<
Node
>&
min_output
,
const
Output
<
Node
>&
max_output
,
const
bool
requantize
,
const
bool
with_relu
)
{
...
...
@@ -402,7 +402,7 @@ namespace ngraph
max_filter
,
min_output
,
max_output
,
input
->
get_element_type
(),
input
.
get_element_type
(),
with_relu
?
element
::
u8
:
element
::
i8
,
requantize
);
return
make_shared
<
op
::
QuantizedDot
>
(
...
...
src/ngraph/builder/quantization.hpp
View file @
34499001
...
...
@@ -32,16 +32,16 @@ namespace ngraph
{
namespace
builder
{
std
::
shared_ptr
<
Node
>
ScaledQuantize
(
const
std
::
shared_ptr
<
Node
>&
input
,
const
std
::
shared_ptr
<
Node
>&
min
,
const
std
::
shared_ptr
<
Node
>&
max
,
std
::
shared_ptr
<
Node
>
ScaledQuantize
(
const
Output
<
Node
>&
input
,
const
Output
<
Node
>&
min
,
const
Output
<
Node
>&
max
,
const
ngraph
::
element
::
Type
&
type
,
const
ngraph
::
AxisSet
&
axes
,
op
::
Quantize
::
RoundMode
round_mode
);
std
::
shared_ptr
<
Node
>
ScaledDequantize
(
const
std
::
shared_ptr
<
Node
>&
input
,
const
std
::
shared_ptr
<
Node
>&
min
,
const
std
::
shared_ptr
<
Node
>&
max
,
std
::
shared_ptr
<
Node
>
ScaledDequantize
(
const
Output
<
Node
>&
input
,
const
Output
<
Node
>&
min
,
const
Output
<
Node
>&
max
,
const
ngraph
::
element
::
Type
&
type
,
const
ngraph
::
AxisSet
&
axes
);
...
...
@@ -50,115 +50,113 @@ namespace ngraph
const
NodeVector
&
mins
,
const
NodeVector
&
maxes
);
std
::
shared_ptr
<
Node
>
ScaledQuantizedAvgPool
(
const
std
::
shared_ptr
<
Node
>&
input
,
std
::
shared_ptr
<
Node
>
ScaledQuantizedAvgPool
(
const
Output
<
Node
>&
input
,
const
Shape
&
window_shape
,
const
Strides
&
window_movement_strides
,
const
Shape
&
padding_below
,
const
Shape
&
padding_above
,
bool
include_padding_in_avg_computation
,
const
std
::
shared_ptr
<
Node
>&
min
,
const
std
::
shared_ptr
<
Node
>&
max
);
const
Output
<
Node
>&
min
,
const
Output
<
Node
>&
max
);
std
::
shared_ptr
<
Node
>
ScaledQuantizedConvolutionBias
(
const
std
::
shared_ptr
<
Node
>&
input
,
const
std
::
shared_ptr
<
Node
>&
filters
,
const
std
::
shared_ptr
<
Node
>&
bias
,
std
::
shared_ptr
<
Node
>
ScaledQuantizedConvolutionBias
(
const
Output
<
Node
>&
input
,
const
Output
<
Node
>&
filters
,
const
Output
<
Node
>&
bias
,
const
Strides
&
window_movement_strides
,
const
Strides
&
window_dilation_strides
,
const
CoordinateDiff
&
padding_below
,
const
CoordinateDiff
&
padding_above
,
const
Strides
&
data_dilation_strides
,
const
std
::
shared_ptr
<
Node
>&
min_input
,
const
std
::
shared_ptr
<
Node
>&
max_input
,
const
std
::
shared_ptr
<
Node
>&
min_filter
,
const
std
::
shared_ptr
<
Node
>&
max_filter
,
const
std
::
shared_ptr
<
Node
>&
min_output
,
const
std
::
shared_ptr
<
Node
>&
max_output
,
const
Output
<
Node
>&
min_input
,
const
Output
<
Node
>&
max_input
,
const
Output
<
Node
>&
min_filter
,
const
Output
<
Node
>&
max_filter
,
const
Output
<
Node
>&
min_output
,
const
Output
<
Node
>&
max_output
,
const
bool
with_relu
=
false
);
std
::
shared_ptr
<
Node
>
ScaledQuantizedConvolutionRelu
(
const
std
::
shared_ptr
<
Node
>&
input
,
const
std
::
shared_ptr
<
Node
>&
filters
,
std
::
shared_ptr
<
Node
>
ScaledQuantizedConvolutionRelu
(
const
Output
<
Node
>&
input
,
const
Output
<
Node
>&
filters
,
const
Strides
&
window_movement_strides
,
const
Strides
&
window_dilation_strides
,
const
CoordinateDiff
&
padding_below
,
const
CoordinateDiff
&
padding_above
,
const
Strides
&
data_dilation_strides
,
const
std
::
shared_ptr
<
Node
>&
min_input
,
const
std
::
shared_ptr
<
Node
>&
max_input
,
const
std
::
shared_ptr
<
Node
>&
min_filter
,
const
std
::
shared_ptr
<
Node
>&
max_filter
,
const
std
::
shared_ptr
<
Node
>&
min_output
,
const
std
::
shared_ptr
<
Node
>&
max_output
);
const
Output
<
Node
>&
min_input
,
const
Output
<
Node
>&
max_input
,
const
Output
<
Node
>&
min_filter
,
const
Output
<
Node
>&
max_filter
,
const
Output
<
Node
>&
min_output
,
const
Output
<
Node
>&
max_output
);
std
::
shared_ptr
<
Node
>
ScaledQuantizedMaxPool
(
const
std
::
shared_ptr
<
Node
>&
input
,
std
::
shared_ptr
<
Node
>
ScaledQuantizedMaxPool
(
const
Output
<
Node
>&
input
,
const
Shape
&
window_shape
,
const
Strides
&
window_movement_strides
,
const
Shape
&
padding_below
,
const
Shape
&
padding_above
,
const
std
::
shared_ptr
<
Node
>&
min
,
const
std
::
shared_ptr
<
Node
>&
max
);
const
Output
<
Node
>&
min
,
const
Output
<
Node
>&
max
);
std
::
shared_ptr
<
Node
>
ScaledQuantizedConvolutionBiasAdd
(
const
std
::
shared_ptr
<
Node
>&
input
,
const
std
::
shared_ptr
<
Node
>&
filters
,
const
std
::
shared_ptr
<
Node
>&
bias
,
const
std
::
shared_ptr
<
Node
>&
sum_input
,
ScaledQuantizedConvolutionBiasAdd
(
const
Output
<
Node
>&
input
,
const
Output
<
Node
>&
filters
,
const
Output
<
Node
>&
bias
,
const
Output
<
Node
>&
sum_input
,
const
Strides
&
window_movement_strides
,
const
Strides
&
window_dilation_strides
,
const
CoordinateDiff
&
padding_below
,
const
CoordinateDiff
&
padding_above
,
const
Strides
&
data_dilation_strides
,
const
std
::
shared_ptr
<
Node
>&
min_input
,
const
std
::
shared_ptr
<
Node
>&
max_input
,
const
std
::
shared_ptr
<
Node
>&
min_filter
,
const
std
::
shared_ptr
<
Node
>&
max_filter
,
const
std
::
shared_ptr
<
Node
>&
min_output
,
const
std
::
shared_ptr
<
Node
>&
max_output
,
const
std
::
shared_ptr
<
Node
>&
min_sum_input
,
const
std
::
shared_ptr
<
Node
>&
max_sum_input
,
const
Output
<
Node
>&
min_input
,
const
Output
<
Node
>&
max_input
,
const
Output
<
Node
>&
min_filter
,
const
Output
<
Node
>&
max_filter
,
const
Output
<
Node
>&
min_output
,
const
Output
<
Node
>&
max_output
,
const
Output
<
Node
>&
min_sum_input
,
const
Output
<
Node
>&
max_sum_input
,
const
bool
with_relu
=
false
);
std
::
shared_ptr
<
Node
>
ScaledQuantizedConvolutionBiasSignedAdd
(
const
std
::
shared_ptr
<
Node
>&
input
,
const
std
::
shared_ptr
<
Node
>&
filters
,
const
std
::
shared_ptr
<
Node
>&
bias
,
const
std
::
shared_ptr
<
Node
>&
sum_input
,
ScaledQuantizedConvolutionBiasSignedAdd
(
const
Output
<
Node
>&
input
,
const
Output
<
Node
>&
filters
,
const
Output
<
Node
>&
bias
,
const
Output
<
Node
>&
sum_input
,
const
Strides
&
window_movement_strides
,
const
Strides
&
window_dilation_strides
,
const
CoordinateDiff
&
padding_below
,
const
CoordinateDiff
&
padding_above
,
const
Strides
&
data_dilation_strides
,
const
std
::
shared_ptr
<
Node
>&
min_input
,
const
std
::
shared_ptr
<
Node
>&
max_input
,
const
std
::
shared_ptr
<
Node
>&
min_filter
,
const
std
::
shared_ptr
<
Node
>&
max_filter
,
const
std
::
shared_ptr
<
Node
>&
min_output
,
const
std
::
shared_ptr
<
Node
>&
max_output
,
const
std
::
shared_ptr
<
Node
>&
min_sum_input
,
const
std
::
shared_ptr
<
Node
>&
max_sum_input
,
const
Output
<
Node
>&
min_input
,
const
Output
<
Node
>&
max_input
,
const
Output
<
Node
>&
min_filter
,
const
Output
<
Node
>&
max_filter
,
const
Output
<
Node
>&
min_output
,
const
Output
<
Node
>&
max_output
,
const
Output
<
Node
>&
min_sum_input
,
const
Output
<
Node
>&
max_sum_input
,
const
bool
with_relu
=
false
);
std
::
shared_ptr
<
Node
>
ScaledQuantizedDotBias
(
const
std
::
shared_ptr
<
Node
>&
input
,
const
std
::
shared_ptr
<
Node
>&
filters
,
const
std
::
shared_ptr
<
Node
>&
bias
,
const
std
::
shared_ptr
<
Node
>&
min_input
,
const
std
::
shared_ptr
<
Node
>&
max_input
,
const
std
::
shared_ptr
<
Node
>&
min_filter
,
const
std
::
shared_ptr
<
Node
>&
max_filter
,
const
std
::
shared_ptr
<
Node
>&
min_output
,
const
std
::
shared_ptr
<
Node
>&
max_output
,
std
::
shared_ptr
<
Node
>
ScaledQuantizedDotBias
(
const
Output
<
Node
>&
input
,
const
Output
<
Node
>&
filters
,
const
Output
<
Node
>&
bias
,
const
Output
<
Node
>&
min_input
,
const
Output
<
Node
>&
max_input
,
const
Output
<
Node
>&
min_filter
,
const
Output
<
Node
>&
max_filter
,
const
Output
<
Node
>&
min_output
,
const
Output
<
Node
>&
max_output
,
const
bool
requantize
=
true
,
const
bool
with_relu
=
false
);
std
::
shared_ptr
<
Node
>
ScaledQuantizedDot
(
const
std
::
shared_ptr
<
Node
>&
input
,
const
std
::
shared_ptr
<
Node
>&
filters
,
const
std
::
shared_ptr
<
Node
>&
min_input
,
const
std
::
shared_ptr
<
Node
>&
max_input
,
const
std
::
shared_ptr
<
Node
>&
min_filter
,
const
std
::
shared_ptr
<
Node
>&
max_filter
,
const
std
::
shared_ptr
<
Node
>&
min_output
,
const
std
::
shared_ptr
<
Node
>&
max_output
,
std
::
shared_ptr
<
Node
>
ScaledQuantizedDot
(
const
Output
<
Node
>&
input
,
const
Output
<
Node
>&
filters
,
const
Output
<
Node
>&
min_input
,
const
Output
<
Node
>&
max_input
,
const
Output
<
Node
>&
min_filter
,
const
Output
<
Node
>&
max_filter
,
const
Output
<
Node
>&
min_output
,
const
Output
<
Node
>&
max_output
,
const
bool
requantize
=
true
,
const
bool
with_relu
=
false
);
...
...
src/ngraph/builder/quantization/quantized_linear_convolution.cpp
View file @
34499001
...
...
@@ -36,25 +36,25 @@ namespace ngraph
{
namespace
quantization
{
shared_ptr
<
Node
>
QuantizedLinearConvolutionBias
(
const
shared_ptr
<
Node
>&
input
,
const
shared_ptr
<
Node
>&
filter
,
const
shared_ptr
<
Node
>&
bias
,
shared_ptr
<
Node
>
QuantizedLinearConvolutionBias
(
const
Output
<
Node
>&
input
,
const
Output
<
Node
>&
filter
,
const
Output
<
Node
>&
bias
,
const
Strides
&
window_movement_strides
,
const
Strides
&
window_dilation_strides
,
const
CoordinateDiff
&
padding_below
,
const
CoordinateDiff
&
padding_above
,
const
Strides
&
data_dilation_strides
,
const
shared_ptr
<
Node
>&
input_scale
,
const
shared_ptr
<
Node
>&
filter_scale
,
const
shared_ptr
<
Node
>&
output_scale
)
const
Output
<
Node
>&
input_scale
,
const
Output
<
Node
>&
filter_scale
,
const
Output
<
Node
>&
output_scale
)
{
// TODO: need to establish cross-nGraph view of scale (mult or div)
auto
requantization_scale
=
(
input_scale
*
filter_scale
)
/
output_scale
;
auto
mybias
=
bias
;
if
(
bias
->
get_element_type
()
!=
element
::
i32
)
if
(
bias
.
get_element_type
()
!=
element
::
i32
)
{
const
auto
zero
=
make_constant
(
element
::
i32
,
input_scale
->
get_shape
(),
0
);
const
auto
zero
=
make_constant
(
element
::
i32
,
input_scale
.
get_shape
(),
0
);
const
AxisSet
quantization_axes
;
const
auto
bias_scale
=
input_scale
*
filter_scale
;
op
::
Quantize
::
RoundMode
round_mode
=
...
...
src/ngraph/builder/quantization/quantized_linear_convolution.hpp
View file @
34499001
...
...
@@ -26,17 +26,17 @@ namespace ngraph
namespace
quantization
{
std
::
shared_ptr
<
Node
>
QuantizedLinearConvolutionBias
(
const
std
::
shared_ptr
<
Node
>&
input
,
const
std
::
shared_ptr
<
Node
>&
filter
,
const
std
::
shared_ptr
<
Node
>&
bias
,
QuantizedLinearConvolutionBias
(
const
Output
<
Node
>&
input
,
const
Output
<
Node
>&
filter
,
const
Output
<
Node
>&
bias
,
const
Strides
&
window_movement_strides
,
const
Strides
&
window_dilation_strides
,
const
CoordinateDiff
&
padding_below
,
const
CoordinateDiff
&
padding_above
,
const
Strides
&
data_dilation_strides
,
const
std
::
shared_ptr
<
Node
>&
input_scale
,
const
std
::
shared_ptr
<
Node
>&
filter_scale
,
const
std
::
shared_ptr
<
Node
>&
output_scale
);
const
Output
<
Node
>&
input_scale
,
const
Output
<
Node
>&
filter_scale
,
const
Output
<
Node
>&
output_scale
);
}
}
}
src/ngraph/builder/quantization/quantized_linear_matmul.cpp
View file @
34499001
...
...
@@ -39,14 +39,14 @@ namespace ngraph
{
// TODO: this code is falling back to fp32 dot
// 1) add support in reference kernel for zero point
shared_ptr
<
Node
>
QuantizedLinearMatmul
(
const
shared_ptr
<
Node
>&
input0
,
const
shared_ptr
<
Node
>&
input1
,
const
shared_ptr
<
Node
>&
input0_scale
,
const
shared_ptr
<
Node
>&
input0_zero_point
,
const
shared_ptr
<
Node
>&
input1_scale
,
const
shared_ptr
<
Node
>&
input1_zero_point
,
const
shared_ptr
<
Node
>&
output_scale
,
const
shared_ptr
<
Node
>&
output_zero_point
)
shared_ptr
<
Node
>
QuantizedLinearMatmul
(
const
Output
<
Node
>&
input0
,
const
Output
<
Node
>&
input1
,
const
Output
<
Node
>&
input0_scale
,
const
Output
<
Node
>&
input0_zero_point
,
const
Output
<
Node
>&
input1_scale
,
const
Output
<
Node
>&
input1_zero_point
,
const
Output
<
Node
>&
output_scale
,
const
Output
<
Node
>&
output_zero_point
)
{
// Check if zero point is constant and zero
if
(
ngraph
::
is_zero
(
input0_zero_point
)
&&
ngraph
::
is_zero
(
input1_zero_point
)
&&
...
...
@@ -62,13 +62,13 @@ namespace ngraph
auto
dq_input0
=
make_shared
<
op
::
Dequantize
>
(
input0
,
input0_scale
,
input0_zero_point
,
input0_scale
->
get_element_type
(),
input0_scale
.
get_element_type
(),
axes
);
auto
dq_input1
=
make_shared
<
op
::
Dequantize
>
(
input1
,
input1_scale
,
input1_zero_point
,
input1_scale
->
get_element_type
(),
input1_scale
.
get_element_type
(),
axes
);
auto
dot
=
make_shared
<
op
::
Dot
>
(
dq_input0
,
dq_input1
,
1
);
...
...
@@ -76,24 +76,23 @@ namespace ngraph
dot
,
output_scale
,
output_zero_point
,
output_zero_point
->
get_element_type
(),
output_zero_point
.
get_element_type
(),
axes
,
op
::
Quantize
::
RoundMode
::
ROUND_NEAREST_TOWARD_EVEN
);
}
}
shared_ptr
<
Node
>
QuantizedLinearMatmulInteger
(
const
shared_ptr
<
Node
>&
input0
,
const
shared_ptr
<
Node
>&
input1
)
shared_ptr
<
Node
>
QuantizedLinearMatmulInteger
(
const
Output
<
Node
>&
input0
,
const
Output
<
Node
>&
input1
)
{
auto
output_scale
=
make_constant
(
element
::
f32
,
Shape
{},
1
);
return
make_shared
<
op
::
QuantizedDot
>
(
input0
,
input1
,
output_scale
,
false
,
false
);
}
shared_ptr
<
Node
>
QuantizedLinearMatmulInteger
(
const
std
::
shared_ptr
<
Node
>&
input0
,
const
std
::
shared_ptr
<
Node
>&
input1
,
const
std
::
shared_ptr
<
Node
>&
input0_zero_point
,
const
std
::
shared_ptr
<
Node
>&
input1_zero_point
)
shared_ptr
<
Node
>
QuantizedLinearMatmulInteger
(
const
Output
<
Node
>&
input0
,
const
Output
<
Node
>&
input1
,
const
Output
<
Node
>&
input0_zero_point
,
const
Output
<
Node
>&
input1_zero_point
)
{
// Check if zero points are constant and zero
if
(
ngraph
::
is_zero
(
input0_zero_point
)
&&
ngraph
::
is_zero
(
input1_zero_point
))
...
...
src/ngraph/builder/quantization/quantized_linear_matmul.hpp
View file @
34499001
...
...
@@ -25,24 +25,23 @@ namespace ngraph
{
namespace
quantization
{
std
::
shared_ptr
<
Node
>
QuantizedLinearMatmul
(
const
std
::
shared_ptr
<
Node
>&
input0
,
const
std
::
shared_ptr
<
Node
>&
input1
,
const
std
::
shared_ptr
<
Node
>&
input0_scale
,
const
std
::
shared_ptr
<
Node
>&
input0_zero_point
,
const
std
::
shared_ptr
<
Node
>&
input1_scale
,
const
std
::
shared_ptr
<
Node
>&
input1_zero_point
,
const
std
::
shared_ptr
<
Node
>&
output_scale
,
const
std
::
shared_ptr
<
Node
>&
output_zero_point
);
std
::
shared_ptr
<
Node
>
QuantizedLinearMatmul
(
const
Output
<
Node
>&
input0
,
const
Output
<
Node
>&
input1
,
const
Output
<
Node
>&
input0_scale
,
const
Output
<
Node
>&
input0_zero_point
,
const
Output
<
Node
>&
input1_scale
,
const
Output
<
Node
>&
input1_zero_point
,
const
Output
<
Node
>&
output_scale
,
const
Output
<
Node
>&
output_zero_point
);
std
::
shared_ptr
<
Node
>
QuantizedLinearMatmulInteger
(
const
std
::
shared_ptr
<
Node
>&
input0
,
const
std
::
shared_ptr
<
Node
>&
input1
);
std
::
shared_ptr
<
Node
>
QuantizedLinearMatmulInteger
(
const
Output
<
Node
>&
input0
,
const
Output
<
Node
>&
input1
);
std
::
shared_ptr
<
Node
>
QuantizedLinearMatmulInteger
(
const
std
::
shared_ptr
<
Node
>&
input0
,
const
std
::
shared_ptr
<
Node
>&
input1
,
const
std
::
shared_ptr
<
Node
>&
input0_zero_point
,
const
std
::
shared_ptr
<
Node
>&
input1_zero_point
);
QuantizedLinearMatmulInteger
(
const
Output
<
Node
>&
input0
,
const
Output
<
Node
>&
input1
,
const
Output
<
Node
>&
input0_zero_point
,
const
Output
<
Node
>&
input1_zero_point
);
}
}
}
src/ngraph/builder/quantization_util.hpp
View file @
34499001
...
...
@@ -37,7 +37,7 @@ namespace ngraph
{
namespace
quantization_util
{
std
::
shared_ptr
<
Node
>
max_abs
(
std
::
shared_ptr
<
Node
>
a
,
std
::
shared_ptr
<
Node
>
b
)
std
::
shared_ptr
<
Node
>
max_abs
(
Output
<
Node
>
a
,
Output
<
Node
>
b
)
{
auto
abs_a
=
std
::
make_shared
<
op
::
Abs
>
(
a
);
auto
abs_b
=
std
::
make_shared
<
op
::
Abs
>
(
b
);
...
...
@@ -45,22 +45,22 @@ namespace ngraph
}
std
::
pair
<
std
::
shared_ptr
<
Node
>
,
std
::
shared_ptr
<
Node
>>
quantization_range_for_multiplication
(
std
::
shared_ptr
<
Node
>
min_a
,
std
::
shared_ptr
<
Node
>
max_a
,
std
::
shared_ptr
<
Node
>
min_b
,
std
::
shared_ptr
<
Node
>
max_b
)
quantization_range_for_multiplication
(
Output
<
Node
>
min_a
,
Output
<
Node
>
max_a
,
Output
<
Node
>
min_b
,
Output
<
Node
>
max_b
)
{
auto
type
=
min_a
->
get_element_type
();
if
(
type
!=
max_a
->
get_element_type
()
||
type
!=
min_b
->
get_element_type
()
||
type
!=
max_b
->
get_element_type
())
auto
type
=
min_a
.
get_element_type
();
if
(
type
!=
max_a
.
get_element_type
()
||
type
!=
min_b
.
get_element_type
()
||
type
!=
max_b
.
get_element_type
())
{
throw
ngraph_error
(
"quantization_range_for_multiplication: min and max must have same type"
);
}
auto
shape
=
min_a
->
get_shape
();
if
(
shape
!=
max_a
->
get_shape
()
||
shape
!=
min_b
->
get_shape
()
||
shape
!=
max_b
->
get_shape
())
auto
shape
=
min_a
.
get_shape
();
if
(
shape
!=
max_a
.
get_shape
()
||
shape
!=
min_b
.
get_shape
()
||
shape
!=
max_b
.
get_shape
())
{
throw
ngraph_error
(
"quantization_range_for_multiplication: min and max must have same shape"
);
...
...
@@ -87,28 +87,27 @@ namespace ngraph
return
std
::
pair
<
std
::
shared_ptr
<
Node
>
,
std
::
shared_ptr
<
Node
>>
(
min_c
,
max_c
);
}
std
::
shared_ptr
<
Node
>
get_scale
(
std
::
shared_ptr
<
Node
>
min_input
,
std
::
shared_ptr
<
Node
>
max_input
,
std
::
shared_ptr
<
Node
>
min_filter
,
std
::
shared_ptr
<
Node
>
max_filter
,
std
::
shared_ptr
<
Node
>
min_freezed_output
,
std
::
shared_ptr
<
Node
>
max_freezed_output
,
std
::
shared_ptr
<
Node
>
get_scale
(
Output
<
Node
>
min_input
,
Output
<
Node
>
max_input
,
Output
<
Node
>
min_filter
,
Output
<
Node
>
max_filter
,
Output
<
Node
>
min_freezed_output
,
Output
<
Node
>
max_freezed_output
,
const
ngraph
::
element
::
Type
&
output_type
)
{
auto
type
=
min_input
->
get_element_type
();
if
(
type
!=
max_input
->
get_element_type
()
||
type
!=
min_filter
->
get_element_type
()
||
type
!=
max_filter
->
get_element_type
()
||
type
!=
min_freezed_output
->
get_element_type
()
||
type
!=
max_freezed_output
->
get_element_type
())
auto
type
=
min_input
.
get_element_type
();
if
(
type
!=
max_input
.
get_element_type
()
||
type
!=
min_filter
.
get_element_type
()
||
type
!=
max_filter
.
get_element_type
()
||
type
!=
min_freezed_output
.
get_element_type
()
||
type
!=
max_freezed_output
.
get_element_type
())
{
throw
ngraph_error
(
"get_scale: min and max must have same type"
);
}
auto
shape
=
min_input
->
get_shape
();
if
(
shape
!=
max_input
->
get_shape
()
||
shape
!=
min_filter
->
get_shape
()
||
shape
!=
max_filter
->
get_shape
()
||
shape
!=
min_freezed_output
->
get_shape
()
||
shape
!=
max_freezed_output
->
get_shape
())
auto
shape
=
min_input
.
get_shape
();
if
(
shape
!=
max_input
.
get_shape
()
||
shape
!=
min_filter
.
get_shape
()
||
shape
!=
max_filter
.
get_shape
()
||
shape
!=
min_freezed_output
.
get_shape
()
||
shape
!=
max_freezed_output
.
get_shape
())
{
throw
ngraph_error
(
"get_scale: min and max must have same shape"
);
}
...
...
@@ -147,22 +146,21 @@ namespace ngraph
(
max_abs32
/
max_abs8
);
}
std
::
shared_ptr
<
Node
>
get_bias_scale
(
std
::
shared_ptr
<
Node
>
min_input
,
std
::
shared_ptr
<
Node
>
max_input
,
std
::
shared_ptr
<
Node
>
min_filter
,
std
::
shared_ptr
<
Node
>
max_filter
)
std
::
shared_ptr
<
Node
>
get_bias_scale
(
Output
<
Node
>
min_input
,
Output
<
Node
>
max_input
,
Output
<
Node
>
min_filter
,
Output
<
Node
>
max_filter
)
{
auto
type
=
min_input
->
get_element_type
();
if
(
type
!=
max_input
->
get_element_type
()
||
type
!=
min_filter
->
get_element_type
()
||
type
!=
max_filter
->
get_element_type
())
auto
type
=
min_input
.
get_element_type
();
if
(
type
!=
max_input
.
get_element_type
()
||
type
!=
min_filter
.
get_element_type
()
||
type
!=
max_filter
.
get_element_type
())
{
throw
ngraph_error
(
"get_bias_scale: min and max must have same type"
);
}
auto
shape
=
min_input
->
get_shape
();
if
(
shape
!=
max_input
->
get_shape
()
||
shape
!=
min_filter
->
get_shape
()
||
shape
!=
max_filter
->
get_shape
())
auto
shape
=
min_input
.
get_shape
();
if
(
shape
!=
max_input
.
get_shape
()
||
shape
!=
min_filter
.
get_shape
()
||
shape
!=
max_filter
.
get_shape
())
{
throw
ngraph_error
(
"get_bias_scale: min and max must have same shape"
);
}
...
...
@@ -178,23 +176,23 @@ namespace ngraph
return
(
max_abs_input_range
*
max_abs_filter_range
)
/
range
;
}
std
::
shared_ptr
<
Node
>
get_sum_scale
(
std
::
shared_ptr
<
Node
>
min_freezed_output_conv_1
,
std
::
shared_ptr
<
Node
>
max_freezed_output_conv_1
,
std
::
shared_ptr
<
Node
>
min_freezed_output_conv_2
,
std
::
shared_ptr
<
Node
>
max_freezed_output_conv_2
)
std
::
shared_ptr
<
Node
>
get_sum_scale
(
Output
<
Node
>
min_freezed_output_conv_1
,
Output
<
Node
>
max_freezed_output_conv_1
,
Output
<
Node
>
min_freezed_output_conv_2
,
Output
<
Node
>
max_freezed_output_conv_2
)
{
auto
type
=
min_freezed_output_conv_1
->
get_element_type
();
if
(
type
!=
max_freezed_output_conv_1
->
get_element_type
()
||
type
!=
min_freezed_output_conv_2
->
get_element_type
()
||
type
!=
max_freezed_output_conv_2
->
get_element_type
())
auto
type
=
min_freezed_output_conv_1
.
get_element_type
();
if
(
type
!=
max_freezed_output_conv_1
.
get_element_type
()
||
type
!=
min_freezed_output_conv_2
.
get_element_type
()
||
type
!=
max_freezed_output_conv_2
.
get_element_type
())
{
throw
ngraph_error
(
"get_sum_scale: min and max must have same type"
);
}
auto
shape
=
min_freezed_output_conv_1
->
get_shape
();
if
(
shape
!=
max_freezed_output_conv_1
->
get_shape
()
||
shape
!=
min_freezed_output_conv_2
->
get_shape
()
||
shape
!=
max_freezed_output_conv_2
->
get_shape
())
auto
shape
=
min_freezed_output_conv_1
.
get_shape
();
if
(
shape
!=
max_freezed_output_conv_1
.
get_shape
()
||
shape
!=
min_freezed_output_conv_2
.
get_shape
()
||
shape
!=
max_freezed_output_conv_2
.
get_shape
())
{
throw
ngraph_error
(
"get_sum_scale: min and max must have same shape"
);
}
...
...
@@ -204,19 +202,19 @@ namespace ngraph
return
max_abs_conv_2
/
max_abs_conv_1
;
}
std
::
shared_ptr
<
Node
>
get_scale
(
std
::
shared_ptr
<
Node
>
input_min_range
,
std
::
shared_ptr
<
Node
>
input_max_range
,
std
::
shared_ptr
<
Node
>
get_scale
(
Output
<
Node
>
input_min_range
,
Output
<
Node
>
input_max_range
,
const
ngraph
::
element
::
Type
&
quant_type
,
bool
bump_by_eps
=
false
)
{
auto
type
=
input_min_range
->
get_element_type
();
if
(
type
!=
input_max_range
->
get_element_type
())
auto
type
=
input_min_range
.
get_element_type
();
if
(
type
!=
input_max_range
.
get_element_type
())
{
throw
ngraph_error
(
"get_scale: min and max must have same type"
);
}
auto
shape
=
input_min_range
->
get_shape
();
if
(
shape
!=
input_max_range
->
get_shape
())
auto
shape
=
input_min_range
.
get_shape
();
if
(
shape
!=
input_max_range
.
get_shape
())
{
throw
ngraph_error
(
"get_scale: min and max must have same shape"
);
}
...
...
@@ -277,30 +275,29 @@ namespace ngraph
}
}
std
::
shared_ptr
<
Node
>
get_dot_scale
(
std
::
shared_ptr
<
Node
>
min_input
,
std
::
shared_ptr
<
Node
>
max_input
,
std
::
shared_ptr
<
Node
>
min_filter
,
std
::
shared_ptr
<
Node
>
max_filter
,
std
::
shared_ptr
<
Node
>
min_freezed_output
,
std
::
shared_ptr
<
Node
>
max_freezed_output
,
std
::
shared_ptr
<
Node
>
get_dot_scale
(
Output
<
Node
>
min_input
,
Output
<
Node
>
max_input
,
Output
<
Node
>
min_filter
,
Output
<
Node
>
max_filter
,
Output
<
Node
>
min_freezed_output
,
Output
<
Node
>
max_freezed_output
,
const
ngraph
::
element
::
Type
&
input_type
,
const
ngraph
::
element
::
Type
&
output_type
,
const
bool
requantize
=
true
)
{
auto
type
=
min_input
->
get_element_type
();
if
(
type
!=
max_input
->
get_element_type
()
||
type
!=
min_filter
->
get_element_type
()
||
type
!=
max_filter
->
get_element_type
()
||
type
!=
min_freezed_output
->
get_element_type
()
||
type
!=
max_freezed_output
->
get_element_type
())
auto
type
=
min_input
.
get_element_type
();
if
(
type
!=
max_input
.
get_element_type
()
||
type
!=
min_filter
.
get_element_type
()
||
type
!=
max_filter
.
get_element_type
()
||
type
!=
min_freezed_output
.
get_element_type
()
||
type
!=
max_freezed_output
.
get_element_type
())
{
throw
ngraph_error
(
"get_dot_scale: min and max must have same type"
);
}
auto
shape
=
min_input
->
get_shape
();
if
(
shape
!=
max_input
->
get_shape
()
||
shape
!=
min_filter
->
get_shape
()
||
shape
!=
max_filter
->
get_shape
()
||
shape
!=
min_freezed_output
->
get_shape
()
||
shape
!=
max_freezed_output
->
get_shape
())
auto
shape
=
min_input
.
get_shape
();
if
(
shape
!=
max_input
.
get_shape
()
||
shape
!=
min_filter
.
get_shape
()
||
shape
!=
max_filter
.
get_shape
()
||
shape
!=
min_freezed_output
.
get_shape
()
||
shape
!=
max_freezed_output
.
get_shape
())
{
throw
ngraph_error
(
"get_dot_scale: min and max must have same shape"
);
}
...
...
src/ngraph/builder/quantization_utils.cpp
View file @
34499001
...
...
@@ -22,26 +22,26 @@ namespace ngraph
{
namespace
quantization_utils
{
std
::
shared_ptr
<
Node
>
max_abs
(
std
::
shared_ptr
<
Node
>
a
,
std
::
shared_ptr
<
Node
>
b
)
std
::
shared_ptr
<
Node
>
max_abs
(
const
Output
<
Node
>&
a
,
const
Output
<
Node
>&
b
)
{
auto
abs_a
=
std
::
make_shared
<
op
::
Abs
>
(
a
);
auto
abs_b
=
std
::
make_shared
<
op
::
Abs
>
(
b
);
return
std
::
make_shared
<
op
::
Maximum
>
(
abs_a
,
abs_b
);
}
std
::
shared_ptr
<
Node
>
get_scale
(
std
::
shared_ptr
<
Node
>
input_min_range
,
std
::
shared_ptr
<
Node
>
input_max_range
,
std
::
shared_ptr
<
Node
>
get_scale
(
const
Output
<
Node
>&
input_min_range
,
const
Output
<
Node
>&
input_max_range
,
const
ngraph
::
element
::
Type
&
quant_type
,
bool
bump_by_eps
)
{
auto
type
=
input_min_range
->
get_element_type
();
if
(
type
!=
input_max_range
->
get_element_type
())
auto
type
=
input_min_range
.
get_element_type
();
if
(
type
!=
input_max_range
.
get_element_type
())
{
throw
ngraph_error
(
"get_scale: min and max must have same type"
);
}
auto
shape
=
input_min_range
->
get_shape
();
if
(
shape
!=
input_max_range
->
get_shape
())
auto
shape
=
input_min_range
.
get_shape
();
if
(
shape
!=
input_max_range
.
get_shape
())
{
throw
ngraph_error
(
"get_scale: min and max must have same shape"
);
}
...
...
src/ngraph/builder/quantization_utils.hpp
View file @
34499001
...
...
@@ -37,10 +37,10 @@ namespace ngraph
{
namespace
quantization_utils
{
std
::
shared_ptr
<
Node
>
max_abs
(
std
::
shared_ptr
<
Node
>
a
,
std
::
shared_ptr
<
Node
>
b
);
std
::
shared_ptr
<
Node
>
max_abs
(
const
Output
<
Node
>&
a
,
const
Output
<
Node
>&
b
);
std
::
shared_ptr
<
Node
>
get_scale
(
std
::
shared_ptr
<
Node
>
input_min_range
,
std
::
shared_ptr
<
Node
>
input_max_range
,
std
::
shared_ptr
<
Node
>
get_scale
(
const
Output
<
Node
>&
input_min_range
,
const
Output
<
Node
>&
input_max_range
,
const
ngraph
::
element
::
Type
&
quant_type
,
bool
bump_by_eps
=
false
);
}
...
...
src/ngraph/builder/quantized_conv_builder.cpp
View file @
34499001
...
...
@@ -26,35 +26,34 @@ namespace ngraph
{
namespace
builder
{
shared_ptr
<
Node
>
QuantizedConvolutionBuilder
(
const
shared_ptr
<
Node
>&
input
,
const
shared_ptr
<
Node
>&
filters
,
shared_ptr
<
Node
>
QuantizedConvolutionBuilder
(
const
Output
<
Node
>&
input
,
const
Output
<
Node
>&
filters
,
const
Strides
&
window_movement_strides
,
const
Strides
&
window_dilation_strides
,
const
CoordinateDiff
&
padding_below
,
const
CoordinateDiff
&
padding_above
,
const
Strides
&
data_dilation_strides
,
const
shared_ptr
<
Node
>&
min_input
,
const
shared_ptr
<
Node
>&
max_input
,
const
shared_ptr
<
Node
>&
min_filter
,
const
shared_ptr
<
Node
>&
max_filter
,
const
shared_ptr
<
Node
>&
min_output
,
const
shared_ptr
<
Node
>&
max_output
,
const
Output
<
Node
>&
min_input
,
const
Output
<
Node
>&
max_input
,
const
Output
<
Node
>&
min_filter
,
const
Output
<
Node
>&
max_filter
,
const
Output
<
Node
>&
min_output
,
const
Output
<
Node
>&
max_output
,
const
ngraph
::
element
::
Type
&
output_type
,
const
ngraph
::
AxisSet
&
input_axes
,
const
ngraph
::
AxisSet
&
filter_axes
,
const
ngraph
::
AxisSet
&
output_axes
)
{
auto
input_scale
=
quantization_utils
::
get_scale
(
min_input
,
max_input
,
input
->
get_element_type
());
quantization_utils
::
get_scale
(
min_input
,
max_input
,
input
.
get_element_type
());
auto
filter_scale
=
quantization_utils
::
get_scale
(
min_filter
,
max_filter
,
filters
->
get_element_type
());
quantization_utils
::
get_scale
(
min_filter
,
max_filter
,
filters
.
get_element_type
());
auto
output_scale
=
quantization_utils
::
get_scale
(
min_output
,
max_output
,
output_type
);
// TODO: Check for this later
// For Builders the zero point is assumed to be zero (for now)
auto
input_zero_point
=
op
::
Constant
::
create
(
input
->
get_element_type
(),
Shape
{},
{
0
});
auto
filter_zero_point
=
op
::
Constant
::
create
(
filters
->
get_element_type
(),
Shape
{},
{
0
});
auto
input_zero_point
=
op
::
Constant
::
create
(
input
.
get_element_type
(),
Shape
{},
{
0
});
auto
filter_zero_point
=
op
::
Constant
::
create
(
filters
.
get_element_type
(),
Shape
{},
{
0
});
return
make_shared
<
op
::
QuantizedConvolution
>
(
input
,
...
...
src/ngraph/builder/quantized_conv_builder.hpp
View file @
34499001
...
...
@@ -26,19 +26,19 @@ namespace ngraph
namespace
builder
{
std
::
shared_ptr
<
Node
>
QuantizedConvolutionBuilder
(
const
std
::
shared_ptr
<
Node
>&
input
,
const
std
::
shared_ptr
<
Node
>&
filters
,
QuantizedConvolutionBuilder
(
const
Output
<
Node
>&
input
,
const
Output
<
Node
>&
filters
,
const
Strides
&
window_movement_strides
,
const
Strides
&
window_dilation_strides
,
const
CoordinateDiff
&
padding_below
,
const
CoordinateDiff
&
padding_above
,
const
Strides
&
data_dilation_strides
,
const
std
::
shared_ptr
<
Node
>&
min_input
,
const
std
::
shared_ptr
<
Node
>&
max_input
,
const
std
::
shared_ptr
<
Node
>&
min_filter
,
const
std
::
shared_ptr
<
Node
>&
max_filter
,
const
std
::
shared_ptr
<
Node
>&
min_output
,
const
std
::
shared_ptr
<
Node
>&
max_output
,
const
Output
<
Node
>&
min_input
,
const
Output
<
Node
>&
max_input
,
const
Output
<
Node
>&
min_filter
,
const
Output
<
Node
>&
max_filter
,
const
Output
<
Node
>&
min_output
,
const
Output
<
Node
>&
max_output
,
const
ngraph
::
element
::
Type
&
output_type
,
const
ngraph
::
AxisSet
&
input_axes
=
ngraph
::
AxisSet
{},
const
ngraph
::
AxisSet
&
filter_axes
=
ngraph
::
AxisSet
{},
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment