Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
N
ngraph
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ngraph
Commits
34499001
Unverified
Commit
34499001
authored
Jul 27, 2019
by
Scott Cyphers
Committed by
GitHub
Jul 27, 2019
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Quantization conversion from nodes to outputs (#3316)
parent
8eb63379
Expand all
Show whitespace changes
Inline
Side-by-side
Showing
11 changed files
with
75 additions
and
78 deletions
+75
-78
quantization.cpp
src/ngraph/builder/quantization.cpp
+0
-0
quantization.hpp
src/ngraph/builder/quantization.hpp
+0
-0
quantized_linear_convolution.cpp
...aph/builder/quantization/quantized_linear_convolution.cpp
+8
-8
quantized_linear_convolution.hpp
...aph/builder/quantization/quantized_linear_convolution.hpp
+6
-6
quantized_linear_matmul.cpp
src/ngraph/builder/quantization/quantized_linear_matmul.cpp
+17
-18
quantized_linear_matmul.hpp
src/ngraph/builder/quantization/quantized_linear_matmul.hpp
+14
-15
quantization_util.hpp
src/ngraph/builder/quantization_util.hpp
+0
-0
quantization_utils.cpp
src/ngraph/builder/quantization_utils.cpp
+7
-7
quantization_utils.hpp
src/ngraph/builder/quantization_utils.hpp
+3
-3
quantized_conv_builder.cpp
src/ngraph/builder/quantized_conv_builder.cpp
+12
-13
quantized_conv_builder.hpp
src/ngraph/builder/quantized_conv_builder.hpp
+8
-8
No files found.
src/ngraph/builder/quantization.cpp
View file @
34499001
This diff is collapsed.
Click to expand it.
src/ngraph/builder/quantization.hpp
View file @
34499001
This diff is collapsed.
Click to expand it.
src/ngraph/builder/quantization/quantized_linear_convolution.cpp
View file @
34499001
...
...
@@ -36,25 +36,25 @@ namespace ngraph
{
namespace
quantization
{
shared_ptr
<
Node
>
QuantizedLinearConvolutionBias
(
const
shared_ptr
<
Node
>&
input
,
const
shared_ptr
<
Node
>&
filter
,
const
shared_ptr
<
Node
>&
bias
,
shared_ptr
<
Node
>
QuantizedLinearConvolutionBias
(
const
Output
<
Node
>&
input
,
const
Output
<
Node
>&
filter
,
const
Output
<
Node
>&
bias
,
const
Strides
&
window_movement_strides
,
const
Strides
&
window_dilation_strides
,
const
CoordinateDiff
&
padding_below
,
const
CoordinateDiff
&
padding_above
,
const
Strides
&
data_dilation_strides
,
const
shared_ptr
<
Node
>&
input_scale
,
const
shared_ptr
<
Node
>&
filter_scale
,
const
shared_ptr
<
Node
>&
output_scale
)
const
Output
<
Node
>&
input_scale
,
const
Output
<
Node
>&
filter_scale
,
const
Output
<
Node
>&
output_scale
)
{
// TODO: need to establish cross-nGraph view of scale (mult or div)
auto
requantization_scale
=
(
input_scale
*
filter_scale
)
/
output_scale
;
auto
mybias
=
bias
;
if
(
bias
->
get_element_type
()
!=
element
::
i32
)
if
(
bias
.
get_element_type
()
!=
element
::
i32
)
{
const
auto
zero
=
make_constant
(
element
::
i32
,
input_scale
->
get_shape
(),
0
);
const
auto
zero
=
make_constant
(
element
::
i32
,
input_scale
.
get_shape
(),
0
);
const
AxisSet
quantization_axes
;
const
auto
bias_scale
=
input_scale
*
filter_scale
;
op
::
Quantize
::
RoundMode
round_mode
=
...
...
src/ngraph/builder/quantization/quantized_linear_convolution.hpp
View file @
34499001
...
...
@@ -26,17 +26,17 @@ namespace ngraph
namespace
quantization
{
std
::
shared_ptr
<
Node
>
QuantizedLinearConvolutionBias
(
const
std
::
shared_ptr
<
Node
>&
input
,
const
std
::
shared_ptr
<
Node
>&
filter
,
const
std
::
shared_ptr
<
Node
>&
bias
,
QuantizedLinearConvolutionBias
(
const
Output
<
Node
>&
input
,
const
Output
<
Node
>&
filter
,
const
Output
<
Node
>&
bias
,
const
Strides
&
window_movement_strides
,
const
Strides
&
window_dilation_strides
,
const
CoordinateDiff
&
padding_below
,
const
CoordinateDiff
&
padding_above
,
const
Strides
&
data_dilation_strides
,
const
std
::
shared_ptr
<
Node
>&
input_scale
,
const
std
::
shared_ptr
<
Node
>&
filter_scale
,
const
std
::
shared_ptr
<
Node
>&
output_scale
);
const
Output
<
Node
>&
input_scale
,
const
Output
<
Node
>&
filter_scale
,
const
Output
<
Node
>&
output_scale
);
}
}
}
src/ngraph/builder/quantization/quantized_linear_matmul.cpp
View file @
34499001
...
...
@@ -39,14 +39,14 @@ namespace ngraph
{
// TODO: this code is falling back to fp32 dot
// 1) add support in reference kernel for zero point
shared_ptr
<
Node
>
QuantizedLinearMatmul
(
const
shared_ptr
<
Node
>&
input0
,
const
shared_ptr
<
Node
>&
input1
,
const
shared_ptr
<
Node
>&
input0_scale
,
const
shared_ptr
<
Node
>&
input0_zero_point
,
const
shared_ptr
<
Node
>&
input1_scale
,
const
shared_ptr
<
Node
>&
input1_zero_point
,
const
shared_ptr
<
Node
>&
output_scale
,
const
shared_ptr
<
Node
>&
output_zero_point
)
shared_ptr
<
Node
>
QuantizedLinearMatmul
(
const
Output
<
Node
>&
input0
,
const
Output
<
Node
>&
input1
,
const
Output
<
Node
>&
input0_scale
,
const
Output
<
Node
>&
input0_zero_point
,
const
Output
<
Node
>&
input1_scale
,
const
Output
<
Node
>&
input1_zero_point
,
const
Output
<
Node
>&
output_scale
,
const
Output
<
Node
>&
output_zero_point
)
{
// Check if zero point is constant and zero
if
(
ngraph
::
is_zero
(
input0_zero_point
)
&&
ngraph
::
is_zero
(
input1_zero_point
)
&&
...
...
@@ -62,13 +62,13 @@ namespace ngraph
auto
dq_input0
=
make_shared
<
op
::
Dequantize
>
(
input0
,
input0_scale
,
input0_zero_point
,
input0_scale
->
get_element_type
(),
input0_scale
.
get_element_type
(),
axes
);
auto
dq_input1
=
make_shared
<
op
::
Dequantize
>
(
input1
,
input1_scale
,
input1_zero_point
,
input1_scale
->
get_element_type
(),
input1_scale
.
get_element_type
(),
axes
);
auto
dot
=
make_shared
<
op
::
Dot
>
(
dq_input0
,
dq_input1
,
1
);
...
...
@@ -76,24 +76,23 @@ namespace ngraph
dot
,
output_scale
,
output_zero_point
,
output_zero_point
->
get_element_type
(),
output_zero_point
.
get_element_type
(),
axes
,
op
::
Quantize
::
RoundMode
::
ROUND_NEAREST_TOWARD_EVEN
);
}
}
shared_ptr
<
Node
>
QuantizedLinearMatmulInteger
(
const
shared_ptr
<
Node
>&
input0
,
const
shared_ptr
<
Node
>&
input1
)
shared_ptr
<
Node
>
QuantizedLinearMatmulInteger
(
const
Output
<
Node
>&
input0
,
const
Output
<
Node
>&
input1
)
{
auto
output_scale
=
make_constant
(
element
::
f32
,
Shape
{},
1
);
return
make_shared
<
op
::
QuantizedDot
>
(
input0
,
input1
,
output_scale
,
false
,
false
);
}
shared_ptr
<
Node
>
QuantizedLinearMatmulInteger
(
const
std
::
shared_ptr
<
Node
>&
input0
,
const
std
::
shared_ptr
<
Node
>&
input1
,
const
std
::
shared_ptr
<
Node
>&
input0_zero_point
,
const
std
::
shared_ptr
<
Node
>&
input1_zero_point
)
shared_ptr
<
Node
>
QuantizedLinearMatmulInteger
(
const
Output
<
Node
>&
input0
,
const
Output
<
Node
>&
input1
,
const
Output
<
Node
>&
input0_zero_point
,
const
Output
<
Node
>&
input1_zero_point
)
{
// Check if zero points are constant and zero
if
(
ngraph
::
is_zero
(
input0_zero_point
)
&&
ngraph
::
is_zero
(
input1_zero_point
))
...
...
src/ngraph/builder/quantization/quantized_linear_matmul.hpp
View file @
34499001
...
...
@@ -25,24 +25,23 @@ namespace ngraph
{
namespace
quantization
{
std
::
shared_ptr
<
Node
>
QuantizedLinearMatmul
(
const
std
::
shared_ptr
<
Node
>&
input0
,
const
std
::
shared_ptr
<
Node
>&
input1
,
const
std
::
shared_ptr
<
Node
>&
input0_scale
,
const
std
::
shared_ptr
<
Node
>&
input0_zero_point
,
const
std
::
shared_ptr
<
Node
>&
input1_scale
,
const
std
::
shared_ptr
<
Node
>&
input1_zero_point
,
const
std
::
shared_ptr
<
Node
>&
output_scale
,
const
std
::
shared_ptr
<
Node
>&
output_zero_point
);
std
::
shared_ptr
<
Node
>
QuantizedLinearMatmul
(
const
Output
<
Node
>&
input0
,
const
Output
<
Node
>&
input1
,
const
Output
<
Node
>&
input0_scale
,
const
Output
<
Node
>&
input0_zero_point
,
const
Output
<
Node
>&
input1_scale
,
const
Output
<
Node
>&
input1_zero_point
,
const
Output
<
Node
>&
output_scale
,
const
Output
<
Node
>&
output_zero_point
);
std
::
shared_ptr
<
Node
>
QuantizedLinearMatmulInteger
(
const
std
::
shared_ptr
<
Node
>&
input0
,
const
std
::
shared_ptr
<
Node
>&
input1
);
std
::
shared_ptr
<
Node
>
QuantizedLinearMatmulInteger
(
const
Output
<
Node
>&
input0
,
const
Output
<
Node
>&
input1
);
std
::
shared_ptr
<
Node
>
QuantizedLinearMatmulInteger
(
const
std
::
shared_ptr
<
Node
>&
input0
,
const
std
::
shared_ptr
<
Node
>&
input1
,
const
std
::
shared_ptr
<
Node
>&
input0_zero_point
,
const
std
::
shared_ptr
<
Node
>&
input1_zero_point
);
QuantizedLinearMatmulInteger
(
const
Output
<
Node
>&
input0
,
const
Output
<
Node
>&
input1
,
const
Output
<
Node
>&
input0_zero_point
,
const
Output
<
Node
>&
input1_zero_point
);
}
}
}
src/ngraph/builder/quantization_util.hpp
View file @
34499001
This diff is collapsed.
Click to expand it.
src/ngraph/builder/quantization_utils.cpp
View file @
34499001
...
...
@@ -22,26 +22,26 @@ namespace ngraph
{
namespace
quantization_utils
{
std
::
shared_ptr
<
Node
>
max_abs
(
std
::
shared_ptr
<
Node
>
a
,
std
::
shared_ptr
<
Node
>
b
)
std
::
shared_ptr
<
Node
>
max_abs
(
const
Output
<
Node
>&
a
,
const
Output
<
Node
>&
b
)
{
auto
abs_a
=
std
::
make_shared
<
op
::
Abs
>
(
a
);
auto
abs_b
=
std
::
make_shared
<
op
::
Abs
>
(
b
);
return
std
::
make_shared
<
op
::
Maximum
>
(
abs_a
,
abs_b
);
}
std
::
shared_ptr
<
Node
>
get_scale
(
std
::
shared_ptr
<
Node
>
input_min_range
,
std
::
shared_ptr
<
Node
>
input_max_range
,
std
::
shared_ptr
<
Node
>
get_scale
(
const
Output
<
Node
>&
input_min_range
,
const
Output
<
Node
>&
input_max_range
,
const
ngraph
::
element
::
Type
&
quant_type
,
bool
bump_by_eps
)
{
auto
type
=
input_min_range
->
get_element_type
();
if
(
type
!=
input_max_range
->
get_element_type
())
auto
type
=
input_min_range
.
get_element_type
();
if
(
type
!=
input_max_range
.
get_element_type
())
{
throw
ngraph_error
(
"get_scale: min and max must have same type"
);
}
auto
shape
=
input_min_range
->
get_shape
();
if
(
shape
!=
input_max_range
->
get_shape
())
auto
shape
=
input_min_range
.
get_shape
();
if
(
shape
!=
input_max_range
.
get_shape
())
{
throw
ngraph_error
(
"get_scale: min and max must have same shape"
);
}
...
...
src/ngraph/builder/quantization_utils.hpp
View file @
34499001
...
...
@@ -37,10 +37,10 @@ namespace ngraph
{
namespace
quantization_utils
{
std
::
shared_ptr
<
Node
>
max_abs
(
std
::
shared_ptr
<
Node
>
a
,
std
::
shared_ptr
<
Node
>
b
);
std
::
shared_ptr
<
Node
>
max_abs
(
const
Output
<
Node
>&
a
,
const
Output
<
Node
>&
b
);
std
::
shared_ptr
<
Node
>
get_scale
(
std
::
shared_ptr
<
Node
>
input_min_range
,
std
::
shared_ptr
<
Node
>
input_max_range
,
std
::
shared_ptr
<
Node
>
get_scale
(
const
Output
<
Node
>&
input_min_range
,
const
Output
<
Node
>&
input_max_range
,
const
ngraph
::
element
::
Type
&
quant_type
,
bool
bump_by_eps
=
false
);
}
...
...
src/ngraph/builder/quantized_conv_builder.cpp
View file @
34499001
...
...
@@ -26,35 +26,34 @@ namespace ngraph
{
namespace
builder
{
shared_ptr
<
Node
>
QuantizedConvolutionBuilder
(
const
shared_ptr
<
Node
>&
input
,
const
shared_ptr
<
Node
>&
filters
,
shared_ptr
<
Node
>
QuantizedConvolutionBuilder
(
const
Output
<
Node
>&
input
,
const
Output
<
Node
>&
filters
,
const
Strides
&
window_movement_strides
,
const
Strides
&
window_dilation_strides
,
const
CoordinateDiff
&
padding_below
,
const
CoordinateDiff
&
padding_above
,
const
Strides
&
data_dilation_strides
,
const
shared_ptr
<
Node
>&
min_input
,
const
shared_ptr
<
Node
>&
max_input
,
const
shared_ptr
<
Node
>&
min_filter
,
const
shared_ptr
<
Node
>&
max_filter
,
const
shared_ptr
<
Node
>&
min_output
,
const
shared_ptr
<
Node
>&
max_output
,
const
Output
<
Node
>&
min_input
,
const
Output
<
Node
>&
max_input
,
const
Output
<
Node
>&
min_filter
,
const
Output
<
Node
>&
max_filter
,
const
Output
<
Node
>&
min_output
,
const
Output
<
Node
>&
max_output
,
const
ngraph
::
element
::
Type
&
output_type
,
const
ngraph
::
AxisSet
&
input_axes
,
const
ngraph
::
AxisSet
&
filter_axes
,
const
ngraph
::
AxisSet
&
output_axes
)
{
auto
input_scale
=
quantization_utils
::
get_scale
(
min_input
,
max_input
,
input
->
get_element_type
());
quantization_utils
::
get_scale
(
min_input
,
max_input
,
input
.
get_element_type
());
auto
filter_scale
=
quantization_utils
::
get_scale
(
min_filter
,
max_filter
,
filters
->
get_element_type
());
quantization_utils
::
get_scale
(
min_filter
,
max_filter
,
filters
.
get_element_type
());
auto
output_scale
=
quantization_utils
::
get_scale
(
min_output
,
max_output
,
output_type
);
// TODO: Check for this later
// For Builders the zero point is assumed to be zero (for now)
auto
input_zero_point
=
op
::
Constant
::
create
(
input
->
get_element_type
(),
Shape
{},
{
0
});
auto
filter_zero_point
=
op
::
Constant
::
create
(
filters
->
get_element_type
(),
Shape
{},
{
0
});
auto
input_zero_point
=
op
::
Constant
::
create
(
input
.
get_element_type
(),
Shape
{},
{
0
});
auto
filter_zero_point
=
op
::
Constant
::
create
(
filters
.
get_element_type
(),
Shape
{},
{
0
});
return
make_shared
<
op
::
QuantizedConvolution
>
(
input
,
...
...
src/ngraph/builder/quantized_conv_builder.hpp
View file @
34499001
...
...
@@ -26,19 +26,19 @@ namespace ngraph
namespace
builder
{
std
::
shared_ptr
<
Node
>
QuantizedConvolutionBuilder
(
const
std
::
shared_ptr
<
Node
>&
input
,
const
std
::
shared_ptr
<
Node
>&
filters
,
QuantizedConvolutionBuilder
(
const
Output
<
Node
>&
input
,
const
Output
<
Node
>&
filters
,
const
Strides
&
window_movement_strides
,
const
Strides
&
window_dilation_strides
,
const
CoordinateDiff
&
padding_below
,
const
CoordinateDiff
&
padding_above
,
const
Strides
&
data_dilation_strides
,
const
std
::
shared_ptr
<
Node
>&
min_input
,
const
std
::
shared_ptr
<
Node
>&
max_input
,
const
std
::
shared_ptr
<
Node
>&
min_filter
,
const
std
::
shared_ptr
<
Node
>&
max_filter
,
const
std
::
shared_ptr
<
Node
>&
min_output
,
const
std
::
shared_ptr
<
Node
>&
max_output
,
const
Output
<
Node
>&
min_input
,
const
Output
<
Node
>&
max_input
,
const
Output
<
Node
>&
min_filter
,
const
Output
<
Node
>&
max_filter
,
const
Output
<
Node
>&
min_output
,
const
Output
<
Node
>&
max_output
,
const
ngraph
::
element
::
Type
&
output_type
,
const
ngraph
::
AxisSet
&
input_axes
=
ngraph
::
AxisSet
{},
const
ngraph
::
AxisSet
&
filter_axes
=
ngraph
::
AxisSet
{},
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment