Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
N
ngraph
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ngraph
Commits
4dabd001
Commit
4dabd001
authored
Sep 26, 2018
by
Nishant Patel
Committed by
Robert Kimball
Sep 26, 2018
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Add Quantized conv+relu (#1664)
parent
f8a084ac
Hide whitespace changes
Inline
Side-by-side
Showing
13 changed files
with
542 additions
and
69 deletions
+542
-69
CMakeLists.txt
src/ngraph/runtime/cpu/CMakeLists.txt
+1
-1
quantized_conv.cpp
src/ngraph/runtime/cpu/builder/quantized_conv.cpp
+43
-0
cpu_emitter.cpp
src/ngraph/runtime/cpu/cpu_emitter.cpp
+33
-0
cpu_external_function.cpp
src/ngraph/runtime/cpu/cpu_external_function.cpp
+3
-0
mkldnn_emitter.cpp
src/ngraph/runtime/cpu/mkldnn_emitter.cpp
+3
-1
mkldnn_emitter.hpp
src/ngraph/runtime/cpu/mkldnn_emitter.hpp
+35
-10
quantized_conv_relu.cpp
src/ngraph/runtime/cpu/op/quantized_conv_relu.cpp
+158
-0
quantized_conv_relu.hpp
src/ngraph/runtime/cpu/op/quantized_conv_relu.hpp
+77
-0
cpu_assignment.cpp
src/ngraph/runtime/cpu/pass/cpu_assignment.cpp
+17
-0
cpu_layout.cpp
src/ngraph/runtime/cpu/pass/cpu_layout.cpp
+48
-0
quantization_util.cpp
src/ngraph/runtime/cpu/quantization_util.cpp
+0
-56
quantization_util.hpp
src/ngraph/runtime/cpu/quantization_util.hpp
+26
-1
quantize_cpu.cpp
test/quantize_cpu.cpp
+98
-0
No files found.
src/ngraph/runtime/cpu/CMakeLists.txt
View file @
4dabd001
...
...
@@ -27,7 +27,6 @@ set(SRC
cpu_tensor_view.cpp
cpu_tracing.cpp
cpu_visualize_tree.cpp
quantization_util.cpp
builder/add.cpp
builder/allreduce.cpp
builder/avg_pool.cpp
...
...
@@ -95,6 +94,7 @@ set(SRC
op/max_pool_with_indices.cpp
op/quantized_max_pool.cpp
op/quantized_avg_pool.cpp
op/quantized_conv_relu.cpp
op/rnn.cpp
op/sigmoid_mul.cpp
op/conv_add.cpp
...
...
src/ngraph/runtime/cpu/builder/quantized_conv.cpp
View file @
4dabd001
...
...
@@ -19,6 +19,7 @@
#include "ngraph/runtime/cpu/cpu_builder.hpp"
#include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv_relu.hpp"
using
namespace
std
;
using
namespace
ngraph
;
...
...
@@ -67,7 +68,49 @@ namespace ngraph
throw
ngraph_error
(
"unsupported parameters for QuantizedConvolution via DEX"
);
}
}
// Direct-execution (DEX) builder for QuantizedConvolutionRelu.
//
// Appends one functor to the external function's functor list. When run, the
// functor binds args[0], args[1] and out[0] to the first three dependencies of
// the MKLDNN convolution primitive, stores the op's freezed output min/max in
// out[1] / out[2], and invokes the primitive.
//
// Throws ngraph_error when the node is not assigned to an MKLDNN kernel.
template <>
void Builder::BUILDER_DECL(ngraph::op::QuantizedConvolutionRelu)
{
    // Only the MKLDNN path exists for this op; reject everything else up front.
    if (!runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
    {
        throw ngraph_error("unsupported parameters for QuantizedConvolutionRelu via DEX");
    }

    auto relu_op = static_cast<const ngraph::op::QuantizedConvolutionRelu*>(node);

    auto& functors = external_function->get_functors();

    // Tensor slots are held by reference and only dereferenced inside the functor.
    // NOTE(review): presumably so that pointers assigned after build time are
    // observed at execution time -- confirm against get_tensor_data().
    auto& data_tensor = external_function->get_tensor_data(args[0].get_name());
    auto& filters_tensor = external_function->get_tensor_data(args[1].get_name());
    auto& result_tensor = external_function->get_tensor_data(out[0].get_name());
    auto& result_min_tensor = external_function->get_tensor_data(out[1].get_name());
    auto& result_max_tensor = external_function->get_tensor_data(out[2].get_name());

    auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
    auto primitive_index =
        mkldnn_emitter->build_convolution<ngraph::op::QuantizedConvolutionRelu>(node, args, out);
    auto& primitive_deps = mkldnn_emitter->get_primitive_deps(primitive_index);

    // The range values are copied into the closure; they are fixed at build time.
    float frozen_min = relu_op->get_freezed_output_min();
    float frozen_max = relu_op->get_freezed_output_max();

    functors.emplace_back(
        [&, primitive_index, frozen_min, frozen_max](CPURuntimeContext* ctx) {
            cpu::mkldnn_utils::set_memory_ptr(ctx, primitive_deps[0], data_tensor);
            cpu::mkldnn_utils::set_memory_ptr(ctx, primitive_deps[1], filters_tensor);
            cpu::mkldnn_utils::set_memory_ptr(ctx, primitive_deps[2], result_tensor);
            // Outputs 1 and 2 simply report the freezed output range.
            *(static_cast<float*>(result_min_tensor)) = frozen_min;
            *(static_cast<float*>(result_max_tensor)) = frozen_max;
            cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, primitive_index);
        });
}
REGISTER_OP_BUILDER
(
QuantizedConvolution
);
REGISTER_OP_BUILDER
(
QuantizedConvolutionRelu
);
}
}
}
src/ngraph/runtime/cpu/cpu_emitter.cpp
View file @
4dabd001
...
...
@@ -109,6 +109,7 @@
#include "ngraph/runtime/cpu/op/matmul_bias.hpp"
#include "ngraph/runtime/cpu/op/max_pool_with_indices.hpp"
#include "ngraph/runtime/cpu/op/quantized_avg_pool.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv_relu.hpp"
#include "ngraph/runtime/cpu/op/quantized_max_pool.hpp"
#include "ngraph/runtime/cpu/op/rnn.hpp"
#include "ngraph/runtime/cpu/op/sigmoid.hpp"
...
...
@@ -2657,6 +2658,38 @@ namespace ngraph
}
}
// Code-generation emitter for QuantizedConvolutionRelu.
//
// Builds the MKLDNN convolution primitive and writes source text that binds
// args[0], args[1] and out[0] to the primitive's first three dependencies,
// assigns the op's freezed output min/max to out[1] / out[2], and invokes the
// primitive.
//
// Throws ngraph_error when the node is not assigned to an MKLDNN kernel.
template <>
void CPU_Emitter::EMITTER_DECL(ngraph::op::QuantizedConvolutionRelu)
{
    auto relu_op = static_cast<const ngraph::op::QuantizedConvolutionRelu*>(node);

    if (!runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node))
    {
        throw ngraph_error("unsupported parameters for QuantizedConvolutionRelu");
    }

    auto& mkldnn_emitter = external_function->get_mkldnn_emitter();
    auto primitive_index =
        mkldnn_emitter->build_convolution<ngraph::op::QuantizedConvolutionRelu>(node, args, out);
    auto& primitive_deps = mkldnn_emitter->get_primitive_deps(primitive_index);

    // Bind data, filters and result buffers to the primitive's memory slots.
    const auto& data_name = args[0].get_name();
    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(primitive_deps[0]) << ", "
           << data_name << ");\n";

    const auto& filters_name = args[1].get_name();
    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(primitive_deps[1]) << ", "
           << filters_name << ");\n";

    const auto& result_name = out[0].get_name();
    writer << "cpu::mkldnn_utils::set_memory_ptr(ctx, " << to_string(primitive_deps[2]) << ", "
           << result_name << ");\n";

    // The range outputs are emitted as plain assignments of the freezed values.
    writer << "*(" << out[1].get_name() << ") = " << relu_op->get_freezed_output_min()
           << ";\n";
    writer << "*(" << out[2].get_name() << ") = " << relu_op->get_freezed_output_max()
           << ";\n";

    writer << "cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, " << to_string(primitive_index)
           << ");\n";
}
template
<>
void
CPU_Emitter
::
EMITTER_DECL
(
ngraph
::
op
::
QuantizedConvolution
)
{
...
...
src/ngraph/runtime/cpu/cpu_external_function.cpp
View file @
4dabd001
...
...
@@ -152,6 +152,7 @@
#include "ngraph/runtime/cpu/op/quantize.hpp"
#include "ngraph/runtime/cpu/op/quantized_avg_pool.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv_relu.hpp"
#include "ngraph/runtime/cpu/op/quantized_max_pool.hpp"
#include "ngraph/runtime/cpu/op/rnn.hpp"
#include "ngraph/runtime/cpu/op/sigmoid.hpp"
...
...
@@ -310,6 +311,8 @@ static const runtime::cpu::OpMap dispatcher{
{
TI
(
ngraph
::
op
::
ConvolutionRelu
),
&
runtime
::
cpu
::
CPU_Emitter
::
emit
<
op
::
ConvolutionRelu
>
},
{
TI
(
ngraph
::
op
::
QuantizedConvolution
),
&
runtime
::
cpu
::
CPU_Emitter
::
emit
<
op
::
QuantizedConvolution
>
},
{
TI
(
ngraph
::
op
::
QuantizedConvolutionRelu
),
&
runtime
::
cpu
::
CPU_Emitter
::
emit
<
op
::
QuantizedConvolutionRelu
>
},
{
TI
(
ngraph
::
op
::
ConvolutionBiasAdd
),
&
runtime
::
cpu
::
CPU_Emitter
::
emit
<
op
::
ConvolutionBiasAdd
>
},
// conv+bias backprop for data share the same implementation as ConvolutionBackpropData
{
TI
(
ngraph
::
op
::
ConvolutionBiasBackpropFiltersBias
),
...
...
src/ngraph/runtime/cpu/mkldnn_emitter.cpp
View file @
4dabd001
...
...
@@ -281,7 +281,8 @@ size_t MKLDNNEmitter::build_quantized_convolution(const mkldnn::memory::desc& in
const
ngraph
::
Strides
&
dilation_strides
,
const
ngraph
::
CoordinateDiff
&
padding_below
,
const
ngraph
::
CoordinateDiff
&
padding_above
,
const
float
scale
)
const
float
scale
,
const
mkldnn
::
post_ops
&
pops
)
{
size_t
input_data_index
=
build_memory_primitive
(
input_data_desc
);
size_t
weights_index
=
build_memory_primitive
(
weights_desc
);
...
...
@@ -289,6 +290,7 @@ size_t MKLDNNEmitter::build_quantized_convolution(const mkldnn::memory::desc& in
std
::
vector
<
float
>
output_scale
;
output_scale
.
push_back
(
scale
);
mkldnn
::
primitive_attr
conv_attr
;
conv_attr
.
set_post_ops
(
pops
);
/* Specify the rounding mode */
conv_attr
.
set_int_output_round_mode
(
mkldnn
::
round_mode
::
round_nearest
);
/* Specify the scales array and corresponding mask */
...
...
src/ngraph/runtime/cpu/mkldnn_emitter.hpp
View file @
4dabd001
...
...
@@ -33,6 +33,7 @@
#include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/conv_relu.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv_relu.hpp"
#include "ngraph/runtime/cpu/quantization_util.hpp"
#include "ngraph/shape.hpp"
#include "ngraph/strides.hpp"
...
...
@@ -106,14 +107,16 @@ namespace ngraph
const
ngraph
::
CoordinateDiff
&
padding_above
,
const
mkldnn
::
post_ops
&
pops
=
mkldnn
::
post_ops
());
size_t
build_quantized_convolution
(
const
mkldnn
::
memory
::
desc
&
input_data_desc
,
const
mkldnn
::
memory
::
desc
&
weights_desc
,
const
mkldnn
::
memory
::
desc
&
result_desc
,
const
ngraph
::
Strides
&
strides
,
const
ngraph
::
Strides
&
dilation_strides
,
const
ngraph
::
CoordinateDiff
&
padding_below
,
const
ngraph
::
CoordinateDiff
&
padding_above
,
const
float
scale
);
size_t
build_quantized_convolution
(
const
mkldnn
::
memory
::
desc
&
input_data_desc
,
const
mkldnn
::
memory
::
desc
&
weights_desc
,
const
mkldnn
::
memory
::
desc
&
result_desc
,
const
ngraph
::
Strides
&
strides
,
const
ngraph
::
Strides
&
dilation_strides
,
const
ngraph
::
CoordinateDiff
&
padding_below
,
const
ngraph
::
CoordinateDiff
&
padding_above
,
const
float
scale
,
const
mkldnn
::
post_ops
&
pops
=
mkldnn
::
post_ops
());
template
<
typename
OP
>
size_t
build_convolution
(
const
ngraph
::
Node
*
node
,
...
...
@@ -170,6 +173,10 @@ namespace ngraph
{
return
true
;
}
if
(
dynamic_cast
<
const
ngraph
::
op
::
QuantizedConvolutionRelu
*>
(
node
))
{
return
true
;
}
return
false
;
};
...
...
@@ -198,7 +205,24 @@ namespace ngraph
}
else
if
(
std
::
is_same
<
OP
,
ngraph
::
op
::
QuantizedConvolution
>
())
{
const
float
scale
=
quantization_util
::
get_scale
(
node
);
const
float
scale
=
quantization_util
::
get_scale
<
ngraph
::
op
::
QuantizedConvolution
>
(
node
);
return
build_quantized_convolution
(
data_desc
,
weights_desc
,
result_desc
,
convolution
->
get_window_movement_strides
(),
window_dilation_strides_adjusted
,
convolution
->
get_padding_below
(),
convolution
->
get_padding_above
(),
scale
,
ops
);
}
else
if
(
std
::
is_same
<
OP
,
ngraph
::
op
::
QuantizedConvolutionRelu
>
())
{
const
float
scale
=
quantization_util
::
get_scale
<
ngraph
::
op
::
QuantizedConvolutionRelu
>
(
node
);
return
build_quantized_convolution
(
data_desc
,
weights_desc
,
...
...
@@ -207,7 +231,8 @@ namespace ngraph
window_dilation_strides_adjusted
,
convolution
->
get_padding_below
(),
convolution
->
get_padding_above
(),
scale
);
scale
,
ops
);
}
else
{
...
...
src/ngraph/runtime/cpu/op/quantized_conv_relu.cpp
0 → 100644
View file @
4dabd001
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include <numeric>
#include "ngraph/op/constant.hpp"
#include "ngraph/op/get_output_element.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv_relu.hpp"
#include "ngraph/util.hpp"
using
namespace
std
;
using
namespace
ngraph
;
/// \brief Builds a QuantizedConvolutionRelu that takes over the inputs and attributes of an
///        existing QuantizedConvolution.
///
/// The eight arguments of \p qconv become the arguments of this op, and its strides,
/// padding, dilation and cached min/max ranges are copied verbatim.
///
/// \param qconv Source convolution. It is dereferenced unconditionally, so it must not be null.
op::QuantizedConvolutionRelu::QuantizedConvolutionRelu(
    const std::shared_ptr<op::QuantizedConvolution>& qconv)
    : Op("QuantizedConvolutionRelu",
         check_single_output_args({qconv->get_argument(0),
                                   qconv->get_argument(1),
                                   qconv->get_argument(2),
                                   qconv->get_argument(3),
                                   qconv->get_argument(4),
                                   qconv->get_argument(5),
                                   qconv->get_argument(6),
                                   qconv->get_argument(7)}))
    , m_window_movement_strides(qconv->get_window_movement_strides())
    , m_window_dilation_strides(qconv->get_window_dilation_strides())
    , m_padding_below(qconv->get_padding_below())
    , m_padding_above(qconv->get_padding_above())
    , m_data_dilation_strides(qconv->get_data_dilation_strides())
{
    constructor_validate_and_infer_types();

    this->m_input_min = qconv->get_input_min();
    this->m_input_max = qconv->get_input_max();
    this->m_filter_min = qconv->get_filter_min();
    this->m_filter_max = qconv->get_filter_max();
    this->m_freezed_output_min = qconv->get_freezed_output_min();
    this->m_freezed_output_max = qconv->get_freezed_output_max();

    // Output 0 is the quantized result; outputs 1 and 2 are one-element f32 tensors.
    set_output_size(3);
    // Name the data output explicitly: get_shape() is only meaningful on a node with a
    // single output. NOTE(review): QuantizedConvolution presumably exposes the same three
    // outputs as this op -- confirm; get_output_shape(0) is correct either way.
    set_output_type(0, element::u8, qconv->get_output_shape(0));
    set_output_type(1, element::f32, Shape{1});
    set_output_type(2, element::f32, Shape{1});
}
op
::
QuantizedConvolutionRelu
::
QuantizedConvolutionRelu
(
const
std
::
shared_ptr
<
Node
>&
data_batch
,
const
std
::
shared_ptr
<
Node
>&
filters
,
const
Strides
&
window_movement_strides
,
const
Strides
&
window_dilation_strides
,
const
CoordinateDiff
&
padding_below
,
const
CoordinateDiff
&
padding_above
,
const
Strides
&
data_dilation_strides
,
const
std
::
shared_ptr
<
Node
>
min_input
,
const
std
::
shared_ptr
<
Node
>
max_input
,
const
std
::
shared_ptr
<
Node
>
min_filter
,
const
std
::
shared_ptr
<
Node
>
max_filter
,
const
std
::
shared_ptr
<
Node
>
min_freezed_output
,
const
std
::
shared_ptr
<
Node
>
max_freezed_output
)
:
Op
(
"QuantizedConvolutionRelu"
,
check_single_output_args
({
data_batch
,
filters
,
min_input
,
max_input
,
min_filter
,
max_filter
,
min_freezed_output
,
max_freezed_output
}))
,
m_window_movement_strides
(
window_movement_strides
)
,
m_window_dilation_strides
(
window_dilation_strides
)
,
m_padding_below
(
padding_below
)
,
m_padding_above
(
padding_above
)
,
m_data_dilation_strides
(
data_dilation_strides
)
{
constructor_validate_and_infer_types
();
auto
&
data_batch_shape
=
data_batch
->
get_shape
();
auto
&
filters_shape
=
filters
->
get_shape
();
auto
min_input_const_op
=
std
::
static_pointer_cast
<
ngraph
::
op
::
Constant
>
(
min_input
);
auto
max_input_const_op
=
std
::
static_pointer_cast
<
ngraph
::
op
::
Constant
>
(
max_input
);
auto
min_filter_const_op
=
std
::
static_pointer_cast
<
ngraph
::
op
::
Constant
>
(
min_filter
);
auto
max_filter_const_op
=
std
::
static_pointer_cast
<
ngraph
::
op
::
Constant
>
(
max_filter
);
auto
min_freezed_output_const_op
=
std
::
static_pointer_cast
<
ngraph
::
op
::
Constant
>
(
min_freezed_output
);
auto
max_freezed_output_const_op
=
std
::
static_pointer_cast
<
ngraph
::
op
::
Constant
>
(
max_freezed_output
);
float
input_min
=
*
(
static_cast
<
float
const
*>
(
min_input_const_op
->
get_data_ptr
()));
float
input_max
=
*
(
static_cast
<
float
const
*>
(
max_input_const_op
->
get_data_ptr
()));
float
filter_min
=
*
(
static_cast
<
float
const
*>
(
min_filter_const_op
->
get_data_ptr
()));
float
filter_max
=
*
(
static_cast
<
float
const
*>
(
max_filter_const_op
->
get_data_ptr
()));
float
output_min
=
*
(
static_cast
<
float
const
*>
(
min_freezed_output_const_op
->
get_data_ptr
()));
float
output_max
=
*
(
static_cast
<
float
const
*>
(
max_freezed_output_const_op
->
get_data_ptr
()));
this
->
m_input_min
=
input_min
;
this
->
m_input_max
=
input_max
;
this
->
m_filter_min
=
filter_min
;
this
->
m_filter_max
=
filter_max
;
this
->
m_freezed_output_min
=
output_min
;
this
->
m_freezed_output_max
=
output_max
;
set_output_size
(
3
);
set_output_type
(
0
,
element
::
u8
,
util
::
infer_convolution_output_shape
(
this
,
data_batch_shape
,
filters_shape
,
window_movement_strides
,
window_dilation_strides
,
padding_below
,
padding_above
,
data_dilation_strides
,
0
,
/* batch_axis_data, */
1
,
/* input_channel_axis_data, */
1
,
/* input_channel_axis_filters, */
0
,
/* output_channel_axis_filters, */
0
,
/* batch_axis_result, */
1
/* output_channel_axis_result, */
));
set_output_type
(
1
,
element
::
f32
,
Shape
{
1
});
set_output_type
(
2
,
element
::
f32
,
Shape
{
1
});
}
std
::
shared_ptr
<
Node
>
op
::
QuantizedConvolutionRelu
::
copy_with_new_args
(
const
NodeVector
&
new_args
)
const
{
if
(
new_args
.
size
()
!=
8
)
{
throw
ngraph_error
(
"Incorrect number of new arguments"
);
}
return
std
::
shared_ptr
<
Node
>
(
new
QuantizedConvolutionRelu
(
new_args
.
at
(
0
),
new_args
.
at
(
1
),
get_window_movement_strides
(),
get_window_dilation_strides
(),
get_padding_below
(),
get_padding_above
(),
get_data_dilation_strides
(),
new_args
.
at
(
2
),
new_args
.
at
(
3
),
new_args
.
at
(
4
),
new_args
.
at
(
5
),
new_args
.
at
(
6
),
new_args
.
at
(
7
)));
}
src/ngraph/runtime/cpu/op/quantized_conv_relu.hpp
0 → 100644
View file @
4dabd001
//*****************************************************************************
// Copyright 2017-2018 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include "ngraph/op/op.hpp"
#include "ngraph/runtime/cpu/op/conv_bias.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv.hpp"
namespace ngraph
{
    namespace op
    {
        /// \brief Quantized batched convolution fused with Relu.
        ///
        /// The op takes eight arguments (data batch, filters, and six min/max range
        /// inputs) and produces three outputs: the u8 result plus two one-element f32
        /// tensors that the CPU backend fills with the freezed output min and max.
        class QuantizedConvolutionRelu : public Op
        {
        public:
            /// \brief Constructs the fused op from an existing QuantizedConvolution,
            ///        reusing its arguments, attributes and cached ranges.
            QuantizedConvolutionRelu(const std::shared_ptr<op::QuantizedConvolution>& qconv);

            /// \brief Constructs the fused op from individual inputs and attributes.
            ///
            /// The six min/max nodes are read once at construction time and their first
            /// element is cached as a float.
            QuantizedConvolutionRelu(const std::shared_ptr<Node>& data_batch,
                                     const std::shared_ptr<Node>& filters,
                                     const Strides& window_movement_strides,
                                     const Strides& window_dilation_strides,
                                     const CoordinateDiff& padding_below,
                                     const CoordinateDiff& padding_above,
                                     const Strides& data_dilation_strides,
                                     const std::shared_ptr<Node> min_input,
                                     const std::shared_ptr<Node> max_input,
                                     const std::shared_ptr<Node> min_filter,
                                     const std::shared_ptr<Node> max_filter,
                                     const std::shared_ptr<Node> min_freezed_output,
                                     const std::shared_ptr<Node> max_freezed_output);

            // --- convolution attributes -------------------------------------------
            /// \return The window movement strides given at construction.
            const Strides& get_window_movement_strides() const
            {
                return m_window_movement_strides;
            }
            /// \return The window dilation strides given at construction.
            const Strides& get_window_dilation_strides() const
            {
                return m_window_dilation_strides;
            }
            /// \return The padding-below sizes given at construction.
            const CoordinateDiff& get_padding_below() const { return m_padding_below; }
            /// \return The padding-above sizes given at construction.
            const CoordinateDiff& get_padding_above() const { return m_padding_above; }
            /// \return The data dilation strides given at construction.
            const Strides& get_data_dilation_strides() const { return m_data_dilation_strides; }

            // --- cached quantization ranges ---------------------------------------
            /// \return Cached minimum of the input range.
            float get_input_min() const { return m_input_min; }
            /// \return Cached maximum of the input range.
            float get_input_max() const { return m_input_max; }
            /// \return Cached minimum of the filter range.
            float get_filter_min() const { return m_filter_min; }
            /// \return Cached maximum of the filter range.
            float get_filter_max() const { return m_filter_max; }
            /// \return Cached freezed output minimum.
            float get_freezed_output_min() const { return m_freezed_output_min; }
            /// \return Cached freezed output maximum.
            float get_freezed_output_max() const { return m_freezed_output_max; }

            // --- argument shortcuts -----------------------------------------------
            /// \return Argument 1 (the filters).
            std::shared_ptr<Node> get_filters() { return get_argument(1); }
            /// \return Argument 0 (the data batch).
            std::shared_ptr<Node> get_data_batch() { return get_argument(0); }

            /// \brief Clones this op onto \p new_args, keeping its attributes.
            virtual std::shared_ptr<Node>
                copy_with_new_args(const NodeVector& new_args) const override;

        protected:
            // Declaration order is significant: the constructors' initializer lists
            // follow it.
            Strides m_window_movement_strides;
            Strides m_window_dilation_strides;
            CoordinateDiff m_padding_below;
            CoordinateDiff m_padding_above;
            Strides m_data_dilation_strides;
            // Range values are assigned in the constructor bodies.
            float m_input_min;
            float m_input_max;
            float m_filter_min;
            float m_filter_max;
            float m_freezed_output_min;
            float m_freezed_output_max;
        };
    }
}
src/ngraph/runtime/cpu/pass/cpu_assignment.cpp
View file @
4dabd001
...
...
@@ -50,6 +50,7 @@
#include "ngraph/runtime/cpu/op/quantize.hpp"
#include "ngraph/runtime/cpu/op/quantized_avg_pool.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv_relu.hpp"
#include "ngraph/runtime/cpu/op/quantized_max_pool.hpp"
#include "ngraph/runtime/cpu/op/rnn.hpp"
#include "ngraph/runtime/cpu/op/sigmoid.hpp"
...
...
@@ -759,6 +760,20 @@ namespace ngraph
}
}
// Marks a QuantizedConvolutionRelu node as an MKLDNN op when input 0 is u8 and
// input 1 is i8. Any other element-type combination leaves the node untouched.
template <>
void CPUAssignment::ASSIGN_DECL(ngraph::op::QuantizedConvolutionRelu)
{
    const bool is_u8_by_i8 = node->get_input_element_type(0) == element::u8 &&
                             node->get_input_element_type(1) == element::i8;
    if (!is_u8_by_i8)
    {
        return;
    }

    auto relu_node = static_cast<op::QuantizedConvolutionRelu*>(node);

    auto annotations = std::make_shared<ngraph::runtime::cpu::CPUOpAnnotations>();
    annotations->set_mkldnn_op(true);
    relu_node->set_op_annotations(annotations);
}
template
<>
void
CPUAssignment
::
ASSIGN_DECL
(
ngraph
::
op
::
Quantize
)
{
...
...
@@ -838,6 +853,8 @@ static const runtime::cpu::pass::AssignOpMap s_dispatcher{
&
runtime
::
cpu
::
pass
::
CPUAssignment
::
assign
<
ngraph
::
op
::
ConvolutionAdd
>
},
{
TI
(
ngraph
::
op
::
Dequantize
),
&
runtime
::
cpu
::
pass
::
CPUAssignment
::
assign
<
ngraph
::
op
::
Dequantize
>
},
{
TI
(
ngraph
::
op
::
QuantizedConvolutionRelu
),
&
runtime
::
cpu
::
pass
::
CPUAssignment
::
assign
<
ngraph
::
op
::
QuantizedConvolutionRelu
>
},
};
bool
runtime
::
cpu
::
pass
::
CPUAssignment
::
run_on_call_graph
(
...
...
src/ngraph/runtime/cpu/pass/cpu_layout.cpp
View file @
4dabd001
...
...
@@ -58,6 +58,7 @@
#include "ngraph/runtime/cpu/op/quantize.hpp"
#include "ngraph/runtime/cpu/op/quantized_avg_pool.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv_relu.hpp"
#include "ngraph/runtime/cpu/op/quantized_max_pool.hpp"
#include "ngraph/runtime/cpu/op/rnn.hpp"
...
...
@@ -499,6 +500,51 @@ namespace ngraph
}
}
// Layout pass for QuantizedConvolutionRelu.
//
// For MKLDNN-assigned nodes, ConvolutionLayout fills the input/output memory
// descriptor lists first; default `x`-format descriptors are then appended for
// inputs 2..7 (the six min/max range inputs) and outputs 1..2 before input
// conversions are inserted and the output layouts are set. All other nodes get
// native layouts.
template <>
void CPULayout::LAYOUT_DECL(ngraph::op::QuantizedConvolutionRelu)
{
    if (!runtime::cpu::mkldnn_utils::use_mkldnn_kernel(node.get()))
    {
        set_native_layouts(external_function, node);
        return;
    }

    vector<memory::desc> i_mds;
    vector<memory::desc> o_mds;
    ConvolutionLayout<ngraph::op::QuantizedConvolutionRelu, false, false>(node, i_mds, o_mds);

    // min_input, max_input, min_filter, max_filter, min/max freezed output.
    for (size_t input_index = 2; input_index <= 7; ++input_index)
    {
        i_mds.push_back(mkldnn_utils::create_default_mkldnn_md(
            node.get(), input_index, false, memory::format::x));
    }

    // The two range outputs that follow the convolution result.
    for (size_t output_index = 1; output_index <= 2; ++output_index)
    {
        o_mds.push_back(mkldnn_utils::create_default_mkldnn_md(
            node.get(), output_index, true, memory::format::x));
    }

    node = insert_input_conversions(external_function, node, i_mds);
    set_output_layouts(node, o_mds);
}
template
<>
void
CPULayout
::
LAYOUT_DECL
(
ngraph
::
op
::
ConvolutionBiasAdd
)
{
...
...
@@ -1842,6 +1888,8 @@ static const runtime::cpu::pass::LayoutOpMap s_dispatcher{
{
TI
(
ngraph
::
op
::
Dequantize
),
&
runtime
::
cpu
::
pass
::
CPULayout
::
layout
<
ngraph
::
op
::
Dequantize
>
},
{
TI
(
ngraph
::
op
::
Slice
),
&
runtime
::
cpu
::
pass
::
CPULayout
::
layout
<
ngraph
::
op
::
Slice
>
},
{
TI
(
ngraph
::
op
::
Quantize
),
&
runtime
::
cpu
::
pass
::
CPULayout
::
layout
<
ngraph
::
op
::
Quantize
>
},
{
TI
(
ngraph
::
op
::
QuantizedConvolutionRelu
),
&
runtime
::
cpu
::
pass
::
CPULayout
::
layout
<
ngraph
::
op
::
QuantizedConvolutionRelu
>
},
};
bool
runtime
::
cpu
::
pass
::
CPULayout
::
run_on_call_graph
(
const
std
::
list
<
std
::
shared_ptr
<
Node
>>&
nodes
)
...
...
src/ngraph/runtime/cpu/quantization_util.cpp
deleted
100644 → 0
View file @
f8a084ac
/*******************************************************************************
* Copyright 2018 Intel Corporation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*******************************************************************************/
#include "quantization_util.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv.hpp"
namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            namespace quantization_util
            {
                // Computes the output scale for a QuantizedConvolution node.
                //
                // `node` is downcast to op::QuantizedConvolution without a check. The
                // 32-bit range comes from quantization_range_for_multiplication applied
                // to the node's input and filter ranges; the 8-bit range is the node's
                // freezed output range.
                float get_scale(const ngraph::Node* node)
                {
                    auto conv = static_cast<const ngraph::op::QuantizedConvolution*>(node);

                    float product_min;
                    float product_max;
                    quantization_range_for_multiplication<uint8_t, int8_t, int32_t>(
                        conv->get_input_min(),
                        conv->get_input_max(),
                        conv->get_filter_min(),
                        conv->get_filter_max(),
                        &product_min,
                        &product_max);

                    // Largest magnitude of the 32-bit accumulator range.
                    const float max_abs32 = std::max(std::abs(product_min), std::abs(product_max));
                    // Largest magnitude of the freezed 8-bit output range.
                    const float max_abs8 = std::max(std::abs(conv->get_freezed_output_min()),
                                                    std::abs(conv->get_freezed_output_max()));

                    // Derivation for a signed output:
                    //   s32 = f32 * 2^31 / max_abs32
                    //   s8  = f32 * 2^7  / max_abs8
                    //   =>  s8 = s32 * 2^-24 * max_abs32 / max_abs8
                    const float range_ratio = max_abs32 / max_abs8;
                    const double scale_wide = std::pow(2, -24) * static_cast<double>(range_ratio);
                    return static_cast<float>(scale_wide);
                }
            }
        }
    }
}
src/ngraph/runtime/cpu/quantization_util.hpp
View file @
4dabd001
...
...
@@ -89,7 +89,32 @@ namespace ngraph
quant_util
.
push_back
(
scale
);
}
float
get_scale
(
const
ngraph
::
Node
*
node
);
// Computes the output scale for a quantized convolution node of type OP.
//
// `node` is downcast to OP without a check. OP must provide get_input_min/max,
// get_filter_min/max and get_freezed_output_min/max. The 32-bit range comes from
// quantization_range_for_multiplication applied to the input and filter ranges;
// the 8-bit range is the op's freezed output range.
template <typename OP>
float get_scale(const ngraph::Node* node)
{
    auto conv = static_cast<const OP*>(node);

    float product_min;
    float product_max;
    quantization_range_for_multiplication<uint8_t, int8_t, int32_t>(conv->get_input_min(),
                                                                    conv->get_input_max(),
                                                                    conv->get_filter_min(),
                                                                    conv->get_filter_max(),
                                                                    &product_min,
                                                                    &product_max);

    // Largest magnitude of the 32-bit accumulator range.
    const float max_abs32 = std::max(std::abs(product_min), std::abs(product_max));
    // Largest magnitude of the freezed 8-bit output range.
    const float max_abs8 = std::max(std::abs(conv->get_freezed_output_min()),
                                    std::abs(conv->get_freezed_output_max()));

    // Derivation for a signed output:
    //   s32 = f32 * 2^31 / max_abs32
    //   s8  = f32 * 2^7  / max_abs8
    //   =>  s8 = s32 * 2^-24 * max_abs32 / max_abs8
    const float range_ratio = max_abs32 / max_abs8;
    const double scale_wide = std::pow(2, -24) * static_cast<double>(range_ratio);
    return static_cast<float>(scale_wide);
}
}
}
}
...
...
test/quantize_cpu.cpp
View file @
4dabd001
...
...
@@ -27,6 +27,7 @@
#include "ngraph/runtime/cpu/op/quantize.hpp"
#include "ngraph/runtime/cpu/op/quantized_avg_pool.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv.hpp"
#include "ngraph/runtime/cpu/op/quantized_conv_relu.hpp"
#include "ngraph/runtime/cpu/op/quantized_max_pool.hpp"
#include "util/all_close.hpp"
#include "util/all_close_f.hpp"
...
...
@@ -331,3 +332,100 @@ TEST(quantize_cpu, quantize_to_int8)
EXPECT_EQ
((
vector
<
float
>
{
-
127
}),
read_vector
<
float
>
(
result_min
));
EXPECT_EQ
((
vector
<
float
>
{
127
}),
read_vector
<
float
>
(
result_max
));
}
// Runs QuantizedConvolutionRelu on the CPU backend with a 3x4 u8 input and a 3x3
// i8 filter (unit strides, padding of 1 on every side) and checks the u8 result
// together with the min/max outputs, which must echo the freezed range 22..90.
TEST(quantize_cpu, quantizedConv2D_with_relu)
{
    const Shape input_shape{1, 1, 3, 4};
    const Shape filter_shape{1, 1, 3, 3};
    const Shape result_shape{1, 1, 3, 4};

    vector<uint8_t> input_values = {1, 2, 3, 4, 5, 6, 7, 0, 1, 2, 3, 4};
    vector<int8_t> filter_values = {1, 2, 3, 4, 5, 0, 0, 1, 2};

    // Every range input is a one-element f32 constant.
    auto range_constant = [](float value) {
        return op::Constant::create(element::f32, Shape{1}, {value});
    };

    auto input_param = make_shared<op::Parameter>(element::u8, input_shape);
    auto filter_param = make_shared<op::Parameter>(element::i8, filter_shape);
    auto input_min = range_constant(0.0f);
    auto input_max = range_constant(255.0f);
    auto filter_min = range_constant(-127.0f);
    auto filter_max = range_constant(127.0f);
    auto frozen_min = range_constant(22.0f);
    auto frozen_max = range_constant(90.0f);

    auto conv_relu = make_shared<op::QuantizedConvolutionRelu>(input_param,
                                                               filter_param,
                                                               Strides{1, 1},        // move_strides
                                                               Strides{1, 1},        // filter_dilation
                                                               CoordinateDiff{1, 1}, // below_pads
                                                               CoordinateDiff{1, 1}, // above_pads
                                                               Strides{1, 1},        // data_dilation
                                                               input_min,
                                                               input_max,
                                                               filter_min,
                                                               filter_max,
                                                               frozen_min,
                                                               frozen_max);

    auto data_out = std::make_shared<op::GetOutputElement>(conv_relu, 0);
    auto min_out = std::make_shared<op::GetOutputElement>(conv_relu, 1);
    auto max_out = std::make_shared<op::GetOutputElement>(conv_relu, 2);
    auto function = make_shared<Function>(NodeVector{data_out, min_out, max_out},
                                          op::ParameterVector{input_param, filter_param});

    auto backend = runtime::Backend::create("CPU");

    // Input tensors.
    auto input_tensor = backend->create_tensor(element::u8, input_shape);
    copy_data(input_tensor, input_values);
    auto filter_tensor = backend->create_tensor(element::i8, filter_shape);
    copy_data(filter_tensor, filter_values);

    // Output tensors.
    auto result = backend->create_tensor(element::u8, result_shape);
    auto result_min = backend->create_tensor(element::f32, Shape{1});
    auto result_max = backend->create_tensor(element::f32, Shape{1});

    backend->call_with_validate(
        function, {result, result_min, result_max}, {input_tensor, filter_tensor});

    EXPECT_EQ((vector<uint8_t>{31, 48, 42, 45, 54, 102, 127, 61, 47, 74, 61, 55}),
              read_vector<uint8_t>(result));
    EXPECT_EQ((vector<float>{22.0}), read_vector<float>(result_min));
    EXPECT_EQ((vector<float>{90.0}), read_vector<float>(result_max));
}
// Runs QuantizedConvolutionRelu on the CPU backend with a 3x3 u8 input and a 3x3
// i8 filter containing negative weights (unit strides, padding of 1 on every
// side). The first six expected results are 0; the last three are non-zero.
// NOTE(review): the freezed "min" (20) is larger than the freezed "max" (-24);
// the test only checks that both values are echoed unchanged -- confirm the
// ordering is intentional.
TEST(quantize_cpu, quantizedConv2D_fused_relu)
{
    const Shape input_shape{1, 1, 3, 3};
    const Shape filter_shape{1, 1, 3, 3};
    const Shape result_shape{1, 1, 3, 3};

    vector<uint8_t> input_values = {1, 2, 3, 4, 5, 6, 7, 8, 9};
    vector<int8_t> filter_values = {1, 2, 1, 0, 0, 0, -1, -2, -1};

    // Every range input is a one-element f32 constant.
    auto range_constant = [](float value) {
        return op::Constant::create(element::f32, Shape{1}, {value});
    };

    auto input_param = make_shared<op::Parameter>(element::u8, input_shape);
    auto filter_param = make_shared<op::Parameter>(element::i8, filter_shape);
    auto input_min = range_constant(0.0f);
    auto input_max = range_constant(255.0f);
    auto filter_min = range_constant(-127.0f);
    auto filter_max = range_constant(127.0f);
    auto frozen_min = range_constant(20.0f);
    auto frozen_max = range_constant(-24.0f);

    auto conv_relu = make_shared<op::QuantizedConvolutionRelu>(input_param,
                                                               filter_param,
                                                               Strides{1, 1},        // move_strides
                                                               Strides{1, 1},        // filter_dilation
                                                               CoordinateDiff{1, 1}, // below_pads
                                                               CoordinateDiff{1, 1}, // above_pads
                                                               Strides{1, 1},        // data_dilation
                                                               input_min,
                                                               input_max,
                                                               filter_min,
                                                               filter_max,
                                                               frozen_min,
                                                               frozen_max);

    auto data_out = std::make_shared<op::GetOutputElement>(conv_relu, 0);
    auto min_out = std::make_shared<op::GetOutputElement>(conv_relu, 1);
    auto max_out = std::make_shared<op::GetOutputElement>(conv_relu, 2);
    auto function = make_shared<Function>(NodeVector{data_out, min_out, max_out},
                                          op::ParameterVector{input_param, filter_param});

    auto backend = runtime::Backend::create("CPU");

    // Input tensors.
    auto input_tensor = backend->create_tensor(element::u8, input_shape);
    copy_data(input_tensor, input_values);
    auto filter_tensor = backend->create_tensor(element::i8, filter_shape);
    copy_data(filter_tensor, filter_values);

    // Output tensors.
    auto result = backend->create_tensor(element::u8, result_shape);
    auto result_min = backend->create_tensor(element::f32, Shape{1});
    auto result_max = backend->create_tensor(element::f32, Shape{1});

    backend->call_with_validate(
        function, {result, result_min, result_max}, {input_tensor, filter_tensor});

    EXPECT_EQ((vector<uint8_t>{0, 0, 0, 0, 0, 0, 69, 106, 90}), read_vector<uint8_t>(result));
    EXPECT_EQ((vector<float>{20.0}), read_vector<float>(result_min));
    EXPECT_EQ((vector<float>{-24.0}), read_vector<float>(result_max));
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment