ngraph / Commits / e5757725

Commit e5757725, authored Jun 20, 2019 by nishant.b.patel

    Merge branch 'master' into quantized_conv_temp

Parents: b13eacf6, d0f03eec

Showing 36 changed files with 672 additions and 298 deletions (+672 / -298).
Changed files:

    python/test/conftest.py                              +21   -16
    python/test/ngraph/test_ops_unary.py                 +16    -2
    python/test/test_ops.py                               +1    -0
    src/ngraph/CMakeLists.txt                             +2    -0
    src/ngraph/distributed.cpp                           +35    -0
    src/ngraph/distributed.hpp                           +37    -2
    src/ngraph/distributed/mlsl.hpp                      +26    -4
    src/ngraph/distributed/null.hpp                       +5    -2
    src/ngraph/distributed/open_mpi.hpp                  +23    -3
    src/ngraph/op/allreduce.cpp                           +9    -2
    src/ngraph/op/allreduce.hpp                           +6    -1
    src/ngraph/op/fused/group_conv.cpp                   +20   -15
    src/ngraph/op/fused/group_conv.hpp                    +6    -2
    src/ngraph/op/slice.cpp                              +10    -4
    src/ngraph/op/slice.hpp                               +7    -3
    src/ngraph/op/util/fused_op.cpp                       +5    -0
    src/ngraph/op/util/fused_op.hpp                       +2    -0
    src/ngraph/runtime/cpu/builder/allreduce.cpp         +12    -7
    src/ngraph/runtime/cpu/builder/gather.cpp            +10    -6
    src/ngraph/runtime/cpu/builder/scatter_add.cpp       +71   -55
    src/ngraph/runtime/cpu/cpu_builder.hpp                +4   -24
    src/ngraph/runtime/cpu/cpu_emitter.cpp               +12    -5
    src/ngraph/runtime/cpu/kernel/gather.hpp              +9    -2
    src/ngraph/runtime/cpu/pass/cpu_fusion.cpp           +26    -0
    src/ngraph/runtime/interpreter/int_executable.hpp     +4    -0
    src/ngraph/runtime/plaidml/plaidml_impl.hpp           +2    -2
    src/ngraph/runtime/plaidml/unit_test.manifest        +18   -85
    src/ngraph/runtime/reference/allreduce.hpp            +6    -2
    src/ngraph/serializer.cpp                             +0    -0
    src/ngraph/serializer.hpp                             +0   -39
    src/ngraph/serializer_stub.cpp                       +49    -0
    test/backend_scatter.in.cpp                           +8    -9
    test/build_graph.cpp                                 +16    -0
    test/cpu_fusion.cpp                                 +114    -0
    test/distributed.in.cpp                              +64    -6
    test/serialize.cpp                                   +16    -0
python/test/conftest.py  (+21 / -16)

Adds an 'INTELGPU' choice to the --backend option and replaces the hand-written
per-backend skip logic in pytest_collection_modifyitems with table-driven lookups:

    def pytest_addoption(parser):
        parser.addoption('--backend', default='INTERPRETER',
                         choices=['INTERPRETER', 'CPU', 'GPU', 'NNP', 'PlaidML', 'INTELGPU'],
                         help='Select from available backends')

    def pytest_collection_modifyitems(config, items):
        backend_name = config.getvalue('backend')
        keywords = {
            'GPU': 'skip_on_gpu',
            'CPU': 'skip_on_cpu',
            'NNP': 'skip_on_nnp',
            'INTERPRETER': 'skip_on_interpreter',
            'PlaidML': 'skip_on_plaidml',
            'INTELGPU': 'skip_on_intelgpu',
        }
        skip_markers = {
            'GPU': pytest.mark.skip(reason='Skipping test on the GPU backend.'),
            'CPU': pytest.mark.skip(reason='Skipping test on the CPU backend.'),
            'NNP': pytest.mark.skip(reason='Skipping test on the NNP backend.'),
            'INTERPRETER': pytest.mark.skip(reason='Skipping test on the INTERPRETER backend.'),
            'PlaidML': pytest.mark.skip(reason='Skipping test on the PlaidML backend.'),
            'INTELGPU': pytest.mark.skip(reason='Skipping test on the INTELGPU backend.'),
        }
        for item in items:
            skip_this_backend = keywords[backend_name]
            if skip_this_backend in item.keywords:
                item.add_marker(skip_markers[backend_name])

The previous version created one pytest.mark.skip variable per backend and chained
per-backend if-statements inside the loop; that code is removed.
python/test/ngraph/test_ops_unary.py  (+16 / -2)

Removes ng.logical_not from the generic unary-op parametrizations, i.e. the
(ng.logical_not, np.logical_not, -10, 10) entry in test_unary_op_array and the
(ng.logical_not, np.logical_not, np.int32(0)) entry in test_unary_op_scalar, and
adds a dedicated boolean test:

    @pytest.mark.parametrize('input_data', [
        (np.array([True, False, True, False])),
        (np.array(True)),
        (np.array(False)),
    ])
    @pytest.mark.skip_on_gpu
    def test_logical_not(input_data):
        expected = np.logical_not(input_data)

        result = run_op_node([input_data], ng.logical_not)[0]
        assert np.array_equal(result, expected)

        result = run_op_numeric_data(input_data, ng.logical_not)[0]
        assert np.array_equal(result, expected)
python/test/test_ops.py  (+1 / -0)

test_replace_slice gains a skip marker for the new INTELGPU backend:

    @pytest.mark.skip_on_gpu
    @pytest.mark.skip_on_intelgpu
    def test_replace_slice():
        element_type = Type.f32
src/ngraph/CMakeLists.txt  (+2 / -0)

When JSON support is disabled, the new stub serializer is compiled instead of the
real one:

    if (NGRAPH_JSON_ENABLE)
        list(APPEND SRC serializer.cpp serializer.hpp event_tracing.cpp event_tracing.hpp)
    else()
        list(APPEND SRC serializer_stub.cpp)
    endif()

    configure_file(version.in.hpp version.hpp)
src/ngraph/distributed.cpp  (+35 / -0)

Adds the definitions backing the new reduction::Type introduced in distributed.hpp:

    using namespace ngraph;

    NGRAPH_API const reduction::Type reduction::sum(reduction::Type_t::sum);
    NGRAPH_API const reduction::Type reduction::prod(reduction::Type_t::prod);
    NGRAPH_API const reduction::Type reduction::min(reduction::Type_t::min);
    NGRAPH_API const reduction::Type reduction::max(reduction::Type_t::max);

    std::ostream& reduction::operator<<(std::ostream& out, const reduction::Type& obj)
    {
        // switch wrapped in -Wswitch / -Wswitch-enum pragma guards
        // (skipped for GCC 4.8)
        switch (obj.get_type())
        {
        case reduction::Type_t::sum: out << "sum"; break;
        case reduction::Type_t::prod: out << "prod"; break;
        case reduction::Type_t::min: out << "min"; break;
        case reduction::Type_t::max: out << "max"; break;
        }
        return out;
    }

    bool reduction::Type::operator==(const reduction::Type& other) const
    {
        return m_type == other.m_type;
    }

    reduction::Type_t reduction::Type::get_type() const
    {
        return m_type;
    }

The existing set_distributed_interface() machinery below is unchanged context.
src/ngraph/distributed.hpp  (+37 / -2)

Introduces a reduction namespace describing the allreduce operation type, and adds
a reduce_type parameter to DistributedInterface::all_reduce:

    namespace ngraph
    {
        namespace reduction
        {
            enum class Type_t
            {
                sum,
                prod,
                min,
                max,
            };

            class Type
            {
            public:
                Type(const Type_t t)
                    : m_type(t)
                {
                }
                friend std::ostream& operator<<(std::ostream&, const Type&);
                bool operator==(const Type& other) const;
                bool operator!=(const Type& other) const { return !(*this == other); }
                Type_t get_type() const;

            private:
                Type_t m_type;
            };

            std::ostream& operator<<(std::ostream& out, const Type& obj);

            extern NGRAPH_API const Type sum;
            extern NGRAPH_API const Type prod;
            extern NGRAPH_API const Type min;
            extern NGRAPH_API const Type max;
        }

        class DistributedInterface
        {
        public:
            ...
            virtual int get_rank() = 0;
            virtual void log_print(const std::string& timestamp, const std::vector<char>& buf) = 0;

            virtual void all_reduce(void* in,
                                    void* out,
                                    element::Type_t element_type,
                                    reduction::Type reduce_type,
                                    size_t count) = 0;
            virtual void broadcast(void* in, element::Type_t element_type, size_t count, int root_id) = 0;
        };

The previous all_reduce overload without reduce_type is removed.
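A minimal usage sketch (not part of this commit) of the new reduction::Type wrapper,
assuming "ngraph/distributed.hpp" is on the include path. The extern constants compare
by their underlying enum value and stream as their name, which is what the backends and
tests in this commit rely on:

    #include <iostream>
    #include "ngraph/distributed.hpp"

    // Hypothetical helper used only for illustration.
    void log_reduction(const ngraph::reduction::Type& t)
    {
        // reduction::operator<< prints "sum", "prod", "min" or "max".
        std::cout << "allreduce reduction: " << t << std::endl;
    }

    int main()
    {
        ngraph::reduction::Type t = ngraph::reduction::min;
        if (t != ngraph::reduction::sum) // operator!= is defined in terms of operator==
        {
            log_reduction(t); // prints "allreduce reduction: min"
        }
    }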
src/ngraph/distributed/mlsl.hpp  (+26 / -4)

all_reduce() takes the new reduce_type argument and maps it onto an MLSL reduction
type before issuing the collective (previously hard-coded to MLSL::RT_SUM):

    void all_reduce(void* in,
                    void* out,
                    element::Type_t element_type,
                    reduction::Type reduce_type,
                    size_t count) override
    {
        auto data_type = MLSL::DT_FLOAT;
        ...
        decltype(MLSL::RT_SUM) mlsl_reduce_type;
        // switch wrapped in the same -Wswitch/-Wswitch-enum pragma guards
        switch (reduce_type.get_type())
        {
        case reduction::Type_t::sum: mlsl_reduce_type = MLSL::RT_SUM; break;
        case reduction::Type_t::prod:
            throw std::runtime_error("MLSL doesn't support allreduce prod");
            break;
        case reduction::Type_t::min: mlsl_reduce_type = MLSL::RT_MIN; break;
        case reduction::Type_t::max: mlsl_reduce_type = MLSL::RT_MAX; break;
        }

        MLSL::Environment& env = MLSL::Environment::GetEnv();
        MLSL::Distribution* distribution = env.CreateDistribution(env.GetProcessCount(), 1);
        MLSL::CommReq* req =
            distribution->AllReduce(in, out, count, data_type, mlsl_reduce_type, MLSL::GT_DATA);
        env.Wait(req);
        env.DeleteDistribution(distribution);
    }
src/ngraph/distributed/null.hpp  (+5 / -2)

The null backend only updates the all_reduce signature to accept the new reduce_type
argument; the body still throws:

    void all_reduce(void* in,
                    void* out,
                    element::Type_t element_type,
                    reduction::Type reduce_type,
                    size_t count) override
    {
        throw ngraph_error("Distributed Library not supported/mentioned");
    }
src/ngraph/distributed/open_mpi.hpp  (+23 / -3)

all_reduce() takes the new reduce_type argument and maps it onto an MPI reduction op
before calling MPI_Allreduce (previously hard-coded to MPI_SUM):

    void all_reduce(void* in,
                    void* out,
                    element::Type_t element_type,
                    reduction::Type reduce_type,
                    size_t count) override
    {
        auto data_type = MPI_FLOAT;
        ...
        decltype(MPI_SUM) mpi_reduce_type;
        // switch wrapped in the same -Wswitch/-Wswitch-enum pragma guards
        switch (reduce_type.get_type())
        {
        case reduction::Type_t::sum: mpi_reduce_type = MPI_SUM; break;
        case reduction::Type_t::prod: mpi_reduce_type = MPI_PROD; break;
        case reduction::Type_t::min: mpi_reduce_type = MPI_MIN; break;
        case reduction::Type_t::max: mpi_reduce_type = MPI_MAX; break;
        }

        MPI_Allreduce(in, out, count, data_type, mpi_reduce_type, MPI_COMM_WORLD);
    }
src/ngraph/op/allreduce.cpp  (+9 / -2)

op::AllReduce gains a default constructor, stores the requested reduction type, and
propagates it through copy_with_new_args():

    op::AllReduce::AllReduce()
        : m_reduce_type(reduction::sum)
    {
    }

    op::AllReduce::AllReduce(const shared_ptr<Node>& arg, const reduction::Type reduce_type)
        : Op(check_single_output_args({arg}))
        , m_reduce_type(reduce_type)
    {
        constructor_validate_and_infer_types();
    }

    shared_ptr<Node> op::AllReduce::copy_with_new_args(const NodeVector& new_args) const
    {
        check_new_args_count(this, new_args);
        return make_shared<AllReduce>(new_args.at(0), get_reduce_type());
    }

    reduction::Type op::AllReduce::get_reduce_type() const
    {
        return m_reduce_type;
    }
src/ngraph/op/allreduce.hpp  (+6 / -1)

The class declaration picks up the new constructor, accessor, and member:

    AllReduce();
    AllReduce(const std::shared_ptr<Node>& arg,
              const reduction::Type reduce_type = reduction::sum);

    void validate_and_infer_types() override;

    std::shared_ptr<Node> copy_with_new_args(const NodeVector& new_args) const override;
    reduction::Type get_reduce_type() const;

    private:
        const reduction::Type m_reduce_type;
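A brief usage sketch (not taken from the diff) showing how a graph would request a
non-default reduction with the new constructor argument; it mirrors the pattern used in
test/distributed.in.cpp below, and assumes the umbrella "ngraph/ngraph.hpp" header:

    #include "ngraph/ngraph.hpp"

    using namespace ngraph;

    std::shared_ptr<Function> make_min_allreduce()
    {
        auto A = std::make_shared<op::Parameter>(element::f32, Shape{2, 2});
        // Second argument is the reduce_type parameter added by this commit;
        // omitting it keeps the previous behaviour (reduction::sum).
        auto all_reduce = std::make_shared<op::AllReduce>(A, reduction::min);
        return std::make_shared<Function>(all_reduce, ParameterVector{A});
    }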
src/ngraph/op/fused/group_conv.cpp  (+20 / -15)

GroupConvolution gains a type_name ("GroupConvolution") and a default constructor, and
its main constructor now takes Output<Node> arguments and calls
FusedOp({data_batch, filters}) instead of
FusedOp("GroupConvolution", check_single_output_args({data_batch, filters})). A TODO is
added to move constructor_validate_and_infer_types() out of the constructor.
decompose_op() now reads its arguments as input values rather than argument nodes:

    auto data = input(0);
    auto filters = input(1);
    ...
    std::size_t n_data_channels{data.get_shape().at(1)};
    std::size_t n_filters_channels{filters.get_shape().at(0)};
    ...
    std::vector<std::size_t> data_lower_bounds(data.get_shape().size());
    std::vector<std::size_t> data_upper_bounds{data.get_shape()};
    std::vector<std::size_t> filters_lower_bounds(filters.get_shape().size());
    std::vector<std::size_t> filters_upper_bounds{filters.get_shape()};
    ...
    auto sliced_data = std::make_shared<ngraph::op::Slice>(
        data.get_source_output(), data_lower_bounds, data_upper_bounds);
    ...
    auto sliced_filters = std::make_shared<ngraph::op::Slice>(
        filters.get_source_output(), filters_lower_bounds, filters_upper_bounds);

The per-group Convolution construction and concatenation below are unchanged.
src/ngraph/op/fused/group_conv.hpp  (+6 / -2)

The header adds the matching declarations:

    NGRAPH_API
    static const std::string type_name;
    const std::string& description() const override { return type_name; }
    GroupConvolution();
    GroupConvolution(const Output<Node>& data_batch,
                     const Output<Node>& filters,
                     const Strides& window_movement_strides,
                     const Strides& window_dilation_strides,
                     const CoordinateDiff& padding_below,
                     ...);

The old shared_ptr<Node> constructor parameters are removed.
src/ngraph/op/slice.cpp  (+10 / -4)

op::Slice gains a type_name ("Slice") and a default constructor, and both existing
constructors now take Output<Node> and delegate to Op({arg}) instead of
Op("Slice", check_single_output_args({arg})):

    const string op::Slice::type_name{"Slice"};

    op::Slice::Slice()
    {
    }

    op::Slice::Slice(const Output<Node>& arg,
                     const Coordinate& lower_bounds,
                     const Coordinate& upper_bounds,
                     const Strides& strides)
        : Op({arg})
        , m_lower_bounds(lower_bounds)
        , m_upper_bounds(upper_bounds)
        , m_strides(strides)
    ...

    op::Slice::Slice(const Output<Node>& arg,
                     const Coordinate& lower_bounds,
                     const Coordinate& upper_bounds)
        : Op({arg})
        , m_lower_bounds(lower_bounds)
        , m_upper_bounds(upper_bounds)
        , m_strides(Strides())
    ...
src/ngraph/op/slice.hpp  (+7 / -3)

Matching header changes: the class declares NGRAPH_API static const std::string
type_name, a description() override returning it, and a default constructor
(documented as "Constructs a tensor slice operation"); both documented constructors
switch their first parameter from const std::shared_ptr<Node>& arg to
const Output<Node>& arg, keeping the existing doc comments for lower_bounds,
upper_bounds, and strides.
src/ngraph/op/util/fused_op.cpp  (+5 / -0)

Adds an OutputVector constructor alongside the existing NodeVector and
(node_type, NodeVector) ones:

    op::util::FusedOp::FusedOp(const OutputVector& args)
        : Op(args)
    {
    }
src/ngraph/op/util/fused_op.hpp  (+2 / -0)

Declares the new constructor next to the existing one:

    /// \param args Nodes that produce the input tensors for the fused op
    FusedOp(const NodeVector& args);

    FusedOp(const OutputVector& args);
src/ngraph/runtime/cpu/builder/allreduce.cpp  (+12 / -7)

The DEX builder now queries the node's reduce type and passes it through to the
distributed interface:

    const ngraph::op::AllReduce* allreduce = static_cast<const ngraph::op::AllReduce*>(node);
    auto reduce_type = allreduce->get_reduce_type();
    ...
    auto functor = [&, count, reduce_type, data_type, arg_buffer_index, out_buffer_index](
        CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
        get_distributed_interface()->all_reduce(ctx->buffer_data[arg_buffer_index],
                                                ctx->buffer_data[out_buffer_index],
                                                data_type,
                                                reduce_type,
                                                count);
    };
    functors.emplace_back(functor);
src/ngraph/runtime/cpu/builder/gather.cpp  (+10 / -6)

Both the i64 and i32 index paths now also accept element::i8 inputs and additionally
require the kernel-supported ranks:

    if ((args[0].get_element_type() == element::f32 ||
         args[0].get_element_type() == element::f64 ||
         args[0].get_element_type() == element::u8 ||
         args[0].get_element_type() == element::i8) &&
        params_shape.size() <= 3 && out_shape.size() <= 3)
    {
        std::function<decltype(runtime::cpu::kernel::gather_i64<float, 2, 2>)> kernel;
        ...
    }

(the same condition guards the gather_i32 kernel selection in the else branch).
src/ngraph/runtime/cpu/builder/scatter_add.cpp  (+71 / -55)

The supported-type check now admits u8 and i8 in addition to f32/f64:

    if (args[0].get_element_type() != element::f64 &&
        args[0].get_element_type() != element::f32 &&
        args[0].get_element_type() != element::u8 &&
        args[0].get_element_type() != element::i8)
    {
        throw ngraph_error("Unsupported type in CPU Builder for ScatterAdd");
    }

Both the i64 and i32 index paths are then wrapped in a rank guard; the kernel
selection and functor construction themselves are unchanged, only re-indented:

    if (is_int64)
    {
        if (inputs_shape.size() <= 3 && updates_shape.size() <= 3)
        {
            std::function<decltype(runtime::cpu::kernel::scatter_add_i64<float, 2, 2>)> kernel;
            SELECT_KERNEL_BY_2RANKS(kernel,
                                    args[0].get_element_type(),
                                    inputs_shape.size(),
                                    updates_shape.size(),
                                    runtime::cpu::kernel::scatter_add_i64);
            auto functor = [...](CPURuntimeContext* ctx, CPUExecutionContext* ectx) { ... };
            functors.emplace_back(functor);
        }
        else
        {
            throw ngraph_error("Unsupported ranks in CPU Builder for ScatterAdd");
        }
    }
    else
    {
        // same structure for scatter_add_i32
    }

    REGISTER_OP_BUILDER(ScatterAdd);
src/ngraph/runtime/cpu/cpu_builder.hpp  (+4 / -24)

The SELECT_RANK1 and SELECT_2RANKS macros drop their rank 4 through 7 branches, so
only kernels up to rank 3 are instantiated; ranks beyond that fall through to the
existing "Unsupported first rank" / "Unsupported second rank" errors. The element-type
dispatch gains an i8 case:

    else if (ET == element::i8)
    {
        SELECT_2RANKS(KV, int8_t, R1, R2, K);
    }
src/ngraph/runtime/cpu/cpu_emitter.cpp  (+12 / -5)

Three codegen emitters change. The AllReduce emitter casts the node to
ngraph::op::AllReduce and appends the reduce type to the generated call:

    writer << "ngraph::get_distributed_interface()->all_reduce("
           << args[0].get_name() << ", " << out[0].get_name() << ", "
           << "ngraph::element::Type_t::" << args[0].get_element_type().get_type_name()
           << ", " << out[0].get_size() << ", "
           << "ngraph::Reduce_t::" << allreduce->get_reduce_type() << ");\n";

The Gather and ScatterAdd emitters widen their element-type checks to also accept u8
and i8 and add rank guards (input and output/update shapes of rank <= 3), matching the
DEX builder changes above.
src/ngraph/runtime/cpu/kernel/gather.hpp  (+9 / -2)

Fixes the comment on get_indices ("Calculate the indices for positions 0 to rank-1."),
disables the OpenMP pragma on the scalar-indices loop pending a CODEGEN compiler
issue:

    //TODO Enable this if compiler issue with CODEGEN is fixed or DEX needs it.
    #if 0
    #ifdef _OPENMP
    #pragma omp parallel for
    #endif
    #endif

and computes num_indices with an explicit product instead of shape_size():

    size_t num_indices = 1;
    for (auto d : indices_shape)
    {
        num_indices *= d;
    }
src/ngraph/runtime/cpu/pass/cpu_fusion.cpp  (+26 / -0)

In CPUQuantFusion::construct_qconvb_add, when both Dequantize inputs of the matched
Add come from QuantizedConvolutionBias, the callback now looks at the Quantize user of
the match root and swaps the two branches if its scale matches the right-hand
Dequantize scale but not the left-hand one:

    // both left and right are QuantizedConvolutionBias
    if (dq_r_m->get_argument(0)->description() == "QuantizedConvolutionBias")
    {
        for (auto user : m.get_match_root()->get_users())
        {
            auto q_m = std::dynamic_pointer_cast<ngraph::op::Quantize>(user);
            if (q_m)
            {
                auto q_m_scale = q_m->get_argument(1);
                auto dq_l_m_scale = dq_l_m->get_argument(1);
                auto dq_r_m_scale = dq_r_m->get_argument(1);
                if (!ngraph::compare_constants(q_m_scale, dq_l_m_scale) &&
                    ngraph::compare_constants(q_m_scale, dq_r_m_scale))
                {
                    NGRAPH_DEBUG << "Scales of Q and DQ of right branch match";
                    // switch left and right branch
                    auto temp = dq_l_m;
                    dq_l_m = dq_r_m;
                    dq_r_m = temp;
                }
                break;
            }
        }
    }

The subsequent static_pointer_cast to QuantizedConvolutionBias and the in-place input
selection are unchanged.
src/ngraph/runtime/interpreter/int_executable.hpp  (+4 / -0)

Adds #include "ngraph/op/allreduce.hpp" and passes the op's reduce type to the
reference kernel:

    case OP_TYPEID::AllReduce:
    {
        const ngraph::op::AllReduce* allreduce =
            static_cast<const ngraph::op::AllReduce*>(&node);
        reference::allreduce<T>(args[0]->get_data_ptr<T>(),
                                out[0]->get_data_ptr<T>(),
                                node.get_input_element_type(0).get_type_enum(),
                                allreduce->get_reduce_type(),
                                static_cast<int>(shape_size(node.get_input_shape(0))));
        break;
    }
src/ngraph/runtime/plaidml/plaidml_impl.hpp  (+2 / -2)

check_inputs() now only rejects ops with fewer inputs than expected, rather than
requiring an exact count, and the error message is adjusted accordingly:

    if (op().get_input_size() < expected_input_count)   // was: != expected_input_count
    {
        std::ostringstream os;
        os << "The PlaidML nGraph backend only supports " << op().description()
           << " operations with an input count >= " << expected_input_count   // was: ==
           << " (got " << op().get_input_size() << " inputs)";
        throw std::runtime_error{os.str()};
    }
src/ngraph/runtime/plaidml/unit_test.manifest  (+18 / -85)

Re-scrubs the PlaidML triage list under "# dgkutnic ww24.5: these tests are to be
triaged by the PlaidML team". The bulk of the previously skipped entries (the
select, product_*, max_*, min_*, sum_*, one_hot_scalar_*, lstm_cell_*,
group_conv_transpose*, divide_python_rounding_int32, any_*, and all_* groups) is
removed, and a much shorter list remains with a new note:

    # ww25.2: re-scrubbed this list of tests after fixing check_inputs
    # initial debug points to some of these failing due to precision issues

followed by entries such as sqrt, batch_norm_inference_0eps_f32,
batch_norm_inference_f32, batch_norm_training_0eps_f32, argmin_trivial,
argmax_trivial, argmin_trivial_in_i32, backwards_broadcast0, backwards_broadcast1,
backwards_select, backwards_select_nested, backwards_sum_v2s, backwards_sum_m2s,
backwards_sum_m2v_0, backwards_sum_m2v_1, and backwards_batchmatmul_tensor2_tensor2.
src/ngraph/runtime/reference/allreduce.hpp  (+6 / -2)

The reference kernel forwards the new reduce_type:

    template <typename T>
    void allreduce(T* arg,
                   T* out,
                   const element::Type_t element_type,
                   const reduction::Type reduce_type,
                   int count)
    {
        get_distributed_interface()->all_reduce(arg, out, element_type, reduce_type, count);
    }
src/ngraph/serializer.cpp  (+0 / -0)

Source diff not displayed (GitLab reports it as too large to render); the file can be
viewed as a blob instead.
src/ngraph/serializer.hpp  (+0 / -39)

Removes the #ifdef NGRAPH_JSON_DISABLE block of inline stub definitions (the
ngraph::serialize overloads, ngraph::deserialize overloads, and
ngraph::set_serialize_output_shapes, which either returned "" or threw
"serializer disabled in build"). Those stubs now live as out-of-line definitions in
the new serializer_stub.cpp, selected by the CMake change above. The declaration of
set_serialize_output_shapes and its NGRAPH_SERIALIZER_OUTPUT_SHAPES doc comment are
unchanged.
src/ngraph/serializer_stub.cpp  (new file, +49)

New file carrying the standard Intel Apache-2.0 header and the non-inline stub
definitions used when the JSON serializer is disabled:

    #include "ngraph/serializer.hpp"

    std::string ngraph::serialize(std::shared_ptr<ngraph::Function> func, size_t indent)
    {
        throw std::runtime_error("serializer disabled in build");
    }

    void ngraph::serialize(const std::string& path,
                           std::shared_ptr<ngraph::Function> func,
                           size_t indent)
    {
        throw std::runtime_error("serializer disabled in build");
    }

    void ngraph::serialize(std::ostream& out,
                           std::shared_ptr<ngraph::Function> func,
                           size_t indent)
    {
        throw std::runtime_error("serializer disabled in build");
    }

    std::shared_ptr<ngraph::Function> ngraph::deserialize(std::istream& in)
    {
        throw std::runtime_error("serializer disabled in build");
    }

    std::shared_ptr<ngraph::Function> ngraph::deserialize(const std::string& str)
    {
        throw std::runtime_error("serializer disabled in build");
    }

    void ngraph::set_serialize_output_shapes(bool enable)
    {
        throw std::runtime_error("serializer disabled in build");
    }
test/backend_scatter.in.cpp  (+8 / -9)

The scatter_add_4d_indices and scatter_add_3d_indices tests are disabled by wrapping
them in #if 0 ... #endif, and scatter_add_2d_indices is shrunk to fit the rank limits
introduced in the CPU builder: ref_shape becomes {3} (was {2, 3, 3}), updates_shape
becomes {2, 2} (was {2, 2, 3, 3}), out_shape becomes {3} (was {2, 3, 3}), while
indices_shape stays {2, 2}. The tensor data is reduced accordingly:

    copy_data(r, vector<float>{0, 1, 2});
    auto i = backend->create_tensor(element::i32, indices_shape);
    copy_data(i, vector<int32_t>{0, 1, 1, 0});
    auto u = backend->create_tensor(element::f32, updates_shape);
    copy_data(u, vector<float>{1, 2, 3, 4});
    ...
    EXPECT_TRUE(test::all_close_f(
        (vector<float>{5, 6, 2}), read_vector<float>(result), MIN_FLOAT_TOLERANCE_BITS));
test/build_graph.cpp  (+16 / -0)

Adds a test that feeds a non-zero output of op::Split into the new Output<Node>-based
GroupConvolution constructor:

    TEST(build_graph, multi_output_split)
    {
        const auto data = make_shared<op::Parameter>(element::f32, Shape{64, 8, 100, 150});
        auto filters = make_shared<op::Parameter>(element::f32, Shape{128, 2, 10, 20});
        const auto split = make_shared<op::Split>(data, 1, 2);
        auto conv = make_shared<op::GroupConvolution>(split->output(1),
                                                      filters,
                                                      Strides{1, 1},
                                                      Strides{1, 1},
                                                      CoordinateDiff{0, 0},
                                                      CoordinateDiff{0, 0},
                                                      Strides{1, 1},
                                                      2);
        EXPECT_EQ(conv->get_shape(), (Shape{64, 128, 91, 131}));
    }
test/cpu_fusion.cpp  (+114 / -0)

Adds TEST(cpu_quant_fusion, qconvba_q) after the existing qconvba test. make_function()
builds two quantized branches: each quantizes its input (u8), weights (i8), and bias
(i32) with its own scales (2.0f/2.0f/4.0f on the left, 5.0f/5.0f/20.0f on the right),
runs a QuantizedConvolutionBias with the corresponding requantization scale,
dequantizes, reshapes to Shape{1, 2, 2}, and broadcasts to Shape{1, 1, 2, 2}. The two
branches are added, passed through Relu, quantized with the right branch's output
scale, and dequantized again. The test then checks the fusion pass is numerically
neutral and that the fused graph keeps six Quantize ops:

    // Disable CPUQuantFusion
    set_environment("NGRAPH_PASS_ENABLES", "CPUQuantFusion:0", 1);
    auto cpu1_results = execute(cpu_f1, args, "CPU");
    // Enable CPUQuantFusion
    set_environment("NGRAPH_PASS_ENABLES", "CPUQuantFusion:1", 1);
    auto cpu2_results = execute(cpu_f2, args, "CPU");
    EXPECT_TRUE(test::all_close(cpu1_results.at(0), cpu2_results.at(0)));

    auto backend = runtime::Backend::create("CPU");
    auto fuse = make_function();
    backend->compile(fuse);
    ASSERT_EQ(count_ops_of_type<op::Quantize>(fuse), 6);

The following cpu_fusion tests that deserialize json files remain guarded by
#ifndef NGRAPH_JSON_DISABLE.
test/distributed.in.cpp  (+64 / -6)

The single allreduce test is refactored into a helper parameterized on the reduction
type, and one TEST per reduction is added:

    static void test_allreduce_common(reduction::Type reduce_type)
    {
        auto comm_size = get_distributed_interface()->get_size();
        if (comm_size > 1)
        {
            auto shape = Shape{2, 2};
            auto A = make_shared<op::Parameter>(element::f32, shape);
            auto f = make_shared<Function>(make_shared<op::AllReduce>(A, reduce_type),
                                           ParameterVector{A});
            auto backend = runtime::Backend::create("${BACKEND_NAME}");
            auto v = vector<float>{1, 2, 3, 4};
            ...
        }
    }

Expected values are computed per reduction inside a -Wswitch-guarded switch: for sum,
each element is multiplied by comm_size; for prod, each element is raised to the
comm_size power; for min and max, the input vector is rotated by the rank before
copy_data and the expectation vector is filled and adjusted so every rank agrees on the
element-wise minimum or maximum. The new test entry points are:

    TEST(distributed_${BACKEND_NAME}, allreduce_sum) { test_allreduce_common(reduction::sum); }
    TEST(distributed_${BACKEND_NAME}, allreduce_min) { test_allreduce_common(reduction::min); }
    TEST(distributed_${BACKEND_NAME}, allreduce_max) { test_allreduce_common(reduction::max); }
    #if !defined(NGRAPH_DISTRIBUTED_MLSL_ENABLE)
    TEST(distributed_${BACKEND_NAME}, allreduce_prod) { test_allreduce_common(reduction::prod); }
    #endif

The existing broadcastdistributed test is unchanged.
test/serialize.cpp  (+16 / -0)

Adds a test that a non-zero node output index survives a serialize/deserialize round
trip:

    TEST(serialize, non_zero_node_output)
    {
        auto arg = make_shared<op::Parameter>(element::f32, Shape{10});
        auto topk = make_shared<op::TopK>(arg, 0, element::i32, 5, true);
        auto abs = make_shared<op::Abs>(Output<Node>(topk, 1));
        auto result = make_shared<op::Result>(abs);
        auto f = make_shared<Function>(ResultVector{result}, ParameterVector{arg});
        string s = serialize(f);
        shared_ptr<Function> g = deserialize(s);
        auto g_result = g->get_results().at(0);
        auto g_abs = g_result->input(0).get_source_output().get_node_shared_ptr();
        auto topk_out = g_abs->input(0).get_source_output();
        EXPECT_EQ(topk_out.get_index(), 1);
        EXPECT_EQ(topk_out.get_node()->description(), "TopK");
    }