submodule / ngraph / Commits

Commit e5757725, authored Jun 20, 2019 by nishant.b.patel

    Merge branch 'master' into quantized_conv_temp

Parents: b13eacf6, d0f03eec

Showing 36 changed files with 608 additions and 234 deletions (+608 / -234)
Changed files:

  python/test/conftest.py                             +21  -16
  python/test/ngraph/test_ops_unary.py                +16   -2
  python/test/test_ops.py                              +1   -0
  src/ngraph/CMakeLists.txt                            +2   -0
  src/ngraph/distributed.cpp                          +35   -0
  src/ngraph/distributed.hpp                          +37   -2
  src/ngraph/distributed/mlsl.hpp                     +26   -4
  src/ngraph/distributed/null.hpp                      +5   -2
  src/ngraph/distributed/open_mpi.hpp                 +23   -3
  src/ngraph/op/allreduce.cpp                          +9   -2
  src/ngraph/op/allreduce.hpp                          +6   -1
  src/ngraph/op/fused/group_conv.cpp                  +20  -15
  src/ngraph/op/fused/group_conv.hpp                   +6   -2
  src/ngraph/op/slice.cpp                             +10   -4
  src/ngraph/op/slice.hpp                              +7   -3
  src/ngraph/op/util/fused_op.cpp                      +5   -0
  src/ngraph/op/util/fused_op.hpp                      +2   -0
  src/ngraph/runtime/cpu/builder/allreduce.cpp         +6   -1
  src/ngraph/runtime/cpu/builder/gather.cpp            +8   -4
  src/ngraph/runtime/cpu/builder/scatter_add.cpp      +17   -1
  src/ngraph/runtime/cpu/cpu_builder.hpp               +4  -24
  src/ngraph/runtime/cpu/cpu_emitter.cpp              +12   -5
  src/ngraph/runtime/cpu/kernel/gather.hpp             +9   -2
  src/ngraph/runtime/cpu/pass/cpu_fusion.cpp          +26   -0
  src/ngraph/runtime/interpreter/int_executable.hpp    +4   -0
  src/ngraph/runtime/plaidml/plaidml_impl.hpp          +2   -2
  src/ngraph/runtime/plaidml/unit_test.manifest       +18  -85
  src/ngraph/runtime/reference/allreduce.hpp           +6   -2
  src/ngraph/serializer.cpp                            +0   -0
  src/ngraph/serializer.hpp                            +0  -39
  src/ngraph/serializer_stub.cpp                      +49   -0
  test/backend_scatter.in.cpp                          +8   -9
  test/build_graph.cpp                                +16   -0
  test/cpu_fusion.cpp                                +114   -0
  test/distributed.in.cpp                             +62   -4
  test/serialize.cpp                                  +16   -0
python/test/conftest.py

@@ -19,7 +19,7 @@ import test
 def pytest_addoption(parser):
     parser.addoption('--backend', default='INTERPRETER',
-                     choices=['INTERPRETER', 'CPU', 'GPU', 'NNP', 'PlaidML'],
+                     choices=['INTERPRETER', 'CPU', 'GPU', 'NNP', 'PlaidML', 'INTELGPU'],
                      help='Select from available backends')

@@ -31,20 +31,25 @@ def pytest_configure(config):
 def pytest_collection_modifyitems(config, items):
     backend_name = config.getvalue('backend')
-    gpu_skip = pytest.mark.skip(reason='Skipping test on the GPU backend.')
-    cpu_skip = pytest.mark.skip(reason='Skipping test on the CPU backend.')
-    nnp_skip = pytest.mark.skip(reason='Skipping test on the NNP backend.')
-    interpreter_skip = pytest.mark.skip(reason='Skipping test on the INTERPRETER backend.')
-    plaidml_skip = pytest.mark.skip(reason='Skipping test on the PlaidML backend.')
+    keywords = {
+        'GPU': 'skip_on_gpu',
+        'CPU': 'skip_on_cpu',
+        'NNP': 'skip_on_nnp',
+        'INTERPRETER': 'skip_on_interpreter',
+        'PlaidML': 'skip_on_plaidml',
+        'INTELGPU': 'skip_on_intelgpu',
+    }
+    skip_markers = {
+        'GPU': pytest.mark.skip(reason='Skipping test on the GPU backend.'),
+        'CPU': pytest.mark.skip(reason='Skipping test on the CPU backend.'),
+        'NNP': pytest.mark.skip(reason='Skipping test on the NNP backend.'),
+        'INTERPRETER': pytest.mark.skip(reason='Skipping test on the INTERPRETER backend.'),
+        'PlaidML': pytest.mark.skip(reason='Skipping test on the PlaidML backend.'),
+        'INTELGPU': pytest.mark.skip(reason='Skipping test on the INTELGPU backend.'),
+    }
     for item in items:
-        if backend_name == 'GPU' and 'skip_on_gpu' in item.keywords:
-            item.add_marker(gpu_skip)
-        if backend_name == 'CPU' and 'skip_on_cpu' in item.keywords:
-            item.add_marker(cpu_skip)
-        if backend_name == 'NNP' and 'skip_on_nnp' in item.keywords:
-            item.add_marker(nnp_skip)
-        if backend_name == 'INTERPRETER' and 'skip_on_interpreter' in item.keywords:
-            item.add_marker(interpreter_skip)
-        if backend_name == 'PlaidML' and 'skip_on_plaidml' in item.keywords:
-            item.add_marker(plaidml_skip)
+        skip_this_backend = keywords[backend_name]
+        if skip_this_backend in item.keywords:
+            item.add_marker(skip_markers[backend_name])
python/test/ngraph/test_ops_unary.py

@@ -33,7 +33,6 @@ from test.ngraph.util import run_op_numeric_data, run_op_node
     (ng.exp, np.exp, -100., 100.),
     (ng.floor, np.floor, -100., 100.),
     (ng.log, np.log, 0, 100.),
-    (ng.logical_not, np.logical_not, -10, 10),
     (ng.relu, lambda x: np.maximum(0, x), -100., 100.),
     (ng.sign, np.sign, -100., 100.),
     (ng.sin, np.sin, -100., 100.),

@@ -68,7 +67,6 @@ def test_unary_op_array(ng_api_fn, numpy_fn, range_start, range_end):
     (ng.exp, np.exp, np.float32(1.5)),
     (ng.floor, np.floor, np.float32(1.5)),
     (ng.log, np.log, np.float32(1.5)),
-    (ng.logical_not, np.logical_not, np.int32(0)),
     (ng.relu, lambda x: np.maximum(0, x), np.float32(-0.125)),
     (ng.sign, np.sign, np.float32(0.)),
     (ng.sin, np.sin, np.float32(np.pi / 4.0)),

@@ -86,3 +84,19 @@ def test_unary_op_scalar(ng_api_fn, numpy_fn, input_data):
     result = run_op_numeric_data(input_data, ng_api_fn)
     assert np.allclose(result, expected)
+
+
+@pytest.mark.parametrize('input_data', [
+    (np.array([True, False, True, False])),
+    (np.array(True)),
+    (np.array(False)),
+])
+@pytest.mark.skip_on_gpu
+def test_logical_not(input_data):
+    expected = np.logical_not(input_data)
+
+    result = run_op_node([input_data], ng.logical_not)[0]
+    assert np.array_equal(result, expected)
+
+    result = run_op_numeric_data(input_data, ng.logical_not)[0]
+    assert np.array_equal(result, expected)
python/test/test_ops.py

@@ -818,6 +818,7 @@ def test_slice():
 @pytest.mark.skip_on_gpu
+@pytest.mark.skip_on_intelgpu
 def test_replace_slice():
     element_type = Type.f32
src/ngraph/CMakeLists.txt

@@ -482,6 +482,8 @@ set(SRC ${SRC}
 if (NGRAPH_JSON_ENABLE)
     list(APPEND SRC serializer.cpp serializer.hpp event_tracing.cpp event_tracing.hpp)
+else()
+    list(APPEND SRC serializer_stub.cpp)
 endif()

 configure_file(version.in.hpp version.hpp)
src/ngraph/distributed.cpp

@@ -22,6 +22,41 @@
 using namespace ngraph;

+NGRAPH_API const reduction::Type reduction::sum(reduction::Type_t::sum);
+NGRAPH_API const reduction::Type reduction::prod(reduction::Type_t::prod);
+NGRAPH_API const reduction::Type reduction::min(reduction::Type_t::min);
+NGRAPH_API const reduction::Type reduction::max(reduction::Type_t::max);
+
+std::ostream& reduction::operator<<(std::ostream& out, const reduction::Type& obj)
+{
+#if !(defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ == 8))
+#pragma GCC diagnostic push
+#pragma GCC diagnostic error "-Wswitch"
+#pragma GCC diagnostic error "-Wswitch-enum"
+#endif
+    switch (obj.get_type())
+    {
+    case reduction::Type_t::sum: out << "sum"; break;
+    case reduction::Type_t::prod: out << "prod"; break;
+    case reduction::Type_t::min: out << "min"; break;
+    case reduction::Type_t::max: out << "max"; break;
+    }
+#if !(defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ == 8)
+#pragma GCC diagnostic pop
+#endif
+    return out;
+};
+
+bool reduction::Type::operator==(const reduction::Type& other) const
+{
+    return m_type == other.m_type;
+}
+
+reduction::Type_t reduction::Type::get_type() const { return m_type; }
+
 static std::unique_ptr<DistributedInterface> s_distributed_interface;

 void ngraph::set_distributed_interface(std::unique_ptr<DistributedInterface> distributed_interface)
src/ngraph/distributed.hpp

@@ -24,6 +24,38 @@
 namespace ngraph
 {
+    namespace reduction
+    {
+        enum class Type_t
+        {
+            sum,
+            prod,
+            min,
+            max,
+        };
+
+        class Type
+        {
+        public:
+            Type(const Type_t t)
+                : m_type(t)
+            {
+            }
+            friend std::ostream& operator<<(std::ostream&, const Type&);
+            bool operator==(const Type& other) const;
+            bool operator!=(const Type& other) const { return !(*this == other); }
+            Type_t get_type() const;
+
+        private:
+            Type_t m_type;
+        };
+
+        std::ostream& operator<<(std::ostream& out, const Type& obj);
+
+        extern NGRAPH_API const Type sum;
+        extern NGRAPH_API const Type prod;
+        extern NGRAPH_API const Type min;
+        extern NGRAPH_API const Type max;
+    }
+
     class DistributedInterface
     {
     public:

@@ -33,8 +65,11 @@ namespace ngraph
         virtual int get_rank() = 0;
         virtual void log_print(const std::string& timestamp, const std::vector<char>& buf) = 0;

         virtual void
-            all_reduce(void* in, void* out, element::Type_t element_type, size_t count) = 0;
+            all_reduce(void* in,
+                       void* out,
+                       element::Type_t element_type,
+                       reduction::Type reduce_type,
+                       size_t count) = 0;
         virtual void
             broadcast(void* in, element::Type_t element_type, size_t count, int root_id) = 0;
     };
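Taken together with the backend changes below, the new reduction::Type is what callers pass to DistributedInterface::all_reduce to select the reduction instead of the previously hard-coded sum. A minimal sketch of such a call, assuming ngraph/distributed.hpp declares get_distributed_interface() (as the CPU builder's usage later in this commit suggests) and that a distributed interface has been registered; the buffer contents are illustrative only:

    #include <iostream>
    #include <vector>

    #include "ngraph/distributed.hpp"

    // Sketch only: reduce a small float buffer across ranks with the new
    // reduction::Type argument; swap reduction::sum for min/max/prod as needed.
    void allreduce_example()
    {
        std::vector<float> in{1.0f, 2.0f, 3.0f};
        std::vector<float> out(in.size());

        ngraph::get_distributed_interface()->all_reduce(in.data(),
                                                        out.data(),
                                                        ngraph::element::Type_t::f32,
                                                        ngraph::reduction::sum,
                                                        in.size());

        // The streaming operator added in distributed.cpp prints "sum" here.
        std::cout << "reduced with " << ngraph::reduction::sum << "\n";
    }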
src/ngraph/distributed/mlsl.hpp

@@ -65,8 +65,11 @@ namespace ngraph
             std::printf("%s [MLSL RANK: %d]: %s\n", timestamp.c_str(), get_rank(), buf.data());
         }

         void
-            all_reduce(void* in, void* out, element::Type_t element_type, size_t count) override
+            all_reduce(void* in,
+                       void* out,
+                       element::Type_t element_type,
+                       reduction::Type reduce_type,
+                       size_t count) override
         {
             auto data_type = MLSL::DT_FLOAT;

@@ -83,10 +86,29 @@ namespace ngraph
                 throw std::runtime_error("AllReduce op supports only f32 and f64 types");
             }

+            decltype(MLSL::RT_SUM) mlsl_reduce_type;
+#if !(defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ == 8))
+#pragma GCC diagnostic push
+#pragma GCC diagnostic error "-Wswitch"
+#pragma GCC diagnostic error "-Wswitch-enum"
+#endif
+            switch (reduce_type.get_type())
+            {
+            case reduction::Type_t::sum: mlsl_reduce_type = MLSL::RT_SUM; break;
+            case reduction::Type_t::prod:
+                throw std::runtime_error("MLSL doesn't support allreduce prod");
+                break;
+            case reduction::Type_t::min: mlsl_reduce_type = MLSL::RT_MIN; break;
+            case reduction::Type_t::max: mlsl_reduce_type = MLSL::RT_MAX; break;
+            }
+#if !(defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ == 8)
+#pragma GCC diagnostic pop
+#endif
+
             MLSL::Environment& env = MLSL::Environment::GetEnv();
             MLSL::Distribution* distribution = env.CreateDistribution(env.GetProcessCount(), 1);
             MLSL::CommReq* req = distribution->AllReduce(
-                in, out, count, data_type, MLSL::RT_SUM, MLSL::GT_DATA);
+                in, out, count, data_type, mlsl_reduce_type, MLSL::GT_DATA);
             env.Wait(req);
             env.DeleteDistribution(distribution);
         }
src/ngraph/distributed/null.hpp

@@ -35,8 +35,11 @@ namespace ngraph
         {
             std::printf("%s: %s\n", timestamp.c_str(), buf.data());
         }

         void
-            all_reduce(void* in, void* out, element::Type_t element_type, size_t count) override
+            all_reduce(void* in,
+                       void* out,
+                       element::Type_t element_type,
+                       reduction::Type reduce_type,
+                       size_t count) override
         {
             throw ngraph_error("Distributed Library not supported/mentioned");
         }
src/ngraph/distributed/open_mpi.hpp

@@ -77,8 +77,11 @@ namespace ngraph
                 "%s [OpenMPI RANK: %d]: %s\n", timestamp.c_str(), get_rank(), buf.data());
         }

         void
-            all_reduce(void* in, void* out, element::Type_t element_type, size_t count) override
+            all_reduce(void* in,
+                       void* out,
+                       element::Type_t element_type,
+                       reduction::Type reduce_type,
+                       size_t count) override
         {
             auto data_type = MPI_FLOAT;

@@ -95,7 +98,24 @@ namespace ngraph
                 throw std::runtime_error("AllReduce op supports only f32 and f64 types");
             }

-            MPI_Allreduce(in, out, count, data_type, MPI_SUM, MPI_COMM_WORLD);
+            decltype(MPI_SUM) mpi_reduce_type;
+#if !(defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ == 8))
+#pragma GCC diagnostic push
+#pragma GCC diagnostic error "-Wswitch"
+#pragma GCC diagnostic error "-Wswitch-enum"
+#endif
+            switch (reduce_type.get_type())
+            {
+            case reduction::Type_t::sum: mpi_reduce_type = MPI_SUM; break;
+            case reduction::Type_t::prod: mpi_reduce_type = MPI_PROD; break;
+            case reduction::Type_t::min: mpi_reduce_type = MPI_MIN; break;
+            case reduction::Type_t::max: mpi_reduce_type = MPI_MAX; break;
+            }
+#if !(defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ == 8)
+#pragma GCC diagnostic pop
+#endif
+
+            MPI_Allreduce(in, out, count, data_type, mpi_reduce_type, MPI_COMM_WORLD);
         }

         void broadcast(void* in,
src/ngraph/op/allreduce.cpp

@@ -22,11 +22,13 @@ using namespace ngraph;
 const string op::AllReduce::type_name{"AllReduce"};

 op::AllReduce::AllReduce()
+    : m_reduce_type(reduction::sum)
 {
 }

-op::AllReduce::AllReduce(const shared_ptr<Node>& arg)
+op::AllReduce::AllReduce(const shared_ptr<Node>& arg, const reduction::Type reduce_type)
     : Op(check_single_output_args({arg}))
+    , m_reduce_type(reduce_type)
 {
     constructor_validate_and_infer_types();
 }

@@ -47,5 +49,10 @@ void op::AllReduce::validate_and_infer_types()
 shared_ptr<Node> op::AllReduce::copy_with_new_args(const NodeVector& new_args) const
 {
     check_new_args_count(this, new_args);
-    return make_shared<AllReduce>(new_args.at(0));
+    return make_shared<AllReduce>(new_args.at(0), get_reduce_type());
+}
+
+reduction::Type op::AllReduce::get_reduce_type() const
+{
+    return m_reduce_type;
 }
src/ngraph/op/allreduce.hpp

@@ -30,11 +30,16 @@ namespace ngraph
            static const std::string type_name;
            const std::string& description() const override { return type_name; }
            AllReduce();
-           AllReduce(const std::shared_ptr<Node>& arg);
+           AllReduce(const std::shared_ptr<Node>& arg,
+                     const reduction::Type reduce_type = reduction::sum);

            void validate_and_infer_types() override;
            std::shared_ptr<Node>
                copy_with_new_args(const NodeVector& new_args) const override;
+           reduction::Type get_reduce_type() const;
+
+       private:
+           const reduction::Type m_reduce_type;
        };
    }
 }
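With the constructor change above, the reduction is chosen when the AllReduce node is built and backends read it back through get_reduce_type(). A small sketch of constructing the op with a non-default reduction, assuming op::Parameter and element::f32 from the rest of the codebase; the parameter shape is an arbitrary example:

    #include <memory>

    #include "ngraph/op/allreduce.hpp"
    #include "ngraph/op/parameter.hpp"

    // Sketch only: an AllReduce node that requests a max reduction instead of
    // the default sum.
    std::shared_ptr<ngraph::Node> make_max_allreduce()
    {
        auto arg = std::make_shared<ngraph::op::Parameter>(ngraph::element::f32,
                                                           ngraph::Shape{2, 2});
        auto allreduce =
            std::make_shared<ngraph::op::AllReduce>(arg, ngraph::reduction::max);
        // Backends such as the interpreter and the CPU builder later call
        // allreduce->get_reduce_type() to pick the matching MPI/MLSL op.
        return allreduce;
    }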
src/ngraph/op/fused/group_conv.cpp

@@ -27,8 +27,14 @@
 using namespace std;
 using namespace ngraph;

-op::GroupConvolution::GroupConvolution(const shared_ptr<Node>& data_batch,
-                                       const shared_ptr<Node>& filters,
+const string op::GroupConvolution::type_name{"GroupConvolution"};
+
+op::GroupConvolution::GroupConvolution()
+{
+}
+
+op::GroupConvolution::GroupConvolution(const Output<Node>& data_batch,
+                                       const Output<Node>& filters,
                                        const Strides& window_movement_strides,
                                        const Strides& window_dilation_strides,
                                        const CoordinateDiff& padding_below,

@@ -36,7 +42,7 @@ op::GroupConvolution::GroupConvolution(const shared_ptr<Node>& data_batch,
                                        const Strides& data_dilation_strides,
                                        const size_t groups,
                                        const PadType& pad_type)
-    : FusedOp("GroupConvolution", check_single_output_args({data_batch, filters}))
+    : FusedOp({data_batch, filters})
     , m_window_movement_strides(window_movement_strides)
     , m_window_dilation_strides(window_dilation_strides)
     , m_padding_below(padding_below)

@@ -45,7 +51,6 @@ op::GroupConvolution::GroupConvolution(const shared_ptr<Node>& data_batch,
     , m_groups(groups)
     , m_pad_type(pad_type)
 {
-    // TODO: Move this out of constructor to validate_and_infer_types()
     constructor_validate_and_infer_types();
 }

@@ -129,35 +134,35 @@ shared_ptr<Node> op::GroupConvolution::copy_with_new_args(const NodeVector& new_
 NodeVector op::GroupConvolution::decompose_op() const
 {
-    auto data = get_argument(0);
-    auto filters = get_argument(1);
+    auto data = input(0);
+    auto filters = input(1);
     // Split one convolution op to N ops where N is the number of groups
     // and concat results after computation.
     // reference: https://github.com/NervanaSystems/ngraph-mxnet/blob/fdd692/src/ngraph/ngraph_emitter.cc#L822-L856
-    std::size_t n_data_channels{data->get_shape().at(1)};
-    std::size_t n_filters_channels{filters->get_shape().at(0)};
+    std::size_t n_data_channels{data.get_shape().at(1)};
+    std::size_t n_filters_channels{filters.get_shape().at(0)};
     std::size_t data_group_size{n_data_channels / m_groups};
     std::size_t filters_group_size{n_filters_channels / m_groups};
     NodeVector convolution_nodes;

     // initial bounds for splice
-    std::vector<std::size_t> data_lower_bounds(data->get_shape().size());
-    std::vector<std::size_t> data_upper_bounds{data->get_shape()};
-    std::vector<std::size_t> filters_lower_bounds(filters->get_shape().size());
-    std::vector<std::size_t> filters_upper_bounds{filters->get_shape()};
+    std::vector<std::size_t> data_lower_bounds(data.get_shape().size());
+    std::vector<std::size_t> data_upper_bounds{data.get_shape()};
+    std::vector<std::size_t> filters_lower_bounds(filters.get_shape().size());
+    std::vector<std::size_t> filters_upper_bounds{filters.get_shape()};

     for (std::size_t group{0}; group < m_groups; ++group)
     {
         // slice data
         data_lower_bounds[1] = group * data_group_size;
         data_upper_bounds[1] = (group + 1) * data_group_size;
         auto sliced_data =
-            std::make_shared<ngraph::op::Slice>(data, data_lower_bounds, data_upper_bounds);
+            std::make_shared<ngraph::op::Slice>(
+                data.get_source_output(), data_lower_bounds, data_upper_bounds);
         // slice filters
         filters_lower_bounds[0] = group * filters_group_size;
         filters_upper_bounds[0] = (group + 1) * filters_group_size;
         auto sliced_filters = std::make_shared<ngraph::op::Slice>(
-            filters, filters_lower_bounds, filters_upper_bounds);
+            filters.get_source_output(), filters_lower_bounds, filters_upper_bounds);

         convolution_nodes.push_back(
             std::make_shared<ngraph::op::Convolution>(sliced_data,
src/ngraph/op/fused/group_conv.hpp

@@ -29,8 +29,12 @@ namespace ngraph
        class GroupConvolution : public ngraph::op::util::FusedOp
        {
        public:
-           GroupConvolution(const std::shared_ptr<Node>& data_batch,
-                            const std::shared_ptr<Node>& filters,
+           NGRAPH_API
+           static const std::string type_name;
+           const std::string& description() const override { return type_name; }
+           GroupConvolution();
+           GroupConvolution(const Output<Node>& data_batch,
+                            const Output<Node>& filters,
                             const Strides& window_movement_strides,
                             const Strides& window_dilation_strides,
                             const CoordinateDiff& padding_below,
src/ngraph/op/slice.cpp

@@ -19,11 +19,17 @@
 using namespace std;
 using namespace ngraph;

-op::Slice::Slice(const shared_ptr<Node>& arg,
+const string op::Slice::type_name{"Slice"};
+
+op::Slice::Slice()
+{
+}
+
+op::Slice::Slice(const Output<Node>& arg,
                  const Coordinate& lower_bounds,
                  const Coordinate& upper_bounds,
                  const Strides& strides)
-    : Op("Slice", check_single_output_args({arg}))
+    : Op({arg})
     , m_lower_bounds(lower_bounds)
     , m_upper_bounds(upper_bounds)
     , m_strides(strides)

@@ -31,10 +37,10 @@ op::Slice::Slice(const shared_ptr<Node>& arg,
     constructor_validate_and_infer_types();
 }

-op::Slice::Slice(const shared_ptr<Node>& arg,
+op::Slice::Slice(const Output<Node>& arg,
                  const Coordinate& lower_bounds,
                  const Coordinate& upper_bounds)
-    : Op("Slice", check_single_output_args({arg}))
+    : Op({arg})
     , m_lower_bounds(lower_bounds)
     , m_upper_bounds(upper_bounds)
     , m_strides(Strides())
src/ngraph/op/slice.hpp

@@ -28,6 +28,11 @@ namespace ngraph
        class Slice : public Op
        {
        public:
+           NGRAPH_API
+           static const std::string type_name;
+           const std::string& description() const override { return type_name; }
+           /// \brief Constructs a tensor slice operation
+           Slice();
            /// \brief Constructs a tensor slice operation.
            ///
            /// \param arg The tensor to be sliced.

@@ -35,17 +40,16 @@ namespace ngraph
            /// \param upper_bounds The axiswise upper bounds of the slice (exclusive).
            /// \param strides The slicing strides; for example, strides of `{n,m}` means to take
            ///        every nth row and every mth column of the input matrix.
-           Slice(const std::shared_ptr<Node>& arg,
+           Slice(const Output<Node>& arg,
                  const Coordinate& lower_bounds,
                  const Coordinate& upper_bounds,
                  const Strides& strides);
            /// \brief Constructs a tensor slice operation with unit strides; i.e., every element inside the bounding box will be copied to the output slice.
            ///
            /// \param arg The tensor to be sliced.
            /// \param lower_bounds The axiswise lower bounds of the slice (inclusive).
            /// \param upper_bounds The axiswise upper bounds of the slice (exclusive).
-           Slice(const std::shared_ptr<Node>& arg,
+           Slice(const Output<Node>& arg,
                  const Coordinate& lower_bounds,
                  const Coordinate& upper_bounds);
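The Slice constructors now take Output<Node> rather than shared_ptr<Node>, in line with the build_graph.cpp test below that wires ops together through node outputs. A minimal sketch of the unit-stride overload, assuming op::Parameter and Node::output(0) as used elsewhere in this commit; the shape and bounds are illustrative only:

    #include <memory>

    #include "ngraph/op/parameter.hpp"
    #include "ngraph/op/slice.hpp"

    // Sketch only: slice rows 1..2 (upper bound exclusive) and all columns of a
    // 4x4 parameter with the new Output<Node>-based, unit-stride constructor.
    std::shared_ptr<ngraph::Node> make_example_slice()
    {
        auto data = std::make_shared<ngraph::op::Parameter>(ngraph::element::f32,
                                                            ngraph::Shape{4, 4});
        return std::make_shared<ngraph::op::Slice>(data->output(0),
                                                   ngraph::Coordinate{1, 0},
                                                   ngraph::Coordinate{3, 4});
    }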
src/ngraph/op/util/fused_op.cpp

@@ -30,6 +30,11 @@ op::util::FusedOp::FusedOp(const NodeVector& args)
 {
 }

+op::util::FusedOp::FusedOp(const OutputVector& args)
+    : Op(args)
+{
+}
+
 op::util::FusedOp::FusedOp(const std::string& node_type, const NodeVector& args)
     : Op(node_type, args)
 {
src/ngraph/op/util/fused_op.hpp

@@ -51,6 +51,8 @@ namespace ngraph
            /// \param args Nodes that produce the input tensors for the fused op
            FusedOp(const NodeVector& args);

+           FusedOp(const OutputVector& args);
+
            /// \brief Constructs a FusedOp
            ///
            /// \param args Nodes that produce the input tensors for the fused op
src/ngraph/runtime/cpu/builder/allreduce.cpp

@@ -37,6 +37,9 @@ namespace ngraph
                auto out_buffer_index = external_function->get_buffer_index(out[0].get_name());
                auto count = static_cast<int>(out[0].get_size());
                auto data_type = args[0].get_element_type().get_type_enum();
+               const ngraph::op::AllReduce* allreduce =
+                   static_cast<const ngraph::op::AllReduce*>(node);
+               auto reduce_type = allreduce->get_reduce_type();

                auto external_function_name = external_function->get_function_name();
                NGRAPH_DEBUG_PRINT(

@@ -48,11 +51,13 @@ namespace ngraph
                    node->get_friendly_name().c_str(),
                    count);

-               auto functor = [&, count, data_type, arg_buffer_index, out_buffer_index](
+               auto functor = [&, count, reduce_type, data_type, arg_buffer_index, out_buffer_index](
                    CPURuntimeContext* ctx, CPUExecutionContext* ectx) {
                    get_distributed_interface()->all_reduce(ctx->buffer_data[arg_buffer_index],
                                                            ctx->buffer_data[out_buffer_index],
                                                            data_type,
+                                                           reduce_type,
                                                            count);
                };
                functors.emplace_back(functor);
src/ngraph/runtime/cpu/builder/gather.cpp

@@ -53,9 +53,11 @@ namespace ngraph
                if (is_int64)
                {
-                   if (args[0].get_element_type() == element::f32 ||
-                       args[0].get_element_type() == element::f64 ||
-                       args[0].get_element_type() == element::u8)
+                   if ((args[0].get_element_type() == element::f32 ||
+                        args[0].get_element_type() == element::f64 ||
+                        args[0].get_element_type() == element::u8 ||
+                        args[0].get_element_type() == element::i8) &&
+                       params_shape.size() <= 3 && out_shape.size() <= 3)
                    {
                        std::function<decltype(runtime::cpu::kernel::gather_i64<float, 2, 2>)>
                            kernel;

@@ -111,9 +113,11 @@ namespace ngraph
                else
                {
-                   if (args[0].get_element_type() == element::f32 ||
-                       args[0].get_element_type() == element::f64 ||
-                       args[0].get_element_type() == element::u8)
+                   if ((args[0].get_element_type() == element::f32 ||
+                        args[0].get_element_type() == element::f64 ||
+                        args[0].get_element_type() == element::u8 ||
+                        args[0].get_element_type() == element::i8) &&
+                       params_shape.size() <= 3 && out_shape.size() <= 3)
                    {
                        std::function<decltype(runtime::cpu::kernel::gather_i32<float, 2, 2>)>
                            kernel;
src/ngraph/runtime/cpu/builder/scatter_add.cpp

@@ -46,7 +46,9 @@ namespace ngraph
                if (args[0].get_element_type() != element::f64 &&
-                   args[0].get_element_type() != element::f32)
+                   args[0].get_element_type() != element::f32 &&
+                   args[0].get_element_type() != element::u8 &&
+                   args[0].get_element_type() != element::i8)
                {
                    throw ngraph_error("Unsupported type in CPU Builder for ScatterAdd");
                }

@@ -59,6 +61,8 @@ namespace ngraph
                auto element_type = args[0].get_element_type();
                if (is_int64)
                {
+                   if (inputs_shape.size() <= 3 && updates_shape.size() <= 3)
+                   {
                        std::function<decltype(runtime::cpu::kernel::scatter_add_i64<float, 2, 2>)>
                            kernel;

@@ -91,6 +95,13 @@ namespace ngraph
                        functors.emplace_back(functor);
                    }
+                   else
+                   {
+                       throw ngraph_error("Unsupported ranks in CPU Builder for ScatterAdd");
+                   }
                }
                else
                {
+                   if (inputs_shape.size() <= 3 && updates_shape.size() <= 3)
+                   {
                        std::function<decltype(runtime::cpu::kernel::scatter_add_i32<float, 2, 2>)>
                            kernel;

@@ -122,6 +133,11 @@ namespace ngraph
                        };
                        functors.emplace_back(functor);
                    }
+                   else
+                   {
+                       throw ngraph_error("Unsupported ranks in CPU Builder for ScatterAdd");
+                   }
                }
            }
            REGISTER_OP_BUILDER(ScatterAdd);
        }
src/ngraph/runtime/cpu/cpu_builder.hpp

@@ -211,14 +211,6 @@
         KV = K<ET, 2, R2>;                                      \
     else if (R1 == 3)                                           \
         KV = K<ET, 3, R2>;                                      \
-    else if (R1 == 4)                                           \
-        KV = K<ET, 4, R2>;                                      \
-    else if (R1 == 5)                                           \
-        KV = K<ET, 5, R2>;                                      \
-    else if (R1 == 6)                                           \
-        KV = K<ET, 6, R2>;                                      \
-    else if (R1 == 7)                                           \
-        KV = K<ET, 7, R2>;                                      \
     else                                                        \
         throw ngraph_error("Unsupported first rank " + std::to_string(R1) + " for kernel " #K);

@@ -235,22 +227,6 @@
     {                                                           \
         SELECT_RANK1(KV, ET, R1, 3, K);                         \
     }                                                           \
-    else if (R2 == 4)                                           \
-    {                                                           \
-        SELECT_RANK1(KV, ET, R1, 4, K);                         \
-    }                                                           \
-    else if (R2 == 5)                                           \
-    {                                                           \
-        SELECT_RANK1(KV, ET, R1, 5, K);                         \
-    }                                                           \
-    else if (R2 == 6)                                           \
-    {                                                           \
-        SELECT_RANK1(KV, ET, R1, 6, K);                         \
-    }                                                           \
-    else if (R2 == 7)                                           \
-    {                                                           \
-        SELECT_RANK1(KV, ET, R1, 7, K);                         \
-    }                                                           \
     else                                                        \
     {                                                           \
         throw ngraph_error("Unsupported second rank " + std::to_string(R2) + " for kernel " #K); \

@@ -270,6 +246,10 @@
     {                                                           \
         SELECT_2RANKS(KV, uint8_t, R1, R2, K);                  \
     }                                                           \
+    else if (ET == element::i8)                                 \
+    {                                                           \
+        SELECT_2RANKS(KV, int8_t, R1, R2, K);                   \
+    }                                                           \
     else                                                        \
     {                                                           \
         throw ngraph_error("Unsupported element type " + ET.c_type_string() + " for kernel " #K); \
src/ngraph/runtime/cpu/cpu_emitter.cpp

@@ -271,10 +271,13 @@ namespace ngraph
            template <>
            void CPU_Emitter::EMITTER_DECL(ngraph::op::AllReduce)
            {
+               const ngraph::op::AllReduce* allreduce =
+                   static_cast<const ngraph::op::AllReduce*>(node);
                writer << "ngraph::get_distributed_interface()->all_reduce(" << args[0].get_name()
                       << ", " << out[0].get_name() << ", "
                       << "ngraph::element::Type_t::" << args[0].get_element_type().get_type_name()
-                      << ", " << out[0].get_size() << ");\n";
+                      << ", " << out[0].get_size() << ", "
+                      << "ngraph::Reduce_t::" << allreduce->get_reduce_type() << ");\n";
            }

            template <>

@@ -1842,8 +1845,9 @@ namespace ngraph
                writer.block_begin();
                if ((args[0].get_element_type() == element::f64 ||
                     args[0].get_element_type() == element::f32 ||
-                    args[0].get_element_type() == element::u8) &&
-                   gather->get_axis() == 0)
+                    args[0].get_element_type() == element::u8 ||
+                    args[0].get_element_type() == element::i8) &&
+                   args[0].get_shape().size() <= 3 && out[0].get_shape().size() <= 3)
                {
                    writer << "cpu::kernel::gather<" << args[0].get_type() << ", "
                           << args[1].get_element_type().c_type_string() << ", "

@@ -1903,8 +1907,11 @@ namespace ngraph
                writer.block_begin();
-               if (args[0].get_element_type() == element::f64 ||
-                   args[0].get_element_type() == element::f32)
+               if ((args[0].get_element_type() == element::f64 ||
+                    args[0].get_element_type() == element::f32 ||
+                    args[0].get_element_type() == element::u8 ||
+                    args[0].get_element_type() == element::i8) &&
+                   args[0].get_shape().size() <= 3 && args[2].get_shape().size() <= 3)
                {
                    writer << "cpu::kernel::scatter_add<" << args[0].get_type() << ", "
                           << args[1].get_element_type().c_type_string() << ", "
src/ngraph/runtime/cpu/kernel/gather.hpp

@@ -31,7 +31,7 @@ namespace ngraph
        namespace kernel
        {
-           // Calculate the indices from position 0 to rank-1.
+           // Calculate the indices for positions 0 to rank-1.
            static void
                get_indices(const Shape& shape, int index, std::vector<int>& indices, int rank)
            {

@@ -93,8 +93,11 @@ namespace ngraph
                if (indices_rank == 0)
                {
+//TODO Enable this if compiler issue with CODEGEN is fixed or DEX needs it.
+#if 0
 #ifdef _OPENMP
 #pragma omp parallel for
 #endif
+#endif
                    for (int i = 0; i < outer_loop_num; i++)
                    {

@@ -142,7 +145,11 @@ namespace ngraph
                else
                {
-                   auto num_indices = shape_size(indices_shape);
+                   size_t num_indices = 1;
+                   for (auto d : indices_shape)
+                   {
+                       num_indices *= d;
+                   }
 #ifdef _OPENMP
 #pragma omp parallel for
src/ngraph/runtime/cpu/pass/cpu_fusion.cpp

@@ -2238,6 +2238,32 @@ void ngraph::runtime::cpu::pass::CPUQuantFusion::construct_qconvb_add()
            std::dynamic_pointer_cast<ngraph::op::Add>(m.get_match_root()->get_argument(0));
        auto dq_l_m = std::dynamic_pointer_cast<ngraph::op::Dequantize>(pattern_map[dq_l_label]);
        auto dq_r_m = std::dynamic_pointer_cast<ngraph::op::Dequantize>(pattern_map[dq_r_label]);

+       // both left and right are QuantizedConvolutionBias
+       if (dq_r_m->get_argument(0)->description() == "QuantizedConvolutionBias")
+       {
+           for (auto user : m.get_match_root()->get_users())
+           {
+               auto q_m = std::dynamic_pointer_cast<ngraph::op::Quantize>(user);
+               if (q_m)
+               {
+                   auto q_m_scale = q_m->get_argument(1);
+                   auto dq_l_m_scale = dq_l_m->get_argument(1);
+                   auto dq_r_m_scale = dq_r_m->get_argument(1);
+                   if (!ngraph::compare_constants(q_m_scale, dq_l_m_scale) &&
+                       ngraph::compare_constants(q_m_scale, dq_r_m_scale))
+                   {
+                       NGRAPH_DEBUG << "Scales of Q and DQ of right branch match";
+                       // switch left and right branch
+                       auto temp = dq_l_m;
+                       dq_l_m = dq_r_m;
+                       dq_r_m = temp;
+                   }
+                   break;
+               }
+           }
+       }
+
        auto qconv =
            std::static_pointer_cast<ngraph::op::QuantizedConvolutionBias>(dq_l_m->get_argument(0));
        auto inplace_input = dq_r_m->get_argument(0);
src/ngraph/runtime/interpreter/int_executable.hpp

@@ -24,6 +24,7 @@
 #include <vector>

 #include "ngraph/op/all.hpp"
+#include "ngraph/op/allreduce.hpp"
 #include "ngraph/op/any.hpp"
 #include "ngraph/op/argmax.hpp"
 #include "ngraph/op/argmin.hpp"

@@ -255,9 +256,12 @@ private:
    }
    case OP_TYPEID::AllReduce:
    {
+       const ngraph::op::AllReduce* allreduce =
+           static_cast<const ngraph::op::AllReduce*>(&node);
        reference::allreduce<T>(args[0]->get_data_ptr<T>(),
                                out[0]->get_data_ptr<T>(),
                                node.get_input_element_type(0).get_type_enum(),
+                               allreduce->get_reduce_type(),
                                static_cast<int>(shape_size(node.get_input_shape(0))));
        break;
    }
src/ngraph/runtime/plaidml/plaidml_impl.hpp

@@ -76,11 +76,11 @@ namespace ngraph
            // input count.
            void check_inputs(std::size_t expected_input_count) const
            {
-               if (op().get_input_size() != expected_input_count)
+               if (op().get_input_size() < expected_input_count)
                {
                    std::ostringstream os;
                    os << "The PlaidML nGraph backend only supports " << op().description()
-                      << " operations with an input count == " << expected_input_count
+                      << " operations with an input count >= " << expected_input_count
                       << " (got " << op().get_input_size() << " inputs)";
                    throw std::runtime_error{os.str()};
                }
src/ngraph/runtime/plaidml/unit_test.manifest

@@ -261,92 +261,25 @@ batch_mat_mul_forward
 dot_matrix_2x0_0x2

 # dgkutnic ww24.5: these tests are to be triaged by the PlaidML team
+# ww25.2: re-scrubbed this list of tests after fixing check_inputs
+# initial debug points to some of these failing due to precision issues

The re-scrubbed PlaidML exclusion list shrinks from 92 to 25 lines in this hunk. The new list keeps convolution_3d_1item_large_5o3i_padded_uneven_filter_uneven_data_dilation_data_dilated, the batch_norm_inference_0eps_f32 / batch_norm_inference_f32 / batch_norm_training_0eps_f32 cases, the argmin_trivial / argmax_trivial / argmin_trivial_in_i32 cases, and the unchanged sum_large_1d_to_scalar, sum_stable_acc, divide_python_rounding_int32, and backwards_batchmatmul_tensor2_tensor2 entries, while dropping the old select, sqrt, product_*, max_*, min_*, sum_*, one_hot_scalar_*, lstm_cell_*, group_conv_transpose*, any_*, all_*, and backwards_* exclusions.
src/ngraph/runtime/reference/allreduce.hpp

@@ -25,9 +25,13 @@ namespace ngraph
        namespace reference
        {
            template <typename T>
-           void allreduce(T* arg, T* out, const element::Type_t element_type, int count)
+           void allreduce(T* arg,
+                          T* out,
+                          const element::Type_t element_type,
+                          const reduction::Type reduce_type,
+                          int count)
            {
-               get_distributed_interface()->all_reduce(arg, out, element_type, count);
+               get_distributed_interface()->all_reduce(arg, out, element_type, reduce_type, count);
            }
        }
    }
 }
src/ngraph/serializer.cpp (diff not expanded in this view)
src/ngraph/serializer.hpp

@@ -62,42 +62,3 @@ namespace ngraph
    /// Option may be enabled by setting the environment variable NGRAPH_SERIALIZER_OUTPUT_SHAPES
    void set_serialize_output_shapes(bool enable);
 }
-
-#ifdef NGRAPH_JSON_DISABLE
-// Rather than making every reference to the serializer conditionally compile here we just
-// provide some null stubs to resolve link issues
-// The `inline` is so we don't get multiple definitions of function
-
-std::string inline ngraph::serialize(std::shared_ptr<ngraph::Function> func, size_t indent)
-{
-    return "";
-}
-
-void inline ngraph::serialize(const std::string& path,
-                              std::shared_ptr<ngraph::Function> func,
-                              size_t indent)
-{
-    throw std::runtime_error("serializer disabled in build");
-}
-
-void inline ngraph::serialize(std::ostream& out,
-                              std::shared_ptr<ngraph::Function> func,
-                              size_t indent)
-{
-    throw std::runtime_error("serializer disabled in build");
-}
-
-std::shared_ptr<ngraph::Function> inline ngraph::deserialize(std::istream& in)
-{
-    throw std::runtime_error("serializer disabled in build");
-}
-
-std::shared_ptr<ngraph::Function> inline ngraph::deserialize(const std::string& str)
-{
-    throw std::runtime_error("serializer disabled in build");
-}
-
-void inline ngraph::set_serialize_output_shapes(bool enable)
-{
-    throw std::runtime_error("serializer disabled in build");
-}
-#endif
src/ngraph/serializer_stub.cpp (new file, 0 → 100644)

//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************

#include "ngraph/serializer.hpp"

std::string ngraph::serialize(std::shared_ptr<ngraph::Function> func, size_t indent)
{
    throw std::runtime_error("serializer disabled in build");
}

void ngraph::serialize(const std::string& path,
                       std::shared_ptr<ngraph::Function> func,
                       size_t indent)
{
    throw std::runtime_error("serializer disabled in build");
}

void ngraph::serialize(std::ostream& out, std::shared_ptr<ngraph::Function> func, size_t indent)
{
    throw std::runtime_error("serializer disabled in build");
}

std::shared_ptr<ngraph::Function> ngraph::deserialize(std::istream& in)
{
    throw std::runtime_error("serializer disabled in build");
}

std::shared_ptr<ngraph::Function> ngraph::deserialize(const std::string& str)
{
    throw std::runtime_error("serializer disabled in build");
}

void ngraph::set_serialize_output_shapes(bool enable)
{
    throw std::runtime_error("serializer disabled in build");
}
test/backend_scatter.in.cpp

@@ -35,6 +35,7 @@ using namespace ngraph;
 static string s_manifest = "${MANIFEST}";

+#if 0
 NGRAPH_TEST(${BACKEND_NAME}, scatter_add_4d_indices)
 {
     Shape ref_shape{3, 3, 3};

@@ -122,13 +123,14 @@ NGRAPH_TEST(${BACKEND_NAME}, scatter_add_3d_indices)
                                  read_vector<float>(result),
                                  MIN_FLOAT_TOLERANCE_BITS));
 }
+#endif

 NGRAPH_TEST(${BACKEND_NAME}, scatter_add_2d_indices)
 {
-    Shape ref_shape{2, 3, 3};
+    Shape ref_shape{3};
     Shape indices_shape{2, 2};
-    Shape updates_shape{2, 2, 3, 3};
-    Shape out_shape{2, 3, 3};
+    Shape updates_shape{2, 2};
+    Shape out_shape{3};
     auto R = make_shared<op::Parameter>(element::f32, ref_shape);
     auto I = make_shared<op::Parameter>(element::i32, indices_shape);
     auto U = make_shared<op::Parameter>(element::f32, updates_shape);

@@ -140,20 +142,17 @@ NGRAPH_TEST(${BACKEND_NAME}, scatter_add_2d_indices)
     // Create some tensors for input/output
     auto r = backend->create_tensor(element::f32, ref_shape);
-    copy_data(r, vector<float>{0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 9});
+    copy_data(r, vector<float>{0, 1, 2});
     auto i = backend->create_tensor(element::i32, indices_shape);
     copy_data(i, vector<int32_t>{0, 1, 1, 0});
     auto u = backend->create_tensor(element::f32, updates_shape);
-    copy_data(u, vector<float>{0, 1, 2, 3, 4, 5, 6, 7, 8, 1, 2, 3, 4, 5, 6, 7, 8, 9,
-                               1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8});
+    copy_data(u, vector<float>{1, 2, 3, 4});
     auto result = backend->create_tensor(element::f32, out_shape);

     auto c = backend->compile(f);
     c->call_with_validate({result}, {r, i, u});
     EXPECT_TRUE(test::all_close_f(
-        (vector<float>{0, 3, 6, 9, 12, 15, 18, 21, 24, 3, 6, 9, 12, 15, 18, 21, 24, 27}),
+        (vector<float>{5, 6, 2}),
         read_vector<float>(result),
         MIN_FLOAT_TOLERANCE_BITS));
 }

 NGRAPH_TEST(${BACKEND_NAME}, scatter_add_1d_indices)
test/build_graph.cpp
View file @ e5757725
...
@@ -150,3 +150,19 @@ TEST(build_graph, no_arg_construction)
     validate_nodes_and_infer_types(ops);
     ASSERT_EQ(add1->get_output_shape(0), Shape{7});
 }
+
+TEST(build_graph, multi_output_split)
+{
+    const auto data = make_shared<op::Parameter>(element::f32, Shape{64, 8, 100, 150});
+    auto filters = make_shared<op::Parameter>(element::f32, Shape{128, 2, 10, 20});
+    const auto split = make_shared<op::Split>(data, 1, 2);
+    auto conv = make_shared<op::GroupConvolution>(split->output(1),
+                                                  filters,
+                                                  Strides{1, 1},
+                                                  Strides{1, 1},
+                                                  CoordinateDiff{0, 0},
+                                                  CoordinateDiff{0, 0},
+                                                  Strides{1, 1},
+                                                  2);
+    EXPECT_EQ(conv->get_shape(), (Shape{64, 128, 91, 131}));
+}
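Note: the expected shape in multi_output_split follows from ordinary valid-convolution arithmetic. The split halves the 8 input channels along axis 1, the grouped convolution (2 groups, filters {128, 2, 10, 20}) yields 128 output channels, and with stride 1 and no padding each spatial dimension shrinks by filter_size - 1. A small sketch of the spatial computation (hypothetical helper, not part of the nGraph API):

    #include <cstdio>

    // Output extent of a valid convolution: stride 1, no padding, no dilation.
    static int valid_conv_out(int in, int filter) { return in - filter + 1; }

    int main()
    {
        // Spatial dims from the test: data {64, 8, 100, 150}, filters {128, 2, 10, 20}.
        std::printf("%d x %d\n", valid_conv_out(100, 10), valid_conv_out(150, 20)); // 91 x 131
    }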
test/cpu_fusion.cpp
View file @ e5757725
...
@@ -3691,6 +3691,120 @@ TEST(cpu_quant_fusion, qconvba)
     EXPECT_TRUE(test::all_close(cpu1_results.at(0), cpu2_results.at(0)));
 }

+TEST(cpu_quant_fusion, qconvba_q)
+{
+    auto make_function = []() {
+        Shape shape_input{1, 2, 2, 2};
+        Shape shape_weights{1, 2, 1, 1};
+        Shape shape_summand{1, 1, 2, 2};
+        auto input_l = std::make_shared<op::Parameter>(element::f32, shape_input);
+        auto weights_l = std::make_shared<op::Parameter>(element::f32, shape_weights);
+        auto bias_l = std::make_shared<op::Parameter>(element::f32, Shape{shape_weights[0]});
+        auto input_r = std::make_shared<op::Parameter>(element::f32, shape_input);
+        auto weights_r = std::make_shared<op::Parameter>(element::f32, shape_weights);
+        auto bias_r = std::make_shared<op::Parameter>(element::f32, Shape{shape_weights[0]});
+        auto input_scale_l = op::Constant::create(element::f32, Shape{}, {2.0f});
+        auto weights_scale_l = op::Constant::create(element::f32, Shape{}, {2.0f});
+        auto output_scale_l = op::Constant::create(element::f32, Shape{}, {4.0f});
+        auto input_scale_r = op::Constant::create(element::f32, Shape{}, {5.0f});
+        auto weights_scale_r = op::Constant::create(element::f32, Shape{}, {5.0f});
+        auto output_scale_r = op::Constant::create(element::f32, Shape{}, {20.0f});
+        auto int8_zero = op::Constant::create(element::i8, Shape{}, {0});
+        auto int32_zero = op::Constant::create(element::i32, Shape{}, {0});
+        auto uint8_zero = op::Constant::create(element::u8, Shape{}, {0});
+        op::Quantize::RoundMode round_mode = op::Quantize::RoundMode::ROUND_NEAREST_TOWARD_EVEN;
+        auto q_input_l = std::make_shared<op::Quantize>(
+            input_l, input_scale_l, uint8_zero, element::u8, AxisSet{}, round_mode);
+        auto q_weights_l = std::make_shared<op::Quantize>(
+            weights_l, weights_scale_l, int8_zero, element::i8, AxisSet{}, round_mode);
+        auto q_bias_l = std::make_shared<op::Quantize>(
+            bias_l, input_scale_l * weights_scale_l, int32_zero, element::i32, AxisSet{}, round_mode);
+        auto q_input_r = std::make_shared<op::Quantize>(
+            input_r, input_scale_r, uint8_zero, element::u8, AxisSet{}, round_mode);
+        auto q_weights_r = std::make_shared<op::Quantize>(
+            weights_r, weights_scale_r, int8_zero, element::i8, AxisSet{}, round_mode);
+        auto q_bias_r = std::make_shared<op::Quantize>(
+            bias_r, input_scale_r * weights_scale_r, int32_zero, element::i32, AxisSet{}, round_mode);
+
+        // Left Graph
+        auto requant_scale_l = (input_scale_l * weights_scale_l) / output_scale_l;
+        auto conv_l = std::make_shared<op::QuantizedConvolutionBias>(q_input_l,
+                                                                     q_weights_l,
+                                                                     q_bias_l,
+                                                                     Strides{1, 1},
+                                                                     Strides{1, 1},
+                                                                     CoordinateDiff{0, 0},
+                                                                     CoordinateDiff{0, 0},
+                                                                     Strides{1, 1},
+                                                                     requant_scale_l);
+        auto dq_l =
+            std::make_shared<op::Dequantize>(conv_l, output_scale_l, int8_zero, element::f32, AxisSet{});
+        auto r_l = std::make_shared<op::Reshape>(dq_l, AxisVector{0, 1, 2, 3}, Shape{1, 2, 2});
+        auto b_l = std::make_shared<op::Broadcast>(r_l, Shape{1, 1, 2, 2}, AxisSet{0});
+
+        // Right Graph
+        auto requant_scale_r = (input_scale_r * weights_scale_r) / output_scale_r;
+        auto conv_r = std::make_shared<op::QuantizedConvolutionBias>(q_input_r,
+                                                                     q_weights_r,
+                                                                     q_bias_r,
+                                                                     Strides{1, 1},
+                                                                     Strides{1, 1},
+                                                                     CoordinateDiff{0, 0},
+                                                                     CoordinateDiff{0, 0},
+                                                                     Strides{1, 1},
+                                                                     requant_scale_r);
+        auto dq_r =
+            std::make_shared<op::Dequantize>(conv_r, output_scale_r, int8_zero, element::f32, AxisSet{});
+        auto r_r = std::make_shared<op::Reshape>(dq_r, AxisVector{0, 1, 2, 3}, Shape{1, 2, 2});
+        auto b_r = std::make_shared<op::Broadcast>(r_r, Shape{1, 1, 2, 2}, AxisSet{0});
+        auto add = b_l + b_r;
+        auto relu = std::make_shared<op::Relu>(add);
+        auto q = std::make_shared<op::Quantize>(
+            relu, output_scale_r, uint8_zero, element::u8, AxisSet{}, round_mode);
+        auto dq = std::make_shared<op::Dequantize>(q, output_scale_r, uint8_zero, element::f32, AxisSet{});
+        return make_shared<Function>(
+            NodeVector{dq}, ParameterVector{input_l, weights_l, bias_l, input_r, weights_r, bias_r});
+    };
+
+    auto cpu_f1 = make_function();
+    auto cpu_f2 = make_function();
+
+    test::Uniform<float> rng(2.0f, 2.0f);
+    vector<vector<float>> args;
+    for (shared_ptr<op::Parameter> param : cpu_f1->get_parameters())
+    {
+        vector<float> tensor_val(shape_size(param->get_shape()));
+        rng.initialize(tensor_val);
+        args.push_back(tensor_val);
+    }
+
+    // Disable CPUQuantFusion
+    set_environment("NGRAPH_PASS_ENABLES", "CPUQuantFusion:0", 1);
+    auto cpu1_results = execute(cpu_f1, args, "CPU");
+    // Enable CPUQuantFusion
+    set_environment("NGRAPH_PASS_ENABLES", "CPUQuantFusion:1", 1);
+    auto cpu2_results = execute(cpu_f2, args, "CPU");
+    EXPECT_TRUE(test::all_close(cpu1_results.at(0), cpu2_results.at(0)));
+
+    auto backend = runtime::Backend::create("CPU");
+    auto fuse = make_function();
+    backend->compile(fuse);
+    ASSERT_EQ(count_ops_of_type<op::Quantize>(fuse), 6);
+}
+
 #ifndef NGRAPH_JSON_DISABLE
 // Tests that rely on deserializing json files
 TEST(cpu_fusion, fuse_conv_bias)
...
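Note: the scale constants in qconvba_q are tied together by the requantization formula the test computes, requant_scale = (input_scale * weights_scale) / output_scale, which rescales the quantized convolution's accumulator onto the output's quantized range. A trivial standalone check of those values (illustrative only; constants copied from the test):

    #include <cstdio>

    int main()
    {
        float requant_l = (2.0f * 2.0f) / 4.0f;  // left branch  -> 1.0
        float requant_r = (5.0f * 5.0f) / 20.0f; // right branch -> 1.25
        std::printf("requant_scale_l = %g, requant_scale_r = %g\n", requant_l, requant_r);
    }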
test/distributed.in.cpp
View file @ e5757725
...
@@ -29,25 +29,61 @@
 using namespace std;
 using namespace ngraph;

-TEST(distributed_${BACKEND_NAME}, allreduce)
+static void test_allreduce_common(reduction::Type reduce_type)
 {
     auto comm_size = get_distributed_interface()->get_size();
     if (comm_size > 1)
     {
         auto shape = Shape{2, 2};
         auto A = make_shared<op::Parameter>(element::f32, shape);
-        auto f = make_shared<Function>(make_shared<op::AllReduce>(A), ParameterVector{A});
+        auto f = make_shared<Function>(make_shared<op::AllReduce>(A, reduce_type), ParameterVector{A});
         auto backend = runtime::Backend::create("${BACKEND_NAME}");

         auto v = vector<float>{1, 2, 3, 4};
         auto a = backend->create_tensor(element::f32, shape);
-        copy_data(a, vector<float>{1, 2, 3, 4});
         auto result = backend->create_tensor(element::f32, shape);

+#if !(defined(__GNUC__) && (__GNUC__ == 4 && __GNUC_MINOR__ == 8))
+#pragma GCC diagnostic push
+#pragma GCC diagnostic error "-Wswitch"
+#pragma GCC diagnostic error "-Wswitch-enum"
+#endif
+        switch (reduce_type.get_type())
+        {
+        case reduction::Type_t::sum:
+            copy_data(a, v);
             std::transform(
                 v.begin(), v.end(), v.begin(), std::bind1st(std::multiplies<float>(), comm_size));
+            break;
+        case reduction::Type_t::prod:
+            copy_data(a, v);
+            std::transform(v.begin(), v.end(), v.begin(), [&](float elm) -> float {
+                return pow(elm, comm_size);
+            });
+            break;
+        case reduction::Type_t::min:
+        case reduction::Type_t::max:
+            auto shift = get_distributed_interface()->get_rank();
+            std::rotate(v.begin(), v.begin() + shift % v.size(), v.end());
+            copy_data(a, v);
+            if (reduce_type == reduction::Type_t::min)
+            {
+                std::fill(v.begin(), v.end(), 1);
+                for (int i = 1; i < static_cast<int>(v.size()) - comm_size + 1; i++)
+                    v[i] = i + 1;
+            }
+            else
+            {
+                std::fill(v.begin(), v.end(), v.size());
+                for (int i = 0; i < static_cast<int>(v.size()) - comm_size; i++)
+                    v[i] = i + 2;
+            }
+        }
+#if !(defined(__GNUC__) && __GNUC__ == 4 && __GNUC_MINOR__ == 8)
+#pragma GCC diagnostic pop
+#endif
         auto handle = backend->compile(f);
         handle->call_with_validate({result}, {a});
...
@@ -55,6 +91,28 @@ TEST(distributed_${BACKEND_NAME}, allreduce)
     }
 }

+TEST(distributed_${BACKEND_NAME}, allreduce_sum)
+{
+    test_allreduce_common(reduction::sum);
+}
+
+TEST(distributed_${BACKEND_NAME}, allreduce_min)
+{
+    test_allreduce_common(reduction::min);
+}
+
+TEST(distributed_${BACKEND_NAME}, allreduce_max)
+{
+    test_allreduce_common(reduction::max);
+}
+
+#if !defined(NGRAPH_DISTRIBUTED_MLSL_ENABLE)
+TEST(distributed_${BACKEND_NAME}, allreduce_prod)
+{
+    test_allreduce_common(reduction::prod);
+}
+#endif
+
 TEST(distributed_${BACKEND_NAME}, broadcastdistributed)
 {
     auto shape = Shape{2, 2};
...
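Note: for the sum case the expected tensor is simply the per-rank input {1, 2, 3, 4} multiplied by the number of participating processes, which is what the std::transform with std::bind1st(std::multiplies<float>(), comm_size) computes in the test. A minimal standalone sketch of the same expectation, using a lambda in place of the deprecated std::bind1st (the comm_size value here is hypothetical):

    #include <algorithm>
    #include <cstdio>
    #include <vector>

    int main()
    {
        const int comm_size = 4;          // hypothetical number of ranks
        std::vector<float> v{1, 2, 3, 4}; // every rank contributes the same tensor
        // Allreduce-sum result: comm_size copies of v added together element-wise.
        std::transform(v.begin(), v.end(), v.begin(),
                       [comm_size](float x) { return x * comm_size; });
        for (float x : v)
            std::printf("%g ", x); // prints: 4 8 12 16
        std::printf("\n");
    }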
test/serialize.cpp
View file @ e5757725
...
@@ -324,3 +324,19 @@ TEST(serialize, constant_infinity_nan)
     EXPECT_NE(str.find(R"(label="C)"), string::npos);
     EXPECT_NE(str.find(R"(label="D)"), string::npos);
 }
+
+TEST(serialize, non_zero_node_output)
+{
+    auto arg = make_shared<op::Parameter>(element::f32, Shape{10});
+    auto topk = make_shared<op::TopK>(arg, 0, element::i32, 5, true);
+    auto abs = make_shared<op::Abs>(Output<Node>(topk, 1));
+    auto result = make_shared<op::Result>(abs);
+    auto f = make_shared<Function>(ResultVector{result}, ParameterVector{arg});
+    string s = serialize(f);
+    shared_ptr<Function> g = deserialize(s);
+    auto g_result = g->get_results().at(0);
+    auto g_abs = g_result->input(0).get_source_output().get_node_shared_ptr();
+    auto topk_out = g_abs->input(0).get_source_output();
+    EXPECT_EQ(topk_out.get_index(), 1);
+    EXPECT_EQ(topk_out.get_node()->description(), "TopK");
+}