submodule / ngraph / Commits / 6c676d2d

Commit 6c676d2d authored Feb 08, 2018 by Jaikrishnan Menon
parent 2659d5be

CPU: Merge fixes

Showing 4 changed files with 374 additions and 375 deletions.
src/ngraph/runtime/cpu/cpu_external_function.cpp   +2 −9
src/ngraph/runtime/cpu/pass/cpu_fusion.cpp         +166 −166
src/ngraph/runtime/cpu/pass/cpu_fusion.hpp         +44 −38
test/cpu_fusion.cpp                                +162 −162
src/ngraph/runtime/cpu/cpu_external_function.cpp

@@ -226,20 +226,13 @@ void runtime::cpu::CPU_ExternalFunction::compile()
     string function_name = m_function->get_name();
-<<<<<<< HEAD
     ngraph::pass::Manager pass_manager;
+    pass_manager.register_pass<runtime::cpu::pass::CPUFusion>();
     pass_manager.register_pass<runtime::cpu::pass::CPULayout>();
     pass_manager.register_pass<ngraph::pass::Liveness>();
     pass_manager.register_pass<ngraph::pass::MemoryLayout>(MemoryPoolAlignment);
-=======
-    pass::Manager pass_manager;
-    // For now, just make everyone row-major.
-    pass_manager.register_pass<pass::CPUFusion>();
-    pass_manager.register_pass<pass::AssignLayout<descriptor::layout::DenseTensorViewLayout>>();
-    pass_manager.register_pass<pass::Liveness>();
-    pass_manager.register_pass<pass::MemoryLayout>(64);
->>>>>>> master
     pass_manager.run_passes(m_function);
     codegen::CodeWriter writer;
src/ngraph/runtime/cpu/pass/cpu_fusion.cpp

// ----------------------------------------------------------------------------
// Copyright 2018 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------

#include "cpu_fusion.hpp"
#include <algorithm>
#include <iostream>
#include <unordered_set>
#include "ngraph/graph_util.hpp"
#include "ngraph/log.hpp"
#include "ngraph/ops/add.hpp"
#include "ngraph/ops/broadcast.hpp"
#include "ngraph/ops/dot.hpp"
#include "ngraph/ops/parameter.hpp"
#include "ngraph/ops/reshape.hpp"
#include "ngraph/pattern/matcher.hpp"
#include "ngraph/pattern/op/any.hpp"
#include "ngraph/pattern/op/label.hpp"
#include "ngraph/runtime/cpu/ops/matmul_bias.hpp"
static bool init_cblas_arg(std::shared_ptr<ngraph::Node> reshape,
                           std::shared_ptr<ngraph::Node> arg,
                           bool& transpose_w,
                           ngraph::Shape& shape_w)
{
    auto r_w = std::dynamic_pointer_cast<ngraph::op::Reshape>(reshape);

    if (!r_w)
    {
        return true; //nth to do; reshape isn't a reshape
    }

    if (r_w->get_shape().size() != 2)
    {
        NGRAPH_DEBUG << "Reshape for " << reshape->get_name() << " doesn't reshape into matrix"
                     << ngraph::vector_to_string(r_w->get_shape());
        return false;
    }

    auto io = r_w->get_input_order();

    if (r_w->get_shape().size() != arg->get_shape().size()) //reshape
    {
        ngraph::AxisVector dio(io.size());
        std::iota(begin(dio), end(dio), 0);

        if (io != dio) //we can't reshape and transpose at the same time
        {
            NGRAPH_DEBUG << "Reshape for " << reshape->get_name() << " is not in default order "
                         << ngraph::vector_to_string(io);
            NGRAPH_DEBUG << "r_w shape = " << ngraph::vector_to_string(r_w->get_shape());
            NGRAPH_DEBUG << "arg shape = " << ngraph::vector_to_string(arg->get_shape());
            return false;
        }

        shape_w = r_w->get_shape();
    }
    else
    {
        if (io == ngraph::AxisVector{1, 0})
        {
            transpose_w = true;
        }
        //otherwise no-op reshape
    }

    return true;
}
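A hypothetical illustration (not part of the commit) of the case this helper handles: when a Dot input is a rank-preserving Reshape with input order {1, 0}, the reshape is folded into a CBLAS-style transpose flag instead of being treated as real data movement. The sketch below reuses only types visible in this file; the variable names are my own.

    // Sketch: a 2D transpose-reshape becomes a transpose flag for GEMM.
    auto A  = std::make_shared<ngraph::op::Parameter>(ngraph::element::f32, ngraph::Shape{3, 2});
    auto At = std::make_shared<ngraph::op::Reshape>(A, ngraph::AxisVector{1, 0}, ngraph::Shape{2, 3});
    bool transpose_w = false;
    ngraph::Shape shape_w = A->get_shape();
    bool ok = init_cblas_arg(At, A, transpose_w, shape_w);
    // ok == true and transpose_w == true: same rank, input order {1, 0}.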
template <typename T>
static std::vector<T> apply_permutation(std::vector<T> input, ngraph::AxisVector order)
{
    if (input.size() != order.size())
    {
        throw "input and order sizes don't match!";
    }

    std::vector<T> output(input.size());

    for (size_t i = 0; i < order.size(); i++)
    {
        output[i] = input.at(order.at(i));
    }

    return output;
}
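For clarity, a small usage sketch (hypothetical, not from the commit): the helper gathers input elements according to order, i.e. output[i] = input[order[i]].

    std::vector<int> v{10, 20, 30};
    auto p = apply_permutation(v, ngraph::AxisVector{2, 0, 1});
    // p == {30, 10, 20}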
-void ngraph::pass::CPUFusion::construct_gemm_pattern()
+void ngraph::runtime::cpu::pass::CPUFusion::construct_gemm_pattern()
{
    auto shape_w = Shape{2, 4};
    auto shape_x = Shape{4, 1};
    auto shape_b = Shape{1};
    auto shape_dot = Shape{2, 1};

    auto W = std::make_shared<pattern::op::Label>(element::f32, shape_w);
    auto x = std::make_shared<pattern::op::Label>(element::f32, shape_x);

    auto reshape_pred = [](std::shared_ptr<Node> n) {
        return static_cast<bool>(std::dynamic_pointer_cast<op::Reshape>(n));
    };

    auto skip_w = std::make_shared<pattern::op::Any>(W, reshape_pred);
    auto skip_x = std::make_shared<pattern::op::Any>(x, reshape_pred);

    auto pdot = std::make_shared<op::Dot>(skip_w, skip_x);
    auto b = std::make_shared<pattern::op::Label>(element::f32, shape_b);
    auto pbroadcast = std::make_shared<op::Broadcast>(b, shape_dot, AxisSet{0});
    auto padd = pdot + pbroadcast;

    ngraph::pattern::gr_callback_fn callback = [W, x, b](pattern::Matcher& m) {
        NGRAPH_DEBUG << "In callback for construct_gemm_pattern against node = "
                     << m.match_root()->get_name();
        auto pattern_map = m.get_pattern_map();
        std::shared_ptr<Node> nn = nullptr;
        auto mpattern = m.match_root();

        if (mpattern->get_element_type() != element::f32)
        {
            NGRAPH_DEBUG << "mpattern = " << mpattern->get_name() << " type is not float!";
            return nn;
        }

        auto dot = mpattern->get_input_op(0);
        if (dot->get_shape().size() != 2)
        {
            NGRAPH_DEBUG << "dot = " << dot->get_name() << " shape is not equal to 2!";
            return nn;
        }

        bool transpose_w = false;
        Shape shape_arg0{pattern_map[W]->get_shape()};
        if (!init_cblas_arg(dot->get_input_op(0), pattern_map[W], transpose_w, shape_arg0))
        {
            return nn;
        }

        bool transpose_x = false;
        Shape shape_arg1{pattern_map[x]->get_shape()};
        if (!init_cblas_arg(dot->get_input_op(1), pattern_map[x], transpose_x, shape_arg1))
        {
            return nn;
        }

        auto cg = std::shared_ptr<Node>(new op::MatmulBias(pattern_map[W],
                                                           pattern_map[x],
                                                           mpattern->get_input_op(1),
                                                           shape_arg0,
                                                           shape_arg1,
                                                           transpose_w,
                                                           transpose_x));
        return cg;
    };

    auto m = std::make_shared<ngraph::pattern::Matcher>(padd, callback);
    this->add_matcher(m);
}
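Taken together, the pass matches a Dot whose inputs may be hidden behind Reshape nodes, followed by a broadcast bias add, and replaces the whole subgraph with a single op::MatmulBias node. A schematic of the subgraph it fires on (my sketch, mirroring the unit tests below, not code from the commit):

    auto dot  = std::make_shared<op::Dot>(W, x);   // inputs may also be Reshapes of W, x
    auto bias = std::make_shared<op::Broadcast>(b, dot->get_shape(), AxisSet{0});
    auto root = dot + bias;  // matched root; rewritten to op::MatmulBias(W, x, bias, ...)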
src/ngraph/runtime/cpu/pass/cpu_fusion.hpp

// ----------------------------------------------------------------------------
// Copyright 2018 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------

#pragma once

#include "ngraph/pass/graph_rewrite.hpp"
 namespace ngraph
 {
-    namespace pass
-    {
-        class CPUFusion;
-    }
-}
+    namespace runtime
+    {
+        namespace cpu
+        {
+            namespace pass
+            {
+                class CPUFusion;
+            }
+        }
+    }
+}

-class ngraph::pass::CPUFusion : public ngraph::pass::GraphRewrite
+class ngraph::runtime::cpu::pass::CPUFusion : public ngraph::pass::GraphRewrite
 {
 public:
     CPUFusion()
         : GraphRewrite()
     {
         construct_gemm_pattern();
     }

 private:
     void construct_gemm_pattern();
 };
test/cpu_fusion.cpp

// ----------------------------------------------------------------------------
// Copyright 2018 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// ----------------------------------------------------------------------------

#include <algorithm>
#include <cstdio>
#include <iostream>
#include <list>
#include <memory>

#include "gtest/gtest.h"
#include "ngraph/graph_util.hpp"
#include "ngraph/log.hpp"
#include "ngraph/ngraph.hpp"
#include "ngraph/ops/sum.hpp"
#include "ngraph/pass/graph_rewrite.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/pattern/matcher.hpp"
#include "ngraph/pattern/op/any.hpp"
#include "ngraph/pattern/op/label.hpp"
//
#include "ngraph/file_util.hpp"
#include "ngraph/json.hpp"
#include "ngraph/runtime/cpu/ops/matmul_bias.hpp"
#include "ngraph/runtime/cpu/pass/cpu_fusion.hpp"
#include "ngraph/serializer.hpp"
#include "ngraph/util.hpp"
#include "util/matcher.hpp"
#include "util/test_tools.hpp"

using namespace ngraph;
using namespace std;
TEST(cpu_fusion, gemm_pattern)
{
    auto shape_w = Shape{2, 4};
    auto shape_x = Shape{4, 1};
    auto shape_b = Shape{1};
    auto A = make_shared<op::Parameter>(element::f32, shape_w);
    auto B = make_shared<op::Parameter>(element::f32, shape_x);
    auto C = make_shared<op::Parameter>(element::f32, shape_b);

    auto dot = make_shared<op::Dot>(A, B);
    auto broadcast = make_shared<op::Broadcast>(C, dot->get_shape(), AxisSet{0});
    auto add = dot + broadcast;

    auto W = std::make_shared<pattern::op::Label>(A);
    auto x = std::make_shared<pattern::op::Label>(B);

    auto reshape_pred = [](std::shared_ptr<Node> n) {
        return static_cast<bool>(std::dynamic_pointer_cast<op::Reshape>(n));
    };

    auto skip_w = std::make_shared<pattern::op::Any>(W, reshape_pred);
    auto skip_x = std::make_shared<pattern::op::Any>(x, reshape_pred);

    auto pdot = make_shared<op::Dot>(skip_w, skip_x);
    auto b = std::make_shared<pattern::op::Label>(C);
    auto pbroadcast = make_shared<op::Broadcast>(b, dot->get_shape(), AxisSet{0});
    auto padd = pdot + pbroadcast;

    TestMatcher n(nullptr);
    ASSERT_TRUE(n.match(padd, add));
    ASSERT_EQ(n.get_pattern_map()[W], A);
    ASSERT_EQ(n.get_pattern_map()[x], B);
    ASSERT_EQ(n.get_pattern_map()[b], C);

    auto reshape_w = make_shared<op::Reshape>(A, AxisVector{1, 0}, W->get_shape());
    auto reshape_x = make_shared<op::Reshape>(B, AxisVector{1, 0}, x->get_shape());
    auto re_dot = make_shared<op::Dot>(reshape_w, reshape_x);
    auto re_add = re_dot + broadcast;
    ASSERT_TRUE(n.match(padd, re_add));
    ASSERT_EQ(n.get_pattern_map()[W], A);
    ASSERT_EQ(n.get_pattern_map()[x], B);
    ASSERT_EQ(n.get_pattern_map()[b], C);

    auto cg =
        make_shared<op::MatmulBias>(W, x, broadcast, W->get_shape(), x->get_shape(), false, false);
}
TEST(cpu_fusion, gemm_cpu)
{
    auto shapeA = Shape{3, 2};
    auto shapeB = Shape{2, 3};
    auto shapeC = Shape{2, 2};
    auto A = make_shared<op::Parameter>(element::f32, shapeA);
    auto B = make_shared<op::Parameter>(element::f32, shapeB);

    auto reshape_w = make_shared<op::Reshape>(A, AxisVector{1, 0}, Shape{2, 3});
    auto reshape_x = make_shared<op::Reshape>(B, AxisVector{1, 0}, Shape{3, 2});

    auto one = op::Constant::create<float>(element::f32, Shape{}, std::vector<float>{1.0f});

    auto broadcast = make_shared<op::Broadcast>(one, shapeC, AxisSet{0, 1});
    auto cg =
        make_shared<op::MatmulBias>(A, B, broadcast, A->get_shape(), B->get_shape(), true, true);

    auto f = make_shared<Function>(cg, op::Parameters{A, B});

    auto manager = runtime::Manager::get("CPU");
    auto external = manager->compile(f);
    auto backend = manager->allocate_backend();
    auto cf = backend->make_call_frame(external);

    shared_ptr<runtime::TensorView> a = backend->make_primary_tensor_view(element::f32, shapeA);
    shared_ptr<runtime::TensorView> b = backend->make_primary_tensor_view(element::f32, shapeB);
    shared_ptr<runtime::TensorView> result =
        backend->make_primary_tensor_view(element::f32, shapeC);

    vector<float> dataA{1.0f, 4.0f, 1.0f, 4.0f, 1.0f, 4.0f};
    vector<float> dataB{3.0f, 3.0f, 3.0f, 9.0f, 9.0f, 9.0f};
    copy_data(a, dataA);
    copy_data(b, dataB);

    cf->call({a, b}, {result});
    vector<float> expected{10, 28, 37, 109};
    ASSERT_TRUE(read_vector<float>(result) == expected);
}
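As a sanity check on the expected values (my arithmetic, not part of the commit): with both transpose flags set, the kernel computes A^T * B^T plus the broadcast bias of ones.

    A = [[1, 4], [1, 4], [1, 4]]          (dataA, row-major 3x2)
    B = [[3, 3, 3], [9, 9, 9]]            (dataB, row-major 2x3)
    A^T * B^T = [[1, 1, 1], [4, 4, 4]] * [[3, 9], [3, 9], [3, 9]] = [[9, 27], [36, 108]]
    adding the bias of 1.0f gives [[10, 28], [37, 109]], i.e. expected{10, 28, 37, 109}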
TEST(cpu_fusion, cpu_fusion_pass_basic)
{
    auto shape = Shape{};
    auto shape_w = Shape{2, 4};
    auto shape_x = Shape{4, 1};
    auto shape_b = Shape{1};
    auto A = make_shared<op::Parameter>(element::f32, shape_w);
    auto B = make_shared<op::Parameter>(element::f32, shape_x);
    auto C = make_shared<op::Parameter>(element::f32, shape_b);

    auto dot = make_shared<op::Dot>(A, B);
    auto broadcast = make_shared<op::Broadcast>(C, dot->get_shape(), AxisSet{0});
    auto add = dot + broadcast;
    auto graph = make_shared<op::Abs>(add);
    pass::Manager pass_manager;
-   pass_manager.register_pass<pass::CPUFusion>();
+   pass_manager.register_pass<runtime::cpu::pass::CPUFusion>();
    auto func = make_shared<Function>(graph, op::Parameters{A, B, C});
    pass_manager.run_passes(func);
    ASSERT_NE(std::dynamic_pointer_cast<op::MatmulBias>(graph->get_input_op(0)), nullptr);
}
TEST(cpu_fusion, gemm_mlp)
{
    const string json_path = file_util::path_join(SERIALIZED_ZOO, "mxnet/mnist_mlp_forward.json");
    const string json_string = file_util::read_file_to_string(json_path);
    stringstream ss(json_string);
    shared_ptr<Function> func = ngraph::deserialize(ss);
    pass::Manager pass_manager;
-   pass_manager.register_pass<pass::CPUFusion>();
+   pass_manager.register_pass<runtime::cpu::pass::CPUFusion>();
    pass_manager.run_passes(func);
    size_t ccg = count_ops_of_type<op::MatmulBias>(func);
    ASSERT_EQ(ccg, 3);
}