Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
N
ngraph
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ngraph
Commits
47342339
Unverified
Commit
47342339
authored
5 years ago
by
Scott Cyphers
Committed by
GitHub
5 years ago
Browse files
Options
Browse Files
Download
Plain Diff
Merge pull request #3178 from NervanaSystems/bob/gcpu
Update generic CPU backend to latest ngraph API
parents
30527e80
c7630c05
master
v0.29.0-rc.0
v0.28.0-rc.1
v0.28.0-rc.0
v0.27.1-rc.3
v0.27.1-rc.2
v0.27.1-rc.1
v0.27.1-rc.0
v0.27.0-rc.1
v0.27.0-rc.0
v0.26.1-rc.0
v0.26.0
v0.26.0-rc.8
v0.26.0-rc.7
v0.26.0-rc.6
v0.26.0-rc.5
v0.26.0-rc.4
v0.26.0-rc.3
v0.26.0-rc.2
v0.26.0-rc.0
v0.25.1-rc.11
v0.25.1-rc.10
v0.25.1-rc.9
v0.25.1-rc.8
v0.25.1-rc.7
v0.25.1-rc.6
v0.25.1-rc.5
v0.25.1-rc.4
v0.25.1-rc.3
v0.25.1-rc.2
v0.25.1-rc.1
v0.25.1-rc.0
v0.25.0
v0.25.0-rc.3
v0.25.0-rc.2
v0.25.0-rc.1
v0.25.0-rc.0
v0.25.0-dev.0
v0.24.0
v0.24.0-rc.3
v0.24.0-rc.2
v0.24.0-rc.1
v0.24.0-rc.0
v0.20.0-rc.2
No related merge requests found
Expand all
Hide whitespace changes
Inline
Side-by-side
Showing
8 changed files
with
156 additions
and
88 deletions
+156
-88
CMakeLists.txt
src/ngraph/runtime/generic_cpu/CMakeLists.txt
+4
-4
gcpu_backend.cpp
src/ngraph/runtime/generic_cpu/gcpu_backend.cpp
+2
-2
gcpu_executable.cpp
src/ngraph/runtime/generic_cpu/gcpu_executable.cpp
+53
-36
gcpu_executable.hpp
src/ngraph/runtime/generic_cpu/gcpu_executable.hpp
+0
-0
broadcast.hpp
src/ngraph/runtime/generic_cpu/kernel/broadcast.hpp
+95
-0
reshape.hpp
src/ngraph/runtime/generic_cpu/kernel/reshape.hpp
+1
-4
result.hpp
src/ngraph/runtime/generic_cpu/kernel/result.hpp
+0
-41
node_wrapper.hpp
src/ngraph/runtime/generic_cpu/node_wrapper.hpp
+1
-1
No files found.
src/ngraph/runtime/generic_cpu/CMakeLists.txt
View file @
47342339
...
...
@@ -15,10 +15,10 @@
# ******************************************************************************
if
(
NGRAPH_GENERIC_CPU_ENABLE
)
find_package
(
OpenMP
)
if
(
OPENMP_FOUND
)
add_compile_options
(
${
OpenMP_CXX_FLAGS
}
)
endif
()
#
find_package(OpenMP)
#
if (OPENMP_FOUND)
#
add_compile_options(${OpenMP_CXX_FLAGS})
#
endif()
add_library
(
gcpu_backend SHARED gcpu_backend.cpp gcpu_executable.cpp node_wrapper.cpp
)
if
(
NGRAPH_LIB_VERSIONING_ENABLE
)
set_target_properties
(
gcpu_backend PROPERTIES
...
...
This diff is collapsed.
Click to expand it.
src/ngraph/runtime/generic_cpu/gcpu_backend.cpp
View file @
47342339
...
...
@@ -52,14 +52,14 @@ runtime::gcpu::GCPUBackend::GCPUBackend(const vector<string>& unsupported_op_nam
shared_ptr
<
runtime
::
Tensor
>
runtime
::
gcpu
::
GCPUBackend
::
create_tensor
(
const
element
::
Type
&
type
,
const
Shape
&
shape
)
{
return
make_shared
<
runtime
::
HostTensor
>
(
type
,
shape
,
this
);
return
make_shared
<
runtime
::
HostTensor
>
(
type
,
shape
);
}
shared_ptr
<
runtime
::
Tensor
>
runtime
::
gcpu
::
GCPUBackend
::
create_tensor
(
const
element
::
Type
&
type
,
const
Shape
&
shape
,
void
*
memory_pointer
)
{
return
make_shared
<
runtime
::
HostTensor
>
(
type
,
shape
,
memory_pointer
,
this
);
return
make_shared
<
runtime
::
HostTensor
>
(
type
,
shape
,
memory_pointer
);
}
shared_ptr
<
runtime
::
Executable
>
...
...
This diff is collapsed.
Click to expand it.
src/ngraph/runtime/generic_cpu/gcpu_executable.cpp
View file @
47342339
...
...
@@ -15,17 +15,22 @@
//*****************************************************************************
#include "ngraph/runtime/generic_cpu/gcpu_executable.hpp"
#include "ngraph/cpio.hpp"
#include "ngraph/descriptor/layout/dense_tensor_layout.hpp"
#include "ngraph/except.hpp"
#include "ngraph/op/convert.hpp"
#include "ngraph/op/select.hpp"
#include "ngraph/op/util/binary_elementwise_comparison.hpp"
#include "ngraph/pass/assign_layout.hpp"
#include "ngraph/pass/core_fusion.hpp"
#include "ngraph/pass/fused_op_decomposition.hpp"
#include "ngraph/pass/implicit_broadcast_elimination.hpp"
#include "ngraph/pass/like_replacement.hpp"
#include "ngraph/pass/liveness.hpp"
#include "ngraph/pass/manager.hpp"
#include "ngraph/pass/memory_layout.hpp"
#include "ngraph/runtime/backend_manager.hpp"
#include "ngraph/serializer.hpp"
#include "ngraph/util.hpp"
using
namespace
std
;
...
...
@@ -35,21 +40,35 @@ using descriptor::layout::DenseTensorLayout;
runtime
::
gcpu
::
GCPUExecutable
::
GCPUExecutable
(
const
shared_ptr
<
Function
>&
function
,
bool
enable_performance_collection
)
:
m_is_compiled
{
true
}
,
m_performance_counters_enabled
{
enable_performance_collection
}
{
m_function
=
clone_function
(
*
function
);
pass
::
Manager
pass_manager
;
pass_manager
.
register_pass
<
pass
::
LikeReplacement
>
();
pass_manager
.
register_pass
<
pass
::
FusedOpDecomposition
>
();
pass_manager
.
register_pass
<
pass
::
ImplicitBroadcastElimination
>
();
pass_manager
.
register_pass
<
pass
::
AssignLayout
<
DenseTensorLayout
>>
();
pass_manager
.
register_pass
<
pass
::
Liveness
>
();
pass_manager
.
run_passes
(
m_function
);
for
(
const
shared_ptr
<
Node
>&
node
:
m_function
->
get_ordered_ops
())
{
m_is_compiled
=
true
;
pass
::
Manager
pass_manager
;
pass_manager
.
register_pass
<
pass
::
LikeReplacement
>
();
pass_manager
.
register_pass
<
pass
::
AssignLayout
<
DenseTensorLayout
>>
();
pass_manager
.
register_pass
<
pass
::
Liveness
>
();
pass_manager
.
run_passes
(
function
);
m_wrapped_nodes
.
emplace_back
(
node
);
}
set_parameters_and_results
(
*
m_function
);
}
for
(
const
shared_ptr
<
Node
>&
node
:
function
->
get_ordered_ops
())
{
m_wrapped_nodes
.
emplace_back
(
node
);
}
runtime
::
gcpu
::
GCPUExecutable
::
GCPUExecutable
(
const
std
::
string
&
model_string
)
:
m_is_compiled
{
true
}
,
m_performance_counters_enabled
{
false
}
{
m_function
=
deserialize
(
model_string
);
for
(
const
shared_ptr
<
Node
>&
node
:
m_function
->
get_ordered_ops
())
{
m_wrapped_nodes
.
emplace_back
(
node
);
}
set_parameters_and_results
(
*
function
);
set_parameters_and_results
(
*
m_
function
);
}
bool
runtime
::
gcpu
::
GCPUExecutable
::
call
(
const
vector
<
shared_ptr
<
runtime
::
Tensor
>>&
outputs
,
...
...
@@ -82,7 +101,7 @@ bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor
{
for
(
size_t
i
=
0
;
i
<
param
->
get_output_size
();
++
i
)
{
descriptor
::
Tensor
*
tensor
=
param
->
get_output_tensor_ptr
(
i
).
get
();
descriptor
::
Tensor
*
tensor
=
&
param
->
output
(
i
).
get_tensor
();
tensor_map
.
insert
({
tensor
,
func_inputs
[
input_count
++
]});
}
}
...
...
@@ -95,14 +114,14 @@ bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor
{
throw
ngraph_error
(
"One of function's outputs isn't op::Result"
);
}
descriptor
::
Tensor
*
tensor
=
output
->
get_output_tensor_ptr
(
0
).
get
();
descriptor
::
Tensor
*
tensor
=
&
output
->
output
(
0
).
get_tensor
();
tensor_map
.
insert
({
tensor
,
func_outputs
[
output_count
]});
}
// for each ordered op in the graph
for
(
const
NodeWrapper
&
wrapped
:
m_wrapped_nodes
)
{
const
Node
*
op
=
&
wrapped
.
get_node
();
auto
op
=
wrapped
.
get_node
();
auto
type_id
=
wrapped
.
get_typeid
();
if
(
type_id
==
OP_TYPEID
::
Parameter
)
{
...
...
@@ -111,9 +130,9 @@ bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor
// get op inputs from map
vector
<
shared_ptr
<
HostTensor
>>
op_inputs
;
for
(
const
descriptor
::
Input
&
input
:
op
->
get_
inputs
())
for
(
auto
input
:
op
->
inputs
())
{
descriptor
::
Tensor
*
tensor
=
input
.
get_output
().
get_tensor_ptr
().
get
();
descriptor
::
Tensor
*
tensor
=
&
input
.
get_tensor
();
op_inputs
.
push_back
(
tensor_map
.
at
(
tensor
));
}
...
...
@@ -121,14 +140,14 @@ bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor
vector
<
shared_ptr
<
HostTensor
>>
op_outputs
;
for
(
size_t
i
=
0
;
i
<
op
->
get_output_size
();
++
i
)
{
descriptor
::
Tensor
*
tensor
=
op
->
get_output_tensor_ptr
(
i
).
get
();
descriptor
::
Tensor
*
tensor
=
&
op
->
output
(
i
).
get_tensor
();
shared_ptr
<
HostTensor
>
host_tensor
;
auto
it
=
tensor_map
.
find
(
tensor
);
if
(
it
==
tensor_map
.
end
())
{
const
Shape
&
shape
=
op
->
get_output_shape
(
i
);
const
element
::
Type
&
type
=
op
->
get_output_element_type
(
i
);
string
name
=
op
->
get_output_tensor
(
i
).
get_name
();
string
name
=
op
->
output
(
i
).
get_tensor
(
).
get_name
();
host_tensor
=
make_shared
<
runtime
::
HostTensor
>
(
type
,
shape
,
name
);
tensor_map
.
insert
({
tensor
,
host_tensor
});
}
...
...
@@ -177,7 +196,7 @@ bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor
}
if
(
m_nan_check_enabled
)
{
perform_nan_check
(
op_outputs
,
op
);
perform_nan_check
(
op_outputs
,
op
.
get
()
);
}
}
...
...
@@ -186,19 +205,9 @@ bool runtime::gcpu::GCPUExecutable::call(const vector<shared_ptr<runtime::Tensor
void
runtime
::
gcpu
::
GCPUExecutable
::
generate_calls
(
const
element
::
Type
&
type
,
const
NodeWrapper
&
op
,
const
vector
<
shared_ptr
<
HostTensor
>>&
out
puts
,
const
vector
<
shared_ptr
<
HostTensor
>>&
in
puts
)
const
vector
<
shared_ptr
<
HostTensor
>>&
out
,
const
vector
<
shared_ptr
<
HostTensor
>>&
in
)
{
vector
<
void
*>
out
;
vector
<
const
void
*>
in
;
for
(
auto
t
:
outputs
)
{
out
.
push_back
(
t
->
get_data_ptr
());
}
for
(
auto
t
:
inputs
)
{
in
.
push_back
(
t
->
get_data_ptr
());
}
stringstream
ss
;
switch
(
type
.
get_type_enum
())
{
...
...
@@ -216,7 +225,8 @@ void runtime::gcpu::GCPUExecutable::generate_calls(const element::Type& type,
case
element
:
:
Type_t
::
undefined
:
case
element
:
:
Type_t
::
dynamic
:
case
element
:
:
Type_t
::
bf16
:
ss
<<
"unsupported element type "
<<
type
<<
" op "
<<
op
.
get_node
().
get_name
();
case
element
:
:
Type_t
::
f16
:
ss
<<
"unsupported element type "
<<
type
<<
" op "
<<
op
.
get_node
()
->
get_name
();
throw
ngraph_error
(
ss
.
str
());
}
}
...
...
@@ -229,11 +239,9 @@ void runtime::gcpu::GCPUExecutable::set_nan_check(bool enable)
vector
<
runtime
::
PerformanceCounter
>
runtime
::
gcpu
::
GCPUExecutable
::
get_performance_data
()
const
{
vector
<
runtime
::
PerformanceCounter
>
rc
;
for
(
const
pair
<
const
Node
*
,
stopwatch
>
p
:
m_timer_map
)
for
(
const
pair
<
shared_ptr
<
const
Node
>
,
stopwatch
>
p
:
m_timer_map
)
{
rc
.
emplace_back
(
p
.
first
->
get_name
().
c_str
(),
p
.
second
.
get_total_microseconds
(),
p
.
second
.
get_call_count
());
rc
.
emplace_back
(
p
.
first
,
p
.
second
.
get_total_microseconds
(),
p
.
second
.
get_call_count
());
}
return
rc
;
}
...
...
@@ -286,3 +294,12 @@ void runtime::gcpu::GCPUExecutable::perform_nan_check(const vector<shared_ptr<Ho
arg_number
++
;
}
}
// Writes this executable to `out` through a cpio::Writer as two named entries:
//   "save_info" - a fixed identification string
//   "model"     - the result of serialize(m_function, 0)
// NOTE(review): the second argument to serialize() is presumably an indent
// level; confirm against ngraph/serializer.hpp.
void runtime::gcpu::GCPUExecutable::save(ostream& out)
{
    cpio::Writer archive(out);

    // Identification entry, written first.
    string header = "INTERPRETER Save File 1.0";
    archive.write("save_info", header.data(), header.size());

    // Serialized form of the function held by this executable.
    string serialized_function = serialize(m_function, 0);
    archive.write("model", serialized_function.data(), serialized_function.size());
}
This diff is collapsed.
Click to expand it.
src/ngraph/runtime/generic_cpu/gcpu_executable.hpp
View file @
47342339
This diff is collapsed.
Click to expand it.
src/ngraph/runtime/generic_cpu/kernel/broadcast.hpp
View file @
47342339
...
...
@@ -140,6 +140,91 @@ namespace ngraph
}
}
// Fills a rank-5 output tensor from `in`.
//
// Every output element is copied from `in[k]`, where `k` is the output
// coordinate along the lowest-numbered axis in [0, 5) that is NOT listed in
// `broadcast_axes`. If all five axes are broadcast axes, the input is treated
// as a single value and `in[0]` is copied everywhere (previously this case
// dereferenced a null pointer).
//
// \param in             Source buffer.
// \param out            Destination buffer; must hold at least the product of
//                       out_shape[0..4] elements.
// \param in_shape       Not used by this kernel.
// \param out_shape      Output shape; elements 0..4 are read.
// \param broadcast_axes Output axes that are being broadcast.
//
// NOTE(review): only one input coordinate is ever used, so this presumably
// expects a rank-1 (or scalar) input; the dispatching caller is not visible
// here - confirm.
template <typename T>
void broadcast_5d(const T* in,
                  T* out,
                  const Shape& in_shape,
                  const Shape& out_shape,
                  const AxisSet& broadcast_axes)
{
    (void)in_shape; // kept for signature parity with the other broadcast kernels

    size_t index[5];

    // Source index used when no non-broadcast axis exists.
    const size_t scalar_index = 0;
    const size_t* in_index = &scalar_index;
    for (size_t i = 0; i < 5; i++)
    {
        if (broadcast_axes.count(i) == 0)
        {
            in_index = &index[i];
            break;
        }
    }

    // The loops walk the output in row-major order over its full extent, so
    // the linear output offset is simply a running counter.
    size_t out_offset = 0;
    for (index[0] = 0; index[0] < out_shape[0]; ++index[0])
    {
        for (index[1] = 0; index[1] < out_shape[1]; ++index[1])
        {
            for (index[2] = 0; index[2] < out_shape[2]; ++index[2])
            {
                for (index[3] = 0; index[3] < out_shape[3]; ++index[3])
                {
                    for (index[4] = 0; index[4] < out_shape[4]; ++index[4])
                    {
                        out[out_offset++] = in[*in_index];
                    }
                }
            }
        }
    }
}
// Fills a rank-6 output tensor from `in`.
//
// Every output element is copied from `in[k]`, where `k` is the output
// coordinate along the lowest-numbered axis in [0, 6) that is NOT listed in
// `broadcast_axes`. If all six axes are broadcast axes, the input is treated
// as a single value and `in[0]` is copied everywhere (previously this case
// dereferenced a null pointer).
//
// \param in             Source buffer.
// \param out            Destination buffer; must hold at least the product of
//                       out_shape[0..5] elements.
// \param in_shape       Not used by this kernel.
// \param out_shape      Output shape; elements 0..5 are read.
// \param broadcast_axes Output axes that are being broadcast.
//
// NOTE(review): only one input coordinate is ever used, so this presumably
// expects a rank-1 (or scalar) input; the dispatching caller is not visible
// here - confirm.
template <typename T>
void broadcast_6d(const T* in,
                  T* out,
                  const Shape& in_shape,
                  const Shape& out_shape,
                  const AxisSet& broadcast_axes)
{
    (void)in_shape; // kept for signature parity with the other broadcast kernels

    size_t index[6];

    // Source index used when no non-broadcast axis exists.
    const size_t scalar_index = 0;
    const size_t* in_index = &scalar_index;
    for (size_t i = 0; i < 6; i++)
    {
        if (broadcast_axes.count(i) == 0)
        {
            in_index = &index[i];
            break;
        }
    }

    // The loops walk the output in row-major order over its full extent, so
    // the linear output offset is simply a running counter.
    size_t out_offset = 0;
    for (index[0] = 0; index[0] < out_shape[0]; ++index[0])
    {
        for (index[1] = 0; index[1] < out_shape[1]; ++index[1])
        {
            for (index[2] = 0; index[2] < out_shape[2]; ++index[2])
            {
                for (index[3] = 0; index[3] < out_shape[3]; ++index[3])
                {
                    for (index[4] = 0; index[4] < out_shape[4]; ++index[4])
                    {
                        for (index[5] = 0; index[5] < out_shape[5]; ++index[5])
                        {
                            out[out_offset++] = in[*in_index];
                        }
                    }
                }
            }
        }
    }
}
template
<
typename
T
>
void
broadcast
(
const
T
*
in
,
T
*
out
,
...
...
@@ -167,6 +252,16 @@ namespace ngraph
case
4
:
broadcast_4d
<
T
>
(
in
,
out
,
in_shape
,
out_shape
,
broadcast_axes
);
break
;
case
5
:
broadcast_5d
<
T
>
(
in
,
out
,
in_shape
,
out_shape
,
broadcast_axes
);
break
;
case
6
:
broadcast_6d
<
T
>
(
in
,
out
,
in_shape
,
out_shape
,
broadcast_axes
);
break
;
default
:
runtime
::
reference
::
broadcast
<
T
>
(
in
,
out
,
in_shape
,
out_shape
,
broadcast_axes
);
break
;
}
}
else
...
...
This diff is collapsed.
Click to expand it.
src/ngraph/runtime/generic_cpu/kernel/reshape.hpp
View file @
47342339
...
...
@@ -244,10 +244,7 @@ namespace ngraph
case
4
:
reshape_in4
<
T
>
(
in
,
out
,
in_shape
,
in_axis_order
,
out_shape
);
break
;
case
5
:
reshape_in5
<
T
>
(
in
,
out
,
in_shape
,
in_axis_order
,
out_shape
);
break
;
case
6
:
reshape_in6
<
T
>
(
in
,
out
,
in_shape
,
in_axis_order
,
out_shape
);
break
;
default
:
NGRAPH_INFO
<<
"reference::reshape"
;
reference
::
reshape
(
in
,
out
,
in_shape
,
in_axis_order
,
out_shape
);
break
;
default
:
reference
::
reshape
(
in
,
out
,
in_shape
,
in_axis_order
,
out_shape
);
break
;
}
}
}
...
...
This diff is collapsed.
Click to expand it.
src/ngraph/runtime/generic_cpu/kernel/result.hpp
deleted
100644 → 0
View file @
30527e80
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************
#pragma once
#include <algorithm>
#include <cmath>
#include <numeric>
#include <vector>
#include "ngraph/shape.hpp"
namespace ngraph
{
    namespace runtime
    {
        namespace gcpu
        {
            namespace kernel
            {
                // Copies the first `count` elements of `arg` into `out`.
                //
                // Uses std::copy_n (from <algorithm>, already included by this
                // header) instead of an unqualified memcpy: memcpy was never
                // declared via <cstring> here, is undefined for null pointers
                // even when `count` is 0, and is only valid for trivially
                // copyable T.
                //
                // \param arg   Source buffer holding at least `count` elements.
                // \param out   Destination buffer with room for `count` elements.
                // \param count Number of elements (not bytes) to copy.
                template <typename T>
                void result(const T* arg, T* out, size_t count)
                {
                    std::copy_n(arg, count, out);
                }
            }
        }
    }
}
This diff is collapsed.
Click to expand it.
src/ngraph/runtime/generic_cpu/node_wrapper.hpp
View file @
47342339
...
...
@@ -51,7 +51,7 @@ class ngraph::runtime::gcpu::NodeWrapper
public
:
NodeWrapper
(
const
std
::
shared_ptr
<
const
ngraph
::
Node
>&
node
);
const
Node
&
get_node
()
const
{
return
*
m_node
;
}
std
::
shared_ptr
<
const
Node
>
get_node
()
const
{
return
m_node
;
}
ngraph
::
runtime
::
gcpu
::
OP_TYPEID
get_typeid
()
const
{
return
m_typeid
;
}
private
:
std
::
shared_ptr
<
const
ngraph
::
Node
>
m_node
;
...
...
This diff is collapsed.
Click to expand it.
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment