Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
N
ngraph
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ngraph
Commits
1132afe5
Commit
1132afe5
authored
Jan 27, 2019
by
Robert Kimball
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
update intel GPU backend
parent
8214cd39
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
with
86 additions
and
74 deletions
+86
-74
intelgpu_backend.cpp
src/ngraph/runtime/intelgpu/intelgpu_backend.cpp
+48
-52
intelgpu_backend.hpp
src/ngraph/runtime/intelgpu/intelgpu_backend.hpp
+38
-22
No files found.
src/ngraph/runtime/intelgpu/intelgpu_backend.cpp
View file @
1132afe5
...
...
@@ -414,13 +414,31 @@ shared_ptr<runtime::Tensor> runtime::intelgpu::IntelGPUBackend::create_tensor(
element_type
,
shape
,
*
ocl_engine
,
memory_pointer
,
this
);
}
runtime
::
Handle
runtime
::
intelgpu
::
IntelGPUBackend
::
compile
(
shared_ptr
<
Function
>
func
)
shared_ptr
<
runtime
::
Executable
>
runtime
::
intelgpu
::
IntelGPUBackend
::
compile
(
shared_ptr
<
Function
>
func
,
bool
enable_timing
)
{
FunctionInstance
&
instance
=
ocl_networks
[
func
];
if
(
instance
.
ocl_network
!=
nullptr
)
shared_ptr
<
runtime
::
Executable
>
rc
;
auto
it
=
ocl_networks
.
find
(
func
);
if
(
it
!=
ocl_networks
.
end
())
{
return
func
;
rc
=
it
->
second
;
}
else
{
rc
=
make_shared
<
IntelGPUExecutable
>
(
func
,
enable_timing
);
if
(
!
m_function_cache_disabled
)
{
ocl_networks
.
insert
({
func
,
rc
});
}
}
return
rc
;
}
runtime
::
intelgpu
::
IntelGPUExecutable
::
IntelGPUExecutable
(
shared_ptr
<
Function
>
func
,
bool
enable_timing
)
{
FunctionInstance
&
instance
=
m_function_instance
;
instance
.
m_function
=
func
;
set
<
cldnn
::
primitive_id
>
func_output_names
;
cldnn
::
topology
topology
;
...
...
@@ -1803,18 +1821,16 @@ runtime::Handle runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function>
instance
.
m_compilation_time
=
timer_compile
.
get_milliseconds
();
instance
.
m_consumed_memory
=
get_max_memory_rss
()
-
mem_before_compile
;
}
return
func
;
}
bool
runtime
::
intelgpu
::
IntelGPUBackend
::
call
(
shared_ptr
<
Function
>
func
,
const
vector
<
shared_ptr
<
runtime
::
Tensor
>>&
outputs
,
const
vector
<
shared_ptr
<
runtime
::
Tensor
>>&
inputs
)
bool
runtime
::
intelgpu
::
IntelGPUExecutable
::
call
(
const
vector
<
shared_ptr
<
runtime
::
Tensor
>>&
outputs
,
const
vector
<
shared_ptr
<
runtime
::
Tensor
>>&
inputs
)
{
double
mem_call_consumed
=
0.0
f
;
stopwatch
timer_call
;
FunctionInstance
&
instance
=
ocl_networks
[
func
];
FunctionInstance
&
instance
=
m_function_instance
;
shared_ptr
<
Function
>
func
=
instance
.
m_function
;
if
(
instance
.
ocl_network
==
nullptr
)
{
throw
runtime_error
(
"compile() must be called before call()."
);
...
...
@@ -1835,7 +1851,7 @@ bool runtime::intelgpu::IntelGPUBackend::call(shared_ptr<Function> func,
{
shared_ptr
<
runtime
::
intelgpu
::
IntelGPUTensorView
>
tv
=
static_pointer_cast
<
runtime
::
intelgpu
::
IntelGPUTensorView
>
(
inputs
[
i
]);
const
ParameterVector
&
input_params
=
func
->
get_parameters
();
const
ParameterVector
&
input_params
=
get_parameters
();
const
string
&
tensor_name
=
input_params
[
i
]
->
get_output_tensor
().
get_name
();
network
->
set_input_data
(
tensor_name
,
*
tv
->
get_data_ptr
());
}
...
...
@@ -1884,11 +1900,6 @@ bool runtime::intelgpu::IntelGPUBackend::call(shared_ptr<Function> func,
instance
.
m_compilation_time
=
0.0
;
}
if
(
m_function_cache_disabled
)
{
remove_compiled_function
(
func
);
}
return
true
;
}
...
...
@@ -1897,18 +1908,6 @@ void runtime::intelgpu::IntelGPUBackend::remove_compiled_function(shared_ptr<Fun
ocl_networks
.
erase
(
func
);
}
void
runtime
::
intelgpu
::
IntelGPUBackend
::
enable_performance_data
(
shared_ptr
<
Function
>
func
,
bool
enable
)
{
FunctionInstance
&
instance
=
ocl_networks
[
func
];
if
(
instance
.
ocl_network
!=
nullptr
)
{
throw
runtime_error
(
"Performance data collection must be enabled prior to compiling."
);
}
instance
.
m_performance_counters_enabled
=
enable
;
}
// The cldnn::network contains something like "generic_layer_0_Parameter_254_0" names
// This function should return "Parameter_254" from the example above
static
string
convert_cldnn_names
(
shared_ptr
<
Function
>
func
,
const
string
&
cldnn_name
)
...
...
@@ -1933,36 +1932,33 @@ static string convert_cldnn_names(shared_ptr<Function> func, const string& cldnn
}
vector
<
runtime
::
PerformanceCounter
>
runtime
::
intelgpu
::
IntelGPU
Backend
::
get_performance_data
(
shared_ptr
<
Function
>
func
)
const
runtime
::
intelgpu
::
IntelGPU
Executable
::
get_performance_data
(
)
const
{
vector
<
runtime
::
PerformanceCounter
>
rc
;
auto
it
=
ocl_networks
.
find
(
func
);
if
(
it
!=
ocl_networks
.
end
())
{
const
shared_ptr
<
cldnn
::
network
>
network
=
it
->
second
.
ocl_network
;
const
shared_ptr
<
cldnn
::
network
>
network
=
m_function_instance
.
ocl_network
;
shared_ptr
<
Function
>
func
=
m_function_instance
.
m_function
;
if
(
network
!=
nullptr
&&
it
->
second
.
m_performance_counters_enabled
)
{
const
map
<
cldnn
::
primitive_id
,
cldnn
::
event
>&
primitives
=
network
->
get_executed_primitives
();
for
(
const
auto
&
p
:
primitives
)
if
(
network
!=
nullptr
&&
m_function_instance
.
m_performance_counters_enabled
)
{
const
map
<
cldnn
::
primitive_id
,
cldnn
::
event
>&
primitives
=
network
->
get_executed_primitives
();
for
(
const
auto
&
p
:
primitives
)
{
// Let's generate the primitive name that matches to the name in Function
const
string
primitive_name
=
convert_cldnn_names
(
func
,
p
.
first
);
size_t
usec
=
0
;
for
(
const
auto
&
q
:
p
.
second
.
get_profiling_info
())
{
// Let's generate the primitive name that matches to the name in Function
const
string
primitive_name
=
convert_cldnn_names
(
func
,
p
.
first
);
size_t
usec
=
0
;
for
(
const
auto
&
q
:
p
.
second
.
get_profiling_info
())
if
(
q
.
name
==
string
(
"executing"
))
{
if
(
q
.
name
==
string
(
"executing"
))
{
usec
+=
chrono
::
duration_cast
<
chrono
::
duration
<
size_t
,
chrono
::
milliseconds
::
period
>>
(
q
.
value
->
value
())
.
count
();
}
usec
+=
chrono
::
duration_cast
<
chrono
::
duration
<
size_t
,
chrono
::
milliseconds
::
period
>>
(
q
.
value
->
value
())
.
count
();
}
const
runtime
::
PerformanceCounter
perf_counter
(
primitive_name
.
c_str
(),
usec
,
1
);
rc
.
push_back
(
perf_counter
);
}
const
runtime
::
PerformanceCounter
perf_counter
(
primitive_name
.
c_str
(),
usec
,
1
);
rc
.
push_back
(
perf_counter
);
}
}
return
rc
;
...
...
@@ -1981,7 +1977,7 @@ static Node* get_node_by_name(const shared_ptr<Function> func, const string& nam
return
nullptr
;
}
void
runtime
::
intelgpu
::
IntelGPU
Backend
::
print_call_performance
(
void
runtime
::
intelgpu
::
IntelGPU
Executable
::
print_call_performance
(
const
shared_ptr
<
cldnn
::
network
>
network
,
const
shared_ptr
<
Function
>
func
,
double
time_compile
,
...
...
src/ngraph/runtime/intelgpu/intelgpu_backend.hpp
View file @
1132afe5
...
...
@@ -31,6 +31,7 @@ namespace ngraph
namespace
intelgpu
{
class
IntelGPUBackend
;
class
IntelGPUExecutable
;
}
}
}
...
...
@@ -47,18 +48,35 @@ public:
std
::
shared_ptr
<
ngraph
::
runtime
::
Tensor
>
create_tensor
(
const
ngraph
::
element
::
Type
&
element_type
,
const
Shape
&
shape
)
override
;
Handle
compile
(
std
::
shared_ptr
<
Function
>
func
)
override
;
std
::
shared_ptr
<
runtime
::
Executable
>
compile
(
std
::
shared_ptr
<
Function
>
func
,
bool
enable_timing
=
false
)
override
;
void
remove_compiled_function
(
std
::
shared_ptr
<
Function
>
func
);
bool
call
(
std
::
shared_ptr
<
Function
>
func
,
const
std
::
vector
<
std
::
shared_ptr
<
runtime
::
Tensor
>>&
outputs
,
const
std
::
vector
<
std
::
shared_ptr
<
runtime
::
Tensor
>>&
inputs
)
override
;
bool
is_supported_property
(
const
Property
prop
)
const
override
;
void
remove_compiled_function
(
std
::
shared_ptr
<
Function
>
func
)
override
;
void
enable_performance_data
(
std
::
shared_ptr
<
Function
>
func
,
bool
enable
)
override
;
std
::
vector
<
PerformanceCounter
>
get_performance_data
(
std
::
shared_ptr
<
Function
>
func
)
const
override
;
private
:
std
::
shared_ptr
<
cldnn
::
engine
>
ocl_engine
;
std
::
map
<
std
::
shared_ptr
<
Function
>
,
std
::
shared_ptr
<
runtime
::
Executable
>>
ocl_networks
;
bool
is_supported_property
(
const
Property
prop
)
const
override
;
bool
m_profile_enable
=
false
;
long
m_profile_lines_limit_count
=
10
;
bool
m_dump_graph_enable
=
false
;
bool
m_cldnn_graph_optimize
=
true
;
bool
m_cldnn_dump_enable
=
false
;
bool
m_function_cache_disabled
=
false
;
bool
m_disable_backend_optimizations
=
false
;
std
::
string
m_cldnn_dump_dir
=
std
::
string
(
"intelgpu_codegen"
);
std
::
string
delim
=
std
::
string
(
":"
);
};
class
ngraph
::
runtime
::
intelgpu
::
IntelGPUExecutable
:
public
runtime
::
Executable
{
public
:
IntelGPUExecutable
(
std
::
shared_ptr
<
Function
>
func
,
bool
enable_timing
);
bool
call
(
const
std
::
vector
<
std
::
shared_ptr
<
runtime
::
Tensor
>>&
outputs
,
const
std
::
vector
<
std
::
shared_ptr
<
runtime
::
Tensor
>>&
inputs
)
override
;
std
::
vector
<
PerformanceCounter
>
get_performance_data
()
const
override
;
private
:
class
FunctionInstance
...
...
@@ -68,12 +86,19 @@ private:
bool
m_performance_counters_enabled
=
false
;
double
m_compilation_time
=
0.0
;
double
m_consumed_memory
=
0.0
;
};
std
::
map
<
std
::
shared_ptr
<
Function
>
,
FunctionInstance
>
ocl_networks
;
std
::
shared_ptr
<
cldnn
::
engine
>
ocl_engine
;
std
::
shared_ptr
<
Function
>
m_function
;
}
m_function_instance
;
bool
m_profile_enable
=
false
;
long
m_profile_lines_limit_count
=
10
;
bool
m_dump_graph_enable
=
false
;
bool
m_cldnn_graph_optimize
=
true
;
bool
m_cldnn_dump_enable
=
false
;
bool
m_function_cache_disabled
=
false
;
bool
m_disable_backend_optimizations
=
false
;
std
::
shared_ptr
<
cldnn
::
engine
>
ocl_engine
;
std
::
string
m_cldnn_dump_dir
=
std
::
string
(
"intelgpu_codegen"
);
std
::
string
delim
=
std
::
string
(
":"
);
// Statistic related things
void
print_call_performance
(
const
std
::
shared_ptr
<
cldnn
::
network
>
network
,
...
...
@@ -83,13 +108,4 @@ private:
double
mem_compilation_consumed
,
double
mem_call_consumed
,
double
mem_current
)
const
;
bool
m_profile_enable
=
false
;
long
m_profile_lines_limit_count
=
10
;
bool
m_dump_graph_enable
=
false
;
bool
m_cldnn_graph_optimize
=
true
;
bool
m_cldnn_dump_enable
=
false
;
bool
m_function_cache_disabled
=
false
;
std
::
string
m_cldnn_dump_dir
=
std
::
string
(
"intelgpu_codegen"
);
std
::
string
delim
=
std
::
string
(
":"
);
};
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment