Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
N
ngraph
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ngraph
Commits
92adea38
Commit
92adea38
authored
Aug 01, 2018
by
shssf
Committed by
Scott Cyphers
Aug 01, 2018
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
IntelGPU backend: Sum and redeveloped Broadcast operation (#1276)
parent
cb84305e
Show whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
180 additions
and
116 deletions
+180
-116
intelgpu_backend.cpp
src/ngraph/runtime/intelgpu/intelgpu_backend.cpp
+61
-2
intelgpu_op_broadcast.cpp
src/ngraph/runtime/intelgpu/intelgpu_op_broadcast.cpp
+103
-111
intelgpu_op_broadcast.hpp
src/ngraph/runtime/intelgpu/intelgpu_op_broadcast.hpp
+16
-3
No files found.
src/ngraph/runtime/intelgpu/intelgpu_backend.cpp
View file @
92adea38
...
...
@@ -289,6 +289,8 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
const
string
&
output_name
=
op
->
get_outputs
().
begin
()
->
get_tensor
().
get_name
();
const
Shape
&
output_shape
=
op
->
get_outputs
().
begin
()
->
get_shape
();
const
element
::
Type
&
output_type
=
op
->
get_outputs
().
begin
()
->
get_tensor
().
get_element_type
();
const
shared_ptr
<
op
::
Broadcast
>
broadcast
=
static_pointer_cast
<
op
::
Broadcast
>
(
op
);
const
AxisSet
&
axis
=
broadcast
->
get_broadcast_axes
();
...
...
@@ -297,10 +299,67 @@ bool runtime::intelgpu::IntelGPUBackend::compile(shared_ptr<Function> func)
{
do_equal_propagation
(
topology
,
input_name
,
output_name
);
}
else
if
(
input_shape
.
empty
())
{
do_bcast_sum_operation_scalar
(
topology
,
input_name
,
input_shape
,
output_name
,
output_shape
,
output_type
,
true
);
}
else
{
do_bcast_sum_operation
(
topology
,
input_name
,
input_shape
,
output_name
,
output_shape
,
output_type
,
axis
,
true
);
}
}
else
if
(
"Sum"
==
op
->
description
())
{
arguments_check
(
op
,
1
,
1
);
const
string
&
input_name
=
op
->
get_inputs
().
begin
()
->
get_tensor
().
get_name
();
const
Shape
&
input_shape
=
op
->
get_inputs
().
begin
()
->
get_shape
();
const
string
&
output_name
=
op
->
get_outputs
().
begin
()
->
get_tensor
().
get_name
();
const
Shape
&
output_shape
=
op
->
get_outputs
().
begin
()
->
get_shape
();
const
element
::
Type
&
output_type
=
op
->
get_outputs
().
begin
()
->
get_tensor
().
get_element_type
();
const
shared_ptr
<
op
::
Sum
>
sum
=
static_pointer_cast
<
op
::
Sum
>
(
op
);
const
AxisSet
&
axis
=
sum
->
get_reduction_axes
();
if
(
axis
.
empty
())
{
do_equal_propagation
(
topology
,
input_name
,
output_name
);
}
else
if
(
output_shape
.
empty
())
{
do_bcast_sum_operation_scalar
(
topology
,
input_name
,
input_shape
,
output_name
,
output_shape
,
output_type
,
false
);
}
else
{
do_broadcast_operation
(
topology
,
input_name
,
input_shape
,
output_name
,
output_shape
,
axis
);
do_bcast_sum_operation
(
topology
,
input_name
,
input_shape
,
output_name
,
output_shape
,
output_type
,
axis
,
false
);
}
}
else
if
(
"Reshape"
==
op
->
description
())
...
...
src/ngraph/runtime/intelgpu/intelgpu_op_broadcast.cpp
View file @
92adea38
...
...
@@ -15,8 +15,10 @@
*******************************************************************************/
#include <CPP/concatenation.hpp>
#include <CPP/custom_gpu_primitive.hpp>
#include <CPP/reshape.hpp>
#include "ngraph/runtime/intelgpu/code_writer.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_layout.hpp"
#include "ngraph/runtime/intelgpu/intelgpu_op_broadcast.hpp"
...
...
@@ -25,160 +27,150 @@
using
namespace
std
;
using
namespace
ngraph
;
static
const
string
reshape_suf
(
"_reshape"
)
;
static
vector
<
cldnn_arg
>
parameters_1inp_1out
=
{{
arg_input
,
0
},
{
arg_output
,
0
}}
;
static
Shape
propagate_backward
(
const
Shape
&
input
)
static
string
array_dims
(
const
Shape
&
dimentions
)
{
Shape
result
({
0
,
0
,
0
,
0
});
size_t
idx
=
result
.
size
()
-
1
;
string
buffer
;
for
(
auto
i
=
input
.
crbegin
();
i
!=
input
.
crend
();
++
i
,
--
idx
)
for
(
auto
const
&
dim
:
dimentions
)
{
result
.
at
(
idx
)
=
*
i
;
buffer
+=
"["
+
to_string
(
dim
)
+
"]"
;
}
return
result
;
return
buffer
;
}
static
Shape
propagate_forward
(
const
Shape
&
input
)
static
string
access_dims
(
const
Shape
&
dimentions
,
const
AxisSet
&
axis
=
{}
)
{
Shape
result
({
0
,
0
,
0
,
0
})
;
s
ize_t
idx
=
0
;
size_t
var_idx
=
0
;
s
tring
buffer
;
for
(
auto
i
=
input
.
cbegin
();
i
!=
input
.
cend
();
++
i
,
++
idx
)
for
(
auto
const
&
i
:
dimentions
)
{
result
.
at
(
idx
)
=
*
i
;
}
return
result
;
}
static
Shape
apply_axis
(
const
Shape
&
input
,
const
AxisSet
&
axis
)
{
Shape
result
=
input
;
for
(
auto
const
&
i
:
axis
)
if
(
axis
.
find
(
var_idx
)
==
axis
.
end
())
{
result
.
at
(
i
)
=
0
;
buffer
+=
"[i"
+
to_string
(
var_idx
)
+
"]"
;
}
++
var_idx
;
}
return
result
;
return
buffer
;
}
// This function broadcast input data to all other dimensions of the output
// it operates in two mode only (controlled by is_forward flag):
// [forward]: propagate data from left to right in Shape array term
// in[2], out[2,3,4,5], axis[1,2,3]
// [backward]: propagate data from right to left in Shape array term
// in[5], out[2,3,4,5], axis[0,1,2]
// Input and output shapes can be up to 4 dimensions
// Other variants, like: in[4] out[2,3,4,5] axis[0,1,3], unsupported yet
static
void
do_propagation
(
cldnn
::
topology
&
topology
,
void
runtime
::
intelgpu
::
do_bcast_sum_operation_scalar
(
cldnn
::
topology
&
topology
,
const
string
&
input_name
,
const
Shape
&
input_shape
,
const
string
&
output_name
,
const
Shape
&
output_shape
,
const
AxisSet
&
axis
,
bool
is_forward
)
const
element
::
Type
&
output_type
,
bool
is_bcast
)
{
//default value used in "forward" mode
cldnn
::
concatenation
::
concatenation_axis
direction
=
runtime
::
intelgpu
::
IntelGPULayout
::
get_cldnn_axis
(
3
);
string
input_name_it
=
input_name
;
string
output_name_it
=
output_name
;
Shape
input_shape_it
=
input_shape
;
for
(
auto
axis_id
=
axis
.
crbegin
();
axis_id
!=
axis
.
crend
();)
{
const
size_t
input_count
=
output_shape
.
at
(
*
axis_id
);
if
(
is_forward
)
const
string
function_name
=
is_bcast
?
"broadcast_scalar"
:
"sum_scalar"
;
const
size_t
input_count
=
is_bcast
?
shape_size
<
Shape
>
(
output_shape
)
:
shape_size
<
Shape
>
(
input_shape
);
codegen
::
CodeWriter
writer
;
writer
<<
"__kernel void "
<<
function_name
<<
"(const __global float* input, __global float* output)
\n
"
;
writer
.
block_begin
();
{
input_shape_it
.
push_back
(
1
);
const
cldnn
::
tensor
my_tensor
=
runtime
::
intelgpu
::
IntelGPULayout
::
create_cldnn_tensor
(
input_shape_it
);
writer
<<
"float sum = 0.f;
\n
"
<<
"for (uint i = 0; i < COUNT; ++i)
\n
"
;
writer
.
block_begin
(
);
const
cldnn
::
reshape
op_reshape
(
input_name_it
+
reshape_suf
,
input_name_it
,
my_tensor
);
topology
.
add
(
op_reshape
);
input_shape_it
.
back
()
=
input_count
;
input_name_it
+=
reshape_suf
;
}
else
{
direction
=
runtime
::
intelgpu
::
IntelGPULayout
::
get_cldnn_axis
(
*
axis_id
);
}
const
vector
<
cldnn
::
primitive_id
>
input_names
(
input_count
,
input_name_it
);
++
axis_id
;
if
(
axis_id
==
axis
.
crend
())
if
(
is_bcast
)
{
output_name_it
=
output_name
;
writer
<<
"output[i] = input[0];
\n
"
;
writer
.
block_end
();
}
else
{
output_name_it
+=
":_"
;
input_name_it
=
output_name_it
;
writer
<<
"sum += input[i];
\n
"
;
writer
.
block_end
();
writer
<<
"output[0] = sum;
\n
"
;
}
const
cldnn
::
concatenation
op_concat
(
output_name_it
,
input_names
,
direction
);
topology
.
add
(
op_concat
);
}
}
// Assume input is scalar. All output data will be populated by the scalar
// The function extremely non optimal from performance perspective
static
void
do_scalar_propagation
(
cldnn
::
topology
&
topology
,
const
string
&
input_name
,
const
string
&
output_name
,
const
Shape
&
output_shape
)
{
const
size_t
input_count
=
shape_size
<
const
Shape
>
(
output_shape
);
const
vector
<
cldnn
::
primitive_id
>
input_names
(
input_count
,
input_name
);
const
cldnn
::
concatenation
op_concat
(
output_name
,
input_names
,
cldnn
::
concatenation
::
along_x
);
topology
.
add
(
op_concat
);
}
// End of function bracket
writer
.
block_end
();
const
cldnn
::
layout
layout
=
IntelGPULayout
::
create_cldnn_layout
(
output_type
,
output_shape
);
const
cldnn
::
custom_gpu_primitive
op_scalar
(
output_name
,
{
input_name
},
{
writer
.
get_code
()},
function_name
,
parameters_1inp_1out
,
string
(
"-DCOUNT="
+
to_string
(
input_count
)),
layout
);
topology
.
add
(
op_scalar
);
}
void
runtime
::
intelgpu
::
do_b
roadcast
_operation
(
cldnn
::
topology
&
topology
,
void
runtime
::
intelgpu
::
do_b
cast_sum
_operation
(
cldnn
::
topology
&
topology
,
const
string
&
input_name
,
const
Shape
&
input_shape
,
const
string
&
output_name
,
const
Shape
&
output_shape
,
const
AxisSet
&
axis
)
const
element
::
Type
&
output_type
,
const
AxisSet
&
axis
,
bool
is_bcast
)
{
if
(
input_shape
.
size
()
>
4
||
output_shape
.
size
()
>
4
)
{
throw
invalid_argument
(
"IntelGPU::Broadcast supports 4D shapes maximum."
);
}
const
string
function_name
=
is_bcast
?
"broadcast"
:
"sum"
;
codegen
::
CodeWriter
writer
;
if
(
input_shape
.
empty
())
{
do_scalar_propagation
(
topology
,
input_name
,
output_name
,
output_shape
)
;
writer
<<
"__kernel void "
<<
function_name
<<
"(const __global float input"
<<
array_dims
(
input_shape
)
<<
", __global float output"
<<
array_dims
(
output_shape
)
<<
")
\n
"
;
return
;
writer
.
block_begin
();
{
if
(
is_bcast
)
{
size_t
var_idx
=
0
;
for
(
auto
const
&
i
:
output_shape
)
{
writer
<<
"for (uint i"
<<
var_idx
<<
" = 0; i"
<<
var_idx
<<
" < "
<<
i
<<
"; ++i"
<<
var_idx
<<
")
\n
"
;
writer
.
block_begin
();
++
var_idx
;
}
writer
<<
"output"
<<
access_dims
(
output_shape
)
<<
" = input"
<<
access_dims
(
output_shape
,
axis
)
<<
";
\n
"
;
const
Shape
output_shape_axis
=
apply_axis
(
output_shape
,
axis
);
const
Shape
input_shape_forward
=
propagate_forward
(
input_shape
);
const
Shape
output_shape_forward
=
propagate_forward
(
output_shape_axis
);
const
Shape
input_shape_backward
=
propagate_backward
(
input_shape
);
const
Shape
output_shape_backward
=
propagate_backward
(
output_shape_axis
);
if
(
input_shape_forward
==
output_shape_forward
)
// Closing brackets for Broadcast loop
for
(
auto
const
&
i
:
output_shape
)
{
do_propagation
(
topology
,
input_name
,
input_shape
,
output_name
,
output_shape
,
axis
,
true
);
writer
.
block_end
(
);
}
else
if
(
input_shape_backward
==
output_shape_backward
)
{
do_propagation
(
topology
,
input_name
,
input_shape
,
output_name
,
output_shape
,
axis
,
false
);
}
else
{
ostringstream
os
;
os
<<
"IntelGP::Broadcast unsupported mode. input"
<<
vector_to_string
(
input_shape
)
<<
" output"
<<
vector_to_string
(
output_shape
)
<<
" axis"
<<
vector_to_string
(
axis
);
throw
invalid_argument
(
os
.
str
());
size_t
var_idx
=
0
;
for
(
auto
const
&
i
:
input_shape
)
{
writer
<<
"for (uint i"
<<
var_idx
<<
" = 0; i"
<<
var_idx
<<
" < "
<<
i
<<
"; ++i"
<<
var_idx
<<
")
\n
"
;
writer
.
block_begin
();
++
var_idx
;
}
writer
<<
"output"
<<
access_dims
(
input_shape
,
axis
)
<<
" += input"
<<
access_dims
(
input_shape
)
<<
";
\n
"
;
// Closing brackets for Sum loop
for
(
auto
const
&
i
:
input_shape
)
{
writer
.
block_end
();
}
}
}
// End of function bracket
writer
.
block_end
();
const
cldnn
::
layout
layout
=
IntelGPULayout
::
create_cldnn_layout
(
output_type
,
output_shape
);
const
cldnn
::
custom_gpu_primitive
op_bcast_sum
(
output_name
,
{
input_name
},
{
writer
.
get_code
()},
function_name
,
parameters_1inp_1out
,
""
,
layout
);
topology
.
add
(
op_bcast_sum
);
}
src/ngraph/runtime/intelgpu/intelgpu_op_broadcast.hpp
View file @
92adea38
...
...
@@ -27,13 +27,26 @@ namespace ngraph
{
namespace
intelgpu
{
// This implements Broadcast nGraph operation
void
do_broadcast_operation
(
cldnn
::
topology
&
topology
,
// This implements Broadcast and Sum nGraph operations
// in case of input_shape is not empty
void
do_bcast_sum_operation
(
cldnn
::
topology
&
topology
,
const
std
::
string
&
input_name
,
const
Shape
&
input_shape
,
const
std
::
string
&
output_name
,
const
Shape
&
output_shape
,
const
AxisSet
&
axis
);
const
element
::
Type
&
output_type
,
const
AxisSet
&
axis
,
bool
is_bcast
);
// This implements Broadcast and Sum nGraph operations
// in case of input_shape is empty
void
do_bcast_sum_operation_scalar
(
cldnn
::
topology
&
topology
,
const
std
::
string
&
input_name
,
const
Shape
&
input_shape
,
const
std
::
string
&
output_name
,
const
Shape
&
output_shape
,
const
element
::
Type
&
output_type
,
bool
is_bcast
);
}
}
}
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment