Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
N
ngraph
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ngraph
Commits
ca5476b3
Commit
ca5476b3
authored
Mar 05, 2019
by
Sergey Shalnov
Committed by
Scott Cyphers
Mar 05, 2019
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
IntelGPU backend: Concat operation custom kernel implementation (#2551)
parent
5b349479
Hide whitespace changes
Inline
Side-by-side
Showing
3 changed files
with
204 additions
and
18 deletions
+204
-18
intelgpu_backend.cpp
src/ngraph/runtime/intelgpu/intelgpu_backend.cpp
+54
-18
intelgpu_op_custom_kernels.cpp
src/ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.cpp
+142
-0
intelgpu_op_custom_kernels.hpp
src/ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp
+8
-0
No files found.
src/ngraph/runtime/intelgpu/intelgpu_backend.cpp
View file @
ca5476b3
...
...
@@ -614,34 +614,70 @@ shared_ptr<runtime::Executable>
arguments_check
(
op
,
1
,
1
);
}
// All input shapes must be the same
// if shape is empty (means Shape{}) in this case treat its size as 1
const
size_t
ngraph_tensor_dims
=
get_input_shape
(
op
).
empty
()
?
1
:
get_input_shape
(
op
).
size
();
const
shared_ptr
<
op
::
Concat
>
concat_op
=
static_pointer_cast
<
op
::
Concat
>
(
op
);
const
size_t
ngraph_concat_axis
=
concat_op
->
get_concatenation_axis
();
vector
<
cldnn
::
primitive_id
>
inputs
;
cldnn
::
concatenation
::
concatenation_axis
cldnn_axis
=
intelgpu_space
::
get_cldnn_axis
(
ngraph_tensor_dims
,
ngraph_concat_axis
);
for
(
auto
const
&
input
:
op
->
get_inputs
())
if
(
!
shape_size
(
get_output_shape
(
op
))
||
(
get_input_type
(
op
)
!=
element
::
f32
)
||
get_output_shape
(
op
).
size
()
>
4
)
{
const
Shape
&
input_shape
=
input
.
get_shape
();
if
(
shape_size
(
input_shape
))
vector
<
string
>
input_names
;
vector
<
Shape
>
input_shapes
;
for
(
auto
const
&
input
:
op
->
get_inputs
())
{
inputs
.
push_back
(
input
.
get_tensor
().
get_name
());
const
Shape
&
input_shape
=
input
.
get_tensor
().
get_shape
();
if
(
shape_size
(
input_shape
))
{
input_names
.
push_back
(
input
.
get_tensor
().
get_name
());
input_shapes
.
push_back
(
input_shape
);
}
}
}
if
(
inputs
.
empty
())
{
do_equal_propagation
(
topology
,
get_input_name
(
op
),
get_output_name
(
op
));
if
(
input_names
.
empty
())
{
do_equal_propagation
(
topology
,
get_input_name
(
op
),
get_output_name
(
op
));
}
else
{
do_concat_operation
(
topology
,
input_names
,
input_shapes
,
get_output_name
(
op
),
get_output_shape
(
op
),
get_output_type
(
op
),
ngraph_concat_axis
);
}
}
else
{
const
cldnn
::
concatenation
cldnn_concat
(
get_output_name
(
op
),
inputs
,
cldnn_axis
);
topology
.
add
(
cldnn_concat
);
// All input shapes must be the same
// if shape is empty (means Shape{}) in this case treat its size as 1
const
size_t
ngraph_tensor_dims
=
get_input_shape
(
op
).
empty
()
?
1
:
get_input_shape
(
op
).
size
();
vector
<
cldnn
::
primitive_id
>
inputs
;
cldnn
::
concatenation
::
concatenation_axis
cldnn_axis
=
intelgpu_space
::
get_cldnn_axis
(
ngraph_tensor_dims
,
ngraph_concat_axis
);
for
(
auto
const
&
input
:
op
->
get_inputs
())
{
const
Shape
&
input_shape
=
input
.
get_shape
();
if
(
shape_size
(
input_shape
))
{
inputs
.
push_back
(
input
.
get_tensor
().
get_name
());
}
}
if
(
inputs
.
empty
())
{
do_equal_propagation
(
topology
,
get_input_name
(
op
),
get_output_name
(
op
));
}
else
{
const
cldnn
::
concatenation
cldnn_concat
(
get_output_name
(
op
),
inputs
,
cldnn_axis
);
topology
.
add
(
cldnn_concat
);
}
}
break
;
}
...
...
src/ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.cpp
View file @
ca5476b3
...
...
@@ -1078,6 +1078,148 @@ void runtime::intelgpu::do_slice_operation(cldnn::topology& topology,
topology
.
add
(
op_slice
);
}
// Implements ngraph Concat for the IntelGPU backend by emitting a chain of
// custom OpenCL kernels (one per non-empty input) through cldnn::custom_gpu_primitive.
//
// Parameters:
//   topology     - clDNN topology the generated primitives are appended to
//   input_names  - clDNN tensor names of the (non-empty) concat inputs, in order
//   input_shapes - shapes matching input_names, one per input
//   output_name  - clDNN tensor name the final kernel must produce
//   output_shape - shape of the concatenated result
//   output_type  - element type of the result (used for layout and OpenCL type name)
//   concat_axis  - ngraph axis along which inputs are concatenated
//
// NOTE(review): callers are expected to have filtered out zero-sized inputs and to
// pass input_names/input_shapes of equal length — not validated here; confirm at call sites.
void runtime::intelgpu::do_concat_operation(cldnn::topology& topology,
                                            const vector<string>& input_names,
                                            const vector<Shape>& input_shapes,
                                            const string& output_name,
                                            const Shape& output_shape,
                                            const element::Type& output_type,
                                            size_t concat_axis)
{
    const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
    const string kernel_type_name = get_opencl_type_name(output_type);
    string entry_point_name = "concat_" + output_name;
    // Running offset along concat_axis: where the current input starts in the output.
    size_t bound_below = 0;
    size_t idx = 0;
    vector<string>::const_iterator input_name = input_names.cbegin();
    // Name of the previous kernel's output; fed as input1 to the next kernel in the chain.
    string aux_output_name;

    // this is quite non optimal because cldnn::custom_gpu_primitive
    // does not provide an ability to run kernels simultaneously with the same output
    // Also, need to make a chain of kernels to put kernel0::output0 as kernel1::input1
    // with output name kernel1::output2
    for (auto const& input_shape : input_shapes)
    {
        string name_suffix = to_string(idx);
        const string entry_point_name_suffix = entry_point_name + "_" + name_suffix;
        CodeWriter writer;
        vector<size_t> gws;

        if (idx == 0)
        {
            // First kernel reads only its own input.
            gen_func_def(writer,
                         entry_point_name_suffix,
                         {kernel_type_name},
                         {input_shape},
                         kernel_type_name,
                         output_shape);
        }
        else
        {
            // Subsequent kernels take two inputs: the fresh input tensor and the
            // previous kernel's partial result (both of kernel_type_name type).
            gen_func_def(writer,
                         entry_point_name_suffix,
                         {2, kernel_type_name},
                         {input_shape, output_shape},
                         kernel_type_name,
                         output_shape);
        }

        writer.block_begin();
        {
            // Main loops
            gws = generate_loops(writer, output_shape, true);

            writer << kernel_type_name << " input_element;\n";

            // Extent of this input along the concat axis; a scalar input (empty
            // shape) would throw here — see the "[0]" scalar branch below.
            size_t bound_upper = input_shape.at(concat_axis);

            // copy corresponding elements of input0 into output
            writer << "if (((" << bound_below << " + 0) <= i" << concat_axis << ") && (i"
                   << concat_axis << " < (" << bound_below << " + " << bound_upper << ")))\n";
            writer.block_begin();
            {
                writer << "input_element = input0";
                if (input_shape.empty())
                {
                    // it means scalar
                    writer << "[0]";
                }
                else
                {
                    // Index the input with the output loop variables, shifting the
                    // concat-axis index back by this input's starting offset.
                    size_t var_idx = 0;
                    for (auto const i : input_shape)
                    {
                        if (var_idx == concat_axis)
                        {
                            writer << "[i" << var_idx << " - " << bound_below << "]";
                        }
                        else
                        {
                            writer << "[i" << var_idx << "]";
                        }
                        ++var_idx;
                    }
                }
                writer << ";\n";
            }
            writer.block_end();

            // if not a first kernel, copy input1 into output
            if (idx != 0)
            {
                writer << "else\n";
                writer.block_begin();
                {
                    writer << "input_element = input1" << access_dims(output_shape) << ";\n";
                }
                writer.block_end();
            }
            bound_below += bound_upper;

            writer << "output" << access_dims(output_shape) << " = input_element;\n";

            // Closing brackets for main loops
            generate_loops(writer, output_shape, false);
        }
        writer.block_end();

        vector<cldnn::primitive_id> kernel_input;
        vector<cldnn_arg> kernel_arguments;

        kernel_input.push_back(*input_name);
        if (idx == 0)
        {
            kernel_arguments = get_kernel_args(1, 1);
        }
        else
        {
            if (idx == input_shapes.size() - 1)
            {
                // last kernel should produce the output name as overall node required
                name_suffix = "";
            }
            // Chain in the previous kernel's partial result as the second input.
            kernel_input.push_back(aux_output_name);
            kernel_arguments = get_kernel_args(2, 1);
        }

        const cldnn::custom_gpu_primitive op_concat(output_name + name_suffix,
                                                    kernel_input,
                                                    {writer.get_code()},
                                                    entry_point_name_suffix,
                                                    kernel_arguments,
                                                    "",
                                                    layout,
                                                    gws);
        topology.add(op_concat);

        ++input_name;
        ++idx;
        aux_output_name = output_name + name_suffix;
    }
}
void
runtime
::
intelgpu
::
do_select_operation
(
cldnn
::
topology
&
topology
,
const
string
&
input0_name
,
const
Shape
&
input0_shape
,
...
...
src/ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp
View file @
ca5476b3
...
...
@@ -102,6 +102,14 @@ namespace ngraph
const
Coordinate
&
uppper_bounds
,
const
Strides
&
strides
);
// Emits custom OpenCL kernel(s) implementing Concat of input_names/input_shapes
// along concat_axis into output_name (shape output_shape, element type output_type),
// appending the generated primitives to the given clDNN topology.
void do_concat_operation(cldnn::topology& topology,
                         const std::vector<std::string>& input_names,
                         const std::vector<Shape>& input_shapes,
                         const std::string& output_name,
                         const Shape& output_shape,
                         const element::Type& output_type,
                         size_t concat_axis);
void
do_select_operation
(
cldnn
::
topology
&
topology
,
const
std
::
string
&
input0_name
,
const
Shape
&
input0_shape
,
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment