Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
N
ngraph
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ngraph
Commits
f8632ea0
Commit
f8632ea0
authored
Feb 23, 2019
by
Sergey Shalnov
Committed by
Sang Ik Lee
Feb 23, 2019
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
IntelGPU backend: Max and Avg pool fix (#2482)
parent
fd0ed37c
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
252 additions
and
20 deletions
+252
-20
intelgpu_backend.cpp
src/ngraph/runtime/intelgpu/intelgpu_backend.cpp
+56
-2
intelgpu_op_custom_kernels.cpp
src/ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.cpp
+183
-12
intelgpu_op_custom_kernels.hpp
src/ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp
+13
-0
unit_test.manifest
src/ngraph/runtime/intelgpu/unit_test.manifest
+0
-6
No files found.
src/ngraph/runtime/intelgpu/intelgpu_backend.cpp
View file @
f8632ea0
...
...
@@ -765,14 +765,40 @@ shared_ptr<runtime::Executable>
}
case
OP_TYPEID
:
:
MaxPool
:
{
arguments_check
(
op
,
1
,
1
);
const
shared_ptr
<
op
::
MaxPool
>
max_pool
=
static_pointer_cast
<
op
::
MaxPool
>
(
op
);
if
((
get_input_shape
(
op
).
size
()
>
4
)
||
(
get_output_type
(
op
)
!=
element
::
f32
)
||
!
max_pool
->
get_padding_below
().
empty
()
||
!
max_pool
->
get_padding_above
().
empty
())
{
const
shared_ptr
<
Node
>
def_val
=
max_pool
->
get_default_value
();
const
shared_ptr
<
op
::
Constant
>
def_const
=
static_pointer_cast
<
op
::
Constant
>
(
def_val
);
const
vector
<
std
::
string
>&
values
=
def_const
->
get_value_strings
();
do_max_avg_pool_operation
(
topology
,
get_input_name
(
op
),
get_input_shape
(
op
),
get_output_name
(
op
),
get_output_shape
(
op
),
get_output_type
(
op
),
max_pool
->
get_window_shape
(),
max_pool
->
get_window_movement_strides
(),
max_pool
->
get_padding_below
(),
false
,
values
.
at
(
0
),
true
);
}
else
{
do_pooling_operation
(
topology
,
op
,
max_pool
->
get_window_shape
(),
max_pool
->
get_window_movement_strides
(),
max_pool
->
get_padding_below
(),
cldnn
::
pooling_mode
::
max
);
}
break
;
}
case
OP_TYPEID
:
:
MaxPoolBackprop
:
...
...
@@ -804,7 +830,34 @@ shared_ptr<runtime::Executable>
}
case
OP_TYPEID
:
:
AvgPool
:
{
arguments_check
(
op
,
1
,
1
);
const
shared_ptr
<
op
::
AvgPool
>
avg_pool
=
static_pointer_cast
<
op
::
AvgPool
>
(
op
);
if
((
get_input_shape
(
op
).
size
()
>
4
)
||
(
get_output_type
(
op
)
!=
element
::
f32
)
||
avg_pool
->
get_include_padding_in_avg_computation
()
||
!
avg_pool
->
get_padding_below
().
empty
()
||
!
avg_pool
->
get_padding_above
().
empty
())
{
const
shared_ptr
<
Node
>
def_val
=
avg_pool
->
get_default_value
();
const
shared_ptr
<
op
::
Constant
>
def_const
=
static_pointer_cast
<
op
::
Constant
>
(
def_val
);
const
vector
<
std
::
string
>&
values
=
def_const
->
get_value_strings
();
do_max_avg_pool_operation
(
topology
,
get_input_name
(
op
),
get_input_shape
(
op
),
get_output_name
(
op
),
get_output_shape
(
op
),
get_output_type
(
op
),
avg_pool
->
get_window_shape
(),
avg_pool
->
get_window_movement_strides
(),
avg_pool
->
get_padding_below
(),
avg_pool
->
get_include_padding_in_avg_computation
(),
values
.
at
(
0
),
false
);
}
else
{
const
cldnn
::
pooling_mode
mode
=
avg_pool
->
get_include_padding_in_avg_computation
()
?
cldnn
::
pooling_mode
::
average
:
cldnn
::
pooling_mode
::
average_no_padding
;
...
...
@@ -815,6 +868,7 @@ shared_ptr<runtime::Executable>
avg_pool
->
get_window_movement_strides
(),
avg_pool
->
get_padding_below
(),
mode
);
}
break
;
}
case
OP_TYPEID
:
:
AvgPoolBackprop
:
...
...
@@ -825,8 +879,8 @@ shared_ptr<runtime::Executable>
static_pointer_cast
<
op
::
AvgPoolBackprop
>
(
op
);
do_avg_pool_backprop_operation
(
topology
,
get_input_name
(
op
,
0
),
get_input_shape
(
op
,
0
),
get_input_name
(
op
),
get_input_shape
(
op
),
get_output_name
(
op
),
get_output_shape
(
op
),
get_output_type
(
op
),
...
...
src/ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.cpp
View file @
f8632ea0
...
...
@@ -471,26 +471,31 @@ void runtime::intelgpu::do_max_pool_backprop_operation(cldnn::topology& topology
const
Shape
&
pad_below
)
{
const
string
entry_point_name
=
"op_max_pool_backprop_"
+
output_name
;
const
string
type_name
=
get_opencl_type_name
(
output_type
);
const
Shape
delta_data
(
delta_shape
.
cbegin
()
+
2
,
delta_shape
.
cend
());
const
Shape
output_data
(
output_shape
.
cbegin
()
+
2
,
output_shape
.
cend
());
codegen
::
CodeWriter
writer
;
vector
<
size_t
>
gws
;
// The kernel name and parameters
gen_func_def
(
writer
,
entry_point_name
,
{
2
,
"float"
},
{
input_shape
,
delta_shape
},
"float"
,
output_shape
);
gen_func_def
(
writer
,
entry_point_name
,
{
2
,
type_name
},
{
input_shape
,
delta_shape
},
type_name
,
output_shape
);
writer
.
block_begin
();
{
// Main loop over delta input array
writer
<<
"const uint i0 = get_global_id(0);"
;
gws
.
push_back
(
delta_shape
.
at
(
0
));
writer
<<
"/
/ for (uint i0 = 0; i0 < "
<<
delta_shape
.
at
(
0
)
<<
"; ++i0)
\n
"
;
writer
<<
"/
*trip count "
<<
delta_shape
.
at
(
0
)
<<
"*/
\n
"
;
writer
.
block_begin
();
{
writer
<<
"const uint i1 = get_global_id(1);"
;
gws
.
push_back
(
delta_shape
.
at
(
1
));
writer
<<
"/
/ for (uint i1 = 0; i1 < "
<<
delta_shape
.
at
(
1
)
<<
"; ++i1)
\n
"
;
writer
<<
"/
*trip count "
<<
delta_shape
.
at
(
1
)
<<
"*/
\n
"
;
writer
.
block_begin
();
{
// Initialization output
...
...
@@ -509,7 +514,7 @@ void runtime::intelgpu::do_max_pool_backprop_operation(cldnn::topology& topology
{
writer
<<
"[j"
<<
i
<<
"]"
;
}
writer
<<
" = 0
.0f
;
\n
"
;
writer
<<
" = 0;
\n
"
;
// Closing brackets for Initialization loop
for
(
auto
const
&
i
:
output_data
)
...
...
@@ -533,7 +538,9 @@ void runtime::intelgpu::do_max_pool_backprop_operation(cldnn::topology& topology
{
writer
<<
"uint save_i"
<<
i
+
2
<<
" = 0;
\n
"
;
}
writer
<<
"float max_elem = FLT_MIN;
\n
"
writer
<<
type_name
<<
" max_elem = "
<<
get_opencl_type_min_max_value
(
output_type
,
true
)
<<
";
\n
"
<<
"uint elem_exists = 0;
\n
"
;
// Loop over window shape
...
...
@@ -541,7 +548,7 @@ void runtime::intelgpu::do_max_pool_backprop_operation(cldnn::topology& topology
gen_window_loop
(
writer
,
output_shape
,
win_shape
,
win_stride
,
pad_below
,
true
);
{
writer
<<
"const
float
max_local = input0[i0][i1]"
;
writer
<<
"const
"
<<
type_name
<<
"
max_local = input0[i0][i1]"
;
// additional dimensions for input
for
(
size_t
i
=
0
;
i
<
win_shape
.
size
();
++
i
)
{
...
...
@@ -606,6 +613,169 @@ void runtime::intelgpu::do_max_pool_backprop_operation(cldnn::topology& topology
topology
.
add
(
op_max_pool_backprop
);
}
void
runtime
::
intelgpu
::
do_max_avg_pool_operation
(
cldnn
::
topology
&
topology
,
const
string
&
input_name
,
const
Shape
&
input_shape
,
const
string
&
output_name
,
const
Shape
&
output_shape
,
const
element
::
Type
&
output_type
,
const
Shape
&
win_shape
,
const
Shape
&
win_stride
,
const
Shape
&
pad_below
,
bool
include_padding
,
const
string
&
def_val
,
bool
is_max_pool
)
{
const
string
entry_point_name
=
"op_pool_"
+
to_string
(
is_max_pool
)
+
"_"
+
output_name
;
const
string
type_name
=
get_opencl_type_name
(
output_type
);
const
string
init_accumulator
=
is_max_pool
?
"-FLT_MAX"
:
def_val
;
codegen
::
CodeWriter
writer
;
vector
<
size_t
>
gws
;
const
Shape
input_data
(
input_shape
.
cbegin
()
+
2
,
input_shape
.
cend
());
const
Shape
output_data
(
output_shape
.
cbegin
()
+
2
,
output_shape
.
cend
());
// The kernel name and parameters
gen_func_def
(
writer
,
entry_point_name
,
{
type_name
},
{
input_shape
},
type_name
,
output_shape
);
writer
.
block_begin
();
{
// Main function body
writer
<<
"//Window:"
<<
win_shape
<<
" Stride: "
<<
win_stride
<<
"
\n
"
<<
"//padding included:"
<<
include_padding
<<
"
\n
"
<<
"//init value:"
<<
def_val
<<
"
\n\n
"
;
writer
<<
"const uint N_dim = get_global_id(0);/*trip count "
<<
input_shape
.
at
(
0
)
<<
"*/
\n
"
;
gws
.
push_back
(
output_shape
.
at
(
0
));
writer
<<
"const uint C_dim = get_global_id(1);/*trip count "
<<
input_shape
.
at
(
1
)
<<
"*/
\n
"
;
gws
.
push_back
(
output_shape
.
at
(
1
));
// Loops over output dimensions
size_t
var_idx
=
0
;
for
(
auto
i
=
output_data
.
begin
();
i
!=
output_data
.
end
();
++
i
)
{
writer
<<
"for (uint i"
<<
var_idx
<<
" = 0; i"
<<
var_idx
<<
" < "
<<
*
i
<<
"; ++i"
<<
var_idx
<<
")
\n
"
;
writer
.
block_begin
();
++
var_idx
;
}
writer
<<
type_name
<<
" accumulator = "
<<
init_accumulator
<<
";
\n
"
<<
"uint element_count = 0;
\n\n
"
;
// Loop over window
writer
<<
"// Over window iterations
\n
"
;
var_idx
=
0
;
for
(
auto
const
i
:
win_shape
)
{
writer
<<
"for (uint f"
<<
var_idx
<<
" = 0; f"
<<
var_idx
<<
" < "
<<
i
<<
"; ++f"
<<
var_idx
<<
")
\n
"
;
writer
.
block_begin
();
writer
<<
"uint input_idx"
<<
var_idx
<<
" = (i"
<<
var_idx
<<
" * "
<<
win_stride
.
at
(
var_idx
)
<<
" /*win_stride*/"
<<
") + (f"
<<
var_idx
<<
")"
<<
" - "
<<
pad_below
.
at
(
var_idx
)
<<
" /*pad_below*/;
\n
"
;
++
var_idx
;
}
// Generate conditionals
writer
<<
"if ("
;
var_idx
=
0
;
for
(
auto
const
&
i
:
input_data
)
{
if
(
var_idx
)
{
writer
<<
" && "
;
}
writer
<<
"(input_idx"
<<
var_idx
<<
" < "
<<
i
<<
")"
;
++
var_idx
;
}
writer
<<
")
\n
"
;
writer
.
block_begin
();
{
// Output element calculation
if
(
is_max_pool
)
{
writer
<<
"accumulator = max(accumulator, input0[N_dim][C_dim]"
<<
access_dims
(
win_shape
,
"input_idx"
)
<<
");
\n
"
;
}
else
{
writer
<<
"accumulator += input0[N_dim][C_dim]"
<<
access_dims
(
win_shape
,
"input_idx"
)
<<
";
\n
"
;
}
writer
<<
"++element_count;
\n
"
;
}
writer
.
block_end
();
if
(
include_padding
)
{
writer
<<
"else
\n
"
;
writer
.
block_begin
();
{
// Output element calculation
writer
<<
"accumulator += "
<<
def_val
<<
";
\n
"
<<
"++element_count;
\n
"
;
}
writer
.
block_end
();
}
// End of conditional generation
// Closing brackets for window loop
for
(
auto
const
&
i
:
win_shape
)
{
writer
.
block_end
();
}
writer
<<
"
\n
if (element_count)
\n
"
;
writer
.
block_begin
();
{
writer
<<
"output[N_dim][C_dim]"
<<
access_dims
(
output_data
)
<<
" = accumulator"
;
if
(
!
is_max_pool
)
{
writer
<<
" / element_count"
;
}
writer
<<
";
\n
"
;
}
writer
.
block_end
();
writer
<<
"else
\n
"
;
writer
.
block_begin
();
{
writer
<<
"output[N_dim][C_dim]"
<<
access_dims
(
output_data
)
<<
" = "
<<
init_accumulator
<<
";
\n
"
;
}
writer
.
block_end
();
// Closing brackets for output dimensions
for
(
const
auto
i
:
output_data
)
{
writer
.
block_end
();
}
}
// Main function body
writer
.
block_end
();
const
cldnn
::
layout
layout
=
IntelGPULayout
::
create_cldnn_layout
(
output_type
,
output_shape
);
const
cldnn
::
custom_gpu_primitive
op_avg_pool
(
output_name
,
{
input_name
},
{
writer
.
get_code
()},
entry_point_name
,
get_kernel_args
(
1
,
1
),
""
,
layout
,
gws
);
topology
.
add
(
op_avg_pool
);
}
void
runtime
::
intelgpu
::
do_avg_pool_backprop_operation
(
cldnn
::
topology
&
topology
,
const
string
&
delta_name
,
const
Shape
&
delta_shape
,
...
...
@@ -618,6 +788,7 @@ void runtime::intelgpu::do_avg_pool_backprop_operation(cldnn::topology& topology
const
bool
include_padding
)
{
const
string
entry_point_name
=
"op_avg_pool_backprop_"
+
output_name
;
const
string
type_name
=
get_opencl_type_name
(
output_type
);
codegen
::
CodeWriter
writer
;
vector
<
size_t
>
gws
;
...
...
@@ -627,22 +798,22 @@ void runtime::intelgpu::do_avg_pool_backprop_operation(cldnn::topology& topology
size_t
win_elems_size
=
shape_size
<
Shape
>
(
win_shape
);
// The kernel name and parameters
gen_func_def
(
writer
,
entry_point_name
,
{
"float"
},
{
delta_shape
},
"float"
,
output_shape
);
gen_func_def
(
writer
,
entry_point_name
,
{
type_name
},
{
delta_shape
},
type_name
,
output_shape
);
writer
.
block_begin
();
{
writer
<<
"size_t win_elems_size = "
<<
win_elems_size
<<
";
\n
"
;
writer
<<
"float computed_val = 0.0f
;
\n
"
;
writer
<<
type_name
<<
" computed_val = 0.0
;
\n
"
;
// Main loop over delta input array
writer
<<
"const uint i0 = get_global_id(0);"
;
gws
.
push_back
(
delta_shape
.
at
(
0
));
writer
<<
"/
/ for (uint i0 = 0; i0 < "
<<
delta_shape
.
at
(
0
)
<<
"; ++i0)
\n
"
;
writer
<<
"/
*trip count "
<<
delta_shape
.
at
(
0
)
<<
"*/
\n
"
;
writer
.
block_begin
();
{
writer
<<
"const uint i1 = get_global_id(1);"
;
gws
.
push_back
(
delta_shape
.
at
(
1
));
writer
<<
"/
/ for (uint i1 = 0; i1 < "
<<
delta_shape
.
at
(
1
)
<<
"; ++i1)
\n
"
;
writer
<<
"/
*trip count "
<<
delta_shape
.
at
(
1
)
<<
"*/
\n
"
;
writer
.
block_begin
();
{
// Initialization output
...
...
@@ -661,7 +832,7 @@ void runtime::intelgpu::do_avg_pool_backprop_operation(cldnn::topology& topology
{
writer
<<
"[j"
<<
i
<<
"]"
;
}
writer
<<
" = 0
.0f
;
\n
"
;
writer
<<
" = 0;
\n
"
;
// Closing brackets for Initialization loop
for
(
auto
const
&
i
:
output_data
)
...
...
src/ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp
View file @
f8632ea0
...
...
@@ -58,6 +58,19 @@ namespace ngraph
const
Shape
&
win_stride
,
const
Shape
&
pad_below
);
void
do_max_avg_pool_operation
(
cldnn
::
topology
&
topology
,
const
std
::
string
&
input_name
,
const
Shape
&
input_shape
,
const
std
::
string
&
output_name
,
const
Shape
&
output_shape
,
const
element
::
Type
&
output_type
,
const
Shape
&
win_shape
,
const
Shape
&
win_stride
,
const
Shape
&
pad_below
,
bool
include_padding
,
const
std
::
string
&
def_val
,
bool
is_max_pool
);
void
do_avg_pool_backprop_operation
(
cldnn
::
topology
&
topology
,
const
std
::
string
&
delta_name
,
const
Shape
&
delta_shape
,
...
...
src/ngraph/runtime/intelgpu/unit_test.manifest
View file @
f8632ea0
all_2x2x3_eliminate_dims_0_1
avg_pool_2d_2channel_2image_padded_only_above_do_not_include_in_computation
avg_pool_2d_2channel_2image_padded_only_above_include_in_computation
avg_pool_3d_uneven_strided_padded
backwards_batch_norm_training
backwards_dot_scalar_tensor
backwards_dot_tensor_scalar
backwards_dot_tensor_vector
backwards_maxpool_n2_c1_hw5_3x3_str2_max
backwards_maxpool_n4_c1_hw4_2x2_max
backwards_replace_slice
backwards_reverse_sequence_n3_c2_h3
backwards_reverse_sequence_n4d2c3h2w2
...
...
@@ -18,7 +13,6 @@ embedding_lookup_10x1_arbitrary
embedding_lookup_10x1_arbitrary_index_type_int
embedding_lookup_4x5_reverse
generate_mask
max_pool_3d
replace_slice_3d
replace_slice_3d_strided
replace_slice_3d_strided_different_strides
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment