Commit 62e1bc26
authored Feb 14, 2019 by Sergey Shalnov
committed by Scott Cyphers, Feb 14, 2019

IntelGPU backend: Double datatype workaround implemented (#2435)
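clDNN's cldnn::data_types enum has no f64 entry, so before this commit a double tensor either threw "IntelGPULayout::get_cldnn_type: Unknown type" in the layout code or reached generated kernels whose parameters were hard-coded as float. The workaround, visible in the diffs below, has four parts:

- intelgpu_backend.cpp: BatchNorm ops are routed to the backend's own generated OpenCL kernels (do_batch_norm_operation) unless the data input is a 4D f32 tensor, the only case handled by the clDNN primitive here.
- intelgpu_layout.cpp: get_cldnn_type and create_cldnn_layout become switches over element::Type_t; f64 and u64 buffers are described with the same-sized i64 layout, and u32 with i32, so allocation and copies stay byte-correct.
- intelgpu_op_batchnorm.cpp: the generated OpenCL source is parameterized on the element type via get_opencl_type_name() instead of hard-coding float.
- unit_test.manifest: seven double-precision tests are removed from the skip list, i.e. re-enabled.

For orientation, here is a minimal sketch of the kind of element-type-to-OpenCL-type mapping a helper such as get_opencl_type_name() performs. The real helper lives elsewhere in the intelgpu backend sources; treat this as an illustration, not the actual implementation:

    // Sketch only, not ngraph source: map an ngraph element type to the
    // OpenCL C type name spliced into the generated kernel text.
    static std::string opencl_type_name_sketch(const element::Type& type)
    {
        switch (type.get_type_enum())
        {
        case element::Type_t::f64: return "double"; // the case this commit enables
        case element::Type_t::f32: return "float";
        case element::Type_t::i64: return "long";
        case element::Type_t::i32: return "int";
        default: throw std::invalid_argument("no OpenCL type mapping");
        }
    }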
parent f75e10c3

Showing 4 changed files with 87 additions and 75 deletions:

  +4  -2   src/ngraph/runtime/intelgpu/intelgpu_backend.cpp
  +25 -24  src/ngraph/runtime/intelgpu/intelgpu_layout.cpp
  +58 -42  src/ngraph/runtime/intelgpu/intelgpu_op_batchnorm.cpp
  +0  -7   src/ngraph/runtime/intelgpu/unit_test.manifest
src/ngraph/runtime/intelgpu/intelgpu_backend.cpp

@@ -1332,7 +1332,8 @@ shared_ptr<runtime::Executable>
             arguments_check(op, 5, 1);
 
-            if (get_input_shape(op, 2).size() != 4)
+            if ((get_input_shape(op, 2).size() != 4) ||
+                (get_input_type(op) != ngraph::element::f32))
             {
                 do_batch_norm_operation(topology,
                                         get_output_name(op),
...
@@ -1364,7 +1365,8 @@ shared_ptr<runtime::Executable>
                 static_pointer_cast<op::BatchNormTraining>(op);
             const double eps = bnorm->get_eps_value();
 
-            if (get_input_shape(op, 2).size() != 4)
+            if ((get_input_shape(op, 2).size() != 4) ||
+                (get_input_type(op) != ngraph::element::f32))
             {
                 string mean_name;
                 string variance_name;
...
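Both hunks implement the same routing rule: the clDNN batch-norm path is taken only for 4D f32 data (input 2 is the data tensor in this op's argument order), and any other rank or element type, including f64, now falls through to do_batch_norm_operation(), which emits a custom OpenCL kernel. Condensed into a single predicate (illustrative, not literal source):

    // When true, emit a custom OpenCL kernel instead of using the
    // clDNN batch_norm primitive.
    const bool use_custom_kernel = (get_input_shape(op, 2).size() != 4) ||
                                   (get_input_type(op) != ngraph::element::f32);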
src/ngraph/runtime/intelgpu/intelgpu_layout.cpp

@@ -54,32 +54,19 @@ bool runtime::intelgpu::IntelGPULayout::
 cldnn::data_types
     runtime::intelgpu::IntelGPULayout::get_cldnn_type(const element::Type& element_type)
 {
-    if ((element_type == ngraph::element::i8) || (element_type == ngraph::element::boolean))
+    switch (element_type.get_type_enum())
     {
-        return cldnn::data_types::i8;
-    }
-    else if (element_type == ngraph::element::u8)
-    {
-        return cldnn::data_types::u8;
-    }
-    else if (element_type == ngraph::element::i32)
-    {
-        return cldnn::data_types::i32;
-    }
-    else if (element_type == ngraph::element::i64)
-    {
-        return cldnn::data_types::i64;
-    }
-    else if (element_type == ngraph::element::f32)
-    {
-        return cldnn::data_types::f32;
-    }
-    else
-    {
-        ostringstream os;
-        os << "IntelGPULayout::get_cldnn_type: Unknown type " << element_type;
-        throw invalid_argument(os.str());
+    case element::Type_t::i8:
+    case element::Type_t::boolean: return cldnn::data_types::i8;
+    case element::Type_t::u8: return cldnn::data_types::u8;
+    case element::Type_t::i32: return cldnn::data_types::i32;
+    case element::Type_t::i64: return cldnn::data_types::i64;
+    case element::Type_t::f32: return cldnn::data_types::f32;
     }
+
+    ostringstream os;
+    os << "IntelGPULayout::get_cldnn_type: Unknown type " << element_type;
+    throw invalid_argument(os.str());
 }
 
 cldnn::tensor runtime::intelgpu::IntelGPULayout::create_cldnn_tensor(const Shape& element_shape)
...
@@ -131,13 +118,27 @@ cldnn::layout runtime::intelgpu::IntelGPULayout::create_cldnn_layout(
     const cldnn::tensor tensor = create_cldnn_tensor(element_shape);
     cldnn::data_types data_type;
 
-    if ((element_type == ngraph::element::i16) || (element_type == ngraph::element::u16))
+    switch (element_type.get_type_enum())
     {
+    case element::Type_t::i16:
+    case element::Type_t::u16:
+    {
         data_type = cldnn::data_types::f16;
+        break;
     }
-    else
+    case element::Type_t::u32:
     {
-        data_type = get_cldnn_type(element_type);
+        data_type = cldnn::data_types::i32;
+        break;
     }
+    case element::Type_t::u64:
+    case element::Type_t::f64:
+    {
+        data_type = cldnn::data_types::i64;
+        break;
+    }
+    default: { data_type = get_cldnn_type(element_type); }
+    }
+
     return cldnn::layout(data_type, format, tensor);
...
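The new cases rely on size-compatible aliasing: the cldnn layout chiefly drives buffer allocation and copying, so an 8-byte double can travel in an i64-described layout and a 4-byte u32 in an i32 one, with the generated kernels reinterpreting the bytes under the correct OpenCL type (the pre-existing i16/u16 -> f16 case already used the same 2-byte trick). A standalone check of the size assumptions this leans on, as a sketch in plain C++:

    // Sanity checks for the storage-aliasing assumption behind the new cases.
    #include <cstdint>
    static_assert(sizeof(double) == sizeof(std::int64_t), "f64 stored as i64");
    static_assert(sizeof(std::uint64_t) == sizeof(std::int64_t), "u64 stored as i64");
    static_assert(sizeof(std::uint32_t) == sizeof(std::int32_t), "u32 stored as i32");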
src/ngraph/runtime/intelgpu/intelgpu_op_batchnorm.cpp

@@ -64,11 +64,12 @@ void runtime::intelgpu::do_create_mean(cldnn::topology& topology,
     const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, channel_shape);
     const string entry_point_name = "create_mean_" + output_name;
     const size_t output_counts = shape_size<Shape>(input_shape) / input_shape.at(channel_axis);
+    const string kernel_data_type = get_opencl_type_name(output_type);
     codegen::CodeWriter writer;
 
-    writer << "__kernel void " << entry_point_name << "( const __global float input"
-           << array_dims(input_shape) << ", __global float output" << array_dims(channel_shape)
-           << ")\n";
+    writer << "__kernel void " << entry_point_name << "( const __global "
+           << kernel_data_type << " input" << array_dims(input_shape) << ", __global "
+           << kernel_data_type << " output" << array_dims(channel_shape) << ")\n";
 
     writer.block_begin();
     { // Main function body
...
@@ -78,7 +79,7 @@ void runtime::intelgpu::do_create_mean(cldnn::topology& topology,
            << input_shape.at(channel_axis) << "; ++i" << channel_axis << ")\n";
     writer.block_begin();
     {
-        writer << "float sum = 0.0f;\n";
+        writer << kernel_data_type << " sum = 0.0f;\n";
         size_t var_idx = 0;
         // Main loops
         for (auto const& i : input_shape)
...
@@ -139,11 +140,13 @@ void runtime::intelgpu::do_create_variance(cldnn::topology& topology,
     const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, channel_shape);
     const string entry_point_name = "create_variance_" + output_name;
     const size_t output_counts = shape_size<Shape>(input_shape) / input_shape.at(channel_axis);
+    const string kernel_data_type = get_opencl_type_name(output_type);
     codegen::CodeWriter writer;
 
-    writer << "__kernel void " << entry_point_name << "( const __global float input"
-           << array_dims(input_shape) << ", const __global float mean" << array_dims(channel_shape)
-           << ", __global float output" << array_dims(channel_shape) << ")\n";
+    writer << "__kernel void " << entry_point_name << "( const __global " << kernel_data_type
+           << " input" << array_dims(input_shape) << ", const __global " << kernel_data_type
+           << " mean" << array_dims(channel_shape) << ", __global " << kernel_data_type << " output"
+           << array_dims(channel_shape) << ")\n";
 
     writer.block_begin();
     { // Main function body
...
@@ -153,7 +156,7 @@ void runtime::intelgpu::do_create_variance(cldnn::topology& topology,
            << input_shape.at(channel_axis) << "; ++i" << channel_axis << ")\n";
     writer.block_begin();
     {
-        writer << "float sum = 0.0f;\n";
+        writer << kernel_data_type << " sum = 0.0f;\n";
         size_t var_idx = 0;
         // Main loops
...
@@ -168,8 +171,8 @@ void runtime::intelgpu::do_create_variance(cldnn::topology& topology,
             ++var_idx;
         }
 
-        writer << "float mean_diff = input" << access_dims(input_shape) << " - mean[i"
-               << channel_axis << "];\n";
+        writer << kernel_data_type << " mean_diff = input" << access_dims(input_shape)
+               << " - mean[i" << channel_axis << "];\n";
         writer << "sum += mean_diff * mean_diff;\n";
         var_idx = 0;
...
@@ -217,14 +220,17 @@ void runtime::intelgpu::do_batch_norm_operation(cldnn::topology& topology,
     const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, input_shape);
     const vector<size_t> gws(input_shape.begin(), input_shape.begin() + 2);
     const string entry_point_name = "batch_norm_" + output_name;
+    const string kernel_data_type = get_opencl_type_name(output_type);
     codegen::CodeWriter writer;
 
     // The kernel name and parameters
     writer << "__attribute__((reqd_work_group_size(1,1,1)))\n"
            << "__kernel void " << entry_point_name
-           << "(const __global float *input0, const __global float *input1,"
-           << " const __global float *input2, const __global float *input3,"
-           << " const __global float *input4, __global float *output)\n";
+           << "(const __global " << kernel_data_type
+           << " *input0, const __global " << kernel_data_type << " *input1,"
+           << " const __global " << kernel_data_type << " *input2, const __global "
+           << kernel_data_type << " *input3,"
+           << " const __global " << kernel_data_type << " *input4, __global " << kernel_data_type << " *output)\n";
 
     writer.block_begin();
     { // Main function body
...
@@ -234,11 +240,12 @@ void runtime::intelgpu::do_batch_norm_operation(cldnn::topology& topology,
            << "); /* channel_axis trip count " << input_shape.at(channel_axis) << "*/\n";
 
         // Invariants for the rest of the loops
-        writer << "const float gamma = input1[i" << channel_axis << "];\n"
-               << "const float beta = input2[i" << channel_axis << "];\n"
-               << "const float mean = input3[i" << channel_axis << "];\n"
-               << "const float variance = input4[i" << channel_axis << "];\n"
-               << "const float var_sqrt = (gamma / sqrt(variance + " << eps << "));\n";
+        writer << "const " << kernel_data_type << " gamma = input1[i" << channel_axis << "];\n"
+               << "const " << kernel_data_type << " beta = input2[i" << channel_axis << "];\n"
+               << "const " << kernel_data_type << " mean = input3[i" << channel_axis << "];\n"
+               << "const " << kernel_data_type << " variance = input4[i" << channel_axis << "];\n"
+               << "const " << kernel_data_type << " var_sqrt = (gamma / sqrt(variance + " << eps
+               << "));\n";
 
         writer << "const uint i0 = get_global_id(0);"
                << " /* batch axis trip count " << input_shape.at(0) << "*/\n";
...
@@ -285,14 +292,16 @@ void runtime::intelgpu::do_create_variance_back(cldnn::topology& topology,
     const Shape channel_shape = get_channel_shape(input_shape, "create_variance_back");
     const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, channel_shape);
     const string entry_point_name = "create_variance_back_" + output_name;
+    const string kernel_data_type = get_opencl_type_name(output_type);
     codegen::CodeWriter writer;
     vector<size_t> gws;
 
-    writer << "__kernel void " << entry_point_name << "(const __global float input"
-           << array_dims(input_shape) << ", const __global float delta" << array_dims(input_shape)
-           << ", const __global float mean" << array_dims(channel_shape)
-           << ", const __global float variance" << array_dims(channel_shape)
-           << ", __global float output" << array_dims(channel_shape) << ")\n";
+    writer << "__kernel void " << entry_point_name << "(const __global " << kernel_data_type
+           << " input" << array_dims(input_shape) << ", const __global " << kernel_data_type
+           << " delta" << array_dims(input_shape) << ", const __global " << kernel_data_type
+           << " mean" << array_dims(channel_shape) << ", const __global " << kernel_data_type
+           << " variance" << array_dims(channel_shape) << ", __global " << kernel_data_type
+           << " output" << array_dims(channel_shape) << ")\n";
 
     writer.block_begin();
     { // Main function body
...
@@ -302,10 +311,12 @@ void runtime::intelgpu::do_create_variance_back(cldnn::topology& topology,
         writer << "\nconst uint i" << channel_axis << " = get_global_id(" << channel_axis
                << "); /* channel_axis trip count " << input_shape.at(channel_axis) << "*/\n";
         gws.push_back(input_shape.at(channel_axis));
 
-        writer << "const float mean_loc = mean[i" << channel_axis << "];\n"
-               << "const float variance_loc = variance[i" << channel_axis << "];\n"
-               << "const float var_sqrt = 1.0f / sqrt(variance_loc + " << eps << ");\n";
-        writer << "float sum = 0.0f;\n";
+        writer << "const " << kernel_data_type << " mean_loc = mean[i" << channel_axis << "];\n"
+               << "const " << kernel_data_type << " variance_loc = variance[i" << channel_axis
+               << "];\n"
+               << "const " << kernel_data_type << " var_sqrt = 1.0f / sqrt(variance_loc + " << eps
+               << ");\n";
+        writer << kernel_data_type << " sum = 0.0f;\n";
 
         // Main loops
         writer << "for (uint i0 = 0; i0 < " << input_shape.at(0) << "; ++i0)\n";
...
@@ -317,8 +328,10 @@ void runtime::intelgpu::do_create_variance_back(cldnn::topology& topology,
         writer << "for (uint i3 = 0; i3 < " << input_shape.at(3) << "; ++i3)\n";
         writer.block_begin();
         {
-            writer << "const float input_loc = input" << access_dims(input_shape) << ";\n";
-            writer << "const float delta_loc = delta" << access_dims(input_shape) << ";\n";
+            writer << "const " << kernel_data_type << " input_loc = input"
+                   << access_dims(input_shape) << ";\n";
+            writer << "const " << kernel_data_type << " delta_loc = delta"
+                   << access_dims(input_shape) << ";\n";
             writer << "sum += (input_loc - mean_loc) * var_sqrt * delta_loc;\n";
         }
         writer.block_end();
...
@@ -360,17 +373,19 @@ void runtime::intelgpu::do_batch_norm_backprop_operation(cldnn::topology& topolo
     const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(type, shape);
     const string entry_point_name = "batch_norm_backprop_" + output_name;
     const size_t r_axes_size = shape_size(shape) / shape_size(channel_shape);
+    const string kernel_data_type = get_opencl_type_name(type);
     codegen::CodeWriter writer;
     vector<size_t> gws;
 
-    writer << "__kernel void " << entry_point_name << "(const __global float input"
-           << array_dims(shape) << ", const __global float delta" << array_dims(shape)
-           << ", const __global float mean" << array_dims(channel_shape)
-           << ", const __global float variance" << array_dims(channel_shape)
-           << ", const __global float gamma" << array_dims(channel_shape)
-           << ", const __global float gamma_backprop" << array_dims(channel_shape)
-           << ", const __global float beta_backprop" << array_dims(channel_shape)
-           << ", __global float output" << array_dims(shape) << ")\n";
+    writer << "__kernel void " << entry_point_name << "(const __global " << kernel_data_type
+           << " input" << array_dims(shape) << ", const __global " << kernel_data_type << " delta"
+           << array_dims(shape) << ", const __global " << kernel_data_type << " mean"
+           << array_dims(channel_shape) << ", const __global " << kernel_data_type << " variance"
+           << array_dims(channel_shape) << ", const __global " << kernel_data_type << " gamma"
+           << array_dims(channel_shape) << ", const __global " << kernel_data_type
+           << " gamma_backprop" << array_dims(channel_shape) << ", const __global "
+           << kernel_data_type << " beta_backprop" << array_dims(channel_shape) << ", __global "
+           << kernel_data_type << " output" << array_dims(shape) << ")\n";
 
     writer.block_begin();
     { // Main function body
...
@@ -378,10 +393,11 @@ void runtime::intelgpu::do_batch_norm_backprop_operation(cldnn::topology& topolo
         // Main loops
         gws = runtime::intelgpu::generate_loops(writer, shape, true);
 
-        writer << "float stddev = sqrt(variance[i" << channel_axis << "] + " << eps << ");\n";
-        writer << "float xhat = (input" << access_dims(shape) << " - mean[i" << channel_axis
-               << "]) / stddev;\n";
-        writer << "float norma = gamma[i" << channel_axis << "] / stddev;\n";
+        writer << kernel_data_type << " stddev = sqrt(variance[i" << channel_axis << "] + " << eps
+               << ");\n";
+        writer << kernel_data_type << " xhat = (input" << access_dims(shape) << " - mean[i"
+               << channel_axis << "]) / stddev;\n";
+        writer << kernel_data_type << " norma = gamma[i" << channel_axis << "] / stddev;\n";
 
         writer << "output" << access_dims(shape) << " = norma * (delta" << access_dims(shape)
                << " - (xhat * gamma_backprop[i" << channel_axis << "] + beta_backprop[i"
...
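With kernel_data_type spliced in, the writer emits type-correct OpenCL for every parameter that used to be hard-coded float. As an illustration, assuming a hypothetical f64 tensor of shape [2, 3, 4, 5], channel_axis 1, and an output named X, do_create_mean would now produce a signature roughly like:

    __kernel void create_mean_X( const __global double input[2][3][4][5],
                                 __global double output[3])

whereas the old code emitted float parameters regardless of the tensor's element type.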
src/ngraph/runtime/intelgpu/unit_test.manifest

 all_2x2x3_eliminate_dims_0_1
-argmin_trivial_in_double
 avg_pool_2d_2channel_2image_padded_only_above_do_not_include_in_computation
 avg_pool_2d_2channel_2image_padded_only_above_include_in_computation
 avg_pool_3d_uneven_strided_padded
...
@@ -30,10 +29,7 @@ embedding_lookup_10x1_arbitrary
 embedding_lookup_10x1_arbitrary_index_type_int
 embedding_lookup_4x5_reverse
 generate_mask
-max_3d_to_scalar_double
 max_pool_3d
-numeric_double_inf
-numeric_double_nan
 quantize
 quantize_axes
 quantize_clamp_int32
...
@@ -69,9 +65,6 @@ shape_of_matrix
 shape_of_scalar
 shape_of_vector
 softmax_axis_3d_double
-sum_stable_acc_double
-sum_stable_simple_double
-sum_trivial_in_double
 topk_1d_max_all
 topk_1d_max_one
 topk_1d_max_partial
...