ngraph / Commits / ab9fad24

Commit ab9fad24, authored 5 years ago by Sergey Shalnov, committed 5 years ago by Robert Kimball

IntelGPU backend: Custom kernels refactoring 2 (#2770)

Parent: 6f0c8190
Showing 8 changed files with 82 additions and 287 deletions (+82 −287):

  src/ngraph/runtime/intelgpu/intelgpu_backend.cpp              +8  −88
  src/ngraph/runtime/intelgpu/intelgpu_kernels.hpp              +16 −0
  src/ngraph/runtime/intelgpu/intelgpu_op_broadcast.cpp         +0  −0
  src/ngraph/runtime/intelgpu/intelgpu_op_broadcast.hpp         +0  −63
  src/ngraph/runtime/intelgpu/intelgpu_op_custom_func_call.cpp  +33 −23
  src/ngraph/runtime/intelgpu/intelgpu_op_custom_func_call.hpp  +0  −41
  src/ngraph/runtime/intelgpu/intelgpu_op_softmax.cpp           +25 −32
  src/ngraph/runtime/intelgpu/intelgpu_op_softmax.hpp           +0  −40
src/ngraph/runtime/intelgpu/intelgpu_backend.cpp
@@ -53,10 +53,7 @@
 #include "ngraph/runtime/intelgpu/intelgpu_kernels.hpp"
 #include "ngraph/runtime/intelgpu/intelgpu_layout.hpp"
 #include "ngraph/runtime/intelgpu/intelgpu_op_batchnorm.hpp"
-#include "ngraph/runtime/intelgpu/intelgpu_op_broadcast.hpp"
-#include "ngraph/runtime/intelgpu/intelgpu_op_custom_func_call.hpp"
 #include "ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp"
-#include "ngraph/runtime/intelgpu/intelgpu_op_softmax.hpp"
 #include "ngraph/runtime/intelgpu/intelgpu_tensor_view.hpp"
 #include "ngraph/runtime/intelgpu/visualize_tree.hpp"
@@ -671,14 +668,7 @@ shared_ptr<runtime::Executable>
             if ((shape_dim_count > 3) || ((shape_dim_count == 3) && (axes_size == 2)) ||
                 (op->get_input_element_type(0) != element::f32))
             {
-                do_softmax_operation(topology,
-                                     op->get_input_tensor_name(0),
-                                     op->get_input_shape(0),
-                                     op->get_input_element_type(0),
-                                     op->get_output_tensor_name(0),
-                                     op->get_output_shape(0),
-                                     op->get_output_element_type(0),
-                                     axes);
+                kern.emit<op::Softmax>(softmax_op);
             }
             else
             {
@@ -979,15 +969,7 @@ shared_ptr<runtime::Executable>
             }
             else
             {
-                do_bcast_sum_operation(topology,
-                                       op->get_input_tensor_name(0),
-                                       op->get_input_shape(0),
-                                       op->get_input_element_type(0),
-                                       op->get_output_tensor_name(0),
-                                       op->get_output_shape(0),
-                                       op->get_output_element_type(0),
-                                       axis,
-                                       true);
+                kern.emit<op::Broadcast>(broadcast);
             }
             break;
         }
@@ -1005,15 +987,7 @@ shared_ptr<runtime::Executable>
             }
             else
             {
-                do_bcast_sum_operation(topology,
-                                       op->get_input_tensor_name(0),
-                                       op->get_input_shape(0),
-                                       op->get_input_element_type(0),
-                                       op->get_output_tensor_name(0),
-                                       op->get_output_shape(0),
-                                       op->get_output_element_type(0),
-                                       axis,
-                                       false);
+                kern.emit<op::Sum>(sum);
             }
             break;
         }
@@ -1031,13 +1005,7 @@ shared_ptr<runtime::Executable>
             }
             else
             {
-                do_product_operation(topology,
-                                     op->get_input_tensor_name(0),
-                                     op->get_input_shape(0),
-                                     op->get_output_tensor_name(0),
-                                     op->get_output_shape(0),
-                                     op->get_output_element_type(0),
-                                     axis);
+                kern.emit<op::Product>(prod);
             }
             break;
         }
@@ -1098,44 +1066,16 @@ shared_ptr<runtime::Executable>
         {
             arguments_check(op, 1, 1);

-            const shared_ptr<op::All> all_op = static_pointer_cast<op::All>(op);
-            const AxisSet& axis = all_op->get_reduction_axes();
-            const shared_ptr<Node> def_val = all_op->get_default_value();
-            const shared_ptr<op::Constant> def_const = static_pointer_cast<op::Constant>(def_val);
-            const vector<std::string>& values = def_const->get_value_strings();
-
-            // Empty axis is not a case for do_equal_propagation()
-            do_all_any_op(topology,
-                          op->get_input_tensor_name(0),
-                          op->get_input_shape(0),
-                          op->get_output_tensor_name(0),
-                          op->get_output_shape(0),
-                          op->get_output_element_type(0),
-                          axis,
-                          "lhs && rhs",
-                          values.at(0));
+            kern.emit<op::All>(static_pointer_cast<op::All>(op));

             break;
         }
         case OP_TYPEID::Any:
         {
             arguments_check(op, 1, 1);

-            const shared_ptr<op::Any> any_op = static_pointer_cast<op::Any>(op);
-            const AxisSet& axis = any_op->get_reduction_axes();
-            const shared_ptr<Node> def_val = any_op->get_default_value();
-            const shared_ptr<op::Constant> def_const = static_pointer_cast<op::Constant>(def_val);
-            const vector<std::string>& values = def_const->get_value_strings();
-
-            // Empty axis is not a case for do_equal_propagation()
-            do_all_any_op(topology,
-                          op->get_input_tensor_name(0),
-                          op->get_input_shape(0),
-                          op->get_output_tensor_name(0),
-                          op->get_output_shape(0),
-                          op->get_output_element_type(0),
-                          axis,
-                          "lhs || rhs",
-                          values.at(0));
+            kern.emit<op::Any>(static_pointer_cast<op::Any>(op));

             break;
         }
         case OP_TYPEID::ReluBackprop:
@@ -1788,34 +1728,14 @@ shared_ptr<runtime::Executable>
         {
             arguments_check(op, 1, 1);

-            const shared_ptr<op::Min> min_op = static_pointer_cast<op::Min>(op);
-            const AxisSet& axis = min_op->get_reduction_axes();
-
-            do_max_min_operation(topology,
-                                 op->get_input_tensor_name(0),
-                                 op->get_input_shape(0),
-                                 op->get_output_tensor_name(0),
-                                 op->get_output_shape(0),
-                                 op->get_output_element_type(0),
-                                 axis,
-                                 true);
+            kern.emit<op::Min>(static_pointer_cast<op::Min>(op));

             break;
         }
         case OP_TYPEID::Max:
         {
             arguments_check(op, 1, 1);

-            const shared_ptr<op::Max> max_op = static_pointer_cast<op::Max>(op);
-            const AxisSet& axis = max_op->get_reduction_axes();
-
-            do_max_min_operation(topology,
-                                 op->get_input_tensor_name(0),
-                                 op->get_input_shape(0),
-                                 op->get_output_tensor_name(0),
-                                 op->get_output_shape(0),
-                                 op->get_output_element_type(0),
-                                 axis,
-                                 false);
+            kern.emit<op::Max>(static_pointer_cast<op::Max>(op));

             break;
         }
         case OP_TYPEID::OneHot:
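Taken together, these hunks show the shape of the refactoring: per-op calls into free functions (do_softmax_operation, do_bcast_sum_operation, do_product_operation, do_all_any_op, do_max_min_operation) collapse into single kern.emit<op::X>(...) calls. The commit does not show CustomKernels::emit itself; the following is only a minimal sketch of how such a dispatcher is typically wired, assuming emit is a member template of CustomKernels:

    // Hypothetical sketch, not part of this diff: emit<OP_TYPE>() resolves to
    // the matching build_krnl() overload at compile time, then queues the result.
    template <typename OP_TYPE>
    void CustomKernels::emit(const std::shared_ptr<OP_TYPE>& op)
    {
        // build_krnl(...) is overloaded per op type (see intelgpu_kernels.hpp);
        // queue_krnl(...) adds the generated kernel(s) to the cldnn topology.
        // shared_ptr<OP_TYPE> converts implicitly to shared_ptr<Node> here.
        queue_krnl(build_krnl(op), op);
    }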
src/ngraph/runtime/intelgpu/intelgpu_kernels.hpp
@@ -23,9 +23,17 @@
 #include <CPP/topology.hpp>

 #include "ngraph/node.hpp"
+#include "ngraph/op/all.hpp"
+#include "ngraph/op/any.hpp"
+#include "ngraph/op/broadcast.hpp"
 #include "ngraph/op/convolution.hpp"
+#include "ngraph/op/max.hpp"
+#include "ngraph/op/min.hpp"
+#include "ngraph/op/product.hpp"
 #include "ngraph/op/select.hpp"
 #include "ngraph/op/slice.hpp"
+#include "ngraph/op/softmax.hpp"
+#include "ngraph/op/sum.hpp"

 namespace ngraph
 {
@@ -98,11 +106,19 @@ public:
 private:
     void queue_krnl(const krnl_info& krn_info, const std::shared_ptr<Node>& op);

+    krnl_info build_krnl(const std::shared_ptr<op::All>& op) const;
+    krnl_info build_krnl(const std::shared_ptr<op::Any>& op) const;
+    krnl_info build_krnl(const std::shared_ptr<op::Broadcast>& op) const;
     krnl_info build_krnl(const std::shared_ptr<op::Convolution>& op) const;
     krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBackpropData>& op) const;
     krnl_info build_krnl(const std::shared_ptr<op::ConvolutionBackpropFilters>& op) const;
+    krnl_info build_krnl(const std::shared_ptr<op::Max>& op) const;
+    krnl_info build_krnl(const std::shared_ptr<op::Min>& op) const;
+    krnl_info build_krnl(const std::shared_ptr<op::Product>& op) const;
     krnl_info build_krnl(const std::shared_ptr<op::Select>& op) const;
     krnl_info build_krnl(const std::shared_ptr<op::Slice>& op) const;
+    krnl_info build_krnl(const std::shared_ptr<op::Softmax>& op) const;
+    krnl_info build_krnl(const std::shared_ptr<op::Sum>& op) const;

     cldnn::topology& stream;
     size_t m_count_krnls;
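With this overload set, adding custom-kernel support for a new op becomes a two-step change. A hedged illustration (op::Erf is just a placeholder name here, not part of this commit):

    // 1. Declare one more overload in CustomKernels (intelgpu_kernels.hpp):
    krnl_info build_krnl(const std::shared_ptr<op::Erf>& op) const;

    // 2. Route the backend's switch case through the dispatcher
    //    (following the pattern seen in intelgpu_backend.cpp above):
    case OP_TYPEID::Erf:
    {
        arguments_check(op, 1, 1);
        kern.emit<op::Erf>(static_pointer_cast<op::Erf>(op));
        break;
    }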
src/ngraph/runtime/intelgpu/intelgpu_op_broadcast.cpp
(diff collapsed; +0 −0, no lines shown)
src/ngraph/runtime/intelgpu/intelgpu_op_broadcast.hpp
deleted 100644 → 0
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************

#pragma once

#include <CPP/topology.hpp>

#include "ngraph/axis_set.hpp"
#include "ngraph/shape.hpp"

namespace ngraph
{
    namespace runtime
    {
        namespace intelgpu
        {
            // This implements Broadcast and Sum nGraph operations.
            // input_shape (bcast) or output_shape (sum) can be empty.
            // If the shape is empty it means scalar
            void do_bcast_sum_operation(cldnn::topology& topology,
                                        const std::string& input_name,
                                        const Shape& input_shape,
                                        const element::Type& input_type,
                                        const std::string& output_name,
                                        const Shape& output_shape,
                                        const element::Type& output_type,
                                        const AxisSet& axis,
                                        bool is_bcast);

            // This implements Min and Max operations depends on is_min parameter
            void do_max_min_operation(cldnn::topology& topology,
                                      const std::string& input_name,
                                      const Shape& input_shape,
                                      const std::string& output_name,
                                      const Shape& output_shape,
                                      const element::Type& output_type,
                                      const AxisSet& axis,
                                      bool is_min);

            // This implements Product operation
            void do_product_operation(cldnn::topology& topology,
                                      const std::string& input_name,
                                      const Shape& input_shape,
                                      const std::string& output_name,
                                      const Shape& output_shape,
                                      const element::Type& output_type,
                                      const AxisSet& axis);
        }
    }
}
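As the header's own comment notes, one routine serves both Broadcast (is_bcast == true) and Sum (is_bcast == false). A host-side 1-D analogue of the two behaviors the flag selects, a sketch under the standard nGraph semantics (Broadcast replicates along the new axes, Sum reduces over them), not the generated OpenCL:

    #include <vector>

    // is_bcast == true: replicate a scalar across an axis of length n.
    std::vector<float> bcast_scalar(float in, size_t n)
    {
        return std::vector<float>(n, in);
    }

    // is_bcast == false: fold an axis back down to a scalar.
    float sum_axis(const std::vector<float>& in)
    {
        float acc = 0.0f;
        for (float v : in)
            acc += v;
        return acc;
    }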
src/ngraph/runtime/intelgpu/intelgpu_op_custom_func_call.cpp
@@ -14,26 +14,29 @@
 // limitations under the License.
 //*****************************************************************************

 #include <CPP/custom_gpu_primitive.hpp>

 #include "ngraph/code_writer.hpp"
-#include "ngraph/runtime/intelgpu/intelgpu_layout.hpp"
-#include "ngraph/runtime/intelgpu/intelgpu_op_custom_func_call.hpp"
+#include "ngraph/runtime/intelgpu/intelgpu_kernels.hpp"
 #include "ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp"
+#include "ngraph/op/constant.hpp"

 using namespace std;
 using namespace ngraph;
 using namespace ngraph::runtime::intelgpu;

-void runtime::intelgpu::do_all_any_op(cldnn::topology& topology,
-                                      const string& input0_name,
-                                      const Shape& input0_shape,
-                                      const string& output_name,
-                                      const Shape& output_shape,
-                                      const element::Type& output_type,
-                                      const AxisSet& axis,
-                                      const std::string& operation,
-                                      const std::string& init_val)
+static CustomKernels::krnl_info do_all_any_op(const shared_ptr<op::util::LogicalReduction>& op,
+                                              const string& operation)
 {
+    const string& input0_name = op->get_input_tensor_name(0);
+    const Shape& input0_shape = op->get_input_shape(0);
+    const string& output_name = op->get_output_tensor_name(0);
+    const Shape& output_shape = op->get_output_shape(0);
+    const element::Type& output_type = op->get_output_element_type(0);
+    const AxisSet& axis = op->get_reduction_axes();
+    const shared_ptr<Node> def_val = op->get_default_value();
+    const shared_ptr<op::Constant> def_const = static_pointer_cast<op::Constant>(def_val);
+    const vector<string>& values = def_const->get_value_strings();
+    const string& init_val = values.at(0);
+
     const string entry_point_name = "custom_op_all_any_" + output_name;
     const string kernel_type_name = get_opencl_type_name(output_type);
     const size_t input_size = shape_size<Shape>(input0_shape);
@@ -94,14 +97,21 @@ void runtime::intelgpu::do_all_any_op(cldnn::topology& topology,
     }
     // End of function bracket
     writer.block_end();

-    const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
-    const cldnn::custom_gpu_primitive op_all_any(output_name,
-                                                 {input0_name},
-                                                 {writer.get_code()},
-                                                 entry_point_name,
-                                                 get_kernel_args(1, 1),
-                                                 "",
-                                                 layout,
-                                                 {1});
-    topology.add(op_all_any);
+    const CustomKernelInfo krn_ret(output_name,
+                                   output_shape,
+                                   output_type,
+                                   {input0_name},
+                                   {writer.get_code()},
+                                   entry_point_name);
+    return {krn_ret};
 }
+
+CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::All>& op) const
+{
+    return do_all_any_op(op, "lhs && rhs");
+}
+
+CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::Any>& op) const
+{
+    return do_all_any_op(op, "lhs || rhs");
+}
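The refactored do_all_any_op now reads everything it needs off the LogicalReduction node and keeps only the OpenCL reduction expression as a parameter: "lhs && rhs" for All, "lhs || rhs" for Any, seeded with the op's default value. A host-side analogue of the fold the generated kernel performs (a sketch, not the kernel itself):

    #include <vector>

    // All folds with &&, seeded true ("1"); Any folds with ||, seeded false ("0").
    bool reduce_all_any(const std::vector<bool>& in, bool is_all)
    {
        bool acc = is_all; // mirrors init_val taken from the op's default value
        for (bool v : in)
            acc = is_all ? (acc && v) : (acc || v);
        return acc;
    }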
src/ngraph/runtime/intelgpu/intelgpu_op_custom_func_call.hpp
deleted 100644 → 0
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************

#pragma once

#include <CPP/topology.hpp>

#include "ngraph/axis_set.hpp"
#include "ngraph/shape.hpp"

namespace ngraph
{
    namespace runtime
    {
        namespace intelgpu
        {
            void do_all_any_op(cldnn::topology& topology,
                               const std::string& input0_name,
                               const Shape& input0_shape,
                               const std::string& output_name,
                               const Shape& output_shape,
                               const element::Type& output_type,
                               const AxisSet& axis,
                               const std::string& operation,
                               const std::string& init_val);
        }
    }
}
src/ngraph/runtime/intelgpu/intelgpu_op_softmax.cpp
@@ -14,15 +14,13 @@
 // limitations under the License.
 //*****************************************************************************

 #include <CPP/custom_gpu_primitive.hpp>

 #include "ngraph/code_writer.hpp"
-#include "ngraph/runtime/intelgpu/intelgpu_layout.hpp"
+#include "ngraph/runtime/intelgpu/intelgpu_kernels.hpp"
 #include "ngraph/runtime/intelgpu/intelgpu_op_custom_kernels.hpp"
-#include "ngraph/runtime/intelgpu/intelgpu_op_softmax.hpp"

 using namespace std;
 using namespace ngraph;
 using namespace ngraph::runtime::intelgpu;

 static Shape shape_dims(const Shape& dimentions, const AxisSet& axis = {})
 {
@@ -45,22 +43,20 @@ static Shape shape_dims(const Shape& dimentions, const AxisSet& axis = {})
     return output_shape;
 }

-void runtime::intelgpu::do_softmax_operation(cldnn::topology& topology,
-                                             const string& input_name,
-                                             const Shape& input_shape,
-                                             const element::Type& input_type,
-                                             const string& output_name,
-                                             const Shape& output_shape,
-                                             const element::Type& output_type,
-                                             const AxisSet& axes)
+CustomKernels::krnl_info CustomKernels::build_krnl(const shared_ptr<op::Softmax>& op) const
 {
-    const cldnn::layout layout = IntelGPULayout::create_cldnn_layout(output_type, output_shape);
+    const string& input_name = op->get_input_tensor_name(0);
+    const Shape& input_shape = op->get_input_shape(0);
+    const element::Type& input_type = op->get_input_element_type(0);
+    const string& output_name = op->get_output_tensor_name(0);
+    const Shape& output_shape = op->get_output_shape(0);
+    const element::Type& output_type = op->get_output_element_type(0);
+    const AxisSet& axes = op->get_axes();
+
     const string entry_point_name = "softmax_" + output_name;
     const string middle_name = entry_point_name + "_middle";
     const string entry_point_middle_name = "softmax_middle_" + output_name;
     const string expression = "output" + access_dims(input_shape, "i", axes) + " = 0.0f;\n";
     const Shape new_shape = shape_dims(output_shape, axes);
-    const cldnn::layout layout_middle = IntelGPULayout::create_cldnn_layout(output_type, new_shape);

     CodeWriter writer0;
     CodeWriter writer1;
     vector<size_t> gws;
@@ -81,15 +77,13 @@ void runtime::intelgpu::do_softmax_operation(cldnn::topology& topology,
     }
     writer0.block_end();

-    const cldnn::custom_gpu_primitive op_softmax_middle(middle_name,
-                                                        {input_name},
-                                                        {writer0.get_code()},
-                                                        entry_point_middle_name,
-                                                        get_kernel_args(1, 1),
-                                                        "",
-                                                        layout_middle,
-                                                        gws);
-    topology.add(op_softmax_middle);
+    const CustomKernelInfo op_softmax_middle(middle_name,
+                                             new_shape,
+                                             output_type,
+                                             {input_name},
+                                             {writer0.get_code()},
+                                             entry_point_middle_name,
+                                             gws);

     writer1 << "__kernel void " << entry_point_name << "(const __global "
             << get_opencl_type_name(input_type) << " input0" << array_dims(input_shape)
@@ -107,13 +101,12 @@ void runtime::intelgpu::do_softmax_operation(cldnn::topology& topology,
     }
     writer1.block_end();

-    const cldnn::custom_gpu_primitive op_softmax(output_name,
-                                                 {input_name, middle_name},
-                                                 {writer1.get_code()},
-                                                 entry_point_name,
-                                                 get_kernel_args(2, 1),
-                                                 "",
-                                                 layout,
-                                                 gws);
-    topology.add(op_softmax);
+    const CustomKernelInfo op_softmax(output_name,
+                                      output_shape,
+                                      output_type,
+                                      {input_name, middle_name},
+                                      {writer1.get_code()},
+                                      entry_point_name,
+                                      gws);
+
+    return {op_softmax_middle, op_softmax};
 }
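build_krnl(op::Softmax) still emits two kernels, now described as CustomKernelInfo records rather than cldnn::custom_gpu_primitives: a "softmax_middle_*" kernel that accumulates over the reduction axes into a tensor of shape shape_dims(output_shape, axes), and a "softmax_*" kernel that consumes both the input and that intermediate to produce the normalized output. A host-side 1-D analogue of the two passes (a sketch; the real kernels are generated OpenCL, and this omits the max-subtraction a numerically careful softmax would add):

    #include <cmath>
    #include <vector>

    std::vector<float> softmax_1d(const std::vector<float>& x)
    {
        // pass 1 ("middle" kernel): reduce over the axis
        float sum = 0.0f;
        for (float v : x)
            sum += std::exp(v);

        // pass 2: normalize each element by the axis total
        std::vector<float> out(x.size());
        for (size_t i = 0; i < x.size(); ++i)
            out[i] = std::exp(x[i]) / sum;
        return out;
    }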
src/ngraph/runtime/intelgpu/intelgpu_op_softmax.hpp
deleted 100644 → 0
//*****************************************************************************
// Copyright 2017-2019 Intel Corporation
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
//*****************************************************************************

#pragma once

#include <CPP/topology.hpp>

#include "ngraph/shape.hpp"
#include "ngraph/type/element_type.hpp"

namespace ngraph
{
    namespace runtime
    {
        namespace intelgpu
        {
            void do_softmax_operation(cldnn::topology& topology,
                                      const std::string& input_name,
                                      const Shape& input_shape,
                                      const element::Type& input_type,
                                      const std::string& output_name,
                                      const Shape& output_shape,
                                      const element::Type& output_type,
                                      const AxisSet& axes);
        }
    }
}