Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
N
ngraph
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
ngraph
Commits
cf770aa5
Unverified
Commit
cf770aa5
authored
Mar 10, 2018
by
Jayaram Bobba
Committed by
GitHub
Mar 10, 2018
Browse files
Options
Browse Files
Download
Plain Diff
Merge branch 'master' into jbobba/batchnorm-inference
parents
7ce15121
8520e846
Show whitespace changes
Inline
Side-by-side
Showing
7 changed files
with
420 additions
and
134 deletions
+420
-134
cpu_call_frame.cpp
src/ngraph/runtime/cpu/cpu_call_frame.cpp
+1
-0
cpu_emitter.cpp
src/ngraph/runtime/cpu/cpu_emitter.cpp
+58
-117
cpu_runtime_context.hpp
src/ngraph/runtime/cpu/cpu_runtime_context.hpp
+1
-0
mkldnn_emitter.cpp
src/ngraph/runtime/cpu/mkldnn_emitter.cpp
+112
-0
mkldnn_emitter.hpp
src/ngraph/runtime/cpu/mkldnn_emitter.hpp
+29
-0
cpu_assignment.cpp
src/ngraph/runtime/cpu/pass/cpu_assignment.cpp
+51
-8
cpu_layout.cpp
src/ngraph/runtime/cpu/pass/cpu_layout.cpp
+168
-9
No files found.
src/ngraph/runtime/cpu/cpu_call_frame.cpp
View file @
cf770aa5
...
...
@@ -146,6 +146,7 @@ void runtime::cpu::CPU_CallFrame::setup_runtime_context()
}
const
auto
&
mkldnn_emitter
=
m_external_function
->
get_mkldnn_emitter
();
ctx
->
mkldnn_primitives
=
mkldnn_emitter
->
get_mkldnn_primitives
().
data
();
ctx
->
mkldnn_workspaces
=
mkldnn_emitter
->
get_mkldnn_workspaces
().
data
();
}
void
runtime
::
cpu
::
CPU_CallFrame
::
cleanup_runtime_context
()
...
...
src/ngraph/runtime/cpu/cpu_emitter.cpp
View file @
cf770aa5
...
...
@@ -2805,59 +2805,31 @@ namespace ngraph
if
(
runtime
::
cpu
::
mkldnn_utils
::
use_mkldnn_kernel
(
node
))
{
const
string
&
et
=
runtime
::
cpu
::
mkldnn_utils
::
get_mkldnn_data_type_string
(
args
[
0
].
get_element_type
());
auto
&
mkldnn_emitter
=
external_function
->
get_mkldnn_emitter
();
auto
diff_dst_desc
=
mkldnn_emitter
->
build_memory_descriptor
(
args
[
0
],
runtime
::
cpu
::
mkldnn_utils
::
get_input_mkldnn_format
(
node
,
0
));
auto
diff_src_desc
=
mkldnn_emitter
->
build_memory_descriptor
(
out
[
0
],
runtime
::
cpu
::
mkldnn_utils
::
get_output_mkldnn_format
(
node
,
0
));
auto
input_format
=
runtime
::
cpu
::
mkldnn_utils
::
get_input_mkldnn_format
(
node
,
0
);
auto
result_format
=
runtime
::
cpu
::
mkldnn_utils
::
get_output_mkldnn_format
(
node
,
0
);
size_t
avg_pool_index
=
mkldnn_emitter
->
build_pooling_backward
(
(
apb
->
get_include_padding_in_avg_computation
()
?
mkldnn
::
algorithm
::
pooling_avg_include_padding
:
mkldnn
::
algorithm
::
pooling_avg_exclude_padding
),
diff_dst_desc
,
diff_src_desc
,
apb
->
get_window_movement_strides
(),
apb
->
get_window_shape
(),
apb
->
get_padding_below
(),
apb
->
get_padding_above
());
writer
<<
"{
\n
"
;
writer
.
indent
++
;
auto
&
deps
=
mkldnn_emitter
->
get_primitive_deps
(
avg_pool_index
);
writer
<<
"cpu::mkldnn_utils::set_memory_ptr(ctx, "
<<
to_string
(
deps
[
0
])
<<
", "
<<
args
[
0
].
get_name
()
<<
");
\n
"
;
writer
<<
"cpu::mkldnn_utils::set_memory_ptr(ctx, "
<<
to_string
(
deps
[
1
])
<<
", "
<<
out
[
0
].
get_name
()
<<
");
\n
"
;
writer
<<
"engine cpu_engine = engine(engine::cpu, 0);
\n
"
;
writer
<<
"memory::desc input_data_desc = memory::desc({"
<<
join
(
delta_shape
)
<<
"}, "
<<
et
<<
", "
<<
runtime
::
cpu
::
mkldnn_utils
::
get_mkldnn_format_string
(
input_format
)
<<
");
\n
"
;
writer
<<
"memory::desc result_desc = memory::desc({"
<<
join
(
out_shape
)
<<
"}, "
<<
et
<<
", "
<<
runtime
::
cpu
::
mkldnn_utils
::
get_mkldnn_format_string
(
result_format
)
<<
");
\n
"
;
writer
<<
"memory input_data = memory({input_data_desc, cpu_engine}, "
<<
args
[
0
].
get_name
()
<<
");
\n
"
;
writer
<<
"memory result = memory({result_desc, cpu_engine}, "
<<
out
[
0
].
get_name
()
<<
");
\n
"
;
// Dummy forward primitive descriptor to keep MKLDNN happy
const
char
*
algorithm_enumerator
=
apb
->
get_include_padding_in_avg_computation
()
?
"algorithm::pooling_avg_include_padding"
:
"algorithm::pooling_avg_exclude_padding"
;
writer
<<
"pooling_forward::primitive_desc fwd_pd = "
"pooling_forward::primitive_desc("
<<
"{prop_kind::forward, "
<<
algorithm_enumerator
<<
", "
<<
"result_desc, input_data_desc, {"
<<
join
(
apb
->
get_window_movement_strides
())
<<
"}, {"
<<
join
(
apb
->
get_window_shape
())
<<
"}, "
<<
"{"
<<
join
(
apb
->
get_padding_below
())
<<
"}, "
<<
"{"
<<
join
(
apb
->
get_padding_above
())
<<
"}, "
<<
"padding_kind::zero}, cpu_engine);
\n
"
;
writer
<<
"auto avg_pooling = pooling_backward(pooling_backward::primitive_desc("
<<
"pooling_backward::desc("
<<
algorithm_enumerator
<<
", "
<<
"result_desc, input_data_desc, {"
<<
join
(
apb
->
get_window_movement_strides
())
<<
"}, {"
<<
join
(
apb
->
get_window_shape
())
<<
"}, "
<<
"{"
<<
join
(
apb
->
get_padding_below
())
<<
"}, "
<<
"{"
<<
join
(
apb
->
get_padding_above
())
<<
"}, "
<<
"padding_kind::zero), cpu_engine, fwd_pd), "
<<
"input_data, result);
\n
"
;
writer
<<
"auto s = stream(stream::kind::eager);
\n
"
<<
"s.submit({avg_pooling}).wait();
\n
"
;
writer
.
indent
--
;
writer
<<
"}
\n
"
;
writer
<<
"cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
<<
to_string
(
avg_pool_index
)
<<
");
\n
"
;
}
else
{
...
...
@@ -2885,79 +2857,48 @@ namespace ngraph
auto
mpb
=
static_cast
<
const
ngraph
::
op
::
MaxPoolBackprop
*>
(
node
);
auto
delta_shape
=
args
[
1
].
get_shape
();
auto
delta_rank
=
delta_shape
.
size
();
auto
out_shape
=
out
[
0
].
get_shape
();
if
(
delta_rank
==
4
&&
mpb
->
get_window_shape
().
size
()
==
2
&&
args
[
0
].
get_element_type
()
==
element
::
f32
)
if
(
runtime
::
cpu
::
mkldnn_utils
::
use_mkldnn_kernel
(
node
))
{
const
string
&
et
=
runtime
::
cpu
::
mkldnn_utils
::
get_mkldnn_data_type_string
(
args
[
1
].
get_element_type
());
auto
&
mkldnn_emitter
=
external_function
->
get_mkldnn_emitter
();
auto
fprop_src_desc
=
mkldnn_emitter
->
build_memory_descriptor
(
args
[
0
],
runtime
::
cpu
::
mkldnn_utils
::
get_input_mkldnn_format
(
node
,
0
));
auto
diff_dst_desc
=
mkldnn_emitter
->
build_memory_descriptor
(
args
[
1
],
runtime
::
cpu
::
mkldnn_utils
::
get_input_mkldnn_format
(
node
,
1
));
auto
diff_src_desc
=
mkldnn_emitter
->
build_memory_descriptor
(
out
[
0
],
runtime
::
cpu
::
mkldnn_utils
::
get_output_mkldnn_format
(
node
,
0
));
writer
<<
"{
\n
"
;
writer
.
indent
++
;
writer
<<
"engine cpu_engine = engine(engine::cpu, 0);
\n
"
;
writer
<<
"memory::desc input_data_desc = memory::desc({"
<<
join
(
delta_shape
)
<<
"}, "
<<
et
<<
", memory::format::nchw);
\n
"
;
writer
<<
"memory::desc result_desc = memory::desc({"
<<
join
(
out_shape
)
<<
"}, "
<<
et
<<
", memory::format::nchw);
\n
"
;
writer
<<
"memory input_data = memory({input_data_desc, cpu_engine}, "
<<
args
[
1
].
get_name
()
<<
");
\n
"
;
writer
<<
"memory result = memory({result_desc, cpu_engine}, "
<<
out
[
0
].
get_name
()
<<
");
\n
"
;
size_t
max_pool_index
=
mkldnn_emitter
->
build_max_pooling_backward
(
mkldnn
::
algorithm
::
pooling_max
,
fprop_src_desc
,
diff_dst_desc
,
diff_src_desc
,
mpb
->
get_window_movement_strides
(),
mpb
->
get_window_shape
(),
mpb
->
get_padding_below
(),
mpb
->
get_padding_above
());
auto
&
fdeps
=
mkldnn_emitter
->
get_primitive_deps
(
max_pool_index
-
1
);
writer
<<
"cpu::mkldnn_utils::set_memory_ptr(ctx, "
<<
to_string
(
fdeps
[
0
])
<<
", "
<<
args
[
0
].
get_name
()
<<
");
\n
"
;
writer
<<
"cpu::mkldnn_utils::set_memory_ptr(ctx, "
<<
to_string
(
fdeps
[
1
])
<<
", "
<<
out
[
0
].
get_name
()
<<
");
\n
"
;
writer
<<
"cpu::mkldnn_utils::set_memory_ptr(ctx, "
<<
to_string
(
fdeps
[
2
])
<<
", ctx->mkldnn_workspaces["
<<
fdeps
[
3
]
<<
"]);
\n
"
;
writer
<<
"cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
<<
to_string
(
max_pool_index
-
1
)
<<
");
\n
"
;
//----------------------------------------------------------------------------------------------
// create a forward primitive_desc, use this to query the workspace
// TODO: (pruthvi) this is a workaround, till we maintain a global context to refer to the corrosponding
// MKLDNN fprop kernel. this impacts performance
writer
<<
"memory::desc max_pool_input_desc = memory::desc({"
<<
join
(
args
[
0
].
get_shape
())
<<
"}, "
<<
et
<<
", memory::format::nchw);
\n
"
;
writer
<<
"memory::desc max_pool_result_desc = memory::desc({"
<<
join
(
args
[
1
].
get_shape
())
<<
"}, "
<<
et
<<
", memory::format::nchw);
\n
"
;
writer
<<
"memory maxpool_input_data = memory({max_pool_input_desc, cpu_engine}, "
<<
args
[
0
].
get_name
()
<<
");
\n
"
;
writer
<<
"memory maxpool_result = memory({max_pool_result_desc, cpu_engine}, "
<<
out
[
0
].
get_name
()
<<
");
\n
"
;
writer
<<
"pooling_forward::primitive_desc pool_fwd_pd = "
"pooling_forward::primitive_desc("
<<
"{prop_kind::forward, algorithm::pooling_max, "
<<
"max_pool_input_desc, max_pool_result_desc, {"
<<
join
(
mpb
->
get_window_movement_strides
())
<<
"}, {"
<<
join
(
mpb
->
get_window_shape
())
<<
"}, "
<<
"{"
<<
join
(
mpb
->
get_padding_below
())
<<
"}, "
<<
"{"
<<
join
(
mpb
->
get_padding_above
())
<<
"}, "
<<
"padding_kind::zero}, cpu_engine);
\n
"
;
// query the workspace from the forward primitive desc and allocates memory
writer
<<
"auto max_pool_workspace_memory = "
"memory(pool_fwd_pd.workspace_primitive_desc());
\n
"
;
//run fprop with this workspace attached
writer
<<
"pooling_forward max_pooling_fwd = pooling_forward("
<<
"pool_fwd_pd, maxpool_input_data, maxpool_result, "
"max_pool_workspace_memory);
\n
"
;
writer
<<
"stream s_fprop = stream(stream::kind::eager);
\n
"
<<
"s_fprop.submit({max_pooling_fwd}).wait();
\n
"
;
//---------------------------------------------------------------------------------------------
writer
<<
"auto max_pooling_bwd = "
"pooling_backward(pooling_backward::primitive_desc("
<<
"pooling_backward::desc(algorithm::pooling_max, "
<<
"result_desc, input_data_desc, {"
<<
join
(
mpb
->
get_window_movement_strides
())
<<
"}, {"
<<
join
(
mpb
->
get_window_shape
())
<<
"}, "
<<
"{"
<<
join
(
mpb
->
get_padding_below
())
<<
"}, "
<<
"{"
<<
join
(
mpb
->
get_padding_above
())
<<
"}, "
<<
"padding_kind::zero), cpu_engine, pool_fwd_pd), "
<<
"input_data, max_pool_workspace_memory, result);
\n
"
;
writer
<<
"auto s_bwd = stream(stream::kind::eager);
\n
"
<<
"s_bwd.submit({max_pooling_bwd}).wait();
\n
"
;
auto
&
bdeps
=
mkldnn_emitter
->
get_primitive_deps
(
max_pool_index
);
writer
<<
"cpu::mkldnn_utils::set_memory_ptr(ctx, "
<<
to_string
(
bdeps
[
0
])
<<
", "
<<
args
[
1
].
get_name
()
<<
");
\n
"
;
writer
<<
"cpu::mkldnn_utils::set_memory_ptr(ctx, "
<<
to_string
(
bdeps
[
1
])
<<
", ctx->mkldnn_workspaces["
<<
bdeps
[
3
]
<<
"]);
\n
"
;
writer
<<
"cpu::mkldnn_utils::set_memory_ptr(ctx, "
<<
to_string
(
bdeps
[
2
])
<<
", "
<<
out
[
0
].
get_name
()
<<
");
\n
"
;
writer
.
indent
--
;
writer
<<
"}
\n
"
;
writer
<<
"cpu::mkldnn_utils::mkldnn_invoke_primitive(ctx, "
<<
to_string
(
max_pool_index
)
<<
");
\n
"
;
}
else
{
...
...
src/ngraph/runtime/cpu/cpu_runtime_context.hpp
View file @
cf770aa5
...
...
@@ -37,6 +37,7 @@ namespace ngraph
{
int64_t
*
op_durations
;
mkldnn
::
primitive
*
const
*
mkldnn_primitives
;
char
*
const
*
mkldnn_workspaces
;
};
}
}
...
...
src/ngraph/runtime/cpu/mkldnn_emitter.cpp
View file @
cf770aa5
...
...
@@ -21,6 +21,7 @@
#include "ngraph/runtime/cpu/cpu_layout_descriptor.hpp"
#include "ngraph/runtime/cpu/cpu_tensor_view_wrapper.hpp"
#include "ngraph/runtime/cpu/mkldnn_invoke.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
#include "ngraph/types/element_type.hpp"
...
...
@@ -37,12 +38,24 @@ const std::vector<mkldnn::primitive*>& MKLDNNEmitter::get_mkldnn_primitives() co
return
m_mkldnn_primitives
;
}
const
std
::
vector
<
char
*>&
MKLDNNEmitter
::
get_mkldnn_workspaces
()
{
return
m_workspace_bufs
;
}
size_t
MKLDNNEmitter
::
insert_primitive
(
mkldnn
::
primitive
*
primitive
)
{
m_mkldnn_primitives
.
emplace_back
(
primitive
);
return
(
m_mkldnn_primitives
.
size
()
-
1
);
}
size_t
MKLDNNEmitter
::
insert_workspace
(
std
::
unique_ptr
<
MKLDNNWorkspace
>&
workspace
)
{
m_workspace_bufs
.
push_back
(
workspace
.
get
()
->
buf
);
m_workspaces
.
push_back
(
std
::
move
(
workspace
));
return
(
m_workspaces
.
size
()
-
1
);
}
const
std
::
vector
<
size_t
>&
MKLDNNEmitter
::
get_primitive_deps
(
size_t
index
)
const
{
return
m_primitive_deps
.
at
(
index
);
...
...
@@ -331,6 +344,105 @@ size_t MKLDNNEmitter::build_pooling_forward(mkldnn::algorithm pooling_algorithm,
return
primitive_index
;
}
size_t
MKLDNNEmitter
::
build_pooling_backward
(
mkldnn
::
algorithm
pooling_algorithm
,
const
mkldnn
::
memory
::
desc
&
diff_dst_desc
,
const
mkldnn
::
memory
::
desc
&
diff_src_desc
,
const
ngraph
::
Strides
&
window_strides
,
const
ngraph
::
Shape
&
window_shape
,
const
ngraph
::
Shape
&
padding_below
,
const
ngraph
::
Shape
&
padding_above
)
{
size_t
input_index
=
build_memory_primitive
(
diff_dst_desc
);
size_t
result_index
=
build_memory_primitive
(
diff_src_desc
);
size_t
primitive_index
=
insert_primitive
(
new
mkldnn
::
pooling_backward
(
{{
pooling_algorithm
,
diff_src_desc
,
diff_dst_desc
,
mkldnn
::
memory
::
dims
(
window_strides
.
begin
(),
window_strides
.
end
()),
mkldnn
::
memory
::
dims
(
window_shape
.
begin
(),
window_shape
.
end
()),
mkldnn
::
memory
::
dims
(
padding_below
.
begin
(),
padding_below
.
end
()),
mkldnn
::
memory
::
dims
(
padding_above
.
begin
(),
padding_above
.
end
()),
mkldnn
::
padding_kind
::
zero
},
mkldnn_utils
::
global_cpu_engine
,
{{
mkldnn
::
prop_kind
::
forward_training
,
pooling_algorithm
,
diff_src_desc
,
diff_dst_desc
,
mkldnn
::
memory
::
dims
(
window_strides
.
begin
(),
window_strides
.
end
()),
mkldnn
::
memory
::
dims
(
window_shape
.
begin
(),
window_shape
.
end
()),
mkldnn
::
memory
::
dims
(
padding_below
.
begin
(),
padding_below
.
end
()),
mkldnn
::
memory
::
dims
(
padding_above
.
begin
(),
padding_above
.
end
()),
mkldnn
::
padding_kind
::
zero
},
mkldnn_utils
::
global_cpu_engine
}},
*
m_mkldnn_primitives
[
input_index
],
*
m_mkldnn_primitives
[
result_index
]));
m_primitive_deps
[
primitive_index
]
=
{
input_index
,
result_index
};
return
primitive_index
;
}
size_t
MKLDNNEmitter
::
build_max_pooling_backward
(
mkldnn
::
algorithm
pooling_algorithm
,
const
mkldnn
::
memory
::
desc
&
fprop_src_desc
,
const
mkldnn
::
memory
::
desc
&
diff_dst_desc
,
const
mkldnn
::
memory
::
desc
&
diff_src_desc
,
const
ngraph
::
Strides
&
window_strides
,
const
ngraph
::
Shape
&
window_shape
,
const
ngraph
::
Shape
&
padding_below
,
const
ngraph
::
Shape
&
padding_above
)
{
size_t
fprop_src_index
=
build_memory_primitive
(
fprop_src_desc
);
size_t
diff_dst_index
=
build_memory_primitive
(
diff_dst_desc
);
size_t
diff_src_index
=
build_memory_primitive
(
diff_src_desc
);
mkldnn
::
pooling_forward
::
primitive_desc
fwd_pd
{
{
mkldnn
::
prop_kind
::
forward_training
,
pooling_algorithm
,
diff_src_desc
,
diff_dst_desc
,
mkldnn
::
memory
::
dims
(
window_strides
.
begin
(),
window_strides
.
end
()),
mkldnn
::
memory
::
dims
(
window_shape
.
begin
(),
window_shape
.
end
()),
mkldnn
::
memory
::
dims
(
padding_below
.
begin
(),
padding_below
.
end
()),
mkldnn
::
memory
::
dims
(
padding_above
.
begin
(),
padding_above
.
end
()),
mkldnn
::
padding_kind
::
zero
},
mkldnn_utils
::
global_cpu_engine
};
auto
ws_index
=
build_memory_primitive
(
fwd_pd
.
workspace_primitive_desc
().
desc
());
// Allocate workspace
// TODO (jbobba): Might need to align memory
auto
ws
=
std
::
unique_ptr
<
MKLDNNWorkspace
>
(
new
MKLDNNWorkspace
(
fwd_pd
.
workspace_primitive_desc
().
get_size
()));
auto
ws_buf_index
=
insert_workspace
(
ws
);
size_t
fwd_primitive_index
=
insert_primitive
(
new
mkldnn
::
pooling_forward
(
fwd_pd
,
*
m_mkldnn_primitives
[
fprop_src_index
],
*
m_mkldnn_primitives
[
diff_src_index
],
// HACK - Uses diff_src buffer. Safe since diff_src > fprop_dst
*
m_mkldnn_primitives
[
ws_index
]));
size_t
bwd_primitive_index
=
insert_primitive
(
new
mkldnn
::
pooling_backward
(
{{
pooling_algorithm
,
diff_src_desc
,
diff_dst_desc
,
mkldnn
::
memory
::
dims
(
window_strides
.
begin
(),
window_strides
.
end
()),
mkldnn
::
memory
::
dims
(
window_shape
.
begin
(),
window_shape
.
end
()),
mkldnn
::
memory
::
dims
(
padding_below
.
begin
(),
padding_below
.
end
()),
mkldnn
::
memory
::
dims
(
padding_above
.
begin
(),
padding_above
.
end
()),
mkldnn
::
padding_kind
::
zero
},
mkldnn_utils
::
global_cpu_engine
,
fwd_pd
},
*
m_mkldnn_primitives
[
diff_dst_index
],
*
m_mkldnn_primitives
[
ws_index
],
*
m_mkldnn_primitives
[
diff_src_index
]));
m_primitive_deps
[
fwd_primitive_index
]
=
{
fprop_src_index
,
diff_src_index
,
ws_index
,
ws_buf_index
};
m_primitive_deps
[
bwd_primitive_index
]
=
{
diff_dst_index
,
ws_index
,
diff_src_index
,
ws_buf_index
};
return
bwd_primitive_index
;
}
size_t
MKLDNNEmitter
::
build_reorder
(
const
mkldnn
::
memory
::
desc
&
input_desc
,
const
mkldnn
::
memory
::
desc
&
result_desc
)
{
...
...
src/ngraph/runtime/cpu/mkldnn_emitter.hpp
View file @
cf770aa5
...
...
@@ -36,6 +36,14 @@ namespace ngraph
class
CPU_ExternalFunction
;
class
TensorViewWrapper
;
class
MKLDNNWorkspace
{
public
:
MKLDNNWorkspace
(
size_t
size
)
{
buf
=
reinterpret_cast
<
char
*>
(
malloc
(
size
));
}
~
MKLDNNWorkspace
()
{
free
(
buf
);
}
char
*
buf
;
};
class
MKLDNNEmitter
{
public
:
...
...
@@ -43,8 +51,10 @@ namespace ngraph
~
MKLDNNEmitter
();
const
std
::
vector
<
mkldnn
::
primitive
*>&
get_mkldnn_primitives
()
const
;
const
std
::
vector
<
char
*>&
get_mkldnn_workspaces
();
size_t
insert_primitive
(
mkldnn
::
primitive
*
primitive
);
size_t
insert_workspace
(
std
::
unique_ptr
<
MKLDNNWorkspace
>&
workspace
);
const
std
::
vector
<
size_t
>&
get_primitive_deps
(
size_t
index
)
const
;
// TODO(jmenon): Get rid of TensorViewWrappers at some point
...
...
@@ -113,6 +123,23 @@ namespace ngraph
const
ngraph
::
Shape
&
padding_below
,
const
ngraph
::
Shape
&
padding_above
);
size_t
build_pooling_backward
(
mkldnn
::
algorithm
pooling_algorithm
,
const
mkldnn
::
memory
::
desc
&
diff_dst_desc
,
const
mkldnn
::
memory
::
desc
&
diff_src_desc
,
const
ngraph
::
Strides
&
window_strides
,
const
ngraph
::
Shape
&
window_shape
,
const
ngraph
::
Shape
&
padding_below
,
const
ngraph
::
Shape
&
padding_above
);
size_t
build_max_pooling_backward
(
mkldnn
::
algorithm
pooling_algorithm
,
const
mkldnn
::
memory
::
desc
&
fprop_src_desc
,
const
mkldnn
::
memory
::
desc
&
diff_dst_desc
,
const
mkldnn
::
memory
::
desc
&
diff_src_desc
,
const
ngraph
::
Strides
&
window_strides
,
const
ngraph
::
Shape
&
window_shape
,
const
ngraph
::
Shape
&
padding_below
,
const
ngraph
::
Shape
&
padding_above
);
size_t
build_reorder
(
const
mkldnn
::
memory
::
desc
&
input_desc
,
const
mkldnn
::
memory
::
desc
&
result_desc
);
...
...
@@ -153,6 +180,8 @@ namespace ngraph
std
::
vector
<
mkldnn
::
primitive
*>
m_mkldnn_primitives
;
std
::
vector
<
mkldnn
::
stream
>
m_mkldnn_streams
;
std
::
unordered_map
<
size_t
,
std
::
vector
<
size_t
>>
m_primitive_deps
;
std
::
vector
<
std
::
unique_ptr
<
MKLDNNWorkspace
>>
m_workspaces
;
std
::
vector
<
char
*>
m_workspace_bufs
;
};
}
}
...
...
src/ngraph/runtime/cpu/pass/cpu_assignment.cpp
View file @
cf770aa5
...
...
@@ -29,6 +29,7 @@
#include "ngraph/ops/avg_pool.hpp"
#include "ngraph/ops/batch_norm.hpp"
#include "ngraph/ops/convolution.hpp"
#include "ngraph/ops/max_pool.hpp"
#include "ngraph/ops/relu.hpp"
#include "ngraph/runtime/cpu/cpu_op_annotations.hpp"
#include "ngraph/runtime/cpu/mkldnn_utils.hpp"
...
...
@@ -245,10 +246,48 @@ namespace ngraph
}
}
template
<>
void
CPUAssignment
::
ASSIGN_DECL
(
ngraph
::
op
::
MaxPool
)
{
auto
max_pool
=
static_cast
<
op
::
MaxPool
*>
(
node
);
auto
arg0_shape
=
node
->
get_input_shape
(
0
);
auto
arg0_rank
=
arg0_shape
.
size
();
auto
result_shape
=
node
->
get_output_shape
(
0
);
if
(
arg0_rank
==
4
&&
max_pool
->
get_window_shape
().
size
()
==
2
&&
node
->
get_input_element_type
(
0
)
==
element
::
f32
)
{
auto
op_annotations
=
std
::
make_shared
<
ngraph
::
runtime
::
cpu
::
CPUOpAnnotations
>
();
op_annotations
->
set_mkldnn_op
(
true
);
max_pool
->
set_op_annotations
(
op_annotations
);
}
}
template
<>
void
CPUAssignment
::
ASSIGN_DECL
(
ngraph
::
op
::
MaxPoolBackprop
)
{
auto
max_pool
=
static_cast
<
op
::
MaxPoolBackprop
*>
(
node
);
auto
arg1_shape
=
node
->
get_input_shape
(
1
);
auto
arg1_rank
=
arg1_shape
.
size
();
auto
result_shape
=
node
->
get_output_shape
(
0
);
if
(
arg1_rank
==
4
&&
max_pool
->
get_window_shape
().
size
()
==
2
&&
node
->
get_input_element_type
(
1
)
==
element
::
f32
)
{
auto
op_annotations
=
std
::
make_shared
<
ngraph
::
runtime
::
cpu
::
CPUOpAnnotations
>
();
op_annotations
->
set_mkldnn_op
(
true
);
max_pool
->
set_op_annotations
(
op_annotations
);
}
}
template
<>
void
CPUAssignment
::
ASSIGN_DECL
(
ngraph
::
op
::
Relu
)
{
auto
avg_pool
=
static_cast
<
op
::
Relu
*>
(
node
);
auto
relu
=
static_cast
<
op
::
Relu
*>
(
node
);
auto
arg0_shape
=
node
->
get_input_shape
(
0
);
auto
arg0_rank
=
arg0_shape
.
size
();
...
...
@@ -260,7 +299,7 @@ namespace ngraph
auto
op_annotations
=
std
::
make_shared
<
ngraph
::
runtime
::
cpu
::
CPUOpAnnotations
>
();
op_annotations
->
set_mkldnn_op
(
true
);
avg_pool
->
set_op_annotations
(
op_annotations
);
relu
->
set_op_annotations
(
op_annotations
);
}
}
...
...
@@ -280,18 +319,19 @@ namespace ngraph
template
<>
void
CPUAssignment
::
ASSIGN_DECL
(
ngraph
::
op
::
ReluBackprop
)
{
auto
avg_pool
=
static_cast
<
op
::
ReluBackprop
*>
(
node
);
auto
relu_bprop
=
static_cast
<
op
::
ReluBackprop
*>
(
node
);
auto
arg0_shape
=
node
->
get_input_shape
(
0
);
auto
arg0_rank
=
arg0_shape
.
size
();
auto
result_shape
=
node
->
get_output_shape
(
0
);
if
(
arg0_rank
==
4
&&
node
->
get_input_element_type
(
0
)
==
element
::
f32
)
if
((
arg0_rank
==
4
||
arg0_rank
==
2
)
&&
node
->
get_input_element_type
(
0
)
==
element
::
f32
)
{
auto
op_annotations
=
std
::
make_shared
<
ngraph
::
runtime
::
cpu
::
CPUOpAnnotations
>
();
op_annotations
->
set_mkldnn_op
(
true
);
avg_pool
->
set_op_annotations
(
op_annotations
);
relu_bprop
->
set_op_annotations
(
op_annotations
);
}
}
...
...
@@ -323,6 +363,9 @@ namespace ngraph
static
const
runtime
::
cpu
::
pass
::
AssignOpMap
s_dispatcher
{
{
TI
(
ngraph
::
op
::
Add
),
&
runtime
::
cpu
::
pass
::
CPUAssignment
::
assign
<
ngraph
::
op
::
Add
>
},
{
TI
(
ngraph
::
op
::
AvgPool
),
&
runtime
::
cpu
::
pass
::
CPUAssignment
::
assign
<
ngraph
::
op
::
AvgPool
>
},
{
TI
(
ngraph
::
op
::
AvgPoolBackprop
),
&
runtime
::
cpu
::
pass
::
CPUAssignment
::
assign
<
ngraph
::
op
::
AvgPoolBackprop
>
},
{
TI
(
ngraph
::
op
::
BatchNorm
),
&
runtime
::
cpu
::
pass
::
CPUAssignment
::
assign
<
ngraph
::
op
::
BatchNorm
>
},
{
TI
(
ngraph
::
op
::
BatchNormBackprop
),
&
runtime
::
cpu
::
pass
::
CPUAssignment
::
assign
<
ngraph
::
op
::
BatchNormBackprop
>
},
...
...
@@ -332,13 +375,13 @@ static const runtime::cpu::pass::AssignOpMap s_dispatcher{
&
runtime
::
cpu
::
pass
::
CPUAssignment
::
assign
<
ngraph
::
op
::
ConvolutionBackpropData
>
},
{
TI
(
ngraph
::
op
::
ConvolutionBackpropFilters
),
&
runtime
::
cpu
::
pass
::
CPUAssignment
::
assign
<
ngraph
::
op
::
ConvolutionBackpropFilters
>
},
{
TI
(
ngraph
::
op
::
MaxPool
),
&
runtime
::
cpu
::
pass
::
CPUAssignment
::
assign
<
ngraph
::
op
::
MaxPool
>
},
{
TI
(
ngraph
::
op
::
MaxPoolBackprop
),
&
runtime
::
cpu
::
pass
::
CPUAssignment
::
assign
<
ngraph
::
op
::
MaxPoolBackprop
>
},
{
TI
(
ngraph
::
op
::
ConvolutionBias
),
&
runtime
::
cpu
::
pass
::
CPUAssignment
::
assign
<
ngraph
::
op
::
ConvolutionBias
>
},
{
TI
(
ngraph
::
op
::
ConvolutionBiasBackpropFiltersBias
),
&
runtime
::
cpu
::
pass
::
CPUAssignment
::
assign
<
ngraph
::
op
::
ConvolutionBiasBackpropFiltersBias
>
},
{
TI
(
ngraph
::
op
::
AvgPool
),
&
runtime
::
cpu
::
pass
::
CPUAssignment
::
assign
<
ngraph
::
op
::
AvgPool
>
},
{
TI
(
ngraph
::
op
::
AvgPoolBackprop
),
&
runtime
::
cpu
::
pass
::
CPUAssignment
::
assign
<
ngraph
::
op
::
AvgPoolBackprop
>
},
{
TI
(
ngraph
::
op
::
Relu
),
&
runtime
::
cpu
::
pass
::
CPUAssignment
::
assign
<
ngraph
::
op
::
Relu
>
},
{
TI
(
ngraph
::
op
::
ReluBackprop
),
&
runtime
::
cpu
::
pass
::
CPUAssignment
::
assign
<
ngraph
::
op
::
ReluBackprop
>
},
...
...
src/ngraph/runtime/cpu/pass/cpu_layout.cpp
View file @
cf770aa5
...
...
@@ -31,6 +31,7 @@
#include "ngraph/ops/batch_norm.hpp"
#include "ngraph/ops/convolution.hpp"
#include "ngraph/ops/get_output_element.hpp"
#include "ngraph/ops/max_pool.hpp"
#include "ngraph/ops/op.hpp"
#include "ngraph/ops/relu.hpp"
#include "ngraph/ops/result.hpp"
...
...
@@ -645,11 +646,8 @@ namespace ngraph
}
catch
(
const
mkldnn
::
error
&
e
)
{
// TODO (jbobba): Check with MKLDNN folks if this is necessary
throw
ngraph_error
(
"MKLDNN Unsupported pooling layout"
+
to_string
(
input_layout
)
+
e
.
message
);
// prim_input_formats.push_back(memory::format::nchw);
// prim_output_formats.push_back(memory::format::nchw);
}
node
=
...
...
@@ -732,11 +730,169 @@ namespace ngraph
}
catch
(
const
mkldnn
::
error
&
e
)
{
// TODO (jbobba): Check with MKLDNN folks if this is necessary
throw
ngraph_error
(
"MKLDNN Unsupported pooling layout"
+
to_string
(
input_layout
)
+
e
.
message
);
// prim_input_formats.push_back(memory::format::nchw);
// prim_output_formats.push_back(memory::format::nchw);
}
node
=
insert_input_conversions
(
external_function
,
node
,
prim_input_formats
);
set_output_layouts
(
node
,
prim_output_formats
);
}
else
{
set_default_layouts
(
external_function
,
node
);
}
}
template
<>
void
CPULayout
::
LAYOUT_DECL
(
ngraph
::
op
::
MaxPool
)
{
if
(
runtime
::
cpu
::
mkldnn_utils
::
use_mkldnn_kernel
(
node
.
get
()))
{
auto
max_pool
=
static_cast
<
const
ngraph
::
op
::
MaxPool
*>
(
node
.
get
());
auto
arg0_shape
=
node
->
get_input_shape
(
0
);
auto
result_shape
=
node
->
get_output_shape
(
0
);
auto
filter_shape
=
max_pool
->
get_window_shape
();
auto
filter_strides
=
max_pool
->
get_window_movement_strides
();
auto
padding_below
=
max_pool
->
get_padding_below
();
auto
padding_above
=
max_pool
->
get_padding_above
();
memory
::
data_type
et
=
runtime
::
cpu
::
mkldnn_utils
::
get_mkldnn_data_type
(
node
->
get_input_element_type
(
0
));
algorithm
algorithm_enumerator
=
algorithm
::
pooling_max
;
memory
::
dims
mkldnn_arg0_shape
(
arg0_shape
.
begin
(),
arg0_shape
.
end
());
memory
::
dims
mkldnn_result_shape
(
result_shape
.
begin
(),
result_shape
.
end
());
memory
::
dims
mkldnn_filter_shape
(
filter_shape
.
begin
(),
filter_shape
.
end
());
memory
::
dims
mkldnn_filter_strides
(
filter_strides
.
begin
(),
filter_strides
.
end
());
memory
::
dims
mkldnn_padding_below
(
padding_below
.
begin
(),
padding_below
.
end
());
memory
::
dims
mkldnn_padding_above
(
padding_above
.
begin
(),
padding_above
.
end
());
auto
input_layout
=
runtime
::
cpu
::
mkldnn_utils
::
get_input_mkldnn_format
(
node
.
get
(),
0
);
auto
input_desc
=
memory
::
desc
(
mkldnn_arg0_shape
,
et
,
input_layout
);
auto
result_desc
=
memory
::
desc
(
mkldnn_result_shape
,
et
,
memory
::
format
::
any
);
vector
<
memory
::
format
>
prim_input_formats
;
vector
<
memory
::
format
>
prim_output_formats
;
try
{
auto
prim_desc
=
pooling_forward
::
primitive_desc
(
{
prop_kind
::
forward_inference
,
algorithm_enumerator
,
input_desc
,
result_desc
,
mkldnn_filter_strides
,
mkldnn_filter_shape
,
mkldnn_padding_below
,
mkldnn_padding_above
,
padding_kind
::
zero
},
runtime
::
cpu
::
mkldnn_utils
::
global_cpu_engine
);
prim_input_formats
.
push_back
(
input_layout
);
prim_output_formats
.
push_back
(
static_cast
<
memory
::
format
>
(
prim_desc
.
dst_primitive_desc
().
desc
().
data
.
format
));
// TODO (jbobba): Add workspace layouts here
}
catch
(
const
mkldnn
::
error
&
e
)
{
throw
ngraph_error
(
"MKLDNN Unsupported pooling fwd layout"
+
to_string
(
input_layout
)
+
e
.
message
);
}
node
=
insert_input_conversions
(
external_function
,
node
,
prim_input_formats
);
set_output_layouts
(
node
,
prim_output_formats
);
}
else
{
set_default_layouts
(
external_function
,
node
);
}
}
template
<>
void
CPULayout
::
LAYOUT_DECL
(
ngraph
::
op
::
MaxPoolBackprop
)
{
if
(
runtime
::
cpu
::
mkldnn_utils
::
use_mkldnn_kernel
(
node
.
get
()))
{
auto
max_pool
=
static_cast
<
const
ngraph
::
op
::
MaxPoolBackprop
*>
(
node
.
get
());
// arg 0 - fprop input
// arg 1 - delta
// Propagate fprop's input layout
auto
arg0_shape
=
node
->
get_input_shape
(
0
);
auto
arg1_shape
=
node
->
get_input_shape
(
1
);
auto
result_shape
=
node
->
get_output_shape
(
0
);
auto
filter_shape
=
max_pool
->
get_window_shape
();
auto
filter_strides
=
max_pool
->
get_window_movement_strides
();
auto
padding_below
=
max_pool
->
get_padding_below
();
auto
padding_above
=
max_pool
->
get_padding_above
();
memory
::
data_type
et
=
runtime
::
cpu
::
mkldnn_utils
::
get_mkldnn_data_type
(
node
->
get_input_element_type
(
1
));
algorithm
algorithm_enumerator
=
algorithm
::
pooling_max
;
memory
::
dims
mkldnn_arg0_shape
(
arg0_shape
.
begin
(),
arg0_shape
.
end
());
memory
::
dims
mkldnn_arg1_shape
(
arg1_shape
.
begin
(),
arg1_shape
.
end
());
memory
::
dims
mkldnn_result_shape
(
result_shape
.
begin
(),
result_shape
.
end
());
memory
::
dims
mkldnn_filter_shape
(
filter_shape
.
begin
(),
filter_shape
.
end
());
memory
::
dims
mkldnn_filter_strides
(
filter_strides
.
begin
(),
filter_strides
.
end
());
memory
::
dims
mkldnn_padding_below
(
padding_below
.
begin
(),
padding_below
.
end
());
memory
::
dims
mkldnn_padding_above
(
padding_above
.
begin
(),
padding_above
.
end
());
auto
fprop_input_layout
=
runtime
::
cpu
::
mkldnn_utils
::
get_input_mkldnn_format
(
node
.
get
(),
0
);
auto
diff_dst_desc
=
memory
::
desc
(
mkldnn_arg1_shape
,
et
,
fprop_input_layout
);
auto
diff_src_desc
=
memory
::
desc
(
mkldnn_arg0_shape
,
et
,
memory
::
format
::
any
);
vector
<
memory
::
format
>
prim_input_formats
;
vector
<
memory
::
format
>
prim_output_formats
;
try
{
auto
fwd_prim_desc
=
pooling_forward
::
primitive_desc
(
{
prop_kind
::
forward_training
,
algorithm_enumerator
,
diff_src_desc
,
diff_dst_desc
,
mkldnn_filter_strides
,
mkldnn_filter_shape
,
mkldnn_padding_below
,
mkldnn_padding_above
,
padding_kind
::
zero
},
runtime
::
cpu
::
mkldnn_utils
::
global_cpu_engine
);
auto
prim_desc
=
pooling_backward
::
primitive_desc
(
{
algorithm_enumerator
,
diff_src_desc
,
diff_dst_desc
,
mkldnn_filter_strides
,
mkldnn_filter_shape
,
mkldnn_padding_below
,
mkldnn_padding_above
,
padding_kind
::
zero
},
runtime
::
cpu
::
mkldnn_utils
::
global_cpu_engine
,
fwd_prim_desc
);
prim_input_formats
.
push_back
(
fprop_input_layout
);
prim_input_formats
.
push_back
(
fprop_input_layout
);
prim_output_formats
.
push_back
(
static_cast
<
memory
::
format
>
(
prim_desc
.
diff_src_primitive_desc
().
desc
().
data
.
format
));
}
catch
(
const
mkldnn
::
error
&
e
)
{
throw
ngraph_error
(
"MKLDNN Unsupported pooling layout"
+
to_string
(
fprop_input_layout
)
+
e
.
message
);
}
node
=
...
...
@@ -914,18 +1070,21 @@ namespace ngraph
static
const
runtime
::
cpu
::
pass
::
LayoutOpMap
s_dispatcher
{
{
TI
(
ngraph
::
op
::
Add
),
&
runtime
::
cpu
::
pass
::
CPULayout
::
layout
<
ngraph
::
op
::
Add
>
},
{
TI
(
ngraph
::
op
::
AvgPool
),
&
runtime
::
cpu
::
pass
::
CPULayout
::
layout
<
ngraph
::
op
::
AvgPool
>
},
{
TI
(
ngraph
::
op
::
AvgPoolBackprop
),
&
runtime
::
cpu
::
pass
::
CPULayout
::
layout
<
ngraph
::
op
::
AvgPoolBackprop
>
},
{
TI
(
ngraph
::
op
::
Convolution
),
&
runtime
::
cpu
::
pass
::
CPULayout
::
layout
<
ngraph
::
op
::
Convolution
>
},
{
TI
(
ngraph
::
op
::
ConvolutionBackpropData
),
&
runtime
::
cpu
::
pass
::
CPULayout
::
layout
<
ngraph
::
op
::
ConvolutionBackpropData
>
},
{
TI
(
ngraph
::
op
::
ConvolutionBackpropFilters
),
&
runtime
::
cpu
::
pass
::
CPULayout
::
layout
<
ngraph
::
op
::
ConvolutionBackpropFilters
>
},
{
TI
(
ngraph
::
op
::
MaxPool
),
&
runtime
::
cpu
::
pass
::
CPULayout
::
layout
<
ngraph
::
op
::
MaxPool
>
},
{
TI
(
ngraph
::
op
::
MaxPoolBackprop
),
&
runtime
::
cpu
::
pass
::
CPULayout
::
layout
<
ngraph
::
op
::
MaxPoolBackprop
>
},
{
TI
(
ngraph
::
op
::
ConvolutionBias
),
&
runtime
::
cpu
::
pass
::
CPULayout
::
layout
<
ngraph
::
op
::
ConvolutionBias
>
},
{
TI
(
ngraph
::
op
::
ConvolutionBiasBackpropFiltersBias
),
&
runtime
::
cpu
::
pass
::
CPULayout
::
layout
<
ngraph
::
op
::
ConvolutionBiasBackpropFiltersBias
>
},
{
TI
(
ngraph
::
op
::
AvgPool
),
&
runtime
::
cpu
::
pass
::
CPULayout
::
layout
<
ngraph
::
op
::
AvgPool
>
},
{
TI
(
ngraph
::
op
::
AvgPoolBackprop
),
&
runtime
::
cpu
::
pass
::
CPULayout
::
layout
<
ngraph
::
op
::
AvgPoolBackprop
>
},
{
TI
(
ngraph
::
op
::
BatchNorm
),
&
runtime
::
cpu
::
pass
::
CPULayout
::
layout
<
ngraph
::
op
::
BatchNorm
>
},
{
TI
(
ngraph
::
op
::
BatchNormBackprop
),
&
runtime
::
cpu
::
pass
::
CPULayout
::
layout
<
ngraph
::
op
::
BatchNormBackprop
>
},
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment