ngraph / Commits / fdab16db

Unverified commit fdab16db, authored Dec 13, 2017 by Adam Procter; committed by GitHub, Dec 13, 2017.

Codegen for >2D concat following ref kernel pattern (#296)

Parent: c89b1a84

Showing 15 changed files with 718 additions and 38 deletions (+718 / -38).
Files changed:

    src/ngraph/CMakeLists.txt                          +2    -0
    src/ngraph/codegen/code_writer.cpp                 +11   -0
    src/ngraph/codegen/code_writer.hpp                 +3    -0
    src/ngraph/coordinate_transform.cpp                +25   -25
    src/ngraph/coordinate_transform.hpp                +7    -2
    src/ngraph/runtime/cpu/cpu_emitter.cpp             +44   -0
    src/ngraph/runtime/cpu/cpu_emitter.hpp             +2    -0
    src/ngraph/runtime/cpu/cpu_external_function.cpp   +1    -0
    src/ngraph/runtime/cpu/cpu_kernel_emitters.cpp     +51   -0
    src/ngraph/runtime/cpu/cpu_kernel_emitters.hpp     +38   -0
    src/ngraph/runtime/cpu/cpu_kernel_utils.cpp        +227  -0
    src/ngraph/runtime/cpu/cpu_kernel_utils.hpp        +48   -0
    test/CMakeLists.txt                                +1    -2
    test/backend_performance.cpp                       +120  -0
    test/backend_test.in.cpp                           +138  -9
src/ngraph/CMakeLists.txt

@@ -151,6 +151,8 @@ if (NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR AND
     runtime/cpu/cpu_backend.cpp
     runtime/cpu/cpu_manager.cpp
     runtime/cpu/cpu_kernels.cpp
+    runtime/cpu/cpu_kernel_emitters.cpp
+    runtime/cpu/cpu_kernel_utils.cpp
     runtime/cpu/cpu_emitter.cpp
     runtime/cpu/cpu_external_function.cpp
     runtime/cpu/cpu_tensor_view.cpp
src/ngraph/codegen/code_writer.cpp

@@ -20,6 +20,7 @@ using namespace ngraph;

 codegen::CodeWriter::CodeWriter()
     : indent(0)
     , m_pending_indent(true)
+    , m_temporary_name_count(0)
 {
 }

@@ -32,3 +33,13 @@ void codegen::CodeWriter::operator+=(const std::string& s)
 {
     *this << s;
 }
+
+std::string codegen::CodeWriter::generate_temporary_name(std::string prefix)
+{
+    std::stringstream ss;
+
+    ss << prefix << "__" << m_temporary_name_count;
+    m_temporary_name_count++;
+
+    return ss.str();
+}
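A quick usage sketch (variable names illustrative): the counter lives on the writer and is shared across prefixes, so successive calls never collide, whatever prefix is passed.

    codegen::CodeWriter writer;

    // The suffix comes from a single per-writer counter, not a per-prefix one.
    std::string a = writer.generate_temporary_name();    // "tempvar__0"
    std::string b = writer.generate_temporary_name("i"); // "i__1"
    std::string c = writer.generate_temporary_name("i"); // "i__2"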
src/ngraph/codegen/code_writer.hpp

@@ -66,7 +66,10 @@ public:
         return out;
     }

+    std::string generate_temporary_name(std::string prefix = "tempvar");
+
 private:
     std::stringstream m_ss;
     bool m_pending_indent;
+    size_t m_temporary_name_count;
 };
src/ngraph/coordinate_transform.cpp

@@ -30,18 +30,18 @@ inline T ceil_div(T x, T y)
     return (x == 0 ? 0 : (1 + (x - 1) / y));
 }

-CoordinateTransform::CoordinateTransform(const Shape& source_space_shape,
+CoordinateTransform::CoordinateTransform(const Shape& source_shape,
                                          const Coordinate& source_start_corner,
                                          const Coordinate& source_end_corner,
                                          const Strides& source_strides,
                                          const AxisVector& source_axis_order)
-    : m_source_space_shape(source_space_shape)
+    : m_source_shape(source_shape)
     , m_source_start_corner(source_start_corner)
     , m_source_end_corner(source_end_corner)
     , m_source_strides(source_strides)
     , m_source_axis_order(source_axis_order)
 {
-    m_n_axes = source_space_shape.size();
+    m_n_axes = source_shape.size();

     if (m_n_axes != source_start_corner.size())
     {

@@ -80,8 +80,8 @@ CoordinateTransform::CoordinateTransform(const Shape& source_space_shape,
     for (size_t i = 0; i < m_n_axes; i++)
     {
-        if (source_start_corner[i] >= source_space_shape[i] &&
-            !(source_start_corner[i] == 0 && source_space_shape[i] == 0))
+        if (source_start_corner[i] >= source_shape[i] &&
+            !(source_start_corner[i] == 0 && source_shape[i] == 0))
         {
             std::stringstream ss;

@@ -92,7 +92,7 @@ CoordinateTransform::CoordinateTransform(const Shape& source_space_shape,
     for (size_t i = 0; i < m_n_axes; i++)
     {
-        if (source_end_corner[i] > source_space_shape[i])
+        if (source_end_corner[i] > source_shape[i])
         {
             std::stringstream ss;

@@ -120,7 +120,7 @@ CoordinateTransform::CoordinateTransform(const Shape& source_space_shape,
     }
 }

-AxisVector default_axis_order(size_t n_axes)
+static AxisVector default_axis_order(size_t n_axes)
 {
     AxisVector result(n_axes);
     size_t n = 0;

@@ -129,50 +129,50 @@ AxisVector default_axis_order(size_t n_axes)
     return result;
 }

-CoordinateTransform::CoordinateTransform(const Shape& source_space_shape,
+CoordinateTransform::CoordinateTransform(const Shape& source_shape,
                                          const Coordinate& source_start_corner,
                                          const Coordinate& source_end_corner,
                                          const Strides& source_strides)
-    : CoordinateTransform(source_space_shape,
+    : CoordinateTransform(source_shape,
                           source_start_corner,
                           source_end_corner,
                           source_strides,
-                          default_axis_order(source_space_shape.size()))
+                          default_axis_order(source_shape.size()))
 {
 }

-Strides default_source_strides(size_t n_axes)
+static Strides default_source_strides(size_t n_axes)
 {
     return AxisVector(n_axes, 1);
 }

-CoordinateTransform::CoordinateTransform(const Shape& source_space_shape,
+CoordinateTransform::CoordinateTransform(const Shape& source_shape,
                                          const Coordinate& source_start_corner,
                                          const Coordinate& source_end_corner)
-    : CoordinateTransform(source_space_shape,
+    : CoordinateTransform(source_shape,
                           source_start_corner,
                           source_end_corner,
-                          default_source_strides(source_space_shape.size()),
-                          default_axis_order(source_space_shape.size()))
+                          default_source_strides(source_shape.size()),
+                          default_axis_order(source_shape.size()))
 {
 }

-Coordinate default_source_start_corner(size_t n_axes)
+static Coordinate default_source_start_corner(size_t n_axes)
 {
     return Coordinate(n_axes, 0);
 }

-Coordinate default_source_end_corner(const Shape& source_space_shape)
+static Coordinate default_source_end_corner(const Shape& source_shape)
 {
-    return source_space_shape;
+    return source_shape;
 }

-CoordinateTransform::CoordinateTransform(const Shape& source_space_shape)
-    : CoordinateTransform(source_space_shape,
-                          default_source_start_corner(source_space_shape.size()),
-                          default_source_end_corner(source_space_shape),
-                          default_source_strides(source_space_shape.size()),
-                          default_axis_order(source_space_shape.size()))
+CoordinateTransform::CoordinateTransform(const Shape& source_shape)
+    : CoordinateTransform(source_shape,
+                          default_source_start_corner(source_shape.size()),
+                          default_source_end_corner(source_shape),
+                          default_source_strides(source_shape.size()),
+                          default_axis_order(source_shape.size()))
 {
 }

@@ -185,7 +185,7 @@ size_t CoordinateTransform::index_source(const Coordinate& c) const
     for (size_t axis = m_n_axes; axis-- > 0;)
     {
         index += c[axis] * stride;
-        stride *= m_source_space_shape[axis];
+        stride *= m_source_shape[axis];
     }

     return index;
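The loop in index_source (unchanged here apart from the rename) is standard row-major flattening. A standalone restatement with a worked example, using values chosen purely for illustration:

    #include <cstddef>
    #include <vector>

    // Row-major flattening, as index_source does: walk axes from last to
    // first, accumulating the running stride.
    std::size_t row_major_index(const std::vector<std::size_t>& shape,
                                const std::vector<std::size_t>& c)
    {
        std::size_t index = 0;
        std::size_t stride = 1;
        for (std::size_t axis = shape.size(); axis-- > 0;)
        {
            index += c[axis] * stride;
            stride *= shape[axis];
        }
        return index;
    }

    // For shape (2, 3, 4) and coordinate (1, 2, 3):
    //   axis 2: index += 3 * 1  -> 3   (stride becomes 4)
    //   axis 1: index += 2 * 4  -> 11  (stride becomes 12)
    //   axis 0: index += 1 * 12 -> 23  (stride becomes 24)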
src/ngraph/coordinate_transform.hpp

@@ -41,12 +41,17 @@ namespace ngraph
                             const Coordinate& source_start_corner,
                             const Coordinate& source_end_corner);

-        CoordinateTransform(const Shape& source_space_shape);
+        CoordinateTransform(const Shape& source_shape);

         size_t index(const Coordinate& c) const;
         bool in_bounds(const Coordinate& c) const;
         Coordinate get_target_shape() const;
+        Shape get_source_shape() { return m_source_shape; }
+        Coordinate get_source_start_corner() { return m_source_start_corner; }
+        Coordinate get_source_end_corner() { return m_source_end_corner; }
+        Strides get_source_strides() { return m_source_strides; }
+        AxisVector get_source_axis_order() { return m_source_axis_order; }

         class Iterator
         {
         public:

@@ -73,7 +78,7 @@ namespace ngraph
         Coordinate to_source_coordinate(const Coordinate& c) const;
         size_t index_source(const Coordinate& c) const;

-        Shape m_source_space_shape;
+        Shape m_source_shape;
         Shape m_source_start_corner;
         Shape m_source_end_corner;
         Strides m_source_strides;
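The new getters make the defaulted fields observable. For a transform built from only a shape, their values are fully determined by the static default_* helpers in the .cpp above; expected values for a small illustrative case:

    CoordinateTransform t(Shape{2, 3});

    // t.get_source_shape()        == Shape{2, 3}
    // t.get_source_start_corner() == Coordinate{0, 0}  (all zeros)
    // t.get_source_end_corner()   == Coordinate{2, 3}  (the shape itself)
    // t.get_source_strides()      == Strides{1, 1}     (unit strides)
    // t.get_source_axis_order()   == AxisVector{0, 1}  (identity order)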
src/ngraph/runtime/cpu/cpu_emitter.cpp

@@ -34,6 +34,7 @@
 #include "ngraph/ops/slice.hpp"
 #include "ngraph/ops/sum.hpp"
 #include "ngraph/runtime/cpu/cpu_emitter.hpp"
+#include "ngraph/runtime/cpu/cpu_kernel_emitters.hpp"
 #include "ngraph/util.hpp"

 using namespace std;

@@ -250,6 +251,49 @@ void runtime::cpu::CPU_Emitter::EmitConcat(const ngraph::Node* n,
         m_out.indent--;
         m_out << "}\n";
     }
+    else
+    {
+        if (m_use_ref_kernels)
+        {
+            auto axis = (dynamic_cast<const op::Concat*>(n))->get_concatenation_axis();
+
+            std::vector<std::string> arg_names;
+            std::vector<std::string> arg_shape_strings;
+
+            for (auto arg : args)
+            {
+                arg_names.push_back(arg.get_name());
+                arg_shape_strings.push_back("{" + join(arg.get_shape()) + "}");
+            }
+
+            m_out << "kernel::concat<" << out[0].get_type() << ">({" << join(arg_names) << "},\n";
+            m_out << "               " << out[0].get_name() << ",\n";
+            m_out << "               {" << join(arg_shape_strings) << "},\n";
+            m_out << "               {" << join(result_shape) << "},\n";
+            m_out << "               " << axis << ");\n";
+        }
+        else
+        {
+            auto axis = (dynamic_cast<const op::Concat*>(n))->get_concatenation_axis();
+
+            std::vector<std::string> arg_names;
+            std::vector<Shape> arg_shapes;
+
+            for (auto arg : args)
+            {
+                arg_names.push_back(arg.get_name());
+                arg_shapes.push_back(arg.get_shape());
+            }
+
+            kernels::emit_concat(m_out,
+                                 args[0].get_element_type().c_type_string(),
+                                 arg_names,
+                                 out[0].get_name(),
+                                 arg_shapes,
+                                 result_shape,
+                                 axis);
+        }
+    }
 }

 void runtime::cpu::CPU_Emitter::EmitDivide(const ngraph::Node* n,
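To make the dispatch concrete: for a hypothetical two-input float concat (buffer names arg0, arg1, out0 are illustrative, not taken from real emitter output), the reference-kernel branch writes a call along these lines into the generated source, while the non-ref branch expands into explicit loops via kernels::emit_concat (reconstructed in cpu_kernel_utils.cpp below):

    // Approximate text produced by the m_use_ref_kernels branch:
    kernel::concat<float>({arg0, arg1},
                   out0,
                   {{2, 2}, {3, 2}},
                   {5, 2},
                   0);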
src/ngraph/runtime/cpu/cpu_emitter.hpp

@@ -37,10 +37,12 @@ namespace ngraph
             {
             protected:
                 codegen::CodeWriter m_out;
+                bool m_use_ref_kernels;

             public:
                 CPU_Emitter()
                     : m_out()
+                    , m_use_ref_kernels(std::getenv("NGRAPH_CPU_USE_REF_KERNELS") != nullptr)
                 {
                 }
                 std::string get_code() { return m_out.get_code(); }
src/ngraph/runtime/cpu/cpu_external_function.cpp

@@ -207,6 +207,7 @@ void runtime::cpu::CPU_ExternalFunction::compile()
 #include "ngraph/runtime/cpu/cpu_eigen_utils.hpp"
 #include "ngraph/runtime/cpu/cpu_kernels.hpp"
 #include "ngraph/runtime/kernel/broadcast.hpp"
+#include "ngraph/runtime/kernel/concat.hpp"
 #include "ngraph/runtime/kernel/dot.hpp"
 #include "ngraph/runtime/kernel/one_hot.hpp"
 #include "ngraph/runtime/kernel/reduce.hpp"
src/ngraph/runtime/cpu/cpu_kernel_emitters.cpp (new file, mode 100644)

// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------------------------------------------------------

#include "ngraph/runtime/cpu/cpu_kernel_emitters.hpp"
#include "ngraph/codegen/code_writer.hpp"
#include "ngraph/runtime/cpu/cpu_kernel_utils.hpp"

using namespace ngraph;
using namespace ngraph::runtime::cpu::kernels;

//
// For the reference kernel this is based on, see ngraph/runtime/kernel/concat.hpp.
//
void ngraph::runtime::cpu::kernels::emit_concat(codegen::CodeWriter& writer,
                                                std::string element_type,
                                                const std::vector<std::string> args,
                                                std::string out,
                                                const std::vector<Shape>& in_shapes,
                                                const Shape& out_shape,
                                                size_t concatenation_axis)
{
    size_t concatenation_pos = 0;

    for (size_t i = 0; i < args.size(); i++)
    {
        Coordinate out_start_coord = Coordinate(out_shape.size(), 0);
        out_start_coord[concatenation_axis] = concatenation_pos;

        Coordinate out_end_coord = out_shape;
        out_end_coord[concatenation_axis] = concatenation_pos + in_shapes[i][concatenation_axis];

        CoordinateTransform input_transform(in_shapes[i]);
        CoordinateTransform output_chunk_transform(out_shape, out_start_coord, out_end_coord);

        emit_pointwise_copy(
            writer, element_type, args[i], out, input_transform, output_chunk_transform);

        concatenation_pos += in_shapes[i][concatenation_axis];
    }
}
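A hedged trace of the loop above, with shapes chosen purely for illustration: concatenating inputs of shape {2, 2} and {3, 2} along axis 0 into a {5, 2} output.

    codegen::CodeWriter writer;

    // Hypothetical buffer names; the emitter normally passes tensor names here.
    kernels::emit_concat(writer,
                         "float",
                         {"arg0", "arg1"},
                         "out0",
                         {Shape{2, 2}, Shape{3, 2}},
                         Shape{5, 2},
                         0);

    // Iteration 0 emits a copy of arg0 into rows [0, 2) of out0
    // (out_start_coord = {0, 0}, out_end_coord = {2, 2}); iteration 1
    // copies arg1 into rows [2, 5), since concatenation_pos advances by
    // each input's extent along the axis (2, then 3).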
src/ngraph/runtime/cpu/cpu_kernel_emitters.hpp (new file, mode 100644)

// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------------------------------------------------------

#pragma once

#include "ngraph/codegen/code_writer.hpp"
#include "ngraph/common.hpp"

namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            namespace kernels
            {
                void emit_concat(codegen::CodeWriter& writer,
                                 std::string element_type,
                                 const std::vector<std::string> args,
                                 std::string out,
                                 const std::vector<Shape>& in_shapes,
                                 const Shape& out_shape,
                                 size_t concatenation_axis);
            }
        }
    }
}
src/ngraph/runtime/cpu/cpu_kernel_utils.cpp (new file, mode 100644)

// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------------------------------------------------------

#include "ngraph/runtime/cpu/cpu_kernel_utils.hpp"
#include "ngraph/codegen/code_writer.hpp"
#include "ngraph/common.hpp"
#include "ngraph/coordinate_transform.hpp"
#include "ngraph/util.hpp"

using namespace ngraph;
using namespace ngraph::runtime::cpu::kernels;

//
// Given a coordinate transform and a vector of index expressions relative to
// the target coordinate space, produces the strings needed to index into the
// source coordinate space if it is represented as a multidimensional array.
//
// For example,
//
//    trans has stride (2,2,2), axis order (2,0,1), and start offsets (3,4,5)
//
//    index_vars are "i", "j", "k"
//
// this will produce:
//
//    {"((k) * 2 + 5)", "((i) * 2 + 3)", "((j) * 2 + 4)"}
//
std::vector<std::string>
    ngraph::runtime::cpu::kernels::emit_multi_indices(CoordinateTransform trans,
                                                      std::vector<std::string> index_vars)
{
    std::vector<std::string> result;

    for (size_t i = 0; i < index_vars.size(); i++)
    {
        std::string index_var = index_vars[trans.get_source_axis_order()[i]];
        size_t source_stride = trans.get_source_strides()[i];
        size_t source_start = trans.get_source_start_corner()[i];

        std::stringstream ss;

        if (source_stride == 1 && source_start == 0)
        {
            ss << index_var;
        }
        else if (source_stride == 1)
        {
            ss << "((" << index_var << ") + " << source_start << ")";
        }
        else if (source_start == 0)
        {
            ss << "(" << source_stride << " * (" << index_var << "))";
        }
        else
        {
            ss << "(" << source_stride << " * (" << index_var << ") + " << source_start << ")";
        }

        result.push_back(ss.str());
    }

    return result;
}

//
// Given a coordinate transform and a vector of index expressions relative to
// the target coordinate space, produces the string needed to index into the
// source coordinate space if it is represented as a flat (linearized) array.
//
// For example,
//
//    trans has source shape (2,2,2), stride (2,2,2), axis order (2,0,1),
//    and start offsets (3,4,5)
//
//    index_vars are "i", "j", "k"
//
// this will produce:
//
//    "((4 * ((k) * 2 + 5)) + (2 * ((i) * 2 + 3)) + ((j) * 2 + 4))"
//
std::string ngraph::runtime::cpu::kernels::emit_linear_index(CoordinateTransform trans,
                                                             std::vector<std::string> index_vars)
{
    std::vector<std::string> multi_indices = emit_multi_indices(trans, index_vars);

    size_t stride = 1;

    for (size_t i = index_vars.size(); i-- > 0;)
    {
        // No need to do this (multiply by stride) if it's 1, though it wouldn't hurt anything.
        if (stride != 1)
        {
            std::stringstream ss;
            ss << "(" << stride << " * " << multi_indices[i] << ")";
            multi_indices[i] = ss.str();
        }

        stride *= trans.get_source_shape()[i];
    }

    std::stringstream ss;
    ss << "(" << join(multi_indices, " + ") << ")";

    return ss.str();
}

//
// Begins an indexing loop (just a for-loop) with index_var as the index
// variable, starting at start, continuing while [index_var] < [end].
//
// Optionally emits an OpenMP parallel pragma, if "omp" is true.
//
std::string ngraph::runtime::cpu::kernels::start_index_loop(std::string index_var,
                                                            size_t start,
                                                            size_t end,
                                                            bool omp)
{
    std::stringstream ss;

    if (omp)
    {
        ss << "#pragma omp parallel for\n";
    }

    ss << "for(size_t " << index_var << " = " << start << "; " << index_var << " < " << end
       << "; " << index_var << "++)\n"
       << "{\n";

    return ss.str();
}

//
// Ends an indexing loop on the index variable [index_var].
//
std::string ngraph::runtime::cpu::kernels::end_index_loop(std::string index_var)
{
    std::stringstream ss;

    ss << "} // end for(" << index_var << ")\n";

    return ss.str();
}

std::string ngraph::runtime::cpu::kernels::emit_nd_sizes(CoordinateTransform trans)
{
    std::stringstream ss;

    for (size_t s : trans.get_source_shape())
    {
        ss << "[" << s << "]";
    }

    return ss.str();
}

std::string ngraph::runtime::cpu::kernels::emit_nd_index(CoordinateTransform trans,
                                                         std::vector<std::string> index_vars)
{
    std::stringstream ss;

    for (std::string index : emit_multi_indices(trans, index_vars))
    {
        ss << "[" << index << "]";
    }

    return ss.str();
}

//
// Emits a pointwise copy from source_buffer mediated by source_trans, to
// dest_buffer mediated by dest_trans.
//
void ngraph::runtime::cpu::kernels::emit_pointwise_copy(codegen::CodeWriter& writer,
                                                        std::string element_type,
                                                        std::string source_buffer,
                                                        std::string dest_buffer,
                                                        CoordinateTransform source_trans,
                                                        CoordinateTransform dest_trans)
{
    std::vector<std::string> index_vars;

    Shape source_start_corner = source_trans.get_source_start_corner();
    Shape source_end_corner = source_trans.get_source_end_corner();

    size_t n_axes = source_start_corner.size();

    std::string source_nd_name = writer.generate_temporary_name("source_nd");
    std::string dest_nd_name = writer.generate_temporary_name("dest_nd");

    writer << element_type << "(&" << source_nd_name << ")" << emit_nd_sizes(source_trans)
           << " = *reinterpret_cast<" << element_type << "(*)" << emit_nd_sizes(source_trans)
           << ">(" << source_buffer << ");\n";
    writer << element_type << "(&" << dest_nd_name << ")" << emit_nd_sizes(dest_trans)
           << " = *reinterpret_cast<" << element_type << "(*)" << emit_nd_sizes(dest_trans)
           << ">(" << dest_buffer << ");\n";

    for (size_t i = 0; i < n_axes; i++)
    {
        std::string index_var = writer.generate_temporary_name("i");

        writer << start_index_loop(index_var, source_start_corner[i], source_end_corner[i], i == 0);
        writer.indent++;

        index_vars.push_back(index_var);
    }

    writer << dest_nd_name << emit_nd_index(dest_trans, index_vars) << " = " << source_nd_name
           << emit_nd_index(source_trans, index_vars) << ";\n";

    for (size_t i = n_axes; i-- > 0;)
    {
        writer.indent--;
        writer << end_index_loop(index_vars[i]);
    }
}
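Putting the helpers together: for the second chunk of the {2, 2} + {3, 2} -> {5, 2} example above, the text emit_pointwise_copy writes would look roughly like the following (temporary names shown as for a fresh writer; only the outermost loop gets the OpenMP pragma, since omp is passed as i == 0; the annotation comment is mine, not emitted):

    float(&source_nd__0)[3][2] = *reinterpret_cast<float(*)[3][2]>(arg1);
    float(&dest_nd__1)[5][2] = *reinterpret_cast<float(*)[5][2]>(out0);
    #pragma omp parallel for
    for(size_t i__2 = 0; i__2 < 3; i__2++)
    {
        for(size_t i__3 = 0; i__3 < 2; i__3++)
        {
            // Loop bounds come from the source transform; the "+ 2" offset
            // comes from the destination chunk's start corner.
            dest_nd__1[((i__2) + 2)][i__3] = source_nd__0[i__2][i__3];
        } // end for(i__3)
    } // end for(i__2)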
src/ngraph/runtime/cpu/cpu_kernel_utils.hpp (new file, mode 100644)

// ----------------------------------------------------------------------------
// Copyright 2017 Nervana Systems Inc.
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
// ----------------------------------------------------------------------------

#pragma once

#include "ngraph/codegen/code_writer.hpp"
#include "ngraph/common.hpp"
#include "ngraph/coordinate_transform.hpp"

namespace ngraph
{
    namespace runtime
    {
        namespace cpu
        {
            namespace kernels
            {
                std::vector<std::string> emit_multi_indices(CoordinateTransform trans,
                                                            std::vector<std::string> index_vars);
                std::string emit_linear_index(CoordinateTransform trans,
                                              std::vector<std::string> index_vars);
                std::string start_index_loop(std::string index_var,
                                             size_t start,
                                             size_t end,
                                             bool omp);
                std::string end_index_loop(std::string index_var);
                std::string emit_nd_sizes(CoordinateTransform trans);
                std::string emit_nd_index(CoordinateTransform trans,
                                          std::vector<std::string> index_vars);
                void emit_pointwise_copy(codegen::CodeWriter& writer,
                                         std::string element_type,
                                         std::string source_buffer,
                                         std::string dest_buffer,
                                         CoordinateTransform source_trans,
                                         CoordinateTransform dest_trans);
            }
        }
    }
}
test/CMakeLists.txt

@@ -22,7 +22,6 @@ include_directories(
 )

 set (SRC
-    backend_performance.cpp
     builder.cpp
     builder_autobroadcast.cpp
     build_graph.cpp

@@ -69,7 +68,7 @@ endif()

 if (NGRAPH_CPU_ENABLE AND LLVM_INCLUDE_DIR)
     include_directories(SYSTEM ${LLVM_INCLUDE_DIR})
     link_directories(${LLVM_LIB_DIR})
-    set(SRC ${SRC} codegen.cpp)
+    set(SRC ${SRC} backend_performance.cpp codegen.cpp)
     set(BACKEND_NAMES ${BACKEND_NAMES} "CPU")
 endif()
test/backend_performance.cpp

@@ -22,6 +22,7 @@
 #include "ngraph/codegen/execution_engine.hpp"
 #include "ngraph/file_util.hpp"
 #include "ngraph/log.hpp"
+#include "ngraph/ops/concatenate.hpp"
 #include "ngraph/runtime/backend.hpp"
 #include "ngraph/runtime/call_frame.hpp"
 #include "ngraph/runtime/cpu/cpu_call_frame.hpp"

@@ -33,6 +34,13 @@
 using namespace std;
 using namespace ngraph;

+template <typename T>
+static void copy_data(shared_ptr<runtime::TensorView> tv, const vector<T>& data)
+{
+    size_t data_size = data.size() * sizeof(T);
+    tv->write(data.data(), 0, data_size);
+}
+
 // Starting point CPU: 1.2ms/iteration

 shared_ptr<runtime::TensorView> make_tensor(runtime::Backend& backend, const ValueType& value)

@@ -124,3 +132,115 @@ TEST(benchmark, mxnet_10_bucket_lstm)
         NGRAPH_INFO << p.name() << ", " << p.total_microseconds();
     }
 }
+
+//
+// Benchmarks a graph that concatenates six 32x1x200 arrays along the middle axis.
+//
+TEST(benchmark, concat_32x1x200_axis1_6)
+{
+    const size_t n_arrays = 6;
+    Shape shape_of_each_array = Shape{32, 1, 200};
+    size_t concatenation_axis = 1;
+
+    Shape result_shape;
+    result_shape = shape_of_each_array;
+    result_shape[concatenation_axis] *= n_arrays;
+
+    size_t elements_per_array = 1;
+    for (size_t d : shape_of_each_array)
+    {
+        elements_per_array *= d;
+    }
+
+    vector<vector<float>> data_arrays(n_arrays);
+    for (size_t i = 0; i < n_arrays; i++)
+    {
+        data_arrays[i] = vector<float>(elements_per_array);
+        for (size_t j = 0; j < elements_per_array; j++)
+        {
+            data_arrays[i][j] = float(j + 1);
+        }
+    }
+
+    bool using_ref_kernels = (std::getenv("NGRAPH_CPU_USE_REF_KERNELS") != nullptr);
+
+    vector<std::string> backend_names{"INTERPRETER", "NGVM", "CPU"};
+    vector<int> n_runs{200, 200, using_ref_kernels ? 200 : 200000}; // one for each backend
+    vector<std::function<void()>> test_callbacks;                   // one for each backend
+    vector<std::shared_ptr<runtime::TensorView>> result_tvs;        // one for each backend
+
+    for (std::string backend_name : backend_names)
+    {
+        vector<std::shared_ptr<op::Parameter>> params(n_arrays);
+        vector<std::shared_ptr<Node>> params_as_nodes(n_arrays);
+        for (size_t i = 0; i < n_arrays; i++)
+        {
+            auto param = make_shared<op::Parameter>(
+                make_shared<TensorViewType>(element::Float32::element_type(), shape_of_each_array));
+            params[i] = param;
+            params_as_nodes[i] = param;
+        }
+
+        auto concat = make_shared<op::Concat>(params_as_nodes, concatenation_axis);
+        auto f = make_shared<Function>(concat, params);
+
+        auto manager = runtime::Manager::get(backend_name);
+        auto external = manager->compile(f);
+        auto backend = manager->allocate_backend();
+        auto cf = backend->make_call_frame(external);
+
+        vector<shared_ptr<runtime::Value>> input_vals;
+
+        for (size_t i = 0; i < n_arrays; i++)
+        {
+            auto tv = backend->make_primary_tensor_view(element::Float32::element_type(),
+                                                        shape_of_each_array);
+            copy_data(tv, data_arrays[i]);
+            input_vals.push_back(tv);
+        }
+
+        auto result_tv =
+            backend->make_primary_tensor_view(element::Float32::element_type(), result_shape);
+        result_tvs.push_back(result_tv);
+
+        std::function<void()> cb = [input_vals, result_tv, cf]() {
+            cf->call(input_vals, {result_tv});
+        };
+
+        test_callbacks.push_back(cb);
+    }
+
+    for (size_t i = 0; i < backend_names.size(); i++)
+    {
+        std::cout << backend_names[i] << ": " << n_runs[i] << " tests in " << std::flush;
+
+        stopwatch sw;
+        std::function<void()> cb = test_callbacks[i];
+
+        sw.start();
+        for (int j = 0; j < n_runs[i]; j++)
+        {
+            cb();
+        }
+        sw.stop();
+
+        std::cout << sw.get_milliseconds() << "ms (" << (sw.get_microseconds() / n_runs[i])
+                  << " us/test)" << std::endl;
+    }
+
+    for (size_t i = 1; i < backend_names.size(); i++)
+    {
+        std::cout << "Verifying " << backend_names[i] << " result against " << backend_names[0]
+                  << "..." << std::flush;
+
+        if (result_tvs[i]->get_vector<float>() == result_tvs[0]->get_vector<float>())
+        {
+            std::cout << " OK" << std::endl;
+        }
+        else
+        {
+            std::cout << " FAILED" << std::endl;
+            ADD_FAILURE();
+        }
+    }
+}
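As a sanity check on the benchmark's shapes (pure arithmetic, restated from the test): concatenating six 32x1x200 arrays along axis 1 yields a 32x6x200 result, and the element counts line up.

    constexpr size_t n_arrays = 6;
    constexpr size_t elements_per_array = 32 * 1 * 200; // 6400 floats per input
    static_assert(elements_per_array * n_arrays == 32 * 6 * 200,
                  "concat along axis 1 preserves the total element count");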
test/backend_test.in.cpp

@@ -21,6 +21,7 @@
 #include "ngraph/log.hpp"
 #include "ngraph/ngraph.hpp"
 #include "ngraph/serializer.hpp"
+#include "util/all_close.hpp"
 #include "util/ndarray.hpp"

 using namespace std;

@@ -441,6 +442,134 @@ TEST(${BACKEND_NAME}, concat_vector)
               result->get_vector<float>());
 }

+// from numpy import *
+// a=linspace(1,2*3*4*3*2,2*3*4*3*2)
+// b=linspace(1000+1,1000+2*3*3*3*2,2*3*3*3*2)
+// c=linspace(2000+1,2000+2*3*2*3*2,2*3*2*3*2)
+// a.shape=(2,3,4,3,2)
+// b.shape=(2,3,3,3,2)
+// c.shape=(2,3,2,3,2)
+// z=concatenate((a,b,c),axis=2)
+// z.shape=(2*3*(4+3+2)*3*2)
+// set_printoptions(suppress=True)
+// print(z)
+//
+// [    1.     2.     3.     4.     5.     6.     7.     8.     9.    10.
+//     11.    12.    13.    14.    15.    16.    17.    18.    19.    20.
+//     21.    22.    23.    24.  1001.  1002.  1003.  1004.  1005.  1006.
+//   1007.  1008.  1009.  1010.  1011.  1012.  1013.  1014.  1015.  1016.
+//   1017.  1018.  2001.  2002.  2003.  2004.  2005.  2006.  2007.  2008.
+//   2009.  2010.  2011.  2012.    25.    26.    27.    28.    29.    30.
+//     31.    32.    33.    34.    35.    36.    37.    38.    39.    40.
+//     41.    42.    43.    44.    45.    46.    47.    48.  1019.  1020.
+//   1021.  1022.  1023.  1024.  1025.  1026.  1027.  1028.  1029.  1030.
+//   1031.  1032.  1033.  1034.  1035.  1036.  2013.  2014.  2015.  2016.
+//   2017.  2018.  2019.  2020.  2021.  2022.  2023.  2024.    49.    50.
+//     51.    52.    53.    54.    55.    56.    57.    58.    59.    60.
+//     61.    62.    63.    64.    65.    66.    67.    68.    69.    70.
+//     71.    72.  1037.  1038.  1039.  1040.  1041.  1042.  1043.  1044.
+//   1045.  1046.  1047.  1048.  1049.  1050.  1051.  1052.  1053.  1054.
+//   2025.  2026.  2027.  2028.  2029.  2030.  2031.  2032.  2033.  2034.
+//   2035.  2036.    73.    74.    75.    76.    77.    78.    79.    80.
+//     81.    82.    83.    84.    85.    86.    87.    88.    89.    90.
+//     91.    92.    93.    94.    95.    96.  1055.  1056.  1057.  1058.
+//   1059.  1060.  1061.  1062.  1063.  1064.  1065.  1066.  1067.  1068.
+//   1069.  1070.  1071.  1072.  2037.  2038.  2039.  2040.  2041.  2042.
+//   2043.  2044.  2045.  2046.  2047.  2048.    97.    98.    99.   100.
+//    101.   102.   103.   104.   105.   106.   107.   108.   109.   110.
+//    111.   112.   113.   114.   115.   116.   117.   118.   119.   120.
+//   1073.  1074.  1075.  1076.  1077.  1078.  1079.  1080.  1081.  1082.
+//   1083.  1084.  1085.  1086.  1087.  1088.  1089.  1090.  2049.  2050.
+//   2051.  2052.  2053.  2054.  2055.  2056.  2057.  2058.  2059.  2060.
+//    121.   122.   123.   124.   125.   126.   127.   128.   129.   130.
+//    131.   132.   133.   134.   135.   136.   137.   138.   139.   140.
+//    141.   142.   143.   144.  1091.  1092.  1093.  1094.  1095.  1096.
+//   1097.  1098.  1099.  1100.  1101.  1102.  1103.  1104.  1105.  1106.
+//   1107.  1108.  2061.  2062.  2063.  2064.  2065.  2066.  2067.  2068.
+//   2069.  2070.  2071.  2072.]
+TEST(${BACKEND_NAME}, concat_5d)
+{
+    vector<float> a_data(2 * 3 * 4 * 3 * 2);
+    for (int i = 0; i < 2 * 3 * 4 * 3 * 2; i++)
+    {
+        a_data[i] = float(i + 1);
+    }
+
+    vector<float> b_data(2 * 3 * 3 * 3 * 2);
+    for (int i = 0; i < 2 * 3 * 3 * 3 * 2; i++)
+    {
+        b_data[i] = 1000 + float(i + 1);
+    }
+
+    vector<float> c_data(2 * 3 * 2 * 3 * 2);
+    for (int i = 0; i < 2 * 3 * 2 * 3 * 2; i++)
+    {
+        c_data[i] = 2000 + float(i + 1);
+    }
+
+    auto shape_a = Shape{2, 3, 4, 3, 2};
+    auto A = make_shared<op::Parameter>(
+        make_shared<TensorViewType>(element::Float32::element_type(), shape_a));
+    auto shape_b = Shape{2, 3, 3, 3, 2};
+    auto B = make_shared<op::Parameter>(
+        make_shared<TensorViewType>(element::Float32::element_type(), shape_b));
+    auto shape_c = Shape{2, 3, 2, 3, 2};
+    auto C = make_shared<op::Parameter>(
+        make_shared<TensorViewType>(element::Float32::element_type(), shape_c));
+    auto shape_r = Shape{2, 3, 9, 3, 2};
+    auto rt = make_shared<TensorViewType>(element::Float32::element_type(), shape_r);
+    auto r = make_shared<op::Concat>(Nodes{A, B, C}, 2);
+    auto f = make_shared<Function>(r, rt, op::Parameters{A, B, C});
+
+    auto manager = runtime::Manager::get("${BACKEND_NAME}");
+    auto external = manager->compile(f);
+    auto backend = manager->allocate_backend();
+    auto cf = backend->make_call_frame(external);
+
+    // Create some tensors for input/output
+    auto a = backend->make_primary_tensor_view(element::Float32::element_type(), shape_a);
+    copy_data(a, a_data);
+    auto b = backend->make_primary_tensor_view(element::Float32::element_type(), shape_b);
+    copy_data(b, b_data);
+    auto c = backend->make_primary_tensor_view(element::Float32::element_type(), shape_c);
+    copy_data(c, c_data);
+
+    auto result = backend->make_primary_tensor_view(element::Float32::element_type(), shape_r);
+
+    cf->call({a, b, c}, {result});
+    EXPECT_EQ(
+        (vector<float>{
+            1., 2., 3., 4., 5., 6., 7., 8., 9., 10., 11., 12.,
+            13., 14., 15., 16., 17., 18., 19., 20., 21., 22., 23., 24.,
+            1001., 1002., 1003., 1004., 1005., 1006., 1007., 1008., 1009.,
+            1010., 1011., 1012., 1013., 1014., 1015., 1016., 1017., 1018.,
+            2001., 2002., 2003., 2004., 2005., 2006., 2007., 2008., 2009., 2010., 2011., 2012.,
+            25., 26., 27., 28., 29., 30., 31., 32., 33., 34., 35., 36.,
+            37., 38., 39., 40., 41., 42., 43., 44., 45., 46., 47., 48.,
+            1019., 1020., 1021., 1022., 1023., 1024., 1025., 1026., 1027.,
+            1028., 1029., 1030., 1031., 1032., 1033., 1034., 1035., 1036.,
+            2013., 2014., 2015., 2016., 2017., 2018., 2019., 2020., 2021., 2022., 2023., 2024.,
+            49., 50., 51., 52., 53., 54., 55., 56., 57., 58., 59., 60.,
+            61., 62., 63., 64., 65., 66., 67., 68., 69., 70., 71., 72.,
+            1037., 1038., 1039., 1040., 1041., 1042., 1043., 1044., 1045.,
+            1046., 1047., 1048., 1049., 1050., 1051., 1052., 1053., 1054.,
+            2025., 2026., 2027., 2028., 2029., 2030., 2031., 2032., 2033., 2034., 2035., 2036.,
+            73., 74., 75., 76., 77., 78., 79., 80., 81., 82., 83., 84.,
+            85., 86., 87., 88., 89., 90., 91., 92., 93., 94., 95., 96.,
+            1055., 1056., 1057., 1058., 1059., 1060., 1061., 1062., 1063.,
+            1064., 1065., 1066., 1067., 1068., 1069., 1070., 1071., 1072.,
+            2037., 2038., 2039., 2040., 2041., 2042., 2043., 2044., 2045., 2046., 2047., 2048.,
+            97., 98., 99., 100., 101., 102., 103., 104., 105., 106., 107., 108.,
+            109., 110., 111., 112., 113., 114., 115., 116., 117., 118., 119., 120.,
+            1073., 1074., 1075., 1076., 1077., 1078., 1079., 1080., 1081.,
+            1082., 1083., 1084., 1085., 1086., 1087., 1088., 1089., 1090.,
+            2049., 2050., 2051., 2052., 2053., 2054., 2055., 2056., 2057., 2058., 2059., 2060.,
+            121., 122., 123., 124., 125., 126., 127., 128., 129., 130., 131., 132.,
+            133., 134., 135., 136., 137., 138., 139., 140., 141., 142., 143., 144.,
+            1091., 1092., 1093., 1094., 1095., 1096., 1097., 1098., 1099.,
+            1100., 1101., 1102., 1103., 1104., 1105., 1106., 1107., 1108.,
+            2061., 2062., 2063., 2064., 2065., 2066., 2067., 2068., 2069., 2070., 2071., 2072.}),
+        result->get_vector<float>());
+}

 TEST(${BACKEND_NAME}, divide)
 {
     auto manager = runtime::Manager::get("${BACKEND_NAME}");

@@ -2594,7 +2723,7 @@ TEST(DISABLED_${BACKEND_NAME}, reshape_6d)
     auto result = backend->make_primary_tensor_view(element::Float32::element_type(), shape_r);

     cf->call({a}, {result});
-    ASSERT_EQ(
+    EXPECT_EQ(
         (vector<float>{1., 73., 9., 81., 17., 89., 2., 74., 10., 82., 18., 90., 3., 75.,
                        11., 83., 19., 91., 4., 76., 12., 84., 20., 92., 145., 217., 153., 225.,
...

@@ -4190,7 +4319,7 @@ TEST(DISABLED_${BACKEND_NAME}, dot_3d_multi_axis)
     auto result = backend->make_primary_tensor_view(element::Float32::element_type(), shape_r);

     cf->call({a, b}, {result});
-    ASSERT_EQ((vector<float>{2938., 3016., 3094., 3172., 3250., 7042., 7264., 7486., 7708., 7930.}),
+    EXPECT_EQ((vector<float>{2938., 3016., 3094., 3172., 3250., 7042., 7264., 7486., 7708., 7930.}),
               result->get_vector<float>());
 }

@@ -4247,7 +4376,7 @@ TEST(DISABLED_${BACKEND_NAME}, dot_3d_one_axis_arbitrary)
     auto result = backend->make_primary_tensor_view(element::Float32::element_type(), shape_r);

     cf->call({a, b}, {result});
-    ASSERT_EQ((vector<float>{483, 189, 331, 86, 85, 1262, 2155, 354, 83, 18, 58, 543, 77,
+    EXPECT_EQ((vector<float>{483, 189, 331, 86, 85, 1262, 2155, 354, 83, 18, 58, 543, 77,
                              241, 325, 286, 859, 144, 438, 1025, 317, 973, 1041, 2930, 163, 69,
                              117, 50, 29, 472, 819, 62, 785, 236, 476, 235, 175, 1521, 2387, 1402,
                              97, 29, 69, 412, 63, 286, 429, 218, 45, 11, 29, 162,
...

@@ -4321,7 +4450,7 @@ TEST(DISABLED_${BACKEND_NAME}, dot_4d_5d_multi_axis)
     auto result = backend->make_primary_tensor_view(element::Float32::element_type(), shape_r);

     cf->call({a, b}, {result});
-    ASSERT_EQ(
+    EXPECT_EQ(
         (vector<float>{6942., 7020., 7098., 7176., 7254., 7332., 7410., 7488., 7566., 7644.,
                        7722., 7800., 16590., 16812., 17034., 17256., 17478., 17700., 17922.,
                        18144., 18366., 18588., 18810., 19032., 26238., 26604., 26970.,
...

@@ -4388,7 +4517,7 @@ TEST(DISABLED_${BACKEND_NAME}, dot_4d_5d_multi_axis_more)
     auto result = backend->make_primary_tensor_view(element::Float32::element_type(), shape_r);

     cf->call({a, b}, {result});
-    ASSERT_EQ((vector<float>{251412., 254040.}), result->get_vector<float>());
+    EXPECT_EQ((vector<float>{251412., 254040.}), result->get_vector<float>());
 }

@@ -4456,14 +4585,14 @@ TEST(DISABLED_${BACKEND_NAME}, dot_4d_5d_multi_axis_big_fp64_VERY_SLOW)
     auto result = backend->make_primary_tensor_view(element::Float64::element_type(), shape_r);

     cf->call({a, b}, {result});
-    ASSERT_EQ((vector<double>{2.48832025919525478400e+18, 2.48832051839533977600e+18,
-                              2.48832077759658444800e+18, 2.48832103679413504000e+18,
-                              2.48832129599669350400e+18, 2.48832155519793971200e+18,
-                              2.48832181439802265600e+18, 2.48832207359808000000e+18,
-                              2.48832233279813580800e+18, 2.48832259199822028800e+18,
-                              2.48832285119946496000e+18, 2.48832311040043008000e+18,
-                              2.48832336959957401600e+18, 2.48832362880081817600e+18,
-                              2.48832388800090368000e+18, 2.48832414720096000000e+18,
-                              2.48832440640101478400e+18, 2.48832466560109772800e+18,
-                              2.48832492480234188800e+18, 2.48832518400031897600e+18}),
-              result->get_vector<double>());
+    EXPECT_TRUE(test::all_close(
+        vector<double>{2.48832025919525478400e+18, 2.48832051839533977600e+18,
+                       2.48832077759658444800e+18, 2.48832103679413504000e+18,
+                       2.48832129599669350400e+18, 2.48832155519793971200e+18,
+                       2.48832181439802265600e+18, 2.48832207359808000000e+18,
+                       2.48832233279813580800e+18, 2.48832259199822028800e+18,
+                       2.48832285119946496000e+18, 2.48832311040043008000e+18,
+                       2.48832336959957401600e+18, 2.48832362880081817600e+18,
+                       2.48832388800090368000e+18, 2.48832414720096000000e+18,
+                       2.48832440640101478400e+18, 2.48832466560109772800e+18,
+                       2.48832492480234188800e+18, 2.48832518400031897600e+18},
+        result->get_vector<double>()));
 }
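The last hunk swaps an exact ASSERT_EQ on doubles for test::all_close, the usual fix when different backends accumulate rounding differently. As a hedged sketch only (the real helper in util/all_close.hpp may use a different signature and defaults), an elementwise closeness check of this kind typically looks like:

    #include <cmath>
    #include <cstddef>
    #include <vector>

    // Hypothetical restatement: |a[i] - b[i]| <= atol + rtol * |b[i]| for all i.
    bool all_close_sketch(const std::vector<double>& a,
                          const std::vector<double>& b,
                          double rtol = 1e-5,
                          double atol = 1e-8)
    {
        if (a.size() != b.size())
        {
            return false;
        }
        for (std::size_t i = 0; i < a.size(); i++)
        {
            if (std::fabs(a[i] - b[i]) > atol + rtol * std::fabs(b[i]))
            {
                return false;
            }
        }
        return true;
    }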