Unverified Commit 9a924c17 authored by Robert Kimball's avatar Robert Kimball Committed by GitHub

Remove heap allocations during execution (#1583)

* elementwise updated

* add support for constructing any order arg list

* add comment for using node_names

* use array instead of vector for args/output

* fix per review comment

* remove dead code
parent 293ba8b7
...@@ -105,6 +105,23 @@ using namespace std; ...@@ -105,6 +105,23 @@ using namespace std;
#define TI(x) type_index(typeid(x)) #define TI(x) type_index(typeid(x))
/// \brief Build a comma-separated list of tensor names for code generation.
/// \param args the tensor arguments whose generated-code names are collected
/// \param arg_indexes optional indexes into \p args selecting (and ordering)
///        which names to emit; when empty, all of \p args are used in order.
/// \return a string of the form "arg0_name, arg1_name, ..." (via ngraph::join)
/// \throws std::out_of_range if an index in \p arg_indexes is out of bounds.
string ngraph::runtime::gpu::GPU_Emitter::node_names(const vector<GPU_TensorViewWrapper>& args,
                                                     initializer_list<int> arg_indexes)
{
    vector<string> names;
    if (arg_indexes.size() == 0)
    {
        // Common case: take every argument, in order. Iterate args directly
        // instead of materializing an index vector with iota.
        names.reserve(args.size());
        for (const auto& arg : args)
        {
            names.push_back(arg.get_name());
        }
    }
    else
    {
        // Caller supplied an explicit index order (possibly with repeats,
        // e.g. {0, 0} for cuDNN backward pooling). Bounds-check each index.
        names.reserve(arg_indexes.size());
        for (int i : arg_indexes)
        {
            names.push_back(args.at(i).get_name());
        }
    }
    return ngraph::join(names);
}
namespace ngraph namespace ngraph
{ {
namespace runtime namespace runtime
...@@ -118,18 +135,16 @@ namespace ngraph ...@@ -118,18 +135,16 @@ namespace ngraph
{ {
return; return;
} }
auto& cudnn_emitter =
external_function->get_primitive_emitter()->get_cudnn_emitter();
auto index = cudnn_emitter->build_tensor_op(
CUDNN_OP_TENSOR_ADD, out[0].get_type(), args[0].get_shape(), 1.0, 1.0, 0);
writer.block_begin(); writer.block_begin();
{ {
auto& cudnn_emitter = writer << "void* input[] = {" << node_names(args) << "};\n";
external_function->get_primitive_emitter()->get_cudnn_emitter(); writer << "void* output[] = {" << node_names(out) << "};\n";
auto index = cudnn_emitter->build_tensor_op( writer << "gpu::invoke_primitive(ctx, " << index << ", input, output);\n";
CUDNN_OP_TENSOR_ADD, out[0].get_type(), args[0].get_shape(), 1.0, 1.0, 0);
writer << "gpu::invoke_primitive(ctx, " << index << ", ";
writer << "std::vector<void*>{" << args[0].get_name() << ","
<< args[1].get_name() << "}.data(), ";
writer << "std::vector<void*>{" << out[0].get_name() << "}.data()";
writer << ");\n";
} }
writer.block_end(); writer.block_end();
} }
...@@ -158,12 +173,13 @@ namespace ngraph ...@@ -158,12 +173,13 @@ namespace ngraph
conv_index = cudnn_emitter->build_primitive(convolution); conv_index = cudnn_emitter->build_primitive(convolution);
} }
writer << "gpu::invoke_primitive(ctx, " << conv_index << ", "; writer.block_begin();
writer << "std::vector<void*>{"; {
writer << args[0].get_name() << ", "; writer << "void* input[] = {" << node_names(args) << "};\n";
writer << args[1].get_name() << ", "; writer << "void* output[] = {" << node_names(out) << "};\n";
writer << "}.data(), "; writer << "gpu::invoke_primitive(ctx, " << conv_index << ", input, output);\n";
writer << "std::vector<void*>{" << out[0].get_name() << "}.data());\n"; }
writer.block_end();
} }
template <> template <>
...@@ -186,12 +202,13 @@ namespace ngraph ...@@ -186,12 +202,13 @@ namespace ngraph
external_function->get_primitive_emitter()->get_cudnn_emitter(); external_function->get_primitive_emitter()->get_cudnn_emitter();
size_t conv_index = cudnn_emitter->build_primitive(convolution); size_t conv_index = cudnn_emitter->build_primitive(convolution);
writer << "gpu::invoke_primitive(ctx, " << conv_index << ", "; writer.block_begin();
writer << "std::vector<void*>{"; {
writer << args[0].get_name() << ", "; writer << "void* input[] = {" << node_names(args) << "};\n";
writer << args[1].get_name() << ", "; writer << "void* output[] = {" << node_names(out) << "};\n";
writer << "}.data(), "; writer << "gpu::invoke_primitive(ctx, " << conv_index << ", input, output);\n";
writer << "std::vector<void*>{" << out[0].get_name() << "}.data());\n"; }
writer.block_end();
} }
template <> template <>
...@@ -214,12 +231,13 @@ namespace ngraph ...@@ -214,12 +231,13 @@ namespace ngraph
external_function->get_primitive_emitter()->get_cudnn_emitter(); external_function->get_primitive_emitter()->get_cudnn_emitter();
size_t conv_index = cudnn_emitter->build_primitive(convolution); size_t conv_index = cudnn_emitter->build_primitive(convolution);
writer << "gpu::invoke_primitive(ctx, " << conv_index << ", "; writer.block_begin();
writer << "std::vector<void*>{"; {
writer << args[0].get_name() << ", "; writer << "void* input[] = {" << node_names(args) << "};\n";
writer << args[1].get_name() << ", "; writer << "void* output[] = {" << node_names(out) << "};\n";
writer << "}.data(), "; writer << "gpu::invoke_primitive(ctx, " << conv_index << ", input, output);\n";
writer << "std::vector<void*>{" << out[0].get_name() << "}.data());\n"; }
writer.block_end();
} }
template <> template <>
...@@ -394,18 +412,16 @@ namespace ngraph ...@@ -394,18 +412,16 @@ namespace ngraph
{ {
return; return;
} }
auto& cudnn_emitter =
external_function->get_primitive_emitter()->get_cudnn_emitter();
auto index = cudnn_emitter->build_tensor_op(
CUDNN_OP_TENSOR_MAX, out[0].get_type(), args[0].get_shape(), 1.0, 1.0, 0);
writer.block_begin(); writer.block_begin();
{ {
auto& cudnn_emitter = writer << "void* input[] = {" << node_names(args) << "};\n";
external_function->get_primitive_emitter()->get_cudnn_emitter(); writer << "void* output[] = {" << node_names(out) << "};\n";
auto index = cudnn_emitter->build_tensor_op( writer << "gpu::invoke_primitive(ctx, " << index << ", input, output);\n";
CUDNN_OP_TENSOR_MAX, out[0].get_type(), args[0].get_shape(), 1.0, 1.0, 0);
writer << "gpu::invoke_primitive(ctx, " << index << ", ";
writer << "std::vector<void*>{" << args[0].get_name() << ","
<< args[1].get_name() << "}.data(), ";
writer << "std::vector<void*>{" << out[0].get_name() << "}.data()";
writer << ");\n";
} }
writer.block_end(); writer.block_end();
} }
...@@ -417,18 +433,16 @@ namespace ngraph ...@@ -417,18 +433,16 @@ namespace ngraph
{ {
return; return;
} }
auto& cudnn_emitter =
external_function->get_primitive_emitter()->get_cudnn_emitter();
auto index = cudnn_emitter->build_tensor_op(
CUDNN_OP_TENSOR_MIN, out[0].get_type(), args[0].get_shape(), 1.0, 1.0, 0);
writer.block_begin(); writer.block_begin();
{ {
auto& cudnn_emitter = writer << "void* input[] = {" << node_names(args) << "};\n";
external_function->get_primitive_emitter()->get_cudnn_emitter(); writer << "void* output[] = {" << node_names(out) << "};\n";
auto index = cudnn_emitter->build_tensor_op( writer << "gpu::invoke_primitive(ctx, " << index << ", input, output);\n";
CUDNN_OP_TENSOR_MIN, out[0].get_type(), args[0].get_shape(), 1.0, 1.0, 0);
writer << "gpu::invoke_primitive(ctx, " << index << ", ";
writer << "std::vector<void*>{" << args[0].get_name() << ","
<< args[1].get_name() << "}.data(), ";
writer << "std::vector<void*>{" << out[0].get_name() << "}.data()";
writer << ");\n";
} }
writer.block_end(); writer.block_end();
} }
...@@ -458,10 +472,13 @@ namespace ngraph ...@@ -458,10 +472,13 @@ namespace ngraph
auto bcast_index = cuda_emitter->build_broadcast( auto bcast_index = cuda_emitter->build_broadcast(
{{args[0].get_type(), out[0].get_type()}}, result_shape, axes); {{args[0].get_type(), out[0].get_type()}}, result_shape, axes);
writer << "gpu::invoke_primitive(ctx, " << bcast_index << ", "; writer.block_begin();
writer << "std::vector<void*>{" << args[0].get_name() << "}.data(), "; {
writer << "std::vector<void*>{" << out[0].get_name() << "}.data()"; writer << "void* input[] = {" << node_names(args) << "};\n";
writer << ");\n"; writer << "void* output[] = {" << node_names(out) << "};\n";
writer << "gpu::invoke_primitive(ctx, " << bcast_index << ", input, output);\n";
}
writer.block_end();
} }
template <> template <>
...@@ -483,22 +500,15 @@ namespace ngraph ...@@ -483,22 +500,15 @@ namespace ngraph
} }
dtypes.push_back(out[0].get_type()); dtypes.push_back(out[0].get_type());
auto& cuda_emitter = external_function->get_primitive_emitter()->get_cuda_emitter();
auto index =
cuda_emitter->build_concat(dtypes, input_shapes, axis, out[0].get_shape());
writer.block_begin(); writer.block_begin();
{ {
auto& cuda_emitter = writer << "void* input[] = {" << node_names(args) << "};\n";
external_function->get_primitive_emitter()->get_cuda_emitter(); writer << "void* output[] = {" << node_names(out) << "};\n";
auto index = writer << "gpu::invoke_primitive(ctx, " << index << ", input, output);\n";
cuda_emitter->build_concat(dtypes, input_shapes, axis, out[0].get_shape());
writer << "gpu::invoke_primitive(ctx, " << index << ", ";
writer << "std::vector<void*>{" << args[0].get_name();
for (size_t i = 1; i < args.size(); i++)
{
writer << ", " << args[i].get_name();
}
writer << "}.data(), ";
writer << "std::vector<void*>{" << out[0].get_name() << "}.data()";
writer << ");\n";
} }
writer.block_end(); writer.block_end();
} }
...@@ -558,10 +568,9 @@ namespace ngraph ...@@ -558,10 +568,9 @@ namespace ngraph
auto index = cuda_emitter->build_reshape( auto index = cuda_emitter->build_reshape(
{{args[0].get_type(), out[0].get_type()}}, arg_shape, input_order); {{args[0].get_type(), out[0].get_type()}}, arg_shape, input_order);
writer << "gpu::invoke_primitive(ctx, " << index << ", "; writer << "void* input[] = {" << node_names(args) << "};\n";
writer << "std::vector<void*>{" << args[0].get_name() << "}.data(), "; writer << "void* output[] = {" << node_names(out) << "};\n";
writer << "std::vector<void*>{" << out[0].get_name() << "}.data()"; writer << "gpu::invoke_primitive(ctx, " << index << ", input, output);\n";
writer << ");\n";
} }
writer.block_end(); writer.block_end();
} }
...@@ -574,33 +583,9 @@ namespace ngraph ...@@ -574,33 +583,9 @@ namespace ngraph
writer.block_begin(); writer.block_begin();
{ {
std::vector<string> input_names; writer << "void* input[] = {" << node_names(args) << "};\n";
std::vector<string> output_names; writer << "void* output[] = {" << node_names(out) << "};\n";
writer << function->get_name() << "(input, output, ctx);\n";
for (const runtime::gpu::GPU_TensorViewWrapper& input : args)
{
input_names.push_back(input.get_name());
}
for (const runtime::gpu::GPU_TensorViewWrapper& output : out)
{
output_names.push_back(output.get_name());
}
writer << "void* args[] =\n";
writer.block_begin();
writer << "\n" << join(input_names, ",\n");
writer.block_end();
writer << ";\n";
writer << "void* out[] =\n";
writer.block_begin();
writer << "\n" << join(output_names, ",\n");
writer.block_end();
writer << ";\n";
writer << "\n";
writer << function->get_name() << "(args, out, ctx);\n";
} }
writer.block_end(); writer.block_end();
} }
...@@ -635,10 +620,9 @@ namespace ngraph ...@@ -635,10 +620,9 @@ namespace ngraph
slice_strides, slice_strides,
result_shape); result_shape);
writer << "gpu::invoke_primitive(ctx, " << index << ", "; writer << "void* input[] = {" << node_names(args) << "};\n";
writer << "std::vector<void*>{" << args[0].get_name() << "}.data(), "; writer << "void* output[] = {" << node_names(out) << "};\n";
writer << "std::vector<void*>{" << out[0].get_name() << "}.data()"; writer << "gpu::invoke_primitive(ctx, " << index << ", input, output);\n";
writer << ");\n";
} }
writer.block_end(); writer.block_end();
} }
...@@ -673,10 +657,9 @@ namespace ngraph ...@@ -673,10 +657,9 @@ namespace ngraph
auto index = cuda_emitter->build_reverse( auto index = cuda_emitter->build_reverse(
{{args[0].get_type(), out[0].get_type()}}, arg_shape, reverse_axes_flag); {{args[0].get_type(), out[0].get_type()}}, arg_shape, reverse_axes_flag);
writer << "gpu::invoke_primitive(ctx, " << index << ", "; writer << "void* input[] = {" << node_names(args) << "};\n";
writer << "std::vector<void*>{" << args[0].get_name() << "}.data(), "; writer << "void* output[] = {" << node_names(out) << "};\n";
writer << "std::vector<void*>{" << out[0].get_name() << "}.data()"; writer << "gpu::invoke_primitive(ctx, " << index << ", input, output);\n";
writer << ");\n";
} }
writer.block_end(); writer.block_end();
} }
...@@ -705,11 +688,11 @@ namespace ngraph ...@@ -705,11 +688,11 @@ namespace ngraph
out_shape, out_shape,
bi, bi,
si); si);
writer << "gpu::invoke_primitive(ctx, " << rs_index << ", "; writer.block_begin();
writer << "std::vector<void*>{" << args[0].get_name() << ", " << args[1].get_name() writer << "void* input[] = {" << node_names(args) << "};\n";
<< "}.data(), "; writer << "void* output[] = {" << node_names(out) << "};\n";
writer << "std::vector<void*>{" << out[0].get_name() << "}.data()"; writer << "gpu::invoke_primitive(ctx, " << rs_index << ", input, output);\n";
writer << ");\n"; writer.block_end();
} }
template <> template <>
...@@ -726,11 +709,11 @@ namespace ngraph ...@@ -726,11 +709,11 @@ namespace ngraph
auto index = cudnn_emitter->build_tensor_op( auto index = cudnn_emitter->build_tensor_op(
CUDNN_OP_TENSOR_MUL, out[0].get_type(), args[0].get_shape(), 1.0, 1.0, 0); CUDNN_OP_TENSOR_MUL, out[0].get_type(), args[0].get_shape(), 1.0, 1.0, 0);
writer << "gpu::invoke_primitive(ctx, " << index << ", "; writer.block_begin();
writer << "std::vector<void*>{" << args[0].get_name() << "," writer << "void* input[] = {" << node_names(args) << "};\n";
<< args[1].get_name() << "}.data(), "; writer << "void* output[] = {" << node_names(out) << "};\n";
writer << "std::vector<void*>{" << out[0].get_name() << "}.data()"; writer << "gpu::invoke_primitive(ctx, " << index << ", input, output);\n";
writer << ");\n"; writer.block_end();
} }
writer.block_end(); writer.block_end();
} }
...@@ -754,10 +737,11 @@ namespace ngraph ...@@ -754,10 +737,11 @@ namespace ngraph
auto index = cuda_emitter->build_onehot( auto index = cuda_emitter->build_onehot(
{{args[0].get_type(), out[0].get_type()}}, arg_shape, result_shape, idx); {{args[0].get_type(), out[0].get_type()}}, arg_shape, result_shape, idx);
writer << "gpu::invoke_primitive(ctx, " << index << ", "; writer.block_begin();
writer << "std::vector<void*>{" << args[0].get_name() << "}.data(), "; writer << "void* input[] = {" << node_names(args) << "};\n";
writer << "std::vector<void*>{" << out[0].get_name() << "}.data()"; writer << "void* output[] = {" << node_names(out) << "};\n";
writer << ");\n"; writer << "gpu::invoke_primitive(ctx, " << index << ", input, output);\n";
writer.block_end();
} }
writer.block_end(); writer.block_end();
} }
...@@ -813,9 +797,11 @@ namespace ngraph ...@@ -813,9 +797,11 @@ namespace ngraph
external_function->get_primitive_emitter()->get_cudnn_emitter(); external_function->get_primitive_emitter()->get_cudnn_emitter();
auto index = cudnn_emitter->build_primitive(max); auto index = cudnn_emitter->build_primitive(max);
writer << "gpu::invoke_primitive(ctx, " << index << ", "; writer.block_begin();
writer << "std::vector<void*>{" << args[0].get_name() << "}.data(), "; writer << "void* input[] = {" << node_names(args) << "};\n";
writer << "std::vector<void*>{" << out[0].get_name() << "}.data());\n"; writer << "void* output[] = {" << node_names(out) << "};\n";
writer << "gpu::invoke_primitive(ctx, " << index << ", input, output);\n";
writer.block_end();
return; return;
} }
...@@ -833,9 +819,11 @@ namespace ngraph ...@@ -833,9 +819,11 @@ namespace ngraph
external_function->get_primitive_emitter()->get_cudnn_emitter(); external_function->get_primitive_emitter()->get_cudnn_emitter();
auto index = cudnn_emitter->build_primitive(min); auto index = cudnn_emitter->build_primitive(min);
writer << "gpu::invoke_primitive(ctx, " << index << ", "; writer.block_begin();
writer << "std::vector<void*>{" << args[0].get_name() << "}.data(), "; writer << "void* input[] = {" << node_names(args) << "};\n";
writer << "std::vector<void*>{" << out[0].get_name() << "}.data());\n"; writer << "void* output[] = {" << node_names(out) << "};\n";
writer << "gpu::invoke_primitive(ctx, " << index << ", input, output);\n";
writer.block_end();
return; return;
} }
...@@ -876,10 +864,10 @@ namespace ngraph ...@@ -876,10 +864,10 @@ namespace ngraph
args[0].get_shape(), args[0].get_shape(),
axes_vec); axes_vec);
writer << "gpu::invoke_primitive(ctx, " << sum_index << ", "; writer << "void* input[] = {" << node_names(args) << "};\n";
writer << "std::vector<void*>{" << args[0].get_name() << "}.data(), "; writer << "void* output[] = {" << node_names(out) << "};\n";
writer << "std::vector<void*>{" << out[0].get_name() << "}.data()"; writer << "gpu::invoke_primitive(ctx, " << sum_index
writer << ");\n"; << ", input, output);\n";
} }
} }
} }
...@@ -920,10 +908,10 @@ namespace ngraph ...@@ -920,10 +908,10 @@ namespace ngraph
args[0].get_shape(), args[0].get_shape(),
product->get_reduction_axes()); product->get_reduction_axes());
writer << "gpu::invoke_primitive(ctx, " << index << ", "; writer << "void* input[] = {" << node_names(args) << "};\n";
writer << "std::vector<void*>{" << args[0].get_name() << "}.data(), "; writer << "void* output[] = {" << node_names(out) << "};\n";
writer << "std::vector<void*>{" << out[0].get_name() << "}.data()"; writer << "gpu::invoke_primitive(ctx, " << index
writer << ");\n"; << ", input, output);\n";
} }
} }
} }
...@@ -1017,10 +1005,10 @@ namespace ngraph ...@@ -1017,10 +1005,10 @@ namespace ngraph
args[0].get_shape(), args[0].get_shape(),
reduce_op->get_reduction_axes()); reduce_op->get_reduction_axes());
writer << "gpu::invoke_primitive(ctx, " << reduce_index << ", "; writer << "void* input[] = {" << node_names(args) << "};\n";
writer << "std::vector<void*>{" << args[0].get_name() << "}.data(), "; writer << "void* output[] = {" << node_names(out) << "};\n";
writer << "std::vector<void*>{" << out[0].get_name() << "}.data()"; writer << "gpu::invoke_primitive(ctx, " << reduce_index
writer << ");\n"; << ", input, output);\n";
} }
} }
} }
...@@ -1123,10 +1111,10 @@ namespace ngraph ...@@ -1123,10 +1111,10 @@ namespace ngraph
reduce_window_op->get_window_shape(), reduce_window_op->get_window_shape(),
reduce_window_op->get_window_movement_strides()); reduce_window_op->get_window_movement_strides());
writer << "gpu::invoke_primitive(ctx, " << reduce_index << ", "; writer << "void* input[] = {" << node_names(args) << "};\n";
writer << "std::vector<void*>{" << args[0].get_name() << "}.data(), "; writer << "void* output[] = {" << node_names(out) << "};\n";
writer << "std::vector<void*>{" << out[0].get_name() << "}.data()"; writer << "gpu::invoke_primitive(ctx, " << reduce_index
writer << ");\n"; << ", input, output);\n";
} }
} }
} }
...@@ -1156,11 +1144,9 @@ namespace ngraph ...@@ -1156,11 +1144,9 @@ namespace ngraph
padding_below, padding_below,
padding_above, padding_above,
padding_interior); padding_interior);
writer << "gpu::invoke_primitive(ctx, " << pad_index << ", "; writer << "void* input[] = {" << node_names(args) << "};\n";
writer << "std::vector<void*>{" << args[0].get_name() << ", " writer << "void* output[] = {" << node_names(out) << "};\n";
<< args[1].get_name() << "}.data(), "; writer << "gpu::invoke_primitive(ctx, " << pad_index << ", input, output);\n";
writer << "std::vector<void*>{" << out[0].get_name() << "}.data() ";
writer << ");\n";
} }
writer.block_end(); writer.block_end();
} }
...@@ -1204,10 +1190,11 @@ namespace ngraph ...@@ -1204,10 +1190,11 @@ namespace ngraph
max_pool_index = cudnn_emitter->build_primitive(max_pool); max_pool_index = cudnn_emitter->build_primitive(max_pool);
} }
writer << "gpu::invoke_primitive(ctx, " << max_pool_index << ", "; writer.block_begin();
writer << "std::vector<void*>{" << args[0].get_name() << "}.data(), "; writer << "void* input[] = {" << node_names(args) << "};\n";
writer << "std::vector<void*>{" << out[0].get_name() << "}.data()"; writer << "void* output[] = {" << node_names(out) << "};\n";
writer << ");\n"; writer << "gpu::invoke_primitive(ctx, " << max_pool_index << ", input, output);\n";
writer.block_end();
} }
template <> template <>
...@@ -1224,7 +1211,7 @@ namespace ngraph ...@@ -1224,7 +1211,7 @@ namespace ngraph
if (fp_input_shape.size() >= 4) if (fp_input_shape.size() >= 4)
{ {
auto max_pool_bp_index = auto index =
cudnn_emitter->build_pooling(CUDNN_POOLING_MAX, cudnn_emitter->build_pooling(CUDNN_POOLING_MAX,
out[0].get_type(), out[0].get_type(),
CUDNNEmitter::Prop::Backward, CUDNNEmitter::Prop::Backward,
...@@ -1235,11 +1222,9 @@ namespace ngraph ...@@ -1235,11 +1222,9 @@ namespace ngraph
mpb->get_padding_below(), mpb->get_padding_below(),
mpb->get_padding_above()); mpb->get_padding_above());
writer << "gpu::invoke_primitive(ctx, " << max_pool_bp_index << ", "; writer << "void* input[] = {" << node_names(args) << "};\n";
writer << "std::vector<void*>{" << args[0].get_name() << ", " writer << "void* output[] = {" << node_names(out) << "};\n";
<< args[1].get_name() << "}.data(), "; writer << "gpu::invoke_primitive(ctx, " << index << ", input, output);\n";
writer << "std::vector<void*>{" << out[0].get_name() << "}.data()";
writer << ");\n";
} }
} }
writer.block_end(); writer.block_end();
...@@ -1264,29 +1249,18 @@ namespace ngraph ...@@ -1264,29 +1249,18 @@ namespace ngraph
direction = CUDNNEmitter::Prop::Inference; direction = CUDNNEmitter::Prop::Inference;
} }
auto bn_index = cudnn_emitter->build_batchnorm(CUDNN_BATCHNORM_SPATIAL, auto index = cudnn_emitter->build_batchnorm(CUDNN_BATCHNORM_SPATIAL,
out[0].get_type(), out[0].get_type(),
direction, direction,
args[2].get_shape(), args[2].get_shape(),
args[0].get_shape(), args[0].get_shape(),
batchnorm->get_eps_value()); batchnorm->get_eps_value());
writer.block_begin(); writer.block_begin();
{ {
writer << "gpu::invoke_primitive(ctx, " << bn_index << ", "; writer << "void* input[] = {" << node_names(args) << "};\n";
writer << "std::vector<void*>{" << args.front().get_name(); writer << "void* output[] = {" << node_names(out) << "};\n";
for (size_t i = 1; i < args.size(); i++) writer << "gpu::invoke_primitive(ctx, " << index << ", input, output);\n";
{
writer << ", " << args[i].get_name();
}
writer << "}.data(), ";
writer << "std::vector<void*>{" << out.front().get_name();
for (size_t i = 1; i < out.size(); i++)
{
writer << ", " << out[i].get_name();
}
writer << "}.data()";
writer << ");\n";
} }
writer.block_end(); writer.block_end();
} }
...@@ -1300,29 +1274,18 @@ namespace ngraph ...@@ -1300,29 +1274,18 @@ namespace ngraph
auto& cudnn_emitter = auto& cudnn_emitter =
external_function->get_primitive_emitter()->get_cudnn_emitter(); external_function->get_primitive_emitter()->get_cudnn_emitter();
auto bn_index = cudnn_emitter->build_batchnorm(CUDNN_BATCHNORM_SPATIAL, auto index = cudnn_emitter->build_batchnorm(CUDNN_BATCHNORM_SPATIAL,
out[0].get_type(), out[0].get_type(),
CUDNNEmitter::Prop::Backward, CUDNNEmitter::Prop::Backward,
args[2].get_shape(), args[2].get_shape(),
args[0].get_shape(), args[0].get_shape(),
batchnorm->get_eps_value()); batchnorm->get_eps_value());
writer.block_begin(); writer.block_begin();
{ {
writer << "gpu::invoke_primitive(ctx, " << bn_index << ", "; writer << "void* input[] = {" << node_names(args) << "};\n";
writer << "std::vector<void*>{" << args.front().get_name(); writer << "void* output[] = {" << node_names(out) << "};\n";
for (size_t i = 1; i < args.size(); i++) writer << "gpu::invoke_primitive(ctx, " << index << ", input, output);\n";
{
writer << ", " << args[i].get_name();
}
writer << "}.data(), ";
writer << "std::vector<void*>{" << out.front().get_name();
for (size_t i = 1; i < out.size(); i++)
{
writer << ", " << out[i].get_name();
}
writer << "}.data()";
writer << ");\n";
} }
writer.block_end(); writer.block_end();
} }
...@@ -1378,16 +1341,7 @@ namespace ngraph ...@@ -1378,16 +1341,7 @@ namespace ngraph
auto padding_below = avg_pool->get_padding_below(); auto padding_below = avg_pool->get_padding_below();
auto padding_above = avg_pool->get_padding_above(); auto padding_above = avg_pool->get_padding_above();
int num_nontrivial_dims = 0; size_t index = 0;
for (int64_t i = input_shape.size() - 1; i > 1; i--)
{
if (input_shape[i] > 1)
{
num_nontrivial_dims++;
}
}
size_t avg_pool_index = 0;
// if 1d or has asymmetric padding, must handle pooling manually // if 1d or has asymmetric padding, must handle pooling manually
if (input_shape.size() == 3 || padding_below != padding_above) if (input_shape.size() == 3 || padding_below != padding_above)
...@@ -1395,7 +1349,7 @@ namespace ngraph ...@@ -1395,7 +1349,7 @@ namespace ngraph
auto& cuda_emitter = auto& cuda_emitter =
external_function->get_primitive_emitter()->get_cuda_emitter(); external_function->get_primitive_emitter()->get_cuda_emitter();
avg_pool_index = index =
cuda_emitter->build_avg_pool({{args[0].get_type(), out[0].get_type()}}, cuda_emitter->build_avg_pool({{args[0].get_type(), out[0].get_type()}},
input_shape, input_shape,
result_shape, result_shape,
...@@ -1415,7 +1369,7 @@ namespace ngraph ...@@ -1415,7 +1369,7 @@ namespace ngraph
? CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING ? CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING
: CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING; : CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING;
avg_pool_index = cudnn_emitter->build_pooling( index = cudnn_emitter->build_pooling(
cudnn_avg_type, cudnn_avg_type,
out[0].get_type(), out[0].get_type(),
CUDNNEmitter::Prop::Forward, CUDNNEmitter::Prop::Forward,
...@@ -1433,10 +1387,9 @@ namespace ngraph ...@@ -1433,10 +1387,9 @@ namespace ngraph
"Pooling currently only supports up to 3 spatial dimensions."); "Pooling currently only supports up to 3 spatial dimensions.");
} }
writer << "gpu::invoke_primitive(ctx, " << avg_pool_index << ", "; writer << "void* input[] = {" << node_names(args) << "};\n";
writer << "std::vector<void*>{" << args[0].get_name() << "}.data(), "; writer << "void* output[] = {" << node_names(out) << "};\n";
writer << "std::vector<void*>{" << out[0].get_name() << "}.data()"; writer << "gpu::invoke_primitive(ctx, " << index << ", input, output);\n";
writer << ");\n";
} }
writer.block_end(); writer.block_end();
} }
...@@ -1459,7 +1412,7 @@ namespace ngraph ...@@ -1459,7 +1412,7 @@ namespace ngraph
? CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING ? CUDNN_POOLING_AVERAGE_COUNT_INCLUDE_PADDING
: CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING; : CUDNN_POOLING_AVERAGE_COUNT_EXCLUDE_PADDING;
auto avg_pool_bp_index = auto index =
cudnn_emitter->build_pooling(cudnn_avg_type, cudnn_emitter->build_pooling(cudnn_avg_type,
out[0].get_type(), out[0].get_type(),
CUDNNEmitter::Prop::Backward, CUDNNEmitter::Prop::Backward,
...@@ -1470,15 +1423,13 @@ namespace ngraph ...@@ -1470,15 +1423,13 @@ namespace ngraph
apb->get_padding_below(), apb->get_padding_below(),
apb->get_padding_above()); apb->get_padding_above());
writer << "gpu::invoke_primitive(ctx, " << avg_pool_bp_index << ", ";
// cuDNN backwards pooling requests input and output tensors from // cuDNN backwards pooling requests input and output tensors from
// the forward pass but does not use them. It also behaves differently // the forward pass but does not use them. It also behaves differently
// for max pool vs avg pool. The repetition of args below is to address // for max pool vs avg pool. The repetition of args below is to address
// this interface in a way that supports both max and avg pooling // this interface in a way that supports both max and avg pooling
writer << "std::vector<void*>{" << args[0].get_name() << ", " writer << "void* input[] = {" << node_names(args, {0, 0}) << "};\n";
<< args[0].get_name() << "}.data(), "; writer << "void* output[] = {" << node_names(out) << "};\n";
writer << "std::vector<void*>{" << out[0].get_name() << "}.data()"; writer << "gpu::invoke_primitive(ctx, " << index << ", input, output);\n";
writer << ");\n";
} }
} }
writer.block_end(); writer.block_end();
...@@ -1497,11 +1448,9 @@ namespace ngraph ...@@ -1497,11 +1448,9 @@ namespace ngraph
auto index = cuda_emitter->build_primitive(rep_slice, in_place_op); auto index = cuda_emitter->build_primitive(rep_slice, in_place_op);
writer << "gpu::invoke_primitive(ctx, " << index << ", "; writer << "void* input[] = {" << node_names(args) << "};\n";
writer << "std::vector<void*>{" << args[0].get_name() << ", " writer << "void* output[] = {" << node_names(out) << "};\n";
<< args[1].get_name() << "}.data(), "; writer << "gpu::invoke_primitive(ctx, " << index << ", input, output);\n";
writer << "std::vector<void*>{" << out[0].get_name() << "}.data()";
writer << ");\n";
} }
writer.block_end(); writer.block_end();
} }
...@@ -1512,30 +1461,29 @@ namespace ngraph ...@@ -1512,30 +1461,29 @@ namespace ngraph
auto softmax = static_cast<const ngraph::op::Softmax*>(node); auto softmax = static_cast<const ngraph::op::Softmax*>(node);
writer.block_begin(); writer.block_begin();
{ {
size_t softmax_index; size_t index;
if (softmax->get_axes().size() != args[0].get_shape().size()) if (softmax->get_axes().size() != args[0].get_shape().size())
{ {
auto& cuda_emitter = auto& cuda_emitter =
external_function->get_primitive_emitter()->get_cuda_emitter(); external_function->get_primitive_emitter()->get_cuda_emitter();
softmax_index = cuda_emitter->build_primitive(softmax); index = cuda_emitter->build_primitive(softmax);
} }
else else
{ {
auto& cudnn_emitter = auto& cudnn_emitter =
external_function->get_primitive_emitter()->get_cudnn_emitter(); external_function->get_primitive_emitter()->get_cudnn_emitter();
softmax_index = cudnn_emitter->build_softmax(CUDNN_SOFTMAX_FAST, index = cudnn_emitter->build_softmax(CUDNN_SOFTMAX_FAST,
CUDNN_SOFTMAX_MODE_INSTANCE, CUDNN_SOFTMAX_MODE_INSTANCE,
out[0].get_type(), out[0].get_type(),
CUDNNEmitter::Prop::Forward, CUDNNEmitter::Prop::Forward,
args[0].get_shape()); args[0].get_shape());
} }
writer << "gpu::invoke_primitive(ctx, " << softmax_index << ", "; writer << "void* input[] = {" << node_names(args) << "};\n";
writer << "std::vector<void*>{" << args[0].get_name() << "}.data(), "; writer << "void* output[] = {" << node_names(out) << "};\n";
writer << "std::vector<void*>{" << out[0].get_name() << "}.data()"; writer << "gpu::invoke_primitive(ctx, " << index << ", input, output);\n";
writer << ");\n";
} }
writer.block_end(); writer.block_end();
} }
......
...@@ -87,18 +87,22 @@ namespace ngraph ...@@ -87,18 +87,22 @@ namespace ngraph
dtypes.push_back(out[0].get_type()); dtypes.push_back(out[0].get_type());
auto ew_index = auto ew_index =
cuda_emitter->build_elementwise<T>(dtypes, out[0].get_shape()); cuda_emitter->build_elementwise<T>(dtypes, out[0].get_shape());
writer << "gpu::invoke_primitive(ctx, " << ew_index << ", "; writer << "void* input[] = {" << node_names(args) << "};\n";
writer << "std::vector<void*>{" << args.front().get_name(); writer << "void* output[] = {" << node_names(out) << "};\n";
for (size_t i = 1; i < args.size(); i++) writer << "gpu::invoke_primitive(ctx, " << ew_index
{ << ", input, output);\n";
writer << ", " << args[i].get_name();
}
writer << "}.data(), ";
writer << "std::vector<void*>{" << out[0].get_name() << "}.data()";
writer << ");\n";
} }
writer.block_end(); writer.block_end();
} }
private:
/// \brief Create a list of node names for each arg in args
/// \param args list of tensor arguments
/// \param arg_indexes a list of indexes into args for which args to include in
/// the output list, so {1, 2} will include args 1 and 2 and skip 0.
    /// \return a string containing "arg0_name, arg1_name, etc."
static std::string node_names(const std::vector<GPU_TensorViewWrapper>& args,
std::initializer_list<int> arg_indexes = {});
}; };
Shape get_padded_shape(const Shape& input_shape, Shape get_padded_shape(const Shape& input_shape,
const Shape& padding_below, const Shape& padding_below,
......
...@@ -455,8 +455,9 @@ void runtime::gpu::GPU_ExternalFunction::emit_temp_mem_pool_allocation( ...@@ -455,8 +455,9 @@ void runtime::gpu::GPU_ExternalFunction::emit_temp_mem_pool_allocation(
{ {
m_writer << "// Allocate the memory pool\n"; m_writer << "// Allocate the memory pool\n";
// TODO memory pool malloc. // TODO memory pool malloc.
m_writer << "void* pool_base_ptr = ngraph::runtime::gpu::invoke_memory_primitive(ctx, " m_writer
<< m_tensor_memory_buffers->at(current_function->get_name()) << ");\n"; << "char* pool_base_ptr = (char*)ngraph::runtime::gpu::invoke_memory_primitive(ctx, "
<< m_tensor_memory_buffers->at(current_function->get_name()) << ");\n";
// Add temporaries to the variable name map // Add temporaries to the variable name map
for (shared_ptr<Node> node : m_function_ordered_ops.at(current_function)) for (shared_ptr<Node> node : m_function_ordered_ops.at(current_function))
...@@ -464,8 +465,8 @@ void runtime::gpu::GPU_ExternalFunction::emit_temp_mem_pool_allocation( ...@@ -464,8 +465,8 @@ void runtime::gpu::GPU_ExternalFunction::emit_temp_mem_pool_allocation(
for (descriptor::Tensor* tensor : node->liveness_new_list) for (descriptor::Tensor* tensor : node->liveness_new_list)
{ {
stringstream ss; stringstream ss;
ss << "((" << tensor->get_element_type().c_type_string() ss << "((" << tensor->get_element_type().c_type_string() << "*)(pool_base_ptr + "
<< "*)((char *)pool_base_ptr + " << tensor->get_pool_offset() << "))"; << tensor->get_pool_offset() << "))";
m_variable_name_map[tensor->get_name()] = ss.str(); m_variable_name_map[tensor->get_name()] = ss.str();
} }
} }
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment