Commit 9a924c17 (unverified), authored by Robert Kimball, committed by GitHub

Remove heap allocations during execution (#1583)

* elementwise updated

* add support for constructing any order arg list

* add comment for using node_names

* use array instead of vector for args/output

* fix per review comment

* remove dead code
parent 293ba8b7
This diff is collapsed.
......@@ -87,18 +87,22 @@ namespace ngraph
dtypes.push_back(out[0].get_type());
auto ew_index =
cuda_emitter->build_elementwise<T>(dtypes, out[0].get_shape());
writer << "gpu::invoke_primitive(ctx, " << ew_index << ", ";
writer << "std::vector<void*>{" << args.front().get_name();
for (size_t i = 1; i < args.size(); i++)
{
writer << ", " << args[i].get_name();
}
writer << "}.data(), ";
writer << "std::vector<void*>{" << out[0].get_name() << "}.data()";
writer << ");\n";
writer << "void* input[] = {" << node_names(args) << "};\n";
writer << "void* output[] = {" << node_names(out) << "};\n";
writer << "gpu::invoke_primitive(ctx, " << ew_index
<< ", input, output);\n";
}
writer.block_end();
}
private:
/// \brief Create a list of node names for each arg in args
///
/// The returned string is meant to be spliced into emitted source as the body of a
/// brace initializer, e.g. `void* input[] = {<names>};`.
/// \param args list of tensor arguments
/// \param arg_indexes a list of indexes into args for which args to include in
///        the output list, so {1, 2} will include args 1 and 2 and skip 0.
///        Defaults to an empty list. NOTE(review): an empty list presumably means
///        "include every arg, in order" -- confirm against the definition.
/// \return a string containing "arg0_name, arg1_name, etc."
static std::string node_names(const std::vector<GPU_TensorViewWrapper>& args,
std::initializer_list<int> arg_indexes = {});
};
Shape get_padded_shape(const Shape& input_shape,
const Shape& padding_below,
......
......@@ -455,8 +455,9 @@ void runtime::gpu::GPU_ExternalFunction::emit_temp_mem_pool_allocation(
{
m_writer << "// Allocate the memory pool\n";
// TODO memory pool malloc.
m_writer << "void* pool_base_ptr = ngraph::runtime::gpu::invoke_memory_primitive(ctx, "
<< m_tensor_memory_buffers->at(current_function->get_name()) << ");\n";
m_writer
<< "char* pool_base_ptr = (char*)ngraph::runtime::gpu::invoke_memory_primitive(ctx, "
<< m_tensor_memory_buffers->at(current_function->get_name()) << ");\n";
// Add temporaries to the variable name map
for (shared_ptr<Node> node : m_function_ordered_ops.at(current_function))
......@@ -464,8 +465,8 @@ void runtime::gpu::GPU_ExternalFunction::emit_temp_mem_pool_allocation(
for (descriptor::Tensor* tensor : node->liveness_new_list)
{
stringstream ss;
ss << "((" << tensor->get_element_type().c_type_string()
<< "*)((char *)pool_base_ptr + " << tensor->get_pool_offset() << "))";
ss << "((" << tensor->get_element_type().c_type_string() << "*)(pool_base_ptr + "
<< tensor->get_pool_offset() << "))";
m_variable_name_map[tensor->get_name()] = ss.str();
}
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment