Commit 78b0a365, authored by Rob Earhart and committed by Scott Cyphers

Minor perf tweaks (#2095)

* Only synchronize stale input tensors

* Cache backend.call() compilations

* Cache invoker bindings

* Update test list
parent 1e5e7145
......@@ -52,11 +52,7 @@ bool ngraph::runtime::plaidml::PlaidML_Backend::call(
const std::vector<std::shared_ptr<runtime::Tensor>>& outputs,
const std::vector<std::shared_ptr<runtime::Tensor>>& inputs)
{
auto cfunc = m_cache.try_lookup(func);
if (!cfunc)
{
cfunc = m_compiler.compile(func);
}
auto cfunc = m_cache.compile(func, &m_compiler);
cfunc->schedule_invocation(inputs, outputs);
return true;
}
......
......@@ -42,19 +42,31 @@ bool ngraph::runtime::plaidml::CompiledFunction::schedule_invocation(
NGRAPH_DEBUG << "Binding PlaidML function " << this;
m_bound_inputs.resize(inputs.size());
m_bound_outputs.resize(outputs.size());
std::size_t input_count = 0;
for (const auto& param : m_func->get_parameters())
{
for (std::size_t idx = 0; idx < param->get_output_size(); ++idx)
{
descriptor::Tensor* tv = param->get_output_tensor_ptr(idx).get();
auto rtv = dynamic_cast<PlaidML_Tensor*>(inputs[input_count++].get());
auto& input = inputs.at(input_count);
auto rtv = dynamic_cast<PlaidML_Tensor*>(input.get());
if (!rtv)
{
throw std::runtime_error{
"The PlaidML backend only operations on PlaidML tensor views"};
"The PlaidML backend only operates on PlaidML tensor views"};
}
rtv->sync_input();
auto& bound_input = m_bound_inputs.at(input_count);
++input_count;
if (bound_input.lock() == input)
{
// No need to re-bind this input.
continue;
}
bound_input = input;
NGRAPH_DEBUG << "Binding input " << m_input_names.at(tv) << " to tensor " << rtv;
m_invoker.set_input(m_input_names.at(tv), rtv->tensor());
}
......@@ -66,12 +78,21 @@ bool ngraph::runtime::plaidml::CompiledFunction::schedule_invocation(
for (std::size_t idx = 0; idx < result->get_output_size(); ++idx)
{
descriptor::Tensor* tv = result->get_output_tensor_ptr(idx).get();
auto rtv = dynamic_cast<PlaidML_Tensor*>(outputs[output_count++].get());
auto& output = outputs.at(output_count);
auto rtv = dynamic_cast<PlaidML_Tensor*>(output.get());
if (!rtv)
{
throw std::runtime_error{
"The PlaidML backend only operations on PlaidML tensor views"};
"The PlaidML backend only operates on PlaidML tensor views"};
}
auto& bound_output = m_bound_outputs.at(output_count);
++output_count;
if (bound_output.lock() == output)
{
// No need to re-bind this output.
continue;
}
bound_output = output;
NGRAPH_DEBUG << "Binding output " << m_output_names.at(tv) << " to tensor " << rtv;
m_invoker.set_output(m_output_names.at(tv), rtv->tensor());
}
......@@ -91,7 +112,7 @@ bool ngraph::runtime::plaidml::CompiledFunction::schedule_invocation(
if (!rtv)
{
throw std::runtime_error{
"The PlaidML backend only operations on PlaidML tensor views"};
"The PlaidML backend only operates on PlaidML tensor views"};
}
rtv->sync_output();
}
......
......@@ -58,5 +58,7 @@ private:
std::shared_ptr<Function> m_func;
std::unordered_map<descriptor::Tensor*, std::string> m_input_names;
std::unordered_map<descriptor::Tensor*, std::string> m_output_names;
mutable std::vector<std::weak_ptr<runtime::Tensor>> m_bound_inputs;
mutable std::vector<std::weak_ptr<runtime::Tensor>> m_bound_outputs;
mutable vertexai::plaidml::invoker m_invoker;
};
......@@ -101,6 +101,11 @@ void ngraph::runtime::plaidml::PlaidML_Tensor::read(void* p, size_t tensor_offse
void ngraph::runtime::plaidml::PlaidML_Tensor::sync_input()
{
if (!get_stale())
{
return;
}
set_stale(false);
if (!m_memory)
{
if (m_is_logically_zero)
......@@ -122,6 +127,7 @@ void ngraph::runtime::plaidml::PlaidML_Tensor::sync_output()
{
// The tensor's been used for an output, so it's no longer logically zero.
m_is_logically_zero = false;
set_stale(false);
if (!m_memory)
{
......
......@@ -26,12 +26,15 @@ topk_1d_max_one # No plans to implement TopK
topk_1d_min_all # No plans to implement TopK
topk_1d_min_partial # No plans to implement TopK
topk_1d_min_one # No plans to implement TopK
topk_3d_large_input_max # No plans to implement TopK
topk_3d_large_input_min # No plans to implement TopK
topk_3d_max_all # No plans to implement TopK
topk_3d_max_partial # No plans to implement TopK
topk_3d_max_one # No plans to implement TopK
topk_3d_min_all # No plans to implement TopK
topk_3d_min_partial # No plans to implement TopK
topk_3d_min_one # No plans to implement TopK
topk_3d_single_output # No plans to implement TopK
topk_2d_max_all # No plans to implement TopK
topk_2d_max_partial # No plans to implement TopK
topk_2d_max_one # No plans to implement TopK
......@@ -43,15 +46,21 @@ topk_5d_max_partial # No plans to implement TopK
# Tests that PlaidML might be able to run at some point.
backwards_maxpool_n2_c1_hw5_3x3_str2_max_pad1x2_2x3
backwards_maxpool_n4c1h4w4_kh2kw2_sh1sw1
backwards_maxpool_n2c1h5w5_kh3kw3_sh2sw2
backwards_maxpool_n4_c1_hw4_2x2_max
backwards_maxpool_n2_c1_hw5_3x3_str2_max
backwards_slice
batchnorm_fprop_bprop # To debug
batchnorm_fprop_bprop_2step # To debug
softmax_axis_3d_double # To debug
reduce_matrix_rows_zero # To debug: possible broadcasting error?
reduce_matrix_cols_zero # To debug: possible broadcasting error?
reduce_3d_to_vector # To debug: possible broadcasting error?
replace_slice_matrix_inplace
max_pool_2d_1channel_1image_overpadded
max_pool_3d
maxpool_bprop_larger_than_cache
reduce_window_emulating_max_pool_1d_1channel_1image
reduce_window_emulating_max_pool_1d_1channel_2image
reduce_window_emulating_max_pool_1d_2channel_2image
......@@ -60,21 +69,34 @@ reduce_window_emulating_max_pool_2d_1channel_1image_strided
select_and_scatter_with_overlap
select_and_scatter_without_overlap
select_and_scatter_3d_without_overlap
generate_mask
avg_pool_3d
avg_pool_3d_uneven_strided_padded_include_in_computation
dequantize_int8_zero_offset # Quantization/Dequantization is unimplemented
dequantize_int32 # Quantization/Dequantization is unimplemented
dequantize_int32_zero_offset # Quantization/Dequantization is unimplemented
dequantize_zero_offset # Quantization/Dequantization is unimplemented
quantize_ROUND_NEAREST_TOWARD_ZERO # Quantization/Dequantization is unimplemented
quantize_ROUND_NEAREST_UPWARD # Quantization/Dequantization is unimplemented
quantize_ROUND_NEAREST_DOWNWARD # Quantization/Dequantization is unimplemented
quantize_ROUND_NEAREST_TOWARD_EVEN # Quantization/Dequantization is unimplemented
quantize_ROUND_NEAREST_TOWARD_INFINITY # Quantization/Dequantization is unimplemented
quantize_ROUND_TOWARD_INFINITY # Quantization/Dequantization is unimplemented
quantize_ROUND_TOWARD_ZERO # Quantization/Dequantization is unimplemented
quantize_ROUND_UP # Quantization/Dequantization is unimplemented
quantize_ROUND_DOWN # Quantization/Dequantization is unimplemented
quantize # Quantization/Dequantization is unimplemented
quantize_zero_offset # Quantization/Dequantization is unimplemented
quantize_axes # Quantization/Dequantization is unimplemented
quantize_int8 # Quantization/Dequantization is unimplemented
quantize_int8_zero_offset # Quantization/Dequantization is unimplemented
quantize_int32 # Quantization/Dequantization is unimplemented
quantize_int32_zero_offset # Quantization/Dequantization is unimplemented
quantize_clamp # Quantization/Dequantization is unimplemented
quantize_clamp_int8 # Quantization/Dequantization is unimplemented
quantize_clamp_int32 # Quantization/Dequantization is unimplemented
quantize_clamp_int32_zero_offset # Quantization/Dequantization is unimplemented
quantize_clamp_uint8 # Quantization/Dequantization is unimplemented
dequantize # Quantization/Dequantization is unimplemented
dequantize_axes # Quantization/Dequantization is unimplemented
dequantize_int8 # Quantization/Dequantization is unimplemented
......@@ -88,3 +110,8 @@ dot_matrix_2x0_0x2 # Empty dims apparently should produce shape
dot_2x0_0 # Empty dims apparently should produce shaped 0s
numeric_float_nan
numeric_double_nan
shape_of_scalar
shape_of_vector
shape_of_matrix
shape_of_5d
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment