diff --git a/src/ngraph/runtime/executable.cpp b/src/ngraph/runtime/executable.cpp
index 16f619b32c24d3e567b0c8e1bca020f2c6515ad1..884e5e1e5bc916b3a9879278cd688730eb5e738b 100644
--- a/src/ngraph/runtime/executable.cpp
+++ b/src/ngraph/runtime/executable.cpp
@@ -128,6 +128,28 @@ bool runtime::Executable::begin_execute_helper(const vector<shared_ptr<runtime::
                                                const vector<shared_ptr<runtime::Tensor>>& inputs)
 {
     bool rc = call(outputs, inputs);
+    // Make the futures obtained from each tensor's begin_write/begin_read
+    // ready now that execution has finished.  Promises are fulfilled even
+    // when call() reported failure, so waiters never hang on the future.
+    for (const shared_ptr<runtime::Tensor>& t : outputs)
+    {
+        t->m_promise.set_value();
+    }
+    for (const shared_ptr<runtime::Tensor>& t : inputs)
+    {
+        // set_value() on an already-satisfied promise throws
+        // std::future_error (promise_already_satisfied), so skip any tensor
+        // that also appears in `outputs` and was signaled above.
+        bool already_signaled = false;
+        for (const shared_ptr<runtime::Tensor>& o : outputs)
+        {
+            already_signaled = already_signaled || (o == t);
+        }
+        if (!already_signaled)
+        {
+            t->m_promise.set_value();
+        }
+    }
     return rc;
 }
 
diff --git a/src/ngraph/runtime/tensor.hpp b/src/ngraph/runtime/tensor.hpp
index 7f6d93271ccc0ae0d0444121e7df5cd451a02399..f8a272a827ecd8a1ccbf4bd07357e0b57dd6f093 100644
--- a/src/ngraph/runtime/tensor.hpp
+++ b/src/ngraph/runtime/tensor.hpp
@@ -38,6 +38,8 @@ namespace ngraph
     {
         class Tensor
         {
+            friend class Executable;
+
         protected:
             Tensor(const std::shared_ptr<ngraph::descriptor::Tensor>& descriptor)
                 : m_descriptor(descriptor)
diff --git a/test/async.cpp b/test/async.cpp
index 61034b09d0d03aeec03985f60a5b228029455b6a..684e4a525acc92f553305b888f31a09b5d5ee9d4 100644
--- a/test/async.cpp
+++ b/test/async.cpp
@@ -43,18 +43,59 @@ TEST(async, execute)
 
     auto handle = backend->compile(f);
     auto future = handle->begin_execute({r}, {a, b});
-    bool rc = future.get();
+    ASSERT_TRUE(future.valid());
+    EXPECT_TRUE(future.get());
 
     for (float x : result_data)
     {
-        ASSERT_EQ(x, 2);
+        ASSERT_EQ(x, 4);
     }
 }
 
-TEST(async, tensor_read)
+TEST(async, tensor_write)
 {
+    Shape shape{100000};
+    auto A = make_shared<op::Parameter>(element::f32, shape);
+    auto B = make_shared<op::Parameter>(element::f32, shape);
+    auto f = make_shared<Function>(make_shared<op::Add>(A, B), ParameterVector{A, B});
+
+    auto backend = runtime::Backend::create("INTERPRETER");
+    auto handle = backend->compile(f);
+
+    vector<float> data(shape_size(shape), 2);
+    vector<float> result_data(shape_size(shape), 0);
+
+    // Create some tensors for input/output
+    shared_ptr<runtime::Tensor> a = backend->create_tensor(element::f32, shape);
+    shared_ptr<runtime::Tensor> b = backend->create_tensor(element::f32, shape);
+    shared_ptr<runtime::Tensor> r = backend->create_tensor(element::f32, shape, result_data.data());
+
+    auto future_a = a->begin_write(data.data(), data.size() * sizeof(float));
+    auto future_b = b->begin_write(data.data(), data.size() * sizeof(float));
+    ASSERT_TRUE(future_a.valid());
+    ASSERT_TRUE(future_b.valid());
+
+    chrono::milliseconds ten_ms(10);
+    EXPECT_EQ(future_a.wait_for(ten_ms), future_status::timeout);
+    EXPECT_EQ(future_b.wait_for(ten_ms), future_status::timeout);
+
+    this_thread::sleep_for(chrono::milliseconds(500));
+
+    EXPECT_EQ(future_a.wait_for(ten_ms), future_status::timeout);
+    EXPECT_EQ(future_b.wait_for(ten_ms), future_status::timeout);
+
+    auto future = handle->begin_execute({r}, {a, b});
+    EXPECT_TRUE(future.get());
+
+    EXPECT_EQ(future_a.wait_for(ten_ms), future_status::ready);
+    EXPECT_EQ(future_b.wait_for(ten_ms), future_status::ready);
+
+    for (float x : result_data)
+    {
+        ASSERT_EQ(x, 4);
+    }
 }
 
-TEST(async, tensor_write)
+TEST(async, tensor_read)
 {
 }