Commit 7a0e0fd0 authored by Kenton Varda

Implement client RPC side of streaming.

parent 56493100
...@@ -35,6 +35,7 @@ namespace capnp { ...@@ -35,6 +35,7 @@ namespace capnp {
class OutgoingRpcMessage; class OutgoingRpcMessage;
class IncomingRpcMessage; class IncomingRpcMessage;
class RpcFlowController;
template <typename SturdyRefHostId> template <typename SturdyRefHostId>
class RpcSystem; class RpcSystem;
...@@ -59,6 +60,7 @@ public: ...@@ -59,6 +60,7 @@ public:
virtual kj::Promise<kj::Maybe<kj::Own<IncomingRpcMessage>>> receiveIncomingMessage() = 0; virtual kj::Promise<kj::Maybe<kj::Own<IncomingRpcMessage>>> receiveIncomingMessage() = 0;
virtual kj::Promise<void> shutdown() = 0; virtual kj::Promise<void> shutdown() = 0;
virtual AnyStruct::Reader baseGetPeerVatId() = 0; virtual AnyStruct::Reader baseGetPeerVatId() = 0;
virtual kj::Own<RpcFlowController> newStream() = 0;
// Construct the RpcFlowController for a new stream on this connection.
}; };
virtual kj::Maybe<kj::Own<Connection>> baseConnect(AnyStruct::Reader vatId) = 0; virtual kj::Maybe<kj::Own<Connection>> baseConnect(AnyStruct::Reader vatId) = 0;
virtual kj::Promise<kj::Own<Connection>> baseAccept() = 0; virtual kj::Promise<kj::Own<Connection>> baseAccept() = 0;
......
...@@ -32,6 +32,7 @@ ...@@ -32,6 +32,7 @@
#include <kj/thread.h> #include <kj/thread.h>
#include <kj/compat/gtest.h> #include <kj/compat/gtest.h>
#include <kj/miniposix.h> #include <kj/miniposix.h>
#include <sys/socket.h>
// TODO(cleanup): Auto-generate stringification functions for union discriminants. // TODO(cleanup): Auto-generate stringification functions for union discriminants.
namespace capnp { namespace capnp {
...@@ -522,6 +523,122 @@ KJ_TEST("FD per message limit") { ...@@ -522,6 +523,122 @@ KJ_TEST("FD per message limit") {
} }
#endif // !_WIN32 && !__CYGWIN__ #endif // !_WIN32 && !__CYGWIN__
// =======================================================================================
class MockSndbufStream final: public kj::AsyncIoStream {
public:
MockSndbufStream(kj::Own<AsyncIoStream> inner, size_t& window, size_t& written)
: inner(kj::mv(inner)), window(window), written(written) {}
kj::Promise<size_t> read(void* buffer, size_t minBytes, size_t maxBytes) override {
return inner->read(buffer, minBytes, maxBytes);
}
kj::Promise<size_t> tryRead(void* buffer, size_t minBytes, size_t maxBytes) override {
return inner->tryRead(buffer, minBytes, maxBytes);
}
kj::Maybe<uint64_t> tryGetLength() override {
return inner->tryGetLength();
}
kj::Promise<uint64_t> pumpTo(AsyncOutputStream& output, uint64_t amount) override {
return inner->pumpTo(output, amount);
}
kj::Promise<void> write(const void* buffer, size_t size) override {
written += size;
return inner->write(buffer, size);
}
kj::Promise<void> write(kj::ArrayPtr<const kj::ArrayPtr<const byte>> pieces) override {
for (auto& piece: pieces) written += piece.size();
return inner->write(pieces);
}
kj::Maybe<kj::Promise<uint64_t>> tryPumpFrom(
kj::AsyncInputStream& input, uint64_t amount) override {
return inner->tryPumpFrom(input, amount);
}
kj::Promise<void> whenWriteDisconnected() override { return inner->whenWriteDisconnected(); }
void shutdownWrite() override { return inner->shutdownWrite(); }
void abortRead() override { return inner->abortRead(); }
void getsockopt(int level, int option, void* value, uint* length) override {
if (level == SOL_SOCKET && option == SO_SNDBUF) {
KJ_ASSERT(*length == sizeof(int));
*reinterpret_cast<int*>(value) = window;
} else {
KJ_UNIMPLEMENTED("not implemented for test", level, option);
}
}
private:
kj::Own<AsyncIoStream> inner;
size_t& window;
size_t& written;
};
KJ_TEST("Streaming over RPC") {
// Exercises client-side flow control for streaming calls: requests are sent until send() stops
// resolving promptly, then server-side calls are completed one at a time to check that each
// completion lets the client send again.
kj::EventLoop loop;
kj::WaitScope waitScope(loop);
auto pipe = kj::newTwoWayPipe();
// `window` is the value both mock streams report for getsockopt(SO_SNDBUF). The two byte
// counters are updated by the mocks but are not asserted on anywhere in this test.
size_t window = 1024;
size_t clientWritten = 0;
size_t serverWritten = 0;
pipe.ends[0] = kj::heap<MockSndbufStream>(kj::mv(pipe.ends[0]), window, clientWritten);
pipe.ends[1] = kj::heap<MockSndbufStream>(kj::mv(pipe.ends[1]), window, serverWritten);
// Keep a reference to the server implementation before ownership moves into the capability, so
// the test can inspect `iSum` and `fulfiller` below.
auto ownServer = kj::heap<TestStreamingImpl>();
auto& server = *ownServer;
test::TestStreaming::Client serverCap(kj::mv(ownServer));
TwoPartyClient tpClient(*pipe.ends[0]);
TwoPartyClient tpServer(*pipe.ends[1], serverCap, rpc::twoparty::Side::SERVER);
auto cap = tpClient.bootstrap().castAs<test::TestStreaming>();
// Send stream requests until we can't anymore.
// The loop keeps going only while the previous send's promise is already resolved when polled;
// it exits holding the first promise that did not resolve.
kj::Promise<void> promise = kj::READY_NOW;
uint count = 0;
while (promise.poll(waitScope)) {
promise.wait(waitScope);
auto req = cap.doStreamIRequest();
req.setI(++count);
promise = req.send();
}
// We should have sent... several.
KJ_EXPECT(count > 5);
// Now, cause calls to finish server-side one-at-a-time and check that this causes the client
// side to be willing to send more.
uint countReceived = 0;
for (uint i = 0; i < 50; i++) {
// NOTE(review): TestStreamingImpl is not visible here; presumably iSum accumulates the `i` of
// each call the server has received since it was last reset, so with calls numbered 1, 2, 3, ...
// this checks that exactly one new call (number countReceived) was delivered -- confirm.
KJ_EXPECT(server.iSum == ++countReceived);
server.iSum = 0;
// Presumably completes the server's currently pending call -- confirm against TestStreamingImpl.
KJ_ASSERT_NONNULL(server.fulfiller)->fulfill();
// After that, the send that was pending must now be resolved.
KJ_ASSERT(promise.poll(waitScope));
promise.wait(waitScope);
auto req = cap.doStreamIRequest();
req.setI(++count);
promise = req.send();
if (promise.poll(waitScope)) {
// We'll see a couple of instances where completing one request frees up space to make two
// more. This is because the first few requests we made are a little bit larger than the
// rest due to being pipelined on the bootstrap. Once the bootstrap resolves, the request
// size gets smaller.
promise.wait(waitScope);
req = cap.doStreamIRequest();
req.setI(++count);
promise = req.send();
// We definitely shouldn't have freed up stream space for more than two additional requests!
KJ_ASSERT(!promise.poll(waitScope));
}
}
}
} // namespace } // namespace
} // namespace _ } // namespace _
} // namespace capnp } // namespace capnp
...@@ -23,6 +23,7 @@ ...@@ -23,6 +23,7 @@
#include "serialize-async.h" #include "serialize-async.h"
#include <kj/debug.h> #include <kj/debug.h>
#include <kj/io.h> #include <kj/io.h>
#include <sys/socket.h>
namespace capnp { namespace capnp {
...@@ -167,6 +168,60 @@ private: ...@@ -167,6 +168,60 @@ private:
kj::ArrayPtr<kj::AutoCloseFd> fds; kj::ArrayPtr<kj::AutoCloseFd> fds;
}; };
kj::Own<RpcFlowController> TwoPartyVatNetwork::newStream() {
  // Each stream gets a variable-window controller, with this object passed in as its
  // WindowGetter.
  auto controller = RpcFlowController::newVariableWindowController(*this);
  return controller;
}
size_t TwoPartyVatNetwork::getWindow() {
// The socket's send buffer size -- as returned by getsockopt(SO_SNDBUF) -- tells us how much
// data the kernel itself is willing to buffer. The kernel will increase the send buffer size if
// needed to fill the connection's congestion window. So we can cheat and use it as our stream
// window, too, to make sure we saturate said congestion window.
//
// TODO(perf): Unfortunately, this hack breaks down in the presence of proxying. What we really
// want is the window all the way to the endpoint, which could cross multiple connections. The
// first-hop window could be either too big or too small: it's too big if the first hop has
// much higher bandwidth than the full path (causing buffering at the bottleneck), and it's
// too small if the first hop has much lower latency than the full path (causing not enough
// data to be sent to saturate the connection). To handle this, we could either:
// 1. Have proxies be aware of streaming, by flagging streaming calls in the RPC protocol. The
// proxies would then handle backpressure at each hop. This seems simple to implement but
// requires base RPC protocol changes and might require thinking carefully about e-ordering
// implications. Also, it only fixes underutilization; it does not fix buffer bloat.
// 2. Do our own BBR-like computation, where the client measures the end-to-end latency and
// bandwidth based on the observed sends and returns, and then compute the window based on
// that. This seems complicated, but avoids the need for any changes to the RPC protocol.
// In theory it solves both underutilization and buffer bloat. Note that this approach would
// require the RPC system to use a clock, which feels dirty and adds non-determinism.
if (solSndbufUnimplemented) {
return RpcFlowController::DEFAULT_WINDOW_SIZE;
} else {
// TODO(perf): It might be nice to have a tryGetsockopt() that doesn't require catching
// exceptions?
int bufSize = 0;
KJ_IF_MAYBE(exception, kj::runCatchingExceptions([&]() {
socklen_t len = sizeof(int);
KJ_SWITCH_ONEOF(stream) {
KJ_CASE_ONEOF(s, kj::AsyncIoStream*) {
s->getsockopt(SOL_SOCKET, SO_SNDBUF, &bufSize, &len);
}
KJ_CASE_ONEOF(s, kj::AsyncCapabilityStream*) {
s->getsockopt(SOL_SOCKET, SO_SNDBUF, &bufSize, &len);
}
}
KJ_ASSERT(len == sizeof(bufSize));
})) {
if (exception->getType() != kj::Exception::Type::UNIMPLEMENTED) {
kj::throwRecoverableException(kj::mv(*exception));
}
solSndbufUnimplemented = true;
bufSize = RpcFlowController::DEFAULT_WINDOW_SIZE;
}
return bufSize;
}
}
rpc::twoparty::VatId::Reader TwoPartyVatNetwork::getPeerVatId() { rpc::twoparty::VatId::Reader TwoPartyVatNetwork::getPeerVatId() {
return peerVatId.getRoot<rpc::twoparty::VatId>(); return peerVatId.getRoot<rpc::twoparty::VatId>();
} }
......
...@@ -44,7 +44,8 @@ typedef VatNetwork<rpc::twoparty::VatId, rpc::twoparty::ProvisionId, ...@@ -44,7 +44,8 @@ typedef VatNetwork<rpc::twoparty::VatId, rpc::twoparty::ProvisionId,
TwoPartyVatNetworkBase; TwoPartyVatNetworkBase;
class TwoPartyVatNetwork: public TwoPartyVatNetworkBase, class TwoPartyVatNetwork: public TwoPartyVatNetworkBase,
private TwoPartyVatNetworkBase::Connection { private TwoPartyVatNetworkBase::Connection,
private RpcFlowController::WindowGetter {
// A `VatNetwork` that consists of exactly two parties communicating over an arbitrary byte // A `VatNetwork` that consists of exactly two parties communicating over an arbitrary byte
// stream. This is used to implement the common case of a client/server network. // stream. This is used to implement the common case of a client/server network.
// //
...@@ -91,6 +92,9 @@ private: ...@@ -91,6 +92,9 @@ private:
ReaderOptions receiveOptions; ReaderOptions receiveOptions;
bool accepted = false; bool accepted = false;
bool solSndbufUnimplemented = false;
// Whether stream.getsockopt(SO_SNDBUF) has been observed to throw UNIMPLEMENTED.
kj::Maybe<kj::Promise<void>> previousWrite; kj::Maybe<kj::Promise<void>> previousWrite;
// Resolves when the previous write completes. This effectively serves as the write queue. // Resolves when the previous write completes. This effectively serves as the write queue.
// Becomes null when shutdown() is called. // Becomes null when shutdown() is called.
...@@ -121,10 +125,15 @@ private: ...@@ -121,10 +125,15 @@ private:
// implements Connection ----------------------------------------------------- // implements Connection -----------------------------------------------------
kj::Own<RpcFlowController> newStream() override;
rpc::twoparty::VatId::Reader getPeerVatId() override; rpc::twoparty::VatId::Reader getPeerVatId() override;
kj::Own<OutgoingRpcMessage> newOutgoingMessage(uint firstSegmentWordSize) override; kj::Own<OutgoingRpcMessage> newOutgoingMessage(uint firstSegmentWordSize) override;
kj::Promise<kj::Maybe<kj::Own<IncomingRpcMessage>>> receiveIncomingMessage() override; kj::Promise<kj::Maybe<kj::Own<IncomingRpcMessage>>> receiveIncomingMessage() override;
kj::Promise<void> shutdown() override; kj::Promise<void> shutdown() override;
// implements WindowGetter ---------------------------------------------------
size_t getWindow() override;
}; };
class TwoPartyServer: private kj::TaskSet::ErrorHandler { class TwoPartyServer: private kj::TaskSet::ErrorHandler {
......
This diff is collapsed.
...@@ -343,6 +343,58 @@ public: ...@@ -343,6 +343,58 @@ public:
// implementations can compute the size more cheaply by summing segment sizes. // implementations can compute the size more cheaply by summing segment sizes.
}; };
class RpcFlowController {
// Tracks a particular RPC stream in order to implement a flow control algorithm.
//
// Instances are obtained from the static factory functions below or from
// Connection::newStream().
public:
virtual kj::Promise<void> send(kj::Own<OutgoingRpcMessage> message, kj::Promise<void> ack) = 0;
// Like calling message->send(), but the promise resolves when it's a good time to send the
// next message.
//
// `ack` is a promise that resolves when the message has been acknowledged from the other side.
// In practice, `message` is typically a `Call` message and `ack` is a `Return`. Note that this
// means `ack` counts not only time to transmit the message but also time for the remote
// application to process the message. The flow controller is expected to apply backpressure if
// the remote application responds slowly. If `ack` rejects, then all outstanding and future
// sends will propagate the exception.
//
// Note that messages sent with this method must still be delivered in the same order as if they
// had been sent with `message->send()`; they cannot be delayed until later. This is important
// because the message may introduce state changes in the RPC system that later messages rely on,
// such as introducing a new Question ID that a later message may reference. Thus, the controller
// can only create backpressure by having the returned promise resolve slowly.
//
// Dropping the returned promise does not cancel the send. Once send() is called, there's no way
// to stop it.
virtual kj::Promise<void> waitAllAcked() = 0;
// Wait for all `ack`s previously passed to send() to finish. It is an error to call send() again
// after this.
// ---------------------------------------------------------------------------
// Common implementations.
static kj::Own<RpcFlowController> newFixedWindowController(size_t windowSize);
// Constructs a flow controller that implements a strict fixed window of the given size. In other
// words, the controller will throttle the stream when the total bytes in-flight exceeds the
// window.
class WindowGetter {
// Callback interface that supplies the window size to a controller created with
// newVariableWindowController().
public:
virtual size_t getWindow() = 0;
// Returns the window size to use, in bytes.
//
// NOTE(review): how often the controller calls this is not visible from this declaration --
// confirm against the implementation before doing expensive work here.
};
static kj::Own<RpcFlowController> newVariableWindowController(WindowGetter& getter);
// Like newFixedWindowController(), but the window size is allowed to vary over time. Useful if
// you have a technique for estimating one good window size for the connection as a whole but not
// for individual streams. Keep in mind, though, that in situations where the other end of the
// connection is merely proxying capabilities from a variety of final destinations across a
// variety of networks, no single window will be appropriate for all streams.
//
// NOTE(review): `getter` is taken by reference, so it presumably must outlive the returned
// controller -- confirm against the implementation.
static constexpr size_t DEFAULT_WINDOW_SIZE = 65536;
// The window size used by the default implementation of Connection::newStream().
//
// TwoPartyVatNetwork::getWindow() also returns this value when the underlying stream does not
// implement getsockopt(SO_SNDBUF).
};
template <typename VatId, typename ProvisionId, typename RecipientId, template <typename VatId, typename ProvisionId, typename RecipientId,
typename ThirdPartyCapId, typename JoinResult> typename ThirdPartyCapId, typename JoinResult>
class VatNetwork: public _::VatNetworkBase { class VatNetwork: public _::VatNetworkBase {
...@@ -387,6 +439,19 @@ public: ...@@ -387,6 +439,19 @@ public:
// connection is ready, so that the caller doesn't need to know the difference. // connection is ready, so that the caller doesn't need to know the difference.
public: public:
virtual kj::Own<RpcFlowController> newStream() {
  // Use the named constant rather than repeating the literal, so that this default stays in sync
  // with RpcFlowController::DEFAULT_WINDOW_SIZE (documented as the window used here).
  return RpcFlowController::newFixedWindowController(RpcFlowController::DEFAULT_WINDOW_SIZE);
}
// Construct a flow controller for a new stream on this connection. The controller can be
// passed into OutgoingRpcMessage::sendStreaming().
//
// The default implementation returns a dummy stream controller that just applies a fixed
// window of RpcFlowController::DEFAULT_WINDOW_SIZE (64k) to everything. This always works but
// may constrain throughput on networks where the bandwidth-delay product is high, while
// conversely providing too much buffer when the bandwidth-delay product is low.
//
// Connection implementations that can estimate a better window should override this method.
//
// TODO(perf): We should introduce a flow controller implementation that uses a clock to
//   measure RTT and bandwidth and dynamically update the window size, like BBR.
// Level 0 features ---------------------------------------------- // Level 0 features ----------------------------------------------
virtual typename VatId::Reader getPeerVatId() = 0; virtual typename VatId::Reader getPeerVatId() = 0;
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment