async-io.h 38.4 KB
Newer Older
Kenton Varda's avatar
Kenton Varda committed
1 2
// Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors
// Licensed under the MIT License:
3
//
Kenton Varda's avatar
Kenton Varda committed
4 5 6 7 8 9
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
10
//
Kenton Varda's avatar
Kenton Varda committed
11 12
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
13
//
Kenton Varda's avatar
Kenton Varda committed
14 15 16 17 18 19 20
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.
21

22
#pragma once
23

24 25 26 27
#if defined(__GNUC__) && !KJ_HEADER_WARNINGS
#pragma GCC system_header
#endif

28
#include "async.h"
29
#include "function.h"
30
#include "thread.h"
31
#include "timer.h"
32

33 34
struct sockaddr;

35 36
namespace kj {

37 38
#if _WIN32
class Win32EventPort;
Kenton Varda's avatar
Kenton Varda committed
39
class AutoCloseHandle;
40
#else
41
class UnixEventPort;
42 43
#endif

Kenton Varda's avatar
Kenton Varda committed
44
class AutoCloseFd;
45
class NetworkAddress;
46
class AsyncOutputStream;
47
class AsyncIoStream;
48 49 50

// =======================================================================================
// Streaming I/O
51

52
class AsyncInputStream {
53 54
  // Asynchronous equivalent of InputStream (from io.h).

55
public:
56
  virtual Promise<size_t> read(void* buffer, size_t minBytes, size_t maxBytes);
57 58
  virtual Promise<size_t> tryRead(void* buffer, size_t minBytes, size_t maxBytes) = 0;

59
  Promise<void> read(void* buffer, size_t bytes);
60

61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81
  virtual Maybe<uint64_t> tryGetLength();
  // Get the remaining number of bytes that will be produced by this stream, if known.
  //
  // This is used e.g. to fill in the Content-Length header of an HTTP message. If unknown, the
  // HTTP implementation may need to fall back to Transfer-Encoding: chunked.
  //
  // The default implementation always returns null.

  virtual Promise<uint64_t> pumpTo(
      AsyncOutputStream& output, uint64_t amount = kj::maxValue);
  // Read `amount` bytes from this stream (or to EOF) and write them to `output`, returning the
  // total bytes actually pumped (which is only less than `amount` if EOF was reached).
  //
  // Override this if your stream type knows how to pump itself to certain kinds of output
  // streams more efficiently than via the naive approach. You can use
  // kj::dynamicDowncastIfAvailable() to test for stream types you recognize, and if none match,
  // delegate to the default implementation.
  //
  // The default implementation first tries calling output.tryPumpFrom(), but if that fails, it
  // performs a naive pump by allocating a buffer and reading to it / writing from it in a loop.

82 83 84 85 86 87 88
  Promise<Array<byte>> readAllBytes(uint64_t limit = kj::maxValue);
  Promise<String> readAllText(uint64_t limit = kj::maxValue);
  // Read until EOF and return as one big byte array or string. Throw an exception if EOF is not
  // seen before reading `limit` bytes.
  //
  // To prevent runaway memory allocation, consider using a more conservative value for `limit` than
  // the default, particularly on untrusted data streams which may never see EOF.
89 90 91
};

class AsyncOutputStream {
92 93
  // Asynchronous equivalent of OutputStream (from io.h).

94
public:
95 96 97
  virtual Promise<void> write(const void* buffer, size_t size) KJ_WARN_UNUSED_RESULT = 0;
  virtual Promise<void> write(ArrayPtr<const ArrayPtr<const byte>> pieces)
      KJ_WARN_UNUSED_RESULT = 0;
98 99 100 101 102 103 104 105 106 107 108

  virtual Maybe<Promise<uint64_t>> tryPumpFrom(
      AsyncInputStream& input, uint64_t amount = kj::maxValue);
  // Implements double-dispatch for AsyncInputStream::pumpTo().
  //
  // This method should only be called from within an implementation of pumpTo().
  //
  // This method examines the type of `input` to find optimized ways to pump data from it to this
  // output stream. If it finds one, it performs the pump. Otherwise, it returns null.
  //
  // The default implementation always returns null.
109 110 111 112 113 114 115

  virtual Promise<void> whenWriteDisconnected() = 0;
  // Returns a promise that resolves when the stream has become disconnected such that new write()s
  // will fail with a DISCONNECTED exception. This is particularly useful, for example, to cancel
  // work early when it is detected that no one will receive the result.
  //
  // Note that not all streams are able to detect this condition without actually performing a
116 117 118 119
  // write(); such stream implementations may return a promise that never resolves. (In particular,
  // as of this writing, whenWriteDisconnected() is not implemented on Windows. Also, for TCP
  // streams, not all disconnects are detectable -- a power or network failure may lead the
  // connection to hang forever, or until configured socket options lead to a timeout.)
120 121 122
  //
  // Unlike most other asynchronous stream methods, it is safe to call whenWriteDisconnected()
  // multiple times without canceling the previous promises.
123 124 125
};

class AsyncIoStream: public AsyncInputStream, public AsyncOutputStream {
126 127
  // A combination input and output stream.

128
public:
129 130
  virtual void shutdownWrite() = 0;
  // Cleanly shut down just the write end of the stream, while keeping the read end open.
131

132 133 134 135
  virtual void abortRead() {}
  // Similar to shutdownWrite, but this will shut down the read end of the stream, and should only
  // be called when an error has occurred.

136 137 138 139 140
  virtual void getsockopt(int level, int option, void* value, uint* length);
  virtual void setsockopt(int level, int option, const void* value, uint length);
  // Corresponds to getsockopt() and setsockopt() syscalls. Will throw an "unimplemented" exception
  // if the stream is not a socket or the option is not appropriate for the socket type. The
  // default implementations always throw "unimplemented".
141 142 143 144 145 146 147 148 149 150

  virtual void getsockname(struct sockaddr* addr, uint* length);
  virtual void getpeername(struct sockaddr* addr, uint* length);
  // Corresponds to getsockname() and getpeername() syscalls. Will throw an "unimplemented"
  // exception if the stream is not a socket. The default implementations always throw
  // "unimplemented".
  //
  // Note that we don't provide methods that return NetworkAddress because it usually wouldn't
  // be useful. You can't connect() to or listen() on these addresses, obviously, because they are
  // ephemeral addresses for a single connection.
151 152
};

153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177
class AsyncCapabilityStream: public AsyncIoStream {
  // An AsyncIoStream that also allows sending and receiving new connections or other kinds of
  // capabilities, in addition to simple data.
  //
  // For correct functioning, a protocol must be designed such that the receiver knows when to
  // expect a capability transfer. The receiver must not read() when a capability is expected, and
  // must not receiveStream() when data is expected -- if it does, an exception may be thrown or
  // invalid data may be returned. This implies that data sent over an AsyncCapabilityStream must
  // be framed such that the receiver knows exactly how many bytes to read before receiving a
  // capability.
  //
  // On Unix, KJ provides an implementation based on Unix domain sockets and file descriptor
  // passing via SCM_RIGHTS. Due to the nature of SCM_RIGHTS, if the application accidentally
  // read()s when it should have called receiveStream(), it will observe a NUL byte in the data
  // and the capability will be discarded. Of course, an application should not depend on this
  // behavior; it should avoid read()ing through a capability.
  //
  // KJ does not provide any implementation of this type on Windows, as there's no obvious
  // implementation there. Handle passing on Windows requires at least one of the processes
  // involved to have permission to modify the other's handle table, which is effectively full
  // control. Handle passing between mutually non-trusting processes would require a trusted
  // broker process to facilitate. One could possibly implement this type in terms of such a
  // broker, or in terms of direct handle passing if at least one process trusts the other.

public:
178 179 180 181 182 183
  virtual Promise<void> writeWithFds(ArrayPtr<const byte> data,
                                     ArrayPtr<const ArrayPtr<const byte>> moreData,
                                     ArrayPtr<const int> fds) = 0;
  Promise<void> writeWithFds(ArrayPtr<const byte> data,
                             ArrayPtr<const ArrayPtr<const byte>> moreData,
                             ArrayPtr<const AutoCloseFd> fds);
Kenton Varda's avatar
Kenton Varda committed
184
  // Write some data to the stream with some file descriptors attached to it.
185 186 187 188 189 190 191 192 193 194 195 196 197
  //
  // The maximum number of FDs that can be sent at a time is usually subject to an OS-imposed
  // limit. On Linux, this is 253. In practice, sending more than a handful of FDs at once is
  // probably a bad idea.

  struct ReadResult {
    size_t byteCount;
    size_t capCount;
  };

  virtual Promise<ReadResult> tryReadWithFds(void* buffer, size_t minBytes, size_t maxBytes,
                                             AutoCloseFd* fdBuffer, size_t maxFds) = 0;
  // Read data from the stream that may have file descriptors attached. Any attached descriptors
198 199 200 201 202 203
  // will be placed in `fdBuffer`. If multiple bundles of FDs are encountered in the course of
  // reading the amount of data requested by minBytes/maxBytes, then they will be concatenated. If
  // more FDs are received than fit in the buffer, then the excess will be discarded and closed --
  // this behavior, while ugly, is important to defend against denial-of-service attacks that may
  // fill up the FD table with garbage. Applications must think carefully about how many FDs they
  // really need to receive at once and set a well-defined limit.
204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219

  virtual Promise<void> writeWithStreams(ArrayPtr<const byte> data,
                                         ArrayPtr<const ArrayPtr<const byte>> moreData,
                                         Array<Own<AsyncCapabilityStream>> streams) = 0;
  virtual Promise<ReadResult> tryReadWithStreams(
      void* buffer, size_t minBytes, size_t maxBytes,
      Own<AsyncCapabilityStream>* streamBuffer, size_t maxStreams) = 0;
  // Like above, but passes AsyncCapabilityStream objects. The stream implementations must be from
  // the same AsyncIoProvider.

  // ---------------------------------------------------------------------------
  // Helpers for sending individual capabilities.
  //
  // These are equivalent to the above methods with the constraint that only one FD is
  // sent/received at a time and the corresponding data is a single zero-valued byte.

220
  Promise<Own<AsyncCapabilityStream>> receiveStream();
221 222 223
  Promise<Maybe<Own<AsyncCapabilityStream>>> tryReceiveStream();
  Promise<void> sendStream(Own<AsyncCapabilityStream> stream);
  // Transfer a single stream.
224 225

  Promise<AutoCloseFd> receiveFd();
226 227 228
  Promise<Maybe<AutoCloseFd>> tryReceiveFd();
  Promise<void> sendFd(int fd);
  // Transfer a single raw file descriptor.
229 230
};

231 232 233 234 235 236 237
struct OneWayPipe {
  // A data pipe with an input end and an output end.  (Typically backed by pipe() system call.)

  Own<AsyncInputStream> in;
  Own<AsyncOutputStream> out;
};

238 239 240 241 242 243 244
OneWayPipe newOneWayPipe(kj::Maybe<uint64_t> expectedLength = nullptr);
// Constructs a OneWayPipe that operates in-process. The pipe does not do any buffering -- it waits
// until both a read() and a write() call are pending, then resolves both.
//
// If `expectedLength` is non-null, then the pipe will be expected to transmit exactly that many
// bytes. The input end's `tryGetLength()` will return the number of bytes left.

245 246 247 248 249
struct TwoWayPipe {
  // A data pipe that supports sending in both directions.  Each end's output sends data to the
  // other end's input.  (Typically backed by socketpair() system call.)

  Own<AsyncIoStream> ends[2];
250 251
};

252 253 254 255
TwoWayPipe newTwoWayPipe();
// Constructs a TwoWayPipe that operates in-process. The pipe does not do any buffering -- it waits
// until both a read() and a write() call are pending, then resolves both.

256 257 258 259 260 261
struct CapabilityPipe {
  // Like TwoWayPipe but allowing capability-passing.

  Own<AsyncCapabilityStream> ends[2];
};

Harris Hancock's avatar
Harris Hancock committed
262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285
struct Tee {
  // Two AsyncInputStreams which each read the same data from some wrapped inner AsyncInputStream.

  Own<AsyncInputStream> branches[2];
};

Tee newTee(Own<AsyncInputStream> input, uint64_t limit = kj::maxValue);
// Constructs a Tee that operates in-process. The tee buffers data if any read or pump operations is
// called on one of the two input ends. If a read or pump operation is subsequently called on the
// other input end, the buffered data is consumed.
//
// `pumpTo()` operations on the input ends will proactively read from the inner stream and block
// while writing to the output stream. While one branch has an active `pumpTo()` operation, any
// `tryRead()` operation on the other branch will not be allowed to read faster than allowed by the
// pump's backpressure. (In other words, it will never cause buffering on the pump.) Similarly, if
// there are `pumpTo()` operations active on both branches, the greater of the two backpressures is
// respected -- the two pumps progress in lockstep, and there is no buffering.
//
// At no point will a branch's buffer be allowed to grow beyond `limit` bytes. If the buffer would
// grow beyond the limit, an exception is generated, which both branches see once they have
// exhausted their buffers.
//
// It is recommended that you use a more conservative value for `limit` than the default.

286
class ConnectionReceiver {
287 288
  // Represents a server socket listening on a port.

289 290
public:
  virtual Promise<Own<AsyncIoStream>> accept() = 0;
291
  // Accept the next incoming connection.
292 293 294

  virtual uint getPort() = 0;
  // Gets the port number, if applicable (i.e. if listening on IP).  This is useful if you didn't
295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403
  // specify a port when constructing the NetworkAddress -- one will have been assigned
  // automatically.

  virtual void getsockopt(int level, int option, void* value, uint* length);
  virtual void setsockopt(int level, int option, const void* value, uint length);
  // Same as the methods of AsyncIoStream.
};

// =======================================================================================
// Datagram I/O

class AncillaryMessage {
  // Represents an ancillary message (aka control message) received using the recvmsg() system
  // call (or equivalent). Most apps will not use this.

public:
  inline AncillaryMessage(int level, int type, ArrayPtr<const byte> data);
  AncillaryMessage() = default;

  inline int getLevel() const;
  // Originating protocol / socket level.

  inline int getType() const;
  // Protocol-specific message type.

  template <typename T>
  inline Maybe<const T&> as();
  // Interpret the ancillary message as the given struct type. Most ancillary messages are some
  // sort of struct, so this is a convenient way to access it. Returns nullptr if the message
  // is smaller than the struct -- this can happen if the message was truncated due to
  // insufficient ancillary buffer space.

  template <typename T>
  inline ArrayPtr<const T> asArray();
  // Interpret the ancillary message as an array of items. If the message size does not evenly
  // divide into elements of type T, the remainder is discarded -- this can happen if the message
  // was truncated due to insufficient ancillary buffer space.

private:
  int level;
  int type;
  ArrayPtr<const byte> data;
  // Message data. In most cases you should use `as()` or `asArray()`.
};

class DatagramReceiver {
  // Class encapsulating the recvmsg() system call. You must specify the DatagramReceiver's
  // capacity in advance; if a received packet is larger than the capacity, it will be truncated.

public:
  virtual Promise<void> receive() = 0;
  // Receive a new message, overwriting this object's content.
  //
  // receive() may reuse the same buffers for content and ancillary data with each call.

  template <typename T>
  struct MaybeTruncated {
    T value;

    bool isTruncated;
    // True if the Receiver's capacity was insufficient to receive the value and therefore the
    // value is truncated.
  };

  virtual MaybeTruncated<ArrayPtr<const byte>> getContent() = 0;
  // Get the content of the datagram.

  virtual MaybeTruncated<ArrayPtr<const AncillaryMessage>> getAncillary() = 0;
  // Ancilarry messages received with the datagram. See the recvmsg() system call and the cmsghdr
  // struct. Most apps don't need this.
  //
  // If the returned value is truncated, then the last message in the array may itself be
  // truncated, meaning its as<T>() method will return nullptr or its asArray<T>() method will
  // return fewer elements than expected. Truncation can also mean that additional messages were
  // available but discarded.

  virtual NetworkAddress& getSource() = 0;
  // Get the datagram sender's address.

  struct Capacity {
    size_t content = 8192;
    // How much space to allocate for the datagram content. If a datagram is received that is
    // larger than this, it will be truncated, with no way to recover the tail.

    size_t ancillary = 0;
    // How much space to allocate for ancillary messages. As with content, if the ancillary data
    // is larger than this, it will be truncated.
  };
};

class DatagramPort {
public:
  virtual Promise<size_t> send(const void* buffer, size_t size, NetworkAddress& destination) = 0;
  virtual Promise<size_t> send(ArrayPtr<const ArrayPtr<const byte>> pieces,
                               NetworkAddress& destination) = 0;

  virtual Own<DatagramReceiver> makeReceiver(
      DatagramReceiver::Capacity capacity = DatagramReceiver::Capacity()) = 0;
  // Create a new `Receiver` that can be used to receive datagrams. `capacity` specifies how much
  // space to allocate for the received message. The `DatagramPort` must outlive the `Receiver`.

  virtual uint getPort() = 0;
  // Gets the port number, if applicable (i.e. if listening on IP).  This is useful if you didn't
  // specify a port when constructing the NetworkAddress -- one will have been assigned
  // automatically.

  virtual void getsockopt(int level, int option, void* value, uint* length);
  virtual void setsockopt(int level, int option, const void* value, uint length);
  // Same as the methods of AsyncIoStream.
404 405
};

406 407 408
// =======================================================================================
// Networks

Kenton Varda's avatar
Kenton Varda committed
409
class NetworkAddress {
410 411 412 413
  // Represents a remote address to which the application can connect.

public:
  virtual Promise<Own<AsyncIoStream>> connect() = 0;
414
  // Make a new connection to this address.
Kenton Varda's avatar
Kenton Varda committed
415 416 417 418 419 420 421
  //
  // The address must not be a wildcard ("*").  If it is an IP address, it must have a port number.

  virtual Own<ConnectionReceiver> listen() = 0;
  // Listen for incoming connections on this address.
  //
  // The address must be local.
422

423 424 425 426 427 428 429 430
  virtual Own<DatagramPort> bindDatagramPort();
  // Open this address as a datagram (e.g. UDP) port.
  //
  // The address must be local.

  virtual Own<NetworkAddress> clone() = 0;
  // Returns an equivalent copy of this NetworkAddress.

431
  virtual String toString() = 0;
432
  // Produce a human-readable string which hopefully can be passed to Network::parseAddress()
433 434 435 436 437 438
  // to reproduce this address, although whether or not that works of course depends on the Network
  // implementation.  This should be called only to display the address to human users, who will
  // hopefully know what they are able to do with it.
};

class Network {
439 440
  // Factory for NetworkAddress instances, representing the network services offered by the
  // operating system.
441 442 443
  //
  // This interface typically represents broad authority, and well-designed code should limit its
  // use to high-level startup code and user interaction.  Low-level APIs should accept
444
  // NetworkAddress instances directly and work from there, if at all possible.
445 446

public:
Kenton Varda's avatar
Kenton Varda committed
447 448
  virtual Promise<Own<NetworkAddress>> parseAddress(StringPtr addr, uint portHint = 0) = 0;
  // Construct a network address from a user-provided string.  The format of the address
449 450 451 452 453 454
  // strings is not specified at the API level, and application code should make no assumptions
  // about them.  These strings should always be provided by humans, and said humans will know
  // what format to use in their particular context.
  //
  // `portHint`, if provided, specifies the "standard" IP port number for the application-level
  // service in play.  If the address turns out to be an IP address (v4 or v6), and it lacks a
Kenton Varda's avatar
Kenton Varda committed
455
  // port number, this port will be used.  If `addr` lacks a port number *and* `portHint` is
456 457
  // omitted, then the returned address will only support listen() and bindDatagramPort()
  // (not connect()), and an unused port will be chosen each time one of those methods is called.
458

Kenton Varda's avatar
Kenton Varda committed
459 460
  virtual Own<NetworkAddress> getSockaddr(const void* sockaddr, uint len) = 0;
  // Construct a network address from a legacy struct sockaddr.
461 462 463 464

  virtual Own<Network> restrictPeers(
      kj::ArrayPtr<const kj::StringPtr> allow,
      kj::ArrayPtr<const kj::StringPtr> deny = nullptr) KJ_WARN_UNUSED_RESULT = 0;
465
  // Constructs a new Network instance wrapping this one which restricts which peer addresses are
466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485 486 487 488 489 490 491 492 493 494 495 496 497 498 499 500 501 502 503 504 505 506 507 508 509 510 511 512 513 514 515 516 517 518 519 520 521
  // permitted (both for outgoing and incoming connections).
  //
  // Communication will be allowed only with peers whose addresses match one of the patterns
  // specified in the `allow` array. If a `deny` array is specified, then any address which matches
  // a pattern in `deny` and *does not* match any more-specific pattern in `allow` will also be
  // denied.
  //
  // The syntax of address patterns depends on the network, except that three special patterns are
  // defined for all networks:
  // - "private": Matches network addresses that are reserved by standards for private networks,
  //   such as "10.0.0.0/8" or "192.168.0.0/16". This is a superset of "local".
  // - "public": Opposite of "private".
  // - "local": Matches network addresses that are defined by standards to only be accessible from
  //   the local machine, such as "127.0.0.0/8" or Unix domain addresses.
  // - "network": Opposite of "local".
  //
  // For the standard KJ network implementation, the following patterns are also recognized:
  // - Network blocks specified in CIDR notation (ipv4 and ipv6), such as "192.0.2.0/24" or
  //   "2001:db8::/32".
  // - "unix" to match all Unix domain addresses. (In the future, we may support specifying a
  //   glob.)
  // - "unix-abstract" to match Linux's "abstract unix domain" addresses. (In the future, we may
  //   support specifying a glob.)
  //
  // Network restrictions apply *after* DNS resolution (otherwise they'd be useless).
  //
  // It is legal to parseAddress() a restricted address. An exception won't be thrown until
  // connect() is called.
  //
  // It's possible to listen() on a restricted address. However, connections will only be accepted
  // from non-restricted addresses; others will be dropped. If a particular listen address has no
  // valid peers (e.g. because it's a unix socket address and unix sockets are not allowed) then
  // listen() may throw (or may simply never receive any connections).
  //
  // Examples:
  //
  //     auto restricted = network->restrictPeers({"public"});
  //
  // Allows connections only to/from public internet addresses. Use this when connecting to an
  // address specified by a third party that is not trusted and is not themselves already on your
  // private network.
  //
  //     auto restricted = network->restrictPeers({"private"});
  //
  // Allows connections only to/from the private network. Use this on the server side to reject
  // connections from the public internet.
  //
  //     auto restricted = network->restrictPeers({"192.0.2.0/24"}, {"192.0.2.3/32"});
  //
  // Allows connections only to/from 192.0.2.*, except 192.0.2.3 which is blocked.
  //
  //     auto restricted = network->restrictPeers({"10.0.0.0/8", "10.1.2.3/32"}, {"10.1.2.0/24"});
  //
  // Allows connections to/from 10.*.*.*, with the exception of 10.1.2.* (which is denied), with an
  // exception to the exception of 10.1.2.3 (which is allowed, because it is matched by an allow
  // rule that is more specific than the deny rule).
522 523
};

524 525
// =======================================================================================
// I/O Provider
526

527 528 529 530 531 532
class AsyncIoProvider {
  // Class which constructs asynchronous wrappers around the operating system's I/O facilities.
  //
  // Generally, the implementation of this interface must integrate closely with a particular
  // `EventLoop` implementation.  Typically, the EventLoop implementation itself will provide
  // an AsyncIoProvider.
533 534

public:
535 536 537
  virtual OneWayPipe newOneWayPipe() = 0;
  // Creates an input/output stream pair representing the ends of a one-way pipe (e.g. created with
  // the pipe(2) system call).
538

539 540 541
  virtual TwoWayPipe newTwoWayPipe() = 0;
  // Creates two AsyncIoStreams representing the two ends of a two-way pipe (e.g. created with
  // socketpair(2) system call).  Data written to one end can be read from the other.
542

543 544 545 546 547 548 549
  virtual CapabilityPipe newCapabilityPipe();
  // Creates two AsyncCapabilityStreams representing the two ends of a two-way capability pipe.
  //
  // The default implementation throws an unimplemented exception. In particular this is not
  // implemented by the default AsyncIoProvider on Windows, since Windows lacks any sane way to
  // pass handles over a stream.

550 551 552 553 554 555 556 557 558 559 560 561 562 563 564 565 566
  virtual Network& getNetwork() = 0;
  // Creates a new `Network` instance representing the networks exposed by the operating system.
  //
  // DO NOT CALL THIS except at the highest levels of your code, ideally in the main() function.  If
  // you call this from low-level code, then you are preventing higher-level code from injecting an
  // alternative implementation.  Instead, if your code needs to use network functionality, it
  // should ask for a `Network` as a constructor or method parameter, so that higher-level code can
  // chose what implementation to use.  The system network is essentially a singleton.  See:
  //     http://www.object-oriented-security.org/lets-argue/singletons
  //
  // Code that uses the system network should not make any assumptions about what kinds of
  // addresses it will parse, as this could differ across platforms.  String addresses should come
  // strictly from the user, who will know how to write them correctly for their system.
  //
  // With that said, KJ currently supports the following string address formats:
  // - IPv4: "1.2.3.4", "1.2.3.4:80"
  // - IPv6: "1234:5678::abcd", "[1234:5678::abcd]:80"
Kenton Varda's avatar
Kenton Varda committed
567 568
  // - Local IP wildcard (covers both v4 and v6):  "*", "*:80"
  // - Symbolic names:  "example.com", "example.com:80", "example.com:http", "1.2.3.4:http"
569 570
  // - Unix domain: "unix:/path/to/socket"

571 572 573 574 575 576 577 578 579 580 581 582
  struct PipeThread {
    // A combination of a thread and a two-way pipe that communicates with that thread.
    //
    // The fields are intentionally ordered so that the pipe will be destroyed (and therefore
    // disconnected) before the thread is destroyed (and therefore joined).  Thus if the thread
    // arranges to exit when it detects disconnect, destruction should be clean.

    Own<Thread> thread;
    Own<AsyncIoStream> pipe;
  };

  virtual PipeThread newPipeThread(
583
      Function<void(AsyncIoProvider&, AsyncIoStream&, WaitScope&)> startFunc) = 0;
584
  // Create a new thread and set up a two-way pipe (socketpair) which can be used to communicate
585
  // with it.  One end of the pipe is passed to the thread's start function and the other end of
586 587 588 589 590
  // the pipe is returned.  The new thread also gets its own `AsyncIoProvider` instance and will
  // already have an active `EventLoop` when `startFunc` is called.
  //
  // TODO(someday):  I'm not entirely comfortable with this interface.  It seems to be doing too
  //   much at once but I'm not sure how to cleanly break it down.
591 592

  virtual Timer& getTimer() = 0;
593 594 595 596 597 598
  // Returns a `Timer` based on real time.  Time does not pass while event handlers are running --
  // it only updates when the event loop polls for system events.  This means that calling `now()`
  // on this timer does not require a system call.
  //
  // This timer is not affected by changes to the system date.  It is unspecified whether the timer
  // continues to count while the system is suspended.
599
};
600

601 602 603 604 605 606 607 608
class LowLevelAsyncIoProvider {
  // Similar to `AsyncIoProvider`, but represents a lower-level interface that may differ on
  // different operating systems.  You should prefer to use `AsyncIoProvider` over this interface
  // whenever possible, as `AsyncIoProvider` is portable and friendlier to dependency-injection.
  //
  // On Unix, this interface can be used to import native file descriptors into the async framework.
  // Different implementations of this interface might work on top of different event handling
  // primitives, such as poll vs. epoll vs. kqueue vs. some higher-level event library.
609
  //
610
  // On Windows, this interface can be used to import native SOCKETs into the async framework.
611 612
  // Different implementations of this interface might work on top of different event handling
  // primitives, such as I/O completion ports vs. completion routines.
613

614 615 616 617 618 619 620 621 622 623 624
public:
  enum Flags {
    // Flags controlling how to wrap a file descriptor.

    TAKE_OWNERSHIP = 1 << 0,
    // The returned object should own the file descriptor, automatically closing it when destroyed.
    // The close-on-exec flag will be set on the descriptor if it is not already.
    //
    // If this flag is not used, then the file descriptor is not automatically closed and the
    // close-on-exec flag is not modified.

625
#if !_WIN32
626 627 628 629 630 631 632 633 634 635 636 637 638 639
    ALREADY_CLOEXEC = 1 << 1,
    // Indicates that the close-on-exec flag is known already to be set, so need not be set again.
    // Only relevant when combined with TAKE_OWNERSHIP.
    //
    // On Linux, all system calls which yield new file descriptors have flags or variants which
    // set the close-on-exec flag immediately.  Unfortunately, other OS's do not.

    ALREADY_NONBLOCK = 1 << 2
    // Indicates that the file descriptor is known already to be in non-blocking mode, so the flag
    // need not be set again.  Otherwise, all wrap*Fd() methods will enable non-blocking mode
    // automatically.
    //
    // On Linux, all system calls which yield new file descriptors have flags or variants which
    // enable non-blocking mode immediately.  Unfortunately, other OS's do not.
640
#endif
641 642
  };

643 644
#if _WIN32
  typedef uintptr_t Fd;
645
  typedef AutoCloseHandle OwnFd;
646 647 648 649 650
  // On Windows, the `fd` parameter to each of these methods must be a SOCKET, and must have the
  // flag WSA_FLAG_OVERLAPPED (which socket() uses by default, but WSASocket() wants you to specify
  // explicitly).
#else
  typedef int Fd;
651
  typedef AutoCloseFd OwnFd;
652 653 654 655
  // On Unix, any arbitrary file descriptor is supported.
#endif

  virtual Own<AsyncInputStream> wrapInputFd(Fd fd, uint flags = 0) = 0;
656 657
  // Create an AsyncInputStream wrapping a file descriptor.
  //
658
  // `flags` is a bitwise-OR of the values of the `Flags` enum.
659

660
  virtual Own<AsyncOutputStream> wrapOutputFd(Fd fd, uint flags = 0) = 0;
661 662
  // Create an AsyncOutputStream wrapping a file descriptor.
  //
663
  // `flags` is a bitwise-OR of the values of the `Flags` enum.
664

665
  virtual Own<AsyncIoStream> wrapSocketFd(Fd fd, uint flags = 0) = 0;
666 667
  // Create an AsyncIoStream wrapping a socket file descriptor.
  //
668 669
  // `flags` is a bitwise-OR of the values of the `Flags` enum.

670
#if !_WIN32
671
  virtual Own<AsyncCapabilityStream> wrapUnixSocketFd(Fd fd, uint flags = 0);
672 673
  // Like wrapSocketFd() but also support capability passing via SCM_RIGHTS. The socket must be
  // a Unix domain socket.
674 675 676
  //
  // The default implementation throws UNIMPLEMENTED, for backwards-compatibility with
  // LowLevelAsyncIoProvider implementations written before this method was added.
677 678
#endif

679 680
  virtual Promise<Own<AsyncIoStream>> wrapConnectingSocketFd(
      Fd fd, const struct sockaddr* addr, uint addrlen, uint flags = 0) = 0;
681 682
  // Create an AsyncIoStream wrapping a socket and initiate a connection to the given address.
  // The returned promise does not resolve until connection has completed.
683
  //
684
  // `flags` is a bitwise-OR of the values of the `Flags` enum.
685

686 687 688 689 690
  class NetworkFilter {
  public:
    virtual bool shouldAllow(const struct sockaddr* addr, uint addrlen) = 0;
    // Returns true if incoming connections or datagrams from the given peer should be accepted.
    // If false, they will be dropped. This is used to implement kj::Network::restrictPeers().
691 692

    static NetworkFilter& getAllAllowed();
693 694 695 696
  };

  virtual Own<ConnectionReceiver> wrapListenSocketFd(
      Fd fd, NetworkFilter& filter, uint flags = 0) = 0;
697 698 699
  inline Own<ConnectionReceiver> wrapListenSocketFd(Fd fd, uint flags = 0) {
    return wrapListenSocketFd(fd, NetworkFilter::getAllAllowed(), flags);
  }
700 701 702
  // Create an AsyncIoStream wrapping a listen socket file descriptor.  This socket should already
  // have had `bind()` and `listen()` called on it, so it's ready for `accept()`.
  //
703
  // `flags` is a bitwise-OR of the values of the `Flags` enum.
704

705
  virtual Own<DatagramPort> wrapDatagramSocketFd(Fd fd, NetworkFilter& filter, uint flags = 0);
706 707 708
  inline Own<DatagramPort> wrapDatagramSocketFd(Fd fd, uint flags = 0) {
    return wrapDatagramSocketFd(fd, NetworkFilter::getAllAllowed(), flags);
  }
709

710
  virtual Timer& getTimer() = 0;
711 712 713 714 715 716
  // Returns a `Timer` based on real time.  Time does not pass while event handlers are running --
  // it only updates when the event loop polls for system events.  This means that calling `now()`
  // on this timer does not require a system call.
  //
  // This timer is not affected by changes to the system date.  It is unspecified whether the timer
  // continues to count while the system is suspended.
717

718 719 720
  Own<AsyncInputStream> wrapInputFd(OwnFd&& fd, uint flags = 0);
  Own<AsyncOutputStream> wrapOutputFd(OwnFd&& fd, uint flags = 0);
  Own<AsyncIoStream> wrapSocketFd(OwnFd&& fd, uint flags = 0);
721
#if !_WIN32
722
  Own<AsyncCapabilityStream> wrapUnixSocketFd(OwnFd&& fd, uint flags = 0);
723 724
#endif
  Promise<Own<AsyncIoStream>> wrapConnectingSocketFd(
725
      OwnFd&& fd, const struct sockaddr* addr, uint addrlen, uint flags = 0);
726
  Own<ConnectionReceiver> wrapListenSocketFd(
727 728 729 730
      OwnFd&& fd, NetworkFilter& filter, uint flags = 0);
  Own<ConnectionReceiver> wrapListenSocketFd(OwnFd&& fd, uint flags = 0);
  Own<DatagramPort> wrapDatagramSocketFd(OwnFd&& fd, NetworkFilter& filter, uint flags = 0);
  Own<DatagramPort> wrapDatagramSocketFd(OwnFd&& fd, uint flags = 0);
731 732
  // Convenience wrappers which transfer ownership via AutoCloseFd (Unix) or AutoCloseHandle
  // (Windows). TAKE_OWNERSHIP will be implicitly added to `flags`.
733
};
734

735 736
Own<AsyncIoProvider> newAsyncIoProvider(LowLevelAsyncIoProvider& lowLevel);
// Make a new AsyncIoProvider wrapping a `LowLevelAsyncIoProvider`.
737

738 739 740
struct AsyncIoContext {
  Own<LowLevelAsyncIoProvider> lowLevelProvider;
  Own<AsyncIoProvider> provider;
741
  WaitScope& waitScope;
742

743 744 745
#if _WIN32
  Win32EventPort& win32EventPort;
#else
746 747 748
  UnixEventPort& unixEventPort;
  // TEMPORARY: Direct access to underlying UnixEventPort, mainly for waiting on signals. This
  //   field will go away at some point when we have a chance to improve these interfaces.
749
#endif
750
};
751

752
AsyncIoContext setupAsyncIo();
753
// Convenience method which sets up the current thread with everything it needs to do async I/O.
Kenton Varda's avatar
Kenton Varda committed
754
// The returned objects contain an `EventLoop` which is wrapping an appropriate `EventPort` for
755 756 757 758 759 760 761
// doing I/O on the host system, so everything is ready for the thread to start making async calls
// and waiting on promises.
//
// You would typically call this in your main() loop or in the start function of a thread.
// Example:
//
//     int main() {
Kenton Varda's avatar
Kenton Varda committed
762
//       auto ioContext = kj::setupAsyncIo();
763 764
//
//       // Now we can call an async function.
Kenton Varda's avatar
Kenton Varda committed
765
//       Promise<String> textPromise = getHttp(*ioContext.provider, "http://example.com");
766 767 768
//
//       // And we can wait for the promise to complete.  Note that you can only use `wait()`
//       // from the top level, not from inside a promise callback.
David Renshaw's avatar
David Renshaw committed
769
//       String text = textPromise.wait(ioContext.waitScope);
770 771 772
//       print(text);
//       return 0;
//     }
773 774 775 776 777 778
//
// WARNING: An AsyncIoContext can only be used in the thread and process that created it. In
//   particular, note that after a fork(), an AsyncIoContext created in the parent process will
//   not work correctly in the child, even if the parent ceases to use its copy. In particular
//   note that this means that server processes which daemonize themselves at startup must wait
//   until after daemonization to create an AsyncIoContext.
779

780 781 782
// =======================================================================================
// Convenience adapters.

783
class CapabilityStreamConnectionReceiver final: public ConnectionReceiver {
784 785 786 787 788 789 790 791 792 793 794 795 796 797
  // Trivial wrapper which allows an AsyncCapabilityStream to act as a ConnectionReceiver. accept()
  // calls receiveStream().

public:
  CapabilityStreamConnectionReceiver(AsyncCapabilityStream& inner)
      : inner(inner) {}

  Promise<Own<AsyncIoStream>> accept() override;
  uint getPort() override;

private:
  AsyncCapabilityStream& inner;
};

798
class CapabilityStreamNetworkAddress final: public NetworkAddress {
799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823
  // Trivial wrapper which allows an AsyncCapabilityStream to act as a NetworkAddress.
  //
  // connect() is implemented by calling provider.newCapabilityPipe(), sending one end over the
  // original capability stream, and returning the other end.
  //
  // listen().accept() is implemented by receiving new streams over the original stream.
  //
  // Note that clone() dosen't work (due to ownership issues) and toString() returns a static
  // string.

public:
  CapabilityStreamNetworkAddress(AsyncIoProvider& provider, AsyncCapabilityStream& inner)
      : provider(provider), inner(inner) {}

  Promise<Own<AsyncIoStream>> connect() override;
  Own<ConnectionReceiver> listen() override;

  Own<NetworkAddress> clone() override;
  String toString() override;

private:
  AsyncIoProvider& provider;
  AsyncCapabilityStream& inner;
};

824 825 826 827 828 829 830 831 832 833 834 835 836 837 838 839 840 841 842 843 844 845 846 847
// =======================================================================================
// inline implementation details

inline AncillaryMessage::AncillaryMessage(
    int level, int type, ArrayPtr<const byte> data)
    : level(level), type(type), data(data) {}

inline int AncillaryMessage::getLevel() const { return level; }
inline int AncillaryMessage::getType() const { return type; }

template <typename T>
inline Maybe<const T&> AncillaryMessage::as() {
  if (data.size() >= sizeof(T)) {
    return *reinterpret_cast<const T*>(data.begin());
  } else {
    return nullptr;
  }
}

template <typename T>
inline ArrayPtr<const T> AncillaryMessage::asArray() {
  return arrayPtr(reinterpret_cast<const T*>(data.begin()), data.size() / sizeof(T));
}

848
}  // namespace kj