// Copyright (c) 2017 Sandstorm Development Group, Inc. and contributors
// Licensed under the MIT License:
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#pragma once
// The KJ HTTP client/server library.
//
// This is a simple library which can be used to implement an HTTP client or server. Properties
// of this library include:
// - Uses KJ async framework.
// - Agnostic to transport layer -- you can provide your own.
// - Header parsing is zero-copy -- it results in strings that point directly into the buffer
//   received off the wire.
// - Application code which reads and writes headers refers to headers by symbolic names, not by
//   string literals, with lookups being array-index-based, not map-based. To make this possible,
//   the application announces what headers it cares about in advance, in order to assign numeric
//   values to them.
// - Methods are identified by an enum.

#include <kj/string.h>
#include <kj/vector.h>
#include <kj/memory.h>
#include <kj/one-of.h>
#include <kj/async-io.h>

namespace kj {

// X-macro listing every HTTP method known to this library. Invokes `MACRO(NAME)` once per method,
// in the order listed; the order determines the enumerator order of HttpMethod.
#define KJ_HTTP_FOR_EACH_METHOD(MACRO) \
  MACRO(GET) \
  MACRO(HEAD) \
  MACRO(POST) \
  MACRO(PUT) \
  MACRO(DELETE) \
  MACRO(PATCH) \
  MACRO(PURGE) \
  MACRO(OPTIONS) \
  MACRO(TRACE) \
  /* standard methods */ \
  /* */ \
  /* (CONNECT is intentionally omitted since it is handled specially, via the dedicated */ \
  /* connect() methods of HttpClient and HttpService.) */ \
  \
  MACRO(COPY) \
  MACRO(LOCK) \
  MACRO(MKCOL) \
  MACRO(MOVE) \
  MACRO(PROPFIND) \
  MACRO(PROPPATCH) \
  MACRO(SEARCH) \
  MACRO(UNLOCK) \
  MACRO(ACL) \
  /* WebDAV */ \
  \
  MACRO(REPORT) \
  MACRO(MKACTIVITY) \
  MACRO(CHECKOUT) \
  MACRO(MERGE) \
  /* Subversion */ \
  \
  MACRO(MSEARCH) \
  MACRO(NOTIFY) \
  MACRO(SUBSCRIBE) \
  MACRO(UNSUBSCRIBE)
  /* UPnP */

enum class HttpMethod {
  // Enum of known HTTP methods.
  //
  // We use an enum rather than a string to allow for faster parsing and switching and to reduce
  // ambiguity.

#define DECLARE_METHOD(id) id,
KJ_HTTP_FOR_EACH_METHOD(DECLARE_METHOD)
90
#undef DECLARE_METHOD
91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122
};

kj::StringPtr KJ_STRINGIFY(HttpMethod method);
// Stringification hook for HttpMethod. Returns a non-owning string for the given method --
// presumably the method name as written in KJ_HTTP_FOR_EACH_METHOD; confirm in the implementation.

kj::Maybe<HttpMethod> tryParseHttpMethod(kj::StringPtr name);
// Maps a method name to its HttpMethod enumerator. The Maybe return type indicates that the
// lookup can fail -- presumably null is returned when `name` is not a known method; confirm in the
// implementation.

class HttpHeaderTable;
// Forward declaration: HttpHeaderId (below) stores a pointer to the table it belongs to.

class HttpHeaderId {
  // Identifies an HTTP header by numeric ID that indexes into an HttpHeaderTable.
  //
  // The KJ HTTP API prefers that headers be identified by these IDs for a few reasons:
  // - Integer lookups are much more efficient than string lookups.
  // - Case-insensitivity is awkward to deal with when const strings are being passed to the lookup
  //   method.
  // - Writing out strings less often means fewer typos.
  //
  // See HttpHeaderTable for usage hints.

public:
  HttpHeaderId() = default;

  inline bool operator==(const HttpHeaderId& other) const { return id == other.id; }
  inline bool operator!=(const HttpHeaderId& other) const { return id != other.id; }
  inline bool operator< (const HttpHeaderId& other) const { return id <  other.id; }
  inline bool operator> (const HttpHeaderId& other) const { return id >  other.id; }
  inline bool operator<=(const HttpHeaderId& other) const { return id <= other.id; }
  inline bool operator>=(const HttpHeaderId& other) const { return id >= other.id; }

  inline size_t hashCode() const { return id; }

  kj::StringPtr toString() const;

123
  void requireFrom(const HttpHeaderTable& table) const;
124 125 126 127 128
  // In debug mode, throws an exception if the HttpHeaderId is not from the given table.
  //
  // In opt mode, no-op.

#define KJ_HTTP_FOR_EACH_BUILTIN_HEADER(MACRO) \
129 130 131 132 133 134 135
  /* Headers that are always read-only. */ \
  MACRO(CONNECTION, "Connection") \
  MACRO(KEEP_ALIVE, "Keep-Alive") \
  MACRO(TE, "TE") \
  MACRO(TRAILER, "Trailer") \
  MACRO(UPGRADE, "Upgrade") \
  \
136 137 138 139
  /* Headers that are read-only except in the case of a response to a HEAD request. */ \
  MACRO(CONTENT_LENGTH, "Content-Length") \
  MACRO(TRANSFER_ENCODING, "Transfer-Encoding") \
  \
140 141 142 143 144 145 146
  /* Headers that are read-only for WebSocket handshakes. */ \
  MACRO(SEC_WEBSOCKET_KEY, "Sec-WebSocket-Key") \
  MACRO(SEC_WEBSOCKET_VERSION, "Sec-WebSocket-Version") \
  MACRO(SEC_WEBSOCKET_ACCEPT, "Sec-WebSocket-Accept") \
  MACRO(SEC_WEBSOCKET_EXTENSIONS, "Sec-WebSocket-Extensions") \
  \
  /* Headers that you can write. */ \
147 148 149 150
  MACRO(HOST, "Host") \
  MACRO(DATE, "Date") \
  MACRO(LOCATION, "Location") \
  MACRO(CONTENT_TYPE, "Content-Type")
151
  // For convenience, these headers are valid for all HttpHeaderTables. You can refer to them like:
152 153 154
  //
  //     HttpHeaderId::HOST
  //
155
  // TODO(someday): Fill this out with more common headers.
156 157 158 159 160 161 162 163 164

#define DECLARE_HEADER(id, name) \
  static const HttpHeaderId id;
  // Declare a constant for each builtin header, e.g.: HttpHeaderId::CONNECTION

  KJ_HTTP_FOR_EACH_BUILTIN_HEADER(DECLARE_HEADER);
#undef DECLARE_HEADER

private:
165
  const HttpHeaderTable* table;
166 167
  uint id;

168 169
  inline explicit constexpr HttpHeaderId(const HttpHeaderTable* table, uint id)
      : table(table), id(id) {}
170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207
  friend class HttpHeaderTable;
  friend class HttpHeaders;
};

class HttpHeaderTable {
  // Construct an HttpHeaderTable to declare which headers you'll be interested in later on, and
  // to manufacture IDs for them.
  //
  // Example:
  //
  //     // Build a header table with the headers we are interested in.
  //     kj::HttpHeaderTable::Builder builder;
  //     const HttpHeaderId accept = builder.add("Accept");
  //     const HttpHeaderId contentType = builder.add("Content-Type");
  //     kj::HttpHeaderTable table(kj::mv(builder));
  //
  //     // Create an HTTP client.
  //     auto client = kj::newHttpClient(table, network);
  //
  //     // Get http://example.com.
  //     HttpHeaders headers(table);
  //     headers.set(accept, "text/html");
  //     auto response = client->send(kj::HttpMethod::GET, "http://example.com", headers)
  //         .wait(waitScope);
  //     auto msg = kj::str("Response content type: ", response.headers.get(contentType));

  struct IdsByNameMap;

public:
  HttpHeaderTable();
  // Constructs a table that only contains the builtin headers.

  class Builder {
  public:
    Builder();
    HttpHeaderId add(kj::StringPtr name);
    Own<HttpHeaderTable> build();

208 209 210 211 212 213 214 215
    HttpHeaderTable& getFutureTable();
    // Get the still-unbuilt header table. You cannot actually use it until build() has been
    // called.
    //
    // This method exists to help when building a shared header table -- the Builder may be passed
    // to several components, each of which will register the headers they need and get a reference
    // to the future table.

216 217 218 219 220 221 222
  private:
    kj::Own<HttpHeaderTable> table;
  };

  KJ_DISALLOW_COPY(HttpHeaderTable);  // Can't copy because HttpHeaderId points to the table.
  ~HttpHeaderTable() noexcept(false);

223
  uint idCount() const;
224 225
  // Return the number of IDs in the table.

226
  kj::Maybe<HttpHeaderId> stringToId(kj::StringPtr name) const;
227 228 229 230 231
  // Try to find an ID for the given name. The matching is case-insensitive, per the HTTP spec.
  //
  // Note: if `name` contains characters that aren't allowed in HTTP header names, this may return
  //   a bogus value rather than null, due to optimizations used in case-insensitive matching.

232
  kj::StringPtr idToString(HttpHeaderId id) const;
233 234 235 236 237 238 239 240 241 242 243 244 245 246 247
  // Get the canonical string name for the given ID.

private:
  kj::Vector<kj::StringPtr> namesById;
  kj::Own<IdsByNameMap> idsByName;
};

class HttpHeaders {
  // Represents a set of HTTP headers.
  //
  // This class guards against basic HTTP header injection attacks: Trying to set a header name or
  // value containing a newline, carriage return, or other invalid character will throw an
  // exception.

public:
248
  explicit HttpHeaders(const HttpHeaderTable& table);
249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264

  KJ_DISALLOW_COPY(HttpHeaders);
  HttpHeaders(HttpHeaders&&) = default;
  HttpHeaders& operator=(HttpHeaders&&) = default;

  void clear();
  // Clears all contents, as if the object was freshly-allocated. However, calling this rather
  // than actually re-allocating the object may avoid re-allocation of internal objects.

  HttpHeaders clone() const;
  // Creates a deep clone of the HttpHeaders. The returned object owns all strings it references.

  HttpHeaders cloneShallow() const;
  // Creates a shallow clone of the HttpHeaders. The returned object references the same strings
  // as the original, owning none of them.

265 266 267 268 269 270 271
  bool isWebSocket() const;
  // Convenience method that checks for the presence of the header `Upgrade: websocket`.
  //
  // Note that this does not actually validate that the request is a complete WebSocket handshake
  // with the correct version number -- such validation will occur if and when you call
  // acceptWebSocket().

272 273 274 275
  kj::Maybe<kj::StringPtr> get(HttpHeaderId id) const;
  // Read a header.

  template <typename Func>
276
  void forEach(Func&& func) const;
277 278 279 280
  // Calls `func(name, value)` for each header in the set -- including headers that aren't mapped
  // to IDs in the header table. Both inputs are of type kj::StringPtr.

  void set(HttpHeaderId id, kj::StringPtr value);
281
  void set(HttpHeaderId id, kj::String&& value);
282 283
  // Sets a header value, overwriting the existing value.
  //
284 285
  // The String&& version is equivalent to calling the other version followed by takeOwnership().
  //
286 287 288 289 290
  // WARNING: It is the caller's responsibility to ensure that `value` remains valid until the
  //   HttpHeaders object is destroyed. This allows string literals to be passed without making a
  //   copy, but complicates the use of dynamic values. Hint: Consider using `takeOwnership()`.

  void add(kj::StringPtr name, kj::StringPtr value);
291 292
  void add(kj::StringPtr name, kj::String&& value);
  void add(kj::String&& name, kj::String&& value);
293 294 295
  // Append a header. `name` will be looked up in the header table, but if it's not mapped, the
  // header will be added to the list of unmapped headers.
  //
296 297
  // The String&& versions are equivalent to calling the other version followed by takeOwnership().
  //
298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334
  // WARNING: It is the caller's responsibility to ensure that `name` and `value` remain valid
  //   until the HttpHeaders object is destroyed. This allows string literals to be passed without
  //   making a copy, but complicates the use of dynamic values. Hint: Consider using
  //   `takeOwnership()`.

  void unset(HttpHeaderId id);
  // Removes a header.
  //
  // It's not possible to remove a header by string name because non-indexed headers would take
  // O(n) time to remove. Instead, construct a new HttpHeaders object and copy contents.

  void takeOwnership(kj::String&& string);
  void takeOwnership(kj::Array<char>&& chars);
  void takeOwnership(HttpHeaders&& otherHeaders);
  // Takes overship of a string so that it lives until the HttpHeaders object is destroyed. Useful
  // when you've passed a dynamic value to set() or add() or parse*().

  struct Request {
    HttpMethod method;
    kj::StringPtr url;
  };
  struct Response {
    uint statusCode;
    kj::StringPtr statusText;
  };

  kj::Maybe<Request> tryParseRequest(kj::ArrayPtr<char> content);
  kj::Maybe<Response> tryParseResponse(kj::ArrayPtr<char> content);
  // Parse an HTTP header blob and add all the headers to this object.
  //
  // `content` should be all text from the start of the request to the first occurrance of two
  // newlines in a row -- including the first of these two newlines, but excluding the second.
  //
  // The parse is performed with zero copies: The callee clobbers `content` with '\0' characters
  // to split it into a bunch of shorter strings. The caller must keep `content` valid until the
  // `HttpHeaders` is destroyed, or pass it to `takeOwnership()`.

335 336 337
  bool tryParse(kj::ArrayPtr<char> content);
  // Like tryParseRequest()/tryParseResponse(), but don't expect any request/response line.

338
  kj::String serializeRequest(HttpMethod method, kj::StringPtr url,
339
                              kj::ArrayPtr<const kj::StringPtr> connectionHeaders = nullptr) const;
340
  kj::String serializeResponse(uint statusCode, kj::StringPtr statusText,
341
                               kj::ArrayPtr<const kj::StringPtr> connectionHeaders = nullptr) const;
342 343 344
  // **Most applications will not use these methods; they are called by the HTTP client and server
  // implementations.**
  //
345 346
  // Serialize the headers as a complete request or response blob. The blob uses '\r\n' newlines
  // and includes the double-newline to indicate the end of the headers.
347 348 349
  //
  // `connectionHeaders`, if provided, contains connection-level headers supplied by the HTTP
  // implementation, in the order specified by the KJ_HTTP_FOR_EACH_BUILTIN_HEADER macro. These
350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368
  // headers values override any corresponding header value in the HttpHeaders object. The
  // CONNECTION_HEADERS_COUNT constants below can help you construct this `connectionHeaders` array.

  enum class BuiltinIndicesEnum {
  #define HEADER_ID(id, name) id,
    KJ_HTTP_FOR_EACH_BUILTIN_HEADER(HEADER_ID)
  #undef HEADER_ID
  };

  struct BuiltinIndices {
  #define HEADER_ID(id, name) static constexpr uint id = static_cast<uint>(BuiltinIndicesEnum::id);
    KJ_HTTP_FOR_EACH_BUILTIN_HEADER(HEADER_ID)
  #undef HEADER_ID
  };

  static constexpr uint HEAD_RESPONSE_CONNECTION_HEADERS_COUNT = BuiltinIndices::CONTENT_LENGTH;
  static constexpr uint CONNECTION_HEADERS_COUNT = BuiltinIndices::SEC_WEBSOCKET_KEY;
  static constexpr uint WEBSOCKET_CONNECTION_HEADERS_COUNT = BuiltinIndices::HOST;
  // Constants for use with HttpHeaders::serialize*().
369 370 371 372

  kj::String toString() const;

private:
373
  const HttpHeaderTable* table;
374 375 376 377 378 379 380 381 382 383 384 385

  kj::Array<kj::StringPtr> indexedHeaders;
  // Size is always table->idCount().

  struct Header {
    kj::StringPtr name;
    kj::StringPtr value;
  };
  kj::Vector<Header> unindexedHeaders;

  kj::Vector<kj::Array<char>> ownedStrings;

386
  void addNoCheck(kj::StringPtr name, kj::StringPtr value);
387 388 389 390 391 392

  kj::StringPtr cloneToOwn(kj::StringPtr str);

  kj::String serialize(kj::ArrayPtr<const char> word1,
                       kj::ArrayPtr<const char> word2,
                       kj::ArrayPtr<const char> word3,
393
                       kj::ArrayPtr<const kj::StringPtr> connectionHeaders) const;
394

395
  bool parseHeaders(char* ptr, char* end);
396 397 398 399 400 401

  // TODO(perf): Arguably we should store a map, but header sets are never very long
  // TODO(perf): We could optimize for common headers by storing them directly as fields. We could
  //   also add direct accessors for those headers.
};

class HttpInputStream {
  // Low-level interface to receive HTTP-formatted messages (headers followed by body) from an
  // input stream, without a paired output stream.
  //
  // Most applications will not use this. Regular HTTP clients and servers don't need this. This
  // is mainly useful for apps implementing various protocols that look like HTTP but aren't
  // really.
  //
  // All three read*() result structs hold `headers` by reference and the text fields as
  // non-owning kj::StringPtr; only `body` is owned by the struct.

public:
  struct Request {
    // Result of readRequest(): the parsed request line, the headers, and the body stream.
    HttpMethod method;
    kj::StringPtr url;
    const HttpHeaders& headers;
    kj::Own<kj::AsyncInputStream> body;
  };
  virtual kj::Promise<Request> readRequest() = 0;
  // Reads one HTTP request from the input stream.
  //
  // The returned struct contains pointers directly into a buffer that is invalidated on the next
  // message read.

  struct Response {
    // Result of readResponse(): the parsed status line, the headers, and the body stream.
    uint statusCode;
    kj::StringPtr statusText;
    const HttpHeaders& headers;
    kj::Own<kj::AsyncInputStream> body;
  };
  virtual kj::Promise<Response> readResponse(HttpMethod requestMethod) = 0;
  // Reads one HTTP response from the input stream.
  //
  // You must provide the request method because responses to HEAD requests require special
  // treatment.
  //
  // The returned struct contains pointers directly into a buffer that is invalidated on the next
  // message read.

  struct Message {
    // Result of readMessage(): headers and body only, with no request or response line.
    const HttpHeaders& headers;
    kj::Own<kj::AsyncInputStream> body;
  };
  virtual kj::Promise<Message> readMessage() = 0;
  // Reads an HTTP header set followed by a body, with no request or response line. This is not
  // useful for HTTP but may be useful for other protocols that make the unfortunate choice to
  // mimic HTTP message format, such as Visual Studio Code's JSON-RPC transport.
  //
  // The returned struct contains pointers directly into a buffer that is invalidated on the next
  // message read.

  virtual kj::Promise<bool> awaitNextMessage() = 0;
  // Waits until more data is available, but doesn't consume it. Returns false on EOF.
};

class EntropySource {
  // Interface for an object that generates entropy. Typically, cryptographically-random entropy
  // is expected.
  //
  // TODO(cleanup): Put this somewhere more general.

public:
  virtual void generate(kj::ArrayPtr<byte> buffer) = 0;
  // Writes generated entropy into the caller-provided `buffer`.
  // NOTE(review): presumably the whole buffer is filled -- confirm against implementations.
};

class WebSocket {
  // Interface representing an open WebSocket session.
  //
  // Each side can send and receive data and "close" messages.
  //
  // Ping/Pong and message fragmentation are not exposed through this interface. These features of
  // the underlying WebSocket protocol are not exposed by the browser-level Javascript API either,
  // and thus applications typically need to implement these features at the application protocol
  // level instead. The implementation is, however, expected to reply to Ping messages it receives.

public:
  virtual kj::Promise<void> send(kj::ArrayPtr<const byte> message) = 0;
  virtual kj::Promise<void> send(kj::ArrayPtr<const char> message) = 0;
  // Send a message (binary or text). The underlying buffer must remain valid, and you must not
  // call send() again, until the returned promise resolves.

  virtual kj::Promise<void> close(uint16_t code, kj::StringPtr reason) = 0;
  // Send a Close message.
  //
  // Note that the returned Promise resolves once the message has been sent -- it does NOT wait
  // for the other end to send a Close reply. The application should await a reply before dropping
  // the WebSocket object.

  virtual kj::Promise<void> disconnect() = 0;
  // Sends EOF on the underlying connection without sending a "close" message. This is NOT a clean
  // shutdown, but is sometimes useful when you want the other end to trigger whatever behavior
  // it normally triggers when a connection is dropped.

  struct Close {
    // Payload of a received Close message.
    uint16_t code;
    kj::String reason;
  };

  typedef kj::OneOf<kj::String, kj::Array<byte>, Close> Message;
  // A received message: text (kj::String), binary (kj::Array<byte>), or Close.

  virtual kj::Promise<Message> receive() = 0;
  // Read one message from the WebSocket and return it. Can only call once at a time. Do not call
  // again after Close is received.

  virtual kj::Promise<void> pumpTo(WebSocket& other);
  // Continuously receives messages from this WebSocket and send them to `other`.
  //
  // On EOF, calls other.disconnect(), then resolves.
  //
  // On other read errors, calls other.close() with the error, then resolves.
  //
  // On write error, rejects with the error.

  virtual kj::Maybe<kj::Promise<void>> tryPumpFrom(WebSocket& other);
  // Either returns null, or performs the equivalent of other.pumpTo(*this). Only returns non-null
  // if this WebSocket implementation is able to perform the pump in an optimized way, better than
  // the default implementation of pumpTo(). The default implementation of pumpTo() always tries
  // calling this first, and the default implementation of tryPumpFrom() always returns null.
};

class HttpClient {
  // Interface to the client end of an HTTP connection.
  //
  // There are two kinds of clients:
  // * Host clients are used when talking to a specific host. The `url` specified in a request
  //   is actually just a path. (A `Host` header is still required in all requests.)
  // * Proxy clients are used when the target could be any arbitrary host on the internet.
  //   The `url` specified in a request is a full URL including protocol and hostname.

public:
  struct Response {
    uint statusCode;
    kj::StringPtr statusText;
532
    const HttpHeaders* headers;
533
    kj::Own<kj::AsyncInputStream> body;
534
    // `statusText` and `headers` remain valid until `body` is dropped or read from.
535 536 537 538 539 540 541 542 543 544 545 546 547 548 549 550 551 552 553 554
  };

  struct Request {
    kj::Own<kj::AsyncOutputStream> body;
    // Write the request entity body to this stream, then drop it when done.
    //
    // May be null for GET and HEAD requests (which have no body) and requests that have
    // Content-Length: 0.

    kj::Promise<Response> response;
    // Promise for the eventual respnose.
  };

  virtual Request request(HttpMethod method, kj::StringPtr url, const HttpHeaders& headers,
                          kj::Maybe<uint64_t> expectedBodySize = nullptr) = 0;
  // Perform an HTTP request.
  //
  // `url` may be a full URL (with protocol and host) or it may be only the path part of the URL,
  // depending on whether the client is a proxy client or a host client.
  //
555 556
  // `url` and `headers` need only remain valid until `request()` returns (they can be
  // stack-allocated).
557 558 559 560 561 562 563 564 565
  //
  // `expectedBodySize`, if provided, must be exactly the number of bytes that will be written to
  // the body. This will trigger use of the `Content-Length` connection header. Otherwise,
  // `Transfer-Encoding: chunked` will be used.

  struct WebSocketResponse {
    uint statusCode;
    kj::StringPtr statusText;
    const HttpHeaders* headers;
566
    kj::OneOf<kj::Own<kj::AsyncInputStream>, kj::Own<WebSocket>> webSocketOrBody;
567
    // `statusText` and `headers` remain valid until `webSocketOrBody` is dropped or read from.
568 569
  };
  virtual kj::Promise<WebSocketResponse> openWebSocket(
570
      kj::StringPtr url, const HttpHeaders& headers);
571 572
  // Tries to open a WebSocket. Default implementation calls send() and never returns a WebSocket.
  //
573 574
  // `url` and `headers` need only remain valid until `openWebSocket()` returns (they can be
  // stack-allocated).
575

576
  virtual kj::Promise<kj::Own<kj::AsyncIoStream>> connect(kj::StringPtr host);
577 578 579 580 581 582 583 584 585 586 587 588 589 590 591 592 593 594 595 596 597 598 599 600
  // Handles CONNECT requests. Only relevant for proxy clients. Default implementation throws
  // UNIMPLEMENTED.
};

class HttpService {
  // Interface which HTTP services should implement.
  //
  // This interface is functionally equivalent to HttpClient, but is intended for applications to
  // implement rather than call. The ergonomics and performance of the method signatures are
  // optimized for the serving end.
  //
  // As with clients, there are two kinds of services:
  // * Host services are used when talking to a specific host. The `url` specified in a request
  //   is actually just a path. (A `Host` header is still required in all requests, and the service
  //   may in fact serve multiple origins via this header.)
  // * Proxy services are used when the target could be any arbitrary host on the internet, i.e. to
  //   implement an HTTP proxy. The `url` specified in a request is a full URL including protocol
  //   and hostname.

public:
  class Response {
    // Callback interface through which a service delivers its response to a request.

  public:
    virtual kj::Own<kj::AsyncOutputStream> send(
        uint statusCode, kj::StringPtr statusText, const HttpHeaders& headers,
        kj::Maybe<uint64_t> expectedBodySize = nullptr) = 0;
    // Begin the response.
    //
    // `statusText` and `headers` need only remain valid until send() returns (they can be
    // stack-allocated).

    virtual kj::Own<WebSocket> acceptWebSocket(const HttpHeaders& headers) = 0;
    // If headers.isWebSocket() is true then you can call acceptWebSocket() instead of send().

    kj::Promise<void> sendError(uint statusCode, kj::StringPtr statusText,
                                const HttpHeaders& headers);
    kj::Promise<void> sendError(uint statusCode, kj::StringPtr statusText,
                                const HttpHeaderTable& headerTable);
    // Convenience wrapper around send() which sends a basic error. A generic error page specifying
    // the error code is sent as the body.
    //
    // You must provide headers or a header table because downstream service wrappers may be
    // expecting response headers built with a particular table so that they can insert additional
    // headers.
  };

  virtual kj::Promise<void> request(
      HttpMethod method, kj::StringPtr url, const HttpHeaders& headers,
      kj::AsyncInputStream& requestBody, Response& response) = 0;
  // Perform an HTTP request.
  //
  // `url` may be a full URL (with protocol and host) or it may be only the path part of the URL,
  // depending on whether the service is a proxy service or a host service.
  //
  // `url` and `headers` are invalidated on the first read from `requestBody` or when the returned
  // promise resolves, whichever comes first.

  virtual kj::Promise<kj::Own<kj::AsyncIoStream>> connect(kj::StringPtr host);
  // Handles CONNECT requests. Only relevant for proxy services. Default implementation throws
  // UNIMPLEMENTED.
};

struct HttpClientSettings {
  // Optional settings accepted by the newHttpClient() factory functions. Every field has a
  // default, so a default-constructed instance is valid.

  kj::Duration idleTimout = 5 * kj::SECONDS;
  // For clients which automatically create new connections, any connection idle for at least this
  // long will be closed. Set this to 0 to prevent connection reuse entirely.
  //
  // NOTE: the member name is spelled `idleTimout` (sic). It is part of the public interface, so
  // the spelling is left unchanged to avoid breaking callers.

  kj::Maybe<EntropySource&> entropySource = nullptr;
  // Must be provided in order to use `openWebSocket`. If you don't need WebSockets, this can be
  // omitted. The WebSocket protocol uses random values to avoid triggering flaws (including
  // security flaws) in certain HTTP proxy software. Specifically, entropy is used to generate the
  // `Sec-WebSocket-Key` header and to generate frame masks. If you know that there are no broken
  // or vulnerable proxies between you and the server, you can provide a dummy entropy source that
  // doesn't generate real entropy (e.g. returning the same value every time). Otherwise, you must
  // provide a cryptographically-random entropy source.
};

kj::Own<HttpClient> newHttpClient(kj::Timer& timer, HttpHeaderTable& responseHeaderTable,
                                  kj::Network& network, kj::Maybe<kj::Network&> tlsNetwork,
                                  HttpClientSettings settings = HttpClientSettings());
// Creates a proxy HttpClient that connects to hosts over the given network. The URL must always
// be an absolute URL; the host is parsed from the URL. This implementation will automatically
// add an appropriate Host header (and convert the URL to just a path) once it has connected.
//
// Note that if you wish to route traffic through an HTTP proxy server rather than connect to
// remote hosts directly, you should use the form of newHttpClient() that takes a NetworkAddress,
// and supply the proxy's address.
//
// `responseHeaderTable` is used when parsing HTTP responses. Requests can use any header table.
//
// `tlsNetwork` is required to support HTTPS destination URLs. If null, only HTTP URLs can be
// fetched.

kj::Own<HttpClient> newHttpClient(kj::Timer& timer, HttpHeaderTable& responseHeaderTable,
                                  kj::NetworkAddress& addr,
                                  HttpClientSettings settings = HttpClientSettings());
// Creates an HttpClient that always connects to the given address no matter what URL is requested.
// The client will open and close connections as needed. It will attempt to reuse connections for
// multiple requests but will not send a new request before the previous response on the same
// connection has completed, as doing so can result in head-of-line blocking issues. The client may
// be used as a proxy client or a host client depending on whether the peer is operating as
// a proxy. (Hint: This is the best kind of client to use when routing traffic through an HTTP
// proxy. `addr` should be the address of the proxy, and the proxy itself will resolve remote hosts
// based on the URLs passed to it.)
//
// `responseHeaderTable` is used when parsing HTTP responses. Requests can use any header table.

kj::Own<HttpClient> newHttpClient(HttpHeaderTable& responseHeaderTable, kj::AsyncIoStream& stream,
                                  HttpClientSettings settings = HttpClientSettings());
// Creates an HttpClient that speaks over the given pre-established connection. The client may
// be used as a proxy client or a host client depending on whether the peer is operating as
// a proxy.
//
// Note that since this client has only one stream to work with, it will try to pipeline all
// requests on this stream. If one request or response has an I/O failure, all subsequent requests
// fail as well. If the destination server chooses to close the connection after a response,
// subsequent requests will fail. If a response takes a long time, it blocks subsequent responses.
// If a WebSocket is opened successfully, all subsequent requests fail.
//
// `responseHeaderTable` is used when parsing HTTP responses, as with the other overloads.

kj::Own<HttpClient> newHttpClient(HttpService& service);
kj::Own<HttpService> newHttpService(HttpClient& client);
// Adapts an HttpClient to an HttpService and vice versa: newHttpClient() exposes an existing
// HttpService through the HttpClient interface, and newHttpService() exposes an existing
// HttpClient through the HttpService interface.
//
// NOTE(review): both adapters take their argument by reference, so presumably the wrapped object
// must outlive the returned adapter -- confirm against the implementation.

699 700 701 702 703 704 705 706 707 708
kj::Own<HttpInputStream> newHttpInputStream(
    kj::AsyncInputStream& input, HttpHeaderTable& headerTable);
// Create an HttpInputStream on top of the given stream. Normally applications would not call this
// directly, but it can be useful for implementing protocols that aren't quite HTTP but use similar
// message delimiting.
//
// The HttpInputStream implementation does read-ahead buffering on `input`. Therefore, when the
// HttpInputStream is destroyed, some data read from `input` may be lost, so it's not possible to
// continue reading from `input` in a reliable way.

709
kj::Own<WebSocket> newWebSocket(kj::Own<kj::AsyncIoStream> stream,
710
                                kj::Maybe<EntropySource&> maskEntropySource);
711 712 713 714
// Create a new WebSocket on top of the given stream. It is assumed that the HTTP -> WebSocket
// upgrade handshake has already occurred (or is not needed), and messages can immediately be
// sent and received on the stream. Normally applications would not call this directly.
//
715
// `maskEntropySource` is used to generate cryptographically-random frame masks. If null, outgoing
716
// frames will not be masked. Servers are required NOT to mask their outgoing frames, but clients
717 718 719 720 721
// ARE required to do so. So, on the client side, you MUST specify an entropy source. The mask
// must be crytographically random if the data being sent on the WebSocket may be malicious. The
// purpose of the mask is to prevent badly-written HTTP proxies from interpreting "things that look
// like HTTP requests" in a message as being actual HTTP requests, which could result in cache
// poisoning. See RFC6455 section 10.3.
722

723 724 725 726 727 728
struct WebSocketPipe {
  kj::Own<WebSocket> ends[2];
};

WebSocketPipe newWebSocketPipe();
// Create a WebSocket pipe. Messages written to one end of the pipe will be readable from the other
// end. No buffering occurs -- a message send does not complete until a corresponding receive
// accepts the message.

732 733 734 735 736 737 738 739
struct HttpServerSettings {
  kj::Duration headerTimeout = 15 * kj::SECONDS;
  // After initial connection open, or after receiving the first byte of a pipelined request,
  // the client must send the complete request within this time.

  kj::Duration pipelineTimeout = 5 * kj::SECONDS;
  // After one request/response completes, we'll wait up to this long for a pipelined request to
  // arrive.
740 741 742 743 744 745 746 747

  kj::Duration canceledUploadGacePeriod = 1 * kj::SECONDS;
  size_t canceledUploadGraceBytes = 65536;
  // If the HttpService sends a response and returns without having read the entire request body,
  // then we have to decide whether to close the connection or wait for the client to finish the
  // request so that it can pipeline the next one. We'll give them a grace period defined by the
  // above two values -- if they hit either one, we'll close the socket, but if the request
  // completes, we'll let the connection stay open to handle more requests.
748 749
};

750
class HttpServer final: private kj::TaskSet::ErrorHandler {
751 752 753 754
  // Class which listens for requests on ports or connections and sends them to an HttpService.

public:
  typedef HttpServerSettings Settings;
755
  typedef kj::Function<kj::Own<HttpService>(kj::AsyncIoStream&)> HttpServiceFactory;
756 757 758 759 760 761 762

  HttpServer(kj::Timer& timer, HttpHeaderTable& requestHeaderTable, HttpService& service,
             Settings settings = Settings());
  // Set up an HttpServer that directs incoming connections to the given service. The service
  // may be a host service or a proxy service depending on whether you are intending to implement
  // an HTTP server or an HTTP proxy.

763 764 765 766 767 768
  HttpServer(kj::Timer& timer, HttpHeaderTable& requestHeaderTable,
             HttpServiceFactory serviceFactory, Settings settings = Settings());
  // Like the other constructor, but allows a new HttpService object to be used for each
  // connection, based on the connection object. This is particularly useful for capturing the
  // client's IP address and injecting it as a header.

769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787
  kj::Promise<void> drain();
  // Stop accepting new connections or new requests on existing connections. Finish any requests
  // that are already executing, then close the connections. Returns once no more requests are
  // in-flight.

  kj::Promise<void> listenHttp(kj::ConnectionReceiver& port);
  // Accepts HTTP connections on the given port and directs them to the handler.
  //
  // The returned promise never completes normally. It may throw if port.accept() throws. Dropping
  // the returned promise will cause the server to stop listening on the port, but already-open
  // connections will continue to be served. Destroy the whole HttpServer to cancel all I/O.

  kj::Promise<void> listenHttp(kj::Own<kj::AsyncIoStream> connection);
  // Reads HTTP requests from the given connection and directs them to the handler. A successful
  // completion of the promise indicates that all requests received on the connection resulted in
  // a complete response, and the client closed the connection gracefully or drain() was called.
  // The promise throws if an unparseable request is received or if some I/O error occurs. Dropping
  // the returned promise will cancel all I/O on the connection and cancel any in-flight requests.

788 789 790 791 792 793 794 795
  kj::Promise<bool> listenHttpCleanDrain(kj::AsyncIoStream& connection);
  // Like listenHttp(), but allows you to potentially drain the server without closing connections.
  // The returned promise resolves to `true` if the connection has been left in a state where a
  // new HttpServer could potentially accept further requests from it. If `false`, then the
  // connection is either in an inconsistent state or already completed a closing handshake; the
  // caller should close it without any further reads/writes. Note this only ever returns `true`
  // if you called `drain()` -- otherwise this server would keep handling the connection.

796 797 798 799 800
private:
  class Connection;

  kj::Timer& timer;
  HttpHeaderTable& requestHeaderTable;
801
  kj::OneOf<HttpService*, HttpServiceFactory> service;
802 803 804 805 806 807 808 809 810 811 812
  Settings settings;

  bool draining = false;
  kj::ForkedPromise<void> onDrain;
  kj::Own<kj::PromiseFulfiller<void>> drainFulfiller;

  uint connectionCount = 0;
  kj::Maybe<kj::Own<kj::PromiseFulfiller<void>>> zeroConnectionsFulfiller;

  kj::TaskSet tasks;

813 814
  HttpServer(kj::Timer& timer, HttpHeaderTable& requestHeaderTable,
             kj::OneOf<HttpService*, HttpServiceFactory> service,
815 816 817 818 819 820 821 822 823 824
             Settings settings, kj::PromiseFulfillerPair<void> paf);

  kj::Promise<void> listenLoop(kj::ConnectionReceiver& port);

  void taskFailed(kj::Exception&& exception) override;
};

// =======================================================================================
// inline implementation

825
inline void HttpHeaderId::requireFrom(const HttpHeaderTable& table) const {
826 827 828 829 830
  KJ_IREQUIRE(this->table == nullptr || this->table == &table,
      "the provided HttpHeaderId is from the wrong HttpHeaderTable");
}

inline kj::Own<HttpHeaderTable> HttpHeaderTable::Builder::build() {
  // Hands ownership of the table held by this builder to the caller; the builder's `table`
  // member is moved-from afterwards.
  return kj::mv(table);
}
inline HttpHeaderTable& HttpHeaderTable::Builder::getFutureTable() {
  // Returns a reference to the table held by this builder without giving up ownership.
  return *table;
}

833
inline uint HttpHeaderTable::idCount() const { return namesById.size(); }
834

835
inline kj::StringPtr HttpHeaderTable::idToString(HttpHeaderId id) const {
836 837 838 839 840 841 842 843 844 845 846 847 848 849 850 851
  id.requireFrom(*this);
  return namesById[id.id];
}

inline kj::Maybe<kj::StringPtr> HttpHeaders::get(HttpHeaderId id) const {
  // Check that the ID is valid for the header table this object refers to.
  id.requireFrom(*table);

  // A null entry in `indexedHeaders` means the header is not set.
  auto value = indexedHeaders[id.id];
  if (value == nullptr) {
    return kj::Maybe<kj::StringPtr>(nullptr);
  }
  return kj::Maybe<kj::StringPtr>(value);
}

inline void HttpHeaders::unset(HttpHeaderId id) {
  // Check that the ID is valid for the header table this object refers to.
  id.requireFrom(*table);

  // Resetting the entry to null marks the header as absent.
  auto& slot = indexedHeaders[id.id];
  slot = nullptr;
}

template <typename Func>
852
inline void HttpHeaders::forEach(Func&& func) const {
853 854 855 856 857 858 859 860 861 862 863 864
  for (auto i: kj::indices(indexedHeaders)) {
    if (indexedHeaders[i] != nullptr) {
      func(table->idToString(HttpHeaderId(table, i)), indexedHeaders[i]);
    }
  }

  for (auto& header: unindexedHeaders) {
    func(header.name, header.value);
  }
}

}  // namespace kj