filesystem.h 41.9 KB
Newer Older
Kenton Varda's avatar
Kenton Varda committed
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243
// Copyright (c) 2015 Sandstorm Development Group, Inc. and contributors
// Licensed under the MIT License:
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#ifndef KJ_FILESYSTEM_H_
#define KJ_FILESYSTEM_H_

#include "memory.h"
#include "io.h"
#include <inttypes.h>
#include "time.h"  // TODO(now): problematic
#include "function.h"

namespace kj {

template <typename T>
class Vector;

class PathPtr;

class Path {
  // A Path identifies a file in a directory tree.
  //
  // In KJ, we avoid representing paths as plain strings because this can lead to path injection
  // bugs as well as numerous kinds of bugs relating to path parsing edge cases. The Path class's
  // interface is designed to "make it hard to screw up".
  //
  // A "Path" is in fact a list of strings, each string being one component of the path (as would
  // normally be separated by '/'s). Path components are not allowed to contain '/' nor '\0', nor
  // are they allowed to be the special names "", ".", nor "..".
  //
  // If you explicitly want to parse a path that contains '/'s, ".", and "..", you must use
  // parse() and/or eval(). However, users of this interface are encouraged to avoid parsing
  // paths at all, and instead express paths as string arrays.
  //
  // Note that when using the Path class, ".." is always canonicalized in path space without
  // consulting the actual filesystem. This means that "foo/some-symlink/../bar" is exactly
  // equivalent to "foo/bar". This differs from the kernel's behavior when resolving paths passed
  // to system calls: the kernel would have resolved "some-symlink" to its target physical path,
  // and then would have interpreted ".." relative to that. In practice, the kernel's behavior is
  // rarely what the user or programmer intended, hence canonicalizing in path space produces a
  // better result.
  //
  // Path objects are "immutable": functions that "modify" the path return a new path. However,
  // if the path being operated on is an rvalue, copying can be avoided. Hence it makes sense to
  // write code like:
  //
  //     Path p = ...;
  //     p = kj::mv(p).append("bar");  // in-place update, avoids string copying

public:
  Path(decltype(nullptr));  // empty path

  explicit Path(StringPtr name);
  explicit Path(String&& name);
  // Create a Path containing only one component. `name` is a single filename; it cannot contain
  // '/' nor '\0' nor can it be exactly "" nor "." nor "..".
  //
  // If you want to allow '/'s and such, you must call Path::parse(). We force you to do this to
  // prevent path injection bugs where you didn't consider what would happen if the path contained
  // a '/'.

  explicit Path(std::initializer_list<StringPtr> parts);
  explicit Path(ArrayPtr<const StringPtr> parts);
  explicit Path(Array<String> parts);
  // Construct a path from an array. Note that this means you can do:
  //
  //     Path{"foo", "bar", "baz"}   // equivalent to Path::parse("foo/bar/baz")

  KJ_DISALLOW_COPY(Path);
  Path(Path&&) = default;
  Path& operator=(Path&&) = default;
  // Path is move-only: copying is disallowed, moving uses the compiler-generated operations.

  Path clone() const;
  // Explicit replacement for the disallowed copy constructor.
  // NOTE(review): presumably deep-copies every component string -- confirm in the implementation.

  static Path parse(StringPtr path);
  // Parses a path in traditional format. Components are separated by '/'. Any use of "." or
  // ".." will be canonicalized (if they can't be canonicalized, e.g. because the path starts with
  // "..", an exception is thrown). Multiple consecutive '/'s will be collapsed. A leading '/'
  // is NOT accepted -- if that is a problem, you probably want `eval()`. Trailing '/'s are
  // ignored.

  Path append(Path suffix) const&;
  Path append(Path suffix) &&;
  Path append(PathPtr suffix) const&;
  Path append(PathPtr suffix) &&;
  Path append(StringPtr suffix) const&;
  Path append(StringPtr suffix) &&;
  Path append(String suffix) const&;
  Path append(String suffix) &&;
  // Create a new path by appending the given path to this path.
  //
  // Each overload comes in a `const&` and an `&&` flavor; the `&&` flavor is selected when the
  // Path being appended to is an rvalue (see the class comment about avoiding copies).
  //
  // `suffix` cannot contain '/' characters. Instead, you can append an array:
  //
  //     path.append({"foo", "bar"})
  //
  // Or, use Path::parse():
  //
  //     path.append(Path::parse("foo//baz/../bar"))

  Path eval(StringPtr pathText) const&;
  Path eval(StringPtr pathText) &&;
  // Evaluates a traditional path relative to this one. `pathText` is parsed like `parse()` would,
  // except that:
  // - It can contain leading ".." components that traverse up the tree.
  // - It can have a leading '/' which completely replaces the current path.
  //
  // THE NAME OF THIS METHOD WAS CHOSEN TO INSPIRE FEAR.
  //
  // Instead of using `path.eval(str)`, always consider whether you really want
  // `path.append(Path::parse(str))`. The former is much riskier than the latter in terms of path
  // injection vulnerabilities.

  PathPtr basename() const&;
  Path basename() &&;
  // Get the last component of the path. (Use `basename()[0]` to get just the string.)
  //
  // The `const&` overload returns a non-owning PathPtr; the `&&` overload returns an owning Path.

  PathPtr parent() const&;
  Path parent() &&;
  // Get the parent path.
  //
  // The `const&` overload returns a non-owning PathPtr; the `&&` overload returns an owning Path.

  String toString(bool absolute = false) const;
  // Converts the path to a traditional path string, appropriate to pass to a unix system call.
  // Never throws.
  //
  // NOTE(review): `absolute` presumably requests a leading '/' -- confirm in the implementation.

  const String& operator[](size_t i) const&;
  String operator[](size_t i) &&;
  size_t size() const;
  const String* begin() const;
  const String* end() const;
  PathPtr slice(size_t start, size_t end) const&;
  Path slice(size_t start, size_t end) &&;
  // A Path can be accessed as an array of strings.

  Path evalWin32(StringPtr pathText) const&;
  Path evalWin32(StringPtr pathText) &&;
  // Evaluates a Win32-style path. Differences from `eval()` include:
  //
  // - Backslashes can be used as path separators.
  // - Absolute paths begin with a drive letter followed by a colon. The drive letter, including
  //   the colon, will become the first component of the path, e.g. "c:\foo" becomes {"c:", "foo"}.
  // - A network path like "\\host\share\path" is parsed as {"host", "share", "path"}.

  String toWin32String(bool absolute = false) const;
  // Converts the path to a Win32 path string.
  //
  // (In most cases you'll want to further convert the returned string from UTF-8 to UTF-16.)
  //
  // If `absolute` is true, the path is expected to be an absolute path, meaning the first
  // component is a drive letter, namespace, or network host name. These are converted to their
  // regular Win32 format -- i.e. this method does the reverse of `evalWin32()`.
  //
  // This throws if the path would have unexpected special meaning or is otherwise invalid on
  // Windows, such as if it contains backslashes (within a path component), colons, or special
  // names like "con".

private:
  Array<String> parts;
  // The path components, one string per component.

  // TODO(perf): Consider unrolling one element from `parts`, so that a one-element path doesn't
  //   require allocation of an array.

  enum { ALREADY_CHECKED };
  Path(Array<String> parts, decltype(ALREADY_CHECKED));
  // Private constructor selected by passing the ALREADY_CHECKED tag.
  // NOTE(review): presumably skips per-component validation for parts that were validated by the
  // caller -- confirm in the implementation.

  friend class PathPtr;

  // Static helpers declared here and defined out of line.
  // NOTE(review): the names suggest they back the validating constructors and the
  // parse()/eval()/evalWin32()/toWin32String() entry points -- confirm in the implementation.
  static String stripNul(String input);
  static void validatePart(StringPtr part);
  static void evalPart(Vector<String>& parts, ArrayPtr<const char> part);
  static Path evalImpl(Vector<String>&& parts, StringPtr path);
  static Path evalWin32Impl(Vector<String>&& parts, StringPtr path);
  static size_t countParts(StringPtr path);
  static size_t countPartsWin32(StringPtr path);
  static bool isWin32Drive(ArrayPtr<const char> part);
  static bool isNetbiosName(ArrayPtr<const char> part);
  static bool isWin32Special(StringPtr part);
};

class PathPtr {
  // Points to a Path or a slice of a Path, but doesn't own it.
  //
  // PathPtr is to Path as ArrayPtr is to Array and StringPtr is to String.
  //
  // Because a PathPtr does not own the components it refers to, it must not be used after the
  // Path it was obtained from has been destroyed or moved away.

public:
  PathPtr(decltype(nullptr));
  PathPtr(const Path& path);
  // Both constructors are non-explicit, so `nullptr` and any `Path` lvalue convert implicitly to
  // a PathPtr.

  Path clone();
  // NOTE(review): this is the only member function here that is not declared `const`; it looks
  // like it could be -- confirm against the out-of-line definition before changing it.
  Path append(Path suffix) const;
  Path append(PathPtr suffix) const;
  Path append(StringPtr suffix) const;
  Path append(String suffix) const;
  Path eval(StringPtr pathText) const;
  PathPtr basename() const;
  PathPtr parent() const;
  String toString(bool absolute = false) const;
  const String& operator[](size_t i) const;
  size_t size() const;
  const String* begin() const;
  const String* end() const;
  PathPtr slice(size_t start, size_t end) const;
  Path evalWin32(StringPtr pathText) const;
  String toWin32String(bool absolute = false) const;
  // Equivalent to the corresponding methods of `Path`.

private:
  ArrayPtr<const String> parts;
  // Non-owning view of the path components.

  explicit PathPtr(ArrayPtr<const String> parts);
  // Wraps a raw component range. Private; `Path` can call it through the friend declaration
  // below.

  friend class Path;
};

// =======================================================================================
// The filesystem API
//
// This API is strictly synchronous because, unfortunately, there's no such thing as asynchronous
// filesystem access in practice. The filesystem drivers on Linux are written to assume they can
// block. The AIO API is only actually asynchronous for reading/writing the raw file blocks, but if
// the filesystem needs to be involved (to allocate blocks, update metadata, etc.) that will block.
// It's best to imagine that the filesystem is just another tier of memory that happens to be
// slower than RAM (which is slower than L3 cache, which is slower than L2, which is slower than
// L1). You can't do asynchronous RAM access so why asynchronous filesystem? The only way to
// parallelize these is using threads.

244 245 246
class FsNode {
  // Base class for filesystem node types.

Kenton Varda's avatar
Kenton Varda committed
247
public:
248
  Own<FsNode> clone();
Kenton Varda's avatar
Kenton Varda committed
249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289
  // Creates a new object of exactly the same type as this one, pointing at exactly the same
  // external object.
  //
  // Under the hood, this will call dup(), so the FD number will not be the same.

  virtual Maybe<int> getFd() = 0;
  // Get the underlying file descriptor, if any. Returns nullptr if this object actually isn't
  // wrapping a file descriptor.

  enum class Type {
    FILE,
    DIRECTORY,
    SYMLINK,
    BLOCK_DEVICE,
    CHARACTER_DEVICE,
    NAMED_PIPE,
    SOCKET,
    OTHER,
  };

  struct Metadata {
    Type type = Type::FILE;

    uint64_t size = 0;
    // Logical size of the file.

    uint64_t spaceUsed = 0;
    // Physical size of the file on disk. May be smaller for sparse files, or larger for
    // pre-allocated files.

    Date lastModified = UNIX_EPOCH;
    // Last modification time of the file.

    uint linkCount = 1;
    // Number of hard links pointing to this node.

    // Not currently included:
    // - Device / inode number: Rarely useful, and not safe to use in practice anyway.
    // - Access control info: Differs wildly across platforms, and KJ prefers capabilities anyway.
    // - Other timestamps: Differs across platforms.
    // - Device number: If you care, you're probably doing platform-specific stuff anyway.
290 291 292 293 294 295

    Metadata() = default;
    Metadata(Type type, uint64_t size, uint64_t spaceUsed, Date lastModified, uint linkCount)
        : type(type), size(size), spaceUsed(spaceUsed), lastModified(lastModified),
          linkCount(linkCount) {}
    // TODO(cleanup): This constructor is redundant in C++14, but needed in C++11.
Kenton Varda's avatar
Kenton Varda committed
296 297 298 299 300 301 302 303 304 305 306 307
  };

  virtual Metadata stat() = 0;

  virtual void sync() = 0;
  virtual void datasync() = 0;
  // Maps to fsync() and fdatasync() system calls.
  //
  // Also, when creating or overwriting a file, the first call to sync() atomically links the file
  // into the filesystem (*after* syncing the data), so than incomplete data is never visible to
  // other processes. (In practice this works by writing into a temporary file and then rename()ing
  // it.)
308 309 310 311 312

protected:
  virtual Own<FsNode> cloneFsNode() = 0;
  // Implements clone(). Required to return an object with exactly the same type as this one.
  // Hence, every subclass must implement this.
Kenton Varda's avatar
Kenton Varda committed
313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434
};

class ReadableFile: public FsNode {
  // A file node that can be read and memory-mapped. This interface declares no operations that
  // write to the underlying file.

public:
  Own<ReadableFile> clone();
  // Same as FsNode::clone(), but typed as ReadableFile. This hides (rather than overrides) the
  // base-class method, which is not virtual.
  // NOTE(review): presumably implemented on top of cloneFsNode() -- confirm in the implementation.

  String readAllText();
  // Read all text in the file and return as a big string.

  Array<byte> readAllBytes();
  // Read all bytes in the file and return as a big byte array.
  //
  // This differs from mmap() in that the read is performed all at once. Future changes to the file
  // do not affect the returned copy. Consider using mmap() instead, particularly for large files.

  virtual size_t read(uint64_t offset, ArrayPtr<byte> buffer) = 0;
  // Fills `buffer` with data starting at `offset`. Returns the number of bytes actually read --
  // the only time this is less than `buffer.size()` is when EOF occurs mid-buffer.

  virtual Array<const byte> mmap(uint64_t offset, uint64_t size) = 0;
  // Maps the file to memory read-only. The returned array always has exactly the requested size.
  // Depending on the capabilities of the OS and filesystem, the mapping may or may not reflect
  // changes that happen to the file after mmap() returns.
  //
  // Multiple calls to mmap() on the same file may or may not return the same mapping (it is
  // immutable, so there's no possibility of interference).
  //
  // If the file cannot be mmap()ed, an implementation may choose to allocate a buffer on the heap,
  // read into it, and return that. This should only happen if a real mmap() is impossible.
  //
  // The returned array is always exactly the size requested. However, accessing bytes beyond the
  // current end of the file may raise SIGBUS, or may simply return zero.

  virtual Array<byte> mmapPrivate(uint64_t offset, uint64_t size) = 0;
  // Like mmap() but returns a view that the caller can modify. Modifications will not be written
  // to the underlying file. Every call to this method returns a unique mapping. Changes made to
  // the underlying file by other clients may or may not be reflected in the mapping -- in fact,
  // some changes may be reflected while others aren't, even within the same mapping.
  //
  // In practice this is often implemented using copy-on-write pages. When you first write to a
  // page, a copy is made. Hence, changes to the underlying file within that page stop being
  // reflected in the mapping.
};

class AppendableFile: public FsNode, public OutputStream {
  // A filesystem node that is also an OutputStream.
  // NOTE(review): the name suggests that everything written through the OutputStream interface
  // lands at the end of the file -- confirm against the implementations.

public:
  Own<AppendableFile> clone();
  // Same as FsNode::clone(), but typed as AppendableFile.

  // All methods are inherited.
};

class WritableFileMapping {
  // A writable memory mapping of a file, as returned by File::mmapWritable().

public:
  virtual ArrayPtr<byte> get() = 0;
  // Gets the mapped bytes. The returned array can be modified, and those changes may be written to
  // the underlying file, but there is no guarantee that they are written unless you subsequently
  // call changed().

  virtual void changed(ArrayPtr<byte> slice) = 0;
  // Notifies the implementation that the given bytes have changed. For some implementations this
  // may be a no-op while for others it may be necessary in order for the changes to be written
  // back at all.
  //
  // `slice` must be a slice of the array returned by `get()`.

  virtual void sync(ArrayPtr<byte> slice) = 0;
  // Implies `changed()`, and then waits until the range has actually been written to disk before
  // returning.
  //
  // `slice` must be a slice of the array returned by `get()`.
};

class File: public ReadableFile {
  // A file that can be written as well as read: adds whole-file replacement, positional writes,
  // zeroing, truncation, writable mappings, and file-to-file copying on top of ReadableFile.

public:
  Own<File> clone();
  // Same as FsNode::clone(), but typed as File.

  void writeAll(ArrayPtr<const byte> bytes);
  void writeAll(StringPtr text);
  // Completely replace the file with the given bytes or text.

  virtual void write(uint64_t offset, ArrayPtr<const byte> data) = 0;
  // Write the given data starting at the given offset in the file.

  virtual void zero(uint64_t offset, uint64_t size) = 0;
  // Write zeros to the file, starting at `offset` and continuing for `size` bytes. If the platform
  // supports it, this will "punch a hole" in the file, such that blocks that are entirely zeros
  // do not take space on disk.

  virtual void truncate(uint64_t size) = 0;
  // Set the file end pointer to `size`. If `size` is less than the current size, data past the end
  // is truncated. If `size` is larger than the current size, zeros are added to the end of the
  // file. If the platform supports it, blocks containing all-zeros will not be stored to disk.

  virtual Own<WritableFileMapping> mmapWritable(uint64_t offset, uint64_t size) = 0;
  // Like ReadableFile::mmap() but returns a mapping for which any changes will be immediately
  // visible in other mappings of the file on the same system and will eventually be written back
  // to the file.

  virtual size_t copy(uint64_t offset, ReadableFile& from, uint64_t fromOffset, uint64_t size);
  // Copies bytes from one file to another.
  //
  // `offset` is the position in this file to write to; `fromOffset` is the position in `from` to
  // read from.
  //
  // Copies `size` bytes or to EOF, whichever comes first. Returns the number of bytes actually
  // copied. Hint: Pass kj::maxValue for `size` to always copy to EOF.
  //
  // The copy is not atomic. Concurrent writes may lead to garbage results.
  //
  // The default implementation performs a series of reads and writes. Subclasses can often provide
  // superior implementations that offload the work to the OS or even implement copy-on-write.
};

class ReadableDirectory: public FsNode {
  // Read-only subset of `Directory`.

public:
  Own<ReadableDirectory> clone();
  // Same as FsNode::clone(), but typed as ReadableDirectory.

  virtual Array<String> listNames() = 0;
  // List the contents of this directory. Does NOT include "." nor "..".

  struct Entry {
    // One directory entry as returned by listEntries(): a name plus the node type.

    FsNode::Type type;
    String name;

    inline bool operator< (const Entry& other) const { return name <  other.name; }
    inline bool operator> (const Entry& other) const { return name >  other.name; }
    inline bool operator<=(const Entry& other) const { return name <= other.name; }
    inline bool operator>=(const Entry& other) const { return name >= other.name; }
    // Convenience comparison operators to sort entries by name.
  };

  virtual Array<Entry> listEntries() = 0;
  // List the contents of the directory including the type of each file. On some platforms and
  // filesystems, this is just as fast as listNames(), but on others it may require stat()ing each
  // file.

  virtual bool exists(PathPtr path) = 0;
  // Does the specified path exist?
  //
  // If the path is a symlink, the symlink is followed and the return value indicates if the target
  // exists. If you want to know if the symlink exists, use lstat(). (This implies that listNames()
  // may return names for which exists() reports false.)

  FsNode::Metadata lstat(PathPtr path);
  virtual Maybe<FsNode::Metadata> tryLstat(PathPtr path) = 0;
  // Gets metadata about the path. If the path is a symlink, it is not followed -- the metadata
  // describes the symlink itself. `tryLstat()` returns null if the path doesn't exist.

  Own<ReadableFile> openFile(PathPtr path);
  virtual Maybe<Own<ReadableFile>> tryOpenFile(PathPtr path) = 0;
  // Open a file for reading.
  //
  // `tryOpenFile()` returns null if the path doesn't exist. Other errors still throw exceptions.

  Own<ReadableDirectory> openSubdir(PathPtr path);
  virtual Maybe<Own<ReadableDirectory>> tryOpenSubdir(PathPtr path) = 0;
  // Opens a subdirectory.
  //
  // `tryOpenSubdir()` returns null if the path doesn't exist. Other errors still throw exceptions.

  String readlink(PathPtr path);
  virtual Maybe<String> tryReadlink(PathPtr path) = 0;
  // If `path` is a symlink, reads and returns the link contents.
  //
  // See Directory::symlink() for warnings about symlinks.
};

enum class WriteMode {
  // Mode for opening a file (or directory) for write.
  //
  // (To open a file or directory read-only, do not specify a mode.)
  //
  // WriteMode is a bitfield. Hence, it overloads the bitwise logic operators. To check if a
  // particular bit is set in a bitfield, use kj::has(), like:
  //
  //     if (kj::has(mode, WriteMode::MUST_EXIST)) {
  //       requireExists(path);
  //     }
  //
  // (`if (mode & WriteMode::MUST_EXIST)` doesn't work because WriteMode is an enum class, which
  // cannot be converted to bool. Alas, C++ does not allow you to define a conversion operator
  // on an enum type, so we can't define a conversion to bool.)
  //
  // Every enumerator below is a distinct power of two, so any combination can be OR'd together
  // without the bits colliding.
  //
  // NOTE(review): the comments here mention `MUST_EXIST` (in the example above) and `REPLACE`,
  // neither of which is an enumerator of this enum. Confirm whether they are defined elsewhere or
  // whether these references are stale.

  // -----------------------------------------
  // Core flags
  //
  // At least one of CREATE or MODIFY must be specified. Optionally, the two flags can be combined
  // with a bitwise-OR.

  CREATE = 1,
  // Create a new empty file.
  //
  // This can be OR'd with MODIFY, but not with REPLACE.
  //
  // When not combined with MODIFY, if the file already exists (including as a broken symlink),
  // tryOpenFile() returns null (and openFile() throws).
  //
  // When combined with MODIFY, if the path already exists, it will be opened as if CREATE hadn't
  // been specified at all. If the path refers to a broken symlink, the file at the target of the
  // link will be created (if its parent directory exists).

  MODIFY = 2,
  // Modify an existing file.
  //
  // This can be OR'd with CREATE, but not with REPLACE.
  //
  // When not combined with CREATE, if the file doesn't exist (including if it is a broken symlink),
  // tryOpenFile() returns null (and openFile() throws).
  //
  // When combined with CREATE, if the path doesn't exist, it will be created as if MODIFY hadn't
  // been specified at all. If the path refers to a broken symlink, the file at the target of the
  // link will be created (if its parent directory exists).

  // -----------------------------------------
  // Additional flags
  //
  // Any number of these may be OR'd with the core flags.

  CREATE_PARENT = 4,
  // Indicates that if the target node's parent directory doesn't exist, it should be created
  // automatically, along with its parent, and so on. This creation is NOT atomic.
  //
  // This bit only makes sense with CREATE or REPLACE.

  EXECUTABLE = 8,
  // Mark this file executable, if this is a meaningful designation on the host platform.

  PRIVATE = 16,
  // Indicates that this file is sensitive and should have permissions masked so that it is only
  // accessible by the current user.
  //
  // When this is not used, the platform's default access control settings are used. On Unix,
  // that usually means the umask is applied. On Windows, it means permissions are inherited from
  // the parent.
};

inline constexpr WriteMode operator|(WriteMode a, WriteMode b) {
  // Union of two flag sets: a bit is set in the result if it is set in either operand.
  return static_cast<WriteMode>(
      static_cast<uint>(a) |
      static_cast<uint>(b));
}
inline constexpr WriteMode operator&(WriteMode a, WriteMode b) {
  // Intersection of two flag sets: a bit is set in the result only if it is set in both operands.
  return static_cast<WriteMode>(
      static_cast<uint>(a) &
      static_cast<uint>(b));
}
inline constexpr WriteMode operator+(WriteMode a, WriteMode b) {
  // "Adding" a flag is the same as OR-ing it in, so simply defer to operator| declared above.
  return a | b;
}
inline constexpr WriteMode operator-(WriteMode a, WriteMode b) {
  // "Subtracting" a flag clears it: keep the bits of `a` that are not set in `b`.
  return static_cast<WriteMode>(
      static_cast<uint>(a) &
      ~static_cast<uint>(b));
}
template <typename T, typename = EnableIf<__is_enum(T)>>
inline constexpr bool has(T haystack, T needle) {
  // Tests whether a bitfield enum value contains a given set of flags.
  //
  // Returns true iff every bit set in `needle` is also set in `haystack`. When `needle` combines
  // several flags, all of them must be present. A `needle` with no bits set always yields true.
  //
  // Declared constexpr, like the WriteMode operators above, so that flag checks can also be used
  // in constant expressions (e.g. static_assert). The body is a single return statement, which
  // keeps it valid as a C++11 constexpr function.
  return (static_cast<__underlying_type(T)>(haystack) &
          static_cast<__underlying_type(T)>(needle)) ==
          static_cast<__underlying_type(T)>(needle);
}

enum class TransferMode {
  // Specifies desired behavior for Directory::transfer().
  //
  // Unlike WriteMode, the enumerators carry no explicit values: exactly one of them is chosen
  // per transfer.

  MOVE,
  // The node is moved to the new location, i.e. the old location is deleted. If possible, this
  // move is performed without copying, otherwise it is performed as a copy followed by a delete.

  LINK,
  // The new location becomes a synonym for the old location (a "hard link"). Filesystems have
  // varying support for this -- typically, it is not supported on directories.

  COPY
  // The new location becomes a copy of the old.
  //
  // Some filesystems may implement this in terms of copy-on-write.
  //
  // If the filesystem supports sparse files, COPY takes sparseness into account -- it will punch
  // holes in the target file where holes exist in the source file.
};

class Directory: public ReadableDirectory {
  // Refers to a specific directory on disk.
  //
  // A `Directory` object *only* provides access to children of the directory, not parents. That
  // is, you cannot open the file "..", nor jump to the root directory with "/".
  //
  // On OSs that support it, a `Directory` is backed by an open handle to the directory node. This
  // means:
  // - If the directory is renamed on-disk, the `Directory` object still points at it.
  // - Opening files in the directory only requires the OS to traverse the path from the directory
  //   to the file; it doesn't have to re-traverse all the way from the filesystem root.

public:
  Own<Directory> clone();
  // Defined inline at the bottom of this header: calls cloneFsNode() and downcasts the result to
  // `Directory`.

  template <typename T>
  class Replacer {
    // Implements an atomic replacement of a file or directory, allowing changes to be made to
    // storage in a way that avoids losing data in a power outage and prevents other processes
    // from observing content in an inconsistent state.
    //
    // `T` may be `File` or `Directory`. For readability, the text below describes replacing a
    // file, but the logic is the same for directories.
    //
    // When you call `Directory::replaceFile()`, a temporary file is created, but the specified
    // path is not yet touched. You may call `get()` to obtain the temporary file object, through
    // which you may initialize its content, knowing that no other process can see it yet. The file
    // is atomically moved to its final path when you call `commit()`. If you destroy the Replacer
    // without calling commit(), the temporary file is deleted.
    //
    // Note that most operating systems sadly do not support creating a truly unnamed temporary file
    // and then linking it in later. Moreover, the file cannot necessarily be created in the system
    // temporary directory because it might not be on the same filesystem as the target. Therefore,
    // the replacement file may initially be created in the same directory as its eventual target.
    // The implementation of Directory will choose a name that is unique and "hidden" according to
    // the conventions of the filesystem. Additionally, the implementation of Directory will avoid
    // returning these temporary files from its list*() methods, in order to avoid observable
    // inconsistencies across platforms.
  public:
    explicit Replacer(WriteMode mode);
    // Stores `mode`; commit() later passes it to Directory::commitFailed() if tryCommit() returns
    // false.

    virtual T& get() = 0;
    // Gets the File or Directory representing the replacement data. Fill in this object before
    // calling commit().

    void commit();
    virtual bool tryCommit() = 0;
    // Commit the replacement.
    //
    // `tryCommit()` may return false based on the CREATE/MODIFY bits passed as the WriteMode when
    // the replacement was initiated. (If CREATE but not MODIFY was used, tryCommit() returns
    // false to indicate that the target file already existed. If MODIFY but not CREATE was used,
    // tryCommit() returns false to indicate that the file didn't exist.)
    //
    // `commit()` calls `tryCommit()` and reports a false result through
    // `Directory::commitFailed()`.
    //
    // `commit()` is atomic, meaning that there is no point in time at which other processes
    // observing the file will see it in an intermediate state -- they will either see the old
    // content or the complete new content. This includes in the case of a power outage or machine
    // failure: on recovery, the file will either be in the old state or the new state, but not in
    // some intermediate state.
    //
    // It's important to note that a power failure *after commit() returns* can still revert the
    // file to its previous state. That is, `commit()` does NOT guarantee that, upon return, the
    // new content is durable. In order to guarantee this, you must call `sync()` on the immediate
    // parent directory of the replaced file.
    //
    // Note that, sadly, not all filesystems / platforms are capable of supporting all of the
    // guarantees documented above. In such cases, commit() will make a best-effort attempt to do
    // what it claims. Some examples of possible problems include:
    // - Any guarantees about durability through a power outage probably require a journaling
    //   filesystem.
    // - Many platforms do not support atomically replacing a non-empty directory. Linux does as
    //   of kernel 3.15 (via the renameat2() syscall using RENAME_EXCHANGE). Where not supported,
    //   the old directory will be moved away just before the replacement is moved into place.
    // - Many platforms do not support atomically requiring the existence or non-existence of a
    //   file before replacing it. In these cases, commit() may have to perform the check as a
    //   separate step, with a small window for a race condition.
    // - Many platforms do not support "unlinking" a non-empty directory, meaning that a replaced
    //   directory will need to be deconstructed by deleting all contents. If another process has
    //   the directory open when it is replaced, that process will observe the contents
    //   disappearing after the replacement (actually, a swap) has taken place. This differs from
    //   files, where a process that has opened a file before it is replaced will continue to see
    //   the file's old content unchanged after the replacement.

  protected:
    const WriteMode mode;
    // The WriteMode given to the constructor.
  };

  using ReadableDirectory::openFile;
  using ReadableDirectory::openSubdir;
  using ReadableDirectory::tryOpenFile;
  using ReadableDirectory::tryOpenSubdir;
  // Bring ReadableDirectory's overloads of these names into scope; without these declarations,
  // the WriteMode-taking overloads declared below would hide them.

  Own<File> openFile(PathPtr path, WriteMode mode);
  virtual Maybe<Own<File>> tryOpenFile(PathPtr path, WriteMode mode) = 0;
  // Open a file for writing.
  //
  // `tryOpenFile()` returns null if the path is required to exist but doesn't (MODIFY or REPLACE)
  // or if the path is required not to exist but does (CREATE or RACE).

  virtual Own<Replacer<File>> replaceFile(PathPtr path, WriteMode mode) = 0;
  // Construct a file which, when ready, will be atomically moved to `path`, replacing whatever
  // is there already. See `Replacer<T>` for details.
  //
  // The `CREATE` and `MODIFY` bits of `mode` are not enforced until commit time, hence
  // `replaceFile()` has no "try" variant.

  virtual Own<File> createTemporary() = 0;
  // Create a temporary file backed by this directory's filesystem, but which isn't linked into
  // the directory tree. The file is deleted from disk when all references to it have been dropped.

  Own<AppendableFile> appendFile(PathPtr path, WriteMode mode);
  virtual Maybe<Own<AppendableFile>> tryAppendFile(PathPtr path, WriteMode mode) = 0;
  // Opens the file for appending only. Useful for log files.
  //
  // If the underlying filesystem supports it, writes to the file will always be appended even if
  // other writers are writing to the same file at the same time -- however, some implementations
  // may instead assume that no other process is changing the file size between writes.

  Own<Directory> openSubdir(PathPtr path, WriteMode mode);
  virtual Maybe<Own<Directory>> tryOpenSubdir(PathPtr path, WriteMode mode) = 0;
  // Opens a subdirectory for writing.

  virtual Own<Replacer<Directory>> replaceSubdir(PathPtr path, WriteMode mode) = 0;
  // Construct a directory which, when ready, will be atomically moved to `path`, replacing
  // whatever is there already. See `Replacer<T>` for details.
  //
  // The `CREATE` and `MODIFY` bits of `mode` are not enforced until commit time, hence
  // `replaceSubdir()` has no "try" variant.

  void symlink(PathPtr linkpath, StringPtr content, WriteMode mode);
  virtual bool trySymlink(PathPtr linkpath, StringPtr content, WriteMode mode) = 0;
  // Create a symlink. `content` is the raw text which will be written into the symlink node.
  // How this text is interpreted is entirely dependent on the filesystem. Note in particular that:
  // - Windows will require a path that uses backslashes as the separator.
  // - InMemoryDirectory does not support symlinks containing "..".
  //
  // Unfortunately under many implementations symlink() can be used to break out of the directory
  // by writing an absolute path or utilizing "..". Do not call this method with a value for
  // `content` that you don't trust.
  //
  // `mode` must be CREATE or REPLACE, not MODIFY. CREATE_PARENT is honored but EXECUTABLE and
  // PRIVATE have no effect. `trySymlink()` returns false in CREATE mode when the target already
  // exists.

  void transfer(PathPtr toPath, WriteMode toMode,
                PathPtr fromPath, TransferMode mode);
  void transfer(PathPtr toPath, WriteMode toMode,
                Directory& fromDirectory, PathPtr fromPath,
                TransferMode mode);
  virtual bool tryTransfer(PathPtr toPath, WriteMode toMode,
                           Directory& fromDirectory, PathPtr fromPath,
                           TransferMode mode);
  virtual Maybe<bool> tryTransferTo(Directory& toDirectory, PathPtr toPath, WriteMode toMode,
                                    PathPtr fromPath, TransferMode mode);
  // Move, link, or copy a file/directory tree from one location to another.
  //
  // The four-argument transfer() overload uses this directory as the source directory as well as
  // the target (it forwards to the five-argument overload with `*this` as `fromDirectory`).
  //
  // Filesystems vary in what kinds of transfers are allowed, especially for TransferMode::LINK,
  // and whether TransferMode::MOVE is implemented as an actual move vs. copy+delete.
  //
  // tryTransfer() returns false if the source location didn't exist, or when `toMode` is CREATE
  // and the target already exists. The default implementation implements only TransferMode::COPY.
  //
  // tryTransferTo() exists to implement double-dispatch. It should be called as a fallback by
  // implementations of tryTransfer() in cases where the target directory would otherwise fail or
  // perform a pessimal transfer. The default implementation returns nullptr, which the caller
  // should interpret as: "I don't have any special optimizations; do the obvious thing."
  //
  // `toMode` controls how the target path is created. CREATE_PARENT is honored but EXECUTABLE and
  // PRIVATE have no effect.

  void remove(PathPtr path);
  virtual bool tryRemove(PathPtr path) = 0;
  // Deletes/unlinks the given path. If the path names a directory, it is recursively deleted.
  //
  // tryRemove() returns false if the path doesn't exist; remove() throws in this case.

  // TODO(someday):
  // - Support sockets? There's no openat()-like interface for sockets, so it's hard to support
  //   them currently. Also you'd probably want to use them with the async library.
  // - Support named pipes? Unclear if there's a use case that isn't better-served by sockets.
  //   Then again, they can be openat()ed.
  // - Support watching for changes (inotify). Probably also requires the async library. Also
  //   lacks openat()-like semantics.
  // - xattrs -- linux-specific
  // - chown/chmod/etc. -- unix-specific, ACLs, eww
  // - set timestamps -- only needed by archiving programs
  // - advisory locks
  // - sendfile?
  // - fadvise and such

private:
  static void commitFailed(WriteMode mode);
  // Called by Replacer<T>::commit() with the replacer's WriteMode when tryCommit() returns false.
  // Defined outside this header; presumably it reports the failure by throwing.
};

class Filesystem {
public:
  virtual Directory& getRoot() = 0;
784
  // Get the filesystem's root directory, as of the time the Filesystem object was created.
Kenton Varda's avatar
Kenton Varda committed
785 786

  virtual Directory& getCurrent() = 0;
787
  // Get the filesystem's current directory, as of the time the Filesystem object was created.
Kenton Varda's avatar
Kenton Varda committed
788

789 790 791 792 793
  virtual PathPtr getCurrentPath() = 0;
  // Get the path from the root to the current directory, as of the time the Filesystem object was
  // created. Note that because a `Directory` does not provide access to its parent, if you want to
  // follow `..` from the current directory, you must use `getCurrentPath().eval("..")` or
  // `getCurrentPath().parent()`.
Kenton Varda's avatar
Kenton Varda committed
794 795 796 797 798 799 800 801 802 803 804 805 806 807 808 809 810 811 812 813 814 815 816 817 818 819 820 821 822 823 824 825 826 827 828 829 830 831 832 833 834 835 836
  //
  // This function attempts to determine the path as it appeared in the user's shell before this
  // program was started. That means, if the user had `cd`ed into a symlink, the path through that
  // symlink is returned, *not* the canonical path.
  //
  // Because of this, there is an important difference between how the operating system interprets
  // "../foo" and what you get when you write `getCurrentPath().eval("../foo")`: The former
  // will interpret ".." relative to the directory's canonical path, whereas the latter will
  // interpret it relative to the path shown in the user's shell. In practice, the latter is
  // almost always what the user wants! But the former behavior is what almost all commands do
  // in practice, and it leads to confusion. KJ commands should implement the behavior the user
  // expects.
};

// =======================================================================================

Own<File> newInMemoryFile(Clock& clock);
Own<Directory> newInMemoryDirectory(Clock& clock);
// Construct file and directory objects which reside in-memory.
//
// NOTE(review): the role of `clock` is not stated here -- presumably it supplies the timestamps
// reported for the in-memory nodes; confirm against the implementation.
//
// InMemoryFile has the following special properties:
// - The backing store is not sparse and never gets smaller even if you truncate the file.
// - While a non-private memory mapping exists, the backing store cannot get larger. Any operation
//   which would expand it will throw.
//
// InMemoryDirectory has the following special properties:
// - Symlinks are processed using Path::parse(). This implies that a symlink cannot point to a
//   parent directory -- InMemoryDirectory does not know its parent.
// - link() can link directory nodes in addition to files.
// - link() and rename() accept any kind of Directory as `fromDirectory` -- it doesn't need to be
//   another InMemoryDirectory. However, for rename(), the from path must be a directory.
//
// NOTE(review): "link()" and "rename()" above appear to mean Directory::transfer() with
// TransferMode::LINK and TransferMode::MOVE respectively -- confirm.

Own<AppendableFile> newFileAppender(Own<File> inner);
// Creates an AppendableFile by wrapping the File `inner`.
//
// Note that this implementation assumes it is the only writer. A correct implementation should
// always append to the file even if other writes are happening simultaneously, as is achieved
// with the O_APPEND flag to open(2), but that behavior is not possible to emulate on top of
// `File`.

Own<ReadableFile> newDiskReadableFile(kj::AutoCloseFd fd);
Own<AppendableFile> newDiskAppendableFile(kj::AutoCloseFd fd);
Own<File> newDiskFile(kj::AutoCloseFd fd);
Own<ReadableDirectory> newDiskReadableDirectory(kj::AutoCloseFd fd);
Own<Directory> newDiskDirectory(kj::AutoCloseFd fd);
// Wrap a file descriptor as various filesystem types.
//
// The descriptor is passed by value as a kj::AutoCloseFd. Presumably `fd` must refer to a node
// of the matching kind (file vs. directory) -- confirm against the implementation.

Own<Filesystem> newDiskFilesystem();
// Get an implementation of `Filesystem` representing the real filesystem.
//
// DO NOT CALL THIS except at the top level of your program, e.g. in main(). Anywhere else, you
// should instead have your caller pass in a Filesystem object, or a specific Directory object,
// or whatever it is that your code needs. This ensures that your code supports dependency
// injection, which makes it more reusable and testable.
//
// newDiskFilesystem() reads the current working directory at the time it is called. The returned
// object is not affected by subsequent calls to chdir().

// =======================================================================================
// inline implementation details

// Inline members of Path. Apart from the constructors and the direct accessors over `parts`,
// each member views this Path as a PathPtr and forwards to the PathPtr member of the same name.

inline Path::Path(decltype(nullptr))
    : parts(nullptr) {}
inline Path::Path(std::initializer_list<StringPtr> parts)
    : Path(arrayPtr(parts.begin(), parts.end())) {}
inline Path::Path(Array<String> parts, decltype(ALREADY_CHECKED))
    : parts(kj::mv(parts)) {}

inline Path Path::clone() const {
  return PathPtr(*this).clone();
}

inline Path Path::append(Path suffix) const& {
  return PathPtr(*this).append(kj::mv(suffix));
}
inline Path Path::append(PathPtr suffix) const& {
  return PathPtr(*this).append(suffix);
}

// String-taking append() overloads: wrap the string in a Path, then append that. The `&&`
// overloads re-dispatch on `kj::mv(*this)`.
inline Path Path::append(StringPtr suffix) const& {
  return append(Path(suffix));
}
inline Path Path::append(StringPtr suffix) && {
  return kj::mv(*this).append(Path(suffix));
}
inline Path Path::append(String suffix) const& {
  return append(Path(kj::mv(suffix)));
}
inline Path Path::append(String suffix) && {
  return kj::mv(*this).append(Path(kj::mv(suffix)));
}

inline Path Path::eval(StringPtr pathText) const& {
  return PathPtr(*this).eval(pathText);
}
inline PathPtr Path::basename() const& {
  return PathPtr(*this).basename();
}
inline PathPtr Path::parent() const& {
  return PathPtr(*this).parent();
}

// Component access. The `&&` overload of operator[] moves the component out of `parts`.
inline const String& Path::operator[](size_t i) const& {
  return parts[i];
}
inline String Path::operator[](size_t i) && {
  return kj::mv(parts[i]);
}
inline size_t Path::size() const {
  return parts.size();
}
inline const String* Path::begin() const {
  return parts.begin();
}
inline const String* Path::end() const {
  return parts.end();
}

inline PathPtr Path::slice(size_t start, size_t end) const& {
  return PathPtr(*this).slice(start, end);
}
inline String Path::toString(bool absolute) const {
  return PathPtr(*this).toString(absolute);
}
inline Path Path::evalWin32(StringPtr pathText) const& {
  return PathPtr(*this).evalWin32(pathText);
}
inline String Path::toWin32String(bool absolute) const {
  return PathPtr(*this).toWin32String(absolute);
}

// Inline members of PathPtr: constructors, string-taking append() overloads, and direct
// accessors over `parts`.

inline PathPtr::PathPtr(decltype(nullptr))
    : parts(nullptr) {}
inline PathPtr::PathPtr(const Path& path)
    : parts(path.parts) {}
inline PathPtr::PathPtr(ArrayPtr<const String> parts)
    : parts(parts) {}

// Wrap the string in a Path, then append that.
inline Path PathPtr::append(StringPtr suffix) const {
  return append(Path(suffix));
}
inline Path PathPtr::append(String suffix) const {
  return append(Path(kj::mv(suffix)));
}

inline const String& PathPtr::operator[](size_t i) const {
  return parts[i];
}
inline size_t PathPtr::size() const {
  return parts.size();
}
inline const String* PathPtr::begin() const {
  return parts.begin();
}
inline const String* PathPtr::end() const {
  return parts.end();
}

inline PathPtr PathPtr::slice(size_t start, size_t end) const {
  // Build a new PathPtr over the [start, end) slice of `parts`.
  return PathPtr(parts.slice(start, end));
}

// clone() for each node interface: calls cloneFsNode() and downcasts the returned object to the
// interface type the method is declared on.
inline Own<FsNode> FsNode::clone() {
  return cloneFsNode().downcast<FsNode>();
}
inline Own<ReadableFile> ReadableFile::clone() {
  return cloneFsNode().downcast<ReadableFile>();
}
inline Own<AppendableFile> AppendableFile::clone() {
  return cloneFsNode().downcast<AppendableFile>();
}
inline Own<File> File::clone() {
  return cloneFsNode().downcast<File>();
}
inline Own<ReadableDirectory> ReadableDirectory::clone() {
  return cloneFsNode().downcast<ReadableDirectory>();
}
inline Own<Directory> Directory::clone() {
  return cloneFsNode().downcast<Directory>();
}

inline void Directory::transfer(PathPtr toPath, WriteMode toMode,
                                PathPtr fromPath, TransferMode mode) {
  // Same-directory convenience overload: this directory serves as the source directory too, so
  // forward to the overload that takes an explicit `fromDirectory`.
  Directory& sourceDirectory = *this;
  transfer(toPath, toMode, sourceDirectory, fromPath, mode);
}

template <typename T>
inline Directory::Replacer<T>::Replacer(WriteMode mode)
    : mode(mode) {
  // Only records the WriteMode; commit() passes it to commitFailed() on failure.
}

template <typename T>
void Directory::Replacer<T>::commit() {
  // Non-"try" flavor of tryCommit(): nothing more to do when it succeeds.
  if (tryCommit()) {
    return;
  }

  // tryCommit() returned false; report the failure, passing along the WriteMode this replacer
  // was created with.
  commitFailed(mode);
}

} // namespace kj

#endif // KJ_FILESYSTEM_H_