Update SchemaParser to use KJ filesystem API.

This required some hairy backwards-compatibility hacks as the parseDiskFile() method is widely used.

Update SchemaParser to use KJ filesystem API.
This required some hairy backwards-compatibility hacks as the parseDiskFile() method is widely used.
c1fe2b03 · Kenton Varda · e2a9467b · c1fe2b03 · c1fe2b03 · c1fe2b03
Commit c1fe2b03 authored Dec 21, 2017 by Kenton Varda
Showing with 283 additions and 311 deletions

schema-parser-test.c++ c++/src/capnp/schema-parser-test.c++ +30 -31

schema-parser.c++ c++/src/capnp/schema-parser.c++ +168 -227

schema-parser.h c++/src/capnp/schema-parser.h +85 -53

No files found.
--- a/c++/src/capnp/schema-parser-test.c++
+++ b/c++/src/capnp/schema-parser-test.c++
@@ -30,26 +30,22 @@
 namespace capnp {
 namespace {

-class FakeFileReader final: public SchemaFile::FileReader {
+class FakeFileReader final: public kj::Filesystem {
 public:
  void add(kj::StringPtr name, kj::StringPtr content) {
-    files[name] = content;
+    root->openFile(cwd.eval(name), kj::WriteMode::CREATE | kj::WriteMode::CREATE_PARENT)
+        ->writeAll(content);
  }

-  bool exists(kj::StringPtr path) const override {
-    return files.count(path) > 0;
-  }
-
-  kj::Array<const char> read(kj::StringPtr path) const override {
-    auto iter = files.find(path);
-    KJ_ASSERT(iter != files.end(), "FakeFileReader has no such file.", path);
-    auto result = kj::heapArray<char>(iter->second.size());
-    memcpy(result.begin(), iter->second.begin(), iter->second.size());
-    return kj::mv(result);
-  }
+  kj::Directory& getRoot() override { return *root; }
+  kj::Directory& getCurrent() override { return *current; }
+  kj::PathPtr getCurrentPath() override { return cwd; }

 private:
-  std::map<kj::StringPtr, kj::StringPtr> files;
+  kj::Own<kj::Directory> root = kj::newInMemoryDirectory(kj::nullClock());
+  kj::Path cwd = kj::Path({"path", "to", "current", "dir"});
+  kj::Own<kj::Directory> current = root->openSubdir(cwd,
+      kj::WriteMode::CREATE | kj::WriteMode::CREATE_PARENT);
 };

 static uint64_t getFieldTypeFileId(StructSchema::Field field) {
@@ -59,8 +55,9 @@ static uint64_t getFieldTypeFileId(StructSchema::Field field) {
 }

 TEST(SchemaParser, Basic) {
-  SchemaParser parser;
  FakeFileReader reader;
+  SchemaParser parser;
+  parser.setDiskFilesystem(reader);

  reader.add("src/foo/bar.capnp",
      "@0x8123456789abcdef;\n"
@@ -90,8 +87,8 @@ TEST(SchemaParser, Basic) {
    "/usr/include", "/usr/local/include", "/opt/include"
  };

-  ParsedSchema barSchema = parser.parseFile(SchemaFile::newDiskFile(
-      "foo2/bar2.capnp", "src/foo/bar.capnp", importPath, reader));
+  ParsedSchema barSchema = parser.parseDiskFile(
+      "foo2/bar2.capnp", "src/foo/bar.capnp", importPath);

  auto barProto = barSchema.getProto();
  EXPECT_EQ(0x8123456789abcdefull, barProto.getId());
@@ -109,25 +106,25 @@ TEST(SchemaParser, Basic) {
  EXPECT_EQ("garply", barFields[3].getProto().getName());
  EXPECT_EQ(0x856789abcdef1234ull, getFieldTypeFileId(barFields[3]));

-  auto bazSchema = parser.parseFile(SchemaFile::newDiskFile(
+  auto bazSchema = parser.parseDiskFile(
      "not/used/because/already/loaded",
-      "src/foo/baz.capnp", importPath, reader));
+      "src/foo/baz.capnp", importPath);
  EXPECT_EQ(0x823456789abcdef1ull, bazSchema.getProto().getId());
  EXPECT_EQ("foo2/baz.capnp", bazSchema.getProto().getDisplayName());
  auto bazStruct = bazSchema.getNested("Baz").asStruct();
  EXPECT_EQ(bazStruct, barStruct.getDependency(bazStruct.getProto().getId()));

-  auto corgeSchema = parser.parseFile(SchemaFile::newDiskFile(
+  auto corgeSchema = parser.parseDiskFile(
      "not/used/because/already/loaded",
-      "src/qux/corge.capnp", importPath, reader));
+      "src/qux/corge.capnp", importPath);
  EXPECT_EQ(0x83456789abcdef12ull, corgeSchema.getProto().getId());
  EXPECT_EQ("qux/corge.capnp", corgeSchema.getProto().getDisplayName());
  auto corgeStruct = corgeSchema.getNested("Corge").asStruct();
  EXPECT_EQ(corgeStruct, barStruct.getDependency(corgeStruct.getProto().getId()));

-  auto graultSchema = parser.parseFile(SchemaFile::newDiskFile(
+  auto graultSchema = parser.parseDiskFile(
      "not/used/because/already/loaded",
-      "/usr/include/grault.capnp", importPath, reader));
+      "/usr/include/grault.capnp", importPath);
  EXPECT_EQ(0x8456789abcdef123ull, graultSchema.getProto().getId());
  EXPECT_EQ("grault.capnp", graultSchema.getProto().getDisplayName());
  auto graultStruct = graultSchema.getNested("Grault").asStruct();
@@ -135,9 +132,9 @@ TEST(SchemaParser, Basic) {

  // Try importing the other grault.capnp directly.  It'll get the display name we specify since
  // it wasn't imported before.
-  auto wrongGraultSchema = parser.parseFile(SchemaFile::newDiskFile(
+  auto wrongGraultSchema = parser.parseDiskFile(
      "weird/display/name.capnp",
-      "/opt/include/grault.capnp", importPath, reader));
+      "/opt/include/grault.capnp", importPath);
  EXPECT_EQ(0x8000000000000001ull, wrongGraultSchema.getProto().getId());
  EXPECT_EQ("weird/display/name.capnp", wrongGraultSchema.getProto().getDisplayName());
 }
@@ -147,8 +144,9 @@ TEST(SchemaParser, Constants) {
  // constants are not actually accessible from the generated code API, so the only way to ever
  // get a ConstSchema is by parsing it.

-  SchemaParser parser;
  FakeFileReader reader;
+  SchemaParser parser;
+  parser.setDiskFilesystem(reader);

  reader.add("const.capnp",
      "@0x8123456789abcdef;\n"
@@ -164,8 +162,8 @@ TEST(SchemaParser, Constants) {
      "  value @0 :T;\n"
      "}\n");

-  ParsedSchema fileSchema = parser.parseFile(SchemaFile::newDiskFile(
-      "const.capnp", "const.capnp", nullptr, reader));
+  ParsedSchema fileSchema = parser.parseDiskFile(
+      "const.capnp", "const.capnp", nullptr);

  EXPECT_EQ(1234, fileSchema.getNested("uint32Const").asConst().as<uint32_t>());

@@ -198,8 +196,9 @@ void expectSourceInfo(schema::Node::SourceInfo::Reader sourceInfo,
 }

 TEST(SchemaParser, SourceInfo) {
-  SchemaParser parser;
  FakeFileReader reader;
+  SchemaParser parser;
+  parser.setDiskFilesystem(reader);

  reader.add("foo.capnp",
      "@0x84a2c6051e1061ed;\n"
@@ -234,8 +233,8 @@ TEST(SchemaParser, SourceInfo) {
      "struct Thud @0xcca9972702b730b4 {}\n"
      "# post-comment\n");

-  ParsedSchema file = parser.parseFile(SchemaFile::newDiskFile(
-      "foo.capnp", "foo.capnp", nullptr, reader));
+  ParsedSchema file = parser.parseDiskFile(
+      "foo.capnp", "foo.capnp", nullptr);
  ParsedSchema foo = file.getNested("Foo");

  expectSourceInfo(file.getSourceInfo(), 0x84a2c6051e1061edull, "file doc comment\n", {});

--- a/c++/src/capnp/schema-parser.c++
+++ b/c++/src/capnp/schema-parser.c++
@@ -31,17 +31,7 @@
 #include <kj/vector.h>
 #include <kj/debug.h>
 #include <kj/io.h>
-#include <kj/miniposix.h>
-#include <sys/types.h>
-#include <sys/stat.h>
-#include <fcntl.h>
-#include <errno.h>
-
-#if _WIN32
-#include <windows.h>
-#else
-#include <sys/mman.h>
-#endif
+#include <map>

 namespace capnp {

@@ -171,20 +161,128 @@ struct SchemaFileEq {

 }  // namespace

+struct SchemaParser::DiskFileCompat {
+  // Stuff we only create if parseDiskFile() is ever called, in order to translate that call into
+  // KJ filesystem API calls.
+
+  kj::Own<kj::Filesystem> ownFs;
+  kj::Filesystem& fs;
+
+  struct ImportDir {
+    kj::String pathStr;
+    kj::Path path;
+    kj::Own<kj::ReadableDirectory> dir;
+  };
+  std::map<kj::StringPtr, ImportDir> cachedImportDirs;
+
+  std::map<std::pair<const kj::StringPtr*, size_t>, kj::Array<kj::ReadableDirectory*>>
+      cachedImportPaths;
+
+  DiskFileCompat(): ownFs(kj::newDiskFilesystem()), fs(*ownFs) {}
+  DiskFileCompat(kj::Filesystem& fs): fs(fs) {}
+};
+
 struct SchemaParser::Impl {
  typedef std::unordered_map<
      const SchemaFile*, kj::Own<ModuleImpl>, SchemaFileHash, SchemaFileEq> FileMap;
  kj::MutexGuarded<FileMap> fileMap;
  compiler::Compiler compiler;
+
+  kj::MutexGuarded<kj::Maybe<DiskFileCompat>> compat;
 };

 SchemaParser::SchemaParser(): impl(kj::heap<Impl>()) {}
 SchemaParser::~SchemaParser() noexcept(false) {}

+ParsedSchema SchemaParser::parseFromDirectory(
+    kj::ReadableDirectory& baseDir, kj::Path path,
+    kj::ArrayPtr<kj::ReadableDirectory* const> importPath) const {
+  return parseFile(SchemaFile::newFromDirectory(baseDir, kj::mv(path), importPath));
+}
+
 ParsedSchema SchemaParser::parseDiskFile(
    kj::StringPtr displayName, kj::StringPtr diskPath,
    kj::ArrayPtr<const kj::StringPtr> importPath) const {
-  return parseFile(SchemaFile::newDiskFile(displayName, diskPath, importPath));
+  auto lock = impl->compat.lockExclusive();
+  DiskFileCompat* compat;
+  KJ_IF_MAYBE(c, *lock) {
+    compat = c;
+  } else {
+    compat = &lock->emplace();
+  }
+
+  auto& root = compat->fs.getRoot();
+  auto cwd = compat->fs.getCurrentPath();
+
+  kj::ReadableDirectory* baseDir = &root;
+  kj::Path path = cwd.evalNative(diskPath);
+
+  kj::ArrayPtr<kj::ReadableDirectory* const> translatedImportPath = nullptr;
+
+  if (importPath.size() > 0) {
+    auto importPathKey = std::make_pair(importPath.begin(), importPath.size());
+    auto& slot = compat->cachedImportPaths[importPathKey];
+
+    if (slot == nullptr) {
+      slot = KJ_MAP(path, importPath) -> kj::ReadableDirectory* {
+        auto iter = compat->cachedImportDirs.find(path);
+        if (iter != compat->cachedImportDirs.end()) {
+          return iter->second.dir;
+        }
+
+        auto parsed = cwd.evalNative(path);
+        kj::Own<kj::ReadableDirectory> dir;
+        KJ_IF_MAYBE(d, root.tryOpenSubdir(parsed)) {
+          dir = kj::mv(*d);
+        } else {
+          // Ignore paths that don't exist.
+          dir = kj::newInMemoryDirectory(kj::nullClock());
+        }
+
+        kj::ReadableDirectory* result = dir;
+
+        kj::StringPtr pathRef = path;
+        KJ_ASSERT(compat->cachedImportDirs.insert(std::make_pair(pathRef,
+            DiskFileCompat::ImportDir { kj::str(path), kj::mv(parsed), kj::mv(dir) })).second);
+
+        return result;
+      };
+    }
+
+    translatedImportPath = slot;
+
+    // Check if `path` appears to be inside any of the import path directories. If so, adjust
+    // to be relative to that directory rather than absolute.
+    kj::Maybe<DiskFileCompat::ImportDir&> matchedImportDir;
+    size_t bestMatchLength = 0;
+    for (auto importDir: importPath) {
+      auto iter = compat->cachedImportDirs.find(importDir);
+      KJ_ASSERT(iter != compat->cachedImportDirs.end());
+
+      if (path.startsWith(iter->second.path)) {
+        // Looks like we're trying to load a file from inside this import path. Treat the import
+        // path as the base directory.
+        if (iter->second.path.size() > bestMatchLength) {
+          bestMatchLength = iter->second.path.size();
+          matchedImportDir = iter->second;
+        }
+      }
+    }
+
+    KJ_IF_MAYBE(match, matchedImportDir) {
+      baseDir = match->dir;
+      path = path.slice(match->path.size(), path.size()).clone();
+    }
+  }
+
+  return parseFile(SchemaFile::newFromDirectory(
+      *baseDir, kj::mv(path), translatedImportPath, kj::str(displayName)));
+}
+
+void SchemaParser::setDiskFilesystem(kj::Filesystem& fs) {
+  auto lock = impl->compat.lockExclusive();
+  KJ_REQUIRE(*lock == nullptr, "already called parseDiskFile() or setDiskFilesystem()");
+  lock->emplace(fs);
 }

 ParsedSchema SchemaParser::parseFile(kj::Own<SchemaFile>&& file) const {
@@ -234,226 +332,63 @@ schema::Node::SourceInfo::Reader ParsedSchema::getSourceInfo() const {
  return KJ_ASSERT_NONNULL(parser->getSourceInfo(*this));
 }

-// =======================================================================================
-
-namespace {
-
-class MmapDisposer: public kj::ArrayDisposer {
-protected:
-  void disposeImpl(void* firstElement, size_t elementSize, size_t elementCount,
-                   size_t capacity, void (*destroyElement)(void*)) const {
-#if _WIN32
-    KJ_ASSERT(UnmapViewOfFile(firstElement));
-#else
-    munmap(firstElement, elementSize * elementCount);
-#endif
-  }
-};
-
-KJ_CONSTEXPR(static const) MmapDisposer mmapDisposer = MmapDisposer();
-
-static char* canonicalizePath(char* path) {
-  // Taken from some old C code of mine.
-
-  // Preconditions:
-  // - path has already been determined to be relative, perhaps because the pointer actually points
-  //   into the middle of some larger path string, in which case it must point to the character
-  //   immediately after a '/'.
-
-  // Invariants:
-  // - src points to the beginning of a path component.
-  // - dst points to the location where the path component should end up, if it is not special.
-  // - src == path or src[-1] == '/'.
-  // - dst == path or dst[-1] == '/'.
-
-  char* src = path;
-  char* dst = path;
-  char* locked = dst;  // dst cannot backtrack past this
-  char* partEnd;
-  bool hasMore;
-
-  for (;;) {
-    while (*src == '/') {
-      // Skip duplicate slash.
-      ++src;
-    }
-
-    partEnd = strchr(src, '/');
-    hasMore = partEnd != NULL;
-    if (hasMore) {
-      *partEnd = '\0';
-    } else {
-      partEnd = src + strlen(src);
-    }
-
-    if (strcmp(src, ".") == 0) {
-      // Skip it.
-    } else if (strcmp(src, "..") == 0) {
-      if (dst > locked) {
-        // Backtrack over last path component.
-        --dst;
-        while (dst > locked && dst[-1] != '/') --dst;
-      } else {
-        locked += 3;
-        goto copy;
-      }
-    } else {
-      // Copy if needed.
-    copy:
-      if (dst < src) {
-        memmove(dst, src, partEnd - src);
-        dst += partEnd - src;
-      } else {
-        dst = partEnd;
-      }
-      *dst++ = '/';
-    }
-
-    if (hasMore) {
-      src = partEnd + 1;
-    } else {
-      // Oops, we have to remove the trailing '/'.
-      if (dst == path) {
-        // Oops, there is no trailing '/'.  We have to return ".".
-        strcpy(path, ".");
-        return path + 1;
-      } else {
-        // Remove the trailing '/'.  Note that this means that opening the file will work even
-        // if it is not a directory, where normally it should fail on non-directories when a
-        // trailing '/' is present.  If this is a problem, we need to add some sort of special
-        // handling for this case where we stat() it separately to check if it is a directory,
-        // because Ekam findInput will not accept a trailing '/'.
-        --dst;
-        *dst = '\0';
-        return dst;
-      }
-    }
-  }
-}
-
-kj::String canonicalizePath(kj::StringPtr path) {
-  KJ_STACK_ARRAY(char, result, path.size() + 1, 128, 512);
-  strcpy(result.begin(), path.begin());
-
-  char* start = path.startsWith("/") ? result.begin() + 1 : result.begin();
-  char* end = canonicalizePath(start);
-  return kj::heapString(result.slice(0, end - result.begin()));
-}
-
-kj::String relativePath(kj::StringPtr base, kj::StringPtr add) {
-  if (add.size() > 0 && add[0] == '/') {
-    return kj::heapString(add);
-  }
-
-  const char* pos = base.end();
-  while (pos > base.begin() && pos[-1] != '/') {
-    --pos;
-  }
-
-  return kj::str(base.slice(0, pos - base.begin()), add);
-}
-
-kj::String joinPath(kj::StringPtr base, kj::StringPtr add) {
-  KJ_REQUIRE(!add.startsWith("/"));
-
-  return kj::str(base, '/', add);
-}
-
-}  // namespace
-
-const SchemaFile::DiskFileReader SchemaFile::DiskFileReader::instance =
-    SchemaFile::DiskFileReader();
-
-bool SchemaFile::DiskFileReader::exists(kj::StringPtr path) const {
-  return access(path.cStr(), F_OK) == 0;
-}
-
-kj::Array<const char> SchemaFile::DiskFileReader::read(kj::StringPtr path) const {
-  int fd;
-  // We already established that the file exists, so this should not fail.
-  KJ_SYSCALL(fd = open(path.cStr(), O_RDONLY), path);
-  kj::AutoCloseFd closer(fd);
-
-  struct stat stats;
-  KJ_SYSCALL(fstat(fd, &stats));
-
-  if (S_ISREG(stats.st_mode)) {
-    if (stats.st_size == 0) {
-      // mmap()ing zero bytes will fail.
-      return nullptr;
-    }
-
-    // Regular file.  Just mmap() it.
-#if _WIN32
-    HANDLE handle = reinterpret_cast<HANDLE>(_get_osfhandle(fd));
-    KJ_ASSERT(handle != INVALID_HANDLE_VALUE);
-    HANDLE mappingHandle = CreateFileMapping(
-        handle, NULL, PAGE_READONLY, 0, stats.st_size, NULL);
-    KJ_ASSERT(mappingHandle != INVALID_HANDLE_VALUE);
-    KJ_DEFER(KJ_ASSERT(CloseHandle(mappingHandle)));
-    const void* mapping = MapViewOfFile(mappingHandle, FILE_MAP_READ, 0, 0, stats.st_size);
-#else  // _WIN32
-    const void* mapping = mmap(NULL, stats.st_size, PROT_READ, MAP_SHARED, fd, 0);
-    if (mapping == MAP_FAILED) {
-      KJ_FAIL_SYSCALL("mmap", errno, path);
-    }
-#endif  // !_WIN32
-
-    return kj::Array<const char>(
-        reinterpret_cast<const char*>(mapping), stats.st_size, mmapDisposer);
-  } else {
-    // This could be a stream of some sort, like a pipe.  Fall back to read().
-    // TODO(cleanup):  This does a lot of copies.  Not sure I care.
-    kj::Vector<char> data(8192);
-
-    char buffer[4096];
-    for (;;) {
-      kj::miniposix::ssize_t n;
-      KJ_SYSCALL(n = ::read(fd, buffer, sizeof(buffer)));
-      if (n == 0) break;
-      data.addAll(buffer, buffer + n);
-    }
-
-    return data.releaseAsArray();
-  }
-}
-
 // -------------------------------------------------------------------

 class SchemaFile::DiskSchemaFile final: public SchemaFile {
 public:
-  DiskSchemaFile(const FileReader& fileReader, kj::String displayName,
-                 kj::String diskPath, kj::ArrayPtr<const kj::StringPtr> importPath)
-      : fileReader(fileReader),
-        displayName(kj::mv(displayName)),
-        diskPath(kj::mv(diskPath)),
-        importPath(importPath) {}
+  DiskSchemaFile(kj::ReadableDirectory& baseDir, kj::Path pathParam,
+                 kj::ArrayPtr<kj::ReadableDirectory* const> importPath,
+                 kj::Own<kj::ReadableFile> file,
+                 kj::Maybe<kj::String> displayNameOverride)
+      : baseDir(baseDir), path(kj::mv(pathParam)), importPath(importPath), file(kj::mv(file)) {
+    KJ_IF_MAYBE(dn, displayNameOverride) {
+      displayName = kj::mv(*dn);
+      displayNameOverridden = true;
+    } else {
+      displayName = path.toString();
+      displayNameOverridden = false;
+    }
+  }

  kj::StringPtr getDisplayName() const override {
    return displayName;
  }

  kj::Array<const char> readContent() const override {
-    return fileReader.read(diskPath);
+    auto lock = file.lockExclusive();
+    // TODO(soon): Should we say that KJ files must be thread-safe and mark all the methods const?
+    //   The disk-based implementations are already thread-safe. The in-memory implementations
+    //   are not thread-safe, in fact they are thread-hostile currently due to the non-threadsafe
+    //   refcounting.
+    return lock->get()->mmap(0, lock->get()->stat().size).releaseAsChars();
  }

-  kj::Maybe<kj::Own<SchemaFile>> import(kj::StringPtr path) const override {
-    if (path.startsWith("/")) {
+  kj::Maybe<kj::Own<SchemaFile>> import(kj::StringPtr target) const override {
+    if (target.startsWith("/")) {
+      auto parsed = kj::Path::parse(target.slice(1));
      for (auto candidate: importPath) {
-        kj::String newDiskPath = canonicalizePath(joinPath(candidate, path.slice(1)));
-        if (fileReader.exists(newDiskPath)) {
+        KJ_IF_MAYBE(newFile, candidate->tryOpenFile(parsed)) {
          return kj::implicitCast<kj::Own<SchemaFile>>(kj::heap<DiskSchemaFile>(
-              fileReader, canonicalizePath(path.slice(1)),
-              kj::mv(newDiskPath), importPath));
+              *candidate, kj::mv(parsed), importPath, kj::mv(*newFile), nullptr));
        }
      }
      return nullptr;
    } else {
-      kj::String newDiskPath = canonicalizePath(relativePath(diskPath, path));
-      if (fileReader.exists(newDiskPath)) {
+      auto parsed = path.parent().eval(target);
+
+      kj::Maybe<kj::String> displayNameOverride;
+      if (displayNameOverridden) {
+        // Try to create a consistent display name override for the imported file. This is for
+        // backwards-compatibility only -- display names are only overridden when using the
+        // deprecated parseDiskFile() interface.
+        kj::runCatchingExceptions([&]() {
+          displayNameOverride = kj::Path::parse(displayName).parent().eval(target).toString();
+        });
+      }
+
+      KJ_IF_MAYBE(newFile, baseDir.tryOpenFile(parsed)) {
        return kj::implicitCast<kj::Own<SchemaFile>>(kj::heap<DiskSchemaFile>(
-            fileReader, canonicalizePath(relativePath(displayName, path)),
-            kj::mv(newDiskPath), importPath));
+            baseDir, kj::mv(parsed), importPath, kj::mv(*newFile), kj::mv(displayNameOverride)));
      } else {
        return nullptr;
      }
@@ -461,40 +396,46 @@ public:
  }

  bool operator==(const SchemaFile& other) const override {
-    return diskPath == kj::downcast<const DiskSchemaFile>(other).diskPath;
+    auto& other2 = kj::downcast<const DiskSchemaFile>(other);
+    return &baseDir == &other2.baseDir && path == other2.path;
  }
  bool operator!=(const SchemaFile& other) const override {
-    return diskPath != kj::downcast<const DiskSchemaFile>(other).diskPath;
+    return !operator==(other);
  }
  size_t hashCode() const override {
    // djb hash with xor
    // TODO(someday):  Add hashing library to KJ.
-    size_t result = 5381;
-    for (char c: diskPath) {
+    size_t result = reinterpret_cast<uintptr_t>(&baseDir);
+    for (auto& part: path) {
+      for (char c: part) {
        result = (result * 33) ^ c;
      }
+      result = (result * 33) ^ '/';
+    }
    return result;
  }

  void reportError(SourcePos start, SourcePos end, kj::StringPtr message) const override {
    kj::getExceptionCallback().onRecoverableException(kj::Exception(
-        kj::Exception::Type::FAILED, kj::heapString(diskPath), start.line,
+        kj::Exception::Type::FAILED, path.toString(), start.line,
        kj::heapString(message)));
  }

 private:
-  const FileReader& fileReader;
+  kj::ReadableDirectory& baseDir;
+  kj::Path path;
+  kj::ArrayPtr<kj::ReadableDirectory* const> importPath;
+  kj::MutexGuarded<kj::Own<kj::ReadableFile>> file;
  kj::String displayName;
-  kj::String diskPath;
-  kj::ArrayPtr<const kj::StringPtr> importPath;
+  bool displayNameOverridden;
 };

-kj::Own<SchemaFile> SchemaFile::newDiskFile(
-    kj::StringPtr displayName, kj::StringPtr diskPath,
-    kj::ArrayPtr<const kj::StringPtr> importPath,
-    const FileReader& fileReader) {
-  return kj::heap<DiskSchemaFile>(fileReader, canonicalizePath(displayName),
-                                  canonicalizePath(diskPath), importPath);
+kj::Own<SchemaFile> SchemaFile::newFromDirectory(
+    kj::ReadableDirectory& baseDir, kj::Path path,
+    kj::ArrayPtr<kj::ReadableDirectory* const> importPath,
+    kj::Maybe<kj::String> displayNameOverride) {
+  return kj::heap<DiskSchemaFile>(baseDir, kj::mv(path), importPath, baseDir.openFile(path),
+                                  kj::mv(displayNameOverride));
 }

 }  // namespace capnp
--- a/c++/src/capnp/schema-parser.h
+++ b/c++/src/capnp/schema-parser.h
@@ -28,6 +28,7 @@

 #include "schema-loader.h"
 #include <kj/string.h>
+#include <kj/filesystem.h>

 namespace capnp {

@@ -43,31 +44,85 @@ public:
  SchemaParser();
  ~SchemaParser() noexcept(false);

-  ParsedSchema parseDiskFile(kj::StringPtr displayName, kj::StringPtr diskPath,
-                             kj::ArrayPtr<const kj::StringPtr> importPath) const;
-  // Parse a file located on disk.  Throws an exception if the file dosen't exist.
-  //
-  // Parameters:
-  // * `displayName`:  The name that will appear in the file's schema node.  (If the file has
-  //   already been parsed, this will be ignored and the display name from the first time it was
-  //   parsed will be kept.)
-  // * `diskPath`:  The path to the file on disk.
-  // * `importPath`:  Directories to search when resolving absolute imports within this file
-  //   (imports that start with a `/`).  Must remain valid until the SchemaParser is destroyed.
-  //   (If the file has already been parsed, this will be ignored and the import path from the
-  //   first time it was parsed will be kept.)
+  ParsedSchema parseFromDirectory(kj::ReadableDirectory& baseDir, kj::Path path,
+                                  kj::ArrayPtr<kj::ReadableDirectory* const> importPath) const;
+  // Parse a file from the KJ filesystem API.  Throws an exception if the file doesn't exist.
+  //
+  // `baseDir` and `path` are used together to resolve relative imports. `path` is the source
+  // file's path within `baseDir`. Relative imports will be interpreted relative to `path` and
+  // will be opened using `baseDir`. Note that the KJ filesystem API prohibits "breaking out" of
+  // a directory using "..", so relative imports will be restricted to children of `baseDir`.
+  //
+  // `importPath` is used for absolute imports (imports that start with a '/'). Each directory in
+  // the array will be searched in order until a file is found.
+  //
+  // All `ReadableDirectory` objects must remain valid until the `SchemaParser` is destroyed. Also,
+  // the `importPath` array must remain valid. `path` will be copied; it need not remain valid.
  //
  // This method is a shortcut, equivalent to:
-  //     parser.parseFile(SchemaFile::newDiskFile(displayName, diskPath, importPath))`;
+  //     parser.parseFile(SchemaFile::newFromDirectory(baseDir, path, importPath));
  //
  // This method throws an exception if any errors are encountered in the file or in anything the
  // file depends on.  Note that merely importing another file does not count as a dependency on
  // anything in the imported file -- only the imported types which are actually used are
  // "dependencies".
+  //
+  // Hint: Use kj::newDiskFilesystem() to initialize the KJ filesystem API. Usually you should do
+  //   this at a high level in your program, e.g. the main() function, and then pass down the
+  //   appropriate File/Directory objects to the components that need them. Example:
+  //
+  //     auto fs = kj::newDiskFilesystem();
+  //     SchemaParser parser;
+  //     auto schema = parser.parseFromDirectory(fs->getCurrent(),
+  //         kj::Path::parse("foo/bar.capnp"), nullptr);
+  //
+  // Hint: To use in-memory data rather than real disk, you can use kj::newInMemoryDirectory(),
+  //   write the files you want, then pass it to SchemaParser. Example:
+  //
+  //     auto dir = kj::newInMemoryDirectory(kj::nullClock());
+  //     auto path = kj::Path::parse("foo/bar.capnp");
+  //     dir->openFile(path, kj::WriteMode::CREATE | kj::WriteMode::CREATE_PARENT)
+  //        ->writeAll("struct Foo {}");
+  //     auto schema = parser.parseFromDirectory(*dir, path, nullptr);
+  //
+  // Hint: You can create an in-memory directory but then populate it with real files from disk,
+  //   in order to control what is visible while also avoiding reading files yourself or making
+  //   extra copies. Example:
+  //
+  //     auto fs = kj::newDiskFilesystem();
+  //     auto dir = kj::newInMemoryDirectory(kj::nullClock());
+  //     auto fakePath = kj::Path::parse("foo/bar.capnp");
+  //     auto realPath = kj::Path::parse("path/to/some/file.capnp");
+  //     dir->transfer(fakePath, kj::WriteMode::CREATE | kj::WriteMode::CREATE_PARENT,
+  //                   fs->getCurrent(), realPath, kj::TransferMode::LINK);
+  //     auto schema = parser.parseFromDirectory(*dir, fakePath, nullptr);
+  //
+  //   In this example, note that any imports in the file will fail, since the in-memory directory
+  //   you created contains no files except the specific one you linked in.
+
+  ParsedSchema parseDiskFile(kj::StringPtr displayName, kj::StringPtr diskPath,
+                             kj::ArrayPtr<const kj::StringPtr> importPath) const
+      CAPNP_DEPRECATED("Use parseFromDirectory() instead.");
+  // Creates a private kj::Filesystem and uses it to parse files from the real disk.
+  //
+  // DO NOT USE in new code. Use parseFromDirectory() instead.
+  //
+  // This API has a serious problem: the file can import and embed files located anywhere on disk
+  // using relative paths. Even if you specify no `importPath`, relative imports still work. By
+  // using `parseFromDirectory()`, you can arrange so that imports are only allowed within a
+  // particular directory, or even set up a dummy filesystem where other files are not visible.
+
+  void setDiskFilesystem(kj::Filesystem& fs)
+      CAPNP_DEPRECATED("Use parseFromDirectory() instead.");
+  // Call before calling parseDiskFile() to choose an alternative disk filesystem implementation.
+  // This exists mostly for testing purposes; new code should use parseFromDirectory() instead.
+  //
+  // If parseDiskFile() is called without having called setDiskFilesystem(), then
+  // kj::newDiskFilesystem() will be used instead.

  ParsedSchema parseFile(kj::Own<SchemaFile>&& file) const;
  // Advanced interface for parsing a file that may or may not be located in any global namespace.
-  // Most users will prefer `parseDiskFile()`.
+  // Most users will prefer `parseFromDirectory()`.
  //
  // If the file has already been parsed (that is, a SchemaFile that compares equal to this one
  // was parsed previously), the existing schema will be returned again.
@@ -90,6 +145,7 @@ public:

 private:
  struct Impl;
+  struct DiskFileCompat;
  class ModuleImpl;
  kj::Own<Impl> impl;
  mutable bool hadErrors = false;
@@ -135,44 +191,20 @@ class SchemaFile {
  // `SchemaFile::newDiskFile()`.

 public:
-  class FileReader {
-  public:
-    virtual bool exists(kj::StringPtr path) const = 0;
-    virtual kj::Array<const char> read(kj::StringPtr path) const = 0;
-  };
-
-  class DiskFileReader final: public FileReader {
-    // Implementation of FileReader that uses the local disk.  Files are read using mmap() if
-    // possible.
-
-  public:
-    static const DiskFileReader instance;
-
-    bool exists(kj::StringPtr path) const override;
-    kj::Array<const char> read(kj::StringPtr path) const override;
-  };
-
-  static kj::Own<SchemaFile> newDiskFile(
-      kj::StringPtr displayName, kj::StringPtr diskPath,
-      kj::ArrayPtr<const kj::StringPtr> importPath,
-      const FileReader& fileReader = DiskFileReader::instance);
-  // Construct a SchemaFile representing a file on disk (or located in the filesystem-like
-  // namespace represented by `fileReader`).
-  //
-  // Parameters:
-  // * `displayName`:  The name that will appear in the file's schema node.
-  // * `diskPath`:  The path to the file on disk.
-  // * `importPath`:  Directories to search when resolving absolute imports within this file
-  //   (imports that start with a `/`).  The array content must remain valid as long as the
-  //   SchemaFile exists (which is at least as long as the SchemaParser that parses it exists).
-  // * `fileReader`:  Allows you to use a filesystem other than the actual local disk.  Although,
-  //   if you find yourself using this, it may make more sense for you to implement SchemaFile
-  //   yourself.
-  //
-  // The SchemaFile compares equal to any other SchemaFile that has exactly the same disk path,
-  // after canonicalization.
-  //
-  // The SchemaFile will throw an exception if any errors are reported.
+  // Note: Cap'n Proto 0.6.x and below had classes FileReader and DiskFileReader and a method
+  //   newDiskFile() defined here. These were removed when SchemaParser was transitioned to use the
+  //   KJ filesystem API. You should be able to get the same effect by subclassing
+  //   kj::ReadableDirectory, or using kj::newInMemoryDirectory().
+
+  static kj::Own<SchemaFile> newFromDirectory(
+      kj::ReadableDirectory& baseDir, kj::Path path,
+      kj::ArrayPtr<kj::ReadableDirectory* const> importPath,
+      kj::Maybe<kj::String> displayNameOverride = nullptr);
+  // Construct a SchemaFile representing a file in a kj::ReadableDirectory. This is used to
+  // implement SchemaParser::parseFromDirectory(); see there for details.
+  //
+  // The SchemaFile compares equal to any other SchemaFile that has exactly the same `baseDir`
+  // object (by identity) and `path` (by value).

  // -----------------------------------------------------------------
  // For more control, you can implement this interface.