Commit 7b59b551 authored by Kenton Varda's avatar Kenton Varda

Arrange for compiler to send to the code generator the complete contents of all…

Arrange for compiler to send to the code generator the complete contents of all files listed on the command line, all first-level dependencies of those files, and parent scopes of those dependencies, but nothing more.  This means it's sometimes possible to compile a schema file without parsing all transitive dependencies.  Also, in 'decode' mode, don't compile annotations, so that the files defining those annotations need not be opened at all.
parent b608e69d
......@@ -116,7 +116,10 @@ public:
kj::MainFunc getDecodeMain() {
// Only parse the schemas we actually need for decoding.
compileMode = Compiler::LAZY;
compileEagerness = Compiler::NODE;
// Drop annotations since we don't need them. This avoids importing files like c++.capnp.
annotationFlag = Compiler::DROP_ANNOTATIONS;
kj::MainBuilder builder(context, VERSION_STRING,
"Decodes one or more encoded Cap'n Proto messages as text. The messages have root "
......@@ -180,6 +183,11 @@ public:
}
kj::MainBuilder::Validity addSource(kj::StringPtr file) {
if (!compilerConstructed) {
compiler = compilerSpace.construct(annotationFlag);
compilerConstructed = true;
}
if (addStandardImportPaths) {
loader.addImportPath(kj::heapString("/usr/local/include"));
loader.addImportPath(kj::heapString("/usr/include"));
......@@ -195,7 +203,9 @@ public:
}
KJ_IF_MAYBE(module, loader.loadModule(file, file.slice(longestPrefix))) {
sourceIds.add(compiler.add(*module, Compiler::EAGER));
uint64_t id = compiler->add(*module);
compiler->eagerlyCompile(id, compileEagerness);
sourceIds.add(id);
} else {
return "no such file";
}
......@@ -243,6 +253,10 @@ public:
return true;
}
// We require one or more sources and if they failed to compile we quit above, so this should
// pass. (This assertion also guarantees that `compiler` has been initialized.)
KJ_ASSERT(sourceIds.size() > 0, "Shouldn't have gotten here without sources.");
if (outputs.size() == 0) {
return "no outputs specified";
}
......@@ -250,7 +264,7 @@ public:
MallocMessageBuilder message;
auto request = message.initRoot<schema::CodeGeneratorRequest>();
auto schemas = compiler.getLoader().getAllLoaded();
auto schemas = compiler->getLoader().getAllLoaded();
auto nodes = request.initNodes(schemas.size());
for (size_t i = 0; i < schemas.size(); i++) {
nodes.setWithCaveats(i, schemas[i].getProto());
......@@ -351,14 +365,14 @@ public:
type = nullptr;
}
KJ_IF_MAYBE(childId, compiler.lookup(id, part)) {
KJ_IF_MAYBE(childId, compiler->lookup(id, part)) {
id = *childId;
} else {
return "no such type";
}
}
Schema schema = compiler.getLoader().get(id);
Schema schema = compiler->getLoader().get(id);
if (schema.getProto().getBody().which() != schema::Node::Body::STRUCT_NODE) {
return "not a struct type";
}
......@@ -485,8 +499,17 @@ public:
private:
kj::ProcessContext& context;
ModuleLoader loader;
Compiler compiler;
Compiler::Mode compileMode = Compiler::EAGER;
kj::SpaceFor<Compiler> compilerSpace;
bool compilerConstructed = false;
kj::Own<Compiler> compiler;
Compiler::AnnotationFlag annotationFlag = Compiler::COMPILE_ANNOTATIONS;
uint compileEagerness = Compiler::NODE | Compiler::CHILDREN |
Compiler::DEPENDENCIES | Compiler::DEPENDENCY_PARENTS;
// By default we compile each explicitly listed schema in full, plus first-level dependencies
// of those schemas, plus the parent nodes of any dependencies. This is what most code generators
// require to function.
kj::Vector<kj::String> sourcePrefixes;
bool addStandardImportPaths = true;
......
......@@ -213,6 +213,7 @@ SchemaLoader schemaLoader;
Text::Reader getUnqualifiedName(Schema schema) {
auto proto = schema.getProto();
KJ_CONTEXT(proto.getDisplayName());
auto parent = schemaLoader.get(proto.getScopeId());
for (auto nested: parent.getProto().getNestedNodes()) {
if (nested.getId() == proto.getId()) {
......@@ -258,7 +259,7 @@ TextBlob nodeName(Schema target, Schema scope) {
auto part = targetParents.back();
auto proto = part.getProto();
if (proto.getScopeId() == 0) {
path = text(kj::mv(path), "import \"", proto.getDisplayName(), "\".");
path = text(kj::mv(path), "import \"/", proto.getDisplayName(), "\".");
} else {
path = text(kj::mv(path), getUnqualifiedName(part), ".");
}
......
This diff is collapsed.
......@@ -55,37 +55,109 @@ class Compiler {
// Cross-links separate modules (schema files) and translates them into schema nodes.
public:
Compiler();
enum AnnotationFlag {
COMPILE_ANNOTATIONS,
// Compile annotations normally.
DROP_ANNOTATIONS
// Do not compile any annotations, eagerly or lazily. All "annotations" fields in the schema
// will be left empty. This is useful to avoid parsing imports that are used only for
// annotations which you don't intend to use anyway.
//
// Unfortunately annotations cannot simply be compiled lazily because filling in the
// "annotations" field at the usage site requires knowing the annotation's type, which requires
// compiling the annotation, and the schema API has no particular way to detect when you
// try to access the "annotations" field in order to lazily compile the annotations at that
// point.
};
explicit Compiler(AnnotationFlag annotationFlag = COMPILE_ANNOTATIONS);
~Compiler() noexcept(false);
KJ_DISALLOW_COPY(Compiler);
enum Mode {
EAGER,
// Completely traverse the module's parse tree and translate it into schema nodes before
// returning from add().
uint64_t add(const Module& module) const;
// Add a module to the Compiler, returning the module's file ID. The ID can then be looked up in
// the `SchemaLoader` returned by `getLoader()`. However, the SchemaLoader may behave as if the
// schema node doesn't exist if any compilation errors occur (reported via the module's
// ErrorReporter). The module is parsed at the time `add()` is called, but not fully compiled --
// individual schema nodes are compiled lazily. If you want to force eager compilation,
// see `eagerlyCompile()`, below.
kj::Maybe<uint64_t> lookup(uint64_t parent, kj::StringPtr childName) const;
// Given the type ID of a schema node, find the ID of a node nested within it. Throws an
// exception if the parent ID is not recognized; returns null if the parent has no child of the
// given name. Neither the parent nor the child schema node is actually compiled.
LAZY
// Only interpret the module's definitions when they are requested. The main advantage of this
// mode is that imports will only be loaded if they are actually needed.
enum Eagerness: uint32_t {
// Flags specifying how eager to be about compilation. These are intended to be bitwise OR'd.
// Used with the method `eagerlyCompile()`.
//
// Since the parse tree is traversed lazily, any particular schema node only becomes findable
// by ID (using the SchemaLoader) once one of its neighbors in the graph has been examined.
// As long as you are only traversing the graph -- only looking up IDs that you obtained from
// other schema nodes from the same loader -- you shouldn't be able to tell the difference.
// But if you receive IDs from some external source and want to look those up, you'd better
// use EAGER mode.
// Schema declarations can be compiled upfront, or they can be compiled lazily as they are
// needed. Usually, the difference is not observable, but it is not a perfect abstraction.
// The difference has the following effects:
// * `getLoader().getAllLoaded()` only returns the schema nodes which have been compiled so
// far.
// * `getLoader().get()` (i.e. searching for a schema by ID) can only find schema nodes that
// have either been compiled already, or which are referenced by schema nodes which have been
// compiled already. This means that if the ID you pass in came from another schema node
// compiled with the same compiler, there should be no observable difference, but if you
// have an ID from elsewhere which you _a priori_ expect is defined in a particular schema
// file, you will need to compile that file eagerly before you look up the node by ID.
// * Errors are reported when they are encountered, so some errors will not be reported until
// the node is actually compiled.
// * If an imported file is not needed, it will never even be read from disk.
//
// The last point is the main reason why you might want to prefer lazy compilation: it allows
// you to use a schema file with missing imports, so long as those missing imports are not
// actually needed.
//
// For example, the flag combo:
// EAGER_NODE | EAGER_CHILDREN | EAGER_DEPENDENCIES | EAGER_DEPENDENCY_PARENTS
// will compile the entire given module, plus all direct dependencies of anything in that
// module, plus all lexical ancestors of those dependencies. This is what the Cap'n Proto
// compiler uses when building initial code generator requests.
ALL_RELATED_NODES = ~0u,
// Compile everything that is in any way related to the target node, including its entire
// containing file and everything transitively imported by it.
NODE = 1 << 0,
// Eagerly compile the requested node, but not necessarily any of its parents, children, or
// dependencies.
PARENTS = 1 << 1,
// Eagerly compile all lexical parents of the requested node. Only meaningful in conjunction
// with NODE.
CHILDREN = 1 << 2,
// Eagerly compile all of the node's lexically nested nodes. Only meaningful in conjunction
// with NODE.
DEPENDENCIES = NODE << 15,
// For all nodes compiled as a result of the above flags, also compile their direct
// dependencies. E.g. if Foo is a struct which contains a field of type Bar, and Foo is
// compiled, then also compile Bar. "Dependencies" are defined as field types, method
// parameter and return types, and annotation types. Nested types and outer types are not
// considered dependencies.
DEPENDENCY_PARENTS = PARENTS * DEPENDENCIES,
DEPENDENCY_CHILDREN = CHILDREN * DEPENDENCIES,
DEPENDENCY_DEPENDENCIES = DEPENDENCIES * DEPENDENCIES,
// Like PARENTS, CHILDREN, and DEPENDENCIES, but applies relative to dependency nodes rather
// than the original requested node. Note that DEPENDENCY_DEPENDENCIES causes all transitive
// dependencies of the requested node to be compiled.
//
// These flags are defined as multiples of the original flag and DEPENDENCIES so that we
// can form the flags to use when traversing a dependency by shifting bits.
};
uint64_t add(const Module& module, Mode mode) const;
// Add a module to the Compiler, returning the module's file ID. The ID can then be used to
// look up the schema in the SchemaLoader returned by `getLoader()`. However, if there were any
// errors while compiling (reported via `module.addError()`), then the SchemaLoader may behave as
// if the node doesn't exist, or may return an invalid partial Schema.
kj::Maybe<uint64_t> lookup(uint64_t parent, kj::StringPtr childName) const;
// Given the type ID of a schema node, find the ID of a node nested within it, without actually
// building either node. Throws an exception if the parent ID is not recognized; returns null
// if the parent has no child of the given name.
void eagerlyCompile(uint64_t id, uint eagerness) const;
// Force eager compilation of schema nodes related to the given ID. `eagerness` specifies which
// related nodes should be compiled before returning. It is a bitwise OR of the possible values
// of the `Eagerness` enum.
//
// If this returns and no errors have been reported, then it is guaranteed that the compiled
// nodes can be found in the SchemaLoader returned by `getLoader()`.
const SchemaLoader& getLoader() const;
// Get a SchemaLoader backed by this compiler. Schema nodes will be lazily constructed as you
......
......@@ -269,7 +269,7 @@ Lexer::Lexer(Orphanage orphanageParam, const ErrorReporter& errorReporterParam)
parsers.emptySpace = commentsAndWhitespace;
}
Lexer::~Lexer() {}
Lexer::~Lexer() noexcept(false) {}
} // namespace compiler
} // namespace capnp
......@@ -332,7 +332,7 @@ kj::Maybe<const Module&> ModuleLoader::Impl::loadModuleFromSearchPath(
ModuleLoader::ModuleLoader(const GlobalErrorReporter& errorReporter)
: impl(kj::heap<Impl>(errorReporter)) {}
ModuleLoader::~ModuleLoader() {}
ModuleLoader::~ModuleLoader() noexcept(false) {}
void ModuleLoader::addImportPath(kj::String path) { impl->addImportPath(kj::mv(path)); }
......
......@@ -525,9 +525,10 @@ private:
NodeTranslator::NodeTranslator(
const Resolver& resolver, const ErrorReporter& errorReporter,
const Declaration::Reader& decl, Orphan<schema::Node> wipNodeParam)
const Declaration::Reader& decl, Orphan<schema::Node> wipNodeParam,
bool compileAnnotations)
: resolver(resolver), errorReporter(errorReporter),
wipNode(kj::mv(wipNodeParam)) {
compileAnnotations(compileAnnotations), wipNode(kj::mv(wipNodeParam)) {
compileNode(decl, wipNode.get());
}
......@@ -1786,7 +1787,7 @@ kj::Maybe<ListSchema> NodeTranslator::makeListSchemaOf(schema::Type::Reader elem
Orphan<List<schema::Annotation>> NodeTranslator::compileAnnotationApplications(
List<Declaration::AnnotationApplication>::Reader annotations,
kj::StringPtr targetsFlagName) {
if (annotations.size() == 0) {
if (annotations.size() == 0 || !compileAnnotations) {
// Return null.
return Orphan<List<schema::Annotation>>();
}
......
......@@ -75,7 +75,8 @@ public:
};
NodeTranslator(const Resolver& resolver, const ErrorReporter& errorReporter,
const Declaration::Reader& decl, Orphan<schema::Node> wipNode);
const Declaration::Reader& decl, Orphan<schema::Node> wipNode,
bool compileAnnotations);
// Construct a NodeTranslator to translate the given declaration. The wipNode starts out with
// `displayName`, `id`, `scopeId`, and `nestedNodes` already initialized. The `NodeTranslator`
// fills in the rest.
......@@ -96,6 +97,7 @@ public:
private:
const Resolver& resolver;
const ErrorReporter& errorReporter;
bool compileAnnotations;
Orphan<schema::Node> wipNode;
// The work-in-progress schema node.
......
......@@ -875,7 +875,7 @@ CapnpParser::CapnpParser(Orphanage orphanageParam, const ErrorReporter& errorRep
parsers.methodDecl, parsers.genericDecl));
}
CapnpParser::~CapnpParser() {}
CapnpParser::~CapnpParser() noexcept(false) {}
kj::Maybe<Orphan<Declaration>> CapnpParser::parseStatement(
Statement::Reader statement, const DeclParser& parser) {
......
......@@ -33,12 +33,27 @@ namespace {
inline void delay() { usleep(10000); }
#if KJ_NO_EXCEPTIONS
#undef EXPECT_ANY_THROW
#define EXPECT_ANY_THROW(code) EXPECT_DEATH(code, ".")
#define EXPECT_NONFATAL_FAILURE(code) code
#else
#define EXPECT_NONFATAL_FAILURE EXPECT_ANY_THROW
#endif
#ifdef NDEBUG
#define EXPECT_DEBUG_ANY_THROW(EXP)
#else
#define EXPECT_DEBUG_ANY_THROW EXPECT_ANY_THROW
#endif
TEST(Mutex, MutexGuarded) {
MutexGuarded<uint> value(123);
{
Locked<uint> lock = value.lockExclusive();
EXPECT_EQ(123u, *lock);
EXPECT_EQ(123u, value.getAlreadyLockedExclusive());
Thread thread([&]() {
Locked<uint> threadLock = value.lockExclusive();
......@@ -56,6 +71,8 @@ TEST(Mutex, MutexGuarded) {
{
auto rlock1 = value.lockShared();
EXPECT_EQ(789u, *rlock1);
EXPECT_EQ(789u, value.getAlreadyLockedShared());
{
auto rlock2 = value.lockShared();
......@@ -98,6 +115,10 @@ TEST(Mutex, MutexGuarded) {
}
EXPECT_EQ(321u, *value.lockExclusive());
EXPECT_DEBUG_ANY_THROW(value.getAlreadyLockedExclusive());
EXPECT_DEBUG_ANY_THROW(value.getAlreadyLockedShared());
EXPECT_EQ(321u, value.getWithoutLock());
}
TEST(Mutex, Lazy) {
......
......@@ -123,6 +123,19 @@ void Mutex::unlock(Exclusivity exclusivity) {
}
}
void Mutex::assertLockedByCaller(Exclusivity exclusivity) {
  // Debug-only sanity check (futex-based build): inspect the futex state word
  // to confirm the lock is held in the requested mode. This checks only that
  // *somebody* holds the lock -- the futex word does not record which thread,
  // so it cannot prove the caller specifically is the holder.
  // NOTE(review): `futex` is read without an atomic load here; presumably
  // acceptable for a best-effort debug assertion -- confirm against the
  // lock()/unlock() implementations' memory ordering.
  if (exclusivity == EXCLUSIVE) {
    // Exclusive mode: the EXCLUSIVE_HELD bit must be set.
    KJ_ASSERT(futex & EXCLUSIVE_HELD,
        "Tried to call getAlreadyLocked*() but lock is not held.");
  } else if (exclusivity == SHARED) {
    // Shared mode: at least one reader must be counted.
    KJ_ASSERT(futex & SHARED_COUNT_MASK,
        "Tried to call getAlreadyLocked*() but lock is not held.");
  }
}
void Once::runOnce(Initializer& init) {
uint state = UNINITIALIZED;
if (__atomic_compare_exchange_n(&futex, &state, INITIALIZING, false,
......@@ -203,6 +216,26 @@ void Mutex::unlock(Exclusivity exclusivity) {
KJ_PTHREAD_CALL(pthread_rwlock_unlock(&mutex));
}
void Mutex::assertLockedByCaller(Exclusivity exclusivity) {
  // Debug-only sanity check (pthread rwlock build). pthread rwlocks expose no
  // way to ask "is this held?", so we probe by attempting to acquire a
  // CONFLICTING lock: if the try-lock succeeds, nobody held the lock, which
  // means the caller's claim is false and we fail the assertion. Either way
  // the probe lock is released immediately, so observable state is unchanged.
  if (exclusivity == EXCLUSIVE) {
    // If we can take a read lock, then no writer holds the lock, so the
    // caller cannot be holding it exclusively.
    if (pthread_rwlock_tryrdlock(&mutex) == 0) {
      pthread_rwlock_unlock(&mutex);
      KJ_FAIL_ASSERT("Tried to call getAlreadyLocked*() but lock is not held.");
    }
  } else if (exclusivity == SHARED) {
    // If we can take a write lock, then the lock was held neither for reading
    // nor writing. We have no way to prove it was held for reading
    // specifically, so write-held also passes this check.
    if (pthread_rwlock_trywrlock(&mutex) == 0) {
      pthread_rwlock_unlock(&mutex);
      KJ_FAIL_ASSERT("Tried to call getAlreadyLocked*() but lock is not held.");
    }
  }
}
// Construct a not-yet-run Once: mark it uninitialized and set up the mutex
// that will serialize the one-time initializer.
// NOTE(review): KJ_PTHREAD_CALL presumably asserts on a nonzero pthread
// return code -- confirm against the macro's definition.
Once::Once(): initialized(false) {
KJ_PTHREAD_CALL(pthread_mutex_init(&mutex, nullptr));
}
......
......@@ -59,6 +59,11 @@ public:
void lock(Exclusivity exclusivity);
void unlock(Exclusivity exclusivity);
void assertLockedByCaller(Exclusivity exclusivity);
// In debug mode, assert that the mutex is locked by the calling thread, or if that is
// non-trivial, assert that the mutex is locked (which should be good enough to catch problems
// in unit tests). In non-debug builds, do nothing.
private:
#if KJ_USE_FUTEX
uint futex;
......@@ -207,6 +212,11 @@ public:
// Escape hatch for cases where some external factor guarantees that it's safe to get the
// value. You should treat these like const_cast -- be highly suspicious of any use.
inline const T& getAlreadyLockedShared() const;
inline T& getAlreadyLockedShared();
inline T& getAlreadyLockedExclusive() const;
// Like `getWithoutLock()`, but asserts that the lock is already held by the calling thread.
private:
mutable _::Mutex mutex;
mutable T value;
......@@ -265,6 +275,28 @@ inline Locked<const T> MutexGuarded<T>::lockShared() const {
return Locked<const T>(mutex, value);
}
// Return a const reference to the guarded value without acquiring the lock,
// on the caller's promise that a shared (read) lock is already held. In
// non-NDEBUG builds, assert that the mutex really is locked for reading
// before handing out the reference; in NDEBUG builds this is an unchecked
// direct access.
template <typename T>
inline const T& MutexGuarded<T>::getAlreadyLockedShared() const {
#ifndef NDEBUG
mutex.assertLockedByCaller(_::Mutex::SHARED);
#endif
return value;
}
// Non-const overload: return a mutable reference to the guarded value on the
// caller's promise that the lock is already held for reading. Debug builds
// verify that claim via assertLockedByCaller; NDEBUG builds skip the check.
// Note this only asserts SHARED ownership, which an exclusive holder also
// satisfies in the futex implementation -- the check is best-effort.
template <typename T>
inline T& MutexGuarded<T>::getAlreadyLockedShared() {
#ifndef NDEBUG
mutex.assertLockedByCaller(_::Mutex::SHARED);
#endif
return value;
}
// Return a mutable reference to the guarded value on the caller's promise
// that an exclusive (write) lock is already held. Debug builds assert that
// the mutex is locked exclusively; NDEBUG builds skip the check. The method
// is const (and `value` is declared mutable in MutexGuarded) so it can be
// called through a const reference; the const_cast strips the resulting
// constness -- deliberate escape-hatch semantics, like getWithoutLock().
template <typename T>
inline T& MutexGuarded<T>::getAlreadyLockedExclusive() const {
#ifndef NDEBUG
mutex.assertLockedByCaller(_::Mutex::EXCLUSIVE);
#endif
return const_cast<T&>(value);
}
template <typename T>
template <typename Func>
class Lazy<T>::InitImpl: public _::Once::Initializer {
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment