Commit 70a7bcb7 authored by Kenton Varda

Schema rewrite WIP.

parent 427eed46
......@@ -35,6 +35,7 @@ INPUT=$1
case "$INPUT" in
*capnp/c++.capnp | \
*capnp/schema.capnp | \
*capnp/schema2.capnp | \
*capnp/compiler/lexer.capnp | \
*capnp/compiler/grammar.capnp )
exit 0
......
......@@ -216,10 +216,11 @@ public:
}
}
KJ_IF_MAYBE(module, loader.loadModule(file, file.slice(longestPrefix))) {
kj::StringPtr canonicalName = file.slice(longestPrefix);
KJ_IF_MAYBE(module, loader.loadModule(file, canonicalName)) {
uint64_t id = compiler->add(*module);
compiler->eagerlyCompile(id, compileEagerness);
sourceIds.add(id);
sourceFiles.add(SourceFile { id, canonicalName, &*module });
} else {
return "no such file";
}
......@@ -279,14 +280,14 @@ public:
// We require one or more sources and if they failed to compile we quit above, so this should
// pass. (This assertion also guarantees that `compiler` has been initialized.)
KJ_ASSERT(sourceIds.size() > 0, "Shouldn't have gotten here without sources.");
KJ_ASSERT(sourceFiles.size() > 0, "Shouldn't have gotten here without sources.");
if (outputs.size() == 0) {
return "no outputs specified";
}
MallocMessageBuilder message;
auto request = message.initRoot<schema::CodeGeneratorRequest>();
auto request = message.initRoot<schema2::CodeGeneratorRequest>();
auto schemas = compiler->getLoader().getAllLoaded();
auto nodes = request.initNodes(schemas.size());
......@@ -294,9 +295,13 @@ public:
nodes.setWithCaveats(i, schemas[i].getProto());
}
auto requestedFiles = request.initRequestedFiles(sourceIds.size());
for (size_t i = 0; i < sourceIds.size(); i++) {
requestedFiles.set(i, sourceIds[i]);
auto requestedFiles = request.initRequestedFiles(sourceFiles.size());
for (size_t i = 0; i < sourceFiles.size(); i++) {
auto requestedFile = requestedFiles[i];
requestedFile.setId(sourceFiles[i].id);
requestedFile.setFilename(sourceFiles[i].name);
requestedFile.adoptImports(compiler->getFileImportTable(
*sourceFiles[i].module, Orphanage::getForMessageContaining(requestedFile)));
}
for (auto& output: outputs) {
......@@ -391,8 +396,8 @@ public:
}
kj::MainBuilder::Validity setRootType(kj::StringPtr type) {
KJ_ASSERT(sourceIds.size() == 1);
uint64_t id = sourceIds[0];
KJ_ASSERT(sourceFiles.size() == 1);
uint64_t id = sourceFiles[0].id;
while (type.size() > 0) {
kj::String temp;
......@@ -414,7 +419,7 @@ public:
}
Schema schema = compiler->getLoader().get(id);
if (schema.getProto().getBody().which() != schema::Node::Body::STRUCT_NODE) {
if (schema.getProto().which() != schema2::Node::STRUCT) {
return "not a struct type";
}
rootType = schema.asStruct();
......@@ -557,7 +562,13 @@ private:
StructSchema rootType;
// For the "decode" command.
kj::Vector<uint64_t> sourceIds;
struct SourceFile {
// Record of one source file that was successfully loaded and added to the compiler. Entries are
// created with aggregate initialization, so the field order below must not change.
uint64_t id;
// ID returned by `compiler->add()` for this file's module.
kj::StringPtr name;
// Canonical file name, i.e. `file.slice(longestPrefix)`; written to the request as the file name.
// NOTE(review): StringPtr is presumably non-owning, so the underlying path string must outlive
// this entry -- confirm against the caller.
const Module* module;
// The loaded module; passed to `getFileImportTable()` when building the CodeGeneratorRequest.
};
kj::Vector<SourceFile> sourceFiles;
struct OutputDirective {
kj::ArrayPtr<const char> name;
......
This diff is collapsed.
......@@ -25,7 +25,7 @@
#define CAPNP_COMPILER_COMPILER_H_
#include <capnp/compiler/grammar.capnp.h>
#include <capnp/schema.capnp.h>
#include <capnp/schema2.capnp.h>
#include <capnp/schema-loader.h>
#include "error-reporter.h"
......@@ -84,6 +84,10 @@ public:
// exception if the parent ID is not recognized; returns null if the parent has no child of the
// given name. Neither the parent nor the child schema node is actually compiled.
Orphan<List<schema2::CodeGeneratorRequest::RequestedFile::Import>>
getFileImportTable(const Module& module, Orphanage orphanage) const;
// Build the import table for the CodeGeneratorRequest for the given module.
enum Eagerness: uint32_t {
// Flags specifying how eager to be about compilation. These are intended to be bitwise OR'd.
// Used with the method `eagerlyCompile()`.
......
This diff is collapsed.
......@@ -26,8 +26,7 @@
#include <capnp/orphan.h>
#include <capnp/compiler/grammar.capnp.h>
#include <capnp/schema.capnp.h>
#include <capnp/schema-loader.h>
#include <capnp/schema2.capnp.h>
#include <capnp/dynamic.h>
#include <kj/vector.h>
#include "error-reporter.h"
......@@ -63,7 +62,7 @@ public:
// traversing other schemas. Returns null if the ID is recognized, but the corresponding
// schema node failed to be built for reasons that were already reported.
virtual kj::Maybe<schema::Node::Reader> resolveFinalSchema(uint64_t id) const = 0;
virtual kj::Maybe<schema2::Node::Reader> resolveFinalSchema(uint64_t id) const = 0;
// Get the final schema for the given ID. A bootstrap schema is not acceptable. A raw
// node reader is returned rather than a Schema object because using a Schema object built
// by the final schema loader could trigger lazy initialization of dependencies which could
......@@ -78,13 +77,23 @@ public:
};
NodeTranslator(const Resolver& resolver, const ErrorReporter& errorReporter,
const Declaration::Reader& decl, Orphan<schema::Node> wipNode,
const Declaration::Reader& decl, Orphan<schema2::Node> wipNode,
bool compileAnnotations);
// Construct a NodeTranslator to translate the given declaration. The wipNode starts out with
// `displayName`, `id`, `scopeId`, and `nestedNodes` already initialized. The `NodeTranslator`
// fills in the rest.
schema::Node::Reader getBootstrapNode() { return wipNode.getReader(); }
struct NodeSet {
// Return type of getBootstrapNode() and finish(): the translated node plus any extra nodes that
// were generated while translating it.
// NOTE(review): these are Readers, so they presumably point into storage owned elsewhere
// (e.g. `wipNode` / `groups`) -- confirm lifetime expectations with the implementation.
schema2::Node::Reader node;
// The main node.
kj::Array<schema2::Node::Reader> auxNodes;
// Auxiliary nodes that were produced when translating this node and should be loaded along
// with it. In particular, structs that contain groups (or named unions) spawn extra nodes
// representing those.
};
NodeSet getBootstrapNode();
// Get an incomplete version of the node in which pointer-typed value expressions have not yet
// been translated. Instead, for all `schema.Value` objects representing pointer-type values,
// the value is set to an appropriate "empty" value. This version of the schema can be used to
......@@ -93,7 +102,7 @@ public:
// If the final node has already been built, this will actually return the final node (in fact,
// it's the same node object).
schema::Node::Reader finish();
NodeSet finish();
// Finish translating the node (including filling in all the pieces that are missing from the
// bootstrap node) and return it.
......@@ -102,13 +111,17 @@ private:
const ErrorReporter& errorReporter;
bool compileAnnotations;
Orphan<schema::Node> wipNode;
Orphan<schema2::Node> wipNode;
// The work-in-progress schema node.
kj::Vector<Orphan<schema2::Node>> groups;
// If this is a struct node and it contains groups, these are the nodes for those groups, which
// must be loaded together with the top-level node.
struct UnfinishedValue {
ValueExpression::Reader source;
schema::Type::Reader type;
schema::Value::Builder target;
schema2::Type::Reader type;
schema2::Value::Builder target;
};
kj::Vector<UnfinishedValue> unfinishedValues;
// List of values in `wipNode` which have not yet been interpreted, because they are structs
......@@ -116,15 +129,14 @@ private:
// of the dynamic API). Once bootstrap schemas have been built, they can be used to interpret
// these values.
void compileNode(Declaration::Reader decl, schema::Node::Builder builder);
void compileNode(Declaration::Reader decl, schema2::Node::Builder builder);
void disallowNested(List<Declaration>::Reader nestedDecls);
// Complain if the nested decl list is non-empty.
void compileFile(Declaration::Reader decl, schema::FileNode::Builder builder);
void compileConst(Declaration::Const::Reader decl, schema::ConstNode::Builder builder);
void compileConst(Declaration::Const::Reader decl, schema2::Node::Const::Builder builder);
void compileAnnotation(Declaration::Annotation::Reader decl,
schema::AnnotationNode::Builder builder);
schema2::Node::Annotation::Builder builder);
class DuplicateNameDetector;
class DuplicateOrdinalDetector;
......@@ -132,28 +144,28 @@ private:
class StructTranslator;
void compileEnum(Declaration::Enum::Reader decl, List<Declaration>::Reader members,
schema::EnumNode::Builder builder);
schema2::Node::Builder builder);
void compileStruct(Declaration::Struct::Reader decl, List<Declaration>::Reader members,
schema::StructNode::Builder builder);
schema2::Node::Builder builder);
void compileInterface(Declaration::Interface::Reader decl, List<Declaration>::Reader members,
schema::InterfaceNode::Builder builder);
schema2::Node::Builder builder);
// The `members` arrays contain only members with ordinal numbers, in code order. Other members
// are handled elsewhere.
bool compileType(TypeExpression::Reader source, schema::Type::Builder target);
bool compileType(TypeExpression::Reader source, schema2::Type::Builder target);
// Returns false if there was a problem, in which case value expressions of this type should
// not be parsed.
void compileDefaultDefaultValue(schema::Type::Reader type, schema::Value::Builder target);
void compileDefaultDefaultValue(schema2::Type::Reader type, schema2::Value::Builder target);
// Initializes `target` to contain the "default default" value for `type`.
void compileBootstrapValue(ValueExpression::Reader source, schema::Type::Reader type,
schema::Value::Builder target);
void compileBootstrapValue(ValueExpression::Reader source, schema2::Type::Reader type,
schema2::Value::Builder target);
// Calls compileValue() if this value should be interpreted at bootstrap time. Otherwise,
// adds the value to `unfinishedValues` for later evaluation.
void compileValue(ValueExpression::Reader source, schema::Type::Reader type,
schema::Value::Builder target, bool isBootstrap);
void compileValue(ValueExpression::Reader source, schema2::Type::Reader type,
schema2::Value::Builder target, bool isBootstrap);
// Interprets the value expression and initializes `target` with the result.
class DynamicSlot;
......@@ -165,22 +177,16 @@ private:
void compileValueInner(ValueExpression::Reader src, DynamicSlot& dst, bool isBootstrap);
// Helper for compileValue().
void copyValue(schema::Value::Reader src, schema::Type::Reader srcType,
schema::Value::Builder dst, schema::Type::Reader dstType,
ValueExpression::Reader errorLocation);
// Copy a value from one schema to another, possibly coercing the type if compatible, or
// reporting an error otherwise.
kj::Maybe<DynamicValue::Reader> readConstant(DeclName::Reader name, bool isBootstrap,
ValueExpression::Reader errorLocation);
// Get the value of the given constant. May return null if some error occurs, which will already
// have been reported.
kj::Maybe<ListSchema> makeListSchemaOf(schema::Type::Reader elementType);
kj::Maybe<ListSchema> makeListSchemaOf(schema2::Type::Reader elementType);
// Construct a list schema representing a list of elements of the given type. May return null if
// some error occurs, which will already have been reported.
Orphan<List<schema::Annotation>> compileAnnotationApplications(
Orphan<List<schema2::Annotation>> compileAnnotationApplications(
List<Declaration::AnnotationApplication>::Reader annotations,
kj::StringPtr targetsFlagName);
};
......
......@@ -22,6 +22,7 @@
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include "parser.h"
#include "md5.h"
#include <capnp/dynamic.h>
#include <kj/debug.h>
#include <unistd.h>
......@@ -46,6 +47,29 @@ uint64_t generateRandomId() {
return result | (1ull << 63);
}
uint64_t generateChildId(uint64_t parentId, kj::StringPtr childName) {
// Compute ID by MD5 hashing the concatenation of the parent ID and the declaration name, and
// then taking the first 8 bytes.
kj::byte parentIdBytes[sizeof(uint64_t)];
for (uint i = 0; i < sizeof(uint64_t); i++) {
parentIdBytes[i] = (parentId >> (i * 8)) & 0xff;
}
Md5 md5;
md5.update(kj::arrayPtr(parentIdBytes, KJ_ARRAY_SIZE(parentIdBytes)));
md5.update(childName);
kj::ArrayPtr<const kj::byte> resultBytes = md5.finish();
uint64_t result = 0;
for (uint i = 0; i < sizeof(uint64_t); i++) {
result = (result << 8) | resultBytes[i];
}
return result | (1ull << 63);
}
void parseFile(List<Statement>::Reader statements, ParsedFile::Builder result,
const ErrorReporter& errorReporter) {
CapnpParser parser(Orphanage::getForMessageContaining(result), errorReporter);
......
......@@ -43,6 +43,11 @@ void parseFile(List<Statement>::Reader statements, ParsedFile::Builder result,
uint64_t generateRandomId();
// Generate a new random unique ID. This lives here mostly for lack of a better location.
uint64_t generateChildId(uint64_t parentId, kj::StringPtr childName);
// Generate the ID for a child node given its parent ID and name.
//
// TODO(cleanup): Move generateRandomId() and generateChildId() somewhere more sensible.
class CapnpParser {
// Advanced parser interface. This interface exposes the inner parsers so that you can embed
// them into your own parsers.
......
This diff is collapsed.
This diff is collapsed.
......@@ -164,14 +164,11 @@ struct RawSchema {
// TODO(someday): Make this a hashtable.
struct MemberInfo {
uint16_t scopeOrdinal;
// One plus the ordinal number of the parent scope of this member when looking up by name.
// Zero represents the top-level scope.
uint16_t index;
// Index of the member within its scope. If the index is greater than the number of elements
// in the scope, then the member is in an unnamed union, and its index within that union is
// `index` - (number of members in the outer scope).
uint16_t value;
inline operator uint16_t() const { return value; }
constexpr MemberInfo(uint16_t value): value(value) {}
constexpr MemberInfo(uint16_t value, uint16_t dummy): value(value) {}
};
const MemberInfo* membersByName;
......@@ -182,6 +179,10 @@ struct RawSchema {
uint32_t memberCount;
// Sizes of above tables.
const uint16_t* membersByDiscriminant;
// List of all member indexes ordered by discriminant value. Those which don't have a
// discriminant value are listed at the end, in order by ordinal.
const RawSchema* canCastTo;
// Points to the RawSchema of a compiled-in type to which it is safe to cast any DynamicValue
// with this schema. This is null for all compiled-in types; it is only set by SchemaLoader on
......
......@@ -66,7 +66,7 @@ public:
kj::Maybe<Schema> tryGet(uint64_t id) const;
// Like get() but doesn't throw.
Schema load(const schema::Node::Reader& reader);
Schema load(const schema2::Node::Reader& reader);
// Loads the given schema node. Validates the node and throws an exception if invalid. This
// makes a copy of the schema, so the object passed in can be destroyed after this returns.
//
......@@ -101,7 +101,7 @@ public:
// Also note that unknown types are not considered invalid. Instead, the dynamic API returns
// a DynamicValue with type UNKNOWN for these.
Schema loadOnce(const schema::Node::Reader& reader) const;
Schema loadOnce(const schema2::Node::Reader& reader) const;
// Like `load()` but does nothing if a schema with the same ID is already loaded. In contrast,
// `load()` would attempt to compare the schemas and take the newer one. `loadOnce()` is safe
// to call even while concurrently using schemas from this loader. It should be considered an
......
This diff is collapsed.
This diff is collapsed.
......@@ -75,7 +75,12 @@ struct Node {
dataSectionWordSize @7 :UInt16;
pointerSectionSize @8 :UInt16;
isGroup @9 :Bool;
preferredListEncoding @9 :ElementSize;
# The preferred element size to use when encoding a list of this struct. If this is anything
# other than `inlineComposite` then the struct is one word or less in size and is a candidate
# for list packing optimization.
isGroup @10 :Bool;
# If true, then this "struct" node is actually not an independent node, but merely represents
# some named union or group within a particular parent struct. This node's scopeId refers
# to the parent struct, which may itself be a union/group in yet another struct.
......@@ -87,24 +92,21 @@ struct Node {
# Note that a named union is considered a special kind of group -- in fact, a named union
# is exactly equivalent to a group that contains nothing but an unnamed union.
isUnion @10 :Bool;
# Whether or not this struct (or group) contains some fields that overlap. If so, then a
# discriminant must be present to indicate which field among the overlapping ones is
# currently active.
discriminantCount @11 :UInt16;
# Number of fields in this struct which are members of an anonymous union, and thus may
# overlap. If this is non-zero, then a 16-bit discriminant is present indicating which
# of the overlapping fields is active. This can never be 1 -- if it is non-zero, it must be
# two or more.
#
# Note that an unnamed union causes `isUnion` to be true on its _parent_. E.g. for a struct
# containing an unnamed union, the struct node itself will have `isUnion` = true, and there
# will be no "group" nodes. On the other hand, a named union is equivalent to a group
# containing an unnamed union, and thus becomes a whole separate schema node.
# Note that the fields of an unnamed union are considered fields of the scope containing the
# union -- an unnamed union is not its own group. So, a top-level struct may contain a
# non-zero discriminant count. Named unions, on the other hand, are equivalent to groups
# containing unnamed unions. So, a named union has its own independent schema node, with
# `isGroup` = true.
discriminantOffset @11 :UInt32;
discriminantOffset @12 :UInt32;
# If `discriminantCount` is non-zero, this is the offset of the union discriminant, in multiples of 16 bits.
preferredListEncoding @12 :ElementSize;
# The preferred element size to use when encoding a list of this struct. If this is anything
# other than `inlineComposite` then the struct is one word or less in size and is a candidate
# for list packing optimization.
fields @13 :List(Field);
# Fields defined within this scope (either the struct's top-level fields, or the fields of
# a particular group; see `isGroup`).
......@@ -183,11 +185,14 @@ struct Field {
# A group. This is the ID of the group's node.
}
ordinal @8 :UInt16;
# The original ordinal number given to the field. You probably should NOT use this; if you need
# a numeric identifier for a field, use its position within the field array for its scope.
# The ordinal is given here mainly just so that the original schema text can be reproduced given
# the compiled version -- i.e. so that `capnp compile -ocapnp` can do its job.
ordinal :union {
implicit @8 :Void;
explicit @9 :UInt16;
# The original ordinal number given to the field. You probably should NOT use this; if you need
# a numeric identifier for a field, use its position within the field array for its scope.
# The ordinal is given here mainly just so that the original schema text can be reproduced given
# the compiled version -- i.e. so that `capnp compile -ocapnp` can do its job.
}
}
struct Enumerant {
......@@ -232,63 +237,65 @@ struct Method {
struct Type {
# Represents a type expression.
body @0 union {
voidType @1 :Void;
boolType @2 :Void;
int8Type @3 :Void;
int16Type @4 :Void;
int32Type @5 :Void;
int64Type @6 :Void;
uint8Type @7 :Void;
uint16Type @8 :Void;
uint32Type @9 :Void;
uint64Type @10 :Void;
float32Type @11 :Void;
float64Type @12 :Void;
textType @13 :Void;
dataType @14 :Void;
listType @15 :Type; # Value = the element type.
enumType @16 :Id;
structType @17 :Id;
interfaceType @18 :Id;
objectType @19 :Void;
union {
# The ordinals intentionally match those of Value.
void @0 :Void;
bool @1 :Void;
int8 @2 :Void;
int16 @3 :Void;
int32 @4 :Void;
int64 @5 :Void;
uint8 @6 :Void;
uint16 @7 :Void;
uint32 @8 :Void;
uint64 @9 :Void;
float32 @10 :Void;
float64 @11 :Void;
text @12 :Void;
data @13 :Void;
list @14 :Type; # Value = the element type.
enum @15 :Id;
struct @16 :Id;
interface @17 :Id;
object @18 :Void;
}
}
struct Value {
# Represents a value, e.g. a field default value, constant value, or annotation value.
body @0 union {
# Note ordinals 1 and 10 are intentionally swapped to improve union layout.
# TODO: Make it 2 and 10 that are swapped instead so that voidValue is still default?
voidValue @10 :Void;
boolValue @2 :Bool;
int8Value @3 :Int8;
int16Value @4 :Int16;
int32Value @5 :Int32;
int64Value @6 :Int64;
uint8Value @7 :UInt8;
uint16Value @8 :UInt16;
uint32Value @9 :UInt32;
uint64Value @1 :UInt64;
float32Value @11 :Float32;
float64Value @12 :Float64;
textValue @13 :Text;
dataValue @14 :Data;
listValue @15 :Object;
enumValue @16 :UInt16;
structValue @17 :Object;
interfaceValue @18 :Void;
union {
# The ordinals intentionally match those of Type.
void @0 :Void;
bool @1 :Bool;
int8 @2 :Int8;
int16 @3 :Int16;
int32 @4 :Int32;
int64 @5 :Int64;
uint8 @6 :UInt8;
uint16 @7 :UInt16;
uint32 @8 :UInt32;
uint64 @9 :UInt64;
float32 @10 :Float32;
float64 @11 :Float64;
text @12 :Text;
data @13 :Data;
list @14 :Object;
enum @15 :UInt16;
struct @16 :Object;
interface @17 :Void;
# The only interface value that can be represented statically is "null", whose methods always
# throw exceptions.
objectValue @19 :Object;
object @18 :Object;
}
}
......
This diff is collapsed.
This diff is collapsed.
......@@ -186,7 +186,7 @@ static kj::StringTree print(const DynamicValue::Reader& value,
return kj::strTree(enumerant->getProto().getName());
} else {
// Unknown enum value; output raw number.
return kj::strTree(enumValue.getRaw());
return kj::strTree('(', enumValue.getRaw(), ')');
}
break;
}
......
......@@ -148,14 +148,14 @@ namespace kj {
KJ_UNIQUE_NAME(_kjContext)(KJ_UNIQUE_NAME(_kjContextFunc))
#define _kJ_NONNULL(nature, value, ...) \
({ \
(*({ \
auto result = ::kj::_::readMaybe(value); \
if (KJ_UNLIKELY(!result)) { \
::kj::_::Debug::Fault(__FILE__, __LINE__, ::kj::Exception::Nature::nature, 0, \
#value " != nullptr", #__VA_ARGS__, ##__VA_ARGS__).fatal(); \
} \
*result; \
})
result; \
}))
#define KJ_ASSERT_NONNULL(value, ...) _kJ_NONNULL(LOCAL_BUG, value, ##__VA_ARGS__)
#define KJ_REQUIRE_NONNULL(value, ...) _kJ_NONNULL(PRECONDITION, value, ##__VA_ARGS__)
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment