Commit 0757e6ef authored by Kenton Varda's avatar Kenton Varda

Implement adoptWithCaveats() for struct lists, and some parser tweaks.

parent ce4162e8
This diff is collapsed.
......@@ -42,7 +42,7 @@ bool lex(kj::ArrayPtr<const char> input, LexedStatements::Builder result) {
KJ_IF_MAYBE(output, parseOutput) {
auto l = result.initStatements(output->size());
for (uint i = 0; i < output->size(); i++) {
l[i].adoptStatement(kj::mv((*output)[i]));
l.adoptWithCaveats(i, kj::mv((*output)[i]));
}
return true;
} else {
......@@ -64,7 +64,7 @@ bool lex(kj::ArrayPtr<const char> input, LexedTokens::Builder result) {
KJ_IF_MAYBE(output, parseOutput) {
auto l = result.initTokens(output->size());
for (uint i = 0; i < output->size(); i++) {
l[i].adoptToken(kj::mv((*output)[i]));
l.adoptWithCaveats(i, kj::mv((*output)[i]));
}
return true;
} else {
......@@ -85,13 +85,13 @@ Token::Body::Builder initTok(Orphan<Token>& t, const Location& loc) {
return tb.getBody();
}
void buildTokenSequenceList(List<List<TokenPointer>>::Builder builder,
void buildTokenSequenceList(List<List<Token>>::Builder builder,
kj::Array<kj::Array<Orphan<Token>>>&& items) {
for (uint i = 0; i < items.size(); i++) {
auto& item = items[i];
auto itemBuilder = builder.init(i, item.size());
for (uint j = 0; j < item.size(); j++) {
itemBuilder[j].adoptToken(kj::mv(item[j]));
itemBuilder.adoptWithCaveats(j, kj::mv(item[j]));
}
}
}
......@@ -101,16 +101,14 @@ void attachDocComment(Statement::Builder statement, kj::Array<kj::String>&& comm
for (auto& line: comment) {
size += line.size() + 1; // include newline
}
if (size > 0) {
Text::Builder builder = statement.initDocComment(size);
char* pos = builder.begin();
for (auto& line: comment) {
memcpy(pos, line.begin(), line.size());
pos += line.size();
*pos++ = '\n';
}
KJ_ASSERT(pos == builder.end());
Text::Builder builder = statement.initDocComment(size);
char* pos = builder.begin();
for (auto& line: comment) {
memcpy(pos, line.begin(), line.size());
pos += line.size();
*pos++ = '\n';
}
KJ_ASSERT(pos == builder.end());
}
constexpr auto discardComment =
......@@ -131,16 +129,16 @@ constexpr auto newline = p::oneOf(
p::exactChar<'\n'>(),
sequence(p::exactChar<'\r'>(), p::discard(p::optional(p::exactChar<'\n'>()))));
constexpr auto docComment = sequence(
constexpr auto docComment = p::optional(p::sequence(
discardLineWhitespace,
p::discard(p::optional(newline)),
p::many(p::sequence(discardLineWhitespace, saveComment)));
p::oneOrMore(p::sequence(discardLineWhitespace, saveComment))));
// Parses a set of comment lines preceded by at most one newline and with no intervening blank
// lines.
} // namespace
Lexer::Lexer(Orphanage orphanage): orphanage(orphanage) {
Lexer::Lexer(Orphanage orphanageParam): orphanage(orphanageParam) {
// Note that because passing an lvalue to a parser constructor uses it by-reference, it's safe
// for us to use parsers.tokenSequence even though we haven't yet constructed it.
......@@ -148,7 +146,7 @@ Lexer::Lexer(Orphanage orphanage): orphanage(orphanage) {
auto& commaDelimitedList = arena.copy(p::transform(
p::sequence(tokenSequence, p::many(p::sequence(p::exactChar<','>(), tokenSequence))),
[&](kj::Array<Orphan<Token>>&& first, kj::Array<kj::Array<Orphan<Token>>>&& rest)
[this](kj::Array<Orphan<Token>>&& first, kj::Array<kj::Array<Orphan<Token>>>&& rest)
-> kj::Array<kj::Array<Orphan<Token>>> {
if (first == nullptr && rest == nullptr) {
// Completely empty list.
......@@ -165,39 +163,39 @@ Lexer::Lexer(Orphanage orphanage): orphanage(orphanage) {
auto& token = arena.copy(p::oneOf(
p::transformWithLocation(p::identifier,
[&](Location loc, kj::String name) -> Orphan<Token> {
[this](Location loc, kj::String name) -> Orphan<Token> {
auto t = orphanage.newOrphan<Token>();
initTok(t, loc).setIdentifier(name);
return t;
}),
p::transformWithLocation(p::doubleQuotedString,
[&](Location loc, kj::String text) -> Orphan<Token> {
[this](Location loc, kj::String text) -> Orphan<Token> {
auto t = orphanage.newOrphan<Token>();
initTok(t, loc).setStringLiteral(text);
return t;
}),
p::transformWithLocation(p::integer,
[&](Location loc, uint64_t i) -> Orphan<Token> {
[this](Location loc, uint64_t i) -> Orphan<Token> {
auto t = orphanage.newOrphan<Token>();
initTok(t, loc).setIntegerLiteral(i);
return t;
}),
p::transformWithLocation(p::number,
[&](Location loc, double x) -> Orphan<Token> {
[this](Location loc, double x) -> Orphan<Token> {
auto t = orphanage.newOrphan<Token>();
initTok(t, loc).setFloatLiteral(x);
return t;
}),
p::transformWithLocation(
p::charsToString(p::oneOrMore(p::anyOfChars("!$%&*+-./:<=>?@^|~"))),
[&](Location loc, kj::String text) -> Orphan<Token> {
[this](Location loc, kj::String text) -> Orphan<Token> {
auto t = orphanage.newOrphan<Token>();
initTok(t, loc).setOperator(text);
return t;
}),
p::transformWithLocation(
sequence(p::exactChar<'('>(), commaDelimitedList, p::exactChar<')'>()),
[&](Location loc, kj::Array<kj::Array<Orphan<Token>>>&& items) -> Orphan<Token> {
[this](Location loc, kj::Array<kj::Array<Orphan<Token>>>&& items) -> Orphan<Token> {
auto t = orphanage.newOrphan<Token>();
buildTokenSequenceList(
initTok(t, loc).initParenthesizedList(items.size()), kj::mv(items));
......@@ -205,7 +203,7 @@ Lexer::Lexer(Orphanage orphanage): orphanage(orphanage) {
}),
p::transformWithLocation(
sequence(p::exactChar<'['>(), commaDelimitedList, p::exactChar<']'>()),
[&](Location loc, kj::Array<kj::Array<Orphan<Token>>>&& items) -> Orphan<Token> {
[this](Location loc, kj::Array<kj::Array<Orphan<Token>>>&& items) -> Orphan<Token> {
auto t = orphanage.newOrphan<Token>();
buildTokenSequenceList(
initTok(t, loc).initBracketedList(items.size()), kj::mv(items));
......@@ -219,34 +217,46 @@ Lexer::Lexer(Orphanage orphanage): orphanage(orphanage) {
auto& statementEnd = arena.copy(p::oneOf(
transform(p::sequence(p::exactChar<';'>(), docComment),
[&](kj::Array<kj::String>&& comment) -> Orphan<Statement> {
[this](kj::Maybe<kj::Array<kj::String>>&& comment) -> Orphan<Statement> {
auto result = orphanage.newOrphan<Statement>();
auto builder = result.get();
attachDocComment(builder, kj::mv(comment));
KJ_IF_MAYBE(c, comment) {
attachDocComment(builder, kj::mv(*c));
}
builder.getBlock().setNone();
return result;
}),
transform(
p::sequence(p::exactChar<'{'>(), docComment, statementSequence, p::exactChar<'}'>()),
[&](kj::Array<kj::String>&& comment, kj::Array<Orphan<Statement>>&& statements)
p::sequence(p::exactChar<'{'>(), docComment, statementSequence, p::exactChar<'}'>(),
docComment),
[this](kj::Maybe<kj::Array<kj::String>>&& comment,
kj::Array<Orphan<Statement>>&& statements,
kj::Maybe<kj::Array<kj::String>>&& lateComment)
-> Orphan<Statement> {
auto result = orphanage.newOrphan<Statement>();
auto builder = result.get();
attachDocComment(builder, kj::mv(comment));
KJ_IF_MAYBE(c, comment) {
attachDocComment(builder, kj::mv(*c));
} else KJ_IF_MAYBE(c, lateComment) {
attachDocComment(builder, kj::mv(*c));
}
auto list = builder.getBlock().initStatements(statements.size());
for (uint i = 0; i < statements.size(); i++) {
list[i].adoptStatement(kj::mv(statements[i]));
list.adoptWithCaveats(i, kj::mv(statements[i]));
}
return result;
})
));
auto& statement = arena.copy(p::transform(p::sequence(tokenSequence, statementEnd),
[&](kj::Array<Orphan<Token>>&& tokens, Orphan<Statement>&& statement) {
auto tokensBuilder = statement.get().initTokens(tokens.size());
auto& statement = arena.copy(p::transformWithLocation(p::sequence(tokenSequence, statementEnd),
[this](Location loc, kj::Array<Orphan<Token>>&& tokens, Orphan<Statement>&& statement) {
auto builder = statement.get();
auto tokensBuilder = builder.initTokens(tokens.size());
for (uint i = 0; i < tokens.size(); i++) {
tokensBuilder[i].adoptToken(kj::mv(tokens[i]));
tokensBuilder.adoptWithCaveats(i, kj::mv(tokens[i]));
}
builder.setStartByte(loc.begin());
builder.setEndByte(loc.end());
return kj::mv(statement);
}));
......
# Copyright (c) 2013, Kenton Varda <temporal@gmail.com>
# All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions are met:
#
# 1. Redistributions of source code must retain the above copyright notice, this
# list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright notice,
# this list of conditions and the following disclaimer in the documentation
# and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
# WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
# DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
# ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
# (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
# LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
# ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
# SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
@0xa73956d2621fc3ee;
using Cxx = import "/capnp/c++.capnp";
......@@ -11,48 +34,35 @@ struct Token {
integerLiteral @3 :UInt64;
floatLiteral @4 :Float64;
operator @5 :Text;
parenthesizedList @6 :List(List(TokenPointer));
bracketedList @7 :List(List(TokenPointer));
parenthesizedList @6 :List(List(Token));
bracketedList @7 :List(List(Token));
}
startByte @8 :UInt32;
endByte @9 :UInt32;
}
struct TokenPointer {
# Hack to deal with the fact that struct lists cannot adopt elements.
#
# TODO(cleanup): Find a better approach.
token @0 :Token;
}
struct Statement {
tokens @0 :List(TokenPointer);
tokens @0 :List(Token);
block @1 union {
none @2 :Void;
statements @3 :List(StatementPointer);
statements @3 :List(Statement);
}
docComment @4 :Text;
}
struct StatementPointer {
# Hack to deal with the fact that struct lists cannot adopt elements.
#
# TODO(cleanup): Find a better approach.
statement @0 :Statement;
startByte @5 :UInt32;
endByte @6 :UInt32;
}
struct LexedTokens {
# Lexer output when asked to parse tokens that don't form statements.
tokens @0 :List(TokenPointer);
tokens @0 :List(Token);
}
struct LexedStatements {
# Lexer output when asked to parse statements.
statements @0 :List(StatementPointer);
statements @0 :List(Statement);
}
......@@ -698,6 +698,10 @@ struct WireHelpers {
// mistakenly thinks the source location still owns the object. transferPointer() doesn't do
// this zeroing itself because many callers transfer several pointers in a loop then zero out
// the whole section.
KJ_DASSERT(dst->isNull());
// We expect the caller to ensure the target is already null so won't leak.
if (src->isNull()) {
memset(dst, 0, sizeof(WirePointer));
} else if (src->kind() == WirePointer::FAR) {
......@@ -2034,6 +2038,45 @@ OrphanBuilder StructBuilder::disown(WirePointerCount ptrIndex) {
return WireHelpers::disown(segment, pointers + ptrIndex);
}
void StructBuilder::transferContentFrom(StructBuilder other) {
  // Moves the content of `other` into this struct: the data section is copied and the pointers
  // are transferred, after which the transferred pointer slots in `other` are zeroed. If `other`
  // has a larger data section or more pointers than this struct, the excess is not transferred.

  // Determine the amount of data the builders have in common.
  BitCount sharedDataSize = kj::min(dataSize, other.dataSize);

  if (dataSize > sharedDataSize) {
    // Since the target is larger than the source, make sure to zero out the extra bits that the
    // source doesn't have.
    if (dataSize == 1 * BITS) {
      setDataField<bool>(0 * ELEMENTS, false);
    } else {
      byte* unshared = reinterpret_cast<byte*>(data) + sharedDataSize / BITS_PER_BYTE / BYTES;
      memset(unshared, 0, (dataSize - sharedDataSize) / BITS_PER_BYTE / BYTES);
    }
  }

  // Copy over the shared part. A one-bit data section is handled as a single bool field rather
  // than with a byte-granularity memcpy.
  if (sharedDataSize == 1 * BITS) {
    setDataField<bool>(0 * ELEMENTS, other.getDataField<bool>(0 * ELEMENTS));
  } else {
    memcpy(data, other.data, sharedDataSize / BITS_PER_BYTE / BYTES);
  }

  // Zero out all pointers in the target.
  for (uint i = 0; i < pointerCount / POINTERS; i++) {
    WireHelpers::zeroObject(segment, pointers + i);
  }
  // Also null the pointer slots themselves. transferPointer() expects the caller to ensure the
  // destination is already null, and slots beyond sharedPointerCount are never overwritten below,
  // so no slot may be left holding a stale pointer to an object that was just zeroed.
  // NOTE(review): this is redundant if zeroObject() already clears the slot -- confirm.
  memset(pointers, 0, pointerCount * BYTES_PER_POINTER / BYTES);

  // Transfer the pointers.
  WirePointerCount sharedPointerCount = kj::min(pointerCount, other.pointerCount);
  for (uint i = 0; i < sharedPointerCount / POINTERS; i++) {
    WireHelpers::transferPointer(segment, pointers + i, other.segment, other.pointers + i);
  }

  // Zero out the pointers that were transferred in the source because it no longer has ownership.
  // If the source had any extra pointers that the destination didn't have space for, we
  // intentionally leave them be, so that they'll be cleaned up later.
  memset(other.pointers, 0, sharedPointerCount * BYTES_PER_POINTER / BYTES);
}
bool StructBuilder::isPointerFieldNull(WirePointerCount ptrIndex) {
// Returns whether the pointer at offset `ptrIndex` within this struct's pointer section is null.
return (pointers + ptrIndex)->isNull();
}
......
......@@ -364,6 +364,11 @@ public:
// Detach the given pointer field from this object. The pointer becomes null, and the child
// object is returned as an orphan.
void transferContentFrom(StructBuilder other);
// Adopt all pointers from `other`, and also copy all data. If `other`'s sections are larger
// than this, the extra data is not transferred, meaning there is a risk of data loss when
// transferring from messages built with future versions of the protocol.
bool isPointerFieldNull(WirePointerCount ptrIndex);
StructReader asReader() const;
......
......@@ -249,6 +249,24 @@ struct List<T, Kind::STRUCT> {
return typename T::Builder(builder.getStructElement(index * ELEMENTS));
}
inline void adoptWithCaveats(uint index, Orphan<T>&& orphan) {
  // Approximates what an `adopt` for element `index` would do. Because the elements of a struct
  // list are stored inline rather than behind a pointer, two caveats apply:
  // * The operation is really a shallow copy: the orphan's children are adopted individually,
  //   not the orphan itself. The orphan is then discarded, which may waste space in the
  //   message object.
  // * An orphan that is larger than the target struct -- for example one built against a newer
  //   schema version with additional fields -- is truncated, losing data.
  auto target = builder.getStructElement(index * ELEMENTS);

  // asStruct() is given a zero-valued StructSize because the orphan's struct must not be
  // expanded under any circumstances: it is about to be thrown away anyway, and
  // transferContentFrom() already carefully compares the struct sizes before transferring.
  target.transferContentFrom(
      orphan.builder.asStruct(_::StructSize(0 * WORDS, 0 * POINTERS, _::FieldSize::VOID)));
}
// There are no init(), set(), adopt(), or disown() methods for lists of structs because the
// elements of the list are inlined and are initialized when the list is initialized. This
// means that init() would be redundant, and set() would risk data loss if the input struct
......
......@@ -344,6 +344,37 @@ TEST(CommonParsers, TransformParser) {
}
}
TEST(CommonParsers, TransformOrRejectParser) {
  // The sub-parser accepts any characters; the transform yields 123 only for exactly "foo"
  // and returns null for everything else.
  auto parser = transformOrReject(many(any),
      [](Array<char> chars) -> Maybe<int> {
        if (heapString(chars) == "foo") {
          return 123;
        }
        return nullptr;
      });

  {
    // Input the transform accepts: expect the transformed value and the input at its end.
    StringPtr accepted = "foo";
    Input input(accepted.begin(), accepted.end());
    Maybe<int> outcome = parser(input);
    KJ_IF_MAYBE(value, outcome) {
      EXPECT_EQ(123, *value);
    } else {
      ADD_FAILURE() << "Expected 123, got null.";
    }
    EXPECT_TRUE(input.atEnd());
  }

  {
    // Input the transform rejects: expect a null result, with the input still at its end.
    StringPtr rejected = "bar";
    Input input(rejected.begin(), rejected.end());
    Maybe<int> outcome = parser(input);
    EXPECT_TRUE(outcome == nullptr);
    EXPECT_TRUE(input.atEnd());
  }
}
TEST(CommonParsers, References) {
struct TransformFunc {
int value;
......
......@@ -71,11 +71,11 @@ public:
}
bool atEnd() { return pos == end; }
const Element& current() {
auto current() -> decltype(*instance<Iterator>()) {
KJ_IREQUIRE(!atEnd());
return *pos;
}
const Element& consume() {
auto consume() -> decltype(*instance<Iterator>()) {
KJ_IREQUIRE(!atEnd());
return *pos++;
}
......@@ -274,7 +274,6 @@ private:
template <typename SubParser, typename Result>
constexpr ConstResult_<SubParser, Result> constResult(SubParser&& subParser, Result&& result) {
// Constructs a parser which returns exactly `result` if `subParser` is successful.
// Both arguments are passed on to the ConstResult_ constructor through kj::fwd.
return ConstResult_<SubParser, Result>(kj::fwd<SubParser>(subParser), kj::fwd<Result>(result));
}
......@@ -571,6 +570,27 @@ private:
TransformFunc transform;
};
template <typename SubParser, typename TransformFunc>
class TransformOrReject_ {
public:
explicit constexpr TransformOrReject_(SubParser&& subParser, TransformFunc&& transform)
: subParser(kj::fwd<SubParser>(subParser)), transform(kj::fwd<TransformFunc>(transform)) {}
template <typename Input>
decltype(kj::apply(instance<TransformFunc&>(), instance<OutputType<SubParser, Input>&&>()))
operator()(Input& input) const {
KJ_IF_MAYBE(subResult, subParser(input)) {
return kj::apply(transform, kj::mv(*subResult));
} else {
return nullptr;
}
}
private:
SubParser subParser;
TransformFunc transform;
};
template <typename SubParser, typename TransformFunc>
class TransformWithLocation_ {
public:
......@@ -606,12 +626,21 @@ constexpr Transform_<SubParser, TransformFunc> transform(
kj::fwd<SubParser>(subParser), kj::fwd<TransformFunc>(functor));
}
template <typename SubParser, typename TransformFunc>
constexpr TransformOrReject_<SubParser, TransformFunc> transformOrReject(
SubParser&& subParser, TransformFunc&& functor) {
// Like `transform()` except that `functor` returns a `Maybe`. If it returns null, parsing fails,
// otherwise the parser's result is the content of the `Maybe`.
// Both arguments are passed on to the TransformOrReject_ constructor through kj::fwd.
return TransformOrReject_<SubParser, TransformFunc>(
kj::fwd<SubParser>(subParser), kj::fwd<TransformFunc>(functor));
}
template <typename SubParser, typename TransformFunc>
constexpr TransformWithLocation_<SubParser, TransformFunc> transformWithLocation(
SubParser&& subParser, TransformFunc&& functor) {
// Constructs a parser which executes some other parser and then transforms the result by invoking
// `functor` on it. Typically `functor` is a lambda. It is invoked using `kj::apply`,
// meaning tuples will be unpacked as arguments.
//
// Like `transform` except that `functor` also takes a `Span` as its first parameter specifying
// the location of the parsed content. The span's position type is whatever the parser input's
// getPosition() returns.
return TransformWithLocation_<SubParser, TransformFunc>(
kj::fwd<SubParser>(subParser), kj::fwd<TransformFunc>(functor));
}
......@@ -650,6 +679,8 @@ constexpr AcceptIf_<SubParser, Condition> acceptIf(SubParser&& subParser, Condit
// `condition` on the result to check if it is valid. Typically, `condition` is a lambda
// returning true or false. Like with `transform()`, `condition` is invoked using `kj::apply`
// to unpack tuples.
//
// TODO(soon): Remove in favor of transformOrReject()?
return AcceptIf_<SubParser, Condition>(
kj::fwd<SubParser>(subParser), kj::fwd<Condition>(condition));
}
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment