Skip to content
Projects
Groups
Snippets
Help
Loading...
Sign in / Register
Toggle navigation
C
capnproto
Project
Project
Details
Activity
Cycle Analytics
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Charts
Issues
0
Issues
0
List
Board
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Charts
Packages
Packages
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Charts
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
submodule
capnproto
Commits
ce4162e8
Commit
ce4162e8
authored
Jul 10, 2013
by
Kenton Varda
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
Refactor lexer using arena.
parent
8e1d1e53
Show whitespace changes
Inline
Side-by-side
Showing
4 changed files
with
129 additions
and
86 deletions
+129
-86
lexer.c++
c++/src/capnp/compiler/lexer.c++
+73
-85
lexer.h
c++/src/capnp/compiler/lexer.h
+50
-0
arena.h
c++/src/kj/arena.h
+5
-0
common.h
c++/src/kj/parse/common.h
+1
-1
No files found.
c++/src/capnp/compiler/lexer.c++
View file @
ce4162e8
...
...
@@ -28,12 +28,62 @@
namespace
capnp
{
namespace
compiler
{
// Lexes `input` as a sequence of statements and adopts each parsed statement into `result`.
//
// Returns false if the parser stopped before reaching the end of `input`, or if it produced no
// output at all; returns true once every parsed statement has been adopted.
bool lex(kj::ArrayPtr<const char> input, LexedStatements::Builder result) {
  Lexer lexer(Orphanage::getForMessageContaining(result));
  Lexer::ParserInput parserInput(input.begin(), input.end());

  kj::Maybe<kj::Array<Orphan<Statement>>> parseOutput =
      lexer.getParsers().statementSequence(parserInput);

  // Leftover input means the text was not a valid statement sequence.
  if (!parserInput.atEnd()) {
    return false;
  }

  KJ_IF_MAYBE(statements, parseOutput) {
    auto list = result.initStatements(statements->size());
    uint index = 0;
    for (auto& orphan: *statements) {
      list[index++].adoptStatement(kj::mv(orphan));
    }
    return true;
  }

  return false;
}
// Lexes `input` as a flat sequence of tokens and adopts each parsed token into `result`.
//
// Returns false if the parser stopped before reaching the end of `input`, or if it produced no
// output at all; returns true once every parsed token has been adopted.
bool lex(kj::ArrayPtr<const char> input, LexedTokens::Builder result) {
  Lexer lexer(Orphanage::getForMessageContaining(result));
  Lexer::ParserInput parserInput(input.begin(), input.end());

  kj::Maybe<kj::Array<Orphan<Token>>> parseOutput =
      lexer.getParsers().tokenSequence(parserInput);

  // Leftover input means the text was not a valid token sequence.
  if (!parserInput.atEnd()) {
    return false;
  }

  KJ_IF_MAYBE(tokens, parseOutput) {
    auto list = result.initTokens(tokens->size());
    uint index = 0;
    for (auto& orphan: *tokens) {
      list[index++].adoptToken(kj::mv(orphan));
    }
    return true;
  }

  return false;
}
namespace
p
=
kj
::
parse
;
namespace
{
typedef
p
::
IteratorInput
<
char
,
const
char
*>
Input
;
typedef
p
::
Span
<
const
char
*>
Location
;
typedef
p
::
Span
<
uint32_t
>
Location
;
// Stamps the token held by `t` with the start and end positions taken from `loc`, then returns
// the builder for the token's body so the caller can fill it in.
Token::Body::Builder initTok(Orphan<Token>& t, const Location& loc) {
  auto tokenBuilder = t.get();

  tokenBuilder.setStartByte(loc.begin());
  tokenBuilder.setEndByte(loc.end());

  return tokenBuilder.getBody();
}
void
buildTokenSequenceList
(
List
<
List
<
TokenPointer
>>::
Builder
builder
,
kj
::
Array
<
kj
::
Array
<
Orphan
<
Token
>>>&&
items
)
{
...
...
@@ -90,35 +140,13 @@ constexpr auto docComment = sequence(
}
// namespace
bool
lex
(
kj
::
ArrayPtr
<
const
char
>
input
,
LexedStatements
::
Builder
*
resultStatements
,
LexedTokens
::
Builder
*
resultTokens
)
{
// This is a bit hacky. Since the transformations applied by our parser require access to an
// Orphanage in order to build objects, we construct the parsers as local variables. This means
// that all the parsers need to live in a single function scope. In order to handle both tokens
// and statements, we have the function take `resultStatements` and `resultTokens` and parse
// into whichever one is non-null.
//
// TODO(someday): Perhaps there should be a utility class called ParserPool which has a method
// that takes a parser, allocates a copy of it within some arena, then returns a ParserRef
// referencing that copy. Then there could be a Lexer class which contains a ParserPool and
// builds all its parsers in its constructor. This would allow the class to directly expose
// the parsers so that they can be used within other parser combinators.
Orphanage
orphanage
=
resultStatements
==
nullptr
?
Orphanage
::
getForMessageContaining
(
*
resultTokens
)
:
Orphanage
::
getForMessageContaining
(
*
resultStatements
);
auto
initTok
=
[
&
](
Orphan
<
Token
>&
t
,
const
Location
&
loc
)
->
Token
::
Body
::
Builder
{
auto
tb
=
t
.
get
();
tb
.
setStartByte
(
loc
.
begin
()
-
input
.
begin
());
tb
.
setEndByte
(
loc
.
end
()
-
input
.
begin
());
return
tb
.
getBody
();
};
Lexer
::
Lexer
(
Orphanage
orphanage
)
:
orphanage
(
orphanage
)
{
p
::
ParserRef
<
Input
,
kj
::
Array
<
Orphan
<
Token
>>>
tokenSequence
;
// Note that because passing an lvalue to a parser constructor uses it by-reference, it's safe
// for us to use parsers.tokenSequence even though we haven't yet constructed it.
auto
&
tokenSequence
=
parsers
.
tokenSequence
;
auto
commaDelimitedList
=
transform
(
auto
&
commaDelimitedList
=
arena
.
copy
(
p
::
transform
(
p
::
sequence
(
tokenSequence
,
p
::
many
(
p
::
sequence
(
p
::
exactChar
<
','
>
(),
tokenSequence
))),
[
&
](
kj
::
Array
<
Orphan
<
Token
>>&&
first
,
kj
::
Array
<
kj
::
Array
<
Orphan
<
Token
>>>&&
rest
)
->
kj
::
Array
<
kj
::
Array
<
Orphan
<
Token
>>>
{
...
...
@@ -133,9 +161,9 @@ bool lex(kj::ArrayPtr<const char> input,
}
return
result
.
finish
();
}
});
})
)
;
auto
token
=
p
::
oneOf
(
auto
&
token
=
arena
.
copy
(
p
::
oneOf
(
p
::
transformWithLocation
(
p
::
identifier
,
[
&
](
Location
loc
,
kj
::
String
name
)
->
Orphan
<
Token
>
{
auto
t
=
orphanage
.
newOrphan
<
Token
>
();
...
...
@@ -183,33 +211,13 @@ bool lex(kj::ArrayPtr<const char> input,
initTok
(
t
,
loc
).
initBracketedList
(
items
.
size
()),
kj
::
mv
(
items
));
return
t
;
})
);
auto
tokenSequence_
=
sequence
(
commentsAndWhitespace
,
many
(
sequence
(
token
,
commentsAndWhitespace
)));
tokenSequence
=
tokenSequence_
;
if
(
resultStatements
==
nullptr
)
{
// Only a token sequence is requested.
Input
parserInput
(
input
.
begin
(),
input
.
end
());
kj
::
Maybe
<
kj
::
Array
<
Orphan
<
Token
>>>
parseOutput
=
tokenSequence
(
parserInput
);
if
(
!
parserInput
.
atEnd
())
{
return
false
;
}
));
parsers
.
tokenSequence
=
arena
.
copy
(
p
::
sequence
(
commentsAndWhitespace
,
p
::
many
(
p
::
sequence
(
token
,
commentsAndWhitespace
))));
KJ_IF_MAYBE
(
output
,
parseOutput
)
{
auto
l
=
resultTokens
->
initTokens
(
output
->
size
());
for
(
uint
i
=
0
;
i
<
output
->
size
();
i
++
)
{
l
[
i
].
adoptToken
(
kj
::
mv
((
*
output
)[
i
]));
}
return
true
;
}
else
{
return
false
;
}
}
else
{
p
::
ParserRef
<
Input
,
kj
::
Array
<
Orphan
<
Statement
>>>
statementSequence
;
auto
&
statementSequence
=
parsers
.
statementSequence
;
auto
statementEnd
=
p
::
oneOf
(
auto
&
statementEnd
=
arena
.
copy
(
p
::
oneOf
(
transform
(
p
::
sequence
(
p
::
exactChar
<
';'
>
(),
docComment
),
[
&
](
kj
::
Array
<
kj
::
String
>&&
comment
)
->
Orphan
<
Statement
>
{
auto
result
=
orphanage
.
newOrphan
<
Statement
>
();
...
...
@@ -231,46 +239,26 @@ bool lex(kj::ArrayPtr<const char> input,
}
return
result
;
})
);
)
);
auto
statement
=
p
::
transform
(
p
::
sequence
(
tokenSequence
,
statementEnd
),
auto
&
statement
=
arena
.
copy
(
p
::
transform
(
p
::
sequence
(
tokenSequence
,
statementEnd
),
[
&
](
kj
::
Array
<
Orphan
<
Token
>>&&
tokens
,
Orphan
<
Statement
>&&
statement
)
{
auto
tokensBuilder
=
statement
.
get
().
initTokens
(
tokens
.
size
());
for
(
uint
i
=
0
;
i
<
tokens
.
size
();
i
++
)
{
tokensBuilder
[
i
].
adoptToken
(
kj
::
mv
(
tokens
[
i
]));
}
return
kj
::
mv
(
statement
);
}
);
})
);
auto
statementSequence_
=
sequence
(
commentsAndWhitespace
,
many
(
sequence
(
statement
,
commentsAndWhitespace
)));
statementSequence
=
statementSequence_
;
parsers
.
statementSequence
=
arena
.
copy
(
sequence
(
commentsAndWhitespace
,
many
(
sequence
(
statement
,
commentsAndWhitespace
))));
Input
parserInput
(
input
.
begin
(),
input
.
end
());
kj
::
Maybe
<
kj
::
Array
<
Orphan
<
Statement
>>>
parseOutput
=
statementSequence
(
parserInput
);
if
(
!
parserInput
.
atEnd
())
{
return
false
;
}
KJ_IF_MAYBE
(
output
,
parseOutput
)
{
auto
l
=
resultStatements
->
initStatements
(
output
->
size
());
for
(
uint
i
=
0
;
i
<
output
->
size
();
i
++
)
{
l
[
i
].
adoptStatement
(
kj
::
mv
((
*
output
)[
i
]));
}
return
true
;
}
else
{
return
false
;
}
}
parsers
.
token
=
token
;
parsers
.
statement
=
statement
;
parsers
.
emptySpace
=
commentsAndWhitespace
;
}
bool
lex
(
kj
::
ArrayPtr
<
const
char
>
input
,
LexedStatements
::
Builder
result
)
{
return
lex
(
kj
::
mv
(
input
),
&
result
,
nullptr
);
}
bool
lex
(
kj
::
ArrayPtr
<
const
char
>
input
,
LexedTokens
::
Builder
result
)
{
return
lex
(
kj
::
mv
(
input
),
nullptr
,
&
result
);
}
Lexer
::~
Lexer
()
{}
}
// namespace compiler
}
// namespace capnp
c++/src/capnp/compiler/lexer.h
View file @
ce4162e8
...
...
@@ -25,6 +25,8 @@
#define CAPNP_COMPILER_LEXER_H_
#include "lexer.capnp.h"
#include <kj/parse/common.h>
#include <kj/arena.h>
namespace
capnp
{
namespace
compiler
{
...
...
@@ -39,6 +41,54 @@ bool lex(kj::ArrayPtr<const char> input, LexedTokens::Builder result);
// that might form a part of one statement. In other words, in the latter case, the input should
// not contain semicolons or curly braces, unless they are in string literals of course.
class Lexer {
  // Advanced lexer interface.  This interface exposes the inner parsers so that you can embed them
  // into your own parsers.

public:
  Lexer(Orphanage orphanage);
  // `orphanage` is used to allocate Cap'n Proto message objects in the result.  Byte offsets are
  // not configured here: they are computed by `ParserInput`, relative to the `begin` pointer it
  // was constructed with.

  ~Lexer();

  class ParserInput: public kj::parse::IteratorInput<char, const char*> {
    // Like IteratorInput<char, const char*> except that positions are measured as byte offsets
    // rather than pointers.

  public:
    ParserInput(const char* begin, const char* end)
        : IteratorInput<char, const char*>(begin, end), begin(begin) {}
    // Top-level input covering [begin, end); `begin` becomes the origin for reported positions.

    explicit ParserInput(ParserInput& parent)
        : IteratorInput<char, const char*>(parent), begin(parent.begin) {}
    // Sub-input of `parent`; it shares the parent's origin so offsets stay comparable.

    inline uint32_t getPosition() {
      // The base class reports a pointer; convert it to an offset from the origin.  The narrowing
      // from the pointer difference to uint32_t is intentional and is spelled out explicitly.
      return static_cast<uint32_t>(IteratorInput<char, const char*>::getPosition() - begin);
    }

  private:
    const char* begin;
    // Origin against which positions are measured.
  };

  template <typename Output>
  using Parser = kj::parse::ParserRef<ParserInput, Output>;
  // Reference to a parser that consumes a ParserInput and produces `Output`.

  struct Parsers {
    // The set of parsers exposed by the lexer.

    Parser<kj::Tuple<>> emptySpace;
    Parser<Orphan<Token>> token;
    Parser<kj::Array<Orphan<Token>>> tokenSequence;
    Parser<Orphan<Statement>> statement;
    Parser<kj::Array<Orphan<Statement>>> statementSequence;
  };

  const Parsers& getParsers() const { return parsers; }
  // Read-only access to the parsers.  Declared const so that it is also usable through a
  // `const Lexer&`.

private:
  Orphanage orphanage;
  kj::Arena arena;
  Parsers parsers;
};
}
// namespace compiler
}
// namespace capnp
...
...
c++/src/kj/arena.h
View file @
ce4162e8
...
...
@@ -67,6 +67,11 @@ public:
// the need for the Arena itself to keep track of destructors to call later, which may make it
// slightly more efficient.
template
<
typename
T
>
inline
T
&
copy
(
T
&&
value
)
{
return
allocate
<
Decay
<
T
>>
(
kj
::
fwd
<
T
>
(
value
));
}
// Allocate a copy of the given value in the arena. This is just a shortcut for calling the
// type's copy (or move) constructor.
StringPtr
copyString
(
StringPtr
content
);
// Make a copy of the given string inside the arena, and return a pointer to the copy.
...
...
c++/src/kj/parse/common.h
View file @
ce4162e8
...
...
@@ -54,7 +54,7 @@ class IteratorInput {
public
:
IteratorInput
(
Iterator
begin
,
Iterator
end
)
:
parent
(
nullptr
),
pos
(
begin
),
end
(
end
),
best
(
begin
)
{}
IteratorInput
(
IteratorInput
&
parent
)
explicit
IteratorInput
(
IteratorInput
&
parent
)
:
parent
(
&
parent
),
pos
(
parent
.
pos
),
end
(
parent
.
end
),
best
(
parent
.
pos
)
{}
~
IteratorInput
()
{
if
(
parent
!=
nullptr
)
{
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment