// Copyright (c) 2013-2014 Sandstorm Development Group, Inc. and contributors
// Licensed under the MIT License:
//
// Permission is hereby granted, free of charge, to any person obtaining a copy
// of this software and associated documentation files (the "Software"), to deal
// in the Software without restriction, including without limitation the rights
// to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
// copies of the Software, and to permit persons to whom the Software is
// furnished to do so, subject to the following conditions:
//
// The above copyright notice and this permission notice shall be included in
// all copies or substantial portions of the Software.
//
// THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
// IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
// FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
// AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
// LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
// OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
// THE SOFTWARE.

#include "lexer.h"
#include "../message.h"
#include <kj/compat/gtest.h>

namespace capnp {
namespace compiler {
namespace {

class TestFailingErrorReporter: public ErrorReporter {
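  // An ErrorReporter that turns any reported error into an immediate test
  // failure, so the tests below can assume lexing always succeeds.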
public:
  void addError(uint32_t startByte, uint32_t endByte, kj::StringPtr message) override {
    KJ_FAIL_EXPECT("Parse failed.", startByte, endByte, message);
  }

  bool hadErrors() override {
    // Not used by lexer.
    return false;
  }
};

template <typename LexResult>
kj::String doLex(kj::StringPtr constText) {
  // Parse the given string into the given Cap'n Proto struct type using lex(), then stringify the
  // result and return that string.  Additionally, single quotes in the input are converted to
  // double quotes, and double quotes in the output are converted to single quotes, to reduce the
  // amount of escaping needed in the test strings.
  //
  // Comparing stringifications against golden strings is ugly and brittle.  If we had a
  // text-format parser we could use that.  Except that said parser would probably be built on
  // the very lexer being tested here, so...  maybe this is the best we can reasonably do.

  kj::String text = heapString(constText);
  for (char& c: text) {
    // Make it easier to write input strings below.
    if (c == '\'') c = '\"';
  }
  MallocMessageBuilder message;
  auto file = message.initRoot<LexResult>();
  TestFailingErrorReporter errorReporter;
  EXPECT_TRUE(lex(text, file, errorReporter));
  kj::String result = kj::str(file);
  for (char& c: result) {
    // Make it easier to write golden strings below.
    if (c == '\"') c = '\'';
  }
  return result;
}

TEST(Lexer, Tokens) {
  EXPECT_STREQ(
      "(tokens = ["
        "(identifier = 'foo', startByte = 0, endByte = 3), "
        "(identifier = 'bar', startByte = 4, endByte = 7)"
      "])",
      doLex<LexedTokens>("foo bar").cStr());

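  // Comments are skipped entirely and produce no tokens.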
  EXPECT_STREQ(
      "(tokens = ["
        "(identifier = 'foo', startByte = 0, endByte = 3), "
        "(identifier = 'bar', startByte = 15, endByte = 18)"
      "])",
      doLex<LexedTokens>("foo # comment\n bar").cStr());

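  // Literals are decoded by the lexer: the '\x20' escape becomes a space in
  // the string literal, and 6e4 becomes the float 60000.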
  EXPECT_STREQ(
      "(tokens = ["
        "(stringLiteral = 'foo ', startByte = 2, endByte = 11), "
        "(integerLiteral = 123, startByte = 12, endByte = 15), "
        "(floatLiteral = 2.75, startByte = 16, endByte = 20), "
        "(floatLiteral = 60000, startByte = 21, endByte = 24), "
        "(operator = '+', startByte = 25, endByte = 26), "
        "(operator = '-=', startByte = 27, endByte = 29)"
      "])",
      doLex<LexedTokens>("  'foo\\x20' 123 2.75 6e4 + -=  ").cStr());

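  // A parenthesized list is comma-delimited; each element is itself a
  // sequence of tokens.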
  EXPECT_STREQ(
      "(tokens = ["
        "(parenthesizedList = ["
          "["
            "(identifier = 'foo', startByte = 1, endByte = 4), "
            "(identifier = 'bar', startByte = 5, endByte = 8)"
          "], ["
            "(identifier = 'baz', startByte = 10, endByte = 13), "
            "(identifier = 'qux', startByte = 14, endByte = 17)"
          "], ["
            "(identifier = 'corge', startByte = 19, endByte = 24), "
            "(identifier = 'grault', startByte = 25, endByte = 31)"
          "]"
        "], startByte = 0, endByte = 32)"
      "])",
      doLex<LexedTokens>("(foo bar, baz qux, corge grault)").cStr());

  EXPECT_STREQ(
      "(tokens = ["
        "(parenthesizedList = ["
          "["
            "(identifier = 'foo', startByte = 1, endByte = 4), "
            "(identifier = 'bar', startByte = 5, endByte = 8)"
          "]"
        "], startByte = 0, endByte = 9)"
      "])",
      doLex<LexedTokens>("(foo bar)").cStr());

  // Empty parentheses should result in an empty list-of-lists, *not* a list containing an empty
  // list.
  EXPECT_STREQ(
      "(tokens = ["
        "(parenthesizedList = [], startByte = 0, endByte = 4)"
      "])",
      doLex<LexedTokens>("(  )").cStr());

  EXPECT_STREQ(
      "(tokens = ["
        "(bracketedList = ["
          "["
            "(identifier = 'foo', startByte = 1, endByte = 4), "
            "(identifier = 'bar', startByte = 5, endByte = 8)"
          "], ["
            "(identifier = 'baz', startByte = 10, endByte = 13), "
            "(identifier = 'qux', startByte = 14, endByte = 17)"
          "], ["
            "(identifier = 'corge', startByte = 19, endByte = 24), "
            "(identifier = 'grault', startByte = 25, endByte = 31)"
          "]"
        "], startByte = 0, endByte = 32)"
      "])",
      doLex<LexedTokens>("[foo bar, baz qux, corge grault]").cStr());

  // Trailing commas should be stripped by the lexer rather than producing an
  // empty final list item.
  EXPECT_STREQ(
      "(tokens = ["
        "(bracketedList = ["
          "["
            "(identifier = 'foo', startByte = 1, endByte = 4)"
          "], ["
            "(identifier = 'bar', startByte = 6, endByte = 9)"
          "]"
        "], startByte = 0, endByte = 11)"
      "])",
      doLex<LexedTokens>("[foo, bar,]").cStr());

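  // Lists nest: here a parenthesizedList appears as an element of a
  // bracketedList.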
  EXPECT_STREQ(
      "(tokens = ["
        "(bracketedList = ["
          "["
            "(identifier = 'foo', startByte = 1, endByte = 4)"
          "], ["
            "(parenthesizedList = ["
              "["
                "(identifier = 'bar', startByte = 7, endByte = 10)"
              "], ["
                "(identifier = 'baz', startByte = 12, endByte = 15)"
              "]"
            "], startByte = 6, endByte = 16)"
          "]"
        "], startByte = 0, endByte = 17), "
        "(identifier = 'qux', startByte = 18, endByte = 21)"
      "])",
      doLex<LexedTokens>("[foo, (bar, baz)] qux").cStr());

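  // All whitespace characters, including \r, \t, and \v, separate tokens.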
  EXPECT_STREQ(
      "(tokens = ["
        "(identifier = 'foo', startByte = 0, endByte = 3), "
        "(identifier = 'bar', startByte = 7, endByte = 10)"
      "])",
      doLex<LexedTokens>("foo\n\r\t\vbar").cStr());
}

TEST(Lexer, Statements) {
  EXPECT_STREQ(
      "(statements = ["
        "(tokens = ["
          "(identifier = 'foo', startByte = 0, endByte = 3), "
          "(identifier = 'bar', startByte = 4, endByte = 7)"
        "], line = void, startByte = 0, endByte = 8)"
      "])",
      doLex<LexedStatements>("foo bar;").cStr());

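  // Each statement's byte range includes its terminating semicolon.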
  EXPECT_STREQ(
      "(statements = ["
        "(tokens = ["
          "(identifier = 'foo', startByte = 0, endByte = 3)"
        "], line = void, startByte = 0, endByte = 4), "
        "(tokens = ["
          "(identifier = 'bar', startByte = 5, endByte = 8)"
        "], line = void, startByte = 5, endByte = 9), "
        "(tokens = ["
          "(identifier = 'baz', startByte = 10, endByte = 13)"
        "], line = void, startByte = 10, endByte = 14)"
      "])",
      doLex<LexedStatements>("foo; bar; baz; ").cStr());

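  // A braced block becomes the statement's 'block' field, with the enclosed
  // statements nested inside it.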
  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', startByte = 0, endByte = 3)"
          "], "
          "block = ["
            "(tokens = ["
              "(identifier = 'bar', startByte = 5, endByte = 8)"
            "], line = void, startByte = 5, endByte = 9), "
            "(tokens = ["
              "(identifier = 'baz', startByte = 10, endByte = 13)"
            "], line = void, startByte = 10, endByte = 14)"
          "], "
          "startByte = 0, endByte = 15"
        "), "
        "(tokens = ["
          "(identifier = 'qux', startByte = 16, endByte = 19)"
        "], line = void, startByte = 16, endByte = 20)"
      "])",
      doLex<LexedStatements>("foo {bar; baz;} qux;").cStr());
}

TEST(Lexer, DocComments) {
  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', startByte = 0, endByte = 3)"
          "], "
          "line = void, "
          "docComment = 'blah blah\\n', "
          "startByte = 0, endByte = 16"
        ")"
      "])",
      doLex<LexedStatements>("foo; # blah blah").cStr());

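  // At most one space after '#' is stripped; any further leading whitespace
  // is preserved, as the next case shows.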
  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', startByte = 0, endByte = 3)"
          "], "
          "line = void, "
          "docComment = 'blah blah\\n', "
          "startByte = 0, endByte = 15"
        ")"
      "])",
      doLex<LexedStatements>("foo; #blah blah").cStr());

  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', startByte = 0, endByte = 3)"
          "], "
          "line = void, "
          "docComment = ' blah blah\\n', "
          "startByte = 0, endByte = 17"
        ")"
      "])",
      doLex<LexedStatements>("foo; #  blah blah").cStr());

  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', startByte = 0, endByte = 3)"
          "], "
          "line = void, "
          "docComment = 'blah blah\\n', "
          "startByte = 0, endByte = 16"
        ")"
      "])",
      doLex<LexedStatements>("foo;\n# blah blah").cStr());

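  // A blank line detaches a trailing comment from the preceding statement.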
  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', startByte = 0, endByte = 3)"
          "], "
          "line = void, "
          "startByte = 0, endByte = 4"
        ")"
      "])",
      doLex<LexedStatements>("foo;\n\n# blah blah").cStr());

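  // Consecutive comment lines merge into a single doc comment; comments after
  // an intervening blank line are dropped.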
  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', startByte = 0, endByte = 3)"
          "], "
          "line = void, "
          "docComment = 'bar baz\\nqux corge\\n', "
          "startByte = 0, endByte = 30"
        ")"
      "])",
      doLex<LexedStatements>("foo;\n # bar baz\n  # qux corge\n\n# grault\n# garply").cStr());

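  // Doc comments attach to block statements too.  Below, the comment just
  // inside the opening brace becomes the block's doc comment, so '# ignored'
  // after the closing brace is dropped; in the case that follows, where the
  // block has no doc comment yet, the comment after the closing brace
  // attaches instead.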
  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', startByte = 0, endByte = 3)"
          "], "
          "block = ["
            "(tokens = ["
              "(identifier = 'bar', startByte = 17, endByte = 20)"
            "], line = void, docComment = 'hi\\n', startByte = 17, endByte = 27), "
            "(tokens = ["
              "(identifier = 'baz', startByte = 28, endByte = 31)"
            "], line = void, startByte = 28, endByte = 32)"
          "], "
          "docComment = 'blah blah\\n', "
          "startByte = 0, endByte = 44"
        "), "
        "(tokens = ["
          "(identifier = 'qux', startByte = 44, endByte = 47)"
        "], line = void, startByte = 44, endByte = 48)"
      "])",
      doLex<LexedStatements>("foo {# blah blah\nbar; # hi\n baz;} # ignored\nqux;").cStr());

  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', startByte = 0, endByte = 3)"
          "], "
          "block = ["
            "(tokens = ["
              "(identifier = 'bar', startByte = 5, endByte = 8)"
            "], line = void, startByte = 5, endByte = 9), "
            "(tokens = ["
              "(identifier = 'baz', startByte = 10, endByte = 13)"
            "], line = void, startByte = 10, endByte = 14)"
          "], "
          "docComment = 'late comment\\n', "
          "startByte = 0, endByte = 31"
        "), "
        "(tokens = ["
          "(identifier = 'qux', startByte = 31, endByte = 34)"
        "], line = void, startByte = 31, endByte = 35)"
      "])",
      doLex<LexedStatements>("foo {bar; baz;}\n# late comment\nqux;").cStr());
}

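// The lexer treats a UTF-8 byte order mark (0xEF 0xBB 0xBF) like whitespace,
// whether it appears at the start of the input or between tokens.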
TEST(Lexer, Utf8Bom) {
  EXPECT_STREQ(
      "(tokens = ["
        "(identifier = 'foo', startByte = 3, endByte = 6), "
        "(identifier = 'bar', startByte = 7, endByte = 10), "
        "(identifier = 'baz', startByte = 13, endByte = 16)"
      "])",
      doLex<LexedTokens>("\xef\xbb\xbf""foo bar\xef\xbb\xbf""baz").cStr());
}

}  // namespace
}  // namespace compiler
}  // namespace capnp