// Copyright (c) 2013, Kenton Varda <temporal@gmail.com>
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
// 1. Redistributions of source code must retain the above copyright notice, this
//    list of conditions and the following disclaimer.
// 2. Redistributions in binary form must reproduce the above copyright notice,
//    this list of conditions and the following disclaimer in the documentation
//    and/or other materials provided with the distribution.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
// ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
// WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
// ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
// LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
// ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
// SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "lexer.h"
#include "../message.h"
#include <gtest/gtest.h>

namespace capnp {
namespace compiler {
namespace {

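// ErrorReporter whose addError() turns any report into an immediate gtest failure.  The
// happy-path tests below expect the lexer to report nothing, so any error that does get
// reported shows up in the test output together with its byte range.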
class TestFailingErrorReporter: public ErrorReporter {
public:
  void addError(uint32_t startByte, uint32_t endByte, kj::StringPtr message) const override {
    ADD_FAILURE() << "Parse failed: (" << startByte << "-" << endByte << ") " << message.cStr();
  }

  bool hadErrors() const override {
    // Not used by lexer.
    return false;
  }
};

template <typename LexResult>
kj::String doLex(kj::StringPtr constText) {
  // Parse the given string into the given Cap'n Proto struct type using lex(), then stringify the
  // result and return that string.  Additionally, single quotes in the input are converted to
  // double quotes, and double quotes in the output are converted to single quotes, to reduce the
  // amount of escaping needed in the test strings.
  //
  // Comparing stringifications against golden strings is ugly and brittle.  If we had a
  // text-format parser we could use that.  Except that said parser would probably be built on
  // the very lexer being tested here, so...  maybe this is the best we can reasonably do.

  kj::String text = heapString(constText);
  for (char& c: text) {
    // Make it easier to write input strings below.
    if (c == '\'') c = '\"';
  }
  MallocMessageBuilder message;
  auto file = message.initRoot<LexResult>();
  TestFailingErrorReporter errorReporter;
  EXPECT_TRUE(lex(text, file, errorReporter));
  kj::String result = kj::str(file);
  for (char& c: result) {
    // Make it easier to write golden strings below.
    if (c == '\"') c = '\'';
  }
  return result;
}
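
// A hypothetical companion to TestFailingErrorReporter (not part of the original suite):
// a minimal sketch of how the lexer's *error* path could be tested by recording reports
// instead of failing the test.  It assumes only the ErrorReporter interface used above
// plus kj::str(); the name TestCountingErrorReporter is invented for illustration.
class TestCountingErrorReporter: public ErrorReporter {
public:
  void addError(uint32_t startByte, uint32_t endByte, kj::StringPtr message) const override {
    // Record the report.  Members are mutable because the interface methods are const.
    ++errorCount;
    lastError = kj::str("(", startByte, "-", endByte, ") ", message);
  }

  bool hadErrors() const override {
    return errorCount > 0;
  }

  mutable uint errorCount = 0;
  mutable kj::String lastError;
};
// Usage sketch: pass an instance to lex() in place of TestFailingErrorReporter, then
// assert on errorCount / lastError instead of comparing a stringified parse tree.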

TEST(Lexer, Tokens) {
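  // Note: startByte is omitted from the expected strings whenever it is 0, because
  // capnp stringification skips fields that still hold their default value.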
  EXPECT_STREQ(
      "(tokens = ["
        "(identifier = 'foo', endByte = 3), "
        "(identifier = 'bar', startByte = 4, endByte = 7)"
      "])",
      doLex<LexedTokens>("foo bar").cStr());

  EXPECT_STREQ(
      "(tokens = ["
        "(identifier = 'foo', endByte = 3), "
        "(identifier = 'bar', startByte = 15, endByte = 18)"
      "])",
      doLex<LexedTokens>("foo # comment\n bar").cStr());

  EXPECT_STREQ(
      "(tokens = ["
        "(stringLiteral = 'foo ', startByte = 2, endByte = 11), "
        "(integerLiteral = 123, startByte = 12, endByte = 15), "
        "(floatLiteral = 2.75, startByte = 16, endByte = 20), "
        "(floatLiteral = 60000, startByte = 21, endByte = 24), "
        "(operator = '+', startByte = 25, endByte = 26), "
        "(operator = '-=', startByte = 27, endByte = 29)"
      "])",
      doLex<LexedTokens>("  'foo\\x20' 123 2.75 6e4 + -=  ").cStr());

  EXPECT_STREQ(
      "(tokens = ["
        "(parenthesizedList = ["
          "["
            "(identifier = 'foo', startByte = 1, endByte = 4), "
            "(identifier = 'bar', startByte = 5, endByte = 8)"
          "], ["
            "(identifier = 'baz', startByte = 10, endByte = 13), "
            "(identifier = 'qux', startByte = 14, endByte = 17)"
          "], ["
            "(identifier = 'corge', startByte = 19, endByte = 24), "
            "(identifier = 'grault', startByte = 25, endByte = 31)"
          "]"
        "], endByte = 32)"
      "])",
      doLex<LexedTokens>("(foo bar, baz qux, corge grault)").cStr());

  EXPECT_STREQ(
      "(tokens = ["
        "(parenthesizedList = ["
          "["
            "(identifier = 'foo', startByte = 1, endByte = 4), "
            "(identifier = 'bar', startByte = 5, endByte = 8)"
          "]"
        "], endByte = 9)"
      "])",
      doLex<LexedTokens>("(foo bar)").cStr());

  // Empty parentheses should result in an empty list-of-lists, *not* a list containing an empty
  // list.
  EXPECT_STREQ(
      "(tokens = ["
        "(parenthesizedList = [], endByte = 4)"
      "])",
      doLex<LexedTokens>("(  )").cStr());

  EXPECT_STREQ(
      "(tokens = ["
        "(bracketedList = ["
          "["
            "(identifier = 'foo', startByte = 1, endByte = 4), "
            "(identifier = 'bar', startByte = 5, endByte = 8)"
          "], ["
            "(identifier = 'baz', startByte = 10, endByte = 13), "
            "(identifier = 'qux', startByte = 14, endByte = 17)"
          "], ["
            "(identifier = 'corge', startByte = 19, endByte = 24), "
            "(identifier = 'grault', startByte = 25, endByte = 31)"
          "]"
        "], endByte = 32)"
      "])",
      doLex<LexedTokens>("[foo bar, baz qux, corge grault]").cStr());

  EXPECT_STREQ(
      "(tokens = ["
        "(bracketedList = ["
          "["
            "(identifier = 'foo', startByte = 1, endByte = 4)"
          "], ["
            "(parenthesizedList = ["
              "["
                "(identifier = 'bar', startByte = 7, endByte = 10)"
              "], ["
                "(identifier = 'baz', startByte = 12, endByte = 15)"
              "]"
            "], startByte = 6, endByte = 16)"
          "]"
        "], endByte = 17), "
        "(identifier = 'qux', startByte = 18, endByte = 21)"
      "])",
      doLex<LexedTokens>("[foo, (bar, baz)] qux").cStr());

  EXPECT_STREQ(
      "(tokens = ["
        "(identifier = 'foo', endByte = 3), "
        "(identifier = 'bar', startByte = 7, endByte = 10)"
      "])",
      doLex<LexedTokens>("foo\n\r\t\vbar").cStr());
}

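// A statement ends at ';' or at a braced block; each statement's startByte/endByte span
// includes its terminator, and statements nested in braces appear under 'block'.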
TEST(Lexer, Statements) {
  EXPECT_STREQ(
      "(statements = ["
        "(tokens = ["
          "(identifier = 'foo', endByte = 3), "
          "(identifier = 'bar', startByte = 4, endByte = 7)"
        "], endByte = 8)"
      "])",
      doLex<LexedStatements>("foo bar;").cStr());

  EXPECT_STREQ(
      "(statements = ["
        "(tokens = ["
          "(identifier = 'foo', endByte = 3)"
        "], endByte = 4), "
        "(tokens = ["
          "(identifier = 'bar', startByte = 5, endByte = 8)"
        "], startByte = 5, endByte = 9), "
        "(tokens = ["
          "(identifier = 'baz', startByte = 10, endByte = 13)"
        "], startByte = 10, endByte = 14)"
      "])",
      doLex<LexedStatements>("foo; bar; baz; ").cStr());

  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', endByte = 3)"
          "], "
          "block = ["
            "(tokens = ["
              "(identifier = 'bar', startByte = 5, endByte = 8)"
            "], startByte = 5, endByte = 9), "
            "(tokens = ["
              "(identifier = 'baz', startByte = 10, endByte = 13)"
            "], startByte = 10, endByte = 14)"
          "], "
          "endByte = 15"
        "), "
        "(tokens = ["
          "(identifier = 'qux', startByte = 16, endByte = 19)"
        "], startByte = 16, endByte = 20)"
      "])",
      doLex<LexedStatements>("foo {bar; baz;} qux;").cStr());
}

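// These cases pin down how doc comments bind: a '#' comment on the same line as the
// statement's terminator, or on the lines immediately following it, becomes the
// statement's docComment; the '#' and at most one following space are stripped;
// consecutive comment lines are concatenated; and a blank line detaches any comments
// after it.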
TEST(Lexer, DocComments) {
  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', endByte = 3)"
          "], "
          "docComment = 'blah blah\\n', "
          "endByte = 16"
        ")"
      "])",
      doLex<LexedStatements>("foo; # blah blah").cStr());

  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', endByte = 3)"
          "], "
          "docComment = 'blah blah\\n', "
          "endByte = 15"
        ")"
      "])",
      doLex<LexedStatements>("foo; #blah blah").cStr());

  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', endByte = 3)"
          "], "
          "docComment = ' blah blah\\n', "
          "endByte = 17"
        ")"
      "])",
      doLex<LexedStatements>("foo; #  blah blah").cStr());

  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', endByte = 3)"
          "], "
          "docComment = 'blah blah\\n', "
          "endByte = 16"
        ")"
      "])",
      doLex<LexedStatements>("foo;\n# blah blah").cStr());

  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', endByte = 3)"
          "], "
          "endByte = 4"
        ")"
      "])",
      doLex<LexedStatements>("foo;\n\n# blah blah").cStr());

  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', endByte = 3)"
          "], "
          "docComment = 'bar baz\\nqux corge\\n', "
          "endByte = 30"
        ")"
      "])",
      doLex<LexedStatements>("foo;\n # bar baz\n  # qux corge\n\n# grault\n# garply").cStr());

  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', endByte = 3)"
          "], "
          "block = ["
            "(tokens = ["
              "(identifier = 'bar', startByte = 17, endByte = 20)"
            "], docComment = 'hi\\n', startByte = 17, endByte = 27), "
            "(tokens = ["
              "(identifier = 'baz', startByte = 28, endByte = 31)"
            "], startByte = 28, endByte = 32)"
          "], "
          "docComment = 'blah blah\\n', "
          "endByte = 44"
        "), "
        "(tokens = ["
          "(identifier = 'qux', startByte = 44, endByte = 47)"
        "], startByte = 44, endByte = 48)"
      "])",
      doLex<LexedStatements>("foo {# blah blah\nbar; # hi\n baz;} # ignored\nqux;").cStr());

  EXPECT_STREQ(
      "(statements = ["
        "("
          "tokens = ["
            "(identifier = 'foo', endByte = 3)"
          "], "
          "block = ["
            "(tokens = ["
              "(identifier = 'bar', startByte = 5, endByte = 8)"
            "], startByte = 5, endByte = 9), "
            "(tokens = ["
              "(identifier = 'baz', startByte = 10, endByte = 13)"
            "], startByte = 10, endByte = 14)"
          "], "
          "docComment = 'late comment\\n', "
          "endByte = 31"
        "), "
        "(tokens = ["
          "(identifier = 'qux', startByte = 31, endByte = 34)"
        "], startByte = 31, endByte = 35)"
      "])",
      doLex<LexedStatements>("foo {bar; baz;}\n# late comment\nqux;").cStr());
}

}  // namespace
}  // namespace compiler
}  // namespace capnp