JsonTokenizer.cs 32.8 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49
#region Copyright notice and license
// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endregion
using System;
using System.Collections.Generic;
using System.Globalization;
using System.IO;
using System.Text;

namespace Google.Protobuf
{
    /// <summary>
    /// Simple but strict JSON tokenizer, rigidly following RFC 7159.
    /// </summary>
    /// <remarks>
    /// <para>
    /// This tokenizer is stateful, and only returns "useful" tokens - names, values etc.
    /// It does not create tokens for the separator between names and values, or for the comma
    /// between values. It validates the token stream as it goes - so callers can assume that the
    /// tokens it produces are appropriate. For example, it would never produce "start object, end array."
    /// </para>
50
    /// <para>Implementation details: the base class handles single token push-back and object depth tracking; derived classes supply the tokens themselves via <c>NextImpl</c>.</para>
51 52
    /// <para>Not thread-safe.</para>
    /// </remarks>
53
    internal abstract class JsonTokenizer
54 55
    {
        // Token handed back via PushBack, waiting to be returned by the next call to Next(); null when nothing is buffered.
        private JsonToken bufferedToken;
56 57

        /// <summary>
        /// Builds a tokenizer which lexes the JSON text pulled from the supplied text reader.
        /// </summary>
        /// <param name="reader">The text reader providing the JSON to tokenize.</param>
        /// <returns>A tokenizer over the reader's text.</returns>
        internal static JsonTokenizer FromTextReader(TextReader reader)
        {
            JsonTokenizer textTokenizer = new JsonTextTokenizer(reader);
            return textTokenizer;
        }
64

65 66 67 68 69 70 71
        /// <summary>
        /// Builds a tokenizer which replays the supplied tokens first and, once they have all been returned,
        /// carries on reading from <paramref name="continuation"/>. Pushing a token back on the returned tokenizer
        /// does not push it back on the continuation tokenizer (or vice versa), so use this with care - it was
        /// created for the sake of Any parsing.
        /// </summary>
        /// <param name="tokens">The tokens to replay, in order, before consulting the continuation.</param>
        /// <param name="continuation">The tokenizer to read from once the replayed tokens are exhausted.</param>
        /// <returns>A tokenizer yielding the replayed tokens followed by the continuation's tokens.</returns>
        internal static JsonTokenizer FromReplayedTokens(IList<JsonToken> tokens, JsonTokenizer continuation)
        {
            JsonTokenizer replayTokenizer = new JsonReplayTokenizer(tokens, continuation);
            return replayTokenizer;
        }

76 77 78 79 80 81
        /// <summary>
        /// Returns the depth of the stack, purely in objects (not collections).
        /// Informally, this is the number of remaining unclosed '{' characters we have.
        /// </summary>
        /// <remarks>
        /// <see cref="Next"/> increments this for each StartObject token it returns and decrements it for each
        /// EndObject token; <see cref="PushBack"/> applies the opposite adjustment for the token pushed back.
        /// </remarks>
        internal int ObjectDepth { get; private set; }

82 83
        // TODO: Why do we allow a different token to be pushed back? It might be better to always remember the previous
        // token returned, and allow a parameterless Rewind() method (which could only be called once, just like the current PushBack).
        /// <summary>
        /// Hands a token back to the tokenizer so that the next call to <see cref="Next"/> returns it.
        /// Only a single token can be buffered at any time. <see cref="ObjectDepth"/> is adjusted in the opposite
        /// direction to the adjustment <see cref="Next"/> makes: a StartObject token lowers it, an EndObject token raises it.
        /// </summary>
        /// <param name="token">The token to be returned by the next call to <see cref="Next"/>.</param>
        /// <exception cref="InvalidOperationException">A token has already been pushed back and not yet read again.</exception>
        internal void PushBack(JsonToken token)
        {
            if (bufferedToken != null)
            {
                throw new InvalidOperationException("Can't push back twice");
            }
            bufferedToken = token;
            switch (token.Type)
            {
                case JsonToken.TokenType.StartObject:
                    ObjectDepth--;
                    break;
                case JsonToken.TokenType.EndObject:
                    ObjectDepth++;
                    break;
            }
        }

        /// <summary>
        /// Returns the next JSON token in the stream. An EndDocument token is returned to indicate the end of the stream,
        /// after which point <c>Next()</c> should not be called again.
        /// </summary>
        /// <remarks>
        /// A token previously handed to <see cref="PushBack"/> takes priority and is returned (and cleared) first;
        /// otherwise the token comes from <see cref="NextImpl"/>. Either way, <see cref="ObjectDepth"/> is updated
        /// for StartObject and EndObject tokens before the token is returned.
        /// </remarks>
        /// <returns>The next token in the stream. This is never null.</returns>
        /// <exception cref="InvalidOperationException">This method is called after an EndDocument token has been returned</exception>
        /// <exception cref="InvalidJsonException">The input text does not comply with RFC 7159</exception>
        internal JsonToken Next()
        {
            JsonToken result = bufferedToken;
            if (result != null)
            {
                // Consume the pushed-back token so that it is only replayed once.
                bufferedToken = null;
            }
            else
            {
                result = NextImpl();
            }

            switch (result.Type)
            {
                case JsonToken.TokenType.StartObject:
                    ObjectDepth++;
                    break;
                case JsonToken.TokenType.EndObject:
                    ObjectDepth--;
                    break;
            }
            return result;
        }

        /// <summary>
        /// Produces the next JSON token in the stream on behalf of the base class. <see cref="Next"/> calls this
        /// whenever it has no pushed-back token to return.
        /// </summary>
        /// <returns>The next token in the stream.</returns>
        /// <exception cref="InvalidOperationException">This method is called after an EndDocument token has been returned</exception>
        /// <exception cref="InvalidJsonException">The input text does not comply with RFC 7159</exception>
        protected abstract JsonToken NextImpl();
139

140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167
        /// <summary>
        /// Reads and discards the value which comes next. This must only be called immediately after reading
        /// a property name. When the value is an object or an array, everything up to and including its matching
        /// end token is discarded.
        /// </summary>
        internal void SkipValue()
        {
            // Next() is relied upon to validate that end-object/end-array tokens are well matched, so only
            // the number of containers still waiting to be closed is tracked here.
            int openContainers = 0;
            while (true)
            {
                // At least one token is always consumed, even for a simple value.
                var type = Next().Type;
                if (type == JsonToken.TokenType.StartArray || type == JsonToken.TokenType.StartObject)
                {
                    openContainers++;
                }
                else if (type == JsonToken.TokenType.EndArray || type == JsonToken.TokenType.EndObject)
                {
                    openContainers--;
                }

                if (openContainers == 0)
                {
                    return;
                }
            }
        }

168
        /// <summary>
        /// Tokenizer which hands out a fixed list of tokens first and, once that list has been used up,
        /// obtains every further token from another tokenizer.
        /// </summary>
        private class JsonReplayTokenizer : JsonTokenizer
        {
            // Tokens to replay, in order.
            private readonly IList<JsonToken> tokens;
            // Tokenizer consulted after the replay list is exhausted.
            private readonly JsonTokenizer nextTokenizer;
            // Index into tokens of the next token to replay.
            private int replayPosition;

            internal JsonReplayTokenizer(IList<JsonToken> tokens, JsonTokenizer nextTokenizer)
            {
                this.nextTokenizer = nextTokenizer;
                this.tokens = tokens;
            }

            // FIXME: Object depth not maintained...
            protected override JsonToken NextImpl()
            {
                bool stillReplaying = replayPosition < tokens.Count;
                if (stillReplaying)
                {
                    return tokens[replayPosition++];
                }
                return nextTokenizer.Next();
            }
        }

        /// <summary>
195
        /// Tokenizer which does all the *real* work of parsing JSON.
196
        /// </summary>
197
        private sealed class JsonTextTokenizer : JsonTokenizer
198
        {
199 200 201 202 203 204 205 206
            // The set of states in which a value is a valid next token.
            private static readonly State ValueStates = State.ArrayStart | State.ArrayAfterComma | State.ObjectAfterColon | State.StartOfDocument;

            // Containers currently open around the read position; the constructor seeds it with Document.
            private readonly Stack<ContainerType> containerStack = new Stack<ContainerType>();
            // Character source, wrapped so that characters can be pushed back after being read.
            private readonly PushBackReader reader;
            // Current position within the JSON grammar; updated as characters are consumed.
            private State state;

            /// <summary>
            /// Creates a tokenizer positioned at the start of a document whose text comes from the given reader.
            /// </summary>
            /// <param name="reader">The text reader supplying the JSON text.</param>
            internal JsonTextTokenizer(TextReader reader)
            {
                // The document itself is the outermost container, so the stack is never empty while parsing.
                containerStack.Push(ContainerType.Document);
                state = State.StartOfDocument;
                this.reader = new PushBackReader(reader);
            }

            /// <remarks>
            /// Consumes characters one at a time. Whitespace is skipped; structural characters which do not
            /// produce a token (colon and comma) are validated and merely change the state (e.g. from
            /// ObjectBeforeColon to ObjectAfterColon); and as soon as something which is a genuine token
            /// (e.g. a start object, or a value) has been read, that token is returned. The method is long,
            /// but most of it is a single switch over the character just read, in which some branches return
            /// a token and others go round the loop again.
            /// </remarks>
            protected override JsonToken NextImpl()
            {
                if (state == State.ReaderExhausted)
                {
                    throw new InvalidOperationException("Next() called after end of document");
                }
                for (;;)
                {
                    char? read = reader.Read();
                    if (!read.HasValue)
                    {
                        // End of text is only acceptable once the single top-level value is complete.
                        ValidateState(State.ExpectedEndOfDocument, "Unexpected end of document in state: ");
                        state = State.ReaderExhausted;
                        return JsonToken.EndDocument;
                    }

                    char current = read.Value;
                    if (current == ' ' || current == '\t' || current == '\r' || current == '\n')
                    {
                        // Whitespace between tokens is skipped.
                        continue;
                    }

                    switch (current)
                    {
                        case ':':
                            ValidateState(State.ObjectBeforeColon, "Invalid state to read a colon: ");
                            state = State.ObjectAfterColon;
                            break;
                        case ',':
                            ValidateState(State.ObjectAfterProperty | State.ArrayAfterValue, "Invalid state to read a comma: ");
                            if (state == State.ObjectAfterProperty)
                            {
                                state = State.ObjectAfterComma;
                            }
                            else
                            {
                                state = State.ArrayAfterComma;
                            }
                            break;
                        case '"':
                            string text = ReadString();
                            // A string is a property name when one is expected, and a value otherwise.
                            bool expectingName = (state & (State.ObjectStart | State.ObjectAfterComma)) != 0;
                            if (!expectingName)
                            {
                                ValidateAndModifyStateForValue("Invalid state to read a double quote: ");
                                return JsonToken.Value(text);
                            }
                            state = State.ObjectBeforeColon;
                            return JsonToken.Name(text);
                        case '{':
                            ValidateState(ValueStates, "Invalid state to read an open brace: ");
                            state = State.ObjectStart;
                            containerStack.Push(ContainerType.Object);
                            return JsonToken.StartObject;
                        case '}':
                            ValidateState(State.ObjectAfterProperty | State.ObjectStart, "Invalid state to read a close brace: ");
                            PopContainer();
                            return JsonToken.EndObject;
                        case '[':
                            ValidateState(ValueStates, "Invalid state to read an open square bracket: ");
                            state = State.ArrayStart;
                            containerStack.Push(ContainerType.Array);
                            return JsonToken.StartArray;
                        case ']':
                            ValidateState(State.ArrayAfterValue | State.ArrayStart, "Invalid state to read a close square bracket: ");
                            PopContainer();
                            return JsonToken.EndArray;
                        case 'n': // First letter of null
                            ConsumeLiteral("null");
                            ValidateAndModifyStateForValue("Invalid state to read a null literal: ");
                            return JsonToken.Null;
                        case 't': // First letter of true
                            ConsumeLiteral("true");
                            ValidateAndModifyStateForValue("Invalid state to read a true literal: ");
                            return JsonToken.True;
                        case 'f': // First letter of false
                            ConsumeLiteral("false");
                            ValidateAndModifyStateForValue("Invalid state to read a false literal: ");
                            return JsonToken.False;
                        default:
                            // A minus sign or a digit starts a number; anything else cannot start a token.
                            if (current == '-' || (current >= '0' && current <= '9'))
                            {
                                double number = ReadNumber(current);
                                ValidateAndModifyStateForValue("Invalid state to read a number token: ");
                                return JsonToken.Value(number);
                            }
                            throw new InvalidJsonException("Invalid first character of token: " + current);
                    }
                }
            }

313
            /// <summary>
            /// Checks that the current state is one of the permitted states, throwing an exception
            /// created by the reader if it is not.
            /// </summary>
            /// <param name="validStates">Bitwise combination of the states which are acceptable at this point.</param>
            /// <param name="errorPrefix">Text placed before the current state in the exception message.</param>
            private void ValidateState(State validStates, string errorPrefix)
            {
                bool permitted = (state & validStates) != 0;
                if (permitted)
                {
                    return;
                }
                throw reader.CreateException(errorPrefix + state);
            }

321 322 323 324
            /// <summary>
            /// Reads the remainder of a string token, up to and including the closing double quote.
            /// The opening double quote is assumed to have been read already.
            /// </summary>
            /// <returns>The content of the string with escape sequences resolved.</returns>
            private string ReadString()
            {
                StringBuilder buffer = new StringBuilder();
                // True when the previous code unit was a high surrogate still waiting for its low surrogate.
                bool pendingHighSurrogate = false;
                for (;;)
                {
                    char raw = reader.ReadOrFail("Unexpected end of text while reading string");
                    if (raw < ' ')
                    {
                        throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in string literal: U+{0:x4}", (int) raw));
                    }
                    if (raw == '"')
                    {
                        // An unescaped quote ends the string; it must not leave a high surrogate dangling.
                        if (!pendingHighSurrogate)
                        {
                            return buffer.ToString();
                        }
                        throw reader.CreateException("Invalid use of surrogate pair code units");
                    }

                    char unit = raw == '\\' ? ReadEscapedCharacter() : raw;

                    // TODO: Consider only allowing surrogate pairs that are either both escaped,
                    // or both not escaped. It would be a very odd text stream that contained a "lone" high surrogate
                    // followed by an escaped low surrogate or vice versa... and that couldn't even be represented in UTF-8.
                    bool isLowSurrogate = char.IsLowSurrogate(unit);
                    if (isLowSurrogate != pendingHighSurrogate)
                    {
                        // Either a low surrogate without a preceding high one, or a high one not followed by a low one.
                        throw reader.CreateException("Invalid use of surrogate pair code units");
                    }
                    pendingHighSurrogate = char.IsHighSurrogate(unit);
                    buffer.Append(unit);
                }
            }

359 360 361 362
            /// <summary>
            /// Reads the remainder of an escape sequence and returns the character it denotes.
            /// The leading backslash is assumed to have been read already.
            /// </summary>
            /// <returns>The character represented by the escape sequence.</returns>
            private char ReadEscapedCharacter()
            {
                // Single-character escape codes, and at the same index the character each one stands for.
                const string EscapeCodes = "n\\bfrt\"/";
                const string EscapeValues = "\n\\\b\f\r\t\"/";

                char code = reader.ReadOrFail("Unexpected end of text while reading character escape sequence");
                if (code == 'u')
                {
                    return ReadUnicodeEscape();
                }

                int position = EscapeCodes.IndexOf(code);
                if (position < 0)
                {
                    throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in character escape sequence: U+{0:x4}", (int) code));
                }
                return EscapeValues[position];
            }

390 391 392 393
            /// <summary>
            /// Reads the four hexadecimal digits of a Unicode escape sequence and returns the UTF-16 code unit
            /// they spell out. The leading \u is assumed to have been read already.
            /// </summary>
            /// <returns>The code unit given by the four hex digits.</returns>
            private char ReadUnicodeEscape()
            {
                int codeUnit = 0;
                int digitsRemaining = 4;
                while (digitsRemaining > 0)
                {
                    char hex = reader.ReadOrFail("Unexpected end of text while reading Unicode escape sequence");
                    int digit;
                    if (hex >= 'A' && hex <= 'F')
                    {
                        digit = 10 + (hex - 'A');
                    }
                    else if (hex >= 'a' && hex <= 'f')
                    {
                        digit = 10 + (hex - 'a');
                    }
                    else if (hex >= '0' && hex <= '9')
                    {
                        digit = hex - '0';
                    }
                    else
                    {
                        throw reader.CreateException(string.Format(CultureInfo.InvariantCulture, "Invalid character in character escape sequence: U+{0:x4}", (int) hex));
                    }
                    // Shift the digits read so far up by one hex place and add the new one.
                    codeUnit = codeUnit * 16 + digit;
                    digitsRemaining--;
                }
                return (char) codeUnit;
            }

421 422 423 424 425
            /// <summary>
            /// Reads the rest of a text-only literal, throwing an exception if the text read doesn't match it.
            /// The first letter of the literal is assumed to have been read already, so matching starts at
            /// the second character.
            /// </summary>
            /// <param name="text">The complete literal expected, including its first letter.</param>
            private void ConsumeLiteral(string text)
            {
                int index = 1;
                while (index < text.Length)
                {
                    char? read = reader.Read();
                    if (!read.HasValue)
                    {
                        throw reader.CreateException("Unexpected end of text while reading literal token " + text);
                    }
                    if (read.Value != text[index])
                    {
                        throw reader.CreateException("Unexpected character while reading literal token " + text);
                    }
                    index++;
                }
            }
440 441

            /// <summary>
            /// Reads a numeric literal whose first character has already been read, and parses it as a double.
            /// </summary>
            /// <remarks>
            /// The integer part, optional fraction and optional exponent are accumulated into a single string,
            /// which is then parsed using the invariant culture. The first character which is not part of the
            /// number is pushed back onto the reader so that it can be read again as part of the next token.
            /// </remarks>
            /// <param name="initialCharacter">The character which started the number: a minus sign or a digit.</param>
            /// <returns>The value of the literal.</returns>
            private double ReadNumber(char initialCharacter)
            {
                StringBuilder builder = new StringBuilder();
                if (initialCharacter == '-')
                {
                    builder.Append("-");
                }
                else
                {
                    // A leading digit belongs to the integer part, so let ReadInt see it again.
                    reader.PushBack(initialCharacter);
                }
                // Each method returns the character it read that doesn't belong in that part,
                // so we know what to do next, including pushing the character back at the end.
                // null is returned for "end of text".
                char? next = ReadInt(builder);
                if (next == '.')
                {
                    next = ReadFrac(builder);
                }
                if (next == 'e' || next == 'E')
                {
                    next = ReadExp(builder);
                }
                // If we read a character which wasn't part of the number, push it back so we can read it again
                // to parse the next token.
                if (next != null)
                {
                    reader.PushBack(next.Value);
                }

                // TODO: What exception should we throw if the value can't be represented as a double?
                try
                {
                    double result = double.Parse(builder.ToString(),
                        NumberStyles.AllowLeadingSign | NumberStyles.AllowDecimalPoint | NumberStyles.AllowExponent,
                        CultureInfo.InvariantCulture);

                    // Newer runtimes (.NET Core 3.0 onwards) return an infinity instead of throwing
                    // OverflowException when the magnitude is too large for a double. The literal itself can never
                    // spell infinity, so an infinite result always means the value was out of range.
                    if (double.IsInfinity(result))
                    {
                        throw reader.CreateException("Numeric value out of range: " + builder);
                    }
                    return result;
                }
                catch (OverflowException)
                {
                    // Older runtimes report an out-of-range value by throwing.
                    throw reader.CreateException("Numeric value out of range: " + builder);
                }
            }
483 484

            /// <summary>
            /// Reads the integer part of a numeric literal, appending its digits to the builder.
            /// At least one digit is required, and a leading zero may not be followed by further digits.
            /// </summary>
            /// <param name="builder">Receives the digits which are read.</param>
            /// <returns>The first character read which is not part of the integer, or null at the end of the text.</returns>
            private char? ReadInt(StringBuilder builder)
            {
                char lead = reader.ReadOrFail("Invalid numeric literal");
                bool leadIsDigit = lead >= '0' && lead <= '9';
                if (!leadIsDigit)
                {
                    throw reader.CreateException("Invalid numeric literal");
                }
                builder.Append(lead);

                int furtherDigits;
                char? terminator = ConsumeDigits(builder, out furtherDigits);
                if (furtherDigits != 0 && lead == '0')
                {
                    throw reader.CreateException("Invalid numeric literal: leading 0 for non-zero value.");
                }
                return terminator;
            }
500 501

            /// <summary>
            /// Reads the fractional part of a numeric literal, appending it to the builder.
            /// The decimal point is assumed to have been read already; at least one digit must follow it.
            /// </summary>
            /// <param name="builder">Receives the decimal point and the digits which are read.</param>
            /// <returns>The first character read which is not part of the fraction, or null at the end of the text.</returns>
            private char? ReadFrac(StringBuilder builder)
            {
                // The caller consumed the decimal point without recording it.
                builder.Append('.');

                int fractionDigits;
                char? terminator = ConsumeDigits(builder, out fractionDigits);
                if (fractionDigits != 0)
                {
                    return terminator;
                }
                throw reader.CreateException("Invalid numeric literal: fraction with no trailing digits");
            }

513
            /// <summary>
            /// Reads the exponent part of a numeric literal, appending it to the builder.
            /// The 'e' or 'E' is assumed to have been read already; an optional sign may follow it, and then
            /// at least one digit is required.
            /// </summary>
            /// <param name="builder">Receives the exponent marker, sign (if any) and digits.</param>
            /// <returns>The first character read which is not part of the exponent, or null at the end of the text.</returns>
            private char? ReadExp(StringBuilder builder)
            {
                // The caller consumed the 'e' or 'E' without recording it; it is always recorded as 'E'.
                builder.Append('E');

                char? afterMarker = reader.Read();
                if (!afterMarker.HasValue)
                {
                    throw reader.CreateException("Invalid numeric literal: exponent with no trailing digits");
                }

                char candidate = afterMarker.Value;
                if (candidate == '+' || candidate == '-')
                {
                    builder.Append(candidate);
                }
                else
                {
                    // Not a sign, so it should be the first digit: let ConsumeDigits read it.
                    reader.PushBack(candidate);
                }

                int exponentDigits;
                char? terminator = ConsumeDigits(builder, out exponentDigits);
                if (exponentDigits != 0)
                {
                    return terminator;
                }
                throw reader.CreateException("Invalid numeric literal: exponent without value");
            }

538
            /// <summary>
            /// Reads a run of decimal digits (possibly empty), appending each one to the builder.
            /// </summary>
            /// <param name="builder">Receives the digits which are read.</param>
            /// <param name="count">Set to the number of digits which were read.</param>
            /// <returns>The first character read which is not a digit, or null at the end of the text.</returns>
            private char? ConsumeDigits(StringBuilder builder, out int count)
            {
                count = 0;
                char? current = reader.Read();
                while (current.HasValue && current.Value >= '0' && current.Value <= '9')
                {
                    builder.Append(current.Value);
                    count++;
                    current = reader.Read();
                }
                return current;
            }

553 554 555 556 557
            /// <summary>
            /// Checks that a value is permitted in the current state (throwing with the given error prefix if not)
            /// and then moves to the state which follows a completed value, e.g. from ObjectAfterColon to
            /// ObjectAfterProperty.
            /// </summary>
            /// <param name="errorPrefix">Text placed before the current state in the exception message.</param>
            private void ValidateAndModifyStateForValue(string errorPrefix)
            {
                ValidateState(ValueStates, errorPrefix);
                if (state == State.StartOfDocument)
                {
                    state = State.ExpectedEndOfDocument;
                }
                else if (state == State.ObjectAfterColon)
                {
                    state = State.ObjectAfterProperty;
                }
                else if (state == State.ArrayStart || state == State.ArrayAfterComma)
                {
                    state = State.ArrayAfterValue;
                }
                else
                {
                    // Only reachable if ValueStates contains a state which has no transition above.
                    throw new InvalidOperationException("ValidateAndModifyStateForValue does not handle all value states (and should)");
                }
            }

            /// <summary>
            /// Removes the innermost container from the stack and moves to the state which follows a completed
            /// value in the container which is now innermost.
            /// </summary>
            private void PopContainer()
            {
                containerStack.Pop();
                ContainerType enclosing = containerStack.Peek();
                if (enclosing == ContainerType.Object)
                {
                    state = State.ObjectAfterProperty;
                }
                else if (enclosing == ContainerType.Array)
                {
                    state = State.ArrayAfterValue;
                }
                else if (enclosing == ContainerType.Document)
                {
                    state = State.ExpectedEndOfDocument;
                }
                else
                {
                    throw new InvalidOperationException("Unexpected container type: " + enclosing);
                }
            }
600

601
            /// <summary>
            /// The kinds of container which can appear on the container stack.
            /// </summary>
            private enum ContainerType
            {
                // The document as a whole; pushed once when the tokenizer is constructed.
                Document,
                // A JSON object; pushed when an open brace is read.
                Object,
                // A JSON array; pushed when an open square bracket is read.
                Array
            }

            /// <summary>
            /// The states the tokenizer can be in.
            /// </summary>
            /// <remarks>
            /// <para>This is declared as a flags enum only so that a set of acceptable states can be expressed
            /// as a single value and checked with one bitwise operation; the tokenizer itself is in exactly one
            /// state at a time.</para>
            /// <para>
            /// Each state is documented with an example in which ^ marks the current position within the text
            /// stream. The examples all use string values, but any value could appear instead, including nested
            /// objects/arrays. The complete state of the tokenizer also includes a stack indicating the
            /// enclosing contexts (arrays/objects).
            /// The notional state "AfterValue" used below is not a member of this enum: it means that a value
            /// has been completed, at which point there's an immediate transition to ExpectedEndOfDocument,
            /// ObjectAfterProperty or ArrayAfterValue, depending on the enclosing container.
            /// </para>
            /// <para>
            /// These states were derived manually by reading RFC 7159 carefully.
            /// </para>
            /// </remarks>
            [Flags]
            private enum State
            {
                /// <summary>
                /// ^ { "foo": "bar" }
                /// Nothing has been read yet; the document's value comes next.
                /// Next states: ObjectStart, ArrayStart, "AfterValue"
                /// </summary>
                StartOfDocument = 1 << 0,
                /// <summary>
                /// { "foo": "bar" } ^
                /// The document's value is complete, so only the end of the text is acceptable.
                /// Next states: ReaderExhausted
                /// </summary>
                ExpectedEndOfDocument = 1 << 1,
                /// <summary>
                /// { "foo": "bar" } ^ (and the end of the reader has already been reached)
                /// Terminal state.
                /// </summary>
                ReaderExhausted = 1 << 2,
                /// <summary>
                /// { ^ "foo": "bar" }
                /// Just inside an object, before its *first* property.
                /// Next states:
                /// "AfterValue" (empty object)
                /// ObjectBeforeColon (read a name)
                /// </summary>
                ObjectStart = 1 << 3,
                /// <summary>
                /// { "foo" ^ : "bar", "x": "y" }
                /// A property name has been read, so a colon must follow.
                /// Next state: ObjectAfterColon
                /// </summary>
                ObjectBeforeColon = 1 << 4,
                /// <summary>
                /// { "foo" : ^ "bar", "x": "y" }
                /// The colon after a property name has been read, so the property's value must follow.
                /// Next states:
                /// "AfterValue" (value is simple)
                /// ObjectStart (value is object)
                /// ArrayStart (value is array)
                /// </summary>
                ObjectAfterColon = 1 << 5,
                /// <summary>
                /// { "foo" : "bar" ^ , "x" : "y" }
                /// A property is complete, so either a comma or the end of the object must follow.
                /// Next states: ObjectAfterComma or "AfterValue"
                /// </summary>
                ObjectAfterProperty = 1 << 6,
                /// <summary>
                /// { "foo":"bar", ^ "x":"y" }
                /// The comma after a property has been read, so another property must follow.
                /// This is like ObjectStart, except that a closing brace isn't valid here.
                /// Next state: ObjectBeforeColon.
                /// </summary>
                ObjectAfterComma = 1 << 7,
                /// <summary>
                /// [ ^ "foo", "bar" ]
                /// Just inside an array, before its *first* value.
                /// Next states:
                /// "AfterValue" (read a value)
                /// "AfterValue" (end of array; will pop stack)
                /// </summary>
                ArrayStart = 1 << 8,
                /// <summary>
                /// [ "foo" ^ , "bar" ]
                /// A value in an array is complete, so either a comma or the end of the array must follow.
                /// Next states: ArrayAfterComma or "AfterValue"
                /// </summary>
                ArrayAfterValue = 1 << 9,
                /// <summary>
                /// [ "foo", ^ "bar" ]
                /// The comma after an array value has been read, so there *must* be another value (simple or complex).
                /// Next states: "AfterValue" (simple value), ObjectStart, ArrayStart
                /// </summary>
                ArrayAfterComma = 1 << 10
            }
699 700

            /// <summary>
            /// Wrapper around a text reader which allows a single character to be pushed back
            /// after it has been read. Location tracking for error reporting is not implemented
            /// yet (see the TODO comments below).
            /// </summary>
            private class PushBackReader
            {
                // TODO: Add locations for errors etc.

                private readonly TextReader reader;

                /// <summary>
                /// Creates a push-back reader which pulls its characters from <paramref name="reader"/>.
                /// </summary>
                /// <param name="reader">The underlying reader to consume characters from.</param>
                internal PushBackReader(TextReader reader)
                {
                    // TODO: Wrap the reader in a BufferedReader?
                    this.reader = reader;
                }

                /// <summary>
                /// The buffered next character, if we have one.
                /// </summary>
                private char? nextChar;

                /// <summary>
                /// Returns the next character in the stream, or null if we have reached the end.
                /// </summary>
                /// <returns>
                /// The pushed-back character if there is one (which clears the buffer); otherwise the
                /// next character from the underlying reader, or null if that reader reports end of input.
                /// </returns>
                internal char? Read()
                {
                    // A pushed-back character takes priority over the underlying reader.
                    if (nextChar != null)
                    {
                        char? tmp = nextChar;
                        nextChar = null;
                        return tmp;
                    }
                    // TextReader.Read signals end of input with -1.
                    int next = reader.Read();
                    return next == -1 ? null : (char?) next;
                }

                /// <summary>
                /// Reads the next character, failing if the end of the input has been reached.
                /// </summary>
                /// <param name="messageOnFailure">The message for the exception thrown when no character is available.</param>
                /// <returns>The next character.</returns>
                /// <exception cref="InvalidJsonException">There are no more characters to read.</exception>
                internal char ReadOrFail(string messageOnFailure)
                {
                    char? next = Read();
                    if (next == null)
                    {
                        throw CreateException(messageOnFailure);
                    }
                    return next.Value;
                }

                /// <summary>
                /// Pushes a character back so that it is returned by the next call to <see cref="Read"/>.
                /// Only one character can be buffered at a time.
                /// </summary>
                /// <param name="c">The character to push back.</param>
                /// <exception cref="InvalidOperationException">A character is already buffered.</exception>
                internal void PushBack(char c)
                {
                    if (nextChar != null)
                    {
                        throw new InvalidOperationException("Cannot push back when already buffering a character");
                    }
                    nextChar = c;
                }

                /// <summary>
                /// Creates a new exception appropriate for the current state of the reader.
                /// </summary>
                /// <param name="message">The message for the exception.</param>
                /// <returns>The newly created exception; it is returned to the caller rather than thrown here.</returns>
                internal InvalidJsonException CreateException(string message)
                {
                    // TODO: Keep track of and use the location.
                    return new InvalidJsonException(message);
                }
            }
764 765 766
        }
    }
}