Commit 3de2fced authored by Jon Skeet

Handle JSON parsing for Any.

This required a rework of the tokenizer to allow for a "replaying" tokenizer, basically to handle the case where the @type value comes after the data itself. This rework is nice in some ways (all the push-back and object depth logic is in one place) but is a little fragile in terms of token push-back when using the replay tokenizer. It'll be fine for the scenario we need it for, but we should be careful...
parent 567579b5
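As a quick illustration of the scenario the replay handles, here is a sketch assembled from the tests added in this commit (the type URL and field name come from the Any_RegularMessage test below):

// The @type property arrives *after* the packed fields, so the parser has to
// record the earlier tokens and replay them once it knows the message type.
var registry = TypeRegistry.FromMessages(TestAllTypes.Descriptor);
var parser = new JsonParser(new JsonParser.Settings(10, registry));
string json = "{ \"singleInt32\": 10, \"@type\": \"type.googleapis.com/protobuf_unittest.TestAllTypes\" }";
Any any = parser.Parse<Any>(json);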
@@ -30,6 +30,7 @@
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#endregion
using Google.Protobuf.Reflection;
using Google.Protobuf.TestProtos;
using Google.Protobuf.WellKnownTypes;
using NUnit.Framework;
@@ -717,6 +718,55 @@ namespace Google.Protobuf
CollectionAssert.AreEqual(expectedPaths, parsed.Paths);
}
[Test]
public void Any_RegularMessage()
{
var registry = TypeRegistry.FromMessages(TestAllTypes.Descriptor);
var formatter = new JsonFormatter(new JsonFormatter.Settings(false, registry));
var message = new TestAllTypes { SingleInt32 = 10, SingleNestedMessage = new TestAllTypes.Types.NestedMessage { Bb = 20 } };
var original = Any.Pack(message);
var json = formatter.Format(original); // This is tested in JsonFormatterTest
var parser = new JsonParser(new JsonParser.Settings(10, registry));
Assert.AreEqual(original, parser.Parse<Any>(json));
string valueFirstJson = "{ \"singleInt32\": 10, \"singleNestedMessage\": { \"bb\": 20 }, \"@type\": \"type.googleapis.com/protobuf_unittest.TestAllTypes\" }";
Assert.AreEqual(original, parser.Parse<Any>(valueFirstJson));
}
[Test]
public void Any_UnknownType()
{
string json = "{ \"@type\": \"type.googleapis.com/bogus\" }";
Assert.Throws<InvalidOperationException>(() => Any.Parser.ParseJson(json));
}
[Test]
public void Any_WellKnownType()
{
var registry = TypeRegistry.FromMessages(Timestamp.Descriptor);
var formatter = new JsonFormatter(new JsonFormatter.Settings(false, registry));
var timestamp = new DateTime(1673, 6, 19, 12, 34, 56, DateTimeKind.Utc).ToTimestamp();
var original = Any.Pack(timestamp);
var json = formatter.Format(original); // This is tested in JsonFormatterTest
var parser = new JsonParser(new JsonParser.Settings(10, registry));
Assert.AreEqual(original, parser.Parse<Any>(json));
string valueFirstJson = "{ \"value\": \"1673-06-19T12:34:56Z\", \"@type\": \"type.googleapis.com/google.protobuf.Timestamp\" }";
Assert.AreEqual(original, parser.Parse<Any>(valueFirstJson));
}
[Test]
public void Any_Nested()
{
var registry = TypeRegistry.FromMessages(TestWellKnownTypes.Descriptor, TestAllTypes.Descriptor);
var formatter = new JsonFormatter(new JsonFormatter.Settings(false, registry));
var parser = new JsonParser(new JsonParser.Settings(10, registry));
var doubleNestedMessage = new TestAllTypes { SingleInt32 = 20 };
var nestedMessage = Any.Pack(doubleNestedMessage);
var message = new TestWellKnownTypes { AnyField = Any.Pack(nestedMessage) };
var json = formatter.Format(message);
// Use the descriptor-based parser just for a change.
Assert.AreEqual(message, parser.Parse(json, TestWellKnownTypes.Descriptor));
}
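// For reference, the JSON formatted above wraps one level per Any; the shape below is
// assumed from the well-known-type Any formatting rules rather than asserted by this test:
// { "anyField": { "@type": ".../google.protobuf.Any", "value": { "@type": ".../protobuf_unittest.TestAllTypes", "singleInt32": 20 } } }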
[Test]
public void DataAfterObject()
{
...
@@ -85,7 +85,7 @@ namespace Google.Protobuf
public void ObjectDepth()
{
string json = "{ \"foo\": { \"x\": 1, \"y\": [ 0 ] } }";
-var tokenizer = new JsonTokenizer(new StringReader(json));
+var tokenizer = JsonTokenizer.FromTextReader(new StringReader(json));
// If we had more tests like this, I'd introduce a helper method... but for one test, it's not worth it.
Assert.AreEqual(0, tokenizer.ObjectDepth);
Assert.AreEqual(JsonToken.StartObject, tokenizer.Next());
@@ -118,7 +118,7 @@ namespace Google.Protobuf
public void ObjectDepth_WithPushBack()
{
string json = "{}";
-var tokenizer = new JsonTokenizer(new StringReader(json));
+var tokenizer = JsonTokenizer.FromTextReader(new StringReader(json));
Assert.AreEqual(0, tokenizer.ObjectDepth);
var token = tokenizer.Next();
Assert.AreEqual(1, tokenizer.ObjectDepth);
@@ -275,7 +275,7 @@ namespace Google.Protobuf
// Note: we don't test that the earlier tokens are exactly as expected,
// partly because that's hard to parameterize.
var reader = new StringReader(json.Replace('\'', '"'));
-var tokenizer = new JsonTokenizer(reader);
+var tokenizer = JsonTokenizer.FromTextReader(reader);
for (int i = 0; i < expectedValidTokens; i++)
{
Assert.IsNotNull(tokenizer.Next());
@@ -334,7 +334,7 @@ namespace Google.Protobuf
[Test]
public void NextAfterEndDocumentThrows()
{
-var tokenizer = new JsonTokenizer(new StringReader("null"));
+var tokenizer = JsonTokenizer.FromTextReader(new StringReader("null"));
Assert.AreEqual(JsonToken.Null, tokenizer.Next());
Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next());
Assert.Throws<InvalidOperationException>(() => tokenizer.Next());
@@ -343,7 +343,7 @@ namespace Google.Protobuf
[Test]
public void CanPushBackEndDocument()
{
-var tokenizer = new JsonTokenizer(new StringReader("null"));
+var tokenizer = JsonTokenizer.FromTextReader(new StringReader("null"));
Assert.AreEqual(JsonToken.Null, tokenizer.Next());
Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next());
tokenizer.PushBack(JsonToken.EndDocument);
@@ -373,7 +373,7 @@ namespace Google.Protobuf
private static void AssertTokensNoReplacement(string json, params JsonToken[] expectedTokens)
{
var reader = new StringReader(json);
-var tokenizer = new JsonTokenizer(reader);
+var tokenizer = JsonTokenizer.FromTextReader(reader);
for (int i = 0; i < expectedTokens.Length; i++)
{
var actualToken = tokenizer.Next();
@@ -393,7 +393,7 @@ namespace Google.Protobuf
private static void AssertThrowsAfter(string json, params JsonToken[] expectedTokens)
{
var reader = new StringReader(json);
-var tokenizer = new JsonTokenizer(reader);
+var tokenizer = JsonTokenizer.FromTextReader(reader);
for (int i = 0; i < expectedTokens.Length; i++)
{
var actualToken = tokenizer.Next();
...
@@ -808,12 +808,17 @@ namespace Google.Protobuf
/// </summary>
public sealed class Settings
{
-private static readonly Settings defaultInstance = new Settings(false);
/// <summary>
/// Default settings, as used by <see cref="JsonFormatter.Default"/>
/// </summary>
-public static Settings Default { get; } = new Settings(false);
+public static Settings Default { get; }
// Workaround for the Mono compiler complaining about XML comments not being on
// valid language elements.
static Settings()
{
Default = new Settings(false);
}
/// <summary>
/// Whether fields whose values are the default for the field type (e.g. 0 for integers)
...
@@ -77,6 +77,7 @@ namespace Google.Protobuf
{ ListValue.Descriptor.FullName, (parser, message, tokenizer) =>
parser.MergeRepeatedField(message, message.Descriptor.Fields[ListValue.ValuesFieldNumber], tokenizer) },
{ Struct.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeStruct(message, tokenizer) },
{ Any.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeAny(message, tokenizer) },
{ FieldMask.Descriptor.FullName, (parser, message, tokenizer) => MergeFieldMask(message, tokenizer.Next()) },
{ Int32Value.Descriptor.FullName, MergeWrapperField },
{ Int64Value.Descriptor.FullName, MergeWrapperField },
@@ -128,7 +129,7 @@ namespace Google.Protobuf
/// <param name="jsonReader">Reader providing the JSON to parse.</param>
internal void Merge(IMessage message, TextReader jsonReader)
{
-var tokenizer = new JsonTokenizer(jsonReader);
+var tokenizer = JsonTokenizer.FromTextReader(jsonReader);
Merge(message, tokenizer);
var lastToken = tokenizer.Next();
if (lastToken != JsonToken.EndDocument)
@@ -338,6 +339,7 @@ namespace Google.Protobuf
/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
public T Parse<T>(string json) where T : IMessage, new()
{
Preconditions.CheckNotNull(json, nameof(json));
return Parse<T>(new StringReader(json));
}
@@ -350,11 +352,42 @@ namespace Google.Protobuf
/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
public T Parse<T>(TextReader jsonReader) where T : IMessage, new()
{
Preconditions.CheckNotNull(jsonReader, nameof(jsonReader));
T message = new T();
Merge(message, jsonReader);
return message;
}
/// <summary>
/// Parses <paramref name="json"/> into a new message.
/// </summary>
/// <param name="json">The JSON to parse.</param>
/// <param name="descriptor">Descriptor of message type to parse.</param>
/// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
public IMessage Parse(string json, MessageDescriptor descriptor)
{
Preconditions.CheckNotNull(json, nameof(json));
Preconditions.CheckNotNull(descriptor, nameof(descriptor));
return Parse(new StringReader(json), descriptor);
}
/// <summary>
/// Parses JSON read from <paramref name="jsonReader"/> into a new message.
/// </summary>
/// <param name="jsonReader">Reader providing the JSON to parse.</param>
/// <param name="descriptor">Descriptor of message type to parse.</param>
/// <exception cref="InvalidJsonException">The JSON does not comply with RFC 7159</exception>
/// <exception cref="InvalidProtocolBufferException">The JSON does not represent a Protocol Buffers message correctly</exception>
public IMessage Parse(TextReader jsonReader, MessageDescriptor descriptor)
{
Preconditions.CheckNotNull(jsonReader, nameof(jsonReader));
Preconditions.CheckNotNull(descriptor, nameof(descriptor));
IMessage message = descriptor.Parser.CreateTemplate();
Merge(message, jsonReader);
return message;
}
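// Usage sketch for the descriptor-based overloads (mirroring the Any_Nested test in JsonParserTest):
//   var parser = new JsonParser(new JsonParser.Settings(10, registry));
//   IMessage parsed = parser.Parse(json, TestWellKnownTypes.Descriptor);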
private void MergeStructValue(IMessage message, JsonTokenizer tokenizer)
{
var firstToken = tokenizer.Next();
@@ -410,6 +443,83 @@ namespace Google.Protobuf
MergeMapField(message, field, tokenizer);
}
private void MergeAny(IMessage message, JsonTokenizer tokenizer)
{
// Record the token stream until we see the @type property. At that point, we can take the value, consult
// the type registry for the relevant message, and replay the stream, omitting the @type property.
var tokens = new List<JsonToken>();
var token = tokenizer.Next();
if (token.Type != JsonToken.TokenType.StartObject)
{
throw new InvalidProtocolBufferException("Expected object value for Any");
}
int typeUrlObjectDepth = tokenizer.ObjectDepth;
// The check for the property depth protects us from nested Any values which occur before the type URL
// for *this* Any.
while (token.Type != JsonToken.TokenType.Name ||
token.StringValue != JsonFormatter.AnyTypeUrlField ||
tokenizer.ObjectDepth != typeUrlObjectDepth)
{
tokens.Add(token);
token = tokenizer.Next();
}
// Don't add the @type property or its value to the recorded token list
token = tokenizer.Next();
if (token.Type != JsonToken.TokenType.StringValue)
{
throw new InvalidProtocolBufferException("Expected string value for Any.@type");
}
string typeUrl = token.StringValue;
string typeName = JsonFormatter.GetTypeName(typeUrl);
MessageDescriptor descriptor = settings.TypeRegistry.Find(typeName);
if (descriptor == null)
{
throw new InvalidOperationException($"Type registry has no descriptor for type name '{typeName}'");
}
// Now replay the token stream we've already read and anything that remains of the object, just parsing it
// as normal. Our original tokenizer should end up at the end of the object.
var replay = JsonTokenizer.FromReplayedTokens(tokens, tokenizer);
var body = descriptor.Parser.CreateTemplate();
if (descriptor.IsWellKnownType)
{
MergeWellKnownTypeAnyBody(body, replay);
}
else
{
Merge(body, replay);
}
var data = body.ToByteString();
// Now that we have the message data, we can pack it into an Any (the message received as a parameter).
message.Descriptor.Fields[Any.TypeUrlFieldNumber].Accessor.SetValue(message, typeUrl);
message.Descriptor.Fields[Any.ValueFieldNumber].Accessor.SetValue(message, data);
}
// Well-known types end up in a property called "value" in the JSON. As there's no longer a @type property
// in the given JSON token stream, we should *only* have tokens of start-object, name("value"), the value
// itself, and then end-object.
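// For example (matching the Any_WellKnownType test), a packed Timestamp is formatted as
// { "@type": "type.googleapis.com/google.protobuf.Timestamp", "value": "1673-06-19T12:34:56Z" },
// so once the @type property has been stripped, the stream replayed here is effectively
// just { "value": "1673-06-19T12:34:56Z" }.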
private void MergeWellKnownTypeAnyBody(IMessage body, JsonTokenizer tokenizer)
{
var token = tokenizer.Next(); // Definitely start-object; checked in previous method
token = tokenizer.Next();
// TODO: What about an absent Int32Value, for example?
if (token.Type != JsonToken.TokenType.Name || token.StringValue != JsonFormatter.AnyWellKnownTypeValueField)
{
throw new InvalidProtocolBufferException($"Expected '{JsonFormatter.AnyWellKnownTypeValueField}' property for well-known type Any body");
}
Merge(body, tokenizer);
token = tokenizer.Next();
if (token.Type != JsonToken.TokenType.EndObject)
{
throw new InvalidProtocolBufferException($"Expected end-object token after @type/value for well-known type");
}
}
#region Utility methods which don't depend on the state (or settings) of the parser.
private static object ParseMapKey(FieldDescriptor field, string keyText)
{
@@ -789,29 +899,48 @@ namespace Google.Protobuf
/// </summary>
public sealed class Settings
{
-private static readonly Settings defaultInstance = new Settings(CodedInputStream.DefaultRecursionLimit);
-private readonly int recursionLimit;
/// <summary>
-/// Default settings, as used by <see cref="JsonParser.Default"/>
+/// Default settings, as used by <see cref="JsonParser.Default"/>. This has the same default
+/// recursion limit as <see cref="CodedInputStream"/>, and an empty type registry.
/// </summary>
-public static Settings Default { get { return defaultInstance; } }
+public static Settings Default { get; }
// Workaround for the Mono compiler complaining about XML comments not being on
// valid language elements.
static Settings()
{
Default = new Settings(CodedInputStream.DefaultRecursionLimit);
}
/// <summary>
/// The maximum depth of messages to parse. Note that this limit only applies to parsing
/// messages, not collections - so a message within a collection within a message only counts as
/// depth 2, not 3.
/// </summary>
-public int RecursionLimit { get { return recursionLimit; } }
+public int RecursionLimit { get; }
/// <summary>
/// The type registry used to parse <see cref="Any"/> messages.
/// </summary>
public TypeRegistry TypeRegistry { get; }
/// <summary>
/// Creates a new <see cref="Settings"/> object with the specified recursion limit.
/// </summary>
/// <param name="recursionLimit">The maximum depth of messages to parse</param>
-public Settings(int recursionLimit)
+public Settings(int recursionLimit) : this(recursionLimit, TypeRegistry.Empty)
{
}
/// <summary>
/// Creates a new <see cref="Settings"/> object with the specified recursion limit and type registry.
/// </summary>
/// <param name="recursionLimit">The maximum depth of messages to parse</param>
/// <param name="typeRegistry">The type registry used to parse <see cref="Any"/> messages</param>
public Settings(int recursionLimit, TypeRegistry typeRegistry)
{
-this.recursionLimit = recursionLimit;
+RecursionLimit = recursionLimit;
TypeRegistry = Preconditions.CheckNotNull(typeRegistry, nameof(typeRegistry));
}
}
}
...
@@ -47,32 +47,38 @@ namespace Google.Protobuf
/// between values. It validates the token stream as it goes - so callers can assume that the
/// tokens it produces are appropriate. For example, it would never produce "start object, end array."
/// </para>
/// <para>Implementation details: the base class handles single token push-back and object depth
/// tracking; concrete subclasses provide the tokens themselves via <see cref="NextImpl"/>.</para>
/// <para>Not thread-safe.</para>
/// </remarks>
-internal sealed class JsonTokenizer
+internal abstract class JsonTokenizer
{
-// The set of states in which a value is valid next token.
-private static readonly State ValueStates = State.ArrayStart | State.ArrayAfterComma | State.ObjectAfterColon | State.StartOfDocument;
-private readonly Stack<ContainerType> containerStack = new Stack<ContainerType>();
-private readonly PushBackReader reader;
private JsonToken bufferedToken;
-private State state;
-private int objectDepth = 0;
/// <summary>
/// Creates a tokenizer that reads from the given text reader.
/// </summary>
internal static JsonTokenizer FromTextReader(TextReader reader)
{
return new JsonTextTokenizer(reader);
}
-internal JsonTokenizer(TextReader reader)
-{
-this.reader = new PushBackReader(reader);
-state = State.StartOfDocument;
-containerStack.Push(ContainerType.Document);
-}
/// <summary>
/// Creates a tokenizer that first replays the given list of tokens, then continues reading
/// from another tokenizer. Note that if the returned tokenizer is "pushed back", that does not push back
/// on the continuation tokenizer, or vice versa. Care should be taken when using this method - it was
/// created for the sake of Any parsing.
/// </summary>
internal static JsonTokenizer FromReplayedTokens(IList<JsonToken> tokens, JsonTokenizer continuation)
{
return new JsonReplayTokenizer(tokens, continuation);
}
/// <summary>
/// Returns the depth of the stack, purely in objects (not collections).
/// Informally, this is the number of remaining unclosed '{' characters we have.
/// </summary>
internal int ObjectDepth { get; private set; }
// TODO: Why do we allow a different token to be pushed back? It might be better to always remember the previous
// token returned, and allow a parameterless Rewind() method (which could only be called once, just like the current PushBack).
internal void PushBack(JsonToken token)
@@ -84,11 +90,11 @@ namespace Google.Protobuf
bufferedToken = token;
if (token.Type == JsonToken.TokenType.StartObject)
{
-objectDepth--;
+ObjectDepth--;
}
else if (token.Type == JsonToken.TokenType.EndObject)
{
-objectDepth++;
+ObjectDepth++;
}
}
@@ -96,32 +102,95 @@ namespace Google.Protobuf
/// Returns the next JSON token in the stream. An EndDocument token is returned to indicate the end of the stream,
/// after which point <c>Next()</c> should not be called again.
/// </summary>
-/// <remarks>
-/// This method essentially just loops through characters skipping whitespace, validating and
-/// changing state (e.g. from ObjectBeforeColon to ObjectAfterColon)
-/// until it reaches something which will be a genuine token (e.g. a start object, or a value) at which point
-/// it returns the token. Although the method is large, it would be relatively hard to break down further... most
-/// of it is the large switch statement, which sometimes returns and sometimes doesn't.
-/// </remarks>
+/// <remarks>This implementation provides single-token buffering, and calls <see cref="NextImpl"/> if there is no buffered token.</remarks>
/// <returns>The next token in the stream. This is never null.</returns>
/// <exception cref="InvalidOperationException">This method is called after an EndDocument token has been returned</exception>
/// <exception cref="InvalidJsonException">The input text does not comply with RFC 7159</exception>
internal JsonToken Next()
{
+JsonToken tokenToReturn;
if (bufferedToken != null)
{
-var ret = bufferedToken;
+tokenToReturn = bufferedToken;
bufferedToken = null;
-if (ret.Type == JsonToken.TokenType.StartObject)
-{
-objectDepth++;
-}
-else if (ret.Type == JsonToken.TokenType.EndObject)
-{
-objectDepth--;
-}
-return ret;
}
+else
+{
+tokenToReturn = NextImpl();
+}
+if (tokenToReturn.Type == JsonToken.TokenType.StartObject)
+{
+ObjectDepth++;
+}
+else if (tokenToReturn.Type == JsonToken.TokenType.EndObject)
+{
+ObjectDepth--;
+}
+return tokenToReturn;
}
/// <summary>
/// Returns the next JSON token in the stream, when requested by the base class. (The <see cref="Next"/> method delegates
/// to this if it doesn't have a buffered token.)
/// </summary>
/// <exception cref="InvalidOperationException">This method is called after an EndDocument token has been returned</exception>
/// <exception cref="InvalidJsonException">The input text does not comply with RFC 7159</exception>
protected abstract JsonToken NextImpl();
/// <summary>
/// Tokenizer which first exhausts a list of tokens, then consults another tokenizer.
/// </summary>
private class JsonReplayTokenizer : JsonTokenizer
{
private readonly IList<JsonToken> tokens;
private readonly JsonTokenizer nextTokenizer;
private int nextTokenIndex;
internal JsonReplayTokenizer(IList<JsonToken> tokens, JsonTokenizer nextTokenizer)
{
this.tokens = tokens;
this.nextTokenizer = nextTokenizer;
}
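// Note: push-back is handled by the single-token buffer in the base class, so pushing a token
// back on this replay tokenizer never pushes it back into nextTokenizer - the caveat documented
// on FromReplayedTokens.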
// FIXME: Object depth not maintained...
protected override JsonToken NextImpl()
{
if (nextTokenIndex >= tokens.Count)
{
return nextTokenizer.Next();
}
return tokens[nextTokenIndex++];
}
}
/// <summary>
/// Tokenizer which does all the *real* work of parsing JSON.
/// </summary>
private sealed class JsonTextTokenizer : JsonTokenizer
{
// The set of states in which a value is valid next token.
private static readonly State ValueStates = State.ArrayStart | State.ArrayAfterComma | State.ObjectAfterColon | State.StartOfDocument;
private readonly Stack<ContainerType> containerStack = new Stack<ContainerType>();
private readonly PushBackReader reader;
private State state;
internal JsonTextTokenizer(TextReader reader)
{
this.reader = new PushBackReader(reader);
state = State.StartOfDocument;
containerStack.Push(ContainerType.Document);
}
/// <remarks>
/// This method essentially just loops through characters skipping whitespace, validating and
/// changing state (e.g. from ObjectBeforeColon to ObjectAfterColon)
/// until it reaches something which will be a genuine token (e.g. a start object, or a value) at which point
/// it returns the token. Although the method is large, it would be relatively hard to break down further... most
/// of it is the large switch statement, which sometimes returns and sometimes doesn't.
/// </remarks>
protected override JsonToken NextImpl()
{
if (state == State.ReaderExhausted)
{
throw new InvalidOperationException("Next() called after end of document");
@@ -167,12 +236,10 @@ namespace Google.Protobuf
ValidateState(ValueStates, "Invalid state to read an open brace: ");
state = State.ObjectStart;
containerStack.Push(ContainerType.Object);
-objectDepth++;
return JsonToken.StartObject;
case '}':
ValidateState(State.ObjectAfterProperty | State.ObjectStart, "Invalid state to read a close brace: ");
PopContainer();
-objectDepth--;
return JsonToken.EndObject;
case '[':
ValidateState(ValueStates, "Invalid state to read an open square bracket: ");
@@ -667,4 +734,5 @@ namespace Google.Protobuf
}
}
}
}
}