Commit 3de2fced authored by Jon Skeet

Handle JSON parsing for Any.

This required a rework of the tokenizer to allow for a "replaying" tokenizer, essentially to handle the case where the @type value comes after the data itself. The rework is nice in some ways (all the push-back and object depth logic lives in one place) but is a little fragile in terms of token push-back when using the replay tokenizer. It'll be fine for the scenario we need it for, but we should be careful...
parent 567579b5
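
For context, the case that motivates the replaying tokenizer is Any JSON in which the packed message's fields appear before the "@type" key, so the parser cannot interpret them until the type is known. The sketch below shows the general shape of the approach, using the internal factory methods this commit introduces; the driver loop itself is illustrative, not the commit's actual parser code, and assumes a "json" string in value-first form.

    // JSON where the payload precedes "@type" - legal, but unparseable in a single pass:
    //   { "singleInt32": 10, "@type": "type.googleapis.com/protobuf_unittest.TestAllTypes" }
    var tokenizer = JsonTokenizer.FromTextReader(new StringReader(json));
    var buffered = new List<JsonToken>();
    JsonToken token;
    // Record tokens until "@type" turns up (error handling elided).
    while ((token = tokenizer.Next()).Type != JsonToken.TokenType.EndDocument)
    {
        buffered.Add(token);
        if (token.Type == JsonToken.TokenType.Name && token.StringValue == "@type")
        {
            break; // The next token carries the type URL, so the message type is now known.
        }
    }
    // Re-read everything recorded so far, then fall through to the live tokenizer.
    var replay = JsonTokenizer.FromReplayedTokens(buffered, tokenizer);

Once the type URL has been resolved against a registry, the replay tokenizer lets the parser consume the object again from its opening brace as if nothing had been read.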
@@ -30,6 +30,7 @@
 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #endregion
+using Google.Protobuf.Reflection;
 using Google.Protobuf.TestProtos;
 using Google.Protobuf.WellKnownTypes;
 using NUnit.Framework;
@@ -717,6 +718,55 @@ namespace Google.Protobuf
             CollectionAssert.AreEqual(expectedPaths, parsed.Paths);
         }
 
+        [Test]
+        public void Any_RegularMessage()
+        {
+            var registry = TypeRegistry.FromMessages(TestAllTypes.Descriptor);
+            var formatter = new JsonFormatter(new JsonFormatter.Settings(false, TypeRegistry.FromMessages(TestAllTypes.Descriptor)));
+            var message = new TestAllTypes { SingleInt32 = 10, SingleNestedMessage = new TestAllTypes.Types.NestedMessage { Bb = 20 } };
+            var original = Any.Pack(message);
+            var json = formatter.Format(original); // This is tested in JsonFormatterTest
+            var parser = new JsonParser(new JsonParser.Settings(10, registry));
+            Assert.AreEqual(original, parser.Parse<Any>(json));
+            string valueFirstJson = "{ \"singleInt32\": 10, \"singleNestedMessage\": { \"bb\": 20 }, \"@type\": \"type.googleapis.com/protobuf_unittest.TestAllTypes\" }";
+            Assert.AreEqual(original, parser.Parse<Any>(valueFirstJson));
+        }
+
+        [Test]
+        public void Any_UnknownType()
+        {
+            string json = "{ \"@type\": \"type.googleapis.com/bogus\" }";
+            Assert.Throws<InvalidOperationException>(() => Any.Parser.ParseJson(json));
+        }
+
+        [Test]
+        public void Any_WellKnownType()
+        {
+            var registry = TypeRegistry.FromMessages(Timestamp.Descriptor);
+            var formatter = new JsonFormatter(new JsonFormatter.Settings(false, registry));
+            var timestamp = new DateTime(1673, 6, 19, 12, 34, 56, DateTimeKind.Utc).ToTimestamp();
+            var original = Any.Pack(timestamp);
+            var json = formatter.Format(original); // This is tested in JsonFormatterTest
+            var parser = new JsonParser(new JsonParser.Settings(10, registry));
+            Assert.AreEqual(original, parser.Parse<Any>(json));
+            string valueFirstJson = "{ \"value\": \"1673-06-19T12:34:56Z\", \"@type\": \"type.googleapis.com/google.protobuf.Timestamp\" }";
+            Assert.AreEqual(original, parser.Parse<Any>(valueFirstJson));
+        }
+
+        [Test]
+        public void Any_Nested()
+        {
+            var registry = TypeRegistry.FromMessages(TestWellKnownTypes.Descriptor, TestAllTypes.Descriptor);
+            var formatter = new JsonFormatter(new JsonFormatter.Settings(false, registry));
+            var parser = new JsonParser(new JsonParser.Settings(10, registry));
+            var doubleNestedMessage = new TestAllTypes { SingleInt32 = 20 };
+            var nestedMessage = Any.Pack(doubleNestedMessage);
+            var message = new TestWellKnownTypes { AnyField = Any.Pack(nestedMessage) };
+            var json = formatter.Format(message);
+            // Use the descriptor-based parser just for a change.
+            Assert.AreEqual(message, parser.Parse(json, TestWellKnownTypes.Descriptor));
+        }
+
         [Test]
         public void DataAfterObject()
         {
...
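For reference, the round-trip in Any_RegularMessage exercises both field orderings: the formatter emits "@type" first, while valueFirstJson is the same object with "@type" last. The type-first form should therefore look roughly like this (reconstructed from the test's value-first string, not captured from actual formatter output):

    { "@type": "type.googleapis.com/protobuf_unittest.TestAllTypes",
      "singleInt32": 10,
      "singleNestedMessage": { "bb": 20 } }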
@@ -85,7 +85,7 @@ namespace Google.Protobuf
         public void ObjectDepth()
         {
             string json = "{ \"foo\": { \"x\": 1, \"y\": [ 0 ] } }";
-            var tokenizer = new JsonTokenizer(new StringReader(json));
+            var tokenizer = JsonTokenizer.FromTextReader(new StringReader(json));
             // If we had more tests like this, I'd introduce a helper method... but for one test, it's not worth it.
             Assert.AreEqual(0, tokenizer.ObjectDepth);
             Assert.AreEqual(JsonToken.StartObject, tokenizer.Next());
@@ -118,7 +118,7 @@
         public void ObjectDepth_WithPushBack()
         {
             string json = "{}";
-            var tokenizer = new JsonTokenizer(new StringReader(json));
+            var tokenizer = JsonTokenizer.FromTextReader(new StringReader(json));
             Assert.AreEqual(0, tokenizer.ObjectDepth);
             var token = tokenizer.Next();
             Assert.AreEqual(1, tokenizer.ObjectDepth);
@@ -275,7 +275,7 @@
             // Note: we don't test that the earlier tokens are exactly as expected,
             // partly because that's hard to parameterize.
             var reader = new StringReader(json.Replace('\'', '"'));
-            var tokenizer = new JsonTokenizer(reader);
+            var tokenizer = JsonTokenizer.FromTextReader(reader);
             for (int i = 0; i < expectedValidTokens; i++)
             {
                 Assert.IsNotNull(tokenizer.Next());
@@ -334,7 +334,7 @@
         [Test]
         public void NextAfterEndDocumentThrows()
         {
-            var tokenizer = new JsonTokenizer(new StringReader("null"));
+            var tokenizer = JsonTokenizer.FromTextReader(new StringReader("null"));
             Assert.AreEqual(JsonToken.Null, tokenizer.Next());
             Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next());
             Assert.Throws<InvalidOperationException>(() => tokenizer.Next());
@@ -343,7 +343,7 @@
         [Test]
         public void CanPushBackEndDocument()
         {
-            var tokenizer = new JsonTokenizer(new StringReader("null"));
+            var tokenizer = JsonTokenizer.FromTextReader(new StringReader("null"));
             Assert.AreEqual(JsonToken.Null, tokenizer.Next());
             Assert.AreEqual(JsonToken.EndDocument, tokenizer.Next());
             tokenizer.PushBack(JsonToken.EndDocument);
@@ -373,7 +373,7 @@
         private static void AssertTokensNoReplacement(string json, params JsonToken[] expectedTokens)
         {
             var reader = new StringReader(json);
-            var tokenizer = new JsonTokenizer(reader);
+            var tokenizer = JsonTokenizer.FromTextReader(reader);
             for (int i = 0; i < expectedTokens.Length; i++)
             {
                 var actualToken = tokenizer.Next();
@@ -393,7 +393,7 @@
         private static void AssertThrowsAfter(string json, params JsonToken[] expectedTokens)
         {
             var reader = new StringReader(json);
-            var tokenizer = new JsonTokenizer(reader);
+            var tokenizer = JsonTokenizer.FromTextReader(reader);
             for (int i = 0; i < expectedTokens.Length; i++)
             {
                 var actualToken = tokenizer.Next();
...
@@ -808,12 +808,17 @@
         /// </summary>
         public sealed class Settings
         {
-            private static readonly Settings defaultInstance = new Settings(false);
-
             /// <summary>
             /// Default settings, as used by <see cref="JsonFormatter.Default"/>
             /// </summary>
-            public static Settings Default { get; } = new Settings(false);
+            public static Settings Default { get; }
+
+            // Workaround for the Mono compiler complaining about XML comments not being on
+            // valid language elements.
+            static Settings()
+            {
+                Default = new Settings(false);
+            }
 
             /// <summary>
             /// Whether fields whose values are the default for the field type (e.g. 0 for integers)
...
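The Settings change above is behavior-preserving: it only moves the initialization of Default out of a C# 6 auto-property initializer, which (per the new comment) a Mono compiler version rejected in combination with the XML doc comment. Side by side, the two equivalent forms (illustrative, condensed from the diff):

    // Before: auto-property initializer.
    public static Settings Default { get; } = new Settings(false);

    // After: the same value assigned from a static constructor, leaving the
    // doc comment attached to a plain auto-property.
    public static Settings Default { get; }
    static Settings() { Default = new Settings(false); }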
(The diff for one of the changed files is collapsed and not shown.)
@@ -47,32 +47,38 @@ namespace Google.Protobuf
     /// between values. It validates the token stream as it goes - so callers can assume that the
     /// tokens it produces are appropriate. For example, it would never produce "start object, end array."
     /// </para>
+    /// <para>Implementation details: the base class handles single token push-back and </para>
     /// <para>Not thread-safe.</para>
     /// </remarks>
-    internal sealed class JsonTokenizer
+    internal abstract class JsonTokenizer
     {
-        // The set of states in which a value is valid next token.
-        private static readonly State ValueStates = State.ArrayStart | State.ArrayAfterComma | State.ObjectAfterColon | State.StartOfDocument;
-
-        private readonly Stack<ContainerType> containerStack = new Stack<ContainerType>();
-        private readonly PushBackReader reader;
         private JsonToken bufferedToken;
-        private State state;
-        private int objectDepth = 0;
 
         /// <summary>
-        /// Returns the depth of the stack, purely in objects (not collections).
-        /// Informally, this is the number of remaining unclosed '{' characters we have.
+        /// Creates a tokenizer that reads from the given text reader.
         /// </summary>
-        internal int ObjectDepth { get { return objectDepth; } }
-
-        internal JsonTokenizer(TextReader reader)
+        internal static JsonTokenizer FromTextReader(TextReader reader)
+        {
+            return new JsonTextTokenizer(reader);
+        }
+
+        /// <summary>
+        /// Creates a tokenizer that first replays the given list of tokens, then continues reading
+        /// from another tokenizer. Note that if the returned tokenizer is "pushed back", that does not push back
+        /// on the continuation tokenizer, or vice versa. Care should be taken when using this method - it was
+        /// created for the sake of Any parsing.
+        /// </summary>
+        internal static JsonTokenizer FromReplayedTokens(IList<JsonToken> tokens, JsonTokenizer continuation)
         {
-            this.reader = new PushBackReader(reader);
-            state = State.StartOfDocument;
-            containerStack.Push(ContainerType.Document);
+            return new JsonReplayTokenizer(tokens, continuation);
         }
 
+        /// <summary>
+        /// Returns the depth of the stack, purely in objects (not collections).
+        /// Informally, this is the number of remaining unclosed '{' characters we have.
+        /// </summary>
+        internal int ObjectDepth { get; private set; }
+
         // TODO: Why do we allow a different token to be pushed back? It might be better to always remember the previous
         // token returned, and allow a parameterless Rewind() method (which could only be called once, just like the current PushBack).
         internal void PushBack(JsonToken token)
@@ -84,11 +90,11 @@
             bufferedToken = token;
             if (token.Type == JsonToken.TokenType.StartObject)
             {
-                objectDepth--;
+                ObjectDepth--;
             }
             else if (token.Type == JsonToken.TokenType.EndObject)
            {
-                objectDepth++;
+                ObjectDepth++;
             }
         }
@@ -96,32 +102,95 @@
         /// Returns the next JSON token in the stream. An EndDocument token is returned to indicate the end of the stream,
         /// after which point <c>Next()</c> should not be called again.
         /// </summary>
-        /// <remarks>
-        /// This method essentially just loops through characters skipping whitespace, validating and
-        /// changing state (e.g. from ObjectBeforeColon to ObjectAfterColon)
-        /// until it reaches something which will be a genuine token (e.g. a start object, or a value) at which point
-        /// it returns the token. Although the method is large, it would be relatively hard to break down further... most
-        /// of it is the large switch statement, which sometimes returns and sometimes doesn't.
-        /// </remarks>
+        /// <remarks>This implementation provides single-token buffering, and calls <see cref="NextImpl"/> if there is no buffered token.</remarks>
         /// <returns>The next token in the stream. This is never null.</returns>
         /// <exception cref="InvalidOperationException">This method is called after an EndDocument token has been returned</exception>
         /// <exception cref="InvalidJsonException">The input text does not comply with RFC 7159</exception>
         internal JsonToken Next()
         {
+            JsonToken tokenToReturn;
             if (bufferedToken != null)
             {
-                var ret = bufferedToken;
+                tokenToReturn = bufferedToken;
                 bufferedToken = null;
-                if (ret.Type == JsonToken.TokenType.StartObject)
-                {
-                    objectDepth++;
-                }
-                else if (ret.Type == JsonToken.TokenType.EndObject)
-                {
-                    objectDepth--;
-                }
-                return ret;
             }
+            else
+            {
+                tokenToReturn = NextImpl();
+            }
+            if (tokenToReturn.Type == JsonToken.TokenType.StartObject)
+            {
+                ObjectDepth++;
+            }
+            else if (tokenToReturn.Type == JsonToken.TokenType.EndObject)
+            {
+                ObjectDepth--;
+            }
+            return tokenToReturn;
         }
 
+        /// <summary>
+        /// Returns the next JSON token in the stream, when requested by the base class. (The <see cref="Next"/> method delegates
+        /// to this if it doesn't have a buffered token.)
+        /// </summary>
+        /// <exception cref="InvalidOperationException">This method is called after an EndDocument token has been returned</exception>
+        /// <exception cref="InvalidJsonException">The input text does not comply with RFC 7159</exception>
+        protected abstract JsonToken NextImpl();
+
+        /// <summary>
+        /// Tokenizer which first exhausts a list of tokens, then consults another tokenizer.
+        /// </summary>
+        private class JsonReplayTokenizer : JsonTokenizer
+        {
+            private readonly IList<JsonToken> tokens;
+            private readonly JsonTokenizer nextTokenizer;
+            private int nextTokenIndex;
+
+            internal JsonReplayTokenizer(IList<JsonToken> tokens, JsonTokenizer nextTokenizer)
+            {
+                this.tokens = tokens;
+                this.nextTokenizer = nextTokenizer;
+            }
+
+            // FIXME: Object depth not maintained...
+            protected override JsonToken NextImpl()
+            {
+                if (nextTokenIndex >= tokens.Count)
+                {
+                    return nextTokenizer.Next();
+                }
+                return tokens[nextTokenIndex++];
+            }
+        }
+
+        /// <summary>
+        /// Tokenizer which does all the *real* work of parsing JSON.
+        /// </summary>
+        private sealed class JsonTextTokenizer : JsonTokenizer
+        {
+            // The set of states in which a value is valid next token.
+            private static readonly State ValueStates = State.ArrayStart | State.ArrayAfterComma | State.ObjectAfterColon | State.StartOfDocument;
+
+            private readonly Stack<ContainerType> containerStack = new Stack<ContainerType>();
+            private readonly PushBackReader reader;
+            private State state;
+
+            internal JsonTextTokenizer(TextReader reader)
+            {
+                this.reader = new PushBackReader(reader);
+                state = State.StartOfDocument;
+                containerStack.Push(ContainerType.Document);
+            }
+
+            /// <remarks>
+            /// This method essentially just loops through characters skipping whitespace, validating and
+            /// changing state (e.g. from ObjectBeforeColon to ObjectAfterColon)
+            /// until it reaches something which will be a genuine token (e.g. a start object, or a value) at which point
+            /// it returns the token. Although the method is large, it would be relatively hard to break down further... most
+            /// of it is the large switch statement, which sometimes returns and sometimes doesn't.
+            /// </remarks>
+            protected override JsonToken NextImpl()
+            {
                 if (state == State.ReaderExhausted)
                 {
                     throw new InvalidOperationException("Next() called after end of document");
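The FIXME in JsonReplayTokenizer, together with the caveat on FromReplayedTokens, is the fragility the commit message warns about: each tokenizer instance has its own single-token push-back buffer, so a token pushed back on the replay wrapper is invisible to anyone reading the continuation tokenizer directly. A hypothetical sequence showing the hazard, assuming the internal API added above:

    var replay = JsonTokenizer.FromReplayedTokens(recordedTokens, liveTokenizer);
    var token = replay.Next();  // Once recordedTokens is exhausted, this really comes from liveTokenizer.
    replay.PushBack(token);     // Buffered on the replay wrapper only...
    liveTokenizer.Next();       // ...so reading liveTokenizer directly skips straight past it.

As long as a single consumer drives only the replay wrapper, which is all the Any parsing scenario needs, the shared buffering in Next() keeps everything consistent; mixing readers is what must be avoided.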
@@ -167,12 +236,10 @@
                         ValidateState(ValueStates, "Invalid state to read an open brace: ");
                         state = State.ObjectStart;
                         containerStack.Push(ContainerType.Object);
-                        objectDepth++;
                         return JsonToken.StartObject;
                     case '}':
                         ValidateState(State.ObjectAfterProperty | State.ObjectStart, "Invalid state to read a close brace: ");
                         PopContainer();
-                        objectDepth--;
                         return JsonToken.EndObject;
                     case '[':
                         ValidateState(ValueStates, "Invalid state to read an open square bracket: ");
@@ -667,4 +734,5 @@
             }
         }
     }
+    }
 }