Commit ffe25c76 authored by Jan Tattermusch's avatar Jan Tattermusch

Merge pull request #941 from jskeet/recursion-limit

Add recursion limit handling to JSON parsing.
parents 1470ced7 6fa17e75
...@@ -284,7 +284,7 @@ namespace Google.Protobuf ...@@ -284,7 +284,7 @@ namespace Google.Protobuf
Assert.Throws<InvalidProtocolBufferException>(() => input.ReadBytes()); Assert.Throws<InvalidProtocolBufferException>(() => input.ReadBytes());
} }
private static TestRecursiveMessage MakeRecursiveMessage(int depth) internal static TestRecursiveMessage MakeRecursiveMessage(int depth)
{ {
if (depth == 0) if (depth == 0)
{ {
...@@ -296,7 +296,7 @@ namespace Google.Protobuf ...@@ -296,7 +296,7 @@ namespace Google.Protobuf
} }
} }
private static void AssertMessageDepth(TestRecursiveMessage message, int depth) internal static void AssertMessageDepth(TestRecursiveMessage message, int depth)
{ {
if (depth == 0) if (depth == 0)
{ {
......
...@@ -723,5 +723,23 @@ namespace Google.Protobuf ...@@ -723,5 +723,23 @@ namespace Google.Protobuf
string json = "{} 10"; string json = "{} 10";
Assert.Throws<InvalidJsonException>(() => TestAllTypes.Parser.ParseJson(json)); Assert.Throws<InvalidJsonException>(() => TestAllTypes.Parser.ParseJson(json));
} }
/// <summary>
/// JSON equivalent to <see cref="CodedInputStreamTest.MaliciousRecursion"/>
/// </summary>
[Test]
public void MaliciousRecursion()
{
    // Render recursive messages nested to exactly the limit, and one level beyond it.
    string atLimitJson = CodedInputStreamTest.MakeRecursiveMessage(64).ToString();
    string beyondLimitJson = CodedInputStreamTest.MakeRecursiveMessage(65).ToString();

    // A parser configured with a limit of 64 accepts depth 64 but rejects depth 65.
    var limit64Parser = new JsonParser(new JsonParser.Settings(64));
    CodedInputStreamTest.AssertMessageDepth(limit64Parser.Parse<TestRecursiveMessage>(atLimitJson), 64);
    Assert.Throws<InvalidProtocolBufferException>(() => limit64Parser.Parse<TestRecursiveMessage>(beyondLimitJson));

    // A stricter parser (limit 63) rejects even the depth-64 document.
    var limit63Parser = new JsonParser(new JsonParser.Settings(63));
    Assert.Throws<InvalidProtocolBufferException>(() => limit63Parser.Parse<TestRecursiveMessage>(atLimitJson));
}
} }
} }
...@@ -81,6 +81,63 @@ namespace Google.Protobuf ...@@ -81,6 +81,63 @@ namespace Google.Protobuf
AssertTokens("'\ud800\\udc00'", JsonToken.Value(expected)); AssertTokens("'\ud800\\udc00'", JsonToken.Value(expected));
} }
[Test]
public void ObjectDepth()
{
    string json = "{ \"foo\": { \"x\": 1, \"y\": [ 0 ] } }";
    var tokenizer = new JsonTokenizer(new StringReader(json));
    // Expected token stream, paired (by index) with the object depth reported
    // *after* each token has been consumed. Note that array start/end tokens
    // leave the object depth untouched.
    JsonToken[] expectedTokens =
    {
        JsonToken.StartObject,
        JsonToken.Name("foo"),
        JsonToken.StartObject,
        JsonToken.Name("x"),
        JsonToken.Value(1),
        JsonToken.Name("y"),
        JsonToken.StartArray,
        JsonToken.Value(0),
        JsonToken.EndArray,
        JsonToken.EndObject,
        JsonToken.EndObject,
        JsonToken.EndDocument
    };
    int[] expectedDepths = { 1, 1, 2, 2, 2, 2, 2, 2, 2, 1, 0, 0 };

    // Before anything is read, the depth is 0.
    Assert.AreEqual(0, tokenizer.ObjectDepth);
    for (int i = 0; i < expectedTokens.Length; i++)
    {
        Assert.AreEqual(expectedTokens[i], tokenizer.Next());
        Assert.AreEqual(expectedDepths[i], tokenizer.ObjectDepth);
    }
}
[Test]
public void ObjectDepth_WithPushBack()
{
    string json = "{}";
    var tokenizer = new JsonTokenizer(new StringReader(json));
    Assert.AreEqual(0, tokenizer.ObjectDepth);

    // Consuming the start-object token takes us to depth 1...
    var startObject = tokenizer.Next();
    Assert.AreEqual(1, tokenizer.ObjectDepth);
    // ...and pushing it back undoes that, as if it had never been read.
    tokenizer.PushBack(startObject);
    Assert.AreEqual(0, tokenizer.ObjectDepth);
    // Re-reading the buffered token restores depth 1.
    startObject = tokenizer.Next();
    Assert.AreEqual(1, tokenizer.ObjectDepth);

    // The mirror-image check for the end-object token.
    var endObject = tokenizer.Next();
    Assert.AreEqual(0, tokenizer.ObjectDepth);
    tokenizer.PushBack(endObject);
    Assert.AreEqual(1, tokenizer.ObjectDepth);
    tokenizer.Next();
    Assert.AreEqual(0, tokenizer.ObjectDepth);
}
[Test] [Test]
[TestCase("embedded tab\t")] [TestCase("embedded tab\t")]
[TestCase("embedded CR\r")] [TestCase("embedded CR\r")]
......
...@@ -95,6 +95,13 @@ namespace Google.Protobuf ...@@ -95,6 +95,13 @@ namespace Google.Protobuf
"Use CodedInputStream.SetRecursionLimit() to increase the depth limit."); "Use CodedInputStream.SetRecursionLimit() to increase the depth limit.");
} }
/// <summary>
/// Creates the exception thrown when JSON parsing exceeds the configured
/// recursion (nesting depth) limit.
/// </summary>
internal static InvalidProtocolBufferException JsonRecursionLimitExceeded()
{
    const string message =
        "Protocol message had too many levels of nesting. May be malicious. " +
        "Use JsonParser.Settings to increase the depth limit.";
    return new InvalidProtocolBufferException(message);
}
internal static InvalidProtocolBufferException SizeLimitExceeded() internal static InvalidProtocolBufferException SizeLimitExceeded()
{ {
return new InvalidProtocolBufferException( return new InvalidProtocolBufferException(
......
...@@ -66,13 +66,16 @@ namespace Google.Protobuf ...@@ -66,13 +66,16 @@ namespace Google.Protobuf
private static readonly JsonParser defaultInstance = new JsonParser(Settings.Default); private static readonly JsonParser defaultInstance = new JsonParser(Settings.Default);
// TODO: Consider introducing a class containing parse state of the parser, tokenizer and depth. That would simplify these handlers
// and the signatures of various methods.
private static readonly Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>> private static readonly Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>>
WellKnownTypeHandlers = new Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>> WellKnownTypeHandlers = new Dictionary<string, Action<JsonParser, IMessage, JsonTokenizer>>
{ {
{ Timestamp.Descriptor.FullName, (parser, message, tokenizer) => MergeTimestamp(message, tokenizer.Next()) }, { Timestamp.Descriptor.FullName, (parser, message, tokenizer) => MergeTimestamp(message, tokenizer.Next()) },
{ Duration.Descriptor.FullName, (parser, message, tokenizer) => MergeDuration(message, tokenizer.Next()) }, { Duration.Descriptor.FullName, (parser, message, tokenizer) => MergeDuration(message, tokenizer.Next()) },
{ Value.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeStructValue(message, tokenizer) }, { Value.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeStructValue(message, tokenizer) },
{ ListValue.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeRepeatedField(message, message.Descriptor.Fields[ListValue.ValuesFieldNumber], tokenizer) }, { ListValue.Descriptor.FullName, (parser, message, tokenizer) =>
parser.MergeRepeatedField(message, message.Descriptor.Fields[ListValue.ValuesFieldNumber], tokenizer) },
{ Struct.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeStruct(message, tokenizer) }, { Struct.Descriptor.FullName, (parser, message, tokenizer) => parser.MergeStruct(message, tokenizer) },
{ FieldMask.Descriptor.FullName, (parser, message, tokenizer) => MergeFieldMask(message, tokenizer.Next()) }, { FieldMask.Descriptor.FullName, (parser, message, tokenizer) => MergeFieldMask(message, tokenizer.Next()) },
{ Int32Value.Descriptor.FullName, MergeWrapperField }, { Int32Value.Descriptor.FullName, MergeWrapperField },
...@@ -93,15 +96,11 @@ namespace Google.Protobuf ...@@ -93,15 +96,11 @@ namespace Google.Protobuf
} }
/// <summary> /// <summary>
/// Returns a formatter using the default settings. /// </summary> /// Returns a formatter using the default settings.
/// </summary>
public static JsonParser Default { get { return defaultInstance; } } public static JsonParser Default { get { return defaultInstance; } }
// Currently the settings are unused.
// TODO: When we've implemented Any (and the json spec is finalized), revisit whether they're
// needed at all.
#pragma warning disable 0414
private readonly Settings settings; private readonly Settings settings;
#pragma warning restore 0414
/// <summary> /// <summary>
/// Creates a new formatted with the given settings. /// Creates a new formatted with the given settings.
...@@ -147,6 +146,10 @@ namespace Google.Protobuf ...@@ -147,6 +146,10 @@ namespace Google.Protobuf
/// </summary> /// </summary>
private void Merge(IMessage message, JsonTokenizer tokenizer) private void Merge(IMessage message, JsonTokenizer tokenizer)
{ {
if (tokenizer.ObjectDepth > settings.RecursionLimit)
{
throw InvalidProtocolBufferException.JsonRecursionLimitExceeded();
}
if (message.Descriptor.IsWellKnownType) if (message.Descriptor.IsWellKnownType)
{ {
Action<JsonParser, IMessage, JsonTokenizer> handler; Action<JsonParser, IMessage, JsonTokenizer> handler;
...@@ -787,14 +790,13 @@ namespace Google.Protobuf ...@@ -787,14 +790,13 @@ namespace Google.Protobuf
#endregion #endregion
/// <summary> /// <summary>
/// Settings controlling JSON parsing. (Currently doesn't have any actual settings, but I suspect /// Settings controlling JSON parsing.
/// we'll want them for levels of strictness, descriptor pools for Any handling, etc.)
/// </summary> /// </summary>
public sealed class Settings public sealed class Settings
{ {
private static readonly Settings defaultInstance = new Settings(); private static readonly Settings defaultInstance = new Settings(CodedInputStream.DefaultRecursionLimit);
// TODO: Add recursion limit. private readonly int recursionLimit;
/// <summary> /// <summary>
/// Default settings, as used by <see cref="JsonParser.Default"/> /// Default settings, as used by <see cref="JsonParser.Default"/>
...@@ -802,10 +804,19 @@ namespace Google.Protobuf ...@@ -802,10 +804,19 @@ namespace Google.Protobuf
public static Settings Default { get { return defaultInstance; } } public static Settings Default { get { return defaultInstance; } }
/// <summary> /// <summary>
/// Creates a new <see cref="Settings"/> object. /// The maximum depth of messages to parse. Note that this limit only applies to parsing
/// messages, not collections - so a message within a collection within a message only counts as
/// depth 2, not 3.
/// </summary>
public int RecursionLimit { get { return recursionLimit; } }
/// <summary>
/// Creates a new <see cref="Settings"/> object with the specified recursion limit.
/// </summary> /// </summary>
public Settings() /// <param name="recursionLimit">The maximum depth of messages to parse</param>
public Settings(int recursionLimit)
{ {
this.recursionLimit = recursionLimit;
} }
} }
} }
......
...@@ -58,6 +58,13 @@ namespace Google.Protobuf ...@@ -58,6 +58,13 @@ namespace Google.Protobuf
private readonly PushBackReader reader; private readonly PushBackReader reader;
private JsonToken bufferedToken; private JsonToken bufferedToken;
private State state; private State state;
private int objectDepth = 0;
/// <summary>
/// Returns the depth of the stack, purely in objects (not collections).
/// Informally, this is the number of remaining unclosed '{' characters we have.
/// </summary>
internal int ObjectDepth { get { return objectDepth; } }
internal JsonTokenizer(TextReader reader) internal JsonTokenizer(TextReader reader)
{ {
...@@ -66,6 +73,8 @@ namespace Google.Protobuf ...@@ -66,6 +73,8 @@ namespace Google.Protobuf
containerStack.Push(ContainerType.Document); containerStack.Push(ContainerType.Document);
} }
// TODO: Why do we allow a different token to be pushed back? It might be better to always remember the previous
// token returned, and allow a parameterless Rewind() method (which could only be called once, just like the current PushBack).
internal void PushBack(JsonToken token) internal void PushBack(JsonToken token)
{ {
if (bufferedToken != null) if (bufferedToken != null)
...@@ -73,6 +82,14 @@ namespace Google.Protobuf ...@@ -73,6 +82,14 @@ namespace Google.Protobuf
throw new InvalidOperationException("Can't push back twice"); throw new InvalidOperationException("Can't push back twice");
} }
bufferedToken = token; bufferedToken = token;
if (token.Type == JsonToken.TokenType.StartObject)
{
objectDepth--;
}
else if (token.Type == JsonToken.TokenType.EndObject)
{
objectDepth++;
}
} }
/// <summary> /// <summary>
...@@ -95,6 +112,14 @@ namespace Google.Protobuf ...@@ -95,6 +112,14 @@ namespace Google.Protobuf
{ {
var ret = bufferedToken; var ret = bufferedToken;
bufferedToken = null; bufferedToken = null;
if (ret.Type == JsonToken.TokenType.StartObject)
{
objectDepth++;
}
else if (ret.Type == JsonToken.TokenType.EndObject)
{
objectDepth--;
}
return ret; return ret;
} }
if (state == State.ReaderExhausted) if (state == State.ReaderExhausted)
...@@ -142,10 +167,12 @@ namespace Google.Protobuf ...@@ -142,10 +167,12 @@ namespace Google.Protobuf
ValidateState(ValueStates, "Invalid state to read an open brace: "); ValidateState(ValueStates, "Invalid state to read an open brace: ");
state = State.ObjectStart; state = State.ObjectStart;
containerStack.Push(ContainerType.Object); containerStack.Push(ContainerType.Object);
objectDepth++;
return JsonToken.StartObject; return JsonToken.StartObject;
case '}': case '}':
ValidateState(State.ObjectAfterProperty | State.ObjectStart, "Invalid state to read a close brace: "); ValidateState(State.ObjectAfterProperty | State.ObjectStart, "Invalid state to read a close brace: ");
PopContainer(); PopContainer();
objectDepth--;
return JsonToken.EndObject; return JsonToken.EndObject;
case '[': case '[':
ValidateState(ValueStates, "Invalid state to read an open square bracket: "); ValidateState(ValueStates, "Invalid state to read an open square bracket: ");
......
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or sign in to comment