Commit e005be65 authored by Max Cai's avatar Max Cai

Add validation when parsing enum fields.

Invalid values from the wire are silently ignored.
Unlike full/lite, the invalid values are not stored into the
unknown fields, because there's no way to get them out from
Nano's unknown fields without a matching Extension.

Edited README and slightly moved it towards a standalone
section for Nano, independent of the Micro section.

Change-Id: I2c1eb07f4d6d8f3aea242b8ddd95b9c966f3f177
parent a793c09b
......@@ -409,33 +409,33 @@ still generated as integer constants in the message class.
Nano version
============================
Nano is even smaller than micro, especially in the number of generated
functions. It is like micro:
- No support for descriptors and reflection;
- Enum constants are integers with no protection against invalid
values set to enum fields.
Except:
- Setter/getter/hazzer/clearer functions are opt-in.
Nano is a special code generator and runtime library designed specifically
for Android, and is very resource-friendly in both the amount of code
and the runtime overhead. An overview of Nano features:
- No descriptors or message builders.
- All messages are mutable; fields are public Java fields.
- For optional fields only, encapsulation behind setter/getter/hazzer/
clearer functions is opt-in, which provides proper 'has' state support.
- If not opted in, has state is not available. Serialization outputs
all fields not equal to their default. (See important implications
below.)
all fields not equal to their defaults (see important implications
below).
- Required fields are always serialized.
- Enum constants are integers; protection against invalid values is
provided only when parsing from the wire.
- Enum constants can be generated into container interfaces bearing
the enum's name (so the referencing code is in Java style).
- CodedInputStreamMicro is renamed to CodedInputByteBufferNano and can
only take byte[] (not InputStream).
- Similar rename from CodedOutputStreamMicro to
CodedOutputByteBufferNano.
- Repeated fields are in arrays, not ArrayList or Vector.
- CodedInputByteBufferNano can only take byte[] (not InputStream).
- Similarly CodedOutputByteBufferNano can only write to byte[].
- Repeated fields are in arrays, not ArrayList or Vector. Null array
elements are allowed and silently ignored.
- Full support of serializing/deserializing repeated packed fields.
- Support of extensions.
- Unset messages/groups are null, not an immutable empty default
instance.
- Required fields are always serialized.
- toByteArray(...) and mergeFrom(...) are now static functions of
MessageNano.
- "bytes" are of java type byte[].
- The 'bytes' type translates to the Java type byte[].
IMPORTANT: If you have fields with defaults and opt out of accessors
......
......@@ -33,6 +33,8 @@ package com.google.protobuf;
import com.google.protobuf.nano.CodedInputByteBufferNano;
import com.google.protobuf.nano.EnumClassNanoMultiple;
import com.google.protobuf.nano.EnumClassNanos;
import com.google.protobuf.nano.EnumValidity;
import com.google.protobuf.nano.EnumValidityAccessors;
import com.google.protobuf.nano.Extensions;
import com.google.protobuf.nano.Extensions.AnotherMessage;
import com.google.protobuf.nano.Extensions.MessageWithGroup;
......@@ -2092,6 +2094,126 @@ public class NanoTest extends TestCase {
assertEquals(nestedMsg2.bb, newMsg.repeatedNestedMessage[2].bb);
}
/**
 * Verifies that enum values which are not declared in the schema are rejected
 * when a message is parsed from the wire.
 */
public void testNanoEnumValidity() throws Exception {
  final int bogus = 120;
  final int bogusToo = 121;
  final int foo = EnumValidity.E.FOO;
  final int bar = EnumValidity.E.BAR;
  final int baz = EnumValidity.E.BAZ;

  EnumValidity.M source = new EnumValidity.M();
  // Baseline: a fresh message holds the values that Test 1 expects to see after parsing.
  assertEquals(EnumValidity.E.default_, source.optionalE);
  assertEquals(baz, source.defaultE);

  source.optionalE = bogus;
  source.defaultE = bogus;
  // Unsuffixed pair: every element is a valid value.
  source.repeatedE = new int[] {foo, bar};
  source.packedE = new int[] {foo, baz};
  // "2" pair: valid and invalid values mixed.
  source.repeatedE2 = new int[] {bogus, bar, bogusToo};
  source.packedE2 = new int[] {foo, bogus, bogusToo};
  // "3" pair: nothing but invalid values.
  source.repeatedE3 = new int[] {bogus, bogus};
  source.packedE3 = new int[] {bogusToo, bogusToo};

  byte[] wire = MessageNano.toByteArray(source);
  // Serialization does not filter: all of the data above must be in the byte array.
  assertEquals(31, wire.length);

  // Test 1: invalid values must not show up in a freshly parsed message.
  EnumValidity.M parsed = MessageNano.mergeFrom(new EnumValidity.M(), wire);
  assertEquals(EnumValidity.E.default_, parsed.optionalE);
  assertEquals(baz, parsed.defaultE);
  int[] expectedRepeatedE = {foo, bar};
  assertTrue(Arrays.equals(expectedRepeatedE, parsed.repeatedE));
  int[] expectedPackedE = {foo, baz};
  assertTrue(Arrays.equals(expectedPackedE, parsed.packedE));
  int[] expectedRepeatedE2 = {bar};
  assertTrue(Arrays.equals(expectedRepeatedE2, parsed.repeatedE2));
  int[] expectedPackedE2 = {foo};
  assertTrue(Arrays.equals(expectedPackedE2, parsed.packedE2));
  assertEquals(0, parsed.repeatedE3.length);
  assertEquals(0, parsed.packedE3.length);

  // Test 2: invalid values from the wire must not replace what is already in a field;
  // this covers arrays and also invalid values that were already present before merging.
  parsed.optionalE = bar;
  parsed.defaultE = bogusToo;
  parsed.repeatedE = new int[] {baz};
  parsed.packedE = new int[] {baz, bogusToo};
  parsed.repeatedE2 = new int[] {bogus, bogusToo};
  parsed.packedE2 = null;
  parsed.repeatedE3 = null;
  parsed.packedE3 = new int[0];
  MessageNano.mergeFrom(parsed, wire);
  assertEquals(bar, parsed.optionalE);
  assertEquals(bogusToo, parsed.defaultE);
  // Each expectation is "previous content" followed by the valid values appended by the merge.
  int[] mergedRepeatedE = {baz, foo, bar};
  assertTrue(Arrays.equals(mergedRepeatedE, parsed.repeatedE));
  int[] mergedPackedE = {baz, bogusToo, foo, baz};
  assertTrue(Arrays.equals(mergedPackedE, parsed.packedE));
  int[] mergedRepeatedE2 = {bogus, bogusToo, bar};
  assertTrue(Arrays.equals(mergedRepeatedE2, parsed.repeatedE2));
  int[] mergedPackedE2 = {foo};  // field was null before the merge
  assertTrue(Arrays.equals(mergedPackedE2, parsed.packedE2));
  assertNull(parsed.repeatedE3);  // null + all invalid == null
  assertEquals(0, parsed.packedE3.length);  // empty + all invalid == empty

  // Test 3: the same bytes read through the alternative field declarations.
  EnumValidity.Alt alt = MessageNano.mergeFrom(new EnumValidity.Alt(), wire);
  // The last valid value of source.repeatedE2 wins.
  assertEquals(bar, alt.repeatedE2AsOptional);
  int[] expectedAltPackedE2 = {foo};
  assertTrue(Arrays.equals(expectedAltPackedE2, alt.packedE2AsNonPacked));
  assertEquals(0, alt.nonPackedE3AsPacked.length);
}
/**
 * Accessor-style counterpart of {@link #testNanoEnumValidity()}. Repeated fields are
 * not re-tested here because they are not affected by the accessor style.
 */
public void testNanoEnumValidityAccessors() throws Exception {
  final int bogus = 120;
  final int bogusToo = 121;
  final int bar = EnumValidityAccessors.BAR;
  final int baz = EnumValidityAccessors.BAZ;

  EnumValidityAccessors.M source = new EnumValidityAccessors.M();
  // Baseline: a fresh message reports the values that Test 1 expects to see after parsing.
  assertEquals(EnumValidityAccessors.default_, source.getOptionalE());
  assertEquals(baz, source.getDefaultE());

  source.setOptionalE(bogus);
  source.setDefaultE(bogus);
  // repeatedE2 is populated only so that Alt.repeatedE2AsOptional has something to read.
  source.repeatedE2 = new int[] {bogus, bar, bogusToo};

  byte[] wire = MessageNano.toByteArray(source);
  // Serialization does not filter: all of the data above must be in the byte array.
  assertEquals(10, wire.length);

  // Test 1: invalid values must not show up in a freshly parsed message.
  EnumValidityAccessors.M parsed =
      MessageNano.mergeFrom(new EnumValidityAccessors.M(), wire);
  assertEquals(EnumValidityAccessors.default_, parsed.getOptionalE());
  assertEquals(baz, parsed.getDefaultE());

  // Test 2: invalid values from the wire must not replace what is already in a field,
  // even when the existing value is itself invalid.
  parsed.setOptionalE(bar);
  parsed.setDefaultE(bogusToo);
  MessageNano.mergeFrom(parsed, wire);
  assertEquals(bar, parsed.getOptionalE());
  assertEquals(bogusToo, parsed.getDefaultE());

  // Test 3: the same bytes read through the alternative field declaration.
  EnumValidityAccessors.Alt alt =
      MessageNano.mergeFrom(new EnumValidityAccessors.Alt(), wire);
  // The last valid value of source.repeatedE2 wins.
  assertEquals(bar, alt.getRepeatedE2AsOptional());
}
/**
* Tests that code generation correctly wraps a single message into its outer
* class. The class {@code SingleMessageNano} is imported from the outer
......
......@@ -71,9 +71,35 @@ void SetEnumVariables(const Params& params,
(*variables)["tag"] = SimpleItoa(internal::WireFormat::MakeTag(descriptor));
(*variables)["tag_size"] = SimpleItoa(
internal::WireFormat::TagSize(descriptor->number(), descriptor->type()));
(*variables)["non_packed_tag"] = SimpleItoa(
internal::WireFormatLite::MakeTag(descriptor->number(),
internal::WireFormat::WireTypeForFieldType(descriptor->type())));
(*variables)["message_name"] = descriptor->containing_type()->name();
}
void LoadEnumValues(const Params& params,
const EnumDescriptor* enum_descriptor, vector<string>* canonical_values) {
string enum_class_name = ClassName(params, enum_descriptor);
for (int i = 0; i < enum_descriptor->value_count(); i++) {
const EnumValueDescriptor* value = enum_descriptor->value(i);
const EnumValueDescriptor* canonical_value =
enum_descriptor->FindValueByNumber(value->number());
if (value == canonical_value) {
canonical_values->push_back(
enum_class_name + "." + RenameJavaKeywords(value->name()));
}
}
}
// Prints one Java "case <value>:" label per entry of |canonical_values|, in
// order. Only the labels are printed; the statements guarded by them and the
// closing "break" are emitted by the caller.
void PrintCaseLabels(
    io::Printer* printer, const vector<string>& canonical_values) {
  // Index with the container's own (unsigned) size type so the loop condition
  // does not compare a signed int against size().
  for (vector<string>::size_type i = 0; i < canonical_values.size(); i++) {
    printer->Print(
      " case $value$:\n",
      "value", canonical_values[i]);
  }
}
} // namespace
// ===================================================================
......@@ -82,6 +108,7 @@ EnumFieldGenerator::
EnumFieldGenerator(const FieldDescriptor* descriptor, const Params& params)
: FieldGenerator(params), descriptor_(descriptor) {
SetEnumVariables(params, descriptor, &variables_);
LoadEnumValues(params, descriptor->enum_type(), &canonical_values_);
}
EnumFieldGenerator::~EnumFieldGenerator() {}
......@@ -111,12 +138,21 @@ GenerateClearCode(io::Printer* printer) const {
// Emits the Java parsing code for a singular enum field (public-field style).
// Going by the newer strings below, the generated code reads an int32 into a
// local "value" and opens a switch on it; PrintCaseLabels() supplies one case
// label per canonical enum value, and the assignment to the field (plus the
// has-flag when params_.generate_has() is true) sits under those labels,
// followed by "break".
// NOTE(review): this listing appears to interleave the removed and the added
// lines of the change (e.g. two consecutive argument strings after Print);
// confirm against the actual file before relying on it.
void EnumFieldGenerator::
GenerateMergingCode(io::Printer* printer) const {
printer->Print(variables_,
"this.$name$ = input.readInt32();\n");
"int value = input.readInt32();\n"
"switch (value) {\n");
PrintCaseLabels(printer, canonical_values_);
printer->Print(variables_,
" this.$name$ = value;\n");
if (params_.generate_has()) {
printer->Print(variables_,
"has$capitalized_name$ = true;\n");
" has$capitalized_name$ = true;\n");
}
printer->Print(
" break;\n"
"}\n");
// No default case: in case of invalid value from the wire, preserve old
// field value. Also we are not storing the invalid value into the unknown
// fields, because there is no way to get the value out.
}
void EnumFieldGenerator::
......@@ -209,6 +245,7 @@ AccessorEnumFieldGenerator(const FieldDescriptor* descriptor,
const Params& params, int has_bit_index)
: FieldGenerator(params), descriptor_(descriptor) {
SetEnumVariables(params, descriptor, &variables_);
LoadEnumValues(params, descriptor->enum_type(), &canonical_values_);
SetBitOperationVariables("has", has_bit_index, &variables_);
}
......@@ -245,8 +282,17 @@ GenerateClearCode(io::Printer* printer) const {
// Emits the Java parsing code for a singular enum field in accessor style.
// Going by the newer strings below, the generated code reads an int32 into a
// local "value" and opens a switch on it; PrintCaseLabels() supplies one case
// label per canonical enum value, and under those labels the backing field
// "$name$_" is assigned and the "$set_has$" statement is run before "break".
// NOTE(review): this listing appears to interleave the removed and the added
// lines of the change; confirm against the actual file before relying on it.
void AccessorEnumFieldGenerator::
GenerateMergingCode(io::Printer* printer) const {
printer->Print(variables_,
"$name$_ = input.readInt32();\n"
"$set_has$;\n");
"int value = input.readInt32();\n"
"switch (value) {\n");
PrintCaseLabels(printer, canonical_values_);
printer->Print(variables_,
" $name$_ = value;\n"
" $set_has$;\n"
" break;\n"
"}\n");
// No default case: in case of invalid value from the wire, preserve old
// field value. Also we are not storing the invalid value into the unknown
// fields, because there is no way to get the value out.
}
void AccessorEnumFieldGenerator::
......@@ -287,6 +333,7 @@ RepeatedEnumFieldGenerator::
RepeatedEnumFieldGenerator(const FieldDescriptor* descriptor, const Params& params)
: FieldGenerator(params), descriptor_(descriptor) {
SetEnumVariables(params, descriptor, &variables_);
LoadEnumValues(params, descriptor->enum_type(), &canonical_values_);
}
RepeatedEnumFieldGenerator::~RepeatedEnumFieldGenerator() {}
......@@ -305,46 +352,82 @@ GenerateClearCode(io::Printer* printer) const {
// Emits the Java parsing code for a repeated enum field read in non-packed
// form. Going by the newer strings below, the generated code:
//   1. sizes a scratch array "validValues" from
//      WireFormatNano.getRepeatedFieldArrayLength(input, $non_packed_tag$);
//   2. reads each value (calling readTag() before every element except the
//      first) and stores it in the scratch array only when it matches one of
//      the case labels printed by PrintCaseLabels();
//   3. if at least one value was kept, either assigns the scratch array to
//      the field directly (field null/empty and nothing was dropped) or
//      builds a new array holding the existing elements followed by the kept
//      values.
// When no value is kept the field is not assigned at all.
// NOTE(review): this listing appears to interleave the removed and the added
// lines of the change (both the old "arrayLength"/"newArray" loop and the new
// "validValues" loop are present); confirm against the actual file.
void RepeatedEnumFieldGenerator::
GenerateMergingCode(io::Printer* printer) const {
// First, figure out the length of the array, then parse.
// First, figure out the maximum length of the array, then parse,
// and finally copy the valid values to the field.
printer->Print(variables_,
"int arrayLength = com.google.protobuf.nano.WireFormatNano\n"
" .getRepeatedFieldArrayLength(input, $tag$);\n"
"int i = this.$name$ == null ? 0 : this.$name$.length;\n"
"int[] newArray = new int[i + arrayLength];\n"
"if (i != 0) {\n"
" java.lang.System.arraycopy(this.$name$, 0, newArray, 0, i);\n"
"}\n"
"for (; i < newArray.length - 1; i++) {\n"
" newArray[i] = input.readInt32();\n"
" input.readTag();\n"
"int length = com.google.protobuf.nano.WireFormatNano\n"
" .getRepeatedFieldArrayLength(input, $non_packed_tag$);\n"
"int[] validValues = new int[length];\n"
"int validCount = 0;\n"
"for (int i = 0; i < length; i++) {\n"
" if (i != 0) { // tag for first value already consumed.\n"
" input.readTag();\n"
" }\n"
" int value = input.readInt32();\n"
" switch (value) {\n");
printer->Indent();
PrintCaseLabels(printer, canonical_values_);
printer->Outdent();
printer->Print(variables_,
" validValues[validCount++] = value;\n"
" break;\n"
" }\n"
"}\n"
"// Last one without readTag.\n"
"newArray[i] = input.readInt32();\n"
"this.$name$ = newArray;\n");
"if (validCount != 0) {\n"
" int i = this.$name$ == null ? 0 : this.$name$.length;\n"
" if (i == 0 && validCount == validValues.length) {\n"
" this.$name$ = validValues;\n"
" } else {\n"
" int[] newArray = new int[i + validCount];\n"
" if (i != 0) {\n"
" java.lang.System.arraycopy(this.$name$, 0, newArray, 0, i);\n"
" }\n"
" java.lang.System.arraycopy(validValues, 0, newArray, i, validCount);\n"
" this.$name$ = newArray;\n"
" }\n"
"}\n");
}
// Emits the Java parsing code for a repeated enum field read in packed form.
// Going by the newer strings below, the generated code:
//   1. reads the payload size with readRawVarint32() and pushes it as a limit;
//   2. makes a first pass over the payload, counting in "arrayLength" only
//      the values that match a case label printed by PrintCaseLabels();
//   3. if the count is non-zero, rewinds to the start of the payload,
//      allocates an array for the existing elements plus the counted values,
//      copies the existing elements, and makes a second pass that appends
//      each matching value;
//   4. pops the limit.
// When no value matches, the field is not assigned.
// NOTE(review): this listing appears to interleave the removed and the added
// lines of the change (e.g. both "length"/"bytes" declarations and two
// "this.$name$ = newArray" statements are present); confirm against the
// actual file.
void RepeatedEnumFieldGenerator::
GenerateMergingCodeFromPacked(io::Printer* printer) const {
printer->Print(variables_,
"int length = input.readRawVarint32();\n"
"int limit = input.pushLimit(length);\n"
"int bytes = input.readRawVarint32();\n"
"int limit = input.pushLimit(bytes);\n"
"// First pass to compute array length.\n"
"int arrayLength = 0;\n"
"int startPos = input.getPosition();\n"
"while (input.getBytesUntilLimit() > 0) {\n"
" input.readInt32();\n"
" arrayLength++;\n"
"}\n"
"input.rewindToPosition(startPos);\n"
"int i = this.$name$ == null ? 0 : this.$name$.length;\n"
"int[] newArray = new int[i + arrayLength];\n"
"if (i != 0) {\n"
" java.lang.System.arraycopy(this.$name$, 0, newArray, 0, i);\n"
" switch (input.readInt32()) {\n");
printer->Indent();
PrintCaseLabels(printer, canonical_values_);
printer->Outdent();
printer->Print(variables_,
" arrayLength++;\n"
" break;\n"
" }\n"
"}\n"
"for (; i < newArray.length; i++) {\n"
" newArray[i] = input.readInt32();\n"
"if (arrayLength != 0) {\n"
" input.rewindToPosition(startPos);\n"
" int i = this.$name$ == null ? 0 : this.$name$.length;\n"
" int[] newArray = new int[i + arrayLength];\n"
" if (i != 0) {\n"
" java.lang.System.arraycopy(this.$name$, 0, newArray, 0, i);\n"
" }\n"
" while (input.getBytesUntilLimit() > 0) {\n"
" int value = input.readInt32();\n"
" switch (value) {\n");
printer->Indent();
printer->Indent();
PrintCaseLabels(printer, canonical_values_);
printer->Outdent();
printer->Outdent();
printer->Print(variables_,
" newArray[i++] = value;\n"
" break;\n"
" }\n"
" }\n"
" this.$name$ = newArray;\n"
"}\n"
"this.$name$ = newArray;\n"
"input.popLimit(limit);\n");
}
......
......@@ -37,6 +37,7 @@
#include <map>
#include <string>
#include <vector>
#include <google/protobuf/compiler/javanano/javanano_field.h>
namespace google {
......@@ -62,6 +63,7 @@ class EnumFieldGenerator : public FieldGenerator {
private:
const FieldDescriptor* descriptor_;
map<string, string> variables_;
vector<string> canonical_values_;
GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(EnumFieldGenerator);
};
......@@ -84,6 +86,7 @@ class AccessorEnumFieldGenerator : public FieldGenerator {
private:
const FieldDescriptor* descriptor_;
map<string, string> variables_;
vector<string> canonical_values_;
GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(AccessorEnumFieldGenerator);
};
......@@ -109,6 +112,7 @@ class RepeatedEnumFieldGenerator : public FieldGenerator {
const FieldDescriptor* descriptor_;
map<string, string> variables_;
vector<string> canonical_values_;
GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(RepeatedEnumFieldGenerator);
};
......
......@@ -412,7 +412,7 @@ string DefaultValue(const Params& params, const FieldDescriptor* field) {
case FieldDescriptor::CPPTYPE_ENUM:
return ClassName(params, field->enum_type()) + "." +
field->default_value_enum()->name();
RenameJavaKeywords(field->default_value_enum()->name());
case FieldDescriptor::CPPTYPE_MESSAGE:
return "null";
......
package protobuf_unittest;
option java_package = "com.google.protobuf.nano";
option java_outer_classname = "EnumValidity";
// Enum under test for wire-level enum validation. The declared numbers are
// 1 through 4; any other number is an invalid value for fields of this type.
enum E {
default = 1; // test java keyword renaming
FOO = 2;
BAR = 3;
BAZ = 4;
}
// Message exercising every enum field shape: optional without and with an
// explicit default, and three repeated/packed pairs.
message M {
// Singular fields; default_e declares BAZ as its default.
optional E optional_e = 1;
optional E default_e = 2 [ default = BAZ ];
// Non-packed / packed pair.
repeated E repeated_e = 3;
repeated E packed_e = 4 [ packed = true ];
// Second pair; field numbers 5 and 6 are redeclared by message Alt.
repeated E repeated_e2 = 5;
repeated E packed_e2 = 6 [ packed = true ];
// Third pair; field number 7 is redeclared by message Alt.
repeated E repeated_e3 = 7;
repeated E packed_e3 = 8 [ packed = true ];
}
// Alternative view of message M: it reuses M's field numbers 5, 6 and 7 but
// declares them in a different form, so bytes serialized from an M can be
// parsed through the other code path for each field.
message Alt {
// M declares 5 as repeated; here it is optional.
optional E repeated_e2_as_optional = 5;
// M declares 6 as packed; here it is non-packed.
repeated E packed_e2_as_non_packed = 6;
// M declares 7 as non-packed; here it is packed.
repeated E non_packed_e3_as_packed = 7 [ packed = true ];
}
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment