Commit 30bbbe99 authored by Thomas Van Lenten's avatar Thomas Van Lenten Committed by GitHub

Merge pull request #1934 from thomasvl/objc_strings_with_null

Never use strlen on utf8 runs so null characters work.
parents 237f321e 1a6c1d09
...@@ -144,22 +144,6 @@ static void GPBWriteRawLittleEndian64(GPBOutputBufferState *state, ...@@ -144,22 +144,6 @@ static void GPBWriteRawLittleEndian64(GPBOutputBufferState *state,
GPBWriteRawByte(state, (int32_t)(value >> 56) & 0xFF); GPBWriteRawByte(state, (int32_t)(value >> 56) & 0xFF);
} }
#if defined(DEBUG) && DEBUG && !defined(NS_BLOCK_ASSERTIONS)
+ (void)load {
// This test exists to verify that CFStrings with embedded NULLs will work
// for us. If this Assert fails, all code below that depends on
// CFStringGetCStringPtr will NOT work properly on strings that contain
// embedded NULLs, and we do get that in some protobufs.
// Note that this will not be compiled in release.
// We didn't feel that just keeping it in a unit test was sufficient because
// the Protobuf unit tests are only run when somebody is actually working
// on protobufs.
CFStringRef zeroTest = CFSTR("Test\0String");
const char *cString = CFStringGetCStringPtr(zeroTest, kCFStringEncodingUTF8);
NSAssert(cString == NULL, @"Serious Error");
}
#endif // DEBUG && !defined(NS_BLOCK_ASSERTIONS)
- (void)dealloc { - (void)dealloc {
[self flush]; [self flush];
[state_.output close]; [state_.output close];
...@@ -282,19 +266,15 @@ static void GPBWriteRawLittleEndian64(GPBOutputBufferState *state, ...@@ -282,19 +266,15 @@ static void GPBWriteRawLittleEndian64(GPBOutputBufferState *state,
} }
- (void)writeStringNoTag:(const NSString *)value { - (void)writeStringNoTag:(const NSString *)value {
// If you are concerned about embedded NULLs see the test in size_t length = [value lengthOfBytesUsingEncoding:NSUTF8StringEncoding];
// +load above.
const char *quickString =
CFStringGetCStringPtr((CFStringRef)value, kCFStringEncodingUTF8);
size_t length = (quickString != NULL)
? strlen(quickString)
: [value lengthOfBytesUsingEncoding:NSUTF8StringEncoding];
GPBWriteRawVarint32(&state_, (int32_t)length); GPBWriteRawVarint32(&state_, (int32_t)length);
if (length == 0) { if (length == 0) {
return; return;
} }
const char *quickString =
CFStringGetCStringPtr((CFStringRef)value, kCFStringEncodingUTF8);
// Fast path: Most strings are short, if the buffer already has space, // Fast path: Most strings are short, if the buffer already has space,
// add to it directly. // add to it directly.
NSUInteger bufferBytesLeft = state_.size - state_.position; NSUInteger bufferBytesLeft = state_.size - state_.position;
...@@ -1038,14 +1018,7 @@ size_t GPBComputeBoolSizeNoTag(BOOL value) { ...@@ -1038,14 +1018,7 @@ size_t GPBComputeBoolSizeNoTag(BOOL value) {
} }
size_t GPBComputeStringSizeNoTag(NSString *value) { size_t GPBComputeStringSizeNoTag(NSString *value) {
// If you are concerned about embedded NULLs see the test in NSUInteger length = [value lengthOfBytesUsingEncoding:NSUTF8StringEncoding];
// +load above.
const char *quickString =
CFStringGetCStringPtr((CFStringRef)value, kCFStringEncodingUTF8);
NSUInteger length =
(quickString != NULL)
? strlen(quickString)
: [value lengthOfBytesUsingEncoding:NSUTF8StringEncoding];
return GPBComputeRawVarint32SizeForInteger(length) + length; return GPBComputeRawVarint32SizeForInteger(length) + length;
} }
......
...@@ -1052,7 +1052,15 @@ static void AppendStringEscaped(NSString *toPrint, NSMutableString *destStr) { ...@@ -1052,7 +1052,15 @@ static void AppendStringEscaped(NSString *toPrint, NSMutableString *destStr) {
case '\'': [destStr appendString:@"\\\'"]; break; case '\'': [destStr appendString:@"\\\'"]; break;
case '\\': [destStr appendString:@"\\\\"]; break; case '\\': [destStr appendString:@"\\\\"]; break;
default: default:
[destStr appendFormat:@"%C", aChar]; // This differs slightly from the C++ code in that the C++ doesn't
// generate UTF8; it looks at the string in UTF8, but escapes every
// byte > 0x7E.
if (aChar < 0x20) {
[destStr appendFormat:@"\\%d%d%d",
(aChar / 64), ((aChar % 64) / 8), (aChar % 8)];
} else {
[destStr appendFormat:@"%C", aChar];
}
break; break;
} }
} }
......
...@@ -193,6 +193,32 @@ ...@@ -193,6 +193,32 @@
} }
} }
- (void)assertWriteStringNoTag:(NSData*)data
value:(NSString *)value
context:(NSString *)contextMessage {
NSOutputStream* rawOutput = [NSOutputStream outputStreamToMemory];
GPBCodedOutputStream* output =
[GPBCodedOutputStream streamWithOutputStream:rawOutput];
[output writeStringNoTag:value];
[output flush];
NSData* actual =
[rawOutput propertyForKey:NSStreamDataWrittenToMemoryStreamKey];
XCTAssertEqualObjects(data, actual, @"%@", contextMessage);
// Try different block sizes.
for (int blockSize = 1; blockSize <= 16; blockSize *= 2) {
rawOutput = [NSOutputStream outputStreamToMemory];
output = [GPBCodedOutputStream streamWithOutputStream:rawOutput
bufferSize:blockSize];
[output writeStringNoTag:value];
[output flush];
actual = [rawOutput propertyForKey:NSStreamDataWrittenToMemoryStreamKey];
XCTAssertEqualObjects(data, actual, @"%@", contextMessage);
}
}
- (void)testWriteVarint1 { - (void)testWriteVarint1 {
[self assertWriteVarint:bytes(0x00) value:0]; [self assertWriteVarint:bytes(0x00) value:0];
} }
...@@ -337,4 +363,64 @@ ...@@ -337,4 +363,64 @@
XCTAssertEqualObjects(rawBytes, goldenData); XCTAssertEqualObjects(rawBytes, goldenData);
} }
- (void)testCFStringGetCStringPtrAndStringsWithNullChars {
// This test exists to verify that CFStrings with embedded NULLs still expose
// their raw buffer if they are backed by UTF8 storage. If this fails, the
// quick/direct access paths in GPBCodedOutputStream that depend on
// CFStringGetCStringPtr need to be re-evalutated (maybe just removed).
// And yes, we do get NULLs in strings from some servers.
char zeroTest[] = "\0Test\0String";
// Note: there is a \0 at the end of this since it is a c-string.
NSString *asNSString = [[NSString alloc] initWithBytes:zeroTest
length:sizeof(zeroTest)
encoding:NSUTF8StringEncoding];
const char *cString =
CFStringGetCStringPtr((CFStringRef)asNSString, kCFStringEncodingUTF8);
XCTAssertTrue(cString != NULL);
// Again, if the above assert fails, then it means NSString no longer exposes
// the raw utf8 storage of a string created from utf8 input, so the code using
// CFStringGetCStringPtr in GPBCodedOutputStream will still work (it will take
// a different code path); but the optimizations for when
// CFStringGetCStringPtr does work could possibly go away.
XCTAssertEqual(sizeof(zeroTest),
[asNSString lengthOfBytesUsingEncoding:NSUTF8StringEncoding]);
XCTAssertTrue(0 == memcmp(cString, zeroTest, sizeof(zeroTest)));
[asNSString release];
}
- (void)testWriteStringsWithZeroChar {
// Unicode allows `\0` as a character, and NSString is a class cluster, so
// there are a few different classes that could end up beind a given string.
// Historically, we've seen differences based on constant strings in code and
// strings built via the NSString apis. So this round trips them to ensure
// they are acting as expected.
NSArray<NSString *> *strs = @[
@"\0at start",
@"in\0middle",
@"at end\0",
];
int i = 0;
for (NSString *str in strs) {
NSData *asUTF8 = [str dataUsingEncoding:NSUTF8StringEncoding];
NSMutableData *expected = [NSMutableData data];
uint8_t lengthByte = (uint8_t)asUTF8.length;
[expected appendBytes:&lengthByte length:1];
[expected appendData:asUTF8];
NSString *context = [NSString stringWithFormat:@"Loop %d - Literal", i];
[self assertWriteStringNoTag:expected value:str context:context];
// Force a new string to be built which gets a different class from the
// NSString class cluster than the literal did.
NSString *str2 = [NSString stringWithFormat:@"%@", str];
context = [NSString stringWithFormat:@"Loop %d - Built", i];
[self assertWriteStringNoTag:expected value:str2 context:context];
++i;
}
}
@end @end
Markdown is supported
0% or
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment