diff --git a/objectivec/GPBCodedOutputStream.m b/objectivec/GPBCodedOutputStream.m index 63ba8068c7..251a159c29 100644 --- a/objectivec/GPBCodedOutputStream.m +++ b/objectivec/GPBCodedOutputStream.m @@ -144,22 +144,6 @@ static void GPBWriteRawLittleEndian64(GPBOutputBufferState *state, GPBWriteRawByte(state, (int32_t)(value >> 56) & 0xFF); } -#if defined(DEBUG) && DEBUG && !defined(NS_BLOCK_ASSERTIONS) -+ (void)load { - // This test exists to verify that CFStrings with embedded NULLs will work - // for us. If this Assert fails, all code below that depends on - // CFStringGetCStringPtr will NOT work properly on strings that contain - // embedded NULLs, and we do get that in some protobufs. - // Note that this will not be compiled in release. - // We didn't feel that just keeping it in a unit test was sufficient because - // the Protobuf unit tests are only run when somebody is actually working - // on protobufs. - CFStringRef zeroTest = CFSTR("Test\0String"); - const char *cString = CFStringGetCStringPtr(zeroTest, kCFStringEncodingUTF8); - NSAssert(cString == NULL, @"Serious Error"); -} -#endif // DEBUG && !defined(NS_BLOCK_ASSERTIONS) - - (void)dealloc { [self flush]; [state_.output close]; @@ -282,19 +266,15 @@ static void GPBWriteRawLittleEndian64(GPBOutputBufferState *state, } - (void)writeStringNoTag:(const NSString *)value { - // If you are concerned about embedded NULLs see the test in - // +load above. - const char *quickString = - CFStringGetCStringPtr((CFStringRef)value, kCFStringEncodingUTF8); - size_t length = (quickString != NULL) - ? strlen(quickString) - : [value lengthOfBytesUsingEncoding:NSUTF8StringEncoding]; + size_t length = [value lengthOfBytesUsingEncoding:NSUTF8StringEncoding]; GPBWriteRawVarint32(&state_, (int32_t)length); - if (length == 0) { return; } + const char *quickString = + CFStringGetCStringPtr((CFStringRef)value, kCFStringEncodingUTF8); + // Fast path: Most strings are short, if the buffer already has space, // add to it directly. NSUInteger bufferBytesLeft = state_.size - state_.position; @@ -1038,14 +1018,7 @@ size_t GPBComputeBoolSizeNoTag(BOOL value) { } size_t GPBComputeStringSizeNoTag(NSString *value) { - // If you are concerned about embedded NULLs see the test in - // +load above. - const char *quickString = - CFStringGetCStringPtr((CFStringRef)value, kCFStringEncodingUTF8); - NSUInteger length = - (quickString != NULL) - ? strlen(quickString) - : [value lengthOfBytesUsingEncoding:NSUTF8StringEncoding]; + NSUInteger length = [value lengthOfBytesUsingEncoding:NSUTF8StringEncoding]; return GPBComputeRawVarint32SizeForInteger(length) + length; } diff --git a/objectivec/GPBUtilities.m b/objectivec/GPBUtilities.m index ee84fb4505..ad876ed4fc 100644 --- a/objectivec/GPBUtilities.m +++ b/objectivec/GPBUtilities.m @@ -1052,7 +1052,15 @@ static void AppendStringEscaped(NSString *toPrint, NSMutableString *destStr) { case '\'': [destStr appendString:@"\\\'"]; break; case '\\': [destStr appendString:@"\\\\"]; break; default: - [destStr appendFormat:@"%C", aChar]; + // This differs slightly from the C++ code in that the C++ doesn't + // generate UTF8; it looks at the string in UTF8, but escapes every + // byte > 0x7E. + if (aChar < 0x20) { + [destStr appendFormat:@"\\%d%d%d", + (aChar / 64), ((aChar % 64) / 8), (aChar % 8)]; + } else { + [destStr appendFormat:@"%C", aChar]; + } break; } } diff --git a/objectivec/Tests/GPBCodedOuputStreamTests.m b/objectivec/Tests/GPBCodedOuputStreamTests.m index 0723b645f7..2ad326beb9 100644 --- a/objectivec/Tests/GPBCodedOuputStreamTests.m +++ b/objectivec/Tests/GPBCodedOuputStreamTests.m @@ -193,6 +193,32 @@ } } +- (void)assertWriteStringNoTag:(NSData*)data + value:(NSString *)value + context:(NSString *)contextMessage { + NSOutputStream* rawOutput = [NSOutputStream outputStreamToMemory]; + GPBCodedOutputStream* output = + [GPBCodedOutputStream streamWithOutputStream:rawOutput]; + [output writeStringNoTag:value]; + [output flush]; + + NSData* actual = + [rawOutput propertyForKey:NSStreamDataWrittenToMemoryStreamKey]; + XCTAssertEqualObjects(data, actual, @"%@", contextMessage); + + // Try different block sizes. + for (int blockSize = 1; blockSize <= 16; blockSize *= 2) { + rawOutput = [NSOutputStream outputStreamToMemory]; + output = [GPBCodedOutputStream streamWithOutputStream:rawOutput + bufferSize:blockSize]; + [output writeStringNoTag:value]; + [output flush]; + + actual = [rawOutput propertyForKey:NSStreamDataWrittenToMemoryStreamKey]; + XCTAssertEqualObjects(data, actual, @"%@", contextMessage); + } +} + - (void)testWriteVarint1 { [self assertWriteVarint:bytes(0x00) value:0]; } @@ -337,4 +363,64 @@ XCTAssertEqualObjects(rawBytes, goldenData); } +- (void)testCFStringGetCStringPtrAndStringsWithNullChars { + // This test exists to verify that CFStrings with embedded NULLs still expose + // their raw buffer if they are backed by UTF8 storage. If this fails, the + // quick/direct access paths in GPBCodedOutputStream that depend on + // CFStringGetCStringPtr need to be re-evalutated (maybe just removed). + // And yes, we do get NULLs in strings from some servers. + + char zeroTest[] = "\0Test\0String"; + // Note: there is a \0 at the end of this since it is a c-string. + NSString *asNSString = [[NSString alloc] initWithBytes:zeroTest + length:sizeof(zeroTest) + encoding:NSUTF8StringEncoding]; + const char *cString = + CFStringGetCStringPtr((CFStringRef)asNSString, kCFStringEncodingUTF8); + XCTAssertTrue(cString != NULL); + // Again, if the above assert fails, then it means NSString no longer exposes + // the raw utf8 storage of a string created from utf8 input, so the code using + // CFStringGetCStringPtr in GPBCodedOutputStream will still work (it will take + // a different code path); but the optimizations for when + // CFStringGetCStringPtr does work could possibly go away. + + XCTAssertEqual(sizeof(zeroTest), + [asNSString lengthOfBytesUsingEncoding:NSUTF8StringEncoding]); + XCTAssertTrue(0 == memcmp(cString, zeroTest, sizeof(zeroTest))); + [asNSString release]; +} + +- (void)testWriteStringsWithZeroChar { + // Unicode allows `\0` as a character, and NSString is a class cluster, so + // there are a few different classes that could end up beind a given string. + // Historically, we've seen differences based on constant strings in code and + // strings built via the NSString apis. So this round trips them to ensure + // they are acting as expected. + + NSArray *strs = @[ + @"\0at start", + @"in\0middle", + @"at end\0", + ]; + int i = 0; + for (NSString *str in strs) { + NSData *asUTF8 = [str dataUsingEncoding:NSUTF8StringEncoding]; + NSMutableData *expected = [NSMutableData data]; + uint8_t lengthByte = (uint8_t)asUTF8.length; + [expected appendBytes:&lengthByte length:1]; + [expected appendData:asUTF8]; + + NSString *context = [NSString stringWithFormat:@"Loop %d - Literal", i]; + [self assertWriteStringNoTag:expected value:str context:context]; + + // Force a new string to be built which gets a different class from the + // NSString class cluster than the literal did. + NSString *str2 = [NSString stringWithFormat:@"%@", str]; + context = [NSString stringWithFormat:@"Loop %d - Built", i]; + [self assertWriteStringNoTag:expected value:str2 context:context]; + + ++i; + } +} + @end