Skip to content

Commit

Permalink
[ObjC] Update MessageSet Parsing.
Browse files Browse the repository at this point in the history
- Follow upb and only accept the first value for `type_id` and `message`
- Reflow some of the logic to hopefully make things a little easier to follow/clear.
- Validate some more assertions about the extensions for a MessageSet.

PiperOrigin-RevId: 652545240
  • Loading branch information
thomasvl committed Jul 24, 2024
1 parent 3aa491c commit 5fa8cd9
Show file tree
Hide file tree
Showing 5 changed files with 207 additions and 79 deletions.
16 changes: 9 additions & 7 deletions objectivec/GPBDescriptor.m
Original file line number Diff line number Diff line change
Expand Up @@ -1146,13 +1146,15 @@ - (instancetype)initWithExtensionDescription:(GPBExtensionDescription *)desc
#if defined(DEBUG) && DEBUG && !defined(NS_BLOCK_ASSERTIONS)
NSAssert(usesClassRefs, @"Internal error: all extensions should have class refs");

// This is also checked by the generator.
// If the extension is a MessageSet extension, then it must be a message field.
NSAssert(
((desc->options & GPBExtensionSetWireFormat) == 0) || desc->dataType == GPBDataTypeMessage,
@"Internal error: If a MessageSet extension is set, the data type must be a message.");
// NOTE: Could also check that the exteneded class is a MessageSet, but that would force the ObjC
// runtime to start up that class and that isn't desirable here.
// These are also checked by the generator.
if ((desc->options & GPBExtensionSetWireFormat) != 0) {
NSAssert(desc->dataType == GPBDataTypeMessage,
@"Internal error: If a MessageSet extension is set, the data type must be a message.");
NSAssert((desc->options & GPBExtensionRepeated) == 0,
@"Internal Error: MessageSet extension can't be repeated.");
// NOTE: Could also check that the extended class is a MessageSet, but that would force the
// ObjC runtime to start up that class and that isn't desirable here.
}
#endif

if ((self = [super init])) {
Expand Down
164 changes: 99 additions & 65 deletions objectivec/GPBMessage.m
Original file line number Diff line number Diff line change
Expand Up @@ -742,16 +742,12 @@ static void DecodeSingleValueFromInputStream(GPBExtensionDescriptor *extension,
message:targetMessage
extensionRegistry:extensionRegistry];
} else {
// description->dataType == GPBDataTypeMessage
if (GPBExtensionIsWireFormat(description)) {
// For MessageSet fields the message length will have already been
// read.
[targetMessage mergeFromCodedInputStream:input
extensionRegistry:extensionRegistry
endingTag:0];
} else {
[input readMessage:targetMessage extensionRegistry:extensionRegistry];
}
// description->dataType == GPBDataTypeMessage
#if defined(DEBUG) && DEBUG && !defined(NS_BLOCK_ASSERTIONS)
NSCAssert(!GPBExtensionIsWireFormat(description),
@"Internal error: got a MessageSet extension when not expected.");
#endif
[input readMessage:targetMessage extensionRegistry:extensionRegistry];
}
// Nothing to add below since the caller provided the message (and added it).
nsValue = nil;
Expand Down Expand Up @@ -2183,81 +2179,85 @@ - (void)parseMessageSet:(GPBCodedInputStream *)input
extensionRegistry:(id<GPBExtensionRegistry>)extensionRegistry {
uint32_t typeId = 0;
NSData *rawBytes = nil;
GPBExtensionDescriptor *extension = nil;
GPBCodedInputStreamState *state = &input->state_;
BOOL gotType = NO;
BOOL gotBytes = NO;
while (true) {
uint32_t tag = GPBCodedInputStreamReadTag(state);
if (tag == 0) {
if (tag == GPBWireFormatMessageSetItemEndTag || tag == 0) {
break;
}

if (tag == GPBWireFormatMessageSetTypeIdTag) {
typeId = GPBCodedInputStreamReadUInt32(state);
if (typeId != 0) {
extension = [extensionRegistry extensionForDescriptor:[self descriptor] fieldNumber:typeId];
uint32_t tmp = GPBCodedInputStreamReadUInt32(state);
// Spec says only use the first value.
if (!gotType) {
gotType = YES;
typeId = tmp;
}
} else if (tag == GPBWireFormatMessageSetMessageTag) {
rawBytes = [GPBCodedInputStreamReadRetainedBytesNoCopy(state) autorelease];
if (gotBytes) {
// Skip over the payload instead of collecting it.
[input skipField:tag];
} else {
rawBytes = [GPBCodedInputStreamReadRetainedBytesNoCopy(state) autorelease];
gotBytes = YES;
}
} else {
// Don't capture unknowns within the message set impl group.
if (![input skipField:tag]) {
break;
}
}
}

[input checkLastTagWas:GPBWireFormatMessageSetItemEndTag];
// If we get here because of end of input (tag zero) or the wrong end tag (within the skipField:),
// this will error.
GPBCodedInputStreamCheckLastTagWas(state, GPBWireFormatMessageSetItemEndTag);

if (rawBytes != nil && typeId != 0) {
if (extension != nil) {
GPBCodedInputStream *newInput = [[GPBCodedInputStream alloc] initWithData:rawBytes];
@try {
ExtensionMergeFromInputStream(extension, extension.packable, newInput, extensionRegistry,
self);
} @finally {
[newInput release];
}
} else {
GPBUnknownFieldSet *unknownFields = GetOrMakeUnknownFields(self);
// rawBytes was created via a NoCopy, so it can be reusing a
// subrange of another NSData that might go out of scope as things
// unwind, so a copy is needed to ensure what is saved in the
// unknown fields stays valid.
NSData *cloned = [NSData dataWithData:rawBytes];
[unknownFields mergeMessageSetMessage:typeId data:cloned];
}
if (!gotType || !gotBytes) {
// upb_Decoder_DecodeMessageSetItem doesn't keep this partial as an unknown field, it just drops
// it, so do the same thing.
return;
}
}

- (void)parseUnknownField:(GPBCodedInputStream *)input
extensionRegistry:(id<GPBExtensionRegistry>)extensionRegistry
tag:(uint32_t)tag {
int32_t fieldNumber = GPBWireFormatGetTagFieldNumber(tag);
GPBDescriptor *descriptor = [self descriptor];
GPBExtensionDescriptor *extension = [extensionRegistry extensionForDescriptor:descriptor
fieldNumber:fieldNumber];
if (extension == nil) {
if (descriptor.wireFormat && GPBWireFormatMessageSetItemTag == tag) {
[self parseMessageSet:input extensionRegistry:extensionRegistry];
return;
GPBExtensionDescriptor *extension = [extensionRegistry extensionForDescriptor:[self descriptor]
fieldNumber:typeId];
if (extension) {
#if defined(DEBUG) && DEBUG && !defined(NS_BLOCK_ASSERTIONS)
NSAssert(extension.dataType == GPBDataTypeMessage,
@"Internal Error: MessageSet extension must be a message field.");
NSAssert(GPBExtensionIsWireFormat(extension->description_),
@"Internal Error: MessageSet extension must have message_set_wire_format set.");
NSAssert(!GPBExtensionIsRepeated(extension->description_),
@"Internal Error: MessageSet extension can't be repeated.");
#endif
// Look up the existing one to merge to or create a new one.
GPBMessage *targetMessage = [self getExistingExtension:extension];
if (!targetMessage) {
GPBDescriptor *descriptor = [extension.msgClass descriptor];
targetMessage = [[descriptor.messageClass alloc] init];
[self setExtension:extension value:targetMessage];
[targetMessage release];
}
GPBCodedInputStream *newInput = [[GPBCodedInputStream alloc] initWithData:rawBytes];
@try {
[targetMessage mergeFromCodedInputStream:newInput
extensionRegistry:extensionRegistry
endingTag:0];
} @finally {
[newInput release];
}
} else {
GPBWireFormat wireType = GPBWireFormatGetTagWireType(tag);
if (extension.wireType == wireType) {
ExtensionMergeFromInputStream(extension, extension.packable, input, extensionRegistry, self);
return;
}
// Primitive, repeated types can be packed on unpacked on the wire, and are
// parsed either way.
if ([extension isRepeated] && !GPBDataTypeIsObject(extension->description_->dataType) &&
(extension.alternateWireType == wireType)) {
ExtensionMergeFromInputStream(extension, !extension.packable, input, extensionRegistry, self);
return;
}
}
GPBUnknownFieldSet *unknownFields = GetOrMakeUnknownFields(self);
if (![unknownFields mergeFieldFrom:tag input:input]) {
[NSException raise:NSInternalInconsistencyException
format:@"Internal Error: Unable to parse unknown field %u", tag];
// The extension isn't in the registry, but it was well formed, so the whole group structure
// gets preserved as an unknown field.
GPBUnknownFieldSet *unknownFields = GetOrMakeUnknownFields(self);
// rawBytes was created via a NoCopy, so it can be reusing a
// subrange of another NSData that might go out of scope as things
// unwind, so a copy is needed to ensure what is saved in the
// unknown fields stays valid.
NSData *cloned = [NSData dataWithData:rawBytes];
[unknownFields mergeMessageSetMessage:typeId data:cloned];
}
}

Expand Down Expand Up @@ -2475,6 +2475,7 @@ - (void)mergeFromCodedInputStream:(GPBCodedInputStream *)input
uint32_t tag = 0;
NSUInteger startingIndex = 0;
NSArray *fields = descriptor->fields_;
BOOL isMessageSetWireFormat = descriptor.isWireFormat;
NSUInteger numFields = fields.count;
while (YES) {
BOOL merged = NO;
Expand Down Expand Up @@ -2551,7 +2552,40 @@ - (void)mergeFromCodedInputStream:(GPBCodedInputStream *)input

if (merged) continue; // On to the next tag

[self parseUnknownField:input extensionRegistry:extensionRegistry tag:tag];
if (isMessageSetWireFormat) {
if (GPBWireFormatMessageSetItemTag == tag) {
[self parseMessageSet:input extensionRegistry:extensionRegistry];
continue; // On to the next tag
}
} else {
// ObjC Runtime currently doesn't track if a message supported extensions, so the check is
// always done.
GPBExtensionDescriptor *extension =
[extensionRegistry extensionForDescriptor:descriptor
fieldNumber:GPBWireFormatGetTagFieldNumber(tag)];
if (extension) {
GPBWireFormat wireType = GPBWireFormatGetTagWireType(tag);
if (extension.wireType == wireType) {
ExtensionMergeFromInputStream(extension, extension.packable, input, extensionRegistry,
self);
continue; // On to the next tag
}
// Primitive, repeated types can be packed or unpacked on the wire, and are
// parsed either way.
if ([extension isRepeated] && !GPBDataTypeIsObject(extension->description_->dataType) &&
(extension.alternateWireType == wireType)) {
ExtensionMergeFromInputStream(extension, !extension.packable, input, extensionRegistry,
self);
continue; // On to the next tag
}
}
}

GPBUnknownFieldSet *unknownFields = GetOrMakeUnknownFields(self);
if (![unknownFields mergeFieldFrom:tag input:input]) {
[NSException raise:NSInternalInconsistencyException
format:@"Internal Error: Unable to parse unknown field %u", tag];
}
} // while(YES)
}

Expand Down
83 changes: 81 additions & 2 deletions objectivec/Tests/GPBWireFormatTests.m
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,12 @@
// license that can be found in the LICENSE file or at
// https://developers.google.com/open-source/licenses/bsd

#import "GPBTestUtilities.h"

#import "GPBCodedInputStream.h"
#import "GPBMessage_PackagePrivate.h"
#import "GPBTestUtilities.h"
#import "GPBUnknownField.h"
#import "GPBUnknownField_PackagePrivate.h"
#import "GPBUnknownFields.h"
#import "objectivec/Tests/Unittest.pbobjc.h"
#import "objectivec/Tests/UnittestMset.pbobjc.h"

Expand Down Expand Up @@ -197,6 +198,84 @@ - (void)testParseMessageSet {
XCTAssertEqualObjects(unknownField.lengthDelimitedList[0], [NSData dataWithBytes:"bar" length:3]);
}

// Builds a single MessageSet item that carries two type_id values and two payloads, then checks
// that parsing it as a real MessageSet keeps only the first type_id/payload pair and leaves
// nothing behind in the unknown fields.
- (void)testParseMessageSet_FirstValueSticks {
  GPBExtensionDescriptor* firstExtension = [MSetMessageExtension1 messageSetExtension];
  GPBExtensionDescriptor* secondExtension = [MSetMessageExtension2 messageSetExtension];

  // The two payloads that end up side by side in the same item.
  MSetMessageExtension1* firstPayload = [MSetMessageExtension1 message];
  firstPayload.i = 123;
  MSetMessageExtension2* secondPayload = [MSetMessageExtension2 message];
  secondPayload.str = @"foo";

  // One item, with both the type_id and the message field present twice.
  MSetRawBreakableMessageSet_Item* doubledItem = [MSetRawBreakableMessageSet_Item message];
  [doubledItem.typeIdArray addValue:firstExtension.fieldNumber];
  [doubledItem.messageArray addObject:[firstPayload data]];
  [doubledItem.typeIdArray addValue:secondExtension.fieldNumber];
  [doubledItem.messageArray addObject:[secondPayload data]];

  MSetRawBreakableMessageSet* rawSet = [MSetRawBreakableMessageSet message];
  [rawSet.itemArray addObject:doubledItem];
  NSData* wireBytes = [rawSet data];

  // Parse as a MSetMessage and check the contents.
  NSError* parseError = nil;
  MSetMessage* parsedSet = [MSetMessage parseFromData:wireBytes
                                    extensionRegistry:[MSetUnittestMsetRoot extensionRegistry]
                                                error:&parseError];
  XCTAssertNotNil(parsedSet);
  XCTAssertNil(parseError);

  // Only the first extension should have been picked up.
  XCTAssertTrue([parsedSet hasExtension:firstExtension]);
  XCTAssertEqual([[parsedSet getExtension:firstExtension] i], 123);
  XCTAssertFalse([parsedSet hasExtension:secondExtension]);

  // The duplicate values must not have been stashed as unknown fields.
  GPBUnknownFields* unknowns = [[[GPBUnknownFields alloc] initFromMessage:parsedSet] autorelease];
  XCTAssertTrue(unknowns.empty);
}

// Builds three incomplete MessageSet items (type_id only, payload only, and completely empty)
// and checks that parsing succeeds while producing no extensions and no unknown fields.
- (void)testParseMessageSet_PartialValuesDropped {
  MSetRawBreakableMessageSet* rawSet = [MSetRawBreakableMessageSet message];

  // First item: a type ID but no payload.
  MSetRawBreakableMessageSet_Item* typeOnlyItem = [MSetRawBreakableMessageSet_Item message];
  [typeOnlyItem.typeIdArray addValue:[MSetMessageExtension1 messageSetExtension].fieldNumber];
  [rawSet.itemArray addObject:typeOnlyItem];

  // Second item: a payload but no type ID.
  MSetMessageExtension2* orphanPayload = [MSetMessageExtension2 message];
  orphanPayload.str = @"foo";
  MSetRawBreakableMessageSet_Item* payloadOnlyItem = [MSetRawBreakableMessageSet_Item message];
  [payloadOnlyItem.messageArray addObject:[orphanPayload data]];
  [rawSet.itemArray addObject:payloadOnlyItem];

  // Third item: neither type ID nor payload.
  MSetRawBreakableMessageSet_Item* emptyItem = [MSetRawBreakableMessageSet_Item message];
  [rawSet.itemArray addObject:emptyItem];

  NSData* wireBytes = [rawSet data];

  // Parse as a MSetMessage and check the contents.
  NSError* parseError = nil;
  MSetMessage* parsedSet = [MSetMessage parseFromData:wireBytes
                                    extensionRegistry:[MSetUnittestMsetRoot extensionRegistry]
                                                error:&parseError];
  XCTAssertNotNil(parsedSet);
  XCTAssertNil(parseError);

  // None because they were all partial and dropped.
  XCTAssertEqual([parsedSet extensionsCurrentlySet].count, (NSUInteger)0);

  GPBUnknownFields* unknowns = [[[GPBUnknownFields alloc] initFromMessage:parsedSet] autorelease];
  XCTAssertTrue(unknowns.empty);
}

- (void)assertFieldsInOrder:(NSData*)data {
GPBCodedInputStream* input = [GPBCodedInputStream streamWithData:data];
int32_t previousTag = 0;
Expand Down
10 changes: 10 additions & 0 deletions objectivec/Tests/unittest_mset.proto
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ option objc_class_prefix = "MSet";
// A message with message_set_wire_format.
message Message {
option message_set_wire_format = true;

extensions 4 to max;
}

Expand All @@ -40,3 +41,12 @@ message RawMessageSet {
required bytes message = 3;
}
}

// MessageSet wire format is equivalent to this, but since the fields here
// are repeated they can be left off or present more than once, which lets
// tests build incomplete or duplicated items.
message RawBreakableMessageSet {
repeated group Item = 1 {
repeated int32 type_id = 2;
repeated bytes message = 3;
}
}
13 changes: 8 additions & 5 deletions src/google/protobuf/compiler/objectivec/extension.cc
Original file line number Diff line number Diff line change
Expand Up @@ -39,11 +39,14 @@ ExtensionGenerator::ExtensionGenerator(
ABSL_CHECK(!descriptor->is_map())
<< "error: Extension is a map<>!"
<< " That used to be blocked by the compiler.";
ABSL_CHECK(
!descriptor->containing_type()->options().message_set_wire_format() ||
descriptor->type() == FieldDescriptor::TYPE_MESSAGE)
<< "error: Extension to a message_set_wire_format message and the type "
"wasn't a message!";
if (descriptor->containing_type()->options().message_set_wire_format()) {
ABSL_CHECK(descriptor->type() == FieldDescriptor::TYPE_MESSAGE)
<< "error: Extension to a message_set_wire_format message and the type "
"wasn't a message!";
ABSL_CHECK(!descriptor->is_repeated())
<< "error: Extension to a message_set_wire_format message should not "
"be repeated!";
}
}

void ExtensionGenerator::GenerateMembersHeader(io::Printer* printer) const {
Expand Down

0 comments on commit 5fa8cd9

Please sign in to comment.