From 8beb9705e495c57d55f6547c523d5817336ac35f Mon Sep 17 00:00:00 2001 From: Mike Kruskal Date: Wed, 12 Jun 2024 17:32:26 -0700 Subject: [PATCH] Fix delimited inheritance in all languages. This was previously fixed in C++ (https://github.com/protocolbuffers/protobuf/issues/16549), but not ported to other languages. Delimited field encoding can be inherited by fields where it's invalid, such as non-messages and maps. In these cases, the encoding should be ignored and length-prefixed should be used. PiperOrigin-RevId: 642792988 --- conformance/binary_json_conformance_suite.cc | 32 +++++++++++++++++++ .../test_messages_edition2023.proto | 30 ++++++++++------- .../Reflection/FieldDescriptor.cs | 5 +++ .../java/com/google/protobuf/Descriptors.java | 5 +-- python/google/protobuf/descriptor.py | 3 ++ python/google/protobuf/text_format.py | 5 +-- upb/reflection/field_def.c | 22 ++++++------- 7 files changed, 74 insertions(+), 28 deletions(-) diff --git a/conformance/binary_json_conformance_suite.cc b/conformance/binary_json_conformance_suite.cc index bd0f59b01216..1d2c29800608 100644 --- a/conformance/binary_json_conformance_suite.cc +++ b/conformance/binary_json_conformance_suite.cc @@ -156,6 +156,11 @@ std::string group(uint32_t fieldnum, std::string content) { tag(fieldnum, WireFormatLite::WIRETYPE_END_GROUP)); } +std::string len(uint32_t fieldnum, std::string content) { + return absl::StrCat(tag(fieldnum, WireFormatLite::WIRETYPE_LENGTH_DELIMITED), + delim(content)); +} + std::string GetDefaultValue(FieldDescriptor::Type type) { switch (type) { case FieldDescriptor::TYPE_INT32: @@ -364,6 +369,33 @@ void BinaryAndJsonConformanceSuite::RunDelimitedFieldTests() { TestAllTypesEdition2023 prototype; SetTypeUrl(GetTypeUrl(TestAllTypesEdition2023::GetDescriptor())); + RunValidProtobufTest( + absl::StrCat("ValidNonMessage"), REQUIRED, + field(1, WireFormatLite::WIRETYPE_VARINT, varint(99)), + R"pb(optional_int32: 99)pb"); + + RunValidProtobufTest( + absl::StrCat("ValidLengthPrefixedField"), REQUIRED, + len(18, field(1, WireFormatLite::WIRETYPE_VARINT, varint(99))), + R"pb(optional_nested_message { a: 99 })pb"); + + RunValidProtobufTest( + absl::StrCat("ValidMap.Integer"), REQUIRED, + len(56, + absl::StrCat(field(1, WireFormatLite::WIRETYPE_VARINT, varint(99)), + field(2, WireFormatLite::WIRETYPE_VARINT, varint(87)))), + R"pb(map_int32_int32 { key: 99 value: 87 })pb"); + + RunValidProtobufTest( + absl::StrCat("ValidMap.LengthPrefixed"), REQUIRED, + len(71, absl::StrCat(len(1, "a"), + len(2, field(1, WireFormatLite::WIRETYPE_VARINT, + varint(87))))), + R"pb(map_string_nested_message { + key: "a" + value: { a: 87 } + })pb"); + RunValidProtobufTest( absl::StrCat("ValidDelimitedField.GroupLike"), REQUIRED, group(201, field(202, WireFormatLite::WIRETYPE_VARINT, varint(99))), diff --git a/conformance/test_protos/test_messages_edition2023.proto b/conformance/test_protos/test_messages_edition2023.proto index f85197d6a01d..7affff6fd133 100644 --- a/conformance/test_protos/test_messages_edition2023.proto +++ b/conformance/test_protos/test_messages_edition2023.proto @@ -9,6 +9,7 @@ edition = "2023"; package protobuf_test_messages.editions; +option features.message_encoding = DELIMITED; option java_package = "com.google.protobuf_test_messages.edition2023"; option java_multiple_files = true; option objc_class_prefix = "Editions"; @@ -20,7 +21,8 @@ message ComplexMessage { message TestAllTypesEdition2023 { message NestedMessage { int32 a = 1; - TestAllTypesEdition2023 corecursive = 2; + TestAllTypesEdition2023 corecursive = 2 + [features.message_encoding = LENGTH_PREFIXED]; } enum NestedEnum { @@ -47,8 +49,10 @@ message TestAllTypesEdition2023 { string optional_string = 14; bytes optional_bytes = 15; - NestedMessage optional_nested_message = 18; - ForeignMessageEdition2023 optional_foreign_message = 19; + NestedMessage optional_nested_message = 18 + [features.message_encoding = LENGTH_PREFIXED]; + ForeignMessageEdition2023 optional_foreign_message = 19 + [features.message_encoding = LENGTH_PREFIXED]; NestedEnum optional_nested_enum = 21; ForeignEnumEdition2023 optional_foreign_enum = 22; @@ -56,7 +60,8 @@ message TestAllTypesEdition2023 { string optional_string_piece = 24 [ctype = STRING_PIECE]; string optional_cord = 25 [ctype = CORD]; - TestAllTypesEdition2023 recursive_message = 27; + TestAllTypesEdition2023 recursive_message = 27 + [features.message_encoding = LENGTH_PREFIXED]; // Repeated repeated int32 repeated_int32 = 31; @@ -75,8 +80,10 @@ message TestAllTypesEdition2023 { repeated string repeated_string = 44; repeated bytes repeated_bytes = 45; - repeated NestedMessage repeated_nested_message = 48; - repeated ForeignMessageEdition2023 repeated_foreign_message = 49; + repeated NestedMessage repeated_nested_message = 48 + [features.message_encoding = LENGTH_PREFIXED]; + repeated ForeignMessageEdition2023 repeated_foreign_message = 49 + [features.message_encoding = LENGTH_PREFIXED]; repeated NestedEnum repeated_nested_enum = 51; repeated ForeignEnumEdition2023 repeated_foreign_enum = 52; @@ -163,7 +170,8 @@ message TestAllTypesEdition2023 { oneof oneof_field { uint32 oneof_uint32 = 111; - NestedMessage oneof_nested_message = 112; + NestedMessage oneof_nested_message = 112 + [features.message_encoding = LENGTH_PREFIXED]; string oneof_string = 113; bytes oneof_bytes = 114; bool oneof_bool = 115; @@ -181,8 +189,8 @@ message TestAllTypesEdition2023 { int32 group_int32 = 202; uint32 group_uint32 = 203; } - GroupLikeType groupliketype = 201 [features.message_encoding = DELIMITED]; - GroupLikeType delimited_field = 202 [features.message_encoding = DELIMITED]; + GroupLikeType groupliketype = 201; + GroupLikeType delimited_field = 202; } message ForeignMessageEdition2023 { @@ -204,6 +212,6 @@ message GroupLikeType { } extend TestAllTypesEdition2023 { - GroupLikeType groupliketype = 121 [features.message_encoding = DELIMITED]; - GroupLikeType delimited_ext = 122 [features.message_encoding = DELIMITED]; + GroupLikeType groupliketype = 121; + GroupLikeType delimited_ext = 122; } diff --git a/csharp/src/Google.Protobuf/Reflection/FieldDescriptor.cs b/csharp/src/Google.Protobuf/Reflection/FieldDescriptor.cs index d9bf6b373ffc..0654a25f5b28 100644 --- a/csharp/src/Google.Protobuf/Reflection/FieldDescriptor.cs +++ b/csharp/src/Google.Protobuf/Reflection/FieldDescriptor.cs @@ -412,6 +412,11 @@ internal void CrossLink() throw new DescriptorValidationException(this, $"\"{Proto.TypeName}\" is not a message type."); } messageType = m; + if (m.Proto.Options?.MapEntry == true || ContainingType?.Proto.Options?.MapEntry == true) + { + // Maps can't inherit delimited encoding. + FieldType = FieldType.Message; + } if (Proto.HasDefaultValue) { diff --git a/java/core/src/main/java/com/google/protobuf/Descriptors.java b/java/core/src/main/java/com/google/protobuf/Descriptors.java index 1c5a75c3e806..08f5234af349 100644 --- a/java/core/src/main/java/com/google/protobuf/Descriptors.java +++ b/java/core/src/main/java/com/google/protobuf/Descriptors.java @@ -1297,6 +1297,8 @@ public Type getType() { // since these are used before feature resolution when parsing java feature set defaults // (custom options) into unknown fields. if (type == Type.MESSAGE + && !(messageType != null && messageType.toProto().getOptions().getMapEntry()) + && !(containingType != null && containingType.toProto().getOptions().getMapEntry()) && this.features != null && getFeatures().getMessageEncoding() == FeatureSet.MessageEncoding.DELIMITED) { return Type.GROUP; @@ -1476,8 +1478,7 @@ public boolean hasPresence() { * been upgraded to editions. */ boolean isGroupLike() { - if (getFeatures().getMessageEncoding() - != DescriptorProtos.FeatureSet.MessageEncoding.DELIMITED) { + if (getType() != Type.GROUP) { // Groups are always tag-delimited. return false; } diff --git a/python/google/protobuf/descriptor.py b/python/google/protobuf/descriptor.py index 80c75ae04cd7..d8c6a4352b11 100755 --- a/python/google/protobuf/descriptor.py +++ b/python/google/protobuf/descriptor.py @@ -708,6 +708,9 @@ def type(self): if ( self._GetFeatures().message_encoding == _FEATURESET_MESSAGE_ENCODING_DELIMITED + and self.message_type + and not self.message_type.GetOptions().map_entry + and not self.containing_type.GetOptions().map_entry ): return FieldDescriptor.TYPE_GROUP return self._type diff --git a/python/google/protobuf/text_format.py b/python/google/protobuf/text_format.py index 2244ccca3cb5..bc611092cb5c 100644 --- a/python/google/protobuf/text_format.py +++ b/python/google/protobuf/text_format.py @@ -195,10 +195,7 @@ def _IsGroupLike(field): True if this field is group-like, false otherwise. """ # Groups are always tag-delimited. - if ( - field._GetFeatures().message_encoding - != descriptor._FEATURESET_MESSAGE_ENCODING_DELIMITED - ): + if field.type != descriptor.FieldDescriptor.TYPE_GROUP: return False # Group fields always are always the lowercase type name. diff --git a/upb/reflection/field_def.c b/upb/reflection/field_def.c index d5597f4b2ea2..c18f62184183 100644 --- a/upb/reflection/field_def.c +++ b/upb/reflection/field_def.c @@ -252,8 +252,7 @@ bool _upb_FieldDef_ValidateUtf8(const upb_FieldDef* f) { bool _upb_FieldDef_IsGroupLike(const upb_FieldDef* f) { // Groups are always tag-delimited. - if (UPB_DESC(FeatureSet_message_encoding)(upb_FieldDef_ResolvedFeatures(f)) != - UPB_DESC(FeatureSet_DELIMITED)) { + if (f->type_ != kUpb_FieldType_Group) { return false; } @@ -690,12 +689,6 @@ static void _upb_FieldDef_Create(upb_DefBuilder* ctx, const char* prefix, UPB_DESC(FieldDescriptorProto_has_type_name)(field_proto); f->type_ = (int)UPB_DESC(FieldDescriptorProto_type)(field_proto); - if (f->type_ == kUpb_FieldType_Message && - // TODO: remove once we can deprecate kUpb_FieldType_Group. - UPB_DESC(FeatureSet_message_encoding)(f->resolved_features) == - UPB_DESC(FeatureSet_DELIMITED)) { - f->type_ = kUpb_FieldType_Group; - } if (has_type) { switch (f->type_) { @@ -716,7 +709,7 @@ static void _upb_FieldDef_Create(upb_DefBuilder* ctx, const char* prefix, } } - if (!has_type && has_type_name) { + if ((!has_type && has_type_name) || f->type_ == kUpb_FieldType_Message) { f->type_ = UPB_FIELD_TYPE_UNSPECIFIED; // We'll assign this in resolve_subdef() } else { @@ -866,8 +859,15 @@ static void resolve_subdef(upb_DefBuilder* ctx, const char* prefix, break; case UPB_DEFTYPE_MSG: f->sub.msgdef = def; - f->type_ = kUpb_FieldType_Message; // It appears there is no way of - // this being a group. + f->type_ = kUpb_FieldType_Message; + // TODO: remove once we can deprecate + // kUpb_FieldType_Group. + if (UPB_DESC(FeatureSet_message_encoding)(f->resolved_features) == + UPB_DESC(FeatureSet_DELIMITED) && + !upb_MessageDef_IsMapEntry(def) && + !(f->msgdef && upb_MessageDef_IsMapEntry(f->msgdef))) { + f->type_ = kUpb_FieldType_Group; + } f->has_presence = !upb_FieldDef_IsRepeated(f); break; default: