diff --git a/src/google/protobuf/compiler/BUILD.bazel b/src/google/protobuf/compiler/BUILD.bazel index a2171c806dc7..6c484eb3da1b 100644 --- a/src/google/protobuf/compiler/BUILD.bazel +++ b/src/google/protobuf/compiler/BUILD.bazel @@ -11,16 +11,16 @@ load( ) load("@rules_proto//proto:defs.bzl", "proto_library") load("//build_defs:arch_tests.bzl", "aarch64_test", "x86_64_test") -load("//build_defs:cpp_opts.bzl", "COPTS", "LINK_OPTS") +load("//build_defs:cpp_opts.bzl", "COPTS") proto_library( name = "plugin_proto", srcs = ["plugin.proto"], + strip_import_prefix = "/src", visibility = [ "//:__pkg__", "//pkg:__pkg__", ], - strip_import_prefix = "/src", deps = ["//:descriptor_proto"], ) @@ -93,11 +93,14 @@ cc_library( "//src/google/protobuf:descriptor_legacy", "//src/google/protobuf:protobuf_nowkt", "//src/google/protobuf/compiler/allowlists", + "@com_google_absl//absl/algorithm", + "@com_google_absl//absl/algorithm:container", "@com_google_absl//absl/container:btree", "@com_google_absl//absl/log:absl_check", "@com_google_absl//absl/log:absl_log", "@com_google_absl//absl/strings", "@com_google_absl//absl/strings:str_format", + "@com_google_absl//absl/types:span", ], ) @@ -309,12 +312,11 @@ cc_library( visibility = ["//src/google/protobuf:__subpackages__"], deps = [ "//src/google/protobuf:protobuf_nowkt", - "@com_google_absl//absl/types:span", "@com_google_absl//absl/container:flat_hash_set", + "@com_google_absl//absl/types:span", ], ) - cc_test( name = "retention_unittest", srcs = ["retention_unittest.cc"], @@ -322,9 +324,9 @@ cc_test( ":importer", ":retention", "//src/google/protobuf/io", + "@com_google_absl//absl/log:die_if_null", "@com_google_googletest//:gtest", "@com_google_googletest//:gtest_main", - "@com_google_absl//absl/log:die_if_null", ], ) diff --git a/src/google/protobuf/compiler/command_line_interface.cc b/src/google/protobuf/compiler/command_line_interface.cc index a5b74029ddae..d0a35432f501 100644 --- a/src/google/protobuf/compiler/command_line_interface.cc +++ b/src/google/protobuf/compiler/command_line_interface.cc @@ -34,8 +34,10 @@ #include "google/protobuf/compiler/command_line_interface.h" +#include "absl/algorithm/container.h" #include "absl/container/btree_set.h" #include "absl/container/flat_hash_map.h" +#include "absl/types/span.h" #include "google/protobuf/compiler/allowlists/allowlists.h" #include "google/protobuf/descriptor_legacy.h" @@ -1028,7 +1030,20 @@ struct VisitImpl { Visitor visitor; void Visit(const FieldDescriptor* descriptor) { visitor(descriptor); } - void Visit(const EnumDescriptor* descriptor) { visitor(descriptor); } + void Visit(const EnumValueDescriptor* descriptor) { visitor(descriptor); } + + void Visit(const EnumDescriptor* descriptor) { + visitor(descriptor); + for (int i = 0; i < descriptor->value_count(); i++) { + Visit(descriptor->value(i)); + } + } + + void Visit(const Descriptor::ExtensionRange* descriptor) { + visitor(descriptor); + } + + void Visit(const OneofDescriptor* descriptor) { visitor(descriptor); } void Visit(const Descriptor* descriptor) { visitor(descriptor); @@ -1048,10 +1063,27 @@ struct VisitImpl { for (int i = 0; i < descriptor->extension_count(); i++) { Visit(descriptor->extension(i)); } + + for (int i = 0; i < descriptor->extension_range_count(); i++) { + Visit(descriptor->extension_range(i)); + } + + for (int i = 0; i < descriptor->oneof_decl_count(); i++) { + Visit(descriptor->oneof_decl(i)); + } } - void Visit(const std::vector& descriptors) { - for (auto* descriptor : descriptors) { + void Visit(const MethodDescriptor* method) { visitor(method); } + + void Visit(const ServiceDescriptor* descriptor) { + visitor(descriptor); + for (int i = 0; i < descriptor->method_count(); i++) { + Visit(descriptor->method(i)); + } + } + + void Visit(absl::Span descriptors) { + for (const FileDescriptor* descriptor : descriptors) { visitor(descriptor); for (int i = 0; i < descriptor->message_type_count(); i++) { Visit(descriptor->message_type(i)); @@ -1062,6 +1094,9 @@ struct VisitImpl { for (int i = 0; i < descriptor->extension_count(); i++) { Visit(descriptor->extension(i)); } + for (int i = 0; i < descriptor->service_count(); i++) { + Visit(descriptor->service(i)); + } } } }; @@ -1069,10 +1104,8 @@ struct VisitImpl { // Visit every node in the descriptors calling `visitor(node)`. // The visitor does not need to handle all possible node types. Types that are // not visitable via `visitor` will be ignored. -// Disclaimer: this is not fully implemented yet to visit _every_ node. -// VisitImpl might need to be updated where needs arise. template -void VisitDescriptors(const std::vector& descriptors, +void VisitDescriptors(absl::Span descriptors, Visitor visitor) { // Provide a fallback to ignore all the nodes that are not interesting to the // input visitor. @@ -1099,8 +1132,151 @@ bool HasReservedFieldNumber(const FieldDescriptor* field) { namespace { std::unique_ptr PopulateSingleSimpleDescriptorDatabase(const std::string& descriptor_set_name); + +// Indicates whether the field is compatible with the given target type. +bool IsFieldCompatible(const FieldDescriptor& field, + FieldOptions::OptionTargetType target_type) { + const RepeatedField& allowed_targets = field.options().targets(); + return allowed_targets.empty() || + absl::c_linear_search(allowed_targets, target_type); +} + +// Converts the OptionTargetType enum to a string suitable for use in error +// messages. +absl::string_view TargetTypeString(FieldOptions::OptionTargetType target_type) { + switch (target_type) { + case FieldOptions::TARGET_TYPE_FILE: + return "file"; + case FieldOptions::TARGET_TYPE_EXTENSION_RANGE: + return "extension range"; + case FieldOptions::TARGET_TYPE_MESSAGE: + return "message"; + case FieldOptions::TARGET_TYPE_FIELD: + return "field"; + case FieldOptions::TARGET_TYPE_ONEOF: + return "oneof"; + case FieldOptions::TARGET_TYPE_ENUM: + return "enum"; + case FieldOptions::TARGET_TYPE_ENUM_ENTRY: + return "enum entry"; + case FieldOptions::TARGET_TYPE_SERVICE: + return "service"; + case FieldOptions::TARGET_TYPE_METHOD: + return "method"; + default: + return "unknown"; + } +} + +// Recursively validates that the options message (or subpiece of an options +// message) is compatible with the given target type. +bool ValidateTargetConstraintsRecursive( + const Message& m, DescriptorPool::ErrorCollector& error_collector, + absl::string_view file_name, FieldOptions::OptionTargetType target_type) { + std::vector fields; + const Reflection* reflection = m.GetReflection(); + reflection->ListFields(m, &fields); + bool success = true; + for (const auto* field : fields) { + if (!IsFieldCompatible(*field, target_type)) { + success = false; + error_collector.RecordError( + file_name, "", nullptr, DescriptorPool::ErrorCollector::OPTION_NAME, + absl::StrCat("Option ", field->full_name(), + " cannot be set on an entity of type `", + TargetTypeString(target_type), "`.")); + } + if (field->type() == FieldDescriptor::TYPE_MESSAGE) { + if (field->is_repeated()) { + int field_size = reflection->FieldSize(m, field); + for (int i = 0; i < field_size; ++i) { + if (!ValidateTargetConstraintsRecursive( + reflection->GetRepeatedMessage(m, field, i), error_collector, + file_name, target_type)) { + success = false; + } + } + } else if (!ValidateTargetConstraintsRecursive( + reflection->GetMessage(m, field), error_collector, + file_name, target_type)) { + success = false; + } + } + } + return success; +} + +// Validates that the options message is correct with respect to target +// constraints, returning true if successful. This function converts the +// options message to a DynamicMessage so that we have visibility into custom +// options. We take the element name as a FunctionRef so that we do not have to +// pay the cost of constructing it unless there is an error. +bool ValidateTargetConstraints(const Message& options, + const DescriptorPool& pool, + DescriptorPool::ErrorCollector& error_collector, + absl::string_view file_name, + FieldOptions::OptionTargetType target_type) { + const Descriptor* descriptor = + pool.FindMessageTypeByName(options.GetTypeName()); + if (descriptor == nullptr) { + // We were unable to find the options message in the descriptor pool. This + // implies that the proto files we are working with do not depend on + // descriptor.proto, in which case there are no custom options to worry + // about. We can therefore skip the use of DynamicMessage. + return ValidateTargetConstraintsRecursive(options, error_collector, + file_name, target_type); + } else { + DynamicMessageFactory factory; + std::unique_ptr dynamic_message( + factory.GetPrototype(descriptor)->New()); + std::string serialized; + ABSL_CHECK(options.SerializeToString(&serialized)); + ABSL_CHECK(dynamic_message->ParseFromString(serialized)); + return ValidateTargetConstraintsRecursive(*dynamic_message, error_collector, + file_name, target_type); + } +} + +// The overloaded GetTargetType() functions below allow us to map from a +// descriptor type to the associated OptionTargetType enum. +FieldOptions::OptionTargetType GetTargetType(const FileDescriptor*) { + return FieldOptions::TARGET_TYPE_FILE; +} + +FieldOptions::OptionTargetType GetTargetType( + const Descriptor::ExtensionRange*) { + return FieldOptions::TARGET_TYPE_EXTENSION_RANGE; +} + +FieldOptions::OptionTargetType GetTargetType(const Descriptor*) { + return FieldOptions::TARGET_TYPE_MESSAGE; +} + +FieldOptions::OptionTargetType GetTargetType(const FieldDescriptor*) { + return FieldOptions::TARGET_TYPE_FIELD; +} + +FieldOptions::OptionTargetType GetTargetType(const OneofDescriptor*) { + return FieldOptions::TARGET_TYPE_ONEOF; +} + +FieldOptions::OptionTargetType GetTargetType(const EnumDescriptor*) { + return FieldOptions::TARGET_TYPE_ENUM; } +FieldOptions::OptionTargetType GetTargetType(const EnumValueDescriptor*) { + return FieldOptions::TARGET_TYPE_ENUM_ENTRY; +} + +FieldOptions::OptionTargetType GetTargetType(const ServiceDescriptor*) { + return FieldOptions::TARGET_TYPE_SERVICE; +} + +FieldOptions::OptionTargetType GetTargetType(const MethodDescriptor*) { + return FieldOptions::TARGET_TYPE_METHOD; +} +} // namespace + int CommandLineInterface::Run(int argc, const char* const argv[]) { Clear(); @@ -1189,31 +1365,50 @@ int CommandLineInterface::Run(int argc, const char* const argv[]) { bool validation_error = false; // Defer exiting so we log more warnings. - VisitDescriptors(parsed_files, [&](const FieldDescriptor* field) { - if (HasReservedFieldNumber(field)) { - const char* error_link = nullptr; - validation_error = true; - std::string error; - if (field->number() >= FieldDescriptor::kFirstReservedNumber && - field->number() <= FieldDescriptor::kLastReservedNumber) { - error = absl::Substitute( - "Field numbers $0 through $1 are reserved " - "for the protocol buffer library implementation.", - FieldDescriptor::kFirstReservedNumber, - FieldDescriptor::kLastReservedNumber); - } else { - error = absl::Substitute( - "Field number $0 is reserved for specific purposes.", - field->number()); - } - if (error_link) { - absl::StrAppend(&error, "(See ", error_link, ")"); - } - static_cast(error_collector.get()) - ->RecordError(field->file()->name(), field->full_name(), nullptr, - DescriptorPool::ErrorCollector::NUMBER, error); - } - }); + VisitDescriptors( + absl::Span(parsed_files.data(), + parsed_files.size()), + [&](const FieldDescriptor* field) { + if (HasReservedFieldNumber(field)) { + const char* error_link = nullptr; + validation_error = true; + std::string error; + if (field->number() >= FieldDescriptor::kFirstReservedNumber && + field->number() <= FieldDescriptor::kLastReservedNumber) { + error = absl::Substitute( + "Field numbers $0 through $1 are reserved " + "for the protocol buffer library implementation.", + FieldDescriptor::kFirstReservedNumber, + FieldDescriptor::kLastReservedNumber); + } else { + error = absl::Substitute( + "Field number $0 is reserved for specific purposes.", + field->number()); + } + if (error_link) { + absl::StrAppend(&error, "(See ", error_link, ")"); + } + static_cast(error_collector.get()) + ->RecordError(field->file()->name(), field->full_name(), nullptr, + DescriptorPool::ErrorCollector::NUMBER, error); + } + }); + + // We visit one file at a time because we need to provide the file name for + // error messages. Usually we can get the file name from any descriptor with + // something like descriptor->file()->name(), but ExtensionRange does not + // support this. + for (const google::protobuf::FileDescriptor* file : parsed_files) { + VisitDescriptors( + absl::Span(&file, 1), + [&](const auto* descriptor) { + if (!ValidateTargetConstraints( + descriptor->options(), *descriptor_pool, *error_collector, + file->name(), GetTargetType(descriptor))) { + validation_error = true; + } + }); + } if (validation_error) { diff --git a/src/google/protobuf/compiler/command_line_interface_unittest.cc b/src/google/protobuf/compiler/command_line_interface_unittest.cc index 272d38bc21c5..a7537757289c 100644 --- a/src/google/protobuf/compiler/command_line_interface_unittest.cc +++ b/src/google/protobuf/compiler/command_line_interface_unittest.cc @@ -2696,6 +2696,221 @@ TEST_F(CommandLineInterfaceTest, PrintFreeFieldNumbers) { #endif } +TEST_F(CommandLineInterfaceTest, TargetTypeEnforcement) { + // The target option on a field indicates what kind of entity it may apply to + // when it is used as an option. This test verifies that the enforcement + // works correctly on all entity types. + CreateTempFile("net/proto2/proto/descriptor.proto", + google::protobuf::DescriptorProto::descriptor()->file()->DebugString()); + CreateTempFile("foo.proto", + R"schema( + syntax = "proto2"; + package protobuf_unittest; + import "net/proto2/proto/descriptor.proto"; + message MyOptions { + optional string file_option = 1 [targets = TARGET_TYPE_FILE]; + optional string extension_range_option = 2 [targets = + TARGET_TYPE_EXTENSION_RANGE]; + optional string message_option = 3 [targets = TARGET_TYPE_MESSAGE]; + optional string field_option = 4 [targets = TARGET_TYPE_FIELD]; + optional string oneof_option = 5 [targets = TARGET_TYPE_ONEOF]; + optional string enum_option = 6 [targets = TARGET_TYPE_ENUM]; + optional string enum_value_option = 7 [targets = + TARGET_TYPE_ENUM_ENTRY]; + optional string service_option = 8 [targets = TARGET_TYPE_SERVICE]; + optional string method_option = 9 [targets = TARGET_TYPE_METHOD]; + } + extend google.protobuf.FileOptions { + optional MyOptions file_options = 5000; + } + extend google.protobuf.ExtensionRangeOptions { + optional MyOptions extension_range_options = 5000; + } + extend google.protobuf.MessageOptions { + optional MyOptions message_options = 5000; + } + extend google.protobuf.FieldOptions { + optional MyOptions field_options = 5000; + } + extend google.protobuf.OneofOptions { + optional MyOptions oneof_options = 5000; + } + extend google.protobuf.EnumOptions { + optional MyOptions enum_options = 5000; + } + extend google.protobuf.EnumValueOptions { + optional MyOptions enum_value_options = 5000; + } + extend google.protobuf.ServiceOptions { + optional MyOptions service_options = 5000; + } + extend google.protobuf.MethodOptions { + optional MyOptions method_options = 5000; + } + option (file_options).enum_option = "x"; + message MyMessage { + option (message_options).enum_option = "x"; + optional int32 i = 1 [(field_options).enum_option = "x"]; + extensions 2 [(extension_range_options).enum_option = "x"]; + oneof o { + option (oneof_options).enum_option = "x"; + bool oneof_field = 3; + } + } + enum MyEnum { + option (enum_options).file_option = "x"; + UNKNOWN_MY_ENUM = 0 [(enum_value_options).enum_option = "x"]; + } + service MyService { + option (service_options).enum_option = "x"; + rpc MyMethod(MyMessage) returns (MyMessage) { + option (method_options).enum_option = "x"; + } + } + )schema"); + + Run("protocol_compiler --plug_out=$tmpdir --proto_path=$tmpdir foo.proto"); + ExpectErrorSubstring( + "Option protobuf_unittest.MyOptions.enum_option " + "cannot be set on an entity of type `file`."); + ExpectErrorSubstring( + "Option protobuf_unittest.MyOptions.enum_option " + "cannot be set on an entity of type `extension range`."); + ExpectErrorSubstring( + "Option protobuf_unittest.MyOptions.enum_option " + "cannot be set on an entity of type `message`."); + ExpectErrorSubstring( + "Option protobuf_unittest.MyOptions.enum_option " + "cannot be set on an entity of type `field`."); + ExpectErrorSubstring( + "Option protobuf_unittest.MyOptions.enum_option " + "cannot be set on an entity of type `oneof`."); + ExpectErrorSubstring( + "Option protobuf_unittest.MyOptions.file_option " + "cannot be set on an entity of type `enum`."); + ExpectErrorSubstring( + "Option protobuf_unittest.MyOptions.enum_option " + "cannot be set on an entity of type `enum entry`."); + ExpectErrorSubstring( + "Option protobuf_unittest.MyOptions.enum_option " + "cannot be set on an entity of type `service`."); + ExpectErrorSubstring( + "Option protobuf_unittest.MyOptions.enum_option " + "cannot be set on an entity of type `method`."); +} + +TEST_F(CommandLineInterfaceTest, TargetTypeEnforcementMultipleTargetsValid) { + CreateTempFile("net/proto2/proto/descriptor.proto", + google::protobuf::DescriptorProto::descriptor()->file()->DebugString()); + CreateTempFile("foo.proto", + R"schema( + syntax = "proto2"; + package protobuf_unittest; + import "net/proto2/proto/descriptor.proto"; + message MyOptions { + optional string message_or_file_option = 1 [ + targets = TARGET_TYPE_MESSAGE, targets = TARGET_TYPE_FILE]; + } + extend google.protobuf.FileOptions { + optional MyOptions file_options = 5000; + } + extend google.protobuf.MessageOptions { + optional MyOptions message_options = 5000; + } + option (file_options).message_or_file_option = "x"; + message MyMessage { + option (message_options).message_or_file_option = "y"; + } + )schema"); + + Run("protocol_compiler --plug_out=$tmpdir --proto_path=$tmpdir foo.proto"); + ExpectNoErrors(); +} + +TEST_F(CommandLineInterfaceTest, TargetTypeEnforcementMultipleTargetsInvalid) { + CreateTempFile("net/proto2/proto/descriptor.proto", + google::protobuf::DescriptorProto::descriptor()->file()->DebugString()); + CreateTempFile("foo.proto", + R"schema( + syntax = "proto2"; + package protobuf_unittest; + import "net/proto2/proto/descriptor.proto"; + message MyOptions { + optional string message_or_file_option = 1 [ + targets = TARGET_TYPE_MESSAGE, targets = TARGET_TYPE_FILE]; + } + extend google.protobuf.EnumOptions { + optional MyOptions enum_options = 5000; + } + enum MyEnum { + MY_ENUM_UNSPECIFIED = 0; + option (enum_options).message_or_file_option = "y"; + } + )schema"); + + Run("protocol_compiler --plug_out=$tmpdir --proto_path=$tmpdir foo.proto"); + ExpectErrorSubstring( + "Option protobuf_unittest.MyOptions.message_or_file_option cannot be set " + "on an entity of type `enum`."); +} + +TEST_F(CommandLineInterfaceTest, + TargetTypeEnforcementMultipleEdgesWithConstraintsValid) { + CreateTempFile("net/proto2/proto/descriptor.proto", + google::protobuf::DescriptorProto::descriptor()->file()->DebugString()); + CreateTempFile("foo.proto", + R"schema( + syntax = "proto2"; + package protobuf_unittest; + import "net/proto2/proto/descriptor.proto"; + message A { + optional B b = 1 [targets = TARGET_TYPE_FILE, + targets = TARGET_TYPE_ENUM]; + } + message B { + optional int32 i = 1 [targets = TARGET_TYPE_ONEOF, + targets = TARGET_TYPE_FILE]; + } + extend google.protobuf.FileOptions { + optional A file_options = 5000; + } + option (file_options).b.i = 42; + )schema"); + + Run("protocol_compiler --plug_out=$tmpdir --proto_path=$tmpdir foo.proto"); + ExpectNoErrors(); +} + +TEST_F(CommandLineInterfaceTest, + TargetTypeEnforcementMultipleEdgesWithConstraintsInvalid) { + CreateTempFile("net/proto2/proto/descriptor.proto", + google::protobuf::DescriptorProto::descriptor()->file()->DebugString()); + CreateTempFile("foo.proto", + R"schema( + syntax = "proto2"; + package protobuf_unittest; + import "net/proto2/proto/descriptor.proto"; + message A { + optional B b = 1 [targets = TARGET_TYPE_ENUM]; + } + message B { + optional int32 i = 1 [targets = TARGET_TYPE_ONEOF]; + } + extend google.protobuf.FileOptions { + optional A file_options = 5000; + } + option (file_options).b.i = 42; + )schema"); + + Run("protocol_compiler --plug_out=$tmpdir --proto_path=$tmpdir foo.proto"); + // We have target constraint violations at two different edges in the file + // options, so let's make sure both are caught. + ExpectErrorSubstring( + "Option protobuf_unittest.A.b cannot be set on an entity of type `file`."); + ExpectErrorSubstring( + "Option protobuf_unittest.B.i cannot be set on an entity of type `file`."); +} + // =================================================================== // Test for --encode and --decode. Note that it would be easier to do this diff --git a/src/google/protobuf/descriptor.h b/src/google/protobuf/descriptor.h index 347f51abfe02..f003c3ae6149 100644 --- a/src/google/protobuf/descriptor.h +++ b/src/google/protobuf/descriptor.h @@ -430,6 +430,8 @@ class PROTOBUF_EXPORT Descriptor : private internal::SymbolBase { // See Descriptor::CopyTo(). void CopyTo(DescriptorProto_ExtensionRange* proto) const; + const ExtensionRangeOptions& options() const; + int start; // inclusive int end; // exclusive @@ -2388,6 +2390,9 @@ PROTOBUF_DEFINE_ARRAY_ACCESSOR(FileDescriptor, service, PROTOBUF_DEFINE_ARRAY_ACCESSOR(FileDescriptor, extension, const FieldDescriptor*) +PROTOBUF_DEFINE_OPTIONS_ACCESSOR(Descriptor::ExtensionRange, + ExtensionRangeOptions) + #undef PROTOBUF_DEFINE_ACCESSOR #undef PROTOBUF_DEFINE_STRING_ACCESSOR #undef PROTOBUF_DEFINE_ARRAY_ACCESSOR