From 6dbd4131fa6b2ad29b2b1b827f21fc61b160aeeb Mon Sep 17 00:00:00 2001 From: Protobuf Team Bot Date: Mon, 5 Dec 2022 12:48:09 -0800 Subject: [PATCH] Enable Text format parser to skip unknown short-formed repeated fields. The text format specs state that short formed repeated field should be valid. For example: ``` repeated_field: [{}, {}] ``` However, this was not supported when text format parser tries to skip **unknown fields**. PiperOrigin-RevId: 493085702 --- .../java/com/google/protobuf/TextFormat.java | 78 +++++++++++++------ 1 file changed, 55 insertions(+), 23 deletions(-) diff --git a/java/core/src/main/java/com/google/protobuf/TextFormat.java b/java/core/src/main/java/com/google/protobuf/TextFormat.java index 65e403ec0132..8c3ac5caf10b 100644 --- a/java/core/src/main/java/com/google/protobuf/TextFormat.java +++ b/java/core/src/main/java/com/google/protobuf/TextFormat.java @@ -1893,18 +1893,8 @@ private void mergeField( // Skips unknown fields. if (field == null) { - // Try to guess the type of this field. - // If this field is not a message, there should be a ":" between the - // field name and the field value and also the field value should not - // start with "{" or "<" which indicates the beginning of a message body. - // If there is no ":" or there is a "{" or "<" after ":", this field has - // to be a message or the input is ill-formed. detectSilentMarker(tokenizer, type, name); - if (tokenizer.tryConsume(":") && !tokenizer.lookingAt("{") && !tokenizer.lookingAt("<")) { - skipFieldValue(tokenizer); - } else { - skipFieldMessage(tokenizer, type); - } + guessFieldTypeAndSkip(tokenizer, type); return; } @@ -2263,19 +2253,9 @@ private void mergeAnyFieldValue( /** Skips the next field including the field's name and value. */ private void skipField(Tokenizer tokenizer, Descriptor type) throws ParseException { String name = consumeFullTypeName(tokenizer); - - // Try to guess the type of this field. 
- // If this field is not a message, there should be a ":" between the - // field name and the field value and also the field value should not - // start with "{" or "<" which indicates the beginning of a message body. - // If there is no ":" or there is a "{" or "<" after ":", this field has - // to be a message or the input is ill-formed. detectSilentMarker(tokenizer, type, name); - if (tokenizer.tryConsume(":") && !tokenizer.lookingAt("<") && !tokenizer.lookingAt("{")) { - skipFieldValue(tokenizer); - } else { - skipFieldMessage(tokenizer, type); - } + guessFieldTypeAndSkip(tokenizer, type); + // For historical reasons, fields may optionally be separated by commas or // semicolons. if (!tokenizer.tryConsume(";")) { @@ -2314,6 +2294,58 @@ private void skipFieldValue(Tokenizer tokenizer) throws ParseException { throw tokenizer.parseException("Invalid field value: " + tokenizer.currentToken); } } + + /** + * Tries to guess the type of this field and skip it. + * + *

<p>If this field is not a message, there should be a ":" between the field name and the field + * value and also the field value should not start with "{" or "<" which indicates the beginning + * of a message body. If there is no ":" or there is a "{" or "<" after ":", this field has to + * be a message or the input is ill-formed. For short-formed repeated fields (i.e. with "[]"), + * if it is repeated scalar, there must be a ":" between the field name and the starting "[". + */ + private void guessFieldTypeAndSkip(Tokenizer tokenizer, Descriptor type) throws ParseException { + boolean semicolonConsumed = tokenizer.tryConsume(":"); + if (tokenizer.lookingAt("[")) { + // Short repeated field form. If a colon (":") was consumed, it could be repeated scalar or + // repeated message. If not, it must be repeated message. + skipFieldShortFormedRepeated(tokenizer, semicolonConsumed, type); + } else if (semicolonConsumed && !tokenizer.lookingAt("{") && !tokenizer.lookingAt("<")) { + skipFieldValue(tokenizer); + } else { + skipFieldMessage(tokenizer, type); + } + } + + /** + * Skips a short-formed repeated field value. + * + *

<p>Reports an error if a scalar value shows up inside "[]" when scalar values are not allowed. + */ + private void skipFieldShortFormedRepeated( + Tokenizer tokenizer, boolean scalarAllowed, Descriptor type) throws ParseException { + if (!tokenizer.tryConsume("[") || tokenizer.tryConsume("]")) { + // Try skipping "[]". + return; + } + + while (true) { + if (tokenizer.lookingAt("{") || tokenizer.lookingAt("<")) { + // Try skipping message field inside "[]". + skipFieldMessage(tokenizer, type); + } else if (scalarAllowed) { + // Try skipping scalar field inside "[]". + skipFieldValue(tokenizer); + } else { + throw tokenizer.parseException( + "Invalid repeated scalar field: missing \":\" before \"[\"."); + } + if (tokenizer.tryConsume("]")) { + break; + } + tokenizer.consume(","); + } + } } // =================================================================