Enable Text format parser to skip unknown short-formed repeated fields.

The text format specs state that short formed repeated field should be valid. For example: ``` repeated_field: [{}, {}] ``` However, this was not supported when text format parser tries to skip **unknown fields**. PiperOrigin-RevId: 493085702
protocolbuffers · Dec 5, 2022 · 6dbd413 · 6dbd413
1 parent d01a912
commit 6dbd413
Showing 1 changed file with 55 additions and 23 deletions.
diff --git a/java/core/src/main/java/com/google/protobuf/TextFormat.java b/java/core/src/main/java/com/google/protobuf/TextFormat.java
@@ -1893,18 +1893,8 @@ private void mergeField(
 
       // Skips unknown fields.
       if (field == null) {
-        // Try to guess the type of this field.
-        // If this field is not a message, there should be a ":" between the
-        // field name and the field value and also the field value should not
-        // start with "{" or "<" which indicates the beginning of a message body.
-        // If there is no ":" or there is a "{" or "<" after ":", this field has
-        // to be a message or the input is ill-formed.
         detectSilentMarker(tokenizer, type, name);
-        if (tokenizer.tryConsume(":") && !tokenizer.lookingAt("{") && !tokenizer.lookingAt("<")) {
-          skipFieldValue(tokenizer);
-        } else {
-          skipFieldMessage(tokenizer, type);
-        }
+        guessFieldTypeAndSkip(tokenizer, type);
         return;
       }
 
@@ -2263,19 +2253,9 @@ private void mergeAnyFieldValue(
     /** Skips the next field including the field's name and value. */
     private void skipField(Tokenizer tokenizer, Descriptor type) throws ParseException {
       String name = consumeFullTypeName(tokenizer);
-
-      // Try to guess the type of this field.
-      // If this field is not a message, there should be a ":" between the
-      // field name and the field value and also the field value should not
-      // start with "{" or "<" which indicates the beginning of a message body.
-      // If there is no ":" or there is a "{" or "<" after ":", this field has
-      // to be a message or the input is ill-formed.
       detectSilentMarker(tokenizer, type, name);
-      if (tokenizer.tryConsume(":") && !tokenizer.lookingAt("<") && !tokenizer.lookingAt("{")) {
-        skipFieldValue(tokenizer);
-      } else {
-        skipFieldMessage(tokenizer, type);
-      }
+      guessFieldTypeAndSkip(tokenizer, type);
+
       // For historical reasons, fields may optionally be separated by commas or
       // semicolons.
       if (!tokenizer.tryConsume(";")) {
@@ -2314,6 +2294,58 @@ private void skipFieldValue(Tokenizer tokenizer) throws ParseException {
         throw tokenizer.parseException("Invalid field value: " + tokenizer.currentToken);
       }
     }
+
+    /**
+     * Tries to guess the type of this field and skip it.
+     *
+     * <p>If this field is not a message, there should be a ":" between the field name and the field
+     * value and also the field value should not start with "{" or "<" which indicates the beginning
+     * of a message body. If there is no ":" or there is a "{" or "<" after ":", this field has to
+     * be a message or the input is ill-formed. For short-formed repeated fields (i.e. with "[]"),
+     * if it is repeated scalar, there must be a ":" between the field name and the starting "[" .
+     */
+    private void guessFieldTypeAndSkip(Tokenizer tokenizer, Descriptor type) throws ParseException {
+      boolean semicolonConsumed = tokenizer.tryConsume(":");
+      if (tokenizer.lookingAt("[")) {
+        // Short repeated field form. If a semicolon was consumed, it could be repeated scalar or
+        // repeated message. If not, it must be repeated message.
+        skipFieldShortFormedRepeated(tokenizer, semicolonConsumed, type);
+      } else if (semicolonConsumed && !tokenizer.lookingAt("{") && !tokenizer.lookingAt("<")) {
+        skipFieldValue(tokenizer);
+      } else {
+        skipFieldMessage(tokenizer, type);
+      }
+    }
+
+    /**
+     * Skips a short-formed repeated field value.
+     *
+     * <p>Reports an error if scalar type is not allowed but showing up inside "[]".
+     */
+    private void skipFieldShortFormedRepeated(
+        Tokenizer tokenizer, boolean scalarAllowed, Descriptor type) throws ParseException {
+      if (!tokenizer.tryConsume("[") || tokenizer.tryConsume("]")) {
+        // Try skipping "[]".
+        return;
+      }
+
+      while (true) {
+        if (tokenizer.lookingAt("{") || tokenizer.lookingAt("<")) {
+          // Try skipping message field inside "[]"
+          skipFieldMessage(tokenizer, type);
+        } else if (scalarAllowed) {
+          // Try skipping scalar field inside "[]".
+          skipFieldValue(tokenizer);
+        } else {
+          throw tokenizer.parseException(
+              "Invalid repeated scalar field: missing \":\" before \"[\".");
+        }
+        if (tokenizer.tryConsume("]")) {
+          break;
+        }
+        tokenizer.consume(",");
+      }
+    }
   }
 
   // =================================================================