From 0924dfe1e466ca4c52bd748b98ea2b53420f348e Mon Sep 17 00:00:00 2001 From: Stephan Schroevers Date: Sat, 2 Dec 2023 15:45:04 +0100 Subject: [PATCH] Fix JDK regex support (#888) Summary of changes: - Fix the test resources introduced by #783 by moving the `regex` fields, such that the test framework does not skip them with a "Not a valid test case" message. - Revert the changes introduced by #815, as those are simply incorrect. - Extend the test coverage introduced by #815 by (a) updating the test regexes to match their intended semantics and (b) include a few negative test cases. - Partially revert the change introduced by #783: the use of `Matcher#find()` is correct, but the `hasStartAnchor` and `hasEndAnchor` logic introduces more bugs than the issue it aims to solve. - Extend the test coverage introduced by #783, by introducing regexes that are not covered by the `hasStartAnchor`/`hasEndAnchor` logic. - Update the Joni regular expression integration such that it passes more of the test cases. - Disable the "trailing newline" test cases, as these are currently not handled correctly by either regex implementation. --- .../schema/regex/JDKRegularExpression.java | 21 +------ .../schema/regex/JoniRegularExpression.java | 2 +- .../networknt/schema/regex/Issue814Test.java | 46 +++++++++++---- src/test/resources/draft2020-12/issue495.json | 56 +++++++++++++++++-- src/test/resources/draft2020-12/issue782.json | 14 ++++- 5 files changed, 99 insertions(+), 40 deletions(-) diff --git a/src/main/java/com/networknt/schema/regex/JDKRegularExpression.java b/src/main/java/com/networknt/schema/regex/JDKRegularExpression.java index 75045dae4..6be0c7eb7 100644 --- a/src/main/java/com/networknt/schema/regex/JDKRegularExpression.java +++ b/src/main/java/com/networknt/schema/regex/JDKRegularExpression.java @@ -1,33 +1,16 @@ package com.networknt.schema.regex; -import java.util.regex.Matcher; import java.util.regex.Pattern; class JDKRegularExpression implements RegularExpression { private final Pattern pattern; - private final boolean hasStartAnchor; - private final boolean hasEndAnchor; JDKRegularExpression(String regex) { - // The patterns in JSON Schema are not implicitly anchored so we must - // use Matcher.find(). However, this method does not honor the end - // anchor when immediately preceded by a quantifier (e.g., ?, *, +). - // To make this work in all cases, we wrap the pattern in a group. - this.hasStartAnchor = '^' == regex.charAt(0); - this.hasEndAnchor = '$' == regex.charAt(regex.length() - 1); - String pattern = regex; - if (this.hasEndAnchor) { - pattern = pattern.substring(this.hasStartAnchor ? 1 : 0, pattern.length() - 1); - pattern = '(' + pattern + ")$"; - if (this.hasStartAnchor) pattern = '^' + pattern; - } - this.pattern = Pattern.compile(pattern); + this.pattern = Pattern.compile(regex); } @Override public boolean matches(String value) { - Matcher matcher = this.pattern.matcher(value); - return matcher.find() && (!this.hasStartAnchor || 0 == matcher.start()) && (!this.hasEndAnchor || matcher.end() == value.length()); + return this.pattern.matcher(value).find(); } - } \ No newline at end of file diff --git a/src/main/java/com/networknt/schema/regex/JoniRegularExpression.java b/src/main/java/com/networknt/schema/regex/JoniRegularExpression.java index 19b529515..bad42510d 100644 --- a/src/main/java/com/networknt/schema/regex/JoniRegularExpression.java +++ b/src/main/java/com/networknt/schema/regex/JoniRegularExpression.java @@ -21,7 +21,7 @@ class JoniRegularExpression implements RegularExpression { .replace("\\S", "[^ \\f\\n\\r\\t\\v\\u00a0\\u1680\\u2000-\\u200a\\u2028\\u2029\\u202f\\u205f\\u3000\\ufeff]"); byte[] bytes = s.getBytes(StandardCharsets.UTF_8); - this.pattern = new Regex(bytes, 0, bytes.length, Option.NONE, UTF8Encoding.INSTANCE, Syntax.ECMAScript); + this.pattern = new Regex(bytes, 0, bytes.length, Option.SINGLELINE, UTF8Encoding.INSTANCE, Syntax.ECMAScript); } @Override diff --git a/src/test/java/com/networknt/schema/regex/Issue814Test.java b/src/test/java/com/networknt/schema/regex/Issue814Test.java index a55cef222..15ab31ce9 100644 --- a/src/test/java/com/networknt/schema/regex/Issue814Test.java +++ b/src/test/java/com/networknt/schema/regex/Issue814Test.java @@ -8,40 +8,62 @@ class Issue814Test { @Test void jdkTypePattern() { - JDKRegularExpression ex = new JDKRegularExpression("^list|date|time|string|enum|int|double|long|boolean|number$"); + JDKRegularExpression ex = new JDKRegularExpression("^(list|date|time|string|enum|int|double|long|boolean|number)$"); assertTrue(ex.matches("list")); assertTrue(ex.matches("string")); assertTrue(ex.matches("boolean")); assertTrue(ex.matches("number")); assertTrue(ex.matches("enum")); + assertFalse(ex.matches("listZ")); + assertFalse(ex.matches("AenumZ")); + assertFalse(ex.matches("Anumber")); } @Test void jdkOptionsPattern() { - JDKRegularExpression ex = new JDKRegularExpression("^\\d*|[a-zA-Z_]+$"); - assertTrue(ex.matches("external")); - assertTrue(ex.matches("external_gte")); - assertTrue(ex.matches("force")); - assertTrue(ex.matches("internal")); + JDKRegularExpression ex = new JDKRegularExpression("^\\d|[a-zA-Z_]$"); + assertTrue(ex.matches("5")); + assertTrue(ex.matches("55")); + assertTrue(ex.matches("5%")); + assertTrue(ex.matches("a")); + assertTrue(ex.matches("aa")); + assertTrue(ex.matches("%a")); + assertTrue(ex.matches("%_")); + assertTrue(ex.matches("55aa")); + assertTrue(ex.matches("5%%a")); + assertFalse(ex.matches("")); + assertFalse(ex.matches("%")); + assertFalse(ex.matches("a5")); } @Test void joniTypePattern() { - JoniRegularExpression ex = new JoniRegularExpression("^list|date|time|string|enum|int|double|long|boolean|number$"); + JoniRegularExpression ex = new JoniRegularExpression("^(list|date|time|string|enum|int|double|long|boolean|number)$"); assertTrue(ex.matches("list")); assertTrue(ex.matches("string")); assertTrue(ex.matches("boolean")); assertTrue(ex.matches("number")); assertTrue(ex.matches("enum")); + assertFalse(ex.matches("listZ")); + assertFalse(ex.matches("AenumZ")); + assertFalse(ex.matches("Anumber")); } @Test void joniOptionsPattern() { - JoniRegularExpression ex = new JoniRegularExpression("^\\d*|[a-zA-Z_]+$"); - assertTrue(ex.matches("internal")); - assertTrue(ex.matches("external")); - assertTrue(ex.matches("external_gte")); - assertTrue(ex.matches("force")); + JoniRegularExpression ex = new JoniRegularExpression("^\\d|[a-zA-Z_]$"); + assertTrue(ex.matches("5")); + assertTrue(ex.matches("55")); + assertTrue(ex.matches("5%")); + assertTrue(ex.matches("a")); + assertTrue(ex.matches("aa")); + assertTrue(ex.matches("%a")); + assertTrue(ex.matches("%_")); + assertTrue(ex.matches("55aa")); + assertTrue(ex.matches("5%%a")); + assertFalse(ex.matches("")); + assertFalse(ex.matches("%")); + assertFalse(ex.matches("a5")); } } diff --git a/src/test/resources/draft2020-12/issue495.json b/src/test/resources/draft2020-12/issue495.json index abcfec51b..a16a37216 100644 --- a/src/test/resources/draft2020-12/issue495.json +++ b/src/test/resources/draft2020-12/issue495.json @@ -1,30 +1,52 @@ [ { "description": "issue495 using ECMA-262", - "regex": "ecma-262", "schema": { "$schema": "https://json-schema.org/draft/2020-12/schema", - "pattern": "^[a-z]{1,10}$", + "patternProperties": { + "^[a-z]{1,10}$": true, + "(^1$)": true + }, "unevaluatedProperties": false }, "tests": [ { "description": "an expected property name", + "regex": "ecma-262", "data": { "aaa": 3 }, "valid": true }, + { + "description": "another expected property name", + "regex": "jdk", + "data": { "1": 3 }, + "valid": true + }, { "description": "trailing newline", + "regex": "ecma-262", "data": { "aaa\n": 3 }, - "valid": false + "valid": false, + "disabled": true, + "comment": "Test fails" + }, + { + "description": "another trailing newline", + "regex": "jdk", + "data": { "1\n": 3 }, + "valid": false, + "disabled": true, + "comment": "Test fails" }, { "description": "embedded newline", + "regex": "ecma-262", "data": { "aaa\nbbb": 3 }, "valid": false }, { "description": "leading newline", + "regex": "ecma-262", "data": { "\nbbb": 3 }, "valid": false } @@ -32,30 +54,52 @@ }, { "description": "issue495 using Java Pattern", - "regex": "jdk", "schema": { "$schema": "https://json-schema.org/draft/2020-12/schema", - "pattern": "^[a-z]{1,10}$", + "patternProperties": { + "^[a-z]{1,10}$": true, + "(^1$)": true + }, "unevaluatedProperties": false }, "tests": [ { "description": "an expected property name", + "regex": "jdk", "data": { "aaa": 3 }, "valid": true }, + { + "description": "another expected property name", + "regex": "jdk", + "data": { "1": 3 }, + "valid": true + }, { "description": "trailing newline", + "regex": "jdk", "data": { "aaa\n": 3 }, - "valid": false + "valid": false, + "disabled": true, + "comment": "Test fails" + }, + { + "description": "another trailing newline", + "regex": "jdk", + "data": { "1\n": 3 }, + "valid": false, + "disabled": true, + "comment": "Test fails" }, { "description": "embedded newline", + "regex": "jdk", "data": { "aaa\nbbb": 3 }, "valid": false }, { "description": "leading newline", + "regex": "jdk", "data": { "\nbbb": 3 }, "valid": false } diff --git a/src/test/resources/draft2020-12/issue782.json b/src/test/resources/draft2020-12/issue782.json index 4c28a0ec3..1d93f33f9 100644 --- a/src/test/resources/draft2020-12/issue782.json +++ b/src/test/resources/draft2020-12/issue782.json @@ -1,7 +1,6 @@ [ { "description": "issue782 using ECMA-262", - "regex": "ecma-262", "schema": { "$schema": "https://json-schema.org/draft/2020-12/schema", "patternProperties": { @@ -14,31 +13,37 @@ "tests": [ { "description": "regexes may be anchored to the start of the property name, 1", + "regex": "ecma-262", "data": { "x-api-id": 3 }, "valid": true }, { "description": "regexes may be anchored to the start of the property name, 2", + "regex": "ecma-262", "data": { "ax-api-id": 3 }, "valid": false }, { "description": "regexes may be anchored to the end of the property name, 1", + "regex": "ecma-262", "data": { "api-id-y-": 3 }, "valid": true }, { "description": "regexes may be anchored to the end of the property name, 2", + "regex": "ecma-262", "data": { "y-api-id": 3 }, "valid": false }, { "description": "regexes may be anchored to both ends of the property name, 1", + "regex": "ecma-262", "data": { "z-": 3 }, "valid": true }, { "description": "regexes may be anchored to both ends of the property name, 2", + "regex": "ecma-262", "data": { "az-api-id": 3 }, "valid": false } @@ -46,7 +51,6 @@ }, { "description": "issue782 using Java Pattern", - "regex": "jdk", "schema": { "$schema": "https://json-schema.org/draft/2020-12/schema", "patternProperties": { @@ -59,31 +63,37 @@ "tests": [ { "description": "regexes may be anchored to the start of the property name, 1", + "regex": "jdk", "data": { "x-api-id": 3 }, "valid": true }, { "description": "regexes may be anchored to the start of the property name, 2", + "regex": "jdk", "data": { "ax-api-id": 3 }, "valid": false }, { "description": "regexes may be anchored to the end of the property name, 1", + "regex": "jdk", "data": { "api-id-y-": 3 }, "valid": true }, { "description": "regexes may be anchored to the end of the property name, 2", + "regex": "jdk", "data": { "y-api-id": 3 }, "valid": false }, { "description": "regexes may be anchored to both ends of the property name, 1", + "regex": "jdk", "data": { "z-": 3 }, "valid": true }, { "description": "regexes may be anchored to both ends of the property name, 2", + "regex": "jdk", "data": { "az-api-id": 3 }, "valid": false }