From 14cf5e21629932dbb5c764452714b621b2e23bf9 Mon Sep 17 00:00:00 2001 From: Matthew Evans Date: Mon, 25 Oct 2021 14:07:02 +0100 Subject: [PATCH] Also disallow 0 proportion and clean up tests and validator code Co-authored-by: Johan Bergsma --- openapi/openapi.json | 6 +++--- optimade/models/structures.py | 15 --------------- optimade/models/utils.py | 2 +- tests/models/test_structures.py | 10 +++++----- tests/models/test_utils.py | 4 +++- 5 files changed, 12 insertions(+), 25 deletions(-) diff --git a/openapi/openapi.json b/openapi/openapi.json index 9b96b8aba..ebfec98fb 100644 --- a/openapi/openapi.json +++ b/openapi/openapi.json @@ -3454,20 +3454,20 @@ }, "chemical_formula_reduced": { "title": "Chemical Formula Reduced", - "pattern": "^([A-Z][a-z]?\\d*)*$", + "pattern": "^([A-Z][a-z]?([2-9]|[1-9]\\d+)?)+$", "type": "string", "description": "The reduced chemical formula for a structure as a string with element symbols and integer chemical proportion numbers.\nThe proportion number MUST be omitted if it is 1.\n\n- **Type**: string\n\n- **Requirements/Conventions**:\n - **Support**: SHOULD be supported by all implementations, i.e., SHOULD NOT be `null`.\n - **Query**: MUST be a queryable property.\n However, support for filters using partial string matching with this property is OPTIONAL (i.e., BEGINS WITH, ENDS WITH, and CONTAINS).\n Intricate queries on formula components are instead suggested to be formulated using set-type filter operators on the multi valued `elements` and `elements_ratios` properties.\n - Element symbols MUST have proper capitalization (e.g., `\"Si\"`, not `\"SI\"` for \"silicon\").\n - Elements MUST be placed in alphabetical order, followed by their integer chemical proportion number.\n - For structures with no partial occupation, the chemical proportion numbers are the smallest integers for which the chemical proportion is exactly correct.\n - For structures with partial occupation, the chemical proportion numbers are integers that within reasonable approximation indicate the correct chemical proportions. The precise details of how to perform the rounding is chosen by the API implementation.\n - No spaces or separators are allowed.\n\n- **Examples**:\n - `\"H2NaO\"`\n - `\"ClNa\"`\n - `\"CCaO3\"`\n\n- **Query examples**:\n - A filter that matches an exactly given formula is `chemical_formula_reduced=\"H2NaO\"`.", "nullable": true }, "chemical_formula_hill": { "title": "Chemical Formula Hill", - "pattern": "^([A-Z][a-z]?\\d*)*$", + "pattern": "^([A-Z][a-z]?([2-9]|[1-9]\\d+)?)+$", "type": "string", "description": "The chemical formula for a structure in [Hill form](https://dx.doi.org/10.1021/ja02046a005) with element symbols followed by integer chemical proportion numbers. The proportion number MUST be omitted if it is 1.\n\n- **Type**: string\n\n- **Requirements/Conventions**:\n - **Support**: OPTIONAL support in implementations, i.e., MAY be `null`.\n - **Query**: Support for queries on this property is OPTIONAL.\n If supported, only a subset of the filter features MAY be supported.\n - The overall scale factor of the chemical proportions is chosen such that the resulting values are integers that indicate the most chemically relevant unit of which the system is composed.\n For example, if the structure is a repeating unit cell with four hydrogens and four oxygens that represents two hydroperoxide molecules, `chemical_formula_hill` is `\"H2O2\"` (i.e., not `\"HO\"`, nor `\"H4O4\"`).\n - If the chemical insight needed to ascribe a Hill formula to the system is not present, the property MUST be handled as unset.\n - Element symbols MUST have proper capitalization (e.g., `\"Si\"`, not `\"SI\"` for \"silicon\").\n - Elements MUST be placed in [Hill order](https://dx.doi.org/10.1021/ja02046a005), followed by their integer chemical proportion number.\n Hill order means: if carbon is present, it is placed first, and if also present, hydrogen is placed second.\n After that, all other elements are ordered alphabetically.\n If carbon is not present, all elements are ordered alphabetically.\n - If the system has sites with partial occupation and the total occupations of each element do not all sum up to integers, then the Hill formula SHOULD be handled as unset.\n - No spaces or separators are allowed.\n\n- **Examples**:\n - `\"H2O2\"`\n\n- **Query examples**:\n - A filter that matches an exactly given formula is `chemical_formula_hill=\"H2O2\"`." }, "chemical_formula_anonymous": { "title": "Chemical Formula Anonymous", - "pattern": "^([A-Z][a-z]?\\d*)*$", + "pattern": "^([A-Z][a-z]?([2-9]|[1-9]\\d+)?)+$", "type": "string", "description": "The anonymous formula is the `chemical_formula_reduced`, but where the elements are instead first ordered by their chemical proportion number, and then, in order left to right, replaced by anonymous symbols A, B, C, ..., Z, Aa, Ba, ..., Za, Ab, Bb, ... and so on.\n\n- **Type**: string\n\n- **Requirements/Conventions**:\n - **Support**: SHOULD be supported by all implementations, i.e., SHOULD NOT be `null`.\n - **Query**: MUST be a queryable property.\n However, support for filters using partial string matching with this property is OPTIONAL (i.e., BEGINS WITH, ENDS WITH, and CONTAINS).\n\n- **Examples**:\n - `\"A2B\"`\n - `\"A42B42C16D12E10F9G5\"`\n\n- **Querying**:\n - A filter that matches an exactly given formula is `chemical_formula_anonymous=\"A2B\"`.", "nullable": true diff --git a/optimade/models/structures.py b/optimade/models/structures.py index f07fcba0b..62a39f46e 100644 --- a/optimade/models/structures.py +++ b/optimade/models/structures.py @@ -842,7 +842,6 @@ def check_ordered_formula(cls, v, field): if v is None: return v - numbers = re.findall(r"(\d+\.?)+", v) elements = re.findall(r"[A-Z][a-z]?", v) expected_elements = sorted(elements) @@ -859,10 +858,6 @@ def check_ordered_formula(cls, v, field): f"Cannot use unknown chemical symbols {[elem for elem in elements if elem not in CHEMICAL_SYMBOLS]} in {field.name!r}" ) - if "1" in numbers: - raise ValueError( - f"Must omit proportion '1' from formula {v} in {field.name!r}" - ) if expected_elements != elements: order = "Hill" if field.name == "chemical_formula_hill" else "alphabetical" raise ValueError( @@ -879,11 +874,6 @@ def check_anonymous_formula(cls, v): elements = tuple(re.findall(r"[A-Z][a-z]*", v)) numbers = [int(n.strip()) for n in re.split(r"[A-Z][a-z]*", v) if n.strip()] - if 1 in numbers: - raise ValueError( - f"'chemical_formula_anonymous' {v} must omit proportion '1'" - ) - expected_labels = ANONYMOUS_ELEMENTS[: len(elements)] expected_numbers = sorted(numbers, reverse=True) @@ -907,11 +897,6 @@ def check_reduced_formulae(cls, value, field): # Need to remove leading 1 from split and convert to ints numbers = [int(n) for n in numbers[1:]] - if 0 in numbers: - raise ValueError( - f"{field.name} {value!r} cannot contain chemical proportion of 0." - ) - if sys.version_info[1] >= 9: gcd = math.gcd(*numbers) else: diff --git a/optimade/models/utils.py b/optimade/models/utils.py index e2351175e..79868879c 100644 --- a/optimade/models/utils.py +++ b/optimade/models/utils.py @@ -202,7 +202,7 @@ def anonymous_element_generator(): ANONYMOUS_ELEMENTS = tuple(itertools.islice(anonymous_element_generator(), 150)) """ Returns the first 150 values of the anonymous element generator. """ -CHEMICAL_FORMULA_REGEXP = r"^([A-Z][a-z]?([02-9]|[1-9]\d+)?)+$" +CHEMICAL_FORMULA_REGEXP = r"^([A-Z][a-z]?([2-9]|[1-9]\d+)?)+$" EXTRA_SYMBOLS = ["X", "vacancy"] diff --git a/tests/models/test_structures.py b/tests/models/test_structures.py index 4fa911188..09e9a626b 100644 --- a/tests/models/test_structures.py +++ b/tests/models/test_structures.py @@ -84,11 +84,11 @@ def test_bad_structures(bad_structures, mapper): ), ( {"chemical_formula_anonymous": "A1B1"}, - "'chemical_formula_anonymous' A1B1 must omit proportion '1'", + "string does not match regex", ), ( {"chemical_formula_anonymous": "BC1"}, - "'chemical_formula_anonymous' BC1 must omit proportion '1'", + "string does not match regex", ), ( {"chemical_formula_anonymous": "A9C"}, @@ -136,11 +136,11 @@ def test_bad_structures(bad_structures, mapper): ), ( {"chemical_formula_reduced": "Ge1Si"}, - "Must omit proportion '1' from formula Ge1Si in 'chemical_formula_reduced'", + "string does not match regex", ), ( {"chemical_formula_reduced": "GeSi1"}, - "Must omit proportion '1' from formula GeSi1 in 'chemical_formula_reduced'", + "string does not match regex", ), ( {"chemical_formula_reduced": "SiGe2"}, @@ -180,7 +180,7 @@ def test_bad_structures(bad_structures, mapper): ), ( {"chemical_formula_anonymous": "A44B15C9D4E3F2GHI0J0K0L0"}, - "chemical_formula_anonymous 'A44B15C9D4E3F2GHI0J0K0L0' cannot contain chemical proportion of 0.", + "string does not match regex", ), ) diff --git a/tests/models/test_utils.py b/tests/models/test_utils.py index 61d10d471..adf322167 100644 --- a/tests/models/test_utils.py +++ b/tests/models/test_utils.py @@ -93,7 +93,6 @@ class DummyModel(BaseModel): "LiP5", "Jn7Qb4", # Regexp does not care about the actual existence of elements "A5B213CeD3E65F12G", - "A1Be2", ) bad_formulae = ( @@ -102,6 +101,9 @@ class DummyModel(BaseModel): "Ag Cl", "abcd", "6F7G", + "A0Be2", + "A1Be2", + "", ) for formula in good_formulae: