Skip to content

Commit

Permalink
Also disallow 0 proportion and clean up tests and validator code
Browse files Browse the repository at this point in the history
Co-authored-by: Johan Bergsma <JPBergsma@users.noreply.github.com>
  • Loading branch information
ml-evs and JPBergsma committed Oct 26, 2021
1 parent 04d6d9a commit 32d2cdf
Show file tree
Hide file tree
Showing 5 changed files with 12 additions and 25 deletions.
6 changes: 3 additions & 3 deletions openapi/openapi.json
Original file line number Diff line number Diff line change
Expand Up @@ -3454,20 +3454,20 @@
},
"chemical_formula_reduced": {
"title": "Chemical Formula Reduced",
"pattern": "^([A-Z][a-z]?\\d*)*$",
"pattern": "^([A-Z][a-z]?([2-9]|[1-9]\\d+)?)+$",
"type": "string",
"description": "The reduced chemical formula for a structure as a string with element symbols and integer chemical proportion numbers.\nThe proportion number MUST be omitted if it is 1.\n\n- **Type**: string\n\n- **Requirements/Conventions**:\n - **Support**: SHOULD be supported by all implementations, i.e., SHOULD NOT be `null`.\n - **Query**: MUST be a queryable property.\n However, support for filters using partial string matching with this property is OPTIONAL (i.e., BEGINS WITH, ENDS WITH, and CONTAINS).\n Intricate queries on formula components are instead suggested to be formulated using set-type filter operators on the multi valued `elements` and `elements_ratios` properties.\n - Element symbols MUST have proper capitalization (e.g., `\"Si\"`, not `\"SI\"` for \"silicon\").\n - Elements MUST be placed in alphabetical order, followed by their integer chemical proportion number.\n - For structures with no partial occupation, the chemical proportion numbers are the smallest integers for which the chemical proportion is exactly correct.\n - For structures with partial occupation, the chemical proportion numbers are integers that within reasonable approximation indicate the correct chemical proportions. The precise details of how to perform the rounding is chosen by the API implementation.\n - No spaces or separators are allowed.\n\n- **Examples**:\n - `\"H2NaO\"`\n - `\"ClNa\"`\n - `\"CCaO3\"`\n\n- **Query examples**:\n - A filter that matches an exactly given formula is `chemical_formula_reduced=\"H2NaO\"`.",
"nullable": true
},
"chemical_formula_hill": {
"title": "Chemical Formula Hill",
"pattern": "^([A-Z][a-z]?\\d*)*$",
"pattern": "^([A-Z][a-z]?([2-9]|[1-9]\\d+)?)+$",
"type": "string",
"description": "The chemical formula for a structure in [Hill form](https://dx.doi.org/10.1021/ja02046a005) with element symbols followed by integer chemical proportion numbers. The proportion number MUST be omitted if it is 1.\n\n- **Type**: string\n\n- **Requirements/Conventions**:\n - **Support**: OPTIONAL support in implementations, i.e., MAY be `null`.\n - **Query**: Support for queries on this property is OPTIONAL.\n If supported, only a subset of the filter features MAY be supported.\n - The overall scale factor of the chemical proportions is chosen such that the resulting values are integers that indicate the most chemically relevant unit of which the system is composed.\n For example, if the structure is a repeating unit cell with four hydrogens and four oxygens that represents two hydroperoxide molecules, `chemical_formula_hill` is `\"H2O2\"` (i.e., not `\"HO\"`, nor `\"H4O4\"`).\n - If the chemical insight needed to ascribe a Hill formula to the system is not present, the property MUST be handled as unset.\n - Element symbols MUST have proper capitalization (e.g., `\"Si\"`, not `\"SI\"` for \"silicon\").\n - Elements MUST be placed in [Hill order](https://dx.doi.org/10.1021/ja02046a005), followed by their integer chemical proportion number.\n Hill order means: if carbon is present, it is placed first, and if also present, hydrogen is placed second.\n After that, all other elements are ordered alphabetically.\n If carbon is not present, all elements are ordered alphabetically.\n - If the system has sites with partial occupation and the total occupations of each element do not all sum up to integers, then the Hill formula SHOULD be handled as unset.\n - No spaces or separators are allowed.\n\n- **Examples**:\n - `\"H2O2\"`\n\n- **Query examples**:\n - A filter that matches an exactly given formula is `chemical_formula_hill=\"H2O2\"`."
},
"chemical_formula_anonymous": {
"title": "Chemical Formula Anonymous",
"pattern": "^([A-Z][a-z]?\\d*)*$",
"pattern": "^([A-Z][a-z]?([2-9]|[1-9]\\d+)?)+$",
"type": "string",
"description": "The anonymous formula is the `chemical_formula_reduced`, but where the elements are instead first ordered by their chemical proportion number, and then, in order left to right, replaced by anonymous symbols A, B, C, ..., Z, Aa, Ba, ..., Za, Ab, Bb, ... and so on.\n\n- **Type**: string\n\n- **Requirements/Conventions**:\n - **Support**: SHOULD be supported by all implementations, i.e., SHOULD NOT be `null`.\n - **Query**: MUST be a queryable property.\n However, support for filters using partial string matching with this property is OPTIONAL (i.e., BEGINS WITH, ENDS WITH, and CONTAINS).\n\n- **Examples**:\n - `\"A2B\"`\n - `\"A42B42C16D12E10F9G5\"`\n\n- **Querying**:\n - A filter that matches an exactly given formula is `chemical_formula_anonymous=\"A2B\"`.",
"nullable": true
Expand Down
15 changes: 0 additions & 15 deletions optimade/models/structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -842,7 +842,6 @@ def check_ordered_formula(cls, v, field):
if v is None:
return v

numbers = re.findall(r"(\d+\.?)+", v)
elements = re.findall(r"[A-Z][a-z]?", v)
expected_elements = sorted(elements)

Expand All @@ -859,10 +858,6 @@ def check_ordered_formula(cls, v, field):
f"Cannot use unknown chemical symbols {[elem for elem in elements if elem not in CHEMICAL_SYMBOLS]} in {field.name!r}"
)

if "1" in numbers:
raise ValueError(
f"Must omit proportion '1' from formula {v} in {field.name!r}"
)
if expected_elements != elements:
order = "Hill" if field.name == "chemical_formula_hill" else "alphabetical"
raise ValueError(
Expand All @@ -879,11 +874,6 @@ def check_anonymous_formula(cls, v):
elements = tuple(re.findall(r"[A-Z][a-z]*", v))
numbers = [int(n.strip()) for n in re.split(r"[A-Z][a-z]*", v) if n.strip()]

if 1 in numbers:
raise ValueError(
f"'chemical_formula_anonymous' {v} must omit proportion '1'"
)

expected_labels = ANONYMOUS_ELEMENTS[: len(elements)]
expected_numbers = sorted(numbers, reverse=True)

Expand All @@ -907,11 +897,6 @@ def check_reduced_formulae(cls, value, field):
# Need to remove leading 1 from split and convert to ints
numbers = [int(n) for n in numbers[1:]]

if 0 in numbers:
raise ValueError(
f"{field.name} {value!r} cannot contain chemical proportion of 0."
)

if sys.version_info[1] >= 9:
gcd = math.gcd(*numbers)
else:
Expand Down
2 changes: 1 addition & 1 deletion optimade/models/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ def anonymous_element_generator():
ANONYMOUS_ELEMENTS = tuple(itertools.islice(anonymous_element_generator(), 150))
""" Returns the first 150 values of the anonymous element generator. """

CHEMICAL_FORMULA_REGEXP = r"^([A-Z][a-z]?([02-9]|[1-9]\d+)?)+$"
CHEMICAL_FORMULA_REGEXP = r"^([A-Z][a-z]?([2-9]|[1-9]\d+)?)+$"

EXTRA_SYMBOLS = ["X", "vacancy"]

Expand Down
10 changes: 5 additions & 5 deletions tests/models/test_structures.py
Original file line number Diff line number Diff line change
Expand Up @@ -84,11 +84,11 @@ def test_bad_structures(bad_structures, mapper):
),
(
{"chemical_formula_anonymous": "A1B1"},
"'chemical_formula_anonymous' A1B1 must omit proportion '1'",
"string does not match regex",
),
(
{"chemical_formula_anonymous": "BC1"},
"'chemical_formula_anonymous' BC1 must omit proportion '1'",
"string does not match regex",
),
(
{"chemical_formula_anonymous": "A9C"},
Expand Down Expand Up @@ -136,11 +136,11 @@ def test_bad_structures(bad_structures, mapper):
),
(
{"chemical_formula_reduced": "Ge1Si"},
"Must omit proportion '1' from formula Ge1Si in 'chemical_formula_reduced'",
"string does not match regex",
),
(
{"chemical_formula_reduced": "GeSi1"},
"Must omit proportion '1' from formula GeSi1 in 'chemical_formula_reduced'",
"string does not match regex",
),
(
{"chemical_formula_reduced": "SiGe2"},
Expand Down Expand Up @@ -180,7 +180,7 @@ def test_bad_structures(bad_structures, mapper):
),
(
{"chemical_formula_anonymous": "A44B15C9D4E3F2GHI0J0K0L0"},
"chemical_formula_anonymous 'A44B15C9D4E3F2GHI0J0K0L0' cannot contain chemical proportion of 0.",
"string does not match regex",
),
)

Expand Down
4 changes: 3 additions & 1 deletion tests/models/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,6 @@ class DummyModel(BaseModel):
"LiP5",
"Jn7Qb4", # Regexp does not care about the actual existence of elements
"A5B213CeD3E65F12G",
"A1Be2",
)

bad_formulae = (
Expand All @@ -102,6 +101,9 @@ class DummyModel(BaseModel):
"Ag Cl",
"abcd",
"6F7G",
"A0Be2",
"A1Be2",
"",
)

for formula in good_formulae:
Expand Down

0 comments on commit 32d2cdf

Please sign in to comment.