Skip to content

Commit

Permalink
OcrdMets.add_file: Enforce mets:fileGrp/@USE to be valid xs:ID, #746
Browse files Browse the repository at this point in the history
  • Loading branch information
kba committed Dec 7, 2021
1 parent 93b4d14 commit f291b38
Show file tree
Hide file tree
Showing 4 changed files with 16 additions and 4 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,10 @@ Versioned according to [Semantic Versioning](http://semver.org/).

## Unreleased

Changed:

* `OcrdMets.add_file`: `mets:fileGrp/@USE` must be valid `xs:ID`, #746

## [2.28.0] - 2021-11-30

Added:
Expand Down
4 changes: 3 additions & 1 deletion ocrd_models/ocrd_models/ocrd_mets.py
Original file line number Diff line number Diff line change
Expand Up @@ -296,7 +296,9 @@ def add_file(self, fileGrp, mimetype=None, url=None, ID=None, pageId=None, force
if not fileGrp:
raise ValueError("Must set fileGrp of the mets:file")
if not REGEX_FILE_ID.fullmatch(ID):
raise ValueError("Invalid syntax for mets:file/@ID %s" % ID)
raise ValueError("Invalid syntax for mets:file/@ID %s (not an xs:ID)" % ID)
# mets:fileGrp/@USE is checked against the same pattern as mets:file/@ID;
# the error must report the offending fileGrp value, not the file ID.
if not REGEX_FILE_ID.fullmatch(fileGrp):
    raise ValueError("Invalid syntax for mets:fileGrp/@USE %s (not an xs:ID)" % fileGrp)
el_fileGrp = self._tree.getroot().find(".//mets:fileGrp[@USE='%s']" % (fileGrp), NS)
if el_fileGrp is None:
el_fileGrp = self.add_file_group(fileGrp)
Expand Down
6 changes: 6 additions & 0 deletions tests/model/test_ocrd_mets.py
Original file line number Diff line number Diff line change
Expand Up @@ -255,5 +255,11 @@ def test_merge(self):
self.mets.merge(other_mets, fileGrp_mapping={'OCR-D-IMG': 'FOO'})
assert len(self.mets.file_groups) == 18

def test_invalid_filegrp(self):
"""
add_file must raise ValueError for a fileGrp that fails the mets:fileGrp/@USE syntax check.

See https://github.com/OCR-D/core/issues/746
"""
mets = OcrdMets(content="<mets></mets>")
with self.assertRaisesRegex(ValueError, "Invalid syntax for mets:fileGrp/@USE"):
mets.add_file('1:! bad filegrp', ID="foo123", pageId="foobar")

if __name__ == '__main__':
main(__file__)
6 changes: 3 additions & 3 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -289,9 +289,9 @@ def test_make_file_id_570(self):
def test_make_file_id_605(self):
"""https://github.com/OCR-D/core/pull/605"""
mets = OcrdMets.empty_mets()
f = mets.add_file('1:!GRP', ID='FOO_0001', pageId='phys0001')
f = mets.add_file('2:!GRP', ID='FOO_0002', pageId='phys0002')
self.assertEqual(make_file_id(f, '2:!GRP'), 'id_2_GRP_0002')
f = mets.add_file('GRP1', ID='FOO_0001', pageId='phys0001')
f = mets.add_file('GRP2', ID='FOO_0002', pageId='phys0002')
self.assertEqual(make_file_id(f, 'GRP2'), 'GRP2_0002')

def test_generate_range(self):
assert generate_range('PHYS_0001', 'PHYS_0005') == ['PHYS_0001', 'PHYS_0002', 'PHYS_0003', 'PHYS_0004', 'PHYS_0005']
Expand Down

0 comments on commit f291b38

Please sign in to comment.