From 8a6eaff53eadca000a2c40624b355d83d89f6656 Mon Sep 17 00:00:00 2001 From: Kapil kumar Date: Mon, 27 Mar 2017 19:38:58 +0530 Subject: [PATCH] Validating Ids for duplicate Issue#56 (#98) * Validating duplicate ids are not present Issue#56 --- schema_salad/main.py | 2 +- schema_salad/ref_resolver.py | 27 +++++++++++++++-------- schema_salad/schema.py | 2 +- schema_salad/sourceline.py | 15 ++++++++----- schema_salad/tests/test_errors.py | 5 ++++- schema_salad/tests/test_schema/test12.cwl | 16 ++++++++++++++ schema_salad/tests/test_schema/test13.cwl | 20 +++++++++++++++++ schema_salad/tests/test_schema/test14.cwl | 11 +++++++++ setup.py | 2 +- 9 files changed, 81 insertions(+), 19 deletions(-) create mode 100644 schema_salad/tests/test_schema/test12.cwl create mode 100644 schema_salad/tests/test_schema/test13.cwl create mode 100644 schema_salad/tests/test_schema/test14.cwl diff --git a/schema_salad/main.py b/schema_salad/main.py index 0e2adf5d1..cbe566152 100644 --- a/schema_salad/main.py +++ b/schema_salad/main.py @@ -139,7 +139,7 @@ def main(argsl=None): # type: (List[str]) -> int try: schema.validate_doc(metaschema_names, schema_doc, metaschema_loader, args.strict, - source=schema_metadata["name"]) + source=schema_metadata.get("name")) except validate.ValidationException as e: _logger.error("While validating schema `%s`:\n%s" % (args.schema, str(e))) diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index d9a8bbf07..d04001d1c 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -822,7 +822,8 @@ def resolve_all(self, loader.idx[metadata[identifer]] = document if checklinks: - self.validate_links(document, u"") + all_doc_ids={} # type: Dict[Text, Text] + self.validate_links(document, u"", all_doc_ids) return document, metadata @@ -877,8 +878,8 @@ def validate_scoped(self, field, link, docid): raise validate.ValidationException( "Field `%s` references unknown identifier `%s`, tried %s" % (field, link, ", ".join(tried))) - def validate_link(self, field, link, docid): - # type: (unicode, FieldType, unicode) -> FieldType + def validate_link(self, field, link, docid, all_doc_ids): + # type: (unicode, FieldType, unicode, Dict[Text, Text]) -> FieldType if field in self.nolinkcheck: return link if isinstance(link, (str, unicode)): @@ -901,14 +902,14 @@ def validate_link(self, field, link, docid): errors = [] for n, i in enumerate(link): try: - link[n] = self.validate_link(field, i, docid) + link[n] = self.validate_link(field, i, docid, all_doc_ids) except validate.ValidationException as v: errors.append(v) if bool(errors): raise validate.ValidationException( "\n".join([unicode(e) for e in errors])) elif isinstance(link, CommentedMap): - self.validate_links(link, docid) + self.validate_links(link, docid, all_doc_ids) else: raise validate.ValidationException( "`%s` field is %s, expected string, list, or a dict." @@ -924,8 +925,8 @@ def getid(self, d): # type: (Any) -> Optional[Text] return idd return None - def validate_links(self, document, base_url): - # type: (Union[CommentedMap, CommentedSeq, unicode, None], unicode) -> None + def validate_links(self, document, base_url, all_doc_ids): + # type: (Union[CommentedMap, CommentedSeq, unicode, None], unicode, Dict[Text, Text]) -> None docid = self.getid(document) if not docid: docid = base_url @@ -939,7 +940,15 @@ def validate_links(self, document, base_url): for d in self.url_fields: sl = SourceLine(document, d, validate.ValidationException) if d in document and d not in self.identity_links: - document[d] = self.validate_link(d, document[d], docid) + document[d] = self.validate_link(d, document[d], docid, all_doc_ids) + for identifier in self.identifiers: # validate that each id is defined uniquely + if identifier in document: + sl = SourceLine(document, identifier, validate.ValidationException) + if document[identifier] in all_doc_ids and sl.makeLead() != all_doc_ids[document[identifier]]: + raise validate.ValidationException( + "%s object %s `%s` previously defined" % (all_doc_ids[document[identifier]], identifier, relname(document[identifier]), )) + else: + all_doc_ids[document[identifier]] = sl.makeLead() except validate.ValidationException as v: errors.append(sl.makeError(unicode(v))) if hasattr(document, "iteritems"): @@ -952,7 +961,7 @@ def validate_links(self, document, base_url): for key, val in iterator: sl = SourceLine(document, key, validate.ValidationException) try: - self.validate_links(val, docid) + self.validate_links(val, docid, all_doc_ids) except validate.ValidationException as v: if key not in self.nolinkcheck: docid2 = self.getid(val) diff --git a/schema_salad/schema.py b/schema_salad/schema.py index f961fae62..6f55d6fd9 100644 --- a/schema_salad/schema.py +++ b/schema_salad/schema.py @@ -242,7 +242,7 @@ def load_and_validate(document_loader, # type: Loader validationErrors = u"" try: - document_loader.validate_links(data, u"") + document_loader.validate_links(data, u"", {}) except validate.ValidationException as v: validationErrors = unicode(v) + "\n" diff --git a/schema_salad/sourceline.py b/schema_salad/sourceline.py index 4c20e6a0f..6ab2a8838 100644 --- a/schema_salad/sourceline.py +++ b/schema_salad/sourceline.py @@ -149,18 +149,21 @@ def __exit__(self, return raise self.makeError(unicode(exc_value)) - def makeError(self, msg): # type: (Text) -> Any - if not isinstance(self.item, ruamel.yaml.comments.CommentedBase): - return self.raise_type(msg) - errs = [] + def makeLead(self): # type: () -> Text if self.key is None or self.item.lc.data is None or self.key not in self.item.lc.data: - lead = "%s:%i:%i:" % (self.item.lc.filename if hasattr(self.item.lc, "filename") else "", + return "%s:%i:%i:" % (self.item.lc.filename if hasattr(self.item.lc, "filename") else "", (self.item.lc.line or 0)+1, (self.item.lc.col or 0)+1) else: - lead = "%s:%i:%i:" % (self.item.lc.filename if hasattr(self.item.lc, "filename") else "", + return "%s:%i:%i:" % (self.item.lc.filename if hasattr(self.item.lc, "filename") else "", (self.item.lc.data[self.key][0] or 0)+1, (self.item.lc.data[self.key][1] or 0)+1) + + def makeError(self, msg): # type: (Text) -> Any + if not isinstance(self.item, ruamel.yaml.comments.CommentedBase): + return self.raise_type(msg) + errs = [] + lead = self.makeLead() for m in msg.splitlines(): if bool(lineno_re.match(m)): errs.append(m) diff --git a/schema_salad/tests/test_errors.py b/schema_salad/tests/test_errors.py index 44b712d6b..be0bfc530 100644 --- a/schema_salad/tests/test_errors.py +++ b/schema_salad/tests/test_errors.py @@ -21,7 +21,10 @@ def test_errors(self): "test_schema/test8.cwl", "test_schema/test9.cwl", "test_schema/test10.cwl", - "test_schema/test11.cwl"): + "test_schema/test11.cwl", + "test_schema/test12.cwl", + "test_schema/test13.cwl", + "test_schema/test14.cwl"): with self.assertRaises(ValidationException): try: load_and_validate(document_loader, avsc_names, diff --git a/schema_salad/tests/test_schema/test12.cwl b/schema_salad/tests/test_schema/test12.cwl new file mode 100644 index 000000000..d994e7cbc --- /dev/null +++ b/schema_salad/tests/test_schema/test12.cwl @@ -0,0 +1,16 @@ +cwlVersion: v1.0 +class: CommandLineTool +baseCommand: echo +inputs: + - id: example_flag + type: boolean + inputBinding: + position: 1 + prefix: -f + - id: example_flag + type: int + inputBinding: + position: 3 + prefix: --example-string + +outputs: [] diff --git a/schema_salad/tests/test_schema/test13.cwl b/schema_salad/tests/test_schema/test13.cwl new file mode 100644 index 000000000..caa274d2a --- /dev/null +++ b/schema_salad/tests/test_schema/test13.cwl @@ -0,0 +1,20 @@ +cwlVersion: v1.0 +class: Workflow +inputs: + example_flag: + type: boolean + inputBinding: + position: 1 + prefix: -f + +outputs: [] + +steps: + example_flag: + in: [] + out: [] + run: + id: blah + class: CommandLineTool + inputs: [] + outputs: [] \ No newline at end of file diff --git a/schema_salad/tests/test_schema/test14.cwl b/schema_salad/tests/test_schema/test14.cwl new file mode 100644 index 000000000..729ee83df --- /dev/null +++ b/schema_salad/tests/test_schema/test14.cwl @@ -0,0 +1,11 @@ +cwlVersion: v1.0 +class: CommandLineTool +baseCommand: echo +inputs: + example_flag: + type: boolean + inputBinding: + position: 1 + prefix: -f +outputs: + example_flag: int diff --git a/setup.py b/setup.py index 486601ec9..25b4042e0 100755 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ extras_require = {} # TODO: to be removed when the above is added setup(name='schema-salad', - version='2.4', + version='2.5', description='Schema Annotations for Linked Avro Data (SALAD)', long_description=open(README).read(), author='Common workflow language working group',