From b1d7b6907854ccfffffae388879b1a488f52008c Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 15 Aug 2016 16:59:32 -0400 Subject: [PATCH 001/116] Support http/https existence check (using HTTP HEAD) in check_file. --- schema_salad/ref_resolver.py | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index b3ab9b8b9..ca6dc8314 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -661,12 +661,21 @@ def fetch(self, url, inject_ids=True): # type: (unicode, bool) -> Any self.idx[url] = result return result - def check_file(self, fn): # type: (unicode) -> bool - if fn.startswith("file://"): - u = urlparse.urlsplit(fn) - return os.path.exists(u.path) + def check_file(self, url): # type: (unicode) -> bool + split = urlparse.urlsplit(url) + scheme, path = split.scheme, split.path + + if scheme in [u'http', u'https'] and self.session: + try: + resp = self.session.head(url) + resp.raise_for_status() + except Exception as e: + return False + return True + elif scheme == 'file': + return os.path.exists(path) else: - return False + raise ValueError('Unsupported scheme in url: %s' % url) FieldType = TypeVar('FieldType', unicode, List[unicode], Dict[unicode, Any]) From 9b6a6be31e97600c60a8335e84bef1a3873b9f9d Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" Date: Tue, 6 Sep 2016 16:08:34 +0300 Subject: [PATCH 002/116] demote log about external schema --- Makefile | 2 +- schema_salad/ref_resolver.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index ba220d3c5..f5a824066 100644 --- a/Makefile +++ b/Makefile @@ -26,7 +26,7 @@ MODULE=schema_salad # `SHELL=bash` Will break Titus's laptop, so don't use BASH-isms like # `[[` conditional expressions. 
PYSOURCES=$(wildcard ${MODULE}/**.py tests/*.py) setup.py -DEVPKGS=pep8 diff_cover autopep8 pylint coverage pep257 pytest +DEVPKGS=pep8 diff_cover autopep8 pylint coverage pep257 pytest flake8 VERSION=$(shell git describe --tags --dirty | sed s/v//) diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index e23cbf30f..087d02def 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -181,7 +181,7 @@ def add_schemas(self, ns, base_url): for sch in aslist(ns): fetchurl = urlparse.urljoin(base_url, sch) if fetchurl not in self.cache: - _logger.info("Getting external schema %s", fetchurl) + _logger.debug("Getting external schema %s", fetchurl) content = self.fetch_text(fetchurl) self.cache[fetchurl] = rdflib.graph.Graph() for fmt in ['xml', 'turtle', 'rdfa']: From 1728fd4f6d5eb6acb38af98124ba6567b2f3dd4d Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" Date: Tue, 6 Sep 2016 16:12:39 +0300 Subject: [PATCH 003/116] fix types --- schema_salad/ref_resolver.py | 2 +- typeshed/2.7/requests/sessions.pyi | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index b03cc5113..466746442 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -189,7 +189,7 @@ def add_schemas(self, ns, base_url): self.cache[fetchurl].parse(data=content, format=fmt) self.graph += self.cache[fetchurl] break - except xml.sax.SAXParseException: # type: ignore + except xml.sax.SAXParseException: pass except TypeError: pass diff --git a/typeshed/2.7/requests/sessions.pyi b/typeshed/2.7/requests/sessions.pyi index 7c51e9736..b9f49cbb1 100644 --- a/typeshed/2.7/requests/sessions.pyi +++ b/typeshed/2.7/requests/sessions.pyi @@ -78,7 +78,7 @@ class Session(SessionRedirectMixin): json=...) -> Response: ... def get(self, url: AnyStr, **kwargs) -> Response: ... def options(self, url: str, **kwargs) -> Response: ... - def head(self, url: str, **kwargs) -> Response: ... 
+ def head(self, url: AnyStr, **kwargs) -> Response: ... def post(self, url: str, data=..., json=..., **kwargs) -> Response: ... def put(self, url: str, data=..., **kwargs) -> Response: ... def patch(self, url: str, data=..., **kwargs) -> Response: ... From 01dd303a59d5ffee41cd433171e15f10fb196fc6 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Fri, 30 Sep 2016 10:56:50 -0400 Subject: [PATCH 004/116] Optimize validate (#60) Validation can now either fail fast (raise_ex=False) or fail with explanation (raise_ex=True). Improve validation performance by using fail fast when checking unions. Optimize record validation by performing explicit check for "class" first. Improve validation error reporting for unions when one of the types is an exact match for class by suppressing errors for other types in the union. Other changes: * Use unicode strings more consistently. * makedoc applies table styling * Add version constraint to lockfile package dependency --- schema_salad/makedoc.py | 6 ++ schema_salad/schema.py | 48 +++++++---- schema_salad/validate.py | 178 +++++++++++++++++++++++++++------------ setup.py | 4 +- 4 files changed, 165 insertions(+), 71 deletions(-) diff --git a/schema_salad/makedoc.py b/schema_salad/makedoc.py index 7cdc1162d..5303c818b 100644 --- a/schema_salad/makedoc.py +++ b/schema_salad/makedoc.py @@ -49,6 +49,12 @@ def __init__(self): # type: () -> None def header(self, text, level, raw=None): return """%s""" % (level, to_id(text), text, level) + def table(self, header, body): + return ( + '\n%s\n' + '\n%s\n
\n' + ) % (header, body) + def to_id(text): # type: (Union[str, unicode]) -> Union[str, unicode] textid = text diff --git a/schema_salad/schema.py b/schema_salad/schema.py index 11a227b55..f3172892b 100644 --- a/schema_salad/schema.py +++ b/schema_salad/schema.py @@ -233,35 +233,49 @@ def validate_doc(schema_names, doc, loader, strict): else: raise validate.ValidationException("Document must be dict or list") + roots = [] + for r in schema_names.names.values(): + if ((hasattr(r, "get_prop") and r.get_prop(u"documentRoot")) or ( + r.props.get(u"documentRoot"))): + roots.append(r) + anyerrors = [] for pos, item in enumerate(validate_doc): - errors = [] success = False - for r in schema_names.names.values(): - if ((hasattr(r, "get_prop") and r.get_prop(u"documentRoot")) or ( - u"documentRoot" in r.props)): + for r in roots: + success = validate.validate_ex( + r, item, loader.identifiers, strict, foreign_properties=loader.foreign_properties, raise_ex=False) + if success: + break + + if not success: + errors = [] # type: List[unicode] + for r in roots: + if hasattr(r, "get_prop"): + name = r.get_prop(u"name") + elif hasattr(r, "name"): + name = r.name + try: validate.validate_ex( - r, item, loader.identifiers, strict, foreign_properties=loader.foreign_properties) - success = True + r, item, loader.identifiers, strict, foreign_properties=loader.foreign_properties, raise_ex=True) + except validate.ClassValidationException as e: + errors = [u"Could not validate `%s` because\n%s" % ( + name, validate.indent(str(e), nolead=False))] break except validate.ValidationException as e: - if hasattr(r, "get_prop"): - name = r.get_prop(u"name") - elif hasattr(r, "name"): - name = r.name - errors.append("Could not validate as `%s` because\n%s" % ( + errors.append(u"Could not validate as `%s` because\n%s" % ( name, validate.indent(str(e), nolead=False))) - if not success: - objerr = "Validation error at position %i" % pos + + objerr = u"Validation error at position %i" % pos for ident 
in loader.identifiers: if ident in item: - objerr = "Validation error in object %s" % (item[ident]) + objerr = u"Validation error in object %s" % (item[ident]) break - anyerrors.append("%s\n%s" % - (objerr, validate.indent("\n".join(errors)))) + anyerrors.append(u"%s\n%s" % + (objerr, validate.indent(u"\n".join(errors)))) if anyerrors: - raise validate.ValidationException("\n".join(anyerrors)) + raise validate.ValidationException(u"\n".join(anyerrors)) def replace_type(items, spec, loader, found): diff --git a/schema_salad/validate.py b/schema_salad/validate.py index 90b02e97f..5cb9032e7 100644 --- a/schema_salad/validate.py +++ b/schema_salad/validate.py @@ -2,28 +2,28 @@ import avro.schema import sys import urlparse -from typing import Any +from typing import Any, Union class ValidationException(Exception): pass +class ClassValidationException(ValidationException): + pass + def validate(expected_schema, datum, identifiers=set(), strict=False, foreign_properties=set()): # type: (avro.schema.Schema, Any, Set[unicode], bool, Set[unicode]) -> bool - try: - return validate_ex(expected_schema, datum, identifiers, strict=strict, foreign_properties=foreign_properties) - except ValidationException: - return False + return validate_ex(expected_schema, datum, identifiers, strict=strict, foreign_properties=foreign_properties, raise_ex=False) INT_MIN_VALUE = -(1 << 31) INT_MAX_VALUE = (1 << 31) - 1 LONG_MIN_VALUE = -(1 << 63) LONG_MAX_VALUE = (1 << 63) - 1 -def indent(v, nolead=False): # type: (str, bool) -> str +def indent(v, nolead=False): # type: (Union[str, unicode], bool) -> unicode if nolead: - return v.splitlines()[0] + "\n".join([" " + l for l in v.splitlines()[1:]]) + return v.splitlines()[0] + u"\n".join([u" " + l for l in v.splitlines()[1:]]) else: - return "\n".join([" " + l for l in v.splitlines()]) + return u"\n".join([" " + l for l in v.splitlines()]) def friendly(v): # type: (Any) -> Any if isinstance(v, avro.schema.NamedSchema): @@ -37,11 +37,11 @@ def 
friendly(v): # type: (Any) -> Any else: return v -def multi(v, q=""): # type: (str, str) -> str +def multi(v, q=""): # type: (Union[str, unicode], Union[str, unicode]) -> unicode if '\n' in v: - return "%s%s%s\n" % (q, v, q) + return u"%s%s%s\n" % (q, v, q) else: - return "%s%s%s" % (q, v, q) + return u"%s%s%s" % (q, v, q) def vpformat(datum): # type: (Any) -> str a = pprint.pformat(datum) @@ -50,8 +50,8 @@ def vpformat(datum): # type: (Any) -> str return a def validate_ex(expected_schema, datum, identifiers=None, strict=False, - foreign_properties=None): - # type: (avro.schema.Schema, Any, Set[unicode], bool, Set[unicode]) -> bool + foreign_properties=None, raise_ex=True): + # type: (avro.schema.Schema, Any, Set[unicode], bool, Set[unicode], bool) -> bool """Determine if a python datum is an instance of a schema.""" if not identifiers: @@ -66,93 +66,154 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False, if datum is None: return True else: - raise ValidationException("the value `%s` is not null" % vpformat(datum)) + if raise_ex: + raise ValidationException(u"the value `%s` is not null" % vpformat(datum)) + else: + return False elif schema_type == 'boolean': if isinstance(datum, bool): return True else: - raise ValidationException("the value `%s` is not boolean" % vpformat(datum)) + if raise_ex: + raise ValidationException(u"the value `%s` is not boolean" % vpformat(datum)) + else: + return False elif schema_type == 'string': if isinstance(datum, basestring): return True elif isinstance(datum, bytes): - datum = datum.decode("utf-8") + datum = datum.decode(u"utf-8") return True else: - raise ValidationException("the value `%s` is not string" % vpformat(datum)) + if raise_ex: + raise ValidationException(u"the value `%s` is not string" % vpformat(datum)) + else: + return False elif schema_type == 'bytes': if isinstance(datum, str): return True else: - raise ValidationException("the value `%s` is not bytes" % vpformat(datum)) + if raise_ex: + 
raise ValidationException(u"the value `%s` is not bytes" % vpformat(datum)) + else: + return False elif schema_type == 'int': if ((isinstance(datum, int) or isinstance(datum, long)) and INT_MIN_VALUE <= datum <= INT_MAX_VALUE): return True else: - raise ValidationException("`%s` is not int" % vpformat(datum)) + if raise_ex: + raise ValidationException(u"`%s` is not int" % vpformat(datum)) + else: + return False elif schema_type == 'long': if ((isinstance(datum, int) or isinstance(datum, long)) and LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE): return True else: - raise ValidationException("the value `%s` is not long" % vpformat(datum)) + if raise_ex: + raise ValidationException(u"the value `%s` is not long" % vpformat(datum)) + else: + return False elif schema_type in ['float', 'double']: if (isinstance(datum, int) or isinstance(datum, long) or isinstance(datum, float)): return True else: - raise ValidationException("the value `%s` is not float or double" % vpformat(datum)) + if raise_ex: + raise ValidationException(u"the value `%s` is not float or double" % vpformat(datum)) + else: + return False elif isinstance(expected_schema, avro.schema.FixedSchema): if isinstance(datum, str) and len(datum) == expected_schema.size: return True else: - raise ValidationException("the value `%s` is not fixed" % vpformat(datum)) + if raise_ex: + raise ValidationException(u"the value `%s` is not fixed" % vpformat(datum)) + else: + return False elif isinstance(expected_schema, avro.schema.EnumSchema): if expected_schema.name == "Any": if datum is not None: return True else: - raise ValidationException("Any type must be non-null") + if raise_ex: + raise ValidationException(u"'Any' type must be non-null") + else: + return False if datum in expected_schema.symbols: return True else: - raise ValidationException("the value `%s`\n is not a valid symbol in enum %s, expected one of %s" % (vpformat(datum), expected_schema.name, "'" + "', '".join(expected_schema.symbols) + "'")) + if raise_ex: + 
raise ValidationException(u"the value `%s`\n is not a valid symbol in enum %s, expected one of %s" % (vpformat(datum), expected_schema.name, "'" + "', '".join(expected_schema.symbols) + "'")) + else: + return False elif isinstance(expected_schema, avro.schema.ArraySchema): if isinstance(datum, list): for i, d in enumerate(datum): try: - validate_ex(expected_schema.items, d, identifiers, strict=strict, foreign_properties=foreign_properties) + if not validate_ex(expected_schema.items, d, identifiers, strict=strict, foreign_properties=foreign_properties, raise_ex=raise_ex): + return False except ValidationException as v: - raise ValidationException("At position %i\n%s" % (i, indent(str(v)))) - return True - else: - raise ValidationException("the value `%s` is not a list, expected list of %s" % (vpformat(datum), friendly(expected_schema.items))) - elif isinstance(expected_schema, avro.schema.MapSchema): - if (isinstance(datum, dict) and - False not in [isinstance(k, basestring) for k in datum.keys()] and - False not in [validate(expected_schema.values, v, strict=strict) for v in datum.values()]): + if raise_ex: + raise ValidationException(u"At position %i\n%s" % (i, indent(str(v)))) + else: + return False return True else: - raise ValidationException("`%s` is not a valid map value, expected\n %s" % (vpformat(datum), vpformat(expected_schema.values))) + if raise_ex: + raise ValidationException(u"the value `%s` is not a list, expected list of %s" % (vpformat(datum), friendly(expected_schema.items))) + else: + return False elif isinstance(expected_schema, avro.schema.UnionSchema): - if True in [validate(s, datum, identifiers, strict=strict) for s in expected_schema.schemas]: - return True - else: - errors = [] - for s in expected_schema.schemas: - try: - validate_ex(s, datum, identifiers, strict=strict, foreign_properties=foreign_properties) - except ValidationException as e: - errors.append(str(e)) - raise ValidationException("the value %s is not a valid type in the 
union, expected one of:\n%s" % (multi(vpformat(datum), '`'), "\n".join(["- %s, but\n %s" % (friendly(expected_schema.schemas[i]), indent(multi(errors[i]))) for i in range(0, len(expected_schema.schemas))]))) + for s in expected_schema.schemas: + if validate_ex(s, datum, identifiers, strict=strict, raise_ex=False): + return True + + if not raise_ex: + return False + + errors = [] # type: List[unicode] + for s in expected_schema.schemas: + try: + validate_ex(s, datum, identifiers, strict=strict, foreign_properties=foreign_properties, raise_ex=True) + except ClassValidationException as e: + raise + except ValidationException as e: + errors.append(unicode(e)) + + raise ValidationException(u"the value %s is not a valid type in the union, expected one of:\n%s" % ( + multi(vpformat(datum), '`'), u"\n".join([ + u"- %s, but\n %s" % ( + friendly(expected_schema.schemas[i]), indent(multi(errors[i]))) + for i in range(0, len(expected_schema.schemas))]))) elif isinstance(expected_schema, avro.schema.RecordSchema): if not isinstance(datum, dict): - raise ValidationException("`%s`\n is not a dict" % vpformat(datum)) + if raise_ex: + raise ValidationException(u"`%s`\n is not a dict" % vpformat(datum)) + else: + return False + + classmatch = None + for f in expected_schema.fields: + if f.name == "class": + d = datum.get("class") + if not d: + if raise_ex: + raise ValidationException(u"Missing 'class' field") + else: + return False + if not validate_ex(f.type, d, identifiers, strict=strict, foreign_properties=foreign_properties, raise_ex=raise_ex): + return False + classmatch = d + break errors = [] for f in expected_schema.fields: + if f.name == "class": + continue + if f.name in datum: fieldval = datum[f.name] else: @@ -162,12 +223,14 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False, fieldval = None try: - validate_ex(f.type, fieldval, identifiers, strict=strict, foreign_properties=foreign_properties) + if not validate_ex(f.type, fieldval, identifiers, 
strict=strict, foreign_properties=foreign_properties, raise_ex=raise_ex): + return False except ValidationException as v: if f.name not in datum: - errors.append("missing required field `%s`" % f.name) + errors.append(u"missing required field `%s`" % f.name) else: - errors.append("could not validate field `%s` because\n%s" % (f.name, multi(indent(str(v))))) + errors.append(u"could not validate field `%s` because\n%s" % (f.name, multi(indent(str(v))))) + if strict: for d in datum: found = False @@ -176,14 +239,25 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False, found = True if not found: if d not in identifiers and d not in foreign_properties and d[0] not in ("@", "$"): + if not raise_ex: + return False split = urlparse.urlsplit(d) if split.scheme: - errors.append("could not validate extension field `%s` because it is not recognized and strict is True. Did you include a $schemas section?" % (d)) + errors.append(u"could not validate extension field `%s` because it is not recognized and strict is True. Did you include a $schemas section?" 
% (d)) else: - errors.append("could not validate field `%s` because it is not recognized and strict is True, valid fields are: %s" % (d, ", ".join(fn.name for fn in expected_schema.fields))) + errors.append(u"could not validate field `%s` because it is not recognized and strict is True, valid fields are: %s" % (d, ", ".join(fn.name for fn in expected_schema.fields))) if errors: - raise ValidationException("\n".join(errors)) + if raise_ex: + if classmatch: + raise ClassValidationException(u"%s record %s" % (classmatch, "\n".join(errors))) + else: + raise ValidationException(u"\n".join(errors)) + else: + return False else: return True - raise ValidationException("Unrecognized schema_type %s" % schema_type) + if raise_ex: + raise ValidationException(u"Unrecognized schema_type %s" % schema_type) + else: + return False diff --git a/setup.py b/setup.py index a67730968..fa639a9d6 100755 --- a/setup.py +++ b/setup.py @@ -35,7 +35,7 @@ 'mistune', 'typing >= 3.5.2', 'CacheControl', - 'lockfile'] + 'lockfile >= 0.9'] install_requires.append("avro") # TODO: remove me once cwltool is # available in Debian Stable, Ubuntu 12.04 LTS @@ -46,7 +46,7 @@ extras_require = {} # TODO: to be removed when the above is added setup(name='schema-salad', - version='1.17', + version='1.18', description='Schema Annotations for Linked Avro Data (SALAD)', long_description=open(README).read(), author='Common workflow language working group', From 84545469eb9ccedfd565447833f44e745bd429b5 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Wed, 5 Oct 2016 15:36:09 -0400 Subject: [PATCH 005/116] Bugfix for validating "class" field. 
(#61) --- schema_salad/validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schema_salad/validate.py b/schema_salad/validate.py index 5cb9032e7..6e3eb5cc3 100644 --- a/schema_salad/validate.py +++ b/schema_salad/validate.py @@ -204,7 +204,7 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False, raise ValidationException(u"Missing 'class' field") else: return False - if not validate_ex(f.type, d, identifiers, strict=strict, foreign_properties=foreign_properties, raise_ex=raise_ex): + if expected_schema.name != d: return False classmatch = d break From 8ac45400458abc83691eeb3062da6509d698e56c Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 21 Nov 2016 16:46:34 -0500 Subject: [PATCH 006/116] Provide line numbers in error messages (#62) * Line number reporting WIP * Handle line number tracking $import expansion of lists and idmaps. * Break long lines. * Formatting bullets, indentation. * Add tests of validation error reporting. * type cleanups * Better error reporting on link checking. * Add missing sourceline.py * Fix tests, typing, add tests for validation errors. * Bump version. * Print error validation messages for manual audit. 
--- schema_salad/add_dictlist.py | 1 + schema_salad/aslist.py | 1 + schema_salad/flatten.py | 2 + schema_salad/jsonld_context.py | 42 +- schema_salad/main.py | 43 +- schema_salad/makedoc.py | 27 +- schema_salad/ref_resolver.py | 408 +++++--- schema_salad/schema.py | 137 ++- schema_salad/sourceline.py | 135 +++ schema_salad/tests/test_errors.py | 29 + schema_salad/tests/test_examples.py | 61 +- .../tests/test_schema/CommandLineTool.yml | 894 ++++++++++++++++++ .../test_schema/CommonWorkflowLanguage.yml | 11 + schema_salad/tests/test_schema/Process.yml | 743 +++++++++++++++ schema_salad/tests/test_schema/Workflow.yml | 582 ++++++++++++ schema_salad/tests/test_schema/concepts.md | 1 + schema_salad/tests/test_schema/contrib.md | 1 + schema_salad/tests/test_schema/intro.md | 1 + schema_salad/tests/test_schema/invocation.md | 1 + .../tests/test_schema/metaschema_base.yml | 164 ++++ schema_salad/tests/test_schema/test1.cwl | 1 + schema_salad/tests/test_schema/test10.cwl | 10 + schema_salad/tests/test_schema/test11.cwl | 10 + schema_salad/tests/test_schema/test2.cwl | 1 + schema_salad/tests/test_schema/test3.cwl | 6 + schema_salad/tests/test_schema/test4.cwl | 6 + schema_salad/tests/test_schema/test5.cwl | 6 + schema_salad/tests/test_schema/test6.cwl | 5 + schema_salad/tests/test_schema/test7.cwl | 10 + schema_salad/tests/test_schema/test8.cwl | 10 + schema_salad/tests/test_schema/test9.cwl | 10 + schema_salad/validate.py | 148 +-- setup.cfg | 2 +- setup.py | 2 +- 34 files changed, 3199 insertions(+), 312 deletions(-) create mode 100644 schema_salad/sourceline.py create mode 100644 schema_salad/tests/test_errors.py create mode 100644 schema_salad/tests/test_schema/CommandLineTool.yml create mode 100644 schema_salad/tests/test_schema/CommonWorkflowLanguage.yml create mode 100644 schema_salad/tests/test_schema/Process.yml create mode 100644 schema_salad/tests/test_schema/Workflow.yml create mode 100644 schema_salad/tests/test_schema/concepts.md create mode 100644 
schema_salad/tests/test_schema/contrib.md create mode 100644 schema_salad/tests/test_schema/intro.md create mode 100644 schema_salad/tests/test_schema/invocation.md create mode 100644 schema_salad/tests/test_schema/metaschema_base.yml create mode 100644 schema_salad/tests/test_schema/test1.cwl create mode 100644 schema_salad/tests/test_schema/test10.cwl create mode 100644 schema_salad/tests/test_schema/test11.cwl create mode 100644 schema_salad/tests/test_schema/test2.cwl create mode 100644 schema_salad/tests/test_schema/test3.cwl create mode 100644 schema_salad/tests/test_schema/test4.cwl create mode 100644 schema_salad/tests/test_schema/test5.cwl create mode 100644 schema_salad/tests/test_schema/test6.cwl create mode 100644 schema_salad/tests/test_schema/test7.cwl create mode 100644 schema_salad/tests/test_schema/test8.cwl create mode 100644 schema_salad/tests/test_schema/test9.cwl diff --git a/schema_salad/add_dictlist.py b/schema_salad/add_dictlist.py index 53bd4d48d..711f580e9 100644 --- a/schema_salad/add_dictlist.py +++ b/schema_salad/add_dictlist.py @@ -1,6 +1,7 @@ import sys from typing import Any, Dict + def add_dictlist(di, key, val): # type: (Dict, Any, Any) -> None if key not in di: di[key] = [] diff --git a/schema_salad/aslist.py b/schema_salad/aslist.py index 0332a2be5..27602ab8d 100644 --- a/schema_salad/aslist.py +++ b/schema_salad/aslist.py @@ -1,6 +1,7 @@ import sys from typing import Any, List + def aslist(l): # type: (Any) -> List """Convenience function to wrap single items and lists, and return lists unchanged.""" diff --git a/schema_salad/flatten.py b/schema_salad/flatten.py index 90c93d280..a417b343b 100644 --- a/schema_salad/flatten.py +++ b/schema_salad/flatten.py @@ -2,6 +2,8 @@ from typing import Any, Tuple # http://rightfootin.blogspot.com/2006/09/more-on-python-flatten.html + + def flatten(l, ltypes=(list, tuple)): # type: (Any, Any) -> Any if l is None: diff --git a/schema_salad/jsonld_context.py b/schema_salad/jsonld_context.py 
index d4d203fbc..7141b0742 100755 --- a/schema_salad/jsonld_context.py +++ b/schema_salad/jsonld_context.py @@ -20,13 +20,19 @@ import logging from .aslist import aslist from typing import Any, cast, Dict, Iterable, Tuple, Union -from .ref_resolver import Loader +from .ref_resolver import Loader, ContextType _logger = logging.getLogger("salad") -def pred(datatype, field, name, context, defaultBase, namespaces): - # type: (Dict[str, Union[Dict, str]], Dict, str, Loader.ContextType, str, Dict[str, rdflib.namespace.Namespace]) -> Union[Dict, str] +def pred(datatype, # type: Dict[str, Union[Dict, str]] + field, # type: Dict + name, # type: str + context, # type: ContextType + defaultBase, # type: str + namespaces # type: Dict[str, rdflib.namespace.Namespace] + ): + # type: (...) -> Union[Dict, str] split = urlparse.urlsplit(name) vee = None # type: Union[str, unicode] @@ -84,8 +90,14 @@ def pred(datatype, field, name, context, defaultBase, namespaces): return ret -def process_type(t, g, context, defaultBase, namespaces, defaultPrefix): - # type: (Dict[str, Any], Graph, Loader.ContextType, str, Dict[str, rdflib.namespace.Namespace], str) -> None +def process_type(t, # type: Dict[str, Any] + g, # type: Graph + context, # type: ContextType + defaultBase, # type: str + namespaces, # type: Dict[str, rdflib.namespace.Namespace] + defaultPrefix # type: str + ): + # type: (...) 
-> None if t["type"] == "record": recordname = t["name"] @@ -154,8 +166,8 @@ def process_type(t, g, context, defaultBase, namespaces, defaultPrefix): def salad_to_jsonld_context(j, schema_ctx): - # type: (Iterable, Dict[str, Any]) -> Tuple[Loader.ContextType, Graph] - context = {} # type: Loader.ContextType + # type: (Iterable, Dict[str, Any]) -> Tuple[ContextType, Graph] + context = {} # type: ContextType namespaces = {} g = Graph() defaultPrefix = "" @@ -178,8 +190,11 @@ def salad_to_jsonld_context(j, schema_ctx): return (context, g) -def fix_jsonld_ids(obj, ids): - # type: (Union[Dict[unicode, Any], List[Dict[unicode, Any]]], List[unicode]) -> None + +def fix_jsonld_ids(obj, # type: Union[Dict[unicode, Any], List[Dict[unicode, Any]]] + ids # type: List[unicode] + ): + # type: (...) -> None if isinstance(obj, dict): for i in ids: if i in obj: @@ -190,8 +205,13 @@ def fix_jsonld_ids(obj, ids): for entry in obj: fix_jsonld_ids(entry, ids) -def makerdf(workflow, wf, ctx, graph=None): - # type: (Union[str, unicode], Union[List[Dict[unicode, Any]], Dict[unicode, Any]], Loader.ContextType, Graph) -> Graph + +def makerdf(workflow, # type: Union[str, unicode] + wf, # type: Union[List[Dict[unicode, Any]], Dict[unicode, Any]] + ctx, # type: ContextType + graph=None # type: Graph + ): + # type: (...) -> Graph prefixes = {} idfields = [] for k, v in ctx.iteritems(): diff --git a/schema_salad/main.py b/schema_salad/main.py index 1896e8fd0..f51184bc7 100644 --- a/schema_salad/main.py +++ b/schema_salad/main.py @@ -3,28 +3,36 @@ import logging import sys import traceback -import pkg_resources # part of setuptools -from . import schema -from . import jsonld_context -from . import makedoc import json -from rdflib import Graph, plugin -from rdflib.serializer import Serializer import os import urlparse -from .ref_resolver import Loader -from . 
import validate +import pkg_resources # part of setuptools + from typing import Any, Dict, List, Union +from rdflib import Graph, plugin +from rdflib.serializer import Serializer + +from . import schema +from . import jsonld_context +from . import makedoc +from . import validate +from .sourceline import strip_dup_lineno +from .ref_resolver import Loader + _logger = logging.getLogger("salad") from rdflib.plugin import register, Parser register('json-ld', Parser, 'rdflib_jsonld.parser', 'JsonLDParser') -def printrdf(workflow, wf, ctx, sr): - # type: (str, Union[List[Dict[unicode, Any]], Dict[unicode, Any]], Dict[unicode, Any], str) -> None +def printrdf(workflow, # type: str + wf, # type: Union[List[Dict[unicode, Any]], Dict[unicode, Any]] + ctx, # type: Dict[unicode, Any] + sr # type: str + ): + # type: (...) -> None g = jsonld_context.makerdf(workflow, wf, ctx) print(g.serialize(format=sr)) @@ -104,10 +112,14 @@ def main(argsl=None): # type: (List[str]) -> int schema_raw_doc, schema_uri) except (validate.ValidationException) as e: _logger.error("Schema `%s` failed link checking:\n%s", - args.schema, e, exc_info=(True if args.debug else False)) + args.schema, e, exc_info=(True if args.debug else False)) _logger.debug("Index is %s", metaschema_loader.idx.keys()) _logger.debug("Vocabulary is %s", metaschema_loader.vocab.keys()) return 1 + except (RuntimeError) as e: + _logger.error("Schema `%s` read error:\n%s", + args.schema, e, exc_info=(True if args.debug else False)) + return 1 # Optionally print the schema after ref resolution if not args.document and args.print_pre: @@ -121,7 +133,8 @@ def main(argsl=None): # type: (List[str]) -> int # Validate the schema document against the metaschema try: schema.validate_doc(metaschema_names, schema_doc, - metaschema_loader, args.strict) + metaschema_loader, args.strict, + source=schema_metadata["name"]) except validate.ValidationException as e: _logger.error("While validating schema `%s`:\n%s" % (args.schema, str(e))) @@ 
-149,8 +162,8 @@ def main(argsl=None): # type: (List[str]) -> int if isinstance(avsc_names, Exception): _logger.error("Schema `%s` error:\n%s", args.schema, - avsc_names, exc_info=((type(avsc_names), avsc_names, - None) if args.debug else None)) + avsc_names, exc_info=((type(avsc_names), avsc_names, + None) if args.debug else None)) if args.print_avro: print(json.dumps(avsc_obj, indent=4)) return 1 @@ -188,7 +201,7 @@ def main(argsl=None): # type: (List[str]) -> int document, doc_metadata = document_loader.resolve_ref(uri) except (validate.ValidationException, RuntimeError) as e: _logger.error("Document `%s` failed validation:\n%s", - args.document, e, exc_info=args.debug) + args.document, strip_dup_lineno(unicode(e)), exc_info=args.debug) return 1 # Optionally print the document after ref resolution diff --git a/schema_salad/makedoc.py b/schema_salad/makedoc.py index 5303c818b..5309b95f4 100644 --- a/schema_salad/makedoc.py +++ b/schema_salad/makedoc.py @@ -110,6 +110,7 @@ def contents(self, idn): # type: (str) -> str c += """""" return c + basicTypes = ("https://w3id.org/cwl/salad#null", "http://www.w3.org/2001/XMLSchema#boolean", "http://www.w3.org/2001/XMLSchema#int", @@ -219,8 +220,13 @@ def __init__(self, toc, j, renderlist, redirects): ("docAfter" not in f))): self.render_type(f, 1) - def typefmt(self, tp, redirects, nbsp=False, jsonldPredicate=None): - # type: (Any, Dict[str, str], bool, Dict[str, str]) -> Union[str, unicode] + def typefmt(self, + tp, # type: Any + redirects, # type: Dict[str, str] + nbsp=False, # type: bool + jsonldPredicate=None # type: Dict[str, str] + ): + # type: (...) 
-> Union[str, unicode] global primitiveType if isinstance(tp, list): if nbsp and len(tp) <= 3: @@ -229,16 +235,20 @@ def typefmt(self, tp, redirects, nbsp=False, jsonldPredicate=None): return " | ".join([self.typefmt(n, redirects) for n in tp]) if isinstance(tp, dict): if tp["type"] == "https://w3id.org/cwl/salad#array": - ar = "array<%s>" % (self.typefmt(tp["items"], redirects, nbsp=True)) + ar = "array<%s>" % (self.typefmt( + tp["items"], redirects, nbsp=True)) if jsonldPredicate and "mapSubject" in jsonldPredicate: if "mapPredicate" in jsonldPredicate: ar += " | map<%s.%s, %s.%s>" % (self.typefmt(tp["items"], redirects), - jsonldPredicate["mapSubject"], - self.typefmt(tp["items"], redirects), - jsonldPredicate["mapPredicate"]) + jsonldPredicate[ + "mapSubject"], + self.typefmt( + tp["items"], redirects), + jsonldPredicate["mapPredicate"]) ar += " | map<%s.%s, %s>" % (self.typefmt(tp["items"], redirects), - jsonldPredicate["mapSubject"], - self.typefmt(tp["items"], redirects)) + jsonldPredicate[ + "mapSubject"], + self.typefmt(tp["items"], redirects)) return ar if tp["type"] in ("https://w3id.org/cwl/salad#record", "https://w3id.org/cwl/salad#enum"): frg = schema.avro_name(tp["name"]) @@ -481,6 +491,7 @@ def avrold_doc(j, outdoc, renderlist, redirects, brand, brandlink): """) + if __name__ == "__main__": parser = argparse.ArgumentParser() diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index 4f455a620..c88c2ebcd 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -13,27 +13,30 @@ from . 
import validate from .aslist import aslist from .flatten import flatten +from .sourceline import SourceLine, add_lc_filename, relname import requests from cachecontrol.wrapper import CacheControl from cachecontrol.caches import FileCache import ruamel.yaml as yaml - -try: - from ruamel.yaml import CSafeLoader as SafeLoader -except ImportError: - from ruamel.yaml import SafeLoader # type: ignore +from ruamel.yaml.comments import CommentedSeq, CommentedMap import rdflib +from rdflib import Graph from rdflib.namespace import RDF, RDFS, OWL from rdflib.plugins.parsers.notation3 import BadSyntax import xml.sax from typing import (Any, AnyStr, Callable, cast, Dict, List, Iterable, Tuple, - TypeVar, Union) + TypeVar, Union) _logger = logging.getLogger("salad") +ContextType = Dict[unicode, Union[Dict, unicode, Iterable[unicode]]] +DocumentType = TypeVar('DocumentType', CommentedSeq, CommentedMap) +DocumentOrStrType = TypeVar( + 'DocumentOrStrType', CommentedSeq, CommentedMap, unicode) + -class NormDict(dict): +class NormDict(CommentedMap): def __init__(self, normalize=unicode): # type: (type) -> None super(NormDict, self).__init__() @@ -75,29 +78,36 @@ def SubLoader(loader): # type: (Loader) -> Loader class Loader(object): - ContextType = Dict[unicode, Union[Dict, unicode, Iterable[unicode]]] - DocumentType = TypeVar('DocumentType', List, Dict[unicode, Any]) - - def __init__(self, ctx, schemagraph=None, foreign_properties=None, - idx=None, cache=None, session=None): - # type: (Loader.ContextType, rdflib.Graph, Set[unicode], Dict[unicode, Union[List, Dict[unicode, Any], unicode]], Dict[unicode, Any], requests.sessions.Session) -> None + def __init__(self, + ctx, # type: ContextType + schemagraph=None, # type: Graph + foreign_properties=None, # type: Set[unicode] + idx=None, # type: Dict[unicode, Union[CommentedMap, CommentedSeq, unicode]] + cache=None, # type: Dict[unicode, Any] + session=None # type: requests.sessions.Session + ): + # type: (...) 
-> None normalize = lambda url: urlparse.urlsplit(url).geturl() + self.idx = None # type: Dict[unicode, Union[CommentedMap, CommentedSeq, unicode]] if idx is not None: self.idx = idx else: self.idx = NormDict(normalize) - self.ctx = {} # type: Loader.ContextType + self.ctx = {} # type: ContextType + self.graph = None # type: Graph if schemagraph is not None: self.graph = schemagraph else: self.graph = rdflib.graph.Graph() + self.foreign_properties = None # type: Set[unicode] if foreign_properties is not None: self.foreign_properties = foreign_properties else: self.foreign_properties = set() + self.cache = None # type: Dict[unicode, Any] if cache is not None: self.cache = cache else: @@ -110,23 +120,29 @@ def __init__(self, ctx, schemagraph=None, foreign_properties=None, self.session = CacheControl(requests.Session(), cache=FileCache(os.path.join(os.environ["HOME"], ".cache", "salad"))) - self.url_fields = None # type: Set[unicode] - self.scoped_ref_fields = None # type: Dict[unicode, int] - self.vocab_fields = None # type: Set[unicode] - self.identifiers = None # type: Set[unicode] - self.identity_links = None # type: Set[unicode] - self.standalone = None # type: Set[unicode] - self.nolinkcheck = None # type: Set[unicode] - self.vocab = {} # type: Dict[unicode, unicode] - self.rvocab = {} # type: Dict[unicode, unicode] - self.idmap = None # type: Dict[unicode, Any] - self.mapPredicate = None # type: Dict[unicode, unicode] - self.type_dsl_fields = None # type: Set[unicode] + self.url_fields = None # type: Set[unicode] + self.scoped_ref_fields = None # type: Dict[unicode, int] + self.vocab_fields = None # type: Set[unicode] + self.identifiers = None # type: Set[unicode] + self.identity_links = None # type: Set[unicode] + self.standalone = None # type: Set[unicode] + self.nolinkcheck = None # type: Set[unicode] + self.vocab = {} # type: Dict[unicode, unicode] + self.rvocab = {} # type: Dict[unicode, unicode] + self.idmap = None # type: Dict[unicode, Any] + 
self.mapPredicate = None # type: Dict[unicode, unicode] + self.type_dsl_fields = None # type: Set[unicode] self.add_context(ctx) - def expand_url(self, url, base_url, scoped_id=False, vocab_term=False, scoped_ref=None): - # type: (unicode, unicode, bool, bool, int) -> unicode + def expand_url(self, + url, # type: unicode + base_url, # type: unicode + scoped_id=False, # type: bool + vocab_term=False, # type: bool + scoped_ref=None # type: int + ): + # type: (...) -> unicode if url in (u"@id", u"@type"): return url @@ -210,7 +226,7 @@ def add_schemas(self, ns, base_url): self.idx[unicode(s)] = None def add_context(self, newcontext, baseuri=""): - # type: (Loader.ContextType, unicode) -> None + # type: (ContextType, unicode) -> None if self.vocab: raise validate.ValidationException( "Refreshing context that already has stuff in it") @@ -272,33 +288,42 @@ def add_context(self, newcontext, baseuri=""): _logger.debug("vocab_fields is %s", self.vocab_fields) _logger.debug("vocab is %s", self.vocab) - def resolve_ref(self, ref, base_url=None, checklinks=True): - # type: (Union[Dict[unicode, Any], unicode], unicode, bool) -> Tuple[Union[List, Dict[unicode, Any], unicode], Dict[unicode, Any]] + def resolve_ref(self, + ref, # type: Union[CommentedMap, CommentedSeq, unicode] + base_url=None, # type: unicode + checklinks=True # type: bool + ): + # type: (...) -> Tuple[Union[CommentedMap, CommentedSeq, unicode], Dict[unicode, Any]] base_url = base_url or u'file://%s/' % os.path.abspath('.') - obj = None # type: Dict[unicode, Any] + obj = None # type: CommentedMap + resolved_obj = None # type: Union[CommentedMap, CommentedSeq, unicode] inc = False - mixin = None + mixin = None # type: Dict[unicode, Any] + sl = SourceLine(obj, None, ValueError) # If `ref` is a dict, look for special directives. 
- if isinstance(ref, dict): + if isinstance(ref, CommentedMap): obj = ref - if u"$import" in obj: + if "$import" in obj: + sl = SourceLine(obj, "$import", RuntimeError) if len(obj) == 1: ref = obj[u"$import"] obj = None else: - raise ValueError( - u"'$import' must be the only field in %s" % (str(obj))) - elif u"$include" in obj: + raise sl.makeError( + u"'$import' must be the only field in %s" % (unicode(obj))) + elif "$include" in obj: + sl = SourceLine(obj, "$include", RuntimeError) if len(obj) == 1: ref = obj[u"$include"] inc = True obj = None else: - raise ValueError( - u"'$include' must be the only field in %s" % (str(obj))) - elif u"$mixin" in obj: + raise sl.makeError( + u"'$include' must be the only field in %s" % (unicode(obj))) + elif "$mixin" in obj: + sl = SourceLine(obj, "$mixin", RuntimeError) ref = obj[u"$mixin"] mixin = obj obj = None @@ -309,11 +334,11 @@ def resolve_ref(self, ref, base_url=None, checklinks=True): ref = obj[identifier] break if not ref: - raise ValueError( - u"Object `%s` does not have identifier field in %s" % (obj, self.identifiers)) + raise sl.makeError( + u"Object `%s` does not have identifier field in %s" % (relname(obj), self.identifiers)) if not isinstance(ref, (str, unicode)): - raise ValueError(u"Must be string: `%s`" % str(ref)) + raise ValueError(u"Expected CommentedMap or string, got %s: `%s`" % (type(ref), unicode(ref))) url = self.expand_url(ref, base_url, scoped_id=(obj is not None)) @@ -321,25 +346,26 @@ def resolve_ref(self, ref, base_url=None, checklinks=True): if url in self.idx and (not mixin): return self.idx[url], {} - # "$include" directive means load raw text - if inc: - return self.fetch_text(url), {} + with sl: + # "$include" directive means load raw text + if inc: + return self.fetch_text(url), {} - doc = None - if obj: - for identifier in self.identifiers: - obj[identifier] = url - doc_url = url - else: - # Load structured document - doc_url, frg = urlparse.urldefrag(url) - if doc_url in self.idx and 
(not mixin): - # If the base document is in the index, it was already loaded, - # so if we didn't find the reference earlier then it must not - # exist. - raise validate.ValidationException( - u"Reference `#%s` not found in file `%s`." % (frg, doc_url)) - doc = self.fetch(doc_url, inject_ids=(not mixin)) + doc = None + if obj: + for identifier in self.identifiers: + obj[identifier] = url + doc_url = url + else: + # Load structured document + doc_url, frg = urlparse.urldefrag(url) + if doc_url in self.idx and (not mixin): + # If the base document is in the index, it was already loaded, + # so if we didn't find the reference earlier then it must not + # exist. + raise validate.ValidationException( + u"Reference `#%s` not found in file `%s`." % (frg, doc_url)) + doc = self.fetch(doc_url, inject_ids=(not mixin)) # Recursively expand urls and resolve directives if mixin: @@ -359,10 +385,11 @@ def resolve_ref(self, ref, base_url=None, checklinks=True): if url in self.idx: resolved_obj = self.idx[url] else: - raise RuntimeError("Reference `%s` is not in the index. " - "Index contains:\n %s" % (url, "\n ".join(self.idx))) + raise RuntimeError( + "Reference `%s` is not in the index. Index contains:\n %s" + % (url, "\n ".join(self.idx))) - if isinstance(resolved_obj, (dict)): + if isinstance(resolved_obj, CommentedMap): if u"$graph" in resolved_obj: metadata = _copy_dict_without_key(resolved_obj, u"$graph") return resolved_obj[u"$graph"], metadata @@ -371,9 +398,11 @@ def resolve_ref(self, ref, base_url=None, checklinks=True): else: return resolved_obj, metadata - - def _resolve_idmap(self, document, loader): - # type: (Dict[unicode, Union[Dict[unicode, Dict[unicode, unicode]], List[Dict[unicode, Any]]]], Loader) -> None + def _resolve_idmap(self, + document, # type: CommentedMap + loader # type: Loader + ): + # type: (...) -> None # Convert fields with mapSubject into lists # use mapPredicate if the mapped value isn't a dict. 
for idmapField in loader.idmap: @@ -382,27 +411,46 @@ def _resolve_idmap(self, document, loader): if (isinstance(idmapFieldValue, dict) and "$import" not in idmapFieldValue and "$include" not in idmapFieldValue): - ls = [] + ls = CommentedSeq() for k in sorted(idmapFieldValue.keys()): val = idmapFieldValue[k] - v = None # type: Dict[unicode, Any] - if not isinstance(val, dict): + v = None # type: CommentedMap + if not isinstance(val, CommentedMap): if idmapField in loader.mapPredicate: - v = {loader.mapPredicate[idmapField]: val} + v = CommentedMap( + ((loader.mapPredicate[idmapField], val),)) + v.lc.add_kv_line_col( + loader.mapPredicate[idmapField], + document[idmapField].lc.data[k]) + v.lc.filename = document.lc.filename else: raise validate.ValidationException( "mapSubject '%s' value '%s' is not a dict" "and does not have a mapPredicate", k, v) else: v = val + v[loader.idmap[idmapField]] = k + v.lc.add_kv_line_col(loader.idmap[idmapField], + document[idmapField].lc.data[k]) + v.lc.filename = document.lc.filename + + ls.lc.add_kv_line_col( + len(ls), document[idmapField].lc.data[k]) + + ls.lc.filename = document.lc.filename ls.append(v) + document[idmapField] = ls typeDSLregex = re.compile(ur"^([^[?]+)(\[\])?(\?)?$") - def _type_dsl(self, t): - # type: (Union[unicode, Dict, List]) -> Union[unicode, Dict[unicode, unicode], List[Union[unicode, Dict[unicode, unicode]]]] + def _type_dsl(self, + t, # type: Union[unicode, Dict, List] + lc, + filename): + # type: (...) 
-> Union[unicode, Dict[unicode, unicode], List[Union[unicode, Dict[unicode, unicode]]]] + if not isinstance(t, (str, unicode)): return t @@ -412,34 +460,59 @@ def _type_dsl(self, t): first = m.group(1) second = third = None if m.group(2): - second = {u"type": u"array", - u"items": first} + second = CommentedMap((("type", "array"), + ("items", first))) + second.lc.add_kv_line_col("type", lc) + second.lc.add_kv_line_col("items", lc) + second.lc.filename = filename if m.group(3): - third = [u"null", second or first] + third = CommentedSeq([u"null", second or first]) + third.lc.add_kv_line_col(0, lc) + third.lc.add_kv_line_col(1, lc) + third.lc.filename = filename return third or second or first - def _resolve_type_dsl(self, document, loader): - # type: (Dict[unicode, Union[unicode, Dict[unicode, unicode], List]], Loader) -> None + def _resolve_type_dsl(self, + document, # type: CommentedMap + loader # type: Loader + ): + # type: (...) -> None for d in loader.type_dsl_fields: if d in document: - datum = document[d] + datum2 = datum = document[d] if isinstance(datum, (str, unicode)): - document[d] = self._type_dsl(datum) - elif isinstance(datum, list): - document[d] = [self._type_dsl(t) for t in datum] - datum2 = document[d] - if isinstance(datum2, list): - document[d] = flatten(datum2) + datum2 = self._type_dsl(datum, document.lc.data[ + d], document.lc.filename) + elif isinstance(datum, CommentedSeq): + datum2 = CommentedSeq() + for n, t in enumerate(datum): + datum2.lc.add_kv_line_col( + len(datum2), datum.lc.data[n]) + datum2.append(self._type_dsl( + t, datum.lc.data[n], document.lc.filename)) + if isinstance(datum2, CommentedSeq): + datum3 = CommentedSeq() seen = [] # type: List[unicode] - uniq = [] - for item in document[d]: - if item not in seen: - uniq.append(item) - seen.append(item) - document[d] = uniq + for i, item in enumerate(datum2): + if isinstance(item, CommentedSeq): + for j, v in enumerate(item): + if v not in seen: + datum3.lc.add_kv_line_col( + 
len(datum3), item.lc.data[j]) + datum3.append(v) + seen.append(v) + else: + if item not in seen: + datum3.lc.add_kv_line_col( + len(datum3), datum2.lc.data[i]) + datum3.append(item) + seen.append(item) + document[d] = datum3 + else: + document[d] = datum2 def _resolve_identifier(self, document, loader, base_url): - # type: (Dict[unicode, unicode], Loader, unicode) -> unicode + # type: (CommentedMap, Loader, unicode) -> unicode # Expand identifier field (usually 'id') to resolve scope for identifer in loader.identifiers: if identifer in document: @@ -480,8 +553,12 @@ def _normalize_fields(self, document, loader): document[d2] = document[d] del document[d] - def _resolve_uris(self, document, loader, base_url): - # type: (Dict[unicode, Union[unicode, List[unicode]]], Loader, unicode) -> None + def _resolve_uris(self, + document, # type: Dict[unicode, Union[unicode, List[unicode]]] + loader, # type: Loader + base_url # type: unicode + ): + # type: (...) -> None # Resolve remaining URLs based on document base for d in loader.url_fields: if d in document: @@ -492,35 +569,43 @@ def _resolve_uris(self, document, loader, base_url): vocab_term=(d in loader.vocab_fields), scoped_ref=self.scoped_ref_fields.get(d)) elif isinstance(datum, list): - document[d] = [ - loader.expand_url( - url, base_url, scoped_id=False, - vocab_term=(d in loader.vocab_fields), - scoped_ref=self.scoped_ref_fields.get(d)) - if isinstance(url, (str, unicode)) - else url for url in datum] - - - def resolve_all(self, document, base_url, file_base=None, checklinks=True): - # type: (DocumentType, unicode, unicode, bool) -> Tuple[Union[List, Dict[unicode, Any], unicode], Dict[unicode, Any]] + for i, url in enumerate(datum): + if isinstance(url, (str, unicode)): + datum[i] = loader.expand_url( + url, base_url, scoped_id=False, + vocab_term=(d in loader.vocab_fields), + scoped_ref=self.scoped_ref_fields.get(d)) + + + def resolve_all(self, + document, # type: Union[CommentedMap, CommentedSeq] + base_url, # 
type: unicode + file_base=None, # type: unicode + checklinks=True # type: bool + ): + # type: (...) -> Tuple[Union[CommentedMap, CommentedSeq, unicode], Dict[unicode, Any]] loader = self - metadata = {} # type: Dict[unicode, Any] + metadata = CommentedMap() # type: CommentedMap if file_base is None: file_base = base_url - if isinstance(document, dict): + if isinstance(document, CommentedMap): # Handle $import and $include if (u'$import' in document or u'$include' in document): - return self.resolve_ref(document, base_url=file_base, checklinks=checklinks) + return self.resolve_ref( + document, base_url=file_base, checklinks=checklinks) elif u'$mixin' in document: - return self.resolve_ref(document, base_url=base_url, checklinks=checklinks) - elif isinstance(document, list): + return self.resolve_ref( + document, base_url=base_url, checklinks=checklinks) + elif isinstance(document, CommentedSeq): pass + elif isinstance(document, (list, dict)): + raise Exception("Expected CommentedMap or CommentedSeq, got %s" % type(document)) else: return (document, metadata) newctx = None # type: Loader - if isinstance(document, dict): + if isinstance(document, CommentedMap): # Handle $base, $profile, $namespaces, $schemas and $graph if u"$base" in document: base_url = document[u"$base"] @@ -549,8 +634,9 @@ def resolve_all(self, document, base_url, file_base=None, checklinks=True): if u"$graph" in document: metadata = _copy_dict_without_key(document, u"$graph") document = document[u"$graph"] - resolved_metadata = loader.resolve_all(metadata, base_url, - file_base=file_base, checklinks=False)[0] + resolved_metadata = loader.resolve_all( + metadata, base_url, file_base=file_base, + checklinks=False)[0] if isinstance(resolved_metadata, dict): metadata = resolved_metadata else: @@ -558,7 +644,7 @@ def resolve_all(self, document, base_url, file_base=None, checklinks=True): "Validation error, metadata must be dict: %s" % (resolved_metadata)) - if isinstance(document, dict): + if 
isinstance(document, CommentedMap): self._normalize_fields(document, loader) self._resolve_idmap(document, loader) self._resolve_type_dsl(document, loader) @@ -573,19 +659,26 @@ def resolve_all(self, document, base_url, file_base=None, checklinks=True): except validate.ValidationException as v: _logger.warn("loader is %s", id(loader), exc_info=True) raise validate.ValidationException("(%s) (%s) Validation error in field %s:\n%s" % ( - id(loader), file_base, key, validate.indent(str(v)))) + id(loader), file_base, key, validate.indent(unicode(v)))) - elif isinstance(document, list): + elif isinstance(document, CommentedSeq): i = 0 try: while i < len(document): val = document[i] - if isinstance(val, dict) and (u"$import" in val or u"$mixin" in val): - l, _ = loader.resolve_ref(val, base_url=file_base, checklinks=False) - if isinstance(l, list): # never true? + if isinstance(val, CommentedMap) and (u"$import" in val or u"$mixin" in val): + l, _ = loader.resolve_ref( + val, base_url=file_base, checklinks=False) + if isinstance(l, CommentedSeq): + lc = document.lc.data[i] del document[i] - for item in aslist(l): - document.insert(i, item) + llen = len(l) + for j in range(len(document) + llen, i + llen, -1): + document.lc.data[ + j - 1] = document.lc.data[j - llen] + for item in l: + document.insert(i, item) # type: ignore + document.lc.data[i] = lc i += 1 else: document[i] = l @@ -597,7 +690,7 @@ def resolve_all(self, document, base_url, file_base=None, checklinks=True): except validate.ValidationException as v: _logger.warn("failed", exc_info=True) raise validate.ValidationException("(%s) (%s) Validation error in position %i:\n%s" % ( - id(loader), file_base, i, validate.indent(str(v)))) + id(loader), file_base, i, validate.indent(unicode(v)))) for identifer in loader.identity_links: if identifer in metadata: @@ -607,7 +700,7 @@ def resolve_all(self, document, base_url, file_base=None, checklinks=True): loader.idx[metadata[identifer]] = document if checklinks: - 
document = self.validate_links(document, u"") + self.validate_links(document, u"") return document, metadata @@ -635,7 +728,10 @@ def fetch_text(self, url): else: return read except (OSError, IOError) as e: - raise RuntimeError('Error reading %s %s' % (url, e)) + if e.filename == path: + raise RuntimeError(unicode(e)) + else: + raise RuntimeError('Error reading %s: %s' % (url, e)) else: raise ValueError('Unsupported scheme in url: %s' % url) @@ -648,11 +744,12 @@ def fetch(self, url, inject_ids=True): # type: (unicode, bool) -> Any textIO = StringIO(text.decode('utf-8')) else: textIO = StringIO(text) - textIO.name = url # type: ignore - result = yaml.load(textIO, Loader=SafeLoader) + textIO.name = url # type: ignore + result = yaml.round_trip_load(textIO) # type: ignore + add_lc_filename(result, url) except yaml.parser.ParserError as e: raise validate.ValidationException("Syntax error %s" % (e)) - if isinstance(result, dict) and inject_ids and self.identifiers: + if isinstance(result, CommentedMap) and inject_ids and self.identifiers: for identifier in self.identifiers: if identifier not in result: result[identifier] = url @@ -677,7 +774,7 @@ def check_file(self, url): # type: (unicode) -> bool else: raise ValueError('Unsupported scheme in url: %s' % url) - FieldType = TypeVar('FieldType', unicode, List[unicode], Dict[unicode, Any]) + FieldType = TypeVar('FieldType', unicode, CommentedSeq, CommentedMap) def validate_scoped(self, field, link, docid): # type: (unicode, unicode, unicode) -> unicode @@ -701,7 +798,7 @@ def validate_scoped(self, field, link, docid): break sp.pop() raise validate.ValidationException( - "Field `%s` contains undefined reference to `%s`, tried %s" % (field, link, tried)) + "Field `%s` references unknown identifier `%s`, tried %s" % (field, link, ", ".join(tried))) def validate_link(self, field, link, docid): # type: (unicode, FieldType, unicode) -> FieldType @@ -721,7 +818,7 @@ def validate_link(self, field, link, docid): elif not 
self.check_file(link): raise validate.ValidationException( "Field `%s` contains undefined reference to `%s`" % (field, link)) - elif isinstance(link, list): + elif isinstance(link, CommentedSeq): errors = [] for n, i in enumerate(link): try: @@ -730,12 +827,12 @@ def validate_link(self, field, link, docid): errors.append(v) if errors: raise validate.ValidationException( - "\n".join([str(e) for e in errors])) - elif isinstance(link, dict): + "\n".join([unicode(e) for e in errors])) + elif isinstance(link, CommentedMap): self.validate_links(link, docid) else: - raise validate.ValidationException("Link must be a str, unicode, " - "list, or a dict.") + raise validate.ValidationException( + "`%s` field is %s, expected string, list, or a dict." % (field, type(link).__name__)) return link def getid(self, d): # type: (Any) -> unicode @@ -747,59 +844,62 @@ def getid(self, d): # type: (Any) -> unicode return None def validate_links(self, document, base_url): - # type: (DocumentType, unicode) -> DocumentType + # type: (Union[CommentedMap, CommentedSeq, unicode], unicode) -> None docid = self.getid(document) if not docid: docid = base_url - errors = [] - iterator = None # type: Any + errors = [] # type: List[Exception] + iterator = None # type: Any if isinstance(document, list): iterator = enumerate(document) elif isinstance(document, dict): try: for d in self.url_fields: + sl = SourceLine(document, d, validate.ValidationException) if d in document and d not in self.identity_links: document[d] = self.validate_link(d, document[d], docid) except validate.ValidationException as v: - errors.append(v) + errors.append(sl.makeError(unicode(v))) if hasattr(document, "iteritems"): iterator = document.iteritems() else: iterator = document.items() else: - return document + return for key, val in iterator: + sl = SourceLine(document, key, validate.ValidationException) try: - document[key] = self.validate_links(val, docid) + self.validate_links(val, docid) except 
validate.ValidationException as v: if key not in self.nolinkcheck: docid2 = self.getid(val) if docid2: - errors.append(validate.ValidationException( - "While checking object `%s`\n%s" % (docid2, validate.indent(str(v))))) + errors.append(sl.makeError("checking object `%s`\n%s" % ( + relname(docid2), validate.indent(unicode(v))))) else: if isinstance(key, basestring): - errors.append(validate.ValidationException( - "While checking field `%s`\n%s" % (key, validate.indent(str(v))))) + errors.append(sl.makeError("checking field `%s`\n%s" % ( + key, validate.indent(unicode(v))))) else: - errors.append(validate.ValidationException( - "While checking position %s\n%s" % (key, validate.indent(str(v))))) + errors.append(sl.makeError("checking item\n%s" % ( + validate.indent(unicode(v))))) if errors: if len(errors) > 1: raise validate.ValidationException( - "\n".join([str(e) for e in errors])) + u"\n".join([unicode(e) for e in errors])) else: raise errors[0] - return document + return + +D = TypeVar('D', CommentedMap, ContextType) def _copy_dict_without_key(from_dict, filtered_key): - # type: (Dict, Any) -> Dict - new_dict = {} - for key, value in from_dict.items(): - if key != filtered_key: - new_dict[key] = value + # type: (D, Any) -> D + new_dict = copy.copy(from_dict) + if filtered_key in new_dict: + del new_dict[filtered_key] # type: ignore return new_dict diff --git a/schema_salad/schema.py b/schema_salad/schema.py index f3172892b..ff2d18cef 100644 --- a/schema_salad/schema.py +++ b/schema_salad/schema.py @@ -5,22 +5,23 @@ import pprint from pkg_resources import resource_stream import ruamel.yaml as yaml -try: - from ruamel.yaml import CSafeLoader as SafeLoader -except ImportError: - from ruamel.yaml import SafeLoader # type: ignore import avro.schema from . 
import validate import json import urlparse +import os AvroSchemaFromJSONData = avro.schema.make_avsc_object # AvroSchemaFromJSONData=avro.schema.SchemaFromJSONData +from avro.schema import Names, SchemaParseException from . import ref_resolver +from .ref_resolver import Loader, DocumentType from .flatten import flatten import logging from .aslist import aslist from . import jsonld_context +from .sourceline import SourceLine, strip_dup_lineno, add_lc_filename, bullets, relname from typing import Any, AnyStr, cast, Dict, List, Tuple, TypeVar, Union +from ruamel.yaml.comments import CommentedSeq, CommentedMap _logger = logging.getLogger("salad") @@ -48,7 +49,7 @@ def get_metaschema(): - # type: () -> Tuple[avro.schema.Names, List[Dict[unicode, Any]], ref_resolver.Loader] + # type: () -> Tuple[Names, List[Dict[unicode, Any]], Loader] loader = ref_resolver.Loader({ "Any": "https://w3id.org/cwl/salad#Any", "ArraySchema": "https://w3id.org/cwl/salad#ArraySchema", @@ -162,8 +163,8 @@ def get_metaschema(): loader.cache["https://w3id.org/cwl/salad"] = rs.read() rs.close() - j = yaml.load(loader.cache["https://w3id.org/cwl/salad"], - Loader=SafeLoader) + j = yaml.round_trip_load(loader.cache["https://w3id.org/cwl/salad"]) # type: ignore + add_lc_filename(j, "metaschema.yml") j, _ = loader.resolve_all(j, "https://w3id.org/cwl/salad#") # pprint.pprint(j) @@ -177,8 +178,14 @@ def get_metaschema(): return (sch_names, j, loader) -def load_schema(schema_ref, cache=None): - # type: (Union[unicode, Dict[unicode, Any]], Dict) -> Tuple[ref_resolver.Loader, Union[avro.schema.Names, avro.schema.SchemaParseException], Dict[unicode, Any], ref_resolver.Loader] +def load_schema(schema_ref, # type: Union[CommentedMap, CommentedSeq, unicode] + cache=None # type: Dict + ): + # type: (...) -> Tuple[Loader, Union[Names, SchemaParseException], Dict[unicode, Any], Loader] + """Load a schema that can be used to validate documents using load_and_validate. 
+ + return document_loader, avsc_names, schema_metadata, metaschema_loader""" + metaschema_names, metaschema_doc, metaschema_loader = get_metaschema() if cache is not None: metaschema_loader.cache = cache @@ -194,7 +201,7 @@ def load_schema(schema_ref, cache=None): schema_doc, metactx) # Create the loader that will be used to load the target document. - document_loader = ref_resolver.Loader(schema_ctx, cache=cache) + document_loader = Loader(schema_ctx, cache=cache) # Make the Avro validation that will be used to validate the target # document @@ -202,19 +209,53 @@ def load_schema(schema_ref, cache=None): return document_loader, avsc_names, schema_metadata, metaschema_loader -def load_and_validate(document_loader, avsc_names, document, strict): - # type: (ref_resolver.Loader, avro.schema.Names, Union[Dict[unicode, Any], unicode], bool) -> Tuple[Any, Dict[unicode, Any]] - if isinstance(document, dict): - data, metadata = document_loader.resolve_all(document, document["id"]) - else: - data, metadata = document_loader.resolve_ref(document) - validate_doc(avsc_names, data, document_loader, strict) +def load_and_validate(document_loader, # type: Loader + avsc_names, # type: Names + document, # type: Union[CommentedMap, unicode] + strict # type: bool + ): + # type: (...) -> Tuple[Any, Dict[unicode, Any]] + """Load a document and validate it with the provided schema. 
+ + return data, metadata + """ + try: + if isinstance(document, CommentedMap): + source = document["id"] + data, metadata = document_loader.resolve_all( + document, document["id"], checklinks=False) + else: + source = document + data, metadata = document_loader.resolve_ref( + document, checklinks=False) + except validate.ValidationException as v: + raise validate.ValidationException(strip_dup_lineno(str(v))) + + validationErrors = u"" + try: + document_loader.validate_links(data, u"") + except validate.ValidationException as v: + validationErrors = unicode(v) + "\n" + + try: + validate_doc(avsc_names, data, document_loader, strict, source=source) + except validate.ValidationException as v: + validationErrors += unicode(v) + + if validationErrors: + raise validate.ValidationException(validationErrors) + return data, metadata -def validate_doc(schema_names, doc, loader, strict): - # type: (avro.schema.Names, Union[Dict[unicode, Any], List[Dict[unicode, Any]], unicode], ref_resolver.Loader, bool) -> None +def validate_doc(schema_names, # type: Names + doc, # type: Union[Dict[unicode, Any], List[Dict[unicode, Any]], unicode] + loader, # type: Loader + strict, # type: bool + source=None + ): + # type: (...) 
-> None has_root = False for r in schema_names.names.values(): if ((hasattr(r, 'get_prop') and r.get_prop(u"documentRoot")) or ( @@ -228,8 +269,10 @@ def validate_doc(schema_names, doc, loader, strict): if isinstance(doc, list): validate_doc = doc - elif isinstance(doc, dict): - validate_doc = [doc] + elif isinstance(doc, CommentedMap): + validate_doc = CommentedSeq([doc]) + validate_doc.lc.add_kv_line_col(0, [doc.lc.line, doc.lc.col]) + validate_doc.lc.filename = doc.lc.filename else: raise validate.ValidationException("Document must be dict or list") @@ -241,10 +284,12 @@ def validate_doc(schema_names, doc, loader, strict): anyerrors = [] for pos, item in enumerate(validate_doc): + sl = SourceLine(validate_doc, pos, unicode) success = False for r in roots: success = validate.validate_ex( - r, item, loader.identifiers, strict, foreign_properties=loader.foreign_properties, raise_ex=False) + r, item, loader.identifiers, strict, + foreign_properties=loader.foreign_properties, raise_ex=False) if success: break @@ -258,28 +303,33 @@ def validate_doc(schema_names, doc, loader, strict): try: validate.validate_ex( - r, item, loader.identifiers, strict, foreign_properties=loader.foreign_properties, raise_ex=True) + r, item, loader.identifiers, strict, + foreign_properties=loader.foreign_properties, + raise_ex=True) except validate.ClassValidationException as e: - errors = [u"Could not validate `%s` because\n%s" % ( - name, validate.indent(str(e), nolead=False))] + errors = [sl.makeError(u"tried `%s` but\n%s" % ( + name, validate.indent(str(e), nolead=False)))] break except validate.ValidationException as e: - errors.append(u"Could not validate as `%s` because\n%s" % ( - name, validate.indent(str(e), nolead=False))) + errors.append(sl.makeError(u"tried `%s` but\n%s" % ( + name, validate.indent(str(e), nolead=False)))) - objerr = u"Validation error at position %i" % pos + objerr = sl.makeError(u"Invalid") for ident in loader.identifiers: if ident in item: - objerr = 
u"Validation error in object %s" % (item[ident]) + objerr = sl.makeError( + u"Object `%s` is not valid because" + % (relname(item[ident]))) break anyerrors.append(u"%s\n%s" % - (objerr, validate.indent(u"\n".join(errors)))) + (objerr, validate.indent(bullets(errors, "- ")))) if anyerrors: - raise validate.ValidationException(u"\n".join(anyerrors)) + raise validate.ValidationException( + strip_dup_lineno(bullets(anyerrors, "* "))) def replace_type(items, spec, loader, found): - # type: (Any, Dict[unicode, Any], ref_resolver.Loader, Set[unicode]) -> Any + # type: (Any, Dict[unicode, Any], Loader, Set[unicode]) -> Any """ Go through and replace types in the 'spec' mapping""" items = copy.deepcopy(items) @@ -328,10 +378,16 @@ def avro_name(url): # type: (AnyStr) -> AnyStr return frg return url + Avro = TypeVar('Avro', Dict[unicode, Any], List[Any], unicode) -def make_valid_avro(items, alltypes, found, union=False): - # type: (Avro, Dict[unicode, Dict[unicode, Any]], Set[unicode], bool) -> Union[Avro, Dict] + +def make_valid_avro(items, # type: Avro + alltypes, # type: Dict[unicode, Dict[unicode, Any]] + found, # type: Set[unicode] + union=False # type: bool + ): + # type: (...) 
-> Union[Avro, Dict] items = copy.deepcopy(items) if isinstance(items, dict): if items.get("name"): @@ -364,13 +420,13 @@ def make_valid_avro(items, alltypes, found, union=False): if union and isinstance(items, (str, unicode)): if items in alltypes and avro_name(items) not in found: return cast(Dict, make_valid_avro(alltypes[items], alltypes, found, - union=union)) + union=union)) items = avro_name(items) return items def extend_and_specialize(items, loader): - # type: (List[Dict[unicode, Any]], ref_resolver.Loader) -> List[Dict[unicode, Any]] + # type: (List[Dict[unicode, Any]], Loader) -> List[Dict[unicode, Any]] """Apply 'extend' and 'specialize' to fully materialize derived record types.""" @@ -442,7 +498,8 @@ def extend_and_specialize(items, loader): for t in n: if t.get("abstract") and t["name"] not in extended_by: - raise validate.ValidationException("%s is abstract but missing a concrete subtype" % t["name"]) + raise validate.ValidationException( + "%s is abstract but missing a concrete subtype" % t["name"]) for t in n: if "fields" in t: @@ -451,8 +508,10 @@ def extend_and_specialize(items, loader): return n -def make_avro_schema(i, loader): - # type: (List[Dict[unicode, Any]], ref_resolver.Loader) -> Tuple[Union[avro.schema.Names,avro.schema.SchemaParseException], List[Dict[unicode, Any]]] +def make_avro_schema(i, # type: List[Dict[unicode, Any]] + loader # type: Loader + ): + # type: (...) 
-> Tuple[Union[Names, SchemaParseException], List[Dict[unicode, Any]]] names = avro.schema.Names() j = extend_and_specialize(i, loader) diff --git a/schema_salad/sourceline.py b/schema_salad/sourceline.py new file mode 100644 index 000000000..492deb254 --- /dev/null +++ b/schema_salad/sourceline.py @@ -0,0 +1,135 @@ +import ruamel.yaml +from ruamel.yaml.comments import CommentedBase, CommentedMap, CommentedSeq +import re +import os + +from typing import (Any, AnyStr, Callable, cast, Dict, List, Iterable, Tuple, + TypeVar, Union, Text) + +lineno_re = re.compile(u"^(.*?:[0-9]+:[0-9]+: )(( *)(.*))") + +def _add_lc_filename(r, source): # type: (ruamel.yaml.comments.CommentedBase, AnyStr) -> None + if isinstance(r, ruamel.yaml.comments.CommentedBase): + r.lc.filename = source + if isinstance(r, list): + for d in r: + _add_lc_filename(d, source) + elif isinstance(r, dict): + for d in r.itervalues(): + _add_lc_filename(d, source) + +def relname(source): # type: (AnyStr) -> AnyStr + if source.startswith("file://"): + source = source[7:] + source = os.path.relpath(source) + return source + +def add_lc_filename(r, source): # type: (ruamel.yaml.comments.CommentedBase, AnyStr) -> None + _add_lc_filename(r, relname(source)) + +def reflow(text, maxline, shift=""): # type: (AnyStr, int, AnyStr) -> AnyStr + if maxline < 20: + maxline = 20 + if len(text) > maxline: + sp = text.rfind(' ', 0, maxline) + if sp < 1: + sp = text.find(' ', sp+1) + if sp == -1: + sp = len(text) + if sp < len(text): + return "%s\n%s%s" % (text[0:sp], shift, reflow(text[sp+1:], maxline, shift)) + return text + +def indent(v, nolead=False, shift=u" ", bullet=u" "): # type: (Text, bool, Text, Text) -> Text + if nolead: + return v.splitlines()[0] + u"\n".join([shift + l for l in v.splitlines()[1:]]) + else: + def lineno(i, l): # type: (int, Text) -> Text + r = lineno_re.match(l) + if r: + return r.group(1) + (bullet if i == 0 else shift) + r.group(2) + else: + return (bullet if i == 0 else shift) + l + + 
return u"\n".join([lineno(i, l) for i, l in enumerate(v.splitlines())]) + +def bullets(textlist, bul): # type: (List[Text], Text) -> Text + if len(textlist) == 1: + return textlist[0] + else: + return "\n".join(indent(t, bullet=bul) for t in textlist) + +def strip_dup_lineno(text, maxline=None): # type: (Text, int) -> Text + if maxline is None: + maxline = int(os.environ.get("COLUMNS", "100")) + pre = None + msg = [] + for l in text.splitlines(): + g = lineno_re.match(l) + if not g: + msg.append(l) + continue + shift = len(g.group(1)) + len(g.group(3)) + g2 = reflow(g.group(2), maxline-shift, " " * shift) + if g.group(1) != pre: + pre = g.group(1) + msg.append(pre + g2) + else: + g2 = reflow(g.group(2), maxline-len(g.group(1)), " " * (len(g.group(1))+len(g.group(3)))) + msg.append(" " * len(g.group(1)) + g2) + return "\n".join(msg) + +def cmap(d, lc=None, fn=None): # type: (Union[int, float, str, unicode, Dict, List], List[int], unicode) -> Union[int, float, str, unicode, CommentedMap, CommentedSeq] + if lc is None: + lc = [0, 0, 0, 0] + if fn is None: + fn = "test" + if isinstance(d, dict): + cm = CommentedMap() + for k,v in d.iteritems(): + cm[k] = cmap(v) + cm.lc.add_kv_line_col(k, lc) + cm.lc.filename = fn + return cm + if isinstance(d, list): + cs = CommentedSeq() + for k,v in enumerate(d): + cs.append(cmap(v)) + cs.lc.add_kv_line_col(k, lc) + cs.lc.filename = fn + return cs + else: + return d + +class SourceLine(object): + def __init__(self, item, key=None, raise_type=unicode): # type: (Any, Any, Callable) -> None + self.item = item + self.key = key + self.raise_type = raise_type + + def __enter__(self): + return self + + def __exit__(self, exc_type, exc_value, traceback): + if not exc_value: + return + raise self.makeError(unicode(exc_value)) + + def makeError(self, msg): # type: (Text) -> Any + if not isinstance(self.item, ruamel.yaml.comments.CommentedBase): + return self.raise_type(msg) + errs = [] + if self.key is None: + lead = "%s:%i:%i:" % 
(self.item.lc.filename, + self.item.lc.line+1, + self.item.lc.col+1) + else: + lead = "%s:%i:%i:" % (self.item.lc.filename, + self.item.lc.data[self.key][0]+1, + self.item.lc.data[self.key][1]+1) + for m in msg.splitlines(): + if lineno_re.match(m): + errs.append(m) + else: + errs.append("%s %s" % (lead, m)) + return self.raise_type("\n".join(errs)) diff --git a/schema_salad/tests/test_errors.py b/schema_salad/tests/test_errors.py new file mode 100644 index 000000000..79f3d6035 --- /dev/null +++ b/schema_salad/tests/test_errors.py @@ -0,0 +1,29 @@ +import unittest +from typing import cast +from schema_salad.schema import load_schema, load_and_validate +from schema_salad.validate import ValidationException +from avro.schema import Names + +class TestErrors(unittest.TestCase): + def test_errors(self): + document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema( + u"schema_salad/tests/test_schema/CommonWorkflowLanguage.yml") + avsc_names = cast(Names, avsc_names) + + for t in ("test_schema/test1.cwl", + "test_schema/test2.cwl", + "test_schema/test3.cwl", + "test_schema/test4.cwl", + "test_schema/test5.cwl", + "test_schema/test6.cwl", + "test_schema/test7.cwl", + "test_schema/test8.cwl", + "test_schema/test9.cwl", + "test_schema/test10.cwl", + "test_schema/test11.cwl"): + with self.assertRaises(ValidationException): + try: + load_and_validate(document_loader, avsc_names, unicode("schema_salad/tests/"+t), True) + except ValidationException as e: + print "\n", e + raise diff --git a/schema_salad/tests/test_examples.py b/schema_salad/tests/test_examples.py index c63b81efc..6ff7ae262 100644 --- a/schema_salad/tests/test_examples.py +++ b/schema_salad/tests/test_examples.py @@ -5,15 +5,17 @@ from schema_salad.jsonld_context import makerdf from pkg_resources import Requirement, resource_filename, ResolutionError # type: ignore import rdflib -import ruamel.yaml as yaml +import ruamel.yaml import json import os +from schema_salad.sourceline import cmap try: 
from ruamel.yaml import CSafeLoader as SafeLoader except ImportError: from ruamel.yaml import SafeLoader # type: ignore +from ruamel.yaml.comments import CommentedSeq, CommentedMap def get_data(filename): filepath = None @@ -31,11 +33,11 @@ class TestSchemas(unittest.TestCase): def test_schemas(self): l = schema_salad.ref_resolver.Loader({}) - ra, _ = l.resolve_all({ + ra, _ = l.resolve_all(cmap({ u"$schemas": ["file://" + get_data("tests/EDAM.owl")], u"$namespaces": {u"edam": u"http://edamontology.org/"}, u"edam:has_format": u"edam:format_1915" - }, "") + }), "") self.assertEqual({ u"$schemas": ["file://" + get_data("tests/EDAM.owl")], @@ -74,7 +76,7 @@ def test_avro_regression(self): argsl=[get_data("tests/Process.yml")])) def test_jsonld_ctx(self): - ldr, _, _, _ = schema_salad.schema.load_schema({ + ldr, _, _, _ = schema_salad.schema.load_schema(cmap({ "$base": "Y", "name": "X", "$namespaces": { @@ -84,9 +86,9 @@ def test_jsonld_ctx(self): "name": "ExampleType", "type": "enum", "symbols": ["asym", "bsym"]}] - }) + })) - ra, _ = ldr.resolve_all({"foo:bar": "asym"}, "X") + ra, _ = ldr.resolve_all(cmap({"foo:bar": "asym"}), "X") self.assertEqual(ra, { 'http://example.com/foo#bar': 'asym' @@ -106,7 +108,7 @@ def test_idmap(self): }, "id": "@id"}) - ra, _ = ldr.resolve_all({ + ra, _ = ldr.resolve_all(cmap({ "id": "stuff", "inputs": { "zip": 1, @@ -116,7 +118,7 @@ def test_idmap(self): "other": { 'n': 9 } - }, "http://example2.com/") + }), "http://example2.com/") self.assertEqual("http://example2.com/#stuff", ra["id"]) for item in ra["inputs"]: @@ -159,7 +161,7 @@ def test_scoped_ref(self): }, "id": "@id"}) - ra, _ = ldr.resolve_all({ + ra, _ = ldr.resolve_all(cmap({ "inputs": { "inp": "string", "inp2": "string" @@ -188,7 +190,7 @@ def test_scoped_ref(self): "out": ["out"] } } - }, "http://example2.com/") + }), "http://example2.com/") self.assertEquals( {'inputs': [{ @@ -234,13 +236,15 @@ def test_examples(self): get_data("metaschema/%s_schema.yml" % a)) with 
open(get_data("metaschema/%s_src.yml" % a)) as src_fp: src = ldr.resolve_all( - yaml.load(src_fp, Loader=SafeLoader), "", checklinks=False)[0] + ruamel.yaml.round_trip_load(src_fp), "", + checklinks=False)[0] with open(get_data("metaschema/%s_proc.yml" % a)) as src_proc: - proc = yaml.load(src_proc, Loader=SafeLoader) + proc = ruamel.yaml.safe_load(src_proc) self.assertEqual(proc, src) def test_yaml_float_test(self): - self.assertEqual(yaml.load("float-test: 2e-10")["float-test"], 2e-10) + self.assertEqual(ruamel.yaml.load("float-test: 2e-10")["float-test"], + 2e-10) def test_typedsl_ref(self): ldr = schema_salad.ref_resolver.Loader({}) @@ -254,16 +258,16 @@ def test_typedsl_ref(self): } }) - ra, _ = ldr.resolve_all({"type": "File"}, "") + ra, _ = ldr.resolve_all(cmap({"type": "File"}), "") self.assertEqual({'type': 'File'}, ra) - ra, _ = ldr.resolve_all({"type": "File?"}, "") + ra, _ = ldr.resolve_all(cmap({"type": "File?"}), "") self.assertEqual({'type': ['null', 'File']}, ra) - ra, _ = ldr.resolve_all({"type": "File[]"}, "") + ra, _ = ldr.resolve_all(cmap({"type": "File[]"}), "") self.assertEqual({'type': {'items': 'File', 'type': 'array'}}, ra) - ra, _ = ldr.resolve_all({"type": "File[]?"}, "") + ra, _ = ldr.resolve_all(cmap({"type": "File[]?"}), "") self.assertEqual( {'type': ['null', {'items': 'File', 'type': 'array'}]}, ra) @@ -280,12 +284,12 @@ def test_scoped_id(self): } ldr.add_context(ctx) - ra, _ = ldr.resolve_all({ + ra, _ = ldr.resolve_all(cmap({ "id": "foo", "bar": { "id": "baz" } - }, "http://example.com") + }), "http://example.com") self.assertEqual({'id': 'http://example.com/#foo', 'bar': { 'id': 'http://example.com/#foo/baz'}, @@ -294,12 +298,12 @@ def test_scoped_id(self): g = makerdf(None, ra, ctx) print(g.serialize(format="n3")) - ra, _ = ldr.resolve_all({ + ra, _ = ldr.resolve_all(cmap({ "location": "foo", "bar": { "location": "baz" } - }, "http://example.com", checklinks=False) + }), "http://example.com", checklinks=False) 
self.assertEqual({'location': 'http://example.com/foo', 'bar': { 'location': 'http://example.com/baz'}, @@ -308,12 +312,12 @@ def test_scoped_id(self): g = makerdf(None, ra, ctx) print(g.serialize(format="n3")) - ra, _ = ldr.resolve_all({ + ra, _ = ldr.resolve_all(cmap({ "id": "foo", "bar": { "location": "baz" } - }, "http://example.com", checklinks=False) + }), "http://example.com", checklinks=False) self.assertEqual({'id': 'http://example.com/#foo', 'bar': { 'location': 'http://example.com/baz'}, @@ -322,12 +326,12 @@ def test_scoped_id(self): g = makerdf(None, ra, ctx) print(g.serialize(format="n3")) - ra, _ = ldr.resolve_all({ + ra, _ = ldr.resolve_all(cmap({ "location": "foo", "bar": { "id": "baz" } - }, "http://example.com", checklinks=False) + }), "http://example.com", checklinks=False) self.assertEqual({'location': 'http://example.com/foo', 'bar': { 'id': 'http://example.com/#baz'}, @@ -338,19 +342,19 @@ def test_scoped_id(self): def test_mixin(self): ldr = schema_salad.ref_resolver.Loader({}) - ra = ldr.resolve_ref({"$mixin": get_data("tests/mixin.yml"), "one": "five"}, + ra = ldr.resolve_ref(cmap({"$mixin": get_data("tests/mixin.yml"), "one": "five"}), base_url="file://" + os.getcwd() + "/tests/") self.assertEqual({'id': 'four', 'one': 'five'}, ra[0]) ldr = schema_salad.ref_resolver.Loader({"id": "@id"}) base_url = "file://" + os.getcwd() + "/tests/" - ra = ldr.resolve_all([{ + ra = ldr.resolve_all(cmap([{ "id": "a", "m": {"$mixin": get_data("tests/mixin.yml")} }, { "id": "b", "m": {"$mixin": get_data("tests/mixin.yml")} - }], base_url=base_url) + }]), base_url=base_url) self.assertEqual([{ 'id': base_url + '#a', 'm': { @@ -364,5 +368,6 @@ def test_mixin(self): 'one': 'two'} }], ra[0]) + if __name__ == '__main__': unittest.main() diff --git a/schema_salad/tests/test_schema/CommandLineTool.yml b/schema_salad/tests/test_schema/CommandLineTool.yml new file mode 100644 index 000000000..181c51cf3 --- /dev/null +++ 
b/schema_salad/tests/test_schema/CommandLineTool.yml new file mode 100644 index 000000000..181c51cf3 --- /dev/null +++ 
+ ``` + inputs: + - id: one + type: string + doc: First input parameter + - id: two + type: int + doc: Second input parameter + ``` + can be + ``` + inputs: + one: + type: string + doc: First input parameter + two: + type: int + doc: Second input parameter + ``` + * [InitialWorkDirRequirement](#InitialWorkDirRequirement): list of + files and subdirectories to be present in the output directory prior + to execution. + * Shortcuts for specifying the standard [output](#stdout) and/or + [error](#stderr) streams as a (streamable) File output. + * [SoftwareRequirement](#SoftwareRequirement) for describing software + dependencies of a tool. + * The common `description` field has been renamed to `doc`. + + ## Errata + + Post v1.0 release changes to the spec. + + * 13 July 2016: Mark `baseCommand` as optional and update descriptive text. + + ## Purpose + + Standalone programs are a flexible and interoperable form of code reuse. + Unlike monolithic applications, applications and analysis workflows which + are composed of multiple separate programs can be written in multiple + languages and execute concurrently on multiple hosts. However, POSIX + does not dictate computer-readable grammar or semantics for program input + and output, resulting in extremely heterogeneous command line grammar and + input/output semantics among program. This is a particular problem in + distributed computing (multi-node compute clusters) and virtualized + environments (such as Docker containers) where it is often necessary to + provision resources such as input files before executing the program. + + Often this gap is filled by hard coding program invocation and + implicitly assuming requirements will be met, or abstracting program + invocation with wrapper scripts or descriptor documents. 
Unfortunately, + where these approaches are application or platform specific it creates a + significant barrier to reproducibility and portability, as methods + developed for one platform must be manually ported to be used on new + platforms. Similarly it creates redundant work, as wrappers for popular + tools must be rewritten for each application or platform in use. + + The Common Workflow Language Command Line Tool Description is designed to + provide a common standard description of grammar and semantics for + invoking programs used in data-intensive fields such as Bioinformatics, + Chemistry, Physics, Astronomy, and Statistics. This specification + defines a precise data and execution model for Command Line Tools that + can be implemented on a variety of computing platforms, ranging from a + single workstation to cluster, grid, cloud, and high performance + computing platforms. + + - {$include: concepts.md} + - {$include: invocation.md} + + +- type: record + name: EnvironmentDef + doc: | + Define an environment variable that will be set in the runtime environment + by the workflow platform when executing the command line tool. May be the + result of executing an expression, such as getting a parameter from input. + fields: + - name: envName + type: string + doc: The environment variable name + - name: envValue + type: [string, Expression] + doc: The environment variable value + +- type: record + name: CommandLineBinding + extends: InputBinding + doc: | + + When listed under `inputBinding` in the input schema, the term + "value" refers to the corresponding value in the input object. For + binding objects listed in `CommandLineTool.arguments`, the term "value" + refers to the effective value after evaluating `valueFrom`. + + The binding behavior when building the command line depends on the data + type of the value. 
If there is a mismatch between the type described by + the input schema and the effective value, such as resulting from an + expression evaluation, an implementation must use the data type of the + effective value. + + - **string**: Add `prefix` and the string to the command line. + + - **number**: Add `prefix` and decimal representation to command line. + + - **boolean**: If true, add `prefix` to the command line. If false, add + nothing. + + - **File**: Add `prefix` and the value of + [`File.path`](#File) to the command line. + + - **array**: If `itemSeparator` is specified, add `prefix` and join + the array into a single string with `itemSeparator` separating the + items. Otherwise first add `prefix`, then recursively process + individual elements. + + - **object**: Add `prefix` only, and recursively add object fields for + which `inputBinding` is specified. + + - **null**: Add nothing. + + fields: + - name: position + type: int? + doc: "The sorting key. Default position is 0." + - name: prefix + type: string? + doc: "Command line prefix to add before the value." + - name: separate + type: boolean? + doc: | + If true (default), then the prefix and value must be added as separate + command line arguments; if false, prefix and value must be concatenated + into a single command line argument. + - name: itemSeparator + type: string? + doc: | + Join the array elements into a single string with the elements + separated by `itemSeparator`. + - name: valueFrom + type: + - "null" + - string + - Expression + jsonldPredicate: "cwl:valueFrom" + doc: | + If `valueFrom` is a constant string value, use this as the value and + apply the binding rules above. + + If `valueFrom` is an expression, evaluate the expression to yield the + actual value to use to build the command line and apply the binding + rules above. If the inputBinding is associated with an input + parameter, the value of `self` in the expression will be the value of the + input parameter. 
+ + When a binding is part of the `CommandLineTool.arguments` field, + the `valueFrom` field is required. + - name: shellQuote + type: boolean? + doc: | + If `ShellCommandRequirement` is in the requirements for the current command, + this controls whether the value is quoted on the command line (default is true). + Use `shellQuote: false` to inject metacharacters for operations such as pipes. + +- type: record + name: CommandOutputBinding + extends: OutputBinding + doc: | + Describes how to generate an output parameter based on the files produced + by a CommandLineTool. + + The output parameter is generated by applying these operations in + the following order: + + - glob + - loadContents + - outputEval + fields: + - name: glob + type: + - "null" + - string + - Expression + - type: array + items: string + doc: | + Find files relative to the output directory, using POSIX glob(3) + pathname matching. If an array is provided, find files that match any + pattern in the array. If an expression is provided, the expression must + return a string or an array of strings, which will then be evaluated as + one or more glob patterns. Must only match and return files which + actually exist. + - name: loadContents + type: + - "null" + - boolean + jsonldPredicate: "cwl:loadContents" + doc: | + For each file matched in `glob`, read up to + the first 64 KiB of text from the file and place it in the `contents` + field of the file object for manipulation by `outputEval`. + - name: outputEval + type: + - "null" + - string + - Expression + doc: | + Evaluate an expression to generate the output value. If `glob` was + specified, the value of `self` must be an array containing file objects + that were matched. If no files were matched, `self` must be a zero + length array; if a single file was matched, the value of `self` is an + array of a single element. 
Additionally, if `loadContents` is `true`, + the File objects must include up to the first 64 KiB of file contents + in the `contents` field. + + +- name: CommandInputRecordField + type: record + extends: InputRecordField + specialize: + - specializeFrom: InputRecordSchema + specializeTo: CommandInputRecordSchema + - specializeFrom: InputEnumSchema + specializeTo: CommandInputEnumSchema + - specializeFrom: InputArraySchema + specializeTo: CommandInputArraySchema + - specializeFrom: InputBinding + specializeTo: CommandLineBinding + + +- name: CommandInputRecordSchema + type: record + extends: InputRecordSchema + specialize: + - specializeFrom: InputRecordField + specializeTo: CommandInputRecordField + + +- name: CommandInputEnumSchema + type: record + extends: InputEnumSchema + specialize: + - specializeFrom: InputBinding + specializeTo: CommandLineBinding + + +- name: CommandInputArraySchema + type: record + extends: InputArraySchema + specialize: + - specializeFrom: InputRecordSchema + specializeTo: CommandInputRecordSchema + - specializeFrom: InputEnumSchema + specializeTo: CommandInputEnumSchema + - specializeFrom: InputArraySchema + specializeTo: CommandInputArraySchema + - specializeFrom: InputBinding + specializeTo: CommandLineBinding + + +- name: CommandOutputRecordField + type: record + extends: OutputRecordField + specialize: + - specializeFrom: OutputRecordSchema + specializeTo: CommandOutputRecordSchema + - specializeFrom: OutputEnumSchema + specializeTo: CommandOutputEnumSchema + - specializeFrom: OutputArraySchema + specializeTo: CommandOutputArraySchema + - specializeFrom: OutputBinding + specializeTo: CommandOutputBinding + + +- name: CommandOutputRecordSchema + type: record + extends: OutputRecordSchema + specialize: + - specializeFrom: OutputRecordField + specializeTo: CommandOutputRecordField + + +- name: CommandOutputEnumSchema + type: record + extends: OutputEnumSchema + specialize: + - specializeFrom: OutputRecordSchema + specializeTo: 
CommandOutputRecordSchema + - specializeFrom: OutputEnumSchema + specializeTo: CommandOutputEnumSchema + - specializeFrom: OutputArraySchema + specializeTo: CommandOutputArraySchema + - specializeFrom: OutputBinding + specializeTo: CommandOutputBinding + + +- name: CommandOutputArraySchema + type: record + extends: OutputArraySchema + specialize: + - specializeFrom: OutputRecordSchema + specializeTo: CommandOutputRecordSchema + - specializeFrom: OutputEnumSchema + specializeTo: CommandOutputEnumSchema + - specializeFrom: OutputArraySchema + specializeTo: CommandOutputArraySchema + - specializeFrom: OutputBinding + specializeTo: CommandOutputBinding + + +- type: record + name: CommandInputParameter + extends: InputParameter + doc: An input parameter for a CommandLineTool. + specialize: + - specializeFrom: InputRecordSchema + specializeTo: CommandInputRecordSchema + - specializeFrom: InputEnumSchema + specializeTo: CommandInputEnumSchema + - specializeFrom: InputArraySchema + specializeTo: CommandInputArraySchema + - specializeFrom: InputBinding + specializeTo: CommandLineBinding + +- type: record + name: CommandOutputParameter + extends: OutputParameter + doc: An output parameter for a CommandLineTool. + specialize: + - specializeFrom: OutputBinding + specializeTo: CommandOutputBinding + fields: + - name: type + type: + - "null" + - CWLType + - stdout + - stderr + - CommandOutputRecordSchema + - CommandOutputEnumSchema + - CommandOutputArraySchema + - string + - type: array + items: + - CWLType + - CommandOutputRecordSchema + - CommandOutputEnumSchema + - CommandOutputArraySchema + - string + jsonldPredicate: + "_id": "sld:type" + "_type": "@vocab" + refScope: 2 + typeDSL: True + doc: | + Specify valid types of data that may be assigned to this parameter. + +- name: stdout + type: enum + symbols: [ "cwl:stdout" ] + docParent: "#CommandOutputParameter" + doc: | + Only valid as a `type` for a `CommandLineTool` output with no + `outputBinding` set. 
+ + The following + ``` + outputs: + an_output_name: + type: stdout + + stdout: a_stdout_file + ``` + is equivalent to + ``` + outputs: + an_output_name: + type: File + streamable: true + outputBinding: + glob: a_stdout_file + + stdout: a_stdout_file + ``` + + If there is no `stdout` name provided, a random filename will be created. + For example, the following + ``` + outputs: + an_output_name: + type: stdout + ``` + is equivalent to + ``` + outputs: + an_output_name: + type: File + streamable: true + outputBinding: + glob: random_stdout_filenameABCDEFG + + stdout: random_stdout_filenameABCDEFG + ``` + + +- name: stderr + type: enum + symbols: [ "cwl:stderr" ] + docParent: "#CommandOutputParameter" + doc: | + Only valid as a `type` for a `CommandLineTool` output with no + `outputBinding` set. + + The following + ``` + outputs: + an_output_name: + type: stderr + + stderr: a_stderr_file + ``` + is equivalent to + ``` + outputs: + an_output_name: + type: File + streamable: true + outputBinding: + glob: a_stderr_file + + stderr: a_stderr_file + ``` + + If there is no `stderr` name provided, a random filename will be created. + For example, the following + ``` + outputs: + an_output_name: + type: stderr + ``` + is equivalent to + ``` + outputs: + an_output_name: + type: File + streamable: true + outputBinding: + glob: random_stderr_filenameABCDEFG + + stderr: random_stderr_filenameABCDEFG + ``` + + +- type: record + name: CommandLineTool + extends: Process + documentRoot: true + specialize: + - specializeFrom: InputParameter + specializeTo: CommandInputParameter + - specializeFrom: OutputParameter + specializeTo: CommandOutputParameter + doc: | + This defines the schema of the CWL Command Line Tool Description document. + + fields: + - name: class + jsonldPredicate: + "_id": "@type" + "_type": "@vocab" + type: string + - name: baseCommand + doc: | + Specifies the program to execute. 
If an array, the first element of + the array is the command to execute, and subsequent elements are + mandatory command line arguments. The elements in `baseCommand` must + appear before any command line bindings from `inputBinding` or + `arguments`. + + If `baseCommand` is not provided or is an empty array, the first + element of the command line produced after processing `inputBinding` or + `arguments` must be used as the program to execute. + + If the program includes a path separator character it must + be an absolute path, otherwise it is an error. If the program does not + include a path separator, search the `$PATH` variable in the runtime + environment of the workflow runner find the absolute path of the + executable. + type: + - string? + - string[]? + jsonldPredicate: + "_id": "cwl:baseCommand" + "_container": "@list" + - name: arguments + doc: | + Command line bindings which are not directly associated with input parameters. + type: + - "null" + - type: array + items: [string, Expression, CommandLineBinding] + jsonldPredicate: + "_id": "cwl:arguments" + "_container": "@list" + - name: stdin + type: ["null", string, Expression] + doc: | + A path to a file whose contents must be piped into the command's + standard input stream. + - name: stderr + type: ["null", string, Expression] + jsonldPredicate: "https://w3id.org/cwl/cwl#stderr" + doc: | + Capture the command's standard error stream to a file written to + the designated output directory. + + If `stderr` is a string, it specifies the file name to use. + + If `stderr` is an expression, the expression is evaluated and must + return a string with the file name to use to capture stderr. If the + return value is not a string, or the resulting path contains illegal + characters (such as the path separator `/`) it is an error. 
+ - name: stdout + type: ["null", string, Expression] + jsonldPredicate: "https://w3id.org/cwl/cwl#stdout" + doc: | + Capture the command's standard output stream to a file written to + the designated output directory. + + If `stdout` is a string, it specifies the file name to use. + + If `stdout` is an expression, the expression is evaluated and must + return a string with the file name to use to capture stdout. If the + return value is not a string, or the resulting path contains illegal + characters (such as the path separator `/`) it is an error. + - name: successCodes + type: int[]? + doc: | + Exit codes that indicate the process completed successfully. + + - name: temporaryFailCodes + type: int[]? + doc: | + Exit codes that indicate the process failed due to a possibly + temporary condition, where executing the process with the same + runtime environment and inputs may produce different results. + + - name: permanentFailCodes + type: int[]? + doc: + Exit codes that indicate the process failed due to a permanent logic + error, where executing the process with the same runtime environment and + same inputs is expected to always fail. + + +- type: record + name: DockerRequirement + extends: ProcessRequirement + doc: | + Indicates that a workflow component should be run in a + [Docker](http://docker.com) container, and specifies how to fetch or build + the image. + + If a CommandLineTool lists `DockerRequirement` under + `hints` (or `requirements`), it may (or must) be run in the specified Docker + container. + + The platform must first acquire or install the correct Docker image as + specified by `dockerPull`, `dockerImport`, `dockerLoad` or `dockerFile`. + + The platform must execute the tool in the container using `docker run` with + the appropriate Docker image and tool command line. + + The workflow platform may provide input files and the designated output + directory through the use of volume bind mounts. 
The platform may rewrite + file paths in the input object to correspond to the Docker bind mounted + locations. + + When running a tool contained in Docker, the workflow platform must not + assume anything about the contents of the Docker container, such as the + presence or absence of specific software, except to assume that the + generated command line represents a valid command within the runtime + environment of the container. + + ## Interaction with other requirements + + If [EnvVarRequirement](#EnvVarRequirement) is specified alongside a + DockerRequirement, the environment variables must be provided to Docker + using `--env` or `--env-file` and interact with the container's preexisting + environment as defined by Docker. + + fields: + - name: class + type: string + doc: "Always 'DockerRequirement'" + jsonldPredicate: + "_id": "@type" + "_type": "@vocab" + - name: dockerPull + type: string? + doc: "Specify a Docker image to retrieve using `docker pull`." + - name: dockerLoad + type: string? + doc: "Specify an HTTP URL from which to download a Docker image using `docker load`." + - name: dockerFile + type: string? + doc: "Supply the contents of a Dockerfile which will be built using `docker build`." + - name: dockerImport + type: string? + doc: "Provide an HTTP URL to download and gunzip a Docker image using `docker import`." + - name: dockerImageId + type: string? + doc: | + The image id that will be used for `docker run`. May be a + human-readable image name or the image identifier hash. May be skipped + if `dockerPull` is specified, in which case the `dockerPull` image id + must be used. + - name: dockerOutputDirectory + type: string? + doc: | + Set the designated output directory to a specific location inside the + Docker container. + + +- type: record + name: SoftwareRequirement + extends: ProcessRequirement + doc: | + A list of software packages that should be configured in the environment of + the defined process. 
+ fields: + - name: class + type: string + doc: "Always 'SoftwareRequirement'" + jsonldPredicate: + "_id": "@type" + "_type": "@vocab" + - name: packages + type: SoftwarePackage[] + doc: "The list of software to be configured." + jsonldPredicate: + mapSubject: package + mapPredicate: specs + +- name: SoftwarePackage + type: record + fields: + - name: package + type: string + doc: "The common name of the software to be configured." + - name: version + type: string[]? + doc: "The (optional) version of the software to be configured." + - name: specs + type: string[]? + doc: | + Must be one or more IRIs identifying resources for installing or + enabling the software. Implementations may provide resolvers which map + well-known software spec IRIs to some configuration action. + + For example, an IRI `https://packages.debian.org/jessie/bowtie` could + be resolved with `apt-get install bowtie`. An IRI + `https://anaconda.org/bioconda/bowtie` could be resolved with `conda + install -c bioconda bowtie`. + + Tools may also provide IRIs to index entries such as + [RRID](http://www.identifiers.org/rrid/), such as + `http://identifiers.org/rrid/RRID:SCR_005476` + + +- name: Dirent + type: record + doc: | + Define a file or subdirectory that must be placed in the designated output + directory prior to executing the command line tool. May be the result of + executing an expression, such as building a configuration file from a + template. + fields: + - name: entryname + type: ["null", string, Expression] + jsonldPredicate: + _id: cwl:entryname + doc: | + The name of the file or subdirectory to create in the output directory. + If `entry` is a File or Directory, this overrides `basename`. Optional. + - name: entry + type: [string, Expression] + jsonldPredicate: + _id: cwl:entry + doc: | + If the value is a string literal or an expression which evaluates to a + string, a new file must be created with the string as the file contents. 
+ + If the value is an expression that evaluates to a `File` object, this + indicates the referenced file should be added to the designated output + directory prior to executing the tool. + + If the value is an expression that evaluates to a `Dirent` object, this + indicates that the File or Directory in `entry` should be added to the + designated output directory with the name in `entryname`. + + If `writable` is false, the file may be made available using a bind + mount or file system link to avoid unnecessary copying of the input + file. + - name: writable + type: boolean? + doc: | + If true, the file or directory must be writable by the tool. Changes + to the file or directory must be isolated and not visible by any other + CommandLineTool process. This may be implemented by making a copy of + the original file or directory. Default false (files and directories + read-only by default). + + +- name: InitialWorkDirRequirement + type: record + extends: ProcessRequirement + doc: + Define a list of files and subdirectories that must be created by the + workflow platform in the designated output directory prior to executing the + command line tool. + fields: + - name: class + type: string + doc: InitialWorkDirRequirement + jsonldPredicate: + "_id": "@type" + "_type": "@vocab" + - name: listing + type: + - type: array + items: [File, Directory, Dirent, string, Expression] + - string + - Expression + jsonldPredicate: + _id: "cwl:listing" + doc: | + The list of files or subdirectories that must be placed in the + designated output directory prior to executing the command line tool. + + May be an expression. If so, the expression return value must validate + as `{type: array, items: [File, Directory]}`. + + +- name: EnvVarRequirement + type: record + extends: ProcessRequirement + doc: | + Define a list of environment variables which will be set in the + execution environment of the tool. See `EnvironmentDef` for details. 
+ fields: + - name: class + type: string + doc: "Always 'EnvVarRequirement'" + jsonldPredicate: + "_id": "@type" + "_type": "@vocab" + - name: envDef + type: EnvironmentDef[] + doc: The list of environment variables. + jsonldPredicate: + mapSubject: envName + mapPredicate: envValue + + +- type: record + name: ShellCommandRequirement + extends: ProcessRequirement + doc: | + Modify the behavior of CommandLineTool to generate a single string + containing a shell command line. Each item in the argument list must be + joined into a string separated by single spaces and quoted to prevent + interpretation by the shell, unless `CommandLineBinding` for that argument + contains `shellQuote: false`. If `shellQuote: false` is specified, the + argument is joined into the command string without quoting, which allows + the use of shell metacharacters such as `|` for pipes. + fields: + - name: class + type: string + doc: "Always 'ShellCommandRequirement'" + jsonldPredicate: + "_id": "@type" + "_type": "@vocab" + + +- type: record + name: ResourceRequirement + extends: ProcessRequirement + doc: | + Specify basic hardware resource requirements. + + "min" is the minimum amount of a resource that must be reserved to schedule + a job. If "min" cannot be satisfied, the job should not be run. + + "max" is the maximum amount of a resource that the job shall be permitted + to use. If a node has sufficient resources, multiple jobs may be scheduled + on a single node provided each job's "max" resource requirements are + met. If a job attempts to exceed its "max" resource allocation, an + implementation may deny additional resources, which may result in job + failure. + + If "min" is specified but "max" is not, then "max" == "min" + If "max" is specified but "min" is not, then "min" == "max". + + It is an error if max < min. + + It is an error if the value of any of these fields is negative. + + If neither "min" nor "max" is specified for a resource, an implementation may provide a default. 
+ + fields: + - name: class + type: string + doc: "Always 'ResourceRequirement'" + jsonldPredicate: + "_id": "@type" + "_type": "@vocab" + - name: coresMin + type: ["null", long, string, Expression] + doc: Minimum reserved number of CPU cores + + - name: coresMax + type: ["null", int, string, Expression] + doc: Maximum reserved number of CPU cores + + - name: ramMin + type: ["null", long, string, Expression] + doc: Minimum reserved RAM in mebibytes (2**20) + + - name: ramMax + type: ["null", long, string, Expression] + doc: Maximum reserved RAM in mebibytes (2**20) + + - name: tmpdirMin + type: ["null", long, string, Expression] + doc: Minimum reserved filesystem based storage for the designated temporary directory, in mebibytes (2**20) + + - name: tmpdirMax + type: ["null", long, string, Expression] + doc: Maximum reserved filesystem based storage for the designated temporary directory, in mebibytes (2**20) + + - name: outdirMin + type: ["null", long, string, Expression] + doc: Minimum reserved filesystem based storage for the designated output directory, in mebibytes (2**20) + + - name: outdirMax + type: ["null", long, string, Expression] + doc: Maximum reserved filesystem based storage for the designated output directory, in mebibytes (2**20) diff --git a/schema_salad/tests/test_schema/CommonWorkflowLanguage.yml b/schema_salad/tests/test_schema/CommonWorkflowLanguage.yml new file mode 100644 index 000000000..73921e899 --- /dev/null +++ b/schema_salad/tests/test_schema/CommonWorkflowLanguage.yml @@ -0,0 +1,11 @@ +$base: "https://w3id.org/cwl/cwl#" + +$namespaces: + cwl: "https://w3id.org/cwl/cwl#" + sld: "https://w3id.org/cwl/salad#" + +$graph: + +- $import: Process.yml +- $import: CommandLineTool.yml +- $import: Workflow.yml diff --git a/schema_salad/tests/test_schema/Process.yml b/schema_salad/tests/test_schema/Process.yml new file mode 100644 index 000000000..8b9bce5f0 --- /dev/null +++ b/schema_salad/tests/test_schema/Process.yml @@ -0,0 +1,743 @@ +$base: 
"https://w3id.org/cwl/cwl#" + +$namespaces: + cwl: "https://w3id.org/cwl/cwl#" + sld: "https://w3id.org/cwl/salad#" + +$graph: + +- name: "Common Workflow Language, v1.0" + type: documentation + doc: {$include: concepts.md} + +- $import: "metaschema_base.yml" + +- name: BaseTypesDoc + type: documentation + doc: | + ## Base types + docChild: + - "#CWLType" + - "#Process" + +- type: enum + name: CWLVersion + doc: "Version symbols for published CWL document versions." + symbols: + - cwl:draft-2 + - cwl:draft-3.dev1 + - cwl:draft-3.dev2 + - cwl:draft-3.dev3 + - cwl:draft-3.dev4 + - cwl:draft-3.dev5 + - cwl:draft-3 + - cwl:draft-4.dev1 + - cwl:draft-4.dev2 + - cwl:draft-4.dev3 + - cwl:v1.0.dev4 + - cwl:v1.0 + +- name: CWLType + type: enum + extends: "sld:PrimitiveType" + symbols: + - cwl:File + - cwl:Directory + doc: + - "Extends primitive types with the concept of a file and directory as a builtin type." + - "File: A File object" + - "Directory: A Directory object" + +- name: File + type: record + docParent: "#CWLType" + doc: | + Represents a file (or group of files if `secondaryFiles` is specified) that + must be accessible by tools using standard POSIX file system call API such as + open(2) and read(2). + fields: + - name: class + type: + type: enum + name: File_class + symbols: + - cwl:File + jsonldPredicate: + _id: "@type" + _type: "@vocab" + doc: Must be `File` to indicate this object describes a file. + - name: location + type: string? + doc: | + An IRI that identifies the file resource. This may be a relative + reference, in which case it must be resolved using the base IRI of the + document. The location may refer to a local or remote resource; the + implementation must use the IRI to retrieve file content. If an + implementation is unable to retrieve the file content stored at a + remote resource (due to unsupported protocol, access denied, or other + issue) it must signal an error. 
+ + If the `location` field is not provided, the `contents` field must be + provided. The implementation must assign a unique identifier for + the `location` field. + + If the `path` field is provided but the `location` field is not, an + implementation may assign the value of the `path` field to `location`, + then follow the rules above. + jsonldPredicate: + _id: "@id" + _type: "@id" + - name: path + type: string? + doc: | + The local host path where the File is available when a CommandLineTool is + executed. This field must be set by the implementation. The final + path component must match the value of `basename`. This field + must not be used in any other context. The command line tool being + executed must be able to to access the file at `path` using the POSIX + `open(2)` syscall. + + As a special case, if the `path` field is provided but the `location` + field is not, an implementation may assign the value of the `path` + field to `location`, and remove the `path` field. + + If the `path` contains [POSIX shell metacharacters](http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02) + (`|`,`&`, `;`, `<`, `>`, `(`,`)`, `$`,`` ` ``, `\`, `"`, `'`, + ``, ``, and ``) or characters + [not allowed](http://www.iana.org/assignments/idna-tables-6.3.0/idna-tables-6.3.0.xhtml) + for [Internationalized Domain Names for Applications](https://tools.ietf.org/html/rfc6452) + then implementations may terminate the process with a + `permanentFailure`. + jsonldPredicate: + "_id": "cwl:path" + "_type": "@id" + - name: basename + type: string? + doc: | + The base name of the file, that is, the name of the file without any + leading directory path. The base name must not contain a slash `/`. + + If not provided, the implementation must set this field based on the + `location` field by taking the final path component after parsing + `location` as an IRI. If `basename` is provided, it is not required to + match the value from `location`. 
+ + When this file is made available to a CommandLineTool, it must be named + with `basename`, i.e. the final component of the `path` field must match + `basename`. + jsonldPredicate: "cwl:basename" + - name: dirname + type: string? + doc: | + The name of the directory containing file, that is, the path leading up + to the final slash in the path such that `dirname + '/' + basename == + path`. + + The implementation must set this field based on the value of `path` + prior to evaluating parameter references or expressions in a + CommandLineTool document. This field must not be used in any other + context. + - name: nameroot + type: string? + doc: | + The basename root such that `nameroot + nameext == basename`, and + `nameext` is empty or begins with a period and contains at most one + period. For the purposess of path splitting leading periods on the + basename are ignored; a basename of `.cshrc` will have a nameroot of + `.cshrc`. + + The implementation must set this field automatically based on the value + of `basename` prior to evaluating parameter references or expressions. + - name: nameext + type: string? + doc: | + The basename extension such that `nameroot + nameext == basename`, and + `nameext` is empty or begins with a period and contains at most one + period. Leading periods on the basename are ignored; a basename of + `.cshrc` will have an empty `nameext`. + + The implementation must set this field automatically based on the value + of `basename` prior to evaluating parameter references or expressions. + - name: checksum + type: string? + doc: | + Optional hash code for validating file integrity. Currently must be in the form + "sha1$ + hexadecimal string" using the SHA-1 algorithm. + - name: size + type: long? 
+ doc: Optional file size + - name: "secondaryFiles" + type: + - "null" + - type: array + items: [File, Directory] + jsonldPredicate: "cwl:secondaryFiles" + doc: | + A list of additional files that are associated with the primary file + and must be transferred alongside the primary file. Examples include + indexes of the primary file, or external references which must be + included when loading primary document. A file object listed in + `secondaryFiles` may itself include `secondaryFiles` for which the same + rules apply. + - name: format + type: string? + jsonldPredicate: + _id: cwl:format + _type: "@id" + identity: true + doc: | + The format of the file: this must be an IRI of a concept node that + represents the file format, preferrably defined within an ontology. + If no ontology is available, file formats may be tested by exact match. + + Reasoning about format compatability must be done by checking that an + input file format is the same, `owl:equivalentClass` or + `rdfs:subClassOf` the format required by the input parameter. + `owl:equivalentClass` is transitive with `rdfs:subClassOf`, e.g. if + ` owl:equivalentClass ` and ` owl:subclassOf ` then infer + ` owl:subclassOf `. + + File format ontologies may be provided in the "$schema" metadata at the + root of the document. If no ontologies are specified in `$schema`, the + runtime may perform exact file format matches. + - name: contents + type: string? + doc: | + File contents literal. Maximum of 64 KiB. + + If neither `location` nor `path` is provided, `contents` must be + non-null. The implementation must assign a unique identifier for the + `location` field. When the file is staged as input to CommandLineTool, + the value of `contents` must be written to a file. + + If `loadContents` of `inputBinding` or `outputBinding` is true and + `location` is valid, the implementation must read up to the first 64 + KiB of text from the file and place it in the "contents" field. 
+ + +- name: Directory + type: record + docAfter: "#File" + doc: | + Represents a directory to present to a command line tool. + fields: + - name: class + type: + type: enum + name: Directory_class + symbols: + - cwl:Directory + jsonldPredicate: + _id: "@type" + _type: "@vocab" + doc: Must be `Directory` to indicate this object describes a Directory. + - name: location + type: string? + doc: | + An IRI that identifies the directory resource. This may be a relative + reference, in which case it must be resolved using the base IRI of the + document. The location may refer to a local or remote resource. If + the `listing` field is not set, the implementation must use the + location IRI to retrieve directory listing. If an implementation is + unable to retrieve the directory listing stored at a remote resource (due to + unsupported protocol, access denied, or other issue) it must signal an + error. + + If the `location` field is not provided, the `listing` field must be + provided. The implementation must assign a unique identifier for + the `location` field. + + If the `path` field is provided but the `location` field is not, an + implementation may assign the value of the `path` field to `location`, + then follow the rules above. + jsonldPredicate: + _id: "@id" + _type: "@id" + - name: path + type: string? + doc: | + The local path where the Directory is made available prior to executing a + CommandLineTool. This must be set by the implementation. This field + must not be used in any other context. The command line tool being + executed must be able to to access the directory at `path` using the POSIX + `opendir(2)` syscall. 
+ + If the `path` contains [POSIX shell metacharacters](http://pubs.opengroup.org/onlinepubs/9699919799/utilities/V3_chap02.html#tag_18_02) + (`|`,`&`, `;`, `<`, `>`, `(`,`)`, `$`,`` ` ``, `\`, `"`, `'`, + ``, ``, and ``) or characters + [not allowed](http://www.iana.org/assignments/idna-tables-6.3.0/idna-tables-6.3.0.xhtml) + for [Internationalized Domain Names for Applications](https://tools.ietf.org/html/rfc6452) + then implementations may terminate the process with a + `permanentFailure`. + jsonldPredicate: + _id: "cwl:path" + _type: "@id" + - name: basename + type: string? + doc: | + The base name of the directory, that is, the name of the file without any + leading directory path. The base name must not contain a slash `/`. + + If not provided, the implementation must set this field based on the + `location` field by taking the final path component after parsing + `location` as an IRI. If `basename` is provided, it is not required to + match the value from `location`. + + When this file is made available to a CommandLineTool, it must be named + with `basename`, i.e. the final component of the `path` field must match + `basename`. + jsonldPredicate: "cwl:basename" + - name: listing + type: + - "null" + - type: array + items: [File, Directory] + doc: | + List of files or subdirectories contained in this directory. The name + of each file or subdirectory is determined by the `basename` field of + each `File` or `Directory` object. It is an error if a `File` shares a + `basename` with any other entry in `listing`. If two or more + `Directory` object share the same `basename`, this must be treated as + equivalent to a single subdirectory with the listings recursively + merged. + jsonldPredicate: + _id: "cwl:listing" + +- name: SchemaBase + type: record + abstract: true + fields: + - name: label + type: + - "null" + - string + jsonldPredicate: "rdfs:label" + doc: "A short, human-readable label of this object." 
+ + +- name: Parameter + type: record + extends: SchemaBase + abstract: true + doc: | + Define an input or output parameter to a process. + + fields: + - name: secondaryFiles + type: + - "null" + - string + - Expression + - type: array + items: [string, Expression] + jsonldPredicate: "cwl:secondaryFiles" + doc: | + Only valid when `type: File` or is an array of `items: File`. + + Describes files that must be included alongside the primary file(s). + + If the value is an expression, the value of `self` in the expression + must be the primary input or output File to which this binding applies. + + If the value is a string, it specifies that the following pattern + should be applied to the primary file: + + 1. If string begins with one or more caret `^` characters, for each + caret, remove the last file extension from the path (the last + period `.` and all following characters). If there are no file + extensions, the path is unchanged. + 2. Append the remainder of the string to the end of the file path. + + - name: format + type: + - "null" + - string + - type: array + items: string + - Expression + jsonldPredicate: + _id: cwl:format + _type: "@id" + identity: true + doc: | + Only valid when `type: File` or is an array of `items: File`. + + For input parameters, this must be one or more IRIs of concept nodes + that represents file formats which are allowed as input to this + parameter, preferrably defined within an ontology. If no ontology is + available, file formats may be tested by exact match. + + For output parameters, this is the file format that will be assigned to + the output parameter. + + - name: streamable + type: boolean? + doc: | + Only valid when `type: File` or is an array of `items: File`. + + A value of `true` indicates that the file is read or written + sequentially without seeking. An implementation may use this flag to + indicate whether it is valid to stream file contents using a named + pipe. Default: `false`. 
+ + - name: doc + type: + - string? + - string[]? + doc: "A documentation string for this type, or an array of strings which should be concatenated." + jsonldPredicate: "rdfs:comment" + + +- type: enum + name: Expression + doc: | + 'Expression' is not a real type. It indicates that a field must allow + runtime parameter references. If [InlineJavascriptRequirement](#InlineJavascriptRequirement) + is declared and supported by the platform, the field must also allow + Javascript expressions. + symbols: + - cwl:ExpressionPlaceholder + + +- name: InputBinding + type: record + abstract: true + fields: + - name: loadContents + type: + - "null" + - boolean + jsonldPredicate: "cwl:loadContents" + doc: | + Only valid when `type: File` or is an array of `items: File`. + + Read up to the first 64 KiB of text from the file and place it in the + "contents" field of the file object for use by expressions. + + +- name: OutputBinding + type: record + abstract: true + + +- name: InputSchema + extends: SchemaBase + type: record + abstract: true + + +- name: OutputSchema + extends: SchemaBase + type: record + abstract: true + + +- name: InputRecordField + type: record + extends: "sld:RecordField" + specialize: + - specializeFrom: "sld:RecordSchema" + specializeTo: InputRecordSchema + - specializeFrom: "sld:EnumSchema" + specializeTo: InputEnumSchema + - specializeFrom: "sld:ArraySchema" + specializeTo: InputArraySchema + - specializeFrom: "sld:PrimitiveType" + specializeTo: CWLType + fields: + - name: inputBinding + type: InputBinding? + jsonldPredicate: "cwl:inputBinding" + - name: label + type: string? + jsonldPredicate: "rdfs:label" + doc: "A short, human-readable label of this process object." 
+ + +- name: InputRecordSchema + type: record + extends: ["sld:RecordSchema", InputSchema] + specialize: + - specializeFrom: "sld:RecordField" + specializeTo: InputRecordField + + +- name: InputEnumSchema + type: record + extends: ["sld:EnumSchema", InputSchema] + fields: + - name: inputBinding + type: InputBinding? + jsonldPredicate: "cwl:inputBinding" + + +- name: InputArraySchema + type: record + extends: ["sld:ArraySchema", InputSchema] + specialize: + - specializeFrom: "sld:RecordSchema" + specializeTo: InputRecordSchema + - specializeFrom: "sld:EnumSchema" + specializeTo: InputEnumSchema + - specializeFrom: "sld:ArraySchema" + specializeTo: InputArraySchema + - specializeFrom: "sld:PrimitiveType" + specializeTo: CWLType + fields: + - name: inputBinding + type: InputBinding? + jsonldPredicate: "cwl:inputBinding" + + +- name: OutputRecordField + type: record + extends: "sld:RecordField" + specialize: + - specializeFrom: "sld:RecordSchema" + specializeTo: OutputRecordSchema + - specializeFrom: "sld:EnumSchema" + specializeTo: OutputEnumSchema + - specializeFrom: "sld:ArraySchema" + specializeTo: OutputArraySchema + - specializeFrom: "sld:PrimitiveType" + specializeTo: CWLType + fields: + - name: outputBinding + type: OutputBinding? + jsonldPredicate: "cwl:outputBinding" + + +- name: OutputRecordSchema + type: record + extends: ["sld:RecordSchema", "#OutputSchema"] + docParent: "#OutputParameter" + specialize: + - specializeFrom: "sld:RecordField" + specializeTo: OutputRecordField + + +- name: OutputEnumSchema + type: record + extends: ["sld:EnumSchema", OutputSchema] + docParent: "#OutputParameter" + fields: + - name: outputBinding + type: OutputBinding? 
+ jsonldPredicate: "cwl:outputBinding" + +- name: OutputArraySchema + type: record + extends: ["sld:ArraySchema", OutputSchema] + docParent: "#OutputParameter" + specialize: + - specializeFrom: "sld:RecordSchema" + specializeTo: OutputRecordSchema + - specializeFrom: "sld:EnumSchema" + specializeTo: OutputEnumSchema + - specializeFrom: "sld:ArraySchema" + specializeTo: OutputArraySchema + - specializeFrom: "sld:PrimitiveType" + specializeTo: CWLType + fields: + - name: outputBinding + type: OutputBinding? + jsonldPredicate: "cwl:outputBinding" + + +- name: InputParameter + type: record + extends: Parameter + fields: + - name: id + type: string + jsonldPredicate: "@id" + doc: "The unique identifier for this parameter object." + + - name: inputBinding + type: InputBinding? + jsonldPredicate: "cwl:inputBinding" + doc: | + Describes how to handle the inputs of a process and convert them + into a concrete form for execution, such as command line parameters. + + - name: default + type: Any? + jsonldPredicate: "cwl:default" + doc: | + The default value for this parameter if not provided in the input + object. + + - name: type + type: + - "null" + - CWLType + - InputRecordSchema + - InputEnumSchema + - InputArraySchema + - string + - type: array + items: + - CWLType + - InputRecordSchema + - InputEnumSchema + - InputArraySchema + - string + jsonldPredicate: + "_id": "sld:type" + "_type": "@vocab" + refScope: 2 + typeDSL: True + doc: | + Specify valid types of data that may be assigned to this parameter. + +- name: OutputParameter + type: record + extends: Parameter + fields: + - name: id + type: string + jsonldPredicate: "@id" + doc: "The unique identifier for this parameter object." + - name: outputBinding + type: OutputBinding? + jsonldPredicate: "cwl:outputBinding" + doc: | + Describes how to handle the outputs of a process. 
+ + +- type: record + name: ProcessRequirement + abstract: true + doc: | + A process requirement declares a prerequisite that may or must be fulfilled + before executing a process. See [`Process.hints`](#process) and + [`Process.requirements`](#process). + + Process requirements are the primary mechanism for specifying extensions to + the CWL core specification. + + +- type: record + name: Process + abstract: true + doc: | + + The base executable type in CWL is the `Process` object defined by the + document. Note that the `Process` object is abstract and cannot be + directly executed. + + fields: + - name: id + type: string? + jsonldPredicate: "@id" + doc: "The unique identifier for this process object." + - name: inputs + type: + type: array + items: InputParameter + jsonldPredicate: + _id: "cwl:inputs" + mapSubject: id + mapPredicate: type + doc: | + Defines the input parameters of the process. The process is ready to + run when all required input parameters are associated with concrete + values. Input parameters include a schema for each parameter which is + used to validate the input object. It may also be used to build a user + interface for constructing the input object. + - name: outputs + type: + type: array + items: OutputParameter + jsonldPredicate: + _id: "cwl:outputs" + mapSubject: id + mapPredicate: type + doc: | + Defines the parameters representing the output of the process. May be + used to generate and/or validate the output object. + - name: requirements + type: ProcessRequirement[]? + jsonldPredicate: + _id: "cwl:requirements" + mapSubject: class + doc: | + Declares requirements that apply to either the runtime environment or the + workflow engine that must be met in order to execute this process. If + an implementation cannot satisfy all requirements, or a requirement is + listed which is not recognized by the implementation, it is a fatal + error and the implementation must not attempt to run the process, + unless overridden at user option. 
+ - name: hints + type: Any[]? + doc: | + Declares hints applying to either the runtime environment or the + workflow engine that may be helpful in executing this process. It is + not an error if an implementation cannot satisfy all hints, however + the implementation may report a warning. + jsonldPredicate: + _id: cwl:hints + noLinkCheck: true + mapSubject: class + - name: label + type: string? + jsonldPredicate: "rdfs:label" + doc: "A short, human-readable label of this process object." + - name: doc + type: string? + jsonldPredicate: "rdfs:comment" + doc: "A long, human-readable description of this process object." + - name: cwlVersion + type: CWLVersion? + doc: | + CWL document version. Always required at the document root. Not + required for a Process embedded inside another Process. + jsonldPredicate: + "_id": "cwl:cwlVersion" + "_type": "@vocab" + +- name: InlineJavascriptRequirement + type: record + extends: ProcessRequirement + doc: | + Indicates that the workflow platform must support inline Javascript expressions. + If this requirement is not present, the workflow platform must not perform expression + interpolation. + fields: + - name: class + type: string + doc: "Always 'InlineJavascriptRequirement'" + jsonldPredicate: + "_id": "@type" + "_type": "@vocab" + - name: expressionLib + type: string[]? + doc: | + Additional code fragments that will also be inserted + before executing the expression code. Allows for function definitions that may + be called from CWL expressions. + + +- name: SchemaDefRequirement + type: record + extends: ProcessRequirement + doc: | + This field consists of an array of type definitions which must be used when + interpreting the `inputs` and `outputs` fields. When a `type` field + contains an IRI, the implementation must check if the type is defined in + `schemaDefs` and use that definition. If the type is not found in + `schemaDefs`, it is an error. 
The entries in `schemaDefs` must be + processed in the order listed such that later schema definitions may refer + to earlier schema definitions. + fields: + - name: class + type: string + doc: "Always 'SchemaDefRequirement'" + jsonldPredicate: + "_id": "@type" + "_type": "@vocab" + - name: types + type: + type: array + items: InputSchema + doc: The list of type definitions. diff --git a/schema_salad/tests/test_schema/Workflow.yml b/schema_salad/tests/test_schema/Workflow.yml new file mode 100644 index 000000000..26bde8e29 --- /dev/null +++ b/schema_salad/tests/test_schema/Workflow.yml @@ -0,0 +1,582 @@ +$base: "https://w3id.org/cwl/cwl#" + +$namespaces: + cwl: "https://w3id.org/cwl/cwl#" + +$graph: + +- name: "WorkflowDoc" + type: documentation + doc: + - | + # Common Workflow Language (CWL) Workflow Description, v1.0 + + This version: + * https://w3id.org/cwl/v1.0/ + + Current version: + * https://w3id.org/cwl/ + - "\n\n" + - {$include: contrib.md} + - "\n\n" + - | + # Abstract + + One way to define a workflow is: an analysis task represented by a + directed graph describing a sequence of operations that transform an + input data set to output. This specification defines the Common Workflow + Language (CWL) Workflow description, a vendor-neutral standard for + representing workflows intended to be portable across a variety of + computing platforms. + + - {$include: intro.md} + + - | + + ## Introduction to v1.0 + + This specification represents the first full release from the CWL group. + Since draft-3, this draft introduces the following changes and additions: + + * The `inputs` and `outputs` fields have been renamed `in` and `out`. + * Syntax simplifcations: denoted by the `map<>` syntax. Example: `in` + contains a list of items, each with an id. Now one can specify + a mapping of that identifier to the corresponding + `InputParameter`. 
+ ``` + in: + - id: one + type: string + doc: First input parameter + - id: two + type: int + doc: Second input parameter + ``` + can be + ``` + in: + one: + type: string + doc: First input parameter + two: + type: int + doc: Second input parameter + ``` + * The common field `description` has been renamed to `doc`. + + ## Purpose + + The Common Workflow Language Command Line Tool Description express + workflows for data-intensive science, such as Bioinformatics, Chemistry, + Physics, and Astronomy. This specification is intended to define a data + and execution model for Workflows that can be implemented on top of a + variety of computing platforms, ranging from an individual workstation to + cluster, grid, cloud, and high performance computing systems. + + - {$include: concepts.md} + +- name: ExpressionToolOutputParameter + type: record + extends: OutputParameter + fields: + - name: type + type: + - "null" + - "#CWLType" + - "#OutputRecordSchema" + - "#OutputEnumSchema" + - "#OutputArraySchema" + - string + - type: array + items: + - "#CWLType" + - "#OutputRecordSchema" + - "#OutputEnumSchema" + - "#OutputArraySchema" + - string + jsonldPredicate: + "_id": "sld:type" + "_type": "@vocab" + refScope: 2 + typeDSL: True + doc: | + Specify valid types of data that may be assigned to this parameter. + +- type: record + name: ExpressionTool + extends: Process + specialize: + - specializeFrom: "#OutputParameter" + specializeTo: "#ExpressionToolOutputParameter" + documentRoot: true + doc: | + Execute an expression as a Workflow step. + fields: + - name: "class" + jsonldPredicate: + "_id": "@type" + "_type": "@vocab" + type: string + - name: expression + type: [string, Expression] + doc: | + The expression to execute. The expression must return a JSON object which + matches the output parameters of the ExpressionTool. 
+ +- name: LinkMergeMethod + type: enum + docParent: "#WorkflowStepInput" + doc: The input link merge method, described in [WorkflowStepInput](#WorkflowStepInput). + symbols: + - merge_nested + - merge_flattened + + +- name: WorkflowOutputParameter + type: record + extends: OutputParameter + docParent: "#Workflow" + doc: | + Describe an output parameter of a workflow. The parameter must be + connected to one or more parameters defined in the workflow that will + provide the value of the output parameter. + fields: + - name: outputSource + doc: | + Specifies one or more workflow parameters that supply the value of to + the output parameter. + jsonldPredicate: + "_id": "cwl:outputSource" + "_type": "@id" + refScope: 0 + type: + - string? + - string[]? + - name: linkMerge + type: ["null", "#LinkMergeMethod"] + jsonldPredicate: "cwl:linkMerge" + doc: | + The method to use to merge multiple sources into a single array. + If not specified, the default method is "merge_nested". + - name: type + type: + - "null" + - "#CWLType" + - "#OutputRecordSchema" + - "#OutputEnumSchema" + - "#OutputArraySchema" + - string + - type: array + items: + - "#CWLType" + - "#OutputRecordSchema" + - "#OutputEnumSchema" + - "#OutputArraySchema" + - string + jsonldPredicate: + "_id": "sld:type" + "_type": "@vocab" + refScope: 2 + typeDSL: True + doc: | + Specify valid types of data that may be assigned to this parameter. + + +- name: Sink + type: record + abstract: true + fields: + - name: source + doc: | + Specifies one or more workflow parameters that will provide input to + the underlying step parameter. + jsonldPredicate: + "_id": "cwl:source" + "_type": "@id" + refScope: 2 + type: + - string? + - string[]? + - name: linkMerge + type: LinkMergeMethod? + jsonldPredicate: "cwl:linkMerge" + doc: | + The method to use to merge multiple inbound links into a single array. + If not specified, the default method is "merge_nested". 
+ + +- type: record + name: WorkflowStepInput + extends: Sink + docParent: "#WorkflowStep" + doc: | + The input of a workflow step connects an upstream parameter (from the + workflow inputs, or the outputs of other workflows steps) with the input + parameters of the underlying step. + + ## Input object + + A WorkflowStepInput object must contain an `id` field in the form + `#fieldname` or `#stepname.fieldname`. When the `id` field contains a + period `.` the field name consists of the characters following the final + period. This defines a field of the workflow step input object with the + value of the `source` parameter(s). + + ## Merging + + To merge multiple inbound data links, + [MultipleInputFeatureRequirement](#MultipleInputFeatureRequirement) must be specified + in the workflow or workflow step requirements. + + If the sink parameter is an array, or named in a [workflow + scatter](#WorkflowStep) operation, there may be multiple inbound data links + listed in the `source` field. The values from the input links are merged + depending on the method specified in the `linkMerge` field. If not + specified, the default method is "merge_nested". + + * **merge_nested** + + The input must be an array consisting of exactly one entry for each + input link. If "merge_nested" is specified with a single link, the value + from the link must be wrapped in a single-item list. + + * **merge_flattened** + + 1. The source and sink parameters must be compatible types, or the source + type must be compatible with single element from the "items" type of + the destination array parameter. + 2. Source parameters which are arrays are concatenated. + Source parameters which are single element types are appended as + single elements. + + fields: + - name: id + type: string + jsonldPredicate: "@id" + doc: "A unique identifier for this workflow input parameter." + - name: default + type: ["null", Any] + doc: | + The default value for this parameter if there is no `source` + field. 
+ jsonldPredicate: "cwl:default"
+ - name: valueFrom
+ type:
+ - "null"
+ - "string"
+ - "#Expression"
+ jsonldPredicate: "cwl:valueFrom"
+ doc: |
+ To use valueFrom, [StepInputExpressionRequirement](#StepInputExpressionRequirement) must
+ be specified in the workflow or workflow step requirements.
+
+ If `valueFrom` is a constant string value, use this as the value for
+ this input parameter.
+
+ If `valueFrom` is a parameter reference or expression, it must be
+ evaluated to yield the actual value to be assigned to the input field.
+
+ The `self` value in the parameter reference or expression must be
+ the value of the parameter(s) specified in the `source` field, or
+ null if there is no `source` field.
+
+ The value of `inputs` in the parameter reference or expression must be
+ the input object to the workflow step after assigning the `source`
+ values and then scattering. The order of evaluating `valueFrom` among
+ step input parameters is undefined and the result of evaluating
+ `valueFrom` on a parameter must not be visible to evaluation of
+ `valueFrom` on other parameters.
+
+
+- type: record
+ name: WorkflowStepOutput
+ docParent: "#WorkflowStep"
+ doc: |
+ Associate an output parameter of the underlying process with a workflow
+ parameter. The workflow parameter (given in the `id` field) may be used
+ as a `source` to connect with input parameters of other workflow steps, or
+ with an output parameter of the process.
+ fields:
+ - name: id
+ type: string
+ jsonldPredicate: "@id"
+ doc: |
+ A unique identifier for this workflow output parameter. This is the
+ identifier to use in the `source` field of `WorkflowStepInput` to
+ connect the output value to downstream parameters.
+
+
+- name: ScatterMethod
+ type: enum
+ docParent: "#WorkflowStep"
+ doc: The scatter method, as described in [workflow step scatter](#WorkflowStep).
+ symbols: + - dotproduct + - nested_crossproduct + - flat_crossproduct + + +- name: WorkflowStep + type: record + docParent: "#Workflow" + doc: | + A workflow step is an executable element of a workflow. It specifies the + underlying process implementation (such as `CommandLineTool` or another + `Workflow`) in the `run` field and connects the input and output parameters + of the underlying process to workflow parameters. + + # Scatter/gather + + To use scatter/gather, + [ScatterFeatureRequirement](#ScatterFeatureRequirement) must be specified + in the workflow or workflow step requirements. + + A "scatter" operation specifies that the associated workflow step or + subworkflow should execute separately over a list of input elements. Each + job making up a scatter operation is independent and may be executed + concurrently. + + The `scatter` field specifies one or more input parameters which will be + scattered. An input parameter may be listed more than once. The declared + type of each input parameter is implicitly wrapped in an array for each + time it appears in the `scatter` field. As a result, upstream parameters + which are connected to scattered parameters may be arrays. + + All output parameter types are also implicitly wrapped in arrays. Each job + in the scatter results in an entry in the output array. + + If `scatter` declares more than one input parameter, `scatterMethod` + describes how to decompose the input into a discrete set of jobs. + + * **dotproduct** specifies that each of the input arrays are aligned and one + element taken from each array to construct each job. It is an error + if all input arrays are not the same length. + + * **nested_crossproduct** specifies the Cartesian product of the inputs, + producing a job for every combination of the scattered inputs. The + output must be nested arrays for each level of scattering, in the + order that the input arrays are listed in the `scatter` field. 
+ + * **flat_crossproduct** specifies the Cartesian product of the inputs, + producing a job for every combination of the scattered inputs. The + output arrays must be flattened to a single level, but otherwise listed in the + order that the input arrays are listed in the `scatter` field. + + # Subworkflows + + To specify a nested workflow as part of a workflow step, + [SubworkflowFeatureRequirement](#SubworkflowFeatureRequirement) must be + specified in the workflow or workflow step requirements. + + fields: + - name: id + type: string + jsonldPredicate: "@id" + doc: "The unique identifier for this workflow step." + - name: in + type: WorkflowStepInput[] + jsonldPredicate: + _id: "cwl:in" + mapSubject: id + mapPredicate: source + doc: | + Defines the input parameters of the workflow step. The process is ready to + run when all required input parameters are associated with concrete + values. Input parameters include a schema for each parameter which is + used to validate the input object. It may also be used build a user + interface for constructing the input object. + - name: out + type: + - type: array + items: [string, WorkflowStepOutput] + jsonldPredicate: + _id: "cwl:out" + _type: "@id" + identity: true + doc: | + Defines the parameters representing the output of the process. May be + used to generate and/or validate the output object. + - name: requirements + type: ProcessRequirement[]? + jsonldPredicate: + _id: "cwl:requirements" + mapSubject: class + doc: | + Declares requirements that apply to either the runtime environment or the + workflow engine that must be met in order to execute this workflow step. If + an implementation cannot satisfy all requirements, or a requirement is + listed which is not recognized by the implementation, it is a fatal + error and the implementation must not attempt to run the process, + unless overridden at user option. + - name: hints + type: Any[]? 
+ jsonldPredicate:
+ _id: "cwl:hints"
+ noLinkCheck: true
+ mapSubject: class
+ doc: |
+ Declares hints applying to either the runtime environment or the
+ workflow engine that may be helpful in executing this workflow step. It is
+ not an error if an implementation cannot satisfy all hints, however
+ the implementation may report a warning.
+ - name: label
+ type: string?
+ jsonldPredicate: "rdfs:label"
+ doc: "A short, human-readable label of this process object."
+ - name: doc
+ type: string?
+ jsonldPredicate: "rdfs:comment"
+ doc: "A long, human-readable description of this process object."
+ - name: run
+ type: [string, Process]
+ jsonldPredicate:
+ "_id": "cwl:run"
+ "_type": "@id"
+ doc: |
+ Specifies the process to run.
+ - name: scatter
+ type:
+ - string?
+ - string[]?
+ jsonldPredicate:
+ "_id": "cwl:scatter"
+ "_type": "@id"
+ "_container": "@list"
+ refScope: 0
+ - name: scatterMethod
+ doc: |
+ Required if `scatter` is an array of more than one element.
+ type: ScatterMethod?
+ jsonldPredicate:
+ "_id": "cwl:scatterMethod"
+ "_type": "@vocab"
+
+
+- name: Workflow
+ type: record
+ extends: "#Process"
+ documentRoot: true
+ specialize:
+ - specializeFrom: "#OutputParameter"
+ specializeTo: "#WorkflowOutputParameter"
+ doc: |
+ A workflow describes a set of **steps** and the **dependencies** between
+ those steps. When a step produces output that will be consumed by a
+ second step, the first step is a dependency of the second step.
+
+ When there is a dependency, the workflow engine must execute the preceding
+ step and wait for it to successfully produce output before executing the
+ dependent step. If two steps are defined in the workflow graph that
+ are not directly or indirectly dependent, these steps are **independent**,
+ and may execute in any order or execute concurrently. A workflow is
+ complete when all steps have been executed.
+
+ Dependencies between parameters are expressed using the `source` field on
+ [workflow step input parameters](#WorkflowStepInput) and [workflow output
+ parameters](#WorkflowOutputParameter).
+
+ The `source` field expresses the dependency of one parameter on another
+ such that when a value is associated with the parameter specified by
+ `source`, that value is propagated to the destination parameter. When all
+ data links inbound to a given step are fulfilled, the step is ready to
+ execute.
+
+ ## Workflow success and failure
+
+ A completed step must result in one of `success`, `temporaryFailure` or
+ `permanentFailure` states. An implementation may choose to retry a step
+ execution which resulted in `temporaryFailure`. An implementation may
+ choose to either continue running other steps of a workflow, or terminate
+ immediately upon `permanentFailure`.
+
+ * If any step of a workflow execution results in `permanentFailure`, then
+ the workflow status is `permanentFailure`.
+
+ * If one or more steps result in `temporaryFailure` and all other steps
+ complete `success` or are not executed, then the workflow status is
+ `temporaryFailure`.
+
+ * If all workflow steps are executed and complete with `success`, then the
+ workflow status is `success`.
+
+ # Extensions
+
+ [ScatterFeatureRequirement](#ScatterFeatureRequirement) and
+ [SubworkflowFeatureRequirement](#SubworkflowFeatureRequirement) are
+ available as standard [extensions](#Extensions_and_Metadata) to core
+ workflow semantics.
+
+ fields:
+ - name: "class"
+ jsonldPredicate:
+ "_id": "@type"
+ "_type": "@vocab"
+ type: string
+ - name: steps
+ doc: |
+ The individual steps that make up the workflow. Each step is executed when all of its
+ input data links are fulfilled. An implementation may choose to execute
+ the steps in a different order than listed and/or execute steps
+ concurrently, provided that dependencies between steps are met.
+ type: + - type: array + items: "#WorkflowStep" + jsonldPredicate: + mapSubject: id + + +- type: record + name: SubworkflowFeatureRequirement + extends: ProcessRequirement + doc: | + Indicates that the workflow platform must support nested workflows in + the `run` field of [WorkflowStep](#WorkflowStep). + fields: + - name: "class" + type: "string" + doc: "Always 'SubworkflowFeatureRequirement'" + jsonldPredicate: + "_id": "@type" + "_type": "@vocab" + +- name: ScatterFeatureRequirement + type: record + extends: ProcessRequirement + doc: | + Indicates that the workflow platform must support the `scatter` and + `scatterMethod` fields of [WorkflowStep](#WorkflowStep). + fields: + - name: "class" + type: "string" + doc: "Always 'ScatterFeatureRequirement'" + jsonldPredicate: + "_id": "@type" + "_type": "@vocab" + +- name: MultipleInputFeatureRequirement + type: record + extends: ProcessRequirement + doc: | + Indicates that the workflow platform must support multiple inbound data links + listed in the `source` field of [WorkflowStepInput](#WorkflowStepInput). + fields: + - name: "class" + type: "string" + doc: "Always 'MultipleInputFeatureRequirement'" + jsonldPredicate: + "_id": "@type" + "_type": "@vocab" + +- type: record + name: StepInputExpressionRequirement + extends: ProcessRequirement + doc: | + Indicate that the workflow platform must support the `valueFrom` field + of [WorkflowStepInput](#WorkflowStepInput). 
+ fields: + - name: "class" + type: "string" + doc: "Always 'StepInputExpressionRequirement'" + jsonldPredicate: + "_id": "@type" + "_type": "@vocab" diff --git a/schema_salad/tests/test_schema/concepts.md b/schema_salad/tests/test_schema/concepts.md new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/schema_salad/tests/test_schema/concepts.md @@ -0,0 +1 @@ + diff --git a/schema_salad/tests/test_schema/contrib.md b/schema_salad/tests/test_schema/contrib.md new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/schema_salad/tests/test_schema/contrib.md @@ -0,0 +1 @@ + diff --git a/schema_salad/tests/test_schema/intro.md b/schema_salad/tests/test_schema/intro.md new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/schema_salad/tests/test_schema/intro.md @@ -0,0 +1 @@ + diff --git a/schema_salad/tests/test_schema/invocation.md b/schema_salad/tests/test_schema/invocation.md new file mode 100644 index 000000000..8b1378917 --- /dev/null +++ b/schema_salad/tests/test_schema/invocation.md @@ -0,0 +1 @@ + diff --git a/schema_salad/tests/test_schema/metaschema_base.yml b/schema_salad/tests/test_schema/metaschema_base.yml new file mode 100644 index 000000000..73511d141 --- /dev/null +++ b/schema_salad/tests/test_schema/metaschema_base.yml @@ -0,0 +1,164 @@ +$base: "https://w3id.org/cwl/salad#" + +$namespaces: + sld: "https://w3id.org/cwl/salad#" + dct: "http://purl.org/dc/terms/" + rdf: "http://www.w3.org/1999/02/22-rdf-syntax-ns#" + rdfs: "http://www.w3.org/2000/01/rdf-schema#" + xsd: "http://www.w3.org/2001/XMLSchema#" + +$graph: +- name: PrimitiveType + type: enum + symbols: + - "sld:null" + - "xsd:boolean" + - "xsd:int" + - "xsd:long" + - "xsd:float" + - "xsd:double" + - "xsd:string" + doc: + - | + Salad data types are based on Avro schema declarations. Refer to the + [Avro schema declaration documentation](https://avro.apache.org/docs/current/spec.html#schemas) for + detailed information. 
+ - "null: no value" + - "boolean: a binary value" + - "int: 32-bit signed integer" + - "long: 64-bit signed integer" + - "float: single precision (32-bit) IEEE 754 floating-point number" + - "double: double precision (64-bit) IEEE 754 floating-point number" + - "string: Unicode character sequence" + + +- name: Any + type: enum + symbols: ["#Any"] + doc: | + The **Any** type validates for any non-null value. + + +- name: RecordField + type: record + doc: A field of a record. + fields: + - name: name + type: string + jsonldPredicate: "@id" + doc: | + The name of the field + + - name: doc + type: string? + doc: | + A documentation string for this field + jsonldPredicate: "rdfs:comment" + + - name: type + type: + - PrimitiveType + - RecordSchema + - EnumSchema + - ArraySchema + - string + - type: array + items: + - PrimitiveType + - RecordSchema + - EnumSchema + - ArraySchema + - string + jsonldPredicate: + _id: sld:type + _type: "@vocab" + typeDSL: true + refScope: 2 + doc: | + The field type + + +- name: RecordSchema + type: record + fields: + type: + doc: "Must be `record`" + type: + name: Record_symbol + type: enum + symbols: + - "sld:record" + jsonldPredicate: + _id: "sld:type" + _type: "@vocab" + typeDSL: true + refScope: 2 + fields: + type: RecordField[]? + jsonldPredicate: + _id: sld:fields + mapSubject: name + mapPredicate: type + doc: "Defines the fields of the record." + + +- name: EnumSchema + type: record + doc: | + Define an enumerated type. + fields: + type: + doc: "Must be `enum`" + type: + name: Enum_symbol + type: enum + symbols: + - "sld:enum" + jsonldPredicate: + _id: "sld:type" + _type: "@vocab" + typeDSL: true + refScope: 2 + symbols: + type: string[] + jsonldPredicate: + _id: "sld:symbols" + _type: "@id" + identity: true + doc: "Defines the set of valid symbols." 
+ + +- name: ArraySchema + type: record + fields: + type: + doc: "Must be `array`" + type: + name: Array_symbol + type: enum + symbols: + - "sld:array" + jsonldPredicate: + _id: "sld:type" + _type: "@vocab" + typeDSL: true + refScope: 2 + items: + type: + - PrimitiveType + - RecordSchema + - EnumSchema + - ArraySchema + - string + - type: array + items: + - PrimitiveType + - RecordSchema + - EnumSchema + - ArraySchema + - string + jsonldPredicate: + _id: "sld:items" + _type: "@vocab" + refScope: 2 + doc: "Defines the type of the array elements." diff --git a/schema_salad/tests/test_schema/test1.cwl b/schema_salad/tests/test_schema/test1.cwl new file mode 100644 index 000000000..2406c8648 --- /dev/null +++ b/schema_salad/tests/test_schema/test1.cwl @@ -0,0 +1 @@ +class: Workflow \ No newline at end of file diff --git a/schema_salad/tests/test_schema/test10.cwl b/schema_salad/tests/test_schema/test10.cwl new file mode 100644 index 000000000..28608072e --- /dev/null +++ b/schema_salad/tests/test_schema/test10.cwl @@ -0,0 +1,10 @@ +class: Workflow +inputs: + foo: string +outputs: + bar: string +steps: + step1: + scatterMethod: [record] + in: [] + out: [out] \ No newline at end of file diff --git a/schema_salad/tests/test_schema/test11.cwl b/schema_salad/tests/test_schema/test11.cwl new file mode 100644 index 000000000..43281fbc5 --- /dev/null +++ b/schema_salad/tests/test_schema/test11.cwl @@ -0,0 +1,10 @@ +class: Workflow +inputs: + foo: string +outputs: + bar: string +steps: + step1: + run: blub.cwl + in: [] + out: [out] \ No newline at end of file diff --git a/schema_salad/tests/test_schema/test2.cwl b/schema_salad/tests/test_schema/test2.cwl new file mode 100644 index 000000000..96ae14014 --- /dev/null +++ b/schema_salad/tests/test_schema/test2.cwl @@ -0,0 +1 @@ +class: xWorkflow \ No newline at end of file diff --git a/schema_salad/tests/test_schema/test3.cwl b/schema_salad/tests/test_schema/test3.cwl new file mode 100644 index 000000000..517e920d2 --- /dev/null 
+++ b/schema_salad/tests/test_schema/test3.cwl @@ -0,0 +1,6 @@ +class: Workflow +inputs: + foo: string +outputs: + bar: xstring +steps: [] \ No newline at end of file diff --git a/schema_salad/tests/test_schema/test4.cwl b/schema_salad/tests/test_schema/test4.cwl new file mode 100644 index 000000000..e57292d36 --- /dev/null +++ b/schema_salad/tests/test_schema/test4.cwl @@ -0,0 +1,6 @@ +class: Workflow +inputs: + foo: string +outputs: + bar: 12 +steps: [] \ No newline at end of file diff --git a/schema_salad/tests/test_schema/test5.cwl b/schema_salad/tests/test_schema/test5.cwl new file mode 100644 index 000000000..8a7ba2220 --- /dev/null +++ b/schema_salad/tests/test_schema/test5.cwl @@ -0,0 +1,6 @@ +class: Workflow +inputs: + foo: string +outputs: + bar: string +steps: [12] \ No newline at end of file diff --git a/schema_salad/tests/test_schema/test6.cwl b/schema_salad/tests/test_schema/test6.cwl new file mode 100644 index 000000000..eff4ac5c7 --- /dev/null +++ b/schema_salad/tests/test_schema/test6.cwl @@ -0,0 +1,5 @@ +inputs: + foo: string +outputs: + bar: string +steps: [12] \ No newline at end of file diff --git a/schema_salad/tests/test_schema/test7.cwl b/schema_salad/tests/test_schema/test7.cwl new file mode 100644 index 000000000..0e12c1295 --- /dev/null +++ b/schema_salad/tests/test_schema/test7.cwl @@ -0,0 +1,10 @@ +class: Workflow +inputs: + foo: string +outputs: + bar: string +steps: + step1: + scatter_method: blub + in: [] + out: [out] \ No newline at end of file diff --git a/schema_salad/tests/test_schema/test8.cwl b/schema_salad/tests/test_schema/test8.cwl new file mode 100644 index 000000000..128cb4a75 --- /dev/null +++ b/schema_salad/tests/test_schema/test8.cwl @@ -0,0 +1,10 @@ +class: Workflow +inputs: + foo: string +outputs: + bar: string +steps: + step1: + scatterMethod: abc + in: [] + out: [out] \ No newline at end of file diff --git a/schema_salad/tests/test_schema/test9.cwl b/schema_salad/tests/test_schema/test9.cwl new file mode 100644 
index 000000000..2d7ff4cd9 --- /dev/null +++ b/schema_salad/tests/test_schema/test9.cwl @@ -0,0 +1,10 @@ +class: Workflow +inputs: + foo: string +outputs: + bar: string +steps: + step1: + scatterMethod: 12 + in: [] + out: [out] \ No newline at end of file diff --git a/schema_salad/validate.py b/schema_salad/validate.py index 6e3eb5cc3..75e094b7a 100644 --- a/schema_salad/validate.py +++ b/schema_salad/validate.py @@ -1,29 +1,38 @@ import pprint import avro.schema +from avro.schema import Schema import sys import urlparse +import re from typing import Any, Union +from .sourceline import SourceLine, lineno_re, bullets, indent + class ValidationException(Exception): pass + class ClassValidationException(ValidationException): pass -def validate(expected_schema, datum, identifiers=set(), strict=False, foreign_properties=set()): - # type: (avro.schema.Schema, Any, Set[unicode], bool, Set[unicode]) -> bool - return validate_ex(expected_schema, datum, identifiers, strict=strict, foreign_properties=foreign_properties, raise_ex=False) + +def validate(expected_schema, # type: Schema + datum, # type: Any + identifiers=set(), # type: Set[unicode] + strict=False, # type: bool + foreign_properties=set() # type: Set[unicode] + ): + # type: (...) 
-> bool + return validate_ex( + expected_schema, datum, identifiers, strict=strict, + foreign_properties=foreign_properties, raise_ex=False) + INT_MIN_VALUE = -(1 << 31) INT_MAX_VALUE = (1 << 31) - 1 LONG_MIN_VALUE = -(1 << 63) LONG_MAX_VALUE = (1 << 63) - 1 -def indent(v, nolead=False): # type: (Union[str, unicode], bool) -> unicode - if nolead: - return v.splitlines()[0] + u"\n".join([u" " + l for l in v.splitlines()[1:]]) - else: - return u"\n".join([" " + l for l in v.splitlines()]) def friendly(v): # type: (Any) -> Any if isinstance(v, avro.schema.NamedSchema): @@ -37,11 +46,6 @@ def friendly(v): # type: (Any) -> Any else: return v -def multi(v, q=""): # type: (Union[str, unicode], Union[str, unicode]) -> unicode - if '\n' in v: - return u"%s%s%s\n" % (q, v, q) - else: - return u"%s%s%s" % (q, v, q) def vpformat(datum): # type: (Any) -> str a = pprint.pformat(datum) @@ -49,9 +53,15 @@ def vpformat(datum): # type: (Any) -> str a = a[0:160] + "[...]" return a -def validate_ex(expected_schema, datum, identifiers=None, strict=False, - foreign_properties=None, raise_ex=True): - # type: (avro.schema.Schema, Any, Set[unicode], bool, Set[unicode], bool) -> bool + +def validate_ex(expected_schema, # type: Schema + datum, # type: Any + identifiers=None, # type: Set[unicode] + strict=False, # type: bool + foreign_properties=None, # type: Set[unicode] + raise_ex=True # type: bool + ): + # type: (...) 
-> bool """Determine if a python datum is an instance of a schema.""" if not identifiers: @@ -67,7 +77,7 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False, return True else: if raise_ex: - raise ValidationException(u"the value `%s` is not null" % vpformat(datum)) + raise ValidationException(u"the value is not null") else: return False elif schema_type == 'boolean': @@ -75,7 +85,7 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False, return True else: if raise_ex: - raise ValidationException(u"the value `%s` is not boolean" % vpformat(datum)) + raise ValidationException(u"the value is not boolean") else: return False elif schema_type == 'string': @@ -86,7 +96,7 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False, return True else: if raise_ex: - raise ValidationException(u"the value `%s` is not string" % vpformat(datum)) + raise ValidationException(u"the value is not string") else: return False elif schema_type == 'bytes': @@ -94,12 +104,13 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False, return True else: if raise_ex: - raise ValidationException(u"the value `%s` is not bytes" % vpformat(datum)) + raise ValidationException( + u"the value `%s` is not bytes" % vpformat(datum)) else: return False elif schema_type == 'int': if ((isinstance(datum, int) or isinstance(datum, long)) - and INT_MIN_VALUE <= datum <= INT_MAX_VALUE): + and INT_MIN_VALUE <= datum <= INT_MAX_VALUE): return True else: if raise_ex: @@ -108,28 +119,22 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False, return False elif schema_type == 'long': if ((isinstance(datum, int) or isinstance(datum, long)) - and LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE): + and LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE): return True else: if raise_ex: - raise ValidationException(u"the value `%s` is not long" % vpformat(datum)) + raise ValidationException( + u"the value `%s` is not long" % vpformat(datum)) else: 
return False elif schema_type in ['float', 'double']: if (isinstance(datum, int) or isinstance(datum, long) - or isinstance(datum, float)): - return True - else: - if raise_ex: - raise ValidationException(u"the value `%s` is not float or double" % vpformat(datum)) - else: - return False - elif isinstance(expected_schema, avro.schema.FixedSchema): - if isinstance(datum, str) and len(datum) == expected_schema.size: + or isinstance(datum, float)): return True else: if raise_ex: - raise ValidationException(u"the value `%s` is not fixed" % vpformat(datum)) + raise ValidationException( + u"the value `%s` is not float or double" % vpformat(datum)) else: return False elif isinstance(expected_schema, avro.schema.EnumSchema): @@ -141,28 +146,42 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False, raise ValidationException(u"'Any' type must be non-null") else: return False + if not isinstance(datum, basestring): + if raise_ex: + raise ValidationException( + u"value is a %s but expected a string" % (type(datum).__name__)) + else: + return False if datum in expected_schema.symbols: return True else: if raise_ex: - raise ValidationException(u"the value `%s`\n is not a valid symbol in enum %s, expected one of %s" % (vpformat(datum), expected_schema.name, "'" + "', '".join(expected_schema.symbols) + "'")) + raise ValidationException(u"the value %s is not a valid %s, expected %s%s" % (vpformat(datum), expected_schema.name, + "one of " if len( + expected_schema.symbols) > 1 else "", + "'" + "', '".join(expected_schema.symbols) + "'")) else: return False elif isinstance(expected_schema, avro.schema.ArraySchema): if isinstance(datum, list): for i, d in enumerate(datum): try: - if not validate_ex(expected_schema.items, d, identifiers, strict=strict, foreign_properties=foreign_properties, raise_ex=raise_ex): + sl = SourceLine(datum, i, ValidationException) + if not validate_ex(expected_schema.items, d, identifiers, strict=strict, + 
foreign_properties=foreign_properties, + raise_ex=raise_ex): return False except ValidationException as v: if raise_ex: - raise ValidationException(u"At position %i\n%s" % (i, indent(str(v)))) + raise sl.makeError( + unicode("item is invalid because\n%s" % (indent(str(v))))) else: return False return True else: if raise_ex: - raise ValidationException(u"the value `%s` is not a list, expected list of %s" % (vpformat(datum), friendly(expected_schema.items))) + raise ValidationException(u"the value is not a list, expected list of %s" % ( + friendly(expected_schema.items))) else: return False elif isinstance(expected_schema, avro.schema.UnionSchema): @@ -174,34 +193,47 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False, return False errors = [] # type: List[unicode] + checked = [] for s in expected_schema.schemas: + if isinstance(datum, list) and not isinstance(s, avro.schema.ArraySchema): + continue + elif isinstance(datum, dict) and not isinstance(s, avro.schema.RecordSchema): + continue + elif isinstance(datum, (bool, int, long, float, basestring)) and isinstance(s, (avro.schema.ArraySchema, avro.schema.RecordSchema)): + continue + elif datum is not None and s.type == "null": + continue + + checked.append(s) try: - validate_ex(s, datum, identifiers, strict=strict, foreign_properties=foreign_properties, raise_ex=True) + validate_ex(s, datum, identifiers, strict=strict, + foreign_properties=foreign_properties, raise_ex=True) except ClassValidationException as e: raise except ValidationException as e: errors.append(unicode(e)) - - raise ValidationException(u"the value %s is not a valid type in the union, expected one of:\n%s" % ( - multi(vpformat(datum), '`'), u"\n".join([ - u"- %s, but\n %s" % ( - friendly(expected_schema.schemas[i]), indent(multi(errors[i]))) - for i in range(0, len(expected_schema.schemas))]))) + if errors: + raise ValidationException(bullets(["tried %s but\n%s" % (friendly( + checked[i]), indent(errors[i])) for i in range(0, 
len(errors))], "- ")) + else: + raise ValidationException("value is a %s, expected %s" % ( + type(datum).__name__, friendly(expected_schema))) elif isinstance(expected_schema, avro.schema.RecordSchema): if not isinstance(datum, dict): if raise_ex: - raise ValidationException(u"`%s`\n is not a dict" % vpformat(datum)) + raise ValidationException(u"is not a dict") else: return False classmatch = None for f in expected_schema.fields: - if f.name == "class": - d = datum.get("class") + if f.name in ("class",): + d = datum.get(f.name) if not d: if raise_ex: - raise ValidationException(u"Missing 'class' field") + raise ValidationException( + u"Missing '%s' field" % (f.name)) else: return False if expected_schema.name != d: @@ -211,7 +243,7 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False, errors = [] for f in expected_schema.fields: - if f.name == "class": + if f.name in ("class",): continue if f.name in datum: @@ -223,13 +255,16 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False, fieldval = None try: - if not validate_ex(f.type, fieldval, identifiers, strict=strict, foreign_properties=foreign_properties, raise_ex=raise_ex): + sl = SourceLine(datum, f.name, unicode) + if not validate_ex(f.type, fieldval, identifiers, strict=strict, foreign_properties=foreign_properties, + raise_ex=raise_ex): return False except ValidationException as v: if f.name not in datum: errors.append(u"missing required field `%s`" % f.name) else: - errors.append(u"could not validate field `%s` because\n%s" % (f.name, multi(indent(str(v))))) + errors.append(sl.makeError(u"the `%s` field is not valid because\n%s" % ( + f.name, indent(str(v))))) if strict: for d in datum: @@ -238,21 +273,24 @@ def validate_ex(expected_schema, datum, identifiers=None, strict=False, if d == f.name: found = True if not found: + sl = SourceLine(datum, d, unicode) if d not in identifiers and d not in foreign_properties and d[0] not in ("@", "$"): if not raise_ex: return False 
split = urlparse.urlsplit(d) if split.scheme: - errors.append(u"could not validate extension field `%s` because it is not recognized and strict is True. Did you include a $schemas section?" % (d)) + errors.append(sl.makeError( + u"unrecognized extension field `%s` and strict is True. Did you include a $schemas section?" % (d))) else: - errors.append(u"could not validate field `%s` because it is not recognized and strict is True, valid fields are: %s" % (d, ", ".join(fn.name for fn in expected_schema.fields))) + errors.append(sl.makeError(u"invalid field `%s`, expected one of: %s" % ( + d, ", ".join("'%s'" % fn.name for fn in expected_schema.fields)))) if errors: if raise_ex: if classmatch: - raise ClassValidationException(u"%s record %s" % (classmatch, "\n".join(errors))) + raise ClassValidationException(bullets(errors, "* ")) else: - raise ValidationException(u"\n".join(errors)) + raise ValidationException(bullets(errors, "* ")) else: return False else: diff --git a/setup.cfg b/setup.cfg index a559cd8c3..903b12df4 100644 --- a/setup.cfg +++ b/setup.cfg @@ -7,5 +7,5 @@ universal = 1 [aliases] test=pytest -[pytest] +[tool:pytest] addopts=--pyarg schema_salad diff --git a/setup.py b/setup.py index fa639a9d6..52caae4f5 100755 --- a/setup.py +++ b/setup.py @@ -46,7 +46,7 @@ extras_require = {} # TODO: to be removed when the above is added setup(name='schema-salad', - version='1.18', + version='1.19', description='Schema Annotations for Linked Avro Data (SALAD)', long_description=open(README).read(), author='Common workflow language working group', From f0b88d0c80880f18b8040e3aa175ad1d0d30de01 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 21 Nov 2016 23:05:08 -0500 Subject: [PATCH 007/116] Bump major version number, because of backwards-incompatible changes. 
(#63) --- schema_salad/ref_resolver.py | 5 ++++- schema_salad/sourceline.py | 29 ++++++++++++++++++++++++++--- setup.py | 2 +- 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index c88c2ebcd..6acf4db39 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -600,7 +600,7 @@ def resolve_all(self, elif isinstance(document, CommentedSeq): pass elif isinstance(document, (list, dict)): - raise Exception("Expected CommentedMap or CommentedSeq, got %s" % type(document)) + raise Exception("Expected CommentedMap or CommentedSeq, got %s: `%s`" % (type(document), document)) else: return (document, metadata) @@ -902,4 +902,7 @@ def _copy_dict_without_key(from_dict, filtered_key): new_dict = copy.copy(from_dict) if filtered_key in new_dict: del new_dict[filtered_key] # type: ignore + if isinstance(from_dict, CommentedMap): + new_dict.lc.data = copy.copy(from_dict.lc.data) + new_dict.lc.filename = from_dict.lc.filename return new_dict diff --git a/schema_salad/sourceline.py b/schema_salad/sourceline.py index 492deb254..e85c1a906 100644 --- a/schema_salad/sourceline.py +++ b/schema_salad/sourceline.py @@ -84,18 +84,41 @@ def cmap(d, lc=None, fn=None): # type: (Union[int, float, str, unicode, Dict, L lc = [0, 0, 0, 0] if fn is None: fn = "test" + + if isinstance(d, CommentedMap): + for k,v in d.iteritems(): + if k in d.lc.data: + d[k] = cmap(v, lc=d.lc.data[k], fn=d.lc.filename) + else: + d[k] = cmap(v, lc, fn=d.lc.filename) + return d + if isinstance(d, CommentedSeq): + for k,v in enumerate(d): + if k in d.lc.data: + d[k] = cmap(v, lc=d.lc.data[k], fn=d.lc.filename) + else: + d[k] = cmap(v, lc, fn=d.lc.filename) + return d if isinstance(d, dict): cm = CommentedMap() for k,v in d.iteritems(): + if isinstance(v, CommentedBase): + uselc = [v.lc.line, v.lc.col, v.lc.line, v.lc.col] + else: + uselc = lc cm[k] = cmap(v) - cm.lc.add_kv_line_col(k, lc) + cm.lc.add_kv_line_col(k, uselc) 
cm.lc.filename = fn return cm if isinstance(d, list): cs = CommentedSeq() for k,v in enumerate(d): + if isinstance(v, CommentedBase): + uselc = [v.lc.line, v.lc.col, v.lc.line, v.lc.col] + else: + uselc = lc cs.append(cmap(v)) - cs.lc.add_kv_line_col(k, lc) + cs.lc.add_kv_line_col(k, uselc) cs.lc.filename = fn return cs else: @@ -119,7 +142,7 @@ def makeError(self, msg): # type: (Text) -> Any if not isinstance(self.item, ruamel.yaml.comments.CommentedBase): return self.raise_type(msg) errs = [] - if self.key is None: + if self.key is None or self.item.lc.data is None or self.key not in self.item.lc.data: lead = "%s:%i:%i:" % (self.item.lc.filename, self.item.lc.line+1, self.item.lc.col+1) diff --git a/setup.py b/setup.py index 52caae4f5..adf31ed4a 100755 --- a/setup.py +++ b/setup.py @@ -46,7 +46,7 @@ extras_require = {} # TODO: to be removed when the above is added setup(name='schema-salad', - version='1.19', + version='2.0', description='Schema Annotations for Linked Avro Data (SALAD)', long_description=open(README).read(), author='Common workflow language working group', From 1e0f6a51db31d722eda43cd7e44f880536810680 Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" Date: Fri, 28 Oct 2016 12:49:12 +0300 Subject: [PATCH 008/116] set the meta viewport tag --- schema_salad/makedoc.py | 1 + 1 file changed, 1 insertion(+) diff --git a/schema_salad/makedoc.py b/schema_salad/makedoc.py index 5309b95f4..0325ad803 100644 --- a/schema_salad/makedoc.py +++ b/schema_salad/makedoc.py @@ -423,6 +423,7 @@ def avrold_doc(j, outdoc, renderlist, redirects, brand, brandlink): + """) From 56ac12bcd5f2c27816e76cccdbb00b9caa085ad3 Mon Sep 17 00:00:00 2001 From: "Michael R. 
Crusoe" Date: Mon, 12 Dec 2016 11:25:50 +0200 Subject: [PATCH 009/116] Link to v1.0 specs --- README.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.rst b/README.rst index 11d1b06b6..e66427cdd 100644 --- a/README.rst +++ b/README.rst @@ -70,6 +70,6 @@ provides for robust support of inline documentation. .. _JSON-LD: http://json-ld.org .. _Avro: http://avro.apache.org .. _metaschema: https://github.com/common-workflow-language/schema_salad/blob/master/schema_salad/metaschema/metaschema.yml -.. _specification: http://www.commonwl.org/draft-3/SchemaSalad.html -.. _Language: https://github.com/common-workflow-language/common-workflow-language/blob/master/draft-3/CommandLineTool.yml +.. _specification: http://www.commonwl.org/v1.0/SchemaSalad.html +.. _Language: https://github.com/common-workflow-language/common-workflow-language/blob/master/v1.0/CommandLineTool.yml .. _RDF: https://www.w3.org/RDF/ From 5a15c58a2e403b9b929e1248fc37f2bfd394293a Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Fri, 16 Dec 2016 14:42:53 -0500 Subject: [PATCH 010/116] merge 1.21 changes into salad 2.0 (#68) * Loader constructor can accept custom "Fetcher" object for fetching files and checking links. * Add test for custom fetcher feature. * Fetcher is a constructor instead of an object. Fix load_schema to update cache instead of replacing it. * Add cache test. check_exists checks cache. * Fetcher includes custom urljoin. * Fix fetcher_constructor to default to None instead of DefaultFetcher. * Adjust package dependencies to be more specific about versions. * Linting * Tweak versioning to reduce chance of future unpleasant suprises from 3rd party upgrades and clean up requirements.txt. 
* Bump to 2.1 --- requirements.txt | 18 ++-- schema_salad/ref_resolver.py | 148 +++++++++++++++++++------------ schema_salad/schema.py | 2 +- schema_salad/tests/test_fetch.py | 57 ++++++++++++ setup.py | 18 ++-- 5 files changed, 167 insertions(+), 76 deletions(-) create mode 100644 schema_salad/tests/test_fetch.py diff --git a/requirements.txt b/requirements.txt index e30575aee..b41a60ffd 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,10 +1,10 @@ -requests -ruamel.yaml==0.12.4 -rdflib>=4.1. -rdflib-jsonld>=0.3.0 -mistune -typing>=3.5.2 ; python_version>="2.7" -avro ; python_version<"3" +typing==3.5.2.2 ; python_version>="2.7" avro-python3 ; python_version>="3" -CacheControl -lockfile +avro==1.8.1 ; python_version<"3" +ruamel.yaml==0.12.4 +rdflib==4.2.1 +rdflib-jsonld==0.4.0 +html5lib==0.9999999 +mistune==0.7.3 +CacheControl==0.11.7 +lockfile==0.12.2 diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index 6acf4db39..0d2bee8b6 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -73,20 +73,89 @@ def merge_properties(a, b): def SubLoader(loader): # type: (Loader) -> Loader return Loader(loader.ctx, schemagraph=loader.graph, foreign_properties=loader.foreign_properties, idx=loader.idx, - cache=loader.cache, session=loader.session) + cache=loader.cache, fetcher_constructor=loader.fetcher_constructor) +class Fetcher(object): + def fetch_text(self, url): # type: (unicode) -> unicode + raise NotImplementedError() -class Loader(object): + def check_exists(self, url): # type: (unicode) -> bool + raise NotImplementedError() + + def urljoin(self, base_url, url): # type: (unicode, unicode) -> unicode + raise NotImplementedError() + + +class DefaultFetcher(Fetcher): + def __init__(self, cache, session): # type: (dict, requests.sessions.Session) -> None + self.cache = cache + self.session = session + + def fetch_text(self, url): + # type: (unicode) -> unicode + if url in self.cache: + return self.cache[url] + + split = 
urlparse.urlsplit(url) + scheme, path = split.scheme, split.path + + if scheme in [u'http', u'https'] and self.session: + try: + resp = self.session.get(url) + resp.raise_for_status() + except Exception as e: + raise RuntimeError(url, e) + return resp.text + elif scheme == 'file': + try: + with open(path) as fp: + read = fp.read() + if hasattr(read, "decode"): + return read.decode("utf-8") + else: + return read + except (OSError, IOError) as e: + if e.filename == path: + raise RuntimeError(unicode(e)) + else: + raise RuntimeError('Error reading %s: %s' % (url, e)) + else: + raise ValueError('Unsupported scheme in url: %s' % url) + + def check_exists(self, url): # type: (unicode) -> bool + if url in self.cache: + return True + + split = urlparse.urlsplit(url) + scheme, path = split.scheme, split.path + + if scheme in [u'http', u'https'] and self.session: + try: + resp = self.session.head(url) + resp.raise_for_status() + except Exception as e: + return False + return True + elif scheme == 'file': + return os.path.exists(path) + else: + raise ValueError('Unsupported scheme in url: %s' % url) + def urljoin(self, base_url, url): + return urlparse.urljoin(base_url, url) + +class Loader(object): def __init__(self, ctx, # type: ContextType - schemagraph=None, # type: Graph + schemagraph=None, # type: rdflib.graph.Graph foreign_properties=None, # type: Set[unicode] idx=None, # type: Dict[unicode, Union[CommentedMap, CommentedSeq, unicode]] cache=None, # type: Dict[unicode, Any] - session=None # type: requests.sessions.Session + session=None, # type: requests.sessions.Session + fetcher_constructor=None # type: Callable[[Dict[unicode, unicode], requests.sessions.Session], Fetcher] ): # type: (...) 
-> None + normalize = lambda url: urlparse.urlsplit(url).geturl() self.idx = None # type: Dict[unicode, Union[CommentedMap, CommentedSeq, unicode]] if idx is not None: @@ -113,12 +182,20 @@ def __init__(self, else: self.cache = {} - self.session = None # type: requests.sessions.Session - if session is not None: + if session is None: + self.session = CacheControl(requests.Session(), + cache=FileCache(os.path.join(os.environ["HOME"], ".cache", "salad"))) + else: self.session = session + + if fetcher_constructor: + self.fetcher_constructor = fetcher_constructor else: - self.session = CacheControl(requests.Session(), - cache=FileCache(os.path.join(os.environ["HOME"], ".cache", "salad"))) + self.fetcher_constructor = DefaultFetcher + self.fetcher = self.fetcher_constructor(self.cache, self.session) + + self.fetch_text = self.fetcher.fetch_text + self.check_exists = self.fetcher.check_exists self.url_fields = None # type: Set[unicode] self.scoped_ref_fields = None # type: Dict[unicode, int] @@ -171,7 +248,7 @@ def expand_url(self, elif scoped_ref is not None and not split.fragment: pass else: - url = urlparse.urljoin(base_url, url) + url = self.fetcher.urljoin(base_url, url) if vocab_term and url in self.rvocab: return self.rvocab[url] @@ -195,7 +272,7 @@ def add_namespaces(self, ns): # type: (Dict[unicode, unicode]) -> None def add_schemas(self, ns, base_url): # type: (Union[List[unicode], unicode], unicode) -> None for sch in aslist(ns): - fetchurl = urlparse.urljoin(base_url, sch) + fetchurl = self.fetcher.urljoin(base_url, sch) if fetchurl not in self.cache: _logger.debug("Getting external schema %s", fetchurl) content = self.fetch_text(fetchurl) @@ -346,6 +423,7 @@ def resolve_ref(self, if url in self.idx and (not mixin): return self.idx[url], {} + sl.raise_type = RuntimeError with sl: # "$include" directive means load raw text if inc: @@ -704,37 +782,6 @@ def resolve_all(self, return document, metadata - def fetch_text(self, url): - # type: (unicode) -> unicode - 
if url in self.cache: - return self.cache[url] - - split = urlparse.urlsplit(url) - scheme, path = split.scheme, split.path - - if scheme in [u'http', u'https'] and self.session: - try: - resp = self.session.get(url) - resp.raise_for_status() - except Exception as e: - raise RuntimeError(url, e) - return resp.text - elif scheme == 'file': - try: - with open(path) as fp: - read = fp.read() - if hasattr(read, "decode"): - return read.decode("utf-8") - else: - return read - except (OSError, IOError) as e: - if e.filename == path: - raise RuntimeError(unicode(e)) - else: - raise RuntimeError('Error reading %s: %s' % (url, e)) - else: - raise ValueError('Unsupported scheme in url: %s' % url) - def fetch(self, url, inject_ids=True): # type: (unicode, bool) -> Any if url in self.idx: return self.idx[url] @@ -758,21 +805,6 @@ def fetch(self, url, inject_ids=True): # type: (unicode, bool) -> Any self.idx[url] = result return result - def check_file(self, url): # type: (unicode) -> bool - split = urlparse.urlsplit(url) - scheme, path = split.scheme, split.path - - if scheme in [u'http', u'https'] and self.session: - try: - resp = self.session.head(url) - resp.raise_for_status() - except Exception as e: - return False - return True - elif scheme == 'file': - return os.path.exists(path) - else: - raise ValueError('Unsupported scheme in url: %s' % url) FieldType = TypeVar('FieldType', unicode, CommentedSeq, CommentedMap) @@ -809,13 +841,13 @@ def validate_link(self, field, link, docid): if link not in self.vocab and link not in self.idx and link not in self.rvocab: if field in self.scoped_ref_fields: return self.validate_scoped(field, link, docid) - elif not self.check_file(link): + elif not self.check_exists(link): raise validate.ValidationException( "Field `%s` contains undefined reference to `%s`" % (field, link)) elif link not in self.idx and link not in self.rvocab: if field in self.scoped_ref_fields: return self.validate_scoped(field, link, docid) - elif not 
self.check_file(link): + elif not self.check_exists(link): raise validate.ValidationException( "Field `%s` contains undefined reference to `%s`" % (field, link)) elif isinstance(link, CommentedSeq): diff --git a/schema_salad/schema.py b/schema_salad/schema.py index ff2d18cef..342ec4680 100644 --- a/schema_salad/schema.py +++ b/schema_salad/schema.py @@ -188,7 +188,7 @@ def load_schema(schema_ref, # type: Union[CommentedMap, CommentedSeq, unicode] metaschema_names, metaschema_doc, metaschema_loader = get_metaschema() if cache is not None: - metaschema_loader.cache = cache + metaschema_loader.cache.update(cache) schema_doc, schema_metadata = metaschema_loader.resolve_ref(schema_ref, "") if not isinstance(schema_doc, list): diff --git a/schema_salad/tests/test_fetch.py b/schema_salad/tests/test_fetch.py new file mode 100644 index 000000000..8fb9e5a69 --- /dev/null +++ b/schema_salad/tests/test_fetch.py @@ -0,0 +1,57 @@ +import unittest +import schema_salad.ref_resolver +import schema_salad.main +import schema_salad.schema +from schema_salad.jsonld_context import makerdf +import rdflib +import ruamel.yaml as yaml +import json +import os +import urlparse + +class TestFetcher(unittest.TestCase): + def test_fetcher(self): + class TestFetcher(schema_salad.ref_resolver.Fetcher): + def __init__(self, a, b): + pass + + def fetch_text(self, url): # type: (unicode) -> unicode + if url == "keep:abc+123/foo.txt": + return "hello: keepfoo" + if url.endswith("foo.txt"): + return "hello: foo" + else: + raise RuntimeError("Not foo.txt") + + def check_exists(self, url): # type: (unicode) -> bool + if url.endswith("foo.txt"): + return True + else: + return False + + def urljoin(self, base, url): + urlsp = urlparse.urlsplit(url) + if urlsp.scheme: + return url + basesp = urlparse.urlsplit(base) + + if basesp.scheme == "keep": + return base + "/" + url + return urlparse.urljoin(base, url) + + loader = schema_salad.ref_resolver.Loader({}, fetcher_constructor=TestFetcher) + 
self.assertEqual({"hello": "foo"}, loader.resolve_ref("foo.txt")[0]) + self.assertEqual({"hello": "keepfoo"}, loader.resolve_ref("foo.txt", base_url="keep:abc+123")[0]) + self.assertTrue(loader.check_exists("foo.txt")) + + with self.assertRaises(RuntimeError): + loader.resolve_ref("bar.txt") + self.assertFalse(loader.check_exists("bar.txt")) + + def test_cache(self): + loader = schema_salad.ref_resolver.Loader({}) + foo = "file://%s/foo.txt" % os.getcwd() + loader.cache.update({foo: "hello: foo"}) + print loader.cache + self.assertEqual({"hello": "foo"}, loader.resolve_ref("foo.txt")[0]) + self.assertTrue(loader.check_exists(foo)) diff --git a/setup.py b/setup.py index adf31ed4a..97c466663 100755 --- a/setup.py +++ b/setup.py @@ -28,13 +28,15 @@ requirements = [] install_requires = [ - 'requests', - 'ruamel.yaml == 0.12.4', - 'rdflib >= 4.1.0', - 'rdflib-jsonld >= 0.3.0', - 'mistune', - 'typing >= 3.5.2', - 'CacheControl', + 'setuptools', + 'requests >= 1.0', + 'ruamel.yaml >= 0.12.4, < 0.12.5', + 'rdflib >= 4.2.0, < 4.3.0', + 'rdflib-jsonld >= 0.3.0, < 0.5.0', + 'html5lib >= 0.90, <= 0.9999999', + 'mistune >= 0.7.3, < 0.8', + 'typing >= 3.5.2, < 3.6', + 'CacheControl >= 0.11.7, < 0.12', 'lockfile >= 0.9'] install_requires.append("avro") # TODO: remove me once cwltool is @@ -46,7 +48,7 @@ extras_require = {} # TODO: to be removed when the above is added setup(name='schema-salad', - version='2.0', + version='2.1', description='Schema Annotations for Linked Avro Data (SALAD)', long_description=open(README).read(), author='Common workflow language working group', From 9a9d639212fe0980b1018c262026037b53bd1ece Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Fri, 16 Dec 2016 16:07:32 -0500 Subject: [PATCH 011/116] Tweak cmap to better propagate filename down. 
(#69) --- schema_salad/sourceline.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/schema_salad/sourceline.py b/schema_salad/sourceline.py index e85c1a906..a1a44ca14 100644 --- a/schema_salad/sourceline.py +++ b/schema_salad/sourceline.py @@ -86,27 +86,31 @@ def cmap(d, lc=None, fn=None): # type: (Union[int, float, str, unicode, Dict, L fn = "test" if isinstance(d, CommentedMap): + fn = d.lc.filename if hasattr(d.lc, "filename") else fn for k,v in d.iteritems(): if k in d.lc.data: - d[k] = cmap(v, lc=d.lc.data[k], fn=d.lc.filename) + d[k] = cmap(v, lc=d.lc.data[k], fn=fn) else: - d[k] = cmap(v, lc, fn=d.lc.filename) + d[k] = cmap(v, lc, fn=fn) return d if isinstance(d, CommentedSeq): + fn = d.lc.filename if hasattr(d.lc, "filename") else fn for k,v in enumerate(d): if k in d.lc.data: - d[k] = cmap(v, lc=d.lc.data[k], fn=d.lc.filename) + d[k] = cmap(v, lc=d.lc.data[k], fn=fn) else: - d[k] = cmap(v, lc, fn=d.lc.filename) + d[k] = cmap(v, lc, fn=fn) return d if isinstance(d, dict): cm = CommentedMap() for k,v in d.iteritems(): if isinstance(v, CommentedBase): uselc = [v.lc.line, v.lc.col, v.lc.line, v.lc.col] + vfn = v.lc.filename if hasattr(v.lc, "filename") else fn else: uselc = lc - cm[k] = cmap(v) + vfn = fn + cm[k] = cmap(v, lc=uselc, fn=vfn) cm.lc.add_kv_line_col(k, uselc) cm.lc.filename = fn return cm @@ -115,9 +119,11 @@ def cmap(d, lc=None, fn=None): # type: (Union[int, float, str, unicode, Dict, L for k,v in enumerate(d): if isinstance(v, CommentedBase): uselc = [v.lc.line, v.lc.col, v.lc.line, v.lc.col] + vfn = v.lc.filename if hasattr(v.lc, "filename") else fn else: uselc = lc - cs.append(cmap(v)) + vfn = fn + cs.append(cmap(v, lc=uselc, fn=vfn)) cs.lc.add_kv_line_col(k, uselc) cs.lc.filename = fn return cs From 159541c5726f9f1445aab13583743db45952f962 Mon Sep 17 00:00:00 2001 From: Pau Ruiz i Safont Date: Wed, 21 Dec 2016 09:45:09 +0000 Subject: [PATCH 012/116] Paths and URIs conversions are more generalized made 
the compatible with win32 now --- schema_salad/ref_resolver.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index 0d2bee8b6..72957606d 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -4,11 +4,13 @@ import hashlib import logging import collections +import urllib import urlparse import re import copy import pprint from StringIO import StringIO +import pathlib2 as pathlib from . import validate from .aslist import aslist @@ -108,7 +110,7 @@ def fetch_text(self, url): return resp.text elif scheme == 'file': try: - with open(path) as fp: + with open(urllib.url2pathname(urlparse.urlparse(url).path)) as fp: read = fp.read() if hasattr(read, "decode"): return read.decode("utf-8") @@ -137,7 +139,7 @@ def check_exists(self, url): # type: (unicode) -> bool return False return True elif scheme == 'file': - return os.path.exists(path) + return os.path.exists(urllib.url2pathname(urlparse.urlparse(url).path)) else: raise ValueError('Unsupported scheme in url: %s' % url) @@ -247,6 +249,8 @@ def expand_url(self, (splitbase.scheme, splitbase.netloc, pt, splitbase.query, frg)) elif scoped_ref is not None and not split.fragment: pass + elif base_url is None: + url = pathlib.Path(os.path.join(os.getcwd(), url)).as_uri() else: url = self.fetcher.urljoin(base_url, url) @@ -371,7 +375,6 @@ def resolve_ref(self, checklinks=True # type: bool ): # type: (...) 
-> Tuple[Union[CommentedMap, CommentedSeq, unicode], Dict[unicode, Any]] - base_url = base_url or u'file://%s/' % os.path.abspath('.') obj = None # type: CommentedMap resolved_obj = None # type: Union[CommentedMap, CommentedSeq, unicode] @@ -418,7 +421,7 @@ def resolve_ref(self, raise ValueError(u"Expected CommentedMap or string, got %s: `%s`" % (type(ref), unicode(ref))) url = self.expand_url(ref, base_url, scoped_id=(obj is not None)) - + base_url = base_url or pathlib.Path(os.getcwd()).as_uri() + '/' # Has this reference been loaded already? if url in self.idx and (not mixin): return self.idx[url], {} From 0c0e4cc8de0bc020e3704125e67c5e2da86cee9f Mon Sep 17 00:00:00 2001 From: Pau Ruiz i Safont Date: Wed, 21 Dec 2016 10:04:34 +0000 Subject: [PATCH 013/116] Added pathlib2 requirement To make the path changes work under python2.7 --- requirements.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/requirements.txt b/requirements.txt index b41a60ffd..79f8af063 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,3 +8,4 @@ html5lib==0.9999999 mistune==0.7.3 CacheControl==0.11.7 lockfile==0.12.2 +pathlib2==2.1.0 From f302f5f25fed9394ce8492cb5afe282c5a2e7a35 Mon Sep 17 00:00:00 2001 From: Pau Ruiz i Safont Date: Wed, 21 Dec 2016 11:34:16 +0000 Subject: [PATCH 014/116] Fix a couple of test cases By having a less disrupting change --- schema_salad/ref_resolver.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index 72957606d..33a0194d6 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -249,8 +249,6 @@ def expand_url(self, (splitbase.scheme, splitbase.netloc, pt, splitbase.query, frg)) elif scoped_ref is not None and not split.fragment: pass - elif base_url is None: - url = pathlib.Path(os.path.join(os.getcwd(), url)).as_uri() else: url = self.fetcher.urljoin(base_url, url) @@ -381,6 +379,10 @@ def resolve_ref(self, inc = False mixin = None # type: 
Dict[unicode, Any] + if not base_url: + ref = pathlib.Path(os.path.join(os.getcwd(), ref)).as_uri() + base_url = pathlib.Path(os.getcwd()).as_uri() + '/' + sl = SourceLine(obj, None, ValueError) # If `ref` is a dict, look for special directives. if isinstance(ref, CommentedMap): @@ -421,7 +423,6 @@ def resolve_ref(self, raise ValueError(u"Expected CommentedMap or string, got %s: `%s`" % (type(ref), unicode(ref))) url = self.expand_url(ref, base_url, scoped_id=(obj is not None)) - base_url = base_url or pathlib.Path(os.getcwd()).as_uri() + '/' # Has this reference been loaded already? if url in self.idx and (not mixin): return self.idx[url], {} From da751f7f6f80291527368d73ad5f728c80ff6e90 Mon Sep 17 00:00:00 2001 From: Pau Ruiz i Safont Date: Wed, 21 Dec 2016 11:41:47 +0000 Subject: [PATCH 015/116] Don't assume the type of ref Only convert to file:/// Uri if it's a a string --- schema_salad/ref_resolver.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index 33a0194d6..f06bff2dd 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -380,7 +380,8 @@ def resolve_ref(self, mixin = None # type: Dict[unicode, Any] if not base_url: - ref = pathlib.Path(os.path.join(os.getcwd(), ref)).as_uri() + if isinstance(ref, unicode): + ref = pathlib.Path(os.path.join(os.getcwd(), ref)).as_uri() base_url = pathlib.Path(os.getcwd()).as_uri() + '/' sl = SourceLine(obj, None, ValueError) From cb754df9ddb15ec6547f88452b6209c543ad2c8a Mon Sep 17 00:00:00 2001 From: Pau Ruiz i Safont Date: Wed, 21 Dec 2016 12:07:44 +0000 Subject: [PATCH 016/116] Make url2pathname args compatible with python3 --- schema_salad/ref_resolver.py | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index f06bff2dd..ca6977f9d 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -10,7 +10,12 @@ 
import copy import pprint from StringIO import StringIO -import pathlib2 as pathlib +try: + # python 3 + import pathlib +except: + # python 2 + import pathlib2 as pathlib from . import validate from .aslist import aslist @@ -110,7 +115,7 @@ def fetch_text(self, url): return resp.text elif scheme == 'file': try: - with open(urllib.url2pathname(urlparse.urlparse(url).path)) as fp: + with open(urllib.url2pathname(str(urlparse.urlparse(url).path))) as fp: read = fp.read() if hasattr(read, "decode"): return read.decode("utf-8") @@ -139,7 +144,7 @@ def check_exists(self, url): # type: (unicode) -> bool return False return True elif scheme == 'file': - return os.path.exists(urllib.url2pathname(urlparse.urlparse(url).path)) + return os.path.exists(urllib.url2pathname(str(urlparse.urlparse(url).path))) else: raise ValueError('Unsupported scheme in url: %s' % url) From ad742f3ff4cf1756318788eaf5f03ad68b15593c Mon Sep 17 00:00:00 2001 From: Pau Ruiz i Safont Date: Wed, 21 Dec 2016 14:16:52 +0000 Subject: [PATCH 017/116] Manage pathlib and pathlib2 for all python versions --- requirements.txt | 2 +- schema_salad/ref_resolver.py | 3 +-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/requirements.txt b/requirements.txt index 79f8af063..e55fed407 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,6 +1,7 @@ typing==3.5.2.2 ; python_version>="2.7" avro-python3 ; python_version>="3" avro==1.8.1 ; python_version<"3" +pathlib2==2.1.0 ; python_version<"3.4" ruamel.yaml==0.12.4 rdflib==4.2.1 rdflib-jsonld==0.4.0 @@ -8,4 +9,3 @@ html5lib==0.9999999 mistune==0.7.3 CacheControl==0.11.7 lockfile==0.12.2 -pathlib2==2.1.0 diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index ca6977f9d..6a0d20af4 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -11,10 +11,9 @@ import pprint from StringIO import StringIO try: - # python 3 + # python >3.3 import pathlib except: - # python 2 import pathlib2 as pathlib from . 
import validate From 0e616d1e00737851a82891b79579d54cc97fb8aa Mon Sep 17 00:00:00 2001 From: Pau Ruiz i Safont Date: Wed, 21 Dec 2016 15:25:54 +0000 Subject: [PATCH 018/116] Fix types --- schema_salad/ref_resolver.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index 6a0d20af4..0ba1a9348 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -12,9 +12,9 @@ from StringIO import StringIO try: # python >3.3 - import pathlib + import pathlib # type: ignore except: - import pathlib2 as pathlib + import pathlib2 as pathlib # type: ignore from . import validate from .aslist import aslist From 7a793ac2b2abc640f5f652330eec01a8da8de8f2 Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" Date: Wed, 21 Dec 2016 18:02:24 +0200 Subject: [PATCH 019/116] drop html5lib pin, add pip-conflict-check --- setup.py | 1 - tox.ini | 8 +++++++- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/setup.py b/setup.py index 97c466663..36a07f110 100755 --- a/setup.py +++ b/setup.py @@ -33,7 +33,6 @@ 'ruamel.yaml >= 0.12.4, < 0.12.5', 'rdflib >= 4.2.0, < 4.3.0', 'rdflib-jsonld >= 0.3.0, < 0.5.0', - 'html5lib >= 0.90, <= 0.9999999', 'mistune >= 0.7.3, < 0.8', 'typing >= 3.5.2, < 3.6', 'CacheControl >= 0.11.7, < 0.12', diff --git a/tox.ini b/tox.ini index acd2ee831..03201f604 100644 --- a/tox.ini +++ b/tox.ini @@ -1,6 +1,6 @@ [tox] #envlist = py35-lint,py34-lint,py33-lint,py27-lint,py35-unit,py34-unit,py33-unit,py27-unit -envlist = py27-lint, py27-unit, py35-mypy +envlist = py27-lint, py27-unit, py35-mypy, py27-pipconflictchecker skipsdist = True [tox:travis] @@ -38,6 +38,12 @@ commands = flake8 schema_salad setup.py whitelist_externals = flake8 deps = flake8 +[testenv:py27-pipconflictchecker] +commands = pipconflictchecker +whitelist_externals = pipconflictchecker +deps = pip-conflict-checker + + [testenv:py35-unit] commands = python setup.py test From 
01af23db1500a991f58be5625290e53de488f166 Mon Sep 17 00:00:00 2001 From: Pau Ruiz i Safont Date: Wed, 21 Dec 2016 16:20:18 +0000 Subject: [PATCH 020/116] Fix typing Stopped using native pathlib, like in cwltool --- requirements.txt | 2 +- schema_salad/ref_resolver.py | 10 +- setup.py | 3 +- typeshed/2.7/pathlib2.pyi | 188 +++++++++++++++++++++++++++++++++++ 4 files changed, 194 insertions(+), 9 deletions(-) create mode 100644 typeshed/2.7/pathlib2.pyi diff --git a/requirements.txt b/requirements.txt index e55fed407..ecc3ddef7 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ typing==3.5.2.2 ; python_version>="2.7" avro-python3 ; python_version>="3" avro==1.8.1 ; python_version<"3" -pathlib2==2.1.0 ; python_version<"3.4" +pathlib2==2.1.0 ruamel.yaml==0.12.4 rdflib==4.2.1 rdflib-jsonld==0.4.0 diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index 0ba1a9348..92c8cfa2a 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -10,11 +10,7 @@ import copy import pprint from StringIO import StringIO -try: - # python >3.3 - import pathlib # type: ignore -except: - import pathlib2 as pathlib # type: ignore +import pathlib2 from . import validate from .aslist import aslist @@ -385,8 +381,8 @@ def resolve_ref(self, if not base_url: if isinstance(ref, unicode): - ref = pathlib.Path(os.path.join(os.getcwd(), ref)).as_uri() - base_url = pathlib.Path(os.getcwd()).as_uri() + '/' + ref = pathlib2.Path(os.path.join(os.getcwd(), ref)).as_uri() + base_url = pathlib2.Path(os.getcwd()).as_uri() + '/' sl = SourceLine(obj, None, ValueError) # If `ref` is a dict, look for special directives. 
diff --git a/setup.py b/setup.py index 97c466663..c8a768749 100755 --- a/setup.py +++ b/setup.py @@ -37,7 +37,8 @@ 'mistune >= 0.7.3, < 0.8', 'typing >= 3.5.2, < 3.6', 'CacheControl >= 0.11.7, < 0.12', - 'lockfile >= 0.9'] + 'lockfile >= 0.9', + 'pathlib2 >= 2.1.0'] install_requires.append("avro") # TODO: remove me once cwltool is # available in Debian Stable, Ubuntu 12.04 LTS diff --git a/typeshed/2.7/pathlib2.pyi b/typeshed/2.7/pathlib2.pyi new file mode 100644 index 000000000..30bbe2cbd --- /dev/null +++ b/typeshed/2.7/pathlib2.pyi @@ -0,0 +1,188 @@ +# Stubs for pathlib2 (Python 2) +# +# NOTE: This dynamically typed stub was automatically generated by stubgen. + +from typing import Any, AnyStr, Type, TypeVar, Optional, Union +from collections import Sequence + +_P = TypeVar('_P', bound='PurePath') + +intern = ... # type: Any +basestring = ... # type: Any +supports_symlinks = ... # type: bool +nt = ... # type: Any + +class _Flavour: + join = ... # type: Any + def __init__(self) -> None: ... + def parse_parts(self, parts): ... + def join_parsed_parts(self, drv, root, parts, drv2, root2, parts2): ... + +class _WindowsFlavour(_Flavour): + sep = ... # type: str + altsep = ... # type: str + has_drv = ... # type: bool + pathmod = ... # type: Any + is_supported = ... # type: Any + drive_letters = ... # type: Any + ext_namespace_prefix = ... # type: str + reserved_names = ... # type: Any + def splitroot(self, part, sep: Any = ...): ... + def casefold(self, s): ... + def casefold_parts(self, parts): ... + def resolve(self, path): ... + def is_reserved(self, parts): ... + def make_uri(self, path): ... + def gethomedir(self, username): ... + +class _PosixFlavour(_Flavour): + sep = ... # type: str + altsep = ... # type: str + has_drv = ... # type: bool + pathmod = ... # type: Any + is_supported = ... # type: Any + def splitroot(self, part, sep: Any = ...): ... + def casefold(self, s): ... + def casefold_parts(self, parts): ... + def resolve(self, path): ... 
+ def is_reserved(self, parts): ... + def make_uri(self, path): ... + def gethomedir(self, username): ... + +class _Accessor: ... + +class _NormalAccessor(_Accessor): + stat = ... # type: Any + lstat = ... # type: Any + open = ... # type: Any + listdir = ... # type: Any + chmod = ... # type: Any + lchmod = ... # type: Any + #def lchmod(self, pathobj, mode): ... + mkdir = ... # type: Any + unlink = ... # type: Any + rmdir = ... # type: Any + rename = ... # type: Any + replace = ... # type: Any + symlink = ... # type: Any + #def symlink(a, b, target_is_directory): ... + #@staticmethod + #def symlink(a, b, target_is_directory): ... + utime = ... # type: Any + def readlink(self, path): ... + +class _Selector: + child_parts = ... # type: Any + successor = ... # type: Any + def __init__(self, child_parts) -> None: ... + def select_from(self, parent_path): ... + +class _TerminatingSelector: ... + +class _PreciseSelector(_Selector): + name = ... # type: Any + def __init__(self, name, child_parts) -> None: ... + +class _WildcardSelector(_Selector): + pat = ... # type: Any + def __init__(self, pat, child_parts) -> None: ... + +class _RecursiveWildcardSelector(_Selector): + def __init__(self, pat, child_parts) -> None: ... + +class _PathParents(Sequence): + def __init__(self, path) -> None: ... + def __len__(self): ... + def __getitem__(self, idx): ... + +class PurePath: + def __new__(cls, *args): ... + def __reduce__(self): ... + def as_posix(self): ... + def __bytes__(self): ... + def as_uri(self) -> str: ... + def __eq__(self, other): ... + def __ne__(self, other): ... + def __hash__(self): ... + def __lt__(self, other): ... + def __le__(self, other): ... + def __gt__(self, other): ... + def __ge__(self, other): ... + drive = ... # type: Any + root = ... # type: Any + @property + def anchor(self): ... + @property + def name(self): ... + @property + def suffix(self): ... + @property + def suffixes(self): ... + @property + def stem(self): ... + def with_name(self, name): ... 
+ def with_suffix(self, suffix): ... + def relative_to(self, *other): ... + @property + def parts(self): ... + def joinpath(self, *args): ... + def __truediv__(self, key): ... + def __rtruediv__(self, key): ... + __div__ = ... # type: Any + __rdiv__ = ... # type: Any + @property + def parent(self): ... + @property + def parents(self): ... + def is_absolute(self): ... + def is_reserved(self): ... + def match(self, path_pattern): ... + +class PurePosixPath(PurePath): ... +class PureWindowsPath(PurePath): ... + +class Path(PurePath): + def __new__(cls: Type[_P], *args: Union[AnyStr, PurePath], + **kwargs: Any) -> _P: ... + def __enter__(self): ... + def __exit__(self, t, v, tb): ... + @classmethod + def cwd(cls): ... + @classmethod + def home(cls): ... + def samefile(self, other_path): ... + def iterdir(self): ... + def glob(self, pattern): ... + def rglob(self, pattern): ... + def absolute(self): ... + def resolve(self): ... + def stat(self): ... + def owner(self): ... + def group(self): ... + def open(self, mode: str = ..., buffering: int = ..., encoding: Optional[Any] = ..., errors: Optional[Any] = ..., newline: Optional[Any] = ...): ... + def read_bytes(self): ... + def read_text(self, encoding: Optional[Any] = ..., errors: Optional[Any] = ...): ... + def write_bytes(self, data): ... + def write_text(self, data, encoding: Optional[Any] = ..., errors: Optional[Any] = ...): ... + def touch(self, mode: int = ..., exist_ok: bool = ...): ... + def mkdir(self, mode: int = ..., parents: bool = ..., exist_ok: bool = ...): ... + def chmod(self, mode): ... + def lchmod(self, mode): ... + def unlink(self): ... + def rmdir(self): ... + def lstat(self): ... + def rename(self, target): ... + def replace(self, target): ... + def symlink_to(self, target, target_is_directory: bool = ...): ... + def exists(self): ... + def is_dir(self): ... + def is_file(self): ... + def is_symlink(self): ... + def is_block_device(self): ... + def is_char_device(self): ... + def is_fifo(self): ... 
+ def is_socket(self): ... + def expanduser(self): ... + +class PosixPath(Path, PurePosixPath): ... +class WindowsPath(Path, PureWindowsPath): ... From a5bbb369cfca55dda577927bb4e6d0d7ba2c0ae4 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Fri, 23 Dec 2016 11:08:31 -0500 Subject: [PATCH 021/116] Ensure keys are sorted when generated CommentedMap from a regular dict. (#72) --- schema_salad/sourceline.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/schema_salad/sourceline.py b/schema_salad/sourceline.py index a1a44ca14..e09171c80 100644 --- a/schema_salad/sourceline.py +++ b/schema_salad/sourceline.py @@ -103,7 +103,8 @@ def cmap(d, lc=None, fn=None): # type: (Union[int, float, str, unicode, Dict, L return d if isinstance(d, dict): cm = CommentedMap() - for k,v in d.iteritems(): + for k in sorted(d.keys()): + v = d[k] if isinstance(v, CommentedBase): uselc = [v.lc.line, v.lc.col, v.lc.line, v.lc.col] vfn = v.lc.filename if hasattr(v.lc, "filename") else fn From a3ba891f027730e19201ea846df09db318bfd60f Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 27 Dec 2016 10:52:25 -0500 Subject: [PATCH 022/116] Relax ruamel.yaml version pin. 
(#73) --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index ecc3ddef7..9cbbbc7bc 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,7 +2,7 @@ typing==3.5.2.2 ; python_version>="2.7" avro-python3 ; python_version>="3" avro==1.8.1 ; python_version<"3" pathlib2==2.1.0 -ruamel.yaml==0.12.4 +ruamel.yaml==0.13.7 rdflib==4.2.1 rdflib-jsonld==0.4.0 html5lib==0.9999999 diff --git a/setup.py b/setup.py index 66ed96bf4..c3934529e 100755 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ install_requires = [ 'setuptools', 'requests >= 1.0', - 'ruamel.yaml >= 0.12.4, < 0.12.5', + 'ruamel.yaml >= 0.12.4', 'rdflib >= 4.2.0, < 4.3.0', 'rdflib-jsonld >= 0.3.0, < 0.5.0', 'mistune >= 0.7.3, < 0.8', From 8edfbc2fc34820e2d29e927eca0e6bd848c618f2 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 27 Dec 2016 14:13:02 -0500 Subject: [PATCH 023/116] Bugfix resolving references to document fragments. (#74) * Bugfix resolving references to document fragments. --- schema_salad/ref_resolver.py | 6 ++++-- schema_salad/tests/frag.yml | 4 ++++ schema_salad/tests/test_examples.py | 13 +++++++++---- 3 files changed, 17 insertions(+), 6 deletions(-) create mode 100644 schema_salad/tests/frag.yml diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index 92c8cfa2a..7912a7222 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -380,10 +380,12 @@ def resolve_ref(self, mixin = None # type: Dict[unicode, Any] if not base_url: - if isinstance(ref, unicode): - ref = pathlib2.Path(os.path.join(os.getcwd(), ref)).as_uri() base_url = pathlib2.Path(os.getcwd()).as_uri() + '/' + if isinstance(ref, (str, unicode)) and os.sep == "\\": + # Convert Windows paths + ref = ref.replace("\\", "/") + sl = SourceLine(obj, None, ValueError) # If `ref` is a dict, look for special directives. 
if isinstance(ref, CommentedMap): diff --git a/schema_salad/tests/frag.yml b/schema_salad/tests/frag.yml new file mode 100644 index 000000000..7e8818d39 --- /dev/null +++ b/schema_salad/tests/frag.yml @@ -0,0 +1,4 @@ +- id: foo1 + bar: b1 +- id: foo2 + bar: b2 \ No newline at end of file diff --git a/schema_salad/tests/test_examples.py b/schema_salad/tests/test_examples.py index 6ff7ae262..9191f515c 100644 --- a/schema_salad/tests/test_examples.py +++ b/schema_salad/tests/test_examples.py @@ -243,7 +243,7 @@ def test_examples(self): self.assertEqual(proc, src) def test_yaml_float_test(self): - self.assertEqual(ruamel.yaml.load("float-test: 2e-10")["float-test"], + self.assertEqual(ruamel.yaml.safe_load("float-test: 2e-10")["float-test"], 2e-10) def test_typedsl_ref(self): @@ -341,13 +341,13 @@ def test_scoped_id(self): print(g.serialize(format="n3")) def test_mixin(self): + base_url = "file://" + os.getcwd() + "/tests/" ldr = schema_salad.ref_resolver.Loader({}) ra = ldr.resolve_ref(cmap({"$mixin": get_data("tests/mixin.yml"), "one": "five"}), - base_url="file://" + os.getcwd() + "/tests/") + base_url=base_url) self.assertEqual({'id': 'four', 'one': 'five'}, ra[0]) - ldr = schema_salad.ref_resolver.Loader({"id": "@id"}) - base_url = "file://" + os.getcwd() + "/tests/" + ra = ldr.resolve_all(cmap([{ "id": "a", "m": {"$mixin": get_data("tests/mixin.yml")} @@ -368,6 +368,11 @@ def test_mixin(self): 'one': 'two'} }], ra[0]) + def test_fragment(self): + ldr = schema_salad.ref_resolver.Loader({"id": "@id"}) + b, _ = ldr.resolve_ref("schema_salad/tests/frag.yml#foo2") + self.assertEquals({"id": b["id"], "bar":"b2"}, b) + if __name__ == '__main__': unittest.main() From dd496a12e01d58bb0e0b64647376955505f29deb Mon Sep 17 00:00:00 2001 From: "Michael R. 
Crusoe" Date: Wed, 28 Dec 2016 12:04:31 -0700 Subject: [PATCH 024/116] Include all relevant files so tests work post installation --- MANIFEST.in | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/MANIFEST.in b/MANIFEST.in index bf8066c1f..c1b69ae5d 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,5 @@ include gittaggers.py Makefile -include schema_salad/tests/*.py schema_salad/tests/*.yml schema_salad/tests/*.owl +include schema_salad/tests/*.py schema_salad/tests/*.yml +include schema_salad/tests/test_schema/*.yml schema_salad/tests/*.owl include schema_salad/metaschema/* global-exclude *.pyc From a37b623dfdd4723e77e002a711400e74cd0f0081 Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" Date: Wed, 28 Dec 2016 13:07:04 -0700 Subject: [PATCH 025/116] ensure all test data is loaded flexibly --- schema_salad/tests/test_errors.py | 6 ++++-- schema_salad/tests/test_examples.py | 15 ++------------- schema_salad/tests/util.py | 13 +++++++++++++ 3 files changed, 19 insertions(+), 15 deletions(-) create mode 100644 schema_salad/tests/util.py diff --git a/schema_salad/tests/test_errors.py b/schema_salad/tests/test_errors.py index 79f3d6035..25a5eea8c 100644 --- a/schema_salad/tests/test_errors.py +++ b/schema_salad/tests/test_errors.py @@ -1,3 +1,4 @@ +from .util import get_data import unittest from typing import cast from schema_salad.schema import load_schema, load_and_validate @@ -7,7 +8,7 @@ class TestErrors(unittest.TestCase): def test_errors(self): document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema( - u"schema_salad/tests/test_schema/CommonWorkflowLanguage.yml") + get_data(u"tests/test_schema/CommonWorkflowLanguage.yml")) avsc_names = cast(Names, avsc_names) for t in ("test_schema/test1.cwl", @@ -23,7 +24,8 @@ def test_errors(self): "test_schema/test11.cwl"): with self.assertRaises(ValidationException): try: - load_and_validate(document_loader, avsc_names, unicode("schema_salad/tests/"+t), True) + 
load_and_validate(document_loader, avsc_names, + unicode(get_data("tests/"+t)), True) except ValidationException as e: print "\n", e raise diff --git a/schema_salad/tests/test_examples.py b/schema_salad/tests/test_examples.py index 9191f515c..949bc6767 100644 --- a/schema_salad/tests/test_examples.py +++ b/schema_salad/tests/test_examples.py @@ -1,9 +1,9 @@ +from .util import get_data import unittest import schema_salad.ref_resolver import schema_salad.main import schema_salad.schema from schema_salad.jsonld_context import makerdf -from pkg_resources import Requirement, resource_filename, ResolutionError # type: ignore import rdflib import ruamel.yaml import json @@ -17,17 +17,6 @@ from ruamel.yaml.comments import CommentedSeq, CommentedMap -def get_data(filename): - filepath = None - try: - filepath = resource_filename( - Requirement.parse("schema-salad"), filename) - except ResolutionError: - pass - if not filepath or not os.path.isfile(filepath): - filepath = os.path.join(os.path.dirname(__file__), os.pardir, filename) - return filepath - class TestSchemas(unittest.TestCase): def test_schemas(self): @@ -370,7 +359,7 @@ def test_mixin(self): def test_fragment(self): ldr = schema_salad.ref_resolver.Loader({"id": "@id"}) - b, _ = ldr.resolve_ref("schema_salad/tests/frag.yml#foo2") + b, _ = ldr.resolve_ref(get_data("tests/frag.yml#foo2")) self.assertEquals({"id": b["id"], "bar":"b2"}, b) diff --git a/schema_salad/tests/util.py b/schema_salad/tests/util.py new file mode 100644 index 000000000..0fcaf526e --- /dev/null +++ b/schema_salad/tests/util.py @@ -0,0 +1,13 @@ +from pkg_resources import Requirement, resource_filename, ResolutionError # type: ignore +import os + +def get_data(filename): + filepath = None + try: + filepath = resource_filename( + Requirement.parse("schema-salad"), filename) + except ResolutionError: + pass + if not filepath or not os.path.isfile(filepath): + filepath = os.path.join(os.path.dirname(__file__), os.pardir, filename) + return filepath 
From a2e42365d60b7f289f4cb676932777bf96af649a Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" Date: Wed, 28 Dec 2016 13:27:58 -0700 Subject: [PATCH 026/116] simply the manifest --- MANIFEST.in | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index c1b69ae5d..f93ff6fd7 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,5 +1,4 @@ include gittaggers.py Makefile -include schema_salad/tests/*.py schema_salad/tests/*.yml -include schema_salad/tests/test_schema/*.yml schema_salad/tests/*.owl +include schema_salad/tests/* include schema_salad/metaschema/* global-exclude *.pyc From f34d4ac3e2263a14956c7ccd5e32b6b767793f33 Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" Date: Wed, 28 Dec 2016 13:37:37 -0700 Subject: [PATCH 027/116] explicitly include MD files & fix release test --- MANIFEST.in | 3 +++ release-test.sh | 7 ++++--- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index f93ff6fd7..abcfe2a15 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -1,4 +1,7 @@ include gittaggers.py Makefile include schema_salad/tests/* +include schema_salad/tests/test_schema/*.md +include schema_salad/tests/test_schema/*.yml +include schema_salad/tests/test_schema/*.cwl include schema_salad/metaschema/* global-exclude *.pyc diff --git a/release-test.sh b/release-test.sh index 0873214f0..9091245df 100755 --- a/release-test.sh +++ b/release-test.sh @@ -58,10 +58,11 @@ rm lib/python-wheels/setuptools* \ && pip install setuptools==20.10.1 pip install ${package}*tar.gz pip install pytest -tar xzf ${package}*tar.gz -cd ${package}* +mkdir out +tar --extract --directory=out -z -f ${package}*.tar.gz +cd out/${package}* make dist make test pip uninstall -y ${package} || true; pip uninstall -y ${package} || true; make install mkdir ../not-${module} -pushd ../not-${module} ; ../bin/${run_tests}; popd +pushd ../not-${module} ; ../../bin/${run_tests}; popd From 0c7cc026eab23fe68243d8e4970e9241c739868f Mon Sep 17 
00:00:00 2001 From: Peter Amstutz Date: Wed, 11 Jan 2017 10:49:27 -0500 Subject: [PATCH 028/116] Remove pathlib2 because it is only used for one thing but creates dependency issues. (#79) --- requirements.txt | 1 - schema_salad/ref_resolver.py | 26 ++++++++++++++++++++------ setup.py | 3 +-- 3 files changed, 21 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index 9cbbbc7bc..ee9f1bc19 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,6 @@ typing==3.5.2.2 ; python_version>="2.7" avro-python3 ; python_version>="3" avro==1.8.1 ; python_version<"3" -pathlib2==2.1.0 ruamel.yaml==0.13.7 rdflib==4.2.1 rdflib-jsonld==0.4.0 diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index 7912a7222..649f8df14 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -8,9 +8,8 @@ import urlparse import re import copy -import pprint +import urllib from StringIO import StringIO -import pathlib2 from . import validate from .aslist import aslist @@ -37,6 +36,21 @@ DocumentOrStrType = TypeVar( 'DocumentOrStrType', CommentedSeq, CommentedMap, unicode) +def file_uri(path): # type: (unicode) -> unicode + if path.startswith("file://"): + return path + urlpath = urllib.pathname2url(str(path)) + if urlpath.startswith("//"): + return "file:%s" % urlpath + else: + return "file://%s" % urlpath + +def uri_file_path(url): # type: (unicode) -> unicode + split = urlparse.urlsplit(url) + if split.scheme == "file": + urllib.url2pathname(str(split.path)) + else: + raise ValueError("Not a file URI") class NormDict(CommentedMap): @@ -110,7 +124,7 @@ def fetch_text(self, url): return resp.text elif scheme == 'file': try: - with open(urllib.url2pathname(str(urlparse.urlparse(url).path))) as fp: + with open(urllib.url2pathname(str(path))) as fp: read = fp.read() if hasattr(read, "decode"): return read.decode("utf-8") @@ -139,7 +153,7 @@ def check_exists(self, url): # type: (unicode) -> bool return False return True elif 
scheme == 'file': - return os.path.exists(urllib.url2pathname(str(urlparse.urlparse(url).path))) + return os.path.exists(urllib.url2pathname(str(path))) else: raise ValueError('Unsupported scheme in url: %s' % url) @@ -380,10 +394,10 @@ def resolve_ref(self, mixin = None # type: Dict[unicode, Any] if not base_url: - base_url = pathlib2.Path(os.getcwd()).as_uri() + '/' + base_url = file_uri(os.getcwd()) + "/" if isinstance(ref, (str, unicode)) and os.sep == "\\": - # Convert Windows paths + # Convert Windows path separator in ref ref = ref.replace("\\", "/") sl = SourceLine(obj, None, ValueError) diff --git a/setup.py b/setup.py index c3934529e..e5ac19ba2 100755 --- a/setup.py +++ b/setup.py @@ -36,8 +36,7 @@ 'mistune >= 0.7.3, < 0.8', 'typing >= 3.5.2, < 3.6', 'CacheControl >= 0.11.7, < 0.12', - 'lockfile >= 0.9', - 'pathlib2 >= 2.1.0'] + 'lockfile >= 0.9'] install_requires.append("avro") # TODO: remove me once cwltool is # available in Debian Stable, Ubuntu 12.04 LTS From b88352677c387fe47620e47565ae6175d9073569 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Wed, 11 Jan 2017 12:57:12 -0500 Subject: [PATCH 029/116] Bugfix to uri_file_path. (#80) * Allow document fragments in paths with correct quoting * Add test for file_uri and uri_file_path. 
--- schema_salad/ref_resolver.py | 14 ++++++++------ schema_salad/tests/test_examples.py | 8 ++++++++ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index 649f8df14..ed25f0eac 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -36,19 +36,21 @@ DocumentOrStrType = TypeVar( 'DocumentOrStrType', CommentedSeq, CommentedMap, unicode) -def file_uri(path): # type: (unicode) -> unicode +def file_uri(path): # type: (str) -> str if path.startswith("file://"): return path - urlpath = urllib.pathname2url(str(path)) + pathsp = path.split("#", 2) + frag = "#" + urllib.quote(str(pathsp[1])) if len(pathsp) == 2 else "" + urlpath = urllib.pathname2url(str(pathsp[0])) if urlpath.startswith("//"): - return "file:%s" % urlpath + return "file:%s%s" % (urlpath, frag) else: - return "file://%s" % urlpath + return "file://%s%s" % (urlpath, frag) -def uri_file_path(url): # type: (unicode) -> unicode +def uri_file_path(url): # type: (str) -> str split = urlparse.urlsplit(url) if split.scheme == "file": - urllib.url2pathname(str(split.path)) + return urllib.url2pathname(str(split.path)) + ("#" + urllib.unquote(str(split.fragment)) if split.fragment else "") else: raise ValueError("Not a file URI") diff --git a/schema_salad/tests/test_examples.py b/schema_salad/tests/test_examples.py index 949bc6767..6b0277c02 100644 --- a/schema_salad/tests/test_examples.py +++ b/schema_salad/tests/test_examples.py @@ -362,6 +362,14 @@ def test_fragment(self): b, _ = ldr.resolve_ref(get_data("tests/frag.yml#foo2")) self.assertEquals({"id": b["id"], "bar":"b2"}, b) + def test_file_uri(self): + # Note: this test probably won't pass on Windows. Someone with a + # windows box should add an alternate test. 
+ self.assertEquals("file:///foo/bar%20baz/quux", schema_salad.ref_resolver.file_uri("/foo/bar baz/quux")) + self.assertEquals("/foo/bar baz/quux", schema_salad.ref_resolver.uri_file_path("file:///foo/bar%20baz/quux")) + self.assertEquals("file:///foo/bar%20baz/quux#zing%20zong", schema_salad.ref_resolver.file_uri("/foo/bar baz/quux#zing zong")) + self.assertEquals("/foo/bar baz/quux#zing zong", schema_salad.ref_resolver.uri_file_path("file:///foo/bar%20baz/quux#zing%20zong")) + if __name__ == '__main__': unittest.main() From 55018dda947ac5d01bf080d8c977d9a2628c6573 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Wed, 11 Jan 2017 13:02:27 -0500 Subject: [PATCH 030/116] Bump version to 2.2 (#81) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index e5ac19ba2..1e8c2fde5 100755 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ extras_require = {} # TODO: to be removed when the above is added setup(name='schema-salad', - version='2.1', + version='2.2', description='Schema Annotations for Linked Avro Data (SALAD)', long_description=open(README).read(), author='Common workflow language working group', From d21fa841af6d45e3be00697e253cadd3869f68f4 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Thu, 19 Jan 2017 10:10:16 -0500 Subject: [PATCH 031/116] Expand uris in $schemas in resolve_all(). 
(#82) --- schema_salad/ref_resolver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index ed25f0eac..b1f64c8ef 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -326,7 +326,7 @@ def add_context(self, newcontext, baseuri=""): raise validate.ValidationException( "Refreshing context that already has stuff in it") - self.url_fields = set() + self.url_fields = set(("$schemas",)) self.scoped_ref_fields = {} self.vocab_fields = set() self.identifiers = set() From 42f4bddce38b3255b4613f8be606a372c629e56d Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Thu, 26 Jan 2017 11:07:27 -0500 Subject: [PATCH 032/116] Fix performance regression in schema loading by stripping ruamel.yaml metadata, (#83) avoiding unnecessary copies, and using shallow copies. --- schema_salad/schema.py | 29 +++++++++++++++++++++-------- 1 file changed, 21 insertions(+), 8 deletions(-) diff --git a/schema_salad/schema.py b/schema_salad/schema.py index 342ec4680..fe3542d24 100644 --- a/schema_salad/schema.py +++ b/schema_salad/schema.py @@ -332,7 +332,6 @@ def replace_type(items, spec, loader, found): # type: (Any, Dict[unicode, Any], Loader, Set[unicode]) -> Any """ Go through and replace types in the 'spec' mapping""" - items = copy.deepcopy(items) if isinstance(items, dict): # recursively check these fields for types to replace if "type" in items and items["type"] in ("record", "enum"): @@ -342,6 +341,7 @@ def replace_type(items, spec, loader, found): else: found.add(items["name"]) + items = copy.copy(items) for n in ("type", "items", "fields"): if n in items: items[n] = replace_type(items[n], spec, loader, found) @@ -388,8 +388,8 @@ def make_valid_avro(items, # type: Avro union=False # type: bool ): # type: (...) 
-> Union[Avro, Dict] - items = copy.deepcopy(items) if isinstance(items, dict): + items = copy.copy(items) if items.get("name"): items["name"] = avro_name(items["name"]) @@ -424,19 +424,31 @@ def make_valid_avro(items, # type: Avro items = avro_name(items) return items +def deepcopy_strip(item): # type: (Any) -> Any + """Make a deep copy of list and dict objects. + + Intentionally do not copy attributes. This is to discard CommentedMap and + CommentedSeq metadata which is very expensive with regular copy.deepcopy. + + """ + + if isinstance(item, dict): + return {k: deepcopy_strip(v) for k,v in item.iteritems()} + elif isinstance(item, list): + return [deepcopy_strip(k) for k in item] + else: + return item def extend_and_specialize(items, loader): # type: (List[Dict[unicode, Any]], Loader) -> List[Dict[unicode, Any]] """Apply 'extend' and 'specialize' to fully materialize derived record types.""" - types = {} # type: Dict[unicode, Any] - for t in items: - types[t["name"]] = t + items = deepcopy_strip(items) + types = {t["name"]: t for t in items} # type: Dict[unicode, Any] n = [] for t in items: - t = copy.deepcopy(t) if "extends" in t: spec = {} # type: Dict[unicode, unicode] if "specialize" in t: @@ -450,7 +462,7 @@ def extend_and_specialize(items, loader): raise Exception("Extends %s in %s refers to invalid base type" % ( t["extends"], t["name"])) - basetype = copy.deepcopy(types[ex]) + basetype = copy.copy(types[ex]) if t["type"] == "record": if spec: @@ -466,6 +478,7 @@ def extend_and_specialize(items, loader): exsym.extend(basetype.get("symbols", [])) if t["type"] == "record": + t = copy.copy(t) exfields.extend(t.get("fields", [])) t["fields"] = exfields @@ -477,6 +490,7 @@ def extend_and_specialize(items, loader): else: fieldnames.add(field["name"]) elif t["type"] == "enum": + t = copy.copy(t) exsym.extend(t.get("symbols", [])) t["symbol"] = exsym @@ -507,7 +521,6 @@ def extend_and_specialize(items, loader): return n - def make_avro_schema(i, # type: 
List[Dict[unicode, Any]] loader # type: Loader ): From 822c8c161c8aa788c7e84eb5ff1d390a4dc34195 Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" Date: Wed, 8 Feb 2017 13:20:42 +0200 Subject: [PATCH 033/116] upgrade to mypy 0.470 --- schema_salad/ref_resolver.py | 2 +- tox.ini | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index b1f64c8ef..dec2f57c6 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -56,7 +56,7 @@ def uri_file_path(url): # type: (str) -> str class NormDict(CommentedMap): - def __init__(self, normalize=unicode): # type: (type) -> None + def __init__(self, normalize=unicode): # type: (Callable) -> None super(NormDict, self).__init__() self.normalize = normalize diff --git a/tox.ini b/tox.ini index 03201f604..0c4ef7663 100644 --- a/tox.ini +++ b/tox.ini @@ -14,7 +14,7 @@ deps = -rrequirements.txt commands = make mypy whitelist_externals = make deps = - mypy-lang>=0.4 + mypy>=0.470 typed-ast -rrequirements.txt From e47f4e6f30d774b97f31c0c46ff1c176010713ef Mon Sep 17 00:00:00 2001 From: chapmanb Date: Thu, 16 Feb 2017 05:17:35 -0500 Subject: [PATCH 034/116] Improve debugging: missing filename and bad lists These are debugging helpers for problematic CWL inputs, helping with ongoing work debugging CWL Toil runs on AWS. - Avoids failing on LineCol inputs that do not have file and position references. Without this in place the missing `filename` and None attributes for `line` and `col` obscure the real error message. Fixes common-workflow-language/cwltool#264 - Prints out value of a CWL data item that is not an expected list. 
--- schema_salad/sourceline.py | 12 ++++++------ schema_salad/validate.py | 4 ++-- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/schema_salad/sourceline.py b/schema_salad/sourceline.py index e09171c80..0589b3efb 100644 --- a/schema_salad/sourceline.py +++ b/schema_salad/sourceline.py @@ -150,13 +150,13 @@ def makeError(self, msg): # type: (Text) -> Any return self.raise_type(msg) errs = [] if self.key is None or self.item.lc.data is None or self.key not in self.item.lc.data: - lead = "%s:%i:%i:" % (self.item.lc.filename, - self.item.lc.line+1, - self.item.lc.col+1) + lead = "%s:%i:%i:" % (self.item.lc.filename if hasattr(self.item.lc, "filename") else "", + (self.item.lc.line or 0)+1, + (self.item.lc.col or 0)+1) else: - lead = "%s:%i:%i:" % (self.item.lc.filename, - self.item.lc.data[self.key][0]+1, - self.item.lc.data[self.key][1]+1) + lead = "%s:%i:%i:" % (self.item.lc.filename if hasattr(self.item.lc, "filename") else "", + (self.item.lc.data[self.key][0] or 0)+1, + (self.item.lc.data[self.key][1] or 0)+1) for m in msg.splitlines(): if lineno_re.match(m): errs.append(m) diff --git a/schema_salad/validate.py b/schema_salad/validate.py index 75e094b7a..0f3161eab 100644 --- a/schema_salad/validate.py +++ b/schema_salad/validate.py @@ -180,8 +180,8 @@ def validate_ex(expected_schema, # type: Schema return True else: if raise_ex: - raise ValidationException(u"the value is not a list, expected list of %s" % ( - friendly(expected_schema.items))) + raise ValidationException(u"the value %s is not a list, expected list of %s" % ( + vpformat(datum), friendly(expected_schema.items))) else: return False elif isinstance(expected_schema, avro.schema.UnionSchema): From 9676ea8882a4f7cc956ac8747645bd569bcbb562 Mon Sep 17 00:00:00 2001 From: "Michael R. 
Crusoe" Date: Thu, 16 Feb 2017 11:33:20 +0100 Subject: [PATCH 035/116] track mypy rename --- Makefile | 2 +- tox.ini | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/Makefile b/Makefile index f5a824066..98d4118cb 100644 --- a/Makefile +++ b/Makefile @@ -187,6 +187,6 @@ jenkins: . env3/bin/activate ; \ pip install -U setuptools pip wheel ; \ ${MAKE} install-dep ; \ - pip install -U mypy-lang typed-ast ; ${MAKE} mypy + pip install -U mypy ; ${MAKE} mypy FORCE: diff --git a/tox.ini b/tox.ini index 0c4ef7663..43a7f9bc2 100644 --- a/tox.ini +++ b/tox.ini @@ -15,7 +15,6 @@ commands = make mypy whitelist_externals = make deps = mypy>=0.470 - typed-ast -rrequirements.txt [testenv:py35-lint] From 62094960c93759acd910560769d26b49b57956b4 Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" Date: Thu, 16 Feb 2017 13:53:44 +0100 Subject: [PATCH 036/116] newer pip dependency --- release-test.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/release-test.sh b/release-test.sh index 9091245df..e69224789 100755 --- a/release-test.sh +++ b/release-test.sh @@ -7,7 +7,7 @@ package=schema-salad module=schema_salad repo=https://github.com/common-workflow-language/schema_salad.git run_tests="py.test --pyarg ${module}" -pipver=6.0 # minimum required version of pip +pipver=7.0.2 # minimum required version of pip rm -Rf testenv? 
|| /bin/true @@ -21,7 +21,7 @@ virtualenv testenv4 source testenv1/bin/activate rm testenv1/lib/python-wheels/setuptools* \ && pip install --force-reinstall -U pip==${pipver} \ - && pip install setuptools==20.10.1 + && pip install setuptools==20.10.1 wheel make install-dependencies make test pip uninstall -y ${package} || true; pip uninstall -y ${package} || true; make install @@ -37,7 +37,7 @@ cd testenv2 source bin/activate rm lib/python-wheels/setuptools* \ && pip install --force-reinstall -U pip==${pipver} \ - && pip install setuptools==20.10.1 + && pip install setuptools==20.10.1 wheel pip install -e git+${repo}@${HEAD}#egg=${package} cd src/${package} make install-dependencies @@ -55,7 +55,7 @@ cd ../testenv3/ source bin/activate rm lib/python-wheels/setuptools* \ && pip install --force-reinstall -U pip==${pipver} \ - && pip install setuptools==20.10.1 + && pip install setuptools==20.10.1 wheel pip install ${package}*tar.gz pip install pytest mkdir out From 7522a24afbe34fb8eb07be3f97539fac7b2e14e0 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Wed, 22 Feb 2017 10:16:04 -0500 Subject: [PATCH 037/116] Make split_frag optional in converting fileuri. (#88) * Make split_frag optional in converting fileuri. 
--- schema_salad/ref_resolver.py | 12 ++++++++---- schema_salad/tests/test_examples.py | 3 ++- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index dec2f57c6..f050d8123 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -36,12 +36,16 @@ DocumentOrStrType = TypeVar( 'DocumentOrStrType', CommentedSeq, CommentedMap, unicode) -def file_uri(path): # type: (str) -> str +def file_uri(path, split_frag=False): # type: (str, bool) -> str if path.startswith("file://"): return path - pathsp = path.split("#", 2) - frag = "#" + urllib.quote(str(pathsp[1])) if len(pathsp) == 2 else "" - urlpath = urllib.pathname2url(str(pathsp[0])) + if split_frag: + pathsp = path.split("#", 2) + frag = "#" + urllib.quote(str(pathsp[1])) if len(pathsp) == 2 else "" + urlpath = urllib.pathname2url(str(pathsp[0])) + else: + urlpath = urllib.pathname2url(path) + frag = "" if urlpath.startswith("//"): return "file:%s%s" % (urlpath, frag) else: diff --git a/schema_salad/tests/test_examples.py b/schema_salad/tests/test_examples.py index 6b0277c02..adc04fcc6 100644 --- a/schema_salad/tests/test_examples.py +++ b/schema_salad/tests/test_examples.py @@ -367,7 +367,8 @@ def test_file_uri(self): # windows box should add an alternate test. 
self.assertEquals("file:///foo/bar%20baz/quux", schema_salad.ref_resolver.file_uri("/foo/bar baz/quux")) self.assertEquals("/foo/bar baz/quux", schema_salad.ref_resolver.uri_file_path("file:///foo/bar%20baz/quux")) - self.assertEquals("file:///foo/bar%20baz/quux#zing%20zong", schema_salad.ref_resolver.file_uri("/foo/bar baz/quux#zing zong")) + self.assertEquals("file:///foo/bar%20baz/quux%23zing%20zong", schema_salad.ref_resolver.file_uri("/foo/bar baz/quux#zing zong")) + self.assertEquals("file:///foo/bar%20baz/quux#zing%20zong", schema_salad.ref_resolver.file_uri("/foo/bar baz/quux#zing zong", split_frag=True)) self.assertEquals("/foo/bar baz/quux#zing zong", schema_salad.ref_resolver.uri_file_path("file:///foo/bar%20baz/quux#zing%20zong")) From 50d0361192a47abb7f64754e4f3f221fc2c627b5 Mon Sep 17 00:00:00 2001 From: Tjelvar Olsson Date: Mon, 27 Feb 2017 15:28:43 +0000 Subject: [PATCH 038/116] Make ref_resolver.Loader more fault tolerant Currently it falls over if the HOME environment variable is not set.
--- schema_salad/ref_resolver.py | 2 +- schema_salad/tests/test_ref_resolver.py | 10 ++++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) create mode 100644 schema_salad/tests/test_ref_resolver.py diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index f050d8123..63f522010 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -204,7 +204,7 @@ def __init__(self, else: self.cache = {} - if session is None: + if (session is None) and ("HOME" in os.environ): self.session = CacheControl(requests.Session(), cache=FileCache(os.path.join(os.environ["HOME"], ".cache", "salad"))) else: diff --git a/schema_salad/tests/test_ref_resolver.py b/schema_salad/tests/test_ref_resolver.py new file mode 100644 index 000000000..c8e5d8278 --- /dev/null +++ b/schema_salad/tests/test_ref_resolver.py @@ -0,0 +1,10 @@ +"""Test the ref_resolver module.""" + +def test_Loader_initialisation_when_HOME_env_is_missing(): + from schema_salad.ref_resolver import Loader + import os + + # Simulate missing HOME environment variable. 
+ if "HOME" in os.environ: + del os.environ["HOME"] + Loader(ctx={}) From f16c1a84402d7e5eb10be737d66516dc53242ace Mon Sep 17 00:00:00 2001 From: Tjelvar Olsson Date: Mon, 27 Feb 2017 17:41:07 +0000 Subject: [PATCH 039/116] Add more directory fallbacks Loader session --- schema_salad/ref_resolver.py | 16 ++++++-- schema_salad/tests/test_ref_resolver.py | 50 +++++++++++++++++++++++-- 2 files changed, 60 insertions(+), 6 deletions(-) diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index 63f522010..f1ea0d8f1 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -204,9 +204,19 @@ def __init__(self, else: self.cache = {} - if (session is None) and ("HOME" in os.environ): - self.session = CacheControl(requests.Session(), - cache=FileCache(os.path.join(os.environ["HOME"], ".cache", "salad"))) + if session is None: + if "HOME" in os.environ: + self.session = CacheControl( + requests.Session(), + cache=FileCache(os.path.join(os.environ["HOME"], ".cache", "salad"))) + elif "TMP" in os.environ: + self.session = CacheControl( + requests.Session(), + cache=FileCache(os.path.join(os.environ["TMP"], ".cache", "salad"))) + else: + self.session = CacheControl( + requests.Session(), + cache=FileCache("/tmp", ".cache", "salad")) else: self.session = session diff --git a/schema_salad/tests/test_ref_resolver.py b/schema_salad/tests/test_ref_resolver.py index c8e5d8278..659d830fd 100644 --- a/schema_salad/tests/test_ref_resolver.py +++ b/schema_salad/tests/test_ref_resolver.py @@ -1,10 +1,54 @@ """Test the ref_resolver module.""" -def test_Loader_initialisation_when_HOME_env_is_missing(): +import shutil +import tempfile + +import pytest # type: ignore + +@pytest.fixture +def tmp_dir_fixture(request): + d = tempfile.mkdtemp() + + @request.addfinalizer + def teardown(): + shutil.rmtree(d) + return d + +def test_Loader_initialisation_for_HOME_env_var(tmp_dir_fixture): + import os from schema_salad.ref_resolver import Loader + from requests 
import Session + + # Ensure HOME is set. + os.environ["HOME"] = tmp_dir_fixture + + loader = Loader(ctx={}) + assert isinstance(loader.session, Session) + +def test_Loader_initialisation_for_TMP_env_var(tmp_dir_fixture): import os + from schema_salad.ref_resolver import Loader + from requests import Session - # Simulate missing HOME environment variable. + # Ensure HOME is missing. if "HOME" in os.environ: del os.environ["HOME"] - Loader(ctx={}) + # Ensure TMP is present. + os.environ["TMP"] = tmp_dir_fixture + + loader = Loader(ctx={}) + assert isinstance(loader.session, Session) + +def test_Loader_initialisation_with_neither_TMP_HOME_set(tmp_dir_fixture): + import os + from schema_salad.ref_resolver import Loader + from requests import Session + + # Ensure HOME is missing. + if "HOME" in os.environ: + del os.environ["HOME"] + if "TMP" in os.environ: + del os.environ["TMP"] + + loader = Loader(ctx={}) + assert isinstance(loader.session, Session) From 15c0ab88b48ccf28ba10804b9a6c061207e07281 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Thu, 2 Mar 2017 10:08:35 -0500 Subject: [PATCH 040/116] Relax strict checking foreign properties (#90) * Separate strictness of checking foreign properties from strict checking of primary properties. * Bump version. * Fix early exit check. * Tweak foreign property warning. 
* follow two spaces before a comment rule in this case the comments are type definitions --- schema_salad/validate.py | 80 +++++++++++++++++++++++++--------------- setup.py | 2 +- 2 files changed, 52 insertions(+), 30 deletions(-) diff --git a/schema_salad/validate.py b/schema_salad/validate.py index 0f3161eab..6e9e936a0 100644 --- a/schema_salad/validate.py +++ b/schema_salad/validate.py @@ -4,9 +4,12 @@ import sys import urlparse import re +import logging + from typing import Any, Union from .sourceline import SourceLine, lineno_re, bullets, indent +_logger = logging.getLogger("salad") class ValidationException(Exception): pass @@ -54,12 +57,14 @@ def vpformat(datum): # type: (Any) -> str return a -def validate_ex(expected_schema, # type: Schema - datum, # type: Any - identifiers=None, # type: Set[unicode] - strict=False, # type: bool - foreign_properties=None, # type: Set[unicode] - raise_ex=True # type: bool +def validate_ex(expected_schema, # type: Schema + datum, # type: Any + identifiers=None, # type: Set[unicode] + strict=False, # type: bool + foreign_properties=None, # type: Set[unicode] + raise_ex=True, # type: bool + strict_foreign_properties=False, # type: bool + logger=_logger # type: logging.Logger ): # type: (...) 
-> bool """Determine if a python datum is an instance of a schema.""" @@ -167,9 +172,11 @@ def validate_ex(expected_schema, # type: Schema for i, d in enumerate(datum): try: sl = SourceLine(datum, i, ValidationException) - if not validate_ex(expected_schema.items, d, identifiers, strict=strict, + if not validate_ex(expected_schema.items, d, identifiers, + strict=strict, foreign_properties=foreign_properties, - raise_ex=raise_ex): + raise_ex=raise_ex, + strict_foreign_properties=strict_foreign_properties): return False except ValidationException as v: if raise_ex: @@ -186,7 +193,8 @@ def validate_ex(expected_schema, # type: Schema return False elif isinstance(expected_schema, avro.schema.UnionSchema): for s in expected_schema.schemas: - if validate_ex(s, datum, identifiers, strict=strict, raise_ex=False): + if validate_ex(s, datum, identifiers, strict=strict, raise_ex=False, + strict_foreign_properties=strict_foreign_properties): return True if not raise_ex: @@ -207,7 +215,9 @@ def validate_ex(expected_schema, # type: Schema checked.append(s) try: validate_ex(s, datum, identifiers, strict=strict, - foreign_properties=foreign_properties, raise_ex=True) + foreign_properties=foreign_properties, + raise_ex=True, + strict_foreign_properties=strict_foreign_properties) except ClassValidationException as e: raise except ValidationException as e: @@ -256,8 +266,10 @@ def validate_ex(expected_schema, # type: Schema try: sl = SourceLine(datum, f.name, unicode) - if not validate_ex(f.type, fieldval, identifiers, strict=strict, foreign_properties=foreign_properties, - raise_ex=raise_ex): + if not validate_ex(f.type, fieldval, identifiers, strict=strict, + foreign_properties=foreign_properties, + raise_ex=raise_ex, + strict_foreign_properties=strict_foreign_properties): return False except ValidationException as v: if f.name not in datum: @@ -266,24 +278,34 @@ def validate_ex(expected_schema, # type: Schema errors.append(sl.makeError(u"the `%s` field is not valid because\n%s" % ( 
f.name, indent(str(v))))) - if strict: - for d in datum: - found = False - for f in expected_schema.fields: - if d == f.name: - found = True - if not found: - sl = SourceLine(datum, d, unicode) - if d not in identifiers and d not in foreign_properties and d[0] not in ("@", "$"): - if not raise_ex: - return False - split = urlparse.urlsplit(d) - if split.scheme: - errors.append(sl.makeError( - u"unrecognized extension field `%s` and strict is True. Did you include a $schemas section?" % (d))) + for d in datum: + found = False + for f in expected_schema.fields: + if d == f.name: + found = True + if not found: + sl = SourceLine(datum, d, unicode) + if d not in identifiers and d not in foreign_properties and d[0] not in ("@", "$"): + if (d not in identifiers and strict) and ( + d not in foreign_properties and strict_foreign_properties) and not raise_ex: + return False + split = urlparse.urlsplit(d) + if split.scheme: + err = sl.makeError(u"unrecognized extension field `%s`%s." + " Did you include " + "a $schemas section?" 
% ( + d, " and strict_foreign_properties is True" if strict_foreign_properties else "")) + if strict_foreign_properties: + errors.append(err) + else: + logger.warn(err) + else: + err = sl.makeError(u"invalid field `%s`, expected one of: %s" % ( + d, ", ".join("'%s'" % fn.name for fn in expected_schema.fields))) + if strict: + errors.append(err) else: - errors.append(sl.makeError(u"invalid field `%s`, expected one of: %s" % ( - d, ", ".join("'%s'" % fn.name for fn in expected_schema.fields)))) + logger.warn(err) if errors: if raise_ex: diff --git a/setup.py b/setup.py index 1e8c2fde5..8bfe65737 100755 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ extras_require = {} # TODO: to be removed when the above is added setup(name='schema-salad', - version='2.2', + version='2.3', description='Schema Annotations for Linked Avro Data (SALAD)', long_description=open(README).read(), author='Common workflow language working group', From 1e05b445a56d8cd9da56c6a2bea1ed50ef806c2b Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Thu, 2 Mar 2017 17:51:34 -0500 Subject: [PATCH 041/116] Set publicID when passing plain data to RDF parser. (#91) * Set publicID when passing plain data to RDF parser. 
--- requirements.txt | 3 +-- schema_salad/jsonld_context.py | 4 ++-- schema_salad/ref_resolver.py | 2 +- setup.py | 2 +- tox.ini | 11 ++++++++++- 5 files changed, 15 insertions(+), 7 deletions(-) diff --git a/requirements.txt b/requirements.txt index ee9f1bc19..7684172aa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,9 +2,8 @@ typing==3.5.2.2 ; python_version>="2.7" avro-python3 ; python_version>="3" avro==1.8.1 ; python_version<"3" ruamel.yaml==0.13.7 -rdflib==4.2.1 +rdflib==4.2.2 rdflib-jsonld==0.4.0 -html5lib==0.9999999 mistune==0.7.3 CacheControl==0.11.7 lockfile==0.12.2 diff --git a/schema_salad/jsonld_context.py b/schema_salad/jsonld_context.py index 7141b0742..d3f615369 100755 --- a/schema_salad/jsonld_context.py +++ b/schema_salad/jsonld_context.py @@ -236,10 +236,10 @@ def makerdf(workflow, # type: Union[str, unicode] if isinstance(wf, list): for w in wf: w["@context"] = ctx - g.parse(data=json.dumps(w), format='json-ld', location=workflow) + g.parse(data=json.dumps(w), format='json-ld', publicID=str(workflow)) else: wf["@context"] = ctx - g.parse(data=json.dumps(wf), format='json-ld', location=workflow) + g.parse(data=json.dumps(wf), format='json-ld', publicID=str(workflow)) # Bug in json-ld loader causes @id fields to be added to the graph for sub, pred, obj in g.triples((None, URIRef("@id"), None)): diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index f1ea0d8f1..5d028ad4c 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -311,7 +311,7 @@ def add_schemas(self, ns, base_url): self.cache[fetchurl] = rdflib.graph.Graph() for fmt in ['xml', 'turtle', 'rdfa']: try: - self.cache[fetchurl].parse(data=content, format=fmt) + self.cache[fetchurl].parse(data=content, format=fmt, publicID=str(fetchurl)) self.graph += self.cache[fetchurl] break except xml.sax.SAXParseException: diff --git a/setup.py b/setup.py index 8bfe65737..c72e827ba 100755 --- a/setup.py +++ b/setup.py @@ -31,7 +31,7 @@ 
'setuptools', 'requests >= 1.0', 'ruamel.yaml >= 0.12.4', - 'rdflib >= 4.2.0, < 4.3.0', + 'rdflib >= 4.2.2, < 4.3.0', 'rdflib-jsonld >= 0.3.0, < 0.5.0', 'mistune >= 0.7.3, < 0.8', 'typing >= 3.5.2, < 3.6', diff --git a/tox.ini b/tox.ini index 43a7f9bc2..8a81fbaa0 100644 --- a/tox.ini +++ b/tox.ini @@ -10,11 +10,20 @@ skipsdist = True [testenv] deps = -rrequirements.txt +[testenv:py35-py2_mypy] +commands = make mypy +whitelist_externals = make +deps = + mypy==0.470 + typed-ast==0.6.3 + -rrequirements.txt + [testenv:py35-mypy] commands = make mypy whitelist_externals = make deps = - mypy>=0.470 + mypy==0.470 + typed-ast==0.6.3 -rrequirements.txt [testenv:py35-lint] From d2d7b1e16d11b224391cec18b291279b2a158915 Mon Sep 17 00:00:00 2001 From: kapil kumar Date: Sun, 5 Mar 2017 15:50:28 +0530 Subject: [PATCH 042/116] added a warning message when error is being ignored --- schema_salad/ref_resolver.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index 5d028ad4c..f19767a5e 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -954,7 +954,8 @@ def validate_links(self, document, base_url): else: errors.append(sl.makeError("checking item\n%s" % ( validate.indent(unicode(v))))) - + else: + _logger.warn( validate.indent(unicode(v))) if errors: if len(errors) > 1: raise validate.ValidationException( From 4f4fe734e1a6eb48ed978e31ca68c00cb39d65a7 Mon Sep 17 00:00:00 2001 From: kapil kumar Date: Sun, 5 Mar 2017 16:56:20 +0530 Subject: [PATCH 043/116] added installation from source instructions --- README.rst | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.rst b/README.rst index e66427cdd..38ffb4298 100644 --- a/README.rst +++ b/README.rst @@ -27,6 +27,13 @@ Usage $ python >>> import schema_salad +To install from source:: + + git clone https://github.com/common-workflow-language/schema_salad + cd schema_salad + python setup.py install + + Documentation
------------- From 8ab39f9b4ed1b8c211a1ff3dfd30bfb1250d894f Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" Date: Mon, 6 Mar 2017 14:02:54 +0200 Subject: [PATCH 044/116] Upgrade to mypy v0.501 --strict --- Makefile | 4 +- schema_salad/jsonld_context.py | 22 ++-- schema_salad/main.py | 6 +- schema_salad/makedoc.py | 29 +++--- schema_salad/ref_resolver.py | 177 ++++++++++++++++++--------------- schema_salad/schema.py | 16 +-- schema_salad/sourceline.py | 12 ++- schema_salad/tests/util.py | 3 +- schema_salad/validate.py | 6 +- tox.ini | 4 +- typeshed/2.7/rdflib/graph.pyi | 4 +- 11 files changed, 154 insertions(+), 129 deletions(-) diff --git a/Makefile b/Makefile index 98d4118cb..29c6b736a 100644 --- a/Makefile +++ b/Makefile @@ -173,9 +173,7 @@ mypy: ${PYSOURCES} rm -Rf typeshed/2.7/ruamel/yaml ln -s $(shell python -c 'from __future__ import print_function; import ruamel.yaml; import os.path; print(os.path.dirname(ruamel.yaml.__file__))') \ typeshed/2.7/ruamel/ - MYPYPATH=typeshed/2.7 mypy --py2 --disallow-untyped-calls \ - --fast-parser --warn-redundant-casts --warn-unused-ignores \ - schema_salad + MYPYPATH=typeshed/2.7 mypy --py2 --strict schema_salad/*.py jenkins: rm -Rf env && virtualenv env diff --git a/schema_salad/jsonld_context.py b/schema_salad/jsonld_context.py index d3f615369..43a2a38e6 100755 --- a/schema_salad/jsonld_context.py +++ b/schema_salad/jsonld_context.py @@ -19,25 +19,26 @@ import urlparse import logging from .aslist import aslist -from typing import Any, cast, Dict, Iterable, Tuple, Union +from typing import (cast, Any, Dict, Iterable, List, Optional, Text, Tuple, + Union) from .ref_resolver import Loader, ContextType _logger = logging.getLogger("salad") def pred(datatype, # type: Dict[str, Union[Dict, str]] - field, # type: Dict + field, # type: Optional[Dict] name, # type: str context, # type: ContextType defaultBase, # type: str namespaces # type: Dict[str, rdflib.namespace.Namespace] ): - # type: (...) 
-> Union[Dict, str] + # type: (...) -> Union[Dict, Text] split = urlparse.urlsplit(name) - vee = None # type: Union[str, unicode] + vee = None # type: Optional[Union[str, unicode]] - if split.scheme: + if split.scheme != '': vee = name (ns, ln) = rdflib.namespace.split_uri(unicode(vee)) name = ln @@ -45,9 +46,9 @@ def pred(datatype, # type: Dict[str, Union[Dict, str]] vee = unicode(namespaces[ns[0:-1]][ln]) _logger.debug("name, v %s %s", name, vee) - v = None # type: Any + v = None # type: Optional[Dict] - if field and "jsonldPredicate" in field: + if field is not None and "jsonldPredicate" in field: if isinstance(field["jsonldPredicate"], dict): v = {} for k, val in field["jsonldPredicate"].items(): @@ -132,14 +133,15 @@ def process_type(t, # type: Dict[str, Any] _logger.debug("Processing field %s", i) - v = pred(t, i, fieldname, context, defaultPrefix, namespaces) + v = pred(t, i, fieldname, context, defaultPrefix, + namespaces) # type: Union[Dict[Any, Any], unicode, None] if isinstance(v, basestring): v = v if v[0] != "@" else None - else: + elif v is not None: v = v["_@id"] if v.get("_@id", "@")[0] != "@" else None - if v: + if bool(v): (ns, ln) = rdflib.namespace.split_uri(unicode(v)) if ns[0:-1] in namespaces: propnode = namespaces[ns[0:-1]][ln] diff --git a/schema_salad/main.py b/schema_salad/main.py index f51184bc7..89279f8e8 100644 --- a/schema_salad/main.py +++ b/schema_salad/main.py @@ -146,7 +146,11 @@ def main(argsl=None): # type: (List[str]) -> int metactx = schema_raw_doc.get("$namespaces", {}) if "$base" in schema_raw_doc: metactx["@base"] = schema_raw_doc["$base"] - (schema_ctx, rdfs) = jsonld_context.salad_to_jsonld_context(schema_doc, metactx) + if schema_doc is not None: + (schema_ctx, rdfs) = jsonld_context.salad_to_jsonld_context( + schema_doc, metactx) + else: + raise Exception("schema_doc is None??") # Create the loader that will be used to load the target document. 
document_loader = Loader(schema_ctx) diff --git a/schema_salad/makedoc.py b/schema_salad/makedoc.py index 0325ad803..259490d6c 100644 --- a/schema_salad/makedoc.py +++ b/schema_salad/makedoc.py @@ -12,7 +12,7 @@ from .add_dictlist import add_dictlist import re import argparse -from typing import Any, IO, Union +from typing import cast, Any, Dict, IO, List, Optional, Set, Text, Union _logger = logging.getLogger("salad") @@ -35,7 +35,7 @@ def has_types(items): # type: (Any) -> List[basestring] return [] -def linkto(item): +def linkto(item): # type: (Text) -> Text _, frg = urlparse.urldefrag(item) return "[%s](#%s)" % (frg, to_id(frg)) @@ -46,10 +46,10 @@ def __init__(self): # type: () -> None super(mistune.Renderer, self).__init__() self.options = {} - def header(self, text, level, raw=None): + def header(self, text, level, raw=None): # type: (Text, int, Any) -> Text return """%s""" % (level, to_id(text), text, level) - def table(self, header, body): + def table(self, header, body): # type: (Text, Text) -> Text return ( '\n%s\n' '\n%s\n
\n' @@ -136,7 +136,7 @@ def number_headings(toc, maindoc): # type: (ToC, str) -> str if not skip: m = re.match(r'^(#+) (.*)', line) - if m: + if m is not None: num = toc.add_entry(len(m.group(1)), m.group(2)) line = "%s %s %s" % (m.group(1), num, m.group(2)) line = re.sub(r'^(https?://\S+)', r'[\1](\1)', line) @@ -167,7 +167,7 @@ def __init__(self, toc, j, renderlist, redirects): self.docAfter = {} # type: Dict[str, List] self.rendered = set() # type: Set[str] self.redirects = redirects - self.title = None # type: str + self.title = None # type: Optional[str] for t in j: if "extends" in t: @@ -224,7 +224,7 @@ def typefmt(self, tp, # type: Any redirects, # type: Dict[str, str] nbsp=False, # type: bool - jsonldPredicate=None # type: Dict[str, str] + jsonldPredicate=None # type: Optional[Dict[str, str]] ): # type: (...) -> Union[str, unicode] global primitiveType @@ -237,7 +237,7 @@ def typefmt(self, if tp["type"] == "https://w3id.org/cwl/salad#array": ar = "array<%s>" % (self.typefmt( tp["items"], redirects, nbsp=True)) - if jsonldPredicate and "mapSubject" in jsonldPredicate: + if jsonldPredicate is not None and "mapSubject" in jsonldPredicate: if "mapPredicate" in jsonldPredicate: ar += " | map<%s.%s, %s.%s>" % (self.typefmt(tp["items"], redirects), jsonldPredicate[ @@ -251,7 +251,7 @@ def typefmt(self, self.typefmt(tp["items"], redirects)) return ar if tp["type"] in ("https://w3id.org/cwl/salad#record", "https://w3id.org/cwl/salad#enum"): - frg = schema.avro_name(tp["name"]) + frg = cast(Text, schema.avro_name(tp["name"])) if tp["name"] in redirects: return """
%s""" % (redirects[tp["name"]], frg) elif tp["name"] in self.typemap: @@ -267,9 +267,10 @@ def typefmt(self, return """%s""" % (primitiveType, schema.avro_name(str(tp))) else: _, frg = urlparse.urldefrag(tp) - if frg: + if frg is not '': tp = frg return """%s""" % (to_id(tp), tp) + raise Exception("We should not be here!") def render_type(self, f, depth): # type: (Dict[str, Any], int) -> None if f["name"] in self.rendered or f["name"] in self.redirects: @@ -328,9 +329,11 @@ def extendsfrom(item, ex): doc = "" if self.title is None and f["doc"]: - self.title = f["doc"][0:f["doc"].index("\n")] - if self.title.startswith('# '): - self.title = self.title[2:] + title = f["doc"][0:f["doc"].index("\n")] + if title.startswith('# '): + self.title = title[2:] + else: + self.title = title if f["type"] == "documentation": f["doc"] = number_headings(self.toc, f["doc"]) diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index 5d028ad4c..abc2a971d 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -27,8 +27,8 @@ from rdflib.namespace import RDF, RDFS, OWL from rdflib.plugins.parsers.notation3 import BadSyntax import xml.sax -from typing import (Any, AnyStr, Callable, cast, Dict, List, Iterable, Tuple, - TypeVar, Union) +from typing import (cast, Any, AnyStr, Callable, Dict, List, Iterable, + Optional, Set, Text, Tuple, TypeVar, Union) _logger = logging.getLogger("salad") ContextType = Dict[unicode, Union[Dict, unicode, Iterable[unicode]]] @@ -54,7 +54,9 @@ def file_uri(path, split_frag=False): # type: (str, bool) -> str def uri_file_path(url): # type: (str) -> str split = urlparse.urlsplit(url) if split.scheme == "file": - return urllib.url2pathname(str(split.path)) + ("#" + urllib.unquote(str(split.fragment)) if split.fragment else "") + return urllib.url2pathname( + str(split.path)) + ("#" + urllib.unquote(str(split.fragment)) + if bool(split.fragment) else "") else: raise ValueError("Not a file URI") @@ -64,20 +66,20 @@ def 
__init__(self, normalize=unicode): # type: (Callable) -> None super(NormDict, self).__init__() self.normalize = normalize - def __getitem__(self, key): + def __getitem__(self, key): # type: (Any) -> Any return super(NormDict, self).__getitem__(self.normalize(key)) - def __setitem__(self, key, value): + def __setitem__(self, key, value): # type: (Any, Any) -> Any return super(NormDict, self).__setitem__(self.normalize(key), value) - def __delitem__(self, key): + def __delitem__(self, key): # type: (Any) -> Any return super(NormDict, self).__delitem__(self.normalize(key)) - def __contains__(self, key): + def __contains__(self, key): # type: (Any) -> Any return super(NormDict, self).__contains__(self.normalize(key)) -def merge_properties(a, b): +def merge_properties(a, b): # type: (List[Any], List[Any]) -> Dict[Any, Any] c = {} for i in a: if i not in b: @@ -109,7 +111,10 @@ def urljoin(self, base_url, url): # type: (unicode, unicode) -> unicode class DefaultFetcher(Fetcher): - def __init__(self, cache, session): # type: (dict, requests.sessions.Session) -> None + def __init__(self, + cache, # type: Dict[Text, Text] + session # type: Optional[requests.sessions.Session] + ): # type: (...) 
-> None self.cache = cache self.session = session @@ -121,7 +126,7 @@ def fetch_text(self, url): split = urlparse.urlsplit(url) scheme, path = split.scheme, split.path - if scheme in [u'http', u'https'] and self.session: + if scheme in [u'http', u'https'] and self.session is not None: try: resp = self.session.get(url) resp.raise_for_status() @@ -151,7 +156,7 @@ def check_exists(self, url): # type: (unicode) -> bool split = urlparse.urlsplit(url) scheme, path = split.scheme, split.path - if scheme in [u'http', u'https'] and self.session: + if scheme in [u'http', u'https'] and self.session is not None: try: resp = self.session.head(url) resp.raise_for_status() @@ -163,7 +168,7 @@ def check_exists(self, url): # type: (unicode) -> bool else: raise ValueError('Unsupported scheme in url: %s' % url) - def urljoin(self, base_url, url): + def urljoin(self, base_url, url): # type: (Text, Text) -> Text return urlparse.urljoin(base_url, url) class Loader(object): @@ -171,7 +176,7 @@ def __init__(self, ctx, # type: ContextType schemagraph=None, # type: rdflib.graph.Graph foreign_properties=None, # type: Set[unicode] - idx=None, # type: Dict[unicode, Union[CommentedMap, CommentedSeq, unicode]] + idx=None, # type: Dict[unicode, Union[CommentedMap, CommentedSeq, unicode, None]] cache=None, # type: Dict[unicode, Any] session=None, # type: requests.sessions.Session fetcher_constructor=None # type: Callable[[Dict[unicode, unicode], requests.sessions.Session], Fetcher] @@ -179,26 +184,22 @@ def __init__(self, # type: (...) 
-> None normalize = lambda url: urlparse.urlsplit(url).geturl() - self.idx = None # type: Dict[unicode, Union[CommentedMap, CommentedSeq, unicode]] if idx is not None: self.idx = idx else: self.idx = NormDict(normalize) self.ctx = {} # type: ContextType - self.graph = None # type: Graph if schemagraph is not None: self.graph = schemagraph else: self.graph = rdflib.graph.Graph() - self.foreign_properties = None # type: Set[unicode] if foreign_properties is not None: self.foreign_properties = foreign_properties else: self.foreign_properties = set() - self.cache = None # type: Dict[unicode, Any] if cache is not None: self.cache = cache else: @@ -220,7 +221,7 @@ def __init__(self, else: self.session = session - if fetcher_constructor: + if fetcher_constructor is not None: self.fetcher_constructor = fetcher_constructor else: self.fetcher_constructor = DefaultFetcher @@ -229,18 +230,18 @@ def __init__(self, self.fetch_text = self.fetcher.fetch_text self.check_exists = self.fetcher.check_exists - self.url_fields = None # type: Set[unicode] - self.scoped_ref_fields = None # type: Dict[unicode, int] - self.vocab_fields = None # type: Set[unicode] - self.identifiers = None # type: Set[unicode] - self.identity_links = None # type: Set[unicode] - self.standalone = None # type: Set[unicode] - self.nolinkcheck = None # type: Set[unicode] + self.url_fields = set() # type: Set[unicode] + self.scoped_ref_fields = {} # type: Dict[unicode, int] + self.vocab_fields = set() # type: Set[unicode] + self.identifiers = set() # type: Set[unicode] + self.identity_links = set() # type: Set[unicode] + self.standalone = None # type: Optional[Set[unicode]] + self.nolinkcheck = set() # type: Set[unicode] self.vocab = {} # type: Dict[unicode, unicode] self.rvocab = {} # type: Dict[unicode, unicode] - self.idmap = None # type: Dict[unicode, Any] - self.mapPredicate = None # type: Dict[unicode, unicode] - self.type_dsl_fields = None # type: Set[unicode] + self.idmap = {} # type: Dict[unicode, Any] + 
self.mapPredicate = {} # type: Dict[unicode, unicode] + self.type_dsl_fields = set() # type: Set[unicode] self.add_context(ctx) @@ -258,23 +259,24 @@ def expand_url(self, if vocab_term and url in self.vocab: return url - if self.vocab and u":" in url: + if bool(self.vocab) and u":" in url: prefix = url.split(u":")[0] if prefix in self.vocab: url = self.vocab[prefix] + url[len(prefix) + 1:] split = urlparse.urlsplit(url) - if split.scheme or url.startswith(u"$(") or url.startswith(u"${"): + if (bool(split.scheme) or url.startswith(u"$(") + or url.startswith(u"${")): pass - elif scoped_id and not split.fragment: + elif scoped_id and not bool(split.fragment): splitbase = urlparse.urlsplit(base_url) frg = u"" - if splitbase.fragment: + if bool(splitbase.fragment): frg = splitbase.fragment + u"/" + split.path else: frg = split.path - pt = splitbase.path if splitbase.path else "/" + pt = splitbase.path if splitbase.path != '' else "/" url = urlparse.urlunsplit( (splitbase.scheme, splitbase.netloc, pt, splitbase.query, frg)) elif scoped_ref is not None and not split.fragment: @@ -336,7 +338,7 @@ def add_schemas(self, ns, base_url): def add_context(self, newcontext, baseuri=""): # type: (ContextType, unicode) -> None - if self.vocab: + if bool(self.vocab): raise validate.ValidationException( "Refreshing context that already has stuff in it") @@ -402,63 +404,69 @@ def resolve_ref(self, base_url=None, # type: unicode checklinks=True # type: bool ): - # type: (...) -> Tuple[Union[CommentedMap, CommentedSeq, unicode], Dict[unicode, Any]] + # type: (...) 
-> Tuple[Union[CommentedMap, CommentedSeq, unicode, None], Dict[unicode, Any]] - obj = None # type: CommentedMap - resolved_obj = None # type: Union[CommentedMap, CommentedSeq, unicode] + lref = ref # type: Union[CommentedMap, CommentedSeq, unicode, None] + obj = None # type: Optional[CommentedMap] + resolved_obj = None # type: Optional[Union[CommentedMap, CommentedSeq, unicode]] inc = False - mixin = None # type: Dict[unicode, Any] + mixin = None # type: Optional[Dict[unicode, Any]] if not base_url: base_url = file_uri(os.getcwd()) + "/" - if isinstance(ref, (str, unicode)) and os.sep == "\\": + if isinstance(lref, (str, unicode)) and os.sep == "\\": # Convert Windows path separator in ref - ref = ref.replace("\\", "/") + lref = lref.replace("\\", "/") sl = SourceLine(obj, None, ValueError) # If `ref` is a dict, look for special directives. - if isinstance(ref, CommentedMap): - obj = ref + if isinstance(lref, CommentedMap): + obj = lref if "$import" in obj: sl = SourceLine(obj, "$import", RuntimeError) if len(obj) == 1: - ref = obj[u"$import"] + lref = obj[u"$import"] obj = None else: raise sl.makeError( - u"'$import' must be the only field in %s" % (unicode(obj))) + u"'$import' must be the only field in %s" + % (unicode(obj))) elif "$include" in obj: sl = SourceLine(obj, "$include", RuntimeError) if len(obj) == 1: - ref = obj[u"$include"] + lref = obj[u"$include"] inc = True obj = None else: raise sl.makeError( - u"'$include' must be the only field in %s" % (unicode(obj))) + u"'$include' must be the only field in %s" + % (unicode(obj))) elif "$mixin" in obj: sl = SourceLine(obj, "$mixin", RuntimeError) - ref = obj[u"$mixin"] + lref = obj[u"$mixin"] mixin = obj obj = None else: - ref = None + lref = None for identifier in self.identifiers: if identifier in obj: - ref = obj[identifier] + lref = obj[identifier] break - if not ref: + if not lref: raise sl.makeError( - u"Object `%s` does not have identifier field in %s" % (relname(obj), self.identifiers)) + u"Object 
`%s` does not have identifier field in %s" + % (relname(obj), self.identifiers)) - if not isinstance(ref, (str, unicode)): - raise ValueError(u"Expected CommentedMap or string, got %s: `%s`" % (type(ref), unicode(ref))) + if not isinstance(lref, (str, unicode)): + raise ValueError(u"Expected CommentedMap or string, got %s: `%s`" + % (type(lref), unicode(lref))) - url = self.expand_url(ref, base_url, scoped_id=(obj is not None)) + url = self.expand_url(lref, base_url, scoped_id=(obj is not None)) # Has this reference been loaded already? if url in self.idx and (not mixin): - return self.idx[url], {} + return cast(Union[CommentedMap, CommentedSeq, unicode], + self.idx[url]), {} sl.raise_type = RuntimeError with sl: @@ -467,7 +475,7 @@ def resolve_ref(self, return self.fetch_text(url), {} doc = None - if obj: + if isinstance(obj, collections.MutableMapping): for identifier in self.identifiers: obj[identifier] = url doc_url = url @@ -479,15 +487,15 @@ def resolve_ref(self, # so if we didn't find the reference earlier then it must not # exist. raise validate.ValidationException( - u"Reference `#%s` not found in file `%s`." % (frg, doc_url)) + u"Reference `#%s` not found in file `%s`." 
+ % (frg, doc_url)) doc = self.fetch(doc_url, inject_ids=(not mixin)) # Recursively expand urls and resolve directives - if mixin: + if bool(mixin): doc = copy.deepcopy(doc) doc.update(mixin) del doc["$mixin"] - url = None resolved_obj, metadata = self.resolve_all( doc, base_url, file_base=doc_url, checklinks=checklinks) else: @@ -496,7 +504,7 @@ def resolve_ref(self, # Requested reference should be in the index now, otherwise it's a bad # reference - if url is not None: + if not bool(mixin): if url in self.idx: resolved_obj = self.idx[url] else: @@ -529,7 +537,7 @@ def _resolve_idmap(self, ls = CommentedSeq() for k in sorted(idmapFieldValue.keys()): val = idmapFieldValue[k] - v = None # type: CommentedMap + v = None # type: Optional[CommentedMap] if not isinstance(val, CommentedMap): if idmapField in loader.mapPredicate: v = CommentedMap( @@ -574,13 +582,13 @@ def _type_dsl(self, return t first = m.group(1) second = third = None - if m.group(2): + if bool(m.group(2)): second = CommentedMap((("type", "array"), ("items", first))) second.lc.add_kv_line_col("type", lc) second.lc.add_kv_line_col("items", lc) second.lc.filename = filename - if m.group(3): + if bool(m.group(3)): third = CommentedSeq([u"null", second or first]) third.lc.add_kv_line_col(0, lc) third.lc.add_kv_line_col(1, lc) @@ -698,7 +706,7 @@ def resolve_all(self, file_base=None, # type: unicode checklinks=True # type: bool ): - # type: (...) -> Tuple[Union[CommentedMap, CommentedSeq, unicode], Dict[unicode, Any]] + # type: (...) 
-> Tuple[Union[CommentedMap, CommentedSeq, unicode, None], Dict[unicode, Any]] loader = self metadata = CommentedMap() # type: CommentedMap if file_base is None: @@ -719,14 +727,14 @@ def resolve_all(self, else: return (document, metadata) - newctx = None # type: Loader + newctx = None # type: Optional[Loader] if isinstance(document, CommentedMap): # Handle $base, $profile, $namespaces, $schemas and $graph if u"$base" in document: base_url = document[u"$base"] if u"$profile" in document: - if not newctx: + if newctx is None: newctx = SubLoader(self) prof = self.fetch(document[u"$profile"]) newctx.add_namespaces(document.get(u"$namespaces", {})) @@ -734,16 +742,16 @@ def resolve_all(self, u"$schemas", []), document[u"$profile"]) if u"$namespaces" in document: - if not newctx: + if newctx is None: newctx = SubLoader(self) newctx.add_namespaces(document[u"$namespaces"]) if u"$schemas" in document: - if not newctx: + if newctx is None: newctx = SubLoader(self) newctx.add_schemas(document[u"$schemas"], file_base) - if newctx: + if newctx is not None: loader = newctx if u"$graph" in document: @@ -833,7 +841,8 @@ def fetch(self, url, inject_ids=True): # type: (unicode, bool) -> Any add_lc_filename(result, url) except yaml.parser.ParserError as e: raise validate.ValidationException("Syntax error %s" % (e)) - if isinstance(result, CommentedMap) and inject_ids and self.identifiers: + if (isinstance(result, CommentedMap) and inject_ids + and bool(self.identifiers)): for identifier in self.identifiers: if identifier not in result: result[identifier] = url @@ -875,7 +884,8 @@ def validate_link(self, field, link, docid): return link if isinstance(link, (str, unicode)): if field in self.vocab_fields: - if link not in self.vocab and link not in self.idx and link not in self.rvocab: + if (link not in self.vocab and link not in self.idx + and link not in self.rvocab): if field in self.scoped_ref_fields: return self.validate_scoped(field, link, docid) elif not 
self.check_exists(link): @@ -886,7 +896,8 @@ def validate_link(self, field, link, docid): return self.validate_scoped(field, link, docid) elif not self.check_exists(link): raise validate.ValidationException( - "Field `%s` contains undefined reference to `%s`" % (field, link)) + "Field `%s` contains undefined reference to `%s`" + % (field, link)) elif isinstance(link, CommentedSeq): errors = [] for n, i in enumerate(link): @@ -894,26 +905,28 @@ def validate_link(self, field, link, docid): link[n] = self.validate_link(field, i, docid) except validate.ValidationException as v: errors.append(v) - if errors: + if bool(errors): raise validate.ValidationException( "\n".join([unicode(e) for e in errors])) elif isinstance(link, CommentedMap): self.validate_links(link, docid) else: raise validate.ValidationException( - "`%s` field is %s, expected string, list, or a dict." % (field, type(link).__name__)) + "`%s` field is %s, expected string, list, or a dict." + % (field, type(link).__name__)) return link - def getid(self, d): # type: (Any) -> unicode + def getid(self, d): # type: (Any) -> Optional[Text] if isinstance(d, dict): for i in self.identifiers: if i in d: - if isinstance(d[i], (str, unicode)): - return d[i] + idd = d[i] + if isinstance(idd, (str, unicode)): + return idd return None def validate_links(self, document, base_url): - # type: (Union[CommentedMap, CommentedSeq, unicode], unicode) -> None + # type: (Union[CommentedMap, CommentedSeq, unicode, None], unicode) -> None docid = self.getid(document) if not docid: docid = base_url @@ -944,9 +957,9 @@ def validate_links(self, document, base_url): except validate.ValidationException as v: if key not in self.nolinkcheck: docid2 = self.getid(val) - if docid2: - errors.append(sl.makeError("checking object `%s`\n%s" % ( - relname(docid2), validate.indent(unicode(v))))) + if docid2 is not None: + errors.append(sl.makeError("checking object `%s`\n%s" + % (relname(docid2), validate.indent(unicode(v))))) else: if 
isinstance(key, basestring): errors.append(sl.makeError("checking field `%s`\n%s" % ( @@ -955,7 +968,7 @@ def validate_links(self, document, base_url): errors.append(sl.makeError("checking item\n%s" % ( validate.indent(unicode(v))))) - if errors: + if bool(errors): if len(errors) > 1: raise validate.ValidationException( u"\n".join([unicode(e) for e in errors])) diff --git a/schema_salad/schema.py b/schema_salad/schema.py index fe3542d24..cf8210d7f 100644 --- a/schema_salad/schema.py +++ b/schema_salad/schema.py @@ -20,7 +20,7 @@ from .aslist import aslist from . import jsonld_context from .sourceline import SourceLine, strip_dup_lineno, add_lc_filename, bullets, relname -from typing import Any, AnyStr, cast, Dict, List, Tuple, TypeVar, Union +from typing import cast, Any, AnyStr, Dict, List, Set, Tuple, TypeVar, Union from ruamel.yaml.comments import CommentedSeq, CommentedMap _logger = logging.getLogger("salad") @@ -243,14 +243,14 @@ def load_and_validate(document_loader, # type: Loader except validate.ValidationException as v: validationErrors += unicode(v) - if validationErrors: + if validationErrors != u"": raise validate.ValidationException(validationErrors) return data, metadata def validate_doc(schema_names, # type: Names - doc, # type: Union[Dict[unicode, Any], List[Dict[unicode, Any]], unicode] + doc, # type: Union[Dict[unicode, Any], List[Dict[unicode, Any]], unicode, None] loader, # type: Loader strict, # type: bool source=None @@ -323,7 +323,7 @@ def validate_doc(schema_names, # type: Names break anyerrors.append(u"%s\n%s" % (objerr, validate.indent(bullets(errors, "- ")))) - if anyerrors: + if len(anyerrors) > 0: raise validate.ValidationException( strip_dup_lineno(bullets(anyerrors, "* "))) @@ -371,7 +371,7 @@ def replace_type(items, spec, loader, found): def avro_name(url): # type: (AnyStr) -> AnyStr doc_url, frg = urlparse.urldefrag(url) - if frg: + if frg != '': if '/' in frg: return frg[frg.rindex('/') + 1:] else: @@ -387,7 +387,7 @@ def 
make_valid_avro(items, # type: Avro found, # type: Set[unicode] union=False # type: bool ): - # type: (...) -> Union[Avro, Dict] + # type: (...) -> Union[Avro, Dict, unicode] if isinstance(items, dict): items = copy.copy(items) if items.get("name"): @@ -402,7 +402,7 @@ def make_valid_avro(items, # type: Avro "Named schemas must have a non-empty name: %s" % items) if items["name"] in found: - return items["name"] + return cast(unicode, items["name"]) else: found.add(items["name"]) for n in ("type", "items", "values", "fields"): @@ -465,7 +465,7 @@ def extend_and_specialize(items, loader): basetype = copy.copy(types[ex]) if t["type"] == "record": - if spec: + if len(spec) > 0: basetype["fields"] = replace_type( basetype.get("fields", []), spec, loader, set()) diff --git a/schema_salad/sourceline.py b/schema_salad/sourceline.py index 0589b3efb..4c20e6a0f 100644 --- a/schema_salad/sourceline.py +++ b/schema_salad/sourceline.py @@ -46,7 +46,7 @@ def indent(v, nolead=False, shift=u" ", bullet=u" "): # type: (Text, bool, Te else: def lineno(i, l): # type: (int, Text) -> Text r = lineno_re.match(l) - if r: + if bool(r): return r.group(1) + (bullet if i == 0 else shift) + r.group(2) else: return (bullet if i == 0 else shift) + l @@ -137,10 +137,14 @@ def __init__(self, item, key=None, raise_type=unicode): # type: (Any, Any, Call self.key = key self.raise_type = raise_type - def __enter__(self): + def __enter__(self): # type: () -> SourceLine return self - def __exit__(self, exc_type, exc_value, traceback): + def __exit__(self, + exc_type, # type: Any + exc_value, # type: Any + traceback # type: Any + ): # -> Any if not exc_value: return raise self.makeError(unicode(exc_value)) @@ -158,7 +162,7 @@ def makeError(self, msg): # type: (Text) -> Any (self.item.lc.data[self.key][0] or 0)+1, (self.item.lc.data[self.key][1] or 0)+1) for m in msg.splitlines(): - if lineno_re.match(m): + if bool(lineno_re.match(m)): errs.append(m) else: errs.append("%s %s" % (lead, m)) diff --git 
a/schema_salad/tests/util.py b/schema_salad/tests/util.py index 0fcaf526e..6353adda4 100644 --- a/schema_salad/tests/util.py +++ b/schema_salad/tests/util.py @@ -1,7 +1,8 @@ from pkg_resources import Requirement, resource_filename, ResolutionError # type: ignore +from typing import Optional, Text import os -def get_data(filename): +def get_data(filename): # type: (Text) -> Optional[Text] filepath = None try: filepath = resource_filename( diff --git a/schema_salad/validate.py b/schema_salad/validate.py index 6e9e936a0..de6a7f218 100644 --- a/schema_salad/validate.py +++ b/schema_salad/validate.py @@ -6,7 +6,7 @@ import re import logging -from typing import Any, Union +from typing import Any, List, Set, Union from .sourceline import SourceLine, lineno_re, bullets, indent _logger = logging.getLogger("salad") @@ -222,7 +222,7 @@ def validate_ex(expected_schema, # type: Schema raise except ValidationException as e: errors.append(unicode(e)) - if errors: + if bool(errors): raise ValidationException(bullets(["tried %s but\n%s" % (friendly( checked[i]), indent(errors[i])) for i in range(0, len(errors))], "- ")) else: @@ -307,7 +307,7 @@ def validate_ex(expected_schema, # type: Schema else: logger.warn(err) - if errors: + if bool(errors): if raise_ex: if classmatch: raise ClassValidationException(bullets(errors, "* ")) diff --git a/tox.ini b/tox.ini index 8a81fbaa0..26303bdc0 100644 --- a/tox.ini +++ b/tox.ini @@ -14,7 +14,7 @@ deps = -rrequirements.txt commands = make mypy whitelist_externals = make deps = - mypy==0.470 + mypy>=0.501 typed-ast==0.6.3 -rrequirements.txt @@ -22,7 +22,7 @@ deps = commands = make mypy whitelist_externals = make deps = - mypy==0.470 + mypy>=0.501 typed-ast==0.6.3 -rrequirements.txt diff --git a/typeshed/2.7/rdflib/graph.pyi b/typeshed/2.7/rdflib/graph.pyi index 6a36968bd..03a465985 100644 --- a/typeshed/2.7/rdflib/graph.pyi +++ b/typeshed/2.7/rdflib/graph.pyi @@ -2,7 +2,7 @@ # # NOTE: This dynamically typed stub was automatically generated by 
stubgen. -from typing import Any, AnyStr, Union, IO, Tuple, Iterator +from typing import Any, AnyStr, Dict, Union, IO, Tuple, Iterator from StringIO import StringIO as BytesIO from rdflib.term import Node, URIRef from rdflib.store import Store @@ -26,7 +26,7 @@ class Graph(Node): def add(self, __tuple_arg_2: Tuple[Node, Node, Node]) -> None: ... def addN(self, quads): ... def remove(self, __tuple_arg_2: Tuple[Union[AnyStr, URIRef], AnyStr, Union[AnyStr, URIRef]]) -> None: ... - def triples(self, __tuple_arg_2: Tuple[Union[AnyStr, URIRef], AnyStr, Union[AnyStr, URIRef]]) -> Iterator[Tuple[AnyStr, AnyStr, AnyStr]]: ... + def triples(self, __tuple_arg_2: Tuple[Union[AnyStr, URIRef, None], Union[AnyStr, None], Union[AnyStr, URIRef, None]]) -> Iterator[Tuple[AnyStr, AnyStr, AnyStr]]: ... def __getitem__(self, item): ... def __len__(self): ... def __iter__(self): ... From f1d824dcd14827fa426924464fcc929fb59fc78b Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" Date: Tue, 7 Mar 2017 11:34:48 +0200 Subject: [PATCH 045/116] Don't switch to mypy v0.501 yet But keep the improvements --- Makefile | 4 +++- schema_salad/ref_resolver.py | 9 ++++----- tox.ini | 4 ++-- 3 files changed, 9 insertions(+), 8 deletions(-) diff --git a/Makefile b/Makefile index 29c6b736a..98d4118cb 100644 --- a/Makefile +++ b/Makefile @@ -173,7 +173,9 @@ mypy: ${PYSOURCES} rm -Rf typeshed/2.7/ruamel/yaml ln -s $(shell python -c 'from __future__ import print_function; import ruamel.yaml; import os.path; print(os.path.dirname(ruamel.yaml.__file__))') \ typeshed/2.7/ruamel/ - MYPYPATH=typeshed/2.7 mypy --py2 --strict schema_salad/*.py + MYPYPATH=typeshed/2.7 mypy --py2 --disallow-untyped-calls \ + --fast-parser --warn-redundant-casts --warn-unused-ignores \ + schema_salad jenkins: rm -Rf env && virtualenv env diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index abc2a971d..181ed2c45 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -67,16 +67,16 @@ 
def __init__(self, normalize=unicode): # type: (Callable) -> None self.normalize = normalize def __getitem__(self, key): # type: (Any) -> Any - return super(NormDict, self).__getitem__(self.normalize(key)) + return super(NormDict, self).__getitem__(self.normalize(key)) # type: ignore def __setitem__(self, key, value): # type: (Any, Any) -> Any return super(NormDict, self).__setitem__(self.normalize(key), value) def __delitem__(self, key): # type: (Any) -> Any - return super(NormDict, self).__delitem__(self.normalize(key)) + return super(NormDict, self).__delitem__(self.normalize(key)) # type: ignore def __contains__(self, key): # type: (Any) -> Any - return super(NormDict, self).__contains__(self.normalize(key)) + return super(NormDict, self).__contains__(self.normalize(key)) # type: ignore def merge_properties(a, b): # type: (List[Any], List[Any]) -> Dict[Any, Any] @@ -465,8 +465,7 @@ def resolve_ref(self, url = self.expand_url(lref, base_url, scoped_id=(obj is not None)) # Has this reference been loaded already? if url in self.idx and (not mixin): - return cast(Union[CommentedMap, CommentedSeq, unicode], - self.idx[url]), {} + return self.idx[url], {} sl.raise_type = RuntimeError with sl: diff --git a/tox.ini b/tox.ini index 26303bdc0..8a81fbaa0 100644 --- a/tox.ini +++ b/tox.ini @@ -14,7 +14,7 @@ deps = -rrequirements.txt commands = make mypy whitelist_externals = make deps = - mypy>=0.501 + mypy==0.470 typed-ast==0.6.3 -rrequirements.txt @@ -22,7 +22,7 @@ deps = commands = make mypy whitelist_externals = make deps = - mypy>=0.501 + mypy==0.470 typed-ast==0.6.3 -rrequirements.txt From 39516e510f090b9056397f97b25fc968a36542ec Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Wed, 8 Mar 2017 11:56:31 -0500 Subject: [PATCH 046/116] Add map and typedsl resolution documentation. (#96) * Add map and typedsl resolution documentation. * Improve schema layout in spec. * Update Salad spec to mark as v1.0 instead of draft-1. 
--- schema_salad/makedoc.py | 30 +++++++++------- schema_salad/metaschema/map_res.yml | 36 +++++++++++++++++++ schema_salad/metaschema/map_res_proc.yml | 12 +++++++ schema_salad/metaschema/map_res_schema.yml | 30 ++++++++++++++++ schema_salad/metaschema/map_res_src.yml | 8 +++++ schema_salad/metaschema/metaschema.yml | 7 ++++ schema_salad/metaschema/metaschema_base.yml | 7 ++++ schema_salad/metaschema/salad.md | 19 ++++++---- schema_salad/metaschema/typedsl_res.yml | 33 +++++++++++++++++ schema_salad/metaschema/typedsl_res_proc.yml | 26 ++++++++++++++ .../metaschema/typedsl_res_schema.yml | 17 +++++++++ schema_salad/metaschema/typedsl_res_src.yml | 9 +++++ schema_salad/schema.py | 10 +++++- setup.py | 2 +- 14 files changed, 224 insertions(+), 22 deletions(-) create mode 100644 schema_salad/metaschema/map_res.yml create mode 100644 schema_salad/metaschema/map_res_proc.yml create mode 100644 schema_salad/metaschema/map_res_schema.yml create mode 100644 schema_salad/metaschema/map_res_src.yml create mode 100644 schema_salad/metaschema/typedsl_res.yml create mode 100644 schema_salad/metaschema/typedsl_res_proc.yml create mode 100644 schema_salad/metaschema/typedsl_res_schema.yml create mode 100644 schema_salad/metaschema/typedsl_res_src.yml diff --git a/schema_salad/makedoc.py b/schema_salad/makedoc.py index 259490d6c..e4d789d18 100644 --- a/schema_salad/makedoc.py +++ b/schema_salad/makedoc.py @@ -158,8 +158,8 @@ def fix_doc(doc): # type: (Union[List[str], str]) -> str class RenderType(object): - def __init__(self, toc, j, renderlist, redirects): - # type: (ToC, List[Dict], str, Dict) -> None + def __init__(self, toc, j, renderlist, redirects, primitiveType): + # type: (ToC, List[Dict], str, Dict, str) -> None self.typedoc = StringIO() self.toc = toc self.subs = {} # type: Dict[str, str] @@ -168,6 +168,7 @@ def __init__(self, toc, j, renderlist, redirects): self.rendered = set() # type: Set[str] self.redirects = redirects self.title = None # type: Optional[str] + 
self.primitiveType = primitiveType for t in j: if "extends" in t: @@ -227,7 +228,6 @@ def typefmt(self, jsonldPredicate=None # type: Optional[Dict[str, str]] ): # type: (...) -> Union[str, unicode] - global primitiveType if isinstance(tp, list): if nbsp and len(tp) <= 3: return " | ".join([self.typefmt(n, redirects, jsonldPredicate=jsonldPredicate) for n in tp]) @@ -264,7 +264,7 @@ def typefmt(self, if str(tp) in redirects: return """%s""" % (redirects[tp], redirects[tp]) elif str(tp) in basicTypes: - return """%s""" % (primitiveType, schema.avro_name(str(tp))) + return """%s""" % (self.primitiveType, schema.avro_name(str(tp))) else: _, frg = urlparse.urldefrag(tp) if frg is not '': @@ -277,6 +277,9 @@ def render_type(self, f, depth): # type: (Dict[str, Any], int) -> None return self.rendered.add(f["name"]) + if f.get("abstract"): + return + if "doc" not in f: f["doc"] = "" @@ -324,7 +327,7 @@ def extendsfrom(item, ex): _, frg = urlparse.urldefrag(f["name"]) num = self.toc.add_entry(depth, frg) - doc = "## %s %s\n" % (num, frg) + doc = "%s %s %s\n" % (("#" * depth), num, frg) else: doc = "" @@ -413,12 +416,12 @@ def extendsfrom(item, ex): self.render_type(self.typemap[s], depth) -def avrold_doc(j, outdoc, renderlist, redirects, brand, brandlink): - # type: (List[Dict[unicode, Any]], IO[Any], str, Dict, str, str) -> None +def avrold_doc(j, outdoc, renderlist, redirects, brand, brandlink, primtype): + # type: (List[Dict[unicode, Any]], IO[Any], str, Dict, str, str, str) -> None toc = ToC() toc.start_numbering = False - rt = RenderType(toc, j, renderlist, redirects) + rt = RenderType(toc, j, renderlist, redirects, primtype) content = rt.typedoc.getvalue() # type: unicode outdoc.write(""" @@ -496,8 +499,7 @@ def avrold_doc(j, outdoc, renderlist, redirects, brand, brandlink): """) -if __name__ == "__main__": - +def main(): # type: () -> None parser = argparse.ArgumentParser() parser.add_argument("schema") parser.add_argument('--only', action='append') @@ -526,10 +528,12 
@@ def avrold_doc(j, outdoc, renderlist, redirects, brand, brandlink): s.append(j) else: raise ValueError("Schema must resolve to a list or a dict") - - primitiveType = args.primtype redirect = {} for r in (args.redirect or []): redirect[r.split("=")[0]] = r.split("=")[1] renderlist = args.only if args.only else [] - avrold_doc(s, sys.stdout, renderlist, redirect, args.brand, args.brandlink) + avrold_doc(s, sys.stdout, renderlist, redirect, args.brand, args.brandlink, args.primtype) + + +if __name__ == "__main__": + main() diff --git a/schema_salad/metaschema/map_res.yml b/schema_salad/metaschema/map_res.yml new file mode 100644 index 000000000..bbcee486b --- /dev/null +++ b/schema_salad/metaschema/map_res.yml @@ -0,0 +1,36 @@ +- | + ## Identifier maps + + The schema may designate certain fields as having a `mapSubject`. If the + value of the field is a JSON object, it must be transformed into an array of + JSON objects. Each key-value pair from the source JSON object is a list + item, each list item must be a JSON objects, and the value of the key is + assigned to the field specified by `mapSubject`. + + Fields which have `mapSubject` specified may also supply a `mapPredicate`. + If the value of a map item is not a JSON object, the item is transformed to a + JSON object with the key assigned to the field specified by `mapSubject` and + the value assigned to the field specified by `mapPredicate`. 
+ + ### Identifier map example + + Given the following schema: + + ``` +- $include: map_res_schema.yml +- | + ``` + + Process the following example: + + ``` +- $include: map_res_src.yml +- | + ``` + + This becomes: + + ``` +- $include: map_res_proc.yml +- | + ``` diff --git a/schema_salad/metaschema/map_res_proc.yml b/schema_salad/metaschema/map_res_proc.yml new file mode 100644 index 000000000..52e9c2204 --- /dev/null +++ b/schema_salad/metaschema/map_res_proc.yml @@ -0,0 +1,12 @@ +{ + "mapped": [ + { + "value": "daphne", + "key": "fred" + }, + { + "value": "scooby", + "key": "shaggy" + } + ] +} \ No newline at end of file diff --git a/schema_salad/metaschema/map_res_schema.yml b/schema_salad/metaschema/map_res_schema.yml new file mode 100644 index 000000000..086cc29ba --- /dev/null +++ b/schema_salad/metaschema/map_res_schema.yml @@ -0,0 +1,30 @@ +{ + "$graph": [{ + "name": "MappedType", + "type": "record", + "documentRoot": true, + "fields": [{ + "name": "mapped", + "type": { + "type": "array", + "items": "ExampleRecord" + }, + "jsonldPredicate": { + "mapSubject": "key", + "mapPredicate": "value" + } + }], + }, + { + "name": "ExampleRecord", + "type": "record", + "fields": [{ + "name": "key", + "type": "string" + }, { + "name": "value", + "type": "string" + } + ] + }] +} diff --git a/schema_salad/metaschema/map_res_src.yml b/schema_salad/metaschema/map_res_src.yml new file mode 100644 index 000000000..9df0c3566 --- /dev/null +++ b/schema_salad/metaschema/map_res_src.yml @@ -0,0 +1,8 @@ +{ + "mapped": { + "shaggy": { + "value": "scooby" + }, + "fred": "daphne" + } +} \ No newline at end of file diff --git a/schema_salad/metaschema/metaschema.yml b/schema_salad/metaschema/metaschema.yml index d5472e968..a1515258a 100644 --- a/schema_salad/metaschema/metaschema.yml +++ b/schema_salad/metaschema/metaschema.yml @@ -18,6 +18,8 @@ $graph: - $import: link_res.yml - $import: vocab_res.yml - $include: import_include.md + - $import: map_res.yml + - $import: typedsl_res.yml 
- name: "Link_Validation" type: documentation @@ -154,6 +156,7 @@ $graph: - name: NamedType type: record abstract: true + docParent: "#Schema" fields: - name: name type: string @@ -164,6 +167,7 @@ $graph: - name: DocType type: record abstract: true + docParent: "#Schema" fields: - name: doc type: @@ -240,6 +244,7 @@ $graph: - name: SaladRecordSchema + docParent: "#Schema" type: record extends: [NamedType, RecordSchema, SchemaDefinedType] documentRoot: true @@ -277,6 +282,7 @@ $graph: mapPredicate: specializeTo - name: SaladEnumSchema + docParent: "#Schema" type: record extends: [EnumSchema, SchemaDefinedType] documentRoot: true @@ -297,6 +303,7 @@ $graph: - name: Documentation type: record + docParent: "#Schema" extends: [NamedType, DocType] documentRoot: true doc: | diff --git a/schema_salad/metaschema/metaschema_base.yml b/schema_salad/metaschema/metaschema_base.yml index 73511d141..d8bf0a3c3 100644 --- a/schema_salad/metaschema/metaschema_base.yml +++ b/schema_salad/metaschema/metaschema_base.yml @@ -8,6 +8,12 @@ $namespaces: xsd: "http://www.w3.org/2001/XMLSchema#" $graph: + +- name: "Schema" + type: documentation + doc: | + # Schema + - name: PrimitiveType type: enum symbols: @@ -35,6 +41,7 @@ $graph: - name: Any type: enum symbols: ["#Any"] + docAfter: "#PrimitiveType" doc: | The **Any** type validates for any non-null value. diff --git a/schema_salad/metaschema/salad.md b/schema_salad/metaschema/salad.md index 6dd3e6a1c..2d4681ee3 100644 --- a/schema_salad/metaschema/salad.md +++ b/schema_salad/metaschema/salad.md @@ -26,7 +26,7 @@ Web. This document is the product of the [Common Workflow Language working group](https://groups.google.com/forum/#!forum/common-workflow-language). 
The -latest version of this document is available in the "schema_salad" directory at +latest version of this document is available in the "schema_salad" repository at https://github.com/common-workflow-language/schema_salad @@ -38,7 +38,7 @@ under the terms of the Apache License, version 2.0. # Introduction The JSON data model is an extremely popular way to represent structured -data. It is attractive because of it's relative simplicity and is a +data. It is attractive because of its relative simplicity and is a natural fit with the standard types of many programming languages. However, this simplicity means that basic JSON lacks expressive features useful for working with complex data structures and document formats, such @@ -70,12 +70,17 @@ and RDF schema, and production of RDF triples by applying the JSON-LD context. The schema language also provides for robust support of inline documentation. -## Introduction to draft 1 +## Introduction to v1.0 -This is the first version of Schema Salad. It is developed concurrently -with draft 3 of the Common Workflow Language for use in specifying the -Common Workflow Language, however Schema Salad is intended to be useful to -a broader audience. +This is the second version of of the Schema Salad specification. It is +developed concurrently with v1.0 of the Common Workflow Language for use in +specifying the Common Workflow Language, however Schema Salad is intended to be +useful to a broader audience. Compared to the draft-1 schema salad +specification, the following changes have been made: + +* Use of [mapSubject and mapPredicate](#Identifier_maps) to transform maps to lists of records. +* Resolution of the [domain Specific Language for types](#Domain_Specific_Language_for_types) +* Consolidation of the formal [schema into section 5](#Schema). 
## References to Other Specifications diff --git a/schema_salad/metaschema/typedsl_res.yml b/schema_salad/metaschema/typedsl_res.yml new file mode 100644 index 000000000..b1a0c1d51 --- /dev/null +++ b/schema_salad/metaschema/typedsl_res.yml @@ -0,0 +1,33 @@ +- | + ## Domain Specific Language for types + + Fields may be tagged `typeDSL: true`. If so, the field is expanded using the + following micro-DSL for schema salad types: + + * If the type ends with a question mark `?` it is expanded to a union with `null` + * If the type ends with square brackets `[]` it is expanded to an array with items of the preceeding type symbol + * The type may end with both `[]?` to indicate it is an optional array. + * Identifier resolution is applied after type DSL expansion. + + ### Type DSL example + + Given the following schema: + + ``` +- $include: typedsl_res_schema.yml +- | + ``` + + Process the following example: + + ``` +- $include: typedsl_res_src.yml +- | + ``` + + This becomes: + + ``` +- $include: typedsl_res_proc.yml +- | + ``` diff --git a/schema_salad/metaschema/typedsl_res_proc.yml b/schema_salad/metaschema/typedsl_res_proc.yml new file mode 100644 index 000000000..8097a6ac5 --- /dev/null +++ b/schema_salad/metaschema/typedsl_res_proc.yml @@ -0,0 +1,26 @@ +[ + { + "extype": "string" + }, + { + "extype": [ + "null", + "string" + ] + }, + { + "extype": { + "type": "array", + "items": "string" + } + }, + { + "extype": [ + "null", + { + "type": "array", + "items": "string" + } + ] + } +] diff --git a/schema_salad/metaschema/typedsl_res_schema.yml b/schema_salad/metaschema/typedsl_res_schema.yml new file mode 100644 index 000000000..52459a657 --- /dev/null +++ b/schema_salad/metaschema/typedsl_res_schema.yml @@ -0,0 +1,17 @@ +{ + "$graph": [ + {"$import": "metaschema_base.yml"}, + { + "name": "TypeDSLExample", + "type": "record", + "documentRoot": true, + "fields": [{ + "name": "extype", + "type": "string", + "jsonldPredicate": { + _type: "@vocab", + "typeDSL": true + } + 
}] + }] +} diff --git a/schema_salad/metaschema/typedsl_res_src.yml b/schema_salad/metaschema/typedsl_res_src.yml new file mode 100644 index 000000000..6ecbd50d1 --- /dev/null +++ b/schema_salad/metaschema/typedsl_res_src.yml @@ -0,0 +1,9 @@ +[{ + "extype": "string" +}, { + "extype": "string?" +}, { + "extype": "string[]" +}, { + "extype": "string[]?" +}] diff --git a/schema_salad/schema.py b/schema_salad/schema.py index cf8210d7f..62be6f133 100644 --- a/schema_salad/schema.py +++ b/schema_salad/schema.py @@ -45,7 +45,15 @@ 'link_res_proc.yml', 'vocab_res_schema.yml', 'vocab_res_src.yml', - 'vocab_res_proc.yml') + 'vocab_res_proc.yml', + 'map_res.yml', + 'map_res_schema.yml', + 'map_res_src.yml', + 'map_res_proc.yml', + 'typedsl_res.yml', + 'typedsl_res_schema.yml', + 'typedsl_res_src.yml', + 'typedsl_res_proc.yml') def get_metaschema(): diff --git a/setup.py b/setup.py index c72e827ba..0b7a4994c 100755 --- a/setup.py +++ b/setup.py @@ -64,7 +64,7 @@ test_suite='tests', tests_require=['pytest'], entry_points={ - 'console_scripts': ["schema-salad-tool=schema_salad.main:main"] + 'console_scripts': ["schema-salad-tool=schema_salad.main:main", "schema-salad-doc=schema_salad.makedoc:main"] }, zip_safe=True, cmdclass={'egg_info': tagger}, From 0114153420d3a2d84305eda0759a4842ceae3144 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Wed, 8 Mar 2017 12:19:42 -0500 Subject: [PATCH 047/116] In vocab flag (#97) * Mark named items as "not in vocabulary" so that they can only be referenced by fully qualified URI and not by short name. Enables adding validated spec extensions without adding new keywords. 
--- schema_salad/jsonld_context.py | 21 ++++++++------------- schema_salad/metaschema/metaschema.yml | 6 ++++++ schema_salad/schema.py | 3 ++- setup.py | 2 +- 4 files changed, 17 insertions(+), 15 deletions(-) diff --git a/schema_salad/jsonld_context.py b/schema_salad/jsonld_context.py index 43a2a38e6..a451769c4 100755 --- a/schema_salad/jsonld_context.py +++ b/schema_salad/jsonld_context.py @@ -69,11 +69,6 @@ def pred(datatype, # type: Dict[str, Union[Dict, str]] "Dictionaries") else: raise Exception("jsonldPredicate must be a List of Dictionaries.") - # if not v: - # if field and "jsonldPrefix" in field: - # defaultBase = field["jsonldPrefix"] - # elif "jsonldPrefix" in datatype: - # defaultBase = datatype["jsonldPrefix"] ret = v or vee @@ -108,14 +103,14 @@ def process_type(t, # type: Dict[str, Any] g.add((classnode, RDF.type, RDFS.Class)) split = urlparse.urlsplit(recordname) - if "jsonldPrefix" in t: - predicate = "%s:%s" % (t["jsonldPrefix"], recordname) - elif split.scheme: - (ns, ln) = rdflib.namespace.split_uri(unicode(recordname)) - predicate = recordname - recordname = ln - else: - predicate = "%s:%s" % (defaultPrefix, recordname) + predicate = recordname + if t.get("inVocab", True): + if split.scheme: + (ns, ln) = rdflib.namespace.split_uri(unicode(recordname)) + predicate = recordname + recordname = ln + else: + predicate = "%s:%s" % (defaultPrefix, recordname) if context.get(recordname, predicate) != predicate: raise Exception("Predicate collision on '%s', '%s' != '%s'" % ( diff --git a/schema_salad/metaschema/metaschema.yml b/schema_salad/metaschema/metaschema.yml index a1515258a..28b9e662b 100644 --- a/schema_salad/metaschema/metaschema.yml +++ b/schema_salad/metaschema/metaschema.yml @@ -162,6 +162,12 @@ $graph: type: string jsonldPredicate: "@id" doc: "The identifier for this type" + - name: inVocab + type: boolean? + doc: | + By default or if "true", include the short name of this type in the + vocabulary (the keys of the JSON-LD context). 
If false, do not include + the short name in the vocabulary. - name: DocType diff --git a/schema_salad/schema.py b/schema_salad/schema.py index 62be6f133..f961fae62 100644 --- a/schema_salad/schema.py +++ b/schema_salad/schema.py @@ -399,7 +399,8 @@ def make_valid_avro(items, # type: Avro if isinstance(items, dict): items = copy.copy(items) if items.get("name"): - items["name"] = avro_name(items["name"]) + if items.get("inVocab", True): + items["name"] = avro_name(items["name"]) if "type" in items and items["type"] in ("https://w3id.org/cwl/salad#record", "https://w3id.org/cwl/salad#enum", "record", "enum"): if (hasattr(items, "get") and items.get("abstract")) or ("abstract" diff --git a/setup.py b/setup.py index 0b7a4994c..486601ec9 100755 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ extras_require = {} # TODO: to be removed when the above is added setup(name='schema-salad', - version='2.3', + version='2.4', description='Schema Annotations for Linked Avro Data (SALAD)', long_description=open(README).read(), author='Common workflow language working group', From a082ab524dc4d6566293428fc928baaad2192b7e Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Sun, 19 Mar 2017 01:15:43 +0530 Subject: [PATCH 048/116] schema_salad/tests: use print() in tests --- schema_salad/tests/test_errors.py | 2 +- schema_salad/tests/test_fetch.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/schema_salad/tests/test_errors.py b/schema_salad/tests/test_errors.py index 25a5eea8c..44b712d6b 100644 --- a/schema_salad/tests/test_errors.py +++ b/schema_salad/tests/test_errors.py @@ -27,5 +27,5 @@ def test_errors(self): load_and_validate(document_loader, avsc_names, unicode(get_data("tests/"+t)), True) except ValidationException as e: - print "\n", e + print("\n", e) raise diff --git a/schema_salad/tests/test_fetch.py b/schema_salad/tests/test_fetch.py index 8fb9e5a69..6dc28d0bb 100644 --- a/schema_salad/tests/test_fetch.py +++ b/schema_salad/tests/test_fetch.py @@ 
-52,6 +52,6 @@ def test_cache(self): loader = schema_salad.ref_resolver.Loader({}) foo = "file://%s/foo.txt" % os.getcwd() loader.cache.update({foo: "hello: foo"}) - print loader.cache + print(loader.cache) self.assertEqual({"hello": "foo"}, loader.resolve_ref("foo.txt")[0]) self.assertTrue(loader.check_exists(foo)) From 04c678485a772543ed1f7cce46a993dbe7c80b78 Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Fri, 17 Mar 2017 13:41:52 +0530 Subject: [PATCH 049/116] schema_salad/main.py: --version now correctly prints version --- schema_salad/main.py | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/schema_salad/main.py b/schema_salad/main.py index 89279f8e8..bfe084725 100644 --- a/schema_salad/main.py +++ b/schema_salad/main.py @@ -62,8 +62,6 @@ def main(argsl=None): # type: (List[str]) -> int "--print-index", action="store_true", help="Print node index") exgroup.add_argument("--print-metadata", action="store_true", help="Print document metadata") - exgroup.add_argument("--version", action="store_true", - help="Print version") exgroup = parser.add_mutually_exclusive_group() exgroup.add_argument("--strict", action="store_true", help="Strict validation (unrecognized or out of place fields are error)", @@ -79,11 +77,18 @@ def main(argsl=None): # type: (List[str]) -> int exgroup.add_argument("--debug", action="store_true", help="Print even more logging") - parser.add_argument("schema", type=str) + parser.add_argument("schema", type=str, nargs="?", default=None) parser.add_argument("document", type=str, nargs="?", default=None) + parser.add_argument("--version", "-v", action="store_true", + help="Print version", default=None) + args = parser.parse_args(argsl) + if args.version is None and args.schema is None: + print('%s: error: too few arguments' % sys.argv[0]) + exit(0) + if args.quiet: _logger.setLevel(logging.WARN) if args.debug: @@ -92,10 +97,10 @@ def main(argsl=None): # type: (List[str]) -> int pkg = 
pkg_resources.require("schema_salad") if pkg: if args.version: - print("%s %s" % (sys.argv[0], pkg[0].version)) + print("%s Current version: %s" % (sys.argv[0], pkg[0].version)) return 0 else: - _logger.info("%s %s", sys.argv[0], pkg[0].version) + _logger.info("%s Current version: %s", sys.argv[0], pkg[0].version) # Get the metaschema to validate the schema metaschema_names, metaschema_doc, metaschema_loader = schema.get_metaschema() From a63f75fce3560aa218818c3f02bf29a40f4a65fc Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Tue, 21 Mar 2017 12:18:51 +0530 Subject: [PATCH 050/116] schema_salad/tests: add tests for cli args testing: --version, empty args - fix return condition in main() --- schema_salad/main.py | 2 +- schema_salad/tests/test_cli_args.py | 40 +++++++++++++++++++++++++++++ 2 files changed, 41 insertions(+), 1 deletion(-) create mode 100644 schema_salad/tests/test_cli_args.py diff --git a/schema_salad/main.py b/schema_salad/main.py index bfe084725..0e2adf5d1 100644 --- a/schema_salad/main.py +++ b/schema_salad/main.py @@ -87,7 +87,7 @@ def main(argsl=None): # type: (List[str]) -> int if args.version is None and args.schema is None: print('%s: error: too few arguments' % sys.argv[0]) - exit(0) + return 1 if args.quiet: _logger.setLevel(logging.WARN) diff --git a/schema_salad/tests/test_cli_args.py b/schema_salad/tests/test_cli_args.py new file mode 100644 index 000000000..a1d2f0e75 --- /dev/null +++ b/schema_salad/tests/test_cli_args.py @@ -0,0 +1,40 @@ +import unittest +import sys + +import schema_salad.main as cli_parser + +""" for capturing print() output """ +from contextlib import contextmanager +from StringIO import StringIO + +@contextmanager +def captured_output(): + new_out, new_err = StringIO(), StringIO() + old_out, old_err = sys.stdout, sys.stderr + try: + sys.stdout, sys.stderr = new_out, new_err + yield sys.stdout, sys.stderr + finally: + sys.stdout, sys.stderr = old_out, old_err + + +""" test different sets of command line 
arguments"""
class ParseCliArgs(unittest.TestCase):

    def test_version(self):
        args = [["--version"], ["-v"]]
        for arg in args:
            with captured_output() as (out, err):
                cli_parser.main(arg)

            response = out.getvalue().strip()  # capture output and strip newline
            self.assertTrue("Current version" in response)

    def test_empty_input(self):
        # running schema_salad tool without any args
        args = []
        with captured_output() as (out, err):
            cli_parser.main(args)

        response = out.getvalue().strip()
        self.assertTrue("error: too few arguments" in response) From a560ef3924dea732a5263bfec31febe92b34d4d3 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Mon, 27 Mar 2017 09:56:59 -0400 Subject: [PATCH 051/116] Pass through logger to capture warnings. (#101) --- schema_salad/validate.py | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) diff --git a/schema_salad/validate.py b/schema_salad/validate.py index de6a7f218..cc377c93a 100644 --- a/schema_salad/validate.py +++ b/schema_salad/validate.py @@ -176,7 +176,8 @@ def validate_ex(expected_schema, # type: Schema strict=strict, foreign_properties=foreign_properties, raise_ex=raise_ex, - strict_foreign_properties=strict_foreign_properties): + strict_foreign_properties=strict_foreign_properties, + logger=logger): return False except ValidationException as v: if raise_ex: @@ -194,7 +195,8 @@ def validate_ex(expected_schema, # type: Schema elif isinstance(expected_schema, avro.schema.UnionSchema): for s in expected_schema.schemas: if validate_ex(s, datum, identifiers, strict=strict, raise_ex=False, - strict_foreign_properties=strict_foreign_properties): + strict_foreign_properties=strict_foreign_properties, + logger=logger): return True if not raise_ex: @@ -217,7 +219,8 @@ def validate_ex(expected_schema, # type: Schema validate_ex(s, datum, identifiers, strict=strict, foreign_properties=foreign_properties, raise_ex=True, - 
strict_foreign_properties=strict_foreign_properties, + logger=logger) except ClassValidationException as e: raise except ValidationException as e: @@ -269,7 +272,8 @@ def validate_ex(expected_schema, # type: Schema if not validate_ex(f.type, fieldval, identifiers, strict=strict, foreign_properties=foreign_properties, raise_ex=raise_ex, - strict_foreign_properties=strict_foreign_properties): + strict_foreign_properties=strict_foreign_properties, + logger=logger): return False except ValidationException as v: if f.name not in datum: From 8a6eaff53eadca000a2c40624b355d83d89f6656 Mon Sep 17 00:00:00 2001 From: Kapil kumar Date: Mon, 27 Mar 2017 19:38:58 +0530 Subject: [PATCH 052/116] Validating Ids for duplicate Issue#56 (#98) * Validating duplicate ids are not present Issue#56 --- schema_salad/main.py | 2 +- schema_salad/ref_resolver.py | 27 +++++++++++++++-------- schema_salad/schema.py | 2 +- schema_salad/sourceline.py | 15 ++++++++----- schema_salad/tests/test_errors.py | 5 ++++- schema_salad/tests/test_schema/test12.cwl | 16 ++++++++++++++ schema_salad/tests/test_schema/test13.cwl | 20 +++++++++++++++++ schema_salad/tests/test_schema/test14.cwl | 11 +++++++++ setup.py | 2 +- 9 files changed, 81 insertions(+), 19 deletions(-) create mode 100644 schema_salad/tests/test_schema/test12.cwl create mode 100644 schema_salad/tests/test_schema/test13.cwl create mode 100644 schema_salad/tests/test_schema/test14.cwl diff --git a/schema_salad/main.py b/schema_salad/main.py index 0e2adf5d1..cbe566152 100644 --- a/schema_salad/main.py +++ b/schema_salad/main.py @@ -139,7 +139,7 @@ def main(argsl=None): # type: (List[str]) -> int try: schema.validate_doc(metaschema_names, schema_doc, metaschema_loader, args.strict, - source=schema_metadata["name"]) + source=schema_metadata.get("name")) except validate.ValidationException as e: _logger.error("While validating schema `%s`:\n%s" % (args.schema, str(e))) diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index 
d9a8bbf07..d04001d1c 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -822,7 +822,8 @@ def resolve_all(self, loader.idx[metadata[identifer]] = document if checklinks: - self.validate_links(document, u"") + all_doc_ids={} # type: Dict[Text, Text] + self.validate_links(document, u"", all_doc_ids) return document, metadata @@ -877,8 +878,8 @@ def validate_scoped(self, field, link, docid): raise validate.ValidationException( "Field `%s` references unknown identifier `%s`, tried %s" % (field, link, ", ".join(tried))) - def validate_link(self, field, link, docid): - # type: (unicode, FieldType, unicode) -> FieldType + def validate_link(self, field, link, docid, all_doc_ids): + # type: (unicode, FieldType, unicode, Dict[Text, Text]) -> FieldType if field in self.nolinkcheck: return link if isinstance(link, (str, unicode)): @@ -901,14 +902,14 @@ def validate_link(self, field, link, docid): errors = [] for n, i in enumerate(link): try: - link[n] = self.validate_link(field, i, docid) + link[n] = self.validate_link(field, i, docid, all_doc_ids) except validate.ValidationException as v: errors.append(v) if bool(errors): raise validate.ValidationException( "\n".join([unicode(e) for e in errors])) elif isinstance(link, CommentedMap): - self.validate_links(link, docid) + self.validate_links(link, docid, all_doc_ids) else: raise validate.ValidationException( "`%s` field is %s, expected string, list, or a dict." 
@@ -924,8 +925,8 @@ def getid(self, d): # type: (Any) -> Optional[Text] return idd return None - def validate_links(self, document, base_url): - # type: (Union[CommentedMap, CommentedSeq, unicode, None], unicode) -> None + def validate_links(self, document, base_url, all_doc_ids): + # type: (Union[CommentedMap, CommentedSeq, unicode, None], unicode, Dict[Text, Text]) -> None docid = self.getid(document) if not docid: docid = base_url @@ -939,7 +940,15 @@ def validate_links(self, document, base_url): for d in self.url_fields: sl = SourceLine(document, d, validate.ValidationException) if d in document and d not in self.identity_links: - document[d] = self.validate_link(d, document[d], docid) + document[d] = self.validate_link(d, document[d], docid, all_doc_ids) + for identifier in self.identifiers: # validate that each id is defined uniquely + if identifier in document: + sl = SourceLine(document, identifier, validate.ValidationException) + if document[identifier] in all_doc_ids and sl.makeLead() != all_doc_ids[document[identifier]]: + raise validate.ValidationException( + "%s object %s `%s` previously defined" % (all_doc_ids[document[identifier]], identifier, relname(document[identifier]), )) + else: + all_doc_ids[document[identifier]] = sl.makeLead() except validate.ValidationException as v: errors.append(sl.makeError(unicode(v))) if hasattr(document, "iteritems"): @@ -952,7 +961,7 @@ def validate_links(self, document, base_url): for key, val in iterator: sl = SourceLine(document, key, validate.ValidationException) try: - self.validate_links(val, docid) + self.validate_links(val, docid, all_doc_ids) except validate.ValidationException as v: if key not in self.nolinkcheck: docid2 = self.getid(val) diff --git a/schema_salad/schema.py b/schema_salad/schema.py index f961fae62..6f55d6fd9 100644 --- a/schema_salad/schema.py +++ b/schema_salad/schema.py @@ -242,7 +242,7 @@ def load_and_validate(document_loader, # type: Loader validationErrors = u"" try: - 
document_loader.validate_links(data, u"") + document_loader.validate_links(data, u"", {}) except validate.ValidationException as v: validationErrors = unicode(v) + "\n" diff --git a/schema_salad/sourceline.py b/schema_salad/sourceline.py index 4c20e6a0f..6ab2a8838 100644 --- a/schema_salad/sourceline.py +++ b/schema_salad/sourceline.py @@ -149,18 +149,21 @@ def __exit__(self, return raise self.makeError(unicode(exc_value)) - def makeError(self, msg): # type: (Text) -> Any - if not isinstance(self.item, ruamel.yaml.comments.CommentedBase): - return self.raise_type(msg) - errs = [] + def makeLead(self): # type: () -> Text if self.key is None or self.item.lc.data is None or self.key not in self.item.lc.data: - lead = "%s:%i:%i:" % (self.item.lc.filename if hasattr(self.item.lc, "filename") else "", + return "%s:%i:%i:" % (self.item.lc.filename if hasattr(self.item.lc, "filename") else "", (self.item.lc.line or 0)+1, (self.item.lc.col or 0)+1) else: - lead = "%s:%i:%i:" % (self.item.lc.filename if hasattr(self.item.lc, "filename") else "", + return "%s:%i:%i:" % (self.item.lc.filename if hasattr(self.item.lc, "filename") else "", (self.item.lc.data[self.key][0] or 0)+1, (self.item.lc.data[self.key][1] or 0)+1) + + def makeError(self, msg): # type: (Text) -> Any + if not isinstance(self.item, ruamel.yaml.comments.CommentedBase): + return self.raise_type(msg) + errs = [] + lead = self.makeLead() for m in msg.splitlines(): if bool(lineno_re.match(m)): errs.append(m) diff --git a/schema_salad/tests/test_errors.py b/schema_salad/tests/test_errors.py index 44b712d6b..be0bfc530 100644 --- a/schema_salad/tests/test_errors.py +++ b/schema_salad/tests/test_errors.py @@ -21,7 +21,10 @@ def test_errors(self): "test_schema/test8.cwl", "test_schema/test9.cwl", "test_schema/test10.cwl", - "test_schema/test11.cwl"): + "test_schema/test11.cwl", + "test_schema/test12.cwl", + "test_schema/test13.cwl", + "test_schema/test14.cwl"): with self.assertRaises(ValidationException): try: 
load_and_validate(document_loader, avsc_names, diff --git a/schema_salad/tests/test_schema/test12.cwl b/schema_salad/tests/test_schema/test12.cwl new file mode 100644 index 000000000..d994e7cbc --- /dev/null +++ b/schema_salad/tests/test_schema/test12.cwl @@ -0,0 +1,16 @@ +cwlVersion: v1.0 +class: CommandLineTool +baseCommand: echo +inputs: + - id: example_flag + type: boolean + inputBinding: + position: 1 + prefix: -f + - id: example_flag + type: int + inputBinding: + position: 3 + prefix: --example-string + +outputs: [] diff --git a/schema_salad/tests/test_schema/test13.cwl b/schema_salad/tests/test_schema/test13.cwl new file mode 100644 index 000000000..caa274d2a --- /dev/null +++ b/schema_salad/tests/test_schema/test13.cwl @@ -0,0 +1,20 @@ +cwlVersion: v1.0 +class: Workflow +inputs: + example_flag: + type: boolean + inputBinding: + position: 1 + prefix: -f + +outputs: [] + +steps: + example_flag: + in: [] + out: [] + run: + id: blah + class: CommandLineTool + inputs: [] + outputs: [] \ No newline at end of file diff --git a/schema_salad/tests/test_schema/test14.cwl b/schema_salad/tests/test_schema/test14.cwl new file mode 100644 index 000000000..729ee83df --- /dev/null +++ b/schema_salad/tests/test_schema/test14.cwl @@ -0,0 +1,11 @@ +cwlVersion: v1.0 +class: CommandLineTool +baseCommand: echo +inputs: + example_flag: + type: boolean + inputBinding: + position: 1 + prefix: -f +outputs: + example_flag: int diff --git a/setup.py b/setup.py index 486601ec9..25b4042e0 100755 --- a/setup.py +++ b/setup.py @@ -47,7 +47,7 @@ extras_require = {} # TODO: to be removed when the above is added setup(name='schema-salad', - version='2.4', + version='2.5', description='Schema Annotations for Linked Avro Data (SALAD)', long_description=open(README).read(), author='Common workflow language working group', From 424ad78f561cb2f5566730bd19e98cfd8947c9db Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Tue, 28 Mar 2017 15:57:58 -0400 Subject: [PATCH 053/116] Fix for 
self-colliding ids on $import (#102) * Fix for self-colliding ids on $import --- MANIFEST.in | 1 + schema_salad/ref_resolver.py | 9 ++++++--- schema_salad/validate.py | 6 +++--- 3 files changed, 10 insertions(+), 6 deletions(-) diff --git a/MANIFEST.in b/MANIFEST.in index abcfe2a15..c3870ab6e 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -4,4 +4,5 @@ include schema_salad/tests/test_schema/*.md include schema_salad/tests/test_schema/*.yml include schema_salad/tests/test_schema/*.cwl include schema_salad/metaschema/* +global-exclude *~ global-exclude *.pyc diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index d04001d1c..7eee213fd 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -233,7 +233,7 @@ def __init__(self, self.url_fields = set() # type: Set[unicode] self.scoped_ref_fields = {} # type: Dict[unicode, int] self.vocab_fields = set() # type: Set[unicode] - self.identifiers = set() # type: Set[unicode] + self.identifiers = [] # type: List[unicode] self.identity_links = set() # type: Set[unicode] self.standalone = None # type: Optional[Set[unicode]] self.nolinkcheck = set() # type: Set[unicode] @@ -345,7 +345,7 @@ def add_context(self, newcontext, baseuri=""): self.url_fields = set(("$schemas",)) self.scoped_ref_fields = {} self.vocab_fields = set() - self.identifiers = set() + self.identifiers = [] self.identity_links = set() self.standalone = set() self.nolinkcheck = set() @@ -361,7 +361,7 @@ def add_context(self, newcontext, baseuri=""): for key, value in self.ctx.items(): if value == u"@id": - self.identifiers.add(key) + self.identifiers.append(key) self.identity_links.add(key) elif isinstance(value, dict) and value.get(u"@type") == u"@id": self.url_fields.add(key) @@ -393,6 +393,8 @@ def add_context(self, newcontext, baseuri=""): for k, v in self.vocab.items(): self.rvocab[self.expand_url(v, u"", scoped_id=False)] = k + self.identifiers.sort() + _logger.debug("identifiers is %s", self.identifiers) 
_logger.debug("identity_links is %s", self.identity_links) _logger.debug("url_fields is %s", self.url_fields) @@ -949,6 +951,7 @@ def validate_links(self, document, base_url, all_doc_ids): "%s object %s `%s` previously defined" % (all_doc_ids[document[identifier]], identifier, relname(document[identifier]), )) else: all_doc_ids[document[identifier]] = sl.makeLead() + break except validate.ValidationException as v: errors.append(sl.makeError(unicode(v))) if hasattr(document, "iteritems"): diff --git a/schema_salad/validate.py b/schema_salad/validate.py index cc377c93a..255f65f1e 100644 --- a/schema_salad/validate.py +++ b/schema_salad/validate.py @@ -21,7 +21,7 @@ class ClassValidationException(ValidationException): def validate(expected_schema, # type: Schema datum, # type: Any - identifiers=set(), # type: Set[unicode] + identifiers=[], # type: List[unicode] strict=False, # type: bool foreign_properties=set() # type: Set[unicode] ): @@ -59,7 +59,7 @@ def vpformat(datum): # type: (Any) -> str def validate_ex(expected_schema, # type: Schema datum, # type: Any - identifiers=None, # type: Set[unicode] + identifiers=None, # type: List[unicode] strict=False, # type: bool foreign_properties=None, # type: Set[unicode] raise_ex=True, # type: bool @@ -70,7 +70,7 @@ def validate_ex(expected_schema, # type: Schema """Determine if a python datum is an instance of a schema.""" if not identifiers: - identifiers = set() + identifiers = [] if not foreign_properties: foreign_properties = set() From 5b0b01992d435c9e3ed64d91f9ad700bc16cd0e3 Mon Sep 17 00:00:00 2001 From: Peter Amstutz Date: Fri, 28 Apr 2017 10:20:41 -0400 Subject: [PATCH 054/116] Validation fix. Must raise exception when raise_ex is true and record class (#107) doesn't match expectation. 
--- schema_salad/validate.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/schema_salad/validate.py b/schema_salad/validate.py index 255f65f1e..5452a4e99 100644 --- a/schema_salad/validate.py +++ b/schema_salad/validate.py @@ -250,7 +250,11 @@ def validate_ex(expected_schema, # type: Schema else: return False if expected_schema.name != d: - return False + if raise_ex: + raise ValidationException( + u"Expected class '%s' but this is '%s'" % (expected_schema.name, d)) + else: + return False classmatch = d break From ffc3367ad9bed86a6b430c74fa3f220080847bb4 Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" Date: Tue, 30 May 2017 10:47:57 +0300 Subject: [PATCH 055/116] exclude tests from coverage report --- Makefile | 30 +++++++++++++----------------- 1 file changed, 13 insertions(+), 17 deletions(-) diff --git a/Makefile b/Makefile index 98d4118cb..139e2b43e 100644 --- a/Makefile +++ b/Makefile @@ -27,6 +27,8 @@ MODULE=schema_salad # `[[` conditional expressions. PYSOURCES=$(wildcard ${MODULE}/**.py tests/*.py) setup.py DEVPKGS=pep8 diff_cover autopep8 pylint coverage pep257 pytest flake8 +COVBASE=coverage run --branch --append --source=${MODULE} \ + --omit=schema_salad/tests/* VERSION=$(shell git describe --tags --dirty | sed s/v//) @@ -103,37 +105,31 @@ diff_pylint_report: pylint_report.txt diff-quality --violations=pylint pylint_report.txt .coverage: $(PYSOURCES) - coverage run --branch --source=${MODULE} setup.py test - coverage run --append --branch --source=${MODULE} \ - -m schema_salad.main \ + rm -f .coverage + $(COVBASE) setup.py test + $(COVBASE) -m schema_salad.main \ --print-jsonld-context schema_salad/metaschema/metaschema.yml \ > /dev/null - coverage run --append --branch --source=${MODULE} \ - -m schema_salad.main \ + $(COVBASE) -m schema_salad.main \ --print-rdfs schema_salad/metaschema/metaschema.yml \ > /dev/null - coverage run --append --branch --source=${MODULE} \ - -m schema_salad.main \ + $(COVBASE) -m schema_salad.main 
\ --print-avro schema_salad/metaschema/metaschema.yml \ > /dev/null - coverage run --append --branch --source=${MODULE} \ - -m schema_salad.main \ + $(COVBASE) -m schema_salad.main \ --print-rdf schema_salad/metaschema/metaschema.yml \ > /dev/null - coverage run --append --branch --source=${MODULE} \ - -m schema_salad.main \ + $(COVBASE) -m schema_salad.main \ --print-pre schema_salad/metaschema/metaschema.yml \ > /dev/null - coverage run --append --branch --source=${MODULE} \ - -m schema_salad.main \ + $(COVBASE) -m schema_salad.main \ --print-index schema_salad/metaschema/metaschema.yml \ > /dev/null - coverage run --append --branch --source=${MODULE} \ - -m schema_salad.main \ + $(COVBASE) -m schema_salad.main \ --print-metadata schema_salad/metaschema/metaschema.yml \ > /dev/null - coverage run --append --branch --source=${MODULE} \ - -m schema_salad.makedoc schema_salad/metaschema/metaschema.yml \ + $(COVBASE) -m schema_salad.makedoc \ + schema_salad/metaschema/metaschema.yml \ > /dev/null coverage.xml: .coverage From 3bba34ef72d9f17294ae3564450a7ea3c6f95c17 Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Sun, 11 Jun 2017 20:08:45 +0530 Subject: [PATCH 056/116] minor refactor: create utils.py --- schema_salad/add_dictlist.py | 8 -------- schema_salad/aslist.py | 11 ----------- schema_salad/jsonld_context.py | 2 +- schema_salad/makedoc.py | 3 +-- schema_salad/ref_resolver.py | 2 +- schema_salad/schema.py | 3 +-- schema_salad/utils.py | 18 ++++++++++++++++++ 7 files changed, 22 insertions(+), 25 deletions(-) delete mode 100644 schema_salad/add_dictlist.py delete mode 100644 schema_salad/aslist.py create mode 100644 schema_salad/utils.py diff --git a/schema_salad/add_dictlist.py b/schema_salad/add_dictlist.py deleted file mode 100644 index 711f580e9..000000000 --- a/schema_salad/add_dictlist.py +++ /dev/null @@ -1,8 +0,0 @@ -import sys -from typing import Any, Dict - - -def add_dictlist(di, key, val): # type: (Dict, Any, Any) -> None - if key not in di: - 
di[key] = [] - di[key].append(val) diff --git a/schema_salad/aslist.py b/schema_salad/aslist.py deleted file mode 100644 index 27602ab8d..000000000 --- a/schema_salad/aslist.py +++ /dev/null @@ -1,11 +0,0 @@ -import sys -from typing import Any, List - - -def aslist(l): # type: (Any) -> List - """Convenience function to wrap single items and lists, and return lists unchanged.""" - - if isinstance(l, list): - return l - else: - return [l] diff --git a/schema_salad/jsonld_context.py b/schema_salad/jsonld_context.py index a451769c4..657332a97 100755 --- a/schema_salad/jsonld_context.py +++ b/schema_salad/jsonld_context.py @@ -18,7 +18,7 @@ from rdflib.namespace import RDF, RDFS import urlparse import logging -from .aslist import aslist +from schema_salad.utils import aslist from typing import (cast, Any, Dict, Iterable, List, Optional, Text, Tuple, Union) from .ref_resolver import Loader, ContextType diff --git a/schema_salad/makedoc.py b/schema_salad/makedoc.py index e4d789d18..61e7d84a7 100644 --- a/schema_salad/makedoc.py +++ b/schema_salad/makedoc.py @@ -8,8 +8,7 @@ from StringIO import StringIO import logging import urlparse -from .aslist import aslist -from .add_dictlist import add_dictlist +from schema_salad.utils import add_dictlist, aslist import re import argparse from typing import cast, Any, Dict, IO, List, Optional, Set, Text, Union diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index 7eee213fd..65555dd72 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -12,7 +12,7 @@ from StringIO import StringIO from . 
import validate -from .aslist import aslist +from schema_salad.utils import aslist from .flatten import flatten from .sourceline import SourceLine, add_lc_filename, relname diff --git a/schema_salad/schema.py b/schema_salad/schema.py index 6f55d6fd9..10bbda20c 100644 --- a/schema_salad/schema.py +++ b/schema_salad/schema.py @@ -1,6 +1,6 @@ import avro import copy -from .add_dictlist import add_dictlist +from schema_salad.utils import add_dictlist, aslist import sys import pprint from pkg_resources import resource_stream @@ -17,7 +17,6 @@ from .ref_resolver import Loader, DocumentType from .flatten import flatten import logging -from .aslist import aslist from . import jsonld_context from .sourceline import SourceLine, strip_dup_lineno, add_lc_filename, bullets, relname from typing import cast, Any, AnyStr, Dict, List, Set, Tuple, TypeVar, Union diff --git a/schema_salad/utils.py b/schema_salad/utils.py new file mode 100644 index 000000000..6ce28fa46 --- /dev/null +++ b/schema_salad/utils.py @@ -0,0 +1,18 @@ +from __future__ import absolute_import + +from typing import Any, Dict, List + + +def add_dictlist(di, key, val): # type: (Dict, Any, Any) -> None + if key not in di: + di[key] = [] + di[key].append(val) + + +def aslist(l): # type: (Any) -> List + """Convenience function to wrap single items and lists, and return lists unchanged.""" + + if isinstance(l, list): + return l + else: + return [l] From 6a582c3b223c491b7347745cf22ba48005c67362 Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Sun, 11 Jun 2017 20:53:40 +0530 Subject: [PATCH 057/116] refactor flatten function. 
move to utils.py --- schema_salad/flatten.py | 26 -------------------------- schema_salad/ref_resolver.py | 3 +-- schema_salad/schema.py | 3 +-- schema_salad/utils.py | 23 +++++++++++++++++++++++ 4 files changed, 25 insertions(+), 30 deletions(-) delete mode 100644 schema_salad/flatten.py diff --git a/schema_salad/flatten.py b/schema_salad/flatten.py deleted file mode 100644 index a417b343b..000000000 --- a/schema_salad/flatten.py +++ /dev/null @@ -1,26 +0,0 @@ -import sys -from typing import Any, Tuple - -# http://rightfootin.blogspot.com/2006/09/more-on-python-flatten.html - - -def flatten(l, ltypes=(list, tuple)): - # type: (Any, Any) -> Any - if l is None: - return [] - if not isinstance(l, ltypes): - return [l] - - ltype = type(l) - l = list(l) - i = 0 - while i < len(l): - while isinstance(l[i], ltypes): - if not l[i]: - l.pop(i) - i -= 1 - break - else: - l[i:i + 1] = l[i] - i += 1 - return ltype(l) diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index 65555dd72..662eba94a 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -12,8 +12,7 @@ from StringIO import StringIO from . import validate -from schema_salad.utils import aslist -from .flatten import flatten +from schema_salad.utils import aslist, flatten from .sourceline import SourceLine, add_lc_filename, relname import requests diff --git a/schema_salad/schema.py b/schema_salad/schema.py index 10bbda20c..ca11aed4b 100644 --- a/schema_salad/schema.py +++ b/schema_salad/schema.py @@ -1,6 +1,6 @@ import avro import copy -from schema_salad.utils import add_dictlist, aslist +from schema_salad.utils import add_dictlist, aslist, flatten import sys import pprint from pkg_resources import resource_stream @@ -15,7 +15,6 @@ from avro.schema import Names, SchemaParseException from . import ref_resolver from .ref_resolver import Loader, DocumentType -from .flatten import flatten import logging from . 
import jsonld_context from .sourceline import SourceLine, strip_dup_lineno, add_lc_filename, bullets, relname diff --git a/schema_salad/utils.py b/schema_salad/utils.py index 6ce28fa46..2ba98dc45 100644 --- a/schema_salad/utils.py +++ b/schema_salad/utils.py @@ -16,3 +16,26 @@ def aslist(l): # type: (Any) -> List return l else: return [l] + +# http://rightfootin.blogspot.com/2006/09/more-on-python-flatten.html + +def flatten(l, ltypes=(list, tuple)): + # type: (Any, Any) -> Any + if l is None: + return [] + if not isinstance(l, ltypes): + return [l] + + ltype = type(l) + l = list(l) + i = 0 + while i < len(l): + while isinstance(l[i], ltypes): + if not l[i]: + l.pop(i) + i -= 1 + break + else: + l[i:i + 1] = l[i] + i += 1 + return ltype(l) From e39018ad1528ab66feb396e4a52c46693be6b29a Mon Sep 17 00:00:00 2001 From: Stian Soiland-Reyes Date: Fri, 23 Jun 2017 16:01:05 +0100 Subject: [PATCH 058/116] Optional skip_schemas Fixes common-workflow-language/cwltool#433 --- schema_salad/ref_resolver.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index 7eee213fd..22a9a14c1 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -97,7 +97,8 @@ def merge_properties(a, b): # type: (List[Any], List[Any]) -> Dict[Any, Any] def SubLoader(loader): # type: (Loader) -> Loader return Loader(loader.ctx, schemagraph=loader.graph, foreign_properties=loader.foreign_properties, idx=loader.idx, - cache=loader.cache, fetcher_constructor=loader.fetcher_constructor) + cache=loader.cache, fetcher_constructor=loader.fetcher_constructor, + skip_schemas=loader.skip_schemas) class Fetcher(object): def fetch_text(self, url): # type: (unicode) -> unicode @@ -179,7 +180,8 @@ def __init__(self, idx=None, # type: Dict[unicode, Union[CommentedMap, CommentedSeq, unicode, None]] cache=None, # type: Dict[unicode, Any] session=None, # type: requests.sessions.Session - 
fetcher_constructor=None # type: Callable[[Dict[unicode, unicode], requests.sessions.Session], Fetcher] + fetcher_constructor=None, # type: Callable[[Dict[unicode, unicode], requests.sessions.Session], Fetcher] + skip_schemas=None # type: bool ): # type: (...) -> None @@ -205,6 +207,11 @@ def __init__(self, else: self.cache = {} + if skip_schemas is not None: + self.skip_schemas = skip_schemas + else: + self.skip_schemas = False + if session is None: if "HOME" in os.environ: self.session = CacheControl( @@ -305,6 +312,8 @@ def add_namespaces(self, ns): # type: (Dict[unicode, unicode]) -> None def add_schemas(self, ns, base_url): # type: (Union[List[unicode], unicode], unicode) -> None + if self.skip_schemas: + return for sch in aslist(ns): fetchurl = self.fetcher.urljoin(base_url, sch) if fetchurl not in self.cache: From b61311cef0ae4e7f49e57514a7e2de5f0855c534 Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Sun, 25 Jun 2017 16:42:41 +0530 Subject: [PATCH 059/116] Makefile: pin mypy, typed-ast version for jenkins make target --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 139e2b43e..f8867a5bd 100644 --- a/Makefile +++ b/Makefile @@ -183,6 +183,6 @@ jenkins: . env3/bin/activate ; \ pip install -U setuptools pip wheel ; \ ${MAKE} install-dep ; \ - pip install -U mypy ; ${MAKE} mypy + pip install -U mypy==0.470 typed-ast==0.6.3 ; ${MAKE} mypy FORCE: From ba88f5ebaec9d09accd4a9b4cc9c7d70e6913c53 Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Sun, 25 Jun 2017 18:01:53 +0530 Subject: [PATCH 060/116] add mypy, typed-ast dependency in requirements.txt --- Makefile | 2 +- requirements.txt | 2 ++ tox.ini | 4 ---- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index f8867a5bd..33296b2cc 100644 --- a/Makefile +++ b/Makefile @@ -183,6 +183,6 @@ jenkins: . 
env3/bin/activate ; \ pip install -U setuptools pip wheel ; \ ${MAKE} install-dep ; \ - pip install -U mypy==0.470 typed-ast==0.6.3 ; ${MAKE} mypy + pip install -U -r requirements.txt ; ${MAKE} mypy FORCE: diff --git a/requirements.txt b/requirements.txt index 7684172aa..f533ee940 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,3 +7,5 @@ rdflib-jsonld==0.4.0 mistune==0.7.3 CacheControl==0.11.7 lockfile==0.12.2 +mypy==0.470 ; python_version>="3" +typed-ast==0.6.3 ; python_version>="3" \ No newline at end of file diff --git a/tox.ini b/tox.ini index 8a81fbaa0..27b745cfe 100644 --- a/tox.ini +++ b/tox.ini @@ -14,16 +14,12 @@ deps = -rrequirements.txt commands = make mypy whitelist_externals = make deps = - mypy==0.470 - typed-ast==0.6.3 -rrequirements.txt [testenv:py35-mypy] commands = make mypy whitelist_externals = make deps = - mypy==0.470 - typed-ast==0.6.3 -rrequirements.txt [testenv:py35-lint] From 5a76bb61819fb198d1e4aaceabce3c083364a791 Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Sun, 25 Jun 2017 20:29:04 +0530 Subject: [PATCH 061/116] create mypy_requirements.txt for mypy related deps --- Makefile | 2 +- mypy_requirements.txt | 2 ++ requirements.txt | 4 +--- tox.ini | 2 ++ 4 files changed, 6 insertions(+), 4 deletions(-) create mode 100644 mypy_requirements.txt diff --git a/Makefile b/Makefile index 33296b2cc..10aa0677d 100644 --- a/Makefile +++ b/Makefile @@ -183,6 +183,6 @@ jenkins: . 
env3/bin/activate ; \ pip install -U setuptools pip wheel ; \ ${MAKE} install-dep ; \ - pip install -U -r requirements.txt ; ${MAKE} mypy + pip install -U -r mypy_requirements.txt ; ${MAKE} mypy FORCE: diff --git a/mypy_requirements.txt b/mypy_requirements.txt new file mode 100644 index 000000000..1430406b3 --- /dev/null +++ b/mypy_requirements.txt @@ -0,0 +1,2 @@ +mypy==0.470 ; python_version>="3" +typed-ast==0.6.3 ; python_version>="3" \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index f533ee940..7ee05c44a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -6,6 +6,4 @@ rdflib==4.2.2 rdflib-jsonld==0.4.0 mistune==0.7.3 CacheControl==0.11.7 -lockfile==0.12.2 -mypy==0.470 ; python_version>="3" -typed-ast==0.6.3 ; python_version>="3" \ No newline at end of file +lockfile==0.12.2 \ No newline at end of file diff --git a/tox.ini b/tox.ini index 27b745cfe..7b5a189e4 100644 --- a/tox.ini +++ b/tox.ini @@ -14,12 +14,14 @@ deps = -rrequirements.txt commands = make mypy whitelist_externals = make deps = + -rmypy_requirements.txt -rrequirements.txt [testenv:py35-mypy] commands = make mypy whitelist_externals = make deps = + -rmypy_requirements.txt -rrequirements.txt [testenv:py35-lint] From 3080c694e00e260d2c45847bd0441fb8f50dd091 Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Wed, 17 May 2017 16:30:56 +0530 Subject: [PATCH 062/116] Apply Python3 modernize transforms Main changes: - use of __future__ - using six lib to: - deal with checking unicode - dict operations - other API changes --- schema_salad/__init__.py | 1 + schema_salad/__main__.py | 1 + schema_salad/jsonld_context.py | 16 ++++--- schema_salad/main.py | 12 ++--- schema_salad/makedoc.py | 5 ++- schema_salad/ref_resolver.py | 81 ++++++++++++++++++---------------- schema_salad/schema.py | 18 ++++---- schema_salad/sourceline.py | 10 +++-- schema_salad/validate.py | 23 +++++----- 9 files changed, 93 insertions(+), 74 deletions(-) diff --git a/schema_salad/__init__.py 
b/schema_salad/__init__.py index 381ec7664..a751d64be 100644 --- a/schema_salad/__init__.py +++ b/schema_salad/__init__.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import import logging import sys import typing diff --git a/schema_salad/__main__.py b/schema_salad/__main__.py index 4bf3d7eae..5890f6f28 100644 --- a/schema_salad/__main__.py +++ b/schema_salad/__main__.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import from . import main import sys import typing diff --git a/schema_salad/jsonld_context.py b/schema_salad/jsonld_context.py index 657332a97..dc3c759a8 100755 --- a/schema_salad/jsonld_context.py +++ b/schema_salad/jsonld_context.py @@ -1,7 +1,9 @@ +from __future__ import absolute_import import collections import shutil import json import ruamel.yaml as yaml +import six try: from ruamel.yaml import CSafeLoader as SafeLoader except ImportError: @@ -40,10 +42,10 @@ def pred(datatype, # type: Dict[str, Union[Dict, str]] if split.scheme != '': vee = name - (ns, ln) = rdflib.namespace.split_uri(unicode(vee)) + (ns, ln) = rdflib.namespace.split_uri(six.text_type(vee)) name = ln if ns[0:-1] in namespaces: - vee = unicode(namespaces[ns[0:-1]][ln]) + vee = six.text_type(namespaces[ns[0:-1]][ln]) _logger.debug("name, v %s %s", name, vee) v = None # type: Optional[Dict] @@ -106,7 +108,7 @@ def process_type(t, # type: Dict[str, Any] predicate = recordname if t.get("inVocab", True): if split.scheme: - (ns, ln) = rdflib.namespace.split_uri(unicode(recordname)) + (ns, ln) = rdflib.namespace.split_uri(six.text_type(recordname)) predicate = recordname recordname = ln else: @@ -131,13 +133,13 @@ def process_type(t, # type: Dict[str, Any] v = pred(t, i, fieldname, context, defaultPrefix, namespaces) # type: Union[Dict[Any, Any], unicode, None] - if isinstance(v, basestring): + if isinstance(v, six.string_types): v = v if v[0] != "@" else None elif v is not None: v = v["_@id"] if v.get("_@id", "@")[0] != "@" else None if bool(v): - (ns, ln) = 
rdflib.namespace.split_uri(unicode(v)) + (ns, ln) = rdflib.namespace.split_uri(six.text_type(v)) if ns[0:-1] in namespaces: propnode = namespaces[ns[0:-1]][ln] else: @@ -211,7 +213,7 @@ def makerdf(workflow, # type: Union[str, unicode] # type: (...) -> Graph prefixes = {} idfields = [] - for k, v in ctx.iteritems(): + for k, v in six.iteritems(ctx): if isinstance(v, dict): url = v["@id"] else: @@ -242,7 +244,7 @@ def makerdf(workflow, # type: Union[str, unicode] for sub, pred, obj in g.triples((None, URIRef("@id"), None)): g.remove((sub, pred, obj)) - for k2, v2 in prefixes.iteritems(): + for k2, v2 in six.iteritems(prefixes): g.namespace_manager.bind(k2, v2) return g diff --git a/schema_salad/main.py b/schema_salad/main.py index cbe566152..20a7a13e2 100644 --- a/schema_salad/main.py +++ b/schema_salad/main.py @@ -1,4 +1,5 @@ from __future__ import print_function +from __future__ import absolute_import import argparse import logging import sys @@ -20,6 +21,7 @@ from . import validate from .sourceline import strip_dup_lineno from .ref_resolver import Loader +import six _logger = logging.getLogger("salad") @@ -118,8 +120,8 @@ def main(argsl=None): # type: (List[str]) -> int except (validate.ValidationException) as e: _logger.error("Schema `%s` failed link checking:\n%s", args.schema, e, exc_info=(True if args.debug else False)) - _logger.debug("Index is %s", metaschema_loader.idx.keys()) - _logger.debug("Vocabulary is %s", metaschema_loader.vocab.keys()) + _logger.debug("Index is %s", list(metaschema_loader.idx.keys())) + _logger.debug("Vocabulary is %s", list(metaschema_loader.vocab.keys())) return 1 except (RuntimeError) as e: _logger.error("Schema `%s` read error:\n%s", @@ -132,7 +134,7 @@ def main(argsl=None): # type: (List[str]) -> int return 0 if not args.document and args.print_index: - print(json.dumps(metaschema_loader.idx.keys(), indent=4)) + print(json.dumps(list(metaschema_loader.idx.keys()), indent=4)) return 0 # Validate the schema document against the 
metaschema @@ -210,7 +212,7 @@ def main(argsl=None): # type: (List[str]) -> int document, doc_metadata = document_loader.resolve_ref(uri) except (validate.ValidationException, RuntimeError) as e: _logger.error("Document `%s` failed validation:\n%s", - args.document, strip_dup_lineno(unicode(e)), exc_info=args.debug) + args.document, strip_dup_lineno(six.text_type(e)), exc_info=args.debug) return 1 # Optionally print the document after ref resolution @@ -219,7 +221,7 @@ def main(argsl=None): # type: (List[str]) -> int return 0 if args.print_index: - print(json.dumps(document_loader.idx.keys(), indent=4)) + print(json.dumps(list(document_loader.idx.keys()), indent=4)) return 0 # Validate the schema document against the metaschema diff --git a/schema_salad/makedoc.py b/schema_salad/makedoc.py index 61e7d84a7..bebf5c6cf 100644 --- a/schema_salad/makedoc.py +++ b/schema_salad/makedoc.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import import mistune from . import schema import json @@ -12,6 +13,8 @@ import re import argparse from typing import cast, Any, Dict, IO, List, Optional, Set, Text, Union +import six +from six.moves import range _logger = logging.getLogger("salad") @@ -29,7 +32,7 @@ def has_types(items): # type: (Any) -> List[basestring] for i in items: r.extend(has_types(i)) return r - if isinstance(items, basestring): + if isinstance(items, six.string_types): return [items] return [] diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index 79f2dfc1b..e105d151f 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import import sys import os import json @@ -28,12 +29,14 @@ import xml.sax from typing import (cast, Any, AnyStr, Callable, Dict, List, Iterable, Optional, Set, Text, Tuple, TypeVar, Union) +import six +from six.moves import range _logger = logging.getLogger("salad") -ContextType = Dict[unicode, Union[Dict, unicode, Iterable[unicode]]] +ContextType = 
Dict[six.text_type, Union[Dict, six.text_type, Iterable[six.text_type]]] DocumentType = TypeVar('DocumentType', CommentedSeq, CommentedMap) DocumentOrStrType = TypeVar( - 'DocumentOrStrType', CommentedSeq, CommentedMap, unicode) + 'DocumentOrStrType', CommentedSeq, CommentedMap, six.text_type) def file_uri(path, split_frag=False): # type: (str, bool) -> str if path.startswith("file://"): @@ -61,7 +64,7 @@ def uri_file_path(url): # type: (str) -> str class NormDict(CommentedMap): - def __init__(self, normalize=unicode): # type: (Callable) -> None + def __init__(self, normalize=six.text_type): # type: (Callable) -> None super(NormDict, self).__init__() self.normalize = normalize @@ -143,7 +146,7 @@ def fetch_text(self, url): return read except (OSError, IOError) as e: if e.filename == path: - raise RuntimeError(unicode(e)) + raise RuntimeError(six.text_type(e)) else: raise RuntimeError('Error reading %s: %s' % (url, e)) else: @@ -297,14 +300,14 @@ def expand_url(self, def _add_properties(self, s): # type: (unicode) -> None for _, _, rng in self.graph.triples((s, RDFS.range, None)): - literal = ((unicode(rng).startswith( + literal = ((six.text_type(rng).startswith( u"http://www.w3.org/2001/XMLSchema#") and - not unicode(rng) == u"http://www.w3.org/2001/XMLSchema#anyURI") - or unicode(rng) == + not six.text_type(rng) == u"http://www.w3.org/2001/XMLSchema#anyURI") + or six.text_type(rng) == u"http://www.w3.org/2000/01/rdf-schema#Literal") if not literal: - self.url_fields.add(unicode(s)) - self.foreign_properties.add(unicode(s)) + self.url_fields.add(six.text_type(s)) + self.foreign_properties.add(six.text_type(s)) def add_namespaces(self, ns): # type: (Dict[unicode, unicode]) -> None self.vocab.update(ns) @@ -342,7 +345,7 @@ def add_schemas(self, ns, base_url): self._add_properties(s) for s, _, _ in self.graph.triples((None, None, None)): - self.idx[unicode(s)] = None + self.idx[six.text_type(s)] = None def add_context(self, newcontext, baseuri=""): # type: 
(ContextType, unicode) -> None @@ -395,7 +398,7 @@ def add_context(self, newcontext, baseuri=""): if isinstance(value, dict) and u"@id" in value: self.vocab[key] = value[u"@id"] - elif isinstance(value, basestring): + elif isinstance(value, six.string_types): self.vocab[key] = value for k, v in self.vocab.items(): @@ -425,7 +428,7 @@ def resolve_ref(self, if not base_url: base_url = file_uri(os.getcwd()) + "/" - if isinstance(lref, (str, unicode)) and os.sep == "\\": + if isinstance(lref, (str, six.text_type)) and os.sep == "\\": # Convert Windows path separator in ref lref = lref.replace("\\", "/") @@ -441,7 +444,7 @@ def resolve_ref(self, else: raise sl.makeError( u"'$import' must be the only field in %s" - % (unicode(obj))) + % (six.text_type(obj))) elif "$include" in obj: sl = SourceLine(obj, "$include", RuntimeError) if len(obj) == 1: @@ -451,7 +454,7 @@ def resolve_ref(self, else: raise sl.makeError( u"'$include' must be the only field in %s" - % (unicode(obj))) + % (six.text_type(obj))) elif "$mixin" in obj: sl = SourceLine(obj, "$mixin", RuntimeError) lref = obj[u"$mixin"] @@ -468,9 +471,9 @@ def resolve_ref(self, u"Object `%s` does not have identifier field in %s" % (relname(obj), self.identifiers)) - if not isinstance(lref, (str, unicode)): + if not isinstance(lref, (str, six.text_type)): raise ValueError(u"Expected CommentedMap or string, got %s: `%s`" - % (type(lref), unicode(lref))) + % (type(lref), six.text_type(lref))) url = self.expand_url(lref, base_url, scoped_id=(obj is not None)) # Has this reference been loaded already? @@ -583,7 +586,7 @@ def _type_dsl(self, filename): # type: (...) 
-> Union[unicode, Dict[unicode, unicode], List[Union[unicode, Dict[unicode, unicode]]]] - if not isinstance(t, (str, unicode)): + if not isinstance(t, (str, six.text_type)): return t m = Loader.typeDSLregex.match(t) @@ -612,7 +615,7 @@ def _resolve_type_dsl(self, for d in loader.type_dsl_fields: if d in document: datum2 = datum = document[d] - if isinstance(datum, (str, unicode)): + if isinstance(datum, (str, six.text_type)): datum2 = self._type_dsl(datum, document.lc.data[ d], document.lc.filename) elif isinstance(datum, CommentedSeq): @@ -648,12 +651,12 @@ def _resolve_identifier(self, document, loader, base_url): # Expand identifier field (usually 'id') to resolve scope for identifer in loader.identifiers: if identifer in document: - if isinstance(document[identifer], basestring): + if isinstance(document[identifer], six.string_types): document[identifer] = loader.expand_url( document[identifer], base_url, scoped_id=True) if (document[identifer] not in loader.idx or isinstance( - loader.idx[document[identifer]], basestring)): + loader.idx[document[identifer]], six.string_types)): loader.idx[document[identifer]] = document base_url = document[identifer] else: @@ -669,7 +672,7 @@ def _resolve_identity(self, document, loader, base_url): for identifer in loader.identity_links: if identifer in document and isinstance(document[identifer], list): for n, v in enumerate(document[identifer]): - if isinstance(document[identifer][n], basestring): + if isinstance(document[identifer][n], six.string_types): document[identifer][n] = loader.expand_url( document[identifer][n], base_url, scoped_id=True) if document[identifer][n] not in loader.idx: @@ -695,14 +698,14 @@ def _resolve_uris(self, for d in loader.url_fields: if d in document: datum = document[d] - if isinstance(datum, (str, unicode)): + if isinstance(datum, (str, six.text_type)): document[d] = loader.expand_url( datum, base_url, scoped_id=False, vocab_term=(d in loader.vocab_fields), 
scoped_ref=self.scoped_ref_fields.get(d)) elif isinstance(datum, list): for i, url in enumerate(datum): - if isinstance(url, (str, unicode)): + if isinstance(url, (str, six.text_type)): datum[i] = loader.expand_url( url, base_url, scoped_id=False, vocab_term=(d in loader.vocab_fields), @@ -791,7 +794,7 @@ def resolve_all(self, except validate.ValidationException as v: _logger.warn("loader is %s", id(loader), exc_info=True) raise validate.ValidationException("(%s) (%s) Validation error in field %s:\n%s" % ( - id(loader), file_base, key, validate.indent(unicode(v)))) + id(loader), file_base, key, validate.indent(six.text_type(v)))) elif isinstance(document, CommentedSeq): i = 0 @@ -822,11 +825,11 @@ def resolve_all(self, except validate.ValidationException as v: _logger.warn("failed", exc_info=True) raise validate.ValidationException("(%s) (%s) Validation error in position %i:\n%s" % ( - id(loader), file_base, i, validate.indent(unicode(v)))) + id(loader), file_base, i, validate.indent(six.text_type(v)))) for identifer in loader.identity_links: if identifer in metadata: - if isinstance(metadata[identifer], (str, unicode)): + if isinstance(metadata[identifer], (str, six.text_type)): metadata[identifer] = loader.expand_url( metadata[identifer], base_url, scoped_id=True) loader.idx[metadata[identifer]] = document @@ -862,7 +865,7 @@ def fetch(self, url, inject_ids=True): # type: (unicode, bool) -> Any return result - FieldType = TypeVar('FieldType', unicode, CommentedSeq, CommentedMap) + FieldType = TypeVar('FieldType', six.text_type, CommentedSeq, CommentedMap) def validate_scoped(self, field, link, docid): # type: (unicode, unicode, unicode) -> unicode @@ -892,7 +895,7 @@ def validate_link(self, field, link, docid, all_doc_ids): # type: (unicode, FieldType, unicode, Dict[Text, Text]) -> FieldType if field in self.nolinkcheck: return link - if isinstance(link, (str, unicode)): + if isinstance(link, (str, six.text_type)): if field in self.vocab_fields: if (link not in 
self.vocab and link not in self.idx and link not in self.rvocab): @@ -917,7 +920,7 @@ def validate_link(self, field, link, docid, all_doc_ids): errors.append(v) if bool(errors): raise validate.ValidationException( - "\n".join([unicode(e) for e in errors])) + "\n".join([six.text_type(e) for e in errors])) elif isinstance(link, CommentedMap): self.validate_links(link, docid, all_doc_ids) else: @@ -931,7 +934,7 @@ def getid(self, d): # type: (Any) -> Optional[Text] for i in self.identifiers: if i in d: idd = d[i] - if isinstance(idd, (str, unicode)): + if isinstance(idd, (str, six.text_type)): return idd return None @@ -961,11 +964,11 @@ def validate_links(self, document, base_url, all_doc_ids): all_doc_ids[document[identifier]] = sl.makeLead() break except validate.ValidationException as v: - errors.append(sl.makeError(unicode(v))) + errors.append(sl.makeError(six.text_type(v))) if hasattr(document, "iteritems"): - iterator = document.iteritems() + iterator = six.iteritems(document) else: - iterator = document.items() + iterator = list(document.items()) else: return @@ -978,20 +981,20 @@ def validate_links(self, document, base_url, all_doc_ids): docid2 = self.getid(val) if docid2 is not None: errors.append(sl.makeError("checking object `%s`\n%s" - % (relname(docid2), validate.indent(unicode(v))))) + % (relname(docid2), validate.indent(six.text_type(v))))) else: - if isinstance(key, basestring): + if isinstance(key, six.string_types): errors.append(sl.makeError("checking field `%s`\n%s" % ( - key, validate.indent(unicode(v))))) + key, validate.indent(six.text_type(v))))) else: errors.append(sl.makeError("checking item\n%s" % ( - validate.indent(unicode(v))))) + validate.indent(six.text_type(v))))) else: - _logger.warn( validate.indent(unicode(v))) + _logger.warn( validate.indent(six.text_type(v))) if bool(errors): if len(errors) > 1: raise validate.ValidationException( - u"\n".join([unicode(e) for e in errors])) + u"\n".join([six.text_type(e) for e in errors])) else: 
raise errors[0] return diff --git a/schema_salad/schema.py b/schema_salad/schema.py index ca11aed4b..6724d83ac 100644 --- a/schema_salad/schema.py +++ b/schema_salad/schema.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import import avro import copy from schema_salad.utils import add_dictlist, aslist, flatten @@ -10,6 +11,7 @@ import json import urlparse import os +import six AvroSchemaFromJSONData = avro.schema.make_avsc_object # AvroSchemaFromJSONData=avro.schema.SchemaFromJSONData from avro.schema import Names, SchemaParseException @@ -242,12 +244,12 @@ def load_and_validate(document_loader, # type: Loader try: document_loader.validate_links(data, u"", {}) except validate.ValidationException as v: - validationErrors = unicode(v) + "\n" + validationErrors = six.text_type(v) + "\n" try: validate_doc(avsc_names, data, document_loader, strict, source=source) except validate.ValidationException as v: - validationErrors += unicode(v) + validationErrors += six.text_type(v) if validationErrors != u"": raise validate.ValidationException(validationErrors) @@ -290,7 +292,7 @@ def validate_doc(schema_names, # type: Names anyerrors = [] for pos, item in enumerate(validate_doc): - sl = SourceLine(validate_doc, pos, unicode) + sl = SourceLine(validate_doc, pos, six.text_type) success = False for r in roots: success = validate.validate_ex( @@ -358,7 +360,7 @@ def replace_type(items, spec, loader, found): elif isinstance(items, list): # recursively transform list return [replace_type(i, spec, loader, found) for i in items] - elif isinstance(items, (str, unicode)): + elif isinstance(items, (str, six.text_type)): # found a string which is a symbol corresponding to a type. 
replace_with = None if items in loader.vocab: @@ -385,7 +387,7 @@ def avro_name(url): # type: (AnyStr) -> AnyStr return url -Avro = TypeVar('Avro', Dict[unicode, Any], List[Any], unicode) +Avro = TypeVar('Avro', Dict[six.text_type, Any], List[Any], six.text_type) def make_valid_avro(items, # type: Avro @@ -409,7 +411,7 @@ def make_valid_avro(items, # type: Avro "Named schemas must have a non-empty name: %s" % items) if items["name"] in found: - return cast(unicode, items["name"]) + return cast(six.text_type, items["name"]) else: found.add(items["name"]) for n in ("type", "items", "values", "fields"): @@ -424,7 +426,7 @@ def make_valid_avro(items, # type: Avro for i in items: ret.append(make_valid_avro(i, alltypes, found, union=union)) return ret - if union and isinstance(items, (str, unicode)): + if union and isinstance(items, (str, six.text_type)): if items in alltypes and avro_name(items) not in found: return cast(Dict, make_valid_avro(alltypes[items], alltypes, found, union=union)) @@ -440,7 +442,7 @@ def deepcopy_strip(item): # type: (Any) -> Any """ if isinstance(item, dict): - return {k: deepcopy_strip(v) for k,v in item.iteritems()} + return {k: deepcopy_strip(v) for k,v in six.iteritems(item)} elif isinstance(item, list): return [deepcopy_strip(k) for k in item] else: diff --git a/schema_salad/sourceline.py b/schema_salad/sourceline.py index 6ab2a8838..b75998d91 100644 --- a/schema_salad/sourceline.py +++ b/schema_salad/sourceline.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import import ruamel.yaml from ruamel.yaml.comments import CommentedBase, CommentedMap, CommentedSeq import re @@ -5,6 +6,7 @@ from typing import (Any, AnyStr, Callable, cast, Dict, List, Iterable, Tuple, TypeVar, Union, Text) +import six lineno_re = re.compile(u"^(.*?:[0-9]+:[0-9]+: )(( *)(.*))") @@ -15,7 +17,7 @@ def _add_lc_filename(r, source): # type: (ruamel.yaml.comments.CommentedBase, A for d in r: _add_lc_filename(d, source) elif isinstance(r, dict): - for d in 
r.itervalues(): + for d in six.itervalues(r): _add_lc_filename(d, source) def relname(source): # type: (AnyStr) -> AnyStr @@ -87,7 +89,7 @@ def cmap(d, lc=None, fn=None): # type: (Union[int, float, str, unicode, Dict, L if isinstance(d, CommentedMap): fn = d.lc.filename if hasattr(d.lc, "filename") else fn - for k,v in d.iteritems(): + for k,v in six.iteritems(d): if k in d.lc.data: d[k] = cmap(v, lc=d.lc.data[k], fn=fn) else: @@ -132,7 +134,7 @@ def cmap(d, lc=None, fn=None): # type: (Union[int, float, str, unicode, Dict, L return d class SourceLine(object): - def __init__(self, item, key=None, raise_type=unicode): # type: (Any, Any, Callable) -> None + def __init__(self, item, key=None, raise_type=six.text_type): # type: (Any, Any, Callable) -> None self.item = item self.key = key self.raise_type = raise_type @@ -147,7 +149,7 @@ def __exit__(self, ): # -> Any if not exc_value: return - raise self.makeError(unicode(exc_value)) + raise self.makeError(six.text_type(exc_value)) def makeLead(self): # type: () -> Text if self.key is None or self.item.lc.data is None or self.key not in self.item.lc.data: diff --git a/schema_salad/validate.py b/schema_salad/validate.py index 5452a4e99..977798a47 100644 --- a/schema_salad/validate.py +++ b/schema_salad/validate.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import import pprint import avro.schema from avro.schema import Schema @@ -8,6 +9,8 @@ from typing import Any, List, Set, Union from .sourceline import SourceLine, lineno_re, bullets, indent +import six +from six.moves import range _logger = logging.getLogger("salad") @@ -94,7 +97,7 @@ def validate_ex(expected_schema, # type: Schema else: return False elif schema_type == 'string': - if isinstance(datum, basestring): + if isinstance(datum, six.string_types): return True elif isinstance(datum, bytes): datum = datum.decode(u"utf-8") @@ -114,7 +117,7 @@ def validate_ex(expected_schema, # type: Schema else: return False elif schema_type == 'int': - if 
((isinstance(datum, int) or isinstance(datum, long)) + if ((isinstance(datum, int) or isinstance(datum, int)) and INT_MIN_VALUE <= datum <= INT_MAX_VALUE): return True else: @@ -123,7 +126,7 @@ def validate_ex(expected_schema, # type: Schema else: return False elif schema_type == 'long': - if ((isinstance(datum, int) or isinstance(datum, long)) + if ((isinstance(datum, int) or isinstance(datum, int)) and LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE): return True else: @@ -133,7 +136,7 @@ def validate_ex(expected_schema, # type: Schema else: return False elif schema_type in ['float', 'double']: - if (isinstance(datum, int) or isinstance(datum, long) + if (isinstance(datum, int) or isinstance(datum, int) or isinstance(datum, float)): return True else: @@ -151,7 +154,7 @@ def validate_ex(expected_schema, # type: Schema raise ValidationException(u"'Any' type must be non-null") else: return False - if not isinstance(datum, basestring): + if not isinstance(datum, six.string_types): if raise_ex: raise ValidationException( u"value is a %s but expected a string" % (type(datum).__name__)) @@ -182,7 +185,7 @@ def validate_ex(expected_schema, # type: Schema except ValidationException as v: if raise_ex: raise sl.makeError( - unicode("item is invalid because\n%s" % (indent(str(v))))) + six.text_type("item is invalid because\n%s" % (indent(str(v))))) else: return False return True @@ -209,7 +212,7 @@ def validate_ex(expected_schema, # type: Schema continue elif isinstance(datum, dict) and not isinstance(s, avro.schema.RecordSchema): continue - elif isinstance(datum, (bool, int, long, float, basestring)) and isinstance(s, (avro.schema.ArraySchema, avro.schema.RecordSchema)): + elif isinstance(datum, (bool, int, int, float, six.string_types)) and isinstance(s, (avro.schema.ArraySchema, avro.schema.RecordSchema)): continue elif datum is not None and s.type == "null": continue @@ -224,7 +227,7 @@ def validate_ex(expected_schema, # type: Schema except ClassValidationException as e: raise 
except ValidationException as e: - errors.append(unicode(e)) + errors.append(six.text_type(e)) if bool(errors): raise ValidationException(bullets(["tried %s but\n%s" % (friendly( checked[i]), indent(errors[i])) for i in range(0, len(errors))], "- ")) @@ -272,7 +275,7 @@ def validate_ex(expected_schema, # type: Schema fieldval = None try: - sl = SourceLine(datum, f.name, unicode) + sl = SourceLine(datum, f.name, six.text_type) if not validate_ex(f.type, fieldval, identifiers, strict=strict, foreign_properties=foreign_properties, raise_ex=raise_ex, @@ -292,7 +295,7 @@ def validate_ex(expected_schema, # type: Schema if d == f.name: found = True if not found: - sl = SourceLine(datum, d, unicode) + sl = SourceLine(datum, d, six.text_type) if d not in identifiers and d not in foreign_properties and d[0] not in ("@", "$"): if (d not in identifiers and strict) and ( d not in foreign_properties and strict_foreign_properties) and not raise_ex: From e9bb3ec08ec0c4196ec887ddaaea6d75712527df Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Wed, 17 May 2017 18:57:19 +0530 Subject: [PATCH 063/116] mypy type annotations fix: replace unicode -> Text --- schema_salad/jsonld_context.py | 12 ++--- schema_salad/main.py | 6 +-- schema_salad/makedoc.py | 10 ++-- schema_salad/ref_resolver.py | 96 +++++++++++++++++----------------- schema_salad/schema.py | 44 ++++++++-------- schema_salad/sourceline.py | 2 +- schema_salad/validate.py | 14 ++--- 7 files changed, 92 insertions(+), 92 deletions(-) diff --git a/schema_salad/jsonld_context.py b/schema_salad/jsonld_context.py index dc3c759a8..b68e61558 100755 --- a/schema_salad/jsonld_context.py +++ b/schema_salad/jsonld_context.py @@ -38,7 +38,7 @@ def pred(datatype, # type: Dict[str, Union[Dict, str]] # type: (...) 
-> Union[Dict, Text] split = urlparse.urlsplit(name) - vee = None # type: Optional[Union[str, unicode]] + vee = None # type: Optional[Union[str, Text]] if split.scheme != '': vee = name @@ -131,7 +131,7 @@ def process_type(t, # type: Dict[str, Any] _logger.debug("Processing field %s", i) v = pred(t, i, fieldname, context, defaultPrefix, - namespaces) # type: Union[Dict[Any, Any], unicode, None] + namespaces) # type: Union[Dict[Any, Any], Text, None] if isinstance(v, six.string_types): v = v if v[0] != "@" else None @@ -190,8 +190,8 @@ def salad_to_jsonld_context(j, schema_ctx): return (context, g) -def fix_jsonld_ids(obj, # type: Union[Dict[unicode, Any], List[Dict[unicode, Any]]] - ids # type: List[unicode] +def fix_jsonld_ids(obj, # type: Union[Dict[Text, Any], List[Dict[Text, Any]]] + ids # type: List[Text] ): # type: (...) -> None if isinstance(obj, dict): @@ -205,8 +205,8 @@ def fix_jsonld_ids(obj, # type: Union[Dict[unicode, Any], List[Dict[unicode, fix_jsonld_ids(entry, ids) -def makerdf(workflow, # type: Union[str, unicode] - wf, # type: Union[List[Dict[unicode, Any]], Dict[unicode, Any]] +def makerdf(workflow, # type: Union[str, Text] + wf, # type: Union[List[Dict[Text, Any]], Dict[Text, Any]] ctx, # type: ContextType graph=None # type: Graph ): diff --git a/schema_salad/main.py b/schema_salad/main.py index 20a7a13e2..ea432957f 100644 --- a/schema_salad/main.py +++ b/schema_salad/main.py @@ -10,7 +10,7 @@ import pkg_resources # part of setuptools -from typing import Any, Dict, List, Union +from typing import Any, Dict, List, Union, Text from rdflib import Graph, plugin from rdflib.serializer import Serializer @@ -30,8 +30,8 @@ def printrdf(workflow, # type: str - wf, # type: Union[List[Dict[unicode, Any]], Dict[unicode, Any]] - ctx, # type: Dict[unicode, Any] + wf, # type: Union[List[Dict[Text, Any]], Dict[Text, Any]] + ctx, # type: Dict[Text, Any] sr # type: str ): # type: (...) 
-> None diff --git a/schema_salad/makedoc.py b/schema_salad/makedoc.py index bebf5c6cf..3f2a0e840 100644 --- a/schema_salad/makedoc.py +++ b/schema_salad/makedoc.py @@ -58,7 +58,7 @@ def table(self, header, body): # type: (Text, Text) -> Text ) % (header, body) -def to_id(text): # type: (Union[str, unicode]) -> Union[str, unicode] +def to_id(text): # type: (Union[str, Text]) -> Union[str, Text] textid = text if text[0] in ("0", "1", "2", "3", "4", "5", "6", "7", "8", "9"): try: @@ -229,7 +229,7 @@ def typefmt(self, nbsp=False, # type: bool jsonldPredicate=None # type: Optional[Dict[str, str]] ): - # type: (...) -> Union[str, unicode] + # type: (...) -> Union[str, Text] if isinstance(tp, list): if nbsp and len(tp) <= 3: return " | ".join([self.typefmt(n, redirects, jsonldPredicate=jsonldPredicate) for n in tp]) @@ -419,12 +419,12 @@ def extendsfrom(item, ex): def avrold_doc(j, outdoc, renderlist, redirects, brand, brandlink, primtype): - # type: (List[Dict[unicode, Any]], IO[Any], str, Dict, str, str, str) -> None + # type: (List[Dict[Text, Any]], IO[Any], str, Dict, str, str, str) -> None toc = ToC() toc.start_numbering = False rt = RenderType(toc, j, renderlist, redirects, primtype) - content = rt.typedoc.getvalue() # type: unicode + content = rt.typedoc.getvalue() # type: Text outdoc.write(""" @@ -512,7 +512,7 @@ def main(): # type: () -> None args = parser.parse_args() - s = [] # type: List[Dict[unicode, Any]] + s = [] # type: List[Dict[Text, Any]] a = args.schema with open(a) as f: if a.endswith("md"): diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index e105d151f..a58f22017 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -103,13 +103,13 @@ def SubLoader(loader): # type: (Loader) -> Loader skip_schemas=loader.skip_schemas) class Fetcher(object): - def fetch_text(self, url): # type: (unicode) -> unicode + def fetch_text(self, url): # type: (Text) -> Text raise NotImplementedError() - def 
check_exists(self, url): # type: (unicode) -> bool + def check_exists(self, url): # type: (Text) -> bool raise NotImplementedError() - def urljoin(self, base_url, url): # type: (unicode, unicode) -> unicode + def urljoin(self, base_url, url): # type: (Text, Text) -> Text raise NotImplementedError() @@ -122,7 +122,7 @@ def __init__(self, self.session = session def fetch_text(self, url): - # type: (unicode) -> unicode + # type: (Text) -> Text if url in self.cache: return self.cache[url] @@ -152,7 +152,7 @@ def fetch_text(self, url): else: raise ValueError('Unsupported scheme in url: %s' % url) - def check_exists(self, url): # type: (unicode) -> bool + def check_exists(self, url): # type: (Text) -> bool if url in self.cache: return True @@ -178,9 +178,9 @@ class Loader(object): def __init__(self, ctx, # type: ContextType schemagraph=None, # type: rdflib.graph.Graph - foreign_properties=None, # type: Set[unicode] - idx=None, # type: Dict[unicode, Union[CommentedMap, CommentedSeq, unicode, None]] - cache=None, # type: Dict[unicode, Any] + foreign_properties=None, # type: Set[Text] + idx=None, # type: Dict[Text, Union[CommentedMap, CommentedSeq, Text, None]] + cache=None, # type: Dict[Text, Any] session=None, # type: requests.sessions.Session fetcher_constructor=None, # type: Callable[[Dict[unicode, unicode], requests.sessions.Session], Fetcher] skip_schemas=None # type: bool @@ -239,29 +239,29 @@ def __init__(self, self.fetch_text = self.fetcher.fetch_text self.check_exists = self.fetcher.check_exists - self.url_fields = set() # type: Set[unicode] - self.scoped_ref_fields = {} # type: Dict[unicode, int] - self.vocab_fields = set() # type: Set[unicode] - self.identifiers = [] # type: List[unicode] - self.identity_links = set() # type: Set[unicode] - self.standalone = None # type: Optional[Set[unicode]] - self.nolinkcheck = set() # type: Set[unicode] - self.vocab = {} # type: Dict[unicode, unicode] - self.rvocab = {} # type: Dict[unicode, unicode] - self.idmap = {} # 
type: Dict[unicode, Any] - self.mapPredicate = {} # type: Dict[unicode, unicode] - self.type_dsl_fields = set() # type: Set[unicode] + self.url_fields = set() # type: Set[Text] + self.scoped_ref_fields = {} # type: Dict[Text, int] + self.vocab_fields = set() # type: Set[Text] + self.identifiers = [] # type: List[Text] + self.identity_links = set() # type: Set[Text] + self.standalone = None # type: Optional[Set[Text]] + self.nolinkcheck = set() # type: Set[Text] + self.vocab = {} # type: Dict[Text, Text] + self.rvocab = {} # type: Dict[Text, Text] + self.idmap = {} # type: Dict[Text, Any] + self.mapPredicate = {} # type: Dict[Text, Text] + self.type_dsl_fields = set() # type: Set[Text] self.add_context(ctx) def expand_url(self, - url, # type: unicode - base_url, # type: unicode + url, # type: Text + base_url, # type: Text scoped_id=False, # type: bool vocab_term=False, # type: bool scoped_ref=None # type: int ): - # type: (...) -> unicode + # type: (...) -> Text if url in (u"@id", u"@type"): return url @@ -298,7 +298,7 @@ def expand_url(self, else: return url - def _add_properties(self, s): # type: (unicode) -> None + def _add_properties(self, s): # type: (Text) -> None for _, _, rng in self.graph.triples((s, RDFS.range, None)): literal = ((six.text_type(rng).startswith( u"http://www.w3.org/2001/XMLSchema#") and @@ -309,11 +309,11 @@ def _add_properties(self, s): # type: (unicode) -> None self.url_fields.add(six.text_type(s)) self.foreign_properties.add(six.text_type(s)) - def add_namespaces(self, ns): # type: (Dict[unicode, unicode]) -> None + def add_namespaces(self, ns): # type: (Dict[Text, Text]) -> None self.vocab.update(ns) def add_schemas(self, ns, base_url): - # type: (Union[List[unicode], unicode], unicode) -> None + # type: (Union[List[Text], Text], Text) -> None if self.skip_schemas: return for sch in aslist(ns): @@ -348,7 +348,7 @@ def add_schemas(self, ns, base_url): self.idx[six.text_type(s)] = None def add_context(self, newcontext, baseuri=""): - # 
type: (ContextType, unicode) -> None + # type: (ContextType, Text) -> None if bool(self.vocab): raise validate.ValidationException( "Refreshing context that already has stuff in it") @@ -413,17 +413,17 @@ def add_context(self, newcontext, baseuri=""): _logger.debug("vocab is %s", self.vocab) def resolve_ref(self, - ref, # type: Union[CommentedMap, CommentedSeq, unicode] - base_url=None, # type: unicode + ref, # type: Union[CommentedMap, CommentedSeq, Text] + base_url=None, # type: Text checklinks=True # type: bool ): - # type: (...) -> Tuple[Union[CommentedMap, CommentedSeq, unicode, None], Dict[unicode, Any]] + # type: (...) -> Tuple[Union[CommentedMap, CommentedSeq, Text, None], Dict[Text, Any]] - lref = ref # type: Union[CommentedMap, CommentedSeq, unicode, None] + lref = ref # type: Union[CommentedMap, CommentedSeq, Text, None] obj = None # type: Optional[CommentedMap] - resolved_obj = None # type: Optional[Union[CommentedMap, CommentedSeq, unicode]] + resolved_obj = None # type: Optional[Union[CommentedMap, CommentedSeq, Text]] inc = False - mixin = None # type: Optional[Dict[unicode, Any]] + mixin = None # type: Optional[Dict[Text, Any]] if not base_url: base_url = file_uri(os.getcwd()) + "/" @@ -581,10 +581,10 @@ def _resolve_idmap(self, typeDSLregex = re.compile(ur"^([^[?]+)(\[\])?(\?)?$") def _type_dsl(self, - t, # type: Union[unicode, Dict, List] + t, # type: Union[Text, Dict, List] lc, filename): - # type: (...) -> Union[unicode, Dict[unicode, unicode], List[Union[unicode, Dict[unicode, unicode]]]] + # type: (...) 
-> Union[Text, Dict[Text, Text], List[Union[Text, Dict[Text, Text]]]] if not isinstance(t, (str, six.text_type)): return t @@ -627,7 +627,7 @@ def _resolve_type_dsl(self, t, datum.lc.data[n], document.lc.filename)) if isinstance(datum2, CommentedSeq): datum3 = CommentedSeq() - seen = [] # type: List[unicode] + seen = [] # type: List[Text] for i, item in enumerate(datum2): if isinstance(item, CommentedSeq): for j, v in enumerate(item): @@ -647,7 +647,7 @@ def _resolve_type_dsl(self, document[d] = datum2 def _resolve_identifier(self, document, loader, base_url): - # type: (CommentedMap, Loader, unicode) -> unicode + # type: (CommentedMap, Loader, Text) -> Text # Expand identifier field (usually 'id') to resolve scope for identifer in loader.identifiers: if identifer in document: @@ -666,7 +666,7 @@ def _resolve_identifier(self, document, loader, base_url): return base_url def _resolve_identity(self, document, loader, base_url): - # type: (Dict[unicode, List[unicode]], Loader, unicode) -> None + # type: (Dict[Text, List[Text]], Loader, Text) -> None # Resolve scope for identity fields (fields where the value is the # identity of a standalone node, such as enum symbols) for identifer in loader.identity_links: @@ -680,7 +680,7 @@ def _resolve_identity(self, document, loader, base_url): n]] = document[identifer][n] def _normalize_fields(self, document, loader): - # type: (Dict[unicode, unicode], Loader) -> None + # type: (Dict[Text, Text], Loader) -> None # Normalize fields which are prefixed or full URIn to vocabulary terms for d in document: d2 = loader.expand_url(d, u"", scoped_id=False, vocab_term=True) @@ -689,9 +689,9 @@ def _normalize_fields(self, document, loader): del document[d] def _resolve_uris(self, - document, # type: Dict[unicode, Union[unicode, List[unicode]]] + document, # type: Dict[Text, Union[Text, List[Text]]] loader, # type: Loader - base_url # type: unicode + base_url # type: Text ): # type: (...) 
-> None # Resolve remaining URLs based on document base @@ -714,11 +714,11 @@ def _resolve_uris(self, def resolve_all(self, document, # type: Union[CommentedMap, CommentedSeq] - base_url, # type: unicode - file_base=None, # type: unicode + base_url, # type: Text + file_base=None, # type: Text checklinks=True # type: bool ): - # type: (...) -> Tuple[Union[CommentedMap, CommentedSeq, unicode, None], Dict[unicode, Any]] + # type: (...) -> Tuple[Union[CommentedMap, CommentedSeq, Text, None], Dict[Text, Any]] loader = self metadata = CommentedMap() # type: CommentedMap if file_base is None: @@ -840,7 +840,7 @@ def resolve_all(self, return document, metadata - def fetch(self, url, inject_ids=True): # type: (unicode, bool) -> Any + def fetch(self, url, inject_ids=True): # type: (Text, bool) -> Any if url in self.idx: return self.idx[url] try: @@ -868,7 +868,7 @@ def fetch(self, url, inject_ids=True): # type: (unicode, bool) -> Any FieldType = TypeVar('FieldType', six.text_type, CommentedSeq, CommentedMap) def validate_scoped(self, field, link, docid): - # type: (unicode, unicode, unicode) -> unicode + # type: (Text, Text, Text) -> Text split = urlparse.urlsplit(docid) sp = split.fragment.split(u"/") n = self.scoped_ref_fields[field] @@ -892,7 +892,7 @@ def validate_scoped(self, field, link, docid): "Field `%s` references unknown identifier `%s`, tried %s" % (field, link, ", ".join(tried))) def validate_link(self, field, link, docid, all_doc_ids): - # type: (unicode, FieldType, unicode, Dict[Text, Text]) -> FieldType + # type: (Text, FieldType, Text, Dict[Text, Text]) -> FieldType if field in self.nolinkcheck: return link if isinstance(link, (str, six.text_type)): @@ -939,7 +939,7 @@ def getid(self, d): # type: (Any) -> Optional[Text] return None def validate_links(self, document, base_url, all_doc_ids): - # type: (Union[CommentedMap, CommentedSeq, unicode, None], unicode, Dict[Text, Text]) -> None + # type: (Union[CommentedMap, CommentedSeq, Text, None], Text, Dict[Text, 
Text]) -> None docid = self.getid(document) if not docid: docid = base_url diff --git a/schema_salad/schema.py b/schema_salad/schema.py index 6724d83ac..e2812501b 100644 --- a/schema_salad/schema.py +++ b/schema_salad/schema.py @@ -20,7 +20,7 @@ import logging from . import jsonld_context from .sourceline import SourceLine, strip_dup_lineno, add_lc_filename, bullets, relname -from typing import cast, Any, AnyStr, Dict, List, Set, Tuple, TypeVar, Union +from typing import cast, Any, AnyStr, Dict, List, Set, Tuple, TypeVar, Union, Text from ruamel.yaml.comments import CommentedSeq, CommentedMap _logger = logging.getLogger("salad") @@ -57,7 +57,7 @@ def get_metaschema(): - # type: () -> Tuple[Names, List[Dict[unicode, Any]], Loader] + # type: () -> Tuple[Names, List[Dict[Text, Any]], Loader] loader = ref_resolver.Loader({ "Any": "https://w3id.org/cwl/salad#Any", "ArraySchema": "https://w3id.org/cwl/salad#ArraySchema", @@ -186,10 +186,10 @@ def get_metaschema(): return (sch_names, j, loader) -def load_schema(schema_ref, # type: Union[CommentedMap, CommentedSeq, unicode] +def load_schema(schema_ref, # type: Union[CommentedMap, CommentedSeq, Text] cache=None # type: Dict ): - # type: (...) -> Tuple[Loader, Union[Names, SchemaParseException], Dict[unicode, Any], Loader] + # type: (...) -> Tuple[Loader, Union[Names, SchemaParseException], Dict[Text, Any], Loader] """Load a schema that can be used to validate documents using load_and_validate. return document_loader, avsc_names, schema_metadata, metaschema_loader""" @@ -220,10 +220,10 @@ def load_schema(schema_ref, # type: Union[CommentedMap, CommentedSeq, unicode] def load_and_validate(document_loader, # type: Loader avsc_names, # type: Names - document, # type: Union[CommentedMap, unicode] + document, # type: Union[CommentedMap, Text] strict # type: bool ): - # type: (...) -> Tuple[Any, Dict[unicode, Any]] + # type: (...) -> Tuple[Any, Dict[Text, Any]] """Load a document and validate it with the provided schema. 
return data, metadata @@ -258,7 +258,7 @@ def load_and_validate(document_loader, # type: Loader def validate_doc(schema_names, # type: Names - doc, # type: Union[Dict[unicode, Any], List[Dict[unicode, Any]], unicode, None] + doc, # type: Union[Dict[Text, Any], List[Dict[Text, Any]], Text, None] loader, # type: Loader strict, # type: bool source=None @@ -302,7 +302,7 @@ def validate_doc(schema_names, # type: Names break if not success: - errors = [] # type: List[unicode] + errors = [] # type: List[Text] for r in roots: if hasattr(r, "get_prop"): name = r.get_prop(u"name") @@ -337,7 +337,7 @@ def validate_doc(schema_names, # type: Names def replace_type(items, spec, loader, found): - # type: (Any, Dict[unicode, Any], Loader, Set[unicode]) -> Any + # type: (Any, Dict[Text, Any], Loader, Set[Text]) -> Any """ Go through and replace types in the 'spec' mapping""" if isinstance(items, dict): @@ -391,11 +391,11 @@ def avro_name(url): # type: (AnyStr) -> AnyStr def make_valid_avro(items, # type: Avro - alltypes, # type: Dict[unicode, Dict[unicode, Any]] - found, # type: Set[unicode] + alltypes, # type: Dict[Text, Dict[Text, Any]] + found, # type: Set[Text] union=False # type: bool ): - # type: (...) -> Union[Avro, Dict, unicode] + # type: (...) 
-> Union[Avro, Dict, Text] if isinstance(items, dict): items = copy.copy(items) if items.get("name"): @@ -449,23 +449,23 @@ def deepcopy_strip(item): # type: (Any) -> Any return item def extend_and_specialize(items, loader): - # type: (List[Dict[unicode, Any]], Loader) -> List[Dict[unicode, Any]] + # type: (List[Dict[Text, Any]], Loader) -> List[Dict[Text, Any]] """Apply 'extend' and 'specialize' to fully materialize derived record types.""" items = deepcopy_strip(items) - types = {t["name"]: t for t in items} # type: Dict[unicode, Any] + types = {t["name"]: t for t in items} # type: Dict[Text, Any] n = [] for t in items: if "extends" in t: - spec = {} # type: Dict[unicode, unicode] + spec = {} # type: Dict[Text, Text] if "specialize" in t: for sp in aslist(t["specialize"]): spec[sp["specializeFrom"]] = sp["specializeTo"] - exfields = [] # type: List[unicode] - exsym = [] # type: List[unicode] + exfields = [] # type: List[Text] + exsym = [] # type: List[Text] for ex in aslist(t["extends"]): if ex not in types: raise Exception("Extends %s in %s refers to invalid base type" % ( @@ -491,7 +491,7 @@ def extend_and_specialize(items, loader): exfields.extend(t.get("fields", [])) t["fields"] = exfields - fieldnames = set() # type: Set[unicode] + fieldnames = set() # type: Set[Text] for field in t["fields"]: if field["name"] in fieldnames: raise validate.ValidationException( @@ -511,7 +511,7 @@ def extend_and_specialize(items, loader): for t in n: ex_types[t["name"]] = t - extended_by = {} # type: Dict[unicode, unicode] + extended_by = {} # type: Dict[Text, Text] for t in n: if "extends" in t: for ex in aslist(t["extends"]): @@ -530,15 +530,15 @@ def extend_and_specialize(items, loader): return n -def make_avro_schema(i, # type: List[Dict[unicode, Any]] +def make_avro_schema(i, # type: List[Dict[Text, Any]] loader # type: Loader ): - # type: (...) -> Tuple[Union[Names, SchemaParseException], List[Dict[unicode, Any]]] + # type: (...) 
-> Tuple[Union[Names, SchemaParseException], List[Dict[Text, Any]]] names = avro.schema.Names() j = extend_and_specialize(i, loader) - name_dict = {} # type: Dict[unicode, Dict[unicode, Any]] + name_dict = {} # type: Dict[Text, Dict[Text, Any]] for t in j: name_dict[t["name"]] = t j2 = make_valid_avro(j, name_dict, set()) diff --git a/schema_salad/sourceline.py b/schema_salad/sourceline.py index b75998d91..aed31c7c6 100644 --- a/schema_salad/sourceline.py +++ b/schema_salad/sourceline.py @@ -81,7 +81,7 @@ def strip_dup_lineno(text, maxline=None): # type: (Text, int) -> Text msg.append(" " * len(g.group(1)) + g2) return "\n".join(msg) -def cmap(d, lc=None, fn=None): # type: (Union[int, float, str, unicode, Dict, List], List[int], unicode) -> Union[int, float, str, unicode, CommentedMap, CommentedSeq] +def cmap(d, lc=None, fn=None): # type: (Union[int, float, str, Text, Dict, List], List[int], Text) -> Union[int, float, str, Text, CommentedMap, CommentedSeq] if lc is None: lc = [0, 0, 0, 0] if fn is None: diff --git a/schema_salad/validate.py b/schema_salad/validate.py index 977798a47..65651afbd 100644 --- a/schema_salad/validate.py +++ b/schema_salad/validate.py @@ -7,7 +7,7 @@ import re import logging -from typing import Any, List, Set, Union +from typing import Any, List, Set, Union, Text from .sourceline import SourceLine, lineno_re, bullets, indent import six from six.moves import range @@ -24,9 +24,9 @@ class ClassValidationException(ValidationException): def validate(expected_schema, # type: Schema datum, # type: Any - identifiers=[], # type: List[unicode] + identifiers=[], # type: List[Text] strict=False, # type: bool - foreign_properties=set() # type: Set[unicode] + foreign_properties=set() # type: Set[Text] ): # type: (...) 
-> bool return validate_ex( @@ -62,9 +62,9 @@ def vpformat(datum): # type: (Any) -> str def validate_ex(expected_schema, # type: Schema datum, # type: Any - identifiers=None, # type: List[unicode] + identifiers=None, # type: List[Text] strict=False, # type: bool - foreign_properties=None, # type: Set[unicode] + foreign_properties=None, # type: Set[Text] raise_ex=True, # type: bool strict_foreign_properties=False, # type: bool logger=_logger # type: logging.Logger @@ -205,14 +205,14 @@ def validate_ex(expected_schema, # type: Schema if not raise_ex: return False - errors = [] # type: List[unicode] + errors = [] # type: List[Text] checked = [] for s in expected_schema.schemas: if isinstance(datum, list) and not isinstance(s, avro.schema.ArraySchema): continue elif isinstance(datum, dict) and not isinstance(s, avro.schema.RecordSchema): continue - elif isinstance(datum, (bool, int, int, float, six.string_types)) and isinstance(s, (avro.schema.ArraySchema, avro.schema.RecordSchema)): + elif isinstance(datum, (bool, int, int, float, int)) and isinstance(s, (avro.schema.ArraySchema, avro.schema.RecordSchema)): continue elif datum is not None and s.type == "null": continue From df685ea14da47ea04b5758e8cd7436362bde0b3c Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Thu, 18 May 2017 01:24:52 +0530 Subject: [PATCH 064/116] modernize tests --- schema_salad/tests/test_cli_args.py | 1 + schema_salad/tests/test_errors.py | 7 +++++-- schema_salad/tests/test_examples.py | 10 ++++++---- schema_salad/tests/test_fetch.py | 4 +++- schema_salad/tests/test_ref_resolver.py | 1 + schema_salad/tests/util.py | 1 + 6 files changed, 17 insertions(+), 7 deletions(-) diff --git a/schema_salad/tests/test_cli_args.py b/schema_salad/tests/test_cli_args.py index a1d2f0e75..609456733 100644 --- a/schema_salad/tests/test_cli_args.py +++ b/schema_salad/tests/test_cli_args.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import import unittest import sys diff --git 
a/schema_salad/tests/test_errors.py b/schema_salad/tests/test_errors.py index be0bfc530..7a4d39752 100644 --- a/schema_salad/tests/test_errors.py +++ b/schema_salad/tests/test_errors.py @@ -1,9 +1,12 @@ +from __future__ import absolute_import +from __future__ import print_function from .util import get_data import unittest from typing import cast from schema_salad.schema import load_schema, load_and_validate from schema_salad.validate import ValidationException from avro.schema import Names +import six class TestErrors(unittest.TestCase): def test_errors(self): @@ -28,7 +31,7 @@ def test_errors(self): with self.assertRaises(ValidationException): try: load_and_validate(document_loader, avsc_names, - unicode(get_data("tests/"+t)), True) + six.text_type(get_data("tests/"+t)), True) except ValidationException as e: - print("\n", e) + print(("\n", e)) raise diff --git a/schema_salad/tests/test_examples.py b/schema_salad/tests/test_examples.py index adc04fcc6..440f04cdc 100644 --- a/schema_salad/tests/test_examples.py +++ b/schema_salad/tests/test_examples.py @@ -1,3 +1,5 @@ +from __future__ import absolute_import +from __future__ import print_function from .util import get_data import unittest import schema_salad.ref_resolver @@ -285,7 +287,7 @@ def test_scoped_id(self): }, ra) g = makerdf(None, ra, ctx) - print(g.serialize(format="n3")) + print((g.serialize(format="n3"))) ra, _ = ldr.resolve_all(cmap({ "location": "foo", @@ -299,7 +301,7 @@ def test_scoped_id(self): }, ra) g = makerdf(None, ra, ctx) - print(g.serialize(format="n3")) + print((g.serialize(format="n3"))) ra, _ = ldr.resolve_all(cmap({ "id": "foo", @@ -313,7 +315,7 @@ def test_scoped_id(self): }, ra) g = makerdf(None, ra, ctx) - print(g.serialize(format="n3")) + print((g.serialize(format="n3"))) ra, _ = ldr.resolve_all(cmap({ "location": "foo", @@ -327,7 +329,7 @@ def test_scoped_id(self): }, ra) g = makerdf(None, ra, ctx) - print(g.serialize(format="n3")) + print((g.serialize(format="n3"))) def 
test_mixin(self): base_url = "file://" + os.getcwd() + "/tests/" diff --git a/schema_salad/tests/test_fetch.py b/schema_salad/tests/test_fetch.py index 6dc28d0bb..460d93f34 100644 --- a/schema_salad/tests/test_fetch.py +++ b/schema_salad/tests/test_fetch.py @@ -1,3 +1,5 @@ +from __future__ import absolute_import +from __future__ import print_function import unittest import schema_salad.ref_resolver import schema_salad.main @@ -52,6 +54,6 @@ def test_cache(self): loader = schema_salad.ref_resolver.Loader({}) foo = "file://%s/foo.txt" % os.getcwd() loader.cache.update({foo: "hello: foo"}) - print(loader.cache) + print((loader.cache)) self.assertEqual({"hello": "foo"}, loader.resolve_ref("foo.txt")[0]) self.assertTrue(loader.check_exists(foo)) diff --git a/schema_salad/tests/test_ref_resolver.py b/schema_salad/tests/test_ref_resolver.py index 659d830fd..fdb5e61a7 100644 --- a/schema_salad/tests/test_ref_resolver.py +++ b/schema_salad/tests/test_ref_resolver.py @@ -1,5 +1,6 @@ """Test the ref_resolver module.""" +from __future__ import absolute_import import shutil import tempfile diff --git a/schema_salad/tests/util.py b/schema_salad/tests/util.py index 6353adda4..0f71eced9 100644 --- a/schema_salad/tests/util.py +++ b/schema_salad/tests/util.py @@ -1,3 +1,4 @@ +from __future__ import absolute_import from pkg_resources import Requirement, resource_filename, ResolutionError # type: ignore from typing import Optional, Text import os From b4273f288fbdedd1bfdbd9abd76135150064d3c6 Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Thu, 18 May 2017 14:11:28 +0530 Subject: [PATCH 065/116] fix urlparse using six in tests and schema_salad core; no automated fixers --- schema_salad/jsonld_context.py | 9 +++++---- schema_salad/main.py | 8 +++++--- schema_salad/makedoc.py | 23 ++++++++++++---------- schema_salad/ref_resolver.py | 33 +++++++++++++++++--------------- schema_salad/schema.py | 7 ++++--- schema_salad/tests/test_fetch.py | 9 +++++---- schema_salad/validate.py | 5 
+++-- 7 files changed, 53 insertions(+), 41 deletions(-) diff --git a/schema_salad/jsonld_context.py b/schema_salad/jsonld_context.py index b68e61558..cfc769e29 100755 --- a/schema_salad/jsonld_context.py +++ b/schema_salad/jsonld_context.py @@ -4,6 +4,8 @@ import json import ruamel.yaml as yaml import six +# import urlparse +from six.moves.urllib import parse try: from ruamel.yaml import CSafeLoader as SafeLoader except ImportError: @@ -18,7 +20,6 @@ from rdflib import Graph, URIRef import rdflib.namespace from rdflib.namespace import RDF, RDFS -import urlparse import logging from schema_salad.utils import aslist from typing import (cast, Any, Dict, Iterable, List, Optional, Text, Tuple, @@ -36,7 +37,7 @@ def pred(datatype, # type: Dict[str, Union[Dict, str]] namespaces # type: Dict[str, rdflib.namespace.Namespace] ): # type: (...) -> Union[Dict, Text] - split = urlparse.urlsplit(name) + split = parse.urlsplit(name) vee = None # type: Optional[Union[str, Text]] @@ -104,7 +105,7 @@ def process_type(t, # type: Dict[str, Any] classnode = URIRef(recordname) g.add((classnode, RDF.type, RDFS.Class)) - split = urlparse.urlsplit(recordname) + split = parse.urlsplit(recordname) predicate = recordname if t.get("inVocab", True): if split.scheme: @@ -220,7 +221,7 @@ def makerdf(workflow, # type: Union[str, Text] url = v if url == "@id": idfields.append(k) - doc_url, frg = urlparse.urldefrag(url) + doc_url, frg = parse.urldefrag(url) if "/" in frg: p = frg.split("/")[0] prefixes[p] = u"%s#%s/" % (doc_url, p) diff --git a/schema_salad/main.py b/schema_salad/main.py index ea432957f..530877228 100644 --- a/schema_salad/main.py +++ b/schema_salad/main.py @@ -6,7 +6,9 @@ import traceback import json import os -import urlparse +# import urlparse +from six.moves.urllib import parse + import pkg_resources # part of setuptools @@ -110,7 +112,7 @@ def main(argsl=None): # type: (List[str]) -> int # Load schema document and resolve refs schema_uri = args.schema - if not 
urlparse.urlparse(schema_uri)[0]: + if not parse.urlparse(schema_uri)[0]: schema_uri = "file://" + os.path.abspath(schema_uri) schema_raw_doc = metaschema_loader.fetch(schema_uri) @@ -207,7 +209,7 @@ def main(argsl=None): # type: (List[str]) -> int # Load target document and resolve refs try: uri = args.document - if not urlparse.urlparse(uri)[0]: + if not parse.urlparse(uri)[0]: doc = "file://" + os.path.abspath(uri) document, doc_metadata = document_loader.resolve_ref(uri) except (validate.ValidationException, RuntimeError) as e: diff --git a/schema_salad/makedoc.py b/schema_salad/makedoc.py index 3f2a0e840..01af98af5 100644 --- a/schema_salad/makedoc.py +++ b/schema_salad/makedoc.py @@ -1,6 +1,7 @@ from __future__ import absolute_import + import mistune -from . import schema +import argparse import json import os import copy @@ -8,13 +9,15 @@ import sys from StringIO import StringIO import logging -import urlparse + +from . import schema from schema_salad.utils import add_dictlist, aslist -import re -import argparse -from typing import cast, Any, Dict, IO, List, Optional, Set, Text, Union + import six from six.moves import range +from six.moves.urllib import parse + +from typing import cast, Any, Dict, IO, List, Optional, Set, Text, Union _logger = logging.getLogger("salad") @@ -38,7 +41,7 @@ def has_types(items): # type: (Any) -> List[basestring] def linkto(item): # type: (Text) -> Text - _, frg = urlparse.urldefrag(item) + _, frg = parse.urldefrag(item) return "[%s](#%s)" % (frg, to_id(frg)) @@ -206,8 +209,8 @@ def __init__(self, toc, j, renderlist, redirects, primitiveType): if tp not in self.uses: self.uses[tp] = [] if (t["name"], f["name"]) not in self.uses[tp]: - _, frg1 = urlparse.urldefrag(t["name"]) - _, frg2 = urlparse.urldefrag(f["name"]) + _, frg1 = parse.urldefrag(t["name"]) + _, frg2 = parse.urldefrag(f["name"]) self.uses[tp].append((frg1, frg2)) if tp not in basicTypes and tp not in self.record_refs[t["name"]]: 
self.record_refs[t["name"]].append(tp) @@ -268,7 +271,7 @@ def typefmt(self, elif str(tp) in basicTypes: return """%s""" % (self.primitiveType, schema.avro_name(str(tp))) else: - _, frg = urlparse.urldefrag(tp) + _, frg = parse.urldefrag(tp) if frg is not '': tp = frg return """%s""" % (to_id(tp), tp) @@ -327,7 +330,7 @@ def extendsfrom(item, ex): lines.append(l) f["doc"] = "\n".join(lines) - _, frg = urlparse.urldefrag(f["name"]) + _, frg = parse.urldefrag(f["name"]) num = self.toc.add_entry(depth, frg) doc = "%s %s %s\n" % (("#" * depth), num, frg) else: diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index a58f22017..4ac5dad46 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -5,11 +5,15 @@ import hashlib import logging import collections + +import six +from six.moves import range import urllib -import urlparse +# import urlparse +from six.moves.urllib import parse + import re import copy -import urllib from StringIO import StringIO from . 
import validate @@ -29,8 +33,7 @@ import xml.sax from typing import (cast, Any, AnyStr, Callable, Dict, List, Iterable, Optional, Set, Text, Tuple, TypeVar, Union) -import six -from six.moves import range + _logger = logging.getLogger("salad") ContextType = Dict[six.text_type, Union[Dict, six.text_type, Iterable[six.text_type]]] @@ -54,7 +57,7 @@ def file_uri(path, split_frag=False): # type: (str, bool) -> str return "file://%s%s" % (urlpath, frag) def uri_file_path(url): # type: (str) -> str - split = urlparse.urlsplit(url) + split = parse.urlsplit(url) if split.scheme == "file": return urllib.url2pathname( str(split.path)) + ("#" + urllib.unquote(str(split.fragment)) @@ -126,7 +129,7 @@ def fetch_text(self, url): if url in self.cache: return self.cache[url] - split = urlparse.urlsplit(url) + split = parse.urlsplit(url) scheme, path = split.scheme, split.path if scheme in [u'http', u'https'] and self.session is not None: @@ -156,7 +159,7 @@ def check_exists(self, url): # type: (Text) -> bool if url in self.cache: return True - split = urlparse.urlsplit(url) + split = parse.urlsplit(url) scheme, path = split.scheme, split.path if scheme in [u'http', u'https'] and self.session is not None: @@ -172,7 +175,7 @@ def check_exists(self, url): # type: (Text) -> bool raise ValueError('Unsupported scheme in url: %s' % url) def urljoin(self, base_url, url): # type: (Text, Text) -> Text - return urlparse.urljoin(base_url, url) + return parse.urljoin(base_url, url) class Loader(object): def __init__(self, @@ -187,7 +190,7 @@ def __init__(self, ): # type: (...) 
-> None - normalize = lambda url: urlparse.urlsplit(url).geturl() + normalize = lambda url: parse.urlsplit(url).geturl() if idx is not None: self.idx = idx else: @@ -273,20 +276,20 @@ def expand_url(self, if prefix in self.vocab: url = self.vocab[prefix] + url[len(prefix) + 1:] - split = urlparse.urlsplit(url) + split = parse.urlsplit(url) if (bool(split.scheme) or url.startswith(u"$(") or url.startswith(u"${")): pass elif scoped_id and not bool(split.fragment): - splitbase = urlparse.urlsplit(base_url) + splitbase = parse.urlsplit(base_url) frg = u"" if bool(splitbase.fragment): frg = splitbase.fragment + u"/" + split.path else: frg = split.path pt = splitbase.path if splitbase.path != '' else "/" - url = urlparse.urlunsplit( + url = parse.urlunsplit( (splitbase.scheme, splitbase.netloc, pt, splitbase.query, frg)) elif scoped_ref is not None and not split.fragment: pass @@ -493,7 +496,7 @@ def resolve_ref(self, doc_url = url else: # Load structured document - doc_url, frg = urlparse.urldefrag(url) + doc_url, frg = parse.urldefrag(url) if doc_url in self.idx and (not mixin): # If the base document is in the index, it was already loaded, # so if we didn't find the reference earlier then it must not @@ -869,7 +872,7 @@ def fetch(self, url, inject_ids=True): # type: (Text, bool) -> Any def validate_scoped(self, field, link, docid): # type: (Text, Text, Text) -> Text - split = urlparse.urlsplit(docid) + split = parse.urlsplit(docid) sp = split.fragment.split(u"/") n = self.scoped_ref_fields[field] while n > 0 and len(sp) > 0: @@ -878,7 +881,7 @@ def validate_scoped(self, field, link, docid): tried = [] while True: sp.append(link) - url = urlparse.urlunsplit(( + url = parse.urlunsplit(( split.scheme, split.netloc, split.path, split.query, u"/".join(sp))) tried.append(url) diff --git a/schema_salad/schema.py b/schema_salad/schema.py index e2812501b..897269741 100644 --- a/schema_salad/schema.py +++ b/schema_salad/schema.py @@ -9,9 +9,10 @@ import avro.schema from . 
import validate import json -import urlparse -import os +# import urlparse +from six.moves.urllib import parse import six +import os AvroSchemaFromJSONData = avro.schema.make_avsc_object # AvroSchemaFromJSONData=avro.schema.SchemaFromJSONData from avro.schema import Names, SchemaParseException @@ -378,7 +379,7 @@ def replace_type(items, spec, loader, found): def avro_name(url): # type: (AnyStr) -> AnyStr - doc_url, frg = urlparse.urldefrag(url) + doc_url, frg = parse.urldefrag(url) if frg != '': if '/' in frg: return frg[frg.rindex('/') + 1:] diff --git a/schema_salad/tests/test_fetch.py b/schema_salad/tests/test_fetch.py index 460d93f34..a318d3b95 100644 --- a/schema_salad/tests/test_fetch.py +++ b/schema_salad/tests/test_fetch.py @@ -9,7 +9,8 @@ import ruamel.yaml as yaml import json import os -import urlparse +# import urlparse +from six.moves.urllib import parse class TestFetcher(unittest.TestCase): def test_fetcher(self): @@ -32,14 +33,14 @@ def check_exists(self, url): # type: (unicode) -> bool return False def urljoin(self, base, url): - urlsp = urlparse.urlsplit(url) + urlsp = parse.urlsplit(url) if urlsp.scheme: return url - basesp = urlparse.urlsplit(base) + basesp = parse.urlsplit(base) if basesp.scheme == "keep": return base + "/" + url - return urlparse.urljoin(base, url) + return parse.urljoin(base, url) loader = schema_salad.ref_resolver.Loader({}, fetcher_constructor=TestFetcher) self.assertEqual({"hello": "foo"}, loader.resolve_ref("foo.txt")[0]) diff --git a/schema_salad/validate.py b/schema_salad/validate.py index 65651afbd..fb41a13e5 100644 --- a/schema_salad/validate.py +++ b/schema_salad/validate.py @@ -3,7 +3,8 @@ import avro.schema from avro.schema import Schema import sys -import urlparse +# import urlparse +from six.moves.urllib import parse import re import logging @@ -300,7 +301,7 @@ def validate_ex(expected_schema, # type: Schema if (d not in identifiers and strict) and ( d not in foreign_properties and strict_foreign_properties) and 
not raise_ex: return False - split = urlparse.urlsplit(d) + split = parse.urlsplit(d) if split.scheme: err = sl.makeError(u"unrecognized extension field `%s`%s." " Did you include " From e795ac9c03aa2563140848e0d612de8a577c47cb Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Thu, 18 May 2017 15:14:30 +0530 Subject: [PATCH 066/116] fix: make regex command python3 compatible --- schema_salad/ref_resolver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index 4ac5dad46..bc2e691d4 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -581,7 +581,7 @@ def _resolve_idmap(self, document[idmapField] = ls - typeDSLregex = re.compile(ur"^([^[?]+)(\[\])?(\?)?$") + typeDSLregex = re.compile(u"^([^[?]+)(\[\])?(\?)?$") def _type_dsl(self, t, # type: Union[Text, Dict, List] From 3704c9880d6df449837badcc52cdc159fb3be6db Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Thu, 18 May 2017 15:32:26 +0530 Subject: [PATCH 067/116] py3 fix: use six StringIO import --- schema_salad/makedoc.py | 3 ++- schema_salad/ref_resolver.py | 5 +++-- schema_salad/tests/test_cli_args.py | 4 ++-- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/schema_salad/makedoc.py b/schema_salad/makedoc.py index 01af98af5..cde11ee03 100644 --- a/schema_salad/makedoc.py +++ b/schema_salad/makedoc.py @@ -7,7 +7,7 @@ import copy import re import sys -from StringIO import StringIO + import logging from . 
import schema @@ -16,6 +16,7 @@ import six from six.moves import range from six.moves.urllib import parse +from six import StringIO from typing import cast, Any, Dict, IO, List, Optional, Set, Text, Union diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index bc2e691d4..cf5ad8d4a 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -9,12 +9,13 @@ import six from six.moves import range import urllib -# import urlparse + from six.moves.urllib import parse +from six import StringIO import re import copy -from StringIO import StringIO + from . import validate from schema_salad.utils import aslist, flatten diff --git a/schema_salad/tests/test_cli_args.py b/schema_salad/tests/test_cli_args.py index 609456733..16b7e5cfa 100644 --- a/schema_salad/tests/test_cli_args.py +++ b/schema_salad/tests/test_cli_args.py @@ -4,9 +4,9 @@ import schema_salad.main as cli_parser -""" for capturing print() output """ +# for capturing print() output from contextlib import contextmanager -from StringIO import StringIO +from six import StringIO @contextmanager def captured_output(): From cec021360be6fdc32e0b3e6de56b772f38388a34 Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Thu, 18 May 2017 15:51:57 +0530 Subject: [PATCH 068/116] setup.py: install different version of avro in case of py3 runtime --- setup.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/setup.py b/setup.py index 25b4042e0..48d79e9d1 100755 --- a/setup.py +++ b/setup.py @@ -38,13 +38,13 @@ 'CacheControl >= 0.11.7, < 0.12', 'lockfile >= 0.9'] -install_requires.append("avro") # TODO: remove me once cwltool is +# install_requires.append("avro") # TODO: remove me once cwltool is # available in Debian Stable, Ubuntu 12.04 LTS -# extras_require={ # TODO: uncomment me, same conditions as above -# ':python_version<"3"': ['avro'], -# ':python_version>="3"': ['avro-python3']} -extras_require = {} # TODO: to be removed when the above is added 
+extras_require={ # TODO: uncomment me, same conditions as above + ':python_version<"3"': ['avro'], + ':python_version>="3"': ['avro-python3']} +# extras_require = {} # TODO: to be removed when the above is added setup(name='schema-salad', version='2.5', From 9108c7ba6439bc6e8bc34bd66dbab108d8e53ab8 Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Thu, 18 May 2017 16:27:32 +0530 Subject: [PATCH 069/116] fix: minor: py3 compatibility wrt avro --- schema_salad/schema.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/schema_salad/schema.py b/schema_salad/schema.py index 897269741..5620d54b2 100644 --- a/schema_salad/schema.py +++ b/schema_salad/schema.py @@ -13,8 +13,13 @@ from six.moves.urllib import parse import six import os -AvroSchemaFromJSONData = avro.schema.make_avsc_object -# AvroSchemaFromJSONData=avro.schema.SchemaFromJSONData + +if six.PY3: + AvroSchemaFromJSONData = avro.schema.SchemaFromJSONData +else: + AvroSchemaFromJSONData = avro.schema.make_avsc_object + + from avro.schema import Names, SchemaParseException from . 
import ref_resolver from .ref_resolver import Loader, DocumentType From 916aaa124019d70887060c0804a32fbb613481f5 Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Thu, 18 May 2017 16:41:41 +0530 Subject: [PATCH 070/116] py3: use six.moves.urllib instead of urllib --- schema_salad/ref_resolver.py | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index cf5ad8d4a..f7e2d1c2f 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -8,9 +8,8 @@ import six from six.moves import range -import urllib - from six.moves.urllib import parse +from six.moves import urllib from six import StringIO import re @@ -47,10 +46,10 @@ def file_uri(path, split_frag=False): # type: (str, bool) -> str return path if split_frag: pathsp = path.split("#", 2) - frag = "#" + urllib.quote(str(pathsp[1])) if len(pathsp) == 2 else "" - urlpath = urllib.pathname2url(str(pathsp[0])) + frag = "#" + urllib.parse.quote(str(pathsp[1])) if len(pathsp) == 2 else "" + urlpath = urllib.request.pathname2url(str(pathsp[0])) else: - urlpath = urllib.pathname2url(path) + urlpath = urllib.request.pathname2url(path) frag = "" if urlpath.startswith("//"): return "file:%s%s" % (urlpath, frag) @@ -60,8 +59,8 @@ def file_uri(path, split_frag=False): # type: (str, bool) -> str def uri_file_path(url): # type: (str) -> str split = parse.urlsplit(url) if split.scheme == "file": - return urllib.url2pathname( - str(split.path)) + ("#" + urllib.unquote(str(split.fragment)) + return urllib.request.url2pathname( + str(split.path)) + ("#" + urllib.parse.unquote(str(split.fragment)) if bool(split.fragment) else "") else: raise ValueError("Not a file URI") @@ -142,7 +141,7 @@ def fetch_text(self, url): return resp.text elif scheme == 'file': try: - with open(urllib.url2pathname(str(path))) as fp: + with open(urllib.request.url2pathname(str(path))) as fp: read = fp.read() if hasattr(read, "decode"): return 
read.decode("utf-8") @@ -171,7 +170,7 @@ def check_exists(self, url): # type: (Text) -> bool return False return True elif scheme == 'file': - return os.path.exists(urllib.url2pathname(str(path))) + return os.path.exists(urllib.request.url2pathname(str(path))) else: raise ValueError('Unsupported scheme in url: %s' % url) From 4d3c6deddf5f9288c7d7b4ef1def8cf1df0ab2a5 Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Thu, 18 May 2017 18:08:39 +0530 Subject: [PATCH 071/116] fix: use list of dict keys to iterate, fix flake8 warnings - py3 compatibility --- schema_salad/ref_resolver.py | 2 +- schema_salad/schema.py | 7 +++---- setup.py | 7 ++++--- 3 files changed, 8 insertions(+), 8 deletions(-) diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index f7e2d1c2f..512d19216 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -685,7 +685,7 @@ def _resolve_identity(self, document, loader, base_url): def _normalize_fields(self, document, loader): # type: (Dict[Text, Text], Loader) -> None # Normalize fields which are prefixed or full URIn to vocabulary terms - for d in document: + for d in list(document.keys()): d2 = loader.expand_url(d, u"", scoped_id=False, vocab_term=True) if d != d2: document[d2] = document[d] diff --git a/schema_salad/schema.py b/schema_salad/schema.py index 5620d54b2..a2beb43ba 100644 --- a/schema_salad/schema.py +++ b/schema_salad/schema.py @@ -9,16 +9,15 @@ import avro.schema from . import validate import json -# import urlparse + from six.moves.urllib import parse import six import os -if six.PY3: +if six.PY3: AvroSchemaFromJSONData = avro.schema.SchemaFromJSONData -else: +else: AvroSchemaFromJSONData = avro.schema.make_avsc_object - from avro.schema import Names, SchemaParseException from . 
import ref_resolver diff --git a/setup.py b/setup.py index 48d79e9d1..a39bb7322 100755 --- a/setup.py +++ b/setup.py @@ -41,9 +41,10 @@ # install_requires.append("avro") # TODO: remove me once cwltool is # available in Debian Stable, Ubuntu 12.04 LTS -extras_require={ # TODO: uncomment me, same conditions as above - ':python_version<"3"': ['avro'], - ':python_version>="3"': ['avro-python3']} + +extras_require={ # TODO: uncomment me, same conditions as above + ':python_version<"3"': ['avro'], + ':python_version>="3"': ['avro-python3']} # extras_require = {} # TODO: to be removed when the above is added setup(name='schema-salad', From 7ee4848917c1f8c5ca4ede228df19c09ad9ef55c Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Thu, 18 May 2017 18:12:10 +0530 Subject: [PATCH 072/116] .gitignore: add more local folders --- .gitignore | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/.gitignore b/.gitignore index 85014c5d4..8f91562c7 100644 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,11 @@ .eggs *.egg-info/ *pyc + +build/ +dist/ +ruamel +typeshed/2.7/ruamel/yaml +# virtualenv +venv/ +.cache/ From 965378f3b4b18ba7983eae1e71c54251487a9c43 Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Mon, 22 May 2017 21:46:14 +0530 Subject: [PATCH 073/116] .tox.ini: turn on python3 unit testing --- tox.ini | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tox.ini b/tox.ini index 7b5a189e4..281226e55 100644 --- a/tox.ini +++ b/tox.ini @@ -1,11 +1,11 @@ [tox] #envlist = py35-lint,py34-lint,py33-lint,py27-lint,py35-unit,py34-unit,py33-unit,py27-unit -envlist = py27-lint, py27-unit, py35-mypy, py27-pipconflictchecker +envlist = py27-lint, py27-unit, py35-mypy, py27-pipconflictchecker, py35-unit skipsdist = True [tox:travis] 2.7 = py27 -3.5 = py35-mypy +3.5 = py35-mypy, py35-unit [testenv] deps = -rrequirements.txt From 3095597dec6922035e02d5719c37e3ea5338c95f Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Sat, 27 May 2017 22:22:23 +0530 Subject: 
[PATCH 074/116] fix: use six.integer_types to handle int, long incompatibles --- schema_salad/validate.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/schema_salad/validate.py b/schema_salad/validate.py index fb41a13e5..3d9d44af2 100644 --- a/schema_salad/validate.py +++ b/schema_salad/validate.py @@ -118,7 +118,7 @@ def validate_ex(expected_schema, # type: Schema else: return False elif schema_type == 'int': - if ((isinstance(datum, int) or isinstance(datum, int)) + if (isinstance(datum, six.integer_types) and INT_MIN_VALUE <= datum <= INT_MAX_VALUE): return True else: @@ -127,7 +127,7 @@ def validate_ex(expected_schema, # type: Schema else: return False elif schema_type == 'long': - if ((isinstance(datum, int) or isinstance(datum, int)) + if ((isinstance(datum, six.integer_types)) and LONG_MIN_VALUE <= datum <= LONG_MAX_VALUE): return True else: @@ -137,7 +137,7 @@ def validate_ex(expected_schema, # type: Schema else: return False elif schema_type in ['float', 'double']: - if (isinstance(datum, int) or isinstance(datum, int) + if (isinstance(datum, six.integer_types) or isinstance(datum, float)): return True else: From d23c804fddc007ba723232a2ad0386e08e8ac1a1 Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Sat, 27 May 2017 22:32:42 +0530 Subject: [PATCH 075/116] tests: use assertEqual() inplace assertEquals() --- schema_salad/tests/test_examples.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/schema_salad/tests/test_examples.py b/schema_salad/tests/test_examples.py index 440f04cdc..dc538880f 100644 --- a/schema_salad/tests/test_examples.py +++ b/schema_salad/tests/test_examples.py @@ -49,7 +49,7 @@ def test_schemas(self): # "edam:has_format": "edam:format_1915" # }, "") - # self.assertEquals(ra, { + # self.assertEqual(ra, { # "$schemas": ["tests/EDAM.owl"], # "$namespaces": {"edam": "http://edamontology.org/"}, # 'http://edamontology.org/has_format': 
'http://edamontology.org/format_1915' @@ -114,12 +114,12 @@ def test_idmap(self): self.assertEqual("http://example2.com/#stuff", ra["id"]) for item in ra["inputs"]: if item["a"] == 2: - self.assertEquals( + self.assertEqual( 'http://example2.com/#stuff/zing', item["id"]) else: - self.assertEquals('http://example2.com/#stuff/zip', item["id"]) - self.assertEquals(['http://example2.com/#stuff/out'], ra['outputs']) - self.assertEquals({'n': 9}, ra['other']) + self.assertEqual('http://example2.com/#stuff/zip', item["id"]) + self.assertEqual(['http://example2.com/#stuff/out'], ra['outputs']) + self.assertEqual({'n': 9}, ra['other']) def test_scoped_ref(self): ldr = schema_salad.ref_resolver.Loader({}) @@ -183,7 +183,7 @@ def test_scoped_ref(self): } }), "http://example2.com/") - self.assertEquals( + self.assertEqual( {'inputs': [{ 'id': 'http://example2.com/#inp', 'type': 'string' @@ -362,16 +362,16 @@ def test_mixin(self): def test_fragment(self): ldr = schema_salad.ref_resolver.Loader({"id": "@id"}) b, _ = ldr.resolve_ref(get_data("tests/frag.yml#foo2")) - self.assertEquals({"id": b["id"], "bar":"b2"}, b) + self.assertEqual({"id": b["id"], "bar":"b2"}, b) def test_file_uri(self): # Note: this test probably won't pass on Windows. Someone with a # windows box should add an alternate test. 
- self.assertEquals("file:///foo/bar%20baz/quux", schema_salad.ref_resolver.file_uri("/foo/bar baz/quux")) - self.assertEquals("/foo/bar baz/quux", schema_salad.ref_resolver.uri_file_path("file:///foo/bar%20baz/quux")) - self.assertEquals("file:///foo/bar%20baz/quux%23zing%20zong", schema_salad.ref_resolver.file_uri("/foo/bar baz/quux#zing zong")) - self.assertEquals("file:///foo/bar%20baz/quux#zing%20zong", schema_salad.ref_resolver.file_uri("/foo/bar baz/quux#zing zong", split_frag=True)) - self.assertEquals("/foo/bar baz/quux#zing zong", schema_salad.ref_resolver.uri_file_path("file:///foo/bar%20baz/quux#zing%20zong")) + self.assertEqual("file:///foo/bar%20baz/quux", schema_salad.ref_resolver.file_uri("/foo/bar baz/quux")) + self.assertEqual("/foo/bar baz/quux", schema_salad.ref_resolver.uri_file_path("file:///foo/bar%20baz/quux")) + self.assertEqual("file:///foo/bar%20baz/quux%23zing%20zong", schema_salad.ref_resolver.file_uri("/foo/bar baz/quux#zing zong")) + self.assertEqual("file:///foo/bar%20baz/quux#zing%20zong", schema_salad.ref_resolver.file_uri("/foo/bar baz/quux#zing zong", split_frag=True)) + self.assertEqual("/foo/bar baz/quux#zing zong", schema_salad.ref_resolver.uri_file_path("file:///foo/bar%20baz/quux#zing%20zong")) if __name__ == '__main__': From 9874d110ee72ff1dd6512f44bb0e694b0f873ba6 Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Sat, 27 May 2017 22:55:40 +0530 Subject: [PATCH 076/116] use same import pattern for six.urllib accross the codebase - also, re-ogranise six lib imports --- schema_salad/jsonld_context.py | 13 +++++++------ schema_salad/main.py | 7 +++---- schema_salad/makedoc.py | 15 +++++++-------- schema_salad/ref_resolver.py | 23 +++++++++++------------ schema_salad/schema.py | 6 +++--- schema_salad/tests/test_fetch.py | 10 +++++----- schema_salad/validate.py | 10 +++++----- 7 files changed, 41 insertions(+), 43 deletions(-) diff --git a/schema_salad/jsonld_context.py b/schema_salad/jsonld_context.py index 
cfc769e29..5533b5cad 100755 --- a/schema_salad/jsonld_context.py +++ b/schema_salad/jsonld_context.py @@ -2,10 +2,11 @@ import collections import shutil import json -import ruamel.yaml as yaml + import six -# import urlparse -from six.moves.urllib import parse +from six.moves import urllib + +import ruamel.yaml as yaml try: from ruamel.yaml import CSafeLoader as SafeLoader except ImportError: @@ -37,7 +38,7 @@ def pred(datatype, # type: Dict[str, Union[Dict, str]] namespaces # type: Dict[str, rdflib.namespace.Namespace] ): # type: (...) -> Union[Dict, Text] - split = parse.urlsplit(name) + split = urllib.parse.urlsplit(name) vee = None # type: Optional[Union[str, Text]] @@ -105,7 +106,7 @@ def process_type(t, # type: Dict[str, Any] classnode = URIRef(recordname) g.add((classnode, RDF.type, RDFS.Class)) - split = parse.urlsplit(recordname) + split = urllib.parse.urlsplit(recordname) predicate = recordname if t.get("inVocab", True): if split.scheme: @@ -221,7 +222,7 @@ def makerdf(workflow, # type: Union[str, Text] url = v if url == "@id": idfields.append(k) - doc_url, frg = parse.urldefrag(url) + doc_url, frg = urllib.parse.urldefrag(url) if "/" in frg: p = frg.split("/")[0] prefixes[p] = u"%s#%s/" % (doc_url, p) diff --git a/schema_salad/main.py b/schema_salad/main.py index 530877228..33225aae0 100644 --- a/schema_salad/main.py +++ b/schema_salad/main.py @@ -6,9 +6,8 @@ import traceback import json import os -# import urlparse -from six.moves.urllib import parse +from six.moves import urllib import pkg_resources # part of setuptools @@ -112,7 +111,7 @@ def main(argsl=None): # type: (List[str]) -> int # Load schema document and resolve refs schema_uri = args.schema - if not parse.urlparse(schema_uri)[0]: + if not urllib.parse.urlparse(schema_uri)[0]: schema_uri = "file://" + os.path.abspath(schema_uri) schema_raw_doc = metaschema_loader.fetch(schema_uri) @@ -209,7 +208,7 @@ def main(argsl=None): # type: (List[str]) -> int # Load target document and resolve refs try: 
uri = args.document - if not parse.urlparse(uri)[0]: + if not urllib.parse.urlparse(uri)[0]: doc = "file://" + os.path.abspath(uri) document, doc_metadata = document_loader.resolve_ref(uri) except (validate.ValidationException, RuntimeError) as e: diff --git a/schema_salad/makedoc.py b/schema_salad/makedoc.py index cde11ee03..0015ca6f3 100644 --- a/schema_salad/makedoc.py +++ b/schema_salad/makedoc.py @@ -7,15 +7,14 @@ import copy import re import sys - import logging from . import schema -from schema_salad.utils import add_dictlist, aslist +from .utils import add_dictlist, aslist import six from six.moves import range -from six.moves.urllib import parse +from six.moves import urllib from six import StringIO from typing import cast, Any, Dict, IO, List, Optional, Set, Text, Union @@ -42,7 +41,7 @@ def has_types(items): # type: (Any) -> List[basestring] def linkto(item): # type: (Text) -> Text - _, frg = parse.urldefrag(item) + _, frg = urllib.parse.urldefrag(item) return "[%s](#%s)" % (frg, to_id(frg)) @@ -210,8 +209,8 @@ def __init__(self, toc, j, renderlist, redirects, primitiveType): if tp not in self.uses: self.uses[tp] = [] if (t["name"], f["name"]) not in self.uses[tp]: - _, frg1 = parse.urldefrag(t["name"]) - _, frg2 = parse.urldefrag(f["name"]) + _, frg1 = urllib.parse.urldefrag(t["name"]) + _, frg2 = urllib.parse.urldefrag(f["name"]) self.uses[tp].append((frg1, frg2)) if tp not in basicTypes and tp not in self.record_refs[t["name"]]: self.record_refs[t["name"]].append(tp) @@ -272,7 +271,7 @@ def typefmt(self, elif str(tp) in basicTypes: return """%s""" % (self.primitiveType, schema.avro_name(str(tp))) else: - _, frg = parse.urldefrag(tp) + _, frg = urllib.parse.urldefrag(tp) if frg is not '': tp = frg return """%s""" % (to_id(tp), tp) @@ -331,7 +330,7 @@ def extendsfrom(item, ex): lines.append(l) f["doc"] = "\n".join(lines) - _, frg = parse.urldefrag(f["name"]) + _, frg = urllib.parse.urldefrag(f["name"]) num = self.toc.add_entry(depth, frg) doc = "%s %s 
%s\n" % (("#" * depth), num, frg) else: diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index 512d19216..2b1717fcd 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -8,7 +8,6 @@ import six from six.moves import range -from six.moves.urllib import parse from six.moves import urllib from six import StringIO @@ -57,7 +56,7 @@ def file_uri(path, split_frag=False): # type: (str, bool) -> str return "file://%s%s" % (urlpath, frag) def uri_file_path(url): # type: (str) -> str - split = parse.urlsplit(url) + split = urllib.parse.urlsplit(url) if split.scheme == "file": return urllib.request.url2pathname( str(split.path)) + ("#" + urllib.parse.unquote(str(split.fragment)) @@ -129,7 +128,7 @@ def fetch_text(self, url): if url in self.cache: return self.cache[url] - split = parse.urlsplit(url) + split = urllib.parse.urlsplit(url) scheme, path = split.scheme, split.path if scheme in [u'http', u'https'] and self.session is not None: @@ -159,7 +158,7 @@ def check_exists(self, url): # type: (Text) -> bool if url in self.cache: return True - split = parse.urlsplit(url) + split = urllib.parse.urlsplit(url) scheme, path = split.scheme, split.path if scheme in [u'http', u'https'] and self.session is not None: @@ -175,7 +174,7 @@ def check_exists(self, url): # type: (Text) -> bool raise ValueError('Unsupported scheme in url: %s' % url) def urljoin(self, base_url, url): # type: (Text, Text) -> Text - return parse.urljoin(base_url, url) + return urllib.parse.urljoin(base_url, url) class Loader(object): def __init__(self, @@ -190,7 +189,7 @@ def __init__(self, ): # type: (...) 
-> None - normalize = lambda url: parse.urlsplit(url).geturl() + normalize = lambda url: urllib.parse.urlsplit(url).geturl() if idx is not None: self.idx = idx else: @@ -276,20 +275,20 @@ def expand_url(self, if prefix in self.vocab: url = self.vocab[prefix] + url[len(prefix) + 1:] - split = parse.urlsplit(url) + split = urllib.parse.urlsplit(url) if (bool(split.scheme) or url.startswith(u"$(") or url.startswith(u"${")): pass elif scoped_id and not bool(split.fragment): - splitbase = parse.urlsplit(base_url) + splitbase = urllib.parse.urlsplit(base_url) frg = u"" if bool(splitbase.fragment): frg = splitbase.fragment + u"/" + split.path else: frg = split.path pt = splitbase.path if splitbase.path != '' else "/" - url = parse.urlunsplit( + url = urllib.parse.urlunsplit( (splitbase.scheme, splitbase.netloc, pt, splitbase.query, frg)) elif scoped_ref is not None and not split.fragment: pass @@ -496,7 +495,7 @@ def resolve_ref(self, doc_url = url else: # Load structured document - doc_url, frg = parse.urldefrag(url) + doc_url, frg = urllib.parse.urldefrag(url) if doc_url in self.idx and (not mixin): # If the base document is in the index, it was already loaded, # so if we didn't find the reference earlier then it must not @@ -872,7 +871,7 @@ def fetch(self, url, inject_ids=True): # type: (Text, bool) -> Any def validate_scoped(self, field, link, docid): # type: (Text, Text, Text) -> Text - split = parse.urlsplit(docid) + split = urllib.parse.urlsplit(docid) sp = split.fragment.split(u"/") n = self.scoped_ref_fields[field] while n > 0 and len(sp) > 0: @@ -881,7 +880,7 @@ def validate_scoped(self, field, link, docid): tried = [] while True: sp.append(link) - url = parse.urlunsplit(( + url = urllib.parse.urlunsplit(( split.scheme, split.netloc, split.path, split.query, u"/".join(sp))) tried.append(url) diff --git a/schema_salad/schema.py b/schema_salad/schema.py index a2beb43ba..0e477f86b 100644 --- a/schema_salad/schema.py +++ b/schema_salad/schema.py @@ -9,10 +9,10 @@ 
import avro.schema from . import validate import json +import os -from six.moves.urllib import parse import six -import os +from six.moves import urllib if six.PY3: AvroSchemaFromJSONData = avro.schema.SchemaFromJSONData @@ -383,7 +383,7 @@ def replace_type(items, spec, loader, found): def avro_name(url): # type: (AnyStr) -> AnyStr - doc_url, frg = parse.urldefrag(url) + doc_url, frg = urllib.parse.urldefrag(url) if frg != '': if '/' in frg: return frg[frg.rindex('/') + 1:] diff --git a/schema_salad/tests/test_fetch.py b/schema_salad/tests/test_fetch.py index a318d3b95..210e4568b 100644 --- a/schema_salad/tests/test_fetch.py +++ b/schema_salad/tests/test_fetch.py @@ -9,8 +9,8 @@ import ruamel.yaml as yaml import json import os -# import urlparse -from six.moves.urllib import parse + +from six.moves import urllib class TestFetcher(unittest.TestCase): def test_fetcher(self): @@ -33,14 +33,14 @@ def check_exists(self, url): # type: (unicode) -> bool return False def urljoin(self, base, url): - urlsp = parse.urlsplit(url) + urlsp = urllib.parse.urlsplit(url) if urlsp.scheme: return url - basesp = parse.urlsplit(base) + basesp = urllib.parse.urlsplit(base) if basesp.scheme == "keep": return base + "/" + url - return parse.urljoin(base, url) + return urllib.parse.urljoin(base, url) loader = schema_salad.ref_resolver.Loader({}, fetcher_constructor=TestFetcher) self.assertEqual({"hello": "foo"}, loader.resolve_ref("foo.txt")[0]) diff --git a/schema_salad/validate.py b/schema_salad/validate.py index 3d9d44af2..c82a0d930 100644 --- a/schema_salad/validate.py +++ b/schema_salad/validate.py @@ -3,16 +3,16 @@ import avro.schema from avro.schema import Schema import sys -# import urlparse -from six.moves.urllib import parse import re import logging -from typing import Any, List, Set, Union, Text -from .sourceline import SourceLine, lineno_re, bullets, indent import six +from six.moves import urllib from six.moves import range +from typing import Any, List, Set, Union, Text +from 
.sourceline import SourceLine, lineno_re, bullets, indent + _logger = logging.getLogger("salad") class ValidationException(Exception): @@ -301,7 +301,7 @@ def validate_ex(expected_schema, # type: Schema if (d not in identifiers and strict) and ( d not in foreign_properties and strict_foreign_properties) and not raise_ex: return False - split = parse.urlsplit(d) + split = urllib.parse.urlsplit(d) if split.scheme: err = sl.makeError(u"unrecognized extension field `%s`%s." " Did you include " From 508c8d34880b8b479ce7f86d67ad8fae44de863b Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Sun, 28 May 2017 01:10:34 +0530 Subject: [PATCH 077/116] setup.py: bump version to '3.0' --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a39bb7322..d4ab114cc 100755 --- a/setup.py +++ b/setup.py @@ -48,7 +48,7 @@ # extras_require = {} # TODO: to be removed when the above is added setup(name='schema-salad', - version='2.5', + version='3.0', description='Schema Annotations for Linked Avro Data (SALAD)', long_description=open(README).read(), author='Common workflow language working group', From 5efed90a9176c971e66cc2aac1fd357840ac530c Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Sun, 28 May 2017 01:48:41 +0530 Subject: [PATCH 078/116] tox.ini: re-write file, include unit tests for all supported versions of python - enable more python3 versions on travis for unit testing --- .travis.yml | 3 +++ tox.ini | 54 ++++++++++++++++------------------------------------- 2 files changed, 19 insertions(+), 38 deletions(-) diff --git a/.travis.yml b/.travis.yml index 869d06fbe..f59182cde 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,7 +2,10 @@ sudo: false language: python python: - 2.7 + - 3.3 + - 3.4 - 3.5 + - 3.6 os: - linux diff --git a/tox.ini b/tox.ini index 281226e55..3027cfe59 100644 --- a/tox.ini +++ b/tox.ini @@ -1,21 +1,27 @@ [tox] -#envlist = py35-lint,py34-lint,py33-lint,py27-lint,py35-unit,py34-unit,py33-unit,py27-unit 
-envlist = py27-lint, py27-unit, py35-mypy, py27-pipconflictchecker, py35-unit +envlist = + # py{27,33,34,35.36}-lint, + py27-lint + py{27,33,34,35,36}-unit, + py35-mypy, + py27-pipconflictchecker + skipsdist = True +skip_missing_interpreters = True [tox:travis] 2.7 = py27 -3.5 = py35-mypy, py35-unit +3.3 = py33-unit +3.4 = py34-unit +3.5 = py35 +3.6 = py36-unit [testenv] deps = -rrequirements.txt + # py{27,33,34,35,36}-lint: flake8 -[testenv:py35-py2_mypy] -commands = make mypy -whitelist_externals = make -deps = - -rmypy_requirements.txt - -rrequirements.txt +commands = + py{27,33,34,35,36}-unit: python setup.py test [testenv:py35-mypy] commands = make mypy @@ -24,21 +30,6 @@ deps = -rmypy_requirements.txt -rrequirements.txt -[testenv:py35-lint] -commands = flake8 schema_salad setup.py -whitelist_externals = flake8 -deps = flake8 - -[testenv:py34-lint] -commands = flake8 schema_salad setup.py -whitelist_externals = flake8 -deps = flake8 - -[testenv:py33-lint] -commands = flake8 schema_salad setup.py -whitelist_externals = flake8 -deps = flake8 - [testenv:py27-lint] commands = flake8 schema_salad setup.py whitelist_externals = flake8 @@ -47,17 +38,4 @@ deps = flake8 [testenv:py27-pipconflictchecker] commands = pipconflictchecker whitelist_externals = pipconflictchecker -deps = pip-conflict-checker - - -[testenv:py35-unit] -commands = python setup.py test - -[testenv:py34-unit] -commands = python setup.py test - -[testenv:py33-unit] -commands = python setup.py test - -[testenv:py27-unit] -commands = python setup.py test +deps = pip-conflict-checker \ No newline at end of file From 29dcb7f19bc913e3f37c5da5878a84c350565f05 Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Sun, 28 May 2017 13:39:43 +0530 Subject: [PATCH 079/116] tox.ini: fix bugs, enable flake8 linting for all python versions --- tox.ini | 23 +++++++++++------------ 1 file changed, 11 insertions(+), 12 deletions(-) diff --git a/tox.ini b/tox.ini index 3027cfe59..f5fc3b36d 100644 --- a/tox.ini +++ 
b/tox.ini @@ -1,7 +1,6 @@ [tox] envlist = - # py{27,33,34,35.36}-lint, - py27-lint + py{27,33,34,35,36}-lint, py{27,33,34,35,36}-unit, py35-mypy, py27-pipconflictchecker @@ -11,17 +10,22 @@ skip_missing_interpreters = True [tox:travis] 2.7 = py27 -3.3 = py33-unit -3.4 = py34-unit +3.3 = py33 +3.4 = py34 3.5 = py35 -3.6 = py36-unit +3.6 = py36 [testenv] -deps = -rrequirements.txt - # py{27,33,34,35,36}-lint: flake8 +deps = + -rrequirements.txt + py{27,33,34,35,36}-lint: flake8 commands = py{27,33,34,35,36}-unit: python setup.py test + py{27,33,34,35,36}-lint: flake8 schema_salad setup.py + +whitelist_externals = + py{27,33,34,35,36}-lint: flake8 [testenv:py35-mypy] commands = make mypy @@ -30,11 +34,6 @@ deps = -rmypy_requirements.txt -rrequirements.txt -[testenv:py27-lint] -commands = flake8 schema_salad setup.py -whitelist_externals = flake8 -deps = flake8 - [testenv:py27-pipconflictchecker] commands = pipconflictchecker whitelist_externals = pipconflictchecker From b1605e33155c666078bd234a8d0958bf394a838c Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Sun, 28 May 2017 16:03:25 +0530 Subject: [PATCH 080/116] fix: use six integer_types and string_types in place of int and basestring --- schema_salad/validate.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schema_salad/validate.py b/schema_salad/validate.py index c82a0d930..c004b1e98 100644 --- a/schema_salad/validate.py +++ b/schema_salad/validate.py @@ -213,7 +213,7 @@ def validate_ex(expected_schema, # type: Schema continue elif isinstance(datum, dict) and not isinstance(s, avro.schema.RecordSchema): continue - elif isinstance(datum, (bool, int, int, float, int)) and isinstance(s, (avro.schema.ArraySchema, avro.schema.RecordSchema)): + elif isinstance(datum, (bool, six.integer_types, float, six.string_types)) and isinstance(s, (avro.schema.ArraySchema, avro.schema.RecordSchema)): # type: ignore continue elif datum is not None and s.type == "null": continue From 
c66a3f968ceb261206bcf26f7155711926cb710e Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Fri, 2 Jun 2017 12:43:43 +0530 Subject: [PATCH 081/116] tests: remove redundant parentheses in print statements --- schema_salad/tests/test_errors.py | 2 +- schema_salad/tests/test_examples.py | 8 ++++---- schema_salad/tests/test_fetch.py | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) diff --git a/schema_salad/tests/test_errors.py b/schema_salad/tests/test_errors.py index 7a4d39752..2943681e5 100644 --- a/schema_salad/tests/test_errors.py +++ b/schema_salad/tests/test_errors.py @@ -33,5 +33,5 @@ def test_errors(self): load_and_validate(document_loader, avsc_names, six.text_type(get_data("tests/"+t)), True) except ValidationException as e: - print(("\n", e)) + print("\n", e) raise diff --git a/schema_salad/tests/test_examples.py b/schema_salad/tests/test_examples.py index dc538880f..4ffdd0d5c 100644 --- a/schema_salad/tests/test_examples.py +++ b/schema_salad/tests/test_examples.py @@ -287,7 +287,7 @@ def test_scoped_id(self): }, ra) g = makerdf(None, ra, ctx) - print((g.serialize(format="n3"))) + print(g.serialize(format="n3")) ra, _ = ldr.resolve_all(cmap({ "location": "foo", @@ -301,7 +301,7 @@ def test_scoped_id(self): }, ra) g = makerdf(None, ra, ctx) - print((g.serialize(format="n3"))) + print(g.serialize(format="n3")) ra, _ = ldr.resolve_all(cmap({ "id": "foo", @@ -315,7 +315,7 @@ def test_scoped_id(self): }, ra) g = makerdf(None, ra, ctx) - print((g.serialize(format="n3"))) + print(g.serialize(format="n3")) ra, _ = ldr.resolve_all(cmap({ "location": "foo", @@ -329,7 +329,7 @@ def test_scoped_id(self): }, ra) g = makerdf(None, ra, ctx) - print((g.serialize(format="n3"))) + print(g.serialize(format="n3")) def test_mixin(self): base_url = "file://" + os.getcwd() + "/tests/" diff --git a/schema_salad/tests/test_fetch.py b/schema_salad/tests/test_fetch.py index 210e4568b..09cb385da 100644 --- a/schema_salad/tests/test_fetch.py +++ 
b/schema_salad/tests/test_fetch.py @@ -55,6 +55,6 @@ def test_cache(self): loader = schema_salad.ref_resolver.Loader({}) foo = "file://%s/foo.txt" % os.getcwd() loader.cache.update({foo: "hello: foo"}) - print((loader.cache)) + print(loader.cache) self.assertEqual({"hello": "foo"}, loader.resolve_ref("foo.txt")[0]) self.assertTrue(loader.check_exists(foo)) From ff000db96d30e136acbd39904ab360e329edf94f Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Fri, 2 Jun 2017 12:59:27 +0530 Subject: [PATCH 082/116] setup.py: add six as explicit dependency --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index d4ab114cc..29e297d1c 100755 --- a/setup.py +++ b/setup.py @@ -36,7 +36,8 @@ 'mistune >= 0.7.3, < 0.8', 'typing >= 3.5.2, < 3.6', 'CacheControl >= 0.11.7, < 0.12', - 'lockfile >= 0.9'] + 'lockfile >= 0.9', + 'six >= 1.8.0'] # install_requires.append("avro") # TODO: remove me once cwltool is # available in Debian Stable, Ubuntu 12.04 LTS From b0b1f14d282f1115c1d1c39e7517ecc0ce5f2ad4 Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Tue, 6 Jun 2017 12:08:01 +0530 Subject: [PATCH 083/116] disable git version tagging to avoid testing conflicts with cwltool --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 29e297d1c..d2882480e 100755 --- a/setup.py +++ b/setup.py @@ -69,7 +69,7 @@ 'console_scripts': ["schema-salad-tool=schema_salad.main:main", "schema-salad-doc=schema_salad.makedoc:main"] }, zip_safe=True, - cmdclass={'egg_info': tagger}, + # cmdclass={'egg_info': tagger}, classifiers=[ "Environment :: Console", "Intended Audience :: Science/Research", From 1eb19b60d98d3e7290b17cc7f8ee27aa72033da7 Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Tue, 27 Jun 2017 18:34:31 +0530 Subject: [PATCH 084/116] use typing>=3.5.3 --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt 
index 7ee05c44a..83d3010d3 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -typing==3.5.2.2 ; python_version>="2.7" +typing==3.5.3 ; python_version>="2.7" avro-python3 ; python_version>="3" avro==1.8.1 ; python_version<"3" ruamel.yaml==0.13.7 diff --git a/setup.py b/setup.py index d2882480e..508f49a2e 100755 --- a/setup.py +++ b/setup.py @@ -34,7 +34,7 @@ 'rdflib >= 4.2.2, < 4.3.0', 'rdflib-jsonld >= 0.3.0, < 0.5.0', 'mistune >= 0.7.3, < 0.8', - 'typing >= 3.5.2, < 3.6', + 'typing >= 3.5.3', 'CacheControl >= 0.11.7, < 0.12', 'lockfile >= 0.9', 'six >= 1.8.0'] From 8b6f9b34b4d5b1092adf43d81d7eb59b3f6d510e Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Wed, 17 May 2017 16:30:56 +0530 Subject: [PATCH 085/116] Apply Python3 modernize transforms Main changes: - use of __future__ - using six lib to: - deal with checking unicode - dict operations - other API changes --- schema_salad/jsonld_context.py | 1 + schema_salad/makedoc.py | 2 ++ schema_salad/ref_resolver.py | 2 ++ schema_salad/validate.py | 2 ++ 4 files changed, 7 insertions(+) diff --git a/schema_salad/jsonld_context.py b/schema_salad/jsonld_context.py index 5533b5cad..b2210950e 100755 --- a/schema_salad/jsonld_context.py +++ b/schema_salad/jsonld_context.py @@ -7,6 +7,7 @@ from six.moves import urllib import ruamel.yaml as yaml +import six try: from ruamel.yaml import CSafeLoader as SafeLoader except ImportError: diff --git a/schema_salad/makedoc.py b/schema_salad/makedoc.py index 0015ca6f3..5c345ff3c 100644 --- a/schema_salad/makedoc.py +++ b/schema_salad/makedoc.py @@ -18,6 +18,8 @@ from six import StringIO from typing import cast, Any, Dict, IO, List, Optional, Set, Text, Union +import six +from six.moves import range _logger = logging.getLogger("salad") diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index 2b1717fcd..aa2ada4cb 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -32,6 +32,8 @@ import xml.sax from typing import 
(cast, Any, AnyStr, Callable, Dict, List, Iterable, Optional, Set, Text, Tuple, TypeVar, Union) +import six +from six.moves import range _logger = logging.getLogger("salad") diff --git a/schema_salad/validate.py b/schema_salad/validate.py index c004b1e98..b59e8e670 100644 --- a/schema_salad/validate.py +++ b/schema_salad/validate.py @@ -12,6 +12,8 @@ from typing import Any, List, Set, Union, Text from .sourceline import SourceLine, lineno_re, bullets, indent +import six +from six.moves import range _logger = logging.getLogger("salad") From 6a11b1682915ca895338b12b3c6379e4bf628374 Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Wed, 17 May 2017 18:57:19 +0530 Subject: [PATCH 086/116] mypy type annotations fix: replace unicode -> Text --- schema_salad/validate.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/schema_salad/validate.py b/schema_salad/validate.py index b59e8e670..c004b1e98 100644 --- a/schema_salad/validate.py +++ b/schema_salad/validate.py @@ -12,8 +12,6 @@ from typing import Any, List, Set, Union, Text from .sourceline import SourceLine, lineno_re, bullets, indent -import six -from six.moves import range _logger = logging.getLogger("salad") From 0773e44785f31f127ee2517938b822db1631d60b Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Thu, 18 May 2017 14:11:28 +0530 Subject: [PATCH 087/116] fix urlparse using six in tests and schema_salad core; no automated fixers --- schema_salad/jsonld_context.py | 8 +++++--- schema_salad/makedoc.py | 3 --- schema_salad/ref_resolver.py | 2 -- 3 files changed, 5 insertions(+), 8 deletions(-) diff --git a/schema_salad/jsonld_context.py b/schema_salad/jsonld_context.py index b2210950e..d1c0d3ba2 100755 --- a/schema_salad/jsonld_context.py +++ b/schema_salad/jsonld_context.py @@ -8,6 +8,8 @@ import ruamel.yaml as yaml import six +# import urlparse +from six.moves.urllib import parse try: from ruamel.yaml import CSafeLoader as SafeLoader except ImportError: @@ -39,7 +41,7 @@ def pred(datatype, # type: 
Dict[str, Union[Dict, str]] namespaces # type: Dict[str, rdflib.namespace.Namespace] ): # type: (...) -> Union[Dict, Text] - split = urllib.parse.urlsplit(name) + split = parse.urlsplit(name) vee = None # type: Optional[Union[str, Text]] @@ -107,7 +109,7 @@ def process_type(t, # type: Dict[str, Any] classnode = URIRef(recordname) g.add((classnode, RDF.type, RDFS.Class)) - split = urllib.parse.urlsplit(recordname) + split = parse.urlsplit(recordname) predicate = recordname if t.get("inVocab", True): if split.scheme: @@ -223,7 +225,7 @@ def makerdf(workflow, # type: Union[str, Text] url = v if url == "@id": idfields.append(k) - doc_url, frg = urllib.parse.urldefrag(url) + doc_url, frg = parse.urldefrag(url) if "/" in frg: p = frg.split("/")[0] prefixes[p] = u"%s#%s/" % (doc_url, p) diff --git a/schema_salad/makedoc.py b/schema_salad/makedoc.py index 5c345ff3c..b7f019818 100644 --- a/schema_salad/makedoc.py +++ b/schema_salad/makedoc.py @@ -16,10 +16,7 @@ from six.moves import range from six.moves import urllib from six import StringIO - from typing import cast, Any, Dict, IO, List, Optional, Set, Text, Union -import six -from six.moves import range _logger = logging.getLogger("salad") diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index aa2ada4cb..2b1717fcd 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -32,8 +32,6 @@ import xml.sax from typing import (cast, Any, AnyStr, Callable, Dict, List, Iterable, Optional, Set, Text, Tuple, TypeVar, Union) -import six -from six.moves import range _logger = logging.getLogger("salad") From 150730c03b21143755a59d6a0be4fb03de632aff Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Thu, 18 May 2017 15:32:26 +0530 Subject: [PATCH 088/116] py3 fix: use six StringIO import --- schema_salad/ref_resolver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index 2b1717fcd..d2d6dfb58 100644 --- 
a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -16,7 +16,7 @@ from . import validate -from schema_salad.utils import aslist, flatten +from .utils import aslist, flatten from .sourceline import SourceLine, add_lc_filename, relname import requests From 67d7481239bdc8a01076ce48bb29d766c7a312c6 Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Sat, 27 May 2017 22:55:40 +0530 Subject: [PATCH 089/116] use same import pattern for six.urllib across the codebase - also, re-organise six lib imports --- schema_salad/jsonld_context.py | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) diff --git a/schema_salad/jsonld_context.py b/schema_salad/jsonld_context.py index d1c0d3ba2..5533b5cad 100755 --- a/schema_salad/jsonld_context.py +++ b/schema_salad/jsonld_context.py @@ -7,9 +7,6 @@ from six.moves import urllib import ruamel.yaml as yaml -import six -# import urlparse -from six.moves.urllib import parse try: from ruamel.yaml import CSafeLoader as SafeLoader except ImportError: @@ -41,7 +38,7 @@ def pred(datatype, # type: Dict[str, Union[Dict, str]] namespaces # type: Dict[str, rdflib.namespace.Namespace] ): # type: (...)
-> Union[Dict, Text] - split = parse.urlsplit(name) + split = urllib.parse.urlsplit(name) vee = None # type: Optional[Union[str, Text]] @@ -109,7 +106,7 @@ def process_type(t, # type: Dict[str, Any] classnode = URIRef(recordname) g.add((classnode, RDF.type, RDFS.Class)) - split = parse.urlsplit(recordname) + split = urllib.parse.urlsplit(recordname) predicate = recordname if t.get("inVocab", True): if split.scheme: @@ -225,7 +222,7 @@ def makerdf(workflow, # type: Union[str, Text] url = v if url == "@id": idfields.append(k) - doc_url, frg = parse.urldefrag(url) + doc_url, frg = urllib.parse.urldefrag(url) if "/" in frg: p = frg.split("/")[0] prefixes[p] = u"%s#%s/" % (doc_url, p) From 090c1ba57b8306aeb6664c6b5c3358d13197b6e5 Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Sun, 28 May 2017 17:12:11 +0530 Subject: [PATCH 090/116] mypy: use latest stable version of mypy - use updated ruamel.yaml package which includes more mypy annotations --- Makefile | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index 10aa0677d..859f221c8 100644 --- a/Makefile +++ b/Makefile @@ -170,7 +170,7 @@ mypy: ${PYSOURCES} ln -s $(shell python -c 'from __future__ import print_function; import ruamel.yaml; import os.path; print(os.path.dirname(ruamel.yaml.__file__))') \ typeshed/2.7/ruamel/ MYPYPATH=typeshed/2.7 mypy --py2 --disallow-untyped-calls \ - --fast-parser --warn-redundant-casts --warn-unused-ignores \ + --warn-redundant-casts --warn-unused-ignores \ schema_salad jenkins: diff --git a/requirements.txt b/requirements.txt index 83d3010d3..7412a5821 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ typing==3.5.3 ; python_version>="2.7" avro-python3 ; python_version>="3" avro==1.8.1 ; python_version<"3" -ruamel.yaml==0.13.7 +ruamel.yaml==0.14.0 rdflib==4.2.2 rdflib-jsonld==0.4.0 mistune==0.7.3 From d16a3085fa30c835f11a89eb6a17406cf633f22d Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Wed, 31
May 2017 13:05:01 +0530 Subject: [PATCH 091/116] mypy: remove redundant ignore type annotations --- schema_salad/ref_resolver.py | 12 ++++++------ schema_salad/schema.py | 4 ++-- schema_salad/validate.py | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index d2d6dfb58..a517135a0 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -71,16 +71,16 @@ def __init__(self, normalize=six.text_type): # type: (Callable) -> None self.normalize = normalize def __getitem__(self, key): # type: (Any) -> Any - return super(NormDict, self).__getitem__(self.normalize(key)) # type: ignore + return super(NormDict, self).__getitem__(self.normalize(key)) def __setitem__(self, key, value): # type: (Any, Any) -> Any return super(NormDict, self).__setitem__(self.normalize(key), value) def __delitem__(self, key): # type: (Any) -> Any - return super(NormDict, self).__delitem__(self.normalize(key)) # type: ignore + return super(NormDict, self).__delitem__(self.normalize(key)) def __contains__(self, key): # type: (Any) -> Any - return super(NormDict, self).__contains__(self.normalize(key)) # type: ignore + return super(NormDict, self).__contains__(self.normalize(key)) def merge_properties(a, b): # type: (List[Any], List[Any]) -> Dict[Any, Any] @@ -814,7 +814,7 @@ def resolve_all(self, document.lc.data[ j - 1] = document.lc.data[j - llen] for item in l: - document.insert(i, item) # type: ignore + document.insert(i, item) document.lc.data[i] = lc i += 1 else: @@ -852,7 +852,7 @@ def fetch(self, url, inject_ids=True): # type: (Text, bool) -> Any else: textIO = StringIO(text) textIO.name = url # type: ignore - result = yaml.round_trip_load(textIO) # type: ignore + result = yaml.round_trip_load(textIO) add_lc_filename(result, url) except yaml.parser.ParserError as e: raise validate.ValidationException("Syntax error %s" % (e)) @@ -1008,7 +1008,7 @@ def _copy_dict_without_key(from_dict, 
filtered_key): # type: (D, Any) -> D new_dict = copy.copy(from_dict) if filtered_key in new_dict: - del new_dict[filtered_key] # type: ignore + del new_dict[filtered_key] if isinstance(from_dict, CommentedMap): new_dict.lc.data = copy.copy(from_dict.lc.data) new_dict.lc.filename = from_dict.lc.filename diff --git a/schema_salad/schema.py b/schema_salad/schema.py index 0e477f86b..cb2cbf08a 100644 --- a/schema_salad/schema.py +++ b/schema_salad/schema.py @@ -176,7 +176,7 @@ def get_metaschema(): loader.cache["https://w3id.org/cwl/salad"] = rs.read() rs.close() - j = yaml.round_trip_load(loader.cache["https://w3id.org/cwl/salad"]) # type: ignore + j = yaml.round_trip_load(loader.cache["https://w3id.org/cwl/salad"]) add_lc_filename(j, "metaschema.yml") j, _ = loader.resolve_all(j, "https://w3id.org/cwl/salad#") @@ -429,7 +429,7 @@ def make_valid_avro(items, # type: Avro if isinstance(items, list): ret = [] for i in items: - ret.append(make_valid_avro(i, alltypes, found, union=union)) + ret.append(make_valid_avro(i, alltypes, found, union=union)) # type: ignore return ret if union and isinstance(items, (str, six.text_type)): if items in alltypes and avro_name(items) not in found: diff --git a/schema_salad/validate.py b/schema_salad/validate.py index c004b1e98..2fab41596 100644 --- a/schema_salad/validate.py +++ b/schema_salad/validate.py @@ -213,7 +213,7 @@ def validate_ex(expected_schema, # type: Schema continue elif isinstance(datum, dict) and not isinstance(s, avro.schema.RecordSchema): continue - elif isinstance(datum, (bool, six.integer_types, float, six.string_types)) and isinstance(s, (avro.schema.ArraySchema, avro.schema.RecordSchema)): # type: ignore + elif isinstance(datum, (bool, six.integer_types, float, six.string_types)) and isinstance(s, (avro.schema.ArraySchema, avro.schema.RecordSchema)): continue elif datum is not None and s.type == "null": continue From 4958ebf667658e950604d866538c7ce91792c04d Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Wed, 
7 Jun 2017 16:32:44 +0530 Subject: [PATCH 092/116] use Text type where ever Union[str, unicode] is expected --- schema_salad/jsonld_context.py | 4 ++-- schema_salad/makedoc.py | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/schema_salad/jsonld_context.py b/schema_salad/jsonld_context.py index 5533b5cad..57e6ff47c 100755 --- a/schema_salad/jsonld_context.py +++ b/schema_salad/jsonld_context.py @@ -40,7 +40,7 @@ def pred(datatype, # type: Dict[str, Union[Dict, str]] # type: (...) -> Union[Dict, Text] split = urllib.parse.urlsplit(name) - vee = None # type: Optional[Union[str, Text]] + vee = None # type: Optional[Text] if split.scheme != '': vee = name @@ -207,7 +207,7 @@ def fix_jsonld_ids(obj, # type: Union[Dict[Text, Any], List[Dict[Text, Any]] fix_jsonld_ids(entry, ids) -def makerdf(workflow, # type: Union[str, Text] +def makerdf(workflow, # type: Text wf, # type: Union[List[Dict[Text, Any]], Dict[Text, Any]] ctx, # type: ContextType graph=None # type: Graph diff --git a/schema_salad/makedoc.py b/schema_salad/makedoc.py index b7f019818..5cbc9de04 100644 --- a/schema_salad/makedoc.py +++ b/schema_salad/makedoc.py @@ -60,7 +60,7 @@ def table(self, header, body): # type: (Text, Text) -> Text ) % (header, body) -def to_id(text): # type: (Union[str, Text]) -> Union[str, Text] +def to_id(text): # type: (Text) -> Text textid = text if text[0] in ("0", "1", "2", "3", "4", "5", "6", "7", "8", "9"): try: @@ -231,7 +231,7 @@ def typefmt(self, nbsp=False, # type: bool jsonldPredicate=None # type: Optional[Dict[str, str]] ): - # type: (...) -> Union[str, Text] + # type: (...) 
-> Text if isinstance(tp, list): if nbsp and len(tp) <= 3: return " | ".join([self.typefmt(n, redirects, jsonldPredicate=jsonldPredicate) for n in tp]) From 62cbece52863e2abda4acb59a4717d4b2dc33f6b Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Wed, 7 Jun 2017 16:39:26 +0530 Subject: [PATCH 093/116] add stub file for mistune package author = "Aleksandr Slepchenkov" email = "Sl.aleksandr28@gmail.com" --- typeshed/2.7/mistune.pyi | 164 ---------------------- typeshed/2and3/mistune.pyi | 271 +++++++++++++++++++++++++++++++++++++ 2 files changed, 271 insertions(+), 164 deletions(-) delete mode 100644 typeshed/2.7/mistune.pyi create mode 100644 typeshed/2and3/mistune.pyi diff --git a/typeshed/2.7/mistune.pyi b/typeshed/2.7/mistune.pyi deleted file mode 100644 index 0b9b0d95d..000000000 --- a/typeshed/2.7/mistune.pyi +++ /dev/null @@ -1,164 +0,0 @@ -# Stubs for mistune (Python 2) -# -# NOTE: This dynamically typed stub was automatically generated by stubgen. - -from typing import Any - -def escape(text, quote=False, smart_amp=True): ... - -class BlockGrammar: - def_links = ... # type: Any - def_footnotes = ... # type: Any - newline = ... # type: Any - block_code = ... # type: Any - fences = ... # type: Any - hrule = ... # type: Any - heading = ... # type: Any - lheading = ... # type: Any - block_quote = ... # type: Any - list_block = ... # type: Any - list_item = ... # type: Any - list_bullet = ... # type: Any - paragraph = ... # type: Any - block_html = ... # type: Any - table = ... # type: Any - nptable = ... # type: Any - text = ... # type: Any - -class BlockLexer: - grammar_class = ... # type: Any - default_rules = ... # type: Any - list_rules = ... # type: Any - footnote_rules = ... # type: Any - tokens = ... # type: Any - def_links = ... # type: Any - def_footnotes = ... # type: Any - rules = ... # type: Any - def __init__(self, rules=None, **kwargs): ... - def __call__(self, text, rules=None): ... - def parse(self, text, rules=None): ... 
- def parse_newline(self, m): ... - def parse_block_code(self, m): ... - def parse_fences(self, m): ... - def parse_heading(self, m): ... - def parse_lheading(self, m): ... - def parse_hrule(self, m): ... - def parse_list_block(self, m): ... - def parse_block_quote(self, m): ... - def parse_def_links(self, m): ... - def parse_def_footnotes(self, m): ... - def parse_table(self, m): ... - def parse_nptable(self, m): ... - def parse_block_html(self, m): ... - def parse_paragraph(self, m): ... - def parse_text(self, m): ... - -class InlineGrammar: - escape = ... # type: Any - inline_html = ... # type: Any - autolink = ... # type: Any - link = ... # type: Any - reflink = ... # type: Any - nolink = ... # type: Any - url = ... # type: Any - double_emphasis = ... # type: Any - emphasis = ... # type: Any - code = ... # type: Any - linebreak = ... # type: Any - strikethrough = ... # type: Any - footnote = ... # type: Any - text = ... # type: Any - def hard_wrap(self): ... - -class InlineLexer: - grammar_class = ... # type: Any - default_rules = ... # type: Any - inline_html_rules = ... # type: Any - renderer = ... # type: Any - links = ... # type: Any - footnotes = ... # type: Any - footnote_index = ... # type: Any - rules = ... # type: Any - def __init__(self, renderer, rules=None, **kwargs): ... - def __call__(self, text, rules=None): ... - def setup(self, links, footnotes): ... - line_match = ... # type: Any - line_started = ... # type: Any - def output(self, text, rules=None): ... - def output_escape(self, m): ... - def output_autolink(self, m): ... - def output_url(self, m): ... - def output_inline_html(self, m): ... - def output_footnote(self, m): ... - def output_link(self, m): ... - def output_reflink(self, m): ... - def output_nolink(self, m): ... - def output_double_emphasis(self, m): ... - def output_emphasis(self, m): ... - def output_code(self, m): ... - def output_linebreak(self, m): ... - def output_strikethrough(self, m): ... - def output_text(self, m): ... 
- -class Renderer: - options = ... # type: Any - def __init__(self, **kwargs) -> None: ... - def placeholder(self): ... - def block_code(self, code, lang=None): ... - def block_quote(self, text): ... - def block_html(self, html): ... - def header(self, text, level, raw=None): ... - def hrule(self): ... - def list(self, body, ordered=True): ... - def list_item(self, text): ... - def paragraph(self, text): ... - def table(self, header, body): ... - def table_row(self, content): ... - def table_cell(self, content, **flags): ... - def double_emphasis(self, text): ... - def emphasis(self, text): ... - def codespan(self, text): ... - def linebreak(self): ... - def strikethrough(self, text): ... - def text(self, text): ... - def autolink(self, link, is_email=False): ... - def link(self, link, title, text): ... - def image(self, src, title, text): ... - def inline_html(self, html): ... - def newline(self): ... - def footnote_ref(self, key, index): ... - def footnote_item(self, key, text): ... - def footnotes(self, text): ... - -class Markdown: - renderer = ... # type: Any - inline = ... # type: Any - block = ... # type: Any - footnotes = ... # type: Any - tokens = ... # type: Any - def __init__(self, renderer=None, inline=None, block=None, **kwargs): ... - def __call__(self, text): ... - def render(self, text): ... - def parse(self, text): ... - token = ... # type: Any - def pop(self): ... - def peek(self): ... - def output(self, text, rules=None): ... - def tok(self): ... - def tok_text(self): ... - def output_newline(self): ... - def output_hrule(self): ... - def output_heading(self): ... - def output_code(self): ... - def output_table(self): ... - def output_block_quote(self): ... - def output_list(self): ... - def output_list_item(self): ... - def output_loose_item(self): ... - def output_footnote(self): ... - def output_close_html(self): ... - def output_open_html(self): ... - def output_paragraph(self): ... - def output_text(self): ... 
- -def markdown(text: str, escape: bool=True, **kwargs) -> str: ... diff --git a/typeshed/2and3/mistune.pyi b/typeshed/2and3/mistune.pyi new file mode 100644 index 000000000..d267d9216 --- /dev/null +++ b/typeshed/2and3/mistune.pyi @@ -0,0 +1,271 @@ +__author__ = "Aleksandr Slepchenkov" +__email__ = "Sl.aleksandr28@gmail.com" + +from typing import Any, Optional, Pattern, List, Text, Tuple, Dict, Match, Type, Sequence, Iterable + +Tokens = List[Dict[Text, Any]] +# There are too much levels of optional unions of lists of text in cell and align 385 and 396 lines in mistune + + +def escape(text: Text, quote: bool = ..., smart_amp: bool = ...) -> Text: ... + + +class BlockGrammar: + def_links = ... # type: Pattern + def_footnotes = ... # type: Pattern + newline = ... # type: Pattern + block_code = ... # type: Pattern + fences = ... # type: Pattern + hrule = ... # type: Pattern + heading = ... # type: Pattern + lheading = ... # type: Pattern + block_quote = ... # type: Pattern + list_block = ... # type: Pattern + list_item = ... # type: Pattern + list_bullet = ... # type: Pattern + paragraph = ... # type: Pattern + block_html = ... # type: Pattern + table = ... # type: Pattern + nptable = ... # type: Pattern + text = ... # type: Pattern + + +class BlockLexer: + grammar_class = ... # type: Type[BlockGrammar] + default_rules = ... # type: List[Text] + list_rules = ... # type: Tuple[Text] + footnote_rules = ... # type: Tuple[Text] + tokens = ... # type: Tokens + def_links = ... # type: Dict[Text, Dict[Text, Text]] + def_footnotes = ... # type: Dict[Text, int] + rules = ... # type: BlockGrammar + + def __init__(self, rules: Optional[BlockGrammar] = ..., **kwargs) -> None: ... + + def __call__(self, text: Text, rules: Optional[Sequence[Text]] = ...) -> Tokens: ... + + def parse(self, text: Text, rules: Optional[Sequence[Text]] = ...) -> Tokens: ... + + def parse_newline(self, m: Match) -> None: ... + + def parse_block_code(self, m: Match) -> None: ... 
+ + def parse_fences(self, m: Match) -> None: ... + + def parse_heading(self, m: Match) -> None: ... + + def parse_lheading(self, m: Match) -> None: ... + + def parse_hrule(self, m: Match) -> None: ... + + def parse_list_block(self, m: Match) -> None: ... + + def parse_block_quote(self, m: Match) -> None: ... + + def parse_def_links(self, m: Match) -> None: ... + + def parse_def_footnotes(self, m: Match) -> None: ... + + def parse_table(self, m: Match) -> None: ... + + def parse_nptable(self, m: Match) -> None: ... + + def parse_block_html(self, m: Match) -> None: ... + + def parse_paragraph(self, m: Match) -> None: ... + + def parse_text(self, m: Match) -> None: ... + + +class InlineGrammar: + escape = ... # type: Pattern + inline_html = ... # type: Pattern + autolink = ... # type: Pattern + link = ... # type: Pattern + reflink = ... # type: Pattern + nolink = ... # type: Pattern + url = ... # type: Pattern + double_emphasis = ... # type: Pattern + emphasis = ... # type: Pattern + code = ... # type: Pattern + linebreak = ... # type: Pattern + strikethrough = ... # type: Pattern + footnote = ... # type: Pattern + text = ... # type: Pattern + + def hard_wrap(self) -> None: ... + + +class InlineLexer: + grammar_class = ... # type: Type[InlineGrammar] + default_rules = ... # type: List[Text] + inline_html_rules = ... # type: List[Text] + renderer = ... # type: Renderer + links = ... # type: Dict[Any, Dict] + footnotes = ... # type: Dict[Text, int] + footnote_index = ... # type: int + _in_link = ... # type: bool + _in_footnote = ... # type: bool + _parse_inline_html = ... # type: bool + rules = ... # type: InlineGrammar + + def __init__(self, renderer: Renderer, rules: Optional[InlineGrammar] = ..., **kwargs) -> None: ... + + def __call__(self, text: Text, rules: Optional[Sequence[Text]] = ...) -> Text: ... + + def setup(self, links: Optional[Dict[Any, Dict]], footnotes: Optional[Dict[Text, int]]) -> None: ... + + line_match = ... # type: Match + line_started = ... 
# type: bool + + def output(self, text: Text, rules: Optional[Sequence[Text]] = ...) -> Text: ... + + def output_escape(self, m: Match) -> Text: ... + + def output_autolink(self, m: Match) -> Text: ... + + def output_url(self, m: Match) -> Text: ... + + def output_inline_html(self, m: Match) -> Text: ... + + def output_footnote(self, m: Match) -> Optional[Text]: ... + + def output_link(self, m: Match) -> Text: ... + + def output_reflink(self, m: Match) -> Optional[Text]: ... + + def output_nolink(self, m: Match) -> Optional[Text]: ... + + def output_double_emphasis(self, m: Match) -> Text: ... + + def output_emphasis(self, m: Match) -> Text: ... + + def output_code(self, m: Match) -> Text: ... + + def output_linebreak(self, m: Match) -> Text: ... + + def output_strikethrough(self, m: Match) -> Text: ... + + def output_text(self, m: Match) -> Text: ... + + +class Renderer: + options = ... # type: Dict + + def __init__(self, **kwargs) -> None: ... + + def placeholder(self) -> Text: ... + + def block_code(self, code: Text, + lang: Any = ...) -> Text: ... # It seems that lang should be string, however other types are valid as well + + def block_quote(self, text: Text) -> Text: ... + + def block_html(self, html: Text) -> Text: ... + + def header(self, text: Text, level: int, raw: Optional[Text] = ...) -> Text: ... + + def hrule(self) -> Text: ... + + def list(self, body: Any, + ordered: bool = ...) -> Text: ... # body - same reason as for lang above, and for other Any in this class + + def list_item(self, text: Any) -> Text: ... + + def paragraph(self, text: Text) -> Text: ... + + def table(self, header: Any, body: Any) -> Text: ... + + def table_row(self, content: Any) -> Text: ... + + def table_cell(self, content: Any, **flags) -> Text: ... + + def double_emphasis(self, text: Any) -> Text: ... + + def emphasis(self, text: Any) -> Text: ... + + def codespan(self, text: Text) -> Text: ... + + def linebreak(self) -> Text: ... 
+ + def strikethrough(self, text: Any) -> Text: ... + + def text(self, text: Any) -> Text: ... + + def escape(self, text: Any) -> Text: ... + + def autolink(self, link: Any, is_email: bool = ...) -> Text: ... + + def link(self, link: Any, title: Any, text: Any) -> Text: ... + + def image(self, src: Any, title: Any, text: Any) -> Text: ... + + def inline_html(self, html: Any) -> Text: ... + + def newline(self) -> Text: ... + + def footnote_ref(self, key: Any, index: int) -> Text: ... + + def footnote_item(self, key: Any, text: Text) -> Text: ... + + def footnotes(self, text: Any) -> Text: ... + + +class Markdown: + renderer = ... # type: Renderer + inline = ... # type: InlineLexer + block = ... # type: BlockLexer + footnotes = ... # type: List[Dict[Text, Any]] + tokens = ... # type: Tokens + + def __init__(self, renderer: Optional[Renderer] = ..., inline: Optional[InlineLexer] = ..., + block: Optional[BlockLexer] = ..., **kwargs) -> None: ... + + def __call__(self, text: Text) -> Text: ... + + def render(self, text: Text) -> Text: ... + + def parse(self, text: Text) -> Text: ... + + token = ... # type: Dict[Text, Any] + + def pop(self) -> Optional[Dict[Text, Any]]: ... + + def peek(self) -> Optional[Dict[Text, Any]]: ... + + def output(self, text: Text, rules: Optional[Sequence[Text]] = ...): ... + + def tok(self) -> Text: ... + + def tok_text(self) -> Text: ... + + def output_newline(self) -> Text: ... + + def output_hrule(self) -> Text: ... + + def output_heading(self) -> Text: ... + + def output_code(self) -> Text: ... + + def output_table(self) -> Text: ... + + def output_block_quote(self) -> Text: ... + + def output_list(self) -> Text: ... + + def output_list_item(self) -> Text: ... + + def output_loose_item(self) -> Text: ... + + def output_footnote(self) -> Text: ... + + def output_close_html(self) -> Text: ... + + def output_open_html(self) -> Text: ... + + def output_paragraph(self) -> Text: ... + + def output_text(self) -> Text: ... 
+ + +def markdown(text: Text, escape: bool = ..., **kwargs) -> Text: ... \ No newline at end of file From 963ea3f97a06cac39b09308fa171ab8453d7bff3 Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Wed, 7 Jun 2017 16:53:16 +0530 Subject: [PATCH 094/116] mypy: add typeshed/2and3 in mypypath --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 859f221c8..3690f863c 100644 --- a/Makefile +++ b/Makefile @@ -169,7 +169,7 @@ mypy: ${PYSOURCES} rm -Rf typeshed/2.7/ruamel/yaml ln -s $(shell python -c 'from __future__ import print_function; import ruamel.yaml; import os.path; print(os.path.dirname(ruamel.yaml.__file__))') \ typeshed/2.7/ruamel/ - MYPYPATH=typeshed/2.7 mypy --py2 --disallow-untyped-calls \ + MYPYPATH=$MYPYPATH:typeshed/2.7:typeshed/2and3 mypy --py2 --disallow-untyped-calls \ --warn-redundant-casts --warn-unused-ignores \ schema_salad From 2850043486ab2826e52ec2e317fb303800e47552 Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Wed, 7 Jun 2017 16:53:44 +0530 Subject: [PATCH 095/116] makedoc.py: pass unicode string mintune markdown function --- schema_salad/makedoc.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/schema_salad/makedoc.py b/schema_salad/makedoc.py index 5cbc9de04..a9ae01160 100644 --- a/schema_salad/makedoc.py +++ b/schema_salad/makedoc.py @@ -331,9 +331,9 @@ def extendsfrom(item, ex): _, frg = urllib.parse.urldefrag(f["name"]) num = self.toc.add_entry(depth, frg) - doc = "%s %s %s\n" % (("#" * depth), num, frg) + doc = u"%s %s %s\n" % (("#" * depth), num, frg) else: - doc = "" + doc = u"" if self.title is None and f["doc"]: title = f["doc"][0:f["doc"].index("\n")] From c6c88f11de0f633f414136a674849b862de6bc04 Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Wed, 7 Jun 2017 18:00:54 +0530 Subject: [PATCH 096/116] add mypy.ini and ignore all ruamel.yaml package errors --- mypy.ini | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 mypy.ini diff --git 
a/mypy.ini b/mypy.ini new file mode 100644 index 000000000..3dccfb5bc --- /dev/null +++ b/mypy.ini @@ -0,0 +1,2 @@ +[mypy-ruamel.*] +ignore_errors = True \ No newline at end of file From 1d111904d1a45bd63f801186f646e7505bd3b22f Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Wed, 7 Jun 2017 22:43:43 +0530 Subject: [PATCH 097/116] Makefile: add target for mypy without --py2 flag --- .gitignore | 2 +- Makefile | 14 +++++++++++--- tox.ini | 4 +++- typeshed/{2.7 => 2and3}/ruamel/__init__.py | 0 4 files changed, 15 insertions(+), 5 deletions(-) rename typeshed/{2.7 => 2and3}/ruamel/__init__.py (100%) diff --git a/.gitignore b/.gitignore index 8f91562c7..3d7187d48 100644 --- a/.gitignore +++ b/.gitignore @@ -6,7 +6,7 @@ build/ dist/ ruamel -typeshed/2.7/ruamel/yaml +typeshed/2and3/ruamel/yaml # virtualenv venv/ .cache/ diff --git a/Makefile b/Makefile index 3690f863c..d54c67b6b 100644 --- a/Makefile +++ b/Makefile @@ -165,14 +165,22 @@ list-author-emails: @echo 'name, E-Mail Address' @git log --format='%aN,%aE' | sort -u | grep -v 'root' -mypy: ${PYSOURCES} - rm -Rf typeshed/2.7/ruamel/yaml +mypy2: ${PYSOURCES} + rm -Rf typeshed/2and3/ruamel/yaml ln -s $(shell python -c 'from __future__ import print_function; import ruamel.yaml; import os.path; print(os.path.dirname(ruamel.yaml.__file__))') \ - typeshed/2.7/ruamel/ + typeshed/2and3/ruamel/ MYPYPATH=$MYPYPATH:typeshed/2.7:typeshed/2and3 mypy --py2 --disallow-untyped-calls \ --warn-redundant-casts --warn-unused-ignores \ schema_salad +mypy: ${PYSOURCES} + rm -Rf typeshed/2and3/ruamel/yaml + ln -s $(shell python -c 'from __future__ import print_function; import ruamel.yaml; import os.path; print(os.path.dirname(ruamel.yaml.__file__))') \ + typeshed/2and3/ruamel/ + MYPYPATH=$MYPYPATH:typeshed/3:typeshed/2and3 mypy --disallow-untyped-calls \ + --warn-redundant-casts --warn-unused-ignores \ + schema_salad + jenkins: rm -Rf env && virtualenv env . 
env/bin/activate ; \ diff --git a/tox.ini b/tox.ini index f5fc3b36d..94686256f 100644 --- a/tox.ini +++ b/tox.ini @@ -28,7 +28,9 @@ whitelist_externals = py{27,33,34,35,36}-lint: flake8 [testenv:py35-mypy] -commands = make mypy +commands = + make mypy2 + # make mypy # not enabled for now whitelist_externals = make deps = -rmypy_requirements.txt diff --git a/typeshed/2.7/ruamel/__init__.py b/typeshed/2and3/ruamel/__init__.py similarity index 100% rename from typeshed/2.7/ruamel/__init__.py rename to typeshed/2and3/ruamel/__init__.py From f051f4994b7140b776ce86121e7413113565943f Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Wed, 7 Jun 2017 22:54:05 +0530 Subject: [PATCH 098/116] typesheds/2.7: remove stub files which already come with mypy==0.511 --- typeshed/2.7/argparse.pyi | 176 ------------------ typeshed/2.7/pprint.pyi | 21 --- typeshed/2.7/re.pyi | 99 ---------- typeshed/2.7/requests/__init__.pyi | 38 ---- typeshed/2.7/requests/adapters.pyi | 69 ------- typeshed/2.7/requests/api.pyi | 14 -- typeshed/2.7/requests/auth.pyi | 41 ---- typeshed/2.7/requests/compat.pyi | 6 - typeshed/2.7/requests/cookies.pyi | 61 ------ typeshed/2.7/requests/exceptions.pyi | 26 --- typeshed/2.7/requests/hooks.pyi | 8 - typeshed/2.7/requests/models.pyi | 133 ------------- typeshed/2.7/requests/packages/__init__.pyi | 8 - .../requests/packages/urllib3/__init__.pyi | 12 -- .../packages/urllib3/_collections.pyi | 51 ----- .../requests/packages/urllib3/connection.pyi | 51 ----- .../packages/urllib3/connectionpool.pyi | 87 --------- .../packages/urllib3/contrib/__init__.pyi | 4 - .../requests/packages/urllib3/exceptions.pyi | 54 ------ .../2.7/requests/packages/urllib3/fields.pyi | 16 -- .../requests/packages/urllib3/filepost.pyi | 19 -- .../packages/urllib3/packages/__init__.pyi | 4 - .../packages/ssl_match_hostname/__init__.pyi | 1 - .../ssl_match_hostname/_implementation.pyi | 7 - .../requests/packages/urllib3/poolmanager.pyi | 31 --- .../2.7/requests/packages/urllib3/request.pyi 
| 13 -- .../requests/packages/urllib3/response.pyi | 58 ------ .../packages/urllib3/util/__init__.pyi | 7 - .../packages/urllib3/util/connection.pyi | 11 -- .../packages/urllib3/util/request.pyi | 12 -- .../packages/urllib3/util/response.pyi | 5 - .../requests/packages/urllib3/util/retry.pyi | 36 ---- .../packages/urllib3/util/timeout.pyi | 24 --- .../requests/packages/urllib3/util/url.pyi | 26 --- typeshed/2.7/requests/sessions.pyi | 92 --------- typeshed/2.7/requests/status_codes.pyi | 8 - typeshed/2.7/requests/structures.pyi | 21 --- typeshed/2.7/requests/utils.pyi | 52 ------ 38 files changed, 1402 deletions(-) delete mode 100644 typeshed/2.7/argparse.pyi delete mode 100644 typeshed/2.7/pprint.pyi delete mode 100644 typeshed/2.7/re.pyi delete mode 100644 typeshed/2.7/requests/__init__.pyi delete mode 100644 typeshed/2.7/requests/adapters.pyi delete mode 100644 typeshed/2.7/requests/api.pyi delete mode 100644 typeshed/2.7/requests/auth.pyi delete mode 100644 typeshed/2.7/requests/compat.pyi delete mode 100644 typeshed/2.7/requests/cookies.pyi delete mode 100644 typeshed/2.7/requests/exceptions.pyi delete mode 100644 typeshed/2.7/requests/hooks.pyi delete mode 100644 typeshed/2.7/requests/models.pyi delete mode 100644 typeshed/2.7/requests/packages/__init__.pyi delete mode 100644 typeshed/2.7/requests/packages/urllib3/__init__.pyi delete mode 100644 typeshed/2.7/requests/packages/urllib3/_collections.pyi delete mode 100644 typeshed/2.7/requests/packages/urllib3/connection.pyi delete mode 100644 typeshed/2.7/requests/packages/urllib3/connectionpool.pyi delete mode 100644 typeshed/2.7/requests/packages/urllib3/contrib/__init__.pyi delete mode 100644 typeshed/2.7/requests/packages/urllib3/exceptions.pyi delete mode 100644 typeshed/2.7/requests/packages/urllib3/fields.pyi delete mode 100644 typeshed/2.7/requests/packages/urllib3/filepost.pyi delete mode 100644 typeshed/2.7/requests/packages/urllib3/packages/__init__.pyi delete mode 100644 
typeshed/2.7/requests/packages/urllib3/packages/ssl_match_hostname/__init__.pyi delete mode 100644 typeshed/2.7/requests/packages/urllib3/packages/ssl_match_hostname/_implementation.pyi delete mode 100644 typeshed/2.7/requests/packages/urllib3/poolmanager.pyi delete mode 100644 typeshed/2.7/requests/packages/urllib3/request.pyi delete mode 100644 typeshed/2.7/requests/packages/urllib3/response.pyi delete mode 100644 typeshed/2.7/requests/packages/urllib3/util/__init__.pyi delete mode 100644 typeshed/2.7/requests/packages/urllib3/util/connection.pyi delete mode 100644 typeshed/2.7/requests/packages/urllib3/util/request.pyi delete mode 100644 typeshed/2.7/requests/packages/urllib3/util/response.pyi delete mode 100644 typeshed/2.7/requests/packages/urllib3/util/retry.pyi delete mode 100644 typeshed/2.7/requests/packages/urllib3/util/timeout.pyi delete mode 100644 typeshed/2.7/requests/packages/urllib3/util/url.pyi delete mode 100644 typeshed/2.7/requests/sessions.pyi delete mode 100644 typeshed/2.7/requests/status_codes.pyi delete mode 100644 typeshed/2.7/requests/structures.pyi delete mode 100644 typeshed/2.7/requests/utils.pyi diff --git a/typeshed/2.7/argparse.pyi b/typeshed/2.7/argparse.pyi deleted file mode 100644 index 13a7b15e5..000000000 --- a/typeshed/2.7/argparse.pyi +++ /dev/null @@ -1,176 +0,0 @@ -# Stubs for argparse (Python 2) -# -# NOTE: This dynamically typed stub was automatically generated by stubgen. - -from typing import Any, Callable, Dict, List, IO, Iterable, Sequence, Union - -SUPPRESS = ... # type: Any -OPTIONAL = ... # type: Any -ZERO_OR_MORE = ... # type: Any -ONE_OR_MORE = ... # type: Any -PARSER = ... # type: Any -REMAINDER = ... # type: Any - -class _AttributeHolder: ... - -class HelpFormatter: - def __init__(self, prog, indent_increment=..., max_help_position=..., width=...) -> None: ... - class _Section: - formatter = ... # type: Any - parent = ... # type: Any - heading = ... # type: Any - items = ... 
# type: Any - def __init__(self, formatter, parent, heading=...) -> None: ... - def format_help(self): ... - def start_section(self, heading): ... - def end_section(self): ... - def add_text(self, text): ... - def add_usage(self, usage, actions, groups, prefix=...): ... - def add_argument(self, action): ... - def add_arguments(self, actions): ... - def format_help(self): ... - -class RawDescriptionHelpFormatter(HelpFormatter): ... -class RawTextHelpFormatter(RawDescriptionHelpFormatter): ... -class ArgumentDefaultsHelpFormatter(HelpFormatter): ... - -class ArgumentError(Exception): - argument_name = ... # type: Any - message = ... # type: Any - def __init__(self, argument, message) -> None: ... - -class ArgumentTypeError(Exception): ... - -class Action(_AttributeHolder): - option_strings = ... # type: Any - dest = ... # type: Any - nargs = ... # type: Any - const = ... # type: Any - default = ... # type: Any - type = ... # type: Any - choices = ... # type: Any - required = ... # type: Any - help = ... # type: Any - metavar = ... # type: Any - def __init__(self, option_strings: List[str], dest=str, nargs: Union[int, str]=..., const: Any =..., default: Any =..., type: Callable[[str], Any] =..., choices: Iterable[Any] =..., required: bool=..., help: str=..., metavar: str =...) -> None: ... - def __call__(self, parser, namespace, values, option_string=...): ... - -class _StoreAction(Action): - def __init__(self, option_strings, dest, nargs=..., const=..., default=..., type=..., - choices=..., required=..., help=..., metavar=...): ... - def __call__(self, parser, namespace, values, option_string=...): ... - -class _StoreConstAction(Action): - def __init__(self, option_strings, dest, const, default=..., required=..., help=..., - metavar=...): ... - def __call__(self, parser, namespace, values, option_string=...): ... - -class _StoreTrueAction(_StoreConstAction): - def __init__(self, option_strings, dest, default=..., required=..., help=...) -> None: ... 
- -class _StoreFalseAction(_StoreConstAction): - def __init__(self, option_strings, dest, default=..., required=..., help=...) -> None: ... - -class _AppendAction(Action): - def __init__(self, option_strings, dest, nargs=..., const=..., default=..., type=..., - choices=..., required=..., help=..., metavar=...): ... - def __call__(self, parser, namespace, values, option_string=...): ... - -class _AppendConstAction(Action): - def __init__(self, option_strings, dest, const, default=..., required=..., help=..., - metavar=...): ... - def __call__(self, parser, namespace, values, option_string=...): ... - -class _CountAction(Action): - def __init__(self, option_strings, dest, default=..., required=..., help=...) -> None: ... - def __call__(self, parser, namespace, values, option_string=...): ... - -class _HelpAction(Action): - def __init__(self, option_strings, dest=..., default=..., help=...) -> None: ... - def __call__(self, parser, namespace, values, option_string=...): ... - -class _VersionAction(Action): - version = ... # type: Any - def __init__(self, option_strings, version=..., dest=..., default=..., help=...) -> None: ... - def __call__(self, parser, namespace, values, option_string=...): ... - -class _SubParsersAction(Action): - class _ChoicesPseudoAction(Action): - def __init__(self, name, help) -> None: ... - def __init__(self, option_strings, prog, parser_class, dest=..., help=..., metavar=...) -> None: ... - def add_parser(self, name, **kwargs): ... - def __call__(self, parser, namespace, values, option_string=...): ... - -class FileType: - def __init__(self, mode=..., bufsize=...) -> None: ... - def __call__(self, string): ... - -class Namespace(_AttributeHolder): - def __init__(self, **kwargs) -> None: ... - __hash__ = ... # type: Any - def __eq__(self, other): ... - def __ne__(self, other): ... - def __contains__(self, key): ... - def __getattr__(self, name: str) -> Any: ... - -class _ActionsContainer: - description = ... 
# type: Any - argument_default = ... # type: Any - prefix_chars = ... # type: Any - conflict_handler = ... # type: Any - def __init__(self, description, prefix_chars, argument_default, conflict_handler) -> None: ... - def register(self, registry_name, value, object): ... - def set_defaults(self, **kwargs): ... - def get_default(self, dest): ... - def add_argument(self, - *args: Union[str, unicode], - action: Union[str, Action] = ..., - nargs: str = ..., - const: Any = ..., - default: Any = ..., - type: Any = ..., - choices: Any = ..., # TODO: Container? - required: bool = ..., - help: str = ..., - metavar: str = ..., - dest: str = ..., - version: str = ... - ) -> None: ... - def add_argument_group(self, *args, **kwargs): ... - def add_mutually_exclusive_group(self, **kwargs) -> _MutuallyExclusiveGroup: ... - -class _ArgumentGroup(_ActionsContainer): - title = ... # type: Any - def __init__(self, container, title=..., description=..., **kwargs) -> None: ... - -class _MutuallyExclusiveGroup(_ArgumentGroup): - required = ... # type: Any - def __init__(self, container, required=...) -> None: ... - -class ArgumentParser(_AttributeHolder, _ActionsContainer): - prog = ... # type: Any - usage = ... # type: Any - epilog = ... # type: Any - version = ... # type: Any - formatter_class = ... # type: Any - fromfile_prefix_chars = ... # type: Any - add_help = ... # type: Any - def __init__(self, prog: str=..., usage: str=..., description: str=..., - epilog: str=..., version: None=..., - parents: Iterable[ArgumentParser]=..., - formatter_class: HelpFormatter=..., prefix_chars: str=..., - fromfile_prefix_chars: str=..., - argument_default: str=..., conflict_handler: str=..., - add_help: bool=...) -> None: ... - def add_subparsers(self, **kwargs): ... - def parse_args(self, args: Sequence[str] = ..., namespace=...): ... - def parse_known_args(self, args=..., namespace=...): ... - def convert_arg_line_to_args(self, arg_line): ... - def format_usage(self): ... 
- def format_help(self): ... - def format_version(self): ... - def print_usage(self, file=...): ... - def print_help(self, file: IO[Any] = None) -> None: ... - def print_version(self, file=...): ... - def exit(self, status=..., message=...): ... - def error(self, message): ... diff --git a/typeshed/2.7/pprint.pyi b/typeshed/2.7/pprint.pyi deleted file mode 100644 index 1452a69ab..000000000 --- a/typeshed/2.7/pprint.pyi +++ /dev/null @@ -1,21 +0,0 @@ -# Stubs for pprint (Python 2) -# -# NOTE: Based on a dynamically typed automatically generated by stubgen. - -from typing import IO, Any - -def pprint(object: Any, stream: IO[Any] = ..., indent: int = ..., width: int = ..., - depth: int = ...) -> None: ... -def pformat(object: Any, indent: int =..., width: int =..., depth: int =...) -> str: ... -def saferepr(object): ... -def isreadable(object): ... -def isrecursive(object): ... - -class PrettyPrinter: - def __init__(self, indent: int = ..., width: int = ..., depth: int = ..., - stream: IO[Any] = ...) -> None: ... - def pprint(self, object: Any) -> str: ... - def pformat(self, object): ... - def isrecursive(self, object): ... - def isreadable(self, object): ... - def format(self, object, context, maxlevels, level): ... diff --git a/typeshed/2.7/re.pyi b/typeshed/2.7/re.pyi deleted file mode 100644 index 8b88a7822..000000000 --- a/typeshed/2.7/re.pyi +++ /dev/null @@ -1,99 +0,0 @@ -# Stubs for re -# Ron Murawski -# 'bytes' support added by Jukka Lehtosalo - -# based on: http://docs.python.org/2.7/library/re.html - -from typing import ( - List, Iterator, overload, Callable, Tuple, Sequence, Dict, - Generic, AnyStr, Match, Pattern, Any -) - -# ----- re variables and constants ----- -DEBUG = 0 -I = 0 -IGNORECASE = 0 -L = 0 -LOCALE = 0 -M = 0 -MULTILINE = 0 -S = 0 -DOTALL = 0 -X = 0 -VERBOSE = 0 -U = 0 -UNICODE = 0 -T = 0 -TEMPLATE = 0 - -class error(Exception): ... - -@overload -def compile(pattern: AnyStr, flags: int = ...) -> Pattern[AnyStr]: ... 
-@overload -def compile(pattern: Pattern[AnyStr], flags: int = ...) -> Pattern[AnyStr]: ... - -@overload -def search(pattern: AnyStr, string: AnyStr, flags: int = ...) -> Match[AnyStr]: ... -@overload -def search(pattern: Pattern[AnyStr], string: AnyStr, flags: int = ...) -> Match[AnyStr]: ... - -@overload -def match(pattern: AnyStr, string: AnyStr, flags: int = ...) -> Match[AnyStr]: ... -@overload -def match(pattern: Pattern[AnyStr], string: AnyStr, flags: int = ...) -> Match[AnyStr]: ... - -@overload -def split(pattern: AnyStr, string: AnyStr, - maxsplit: int = ..., flags: int = ...) -> List[AnyStr]: ... -@overload -def split(pattern: Pattern[AnyStr], string: AnyStr, - maxsplit: int = ..., flags: int = ...) -> List[AnyStr]: ... - -@overload -def findall(pattern: AnyStr, string: AnyStr, flags: int = ...) -> List[Any]: ... -@overload -def findall(pattern: Pattern[AnyStr], string: AnyStr, flags: int = ...) -> List[Any]: ... - -# Return an iterator yielding match objects over all non-overlapping matches -# for the RE pattern in string. The string is scanned left-to-right, and -# matches are returned in the order found. Empty matches are included in the -# result unless they touch the beginning of another match. -@overload -def finditer(pattern: AnyStr, string: AnyStr, - flags: int = ...) -> Iterator[Match[AnyStr]]: ... -@overload -def finditer(pattern: Pattern[AnyStr], string: AnyStr, - flags: int = ...) -> Iterator[Match[AnyStr]]: ... - -@overload -def sub(pattern: AnyStr, repl: AnyStr, string: AnyStr, count: int = ..., - flags: int = ...) -> AnyStr: ... -@overload -def sub(pattern: AnyStr, repl: Callable[[Match[AnyStr]], AnyStr], - string: AnyStr, count: int = ..., flags: int = ...) -> AnyStr: ... -@overload -def sub(pattern: Pattern[AnyStr], repl: AnyStr, string: AnyStr, count: int = ..., - flags: int = ...) -> AnyStr: ... -@overload -def sub(pattern: Pattern[AnyStr], repl: Callable[[Match[AnyStr]], AnyStr], - string: AnyStr, count: int = ..., flags: int = ...) 
-> AnyStr: ... - -@overload -def subn(pattern: AnyStr, repl: AnyStr, string: AnyStr, count: int = ..., - flags: int = ...) -> Tuple[AnyStr, int]: ... -@overload -def subn(pattern: AnyStr, repl: Callable[[Match[AnyStr]], AnyStr], - string: AnyStr, count: int = ..., - flags: int = ...) -> Tuple[AnyStr, int]: ... -@overload -def subn(pattern: Pattern[AnyStr], repl: AnyStr, string: AnyStr, count: int = ..., - flags: int = ...) -> Tuple[AnyStr, int]: ... -@overload -def subn(pattern: Pattern[AnyStr], repl: Callable[[Match[AnyStr]], AnyStr], - string: AnyStr, count: int = ..., - flags: int = ...) -> Tuple[AnyStr, int]: ... - -def escape(string: AnyStr) -> AnyStr: ... - -def purge() -> None: ... diff --git a/typeshed/2.7/requests/__init__.pyi b/typeshed/2.7/requests/__init__.pyi deleted file mode 100644 index 6ea56efcc..000000000 --- a/typeshed/2.7/requests/__init__.pyi +++ /dev/null @@ -1,38 +0,0 @@ -# Stubs for requests (based on version 2.6.0, Python 3) - -from typing import Any -from requests import models -from requests import api -from requests import sessions -from requests import status_codes -from requests import exceptions -import logging - -__title__ = ... # type: Any -__build__ = ... # type: Any -__license__ = ... # type: Any -__copyright__ = ... # type: Any - -Request = models.Request -Response = models.Response -PreparedRequest = models.PreparedRequest -request = api.request -get = api.get -head = api.head -post = api.post -patch = api.patch -put = api.put -delete = api.delete -options = api.options -session = sessions.session -Session = sessions.Session -codes = status_codes.codes -RequestException = exceptions.RequestException -Timeout = exceptions.Timeout -URLRequired = exceptions.URLRequired -TooManyRedirects = exceptions.TooManyRedirects -HTTPError = exceptions.HTTPError -ConnectionError = exceptions.ConnectionError - -class NullHandler(logging.Handler): - def emit(self, record): ... 
diff --git a/typeshed/2.7/requests/adapters.pyi b/typeshed/2.7/requests/adapters.pyi deleted file mode 100644 index 109dc9a3e..000000000 --- a/typeshed/2.7/requests/adapters.pyi +++ /dev/null @@ -1,69 +0,0 @@ -# Stubs for requests.adapters (Python 3) - -from typing import Any -from . import models -from .packages.urllib3 import poolmanager -from .packages.urllib3 import response -from .packages.urllib3.util import retry -from . import compat -from . import utils -from . import structures -from .packages.urllib3 import exceptions as urllib3_exceptions -from . import cookies -from . import exceptions -from . import auth - -Response = models.Response -PoolManager = poolmanager.PoolManager -proxy_from_url = poolmanager.proxy_from_url -HTTPResponse = response.HTTPResponse -Retry = retry.Retry -DEFAULT_CA_BUNDLE_PATH = utils.DEFAULT_CA_BUNDLE_PATH -get_encoding_from_headers = utils.get_encoding_from_headers -prepend_scheme_if_needed = utils.prepend_scheme_if_needed -get_auth_from_url = utils.get_auth_from_url -urldefragauth = utils.urldefragauth -CaseInsensitiveDict = structures.CaseInsensitiveDict -ConnectTimeoutError = urllib3_exceptions.ConnectTimeoutError -MaxRetryError = urllib3_exceptions.MaxRetryError -ProtocolError = urllib3_exceptions.ProtocolError -ReadTimeoutError = urllib3_exceptions.ReadTimeoutError -ResponseError = urllib3_exceptions.ResponseError -extract_cookies_to_jar = cookies.extract_cookies_to_jar -ConnectionError = exceptions.ConnectionError -ConnectTimeout = exceptions.ConnectTimeout -ReadTimeout = exceptions.ReadTimeout -SSLError = exceptions.SSLError -ProxyError = exceptions.ProxyError -RetryError = exceptions.RetryError - -DEFAULT_POOLBLOCK = ... # type: Any -DEFAULT_POOLSIZE = ... # type: Any -DEFAULT_RETRIES = ... # type: Any - -class BaseAdapter: - def __init__(self) -> None: ... - # TODO: "request" parameter not actually supported, added to please mypy. - def send(self, request=...): ... - def close(self): ... 
- -class HTTPAdapter(BaseAdapter): - __attrs__ = ... # type: Any - max_retries = ... # type: Any - config = ... # type: Any - proxy_manager = ... # type: Any - def __init__(self, pool_connections=..., pool_maxsize=..., max_retries=..., - pool_block=...): ... - poolmanager = ... # type: Any - def init_poolmanager(self, connections, maxsize, block=..., **pool_kwargs): ... - def proxy_manager_for(self, proxy, **proxy_kwargs): ... - def cert_verify(self, conn, url, verify, cert): ... - def build_response(self, req, resp): ... - def get_connection(self, url, proxies=...): ... - def close(self): ... - def request_url(self, request, proxies): ... - def add_headers(self, request, **kwargs): ... - def proxy_headers(self, proxy): ... - # TODO: "request" is not actually optional, modified to please mypy. - def send(self, request=..., stream=..., timeout=..., verify=..., cert=..., - proxies=...): ... diff --git a/typeshed/2.7/requests/api.pyi b/typeshed/2.7/requests/api.pyi deleted file mode 100644 index 44853f72b..000000000 --- a/typeshed/2.7/requests/api.pyi +++ /dev/null @@ -1,14 +0,0 @@ -# Stubs for requests.api (Python 3) - -from typing import Union - -from .models import Response - -def request(method: str, url: str, **kwargs) -> Response: ... -def get(url: Union[str, unicode], **kwargs) -> Response: ... -def options(url: Union[str, unicode], **kwargs) -> Response: ... -def head(url: Union[str, unicode], **kwargs) -> Response: ... -def post(url: Union[str, unicode], data=..., json=..., **kwargs) -> Response: ... -def put(url: Union[str, unicode], data=..., **kwargs) -> Response: ... -def patch(url: Union[str, unicode], data=..., **kwargs) -> Response: ... -def delete(url: Union[str, unicode], **kwargs) -> Response: ... 
diff --git a/typeshed/2.7/requests/auth.pyi b/typeshed/2.7/requests/auth.pyi deleted file mode 100644 index 8eea2b0e0..000000000 --- a/typeshed/2.7/requests/auth.pyi +++ /dev/null @@ -1,41 +0,0 @@ -# Stubs for requests.auth (Python 3) - -from typing import Any -from . import compat -from . import cookies -from . import utils -from . import status_codes - -extract_cookies_to_jar = cookies.extract_cookies_to_jar -parse_dict_header = utils.parse_dict_header -to_native_string = utils.to_native_string -codes = status_codes.codes - -CONTENT_TYPE_FORM_URLENCODED = ... # type: Any -CONTENT_TYPE_MULTI_PART = ... # type: Any - -class AuthBase: - def __call__(self, r): ... - -class HTTPBasicAuth(AuthBase): - username = ... # type: Any - password = ... # type: Any - def __init__(self, username, password) -> None: ... - def __call__(self, r): ... - -class HTTPProxyAuth(HTTPBasicAuth): - def __call__(self, r): ... - -class HTTPDigestAuth(AuthBase): - username = ... # type: Any - password = ... # type: Any - last_nonce = ... # type: Any - nonce_count = ... # type: Any - chal = ... # type: Any - pos = ... # type: Any - num_401_calls = ... # type: Any - def __init__(self, username, password) -> None: ... - def build_digest_header(self, method, url): ... - def handle_redirect(self, r, **kwargs): ... - def handle_401(self, r, **kwargs): ... - def __call__(self, r): ... 
diff --git a/typeshed/2.7/requests/compat.pyi b/typeshed/2.7/requests/compat.pyi deleted file mode 100644 index 63b92f6fe..000000000 --- a/typeshed/2.7/requests/compat.pyi +++ /dev/null @@ -1,6 +0,0 @@ -# Stubs for requests.compat (Python 3.4) - -from typing import Any -import collections - -OrderedDict = collections.OrderedDict diff --git a/typeshed/2.7/requests/cookies.pyi b/typeshed/2.7/requests/cookies.pyi deleted file mode 100644 index 6f56c82fb..000000000 --- a/typeshed/2.7/requests/cookies.pyi +++ /dev/null @@ -1,61 +0,0 @@ -# Stubs for requests.cookies (Python 3) - -from typing import Any, MutableMapping -import collections -from . import compat - -class MockRequest: - type = ... # type: Any - def __init__(self, request) -> None: ... - def get_type(self): ... - def get_host(self): ... - def get_origin_req_host(self): ... - def get_full_url(self): ... - def is_unverifiable(self): ... - def has_header(self, name): ... - def get_header(self, name, default=...): ... - def add_header(self, key, val): ... - def add_unredirected_header(self, name, value): ... - def get_new_headers(self): ... - @property - def unverifiable(self): ... - @property - def origin_req_host(self): ... - @property - def host(self): ... - -class MockResponse: - def __init__(self, headers) -> None: ... - def info(self): ... - def getheaders(self, name): ... - -def extract_cookies_to_jar(jar, request, response): ... -def get_cookie_header(jar, request): ... -def remove_cookie_by_name(cookiejar, name, domain=..., path=...): ... - -class CookieConflictError(RuntimeError): ... - -class RequestsCookieJar(MutableMapping): - def get(self, name, default=..., domain=..., path=...): ... - def set(self, name, value, **kwargs): ... - def iterkeys(self): ... - def keys(self): ... - def itervalues(self): ... - def values(self): ... - def iteritems(self): ... - def items(self): ... - def list_domains(self): ... - def list_paths(self): ... - def multiple_domains(self): ... 
- def get_dict(self, domain=..., path=...): ... - def __getitem__(self, name): ... - def __setitem__(self, name, value): ... - def __delitem__(self, name): ... - def set_cookie(self, cookie, *args, **kwargs): ... - def update(self, other): ... - def copy(self): ... - -def create_cookie(name, value, **kwargs): ... -def morsel_to_cookie(morsel): ... -def cookiejar_from_dict(cookie_dict, cookiejar=..., overwrite=...): ... -def merge_cookies(cookiejar, cookies): ... diff --git a/typeshed/2.7/requests/exceptions.pyi b/typeshed/2.7/requests/exceptions.pyi deleted file mode 100644 index ff0c32883..000000000 --- a/typeshed/2.7/requests/exceptions.pyi +++ /dev/null @@ -1,26 +0,0 @@ -# Stubs for requests.exceptions (Python 3) - -from typing import Any -from .packages.urllib3.exceptions import HTTPError as BaseHTTPError - -class RequestException(IOError): - response = ... # type: Any - request = ... # type: Any - def __init__(self, *args, **kwargs) -> None: ... - -class HTTPError(RequestException): ... -class ConnectionError(RequestException): ... -class ProxyError(ConnectionError): ... -class SSLError(ConnectionError): ... -class Timeout(RequestException): ... -class ConnectTimeout(ConnectionError, Timeout): ... -class ReadTimeout(Timeout): ... -class URLRequired(RequestException): ... -class TooManyRedirects(RequestException): ... -class MissingSchema(RequestException, ValueError): ... -class InvalidSchema(RequestException, ValueError): ... -class InvalidURL(RequestException, ValueError): ... -class ChunkedEncodingError(RequestException): ... -class ContentDecodingError(RequestException, BaseHTTPError): ... -class StreamConsumedError(RequestException, TypeError): ... -class RetryError(RequestException): ... 
diff --git a/typeshed/2.7/requests/hooks.pyi b/typeshed/2.7/requests/hooks.pyi deleted file mode 100644 index 3367d9a48..000000000 --- a/typeshed/2.7/requests/hooks.pyi +++ /dev/null @@ -1,8 +0,0 @@ -# Stubs for requests.hooks (Python 3) - -from typing import Any - -HOOKS = ... # type: Any - -def default_hooks(): ... -def dispatch_hook(key, hooks, hook_data, **kwargs): ... diff --git a/typeshed/2.7/requests/models.pyi b/typeshed/2.7/requests/models.pyi deleted file mode 100644 index d400d4a06..000000000 --- a/typeshed/2.7/requests/models.pyi +++ /dev/null @@ -1,133 +0,0 @@ -# Stubs for requests.models (Python 3) - -from typing import Any, List, MutableMapping, Iterator, Dict -import datetime - -from . import hooks -from . import structures -from . import auth -from . import cookies -from .cookies import RequestsCookieJar -from .packages.urllib3 import fields -from .packages.urllib3 import filepost -from .packages.urllib3 import util -from .packages.urllib3 import exceptions as urllib3_exceptions -from . import exceptions -from . import utils -from . import compat -from . 
import status_codes - -default_hooks = hooks.default_hooks -CaseInsensitiveDict = structures.CaseInsensitiveDict -HTTPBasicAuth = auth.HTTPBasicAuth -cookiejar_from_dict = cookies.cookiejar_from_dict -get_cookie_header = cookies.get_cookie_header -RequestField = fields.RequestField -encode_multipart_formdata = filepost.encode_multipart_formdata -DecodeError = urllib3_exceptions.DecodeError -ReadTimeoutError = urllib3_exceptions.ReadTimeoutError -ProtocolError = urllib3_exceptions.ProtocolError -LocationParseError = urllib3_exceptions.LocationParseError -HTTPError = exceptions.HTTPError -MissingSchema = exceptions.MissingSchema -InvalidURL = exceptions.InvalidURL -ChunkedEncodingError = exceptions.ChunkedEncodingError -ContentDecodingError = exceptions.ContentDecodingError -ConnectionError = exceptions.ConnectionError -StreamConsumedError = exceptions.StreamConsumedError -guess_filename = utils.guess_filename -get_auth_from_url = utils.get_auth_from_url -requote_uri = utils.requote_uri -stream_decode_response_unicode = utils.stream_decode_response_unicode -to_key_val_list = utils.to_key_val_list -parse_header_links = utils.parse_header_links -iter_slices = utils.iter_slices -guess_json_utf = utils.guess_json_utf -super_len = utils.super_len -to_native_string = utils.to_native_string -codes = status_codes.codes - -REDIRECT_STATI = ... # type: Any -DEFAULT_REDIRECT_LIMIT = ... # type: Any -CONTENT_CHUNK_SIZE = ... # type: Any -ITER_CHUNK_SIZE = ... # type: Any -json_dumps = ... # type: Any - -class RequestEncodingMixin: - @property - def path_url(self): ... - -class RequestHooksMixin: - def register_hook(self, event, hook): ... - def deregister_hook(self, event, hook): ... - -class Request(RequestHooksMixin): - hooks = ... # type: Any - method = ... # type: Any - url = ... # type: Any - headers = ... # type: Any - files = ... # type: Any - data = ... # type: Any - json = ... # type: Any - params = ... # type: Any - auth = ... # type: Any - cookies = ... 
# type: Any - def __init__(self, method=..., url=..., headers=..., files=..., data=..., params=..., - auth=..., cookies=..., hooks=..., json=...): ... - def prepare(self): ... - -class PreparedRequest(RequestEncodingMixin, RequestHooksMixin): - method = ... # type: Any - url = ... # type: Any - headers = ... # type: Any - body = ... # type: Any - hooks = ... # type: Any - def __init__(self) -> None: ... - def prepare(self, method=..., url=..., headers=..., files=..., data=..., params=..., - auth=..., cookies=..., hooks=..., json=...): ... - def copy(self): ... - def prepare_method(self, method): ... - def prepare_url(self, url, params): ... - def prepare_headers(self, headers): ... - def prepare_body(self, data, files, json=...): ... - def prepare_content_length(self, body): ... - def prepare_auth(self, auth, url=...): ... - def prepare_cookies(self, cookies): ... - def prepare_hooks(self, hooks): ... - -class Response: - __attrs__ = ... # type: Any - status_code = ... # type: int - headers = ... # type: MutableMapping[str, str] - raw = ... # type: Any - url = ... # type: str - encoding = ... # type: str - history = ... # type: List[Response] - reason = ... # type: str - cookies = ... # type: RequestsCookieJar - elapsed = ... # type: datetime.timedelta - request = ... # type: PreparedRequest - def __init__(self) -> None: ... - def __bool__(self) -> bool: ... - def __nonzero__(self) -> bool: ... - def __iter__(self) -> Iterator[str]: ... - @property - def ok(self) -> bool: ... - @property - def is_redirect(self) -> bool: ... - @property - def is_permanent_redirect(self) -> bool: ... - @property - def apparent_encoding(self) -> str: ... - def iter_content(self, chunk_size: int = ..., - decode_unicode: bool = ...) -> Iterator[Any]: ... - def iter_lines(self, chunk_size=..., decode_unicode=..., delimiter=...): ... - @property - def content(self) -> str: ... - @property - def text(self) -> str: ... - def json(self, **kwargs) -> Any: ... 
- @property - def links(self) -> Dict[Any, Any]: ... - def raise_for_status(self) -> None: ... - def close(self) -> None: ... diff --git a/typeshed/2.7/requests/packages/__init__.pyi b/typeshed/2.7/requests/packages/__init__.pyi deleted file mode 100644 index 2b1bff828..000000000 --- a/typeshed/2.7/requests/packages/__init__.pyi +++ /dev/null @@ -1,8 +0,0 @@ -# Stubs for requests.packages (Python 3.4) -# -# NOTE: This dynamically typed stub was automatically generated by stubgen. - -class VendorAlias: - def __init__(self, package_names) -> None: ... - def find_module(self, fullname, path=...): ... - def load_module(self, name): ... diff --git a/typeshed/2.7/requests/packages/urllib3/__init__.pyi b/typeshed/2.7/requests/packages/urllib3/__init__.pyi deleted file mode 100644 index 38cf6729c..000000000 --- a/typeshed/2.7/requests/packages/urllib3/__init__.pyi +++ /dev/null @@ -1,12 +0,0 @@ -# Stubs for requests.packages.urllib3 (Python 3.4) -# -# NOTE: This dynamically typed stub was automatically generated by stubgen. - -from typing import Any -import logging - -class NullHandler(logging.Handler): - def emit(self, record): ... - -def add_stderr_logger(level=...): ... -def disable_warnings(category=...): ... diff --git a/typeshed/2.7/requests/packages/urllib3/_collections.pyi b/typeshed/2.7/requests/packages/urllib3/_collections.pyi deleted file mode 100644 index 58aa94422..000000000 --- a/typeshed/2.7/requests/packages/urllib3/_collections.pyi +++ /dev/null @@ -1,51 +0,0 @@ -# Stubs for requests.packages.urllib3._collections (Python 3.4) -# -# NOTE: This dynamically typed stub was automatically generated by stubgen. - -from typing import Any -from collections import MutableMapping - -class RLock: - def __enter__(self): ... - def __exit__(self, exc_type, exc_value, traceback): ... - -class RecentlyUsedContainer(MutableMapping): - ContainerCls = ... # type: Any - dispose_func = ... # type: Any - lock = ... # type: Any - def __init__(self, maxsize=..., dispose_func=...) 
-> None: ... - def __getitem__(self, key): ... - def __setitem__(self, key, value): ... - def __delitem__(self, key): ... - def __len__(self): ... - def __iter__(self): ... - def clear(self): ... - def keys(self): ... - -class HTTPHeaderDict(dict): - def __init__(self, headers=..., **kwargs) -> None: ... - def __setitem__(self, key, val): ... - def __getitem__(self, key): ... - def __delitem__(self, key): ... - def __contains__(self, key): ... - def __eq__(self, other): ... - def __ne__(self, other): ... - values = ... # type: Any - get = ... # type: Any - update = ... # type: Any - iterkeys = ... # type: Any - itervalues = ... # type: Any - def pop(self, key, default=...): ... - def discard(self, key): ... - def add(self, key, val): ... - def extend(*args, **kwargs): ... - def getlist(self, key): ... - getheaders = ... # type: Any - getallmatchingheaders = ... # type: Any - iget = ... # type: Any - def copy(self): ... - def iteritems(self): ... - def itermerged(self): ... - def items(self): ... - @classmethod - def from_httplib(cls, message, duplicates=...): ... diff --git a/typeshed/2.7/requests/packages/urllib3/connection.pyi b/typeshed/2.7/requests/packages/urllib3/connection.pyi deleted file mode 100644 index 289fd1836..000000000 --- a/typeshed/2.7/requests/packages/urllib3/connection.pyi +++ /dev/null @@ -1,51 +0,0 @@ -# Stubs for requests.packages.urllib3.connection (Python 3.4) - -from typing import Any -from httplib import HTTPException -from . import packages -from . import exceptions -from . import util - -class DummyConnection: ... - -ConnectTimeoutError = exceptions.ConnectTimeoutError -SystemTimeWarning = exceptions.SystemTimeWarning -SecurityWarning = exceptions.SecurityWarning - -port_by_scheme = ... # type: Any -RECENT_DATE = ... # type: Any - -class HTTPConnection(object): - default_port = ... # type: Any - default_socket_options = ... # type: Any - is_verified = ... # type: Any - source_address = ... # type: Any - socket_options = ... 
# type: Any - def __init__(self, *args, **kw) -> None: ... - def connect(self): ... - -class HTTPSConnection(HTTPConnection): - default_port = ... # type: Any - key_file = ... # type: Any - cert_file = ... # type: Any - def __init__(self, host, port=..., key_file=..., cert_file=..., strict=..., timeout=..., **kw) -> None: ... - sock = ... # type: Any - def connect(self): ... - -class VerifiedHTTPSConnection(HTTPSConnection): - cert_reqs = ... # type: Any - ca_certs = ... # type: Any - ssl_version = ... # type: Any - assert_fingerprint = ... # type: Any - key_file = ... # type: Any - cert_file = ... # type: Any - assert_hostname = ... # type: Any - def set_cert(self, key_file=..., cert_file=..., cert_reqs=..., ca_certs=..., assert_hostname=..., assert_fingerprint=...): ... - sock = ... # type: Any - auto_open = ... # type: Any - is_verified = ... # type: Any - def connect(self): ... - -UnverifiedHTTPSConnection = ... # type: Any - -class ConnectionError(Exception): pass diff --git a/typeshed/2.7/requests/packages/urllib3/connectionpool.pyi b/typeshed/2.7/requests/packages/urllib3/connectionpool.pyi deleted file mode 100644 index 03c3140f2..000000000 --- a/typeshed/2.7/requests/packages/urllib3/connectionpool.pyi +++ /dev/null @@ -1,87 +0,0 @@ -# Stubs for requests.packages.urllib3.connectionpool (Python 3.4) -# -# NOTE: This dynamically typed stub was automatically generated by stubgen. - -from typing import Any -from ssl import SSLError as BaseSSLError -from . import exceptions -from .packages import ssl_match_hostname -from . import packages -from . import connection -from . import request -from . 
import response -from .util import connection as _connection -from .util import retry -from .util import timeout -from .util import url - -ClosedPoolError = exceptions.ClosedPoolError -ProtocolError = exceptions.ProtocolError -EmptyPoolError = exceptions.EmptyPoolError -HostChangedError = exceptions.HostChangedError -LocationValueError = exceptions.LocationValueError -MaxRetryError = exceptions.MaxRetryError -ProxyError = exceptions.ProxyError -ReadTimeoutError = exceptions.ReadTimeoutError -SSLError = exceptions.SSLError -TimeoutError = exceptions.TimeoutError -InsecureRequestWarning = exceptions.InsecureRequestWarning -CertificateError = ssl_match_hostname.CertificateError -port_by_scheme = connection.port_by_scheme -DummyConnection = connection.DummyConnection -HTTPConnection = connection.HTTPConnection -HTTPSConnection = connection.HTTPSConnection -VerifiedHTTPSConnection = connection.VerifiedHTTPSConnection -HTTPException = connection.HTTPException -ConnectionError = connection.ConnectionError -RequestMethods = request.RequestMethods -HTTPResponse = response.HTTPResponse -is_connection_dropped = _connection.is_connection_dropped -Retry = retry.Retry -Timeout = timeout.Timeout -get_host = url.get_host - -xrange = ... # type: Any -log = ... # type: Any - -class ConnectionPool: - scheme = ... # type: Any - QueueCls = ... # type: Any - host = ... # type: Any - port = ... # type: Any - def __init__(self, host, port=...) -> None: ... - def __enter__(self): ... - def __exit__(self, exc_type, exc_val, exc_tb): ... - def close(self): ... - -class HTTPConnectionPool(ConnectionPool, RequestMethods): - scheme = ... # type: Any - ConnectionCls = ... # type: Any - strict = ... # type: Any - timeout = ... # type: Any - retries = ... # type: Any - pool = ... # type: Any - block = ... # type: Any - proxy = ... # type: Any - proxy_headers = ... # type: Any - num_connections = ... # type: Any - num_requests = ... # type: Any - conn_kw = ... 
# type: Any - def __init__(self, host, port=..., strict=..., timeout=..., maxsize=..., block=..., headers=..., retries=..., _proxy=..., _proxy_headers=..., **conn_kw) -> None: ... - def close(self): ... - def is_same_host(self, url): ... - def urlopen(self, method, url, body=..., headers=..., retries=..., redirect=..., assert_same_host=..., timeout=..., pool_timeout=..., release_conn=..., **response_kw): ... - -class HTTPSConnectionPool(HTTPConnectionPool): - scheme = ... # type: Any - ConnectionCls = ... # type: Any - key_file = ... # type: Any - cert_file = ... # type: Any - cert_reqs = ... # type: Any - ca_certs = ... # type: Any - ssl_version = ... # type: Any - assert_hostname = ... # type: Any - assert_fingerprint = ... # type: Any - def __init__(self, host, port=..., strict=..., timeout=..., maxsize=..., block=..., headers=..., retries=..., _proxy=..., _proxy_headers=..., key_file=..., cert_file=..., cert_reqs=..., ca_certs=..., ssl_version=..., assert_hostname=..., assert_fingerprint=..., **conn_kw) -> None: ... - -def connection_from_url(url, **kw): ... diff --git a/typeshed/2.7/requests/packages/urllib3/contrib/__init__.pyi b/typeshed/2.7/requests/packages/urllib3/contrib/__init__.pyi deleted file mode 100644 index 17d26bb13..000000000 --- a/typeshed/2.7/requests/packages/urllib3/contrib/__init__.pyi +++ /dev/null @@ -1,4 +0,0 @@ -# Stubs for requests.packages.urllib3.contrib (Python 3.4) -# -# NOTE: This dynamically typed stub was automatically generated by stubgen. - diff --git a/typeshed/2.7/requests/packages/urllib3/exceptions.pyi b/typeshed/2.7/requests/packages/urllib3/exceptions.pyi deleted file mode 100644 index 3e7d0f6c9..000000000 --- a/typeshed/2.7/requests/packages/urllib3/exceptions.pyi +++ /dev/null @@ -1,54 +0,0 @@ -# Stubs for requests.packages.urllib3.exceptions (Python 3.4) -# -# NOTE: This dynamically typed stub was automatically generated by stubgen. - -from typing import Any - -class HTTPError(Exception): ... 
-class HTTPWarning(Warning): ... - -class PoolError(HTTPError): - pool = ... # type: Any - def __init__(self, pool, message) -> None: ... - def __reduce__(self): ... - -class RequestError(PoolError): - url = ... # type: Any - def __init__(self, pool, url, message) -> None: ... - def __reduce__(self): ... - -class SSLError(HTTPError): ... -class ProxyError(HTTPError): ... -class DecodeError(HTTPError): ... -class ProtocolError(HTTPError): ... - -ConnectionError = ... # type: Any - -class MaxRetryError(RequestError): - reason = ... # type: Any - def __init__(self, pool, url, reason=...) -> None: ... - -class HostChangedError(RequestError): - retries = ... # type: Any - def __init__(self, pool, url, retries=...) -> None: ... - -class TimeoutStateError(HTTPError): ... -class TimeoutError(HTTPError): ... -class ReadTimeoutError(TimeoutError, RequestError): ... -class ConnectTimeoutError(TimeoutError): ... -class EmptyPoolError(PoolError): ... -class ClosedPoolError(PoolError): ... -class LocationValueError(ValueError, HTTPError): ... - -class LocationParseError(LocationValueError): - location = ... # type: Any - def __init__(self, location) -> None: ... - -class ResponseError(HTTPError): - GENERIC_ERROR = ... # type: Any - SPECIFIC_ERROR = ... # type: Any - -class SecurityWarning(HTTPWarning): ... -class InsecureRequestWarning(SecurityWarning): ... -class SystemTimeWarning(SecurityWarning): ... -class InsecurePlatformWarning(SecurityWarning): ... diff --git a/typeshed/2.7/requests/packages/urllib3/fields.pyi b/typeshed/2.7/requests/packages/urllib3/fields.pyi deleted file mode 100644 index cdc7734e2..000000000 --- a/typeshed/2.7/requests/packages/urllib3/fields.pyi +++ /dev/null @@ -1,16 +0,0 @@ -# Stubs for requests.packages.urllib3.fields (Python 3.4) - -from typing import Any -from . import packages - -def guess_content_type(filename, default=...): ... -def format_header_param(name, value): ... - -class RequestField: - data = ... # type: Any - headers = ... 
# type: Any - def __init__(self, name, data, filename=..., headers=...) -> None: ... - @classmethod - def from_tuples(cls, fieldname, value): ... - def render_headers(self): ... - def make_multipart(self, content_disposition=..., content_type=..., content_location=...): ... diff --git a/typeshed/2.7/requests/packages/urllib3/filepost.pyi b/typeshed/2.7/requests/packages/urllib3/filepost.pyi deleted file mode 100644 index c6fefa618..000000000 --- a/typeshed/2.7/requests/packages/urllib3/filepost.pyi +++ /dev/null @@ -1,19 +0,0 @@ -# Stubs for requests.packages.urllib3.filepost (Python 3.4) -# -# NOTE: This dynamically typed stub was automatically generated by stubgen. - -from typing import Any -from . import packages -#from .packages import six -from . import fields - -#six = packages.six -#b = six.b -RequestField = fields.RequestField - -writer = ... # type: Any - -def choose_boundary(): ... -def iter_field_objects(fields): ... -def iter_fields(fields): ... -def encode_multipart_formdata(fields, boundary=...): ... diff --git a/typeshed/2.7/requests/packages/urllib3/packages/__init__.pyi b/typeshed/2.7/requests/packages/urllib3/packages/__init__.pyi deleted file mode 100644 index 231463649..000000000 --- a/typeshed/2.7/requests/packages/urllib3/packages/__init__.pyi +++ /dev/null @@ -1,4 +0,0 @@ -# Stubs for requests.packages.urllib3.packages (Python 3.4) -# -# NOTE: This dynamically typed stub was automatically generated by stubgen. 
- diff --git a/typeshed/2.7/requests/packages/urllib3/packages/ssl_match_hostname/__init__.pyi b/typeshed/2.7/requests/packages/urllib3/packages/ssl_match_hostname/__init__.pyi deleted file mode 100644 index 05c03dc08..000000000 --- a/typeshed/2.7/requests/packages/urllib3/packages/ssl_match_hostname/__init__.pyi +++ /dev/null @@ -1 +0,0 @@ -class CertificateError(ValueError): pass diff --git a/typeshed/2.7/requests/packages/urllib3/packages/ssl_match_hostname/_implementation.pyi b/typeshed/2.7/requests/packages/urllib3/packages/ssl_match_hostname/_implementation.pyi deleted file mode 100644 index 5abbc9dd5..000000000 --- a/typeshed/2.7/requests/packages/urllib3/packages/ssl_match_hostname/_implementation.pyi +++ /dev/null @@ -1,7 +0,0 @@ -# Stubs for requests.packages.urllib3.packages.ssl_match_hostname._implementation (Python 3.4) -# -# NOTE: This dynamically typed stub was automatically generated by stubgen. - -class CertificateError(ValueError): ... - -def match_hostname(cert, hostname): ... diff --git a/typeshed/2.7/requests/packages/urllib3/poolmanager.pyi b/typeshed/2.7/requests/packages/urllib3/poolmanager.pyi deleted file mode 100644 index a65f66497..000000000 --- a/typeshed/2.7/requests/packages/urllib3/poolmanager.pyi +++ /dev/null @@ -1,31 +0,0 @@ -# Stubs for requests.packages.urllib3.poolmanager (Python 3.4) -# -# NOTE: This dynamically typed stub was automatically generated by stubgen. - -from typing import Any -from .request import RequestMethods - -class PoolManager(RequestMethods): - proxy = ... # type: Any - connection_pool_kw = ... # type: Any - pools = ... # type: Any - def __init__(self, num_pools=..., headers=..., **connection_pool_kw) -> None: ... - def __enter__(self): ... - def __exit__(self, exc_type, exc_val, exc_tb): ... - def clear(self): ... - def connection_from_host(self, host, port=..., scheme=...): ... - def connection_from_url(self, url): ... 
- # TODO: This was the original signature -- copied another one from base class to fix complaint. - # def urlopen(self, method, url, redirect=True, **kw): ... - def urlopen(self, method, url, body=..., headers=..., encode_multipart=..., multipart_boundary=..., **kw): ... - -class ProxyManager(PoolManager): - proxy = ... # type: Any - proxy_headers = ... # type: Any - def __init__(self, proxy_url, num_pools=..., headers=..., proxy_headers=..., **connection_pool_kw) -> None: ... - def connection_from_host(self, host, port=..., scheme=...): ... - # TODO: This was the original signature -- copied another one from base class to fix complaint. - # def urlopen(self, method, url, redirect=True, **kw): ... - def urlopen(self, method, url, body=..., headers=..., encode_multipart=..., multipart_boundary=..., **kw): ... - -def proxy_from_url(url, **kw): ... diff --git a/typeshed/2.7/requests/packages/urllib3/request.pyi b/typeshed/2.7/requests/packages/urllib3/request.pyi deleted file mode 100644 index 788c759c5..000000000 --- a/typeshed/2.7/requests/packages/urllib3/request.pyi +++ /dev/null @@ -1,13 +0,0 @@ -# Stubs for requests.packages.urllib3.request (Python 3.4) -# -# NOTE: This dynamically typed stub was automatically generated by stubgen. - -from typing import Any - -class RequestMethods: - headers = ... # type: Any - def __init__(self, headers=...) -> None: ... - def urlopen(self, method, url, body=..., headers=..., encode_multipart=..., multipart_boundary=..., **kw): ... - def request(self, method, url, fields=..., headers=..., **urlopen_kw): ... - def request_encode_url(self, method, url, fields=..., **urlopen_kw): ... - def request_encode_body(self, method, url, fields=..., headers=..., encode_multipart=..., multipart_boundary=..., **urlopen_kw): ... 
diff --git a/typeshed/2.7/requests/packages/urllib3/response.pyi b/typeshed/2.7/requests/packages/urllib3/response.pyi deleted file mode 100644 index c84f7e91f..000000000 --- a/typeshed/2.7/requests/packages/urllib3/response.pyi +++ /dev/null @@ -1,58 +0,0 @@ -# Stubs for requests.packages.urllib3.response (Python 3.4) -# -# NOTE: This dynamically typed stub was automatically generated by stubgen. - -from typing import Any, IO -import io -from . import _collections -from . import exceptions -#from .packages import six -from . import connection -from .util import response - -HTTPHeaderDict = _collections.HTTPHeaderDict -ProtocolError = exceptions.ProtocolError -DecodeError = exceptions.DecodeError -ReadTimeoutError = exceptions.ReadTimeoutError -binary_type = str # six.binary_type -PY3 = True # six.PY3 -is_fp_closed = response.is_fp_closed - -class DeflateDecoder: - def __init__(self) -> None: ... - def __getattr__(self, name): ... - def decompress(self, data): ... - -class GzipDecoder: - def __init__(self) -> None: ... - def __getattr__(self, name): ... - def decompress(self, data): ... - -class HTTPResponse(IO[Any]): - CONTENT_DECODERS = ... # type: Any - REDIRECT_STATUSES = ... # type: Any - headers = ... # type: Any - status = ... # type: Any - version = ... # type: Any - reason = ... # type: Any - strict = ... # type: Any - decode_content = ... # type: Any - def __init__(self, body=..., headers=..., status=..., version=..., reason=..., strict=..., preload_content=..., decode_content=..., original_response=..., pool=..., connection=...) -> None: ... - def get_redirect_location(self): ... - def release_conn(self): ... - @property - def data(self): ... - def tell(self): ... - def read(self, amt=..., decode_content=..., cache_content=...): ... - def stream(self, amt=..., decode_content=...): ... - @classmethod - def from_httplib(ResponseCls, r, **response_kw): ... - def getheaders(self): ... - def getheader(self, name, default=...): ... - def close(self): ... 
- @property - def closed(self): ... - def fileno(self): ... - def flush(self): ... - def readable(self): ... - def readinto(self, b): ... diff --git a/typeshed/2.7/requests/packages/urllib3/util/__init__.pyi b/typeshed/2.7/requests/packages/urllib3/util/__init__.pyi deleted file mode 100644 index eca2ea93d..000000000 --- a/typeshed/2.7/requests/packages/urllib3/util/__init__.pyi +++ /dev/null @@ -1,7 +0,0 @@ -# Stubs for requests.packages.urllib3.util (Python 3.4) -# -# NOTE: This dynamically typed stub was automatically generated by stubgen. - -from . import connection -from . import request - diff --git a/typeshed/2.7/requests/packages/urllib3/util/connection.pyi b/typeshed/2.7/requests/packages/urllib3/util/connection.pyi deleted file mode 100644 index cd673098c..000000000 --- a/typeshed/2.7/requests/packages/urllib3/util/connection.pyi +++ /dev/null @@ -1,11 +0,0 @@ -# Stubs for requests.packages.urllib3.util.connection (Python 3.4) -# -# NOTE: This dynamically typed stub was automatically generated by stubgen. - -from typing import Any - -poll = ... # type: Any -select = ... # type: Any - -def is_connection_dropped(conn): ... -def create_connection(address, timeout=..., source_address=..., socket_options=...): ... diff --git a/typeshed/2.7/requests/packages/urllib3/util/request.pyi b/typeshed/2.7/requests/packages/urllib3/util/request.pyi deleted file mode 100644 index 20a6ea277..000000000 --- a/typeshed/2.7/requests/packages/urllib3/util/request.pyi +++ /dev/null @@ -1,12 +0,0 @@ -# Stubs for requests.packages.urllib3.util.request (Python 3.4) -# -# NOTE: This dynamically typed stub was automatically generated by stubgen. - -from typing import Any -#from ..packages import six - -#b = six.b - -ACCEPT_ENCODING = ... # type: Any - -def make_headers(keep_alive=..., accept_encoding=..., user_agent=..., basic_auth=..., proxy_basic_auth=..., disable_cache=...): ... 
diff --git a/typeshed/2.7/requests/packages/urllib3/util/response.pyi b/typeshed/2.7/requests/packages/urllib3/util/response.pyi deleted file mode 100644 index 761a00679..000000000 --- a/typeshed/2.7/requests/packages/urllib3/util/response.pyi +++ /dev/null @@ -1,5 +0,0 @@ -# Stubs for requests.packages.urllib3.util.response (Python 3.4) -# -# NOTE: This dynamically typed stub was automatically generated by stubgen. - -def is_fp_closed(obj): ... diff --git a/typeshed/2.7/requests/packages/urllib3/util/retry.pyi b/typeshed/2.7/requests/packages/urllib3/util/retry.pyi deleted file mode 100644 index e958d9061..000000000 --- a/typeshed/2.7/requests/packages/urllib3/util/retry.pyi +++ /dev/null @@ -1,36 +0,0 @@ -# Stubs for requests.packages.urllib3.util.retry (Python 3.4) -# -# NOTE: This dynamically typed stub was automatically generated by stubgen. - -from typing import Any -from .. import exceptions -from .. import packages - -ConnectTimeoutError = exceptions.ConnectTimeoutError -MaxRetryError = exceptions.MaxRetryError -ProtocolError = exceptions.ProtocolError -ReadTimeoutError = exceptions.ReadTimeoutError -ResponseError = exceptions.ResponseError - -log = ... # type: Any - -class Retry: - DEFAULT_METHOD_WHITELIST = ... # type: Any - BACKOFF_MAX = ... # type: Any - total = ... # type: Any - connect = ... # type: Any - read = ... # type: Any - redirect = ... # type: Any - status_forcelist = ... # type: Any - method_whitelist = ... # type: Any - backoff_factor = ... # type: Any - raise_on_redirect = ... # type: Any - def __init__(self, total=..., connect=..., read=..., redirect=..., method_whitelist=..., status_forcelist=..., backoff_factor=..., raise_on_redirect=..., _observed_errors=...) -> None: ... - def new(self, **kw): ... - @classmethod - def from_int(cls, retries, redirect=..., default=...): ... - def get_backoff_time(self): ... - def sleep(self): ... - def is_forced_retry(self, method, status_code): ... - def is_exhausted(self): ... 
- def increment(self, method=..., url=..., response=..., error=..., _pool=..., _stacktrace=...): ... diff --git a/typeshed/2.7/requests/packages/urllib3/util/timeout.pyi b/typeshed/2.7/requests/packages/urllib3/util/timeout.pyi deleted file mode 100644 index 0a7653c5f..000000000 --- a/typeshed/2.7/requests/packages/urllib3/util/timeout.pyi +++ /dev/null @@ -1,24 +0,0 @@ -# Stubs for requests.packages.urllib3.util.timeout (Python 3.4) -# -# NOTE: This dynamically typed stub was automatically generated by stubgen. - -from typing import Any -from .. import exceptions - -TimeoutStateError = exceptions.TimeoutStateError - -def current_time(): ... - -class Timeout: - DEFAULT_TIMEOUT = ... # type: Any - total = ... # type: Any - def __init__(self, total=..., connect=..., read=...) -> None: ... - @classmethod - def from_float(cls, timeout): ... - def clone(self): ... - def start_connect(self): ... - def get_connect_duration(self): ... - @property - def connect_timeout(self): ... - @property - def read_timeout(self): ... diff --git a/typeshed/2.7/requests/packages/urllib3/util/url.pyi b/typeshed/2.7/requests/packages/urllib3/util/url.pyi deleted file mode 100644 index 9877b4a17..000000000 --- a/typeshed/2.7/requests/packages/urllib3/util/url.pyi +++ /dev/null @@ -1,26 +0,0 @@ -# Stubs for requests.packages.urllib3.util.url (Python 3.4) -# -# NOTE: This dynamically typed stub was automatically generated by stubgen. - -from typing import Any -from .. import exceptions - -LocationParseError = exceptions.LocationParseError - -url_attrs = ... # type: Any - -class Url: - slots = ... # type: Any - def __new__(cls, scheme=..., auth=..., host=..., port=..., path=..., query=..., fragment=...): ... - @property - def hostname(self): ... - @property - def request_uri(self): ... - @property - def netloc(self): ... - @property - def url(self): ... - -def split_first(s, delims): ... -def parse_url(url): ... -def get_host(url): ... 
diff --git a/typeshed/2.7/requests/sessions.pyi b/typeshed/2.7/requests/sessions.pyi deleted file mode 100644 index b9f49cbb1..000000000 --- a/typeshed/2.7/requests/sessions.pyi +++ /dev/null @@ -1,92 +0,0 @@ -# Stubs for requests.sessions (Python 3) - -from typing import Any, AnyStr, Union, MutableMapping -from . import auth -from . import compat -from . import cookies -from . import models -from .models import Response -from . import hooks -from . import utils -from . import exceptions -from .packages.urllib3 import _collections -from . import structures -from . import adapters -from . import status_codes - -OrderedDict = compat.OrderedDict -cookiejar_from_dict = cookies.cookiejar_from_dict -extract_cookies_to_jar = cookies.extract_cookies_to_jar -RequestsCookieJar = cookies.RequestsCookieJar -merge_cookies = cookies.merge_cookies -Request = models.Request -PreparedRequest = models.PreparedRequest -DEFAULT_REDIRECT_LIMIT = models.DEFAULT_REDIRECT_LIMIT -default_hooks = hooks.default_hooks -dispatch_hook = hooks.dispatch_hook -to_key_val_list = utils.to_key_val_list -default_headers = utils.default_headers -to_native_string = utils.to_native_string -TooManyRedirects = exceptions.TooManyRedirects -InvalidSchema = exceptions.InvalidSchema -ChunkedEncodingError = exceptions.ChunkedEncodingError -ContentDecodingError = exceptions.ContentDecodingError -RecentlyUsedContainer = _collections.RecentlyUsedContainer -CaseInsensitiveDict = structures.CaseInsensitiveDict -HTTPAdapter = adapters.HTTPAdapter -requote_uri = utils.requote_uri -get_environ_proxies = utils.get_environ_proxies -get_netrc_auth = utils.get_netrc_auth -should_bypass_proxies = utils.should_bypass_proxies -get_auth_from_url = utils.get_auth_from_url -codes = status_codes.codes -REDIRECT_STATI = models.REDIRECT_STATI - -REDIRECT_CACHE_SIZE = ... # type: Any - -def merge_setting(request_setting, session_setting, dict_class=...): ... -def merge_hooks(request_hooks, session_hooks, dict_class=...): ... 
- -class SessionRedirectMixin: - def resolve_redirects(self, resp, req, stream=..., timeout=..., verify=..., cert=..., - proxies=...): ... - def rebuild_auth(self, prepared_request, response): ... - def rebuild_proxies(self, prepared_request, proxies): ... - -class Session(SessionRedirectMixin): - __attrs__ = ... # type: Any - headers = ... # type: MutableMapping[str, str] - auth = ... # type: Any - proxies = ... # type: Any - hooks = ... # type: Any - params = ... # type: Any - stream = ... # type: Any - verify = ... # type: Any - cert = ... # type: Any - max_redirects = ... # type: Any - trust_env = ... # type: Any - cookies = ... # type: Any - adapters = ... # type: Any - redirect_cache = ... # type: Any - def __init__(self) -> None: ... - def __enter__(self) -> 'Session': ... - def __exit__(self, *args) -> None: ... - def prepare_request(self, request): ... - def request(self, method: str, url: str, params=..., data=..., headers=..., - cookies=..., files=..., auth=..., timeout=..., allow_redirects=..., - proxies=..., hooks=..., stream=..., verify=..., cert=..., - json=...) -> Response: ... - def get(self, url: AnyStr, **kwargs) -> Response: ... - def options(self, url: str, **kwargs) -> Response: ... - def head(self, url: AnyStr, **kwargs) -> Response: ... - def post(self, url: str, data=..., json=..., **kwargs) -> Response: ... - def put(self, url: str, data=..., **kwargs) -> Response: ... - def patch(self, url: str, data=..., **kwargs) -> Response: ... - def delete(self, url: str, **kwargs) -> Response: ... - def send(self, request, **kwargs): ... - def merge_environment_settings(self, url, proxies, stream, verify, cert): ... - def get_adapter(self, url): ... - def close(self) -> None: ... - def mount(self, prefix, adapter): ... - -def session() -> Session: ... 
diff --git a/typeshed/2.7/requests/status_codes.pyi b/typeshed/2.7/requests/status_codes.pyi deleted file mode 100644 index e3035eb91..000000000 --- a/typeshed/2.7/requests/status_codes.pyi +++ /dev/null @@ -1,8 +0,0 @@ -# Stubs for requests.status_codes (Python 3) -# -# NOTE: This dynamically typed stub was automatically generated by stubgen. - -from typing import Any -from .structures import LookupDict - -codes = ... # type: Any diff --git a/typeshed/2.7/requests/structures.pyi b/typeshed/2.7/requests/structures.pyi deleted file mode 100644 index 837cf2501..000000000 --- a/typeshed/2.7/requests/structures.pyi +++ /dev/null @@ -1,21 +0,0 @@ -# Stubs for requests.structures (Python 3) - -from typing import Any -import collections - -class CaseInsensitiveDict(collections.MutableMapping): - def __init__(self, data=..., **kwargs) -> None: ... - def __setitem__(self, key, value): ... - def __getitem__(self, key): ... - def __delitem__(self, key): ... - def __iter__(self): ... - def __len__(self): ... - def lower_items(self): ... - def __eq__(self, other): ... - def copy(self): ... - -class LookupDict(dict): - name = ... # type: Any - def __init__(self, name=...) -> None: ... - def __getitem__(self, key): ... - def get(self, key, default=...): ... diff --git a/typeshed/2.7/requests/utils.pyi b/typeshed/2.7/requests/utils.pyi deleted file mode 100644 index 945277afc..000000000 --- a/typeshed/2.7/requests/utils.pyi +++ /dev/null @@ -1,52 +0,0 @@ -# Stubs for requests.utils (Python 3) - -from typing import Any -from . import compat -from . import cookies -from . import structures -from . import exceptions - -OrderedDict = compat.OrderedDict -RequestsCookieJar = cookies.RequestsCookieJar -cookiejar_from_dict = cookies.cookiejar_from_dict -CaseInsensitiveDict = structures.CaseInsensitiveDict -InvalidURL = exceptions.InvalidURL - -NETRC_FILES = ... # type: Any -DEFAULT_CA_BUNDLE_PATH = ... # type: Any - -def dict_to_sequence(d): ... -def super_len(o): ... 
-def get_netrc_auth(url): ... -def guess_filename(obj): ... -def from_key_val_list(value): ... -def to_key_val_list(value): ... -def parse_list_header(value): ... -def parse_dict_header(value): ... -def unquote_header_value(value, is_filename=...): ... -def dict_from_cookiejar(cj): ... -def add_dict_to_cookiejar(cj, cookie_dict): ... -def get_encodings_from_content(content): ... -def get_encoding_from_headers(headers): ... -def stream_decode_response_unicode(iterator, r): ... -def iter_slices(string, slice_length): ... -def get_unicode_from_response(r): ... - -UNRESERVED_SET = ... # type: Any - -def unquote_unreserved(uri): ... -def requote_uri(uri): ... -def address_in_network(ip, net): ... -def dotted_netmask(mask): ... -def is_ipv4_address(string_ip): ... -def is_valid_cidr(string_network): ... -def should_bypass_proxies(url): ... -def get_environ_proxies(url): ... -def default_user_agent(name=...): ... -def default_headers(): ... -def parse_header_links(value): ... -def guess_json_utf(data): ... -def prepend_scheme_if_needed(url, new_scheme): ... -def get_auth_from_url(url): ... -def to_native_string(string, encoding=...): ... -def urldefragauth(url): ... 
From 65e583c11a89041c8ca04b0e2a1ac26ac6528b8a Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Thu, 8 Jun 2017 12:41:57 +0530 Subject: [PATCH 099/116] mypy: some improvements in type annotations --- schema_salad/makedoc.py | 2 +- schema_salad/tests/test_errors.py | 2 -- schema_salad/tests/test_fetch.py | 6 +++--- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/schema_salad/makedoc.py b/schema_salad/makedoc.py index a9ae01160..c0e7c8a56 100644 --- a/schema_salad/makedoc.py +++ b/schema_salad/makedoc.py @@ -21,7 +21,7 @@ _logger = logging.getLogger("salad") -def has_types(items): # type: (Any) -> List[basestring] +def has_types(items): # type: (Any) -> List[Text] r = [] # type: List if isinstance(items, dict): if items["type"] == "https://w3id.org/cwl/salad#record": diff --git a/schema_salad/tests/test_errors.py b/schema_salad/tests/test_errors.py index 2943681e5..590984aa0 100644 --- a/schema_salad/tests/test_errors.py +++ b/schema_salad/tests/test_errors.py @@ -2,7 +2,6 @@ from __future__ import print_function from .util import get_data import unittest -from typing import cast from schema_salad.schema import load_schema, load_and_validate from schema_salad.validate import ValidationException from avro.schema import Names @@ -12,7 +11,6 @@ class TestErrors(unittest.TestCase): def test_errors(self): document_loader, avsc_names, schema_metadata, metaschema_loader = load_schema( get_data(u"tests/test_schema/CommonWorkflowLanguage.yml")) - avsc_names = cast(Names, avsc_names) for t in ("test_schema/test1.cwl", "test_schema/test2.cwl", diff --git a/schema_salad/tests/test_fetch.py b/schema_salad/tests/test_fetch.py index 09cb385da..6f36951a0 100644 --- a/schema_salad/tests/test_fetch.py +++ b/schema_salad/tests/test_fetch.py @@ -9,7 +9,7 @@ import ruamel.yaml as yaml import json import os - +from typing import Text from six.moves import urllib class TestFetcher(unittest.TestCase): @@ -18,7 +18,7 @@ class TestFetcher(schema_salad.ref_resolver.Fetcher): 
def __init__(self, a, b): pass - def fetch_text(self, url): # type: (unicode) -> unicode + def fetch_text(self, url): # type: (Text) -> Text if url == "keep:abc+123/foo.txt": return "hello: keepfoo" if url.endswith("foo.txt"): @@ -26,7 +26,7 @@ def fetch_text(self, url): # type: (unicode) -> unicode else: raise RuntimeError("Not foo.txt") - def check_exists(self, url): # type: (unicode) -> bool + def check_exists(self, url): # type: (Text) -> bool if url.endswith("foo.txt"): return True else: From ad05e61fc63a314213e3c2a91959be422186bc0e Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Thu, 8 Jun 2017 15:09:06 +0530 Subject: [PATCH 100/116] minor changes in rdflib stub files to make them compatible with mypy in py3 mode and move to typshed/2and3/ directory --- typeshed/{2.7 => 2and3}/rdflib/__init__.pyi | 0 typeshed/{2.7 => 2and3}/rdflib/events.pyi | 0 typeshed/{2.7 => 2and3}/rdflib/exceptions.pyi | 0 typeshed/{2.7 => 2and3}/rdflib/graph.pyi | 8 ++++---- typeshed/{2.7 => 2and3}/rdflib/namespace.pyi | 8 ++++---- typeshed/{2.7 => 2and3}/rdflib/parser.pyi | 2 +- typeshed/{2.7 => 2and3}/rdflib/plugin.pyi | 0 typeshed/{2.7 => 2and3}/rdflib/plugins/__init__.pyi | 0 .../{2.7 => 2and3}/rdflib/plugins/parsers/__init__.pyi | 0 .../{2.7 => 2and3}/rdflib/plugins/parsers/notation3.pyi | 0 typeshed/{2.7 => 2and3}/rdflib/query.pyi | 2 +- typeshed/{2.7 => 2and3}/rdflib/serializer.pyi | 0 typeshed/{2.7 => 2and3}/rdflib/store.pyi | 2 +- typeshed/{2.7 => 2and3}/rdflib/term.pyi | 4 ++-- typeshed/{2.7 => 2and3}/rdflib/util.pyi | 0 15 files changed, 13 insertions(+), 13 deletions(-) rename typeshed/{2.7 => 2and3}/rdflib/__init__.pyi (100%) rename typeshed/{2.7 => 2and3}/rdflib/events.pyi (100%) rename typeshed/{2.7 => 2and3}/rdflib/exceptions.pyi (100%) rename typeshed/{2.7 => 2and3}/rdflib/graph.pyi (97%) rename typeshed/{2.7 => 2and3}/rdflib/namespace.pyi (90%) rename typeshed/{2.7 => 2and3}/rdflib/parser.pyi (84%) rename typeshed/{2.7 => 2and3}/rdflib/plugin.pyi (100%) rename 
typeshed/{2.7 => 2and3}/rdflib/plugins/__init__.pyi (100%) rename typeshed/{2.7 => 2and3}/rdflib/plugins/parsers/__init__.pyi (100%) rename typeshed/{2.7 => 2and3}/rdflib/plugins/parsers/notation3.pyi (100%) rename typeshed/{2.7 => 2and3}/rdflib/query.pyi (97%) rename typeshed/{2.7 => 2and3}/rdflib/serializer.pyi (100%) rename typeshed/{2.7 => 2and3}/rdflib/store.pyi (97%) rename typeshed/{2.7 => 2and3}/rdflib/term.pyi (97%) rename typeshed/{2.7 => 2and3}/rdflib/util.pyi (100%) diff --git a/typeshed/2.7/rdflib/__init__.pyi b/typeshed/2and3/rdflib/__init__.pyi similarity index 100% rename from typeshed/2.7/rdflib/__init__.pyi rename to typeshed/2and3/rdflib/__init__.pyi diff --git a/typeshed/2.7/rdflib/events.pyi b/typeshed/2and3/rdflib/events.pyi similarity index 100% rename from typeshed/2.7/rdflib/events.pyi rename to typeshed/2and3/rdflib/events.pyi diff --git a/typeshed/2.7/rdflib/exceptions.pyi b/typeshed/2and3/rdflib/exceptions.pyi similarity index 100% rename from typeshed/2.7/rdflib/exceptions.pyi rename to typeshed/2and3/rdflib/exceptions.pyi diff --git a/typeshed/2.7/rdflib/graph.pyi b/typeshed/2and3/rdflib/graph.pyi similarity index 97% rename from typeshed/2.7/rdflib/graph.pyi rename to typeshed/2and3/rdflib/graph.pyi index 03a465985..6972611ea 100644 --- a/typeshed/2.7/rdflib/graph.pyi +++ b/typeshed/2and3/rdflib/graph.pyi @@ -2,8 +2,8 @@ # # NOTE: This dynamically typed stub was automatically generated by stubgen. -from typing import Any, AnyStr, Dict, Union, IO, Tuple, Iterator -from StringIO import StringIO as BytesIO +from typing import Any, AnyStr, Dict, Union, IO, Tuple, Iterator, Text +from io import StringIO as BytesIO from rdflib.term import Node, URIRef from rdflib.store import Store from rdflib.namespace import NamespaceManager @@ -71,8 +71,8 @@ class Graph(Node): def absolutize(self, uri, defrag=1): ... 
def serialize(self, destination: Union[str, IO[Any]]=None, format: str='', base: str=None, encoding: str=None, **args) -> Union[bytes, None]: ... def parse(self, source: str = None, publicID: str = None, - format: Union[str, unicode] = None, - location: Union[str, unicode] = None, file: IO[Any] = None, + format: Text = None, + location: Text = None, file: IO[Any] = None, data: str = None, **args): ... def load(self, source, publicID=None, format=''): ... def query(self, query_object, processor: str = '', result: str = '', initNs: Dict = None, initBindings: Dict = None, use_store_provided: bool = True, **kwargs) -> Result: ... diff --git a/typeshed/2.7/rdflib/namespace.pyi b/typeshed/2and3/rdflib/namespace.pyi similarity index 90% rename from typeshed/2.7/rdflib/namespace.pyi rename to typeshed/2and3/rdflib/namespace.pyi index 01197c745..2a0a6816d 100644 --- a/typeshed/2.7/rdflib/namespace.pyi +++ b/typeshed/2and3/rdflib/namespace.pyi @@ -2,9 +2,9 @@ # # NOTE: This dynamically typed stub was automatically generated by stubgen. -from typing import Any, Tuple, Union +from typing import Any, Tuple, Union, Text -class Namespace(unicode): +class Namespace(Text): __doc__ = ... # type: Any def __new__(cls, value): ... @property @@ -13,7 +13,7 @@ class Namespace(unicode): def __getitem__(self, key, default=None): ... def __getattr__(self, name): ... -class URIPattern(unicode): +class URIPattern(Text): __doc__ = ... # type: Any def __new__(cls, value): ... def __mod__(self, *args, **kwargs): ... @@ -57,4 +57,4 @@ def is_ncname(name): ... XMLNS = ... # type: Any -def split_uri(uri: Union[str, unicode]) -> Tuple[str, str]: ... +def split_uri(uri: Text) -> Tuple[str, str]: ... 
diff --git a/typeshed/2.7/rdflib/parser.pyi b/typeshed/2and3/rdflib/parser.pyi similarity index 84% rename from typeshed/2.7/rdflib/parser.pyi rename to typeshed/2and3/rdflib/parser.pyi index 3ff074d8c..fa1ec9a74 100644 --- a/typeshed/2.7/rdflib/parser.pyi +++ b/typeshed/2and3/rdflib/parser.pyi @@ -3,7 +3,7 @@ # NOTE: This dynamically typed stub was automatically generated by stubgen. from typing import Any -from StringIO import StringIO as BytesIO +from io import StringIO as BytesIO class Parser: def __init__(self): ... diff --git a/typeshed/2.7/rdflib/plugin.pyi b/typeshed/2and3/rdflib/plugin.pyi similarity index 100% rename from typeshed/2.7/rdflib/plugin.pyi rename to typeshed/2and3/rdflib/plugin.pyi diff --git a/typeshed/2.7/rdflib/plugins/__init__.pyi b/typeshed/2and3/rdflib/plugins/__init__.pyi similarity index 100% rename from typeshed/2.7/rdflib/plugins/__init__.pyi rename to typeshed/2and3/rdflib/plugins/__init__.pyi diff --git a/typeshed/2.7/rdflib/plugins/parsers/__init__.pyi b/typeshed/2and3/rdflib/plugins/parsers/__init__.pyi similarity index 100% rename from typeshed/2.7/rdflib/plugins/parsers/__init__.pyi rename to typeshed/2and3/rdflib/plugins/parsers/__init__.pyi diff --git a/typeshed/2.7/rdflib/plugins/parsers/notation3.pyi b/typeshed/2and3/rdflib/plugins/parsers/notation3.pyi similarity index 100% rename from typeshed/2.7/rdflib/plugins/parsers/notation3.pyi rename to typeshed/2and3/rdflib/plugins/parsers/notation3.pyi diff --git a/typeshed/2.7/rdflib/query.pyi b/typeshed/2and3/rdflib/query.pyi similarity index 97% rename from typeshed/2.7/rdflib/query.pyi rename to typeshed/2and3/rdflib/query.pyi index db002b8b0..fad3c3770 100644 --- a/typeshed/2.7/rdflib/query.pyi +++ b/typeshed/2and3/rdflib/query.pyi @@ -3,7 +3,7 @@ # NOTE: This dynamically typed stub was automatically generated by stubgen. 
from typing import Any -from StringIO import StringIO as BytesIO +from io import StringIO as BytesIO import collections class Processor: diff --git a/typeshed/2.7/rdflib/serializer.pyi b/typeshed/2and3/rdflib/serializer.pyi similarity index 100% rename from typeshed/2.7/rdflib/serializer.pyi rename to typeshed/2and3/rdflib/serializer.pyi diff --git a/typeshed/2.7/rdflib/store.pyi b/typeshed/2and3/rdflib/store.pyi similarity index 97% rename from typeshed/2.7/rdflib/store.pyi rename to typeshed/2and3/rdflib/store.pyi index 4a98a44b0..6886321b0 100644 --- a/typeshed/2.7/rdflib/store.pyi +++ b/typeshed/2and3/rdflib/store.pyi @@ -4,7 +4,7 @@ from typing import Any from rdflib.events import Event -from cStringIO import StringIO as BytesIO +from io import StringIO as BytesIO class StoreCreatedEvent(Event): ... class TripleAddedEvent(Event): ... diff --git a/typeshed/2.7/rdflib/term.pyi b/typeshed/2and3/rdflib/term.pyi similarity index 97% rename from typeshed/2.7/rdflib/term.pyi rename to typeshed/2and3/rdflib/term.pyi index 698218123..0021d14be 100644 --- a/typeshed/2.7/rdflib/term.pyi +++ b/typeshed/2and3/rdflib/term.pyi @@ -2,11 +2,11 @@ # # NOTE: This dynamically typed stub was automatically generated by stubgen. -from typing import Any +from typing import Any, Text class Node: ... -class Identifier(Node, unicode): +class Identifier(Node, Text): def __new__(cls, value): ... def eq(self, other): ... def neq(self, other): ... 
diff --git a/typeshed/2.7/rdflib/util.pyi b/typeshed/2and3/rdflib/util.pyi similarity index 100% rename from typeshed/2.7/rdflib/util.pyi rename to typeshed/2and3/rdflib/util.pyi From d16c0e5e9451b2f8a5844810dddb21b5e31e042a Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Thu, 8 Jun 2017 15:12:33 +0530 Subject: [PATCH 101/116] move cachecontrol stub files to typshed/2and3/ --- typeshed/{2.7 => 2and3}/cachecontrol/__init__.pyi | 0 typeshed/{2.7 => 2and3}/cachecontrol/adapter.pyi | 0 typeshed/{2.7 => 2and3}/cachecontrol/cache.pyi | 0 typeshed/{2.7 => 2and3}/cachecontrol/caches/__init__.pyi | 0 typeshed/{2.7 => 2and3}/cachecontrol/caches/file_cache.pyi | 0 typeshed/{2.7 => 2and3}/cachecontrol/compat.pyi | 0 typeshed/{2.7 => 2and3}/cachecontrol/controller.pyi | 0 typeshed/{2.7 => 2and3}/cachecontrol/filewrapper.pyi | 0 typeshed/{2.7 => 2and3}/cachecontrol/serialize.pyi | 0 typeshed/{2.7 => 2and3}/cachecontrol/wrapper.pyi | 0 10 files changed, 0 insertions(+), 0 deletions(-) rename typeshed/{2.7 => 2and3}/cachecontrol/__init__.pyi (100%) rename typeshed/{2.7 => 2and3}/cachecontrol/adapter.pyi (100%) rename typeshed/{2.7 => 2and3}/cachecontrol/cache.pyi (100%) rename typeshed/{2.7 => 2and3}/cachecontrol/caches/__init__.pyi (100%) rename typeshed/{2.7 => 2and3}/cachecontrol/caches/file_cache.pyi (100%) rename typeshed/{2.7 => 2and3}/cachecontrol/compat.pyi (100%) rename typeshed/{2.7 => 2and3}/cachecontrol/controller.pyi (100%) rename typeshed/{2.7 => 2and3}/cachecontrol/filewrapper.pyi (100%) rename typeshed/{2.7 => 2and3}/cachecontrol/serialize.pyi (100%) rename typeshed/{2.7 => 2and3}/cachecontrol/wrapper.pyi (100%) diff --git a/typeshed/2.7/cachecontrol/__init__.pyi b/typeshed/2and3/cachecontrol/__init__.pyi similarity index 100% rename from typeshed/2.7/cachecontrol/__init__.pyi rename to typeshed/2and3/cachecontrol/__init__.pyi diff --git a/typeshed/2.7/cachecontrol/adapter.pyi b/typeshed/2and3/cachecontrol/adapter.pyi similarity index 100% rename from 
typeshed/2.7/cachecontrol/adapter.pyi rename to typeshed/2and3/cachecontrol/adapter.pyi diff --git a/typeshed/2.7/cachecontrol/cache.pyi b/typeshed/2and3/cachecontrol/cache.pyi similarity index 100% rename from typeshed/2.7/cachecontrol/cache.pyi rename to typeshed/2and3/cachecontrol/cache.pyi diff --git a/typeshed/2.7/cachecontrol/caches/__init__.pyi b/typeshed/2and3/cachecontrol/caches/__init__.pyi similarity index 100% rename from typeshed/2.7/cachecontrol/caches/__init__.pyi rename to typeshed/2and3/cachecontrol/caches/__init__.pyi diff --git a/typeshed/2.7/cachecontrol/caches/file_cache.pyi b/typeshed/2and3/cachecontrol/caches/file_cache.pyi similarity index 100% rename from typeshed/2.7/cachecontrol/caches/file_cache.pyi rename to typeshed/2and3/cachecontrol/caches/file_cache.pyi diff --git a/typeshed/2.7/cachecontrol/compat.pyi b/typeshed/2and3/cachecontrol/compat.pyi similarity index 100% rename from typeshed/2.7/cachecontrol/compat.pyi rename to typeshed/2and3/cachecontrol/compat.pyi diff --git a/typeshed/2.7/cachecontrol/controller.pyi b/typeshed/2and3/cachecontrol/controller.pyi similarity index 100% rename from typeshed/2.7/cachecontrol/controller.pyi rename to typeshed/2and3/cachecontrol/controller.pyi diff --git a/typeshed/2.7/cachecontrol/filewrapper.pyi b/typeshed/2and3/cachecontrol/filewrapper.pyi similarity index 100% rename from typeshed/2.7/cachecontrol/filewrapper.pyi rename to typeshed/2and3/cachecontrol/filewrapper.pyi diff --git a/typeshed/2.7/cachecontrol/serialize.pyi b/typeshed/2and3/cachecontrol/serialize.pyi similarity index 100% rename from typeshed/2.7/cachecontrol/serialize.pyi rename to typeshed/2and3/cachecontrol/serialize.pyi diff --git a/typeshed/2.7/cachecontrol/wrapper.pyi b/typeshed/2and3/cachecontrol/wrapper.pyi similarity index 100% rename from typeshed/2.7/cachecontrol/wrapper.pyi rename to typeshed/2and3/cachecontrol/wrapper.pyi From 4f0ad4ff19b73e3fbee1cc664613813bbb15fe84 Mon Sep 17 00:00:00 2001 From: Manvendra Singh 
Date: Fri, 16 Jun 2017 01:50:10 +0530 Subject: [PATCH 102/116] tox: separate out mypy targets for py2 and py3 --- Makefile | 2 +- tox.ini | 13 ++++++++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/Makefile b/Makefile index d54c67b6b..0c9e0a9e7 100644 --- a/Makefile +++ b/Makefile @@ -173,7 +173,7 @@ mypy2: ${PYSOURCES} --warn-redundant-casts --warn-unused-ignores \ schema_salad -mypy: ${PYSOURCES} +mypy3: ${PYSOURCES} rm -Rf typeshed/2and3/ruamel/yaml ln -s $(shell python -c 'from __future__ import print_function; import ruamel.yaml; import os.path; print(os.path.dirname(ruamel.yaml.__file__))') \ typeshed/2and3/ruamel/ diff --git a/tox.ini b/tox.ini index 94686256f..2a50b71e6 100644 --- a/tox.ini +++ b/tox.ini @@ -2,7 +2,7 @@ envlist = py{27,33,34,35,36}-lint, py{27,33,34,35,36}-unit, - py35-mypy, + py35-mypy{2,3}, py27-pipconflictchecker skipsdist = True @@ -27,10 +27,17 @@ commands = whitelist_externals = py{27,33,34,35,36}-lint: flake8 -[testenv:py35-mypy] +[testenv:py35-mypy2] commands = make mypy2 - # make mypy # not enabled for now +whitelist_externals = make +deps = + -rmypy_requirements.txt + -rrequirements.txt + +[testenv:py35-mypy3] +commands = + make mypy3 whitelist_externals = make deps = -rmypy_requirements.txt From 740e235dd7fc4c056b4756eda27522951f055e9f Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Fri, 16 Jun 2017 01:52:20 +0530 Subject: [PATCH 103/116] mypy: turn off warn for unused ignore in --py3 --- Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Makefile b/Makefile index 0c9e0a9e7..c555c0a8c 100644 --- a/Makefile +++ b/Makefile @@ -178,7 +178,7 @@ mypy3: ${PYSOURCES} ln -s $(shell python -c 'from __future__ import print_function; import ruamel.yaml; import os.path; print(os.path.dirname(ruamel.yaml.__file__))') \ typeshed/2and3/ruamel/ MYPYPATH=$MYPYPATH:typeshed/3:typeshed/2and3 mypy --disallow-untyped-calls \ - --warn-redundant-casts --warn-unused-ignores \ + --warn-redundant-casts \ 
schema_salad jenkins: From 8672a5cddb11e374c7a29b45bd01b817eea48b6f Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Fri, 16 Jun 2017 01:53:06 +0530 Subject: [PATCH 104/116] sourceline.py: replace AnyStr with Text type --- schema_salad/sourceline.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/schema_salad/sourceline.py b/schema_salad/sourceline.py index aed31c7c6..21e57c1ff 100644 --- a/schema_salad/sourceline.py +++ b/schema_salad/sourceline.py @@ -20,16 +20,16 @@ def _add_lc_filename(r, source): # type: (ruamel.yaml.comments.CommentedBase, A for d in six.itervalues(r): _add_lc_filename(d, source) -def relname(source): # type: (AnyStr) -> AnyStr +def relname(source): # type: (Text) -> Text if source.startswith("file://"): source = source[7:] source = os.path.relpath(source) return source -def add_lc_filename(r, source): # type: (ruamel.yaml.comments.CommentedBase, AnyStr) -> None +def add_lc_filename(r, source): # type: (ruamel.yaml.comments.CommentedBase, Text) -> None _add_lc_filename(r, relname(source)) -def reflow(text, maxline, shift=""): # type: (AnyStr, int, AnyStr) -> AnyStr +def reflow(text, maxline, shift=""): # type: (Text, int, Text) -> Text if maxline < 20: maxline = 20 if len(text) > maxline: From 6b3d068c8132cef4f9aeb519215d5d76c2555095 Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Tue, 27 Jun 2017 17:55:09 +0530 Subject: [PATCH 105/116] typeshed/3: add stubs(autogen) for avro-python3 --- typeshed/3/avro/__init__.pyi | 10 ++ typeshed/3/avro/schema.pyi | 180 +++++++++++++++++++++++++++++++++++ 2 files changed, 190 insertions(+) create mode 100644 typeshed/3/avro/__init__.pyi create mode 100644 typeshed/3/avro/schema.pyi diff --git a/typeshed/3/avro/__init__.pyi b/typeshed/3/avro/__init__.pyi new file mode 100644 index 000000000..0a82029d3 --- /dev/null +++ b/typeshed/3/avro/__init__.pyi @@ -0,0 +1,10 @@ +# Stubs for avro (Python 3.5) +# +# NOTE: This dynamically typed stub was automatically generated by stubgen. 
+ +# Names in __all__ with no definition: +# datafile +# io +# ipc +# protocol +# schema diff --git a/typeshed/3/avro/schema.pyi b/typeshed/3/avro/schema.pyi new file mode 100644 index 000000000..5276b29a3 --- /dev/null +++ b/typeshed/3/avro/schema.pyi @@ -0,0 +1,180 @@ +# Stubs for avro.schema (Python 3.5) +# +# NOTE: This dynamically typed stub was automatically generated by stubgen. + +from typing import Any, Optional + +logger = ... # type: Any +DEBUG_VERBOSE = ... # type: int +NULL = ... # type: str +BOOLEAN = ... # type: str +STRING = ... # type: str +BYTES = ... # type: str +INT = ... # type: str +LONG = ... # type: str +FLOAT = ... # type: str +DOUBLE = ... # type: str +FIXED = ... # type: str +ENUM = ... # type: str +RECORD = ... # type: str +ERROR = ... # type: str +ARRAY = ... # type: str +MAP = ... # type: str +UNION = ... # type: str +REQUEST = ... # type: str +ERROR_UNION = ... # type: str +PRIMITIVE_TYPES = ... # type: Any +NAMED_TYPES = ... # type: Any +VALID_TYPES = ... # type: Any +SCHEMA_RESERVED_PROPS = ... # type: Any +FIELD_RESERVED_PROPS = ... # type: Any +VALID_FIELD_SORT_ORDERS = ... # type: Any + +class Error(Exception): ... +class AvroException(Error): ... +class SchemaParseException(AvroException): ... + +class ImmutableDict(dict): + def __init__(self, items: Optional[Any] = ..., **kwargs) -> None: ... + def __setitem__(self, key, value): ... + def __delitem__(self, key): ... + def clear(self): ... + def update(self, **kwargs): ... + def pop(self, key, default: Optional[Any] = ...): ... + def popitem(self): ... + +class Schema: + def __init__(self, type, other_props: Optional[Any] = ...) -> None: ... + @property + def name(self): ... + @property + def fullname(self): ... + @property + def namespace(self): ... + @property + def type(self): ... + @property + def doc(self): ... + @property + def props(self): ... + @property + def other_props(self): ... + def to_json(self, names): ... 
+ +class Name: + def __init__(self, name, namespace: Optional[Any] = ...) -> None: ... + def __eq__(self, other): ... + @property + def simple_name(self): ... + @property + def namespace(self): ... + @property + def fullname(self): ... + +class Names: + def __init__(self, default_namespace: Optional[Any] = ..., names: Optional[Any] = ...) -> None: ... + @property + def names(self): ... + @property + def default_namespace(self): ... + def NewWithDefaultNamespace(self, namespace): ... + def GetName(self, name, namespace: Optional[Any] = ...): ... + def has_name(self, name, namespace: Optional[Any] = ...): ... + def get_name(self, name, namespace: Optional[Any] = ...): ... + def GetSchema(self, name, namespace: Optional[Any] = ...): ... + def prune_namespace(self, properties): ... + def Register(self, schema): ... + +class NamedSchema(Schema): + def __init__(self, type, name, namespace: Optional[Any] = ..., names: Optional[Any] = ..., other_props: Optional[Any] = ...) -> None: ... + @property + def avro_name(self): ... + @property + def name(self): ... + @property + def namespace(self): ... + @property + def fullname(self): ... + def name_ref(self, names): ... + +class Field: + def __init__(self, type, name, index, has_default, default: Any = ..., order: Optional[Any] = ..., names: Optional[Any] = ..., doc: Optional[Any] = ..., other_props: Optional[Any] = ...) -> None: ... + @property + def type(self): ... + @property + def name(self): ... + @property + def index(self): ... + @property + def default(self): ... + @property + def has_default(self): ... + @property + def order(self): ... + @property + def doc(self): ... + @property + def props(self): ... + @property + def other_props(self): ... + def to_json(self, names: Optional[Any] = ...): ... + def __eq__(self, that): ... + +class PrimitiveSchema(Schema): + def __init__(self, type, other_props: Optional[Any] = ...) -> None: ... + @property + def name(self): ... + def to_json(self, names: Optional[Any] = ...): ... 
+ def __eq__(self, that): ... + +class FixedSchema(NamedSchema): + def __init__(self, name, namespace, size, names: Optional[Any] = ..., other_props: Optional[Any] = ...) -> None: ... + @property + def size(self): ... + def to_json(self, names: Optional[Any] = ...): ... + def __eq__(self, that): ... + +class EnumSchema(NamedSchema): + def __init__(self, name, namespace, symbols, names: Optional[Any] = ..., doc: Optional[Any] = ..., other_props: Optional[Any] = ...) -> None: ... + @property + def symbols(self): ... + def to_json(self, names: Optional[Any] = ...): ... + def __eq__(self, that): ... + +class ArraySchema(Schema): + def __init__(self, items, other_props: Optional[Any] = ...) -> None: ... + @property + def items(self): ... + def to_json(self, names: Optional[Any] = ...): ... + def __eq__(self, that): ... + +class MapSchema(Schema): + def __init__(self, values, other_props: Optional[Any] = ...) -> None: ... + @property + def values(self): ... + def to_json(self, names: Optional[Any] = ...): ... + def __eq__(self, that): ... + +class UnionSchema(Schema): + def __init__(self, schemas) -> None: ... + @property + def schemas(self): ... + def to_json(self, names: Optional[Any] = ...): ... + def __eq__(self, that): ... + +class ErrorUnionSchema(UnionSchema): + def __init__(self, schemas) -> None: ... + def to_json(self, names: Optional[Any] = ...): ... + +class RecordSchema(NamedSchema): + def __init__(self, name, namespace, fields: Optional[Any] = ..., make_fields: Optional[Any] = ..., names: Optional[Any] = ..., record_type: Any = ..., doc: Optional[Any] = ..., other_props: Optional[Any] = ...) -> None: ... + @property + def fields(self): ... + @property + def field_map(self): ... + def to_json(self, names: Optional[Any] = ...): ... + def __eq__(self, that): ... + +def FilterKeysOut(items, keys): ... +def SchemaFromJSONData(json_data, names: Optional[Any] = ...): ... +def Parse(json_string): ... 
From 669ebfc17e8a42449952d5c2d320e5bf12291d70 Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Tue, 27 Jun 2017 19:56:11 +0530 Subject: [PATCH 106/116] remove redundant condition with typing package --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 7412a5821..c85e184bb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -typing==3.5.3 ; python_version>="2.7" +typing==3.5.3 avro-python3 ; python_version>="3" avro==1.8.1 ; python_version<"3" ruamel.yaml==0.14.0 From a1f697b22a6533d7d0b861ff727b5a53fa6cb3ff Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Tue, 27 Jun 2017 21:50:33 +0530 Subject: [PATCH 107/116] ref_resolver.py: mypy: use Text instead of unicode --- schema_salad/ref_resolver.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/schema_salad/ref_resolver.py b/schema_salad/ref_resolver.py index a517135a0..d5d14084d 100644 --- a/schema_salad/ref_resolver.py +++ b/schema_salad/ref_resolver.py @@ -184,7 +184,7 @@ def __init__(self, idx=None, # type: Dict[Text, Union[CommentedMap, CommentedSeq, Text, None]] cache=None, # type: Dict[Text, Any] session=None, # type: requests.sessions.Session - fetcher_constructor=None, # type: Callable[[Dict[unicode, unicode], requests.sessions.Session], Fetcher] + fetcher_constructor=None, # type: Callable[[Dict[Text, Text], requests.sessions.Session], Fetcher] skip_schemas=None # type: bool ): # type: (...) 
-> None From 3d6003370057e95123152f0c4b15786f2e5f861d Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Tue, 27 Jun 2017 22:25:28 +0530 Subject: [PATCH 108/116] mypy_requirements.txt: bump mypy version --- mypy_requirements.txt | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mypy_requirements.txt b/mypy_requirements.txt index 1430406b3..508a7cbf6 100644 --- a/mypy_requirements.txt +++ b/mypy_requirements.txt @@ -1,2 +1 @@ -mypy==0.470 ; python_version>="3" -typed-ast==0.6.3 ; python_version>="3" \ No newline at end of file +mypy==0.511 ; python_version>="3" \ No newline at end of file From b2a5a26f4131a770d4195e7db849661baf1eae35 Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Tue, 27 Jun 2017 22:25:53 +0530 Subject: [PATCH 109/116] bump ruamel version, put upper bound on ruamel in setup.py --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index c85e184bb..a7007f830 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ typing==3.5.3 avro-python3 ; python_version>="3" avro==1.8.1 ; python_version<"3" -ruamel.yaml==0.14.0 +ruamel.yaml==0.15.2 rdflib==4.2.2 rdflib-jsonld==0.4.0 mistune==0.7.3 diff --git a/setup.py b/setup.py index 508f49a2e..d43eba60a 100755 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ install_requires = [ 'setuptools', 'requests >= 1.0', - 'ruamel.yaml >= 0.12.4', + 'ruamel.yaml >= 0.12.4, <= 0.15.2', 'rdflib >= 4.2.2, < 4.3.0', 'rdflib-jsonld >= 0.3.0, < 0.5.0', 'mistune >= 0.7.3, < 0.8', From 77485264acdaf03d5bda87c657d6ad86c6e6b4ac Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Tue, 27 Jun 2017 23:23:24 +0530 Subject: [PATCH 110/116] schema.py: fix some types, remove redundant str --- schema_salad/schema.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/schema_salad/schema.py b/schema_salad/schema.py index cb2cbf08a..f1ca9af13 100644 --- a/schema_salad/schema.py +++ 
b/schema_salad/schema.py @@ -392,7 +392,7 @@ def avro_name(url): # type: (AnyStr) -> AnyStr return url -Avro = TypeVar('Avro', Dict[six.text_type, Any], List[Any], six.text_type) +Avro = TypeVar('Avro', Dict[Text, Any], List[Any], Text) def make_valid_avro(items, # type: Avro @@ -416,7 +416,7 @@ def make_valid_avro(items, # type: Avro "Named schemas must have a non-empty name: %s" % items) if items["name"] in found: - return cast(six.text_type, items["name"]) + return cast(Text, items["name"]) else: found.add(items["name"]) for n in ("type", "items", "values", "fields"): @@ -431,7 +431,7 @@ def make_valid_avro(items, # type: Avro for i in items: ret.append(make_valid_avro(i, alltypes, found, union=union)) # type: ignore return ret - if union and isinstance(items, (str, six.text_type)): + if union and isinstance(items, six.string_types): if items in alltypes and avro_name(items) not in found: return cast(Dict, make_valid_avro(alltypes[items], alltypes, found, union=union)) From d9bdc8bd79936d86f68fa66599ba88809f925026 Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Wed, 28 Jun 2017 15:28:32 +0530 Subject: [PATCH 111/116] setup.py: cleanup --- setup.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/setup.py b/setup.py index d43eba60a..cd6e870e7 100755 --- a/setup.py +++ b/setup.py @@ -39,14 +39,9 @@ 'lockfile >= 0.9', 'six >= 1.8.0'] -# install_requires.append("avro") # TODO: remove me once cwltool is -# available in Debian Stable, Ubuntu 12.04 LTS - - -extras_require={ # TODO: uncomment me, same conditions as above +extras_require={ ':python_version<"3"': ['avro'], ':python_version>="3"': ['avro-python3']} -# extras_require = {} # TODO: to be removed when the above is added setup(name='schema-salad', version='3.0', @@ -69,7 +64,7 @@ 'console_scripts': ["schema-salad-tool=schema_salad.main:main", "schema-salad-doc=schema_salad.makedoc:main"] }, zip_safe=True, - # cmdclass={'egg_info': tagger}, + cmdclass={'egg_info': tagger}, 
classifiers=[ "Environment :: Console", "Intended Audience :: Science/Research", From a8903de681e5a68f246b119e987aa79412035efe Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Wed, 28 Jun 2017 17:26:47 +0530 Subject: [PATCH 112/116] setup.py: change version, uncomment classifiers for py3 --- setup.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/setup.py b/setup.py index cd6e870e7..52b378a28 100755 --- a/setup.py +++ b/setup.py @@ -44,7 +44,7 @@ ':python_version>="3"': ['avro-python3']} setup(name='schema-salad', - version='3.0', + version='2.5.1', description='Schema Annotations for Linked Avro Data (SALAD)', long_description=open(README).read(), author='Common workflow language working group', @@ -72,8 +72,8 @@ "Operating System :: MacOS :: MacOS X", "Development Status :: 4 - Beta", "Programming Language :: Python :: 2.7", - #"Programming Language :: Python :: 3.3", # TODO: uncomment these - #"Programming Language :: Python :: 3.4", # lines - #"Programming Language :: Python :: 3.5" + "Programming Language :: Python :: 3.3", + "Programming Language :: Python :: 3.4", + "Programming Language :: Python :: 3.5" ] ) From 52c054e5ed486e1081d34a44a9ec623cad9da163 Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" Date: Wed, 28 Jun 2017 14:04:07 +0200 Subject: [PATCH 113/116] 3.6 too! --- setup.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 52b378a28..9fbbc9122 100755 --- a/setup.py +++ b/setup.py @@ -74,6 +74,7 @@ "Programming Language :: Python :: 2.7", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", - "Programming Language :: Python :: 3.5" + "Programming Language :: Python :: 3.5", + "Programming Language :: Python :: 3.6" ] ) From b3c579fdf52afcedaef59161ba14e5da5c72e530 Mon Sep 17 00:00:00 2001 From: "Michael R. 
Crusoe" Date: Wed, 28 Jun 2017 05:44:44 -0700 Subject: [PATCH 114/116] a better bump of the version (mea culpa) --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 9fbbc9122..373dc45c4 100755 --- a/setup.py +++ b/setup.py @@ -44,7 +44,7 @@ ':python_version>="3"': ['avro-python3']} setup(name='schema-salad', - version='2.5.1', + version='2.6`', description='Schema Annotations for Linked Avro Data (SALAD)', long_description=open(README).read(), author='Common workflow language working group', From a8c3fde0652deed80585692f101f77f580c94763 Mon Sep 17 00:00:00 2001 From: "Michael R. Crusoe" Date: Wed, 28 Jun 2017 14:50:38 +0200 Subject: [PATCH 115/116] oops --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 373dc45c4..13789d6bd 100755 --- a/setup.py +++ b/setup.py @@ -44,7 +44,7 @@ ':python_version>="3"': ['avro-python3']} setup(name='schema-salad', - version='2.6`', + version='2.6', description='Schema Annotations for Linked Avro Data (SALAD)', long_description=open(README).read(), author='Common workflow language working group', From bde8dfb4a693ce5caf0822b117bfed86eb5232c3 Mon Sep 17 00:00:00 2001 From: Manvendra Singh Date: Thu, 29 Jun 2017 21:46:33 +0530 Subject: [PATCH 116/116] pin upper version of ruamel.yaml package --- requirements.txt | 2 +- setup.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements.txt b/requirements.txt index a7007f830..fbf3974ea 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,7 +1,7 @@ typing==3.5.3 avro-python3 ; python_version>="3" avro==1.8.1 ; python_version<"3" -ruamel.yaml==0.15.2 +ruamel.yaml>=0.12.4, <0.15 rdflib==4.2.2 rdflib-jsonld==0.4.0 mistune==0.7.3 diff --git a/setup.py b/setup.py index 13789d6bd..80192c143 100755 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ install_requires = [ 'setuptools', 'requests >= 1.0', - 'ruamel.yaml >= 0.12.4, <= 0.15.2', + 'ruamel.yaml >= 0.12.4, < 
0.15', 'rdflib >= 4.2.2, < 4.3.0', 'rdflib-jsonld >= 0.3.0, < 0.5.0', 'mistune >= 0.7.3, < 0.8',