Merge pull request #471 from crim-ca/cwl-refactor-update

crim-ca · Oct 5, 2022 · 10961f8 · 10961f8
2 parents 599603a + ae6306c
commit 10961f8
Show file tree

Hide file tree

Showing 18 changed files with 199 additions and 509 deletions.
diff --git a/.pylintrc b/.pylintrc
@@ -3,7 +3,9 @@
 # A comma-separated list of package or module names from where C extensions may
 # be loaded. Extensions are loading into the active Python interpreter and may
 # run arbitrary code.
-extension-pkg-whitelist=lxml.etree
+extension-pkg-whitelist=lxml.etree,
+                        schema_salad.sourceline,
+                        schema_salad.validate
 
 # Add files or directories to the blacklist. They should be base names, not
 # paths.

diff --git a/CHANGES.rst b/CHANGES.rst
@@ -12,7 +12,9 @@ Changes
 
 Changes:
 --------
-- No change.
+- Refactor ``weaver.processes.wps_workflow`` definitions to delegate implementation to ``cwltool`` core classes,
+  removing code duplication and allowing update to latest revisions
+  (resolves `#154 <https://github.com/crim-ca/weaver/issues/154>`_).
 
 Fixes:
 ------

diff --git a/Makefile b/Makefile
@@ -532,8 +532,9 @@ check-security-deps-only: mkdir-reports  ## run security checks on package depen
 			$(SAFETY_IGNORE) \
 		1> >(tee "$(REPORTS_DIR)/check-security-deps.txt")'
 
+# FIXME: bandit excludes not working (https://github.com/PyCQA/bandit/issues/657), clean-src beforehand to avoid error
 .PHONY: check-security-code-only
-check-security-code-only: mkdir-reports  ## run security checks on source code
+check-security-code-only: mkdir-reports clean-src ## run security checks on source code
 	@echo "Running security code checks..."
 	@-rm -fr "$(REPORTS_DIR)/check-security-code.txt"
 	@bash -c '$(CONDA_CMD) \

diff --git a/docker/Dockerfile-base b/docker/Dockerfile-base
@@ -17,11 +17,11 @@ COPY requirements* setup.py README.rst CHANGES.rst ${APP_DIR}/
 
 # install runtime/package dependencies
 RUN apt-get update && apt-get install -y --no-install-recommends \
-		ca-certificates \
-		netbase \
-		gcc \
-		git \
-	&& pip install --no-cache-dir --upgrade -r requirements-sys.txt \
+        ca-certificates \
+        netbase \
+        gcc \
+        git \
+    && pip install --no-cache-dir --upgrade -r requirements-sys.txt \
     && pip install --no-cache-dir -r requirements.txt \
     && pip install --no-cache-dir -e ${APP_DIR} \
     && apt-get remove -y \

diff --git a/docker/Dockerfile-worker b/docker/Dockerfile-worker
@@ -15,7 +15,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     #   Only install CLI package, 'docker-ce' and 'containerd.io' not required as they should be provided by host.
     #   Docker sibling execution is expected. See 'docker/docker-compose.yml.example' for details.
     && apt install --no-install-recommends docker-ce-cli \
-	&& rm -rf /var/lib/apt/lists/*
+    && rm -rf /var/lib/apt/lists/*
 
 # run app
 # see CHANGES (4.15.0), celery>=5 needs '-A' before 'worker'

diff --git a/docs/examples/docker-python-script-report.cwl b/docs/examples/docker-python-script-report.cwl
@@ -23,5 +23,5 @@ requirements:
         entry: |
           amount = $(inputs.amount)
           cost = $(inputs.cost)
-          with open("report.txt", "w") as report:
-              report.write(f"Order Total: {amount * cost:0.2f}$\n")
+          with open("report.txt", mode="w", encoding="utf-8") as report:
+              report.write(f"Order Total: {amount * cost:0.2f}$\\n")
diff --git a/requirements-sys.txt b/requirements-sys.txt
@@ -1,8 +1,4 @@
 pip>=20.2.2; python_version <= "3.6"
 pip>=22.0.4; python_version >= "3.7"
-# celery enforces some specific versions of setuptools (<60)
-# FIXME: when cwltool and rdflib-jsonld versions are updated to avoid failing setuptools install, update more recent
-# 	'rdflib-jsonld==0.5.0' (required by: cwltool -> schema_salad) uses 'use_2to3' which is invalid for setuptools>=58
-# setuptools<58; python_version <= "3.6"
-# setuptools>=59,<59.7; python_version >= "3.7"
-setuptools<58
+setuptools<58; python_version <= "3.6"
+setuptools>=60; python_version >= "3.7"
diff --git a/requirements.txt b/requirements.txt
@@ -12,8 +12,6 @@ billiard>3.2,<3.4; sys_platform == "win32"
 #   celery's CLI interface changed
 #   https://github.com/celery/celery/blob/master/docs/whatsnew-5.2.rst#upgrading-from-celery-4x
 celery[mongodb]<4; sys_platform == "win32"  # rq.filter: <4  # pyup: ignore
-# FIXME: use celery 5.2 specifically to resolve security issue
-#   best match for now since cannot use setuptools>=59 until cwltool/rdflib-jsonld are updated
 celery[mongodb]>=5.2.2,<6; sys_platform != "win32" and python_version >= "3.7"
 # technically, >=5.2 preferred to resolve security issue, but dependency resolver cannot find it
 # FIXME: drop support? more recent versions not avilable for end-of-life python
@@ -30,10 +28,7 @@ cryptography
 # use cwltool gpu-enabled support until integrated within the original tool
 # (https://github.com/common-workflow-language/common-workflow-language/issues/587)
 ### git+https://github.com/crim-ca/cwltool@docker-gpu#egg=cwltool; python_version >= "3"
-# FIXME: remove extra CWL code and let it handle it for use
-#   - changes since cause error with invalid get_listing import location
-#   - https://github.com/crim-ca/weaver/issues/154
-cwltool==3.0.20200324120055
+cwltool==3.1.20220913185150
 docker
 duration
 git+https://github.com/ESGF/esgf-compute-api.git@v2.3.7#egg=esgf-compute-api
@@ -50,8 +45,9 @@ mako
 oauthlib
 owslib==0.27.2
 psutil
-# FIXME: pymongo>=4 breaks with kombu corresponding to pinned Celery (https://github.com/crim-ca/weaver/issues/386)
-pymongo>=3.12.0,<4
+# pymongo>=4 breaks with kombu corresponding to pinned Celery (https://github.com/crim-ca/weaver/issues/386)
+pymongo>=3.12.0,<4; python_version <= "3.6"
+pymongo>=4; python_version >= "3.7"
 pyramid>=1.7.3
 pyramid_beaker>=0.8
 pyramid_celery>=4.0.0   # required for celery>=5
@@ -60,15 +56,15 @@ python-dateutil
 pyramid_rewrite
 pyramid_storage
 pytz
-pywps==4.5.1
+pywps==4.5.1; python_version <= "3.6"
+pywps==4.5.2; python_version >= "3.7"
 pyyaml>=5.2
+rdflib>=5  # pyup: ignore
 requests
 requests_file
-# let cwltool define ruamel.yaml version (<=0.16.5)
-# ensure minimal 0.15.78 to solve install issue (python 3.8)
-# (https://bitbucket.org/ruamel/yaml/issues/261/error-while-installing-ruamelyaml-setuppy)
-ruamel.yaml>=0.15.78,<=0.16.5
-shapely
+ruamel.yaml>=0.16
+schema-salad>=8.2,<9
+shapely==1.8.2
 simplejson
 urlmatch
 xmltodict

diff --git a/setup.cfg b/setup.cfg
@@ -5,23 +5,23 @@ tag = True
 tag_name = {new_version}
 
 [bumpversion:file:CHANGES.rst]
-search = 
+search =
 	`Unreleased <https://github.com/crim-ca/weaver/tree/master>`_ (latest)
 	========================================================================
-replace = 
+replace =
 	`Unreleased <https://github.com/crim-ca/weaver/tree/master>`_ (latest)
 	========================================================================
-	
+
 	Changes:
 	--------
 	- No change.
-	
+
 	Fixes:
 	------
 	- No change.
-	
+
 	.. _changes_{new_version}:
-	
+
 	`{new_version} <https://github.com/crim-ca/weaver/tree/{new_version}>`_ ({now:%%Y-%%m-%%d})
 	========================================================================
 
@@ -42,14 +42,14 @@ search = LABEL version="{current_version}"
 replace = LABEL version="{new_version}"
 
 [tool:pytest]
-addopts = 
+addopts =
 	--strict-markers
 	--tb=native
 	weaver/
 log_cli = false
 log_level = DEBUG
 python_files = test_*.py
-markers = 
+markers =
 	cli: mark test as related to CLI operations
 	testbed14: mark test as 'testbed14' validation
 	functional: mark test as functionality validation
@@ -80,7 +80,7 @@ targets = .
 [flake8]
 ignore = E126,E226,E402,F401,W503,W504
 max-line-length = 120
-exclude = 
+exclude =
 	src,
 	.git,
 	__pycache__,
@@ -112,14 +112,14 @@ add_select = D201,D213
 branch = true
 source = ./
 include = weaver/*
-omit = 
+omit =
 	setup.py
 	docs/*
 	tests/*
 	*_mako
 
 [coverage:report]
-exclude_lines = 
+exclude_lines =
 	pragma: no cover
 	raise AssertionError
 	raise NotImplementedError

diff --git a/tests/processes/test_convert.py b/tests/processes/test_convert.py
@@ -952,11 +952,11 @@ def test_cwl2json_input_values_ogc_format():
         "test1": {"value": "value"},
         "test2": {"value": 1},
         "test3": {"value": 1.23},
-        "test4": {"href": "/tmp/random.txt"},
+        "test4": {"href": "/tmp/random.txt"},  # nosec: B108
         "test5": [{"value": "val1"}, {"value": "val2"}],
         "test6": [{"value": 1}, {"value": 2}],
         "test7": [{"value": 1.23}, {"value": 4.56}],
-        "test8": [{"href": "/tmp/other.txt"}]
+        "test8": [{"href": "/tmp/other.txt"}]  # nosec: B108
     }
     result = cwl2json_input_values(values, ProcessSchema.OGC)
     assert result == expect
@@ -1096,17 +1096,17 @@ def test_repr2json_input_values():
         {"id": "test6", "value": 2},
         {"id": "test7", "value": 1.23},
         {"id": "test7", "value": 4.56},
-        {"id": "test8", "href": "/tmp/other.txt"},
+        {"id": "test8", "href": "/tmp/other.txt"},  # nosec: B108
         {"id": "test9", "value": "short"},
         {"id": "test10", "value": "long"},
-        {"id": "test11", "href": "/tmp/file.json", "format": {
+        {"id": "test11", "href": "/tmp/file.json", "format": {  # nosec: B108
             "mediaType": ContentType.APP_JSON, "schema": "http://schema.org/random.json"
         }},
-        {"id": "test12", "href": "/tmp/other.xml", "format": {
+        {"id": "test12", "href": "/tmp/other.xml", "format": {  # nosec: B108
             "mediaType": ContentType.TEXT_XML, "schema": "http://schema.org/random.xml"
         }},
-        {"id": "test13", "href": "/tmp/one.json", "format": {"mediaType": ContentType.APP_JSON}},
-        {"id": "test13", "href": "/tmp/two.xml", "format": {"mediaType": ContentType.TEXT_XML}},
+        {"id": "test13", "href": "/tmp/one.json", "format": {"mediaType": ContentType.APP_JSON}},  # nosec: B108
+        {"id": "test13", "href": "/tmp/two.xml", "format": {"mediaType": ContentType.TEXT_XML}},   # nosec: B108
     ]
     result = repr2json_input_values(values)
     assert result == expect

diff --git a/tests/utils.py b/tests/utils.py
@@ -12,7 +12,7 @@
 import mimetypes
 import os
 import re
-import subprocess
+import subprocess  # nosec: B404
 import sys
 import tempfile
 import uuid
@@ -358,7 +358,8 @@ def run_command(command, trim=True, expect_error=False, entrypoint=None):
         command = command.split(" ")
     command = [str(arg) for arg in command]
     if entrypoint is None:
-        out, _ = subprocess.Popen(["which", "python"], universal_newlines=True, stdout=subprocess.PIPE).communicate()
+        func = ["which", "python"]
+        out, _ = subprocess.Popen(func, universal_newlines=True, stdout=subprocess.PIPE).communicate()  # nosec: B603
         if not out:
             out = sys.executable  # fallback for some systems that fail above call
         python_path = os.path.split(out)[0]

diff --git a/tests/wps/test_utils.py b/tests/wps/test_utils.py
@@ -218,24 +218,24 @@ def test_get_wps_output_context_resolution():
 
 def test_map_wps_output_location_duplicate_subdir():
     for tmp_dir in [
-        "/tmp/tmp/tmp",
-        "/tmp/tmpdir"
+        "/tmp/tmp/tmp",  # nosec: B108 # don't care hardcoded for test
+        "/tmp/tmpdir"    # nosec: B108 # don't care hardcoded for test
     ]:
-        wps_out = "http:///localhost/wps-output/tmp"
+        wps_out = "http:///localhost/wps-output/tmp"  # nosec: B108 # don't care hardcoded for test
         settings = {
             "weaver.wps_output_dir": tmp_dir,
             "weaver.wps_output_url": wps_out
         }
         path = map_wps_output_location(f"{wps_out}/tmp/some-file-tmp.tmp", settings, exists=False)
-        assert path == f"{tmp_dir}/tmp/some-file-tmp.tmp"
+        assert path == f"{tmp_dir}/tmp/some-file-tmp.tmp"  # nosec: B108 # don't care hardcoded for test
 
         path = map_wps_output_location(f"{tmp_dir}/here/some-file-tmp.tmp", settings, exists=False, url=True)
-        assert path == f"{wps_out}/here/some-file-tmp.tmp"
+        assert path == f"{wps_out}/here/some-file-tmp.tmp"  # nosec: B108 # don't care hardcoded for test
 
 
 def test_map_wps_output_location_exists():
     wps_url = "http:///localhost/wps-output/tmp"
-    wps_dir = "/tmp/weaver-test/test-outputs"
+    wps_dir = "/tmp/weaver-test/test-outputs"  # nosec: B108 # don't care hardcoded for test
     settings = {
         "weaver.wps_output_dir": wps_dir,
         "weaver.wps_output_url": wps_url

diff --git a/tests/wps_restapi/test_jobs.py b/tests/wps_restapi/test_jobs.py
@@ -63,7 +63,7 @@ def setUpClass(cls):
         cls.settings = {
             "weaver.url": "https://localhost",
             "weaver.wps_email_encrypt_salt": "weaver-test",
-            "weaver.wps_output_dir": "/tmp/weaver-test/wps-outputs",
+            "weaver.wps_output_dir": "/tmp/weaver-test/wps-outputs",  # nosec: B108 # don't care hardcoded for test
         }
         cls.config = setup_config_with_mongodb(settings=cls.settings)
         cls.app = get_test_weaver_app(config=cls.config)

diff --git a/weaver/processes/convert.py b/weaver/processes/convert.py
@@ -859,7 +859,7 @@ def is_cwl_array_type(io_info, strict=True):
     io_return = {
         "array": False,
         "allow": AnyValue,
-        "type": io_info["type"],
+        "type": get_cwl_io_type_name(io_info["type"]),
         "mode": MODE.NONE,
     }
 
@@ -897,19 +897,21 @@ def _update_if_sub_enum(_io_item):
         if io_type["type"] != PACKAGE_ARRAY_BASE:
             raise PackageTypeError(f"Unsupported I/O 'array' definition: '{io_info!r}'.")
         # parse enum in case we got an array of allowed symbols
-        is_enum = _update_if_sub_enum(io_type["items"])
+        io_items = get_cwl_io_type_name(io_type["items"])
+        is_enum = _update_if_sub_enum(io_items)
         if not is_enum:
-            io_return["type"] = io_type["items"]
-        if io_return["type"] not in PACKAGE_ARRAY_ITEMS:  # includes Complex, so implicit literal-only check possible
-            io_type = any2cwl_literal_datatype(io_return["type"])
+            io_return["type"] = io_items
+        io_type = get_cwl_io_type_name(io_return["type"])
+        if io_type not in PACKAGE_ARRAY_ITEMS:  # includes Complex, so implicit literal-only check possible
+            io_type = any2cwl_literal_datatype(io_type)
             if strict or io_type not in PACKAGE_ARRAY_ITEMS:
                 raise PackageTypeError(f"Unsupported I/O 'array' definition: '{io_info!r}'.")
             io_return["type"] = io_type
         LOGGER.debug("I/O [%s] parsed as 'array' with nested dict notation", io_info["name"])
         io_return["array"] = True
     # array type conversion when defined as string '<type>[]'
-    elif isinstance(io_return["type"], str) and io_return["type"] in PACKAGE_ARRAY_TYPES:
-        io_return["type"] = io_return["type"][:-2]  # remove '[]'
+    elif isinstance(io_return["type"], str) and get_cwl_io_type_name(io_return["type"]) in PACKAGE_ARRAY_TYPES:
+        io_return["type"] = get_cwl_io_type_name(io_return["type"][:-2])  # remove '[]'
         if io_return["type"] in PACKAGE_CUSTOM_TYPES:
             # parse 'enum[]' for array of allowed symbols, provide expected structure for sub-item parsing
             io_item = deepcopy(io_info)
@@ -935,7 +937,7 @@ def is_cwl_enum_type(io_info):
         - ``io_allow``: validation values of the enum.
     :raises PackageTypeError: if the enum doesn't have the required parameters and valid format.
     """
-    io_type = io_info["type"]
+    io_type = get_cwl_io_type_name(io_info["type"])
     if not isinstance(io_type, dict) or "type" not in io_type or io_type["type"] not in PACKAGE_CUSTOM_TYPES:
         return False, io_type, MODE.NONE, None
 
@@ -964,6 +966,19 @@ def is_cwl_enum_type(io_info):
     return True, io_type, MODE.SIMPLE, io_allow  # allowed value validator mode must be set for input
 
 
+def get_cwl_io_type_name(io_type):
+    # type: (Any) -> Any
+    """
+    Obtain the simple type-name representation of a :term:`CWL` I/O.
+
+    Depending on :mod:`cwltool` version, types are represented with or without an extended prefix, and using an
+    explicit quoted class representation rather than plain strings.
+    """
+    if isinstance(io_type, str):
+        return str(io_type.replace("org.w3id.cwl.cwl.", ""))
+    return io_type
+
+
 @dataclass
 class CWLIODefinition:
     """
@@ -1003,7 +1018,7 @@ def get_cwl_io_type(io_info, strict=True):
     :param strict: Indicates if only pure :term:`CWL` definition is allowed, or allow implicit data-type conversions.
     :return: tuple of guessed base type and flag indicating if it can be null (optional input).
     """
-    io_type = io_info["type"]
+    io_type = get_cwl_io_type_name(io_info["type"])
     is_null = False
 
     # parse multi-definition
@@ -1024,8 +1039,9 @@ def get_cwl_io_type(io_info, strict=True):
             io_type_many = set()
             io_base_type = None
             for i, typ in enumerate(io_type):
+                typ = get_cwl_io_type_name(typ)
                 io_name = io_info["name"]
-                sub_type = {"type": typ, "name": f"{io_name}[{i}]"}
+                sub_type = {"type": typ, "name": f"{io_name}[{i}]"}  # type: CWL_IO_Type
                 is_array, array_elem, _, _ = is_cwl_array_type(sub_type, strict=strict)
                 is_enum, enum_type, _, _ = is_cwl_enum_type(sub_type)
                 # array base type more important than enum because later array conversion also handles allowed values
@@ -1077,6 +1093,8 @@ def get_cwl_io_type(io_info, strict=True):
         LOGGER.debug("type(io_type): [%s]", type(io_type))
         raise TypeError(f"I/O type has not been properly decoded. Should be a string, got: '{io_type!r}'")
 
+    io_type = get_cwl_io_type_name(io_type)
+
     # parse shorthand notation for nullable
     if io_type.endswith("?"):
         io_type = io_type[:-1]