Skip to content

Commit

Permalink
Merge pull request #471 from crim-ca/cwl-refactor-update
Browse files Browse the repository at this point in the history
  • Loading branch information
fmigneault committed Oct 5, 2022
2 parents 599603a + ae6306c commit 10961f8
Show file tree
Hide file tree
Showing 18 changed files with 199 additions and 509 deletions.
4 changes: 3 additions & 1 deletion .pylintrc
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,9 @@
# A comma-separated list of package or module names from where C extensions may
# be loaded. Extensions are loading into the active Python interpreter and may
# run arbitrary code.
extension-pkg-whitelist=lxml.etree
extension-pkg-whitelist=lxml.etree,
schema_salad.sourceline,
schema_salad.validate

# Add files or directories to the blacklist. They should be base names, not
# paths.
Expand Down
4 changes: 3 additions & 1 deletion CHANGES.rst
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,9 @@ Changes

Changes:
--------
- No change.
- Refactor ``weaver.processes.wps_workflow`` definitions to delegate implementation to ``cwltool`` core classes,
removing code duplication and allowing update to latest revisions
(resolves `#154 <https://github.com/crim-ca/weaver/issues/154>`_).

Fixes:
------
Expand Down
3 changes: 2 additions & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -532,8 +532,9 @@ check-security-deps-only: mkdir-reports ## run security checks on package depen
$(SAFETY_IGNORE) \
1> >(tee "$(REPORTS_DIR)/check-security-deps.txt")'

# FIXME: bandit excludes not working (https://github.com/PyCQA/bandit/issues/657), clean-src beforehand to avoid error
.PHONY: check-security-code-only
check-security-code-only: mkdir-reports ## run security checks on source code
check-security-code-only: mkdir-reports clean-src ## run security checks on source code
@echo "Running security code checks..."
@-rm -fr "$(REPORTS_DIR)/check-security-code.txt"
@bash -c '$(CONDA_CMD) \
Expand Down
10 changes: 5 additions & 5 deletions docker/Dockerfile-base
Original file line number Diff line number Diff line change
Expand Up @@ -17,11 +17,11 @@ COPY requirements* setup.py README.rst CHANGES.rst ${APP_DIR}/

# install runtime/package dependencies
RUN apt-get update && apt-get install -y --no-install-recommends \
ca-certificates \
netbase \
gcc \
git \
&& pip install --no-cache-dir --upgrade -r requirements-sys.txt \
ca-certificates \
netbase \
gcc \
git \
&& pip install --no-cache-dir --upgrade -r requirements-sys.txt \
&& pip install --no-cache-dir -r requirements.txt \
&& pip install --no-cache-dir -e ${APP_DIR} \
&& apt-get remove -y \
Expand Down
2 changes: 1 addition & 1 deletion docker/Dockerfile-worker
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
# Only install CLI package, 'docker-ce' and 'containerd.io' not required as they should be provided by host.
# Docker sibling execution is expected. See 'docker/docker-compose.yml.example' for details.
&& apt install --no-install-recommends docker-ce-cli \
&& rm -rf /var/lib/apt/lists/*
&& rm -rf /var/lib/apt/lists/*

# run app
# see CHANGES (4.15.0), celery>=5 needs '-A' before 'worker'
Expand Down
4 changes: 2 additions & 2 deletions docs/examples/docker-python-script-report.cwl
Original file line number Diff line number Diff line change
Expand Up @@ -23,5 +23,5 @@ requirements:
entry: |
amount = $(inputs.amount)
cost = $(inputs.cost)
with open("report.txt", "w") as report:
report.write(f"Order Total: {amount * cost:0.2f}$\n")
with open("report.txt", mode="w", encoding="utf-8") as report:
report.write(f"Order Total: {amount * cost:0.2f}$\\n")
8 changes: 2 additions & 6 deletions requirements-sys.txt
Original file line number Diff line number Diff line change
@@ -1,8 +1,4 @@
pip>=20.2.2; python_version <= "3.6"
pip>=22.0.4; python_version >= "3.7"
# celery enforces some specific versions of setuptools (<60)
# FIXME: when cwltool and rdflib-jsonld versions are updated to avoid failing setuptools install, update more recent
# 'rdflib-jsonld==0.5.0' (required by: cwltool -> schema_salad) uses 'use_2to3' which is invalid for setuptools>=58
# setuptools<58; python_version <= "3.6"
# setuptools>=59,<59.7; python_version >= "3.7"
setuptools<58
setuptools<58; python_version <= "3.6"
setuptools>=60; python_version >= "3.7"
24 changes: 10 additions & 14 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -12,8 +12,6 @@ billiard>3.2,<3.4; sys_platform == "win32"
# celery's CLI interface changed
# https://github.com/celery/celery/blob/master/docs/whatsnew-5.2.rst#upgrading-from-celery-4x
celery[mongodb]<4; sys_platform == "win32" # rq.filter: <4 # pyup: ignore
# FIXME: use celery 5.2 specifically to resolve security issue
# best match for now since cannot use setuptools>=59 until cwltool/rdflib-jsonld are updated
celery[mongodb]>=5.2.2,<6; sys_platform != "win32" and python_version >= "3.7"
# technically, >=5.2 preferred to resolve security issue, but dependency resolver cannot find it
# FIXME: drop support? more recent versions not avilable for end-of-life python
Expand All @@ -30,10 +28,7 @@ cryptography
# use cwltool gpu-enabled support until integrated within the original tool
# (https://github.com/common-workflow-language/common-workflow-language/issues/587)
### git+https://github.com/crim-ca/cwltool@docker-gpu#egg=cwltool; python_version >= "3"
# FIXME: remove extra CWL code and let it handle it for use
# - changes since cause error with invalid get_listing import location
# - https://github.com/crim-ca/weaver/issues/154
cwltool==3.0.20200324120055
cwltool==3.1.20220913185150
docker
duration
git+https://github.com/ESGF/esgf-compute-api.git@v2.3.7#egg=esgf-compute-api
Expand All @@ -50,8 +45,9 @@ mako
oauthlib
owslib==0.27.2
psutil
# FIXME: pymongo>=4 breaks with kombu corresponding to pinned Celery (https://github.com/crim-ca/weaver/issues/386)
pymongo>=3.12.0,<4
# pymongo>=4 breaks with kombu corresponding to pinned Celery (https://github.com/crim-ca/weaver/issues/386)
pymongo>=3.12.0,<4; python_version <= "3.6"
pymongo>=4; python_version >= "3.7"
pyramid>=1.7.3
pyramid_beaker>=0.8
pyramid_celery>=4.0.0 # required for celery>=5
Expand All @@ -60,15 +56,15 @@ python-dateutil
pyramid_rewrite
pyramid_storage
pytz
pywps==4.5.1
pywps==4.5.1; python_version <= "3.6"
pywps==4.5.2; python_version >= "3.7"
pyyaml>=5.2
rdflib>=5 # pyup: ignore
requests
requests_file
# let cwltool define ruamel.yaml version (<=0.16.5)
# ensure minimal 0.15.78 to solve install issue (python 3.8)
# (https://bitbucket.org/ruamel/yaml/issues/261/error-while-installing-ruamelyaml-setuppy)
ruamel.yaml>=0.15.78,<=0.16.5
shapely
ruamel.yaml>=0.16
schema-salad>=8.2,<9
shapely==1.8.2
simplejson
urlmatch
xmltodict
Expand Down
22 changes: 11 additions & 11 deletions setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -5,23 +5,23 @@ tag = True
tag_name = {new_version}

[bumpversion:file:CHANGES.rst]
search =
search =
`Unreleased <https://github.com/crim-ca/weaver/tree/master>`_ (latest)
========================================================================
replace =
replace =
`Unreleased <https://github.com/crim-ca/weaver/tree/master>`_ (latest)
========================================================================

Changes:
--------
- No change.

Fixes:
------
- No change.

.. _changes_{new_version}:

`{new_version} <https://github.com/crim-ca/weaver/tree/{new_version}>`_ ({now:%%Y-%%m-%%d})
========================================================================

Expand All @@ -42,14 +42,14 @@ search = LABEL version="{current_version}"
replace = LABEL version="{new_version}"

[tool:pytest]
addopts =
addopts =
--strict-markers
--tb=native
weaver/
log_cli = false
log_level = DEBUG
python_files = test_*.py
markers =
markers =
cli: mark test as related to CLI operations
testbed14: mark test as 'testbed14' validation
functional: mark test as functionality validation
Expand Down Expand Up @@ -80,7 +80,7 @@ targets = .
[flake8]
ignore = E126,E226,E402,F401,W503,W504
max-line-length = 120
exclude =
exclude =
src,
.git,
__pycache__,
Expand Down Expand Up @@ -112,14 +112,14 @@ add_select = D201,D213
branch = true
source = ./
include = weaver/*
omit =
omit =
setup.py
docs/*
tests/*
*_mako

[coverage:report]
exclude_lines =
exclude_lines =
pragma: no cover
raise AssertionError
raise NotImplementedError
Expand Down
14 changes: 7 additions & 7 deletions tests/processes/test_convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -952,11 +952,11 @@ def test_cwl2json_input_values_ogc_format():
"test1": {"value": "value"},
"test2": {"value": 1},
"test3": {"value": 1.23},
"test4": {"href": "/tmp/random.txt"},
"test4": {"href": "/tmp/random.txt"}, # nosec: B108
"test5": [{"value": "val1"}, {"value": "val2"}],
"test6": [{"value": 1}, {"value": 2}],
"test7": [{"value": 1.23}, {"value": 4.56}],
"test8": [{"href": "/tmp/other.txt"}]
"test8": [{"href": "/tmp/other.txt"}] # nosec: B108
}
result = cwl2json_input_values(values, ProcessSchema.OGC)
assert result == expect
Expand Down Expand Up @@ -1096,17 +1096,17 @@ def test_repr2json_input_values():
{"id": "test6", "value": 2},
{"id": "test7", "value": 1.23},
{"id": "test7", "value": 4.56},
{"id": "test8", "href": "/tmp/other.txt"},
{"id": "test8", "href": "/tmp/other.txt"}, # nosec: B108
{"id": "test9", "value": "short"},
{"id": "test10", "value": "long"},
{"id": "test11", "href": "/tmp/file.json", "format": {
{"id": "test11", "href": "/tmp/file.json", "format": { # nosec: B108
"mediaType": ContentType.APP_JSON, "schema": "http://schema.org/random.json"
}},
{"id": "test12", "href": "/tmp/other.xml", "format": {
{"id": "test12", "href": "/tmp/other.xml", "format": { # nosec: B108
"mediaType": ContentType.TEXT_XML, "schema": "http://schema.org/random.xml"
}},
{"id": "test13", "href": "/tmp/one.json", "format": {"mediaType": ContentType.APP_JSON}},
{"id": "test13", "href": "/tmp/two.xml", "format": {"mediaType": ContentType.TEXT_XML}},
{"id": "test13", "href": "/tmp/one.json", "format": {"mediaType": ContentType.APP_JSON}}, # nosec: B108
{"id": "test13", "href": "/tmp/two.xml", "format": {"mediaType": ContentType.TEXT_XML}}, # nosec: B108
]
result = repr2json_input_values(values)
assert result == expect
Expand Down
5 changes: 3 additions & 2 deletions tests/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@
import mimetypes
import os
import re
import subprocess
import subprocess # nosec: B404
import sys
import tempfile
import uuid
Expand Down Expand Up @@ -358,7 +358,8 @@ def run_command(command, trim=True, expect_error=False, entrypoint=None):
command = command.split(" ")
command = [str(arg) for arg in command]
if entrypoint is None:
out, _ = subprocess.Popen(["which", "python"], universal_newlines=True, stdout=subprocess.PIPE).communicate()
func = ["which", "python"]
out, _ = subprocess.Popen(func, universal_newlines=True, stdout=subprocess.PIPE).communicate() # nosec: B603
if not out:
out = sys.executable # fallback for some systems that fail above call
python_path = os.path.split(out)[0]
Expand Down
12 changes: 6 additions & 6 deletions tests/wps/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -218,24 +218,24 @@ def test_get_wps_output_context_resolution():

def test_map_wps_output_location_duplicate_subdir():
for tmp_dir in [
"/tmp/tmp/tmp",
"/tmp/tmpdir"
"/tmp/tmp/tmp", # nosec: B108 # don't care hardcoded for test
"/tmp/tmpdir" # nosec: B108 # don't care hardcoded for test
]:
wps_out = "http:///localhost/wps-output/tmp"
wps_out = "http:///localhost/wps-output/tmp" # nosec: B108 # don't care hardcoded for test
settings = {
"weaver.wps_output_dir": tmp_dir,
"weaver.wps_output_url": wps_out
}
path = map_wps_output_location(f"{wps_out}/tmp/some-file-tmp.tmp", settings, exists=False)
assert path == f"{tmp_dir}/tmp/some-file-tmp.tmp"
assert path == f"{tmp_dir}/tmp/some-file-tmp.tmp" # nosec: B108 # don't care hardcoded for test

path = map_wps_output_location(f"{tmp_dir}/here/some-file-tmp.tmp", settings, exists=False, url=True)
assert path == f"{wps_out}/here/some-file-tmp.tmp"
assert path == f"{wps_out}/here/some-file-tmp.tmp" # nosec: B108 # don't care hardcoded for test


def test_map_wps_output_location_exists():
wps_url = "http:///localhost/wps-output/tmp"
wps_dir = "/tmp/weaver-test/test-outputs"
wps_dir = "/tmp/weaver-test/test-outputs" # nosec: B108 # don't care hardcoded for test
settings = {
"weaver.wps_output_dir": wps_dir,
"weaver.wps_output_url": wps_url
Expand Down
2 changes: 1 addition & 1 deletion tests/wps_restapi/test_jobs.py
Original file line number Diff line number Diff line change
Expand Up @@ -63,7 +63,7 @@ def setUpClass(cls):
cls.settings = {
"weaver.url": "https://localhost",
"weaver.wps_email_encrypt_salt": "weaver-test",
"weaver.wps_output_dir": "/tmp/weaver-test/wps-outputs",
"weaver.wps_output_dir": "/tmp/weaver-test/wps-outputs", # nosec: B108 # don't care hardcoded for test
}
cls.config = setup_config_with_mongodb(settings=cls.settings)
cls.app = get_test_weaver_app(config=cls.config)
Expand Down
38 changes: 28 additions & 10 deletions weaver/processes/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -859,7 +859,7 @@ def is_cwl_array_type(io_info, strict=True):
io_return = {
"array": False,
"allow": AnyValue,
"type": io_info["type"],
"type": get_cwl_io_type_name(io_info["type"]),
"mode": MODE.NONE,
}

Expand Down Expand Up @@ -897,19 +897,21 @@ def _update_if_sub_enum(_io_item):
if io_type["type"] != PACKAGE_ARRAY_BASE:
raise PackageTypeError(f"Unsupported I/O 'array' definition: '{io_info!r}'.")
# parse enum in case we got an array of allowed symbols
is_enum = _update_if_sub_enum(io_type["items"])
io_items = get_cwl_io_type_name(io_type["items"])
is_enum = _update_if_sub_enum(io_items)
if not is_enum:
io_return["type"] = io_type["items"]
if io_return["type"] not in PACKAGE_ARRAY_ITEMS: # includes Complex, so implicit literal-only check possible
io_type = any2cwl_literal_datatype(io_return["type"])
io_return["type"] = io_items
io_type = get_cwl_io_type_name(io_return["type"])
if io_type not in PACKAGE_ARRAY_ITEMS: # includes Complex, so implicit literal-only check possible
io_type = any2cwl_literal_datatype(io_type)
if strict or io_type not in PACKAGE_ARRAY_ITEMS:
raise PackageTypeError(f"Unsupported I/O 'array' definition: '{io_info!r}'.")
io_return["type"] = io_type
LOGGER.debug("I/O [%s] parsed as 'array' with nested dict notation", io_info["name"])
io_return["array"] = True
# array type conversion when defined as string '<type>[]'
elif isinstance(io_return["type"], str) and io_return["type"] in PACKAGE_ARRAY_TYPES:
io_return["type"] = io_return["type"][:-2] # remove '[]'
elif isinstance(io_return["type"], str) and get_cwl_io_type_name(io_return["type"]) in PACKAGE_ARRAY_TYPES:
io_return["type"] = get_cwl_io_type_name(io_return["type"][:-2]) # remove '[]'
if io_return["type"] in PACKAGE_CUSTOM_TYPES:
# parse 'enum[]' for array of allowed symbols, provide expected structure for sub-item parsing
io_item = deepcopy(io_info)
Expand All @@ -935,7 +937,7 @@ def is_cwl_enum_type(io_info):
- ``io_allow``: validation values of the enum.
:raises PackageTypeError: if the enum doesn't have the required parameters and valid format.
"""
io_type = io_info["type"]
io_type = get_cwl_io_type_name(io_info["type"])
if not isinstance(io_type, dict) or "type" not in io_type or io_type["type"] not in PACKAGE_CUSTOM_TYPES:
return False, io_type, MODE.NONE, None

Expand Down Expand Up @@ -964,6 +966,19 @@ def is_cwl_enum_type(io_info):
return True, io_type, MODE.SIMPLE, io_allow # allowed value validator mode must be set for input


def get_cwl_io_type_name(io_type):
# type: (Any) -> Any
"""
Obtain the simple type-name representation of a :term:`CWL` I/O.
Depending on :mod:`cwltool` version, types are represented with or without an extended prefix, and using an
explicit quoted class representation rather than plain strings.
"""
if isinstance(io_type, str):
return str(io_type.replace("org.w3id.cwl.cwl.", ""))
return io_type


@dataclass
class CWLIODefinition:
"""
Expand Down Expand Up @@ -1003,7 +1018,7 @@ def get_cwl_io_type(io_info, strict=True):
:param strict: Indicates if only pure :term:`CWL` definition is allowed, or allow implicit data-type conversions.
:return: tuple of guessed base type and flag indicating if it can be null (optional input).
"""
io_type = io_info["type"]
io_type = get_cwl_io_type_name(io_info["type"])
is_null = False

# parse multi-definition
Expand All @@ -1024,8 +1039,9 @@ def get_cwl_io_type(io_info, strict=True):
io_type_many = set()
io_base_type = None
for i, typ in enumerate(io_type):
typ = get_cwl_io_type_name(typ)
io_name = io_info["name"]
sub_type = {"type": typ, "name": f"{io_name}[{i}]"}
sub_type = {"type": typ, "name": f"{io_name}[{i}]"} # type: CWL_IO_Type
is_array, array_elem, _, _ = is_cwl_array_type(sub_type, strict=strict)
is_enum, enum_type, _, _ = is_cwl_enum_type(sub_type)
# array base type more important than enum because later array conversion also handles allowed values
Expand Down Expand Up @@ -1077,6 +1093,8 @@ def get_cwl_io_type(io_info, strict=True):
LOGGER.debug("type(io_type): [%s]", type(io_type))
raise TypeError(f"I/O type has not been properly decoded. Should be a string, got: '{io_type!r}'")

io_type = get_cwl_io_type_name(io_type)

# parse shorthand notation for nullable
if io_type.endswith("?"):
io_type = io_type[:-1]
Expand Down
Loading

0 comments on commit 10961f8

Please sign in to comment.