Skip to content

Commit

Permalink
Merge pull request #66 from simleo/container_img
Browse files Browse the repository at this point in the history
Add ContainerImage
  • Loading branch information
simleo authored Nov 6, 2023
2 parents d052f77 + 8a56a22 commit 59702e9
Show file tree
Hide file tree
Showing 6 changed files with 108 additions and 3 deletions.
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
bdbag>=1.4.1
click~=8.1
cwl-utils>=0.27
cwl-utils==0.29
cwlprov==0.1.1
networkx==3.1
prov>=1.5.1
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ python_requires=>=3.8, <4
install_requires=
bdbag>=1.4.1
click~=8.1
cwl-utils>=0.27
cwl-utils==0.29
cwlprov==0.1.1
networkx==3.1
prov>=1.5.1
Expand Down
19 changes: 18 additions & 1 deletion src/runcrate/convert.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@
from rocrate.rocrate import ROCrate

from .constants import PROFILES_BASE, PROFILES_VERSION, TERMS_NAMESPACE
from .utils import as_list
from .utils import as_list, parse_img


WORKFLOW_BASENAME = "packed.cwl"
Expand All @@ -61,6 +61,8 @@

WROC_PROFILE_VERSION = "1.0"

# Identifier referenced (as {"@id": ...}) by the "additionalType" property of
# the ContainerImage entities created in add_container_images.
DOCKER_IMG_TYPE = "https://w3id.org/ro/terms/workflow-run#DockerImage"


def convert_cwl_type(cwl_type):
if isinstance(cwl_type, list):
Expand Down Expand Up @@ -503,9 +505,24 @@ def to_wf_p(k):
action["endTime"] = activity.end().time.isoformat()
action["object"] = self.add_action_params(crate, activity, to_wf_p, "usage")
action["result"] = self.add_action_params(crate, activity, to_wf_p, "generation")
self.add_container_images(crate, action, activity)
for job in activity.steps():
self.add_action(crate, job, parent_instrument=instrument)

def add_container_images(self, crate, action, activity):
    """Attach the container images recorded for ``activity`` to ``action``.

    The ``cwlprov:image`` attribute values of every agent record
    associated with the activity are gathered into a set, so each distinct
    image string is handled once. Every image string is parsed with
    ``parse_img``; the result is completed with the ``ContainerImage``
    type information, added to ``crate`` as a contextual entity and
    appended to the ``containerImage`` property of ``action``.
    """
    image_strings = set()
    for association in activity.association():
        agent_records = activity.provenance.prov_doc.get_record(association.agent_id)
        for agent in agent_records:
            image_strings |= agent.get_attribute("cwlprov:image")
    for image_string in image_strings:
        img_properties = parse_img(image_string)
        # NOTE(review): parse_img returns the input string unchanged for
        # HTTP(S) URLs; in that case the update() call below would fail.
        # Confirm whether URL images can reach this point.
        type_info = {
            "@type": "ContainerImage",
            "additionalType": {"@id": DOCKER_IMG_TYPE},
        }
        img_properties.update(type_info)
        img_entity = ContextEntity(crate, properties=img_properties)
        action.append_to("containerImage", crate.add(img_entity), compact=True)

def add_action_params(self, crate, activity, to_wf_p, ptype="usage"):
action_params = []
all_roles = set()
Expand Down
36 changes: 36 additions & 0 deletions src/runcrate/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,3 +17,39 @@ def as_list(value):
if isinstance(value, list):
return value
return [value]


def parse_img_name(img_name):
    """Split an image name (no tag, no digest) into ``(registry, name)``.

    The first ``/``-separated component is taken as the registry when it
    looks like a host, i.e. it contains a ``.`` or a ``:`` (port) or is
    ``localhost``. For backward compatibility, a name made of exactly
    three components is also split into registry + name regardless of
    what the first component looks like. In every other case the registry
    defaults to ``docker.io`` and the whole input is the name.
    """
    parts = img_name.split("/")
    first = parts[0]
    rest = "/".join(parts[1:])
    looks_like_host = "." in first or ":" in first or first == "localhost"
    # ``rest`` must be non-empty: a lone "quay.io" or "quay.io/" has no
    # repository part to return as the name.
    if rest and (looks_like_host or len(parts) == 3):
        return first, rest
    return "docker.io", img_name


def parse_img(img_str):
    """Parse an image string following the docker pull syntax.

    The accepted form is ``NAME[:TAG][@DIGEST]``. CWL's DockerRequirement
    also accepts HTTP URLs for docker load: a string starting with
    ``http://`` or ``https://`` is returned unchanged (as a ``str``).

    Returns a dict with the keys ``registry`` and ``name``, plus ``tag``
    and / or ``sha256`` when the corresponding parts are present.

    Raises ``ValueError`` if a digest is present but is not of the form
    ``sha256:<hex>``.
    """
    if img_str.startswith(("http://", "https://")):
        return img_str
    reference, at_sign, digest_spec = img_str.rpartition("@")
    if not at_sign:
        # rpartition puts the whole string in the last slot when the
        # separator is missing.
        reference = img_str
    repository, colon, tag = reference.rpartition(":")
    # A ":" followed by a "/" is a registry port ("localhost:5000/img"),
    # not a tag separator.
    has_tag = bool(colon) and "/" not in tag
    parsed = {}
    parsed["registry"], parsed["name"] = parse_img_name(
        repository if has_tag else reference
    )
    if has_tag:
        parsed["tag"] = tag
    if at_sign:
        algo, digest = digest_spec.split(":", 1)
        if algo != "sha256":
            raise ValueError(f"unsupported digest algorithm: {algo!r}")
        parsed[algo] = digest
    return parsed
10 changes: 10 additions & 0 deletions tests/test_cwlprov_crate_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,16 @@ def test_revsort(data_dir, tmpdir):
metadata = json.load(f)
context = metadata['@context']
assert TERMS_NAMESPACE in context
# Docker image
for action in crate.get_by_type("CreateAction"):
if action is wf_action:
continue
assert "containerImage" in action
img = action["containerImage"]
assert img.type == "ContainerImage"
assert img["additionalType"] == "https://w3id.org/ro/terms/workflow-run#DockerImage"
assert img["name"] == "debian"
assert img["tag"] == "8"


def test_no_input(data_dir, tmpdir):
Expand Down
42 changes: 42 additions & 0 deletions tests/test_utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
# Copyright 2023 CRS4.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from runcrate.utils import parse_img


def test_parse_img():
    """Check parse_img on bare, tagged, namespaced, registry and digest forms."""
    expectations = [
        (
            "python",
            {"registry": "docker.io", "name": "python"},
        ),
        (
            "python:3.12",
            {"registry": "docker.io", "name": "python", "tag": "3.12"},
        ),
        (
            "josiah/python:3.11",
            {"registry": "docker.io", "name": "josiah/python", "tag": "3.11"},
        ),
        (
            "quay.io/josiah/python:3.11",
            {"registry": "quay.io", "name": "josiah/python", "tag": "3.11"},
        ),
        (
            "python@sha256:7b8d65a924f596eb65306214f559253c468336bcae09fd575429774563460caf",
            {
                "registry": "docker.io",
                "name": "python",
                "sha256": "7b8d65a924f596eb65306214f559253c468336bcae09fd575429774563460caf",
            },
        ),
    ]
    # Cases are checked in the listed order; the first mismatch fails the test.
    for img_str, expected in expectations:
        assert parse_img(img_str) == expected

0 comments on commit 59702e9

Please sign in to comment.