Skip to content

Commit

Permalink
Merge pull request #69 from simleo/more_file_metadata
Browse files (browse the repository at this point in the history)
More file metadata
  • Loading branch information
simleo authored Nov 8, 2023
2 parents: c27ed11 + c9a15fc; commit 3cf875e
Show file tree
Hide file tree
Showing 2 changed files with 20 additions and 4 deletions.
16 changes: 12 additions & 4 deletions src/runcrate/convert.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -566,6 +566,8 @@ def add_action_params(self, crate, activity, to_wf_p, ptype="usage"):
))
if len(action_p["exampleOfWork"]) == 1:
action_p["exampleOfWork"] = action_p["exampleOfWork"][0]
if ptype == "generation":
action_p["dateCreated"] = rel.time.isoformat()
action_params.append(action_p)
return action_params

Expand Down Expand Up @@ -606,6 +608,7 @@ def convert_param(self, prov_param, crate, convert_secondary=True, parent=None):
source = self.manifest[hash_]
action_p = crate.add_file(source, dest, properties={
"sha1": hash_,
"contentSize": str(Path(source).stat().st_size)
})
self._set_alternate_name(prov_param, action_p, parent=parent)
try:
Expand Down Expand Up @@ -739,9 +742,9 @@ def patch_workflow_input_collection(self, crate, wf=None):
if "ComputationalWorkflow" in as_list(tool.type):
self.patch_workflow_input_collection(crate, wf=tool)

def _map_input_data(self, data):
def _map_input_data(self, crate, data):
if isinstance(data, list):
return [self._map_input_data(_) for _ in data]
return [self._map_input_data(crate, _) for _ in data]
if isinstance(data, dict):
rval = {}
for k, v in data.items():
Expand All @@ -753,8 +756,13 @@ def _map_input_data(self, data):
source_k = str(source)
dest = self.file_map.get(source_k)
rval[k] = str(dest) if dest else v
fmt = data.get("format")
if fmt:
entity = crate.get(str(dest))
if entity:
entity["encodingFormat"] = fmt
else:
rval[k] = self._map_input_data(v)
rval[k] = self._map_input_data(crate, v)
return rval
return data

Expand All @@ -763,7 +771,7 @@ def add_inputs_file(self, crate):
if path.is_file():
with open(path) as f:
data = json.load(f)
data = self._map_input_data(data)
data = self._map_input_data(crate, data)
source = StringIO(json.dumps(data, indent=4))
crate.add_file(source, path.name, properties={
"name": "input object document",
Expand Down
8 changes: 8 additions & 0 deletions tests/test_cwlprov_crate_builder.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -95,10 +95,14 @@ def test_revsort(data_dir, tmpdir):
assert "File" in entity.type
assert entity["alternateName"] == "whale.txt"
assert entity["sha1"] == entity.id.rsplit("/")[-1]
assert entity["contentSize"] == "1111"
assert "encodingFormat" in entity
wf_input_file = entity
wf_output_file = wf_results[0]
assert wf_output_file["alternateName"] == "output.txt"
assert wf_output_file["sha1"] == wf_output_file.id.rsplit("/")[-1]
assert wf_output_file["dateCreated"] == "2018-10-25T15:46:38.058365"
assert wf_output_file["contentSize"] == "1111"
assert "File" in wf_output_file.type
steps = workflow["step"]
assert len(steps) == 2
Expand All @@ -118,6 +122,8 @@ def test_revsort(data_dir, tmpdir):
assert rev_input_file is wf_input_file
rev_output_file = results[0]
assert "File" in rev_output_file.type
assert rev_output_file["dateCreated"] == "2018-10-25T15:46:36.963254"
assert rev_output_file["contentSize"] == "1111"
assert step["position"] == "0"
assert set(_connected(step)) == set([
("packed.cwl#main/input", "packed.cwl#revtool.cwl/input"),
Expand Down Expand Up @@ -357,6 +363,7 @@ def test_dir_io(data_dir, tmpdir):
assert "Dataset" in entity.type
wf_input_dir = entity
wf_output_dir = wf_results[0]
assert wf_output_dir["dateCreated"] == "2023-02-17T16:20:30.288242"
assert wf_input_dir.type == wf_output_dir.type == "Dataset"
assert wf_input_dir["alternateName"] == "grepucase_in"
assert len(wf_input_dir["hasPart"]) == 2
Expand Down Expand Up @@ -395,6 +402,7 @@ def test_dir_io(data_dir, tmpdir):
assert greptool_input_dir is wf_input_dir
greptool_output_dir = greptool_results[0]
assert "Dataset" in greptool_output_dir.type
assert greptool_output_dir["dateCreated"] == "2023-02-17T16:20:30.262141"
ucasetool_action = action_map["packed.cwl#ucasetool.cwl"]
ucasetool_objects = ucasetool_action["object"]
ucasetool_results = ucasetool_action["result"]
Expand Down

0 comments on commit 3cf875e

Please sign in to comment.