Skip to content

Commit

Permalink
feat(io): add tasks to dump metadata
Browse files Browse the repository at this point in the history
- task to dump config to YAML
- task to dump versions to YAML

Closes chime-experiment/Analysis#37
  • Loading branch information
nritsche committed Feb 21, 2020
1 parent b59b0e2 commit 34e4000
Show file tree
Hide file tree
Showing 3 changed files with 111 additions and 8 deletions.
61 changes: 61 additions & 0 deletions draco/core/io.py
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@

import os.path
import numpy as np
from yaml import dump as yamldump

from caput import pipeline
from caput import config
Expand Down Expand Up @@ -553,6 +554,66 @@ def process(self, data):
return data


class SaveModuleVersions(task.SingleTask):
    """Write module versions to a YAML file.

    The list of modules should be added to the configuration under key
    'save_versions'. The version strings are written to a YAML file named
    ``<root>_versions.yml``.

    Attributes
    ----------
    root : str
        Root of the file name to output to.
    """

    root = config.Property(proptype=str)

    # NOTE(review): presumably tells the pipeline runner this task has no
    # further work to do -- confirm against `task.SingleTask`.
    done = True

    def setup(self):
        """Dump `self.versions` as YAML to ``<root>_versions.yml``."""

        fname = "{}_versions.yml".format(self.root)

        # The context manager closes the file even if serialisation or the
        # write itself raises.
        with open(fname, "w") as fh:
            fh.write(yamldump(self.versions))

        self.done = True

    def process(self):
        """Do nothing; all the work happens in `setup`."""
        self.done = True
        return


class SaveConfig(task.SingleTask):
    """Write pipeline config to a text file.

    The YAML configuration document is written to a text file named
    ``<root>_config.yml``.

    Attributes
    ----------
    root : str
        Root of the file name to output to.
    """

    root = config.Property(proptype=str)

    # NOTE(review): presumably tells the pipeline runner this task has no
    # further work to do -- confirm against `task.SingleTask`.
    done = True

    def setup(self):
        """Dump `self.pipeline_config` as YAML to ``<root>_config.yml``."""

        fname = "{}_config.yml".format(self.root)

        # The context manager closes the file even if serialisation or the
        # write itself raises.
        with open(fname, "w") as fh:
            fh.write(yamldump(self.pipeline_config))

        self.done = True

    def process(self):
        """Do nothing; all the work happens in `setup`."""
        self.done = True
        return


def get_telescope(obj):
"""Return a telescope object out of the input (either `ProductManager`,
`BeamTransfer` or `TransitTelescope`).
Expand Down
4 changes: 2 additions & 2 deletions draco/core/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -387,8 +387,8 @@ def _save_output(self, output):

# add metadata to output
metadata = {
"versions_json": self.versions,
"config_json": self.pipeline_config,
"versions": self.versions,
"config": self.pipeline_config,
}
for key, value in metadata.items():
if key in output.attrs:
Expand Down
54 changes: 48 additions & 6 deletions test/test_write_metadata.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@
import yaml

TAG = "test"
JSON_PREFIX = "!!_memh5_json:"


def test_metadata_to_hdf5():
Expand All @@ -39,16 +40,57 @@ def test_metadata_to_hdf5():

# Check HDF5 file for config- and versiondump
f = h5py.File("{}.h5".format(TAG), "r")
configdump = f.attrs["config_json"]
versiondump = f.attrs["versions_json"]
assert versiondump == json.dumps(
configdump = f.attrs["config"]
versiondump = f.attrs["versions"]
assert versiondump == JSON_PREFIX + json.dumps(
{"numpy": numpy.__version__, "caput": caput.__version__}
)
assert configdump == json.dumps(yaml.load(testconfig, Loader=yaml.SafeLoader))
assert configdump == JSON_PREFIX + json.dumps(
yaml.load(testconfig, Loader=yaml.SafeLoader)
)

# Do the same using caput.memh5 to make sure it deserializes it
m = memh5.MemDiskGroup.from_file("{}.h5".format(TAG))
configdump = m.attrs["config_json"]
versiondump = m.attrs["versions_json"]
configdump = m.attrs["config"]
versiondump = m.attrs["versions"]
assert versiondump == {"numpy": numpy.__version__, "caput": caput.__version__}
assert configdump == yaml.load(testconfig, Loader=yaml.SafeLoader)


def test_metadata_to_yaml():
    """Check if metadata is written to YAML file.

    Runs a pipeline made of `SaveModuleVersions` and `SaveConfig` and compares
    the two files they write against YAML dumps of the expected content.
    """

    # The nesting matters: `save_versions` and `tasks` belong to `pipeline`,
    # and each task's `params` belongs to that task's list entry.
    testconfig = """
    foo: bar
    pipeline:
        save_versions:
            - numpy
            - caput
        tasks:
            - type: draco.core.io.SaveModuleVersions
              params:
                  root: {0}
            - type: draco.core.io.SaveConfig
              params:
                  root: {0}
    """.format(
        TAG
    )

    man = pipeline.Manager.from_yaml_str(testconfig)
    man.run()

    # Check yaml files for config- and versiondump. Context managers make sure
    # the files get closed even if a read fails.
    with open("{}_config.yml".format(TAG), "r") as yaml_config:
        configdump = yaml_config.read()
    with open("{}_versions.yml".format(TAG), "r") as yaml_versions:
        versiondump = yaml_versions.read()

    assert versiondump == yaml.dump(
        {"numpy": numpy.__version__, "caput": caput.__version__}
    )

    # let pyyaml fix the indentation by loading and dumping again
    assert configdump == yaml.dump(yaml.safe_load(testconfig))

0 comments on commit 34e4000

Please sign in to comment.