From 34e4000a066d9e76c59d604fe30af5c3a4ab08c1 Mon Sep 17 00:00:00 2001
From: Rick Nitsche
Date: Wed, 19 Feb 2020 18:30:42 -0800
Subject: [PATCH] feat(io): add tasks to dump metadata

- task to dump config to YAML
- task to dump versions to YAML

Closes https://github.com/chime-experiment/Analysis/issues/37
---
 draco/core/io.py            | 61 +++++++++++++++++++++++++++++++++++++
 draco/core/task.py          |  4 +--
 test/test_write_metadata.py | 54 ++++++++++++++++++++++++++++----
 3 files changed, 111 insertions(+), 8 deletions(-)

diff --git a/draco/core/io.py b/draco/core/io.py
index 3480e5adc..bf9eaee6a 100644
--- a/draco/core/io.py
+++ b/draco/core/io.py
@@ -45,6 +45,7 @@ import os.path
 
 
 import numpy as np
+from yaml import dump as yamldump
 
 from caput import pipeline
 from caput import config
@@ -553,6 +554,66 @@ def process(self, data):
         return data
 
 
+class SaveModuleVersions(task.SingleTask):
+    """Write module versions to a YAML file.
+
+    The list of modules should be added to the configuration under key 'save_versions'.
+    The version strings are written to a YAML file.
+
+    Attributes
+    ----------
+    root : str
+        Root of the file name to output to.
+    """
+
+    root = config.Property(proptype=str)
+
+    done = True
+
+    def setup(self):
+        """Save module versions."""
+
+        fname = "{}_versions.yml".format(self.root)
+        # The context manager closes the file even if the dump raises.
+        with open(fname, "w") as f:
+            f.write(yamldump(self.versions))
+        self.done = True
+
+    def process(self):
+        """Do nothing."""
+        self.done = True
+        return
+
+
+class SaveConfig(task.SingleTask):
+    """Write pipeline config to a text file.
+
+    Yaml configuration document is written to a text file.
+
+    Attributes
+    ----------
+    root : str
+        Root of the file name to output to.
+    """
+
+    root = config.Property(proptype=str)
+    done = True
+
+    def setup(self):
+        """Save pipeline config."""
+
+        fname = "{}_config.yml".format(self.root)
+        # The context manager closes the file even if the dump raises.
+        with open(fname, "w") as f:
+            f.write(yamldump(self.pipeline_config))
+        self.done = True
+
+    def process(self):
+        """Do nothing."""
+        self.done = True
+        return
+
+
 def get_telescope(obj):
     """Return a telescope object out of the input (either `ProductManager`,
     `BeamTransfer` or `TransitTelescope`).
diff --git a/draco/core/task.py b/draco/core/task.py
index 246bd2ec4..208879050 100644
--- a/draco/core/task.py
+++ b/draco/core/task.py
@@ -387,8 +387,8 @@ def _save_output(self, output):
 
         # add metadata to output
         metadata = {
-            "versions_json": self.versions,
-            "config_json": self.pipeline_config,
+            "versions": self.versions,
+            "config": self.pipeline_config,
         }
         for key, value in metadata.items():
             if key in output.attrs:
diff --git a/test/test_write_metadata.py b/test/test_write_metadata.py
index 54c4b5a5c..9c41ef468 100644
--- a/test/test_write_metadata.py
+++ b/test/test_write_metadata.py
@@ -14,6 +14,7 @@ import yaml
 
 
 TAG = "test"
+JSON_PREFIX = "!!_memh5_json:"
 
 
 def test_metadata_to_hdf5():
@@ -39,16 +40,57 @@ def test_metadata_to_hdf5():
 
     # Check HDF5 file for config- and versiondump
     f = h5py.File("{}.h5".format(TAG), "r")
-    configdump = f.attrs["config_json"]
-    versiondump = f.attrs["versions_json"]
-    assert versiondump == json.dumps(
+    configdump = f.attrs["config"]
+    versiondump = f.attrs["versions"]
+    assert versiondump == JSON_PREFIX + json.dumps(
         {"numpy": numpy.__version__, "caput": caput.__version__}
     )
-    assert configdump == json.dumps(yaml.load(testconfig, Loader=yaml.SafeLoader))
+    assert configdump == JSON_PREFIX + json.dumps(
+        yaml.load(testconfig, Loader=yaml.SafeLoader)
+    )
 
     # Do the same using caput.memh5 to make sure it deserializes it
     m = memh5.MemDiskGroup.from_file("{}.h5".format(TAG))
-    configdump = m.attrs["config_json"]
-    versiondump = m.attrs["versions_json"]
+    configdump = m.attrs["config"]
+    versiondump = m.attrs["versions"]
     assert versiondump == {"numpy": numpy.__version__, "caput": caput.__version__}
     assert configdump == yaml.load(testconfig, Loader=yaml.SafeLoader)
+
+
+def test_metadata_to_yaml():
+    """Check if metadata is written to YAML file."""
+
+    testconfig = """
+    foo: bar
+    pipeline:
+        save_versions:
+            - numpy
+            - caput
+        tasks:
+            - type: draco.core.io.SaveModuleVersions
+              params:
+                  root: {0}
+            - type: draco.core.io.SaveConfig
+              params:
+                  root: {0}
+    """.format(
+        TAG
+    )
+
+    man = pipeline.Manager.from_yaml_str(testconfig)
+    man.run()
+
+    # Check yaml files for config- and versiondump
+    config_path = "{}_config.yml".format(TAG)
+    versions_path = "{}_versions.yml".format(TAG)
+    with open(config_path, "r") as yaml_config:
+        configdump = yaml_config.read()
+    with open(versions_path, "r") as yaml_versions:
+        versiondump = yaml_versions.read()
+
+    assert versiondump == yaml.dump(
+        {"numpy": numpy.__version__, "caput": caput.__version__}
+    )
+
+    # let pyyaml fix the indentation by loading and dumping again
+    assert configdump == yaml.dump(yaml.safe_load(testconfig))