Skip to content

Commit

Permalink
feat(BasicCont): make history an h5 attribute
Browse files Browse the repository at this point in the history
add_history previously accepted single-level dictionaries only, and they
were added as attributes to the history group. This was a simple
solution to dictionary de-/serialization with json. Now that we support
full de-/serialization of dictionaries using json, the history feature
is broken. To make it usable again, the history is no longer a group
but an attribute, so that dictionaries of any depth can be
added.

Reading the old history format is still supported but creates a
deprecation warning.
  • Loading branch information
nritsche authored and jrs65 committed Apr 28, 2020
1 parent 8e84719 commit fbc5034
Show file tree
Hide file tree
Showing 2 changed files with 64 additions and 8 deletions.
32 changes: 28 additions & 4 deletions caput/memh5.py
Original file line number Diff line number Diff line change
Expand Up @@ -1892,8 +1892,17 @@ def history(self):

out = {}
for name, value in self._data["history"].items():
warnings.warn(
"memh5 dataset {} is using a deprecated history format. Read support of "
"files using this format will be continued for now, but you should "
"update the instance of caput that wrote this file.".format(self.name),
DeprecationWarning,
)
out[name] = value.attrs

for name, value in self._data["history"].attrs.items():
out[name] = value

# TODO: this seems like a tremendous hack. I've changed it to a safer version of
# eval, but this should probably be removed
out["order"] = literal_eval(
Expand Down Expand Up @@ -1981,7 +1990,23 @@ def del_reverse_map(self, axis_name):
del self._data["reverse_map"][axis_name]

def add_history(self, name, history=None):
"""Create a new history entry."""
"""
Create a new history entry.
Parameters
----------
name : str
Name for history entry.
history
History entry (optional). Needs to be json serializable.
Notes
-----
Previously only dictionaries with depth=1 were supported here. The key/value pairs of these
were added as attributes to the history group when written to disk. Reading the old
history format is still supported, however the history is now an attribute itself and
dictionaries of any depth are allowed as history entries.
"""

if name == "order":
raise ValueError(
Expand All @@ -1992,11 +2017,10 @@ def add_history(self, name, history=None):
history = {}
order = self.history["order"]
order = order + [name]

history_group = self._data["history"]
history_group.attrs["order"] = text_type(order)
history_group.create_group(name)
for key, value in history.items():
history_group[name].attrs[key] = value
history_group.attrs[name] = history

def redistribute(self, dist_axis):
"""Redistribute parallel datasets along a specified axis.
Expand Down
40 changes: 36 additions & 4 deletions caput/tests/test_memh5.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,9 @@
import glob
import gc
import json

import numpy as np
import h5py
import warnings

from caput import memh5

Expand Down Expand Up @@ -204,8 +204,18 @@ def tearDown(self):


class TestBasicCont(unittest.TestCase):
def test_access(self):
fname = "test_bc.h5"
history_dict = {"foo": {"bar": {"f": 23}, "foo": "bar"}, "bar": 0}
json_prefix = "!!_memh5_json:"

def setUp(self):
    # Build a container with one dataset and one nested history entry and
    # write it to disk, so every test starts from the same file.
    container = memh5.BasicCont()
    container.create_dataset("a", data=np.arange(5))
    container.add_history("test", self.history_dict)
    container.to_disk(self.fname)

def test_access(self):
d = memh5.BasicCont.from_file(self.fname)
self.assertTrue("history" in d._data)
self.assertTrue("index_map" in d._data)
self.assertRaises(KeyError, d.__getitem__, "history")
Expand All @@ -215,8 +225,30 @@ def test_access(self):
self.assertRaises(
ValueError, d.create_dataset, "index_map/stuff", data=np.arange(5)
)
# But make sure this works.
d.create_dataset("a", data=np.arange(5))

def test_history(self):
# Exercise both history formats: the attribute-based entry that setUp wrote
# via add_history(), and a group-based entry in the deprecated layout.

# New format: the "test" entry is stored as an attribute of the "history"
# group and must equal json_prefix followed by the JSON dump of history_dict.
with h5py.File(self.fname, "r") as f:
history = f["history"].attrs["test"]
assert history == self.json_prefix + json.dumps(self.history_dict)

# Old format: add an entry by hand as a subgroup of "history" whose
# attributes hold the key/value pairs.
with h5py.File(self.fname, "r+") as f:
f["history"].create_group("old_history_format")
f["history/old_history_format"].attrs["foo"] = "bar"

with memh5.MemDiskGroup.from_file(self.fname) as m:
with warnings.catch_warnings(record=True) as w:
# Cause all warnings to always be triggered.
warnings.simplefilter("always")
old_history_format = m.history["old_history_format"]

# Expect exactly one warning about deprecated history format
assert len(w) == 1
assert issubclass(w[-1].category, DeprecationWarning)
assert "deprecated" in str(w[-1].message)

# The old-format entry must still compare equal to a dict of its attributes.
assert old_history_format == {"foo": "bar"}


class TestUnicodeDataset(unittest.TestCase):
Expand Down

0 comments on commit fbc5034

Please sign in to comment.