Skip to content

Commit

Permalink
feat(task): modify default behaviour for task compression argument.
Browse files Browse the repository at this point in the history
The expected behaviour of the compression argument was to overwrite
compression args if a dict with those args was provided, to disable
compression if something which evaluates to False was provided, and use
the default parameters for the container otherwise. The behaviour
previously would instead disable compression if no args were provided.
  • Loading branch information
ljgray committed Mar 23, 2023
1 parent 173236e commit 7696a99
Showing 1 changed file with 13 additions and 7 deletions.
20 changes: 13 additions & 7 deletions draco/core/task.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,10 +259,11 @@ class SingleTask(MPILoggedTask, pipeline.BasicContMixin):
compression : dict or bool, optional
Set compression options for each dataset. Provided as a dict with the dataset
names as keys and values for `chunks`, `compression`, and `compression_opts`.
If set to `False`, chunks and compression will be disabled for all datasets.
Otherwise, the default parameters set in the dataset spec are used.
Note that this will modify these parameters on the container itself, such that
if it is written out again downstream in the pipeline these will be used.
If set to `False` (or anything that evaluates to `False`, such as an empty dict),
chunks and compression will be disabled for all datasets. Otherwise, the default
parameters set in the dataset spec are used. Note that this will modify these
parameters on the container itself, such that if it is written out again downstream
in the pipeline these will be used.
output_root : string
Pipeline settable parameter giving the first part of the output path.
Deprecated in favour of `output_name`.
Expand Down Expand Up @@ -291,7 +292,7 @@ class SingleTask(MPILoggedTask, pipeline.BasicContMixin):
output_name = config.Property(default="{output_root}{tag}.h5", proptype=str)
output_format = config.file_format()
compression = config.Property(
default={}, proptype=lambda x: x if isinstance(x, dict) else bool(x)
default=True, proptype=lambda x: x if isinstance(x, dict) else bool(x)
)

nan_check = config.Property(default=True, proptype=bool)
Expand Down Expand Up @@ -445,11 +446,13 @@ def walk_dset_tree(grp, root=""):
return datasets

if not self.compression:
# Disable compression
for ds in walk_dset_tree(output):
output._data._storage_root[ds].chunks = None
output._data._storage_root[ds].compression = None
output._data._storage_root[ds].compression_opts = None
else:
elif isinstance(self.compression, dict):
# We want to overwrite some compression settings
datasets = walk_dset_tree(output)
for ds in self.compression:
if ds in datasets:
Expand All @@ -459,7 +462,10 @@ def walk_dset_tree(grp, root=""):
)
setattr(output._data._storage_root[ds], key, val)
# shorthand for bitshuffle
if output[ds].compression in ("bitshuffle", fileformats.H5FILTER):
if output[ds].compression in (
"bitshuffle",
fileformats.H5FILTER,
):
output[ds].compression = fileformats.H5FILTER
if output[ds].compression_opts is None:
output._data._storage_root[ds].compression_opts = (
Expand Down

0 comments on commit 7696a99

Please sign in to comment.