Misc fixes based on my own PR comments
anjakefala committed Sep 17, 2021
1 parent 267a8ce commit b553d92
Showing 3 changed files with 26 additions and 8 deletions.
30 changes: 24 additions & 6 deletions caput/memh5.py
@@ -425,7 +425,7 @@ def from_hdf5(
         selections=None,
         convert_dataset_strings=False,
         convert_attribute_strings=True,
-        **_,
+        **kwargs,
     ):
         """Create a new instance by copying from an hdf5 group.
@@ -466,6 +466,7 @@ def from_hdf5(
             convert_dataset_strings,
             convert_attribute_strings,
             file_format=fileformats.HDF5,
+            **kwargs,
         )

     @classmethod
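
Replacing `**_` with `**kwargs` matters because the catch-all silently discarded any extra keyword arguments instead of forwarding them to `from_file`. A minimal generic sketch of the difference (hypothetical names, not caput's actual call chain):

    def target(data, flag=False):
        # Stands in for from_file: the callee that actually uses the option.
        return (data, flag)

    def swallows(data, **_):
        # Old pattern: extra keyword arguments vanish here.
        return target(data)

    def forwards(data, **kwargs):
        # New pattern: extra keyword arguments reach the callee.
        return target(data, **kwargs)

    assert swallows("x", flag=True) == ("x", False)  # option silently dropped
    assert forwards("x", flag=True) == ("x", True)   # option honoured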
@@ -576,7 +577,14 @@ def to_hdf5(
             understands. Default is `True`.
         convert_dataset_strings : bool, optional
             Try and convert dataset string types to bytestrings. Default is `False`.
+        compression : str
+            Name or identifier of HDF5 compression filter for the dataset.
+        compression_opts
+            Compression options for the dataset.
+            See HDF5 documentation for compression filters.
         """

         self.to_file(
             filename,
             mode,
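
With the filter options now documented, a call can request compression directly. A hedged usage sketch (assuming `cont` is a memh5 container; the gzip settings are illustrative):

    # Hypothetical usage; `cont` is assumed to be a memh5 container.
    cont.to_hdf5(
        "output.h5",
        mode="w",
        compression="gzip",   # any filter name/identifier HDF5 understands
        compression_opts=4,   # e.g. gzip level 0-9
    )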
@@ -2467,6 +2475,12 @@ def deep_group_copy(
     except AttributeError:
         selection = slice(None)

+    # only the case if zarr is not installed
+    if file_format.module is None:
+        raise RuntimeError(
+            "Can't deep_group_copy zarr file. Please install zarr."
+        )
+
     if convert_dataset_strings:
         # Convert unicode strings back into ascii byte strings. This will break
         # if there are characters outside of the ascii range
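
The guard works because `file_format.module` holds the imported backend package, or `None` when the optional import failed. A minimal sketch of that convention (hypothetical, simplified from what caput.fileformats presumably does):

    # Hypothetical sketch of the optional-import convention:
    try:
        import zarr as _zarr
    except ImportError:
        _zarr = None

    class Zarr:
        # None when the zarr package is unavailable; deep_group_copy
        # checks this before touching a Zarr group.
        module = _zarr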
@@ -2598,8 +2612,10 @@ def _distributed_group_to_hdf5_serial(
                 fname,
                 entry.name,
                 chunks=entry.chunks,
-                compression=entry.compression,
-                compression_opts=entry.compression_opts,
+                **fileformats.HDF5.compression_kwargs(
+                    compression=entry.compression,
+                    compression_opts=entry.compression_opts,
+                ),
             )

     comm.Barrier()
@@ -2626,8 +2642,10 @@ def _distributed_group_to_hdf5_serial(
                 entry.name,
                 data=data,
                 chunks=entry.chunks,
-                compression=entry.compression,
-                compression_opts=entry.compression_opts,
+                **fileformats.HDF5.compression_kwargs(
+                    compression=entry.compression,
+                    compression_opts=entry.compression_opts,
+                ),
             )
             copyattrs(
                 entry.attrs,
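
Funnelling the options through `fileformats.HDF5.compression_kwargs` keeps the h5py-specific keyword names in one place, so each backend can translate or drop options it does not support. A hedged sketch of the helper's likely shape (not caput's actual implementation):

    class HDF5:
        # Hypothetical sketch; the real class lives in caput.fileformats.
        @classmethod
        def compression_kwargs(cls, compression=None, compression_opts=None):
            # Return keyword arguments ready to unpack into create_dataset,
            # omitting them entirely when no compression was requested.
            if compression is None:
                return {}
            return {"compression": compression, "compression_opts": compression_opts}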
@@ -2900,7 +2918,7 @@ def _distributed_group_from_file(
     **kwargs,
 ):
     """
-    Restore full tree from an HDF5 file into a distributed memh5 object.
+    Restore full tree from an HDF5 file or Zarr group into a distributed memh5 object.

     A `selections=` parameter may be supplied as parts of 'kwargs'. See
     `_deep_group_copy' for a description.
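
The `selections` mechanism lets callers read only part of each dataset. A hedged sketch of a selective read (the path-to-slice mapping and `MemGroup` as the owner of `from_hdf5` are assumptions, not confirmed by this diff):

    from caput import memh5

    # Hypothetical selective read: map dataset paths to the slices to load.
    selections = {"/vis": slice(0, 128)}
    group = memh5.MemGroup.from_hdf5("data.h5", selections=selections)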
2 changes: 1 addition & 1 deletion caput/mpiarray.py
@@ -637,7 +637,7 @@ def from_hdf5(cls, f, dataset, comm=None, axis=0, sel=None):
     def from_file(
         cls, f, dataset, comm=None, axis=0, sel=None, file_format=fileformats.HDF5
     ):
-        """Read MPIArray from an HDF5 dataset in parallel.
+        """Read MPIArray from an HDF5 dataset or Zarr array on disk in parallel.

         Parameters
         ----------
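
A hedged usage sketch of the parallel read (the file name, dataset path, and `fileformats.Zarr` attribute are illustrative assumptions):

    from caput import fileformats, mpiarray

    # Hypothetical parallel read, distributed over axis 0 across MPI ranks;
    # the Zarr path needs the optional zarr package installed.
    arr = mpiarray.MPIArray.from_file(
        "data.zarr", "vis", axis=0, file_format=fileformats.Zarr
    )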
2 changes: 1 addition & 1 deletion caput/pipeline.py
@@ -1266,7 +1266,7 @@ def read_process_write(self, input, input_filename, output_filename):
             output_dirname = os.path.dirname(output_filename)
             if not os.path.isdir(output_dirname):
                 os.makedirs(output_dirname)
-            self.write_output(
+            _OneAndOne.write_output(
                 output_filename,
                 output,
                 file_format=self.output_format,
