Skip to content

Commit

Permalink
Avoid unnecessary copy to intermediate store (#150)
Browse files Browse the repository at this point in the history
* Fix wasted compute

* Add unit test

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
  • Loading branch information
ghiggi and pre-commit-ci[bot] committed Feb 22, 2024
1 parent 0ac43e8 commit a22f556
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 2 deletions.
2 changes: 1 addition & 1 deletion rechunker/algorithm.py
Original file line number Diff line number Diff line change
Expand Up @@ -294,7 +294,7 @@ def multistage_rechunking_plan(
"achieving the minimum memory requirement due to increasing IO "
f"requirements. Smallest intermediates have size {int_mem}. "
f"Consider decreasing min_mem ({min_mem}) or increasing "
f"({max_mem}) to find a more efficient plan.",
f"max_mem ({max_mem}) to find a more efficient plan.",
category=ExcessiveIOWarning,
)
assert prev_plan is not None
Expand Down
2 changes: 1 addition & 1 deletion rechunker/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -609,7 +609,7 @@ def _setup_array_rechunk(
except AttributeError:
pass

if read_chunks == write_chunks:
if read_chunks == write_chunks or read_chunks == int_chunks:
int_array = None
else:
# do intermediate store
Expand Down
21 changes: 21 additions & 0 deletions tests/test_rechunk.py
Original file line number Diff line number Diff line change
Expand Up @@ -792,6 +792,27 @@ def test_no_intermediate_fused(tmp_path):
assert num_tasks < 20 # less than if no fuse


def test_no_intermediate_store(tmp_path):
    """Check the rechunk plan when read_chunks == int_chunks.

    In that situation the plan's repr must not mention an "Intermediate"
    array, even though a ``temp_store`` is supplied.
    """
    # Each source chunk holds one full slab along the first axis, while each
    # target chunk spans the whole first axis over a 4x4 tile.
    source_array = zarr.ones(
        (1000, 2000, 2000),
        chunks=(1, 2000, 2000),
        dtype="f4",
        store=str(tmp_path / "source.zarr"),
    )

    rechunked = api.rechunk(
        source_array,
        (1000, 4, 4),
        20_000_000_000,  # max_mem
        str(tmp_path / "target.zarr"),
        temp_store=str(tmp_path / "temp_store.zarr"),
    )

    # NOTE(review): relies on the plan's repr naming the intermediate array
    # only when one is actually used -- confirm against the repr implementation.
    assert "Intermediate" not in repr(rechunked)


def test_rechunk_array_to_group_no_name(tmp_path):
a = sample_zarr_array(tmp_path)
target_chunks = a.chunks
Expand Down

0 comments on commit a22f556

Please sign in to comment.