Skip to content

Commit

Permalink
Add representative rechunking workflow (#1536)
Browse files Browse the repository at this point in the history
  • Loading branch information
jrbourbeau authored Sep 5, 2024
1 parent 5c99386 commit c626d45
Show file tree
Hide file tree
Showing 4 changed files with 127 additions and 0 deletions.
5 changes: 5 additions & 0 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,11 @@ jobs:
echo PYTEST_MARKERS=${PYTEST_MARKERS} >> $GITHUB_ENV
fi
# Authenticate with Google Cloud using the GCP_CREDENTIALS repository secret.
# NOTE(review): presumably needed so later steps can reach GCS — confirm.
- name: Google auth
uses: "google-github-actions/auth@v2"
with:
credentials_json: "${{ secrets.GCP_CREDENTIALS }}"

- name: Run Coiled Runtime Tests
env:
DASK_COILED__TOKEN: ${{ secrets.COILED_BENCHMARK_BOT_TOKEN }}
Expand Down
28 changes: 28 additions & 0 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
import dask_expr
import distributed
import filelock
import gcsfs
import pandas
import pytest
import s3fs
Expand Down Expand Up @@ -677,6 +678,33 @@ def s3_cluster_dump_url(s3, s3_scratch):
return dump_url


# Region name for GCS resources.
# NOTE(review): not referenced by the code visible in this hunk — confirm usage.
GCS_REGION = "us-central1"
# Base GCS location under which the `gcs_scratch` fixture creates its
# "test-scratch" directory.
GCS_BUCKET = "gs://coiled-oss-scratch/benchmarks-bot"


@pytest.fixture(scope="session")
def gcs():
    """Provide one ``gcsfs.GCSFileSystem`` instance for the whole test session."""
    filesystem = gcsfs.GCSFileSystem()
    return filesystem


@pytest.fixture(scope="session")
def gcs_scratch(gcs):
    """Return the URL of the shared ``test-scratch`` directory under ``GCS_BUCKET``.

    The directory is created if it is missing. It is deliberately never
    removed here, because several test runs may be using it concurrently.
    """
    shared_dir = GCS_BUCKET + "/test-scratch"
    gcs.mkdirs(shared_dir, exist_ok=True)
    return shared_dir


@pytest.fixture(scope="function")
def gcs_url(gcs, gcs_scratch, test_name_uuid):
    """Yield a per-test directory URL inside the scratch area, then delete it.

    The directory is created with ``exist_ok=False`` before the test runs
    and is removed recursively once the test has finished.
    """
    test_dir = "{}/{}".format(gcs_scratch, test_name_uuid)
    gcs.mkdirs(test_dir, exist_ok=False)
    yield test_dir
    # Teardown: drop everything the test wrote under its private directory.
    gcs.rm(test_dir, recursive=True)


# this code was taken from pytest docs
# https://docs.pytest.org/en/latest/example/simple.html#making-test-result-information-available-in-fixtures
@pytest.hookimpl(tryfirst=True, hookwrapper=True)
Expand Down
69 changes: 69 additions & 0 deletions tests/geospatial/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
import os
import uuid
from typing import Any, Literal

import coiled
import pytest


def pytest_addoption(parser):
    """Register the ``--scale`` command-line option.

    The option selects the benchmark scale and defaults to ``"small"``.
    Only ``"small"`` and ``"large"`` are accepted; restricting the choices
    makes a mistyped scale fail at argument parsing instead of later,
    during cluster creation.

    Parameters
    ----------
    parser
        The pytest option parser handed to this hook.
    """
    parser.addoption(
        "--scale",
        action="store",
        default="small",
        choices=("small", "large"),
        help="Scale to run. Either 'small' or 'large'",
    )


@pytest.fixture(scope="session")
def scale(request):
    """Return the value of the ``scale`` command-line option for this session."""
    config = request.config
    return config.getoption("scale")


def get_cluster_spec(scale: Literal["small", "large"]) -> dict[str, Any]:
    """Return the keyword arguments describing the cluster for ``scale``.

    Parameters
    ----------
    scale
        Benchmark scale: ``"small"`` requests 10 workers, ``"large"``
        requests 100 workers.

    Returns
    -------
    dict[str, Any]
        ``n_workers`` for the requested scale plus the settings shared by
        every scale (workspace, region, ``wait_for_workers`` and
        ``spot_policy``).

    Raises
    ------
    ValueError
        If ``scale`` is not one of the supported values. Without this
        check the function would fall through and return ``None``, which
        callers then fail to unpack with ``**``.
    """
    n_workers_by_scale = {"small": 10, "large": 100}
    try:
        n_workers = n_workers_by_scale[scale]
    except (KeyError, TypeError):
        supported = ", ".join(repr(name) for name in n_workers_by_scale)
        raise ValueError(
            f"Unknown scale {scale!r}; expected one of: {supported}"
        ) from None

    # Settings that do not depend on the scale.
    return {
        "n_workers": n_workers,
        "workspace": "dask-engineering-gcp",
        "region": "us-central1",
        "wait_for_workers": True,
        "spot_policy": "on-demand",
    }


@pytest.fixture(scope="module")
def cluster_name(request, scale):
    """Build a cluster name for the requesting test module.

    The name has the form ``geospatial-<module>-<scale>-<suffix>``:
    ``<module>`` is the module's file name up to its first ``"."`` with
    every ``"test_"`` occurrence removed, and ``<suffix>`` is the first
    eight hex characters of a fresh UUID4.
    """
    file_name = os.path.basename(request.fspath)
    stem, *_ = file_name.split(".")
    short_name = stem.replace("test_", "")
    suffix = uuid.uuid4().hex[:8]
    return f"geospatial-{short_name}-{scale}-{suffix}"


@pytest.fixture(scope="module")
def cluster(cluster_name, scale, github_cluster_tags):
    """Yield a ``coiled.Cluster`` shared by every test in the module.

    The cluster is created from the scale-specific settings returned by
    ``get_cluster_spec`` together with the given name and tags. It is used
    as a context manager, so it is exited once the module's tests are done.
    """
    spec = get_cluster_spec(scale)
    with coiled.Cluster(
        name=cluster_name,
        tags=github_cluster_tags,
        **spec,
    ) as running_cluster:
        yield running_cluster


@pytest.fixture()
def client(cluster, benchmark_all):
    """Yield a client from ``cluster.get_client()`` inside ``benchmark_all``.

    Both context managers stay open while the test runs and are exited in
    reverse order afterwards.
    """
    with cluster.get_client() as cluster_client, benchmark_all(cluster_client):
        yield cluster_client
25 changes: 25 additions & 0 deletions tests/geospatial/test_rechunking.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
import pytest
import xarray as xr
from coiled.credentials.google import CoiledShippedCredentials


@pytest.mark.client("era5_rechunking")
def test_era5_rechunking(client, gcs_url, scale):
    """Rechunk ERA5 sea surface temperature and write it to ``gcs_url``."""
    # Time window by scale: "small" covers 2020-01-01 through 2023-01-01
    # (101.83 GiB); anything else uses the full range (2.12 TiB, large).
    if scale == "small":
        window = slice("2020-01-01", "2023-01-01")
    else:
        window = slice(None)

    # Open the source dataset and select the variable of interest.
    source = xr.open_zarr(
        "gs://weatherbench2/datasets/era5/1959-2023_01_10-full_37-1h-0p25deg-chunk-1.zarr",
    )
    dataset = source.drop_encoding()
    sst = dataset.sea_surface_temperature.sel(time=window)

    # Request new chunk sizes: -1 along time, "auto" along the spatial dims.
    new_chunks = {"time": -1, "longitude": "auto", "latitude": "auto"}
    rechunked = sst.chunk(new_chunks)

    # Persist the rechunked array to cloud storage.
    rechunked.to_zarr(
        gcs_url,
        storage_options={"token": CoiledShippedCredentials()},
    )

0 comments on commit c626d45

Please sign in to comment.