Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Sept 23 upstream 5 0.2.122 #306

Merged
merged 17 commits into the base branch from the source branch (branch names not captured in this extract)
Sep 19, 2023
Merged
Show file tree
Hide file tree
Changes from 16 commits
Commits
Show all changes
17 commits
Select commit Hold shift + click to select a range
4d0379b
[batch] Use the test and test-dev GSAs in test_batch not CI (#13562)
daniel-goldstein Sep 6, 2023
4fb659c
[qob][batch] do not list all jobs on failure (plus: types!) (#13500)
danking Sep 6, 2023
7f65516
[compiler, lir] don’t compute unused loop regions (#13566)
patrick-schultz Sep 6, 2023
7343e9c
[release] 0.2.121 (#13529)
danking Sep 6, 2023
e0bf6e6
[query] MT.tail should prefer `n_rows` to `n`. (#13508)
danking Sep 6, 2023
56f6231
[dataproc] restore parallel fetching of resources (#13574)
danking Sep 6, 2023
35da6f4
[test-dataproc] make test-dataproc-37 and -38 not race (#13573)
danking Sep 6, 2023
b4912b5
[ci] Clean up temporary config for OAuth changes (#13570)
daniel-goldstein Sep 7, 2023
6cdd8f7
[deps] update rich (in query) and numpy (in batch) (#13572)
danking Sep 7, 2023
8586275
[batch] Allow hailgenetics/vep images to be public (#13565)
jigold Sep 7, 2023
9cd874c
[security] update scipy pin to 1.11.1 (#13571)
danking Sep 7, 2023
7fb6008
[query] fix transposed blanczos (#13552)
patrick-schultz Sep 7, 2023
c160d3e
[query] fix shadowing of field names by methods (#13498)
patrick-schultz Sep 7, 2023
be9d88a
[hailtop] Fix defaulting to hail.is when the user has no deploy-confi…
daniel-goldstein Sep 7, 2023
f11ef53
Merge commit 'be9d88a' into sept-23-upstream-5-0.2.122
illusional Sep 8, 2023
a56649f
TF: Replace google_storage_bucket. with module.
illusional Sep 12, 2023
e53807d
Merge branch 'main' of github.com:populationgenomics/hail into sept-2…
illusional Sep 19, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 5 additions & 6 deletions batch/batch/batch.py
Original file line number Diff line number Diff line change
@@ -1,8 +1,9 @@
import json
import logging
from typing import Any, Dict, Optional
from typing import Any, Dict, List, Optional

from gear import transaction
from hailtop.batch_client.types import CostBreakdownEntry, JobListEntryV1Alpha
from hailtop.utils import humanize_timedelta_msecs, time_msecs_str

from .batch_format_version import BatchFormatVersion
Expand All @@ -12,7 +13,7 @@
log = logging.getLogger('batch')


def cost_breakdown_to_dict(cost_breakdown: dict):
def cost_breakdown_to_dict(cost_breakdown: Dict[str, float]) -> List[CostBreakdownEntry]:
return [{'resource': resource, 'cost': cost} for resource, cost in cost_breakdown.items()]


Expand Down Expand Up @@ -75,7 +76,7 @@ def _time_msecs_str(t):
return d


def job_record_to_dict(record: Dict[str, Any], name: Optional[str]) -> Dict[str, Any]:
def job_record_to_dict(record: Dict[str, Any], name: Optional[str]) -> JobListEntryV1Alpha:
format_version = BatchFormatVersion(record['format_version'])

db_status = record['status']
Expand All @@ -89,7 +90,7 @@ def job_record_to_dict(record: Dict[str, Any], name: Optional[str]) -> Dict[str,
if record['cost_breakdown'] is not None:
record['cost_breakdown'] = cost_breakdown_to_dict(json.loads(record['cost_breakdown']))

result = {
return {
'batch_id': record['batch_id'],
'job_id': record['job_id'],
'name': name,
Expand All @@ -103,8 +104,6 @@ def job_record_to_dict(record: Dict[str, Any], name: Optional[str]) -> Dict[str,
'cost_breakdown': record['cost_breakdown'],
}

return result


async def cancel_batch_in_db(db, batch_id):
@transaction(db)
Expand Down
4 changes: 3 additions & 1 deletion batch/batch/batch_format_version.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from typing import Optional, Tuple

from hailtop.batch_client.aioclient import Job


Expand Down Expand Up @@ -117,7 +119,7 @@ def db_status(self, status):

return [ec, duration]

def get_status_exit_code_duration(self, status):
def get_status_exit_code_duration(self, status) -> Tuple[Optional[int], Optional[int]]:
if self.format_version == 1:
job_status = {'status': status}
return (Job.exit_code(job_status), Job.total_duration_msecs(job_status))
Expand Down
54 changes: 32 additions & 22 deletions batch/batch/front_end/front_end.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@
import traceback
from functools import wraps
from numbers import Number
from typing import Any, Awaitable, Callable, Dict, List, NoReturn, Optional, Tuple, TypeVar, Union
from typing import Any, Awaitable, Callable, Dict, List, NoReturn, Optional, Tuple, TypeVar, Union, cast

import aiohttp
import aiohttp.web_exceptions
Expand Down Expand Up @@ -45,6 +45,7 @@
from gear.profiling import install_profiler_if_requested
from hailtop import aiotools, dictfix, httpx, version
from hailtop.batch_client.parse import parse_cpu_in_mcpu, parse_memory_in_bytes, parse_storage_in_bytes
from hailtop.batch_client.types import GetJobResponseV1Alpha, GetJobsResponseV1Alpha, JobListEntryV1Alpha
from hailtop.config import get_deploy_config
from hailtop.hail_logging import AccessLogger
from hailtop.tls import internal_server_ssl_context
Expand Down Expand Up @@ -341,7 +342,9 @@ async def _query_batch_jobs_for_billing(request, batch_id):
return jobs, last_job_id


async def _query_batch_jobs(request: web.Request, batch_id: int, version: int, q: str, last_job_id: Optional[int]):
async def _query_batch_jobs(
request: web.Request, batch_id: int, version: int, q: str, last_job_id: Optional[int]
) -> Tuple[List[JobListEntryV1Alpha], Optional[int]]:
db: Database = request.app['db']
if version == 1:
sql, sql_args = parse_batch_jobs_query_v1(batch_id, q, last_job_id)
Expand Down Expand Up @@ -416,7 +419,9 @@ async def get_completed_batches_ordered_by_completed_time(request, userdata):
body['last_completed_timestamp'] = last_completed_timestamp
return web.json_response(body)

async def _get_jobs(request, batch_id: int, version: int, q: str, last_job_id: Optional[int]):
async def _get_jobs(
request: web.Request, batch_id: int, version: int, q: str, last_job_id: Optional[int]
) -> GetJobsResponseV1Alpha:
db = request.app['db']

record = await db.select_and_fetchone(
Expand Down Expand Up @@ -1945,7 +1950,7 @@ async def ui_batches(request: web.Request, userdata: UserData) -> web.Response:
return await render_template('batch', request, userdata, 'batches.html', page_context)


async def _get_job(app, batch_id, job_id):
async def _get_job(app, batch_id, job_id) -> GetJobResponseV1Alpha:
db: Database = app['db']

record = await db.select_and_fetchone(
Expand Down Expand Up @@ -1992,9 +1997,11 @@ async def _get_job(app, batch_id, job_id):
_get_full_job_status(app, record), _get_full_job_spec(app, record), _get_attributes(app, record)
)

job = job_record_to_dict(record, attributes.get('name'))
job['status'] = full_status
job['spec'] = full_spec
job: GetJobResponseV1Alpha = {
**job_record_to_dict(record, attributes.get('name')),
'status': full_status,
'spec': full_spec,
}
if attributes:
job['attributes'] = attributes
return job
Expand Down Expand Up @@ -2278,6 +2285,8 @@ async def ui_get_job(request, userdata, batch_id):
_get_job_resource_usage(app, batch_id, job_id),
)

job = cast(Dict[str, Any], job)

job['duration'] = humanize_timedelta_msecs(job['duration'])
job['cost'] = cost_str(job['cost'])

Expand Down Expand Up @@ -2337,21 +2346,22 @@ async def ui_get_job(request, userdata, batch_id):
non_io_storage_limit_bytes = None
memory_limit_bytes = None

resources = job_specification['resources']
if 'memory_bytes' in resources:
memory_limit_bytes = resources['memory_bytes']
resources['actual_memory'] = humanize.naturalsize(memory_limit_bytes, binary=True)
del resources['memory_bytes']
if 'storage_gib' in resources:
io_storage_limit_bytes = resources['storage_gib'] * 1024**3
resources['actual_storage'] = humanize.naturalsize(io_storage_limit_bytes, binary=True)
del resources['storage_gib']
if 'cores_mcpu' in resources:
cores = resources['cores_mcpu'] / 1000
non_io_storage_limit_gb = min(cores * RESERVED_STORAGE_GB_PER_CORE, RESERVED_STORAGE_GB_PER_CORE)
non_io_storage_limit_bytes = int(non_io_storage_limit_gb * 1024**3 + 1)
resources['actual_cpu'] = cores
del resources['cores_mcpu']
if job_specification is not None:
resources = job_specification['resources']
if 'memory_bytes' in resources:
memory_limit_bytes = resources['memory_bytes']
resources['actual_memory'] = humanize.naturalsize(memory_limit_bytes, binary=True)
del resources['memory_bytes']
if 'storage_gib' in resources:
io_storage_limit_bytes = resources['storage_gib'] * 1024**3
resources['actual_storage'] = humanize.naturalsize(io_storage_limit_bytes, binary=True)
del resources['storage_gib']
if 'cores_mcpu' in resources:
cores = resources['cores_mcpu'] / 1000
non_io_storage_limit_gb = min(cores * RESERVED_STORAGE_GB_PER_CORE, RESERVED_STORAGE_GB_PER_CORE)
non_io_storage_limit_bytes = int(non_io_storage_limit_gb * 1024**3 + 1)
resources['actual_cpu'] = cores
del resources['cores_mcpu']

# Not all logs will be proper utf-8 but we attempt to show them as
# str or else Jinja will present them surrounded by b''
Expand Down
2 changes: 1 addition & 1 deletion batch/batch/globals.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

BATCH_FORMAT_VERSION = 7
STATUS_FORMAT_VERSION = 5
INSTANCE_VERSION = 25
INSTANCE_VERSION = 26

MAX_PERSISTENT_SSD_SIZE_GIB = 64 * 1024
RESERVED_STORAGE_GB_PER_CORE = 5
12 changes: 3 additions & 9 deletions batch/batch/publicly_available_images.py
Original file line number Diff line number Diff line change
@@ -1,13 +1,7 @@
from typing import List

from hailtop.batch.hail_genetics_images import HAIL_GENETICS_IMAGES


def publicly_available_images(docker_prefix: str) -> List[str]:
return [
f'{docker_prefix}/hailgenetics/{name}'
for name in (
'hail',
'hailtop',
'genetics',
'python-dill',
)
]
return [docker_prefix + '/' + image_name for image_name in HAIL_GENETICS_IMAGES]
2 changes: 1 addition & 1 deletion batch/pinned-requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ python-dateutil==2.8.2
# -c hail/batch/../hail/python/dev/pinned-requirements.txt
# -c hail/batch/../hail/python/pinned-requirements.txt
# pandas
pytz==2023.3
pytz==2023.3.post1
# via
# -c hail/batch/../hail/python/pinned-requirements.txt
# pandas
Expand Down
2 changes: 1 addition & 1 deletion batch/test/billing_projects.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@ def get_billing_project_prefix():

async def delete_all_test_billing_projects():
billing_project_prefix = get_billing_project_prefix()
bc = await BatchClient.create('', token_file=os.environ['HAIL_TEST_DEV_TOKEN_FILE'])
bc = await BatchClient.create('', cloud_credentials_file=os.environ['HAIL_TEST_DEV_GSA_KEY_FILE'])
try:
for project in await bc.list_billing_projects():
if project['billing_project'].startswith(billing_project_prefix):
Expand Down
5 changes: 3 additions & 2 deletions batch/test/test_accounts.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ async def make_client() -> AsyncGenerator[Callable[[str], Awaitable[BatchClient]
_bcs = []

async def factory(project: str):
bc = await BatchClient.create(project, token_file=os.environ['HAIL_TEST_TOKEN_FILE'])
bc = await BatchClient.create(project, cloud_credentials_file=os.environ['HAIL_TEST_GSA_KEY_FILE'])
_bcs.append(bc)
return bc

Expand All @@ -36,7 +36,8 @@ async def factory(project: str):
@pytest.fixture
async def dev_client() -> AsyncGenerator[BatchClient, Any]:
bc = await BatchClient.create(
'billing-project-not-needed-but-required-by-BatchClient', token_file=os.environ['HAIL_TEST_DEV_TOKEN_FILE']
'billing-project-not-needed-but-required-by-BatchClient',
cloud_credentials_file=os.environ['HAIL_TEST_DEV_GSA_KEY_FILE'],
)
yield bc
await bc.close()
Expand Down
Loading