diff --git a/auth/auth/auth.py b/auth/auth/auth.py index 0ddf87686bb..a07229d7f32 100644 --- a/auth/auth/auth.py +++ b/auth/auth/auth.py @@ -7,6 +7,9 @@ import aiohttp import aiohttp_session +import kubernetes_asyncio.client +import kubernetes_asyncio.client.rest +import kubernetes_asyncio.config import uvloop from aiohttp import web from prometheus_async.aio.web import server_stats # type: ignore @@ -14,6 +17,7 @@ from gear import ( AuthClient, Database, + K8sCache, Transaction, check_csrf_token, create_session, @@ -53,6 +57,9 @@ CLOUD = get_global_config()['cloud'] ORGANIZATION_DOMAIN = os.environ['HAIL_ORGANIZATION_DOMAIN'] +DEFAULT_NAMESPACE = os.environ['HAIL_DEFAULT_NAMESPACE'] + +is_test_deployment = DEFAULT_NAMESPACE != 'default' deploy_config = get_deploy_config() @@ -124,7 +131,14 @@ async def check_valid_new_user(tx: Transaction, username, login_id, is_developer async def insert_new_user( - db: Database, username: str, login_id: Optional[str], is_developer: bool, is_service_account: bool + db: Database, + username: str, + login_id: Optional[str], + is_developer: bool, + is_service_account: bool, + *, + hail_identity: Optional[str] = None, + hail_credentials_secret_name: Optional[str] = None, ) -> bool: @transaction(db) async def _insert(tx): @@ -134,10 +148,18 @@ async def _insert(tx): await tx.execute_insertone( ''' -INSERT INTO users (state, username, login_id, is_developer, is_service_account) -VALUES (%s, %s, %s, %s, %s); +INSERT INTO users (state, username, login_id, is_developer, is_service_account, hail_identity, hail_credentials_secret_name) +VALUES (%s, %s, %s, %s, %s, %s, %s); ''', - ('creating', username, login_id, is_developer, is_service_account), + ( + 'creating', + username, + login_id, + is_developer, + is_service_account, + hail_identity, + hail_credentials_secret_name, + ), ) await _insert() # pylint: disable=no-value-for-parameter @@ -367,8 +389,29 @@ async def create_user(request: web.Request, userdata): # pylint: disable=unused is_developer = body['is_developer'] is_service_account = body['is_service_account'] + hail_identity = body.get('hail_identity') + hail_credentials_secret_name = body.get('hail_credentials_secret_name') + if (hail_identity or hail_credentials_secret_name) and not is_test_deployment: + raise web.HTTPBadRequest(text='Cannot specify an existing hail identity for a new user') + if hail_credentials_secret_name: + try: + k8s_cache: K8sCache = request.app['k8s_cache'] + await k8s_cache.read_secret(hail_credentials_secret_name, DEFAULT_NAMESPACE) + except kubernetes_asyncio.client.rest.ApiException as e: + raise web.HTTPBadRequest( + text=f'hail credentials secret name specified but was not found in namespace {DEFAULT_NAMESPACE}: {hail_credentials_secret_name}' + ) from e + try: - await insert_new_user(db, username, login_id, is_developer, is_service_account) + await insert_new_user( + db, + username, + login_id, + is_developer, + is_service_account, + hail_identity=hail_identity, + hail_credentials_secret_name=hail_credentials_secret_name, + ) except AuthUserError as e: raise e.http_response() @@ -750,12 +793,20 @@ async def on_startup(app): app['client_session'] = httpx.client_session() app['flow_client'] = get_flow_client('/auth-oauth2-client-secret/client_secret.json') + kubernetes_asyncio.config.load_incluster_config() + app['k8s_client'] = kubernetes_asyncio.client.CoreV1Api() + app['k8s_cache'] = K8sCache(app['k8s_client']) + async def on_cleanup(app): try: - await app['db'].async_close() + k8s_client: 
kubernetes_asyncio.client.CoreV1Api = app['k8s_client'] + await k8s_client.api_client.rest_client.pool_manager.close() finally: - await app['client_session'].close() + try: + await app['db'].async_close() + finally: + await app['client_session'].close() class AuthAccessLogger(AccessLogger): diff --git a/auth/auth/driver/driver.py b/auth/auth/driver/driver.py index 598155feb6b..94caac920d4 100644 --- a/auth/auth/driver/driver.py +++ b/auth/auth/driver/driver.py @@ -4,8 +4,7 @@ import logging import os import random -import secrets -from typing import Any, Awaitable, Callable, Dict, List, Optional +from typing import Any, Awaitable, Callable, Dict, List import aiohttp import kubernetes_asyncio.client @@ -17,7 +16,6 @@ from gear.cloud_config import get_gcp_config, get_global_config from hailtop import aiotools, httpx from hailtop import batch_client as bc -from hailtop.auth.sql_config import SQLConfig, create_secret_data_from_config from hailtop.utils import secret_alnum_string, time_msecs log = logging.getLogger('auth.driver') @@ -34,7 +32,7 @@ class DatabaseConflictError(Exception): class EventHandler: - def __init__(self, handler, event=None, bump_secs=60.0, min_delay_secs=0.1): + def __init__(self, handler, event=None, bump_secs=5.0, min_delay_secs=0.1): self.handler = handler if event is None: event = asyncio.Event() @@ -234,86 +232,6 @@ async def delete(self): self.app_obj_id = None -class DatabaseResource: - def __init__(self, db_instance, name=None): - self.db_instance = db_instance - self.name = name - self.password = None - - async def create(self, name): - assert self.name is None - - if is_test_deployment: - return - - await self._delete(name) - - self.password = secrets.token_urlsafe(16) - await self.db_instance.just_execute( - f''' -CREATE DATABASE `{name}`; - -CREATE USER '{name}'@'%' IDENTIFIED BY '{self.password}'; -GRANT ALL ON `{name}`.* TO '{name}'@'%'; -''' - ) - self.name = name - - def secret_data(self): - with open('/database-server-config/sql-config.json', 'r', encoding='utf-8') as f: - server_config = SQLConfig.from_json(f.read()) - with open('/database-server-config/server-ca.pem', 'r', encoding='utf-8') as f: - server_ca = f.read() - client_cert: Optional[str] - client_key: Optional[str] - if server_config.using_mtls(): - with open('/database-server-config/client-cert.pem', 'r', encoding='utf-8') as f: - client_cert = f.read() - with open('/database-server-config/client-key.pem', 'r', encoding='utf-8') as f: - client_key = f.read() - else: - client_cert = None - client_key = None - - if is_test_deployment: - return create_secret_data_from_config(server_config, server_ca, client_cert, client_key) - - assert self.name is not None - assert self.password is not None - - config = SQLConfig( - host=server_config.host, - port=server_config.port, - user=self.name, - password=self.password, - instance=server_config.instance, - connection_name=server_config.connection_name, - db=self.name, - ssl_ca='/sql-config/server-ca.pem', - ssl_cert='/sql-config/client-cert.pem' if client_cert is not None else None, - ssl_key='/sql-config/client-key.pem' if client_key is not None else None, - ssl_mode='VERIFY_CA', - ) - return create_secret_data_from_config(config, server_ca, client_cert, client_key) - - async def _delete(self, name): - if is_test_deployment: - return - - # no DROP USER IF EXISTS in current db version - row = await self.db_instance.execute_and_fetchone('SELECT 1 FROM mysql.user WHERE User = %s;', (name,)) - if row is not None: - await 
self.db_instance.just_execute(f"DROP USER '{name}';") - - await self.db_instance.just_execute(f'DROP DATABASE IF EXISTS `{name}`;') - - async def delete(self): - if self.name is None: - return - await self._delete(self.name) - self.name = None - - class K8sNamespaceResource: def __init__(self, k8s_client, name=None): self.k8s_client = k8s_client @@ -410,7 +328,6 @@ async def delete(self): async def _create_user(app, user, skip_trial_bp, cleanup): - db_instance = app['db_instance'] db = app['db'] k8s_client = app['k8s_client'] identity_client = app['identity_client'] @@ -481,21 +398,14 @@ async def _create_user(app, user, skip_trial_bp, cleanup): updates['hail_credentials_secret_name'] = hail_credentials_secret_name namespace_name = user['namespace_name'] - if namespace_name is None and user['is_developer'] == 1: + # auth services in test namespaces cannot/should not be creating and deleting namespaces + if namespace_name is None and user['is_developer'] == 1 and not is_test_deployment: namespace_name = ident namespace = K8sNamespaceResource(k8s_client) cleanup.append(namespace.delete) await namespace.create(namespace_name) updates['namespace_name'] = namespace_name - db_resource = DatabaseResource(db_instance) - cleanup.append(db_resource.delete) - await db_resource.create(ident) - - db_secret = K8sSecretResource(k8s_client) - cleanup.append(db_secret.delete) - await db_secret.create('database-server-config', namespace_name, db_resource.secret_data()) - if not skip_trial_bp and user['is_service_account'] != 1: trial_bp = user['trial_bp_name'] if trial_bp is None: @@ -536,7 +446,6 @@ async def create_user(app, user, skip_trial_bp=False): async def delete_user(app, user): - db_instance = app['db_instance'] db = app['db'] k8s_client = app['k8s_client'] identity_client = app['identity_client'] @@ -572,9 +481,6 @@ async def delete_user(app, user): namespace = K8sNamespaceResource(k8s_client, namespace_name) await namespace.delete() - db_resource = DatabaseResource(db_instance, user['username']) - await db_resource.delete() - trial_bp_name = user['trial_bp_name'] if trial_bp_name is not None: batch_client = app['batch_client'] @@ -619,10 +525,6 @@ async def async_main(): app['client_session'] = httpx.client_session() - db_instance = Database() - await db_instance.async_init(maxsize=50, config_file='/database-server-config/sql-config.json') - app['db_instance'] = db_instance - kubernetes_asyncio.config.load_incluster_config() app['k8s_client'] = kubernetes_asyncio.client.CoreV1Api() @@ -647,18 +549,14 @@ async def users_changed_handler(): await app['db'].async_close() finally: try: - if 'db_instance_pool' in app: - await app['db_instance_pool'].async_close() + await app['client_session'].close() finally: try: - await app['client_session'].close() + if user_creation_loop is not None: + user_creation_loop.shutdown() finally: try: - if user_creation_loop is not None: - user_creation_loop.shutdown() + await app['identity_client'].close() finally: - try: - await app['identity_client'].close() - finally: - k8s_client: kubernetes_asyncio.client.CoreV1Api = app['k8s_client'] - await k8s_client.api_client.rest_client.pool_manager.close() + k8s_client: kubernetes_asyncio.client.CoreV1Api = app['k8s_client'] + await k8s_client.api_client.rest_client.pool_manager.close() diff --git a/batch/batch/cloud/azure/driver/create_instance.py b/batch/batch/cloud/azure/driver/create_instance.py index 2131124d977..70444c02168 100644 --- a/batch/batch/cloud/azure/driver/create_instance.py +++ 
b/batch/batch/cloud/azure/driver/create_instance.py
@@ -3,9 +3,10 @@
 import logging
 import os
 from shlex import quote as shq
-from typing import Any, Dict, Optional
+from typing import Any, Dict, List, Optional
 
 from gear.cloud_config import get_global_config
+from hailtop.config import get_deploy_config
 
 from ....batch_configuration import DEFAULT_NAMESPACE, DOCKER_PREFIX, DOCKER_ROOT_IMAGE, INTERNAL_GATEWAY_IP
 from ....file_store import FileStore
@@ -80,6 +81,15 @@ def create_vm_config(
     assert instance_config.is_valid_configuration(resource_rates.keys())
 
+    touch_commands: List[str] = []
+    for jvm_cores in (1, 2, 4, 8):
+        for _ in range(cores // jvm_cores):
+            idx = len(touch_commands)
+            log_path = f'/batch/jvm-container-logs/jvm-{idx}.log'
+            touch_commands.append(f'sudo touch {log_path}')
+
+    jvm_touch_command = '\n'.join(touch_commands)
+
     startup_script = r'''#cloud-config
 
 mounts:
@@ -139,6 +149,9 @@ def create_vm_config(
 sudo mkdir -p /mnt/disks/$WORKER_DATA_DISK_NAME/batch/
 sudo ln -s /mnt/disks/$WORKER_DATA_DISK_NAME/batch /batch
 
+sudo mkdir -p /batch/jvm-container-logs/
+{jvm_touch_command}
+
 sudo mkdir -p /mnt/disks/$WORKER_DATA_DISK_NAME/logs/
 sudo ln -s /mnt/disks/$WORKER_DATA_DISK_NAME/logs /logs
 
@@ -218,6 +231,11 @@ def create_vm_config(
 
 {make_global_config_str}
 
+mkdir /deploy-config
+cat >/deploy-config/deploy-config.json < AzureBillingManager:
+        return self._billing_manager
+
+    @property
+    def inst_coll_manager(self) -> InstanceCollectionManager:
+        return self._inst_coll_manager
 
     async def shutdown(self) -> None:
         try:
diff --git a/batch/batch/cloud/gcp/driver/create_instance.py b/batch/batch/cloud/gcp/driver/create_instance.py
index d6c082c0396..c7a4f38c624 100644
--- a/batch/batch/cloud/gcp/driver/create_instance.py
+++ b/batch/batch/cloud/gcp/driver/create_instance.py
@@ -3,9 +3,10 @@
 import logging
 import os
 from shlex import quote as shq
-from typing import Dict
+from typing import Dict, List
 
 from gear.cloud_config import get_global_config
+from hailtop.config import get_deploy_config
 
 from ....batch_configuration import DEFAULT_NAMESPACE, DOCKER_PREFIX, DOCKER_ROOT_IMAGE, INTERNAL_GATEWAY_IP
 from ....file_store import FileStore
@@ -75,6 +76,32 @@ def create_vm_config(
     assert instance_config.is_valid_configuration(resource_rates.keys())
 
+    configs: List[str] = []
+    touch_commands = []
+    for jvm_cores in (1, 2, 4, 8):
+        for _ in range(cores // jvm_cores):
+            idx = len(configs)
+            log_path = f'/batch/jvm-container-logs/jvm-{idx}.log'
+            touch_commands.append(f'touch {log_path}')
+
+            config = f'''
+<source>
+@type tail
+<parse>
+  # 'none' indicates the log is unstructured (text).
+ @type none + +path {log_path} +pos_file /var/lib/google-fluentd/pos/jvm-{idx}.pos +read_from_head true +tag jvm-{idx}.log + +''' + configs.append(config) + + jvm_fluentd_config = '\n'.join(configs) + jvm_touch_command = '\n'.join(touch_commands) + def scheduling() -> dict: result = { 'automaticRestart': False, @@ -156,6 +183,35 @@ def scheduling() -> dict: #!/bin/bash set -x +WORKER_DATA_DISK_NAME="{worker_data_disk_name}" +UNRESERVED_WORKER_DATA_DISK_SIZE_GB="{unreserved_disk_storage_gb}" +ACCEPTABLE_QUERY_JAR_URL_PREFIX="{ACCEPTABLE_QUERY_JAR_URL_PREFIX}" + +# format worker data disk +sudo mkfs.xfs -m reflink=1 -n ftype=1 /dev/$WORKER_DATA_DISK_NAME +sudo mkdir -p /mnt/disks/$WORKER_DATA_DISK_NAME +sudo mount -o prjquota /dev/$WORKER_DATA_DISK_NAME /mnt/disks/$WORKER_DATA_DISK_NAME +sudo chmod a+w /mnt/disks/$WORKER_DATA_DISK_NAME +XFS_DEVICE=$(xfs_info /mnt/disks/$WORKER_DATA_DISK_NAME | head -n 1 | awk '{{ print $1 }}' | awk 'BEGIN {{ FS = "=" }}; {{ print $2 }}') + +# reconfigure docker to use local SSD +sudo service docker stop +sudo mv /var/lib/docker /mnt/disks/$WORKER_DATA_DISK_NAME/docker +sudo ln -s /mnt/disks/$WORKER_DATA_DISK_NAME/docker /var/lib/docker +sudo service docker start + +# reconfigure /batch and /logs and /gcsfuse to use local SSD +sudo mkdir -p /mnt/disks/$WORKER_DATA_DISK_NAME/batch/ +sudo ln -s /mnt/disks/$WORKER_DATA_DISK_NAME/batch /batch + +sudo mkdir -p /mnt/disks/$WORKER_DATA_DISK_NAME/logs/ +sudo ln -s /mnt/disks/$WORKER_DATA_DISK_NAME/logs /logs + +sudo mkdir -p /mnt/disks/$WORKER_DATA_DISK_NAME/cloudfuse/ +sudo ln -s /mnt/disks/$WORKER_DATA_DISK_NAME/cloudfuse /cloudfuse + +sudo mkdir -p /etc/netns + # Setup fluentd touch /worker.log touch /run.log @@ -193,6 +249,10 @@ def scheduling() -> dict: EOF +sudo tee /etc/google-fluentd/config.d/jvm-logs.conf < dict: EOF rm /etc/google-fluentd/google-fluentd.conf.bak -sudo service google-fluentd restart - -WORKER_DATA_DISK_NAME="{worker_data_disk_name}" -UNRESERVED_WORKER_DATA_DISK_SIZE_GB="{unreserved_disk_storage_gb}" -ACCEPTABLE_QUERY_JAR_URL_PREFIX="{ACCEPTABLE_QUERY_JAR_URL_PREFIX}" - -# format worker data disk -sudo mkfs.xfs -m reflink=1 -n ftype=1 /dev/$WORKER_DATA_DISK_NAME -sudo mkdir -p /mnt/disks/$WORKER_DATA_DISK_NAME -sudo mount -o prjquota /dev/$WORKER_DATA_DISK_NAME /mnt/disks/$WORKER_DATA_DISK_NAME -sudo chmod a+w /mnt/disks/$WORKER_DATA_DISK_NAME -XFS_DEVICE=$(xfs_info /mnt/disks/$WORKER_DATA_DISK_NAME | head -n 1 | awk '{{ print $1 }}' | awk 'BEGIN {{ FS = "=" }}; {{ print $2 }}') +mkdir -p /batch/jvm-container-logs/ +{jvm_touch_command} -# reconfigure docker to use local SSD -sudo service docker stop -sudo mv /var/lib/docker /mnt/disks/$WORKER_DATA_DISK_NAME/docker -sudo ln -s /mnt/disks/$WORKER_DATA_DISK_NAME/docker /var/lib/docker -sudo service docker start - -# reconfigure /batch and /logs and /gcsfuse to use local SSD -sudo mkdir -p /mnt/disks/$WORKER_DATA_DISK_NAME/batch/ -sudo ln -s /mnt/disks/$WORKER_DATA_DISK_NAME/batch /batch - -sudo mkdir -p /mnt/disks/$WORKER_DATA_DISK_NAME/logs/ -sudo ln -s /mnt/disks/$WORKER_DATA_DISK_NAME/logs /logs - -sudo mkdir -p /mnt/disks/$WORKER_DATA_DISK_NAME/cloudfuse/ -sudo ln -s /mnt/disks/$WORKER_DATA_DISK_NAME/cloudfuse /cloudfuse - -sudo mkdir -p /etc/netns +sudo service google-fluentd restart CORES=$(nproc) NAMESPACE=$(curl -s -H "Metadata-Flavor: Google" "http://metadata.google.internal/computeMetadata/v1/instance/attributes/namespace") @@ -278,6 +312,12 @@ def scheduling() -> dict: {make_global_config_str} +mkdir /deploy-config +cat 
>/deploy-config/deploy-config.json < dict: -v /batch:/batch:shared \ -v /logs:/logs \ -v /global-config:/global-config \ +-v /deploy-config:/deploy-config \ -v /cloudfuse:/cloudfuse:shared \ -v /etc/netns:/etc/netns \ -v /sys/fs/cgroup:/sys/fs/cgroup \ diff --git a/batch/batch/cloud/gcp/driver/driver.py b/batch/batch/cloud/gcp/driver/driver.py index 0d3af9a8106..ae506ed757f 100644 --- a/batch/batch/cloud/gcp/driver/driver.py +++ b/batch/batch/cloud/gcp/driver/driver.py @@ -137,9 +137,17 @@ def __init__( self.project = project self.namespace = namespace self.zone_monitor = zone_monitor - self.inst_coll_manager = inst_coll_manager self.job_private_inst_manager = job_private_inst_manager - self.billing_manager = billing_manager + self._billing_manager = billing_manager + self._inst_coll_manager = inst_coll_manager + + @property + def billing_manager(self) -> GCPBillingManager: + return self._billing_manager + + @property + def inst_coll_manager(self) -> InstanceCollectionManager: + return self._inst_coll_manager async def shutdown(self) -> None: try: diff --git a/batch/batch/driver/driver.py b/batch/batch/driver/driver.py index 0aa05c2b4b5..8302e6db0bb 100644 --- a/batch/batch/driver/driver.py +++ b/batch/batch/driver/driver.py @@ -3,9 +3,7 @@ from typing import Awaitable, Callable from gear import Database -from hailtop import aiotools -from ..inst_coll_config import InstanceCollectionConfigs from .billing_manager import CloudBillingManager from .instance_collection import InstanceCollectionManager @@ -25,20 +23,14 @@ async def process_outstanding_events(db: Database, process_events_since: Callabl class CloudDriver(abc.ABC): - inst_coll_manager: InstanceCollectionManager - billing_manager: CloudBillingManager + @property + @abc.abstractmethod + def inst_coll_manager(self) -> InstanceCollectionManager: + raise NotImplementedError - @staticmethod + @property @abc.abstractmethod - async def create( - app, - db: Database, - machine_name_prefix: str, - namespace: str, - inst_coll_configs: InstanceCollectionConfigs, - credentials_file: str, - task_manager: aiotools.BackgroundTaskManager, - ) -> 'CloudDriver': + def billing_manager(self) -> CloudBillingManager: raise NotImplementedError @abc.abstractmethod diff --git a/batch/batch/driver/job.py b/batch/batch/driver/job.py index d7b1ce876a5..411a802f730 100644 --- a/batch/batch/driver/job.py +++ b/batch/batch/driver/job.py @@ -8,7 +8,7 @@ import aiohttp -from gear import Database +from gear import Database, K8sCache from hailtop import httpx from hailtop.aiotools import BackgroundTaskManager from hailtop.utils import Notice, retry_transient_errors, time_msecs @@ -21,7 +21,6 @@ from ..instance_config import QuantifiedResource from ..spec_writer import SpecWriter from .instance import Instance -from .k8s_cache import K8sCache if TYPE_CHECKING: from .instance_collection import InstanceCollectionManager # pylint: disable=cyclic-import diff --git a/batch/batch/driver/main.py b/batch/batch/driver/main.py index 409a74c2e68..ae9c96908a9 100644 --- a/batch/batch/driver/main.py +++ b/batch/batch/driver/main.py @@ -25,6 +25,7 @@ from gear import ( AuthClient, Database, + K8sCache, check_csrf_token, json_request, json_response, @@ -59,7 +60,6 @@ from .driver import CloudDriver from .instance_collection import InstanceCollectionManager, JobPrivateInstanceManager, Pool from .job import mark_job_complete, mark_job_started -from .k8s_cache import K8sCache uvloop.install() diff --git a/batch/batch/exceptions.py b/batch/batch/exceptions.py index 
1c8f32acd50..43a52451655 100644 --- a/batch/batch/exceptions.py +++ b/batch/batch/exceptions.py @@ -47,3 +47,12 @@ def __init__(self, message, severity): super().__init__(message) self.message = message self.ui_error_type = severity + + +class QueryError(BatchUserError): + def __init__(self, message): + super().__init__(message, 'error') + self.message = message + + def http_response(self): + return web.HTTPBadRequest(reason=self.message) diff --git a/batch/batch/front_end/front_end.py b/batch/batch/front_end/front_end.py index c486b275746..a0f0d7f7d35 100644 --- a/batch/batch/front_end/front_end.py +++ b/batch/batch/front_end/front_end.py @@ -74,6 +74,7 @@ ClosedBillingProjectError, InvalidBillingLimitError, NonExistentBillingProjectError, + QueryError, ) from ..file_store import FileStore from ..globals import BATCH_FORMAT_VERSION, HTTP_CLIENT_MAX_SIZE, RESERVED_STORAGE_GB_PER_CORE, complete_states @@ -81,6 +82,7 @@ from ..resource_usage import ResourceUsageMonitor from ..spec_writer import SpecWriter from ..utils import query_billing_projects, regions_to_bits_rep, unavailable_if_frozen +from .query import CURRENT_QUERY_VERSION, build_batch_jobs_query from .validate import ValidationError, validate_and_clean_jobs, validate_batch, validate_batch_update uvloop.install() @@ -216,7 +218,7 @@ async def _handle_ui_error(session, f, *args, **kwargs): async def _handle_api_error(f, *args, **kwargs): try: - await f(*args, **kwargs) + return await f(*args, **kwargs) except BatchOperationAlreadyCompletedError as e: log.info(e.message) return @@ -313,106 +315,9 @@ async def _query_batch_jobs_for_billing(request, batch_id): return jobs, last_job_id -async def _query_batch_jobs(request, batch_id): - state_query_values = { - 'pending': ['Pending'], - 'ready': ['Ready'], - 'creating': ['Creating'], - 'running': ['Running'], - 'live': ['Ready', 'Creating', 'Running'], - 'cancelled': ['Cancelled'], - 'error': ['Error'], - 'failed': ['Failed'], - 'bad': ['Error', 'Failed'], - 'success': ['Success'], - 'done': ['Cancelled', 'Error', 'Failed', 'Success'], - } - - db = request.app['db'] - - # batch has already been validated - where_conditions = ['(jobs.batch_id = %s AND batch_updates.committed)'] - where_args = [batch_id] - - last_job_id = request.query.get('last_job_id') - if last_job_id is not None: - last_job_id = int(last_job_id) - where_conditions.append('(jobs.job_id > %s)') - where_args.append(last_job_id) - - q = request.query.get('q', '') - terms = q.split() - for _t in terms: - if _t[0] == '!': - negate = True - t = _t[1:] - else: - negate = False - t = _t - - if '=' in t: - k, v = t.split('=', 1) - if k == 'job_id': - condition = '(jobs.job_id = %s)' - args = [v] - else: - condition = ''' -((jobs.batch_id, jobs.job_id) IN - (SELECT batch_id, job_id FROM job_attributes - WHERE `key` = %s AND `value` = %s)) -''' - args = [k, v] - elif t.startswith('has:'): - k = t[4:] - condition = ''' -((jobs.batch_id, jobs.job_id) IN - (SELECT batch_id, job_id FROM job_attributes - WHERE `key` = %s)) -''' - args = [k] - elif t in state_query_values: - values = state_query_values[t] - condition = ' OR '.join(['(jobs.state = %s)' for v in values]) - condition = f'({condition})' - args = values - else: - session = await aiohttp_session.get_session(request) - set_message(session, f'Invalid search term: {t}.', 'error') - return ([], None) - - if negate: - condition = f'(NOT {condition})' - - where_conditions.append(condition) - where_args.extend(args) - - sql = f''' -WITH base_t AS -( - SELECT jobs.*, batches.user, 
batches.billing_project, batches.format_version, - job_attributes.value AS name - FROM jobs - INNER JOIN batches ON jobs.batch_id = batches.id - INNER JOIN batch_updates ON jobs.batch_id = batch_updates.batch_id AND jobs.update_id = batch_updates.update_id - LEFT JOIN job_attributes - ON jobs.batch_id = job_attributes.batch_id AND - jobs.job_id = job_attributes.job_id AND - job_attributes.`key` = 'name' - WHERE {' AND '.join(where_conditions)} - LIMIT 50 -) -SELECT base_t.*, COALESCE(SUM(`usage` * rate), 0) AS cost -FROM base_t -LEFT JOIN ( - SELECT aggregated_job_resources_v2.batch_id, aggregated_job_resources_v2.job_id, resource_id, CAST(COALESCE(SUM(`usage`), 0) AS SIGNED) AS `usage` - FROM base_t - LEFT JOIN aggregated_job_resources_v2 ON base_t.batch_id = aggregated_job_resources_v2.batch_id AND base_t.job_id = aggregated_job_resources_v2.job_id - GROUP BY aggregated_job_resources_v2.batch_id, aggregated_job_resources_v2.job_id, aggregated_job_resources_v2.resource_id -) AS usage_t ON base_t.batch_id = usage_t.batch_id AND base_t.job_id = usage_t.job_id -LEFT JOIN resources ON usage_t.resource_id = resources.resource_id -GROUP BY base_t.batch_id, base_t.job_id; -''' - sql_args = where_args +async def _query_batch_jobs(request, batch_id: int, version: int, q: str, last_job_id: Optional[int]): + db: Database = request.app['db'] + sql, sql_args = build_batch_jobs_query(batch_id, version, q, last_job_id) jobs = [job_record_to_dict(record, record['name']) async for record in db.select_and_fetchall(sql, sql_args)] @@ -483,10 +388,9 @@ async def get_completed_batches_ordered_by_completed_time(request, userdata): return web.json_response(body) -@routes.get('/api/v1alpha/batches/{batch_id}/jobs') -@rest_billing_project_users_only -async def get_jobs(request, userdata, batch_id): # pylint: disable=unused-argument +async def _get_jobs(request, batch_id: int, version: int, q: str, last_job_id: Optional[int]): db = request.app['db'] + record = await db.select_and_fetchone( ''' SELECT * FROM batches @@ -497,10 +401,33 @@ async def get_jobs(request, userdata, batch_id): # pylint: disable=unused-argum if not record: raise web.HTTPNotFound() - jobs, last_job_id = await _query_batch_jobs(request, batch_id) + jobs, last_job_id = await _query_batch_jobs(request, batch_id, version, q, last_job_id) + resp = {'jobs': jobs} if last_job_id is not None: resp['last_job_id'] = last_job_id + return resp + + +@routes.get('/api/v1alpha/batches/{batch_id}/jobs') +@rest_billing_project_users_only +async def get_jobs_v1(request, userdata, batch_id): # pylint: disable=unused-argument + q = request.query.get('q', '') + last_job_id = request.query.get('last_job_id') + if last_job_id is not None: + last_job_id = int(last_job_id) + resp = await _handle_api_error(_get_jobs, request, batch_id, 1, q, last_job_id) + return json_response(resp) + + +@routes.get('/api/v2alpha/batches/{batch_id}/jobs') +@rest_billing_project_users_only +async def get_jobs_v2(request, userdata, batch_id): # pylint: disable=unused-argument + q = request.query.get('q', '') + last_job_id = request.query.get('last_job_id') + if last_job_id is not None: + last_job_id = int(last_job_id) + resp = await _handle_api_error(_get_jobs, request, batch_id, 2, q, last_job_id) return json_response(resp) @@ -1314,14 +1241,6 @@ async def _create_jobs(userdata: dict, job_specs: dict, batch_id: int, update_id 'mount_in_copy': False, } ) - secrets.append( - { - 'namespace': DEFAULT_NAMESPACE, - 'name': 'worker-deploy-config', - 'mount_path': '/deploy-config', - 
'mount_in_copy': False, - } - ) secrets.append( { 'namespace': DEFAULT_NAMESPACE, @@ -1968,7 +1887,19 @@ async def ui_batch(request, userdata, batch_id): app = request.app batch = await _get_batch(app, batch_id) - jobs, last_job_id = await _query_batch_jobs(request, batch_id) + q = request.query.get('q', '') + last_job_id = request.query.get('last_job_id') + if last_job_id is not None: + last_job_id = int(last_job_id) + + try: + jobs, last_job_id = await _query_batch_jobs(request, batch_id, CURRENT_QUERY_VERSION, q, last_job_id) + except QueryError as e: + session = await aiohttp_session.get_session(request) + set_message(session, e.message, 'error') + jobs = [] + last_job_id = None + for j in jobs: j['duration'] = humanize_timedelta_msecs(j['duration']) j['cost'] = cost_str(j['cost']) @@ -1976,7 +1907,11 @@ async def ui_batch(request, userdata, batch_id): batch['cost'] = cost_str(batch['cost']) - page_context = {'batch': batch, 'q': request.query.get('q'), 'last_job_id': last_job_id} + page_context = { + 'batch': batch, + 'q': q, + 'last_job_id': last_job_id, + } return await render_template('batch', request, userdata, 'batch.html', page_context) diff --git a/batch/batch/front_end/query/__init__.py b/batch/batch/front_end/query/__init__.py new file mode 100644 index 00000000000..61dfdc7459e --- /dev/null +++ b/batch/batch/front_end/query/__init__.py @@ -0,0 +1,13 @@ +from typing import Optional + +from .query_v1 import parse_batch_jobs_query_v1 +from .query_v2 import parse_batch_jobs_query_v2 + +CURRENT_QUERY_VERSION = 1 + + +def build_batch_jobs_query(batch_id: int, version: int, q: str, last_job_id: Optional[int]): + if version == 1: + return parse_batch_jobs_query_v1(batch_id, q, last_job_id) + assert version == 2, version + return parse_batch_jobs_query_v2(batch_id, q, last_job_id) diff --git a/batch/batch/front_end/query/operators.py b/batch/batch/front_end/query/operators.py new file mode 100644 index 00000000000..d5d008bca2d --- /dev/null +++ b/batch/batch/front_end/query/operators.py @@ -0,0 +1,93 @@ +import abc + +from ...exceptions import QueryError + + +class Operator(abc.ABC): + @abc.abstractmethod + def to_sql(self) -> str: + raise NotImplementedError + + +class ComparisonOperator(Operator, abc.ABC): + symbols = {'>=', '>', '<', '<=', '==', '=', '!='} + + +class MatchOperator(Operator, abc.ABC): + symbols = {'=', '!=', '!~', '=~'} + + +class PartialMatchOperator(MatchOperator, abc.ABC): + pass + + +class ExactMatchOperator(MatchOperator, abc.ABC): + pass + + +class GreaterThanEqualOperator(ComparisonOperator): + def to_sql(self) -> str: + return '>=' + + +class LessThanEqualOperator(ComparisonOperator): + def to_sql(self) -> str: + return '<=' + + +class GreaterThanOperator(ComparisonOperator): + def to_sql(self) -> str: + return '>' + + +class LessThanOperator(ComparisonOperator): + def to_sql(self) -> str: + return '<' + + +class NotEqualExactMatchOperator(ExactMatchOperator, ComparisonOperator): + def to_sql(self) -> str: + return '!=' + + +class EqualExactMatchOperator(ExactMatchOperator, ComparisonOperator): + def to_sql(self) -> str: + return '=' + + +class NotEqualPartialMatchOperator(PartialMatchOperator): + def to_sql(self) -> str: + return 'NOT LIKE' + + +class EqualPartialMatchOperator(PartialMatchOperator): + def to_sql(self) -> str: + return 'LIKE' + + +symbols_to_operator = { + '>=': GreaterThanEqualOperator(), + '<=': LessThanEqualOperator(), + '>': GreaterThanOperator(), + '<': LessThanOperator(), + '!~': NotEqualPartialMatchOperator(), + '=~': 
EqualPartialMatchOperator(), + '!=': NotEqualExactMatchOperator(), + '==': EqualExactMatchOperator(), + '=': EqualExactMatchOperator(), +} + + +def pad_maybe_operator(s: str) -> str: + for symbol in symbols_to_operator: + if symbol in s: + padded_symbol = f' {symbol} ' + tokens = [token.strip() for token in s.split(symbol)] + return padded_symbol.join(tokens) + return s + + +def get_operator(symbol: str) -> Operator: + if symbol not in symbols_to_operator: + raise QueryError(f'unknown operator {symbol}') + return symbols_to_operator[symbol] diff --git a/batch/batch/front_end/query/query.py b/batch/batch/front_end/query/query.py new file mode 100644 index 00000000000..a2edb70d1ae --- /dev/null +++ b/batch/batch/front_end/query/query.py @@ -0,0 +1,321 @@ +import abc +from enum import Enum +from typing import Any, List, Tuple + +from hailtop.utils import parse_timestamp_msecs + +from ...exceptions import QueryError +from .operators import ( + ComparisonOperator, + ExactMatchOperator, + MatchOperator, + NotEqualExactMatchOperator, + PartialMatchOperator, + get_operator, +) + + +class State(Enum): + PENDING = 'Pending' + READY = 'Ready' + CREATING = 'Creating' + RUNNING = 'Running' + CANCELLED = 'Cancelled' + ERROR = 'Error' + FAILED = 'Failed' + SUCCESS = 'Success' + + +state_search_term_to_states = { + 'pending': [State.PENDING], + 'ready': [State.READY], + 'creating': [State.CREATING], + 'running': [State.RUNNING], + 'live': [State.READY, State.CREATING, State.RUNNING], + 'cancelled': [State.CANCELLED], + 'error': [State.ERROR], + 'failed': [State.FAILED], + 'bad': [State.ERROR, State.FAILED], + 'success': [State.SUCCESS], + 'done': [State.CANCELLED, State.ERROR, State.FAILED, State.SUCCESS], +} + + +def parse_int(word: str) -> int: + try: + return int(word) + except ValueError as e: + raise QueryError(f'expected int, but found {word}') from e + + +def parse_float(word: str) -> float: + try: + return float(word) + except ValueError as e: + raise QueryError(f'expected float, but found {word}') from e + + +def parse_date(word: str) -> int: + try: + return parse_timestamp_msecs(word) + except ValueError as e: + raise QueryError(f'expected date, but found {word}') from e + + +def parse_cost(word: str) -> float: + word = word.lstrip('$') + return parse_float(word) + + +class Query(abc.ABC): + @abc.abstractmethod + def query(self) -> Tuple[str, List[Any]]: + raise NotImplementedError + + +class StateQuery(Query): + @staticmethod + def parse(op: str, state: str) -> 'StateQuery': + operator = get_operator(op) + if not isinstance(operator, ExactMatchOperator): + raise QueryError(f'unexpected operator "{op}" expected one of {ExactMatchOperator.symbols}') + if state not in state_search_term_to_states: + raise QueryError(f'unknown state "{state}"') + return StateQuery(state, operator) + + def __init__(self, state: str, operator: ExactMatchOperator): + self.state = state + self.operator = operator + + def query(self) -> Tuple[str, List[Any]]: + states = [s.value for s in state_search_term_to_states[self.state]] + condition = ' OR '.join(['(jobs.state = %s)' for _ in states]) + condition = f'({condition})' + if isinstance(self.operator, NotEqualExactMatchOperator): + condition = f'(NOT {condition})' + return (condition, states) + + +class JobIdQuery(Query): + @staticmethod + def parse(op: str, maybe_job_id: str) -> 'JobIdQuery': + operator = get_operator(op) + if not isinstance(operator, ComparisonOperator): + raise QueryError(f'unexpected operator "{op}" expected one of {ComparisonOperator.symbols}') + 
job_id = parse_int(maybe_job_id) + return JobIdQuery(job_id, operator) + + def __init__(self, job_id: int, operator: ComparisonOperator): + self.job_id = job_id + self.operator = operator + + def query(self) -> Tuple[str, List[Any]]: + op = self.operator.to_sql() + return (f'(jobs.job_id {op} %s)', [self.job_id]) + + +class InstanceQuery(Query): + @staticmethod + def parse(op: str, instance: str) -> 'InstanceQuery': + operator = get_operator(op) + if not isinstance(operator, MatchOperator): + raise QueryError(f'unexpected operator "{op}" expected one of {MatchOperator.symbols}') + return InstanceQuery(instance, operator) + + def __init__(self, instance: str, operator: MatchOperator): + self.instance = instance + self.operator = operator + + def query(self) -> Tuple[str, List[Any]]: + op = self.operator.to_sql() + if isinstance(self.operator, PartialMatchOperator): + self.instance = f'%{self.instance}%' + sql = f''' +((jobs.batch_id, jobs.job_id) IN + (SELECT batch_id, job_id FROM attempts + WHERE instance_name {op} %s)) +''' + return (sql, [self.instance]) + + +class InstanceCollectionQuery(Query): + @staticmethod + def parse(op: str, instance_collection: str) -> 'InstanceCollectionQuery': + operator = get_operator(op) + if not isinstance(operator, MatchOperator): + raise QueryError(f'unexpected operator "{op}" expected one of {MatchOperator.symbols}') + return InstanceCollectionQuery(instance_collection, operator) + + def __init__(self, inst_coll: str, operator: MatchOperator): + self.inst_coll = inst_coll + self.operator = operator + + def query(self) -> Tuple[str, List[Any]]: + op = self.operator.to_sql() + if isinstance(self.operator, PartialMatchOperator): + self.inst_coll = f'%{self.inst_coll}%' + sql = f'(jobs.inst_coll {op} %s)' + return (sql, [self.inst_coll]) + + +class QuotedExactMatchQuery(Query): + @staticmethod + def parse(term: str) -> 'QuotedExactMatchQuery': + if len(term) < 3: + raise QueryError(f'expected a string of minimum length 3. Found {term}') + if term[-1] != '"': + raise QueryError("expected the last character of the string to be '\"'") + return QuotedExactMatchQuery(term[1:-1]) + + def __init__(self, term: str): + self.term = term + + def query(self) -> Tuple[str, List[Any]]: + sql = ''' +(((jobs.batch_id, jobs.job_id) IN + (SELECT batch_id, job_id FROM job_attributes + WHERE `key` = %s OR `value` = %s)) OR +((jobs.batch_id, jobs.job_id) IN + (SELECT batch_id, job_id FROM attempts + WHERE instance_name = %s))) +''' + return (sql, [self.term, self.term, self.term]) + + +class UnquotedPartialMatchQuery(Query): + @staticmethod + def parse(term: str) -> 'UnquotedPartialMatchQuery': + if len(term) < 1: + raise QueryError(f'expected a string of minimum length 1. 
Found {term}') + if term[0] == '"': + raise QueryError("expected the first character of the string to not be '\"'") + if term[-1] == '"': + raise QueryError("expected the last character of the string to not be '\"'") + return UnquotedPartialMatchQuery(term) + + def __init__(self, term: str): + self.term = term + + def query(self) -> Tuple[str, List[Any]]: + sql = ''' +(((jobs.batch_id, jobs.job_id) IN + (SELECT batch_id, job_id FROM job_attributes + WHERE `key` LIKE %s OR `value` LIKE %s)) OR +((jobs.batch_id, jobs.job_id) IN + (SELECT batch_id, job_id FROM attempts + WHERE instance_name LIKE %s))) +''' + escaped_term = f'%{self.term}%' + return (sql, [escaped_term, escaped_term, escaped_term]) + + +class KeywordQuery(Query): + @staticmethod + def parse(op: str, key: str, value: str) -> 'KeywordQuery': + operator = get_operator(op) + if not isinstance(operator, MatchOperator): + raise QueryError(f'unexpected operator "{op}" expected one of {MatchOperator.symbols}') + return KeywordQuery(operator, key, value) + + def __init__(self, operator: MatchOperator, key: str, value: str): + self.operator = operator + self.key = key + self.value = value + + def query(self) -> Tuple[str, List[Any]]: + op = self.operator.to_sql() + if isinstance(self.operator, PartialMatchOperator): + self.value = f'%{self.value}%' + sql = f''' +((jobs.batch_id, jobs.job_id) IN + (SELECT batch_id, job_id FROM job_attributes + WHERE `key` = %s AND `value` {op} %s)) + ''' + return (sql, [self.key, self.value]) + + +class StartTimeQuery(Query): + @staticmethod + def parse(op: str, time: str) -> 'StartTimeQuery': + operator = get_operator(op) + if not isinstance(operator, ComparisonOperator): + raise QueryError(f'unexpected operator "{op}" expected one of {ComparisonOperator.symbols}') + time_msecs = parse_date(time) + return StartTimeQuery(operator, time_msecs) + + def __init__(self, operator: ComparisonOperator, time_msecs: int): + self.operator = operator + self.time_msecs = time_msecs + + def query(self) -> Tuple[str, List[Any]]: + op = self.operator.to_sql() + sql = f''' +((jobs.batch_id, jobs.job_id) IN + (SELECT batch_id, job_id FROM attempts + WHERE start_time {op} %s)) +''' + return (sql, [self.time_msecs]) + + +class EndTimeQuery(Query): + @staticmethod + def parse(op: str, time: str) -> 'EndTimeQuery': + operator = get_operator(op) + if not isinstance(operator, ComparisonOperator): + raise QueryError(f'unexpected operator "{op}" expected one of {ComparisonOperator.symbols}') + time_msecs = parse_date(time) + return EndTimeQuery(operator, time_msecs) + + def __init__(self, operator: ComparisonOperator, time_msecs: int): + self.operator = operator + self.time_msecs = time_msecs + + def query(self) -> Tuple[str, List[Any]]: + op = self.operator.to_sql() + sql = f''' +((jobs.batch_id, jobs.job_id) IN + (SELECT batch_id, job_id FROM attempts + WHERE end_time {op} %s)) +''' + return (sql, [self.time_msecs]) + + +class DurationQuery(Query): + @staticmethod + def parse(op: str, time: str) -> 'DurationQuery': + operator = get_operator(op) + if not isinstance(operator, ComparisonOperator): + raise QueryError(f'unexpected operator "{op}" expected one of {ComparisonOperator.symbols}') + time_msecs = int(parse_float(time) * 1000 + 1) + return DurationQuery(operator, time_msecs) + + def __init__(self, operator: ComparisonOperator, time_msecs: int): + self.operator = operator + self.time_msecs = time_msecs + + def query(self) -> Tuple[str, List[Any]]: + op = self.operator.to_sql() + sql = f''' +((jobs.batch_id, jobs.job_id) IN 
+ (SELECT batch_id, job_id FROM attempts + WHERE end_time - start_time {op} %s)) +''' + return (sql, [self.time_msecs]) + + +class CostQuery(Query): + @staticmethod + def parse(op: str, cost_str: str) -> 'CostQuery': + operator = get_operator(op) + if not isinstance(operator, ComparisonOperator): + raise QueryError(f'unexpected operator "{op}" expected one of {ComparisonOperator.symbols}') + cost = parse_float(cost_str) + return CostQuery(operator, cost) + + def __init__(self, operator: ComparisonOperator, cost: float): + self.operator = operator + self.cost = cost + + def query(self) -> Tuple[str, List[Any]]: + op = self.operator.to_sql() + return (f'(cost {op} %s)', [self.cost]) diff --git a/batch/batch/front_end/query/query_v1.py b/batch/batch/front_end/query/query_v1.py new file mode 100644 index 00000000000..37536e5cfe3 --- /dev/null +++ b/batch/batch/front_end/query/query_v1.py @@ -0,0 +1,89 @@ +from typing import Any, List, Optional + +from ...exceptions import QueryError +from .query import state_search_term_to_states + + +def parse_batch_jobs_query_v1(batch_id: int, q: str, last_job_id: Optional[int]): + # batch has already been validated + where_conditions = ['(jobs.batch_id = %s AND batch_updates.committed)'] + where_args: List[Any] = [batch_id] + + if last_job_id is not None: + where_conditions.append('(jobs.job_id > %s)') + where_args.append(last_job_id) + + terms = q.split() + for _t in terms: + if _t[0] == '!': + negate = True + t = _t[1:] + else: + negate = False + t = _t + + args: List[Any] + + if '=' in t: + k, v = t.split('=', 1) + if k == 'job_id': + condition = '(jobs.job_id = %s)' + args = [v] + else: + condition = ''' +((jobs.batch_id, jobs.job_id) IN + (SELECT batch_id, job_id FROM job_attributes + WHERE `key` = %s AND `value` = %s)) +''' + args = [k, v] + elif t.startswith('has:'): + k = t[4:] + condition = ''' +((jobs.batch_id, jobs.job_id) IN + (SELECT batch_id, job_id FROM job_attributes + WHERE `key` = %s)) +''' + args = [k] + elif t in state_search_term_to_states: + values = state_search_term_to_states[t] + condition = ' OR '.join(['(jobs.state = %s)' for _ in values]) + condition = f'({condition})' + args = [v.value for v in values] + else: + raise QueryError(f'Invalid search term: {t}.') + + if negate: + condition = f'(NOT {condition})' + + where_conditions.append(condition) + where_args.extend(args) + + sql = f''' +WITH base_t AS +( + SELECT jobs.*, batches.user, batches.billing_project, batches.format_version, + job_attributes.value AS name + FROM jobs + INNER JOIN batches ON jobs.batch_id = batches.id + INNER JOIN batch_updates ON jobs.batch_id = batch_updates.batch_id AND jobs.update_id = batch_updates.update_id + LEFT JOIN job_attributes + ON jobs.batch_id = job_attributes.batch_id AND + jobs.job_id = job_attributes.job_id AND + job_attributes.`key` = 'name' + WHERE {' AND '.join(where_conditions)} + LIMIT 50 +) +SELECT base_t.*, COALESCE(SUM(`usage` * rate), 0) AS cost +FROM base_t +LEFT JOIN ( + SELECT aggregated_job_resources_v2.batch_id, aggregated_job_resources_v2.job_id, resource_id, CAST(COALESCE(SUM(`usage`), 0) AS SIGNED) AS `usage` + FROM base_t + LEFT JOIN aggregated_job_resources_v2 ON base_t.batch_id = aggregated_job_resources_v2.batch_id AND base_t.job_id = aggregated_job_resources_v2.job_id + GROUP BY aggregated_job_resources_v2.batch_id, aggregated_job_resources_v2.job_id, aggregated_job_resources_v2.resource_id +) AS usage_t ON base_t.batch_id = usage_t.batch_id AND base_t.job_id = usage_t.job_id +LEFT JOIN resources ON 
usage_t.resource_id = resources.resource_id
+GROUP BY base_t.batch_id, base_t.job_id;
+'''
+    sql_args = where_args
+
+    return (sql, sql_args)
diff --git a/batch/batch/front_end/query/query_v2.py b/batch/batch/front_end/query/query_v2.py
new file mode 100644
index 00000000000..297eac4b028
--- /dev/null
+++ b/batch/batch/front_end/query/query_v2.py
@@ -0,0 +1,162 @@
+from typing import List, Optional
+
+from ...exceptions import QueryError
+from .operators import (
+    GreaterThanEqualOperator,
+    GreaterThanOperator,
+    LessThanEqualOperator,
+    LessThanOperator,
+    pad_maybe_operator,
+)
+from .query import (
+    CostQuery,
+    DurationQuery,
+    EndTimeQuery,
+    InstanceCollectionQuery,
+    InstanceQuery,
+    JobIdQuery,
+    KeywordQuery,
+    Query,
+    QuotedExactMatchQuery,
+    StartTimeQuery,
+    StateQuery,
+    UnquotedPartialMatchQuery,
+)
+
+# <query> ::= "" | <query> "\n" <statement>
+# <statement> ::= <instance> | <instance-collection> | <job-id> | <state> | <start-time>
+#               | <end-time> | <duration> | <cost> | <quoted-exact-match>
+#               | <unquoted-partial-match>
+# <exact-match-op> ::= "=" | "==" | "!="
+# <partial-match-op> ::= "!~" | "=~"
+# <match-op> ::= <exact-match-op> | <partial-match-op>
+# <comparison-op> ::= ">=" | "<=" | ">" | "<" | <exact-match-op>
+# <instance> ::= "instance" <match-op> <str>
+# <instance-collection> ::= "instance_collection" <match-op> <str>
+# <job-id> ::= "job_id" <comparison-op> <int>
+# <state> ::= "state" <exact-match-op> <str>
+# <start-time> ::= "start_time" <comparison-op> <date>
+# <end-time> ::= "end_time" <comparison-op> <date>
+# <duration> ::= "duration" <comparison-op> <float>
+# <cost> ::= "cost" <comparison-op> <float>
+# <quoted-exact-match> ::= \" <str> \"
+# <unquoted-partial-match> ::= <str>
+
+
+def parse_batch_jobs_query_v2(batch_id: int, q: str, last_job_id: Optional[int]):
+    queries: List[Query] = []
+
+    # logic to make time interval queries fast
+    min_start_gt_query: Optional[StartTimeQuery] = None
+    max_end_lt_query: Optional[EndTimeQuery] = None
+
+    if q:
+        terms = q.rstrip().lstrip().split('\n')
+        for _term in terms:
+            _term = pad_maybe_operator(_term)
+            statement = _term.split()
+            if len(statement) == 1:
+                word = statement[0]
+                if word[0] == '"':
+                    queries.append(QuotedExactMatchQuery.parse(word))
+                else:
+                    queries.append(UnquotedPartialMatchQuery.parse(word))
+            elif len(statement) == 3:
+                left, op, right = statement
+                if left == 'instance':
+                    queries.append(InstanceQuery.parse(op, right))
+                elif left == 'instance_collection':
+                    queries.append(InstanceCollectionQuery.parse(op, right))
+                elif left == 'job_id':
+                    queries.append(JobIdQuery.parse(op, right))
+                elif left == 'state':
+                    queries.append(StateQuery.parse(op, right))
+                elif left == 'start_time':
+                    st_query = StartTimeQuery.parse(op, right)
+                    queries.append(st_query)
+                    if (type(st_query.operator) in [GreaterThanOperator, GreaterThanEqualOperator]) and (
+                        min_start_gt_query is None or min_start_gt_query.time_msecs >= st_query.time_msecs
+                    ):
+                        min_start_gt_query = st_query
+                elif left == 'end_time':
+                    et_query = EndTimeQuery.parse(op, right)
+                    queries.append(et_query)
+                    if (type(et_query.operator) in [LessThanOperator, LessThanEqualOperator]) and (
+                        max_end_lt_query is None or max_end_lt_query.time_msecs <= et_query.time_msecs
+                    ):
+                        max_end_lt_query = et_query
+                elif left == 'duration':
+                    queries.append(DurationQuery.parse(op, right))
+                elif left == 'cost':
+                    queries.append(CostQuery.parse(op, right))
+                else:
+                    queries.append(KeywordQuery.parse(op, left, right))
+            else:
+                raise QueryError(f'could not parse term "{_term}"')
+
+    # this is to make time interval queries fast by using the bounds on both indices
+    if min_start_gt_query and max_end_lt_query and min_start_gt_query.time_msecs <= max_end_lt_query.time_msecs:
+        queries.append(StartTimeQuery(max_end_lt_query.operator, max_end_lt_query.time_msecs))
+        queries.append(EndTimeQuery(min_start_gt_query.operator, min_start_gt_query.time_msecs))
+
+    # batch has already been validated
+    where_conditions = ['(jobs.batch_id = %s AND batch_updates.committed)']
+    where_args = [batch_id]
+
+    if last_job_id is not None:
+        where_conditions.append('(jobs.job_id > 
%s)') + where_args.append(last_job_id) + + uses_attempts_table = False + for query in queries: + cond, args = query.query() + if isinstance( + query, + ( + StartTimeQuery, + EndTimeQuery, + DurationQuery, + InstanceQuery, + QuotedExactMatchQuery, + UnquotedPartialMatchQuery, + ), + ): + uses_attempts_table = True + + where_conditions.append(f'({cond})') + where_args += args + + if uses_attempts_table: + attempts_table_join_str = ( + 'LEFT JOIN attempts ON jobs.batch_id = attempts.batch_id AND jobs.job_id = attempts.job_id' + ) + else: + attempts_table_join_str = '' + + sql = f''' +SELECT jobs.*, batches.user, batches.billing_project, batches.format_version, job_attributes.value AS name, cost_t.cost +FROM jobs +INNER JOIN batches ON jobs.batch_id = batches.id +INNER JOIN batch_updates ON jobs.batch_id = batch_updates.batch_id AND jobs.update_id = batch_updates.update_id +LEFT JOIN job_attributes + ON jobs.batch_id = job_attributes.batch_id AND + jobs.job_id = job_attributes.job_id AND + job_attributes.`key` = 'name' +{attempts_table_join_str} +LEFT JOIN LATERAL ( +SELECT COALESCE(SUM(`usage` * rate), 0) AS cost +FROM (SELECT aggregated_job_resources_v2.batch_id, aggregated_job_resources_v2.job_id, resource_id, CAST(COALESCE(SUM(`usage`), 0) AS SIGNED) AS `usage` + FROM aggregated_job_resources_v2 + WHERE aggregated_job_resources_v2.batch_id = jobs.batch_id AND aggregated_job_resources_v2.job_id = jobs.job_id + GROUP BY aggregated_job_resources_v2.batch_id, aggregated_job_resources_v2.job_id, aggregated_job_resources_v2.resource_id +) AS usage_t +LEFT JOIN resources ON usage_t.resource_id = resources.resource_id +GROUP BY usage_t.batch_id, usage_t.job_id +) AS cost_t ON TRUE +WHERE {" AND ".join(where_conditions)} +LIMIT 50; +''' + + sql_args = where_args + + return (sql, sql_args) diff --git a/batch/batch/globals.py b/batch/batch/globals.py index 2ef85ddadef..54990bc235c 100644 --- a/batch/batch/globals.py +++ b/batch/batch/globals.py @@ -30,7 +30,7 @@ BATCH_FORMAT_VERSION = 7 STATUS_FORMAT_VERSION = 5 -INSTANCE_VERSION = 24 +INSTANCE_VERSION = 25 MAX_PERSISTENT_SSD_SIZE_GIB = 64 * 1024 RESERVED_STORAGE_GB_PER_CORE = 5 diff --git a/batch/batch/utils.py b/batch/batch/utils.py index d0b3ce8c0ba..389fb42620c 100644 --- a/batch/batch/utils.py +++ b/batch/batch/utils.py @@ -134,33 +134,31 @@ async def query_billing_projects(db, user=None, billing_project=None): where_condition = '' sql = f''' -WITH base_t AS ( SELECT billing_projects.name as billing_project, billing_projects.`status` as `status`, - users, `limit` -FROM ( + users, `limit`, COALESCE(cost_t.cost, 0) AS accrued_cost +FROM billing_projects +LEFT JOIN LATERAL ( SELECT billing_project, JSON_ARRAYAGG(`user_cs`) as users FROM billing_project_users - GROUP BY billing_project + WHERE billing_project_users.billing_project = billing_projects.name + GROUP BY billing_project_users.billing_project LOCK IN SHARE MODE -) AS t -RIGHT JOIN billing_projects - ON t.billing_project = billing_projects.name +) AS t ON TRUE +LEFT JOIN LATERAL ( + SELECT SUM(`usage` * rate) as cost + FROM ( + SELECT billing_project, resource_id, CAST(COALESCE(SUM(`usage`), 0) AS SIGNED) AS `usage` + FROM aggregated_billing_project_user_resources_v2 + WHERE billing_projects.name = aggregated_billing_project_user_resources_v2.billing_project + GROUP BY billing_project, resource_id + LOCK IN SHARE MODE + ) AS usage_t + LEFT JOIN resources ON resources.resource_id = usage_t.resource_id + GROUP BY usage_t.billing_project +) AS cost_t ON TRUE {where_condition} -GROUP BY 
billing_projects.name, billing_projects.status, `limit` -LOCK IN SHARE MODE -) -SELECT base_t.*, COALESCE(SUM(`usage` * rate), 0) as accrued_cost -FROM base_t -LEFT JOIN ( - SELECT base_t.billing_project, resource_id, CAST(COALESCE(SUM(`usage`), 0) AS SIGNED) AS `usage` - FROM base_t - LEFT JOIN aggregated_billing_project_user_resources_v2 - ON base_t.billing_project = aggregated_billing_project_user_resources_v2.billing_project - GROUP BY base_t.billing_project, resource_id -) AS usage_t ON usage_t.billing_project = base_t.billing_project -LEFT JOIN resources ON resources.resource_id = usage_t.resource_id -GROUP BY base_t.billing_project; +LOCK IN SHARE MODE; ''' def record_to_dict(record): diff --git a/batch/batch/worker/worker.py b/batch/batch/worker/worker.py index 30e387d1234..0b9848723ab 100644 --- a/batch/batch/worker/worker.py +++ b/batch/batch/worker/worker.py @@ -48,7 +48,7 @@ from hailtop.aiotools import AsyncFS, LocalAsyncFS from hailtop.aiotools.router_fs import RouterAsyncFS from hailtop.batch.hail_genetics_images import HAIL_GENETICS_IMAGES -from hailtop.config import DeployConfig +from hailtop.config import get_deploy_config from hailtop.hail_logging import AccessLogger, configure_logging from hailtop.utils import ( CalledProcessError, @@ -199,7 +199,7 @@ def compose(auth: Union[MutableMapping, str, bytes], registry_addr: Optional[str N_SLOTS = 4 * CORES # Jobs are allowed at minimum a quarter core -deploy_config = DeployConfig('gce', NAMESPACE, {}) +deploy_config = get_deploy_config() docker: Optional[aiodocker.Docker] = None @@ -773,6 +773,7 @@ def __init__( volume_mounts: Optional[List[MountSpecification]] = None, env: Optional[List[str]] = None, stdin: Optional[str] = None, + log_path: Optional[str] = None, ): self.task_manager = task_manager self.fs = fs @@ -809,7 +810,7 @@ def __init__( self.container_scratch = scratch_dir self.container_overlay_path = f'{self.container_scratch}/rootfs_overlay' self.config_path = f'{self.container_scratch}/config' - self.log_path = f'{self.container_scratch}/container.log' + self.log_path = log_path or f'{self.container_scratch}/container.log' self.resource_usage_path = f'{self.container_scratch}/resource_usage' self.overlay_mounted = False @@ -1245,7 +1246,7 @@ def _mounts(self, uid: int, gid: int) -> List[MountSpecification]: } ) - return ( + mounts = ( self.volume_mounts + external_volumes + [ @@ -1308,6 +1309,18 @@ def _mounts(self, uid: int, gid: int) -> List[MountSpecification]: ] ) + if not any(v['destination'] == '/deploy-config' for v in self.volume_mounts): + mounts.append( + { + 'source': '/deploy-config/deploy-config.json', + 'destination': '/deploy-config/deploy-config.json', + 'type': 'none', + 'options': ['bind', 'ro', 'private'], + }, + ) + + return mounts + def _env(self): assert self.image.image_config env = self.image.image_config['Config']['Env'] + self.env @@ -2084,6 +2097,11 @@ def write_batch_config(self): os.makedirs(f'{self.scratch}/batch-config') with open(f'{self.scratch}/batch-config/batch-config.json', 'wb') as config: config.write(orjson.dumps({'version': 1, 'batch_id': self.batch_id})) + # Necessary for backward compatibility for Hail Query jars that expect + # the deploy config at this path and not at `/deploy-config/deploy-config.json` + os.makedirs(f'{self.scratch}/secrets/deploy-config', exist_ok=True) + with open(f'{self.scratch}/secrets/deploy-config/deploy-config.json', 'wb') as config: + config.write(orjson.dumps(deploy_config.get_config())) def step(self, name): return self.timings.step(name) @@ 
-2499,6 +2517,7 @@ async def create_and_start( memory_in_bytes=total_memory_bytes, env=[f'HAIL_WORKER_OFF_HEAP_MEMORY_PER_CORE_MB={off_heap_memory_per_core_mib}', f'HAIL_CLOUD={CLOUD}'], volume_mounts=volume_mounts, + log_path=f'/batch/jvm-container-logs/jvm-{index}.log', ) await c.create() diff --git a/batch/sql/dedup_billing_project_users_v2.py b/batch/sql/dedup_billing_project_users_v2.py new file mode 100644 index 00000000000..b840a30e17e --- /dev/null +++ b/batch/sql/dedup_billing_project_users_v2.py @@ -0,0 +1,271 @@ +import asyncio +import functools +import os +import random +import time +from typing import List, Optional, Tuple + +from gear import Database, transaction +from hailtop.utils import bounded_gather + + +MYSQL_CONFIG_FILE = os.environ.get('MYSQL_CONFIG_FILE') + + +class Counter: + def __init__(self): + self.n = 0 + + +async def process_chunk(counter, db, query, query_args, start, end, quiet=True): + start_time = time.time() + + await db.just_execute(query, query_args) + + if not quiet and counter.n % 100 == 0: + print(f'processed chunk ({start}, {end}) in {time.time() - start_time}s') + + counter.n += 1 + if counter.n % 500 == 0: + print(f'processed {counter.n} complete chunks') + + +async def process_chunk_agg_bp_user_resources(counter, db, start, end, quiet=True): + if start is None: + assert end is not None + end_billing_project, end_user, end_resource_id = end + + where_statement = ''' +WHERE (aggregated_billing_project_user_resources_v2.billing_project < %s) OR + (aggregated_billing_project_user_resources_v2.billing_project = %s AND aggregated_billing_project_user_resources_v2.`user` < %s) OR + (aggregated_billing_project_user_resources_v2.billing_project = %s AND aggregated_billing_project_user_resources_v2.`user` = %s AND aggregated_billing_project_user_resources_v2.resource_id < %s) +''' + query_args = [end_billing_project, + end_billing_project, end_user, + end_billing_project, end_user, end_resource_id, + ] + elif end is None: + assert start is not None + start_billing_project, start_user, start_resource_id = start + + where_statement = ''' +WHERE (aggregated_billing_project_user_resources_v2.billing_project > %s) OR + (aggregated_billing_project_user_resources_v2.billing_project = %s AND aggregated_billing_project_user_resources_v2.`user` > %s) OR + (aggregated_billing_project_user_resources_v2.billing_project = %s AND aggregated_billing_project_user_resources_v2.`user` = %s AND aggregated_billing_project_user_resources_v2.resource_id >= %s) +''' + query_args = [start_billing_project, + start_billing_project, start_user, + start_billing_project, start_user, start_resource_id, + ] + else: + assert start is not None and end is not None + start_billing_project, start_user, start_resource_id = start + end_billing_project, end_user, end_resource_id = end + + where_statement = ''' +WHERE ((aggregated_billing_project_user_resources_v2.billing_project > %s) OR + (aggregated_billing_project_user_resources_v2.billing_project = %s AND aggregated_billing_project_user_resources_v2.`user` > %s) OR + (aggregated_billing_project_user_resources_v2.billing_project = %s AND aggregated_billing_project_user_resources_v2.`user` = %s AND aggregated_billing_project_user_resources_v2.resource_id >= %s)) + AND ((aggregated_billing_project_user_resources_v2.billing_project < %s) OR + (aggregated_billing_project_user_resources_v2.billing_project = %s AND aggregated_billing_project_user_resources_v2.`user` < %s) OR + (aggregated_billing_project_user_resources_v2.billing_project = %s AND 
aggregated_billing_project_user_resources_v2.`user` = %s AND aggregated_billing_project_user_resources_v2.resource_id < %s)) +''' + query_args = [start_billing_project, + start_billing_project, start_user, + start_billing_project, start_user, start_resource_id, + end_billing_project, + end_billing_project, end_user, + end_billing_project, end_user, end_resource_id, + ] + + query = f''' +UPDATE aggregated_billing_project_user_resources_v2 +SET migrated = 1 +{where_statement} +''' + + await process_chunk(counter, db, query, query_args, start, end, quiet) + + +async def audit_changes(db): + bp_user_audit_start = time.time() + print('starting auditing billing project user records') + + chunk_offsets = [None] + for offset in await find_chunk_offsets_for_audit(db, 100): + chunk_offsets.append(offset) + chunk_offsets = list(zip(chunk_offsets[:-1], chunk_offsets[1:])) + + failing_bp_users = [] + + if chunk_offsets != [(None, None)]: + for offsets in chunk_offsets: + start, end = offsets + if start is not None and end is not None: + start_bp, start_user = start + end_bp, end_user = end + where_statement = ''' +WHERE ((billing_project > %s) OR (billing_project = %s AND `user` >= %s)) + AND ((billing_project < %s) OR (billing_project = %s AND `user` < %s)) +''' + where_args = [start_bp, start_bp, start_user, + end_bp, end_bp, end_user] + elif start is None and end is not None: + end_bp, end_user = end + where_statement = 'WHERE (billing_project < %s) OR (billing_project = %s AND `user` < %s)' + where_args = [end_bp, end_bp, end_user] + else: + assert start is not None and end is None + start_bp, start_user = start + where_statement = 'WHERE (billing_project > %s) OR (billing_project = %s AND `user` >= %s)' + where_args = [start_bp, start_bp, start_user] + + bad_bp_user_records = db.select_and_fetchall( + f''' +SELECT old.billing_project, old.`user`, old.deduped_resource_id, old.`usage`, new.`usage`, ABS(new.`usage` - old.`usage`) AS usage_diff +FROM ( + SELECT billing_project, `user`, deduped_resource_id, CAST(COALESCE(SUM(`usage`), 0) AS SIGNED) AS `usage` + FROM aggregated_billing_project_user_resources_v2 + LEFT JOIN resources ON resources.resource_id = aggregated_billing_project_user_resources_v2.resource_id + {where_statement} + GROUP BY billing_project, `user`, deduped_resource_id + LOCK IN SHARE MODE +) AS old +LEFT JOIN ( + SELECT billing_project, `user`, deduped_resource_id, CAST(COALESCE(SUM(`usage`), 0) AS SIGNED) AS `usage` + FROM aggregated_billing_project_user_resources_v3 + LEFT JOIN resources ON resources.resource_id = aggregated_billing_project_user_resources_v3.resource_id + {where_statement} + GROUP BY billing_project, `user`, deduped_resource_id + LOCK IN SHARE MODE +) AS new ON old.billing_project = new.billing_project AND old.`user` = new.`user` AND old.deduped_resource_id = new.deduped_resource_id +WHERE new.`usage` != old.`usage` +LIMIT 100; +''', + where_args + where_args) + + bad_bp_user_records = [record async for record in bad_bp_user_records] + for record in bad_bp_user_records: + print(f'found bad bp user record {record}') + failing_bp_users.append((record['billing_project'], record['user'])) + + if failing_bp_users: + raise Exception(f'errors found in audit') + + print(f'finished auditing bp user records in {time.time() - bp_user_audit_start}s') + + +async def find_chunk_offsets(db, size): + @transaction(db) + async def _find_chunks(tx) -> List[Optional[Tuple[int, int, str]]]: + start_time = time.time() + + await tx.just_execute('SET @rank=0;') + + query = f''' +SELECT 
t.billing_project, t.`user`, t.resource_id FROM ( + SELECT billing_project, `user`, resource_id + FROM aggregated_billing_project_user_resources_v2 + ORDER BY billing_project ASC, `user` ASC, resource_id ASC +) AS t +WHERE MOD((@rank := @rank + 1), %s) = 0; +''' + + offsets = tx.execute_and_fetchall(query, (size,)) + offsets = [(offset['billing_project'], offset['user'], offset['resource_id']) async for offset in offsets] + offsets.append(None) + + print(f'found chunk offsets in {round(time.time() - start_time, 4)}s') + return offsets + + return await _find_chunks() + + +async def find_chunk_offsets_for_audit(db, size): + @transaction(db) + async def _find_chunks(tx) -> List[Optional[Tuple[int, int, str]]]: + start_time = time.time() + + await tx.just_execute('SET @rank=0;') + + query = f''' +SELECT t.billing_project, t.`user` FROM ( + SELECT billing_project, `user` + FROM aggregated_billing_project_user_resources_v2 + ORDER BY billing_project ASC, `user` ASC +) AS t +WHERE MOD((@rank := @rank + 1), %s) = 0; +''' + + offsets = tx.execute_and_fetchall(query, (size,)) + offsets = [(offset['billing_project'], offset['user']) async for offset in offsets] + offsets.append(None) + + print(f'found chunk offsets in {round(time.time() - start_time, 4)}s') + return offsets + + return await _find_chunks() + + +async def run_migration(db, chunk_size): + populate_start_time = time.time() + + chunk_counter = Counter() + chunk_offsets = [None] + for offset in await find_chunk_offsets(db, chunk_size): + chunk_offsets.append(offset) + + chunk_offsets = list(zip(chunk_offsets[:-1], chunk_offsets[1:])) + + if chunk_offsets != [(None, None)]: + print(f'found {len(chunk_offsets)} chunks to process') + + random.shuffle(chunk_offsets) + + burn_in_start = time.time() + n_burn_in_chunks = 10000 + + burn_in_chunk_offsets = chunk_offsets[:n_burn_in_chunks] + chunk_offsets = chunk_offsets[n_burn_in_chunks:] + + for start_offset, end_offset in burn_in_chunk_offsets: + await process_chunk_agg_bp_user_resources(chunk_counter, db, start_offset, end_offset, quiet=False) + + print(f'finished burn-in in {time.time() - burn_in_start}s') + + parallel_insert_start = time.time() + + await bounded_gather( + *[functools.partial(process_chunk_agg_bp_user_resources, chunk_counter, db, start_offset, end_offset, quiet=False) + for start_offset, end_offset in chunk_offsets], + parallelism=10 + ) + print( + f'took {time.time() - parallel_insert_start}s to insert the remaining complete records in parallel ({(chunk_size * len(chunk_offsets)) / (time.time() - parallel_insert_start)}) attempts / sec') + + print(f'finished populating records in {time.time() - populate_start_time}s') + + +async def main(chunk_size=100): + db = Database() + await db.async_init(config_file=MYSQL_CONFIG_FILE) + + start_time = time.time() + + try: + migration_start_time = time.time() + + await run_migration(db, chunk_size) + + print(f'finished populating records in {time.time() - migration_start_time}s') + + audit_start_time = time.time() + await audit_changes(db) + print(f'finished auditing changes in {time.time() - audit_start_time}') + finally: + print(f'finished migration in {time.time() - start_time}s') + await db.async_close() + + +loop = asyncio.get_event_loop() +loop.run_until_complete(main()) diff --git a/batch/sql/estimated-current.sql b/batch/sql/estimated-current.sql index c2706c90225..aed88292d51 100644 --- a/batch/sql/estimated-current.sql +++ b/batch/sql/estimated-current.sql @@ -326,6 +326,8 @@ CREATE TABLE IF NOT EXISTS `job_attributes` ( 
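The dedup migration above never scans the whole table in one statement: `find_chunk_offsets` samples every `chunk_size`-th primary key with a running `@rank` counter, and each chunk is then addressed by a half-open keyset range `[start, end)` over the composite key `(billing_project, user, resource_id)`, hand-expanded into the OR-of-ANDs form of a row-value comparison. A minimal, self-contained sketch of that expansion (plain Python; the column names come from the migration, the helper names are illustrative):

```python
from typing import Any, List, Optional, Sequence, Tuple


def tuple_cmp(cols: Sequence[str], key: Sequence[Any], strict: str, last: str) -> Tuple[str, List[Any]]:
    """Expand (c1, .., cn) <op> (k1, .., kn) into (c1 > k1) OR (c1 = k1 AND c2 > k2) OR ..."""
    ors: List[str] = []
    args: List[Any] = []
    for i, col in enumerate(cols):
        op = last if i == len(cols) - 1 else strict
        eqs = [f'{c} = %s' for c in cols[:i]]
        ors.append('(' + ' AND '.join(eqs + [f'{col} {op} %s']) + ')')
        args.extend(key[: i + 1])
    return '(' + ' OR '.join(ors) + ')', args


def chunk_where(cols: Sequence[str],
                start: Optional[Sequence[Any]],
                end: Optional[Sequence[Any]]) -> Tuple[str, List[Any]]:
    """WHERE clause for the half-open keyset range [start, end); None means unbounded."""
    clauses: List[str] = []
    args: List[Any] = []
    if start is not None:
        c, a = tuple_cmp(cols, start, '>', '>=')  # inclusive lower bound
        clauses.append(c)
        args.extend(a)
    if end is not None:
        c, a = tuple_cmp(cols, end, '<', '<')  # exclusive upper bound
        clauses.append(c)
        args.extend(a)
    return ('WHERE ' + ' AND '.join(clauses)) if clauses else '', args


where, args = chunk_where(
    ['billing_project', '`user`', 'resource_id'],
    start=('bp1', 'alice', 7),
    end=('bp2', 'bob', 42),
)
```

The generated clause and argument list have exactly the shape of the `where_statement` / `query_args` pairs in `process_chunk_agg_bp_user_resources`, and the same pattern generalizes to the two-column `(billing_project, user)` key used by `find_chunk_offsets_for_audit`.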
FOREIGN KEY (`batch_id`, `job_id`) REFERENCES jobs(batch_id, job_id) ON DELETE CASCADE ) ENGINE = InnoDB; CREATE INDEX job_attributes_key_value ON `job_attributes` (`key`, `value`(256)); +CREATE INDEX job_attributes_batch_id_key_value ON `job_attributes` (batch_id, `key`, `value`(256)); +CREATE INDEX job_attributes_value ON `job_attributes` (batch_id, `value`(256)); CREATE TABLE IF NOT EXISTS `regions` ( `region_id` INT NOT NULL AUTO_INCREMENT, diff --git a/batch/sql/list-jobs-extra-indices.sql b/batch/sql/list-jobs-extra-indices.sql new file mode 100644 index 00000000000..ef099a593de --- /dev/null +++ b/batch/sql/list-jobs-extra-indices.sql @@ -0,0 +1,2 @@ +CREATE INDEX job_attributes_batch_id_key_value ON `job_attributes` (batch_id, `key`, `value`(256)); +CREATE INDEX job_attributes_value ON `job_attributes` (batch_id, `value`(256)); diff --git a/batch/test/test_batch.py b/batch/test/test_batch.py index c00b958f638..ca47ae4dbdc 100644 --- a/batch/test/test_batch.py +++ b/batch/test/test_batch.py @@ -317,7 +317,7 @@ def assert_batch_ids(expected: Set[int], q=None): assert_batch_ids({b2.id}, f'tag={tag} name=b2') -def test_list_jobs(client: BatchClient): +def test_list_jobs_v1(client: BatchClient): bb = create_batch(client) j_success = bb.create_job(DOCKER_ROOT_IMAGE, ['true']) j_failure = bb.create_job(DOCKER_ROOT_IMAGE, ['false']) @@ -325,23 +325,114 @@ def test_list_jobs(client: BatchClient): j_running = bb.create_job(DOCKER_ROOT_IMAGE, ['sleep', '1800'], attributes={'tag': 'foo'}) b = bb.submit() - j_success.wait() - j_failure.wait() - j_error.wait() def assert_job_ids(expected, q=None): jobs = b.jobs(q=q) actual = set(j['job_id'] for j in jobs) assert actual == expected, str((jobs, b.debug_info())) - assert_job_ids({j_success.job_id}, 'success') - assert_job_ids({j_success.job_id, j_failure.job_id, j_error.job_id}, 'done') - assert_job_ids({j_running.job_id}, '!done') - assert_job_ids({j_running.job_id}, 'tag=foo') - assert_job_ids({j_error.job_id, j_running.job_id}, 'has:tag') - assert_job_ids({j_success.job_id, j_failure.job_id, j_error.job_id, j_running.job_id}, None) + try: + j_success.wait() + j_failure.wait() + j_error.wait() - b.cancel() + assert_job_ids({j_success.job_id}, 'success') + assert_job_ids({j_success.job_id, j_failure.job_id, j_error.job_id}, 'done') + assert_job_ids({j_running.job_id}, '!done') + assert_job_ids({j_running.job_id}, 'tag=foo') + assert_job_ids({j_error.job_id, j_running.job_id}, 'has:tag') + assert_job_ids({j_success.job_id, j_failure.job_id, j_error.job_id, j_running.job_id}, None) + finally: + b.cancel() + + +def test_list_jobs_v2(client: BatchClient): + bb = create_batch(client) + j_success = bb.create_job(DOCKER_ROOT_IMAGE, ['true']) + j_failure = bb.create_job(DOCKER_ROOT_IMAGE, ['false']) + j_error = bb.create_job(DOCKER_ROOT_IMAGE, ['sleep 5'], attributes={'tag': 'bar'}) + j_running = bb.create_job(DOCKER_ROOT_IMAGE, ['sleep', '1800'], attributes={'tag': 'foo'}) + + b = bb.submit() + + def assert_job_ids(expected, q=None): + jobs = b.jobs(q=q, version=2) + actual = set(j['job_id'] for j in jobs) + assert actual == expected, str((jobs, b.debug_info())) + + try: + j_success.wait() + j_failure.wait() + j_error.wait() + + assert_job_ids({j_success.job_id}, 'state = success') + assert_job_ids({j_success.job_id}, 'state == success') + assert_job_ids({j_success.job_id}, 'state=success') + assert_job_ids({j_success.job_id}, 'state==success') + + assert_job_ids({j_success.job_id, j_failure.job_id, j_error.job_id}, 'state=done') + 
assert_job_ids({j_running.job_id}, 'state != done') + + assert_job_ids({j_running.job_id}, 'tag=foo') + assert_job_ids({j_running.job_id}, 'tag=~fo') + assert_job_ids({j_running.job_id}, 'tag = foo') + assert_job_ids({j_running.job_id}, 'tag =~ fo') + + assert_job_ids({j_error.job_id}, 'tag!=foo') + assert_job_ids({j_error.job_id}, 'tag != foo') + assert_job_ids({j_error.job_id, j_running.job_id}, '"tag"') + assert_job_ids({j_running.job_id}, 'foo') + + no_jobs: Set[int] = set() + all_jobs = {j_error.job_id, j_running.job_id, j_failure.job_id, j_success.job_id} + assert_job_ids(no_jobs, 'duration > 50000') + assert_job_ids(all_jobs, 'instance_collection = standard') + assert_job_ids(no_jobs, 'cost > 1000') + + assert_job_ids(no_jobs, 'start_time == 2023-02-24T17:15:25Z') + assert_job_ids(no_jobs, 'end_time == 2023-02-24T17:15:25Z') + + assert_job_ids(no_jobs, 'start_time<2023-02-24T17:15:25Z') + assert_job_ids(no_jobs, 'start_time<=2023-02-24T17:15:25Z') + assert_job_ids(all_jobs, 'start_time != 2023-02-24T17:15:25Z') + assert_job_ids(all_jobs, 'start_time>2023-02-24T17:15:25Z') + assert_job_ids(all_jobs, 'start_time>=2023-02-24T17:15:25Z') + + assert_job_ids(no_jobs, 'start_time < 2023-02-24T17:15:25Z') + assert_job_ids(no_jobs, 'start_time <= 2023-02-24T17:15:25Z') + assert_job_ids(all_jobs, 'start_time > 2023-02-24T17:15:25Z') + assert_job_ids(all_jobs, 'start_time >= 2023-02-24T17:15:25Z') + + assert_job_ids(no_jobs, 'instance = batch-worker') + assert_job_ids(all_jobs, 'instance != batch-worker') + assert_job_ids(all_jobs, 'instance =~ batch-worker') + assert_job_ids(no_jobs, 'instance !~ batch-worker') + + assert_job_ids(no_jobs, 'instance=batch-worker') + assert_job_ids(all_jobs, 'instance!=batch-worker') + assert_job_ids(all_jobs, 'instance=~batch-worker') + assert_job_ids(no_jobs, 'instance!~batch-worker') + + assert_job_ids({j_success.job_id}, 'job_id = 1') + assert_job_ids(all_jobs, 'job_id >= 1') + + assert_job_ids(all_jobs, None) + + assert_job_ids( + no_jobs, + ''' +job_id >=1 +instance == foo +foo = bar +start_time >= 2023-02-24T17:15:25Z +end_time <= 2023-02-24T17:18:25Z +''', + ) + + with pytest.raises(httpx.ClientResponseError, match='expected float, but found'): + assert_job_ids(no_jobs, 'duration >= abcd') + finally: + b.cancel() def test_include_jobs(client: BatchClient): @@ -812,7 +903,7 @@ def test_cant_submit_to_default_with_other_ns_creds(client: BatchClient): '-c', f''' hailctl config set domain {DOMAIN} -rm /deploy-config/deploy-config.json +export HAIL_DEFAULT_NAMESPACE=default python3 -c \'{script}\'''', ], mount_tokens=True, @@ -825,27 +916,26 @@ def test_cant_submit_to_default_with_other_ns_creds(client: BatchClient): assert status['state'] == 'Failed', str((status, b.debug_info())) assert "Please log in" in j.log()['main'], (str(j.log()['main']), status) + +def test_deploy_config_is_mounted_as_readonly(client: BatchClient): bb = create_batch(client) j = bb.create_job( HAIL_GENETICS_HAILTOP_IMAGE, [ '/bin/bash', '-c', - f''' + ''' +set -ex jq '.default_namespace = "default"' /deploy-config/deploy-config.json > tmp.json -mv tmp.json /deploy-config/deploy-config.json -python3 -c \'{script}\'''', +mv tmp.json /deploy-config/deploy-config.json''', ], mount_tokens=True, ) b = bb.submit() status = j.wait() - if NAMESPACE == 'default': - assert status['state'] == 'Success', str((status, b.debug_info())) - else: - assert status['state'] == 'Failed', str((status, b.debug_info())) - job_log = j.log() - assert "Please log in" in job_log['main'], str((job_log, 
b.debug_info())) + assert status['state'] == 'Failed', str((status, b.debug_info())) + job_log = j.log() + assert "mv: cannot move" in job_log['main'], str((job_log, b.debug_info())) def test_cannot_contact_other_internal_ips(client: BatchClient): diff --git a/bootstrap-gateway/Makefile b/bootstrap-gateway/Makefile index da857d5020a..e538f20a806 100644 --- a/bootstrap-gateway/Makefile +++ b/bootstrap-gateway/Makefile @@ -6,8 +6,8 @@ GATEWAY_IMAGE := $(DOCKER_PREFIX)/gateway:$(TOKEN) IP := $(shell kubectl get secret global-config --template={{.data.ip}} | base64 --decode) build: - $(MAKE) -C ../docker hail-ubuntu - python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"'$$(cat ../docker/hail-ubuntu-image-ref)'"}}' Dockerfile Dockerfile.out + $(MAKE) -C .. hail-ubuntu-image + python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"'$$(cat ../hail-ubuntu-image)'"}}' Dockerfile Dockerfile.out ../docker-build.sh . Dockerfile.out $(GATEWAY_IMAGE) deploy: build diff --git a/build.yaml b/build.yaml index 5420f2385b6..3e5605cd16c 100644 --- a/build.yaml +++ b/build.yaml @@ -562,34 +562,6 @@ steps: - deploy_auth_driver_service_account - create_test_gsa_keys - create_test_database_server_config - - kind: runImage - name: create_initial_user - runIfRequested: true - image: - valueFrom: auth_image.image - script: | - set -ex - export NAMESPACE={{ default_ns.name }} - export CLOUD={{ global.cloud }} - python3 /io/create_initial_account.py {{ code.username }} {{ code.login_id }} - serviceAccount: - name: admin - namespace: - valueFrom: default_ns.name - secrets: - - name: - valueFrom: auth_database.user_secret_name - namespace: - valueFrom: default_ns.name - mountPath: /sql-config - inputs: - - from: /repo/ci/create_initial_account.py - to: /io/create_initial_account.py - dependsOn: - - default_ns - - auth_image - - merge_code - - auth_database - kind: buildImage2 name: hailgenetics_vep_grch37_85_image dockerFile: /io/repo/docker/hailgenetics/vep/grch37/85/Dockerfile @@ -1659,7 +1631,7 @@ steps: - dev dependsOn: - default_ns - - base_image + - hail_ubuntu_image - create_certs - copy_third_party_images - kind: deploy @@ -1711,6 +1683,36 @@ steps: - create_dummy_oauth2_client_secret - create_certs - create_accounts + - kind: runImage + name: create_initial_user + runIfRequested: true + image: + valueFrom: hailgenetics_hailtop_image.image + script: | + set -ex + {% if default_ns.name == "default" %} + hailctl auth create-user --developer {{ code.username }} {{ code.login_id }} + {% else %} + hailctl auth create-user \ + --developer \ + --hail-identity {{ code.hail_identity }} \ + --hail-credentials-secret-name {{ code.username }}-gsa-key \ + {{ code.username }} {{ code.login_id }} + {% endif %} + secrets: + - name: worker-deploy-config + namespace: + valueFrom: default_ns.name + mountPath: /deploy-config + - name: test-dev-tokens + namespace: + valueFrom: default_ns.name + mountPath: /user-tokens + dependsOn: + - default_ns + - hailgenetics_hailtop_image + - merge_code + - deploy_auth - kind: runImage name: delete_monitoring_tables image: @@ -1805,10 +1807,6 @@ steps: namespace: valueFrom: default_ns.name mountPath: /user-tokens - - name: ssl-config-monitoring-tests - namespace: - valueFrom: default_ns.name - mountPath: /ssl-config timeout: 300 inputs: - from: /repo/monitoring/test @@ -2248,6 +2246,12 @@ steps: - name: set-test-min-pool-size-to-1 script: /io/sql/set-test-min-pool-size-to-1.py online: true + - name: dedup-billing-project-users-v2 + script: 
/io/sql/dedup_billing_project_users_v2.py + online: true + - name: list-jobs-extra-indices + script: /io/sql/list-jobs-extra-indices.sql + online: true inputs: - from: /repo/batch/sql to: /io/sql @@ -2423,11 +2427,14 @@ steps: export HAIL_DOCTEST_DATA_DIR="{{ global.test_storage_uri }}/{{ upload_test_resources_to_blob_storage.token }}/doctest/data/" export HAIL_GENETICS_VEP_GRCH37_85_IMAGE={{ hailgenetics_vep_grch37_85_image.image }} export GOOGLE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json + export AZURE_APPLICATION_CREDENTIALS=/test-gsa-key/key.json - if [[ "$HAIL_CLOUD" = "gcp" ]] - then - export GCS_REQUESTER_PAYS_PROJECT=broad-ctsa - fi + {% if global.cloud == "gcp" %} + export GCS_REQUESTER_PAYS_PROJECT=broad-ctsa + {% elif global.cloud == "azure" %} + export HAIL_AZURE_SUBSCRIPTION_ID={{ global.azure_subscription_id }} + export HAIL_AZURE_RESOURCE_GROUP={{ global.azure_resource_group }} + {% endif %} export HAIL_SHUFFLE_MAX_BRANCH=4 export HAIL_SHUFFLE_CUTOFF=1000000 @@ -2473,10 +2480,6 @@ steps: namespace: valueFrom: default_ns.name mountPath: /user-tokens - - name: ssl-config-batch-tests - namespace: - valueFrom: default_ns.name - mountPath: /ssl-config dependsOn: - default_ns - merge_code @@ -2596,10 +2599,6 @@ steps: namespace: valueFrom: default_ns.name mountPath: /user-tokens - - name: ssl-config-memory-tests - namespace: - valueFrom: default_ns.name - mountPath: /ssl-config - name: test-gsa-key namespace: valueFrom: default_ns.name @@ -2674,10 +2673,6 @@ steps: namespace: valueFrom: default_ns.name mountPath: /dev-tokens - - name: ssl-config-batch-tests - namespace: - valueFrom: default_ns.name - mountPath: /ssl-config - name: test-gsa-key namespace: valueFrom: default_ns.name @@ -2726,10 +2721,6 @@ steps: namespace: valueFrom: default_ns.name mountPath: /dev-tokens - - name: ssl-config-batch-tests - namespace: - valueFrom: default_ns.name - mountPath: /ssl-config alwaysRun: true dependsOn: - create_deploy_config @@ -2881,10 +2872,6 @@ steps: namespace: valueFrom: default_ns.name mountPath: /user-tokens - - name: ssl-config-ci-tests - namespace: - valueFrom: default_ns.name - mountPath: /ssl-config timeout: 5400 inputs: - from: /repo/ci/test @@ -2950,10 +2937,6 @@ steps: namespace: valueFrom: default_ns.name mountPath: /test-gsa-key - - name: ssl-config-batch-tests - namespace: - valueFrom: default_ns.name - mountPath: /ssl-config dependsOn: - hailgenetics_hail_image - hailgenetics_hailtop_image @@ -3071,10 +3054,6 @@ steps: namespace: valueFrom: default_ns.name mountPath: /test-gsa-key - - name: ssl-config-batch-tests - namespace: - valueFrom: default_ns.name - mountPath: /ssl-config dependsOn: - hailgenetics_hail_image - upload_query_jar @@ -3615,10 +3594,6 @@ steps: namespace: valueFrom: default_ns.name mountPath: /deploy-config - - name: ssl-config-services-java-tests - namespace: - valueFrom: default_ns.name - mountPath: /ssl-config - name: test-gsa-key namespace: valueFrom: default_ns.name @@ -3665,10 +3640,6 @@ steps: namespace: valueFrom: default_ns.name mountPath: /deploy-config - - name: ssl-config-services-java-tests - namespace: - valueFrom: default_ns.name - mountPath: /ssl-config timeout: 1200 dependsOn: - default_ns @@ -3710,10 +3681,6 @@ steps: namespace: valueFrom: default_ns.name mountPath: /user-tokens - - name: ssl-config-batch-tests - namespace: - valueFrom: default_ns.name - mountPath: /ssl-config alwaysRun: true timeout: 300 dependsOn: @@ -3761,10 +3728,6 @@ steps: namespace: valueFrom: default_ns.name mountPath: /user-tokens - - name: 
ssl-config-batch-tests - namespace: - valueFrom: default_ns.name - mountPath: /ssl-config scopes: - test - dev diff --git a/ci/bootstrap.py b/ci/bootstrap.py index bbc95783559..fc97999cd00 100644 --- a/ci/bootstrap.py +++ b/ci/bootstrap.py @@ -9,11 +9,11 @@ import kubernetes_asyncio.client import kubernetes_asyncio.config -from batch.driver.k8s_cache import K8sCache from ci.build import BuildConfiguration, Code from ci.environment import KUBERNETES_SERVER_URL, STORAGE_URI from ci.github import clone_or_fetch_script from ci.utils import generate_token +from gear import K8sCache from hailtop.utils import check_shell_output BATCH_WORKER_IMAGE = os.environ['BATCH_WORKER_IMAGE'] diff --git a/ci/ci/constants.py b/ci/ci/constants.py index cc0a32a33f7..aca0df0d8fd 100644 --- a/ci/ci/constants.py +++ b/ci/ci/constants.py @@ -25,15 +25,12 @@ def __init__(self, gh_username: str, hail_username: Optional[str] = None, teams: User('jigold', 'jigold', [SERVICES_TEAM]), User('jkgoodrich', 'jgoodric'), User('konradjk', 'konradk'), - User('lfrancioli'), - User('lgruen'), User('nawatts'), User('patrick-schultz', 'pschultz', [COMPILER_TEAM]), User('pwc2', 'pcumming'), - User('tpoterba', 'tpoterba', [COMPILER_TEAM]), - User('lgruen', 'lgruensc', []), + User('tpoterba', 'tpoterba', []), User('vladsaveliev', 'vsavelye', []), User('illusional', 'mfrankli', []), User('iris-garden', 'irademac'), - User('ehigham', 'ehigham', []), + User('ehigham', 'ehigham', [COMPILER_TEAM]), ] diff --git a/ci/create_initial_account.py b/ci/create_initial_account.py deleted file mode 100644 index 46d7950dc25..00000000000 --- a/ci/create_initial_account.py +++ /dev/null @@ -1,99 +0,0 @@ -import argparse -import base64 -import json -import os - -import kubernetes_asyncio.client -import kubernetes_asyncio.config - -from gear import Database, transaction -from hailtop.utils import async_to_blocking - -NAMESPACE = os.environ['NAMESPACE'] - - -async def copy_identity_from_default(hail_credentials_secret_name: str) -> str: - cloud = os.environ['CLOUD'] - await kubernetes_asyncio.config.load_kube_config() - k8s_client = kubernetes_asyncio.client.CoreV1Api() - - secret = await k8s_client.read_namespaced_secret(hail_credentials_secret_name, 'default') - - try: - await k8s_client.delete_namespaced_secret(hail_credentials_secret_name, NAMESPACE) - except kubernetes_asyncio.client.rest.ApiException as e: - if e.status == 404: - pass - else: - raise - - await k8s_client.create_namespaced_secret( - NAMESPACE, - kubernetes_asyncio.client.V1Secret( - metadata=kubernetes_asyncio.client.V1ObjectMeta(name=hail_credentials_secret_name), - data=secret.data, - ), - ) - - credentials_json = base64.b64decode(secret.data['key.json']).decode() - credentials = json.loads(credentials_json) - - if cloud == 'gcp': - return credentials['client_email'] - assert cloud == 'azure' - return credentials['appObjectId'] - - -async def insert_user_if_not_exists(db, username, login_id, is_developer, is_service_account): - @transaction(db) - async def insert(tx): - row = await tx.execute_and_fetchone('SELECT id, state FROM users where username = %s;', (username,)) - if row: - if row['state'] == 'active': - return None - return row['id'] - - if NAMESPACE == 'default': - hail_credentials_secret_name = None - hail_identity = None - namespace_name = None - else: - hail_credentials_secret_name = f'{username}-gsa-key' - hail_identity = await copy_identity_from_default(hail_credentials_secret_name) - namespace_name = NAMESPACE - - return await tx.execute_insertone( - ''' - INSERT 
INTO users (state, username, login_id, is_developer, is_service_account, hail_identity, hail_credentials_secret_name, namespace_name) - VALUES (%s, %s, %s, %s, %s, %s, %s, %s); - ''', - ( - 'creating', - username, - login_id, - is_developer, - is_service_account, - hail_identity, - hail_credentials_secret_name, - namespace_name, - ), - ) - - return await insert() # pylint: disable=no-value-for-parameter - - -async def main(): - parser = argparse.ArgumentParser(description='Create an initial dev user.') - - parser.add_argument('username', help='The username of the initial user.') - parser.add_argument('login_id', metavar='login-id', help='The login id of the initial user.') - - args = parser.parse_args() - - db = Database() - await db.async_init(maxsize=50) - - await insert_user_if_not_exists(db, args.username, args.login_id, True, False) - - -async_to_blocking(main()) diff --git a/ci/pinned-requirements.txt b/ci/pinned-requirements.txt index 1f047a6c1e6..d0bd1e30a48 100644 --- a/ci/pinned-requirements.txt +++ b/ci/pinned-requirements.txt @@ -25,6 +25,7 @@ charset-normalizer==3.1.0 click==8.1.3 # via # -c hail/ci/../hail/python/dev/pinned-requirements.txt + # -c hail/ci/../hail/python/pinned-requirements.txt # zulip cryptography==41.0.1 # via diff --git a/gear/gear/__init__.py b/gear/gear/__init__.py index 554560e31eb..a323f65fc7a 100644 --- a/gear/gear/__init__.py +++ b/gear/gear/__init__.py @@ -3,6 +3,7 @@ from .csrf import check_csrf_token, new_csrf_token from .database import Database, Transaction, create_database_pool, resolve_test_db_endpoint, transaction from .http_server_utils import json_request, json_response +from .k8s_cache import K8sCache from .metrics import monitor_endpoints_middleware from .session import setup_aiohttp_session @@ -22,4 +23,5 @@ 'json_request', 'json_response', 'resolve_test_db_endpoint', + 'K8sCache', ] diff --git a/batch/batch/driver/k8s_cache.py b/gear/gear/k8s_cache.py similarity index 95% rename from batch/batch/driver/k8s_cache.py rename to gear/gear/k8s_cache.py index cdd14528fd6..c53f31e56a2 100644 --- a/batch/batch/driver/k8s_cache.py +++ b/gear/gear/k8s_cache.py @@ -1,9 +1,10 @@ import os from typing import Tuple -from gear.time_limited_max_size_cache import TimeLimitedMaxSizeCache from hailtop.utils import retry_transient_errors +from .time_limited_max_size_cache import TimeLimitedMaxSizeCache + FIVE_SECONDS_NS = 5 * 1000 * 1000 * 1000 diff --git a/gear/pinned-requirements.txt b/gear/pinned-requirements.txt index f2ac99f56fa..0aa15ca2af8 100644 --- a/gear/pinned-requirements.txt +++ b/gear/pinned-requirements.txt @@ -76,7 +76,7 @@ google-auth-httplib2==0.1.0 # google-cloud-profiler google-cloud-profiler==3.1.0 # via -r hail/gear/requirements.txt -googleapis-common-protos==1.59.0 +googleapis-common-protos==1.59.1 # via # -c hail/gear/../hail/python/hailtop/pinned-requirements.txt # -c hail/gear/../hail/python/pinned-requirements.txt @@ -146,6 +146,7 @@ python-dateutil==2.8.2 pyyaml==6.0 # via # -c hail/gear/../hail/python/dev/pinned-requirements.txt + # -c hail/gear/../hail/python/hailtop/pinned-requirements.txt # -c hail/gear/../hail/python/pinned-requirements.txt # kubernetes-asyncio requests==2.31.0 diff --git a/grafana/Makefile b/grafana/Makefile index 403c77b0686..d87e4a3b89e 100644 --- a/grafana/Makefile +++ b/grafana/Makefile @@ -5,9 +5,9 @@ include ../config.mk CLOUD := $(shell kubectl get secret global-config --template={{.data.cloud}} | base64 --decode) build: - $(MAKE) -C ../docker base + $(MAKE) -C .. hail-ubuntu-image deploy: build ! 
[ -z $(NAMESPACE) ] # call this like: make deploy NAMESPACE=default - python3 ../ci/jinja2_render.py '{"deploy":$(DEPLOY),"global": {"cloud": "$(CLOUD)", "domain": "$(DOMAIN)", "docker_prefix":"$(DOCKER_PREFIX)"},"default_ns":{"name":"$(NAMESPACE)"},"base_image":{"image":"'$$(cat ../docker/base-image-ref)'"}}' deployment.yaml deployment.yaml.out + python3 ../ci/jinja2_render.py '{"deploy":$(DEPLOY),"global": {"cloud": "$(CLOUD)", "domain": "$(DOMAIN)", "docker_prefix":"$(DOCKER_PREFIX)"},"default_ns":{"name":"$(NAMESPACE)"},"hail_ubuntu_image":{"image":"'$$(cat ../hail-ubuntu-image)'"}}' deployment.yaml deployment.yaml.out kubectl -n $(NAMESPACE) apply -f deployment.yaml.out diff --git a/grafana/deployment.yaml b/grafana/deployment.yaml index 0c6e4fe7c5b..e6b3f65c5b7 100644 --- a/grafana/deployment.yaml +++ b/grafana/deployment.yaml @@ -155,7 +155,7 @@ spec: name: grafana-envoy-sidecar-config initContainers: - name: setup-tokens - image: {{ base_image.image }} + image: {{ hail_ubuntu_image.image }} command: ["/bin/bash"] args: ["-c", "jq -r '.{{ default_ns.name }}' /grafana-tokens/tokens.json > /grafana-shared/token"] volumeMounts: diff --git a/hail/Makefile b/hail/Makefile index 161119cd5ce..2501c66a733 100644 --- a/hail/Makefile +++ b/hail/Makefile @@ -121,6 +121,13 @@ endif services-jvm-test: $(SCALA_BUILD_INFO) $(JAR_SOURCES) $(JAR_TEST_SOURCES) +./pgradle testServices $(GRADLE_ARGS) $(GRADLE_TEST_ARGS) +.PHONY: services-jvm-test +ifdef HAIL_COMPILE_NATIVES +fs-jvm-test: native-lib-prebuilt +endif +fs-jvm-test: $(SCALA_BUILD_INFO) $(JAR_SOURCES) $(JAR_TEST_SOURCES) + +./pgradle testFS $(GRADLE_ARGS) $(GRADLE_TEST_ARGS) + # javac args from compileJava in build.gradle $(BUILD_DEBUG_PREFIX)/%.class: src/debug/scala/%.java @mkdir -p $(BUILD_DEBUG_PREFIX) diff --git a/hail/build.gradle b/hail/build.gradle index 864d9605b79..726ce8c3111 100644 --- a/hail/build.gradle +++ b/hail/build.gradle @@ -208,7 +208,7 @@ dependencies { bundled 'com.kohlschutter.junixsocket:junixsocket-core:2.6.1' - bundled 'com.github.luben:zstd-jni:1.5.2-1' + bundled 'com.github.luben:zstd-jni:1.5.5-2' bundled project(path: ':shadedazure', configuration: 'shadow') } diff --git a/hail/python/dev/pinned-requirements.txt b/hail/python/dev/pinned-requirements.txt index 7abd1501b21..7c61adb7c43 100644 --- a/hail/python/dev/pinned-requirements.txt +++ b/hail/python/dev/pinned-requirements.txt @@ -50,6 +50,7 @@ charset-normalizer==3.1.0 # requests click==8.1.3 # via + # -c hail/hail/python/dev/../pinned-requirements.txt # -r hail/hail/python/dev/requirements.txt # black # curlylint @@ -115,7 +116,7 @@ importlib-resources==5.12.0 # matplotlib iniconfig==2.0.0 # via pytest -ipykernel==6.23.1 +ipykernel==6.23.2 # via # ipywidgets # jupyter diff --git a/hail/python/hail/backend/service_backend.py b/hail/python/hail/backend/service_backend.py index df15fa8a938..4c7d9aa8c8c 100644 --- a/hail/python/hail/backend/service_backend.py +++ b/hail/python/hail/backend/service_backend.py @@ -1,5 +1,6 @@ from typing import Dict, Optional, Callable, Awaitable, Mapping, Any, List, Union, Tuple, TypeVar, Set import abc +import asyncio import math import struct from hail.expr.expressions.base_expression import Expression @@ -468,6 +469,7 @@ async def _rpc(self, with timings.step("wait driver"): try: + await asyncio.sleep(0.6) # it is not possible for the batch to be finished in less than 600ms await self._batch.wait( description=name, disable_progress_bar=self.disable_progress_bar, diff --git a/hail/python/hail/expr/aggregators/__init__.py 
b/hail/python/hail/expr/aggregators/__init__.py index 59a40eda11e..39f9a572199 100644 --- a/hail/python/hail/expr/aggregators/__init__.py +++ b/hail/python/hail/expr/aggregators/__init__.py @@ -1,7 +1,8 @@ from .aggregators import approx_cdf, approx_quantiles, approx_median, collect, collect_as_set, count, count_where, \ counter, any, all, take, _densify, min, max, sum, array_sum, ndarray_sum, mean, stats, product, fraction, \ hardy_weinberg_test, explode, filter, inbreeding, call_stats, info_score, \ - hist, linreg, corr, group_by, downsample, array_agg, _prev_nonnull, _impute_type, fold, _reservoir_sample + hist, linreg, corr, group_by, downsample, array_agg, _prev_nonnull, _impute_type, fold, _reservoir_sample, \ + _aggregate_local_array __all__ = [ 'approx_cdf', @@ -40,5 +41,6 @@ '_prev_nonnull', '_impute_type', 'fold', - '_reservoir_sample' + '_reservoir_sample', + '_aggregate_local_array' ] diff --git a/hail/python/hail/expr/aggregators/aggregators.py b/hail/python/hail/expr/aggregators/aggregators.py index 9c925f69a78..1366c6e23c6 100644 --- a/hail/python/hail/expr/aggregators/aggregators.py +++ b/hail/python/hail/expr/aggregators/aggregators.py @@ -238,6 +238,40 @@ def context(self): return 'agg' +def _aggregate_local_array(array, f): + """Compute a summary of an array using aggregators. Useful for accessing + functionality that exists in `hl.agg` but not elsewhere, like `hl.agg.call_stats`. + + Parameters + ---------- + array + f + + Returns + ------- + Aggregation result. + """ + elt = array.dtype.element_type + + var = Env.get_uid(base='agg') + ref = construct_expr(ir.Ref(var, elt), elt, array._indices) + aggregated = f(ref) + + if not aggregated._aggregations: + raise ExpressionException("'hl.aggregate_local_array' " + "must take a mapping that contains aggregation expression.") + + indices, _ = unify_all(array, aggregated) + if isinstance(array.dtype, tarray): + stream = ir.toStream(array._ir) + else: + stream = array._ir + return construct_expr(ir.StreamAgg(stream, var, aggregated._ir), + aggregated.dtype, + Indices(indices.source, indices.axes), + array._aggregations) + + _agg_func = AggFunc() diff --git a/hail/python/hail/expr/expressions/typed_expressions.py b/hail/python/hail/expr/expressions/typed_expressions.py index ee45e1debd2..bf78d9b6e3e 100644 --- a/hail/python/hail/expr/expressions/typed_expressions.py +++ b/hail/python/hail/expr/expressions/typed_expressions.py @@ -463,6 +463,25 @@ def _slice(self, start=None, stop=None, step=None): return construct_expr(slice_ir, self.dtype, indices, aggregations) + @typecheck_method(f=func_spec(1, expr_any)) + def aggregate(self, f): + """Uses the aggregator library to compute a summary from an array. + + This method is useful for accessing functionality that exists in the aggregator library + but not the basic expression library, for instance, :func:`.call_stats`. + + Parameters + ---------- + f + Aggregation function + + Returns + ------- + :class:`.Expression` + """ + return hl.agg._aggregate_local_array(self, f) + + @typecheck_method(item=expr_any) def contains(self, item): """Returns a boolean indicating whether `item` is found in the array. 
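Taken together, `_aggregate_local_array` and the new `ArrayExpression.aggregate` method expose the aggregator library over local arrays via the `StreamAgg` IR. A short usage sketch (values are illustrative; assumes a Hail build that includes this change):

```python
import hail as hl

arr = hl.literal([1, 2, 3, 4])

# Any hl.agg aggregator can now summarize a local array:
hl.eval(arr.aggregate(lambda x: hl.agg.sum(x)))  # 10
hl.eval(arr.aggregate(lambda x: hl.struct(n=hl.agg.count(), mean=hl.agg.mean(x))))

# The motivating case from the docstring: functions that exist only as aggregators.
calls = hl.array([hl.call(0, 0), hl.call(0, 1), hl.call(1, 1)])
hl.eval(calls.aggregate(lambda c: hl.agg.call_stats(c, 2)))
```

A function that performs no aggregation trips the `ExpressionException` guard above, since the resulting expression carries no aggregations.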
@@ -4618,6 +4637,10 @@ def scan(self, f, zero): indices, aggregations = unify_all(self, zero, body) return construct_expr(x, tstream(body.dtype), indices, aggregations) + @typecheck_method(f=func_spec(1, expr_any)) + def aggregate(self, f): + return hl.agg._aggregate_local_array(self, f) + def to_array(self): return construct_expr(ir.toArray(self._ir), tarray(self.dtype.element_type), self._indices, self._aggregations) diff --git a/hail/python/hail/ir/__init__.py b/hail/python/hail/ir/__init__.py index 25b07ff316e..7f593785f3f 100644 --- a/hail/python/hail/ir/__init__.py +++ b/hail/python/hail/ir/__init__.py @@ -8,7 +8,7 @@ Void, Cast, NA, IsNA, If, Coalesce, Let, AggLet, Ref, TopLevelReference, ProjectedTopLevelReference, SelectedTopLevelReference, \ TailLoop, Recur, ApplyBinaryPrimOp, ApplyUnaryPrimOp, ApplyComparisonOp, \ MakeArray, ArrayRef, ArraySlice, ArrayLen, ArrayZeros, StreamIota, StreamRange, StreamGrouped, MakeNDArray, \ - NDArrayShape, NDArrayReshape, NDArrayMap, NDArrayMap2, NDArrayRef, NDArraySlice, NDArraySVD, \ + NDArrayShape, NDArrayReshape, NDArrayMap, NDArrayMap2, NDArrayRef, NDArraySlice, NDArraySVD, NDArrayEigh, \ NDArrayReindex, NDArrayAgg, NDArrayMatMul, NDArrayQR, NDArrayInv, NDArrayConcat, NDArrayWrite, \ ArraySort, ArrayMaximalIndependentSet, ToSet, ToDict, toArray, ToArray, CastToArray, \ ToStream, toStream, LowerBoundOnOrderedCollection, GroupByKey, StreamMap, StreamZip, StreamTake, \ @@ -18,7 +18,7 @@ GetTupleElement, Die, ConsoleLog, Apply, ApplySeeded, RNGStateLiteral, RNGSplit,\ TableCount, TableGetGlobals, TableCollect, TableAggregate, MatrixCount, \ MatrixAggregate, TableWrite, udf, subst, clear_session_functions, ReadPartition, \ - PartitionNativeIntervalReader, StreamMultiMerge, StreamZipJoin + PartitionNativeIntervalReader, StreamMultiMerge, StreamZipJoin, StreamAgg from .register_functions import register_functions from .register_aggregators import register_aggregators from .table_ir import (MatrixRowsTable, TableJoin, TableLeftJoinRightDistinct, TableIntervalJoin, @@ -158,6 +158,7 @@ 'NDArrayAgg', 'NDArrayMatMul', 'NDArrayQR', + 'NDArrayEigh', 'NDArraySVD', 'NDArrayInv', 'NDArrayConcat', @@ -190,6 +191,7 @@ 'AggExplode', 'AggGroupBy', 'AggArrayPerElement', + 'StreamAgg', 'BaseApplyAggOp', 'ApplyAggOp', 'ApplyScanOp', diff --git a/hail/python/hail/ir/ir.py b/hail/python/hail/ir/ir.py index dc5c88fd4fc..347331e1458 100644 --- a/hail/python/hail/ir/ir.py +++ b/hail/python/hail/ir/ir.py @@ -1136,6 +1136,31 @@ def _compute_type(self, env, agg_env, deep_typecheck): return tndarray(tfloat64, 1) +class NDArrayEigh(IR): + @typecheck_method(nd=IR, eigvals_only=bool, error_id=nullable(int), stack_trace=nullable(str)) + def __init__(self, nd, eigvals_only=False, error_id=None, stack_trace=None): + super().__init__(nd) + self.nd = nd + self.eigvals_only = eigvals_only + self._error_id = error_id + self._stack_trace = stack_trace + if error_id is None or stack_trace is None: + self.save_error_info() + + def copy(self): + return NDArrayEigh(self.nd, self.eigvals_only, self._error_id, self._stack_trace) + + def head_str(self): + return f'{self._error_id} {self.eigvals_only}' + + def _compute_type(self, env, agg_env, deep_typecheck): + self.nd.compute_type(env, agg_env, deep_typecheck) + if self.eigvals_only: + return tndarray(tfloat64, 1) + else: + return ttuple(tndarray(tfloat64, 1), tndarray(tfloat64, 2)) + + class NDArrayInv(IR): @typecheck_method(nd=IR, error_id=nullable(int), stack_trace=nullable(str)) def __init__(self, nd, error_id=None, stack_trace=None): @@ 
-2074,6 +2099,78 @@ def renderable_bindings(self, i, default_value=None): return {} +class StreamAgg(IR): + @typecheck_method(a=IR, value_name=str, body=IR) + def __init__(self, a, value_name, body): + a = a.handle_randomness(body.uses_agg_randomness) + if body.uses_agg_randomness: + tup, uid, elt = unpack_uid(a.typ) + body = AggLet(value_name, elt, body, is_scan=False) + body = with_split_rng_state(body, uid, is_scan=False) + value_name = tup + + super().__init__(a, body) + self.a = a + self.value_name = value_name + self.body = body + + @typecheck_method(a=IR, body=IR) + def copy(self, a, body): + return StreamAgg(a, self.value_name, body) + + def head_str(self): + return escape_id(self.value_name) + + def _eq(self, other): + return self.value_name == other.value_name + + @property + def bound_variables(self): + return {self.value_name} | super().bound_variables + + def _compute_type(self, env, agg_env, deep_typecheck): + self.a.compute_type(env, agg_env, deep_typecheck) + self.body.compute_type(env, _env_bind(env, self.bindings(1)), deep_typecheck) + return self.body.typ + + @property + def free_agg_vars(self): + return set() + + @property + def free_vars(self): + fv = (self.body.free_agg_vars.difference({self.value_name})).union(self.a.free_vars) + return fv + + def renderable_child_context_without_bindings(self, i: int, parent_context): + if i == 0: + return parent_context + (eval_c, agg_c, scan_c) = parent_context + return (eval_c, eval_c, None) + + def renderable_agg_bindings(self, i, default_value=None): + if i == 1: + if default_value is None: + value = self.a.typ.element_type + else: + value = default_value + return {self.value_name: value} + else: + return {} + + def renderable_bindings(self, i, default_value=None): + if i == 1: + return {BaseIR.agg_capability: default_value} + else: + return {} + + def renderable_uses_agg_context(self, i: int): + return i == 0 + + def renderable_new_block(self, i: int) -> bool: + return i == 1 + + class AggFilter(IR): @typecheck_method(cond=IR, agg_ir=IR, is_scan=bool) def __init__(self, cond, agg_ir, is_scan): diff --git a/hail/python/hail/matrixtable.py b/hail/python/hail/matrixtable.py index fd17c2a9b36..6baceae5d33 100644 --- a/hail/python/hail/matrixtable.py +++ b/hail/python/hail/matrixtable.py @@ -2662,22 +2662,6 @@ def checkpoint(self, output: str, overwrite: bool = False, stage_locally: bool = -------- >>> dataset = dataset.checkpoint('output/dataset_checkpoint.mt') """ - if _codec_spec is None: - _codec_spec = """{ - "name": "LEB128BufferSpec", - "child": { - "name": "BlockingBufferSpec", - "blockSize": 32768, - "child": { - "name": "LZ4FastBlockBufferSpec", - "blockSize": 32768, - "child": { - "name": "StreamBlockBufferSpec" - } - } - } -}""" - hl.current_backend().validate_file_scheme(output) if not _read_if_exists or not hl.hadoop_exists(f'{output}/_SUCCESS'): diff --git a/hail/python/hail/nd/__init__.py b/hail/python/hail/nd/__init__.py index eea4d022ce9..93cbcdb1ff3 100644 --- a/hail/python/hail/nd/__init__.py +++ b/hail/python/hail/nd/__init__.py @@ -1,9 +1,9 @@ -from .nd import array, from_column_major, arange, full, zeros, ones, svd, qr, solve, solve_triangular, diagonal, inv, concatenate, \ +from .nd import array, from_column_major, arange, full, zeros, ones, svd, eigh, qr, solve, solve_triangular, diagonal, inv, concatenate, \ eye, identity, vstack, hstack, maximum, minimum newaxis = None __all__ = [ - 'array', 'from_column_major', 'arange', 'full', 'zeros', 'ones', 'qr', 'solve', 'solve_triangular', 'svd', 'diagonal', 
'inv',
+    'array', 'from_column_major', 'arange', 'full', 'zeros', 'ones', 'qr', 'solve', 'solve_triangular', 'svd', 'eigh', 'diagonal', 'inv',
     'concatenate', 'eye', 'identity', 'vstack', 'hstack', 'newaxis', 'maximum', 'minimum'
 ]
diff --git a/hail/python/hail/nd/nd.py b/hail/python/hail/nd/nd.py
index dd021837760..46b15192378 100644
--- a/hail/python/hail/nd/nd.py
+++ b/hail/python/hail/nd/nd.py
@@ -10,7 +10,7 @@
     expr_numeric, Int64Expression, cast_expr, construct_expr, expr_bool, unify_all)
 from hail.expr.expressions.typed_expressions import NDArrayNumericExpression
-from hail.ir import NDArrayQR, NDArrayInv, NDArrayConcat, NDArraySVD, Apply
+from hail.ir import NDArrayQR, NDArrayInv, NDArrayConcat, NDArraySVD, NDArrayEigh, Apply

 tsequenceof_nd = oneof(sequenceof(expr_ndarray()), expr_array(expr_ndarray()))

@@ -426,6 +426,31 @@ def svd(nd, full_matrices=True, compute_uv=True):
     return construct_expr(ir, return_type, nd._indices, nd._aggregations)


+@typecheck(nd=expr_ndarray(), eigvals_only=bool)
+def eigh(nd, eigvals_only=False):
+    """Performs an eigenvalue decomposition of a symmetric matrix.
+
+    Parameters
+    ----------
+    nd : :class:`.NDArrayNumericExpression`
+        A 2-dimensional ndarray, shape (N, N).
+    eigvals_only: :class:`.bool`
+        If ``False`` (default), compute both the eigenvalues and the eigenvectors;
+        otherwise compute only the eigenvalues.
+
+    Returns
+    -------
+    - w: :class:`.NDArrayNumericExpression`
+        The eigenvalues, shape (N,).
+    - v: :class:`.NDArrayNumericExpression`
+        The eigenvectors, shape (N, N). Only returned if ``eigvals_only`` is ``False``.
+    """
+    float_nd = nd.map(lambda x: hl.float64(x))
+    ir = NDArrayEigh(float_nd._ir, eigvals_only)
+
+    return_type = tndarray(tfloat64, 1) if eigvals_only else ttuple(tndarray(tfloat64, 1), tndarray(tfloat64, 2))
+    return construct_expr(ir, return_type, nd._indices, nd._aggregations)
+
+
 @typecheck(nd=expr_ndarray())
 def inv(nd):
     """Performs a matrix inversion.
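A small usage sketch of the new `hl.nd.eigh` (the matrix is illustrative: a symmetric 2×2 whose eigenvalues are 1 and 3):

```python
import hail as hl

a = hl.nd.array([[2.0, 1.0],
                 [1.0, 2.0]])

w, v = hl.eval(hl.nd.eigh(a))                       # eigenvalues and eigenvectors
w_only = hl.eval(hl.nd.eigh(a, eigvals_only=True))  # eigenvalues alone, skipping the
                                                    # eigenvector computation
```

As the body shows, the input is coerced to float64 before lowering to the `NDArrayEigh` IR node, so integer ndarrays are accepted as well.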
diff --git a/hail/python/hail/table.py b/hail/python/hail/table.py index 50a47215dcb..1d3bd75a004 100644 --- a/hail/python/hail/table.py +++ b/hail/python/hail/table.py @@ -1327,22 +1327,6 @@ def checkpoint(self, output: str, overwrite: bool = False, stage_locally: bool = """ hl.current_backend().validate_file_scheme(output) - if _codec_spec is None: - _codec_spec = """{ - "name": "LEB128BufferSpec", - "child": { - "name": "BlockingBufferSpec", - "blockSize": 32768, - "child": { - "name": "LZ4FastBlockBufferSpec", - "blockSize": 32768, - "child": { - "name": "StreamBlockBufferSpec" - } - } - } -}""" - if not _read_if_exists or not hl.hadoop_exists(f'{output}/_SUCCESS'): self.write(output=output, overwrite=overwrite, stage_locally=stage_locally, _codec_spec=_codec_spec) _assert_type = self._type diff --git a/hail/python/hail/vds/combiner/variant_dataset_combiner.py b/hail/python/hail/vds/combiner/variant_dataset_combiner.py index 02a1bcb4d4d..f47e465bfb3 100644 --- a/hail/python/hail/vds/combiner/variant_dataset_combiner.py +++ b/hail/python/hail/vds/combiner/variant_dataset_combiner.py @@ -43,10 +43,10 @@ class CombinerOutType(NamedTuple): "name": "LEB128BufferSpec", "child": { "name": "BlockingBufferSpec", - "blockSize": 32768, + "blockSize": 65536, "child": { - "name": "LZ4FastBlockBufferSpec", - "blockSize": 32768, + "name": "ZstdBlockBufferSpec", + "blockSize": 65536, "child": { "name": "StreamBlockBufferSpec" } diff --git a/hail/python/hail/vds/methods.py b/hail/python/hail/vds/methods.py index f779289193a..be8bed205c6 100644 --- a/hail/python/hail/vds/methods.py +++ b/hail/python/hail/vds/methods.py @@ -39,7 +39,14 @@ def to_dense_mt(vds: 'VariantDataset') -> 'MatrixTable': Dataset in dense MatrixTable representation. """ ref = vds.reference_data - ref = ref.drop(*(x for x in ('alleles', 'rsid', 'ref_allele') if x in ref.row)) + # FIXME(chrisvittal) consider changing END semantics on VDS to make this better + # see https://github.com/hail-is/hail/issues/13183 for why this is here and more discussion + # we assume that END <= contig.length + ref = ref.annotate_rows(_locus_global_pos=ref.locus.global_position(), _locus_pos=ref.locus.position) + ref = ref.transmute_entries(_END_GLOBAL=ref._locus_global_pos + (ref.END - ref._locus_pos)) + + to_drop = 'alleles', 'rsid', 'ref_allele', '_locus_global_pos', '_locus_pos' + ref = ref.drop(*(x for x in to_drop if x in ref.row)) var = vds.variant_data refl = ref.localize_entries('_ref_entries') varl = var.localize_entries('_var_entries', '_var_cols') @@ -70,9 +77,9 @@ def coalesce_join(ref, var): dr = dr.annotate( _dense=hl.rbind(dr._ref_entries, lambda refs_at_this_row: hl.zip_with_index(hl.zip(dr._var_entries, dr.dense_ref)).map( - lambda tuple: coalesce_join(hl.coalesce(refs_at_this_row[tuple[0]], - hl.or_missing(tuple[1][1].END >= dr.locus.position, - tuple[1][1])), tuple[1][0]) + lambda tup: coalesce_join(hl.coalesce(refs_at_this_row[tup[0]], + hl.or_missing(tup[1][1]._END_GLOBAL >= dr.locus.global_position(), + tup[1][1])), tup[1][0]) )), ) diff --git a/hail/python/hailtop/aiocloud/aioazure/fs.py b/hail/python/hailtop/aiocloud/aioazure/fs.py index 4a378ff10a8..5361f3e980e 100644 --- a/hail/python/hailtop/aiocloud/aioazure/fs.py +++ b/hail/python/hailtop/aiocloud/aioazure/fs.py @@ -395,7 +395,8 @@ async def generate_sas_token( expiry=datetime.utcnow() + valid_interval) return token - def parse_url(self, url: str) -> AzureAsyncFSURL: + @staticmethod + def parse_url(url: str) -> AzureAsyncFSURL: colon_index = url.find(':') if 
colon_index == -1: raise ValueError(f'invalid URL: {url}') diff --git a/hail/python/hailtop/auth/__init__.py b/hail/python/hailtop/auth/__init__.py index d1348e32686..84d07757c10 100644 --- a/hail/python/hailtop/auth/__init__.py +++ b/hail/python/hailtop/auth/__init__.py @@ -4,7 +4,7 @@ from .auth import ( get_userinfo, hail_credentials, copy_paste_login, async_copy_paste_login, - async_create_user, async_delete_user, async_get_user) + async_create_user, async_delete_user, async_get_user, async_logout) __all__ = [ 'get_tokens', @@ -14,6 +14,7 @@ 'get_userinfo', 'hail_credentials', 'async_copy_paste_login', + 'async_logout', 'copy_paste_login', 'sql_config', 'session_id_encode_to_str', diff --git a/hail/python/hailtop/auth/auth.py b/hail/python/hailtop/auth/auth.py index 52b617f5bec..6da77af4504 100644 --- a/hail/python/hailtop/auth/auth.py +++ b/hail/python/hailtop/auth/auth.py @@ -117,6 +117,25 @@ async def async_copy_paste_login(copy_paste_token: str, namespace: Optional[str] return namespace, username +async def async_logout(): + deploy_config = get_deploy_config() + + auth_ns = deploy_config.service_ns('auth') + tokens = get_tokens() + if auth_ns not in tokens: + print('Not logged in.') + return + + headers = await hail_credentials().auth_headers() + async with httpx.client_session(headers=headers) as session: + async with session.post(deploy_config.url('auth', '/api/v1alpha/logout')): + pass + auth_ns = deploy_config.service_ns('auth') + + del tokens[auth_ns] + tokens.write() + + def get_user(username: str, namespace: Optional[str] = None) -> dict: return async_to_blocking(async_get_user(username, namespace)) @@ -133,17 +152,24 @@ async def async_get_user(username: str, namespace: Optional[str] = None) -> dict ) -def create_user(username: str, login_id: str, is_developer: bool, is_service_account: bool, namespace: Optional[str] = None): - return async_to_blocking(async_create_user(username, login_id, is_developer, is_service_account, namespace=namespace)) - - -async def async_create_user(username: str, login_id: str, is_developer: bool, is_service_account: bool, namespace: Optional[str] = None): +async def async_create_user( + username: str, + login_id: str, + is_developer: bool, + is_service_account: bool, + hail_identity: Optional[str], + hail_credentials_secret_name: Optional[str], + *, + namespace: Optional[str] = None +): deploy_config, headers, _ = deploy_config_and_headers_from_namespace(namespace) body = { 'login_id': login_id, 'is_developer': is_developer, 'is_service_account': is_service_account, + 'hail_identity': hail_identity, + 'hail_credentials_secret_name': hail_credentials_secret_name, } async with httpx.client_session( diff --git a/hail/python/hailtop/batch/backend.py b/hail/python/hailtop/batch/backend.py index 785409af3d9..5a92d2e3403 100644 --- a/hail/python/hailtop/batch/backend.py +++ b/hail/python/hailtop/batch/backend.py @@ -1,6 +1,7 @@ from typing import Optional, Dict, Any, TypeVar, Generic, List, Union import sys import abc +import asyncio import collections import orjson import os @@ -812,6 +813,7 @@ async def compile_job(job): if verbose: print(f'Waiting for batch {batch_handle.id}...') starting_job_id = min(j._client_job.job_id for j in unsubmitted_jobs) + await asyncio.sleep(0.6) # it is not possible for the batch to be finished in less than 600ms status = await batch_handle._async_batch.wait(disable_progress_bar=disable_progress_bar, starting_job=starting_job_id) print(f'batch {batch_handle.id} complete: {status["state"]}') diff --git 
a/hail/python/hailtop/batch_client/aioclient.py b/hail/python/hailtop/batch_client/aioclient.py index 45d209a8fa1..d78c23da538 100644 --- a/hail/python/hailtop/batch_client/aioclient.py +++ b/hail/python/hailtop/batch_client/aioclient.py @@ -369,7 +369,8 @@ def __init__(self, async def cancel(self): await self._client._patch(f'/api/v1alpha/batches/{self.id}/cancel') - async def jobs(self, q=None): + async def jobs(self, q: Optional[str] = None, version: Optional[int] = None): + version = version or 1 last_job_id = None while True: params = {} @@ -377,7 +378,7 @@ async def jobs(self, q=None): params['q'] = q if last_job_id is not None: params['last_job_id'] = last_job_id - resp = await self._client._get(f'/api/v1alpha/batches/{self.id}/jobs', params=params) + resp = await self._client._get(f'/api/v{version}alpha/batches/{self.id}/jobs', params=params) body = await resp.json() for job in body['jobs']: yield job diff --git a/hail/python/hailtop/batch_client/client.py b/hail/python/hailtop/batch_client/client.py index e8e6249f69b..f03ab381952 100644 --- a/hail/python/hailtop/batch_client/client.py +++ b/hail/python/hailtop/batch_client/client.py @@ -181,8 +181,8 @@ def status(self): def last_known_status(self): return async_to_blocking(self._async_batch.last_known_status()) - def jobs(self, q=None): - return agen_to_blocking(self._async_batch.jobs(q=q)) + def jobs(self, q=None, version=None): + return agen_to_blocking(self._async_batch.jobs(q=q, version=version)) def get_job(self, job_id: int) -> Job: j = async_to_blocking(self._async_batch.get_job(job_id)) diff --git a/hail/python/hailtop/config/deploy_config.py b/hail/python/hailtop/config/deploy_config.py index 0c4202e9ece..f8da8534918 100644 --- a/hail/python/hailtop/config/deploy_config.py +++ b/hail/python/hailtop/config/deploy_config.py @@ -9,10 +9,18 @@ log = logging.getLogger('deploy_config') +def env_var_or_default(name: str, default: str) -> str: + return os.environ.get(f'HAIL_{name}') or default + + class DeployConfig: @staticmethod def from_config(config) -> 'DeployConfig': - return DeployConfig(config['location'], config['default_namespace'], config.get('domain') or 'hail.is') + return DeployConfig( + env_var_or_default('LOCATION', config['location']), + env_var_or_default('DEFAULT_NAMESPACE', config['default_namespace']), + env_var_or_default('DOMAIN', config.get('domain') or 'hail.is') + ) def get_config(self) -> Dict[str, str]: return { @@ -51,6 +59,9 @@ def __init__(self, location, default_namespace, domain): def with_default_namespace(self, default_namespace): return DeployConfig(self._location, default_namespace, self._domain) + def with_location(self, location): + return DeployConfig(location, self._default_namespace, self._domain) + def default_namespace(self): return self._default_namespace diff --git a/hail/python/hailtop/hailctl/__main__.py b/hail/python/hailtop/hailctl/__main__.py index a5faf994093..09538e23ff9 100644 --- a/hail/python/hailtop/hailctl/__main__.py +++ b/hail/python/hailtop/hailctl/__main__.py @@ -1,86 +1,55 @@ -import sys +import typer +import os -import argparse +from .auth import cli as auth_cli +from .batch import cli as batch_cli +from .config import cli as config_cli +from .describe import describe +from .dataproc import cli as dataproc_cli +from .dev import cli as dev_cli +from .hdinsight import cli as hdinsight_cli -from hailtop import version +app = typer.Typer(help='Manage and monitor hail deployments.', no_args_is_help=True) -def print_help(): - main_parser = 
argparse.ArgumentParser(prog='hailctl', - description='Manage and monitor Hail deployments.') - subs = main_parser.add_subparsers() +for cli in ( + auth_cli.app, + batch_cli.app, + config_cli.app, + dataproc_cli.app, + dev_cli.app, + hdinsight_cli.app, +): + app.add_typer(cli) - subs.add_parser('dataproc', - help='Manage Google Dataproc clusters configured for Hail.', - description='Manage Google Dataproc clusters configured for Hail.') - subs.add_parser('describe', - help='Describe Hail Matrix Table and Table files.', - description='Describe Hail Matrix Table and Table files.') - subs.add_parser('hdinsight', - help='Manage Azure HDInsight clusters configured for Hail.', - description='Manage Azure HDInsight clusters configured for Hail.') - subs.add_parser('auth', - help='Manage Hail credentials.', - description='Manage Hail credentials.') - subs.add_parser('dev', - help='Manage Hail development utilities.', - description='Manage Hail development utilities.') - subs.add_parser('version', - help='Print version information and exit.', - description='Print version information and exit.') - subs.add_parser('batch', - help='Manage batches running on the batch service managed by the Hail team.', - description='Manage batches running on the batch service managed by the Hail team.') - subs.add_parser('curl', - help='Issue authenticated curl requests to Hail infrastructure.', - description='Issue authenticated curl requests to Hail infrastructure.') - subs.add_parser('config', - help='Manage Hail configuration.', - description='Manage Hail configuration.') - main_parser.print_help() +@app.command() +def version(): + '''Print version information and exit.''' + import hailtop # pylint: disable=import-outside-toplevel + print(hailtop.version()) -def main(): - if len(sys.argv) == 1: - print_help() - sys.exit(0) - else: - module = sys.argv[1] - args = sys.argv[2:] - if module == 'version': - print(version()) - elif module == 'dataproc': - from hailtop.hailctl.dataproc import cli as dataproc_cli # pylint: disable=import-outside-toplevel - dataproc_cli.main(args) - elif module == 'describe': - from hailtop.hailctl.describe import main as describe_main # pylint: disable=import-outside-toplevel - describe_main(args) - elif module == 'hdinsight': - from hailtop.hailctl.hdinsight import cli as hdinsight_cli # pylint: disable=import-outside-toplevel - hdinsight_cli.main(args) - elif module == 'auth': - from hailtop.hailctl.auth import cli as auth_cli # pylint: disable=import-outside-toplevel - auth_cli.main(args) - elif module == 'dev': - from hailtop.hailctl.dev import cli as dev_cli # pylint: disable=import-outside-toplevel - dev_cli.main(args) - elif module == 'batch': - from hailtop.hailctl.batch import cli as batch_cli # pylint: disable=import-outside-toplevel - batch_cli.main(args) - elif module == 'curl': - from hailtop.hailctl.curl import main as curl_main # pylint: disable=import-outside-toplevel - curl_main(args) - elif module == 'config': - from hailtop.hailctl.config import cli as config_cli # pylint: disable=import-outside-toplevel - config_cli.main(args) - elif module in ('-h', '--help', 'help'): - print_help() - else: - sys.stderr.write(f"ERROR: no such module: {module!r}") - print_help() - sys.exit(1) +@app.command(context_settings={"allow_extra_args": True, "ignore_unknown_options": True}) +def curl( + namespace: str, + service: str, + path: str, + ctx: typer.Context, +): + '''Issue authenticated curl requests to Hail infrastructure.''' + from hailtop.auth import hail_credentials # pylint: 
disable=import-outside-toplevel + from hailtop.config import get_deploy_config # pylint: disable=import-outside-toplevel + from hailtop.utils import async_to_blocking # pylint: disable=import-outside-toplevel + + headers_dict = async_to_blocking(hail_credentials(namespace=namespace).auth_headers()) + headers = [x for k, v in headers_dict.items() for x in ['-H', f'{k}: {v}']] + path = get_deploy_config().url(service, path) + os.execvp('curl', ['curl', *headers, *ctx.args, path]) + +app.command(help='Describe Hail Matrix Table and Table files.')(describe) -if __name__ == '__main__': - main() + +def main(): + app() diff --git a/hail/python/hailtop/hailctl/auth/auth_list.py b/hail/python/hailtop/hailctl/auth/auth_list.py deleted file mode 100644 index 38fe1201c48..00000000000 --- a/hail/python/hailtop/hailctl/auth/auth_list.py +++ /dev/null @@ -1,18 +0,0 @@ -from hailtop.config import get_deploy_config -from hailtop.auth import get_tokens - - -def init_parser(parser): # pylint: disable=unused-argument - pass - - -def main(args, pass_through_args): # pylint: disable=unused-argument - deploy_config = get_deploy_config() - auth_ns = deploy_config.service_ns('auth') - tokens = get_tokens() - for ns in tokens: - if ns == auth_ns: - s = '*' - else: - s = ' ' - print(f'{s}{ns}') diff --git a/hail/python/hailtop/hailctl/auth/cli.py b/hail/python/hailtop/hailctl/auth/cli.py index cbc2351cbd8..65584200c52 100644 --- a/hail/python/hailtop/hailctl/auth/cli.py +++ b/hail/python/hailtop/hailctl/auth/cli.py @@ -1,90 +1,118 @@ +import asyncio import sys -import argparse - -from . import login -from . import logout -from . import auth_list -from . import copy_paste_login -from . import user -from . import create_user -from . import delete_user - - -def parser(): - main_parser = argparse.ArgumentParser( - prog='hailctl auth', - description='Manage Hail credentials.') - subparsers = main_parser.add_subparsers() - - login_parser = subparsers.add_parser( - 'login', - help='Obtain Hail credentials.', - description='Obtain Hail credentials.') - copy_paste_login_parser = subparsers.add_parser( - 'copy-paste-login', - help='Obtain Hail credentials with a copy paste token.', - description='Obtain Hail credentials with a copy paste token.') - logout_parser = subparsers.add_parser( - 'logout', - help='Revoke Hail credentials.', - description='Revoke Hail credentials.') - list_parser = subparsers.add_parser( - 'list', - help='List Hail credentials.', - description='List Hail credentials.') - user_parser = subparsers.add_parser( - 'user', - help='Get Hail user information.', - description='Get Hail user information.' - ) - create_user_parser = subparsers.add_parser( - 'create-user', - help='Create a new Hail user.', - description='Create a new Hail user.' - ) - delete_user_parser = subparsers.add_parser( - 'delete-user', - help='Delete a Hail user.', - description='Delete a Hail user.' 
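The new `hailctl curl` command above builds curl header arguments from a headers dict and then replaces the Python process with curl. A self-contained sketch of that idiom (the header contents are illustrative):

```python
import os

def exec_curl(headers: dict, url: str, extra_args: list) -> None:
    # Flatten {'Authorization': 'Bearer abc'} into ['-H', 'Authorization: Bearer abc'].
    header_args = [x for k, v in headers.items() for x in ['-H', f'{k}: {v}']]
    # execvp replaces the current process image, so nothing after this call
    # runs, and curl's exit status becomes the command's exit status.
    os.execvp('curl', ['curl', *header_args, *extra_args, url])
```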
- ) - - login_parser.set_defaults(module='login') - login.init_parser(login_parser) - - copy_paste_login_parser.set_defaults(module='copy-paste-login') - copy_paste_login.init_parser(copy_paste_login_parser) - - logout_parser.set_defaults(module='logout') - logout.init_parser(logout_parser) - - list_parser.set_defaults(module='list') - auth_list.init_parser(list_parser) - - user_parser.set_defaults(module='user') - user.init_parser(user_parser) - - create_user_parser.set_defaults(module='create-user') - create_user.init_parser(create_user_parser) - - delete_user_parser.set_defaults(module='delete-user') - delete_user.init_parser(delete_user_parser) - - return main_parser - - -def main(args): - if not args: - parser().print_help() - sys.exit(0) - jmp = { - 'login': login, - 'copy-paste-login': copy_paste_login, - 'logout': logout, - 'list': auth_list, - 'user': user, - 'create-user': create_user, - 'delete-user': delete_user, +import typer +from typer import Option as Opt, Argument as Arg +import json + +from typing import Optional +from typing_extensions import Annotated as Ann + + +app = typer.Typer( + name='auth', + no_args_is_help=True, + help='Manage Hail credentials.', +) + + +NamespaceOption = Ann[ + Optional[str], + Opt('--namespace', '-n', help='Namespace for the auth server (default: from deploy configuration).'), +] + + +@app.command() +def login(namespace: NamespaceOption = None): + '''Obtain Hail credentials.''' + from .login import async_login # pylint: disable=import-outside-toplevel + asyncio.run(async_login(namespace)) + + +@app.command() +def copy_paste_login(copy_paste_token: str, namespace: NamespaceOption = None): + '''Obtain Hail credentials with a copy paste token.''' + from hailtop.auth import copy_paste_login # pylint: disable=import-outside-toplevel + + auth_ns, username = copy_paste_login(copy_paste_token, namespace) + if auth_ns == 'default': + print(f'Logged in as {username}.') + else: + print(f'Logged into namespace {auth_ns} as {username}.') + + +@app.command() +def logout(): + '''Revoke Hail credentials.''' + from hailtop.auth import async_logout # pylint: disable=import-outside-toplevel + + asyncio.run(async_logout()) + + +@app.command() +def list(): + '''List Hail credentials.''' + from hailtop.config import get_deploy_config # pylint: disable=import-outside-toplevel + from hailtop.auth import get_tokens # pylint: disable=import-outside-toplevel + + deploy_config = get_deploy_config() + auth_ns = deploy_config.service_ns('auth') + tokens = get_tokens() + for ns in tokens: + if ns == auth_ns: + s = '*' + else: + s = ' ' + print(f'{s}{ns}') + + +@app.command() +def user(): + '''Get Hail user information.''' + from hailtop.auth import get_userinfo # pylint: disable=import-outside-toplevel + + userinfo = get_userinfo() + if userinfo is None: + print('not logged in') + sys.exit(1) + result = { + 'username': userinfo['username'], + 'email': userinfo['login_id'], # deprecated - backwards compatibility + 'gsa_email': userinfo['hail_identity'], # deprecated - backwards compatibility + 'hail_identity': userinfo['hail_identity'], + 'login_id': userinfo['login_id'], + 'display_name': userinfo['display_name'], } + print(json.dumps(result, indent=4)) + + +@app.command() +def create_user( + username: str, + login_id: Ann[str, Arg(help="In Azure, the user's object ID in AAD. 
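`NamespaceOption` above shows the Typer pattern this migration leans on: define an `Annotated` alias once and reuse it across commands so the flag's name, short form, and help text stay consistent. A runnable sketch:

```python
from typing import Optional
from typing_extensions import Annotated
import typer

# One alias defines --namespace/-n once; every command reuses it.
NamespaceOption = Annotated[
    Optional[str],
    typer.Option('--namespace', '-n', help='Namespace for the auth server.'),
]

app = typer.Typer()

@app.command()
def login(namespace: NamespaceOption = None):
    print(f'logging into {namespace or "default"}')

@app.command()
def logout(namespace: NamespaceOption = None):
    print(f'logging out of {namespace or "default"}')
```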
In GCP, the Google email")], + developer: bool = False, + service_account: bool = False, + hail_identity: Optional[str] = None, + hail_credentials_secret_name: Optional[str] = None, + namespace: NamespaceOption = None, + wait: bool = False, +): + ''' + Create a new Hail user with username USERNAME and login ID LOGIN_ID. + ''' + from .create_user import polling_create_user # pylint: disable=import-outside-toplevel + + asyncio.run(polling_create_user(username, login_id, developer, service_account, hail_identity, hail_credentials_secret_name, namespace=namespace, wait=wait)) + + +@app.command() +def delete_user( + username: str, + namespace: NamespaceOption = None, + wait: bool = False, +): + ''' + Delete the Hail user with username USERNAME. + ''' + from .delete_user import polling_delete_user # pylint: disable=import-outside-toplevel - args, pass_through_args = parser().parse_known_args(args=args) - jmp[args.module].main(args, pass_through_args) + asyncio.run(polling_delete_user(username, namespace, wait)) diff --git a/hail/python/hailtop/hailctl/auth/copy_paste_login.py b/hail/python/hailtop/hailctl/auth/copy_paste_login.py deleted file mode 100644 index ef24d7c0ebe..00000000000 --- a/hail/python/hailtop/hailctl/auth/copy_paste_login.py +++ /dev/null @@ -1,24 +0,0 @@ -import asyncio - -from hailtop.auth import async_copy_paste_login - - -def init_parser(parser): - parser.add_argument("copy_paste_token", type=str, - help="Copy paste token.") - parser.add_argument("--namespace", "-n", type=str, - help="Specify namespace for auth server. (default: from deploy configuration)") - - -async def async_main(args): - auth_ns, username = await async_copy_paste_login(args.copy_paste_token, args.namespace) - - if auth_ns == 'default': - print(f'Logged in as {username}.') - else: - print(f'Logged into namespace {auth_ns} as {username}.') - - -def main(args, pass_through_args): # pylint: disable=unused-argument - loop = asyncio.get_event_loop() - loop.run_until_complete(async_main(args)) diff --git a/hail/python/hailtop/hailctl/auth/create_user.py b/hail/python/hailtop/hailctl/auth/create_user.py index 77cb4effd8f..061aa8495f6 100644 --- a/hail/python/hailtop/hailctl/auth/create_user.py +++ b/hail/python/hailtop/hailctl/auth/create_user.py @@ -1,4 +1,4 @@ -import asyncio +from typing import Optional from hailtop.utils import sleep_and_backoff from hailtop.auth import async_create_user, async_get_user @@ -8,43 +8,33 @@ class CreateUserException(Exception): pass -def init_parser(parser): - parser.add_argument("username", type=str, - help="User name to create.") - parser.add_argument("login_id", type=str, - help="Login ID to be used with OAuth. This is the object ID in Azure and the email address in GCP.") - parser.add_argument("--developer", default=False, action='store_true', - help="User should be a developer.") - parser.add_argument("--service-account", default=False, action='store_true', - help="User should be a service account.") - parser.add_argument("--namespace", "-n", type=str, - help="Specify namespace for auth server. 
(default: from deploy configuration)") - parser.add_argument("--wait", default=False, action='store_true', - help="Wait for the creation of the user to finish") - - -async def async_main(args): +async def polling_create_user( + username: str, + login_id: str, + developer: bool, + service_account: bool, + hail_identity: Optional[str], + hail_credentials_secret_name: Optional[str], + *, + namespace: Optional[str] = None, + wait: bool = False, +): try: - await async_create_user(args.username, args.login_id, args.developer, args.service_account, args.namespace) + await async_create_user(username, login_id, developer, service_account, hail_identity, hail_credentials_secret_name, namespace=namespace) - if not args.wait: + if not wait: return async def _poll(): delay = 5 while True: - user = await async_get_user(args.username, args.namespace) + user = await async_get_user(username, namespace) if user['state'] == 'active': - print(f"Created user '{args.username}'") + print(f"Created user '{username}'") return assert user['state'] == 'creating' delay = await sleep_and_backoff(delay) await _poll() except Exception as e: - raise CreateUserException(f"Error while creating user '{args.username}'") from e - - -def main(args, pass_through_args): # pylint: disable=unused-argument - loop = asyncio.get_event_loop() - loop.run_until_complete(async_main(args)) + raise CreateUserException(f"Error while creating user '{username}'") from e diff --git a/hail/python/hailtop/hailctl/auth/delete_user.py b/hail/python/hailtop/hailctl/auth/delete_user.py index 9c9eb5705e8..9ebb7709304 100644 --- a/hail/python/hailtop/hailctl/auth/delete_user.py +++ b/hail/python/hailtop/hailctl/auth/delete_user.py @@ -1,4 +1,4 @@ -import asyncio +from typing import Optional from hailtop.utils import sleep_and_backoff from hailtop.auth import async_delete_user, async_get_user @@ -8,37 +8,27 @@ class DeleteUserException(Exception): pass -def init_parser(parser): - parser.add_argument("username", type=str, - help="User name to delete.") - parser.add_argument("--namespace", "-n", type=str, - help="Specify namespace for auth server. 
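`polling_create_user` and `polling_delete_user` share the same wait loop: fetch the user's state, return on the terminal state, otherwise back off and retry. A self-contained sketch, with `sleep_and_backoff` reimplemented under the assumption (consistent with its use here) that it sleeps and returns an increased delay:

```python
import asyncio

async def sleep_and_backoff(delay: float, max_delay: float = 30.0) -> float:
    # Stand-in for hailtop.utils.sleep_and_backoff: sleep, then grow the delay.
    await asyncio.sleep(delay)
    return min(delay * 2, max_delay)

async def poll_until(get_state, target: str, interim: str) -> None:
    delay = 5.0
    while True:
        state = await get_state()
        if state == target:
            return
        assert state == interim  # any other state signals a server-side bug
        delay = await sleep_and_backoff(delay)
```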
(default: from deploy configuration)") - parser.add_argument("--wait", default=False, action='store_true', - help="Wait for the creation of the user to finish") - - -async def async_main(args): +async def polling_delete_user( + username: str, + namespace: Optional[str], + wait: bool, +): try: - await async_delete_user(args.username, args.namespace) + await async_delete_user(username, namespace) - if not args.wait: + if not wait: return async def _poll(): delay = 5 while True: - user = await async_get_user(args.username, args.namespace) + user = await async_get_user(username, namespace) if user['state'] == 'deleted': - print(f"Deleted user '{args.username}'") + print(f"Deleted user '{username}'") return assert user['state'] == 'deleting' delay = await sleep_and_backoff(delay) await _poll() except Exception as e: - raise DeleteUserException(f"Error while deleting user '{args.username}'") from e - - -def main(args, pass_through_args): # pylint: disable=unused-argument - loop = asyncio.get_event_loop() - loop.run_until_complete(async_main(args)) + raise DeleteUserException(f"Error while deleting user '{username}'") from e diff --git a/hail/python/hailtop/hailctl/auth/login.py b/hail/python/hailtop/hailctl/auth/login.py index 539ca29551b..4b9d5d0f284 100644 --- a/hail/python/hailtop/hailctl/auth/login.py +++ b/hail/python/hailtop/hailctl/auth/login.py @@ -5,16 +5,14 @@ import webbrowser from aiohttp import web +from typing import Optional + + from hailtop.config import get_deploy_config from hailtop.auth import get_tokens, hail_credentials from hailtop.httpx import client_session -def init_parser(parser): - parser.add_argument("--namespace", "-n", type=str, - help="Specify namespace for auth server. (default: from deploy configuration)") - - routes = web.RouteTableDef() @@ -47,34 +45,36 @@ async def start_server(): async def auth_flow(deploy_config, default_ns, session): runner, port = await start_server() - async with session.get(deploy_config.url('auth', '/api/v1alpha/login'), - params={'callback_port': port}) as resp: + async with session.get(deploy_config.url('auth', '/api/v1alpha/login'), params={'callback_port': port}) as resp: resp = await resp.json() flow = resp['flow'] state = flow['state'] authorization_url = flow['authorization_url'] - print(f''' + print( + f''' Visit the following URL to log into Hail: {authorization_url} Opening in your browser. 
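A recurring change in these files: the old `loop = asyncio.get_event_loop(); loop.run_until_complete(...)` boilerplate becomes a single `asyncio.run(...)` at the command boundary, keeping the helpers purely async. The shape, with an illustrative helper:

```python
import asyncio
import typer

app = typer.Typer()

async def polling_delete(username: str) -> None:
    await asyncio.sleep(0)  # illustrative stand-in for the real async client call
    print(f"Deleted user '{username}'")

@app.command()
def delete_user(username: str):
    # Typer commands are synchronous; asyncio.run creates a fresh event loop,
    # drives the coroutine to completion, and closes the loop.
    asyncio.run(polling_delete(username))
```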
-''') +''' + ) webbrowser.open(authorization_url) code = await runner.app['q'].get() await runner.cleanup() async with session.get( - deploy_config.url('auth', '/api/v1alpha/oauth2callback'), - params={ - 'callback_port': port, - 'code': code, - 'state': state, - 'flow': json.dumps(flow), - }) as resp: + deploy_config.url('auth', '/api/v1alpha/oauth2callback'), + params={ + 'callback_port': port, + 'code': code, + 'state': state, + 'flow': json.dumps(flow), + }, + ) as resp: resp = await resp.json() token = resp['token'] username = resp['username'] @@ -92,16 +92,11 @@ async def auth_flow(deploy_config, default_ns, session): print(f'Logged into namespace {default_ns} as {username}.') -async def async_main(args): +async def async_login(namespace: Optional[str]): deploy_config = get_deploy_config() - if args.namespace: - deploy_config = deploy_config.with_default_namespace(args.namespace) - namespace = args.namespace or deploy_config.default_namespace() + if namespace: + deploy_config = deploy_config.with_default_namespace(namespace) + namespace = namespace or deploy_config.default_namespace() headers = await hail_credentials(namespace=namespace, authorize_target=False).auth_headers() async with client_session(headers=headers) as session: await auth_flow(deploy_config, namespace, session) - - -def main(args, pass_through_args): # pylint: disable=unused-argument - loop = asyncio.get_event_loop() - loop.run_until_complete(async_main(args)) diff --git a/hail/python/hailtop/hailctl/auth/logout.py b/hail/python/hailtop/hailctl/auth/logout.py deleted file mode 100644 index d5d8c3f7d26..00000000000 --- a/hail/python/hailtop/hailctl/auth/logout.py +++ /dev/null @@ -1,35 +0,0 @@ -import asyncio - -from hailtop.config import get_deploy_config -from hailtop.auth import get_tokens, hail_credentials -from hailtop.httpx import client_session - - -def init_parser(parser): # pylint: disable=unused-argument - pass - - -async def async_main(): - deploy_config = get_deploy_config() - - auth_ns = deploy_config.service_ns('auth') - tokens = get_tokens() - if auth_ns not in tokens: - print('Not logged in.') - return - - headers = await hail_credentials().auth_headers() - async with client_session(headers=headers) as session: - async with session.post(deploy_config.url('auth', '/api/v1alpha/logout')): - pass - auth_ns = deploy_config.service_ns('auth') - - del tokens[auth_ns] - tokens.write() - - print('Logged out.') - - -def main(args, pass_through_args): # pylint: disable=unused-argument - loop = asyncio.get_event_loop() - loop.run_until_complete(async_main()) diff --git a/hail/python/hailtop/hailctl/auth/user.py b/hail/python/hailtop/hailctl/auth/user.py deleted file mode 100644 index b4c388823ad..00000000000 --- a/hail/python/hailtop/hailctl/auth/user.py +++ /dev/null @@ -1,24 +0,0 @@ -import json -import sys - -from hailtop.auth import get_userinfo - - -def init_parser(parser): # pylint: disable=unused-argument - pass - - -def main(args, pass_through_args): # pylint: disable=unused-argument - userinfo = get_userinfo() - if userinfo is None: - print('not logged in') - sys.exit(1) - result = { - 'username': userinfo['username'], - 'email': userinfo['login_id'], # deprecated - backwards compatibility - 'gsa_email': userinfo['hail_identity'], # deprecated - backwards compatibility - 'hail_identity': userinfo['hail_identity'], - 'login_id': userinfo['login_id'], - 'display_name': userinfo['display_name'], - } - print(json.dumps(result, indent=4)) diff --git a/hail/python/hailtop/hailctl/batch/__init__.py 
b/hail/python/hailtop/hailctl/batch/__init__.py index 4e7d8ff32de..cfbdef3e080 100644 --- a/hail/python/hailtop/hailctl/batch/__init__.py +++ b/hail/python/hailtop/hailctl/batch/__init__.py @@ -1,5 +1,3 @@ from . import cli -__all__ = [ - 'cli' -] +__all__ = ['cli'] diff --git a/hail/python/hailtop/hailctl/batch/batch_cli_utils.py b/hail/python/hailtop/hailctl/batch/batch_cli_utils.py index dabb276844d..461abbcd993 100644 --- a/hail/python/hailtop/hailctl/batch/batch_cli_utils.py +++ b/hail/python/hailtop/hailctl/batch/batch_cli_utils.py @@ -1,16 +1,57 @@ import json +from enum import Enum import yaml -import aiohttp import csv from typing import List, Dict, Callable import tabulate import io +from typer import Option as Opt + +from typing_extensions import Annotated as Ann + TableData = List[Dict[str, str]] TABLE_FORMAT_OPTIONS = ['json', 'yaml', 'csv', *tabulate.tabulate_formats] +class StructuredFormat(str, Enum): + YAML = 'yaml' + JSON = 'json' + + def __str__(self): + return self.value + + +StructuredFormatOption = Ann[StructuredFormat, Opt('--output', '-o')] + + +class StructuredFormatPlusText(str, Enum): + TEXT = 'text' + YAML = 'yaml' + JSON = 'json' + + def __str__(self): + return self.value + + +StructuredFormatPlusTextOption = Ann[StructuredFormatPlusText, Opt('--output', '-o')] + + +class ExtendedOutputFormat(str, Enum): + YAML = 'yaml' + JSON = 'json' + GRID = 'grid' + + def __str__(self): + return self.value + + +ExtendedOutputFormatOption = Ann[ExtendedOutputFormat, Opt('--output', '-o')] + + def get_batch_if_exists(client, id): + import aiohttp.client_exceptions # pylint: disable=import-outside-toplevel + try: return client.get_batch(id) except aiohttp.client_exceptions.ClientResponseError as cle: @@ -20,6 +61,8 @@ def get_batch_if_exists(client, id): def get_job_if_exists(client, batch_id, job_id): + import aiohttp.client_exceptions # pylint: disable=import-outside-toplevel + try: return client.get_job(batch_id, job_id) except aiohttp.client_exceptions.ClientResponseError as cle: diff --git a/hail/python/hailtop/hailctl/batch/billing/cli.py b/hail/python/hailtop/hailctl/batch/billing/cli.py index 50d2856531a..c3a7f52f361 100644 --- a/hail/python/hailtop/hailctl/batch/billing/cli.py +++ b/hail/python/hailtop/hailctl/batch/billing/cli.py @@ -1,52 +1,31 @@ -import sys -import argparse +import typer -from . import list_billing_projects -from . 
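The `str`-subclassing enums added to `batch_cli_utils.py` are what give the new commands their `--output` choices: Typer derives the permitted values from the enum members, and subclassing `str` makes each member print as its plain value. A minimal sketch:

```python
from enum import Enum
import typer

class OutputFormat(str, Enum):
    YAML = 'yaml'
    JSON = 'json'

    def __str__(self):
        # Print 'yaml', not 'OutputFormat.YAML'.
        return self.value

app = typer.Typer()

@app.command()
def show(output: OutputFormat = OutputFormat.YAML):
    # Typer rejects anything other than 'yaml' or 'json' for --output.
    print(f'formatting as {output}')
```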
import get +from ..batch_cli_utils import make_formatter, StructuredFormat, StructuredFormatOption -def init_parser(): - main_parser = argparse.ArgumentParser( - prog='hailctl batch billing', - description='Manage billing on the service managed by the Hail team.') - subparsers = main_parser.add_subparsers() +app = typer.Typer( + name='billing', + no_args_is_help=True, + help='Manage billing on the service managed by the Hail team.', +) - list_parser = subparsers.add_parser( - 'list', - help="List billing projects", - description="List billing projects") - get_parser = subparsers.add_parser( - 'get', - help='Get a particular billing project\'s info', - description='Get a particular billing project\'s info') - list_parser.set_defaults(module='list') - list_billing_projects.init_parser(list_parser) +@app.command() +def get(billing_project: str, output: StructuredFormatOption = StructuredFormat.YAML): + '''Get the billing information for BILLING_PROJECT.''' + from hailtop.batch_client.client import BatchClient # pylint: disable=import-outside-toplevel - get_parser.set_defaults(module='get') - get.init_parser(get_parser) + with BatchClient('') as client: + billing_project_data = client.get_billing_project(billing_project) + print(make_formatter(output.value)(billing_project_data)) - return main_parser +@app.command() +def list(output: StructuredFormatOption = StructuredFormat.YAML): + '''List billing projects.''' + from hailtop.batch_client.client import BatchClient # pylint: disable=import-outside-toplevel -def main(args, pass_through_args, client): - if not args: - init_parser().print_help() - sys.exit(0) - jmp = { - 'list': list_billing_projects, - 'get': get - } - - args, pass_through_args = init_parser().parse_known_args(args=pass_through_args) - - if not args or 'module' not in args: - init_parser().print_help() - sys.exit(0) - - if args.module not in jmp: - sys.stderr.write(f"ERROR: no such module: {args.module!r}") - init_parser().print_help() - sys.exit(1) - - jmp[args.module].main(args, pass_through_args, client) + with BatchClient('') as client: + billing_projects = client.list_billing_projects() + format = make_formatter(output.value) + print(format(billing_projects)) diff --git a/hail/python/hailtop/hailctl/batch/billing/get.py b/hail/python/hailtop/hailctl/batch/billing/get.py deleted file mode 100644 index ad638e6f2b0..00000000000 --- a/hail/python/hailtop/hailctl/batch/billing/get.py +++ /dev/null @@ -1,20 +0,0 @@ -import aiohttp - -from ..batch_cli_utils import make_formatter - - -def init_parser(parser): - parser.add_argument('billing_project', type=str, help="Name of the desired billing project") - parser.add_argument('-o', type=str, default='yaml', help="Specify output format", - choices=["yaml", "json"]) - - -def main(args, pass_through_args, client): # pylint: disable=unused-argument - try: - billing_project = client.get_billing_project(args.billing_project) - except aiohttp.client_exceptions.ClientResponseError as cle: - if cle.code == 403: - billing_project = None - raise cle - - print(make_formatter(args.o)(billing_project)) diff --git a/hail/python/hailtop/hailctl/batch/billing/list_billing_projects.py b/hail/python/hailtop/hailctl/batch/billing/list_billing_projects.py deleted file mode 100644 index 513b74e1033..00000000000 --- a/hail/python/hailtop/hailctl/batch/billing/list_billing_projects.py +++ /dev/null @@ -1,16 +0,0 @@ -from ..batch_cli_utils import make_formatter, TABLE_FORMAT_OPTIONS - - -def init_parser(parser): - parser.add_argument('-o', type=str, 
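`get_batch_if_exists` and `get_job_if_exists` encode one small idiom the new commands rely on: turn the server's 404 into `None` so each command can print a friendly not-found message, and let every other error propagate. A generic sketch (using the aiohttp error's `status` attribute):

```python
import aiohttp.client_exceptions

def get_if_exists(fetch, *args):
    # A 404 means "no such resource" here, not a failure.
    try:
        return fetch(*args)
    except aiohttp.client_exceptions.ClientResponseError as err:
        if err.status == 404:
            return None
        raise
```

Callers then branch on `None`, as in `batch = get_if_exists(client.get_batch, batch_id)`.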
default='grid', choices=TABLE_FORMAT_OPTIONS) - - -def main(args, pass_through_args, client): # pylint: disable=unused-argument - billing_projects = client.list_billing_projects() - - if args.o not in ('json', 'yaml'): - for bp in billing_projects: - bp['users'] = "\n".join(bp['users']) - - format = make_formatter(args.o) - print(format(billing_projects)) diff --git a/hail/python/hailtop/hailctl/batch/cancel.py b/hail/python/hailtop/hailctl/batch/cancel.py deleted file mode 100644 index 57d89fdf592..00000000000 --- a/hail/python/hailtop/hailctl/batch/cancel.py +++ /dev/null @@ -1,17 +0,0 @@ -from .batch_cli_utils import get_batch_if_exists - - -def init_parser(parser): - parser.add_argument('id', type=int) - - -def main(args, pass_through_args, client): # pylint: disable=unused-argument - maybe_batch = get_batch_if_exists(client, args.id) - if maybe_batch is None: - print(f"Batch with id {args.id} not found") - return - - batch = maybe_batch - - batch.cancel() - print(f"Batch with id {args.id} was cancelled successfully") diff --git a/hail/python/hailtop/hailctl/batch/cli.py b/hail/python/hailtop/hailctl/batch/cli.py index e1ad2b9bf6b..f65ddb52422 100644 --- a/hail/python/hailtop/hailctl/batch/cli.py +++ b/hail/python/hailtop/hailctl/batch/cli.py @@ -1,122 +1,164 @@ -import sys -import argparse +import asyncio +from enum import Enum +import typer +from typer import Option as Opt, Argument as Arg +import json + +from typing import Optional, List +from typing_extensions import Annotated as Ann -from hailtop.batch_client.client import BatchClient from . import list_batches -from . import delete -from . import get -from . import cancel -from . import wait -from . import log -from . import job from . import billing -from . import submit - - -def parser(): - main_parser = argparse.ArgumentParser( - prog='hailctl batch', - description='Manage batches running on the batch service managed by the Hail team.') - subparsers = main_parser.add_subparsers() - - billing_parser = subparsers.add_parser( - 'billing', - help='List billing', - description='List billing') - list_parser = subparsers.add_parser( - 'list', - help="List batches", - description="List batches") - get_parser = subparsers.add_parser( - 'get', - help='Get a particular batch\'s info', - description='Get a particular batch\'s info') - cancel_parser = subparsers.add_parser( - 'cancel', - help='Cancel a batch', - description='Cancel a batch') - delete_parser = subparsers.add_parser( - 'delete', - help='Delete a batch', - description='Delete a batch' - ) - submit_parser = subparsers.add_parser( - 'submit', - help='Submit a batch', - description='Submit a batch', - ) - log_parser = subparsers.add_parser( - 'log', - help='Get log for a job', - description='Get log for a job' - ) - job_parser = subparsers.add_parser( - 'job', - help='Get the status and specification for a job', - description='Get the status and specification for a job' - ) - wait_parser = subparsers.add_parser( - 'wait', - help='Wait for a batch to complete, then print JSON status.', - description='Wait for a batch to complete, then print JSON status.' 
- ) - - billing_parser.set_defaults(module='billing') - - list_parser.set_defaults(module='list') - list_batches.init_parser(list_parser) - - get_parser.set_defaults(module='get') - get.init_parser(get_parser) - - cancel_parser.set_defaults(module='cancel') - cancel.init_parser(cancel_parser) - - delete_parser.set_defaults(module='delete') - delete.init_parser(delete_parser) - - submit_parser.set_defaults(module='submit') - submit.init_parser(submit_parser) - - log_parser.set_defaults(module='log') - log.init_parser(log_parser) - - job_parser.set_defaults(module='job') - job.init_parser(job_parser) - - wait_parser.set_defaults(module='wait') - wait.init_parser(wait_parser) - - return main_parser - - -def main(args): - if not args: - parser().print_help() - sys.exit(0) - jmp = { - 'billing': billing, - 'list': list_batches, - 'delete': delete, - 'get': get, - 'cancel': cancel, - 'log': log, - 'job': job, - 'wait': wait, - 'submit': submit, - } - - args, pass_through_args = parser().parse_known_args(args=args) - - # hailctl batch doesn't create batches - client = BatchClient(None) # type: ignore - - try: - if args.module == 'billing': - from .billing import cli # pylint: disable=import-outside-toplevel - cli.main(args, pass_through_args, client) +from . import submit as _submit +from .batch_cli_utils import ( + get_batch_if_exists, + get_job_if_exists, + make_formatter, + StructuredFormat, + StructuredFormatOption, + StructuredFormatPlusText, + StructuredFormatPlusTextOption, + ExtendedOutputFormat, + ExtendedOutputFormatOption, +) + + +app = typer.Typer( + name='batch', + no_args_is_help=True, + help='Manage batches running on the batch service managed by the Hail team.', +) +app.add_typer(billing.cli.app) + + +@app.command() +def list( + query: str = '', + limit: int = 50, + before: Optional[int] = None, + full: bool = False, + output: ExtendedOutputFormatOption = ExtendedOutputFormat.GRID, +): + '''List batches.''' + list_batches.list(query, limit, before, full, output) + + +@app.command() +def get(batch_id: int, output: StructuredFormatOption = StructuredFormat.YAML): + '''Get information on the batch with id BATCH_ID.''' + from hailtop.batch_client.client import BatchClient # pylint: disable=import-outside-toplevel + + with BatchClient('') as client: + batch = get_batch_if_exists(client, batch_id) + if batch: + print(make_formatter(output)(batch.last_known_status())) + else: + print(f"Batch with id {batch_id} not found") + + +@app.command() +def cancel(batch_id: int): + '''Cancel the batch with id BATCH_ID.''' + from hailtop.batch_client.client import BatchClient # pylint: disable=import-outside-toplevel + + with BatchClient('') as client: + batch = get_batch_if_exists(client, batch_id) + if batch: + batch.cancel() + print(f"Batch with id {batch_id} was cancelled successfully") + else: + print(f"Batch with id {batch_id} not found") + + +@app.command() +def delete(batch_id: int): + '''Delete the batch with id BATCH_ID.''' + from hailtop.batch_client.client import BatchClient # pylint: disable=import-outside-toplevel + + with BatchClient('') as client: + batch = get_batch_if_exists(client, batch_id) + if batch: + batch.delete() + print(f"Batch with id {batch_id} was deleted successfully") + else: + print(f"Batch with id {batch_id} not found") + + +class JobContainer(str, Enum): + INPUT = 'input' + MAIN = 'main' + OUTPUT = 'output' + + +@app.command() +def log( + batch_id: int, + job_id: int, + container: Ann[Optional[JobContainer], Opt(help='Container name of the desired job')] = None, + 
output: StructuredFormatOption = StructuredFormat.YAML, +): + '''Get the log for the job with id JOB_ID in the batch with id BATCH_ID.''' + from hailtop.batch_client.client import BatchClient # pylint: disable=import-outside-toplevel + + with BatchClient('') as client: + maybe_job = get_job_if_exists(client, batch_id, job_id) + if maybe_job is None: + print(f"Job with ID {job_id} on batch {batch_id} not found") return - jmp[args.module].main(args, pass_through_args, client) - finally: - client.close() + if container: + print(maybe_job.container_log(container)) + else: + print(make_formatter(output)(maybe_job.log())) + + +@app.command() +def wait( + batch_id: int, + quiet: Ann[bool, Opt('--quiet', '-q', help='Do not print a progress bar for the batch.')] = False, + output: StructuredFormatPlusTextOption = StructuredFormatPlusText.TEXT, +): + '''Wait for the batch with id BATCH_ID to complete, then print status.''' + from hailtop.batch_client.client import BatchClient # pylint: disable=import-outside-toplevel + + with BatchClient('') as client: + batch = get_batch_if_exists(client, batch_id) + if batch is None: + print(f"Batch with id {batch_id} not found") + raise typer.Exit(1) + + quiet = quiet or output != StructuredFormatPlusText.TEXT + out = batch.wait(disable_progress_bar=quiet) + if output == StructuredFormatPlusText.JSON: + print(json.dumps(out)) + else: + print(out) + + +@app.command() +def job(batch_id: int, job_id: int, output: StructuredFormatOption = StructuredFormat.YAML): + '''Get the status and specification for the job with id JOB_ID in the batch with id BATCH_ID.''' + from hailtop.batch_client.client import BatchClient # pylint: disable=import-outside-toplevel + + with BatchClient('') as client: + job = get_job_if_exists(client, batch_id, job_id) + + if job is not None: + print(make_formatter(output)(job._status)) + else: + print(f"Job with ID {job_id} on batch {batch_id} not found") + + +@app.command() +def submit( + script: str, + arguments: Ann[Optional[List[str]], Arg()] = None, + files: Ann[ + Optional[List[str]], Opt(help='Files or directories to add to the working directory of the job.') + ] = None, + name: Ann[str, Opt(help='The name of the batch.')] = '', + image_name: Ann[Optional[str], Opt(help='Name of Docker image for the job (default: hailgenetics/hail)')] = None, + output: StructuredFormatPlusTextOption = StructuredFormatPlusText.TEXT, +): + '''Submit a batch with a single job that runs SCRIPT with the arguments ARGUMENTS.''' + asyncio.run(_submit.submit(name, image_name, files or [], output, script, arguments or [])) diff --git a/hail/python/hailtop/hailctl/batch/delete.py b/hail/python/hailtop/hailctl/batch/delete.py deleted file mode 100644 index eec81040309..00000000000 --- a/hail/python/hailtop/hailctl/batch/delete.py +++ /dev/null @@ -1,18 +0,0 @@ -import sys -from .batch_cli_utils import get_batch_if_exists - - -def init_parser(parser): - parser.add_argument('batch_id', type=int, help="ID number of batch to be deleted") - - -def main(args, pass_through_args, client): # pylint: disable=unused-argument - maybe_batch = get_batch_if_exists(client, args.batch_id) - if maybe_batch is None: - print(f"Batch with batch_id {args.batch_id} not found") - sys.exit(1) - - batch = maybe_batch - - batch.delete() - print(f"Batch with batch_id {args.batch_id} was deleted successfully") diff --git a/hail/python/hailtop/hailctl/batch/get.py b/hail/python/hailtop/hailctl/batch/get.py deleted file mode 100644 index 0b76a869863..00000000000 --- 
a/hail/python/hailtop/hailctl/batch/get.py +++ /dev/null @@ -1,19 +0,0 @@ -import sys -from .batch_cli_utils import get_batch_if_exists, make_formatter - - -def init_parser(parser): - parser.add_argument('batch_id', type=int, help="ID number of the desired batch") - parser.add_argument('-o', type=str, default='yaml', help="Specify output format", - choices=["yaml", "json"]) - - -def main(args, pass_through_args, client): # pylint: disable=unused-argument - maybe_batch = get_batch_if_exists(client, args.batch_id) - if maybe_batch is None: - print(f"Batch with id {args.batch_id} not found") - sys.exit(1) - - batch = maybe_batch - - print(make_formatter(args.o)(batch.last_known_status())) diff --git a/hail/python/hailtop/hailctl/batch/job.py b/hail/python/hailtop/hailctl/batch/job.py deleted file mode 100644 index 6a06ea9122e..00000000000 --- a/hail/python/hailtop/hailctl/batch/job.py +++ /dev/null @@ -1,17 +0,0 @@ -from .batch_cli_utils import get_job_if_exists, make_formatter - - -def init_parser(parser): - parser.add_argument('batch_id', type=int, help="ID number of the desired batch") - parser.add_argument('job_id', type=int, help="ID number of the desired job") - parser.add_argument('-o', type=str, default='yaml', help="Specify output format", - choices=["yaml", "json"]) - - -def main(args, pass_through_args, client): # pylint: disable=unused-argument - maybe_job = get_job_if_exists(client, args.batch_id, args.job_id) - if maybe_job is None: - print(f"Job with ID {args.job_id} on batch {args.batch_id} not found") - return - - print(make_formatter(args.o)(maybe_job._status)) diff --git a/hail/python/hailtop/hailctl/batch/list_batches.py b/hail/python/hailtop/hailctl/batch/list_batches.py index e42f2200271..03f32ac0e30 100644 --- a/hail/python/hailtop/hailctl/batch/list_batches.py +++ b/hail/python/hailtop/hailctl/batch/list_batches.py @@ -1,23 +1,12 @@ -from .batch_cli_utils import make_formatter, TABLE_FORMAT_OPTIONS +from .batch_cli_utils import make_formatter -def init_parser(parser): - parser.add_argument('--query', '-q', type=str, help="see docs at https://batch.hail.is/batches") - parser.add_argument('--limit', '-l', type=int, default=50, - help='number of batches to return (default 50)') - parser.add_argument('--all', '-a', action='store_true', - help='list all batches (overrides --limit)') - parser.add_argument('--before', type=int, help='start listing before supplied id', default=None) - parser.add_argument('--full', action='store_true', - help='when output is tabular, print more information') - parser.add_argument('--no-header', action='store_true', help='do not print a table header') - parser.add_argument('-o', type=str, default='grid', - choices=TABLE_FORMAT_OPTIONS) +def list(query, limit, before, full, output): + from hailtop.batch_client.client import BatchClient # pylint: disable=import-outside-toplevel - -def main(args, passthrough_args, client): # pylint: disable=unused-argument - batch_list = client.list_batches(q=args.query, last_batch_id=args.before, limit=args.limit) - statuses = [batch.last_known_status() for batch in batch_list] + with BatchClient('') as client: + batch_list = client.list_batches(q=query, last_batch_id=before, limit=limit) + statuses = [batch.last_known_status() for batch in batch_list] if len(statuses) == 0: print("No batches to display.") @@ -26,16 +15,10 @@ def main(args, passthrough_args, client): # pylint: disable=unused-argument for status in statuses: status['state'] = status['state'].capitalize() - if args.full: - statuses = [ - {k: v for 
k, v in status.items() if k != 'attributes'} - for status in statuses - ] + if full: + statuses = [{k: v for k, v in status.items() if k != 'attributes'} for status in statuses] else: - statuses = [ - {'id': status['id'], 'state': status['state']} - for status in statuses - ] + statuses = [{'id': status['id'], 'state': status['state']} for status in statuses] - format = make_formatter(args.o) + format = make_formatter(output) print(format(statuses)) diff --git a/hail/python/hailtop/hailctl/batch/log.py b/hail/python/hailtop/hailctl/batch/log.py deleted file mode 100644 index 46fb4776392..00000000000 --- a/hail/python/hailtop/hailctl/batch/log.py +++ /dev/null @@ -1,21 +0,0 @@ -from .batch_cli_utils import get_job_if_exists, make_formatter - - -def init_parser(parser): - parser.add_argument('batch_id', type=int, help="ID number of the desired batch") - parser.add_argument('job_id', type=int, help="ID number of the desired job") - parser.add_argument('-c', type=str, default='', help="Container name of the desired job (input, main, output)") - parser.add_argument('-o', type=str, default='yaml', help="Specify output format", - choices=["yaml", "json"]) - - -def main(args, pass_through_args, client): # pylint: disable=unused-argument - maybe_job = get_job_if_exists(client, args.batch_id, args.job_id) - if maybe_job is None: - print(f"Job with ID {args.job_id} on batch {args.batch_id} not found") - return - - if args.c: - print(maybe_job.container_log(args.c)) - else: - print(make_formatter(args.o)(maybe_job.log())) diff --git a/hail/python/hailtop/hailctl/batch/submit.py b/hail/python/hailtop/hailctl/batch/submit.py index 2593cd3ba82..facaa12da92 100644 --- a/hail/python/hailtop/hailctl/batch/submit.py +++ b/hail/python/hailtop/hailctl/batch/submit.py @@ -1,35 +1,19 @@ -import asyncio import orjson import os -import sys from shlex import quote as shq - -import hailtop.batch as hb -import hailtop.batch_client.client as bc from hailtop import pip_version -from hailtop.aiotools.copy import copy_from_dict -from hailtop.config import get_remote_tmpdir, get_user_config_path, get_deploy_config -from hailtop.utils import secret_alnum_string, unpack_comma_delimited_inputs - -HAIL_GENETICS_HAIL_IMAGE = os.environ.get('HAIL_GENETICS_HAIL_IMAGE', f'hailgenetics/hail:{pip_version()}') -def init_parser(parser): - parser.add_argument('--name', type=str, default='', help='Batch name') - parser.add_argument('--image-name', type=str, required=False, - help='Name for Docker image. 
Defaults to hailgenetics/hail') - parser.add_argument('--files', nargs='+', action='append', default=[], - help='Comma-separated list of files or directories to add to the working directory of job') - parser.add_argument('-o', type=str, default='text', choices=['text', 'json']) - parser.add_argument('script', type=str, help='Path to script') - parser.add_argument('arguments', nargs='*', help='Arguments to script') +async def submit(name, image_name, files, output, script, arguments): + import hailtop.batch as hb # pylint: disable=import-outside-toplevel + import hailtop.batch_client.client as bc # pylint: disable=import-outside-toplevel + from hailtop.aiotools.copy import copy_from_dict # pylint: disable=import-outside-toplevel + from hailtop.config import get_remote_tmpdir, get_user_config_path, get_deploy_config # pylint: disable=import-outside-toplevel + from hailtop.utils import secret_alnum_string, unpack_comma_delimited_inputs # pylint: disable=import-outside-toplevel - -async def async_main(args): - script = args.script - files = unpack_comma_delimited_inputs(args.files) + files = unpack_comma_delimited_inputs(files) user_config = get_user_config_path() - quiet = args.o != 'text' + quiet = output != 'text' remote_tmpdir = get_remote_tmpdir('hailctl batch submit') tmpdir_path_prefix = secret_alnum_string() @@ -37,17 +21,20 @@ async def async_main(args): def cloud_prefix(path): return f'{remote_tmpdir}/{tmpdir_path_prefix}/{path}' - b = hb.Batch(name=args.name, backend=hb.ServiceBackend()) + backend = hb.ServiceBackend() + b = hb.Batch(name=name, backend=backend) j = b.new_bash_job() - j.image(args.image_name or HAIL_GENETICS_HAIL_IMAGE) + j.image(image_name or os.environ.get('HAIL_GENETICS_HAIL_IMAGE', f'hailgenetics/hail:{pip_version()}')) rel_file_paths = [os.path.relpath(file) for file in files] local_files_to_cloud_files = [{'from': local, 'to': cloud_prefix(local)} for local in rel_file_paths] - await copy_from_dict(files=[ - {'from': script, 'to': cloud_prefix(script)}, - {'from': str(user_config), 'to': cloud_prefix(user_config)}, - *local_files_to_cloud_files, - ]) + await copy_from_dict( + files=[ + {'from': script, 'to': cloud_prefix(script)}, + {'from': str(user_config), 'to': cloud_prefix(user_config)}, + *local_files_to_cloud_files, + ] + ) for file in local_files_to_cloud_files: local_file = file['from'] cloud_file = file['to'] @@ -61,21 +48,16 @@ def cloud_prefix(path): j.env('HAIL_QUERY_BACKEND', 'batch') command = 'python3' if script.endswith('.py') else 'bash' - script_arguments = " ".join(shq(x) for x in args.arguments) + script_arguments = " ".join(shq(x) for x in arguments) j.command(f'{command} {script_file} {script_arguments}') batch_handle: bc.Batch = b.run(wait=False, disable_progress_bar=quiet) # type: ignore - if args.o == 'text': + if output == 'text': deploy_config = get_deploy_config() url = deploy_config.external_url('batch', f'/batches/{batch_handle.id}/jobs/1') print(f'Submitted batch {batch_handle.id}, see {url}') else: - assert args.o == 'json' + assert output == 'json' print(orjson.dumps({'id': batch_handle.id}).decode('utf-8')) - -def main(args, pass_through_args, client): # pylint: disable=unused-argument - if pass_through_args: - print(f'Unrecognized arguments: {" ".join(pass_through_args)}') - sys.exit(1) - asyncio.run(async_main(args)) + backend.close() diff --git a/hail/python/hailtop/hailctl/batch/wait.py b/hail/python/hailtop/hailctl/batch/wait.py deleted file mode 100644 index 0aad2e3ab6a..00000000000 --- 
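The reworked `submit` stages the script, the user config, and any `--files` inputs under a per-submission random prefix in the remote temporary directory, so concurrent submissions cannot clobber each other. A sketch of the prefixing, with `secret_alnum_string` reimplemented for self-containment and a hypothetical bucket:

```python
import os
import secrets
import string

def secret_alnum_string(n: int = 22) -> str:
    # Stand-in for hailtop.utils.secret_alnum_string.
    alphabet = string.ascii_letters + string.digits
    return ''.join(secrets.choice(alphabet) for _ in range(n))

remote_tmpdir = 'gs://my-bucket/batch'  # hypothetical remote tmpdir
token = secret_alnum_string()

def cloud_prefix(path: str) -> str:
    return f'{remote_tmpdir}/{token}/{path}'

# Relative paths preserve the job's working-directory layout.
print(cloud_prefix(os.path.relpath('scripts/run.py')))
```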
a/hail/python/hailtop/hailctl/batch/wait.py +++ /dev/null @@ -1,26 +0,0 @@ -import json -import sys -from .batch_cli_utils import get_batch_if_exists - - -def init_parser(parser): - parser.add_argument('batch_id', type=int) - parser.add_argument("--quiet", "-q", - action="store_true", - help="Do not print a progress bar for the batch") - parser.add_argument('-o', type=str, default='text', choices=['text', 'json']) - - -def main(args, pass_through_args, client): # pylint: disable=unused-argument - maybe_batch = get_batch_if_exists(client, args.batch_id) - if maybe_batch is None: - print(f"Batch with id {args.batch_id} not found") - sys.exit(1) - - batch = maybe_batch - quiet = args.quiet or args.o != 'text' - out = batch.wait(disable_progress_bar=quiet) - if args.o == 'json': - print(json.dumps(out)) - else: - print(out) diff --git a/hail/python/hailtop/hailctl/config/cli.py b/hail/python/hailtop/hailctl/config/cli.py index 7c735ef771f..021aa268570 100644 --- a/hail/python/hailtop/hailctl/config/cli.py +++ b/hail/python/hailtop/hailctl/config/cli.py @@ -1,111 +1,31 @@ import os import sys -import argparse import re import warnings -from hailtop.config import get_user_config, get_user_config_path -from hailtop.aiotools.router_fs import RouterAsyncFS - -validations = { - ('batch', 'bucket'): (lambda x: re.fullmatch(r'[^:/\s]+', x) is not None, - 'should be valid Google Bucket identifier, with no gs:// prefix'), - ('batch', 'remote_tmpdir'): (RouterAsyncFS.valid_url, - 'should be valid cloud storage URI such as gs://my-bucket/batch-tmp/'), - ('email',): (lambda x: re.fullmatch(r'.+@.+', x) is not None, 'should be valid email address'), -} - -deprecated_paths = { - ('batch', 'bucket'): '\'batch/bucket\' has been deprecated. Use \'batch/remote_tmpdir\' instead.' 
-} - - -def parser(): - main_parser = argparse.ArgumentParser( - prog='hailctl config', - description='Manage Hail configuration.') - subparsers = main_parser.add_subparsers() - - set_parser = subparsers.add_parser( - 'set', - help='Set a Hail configuration parameter.', - description='Set a Hail configuration parameter.') - unset_parser = subparsers.add_parser( - 'unset', - help='Unset a Hail configuration parameter (restore to default behavior).', - description='Unset a hail configuration parameter (restore to default behavior).') - get_parser = subparsers.add_parser( - 'get', - help='Get the value of a Hail configuration parameter.', - description='Get the value of a Hail configuration parameter.') - config_location_parser = subparsers.add_parser( - 'config-location', - help='Print the location of the config file', - description='Print the location of the config file') - list_parser = subparsers.add_parser( - 'list', - help='lists every config variable in the section (default: all sections)', - description='lists every config variable in the section (default: all sections)') - - set_parser.set_defaults(module='set') - set_parser.add_argument("parameter", type=str, - help="A hail configuration parameter.") - set_parser.add_argument("value", type=str, - help="A value.") - - unset_parser.set_defaults(module='unset') - unset_parser.add_argument("parameter", type=str, - help="A hail configuration parameter.") - - get_parser.set_defaults(module='get') - get_parser.add_argument("parameter", type=str, - help="A hail configuration parameter.") - - config_location_parser.set_defaults(module='config-location') - - list_parser.set_defaults(module='list') - list_parser.add_argument('section', type=str, nargs='?', - help='Section to list (default: all sections)') - - return main_parser - - -def list_config(config, section: str): - if section: - for key, value in config.items(section): - print(f'{key}={value}') - else: - for sname, items in config.items(): - for key, value in items.items(): - print(f'{sname}/{key}={value}') +from typing import Optional, Tuple +from typing_extensions import Annotated as Ann +import typer +from typer import Argument as Arg -def main(args): - if not args: - parser().print_help() - sys.exit(0) - args = parser().parse_args(args=args) - config_file = get_user_config_path() - if args.module == 'config-location': - print(config_file) - sys.exit(0) +app = typer.Typer( + name='config', + no_args_is_help=True, + help='Manage Hail configuration.', +) - config = get_user_config() - if args.module == 'list': - list_config(config, args.section) - sys.exit(0) - path = args.parameter.split('/') +def get_section_key_path(parameter: str) -> Tuple[str, str, Tuple[str, ...]]: + path = parameter.split('/') if len(path) == 1: - section = 'global' - key = path[0] - elif len(path) == 2: - section = path[0] - key = path[1] - else: - print(''' -Paramters must contain at most one slash separating the configuration section + return 'global', path[0], tuple(path) + if len(path) == 2: + return path[0], path[1], tuple(path) + print( + ''' +Parameters must contain at most one slash separating the configuration section from the configuration parameter, for example: "batch/billing_project". Parameters may also have no slashes, indicating the parameter is a global @@ -113,37 +33,100 @@ def main(args): A parameter with more than one slash is invalid, for example: "batch/billing/project". 
-'''.lstrip('\n'), file=sys.stderr) +'''.lstrip( + '\n' + ), + file=sys.stderr, + ) + sys.exit(1) + + +@app.command() +def set(parameter: str, value: str): + '''Set a Hail configuration parameter.''' + from hailtop.aiotools.router_fs import RouterAsyncFS # pylint: disable=import-outside-toplevel + from hailtop.config import get_user_config, get_user_config_path # pylint: disable=import-outside-toplevel + + config = get_user_config() + config_file = get_user_config_path() + section, key, path = get_section_key_path(parameter) + + validations = { + ('batch', 'bucket'): ( + lambda x: re.fullmatch(r'[^:/\s]+', x) is not None, + 'should be valid Google Bucket identifier, with no gs:// prefix', + ), + ('batch', 'remote_tmpdir'): ( + RouterAsyncFS.valid_url, + 'should be valid cloud storage URI such as gs://my-bucket/batch-tmp/', + ), + ('email',): (lambda x: re.fullmatch(r'.+@.+', x) is not None, 'should be valid email address'), + } + + validation_func, msg = validations.get(path, (lambda _: True, '')) # type: ignore + if not validation_func(value): + print(f"Error: bad value {value!r} for parameter {parameter!r} {msg}", file=sys.stderr) sys.exit(1) - if args.module == 'set': - path = tuple(path) - validation_func, msg = validations.get(path, (lambda x: True, '')) - if not validation_func(args.value): - print(f"Error: bad value {args.value!r} for parameter {args.parameter!r} {msg}", file=sys.stderr) - sys.exit(1) - if path in deprecated_paths: - warnings.warn(deprecated_paths[path]) - if section not in config: - config[section] = {} - config[section][key] = args.value - try: - f = open(config_file, 'w', encoding='utf-8') - except FileNotFoundError: - os.makedirs(config_file.parent, exist_ok=True) - f = open(config_file, 'w', encoding='utf-8') - with f: + if path == ('batch', 'bucket'): + warnings.warn("'batch/bucket' has been deprecated. 
Use 'batch/remote_tmpdir' instead.") + + if section not in config: + config[section] = {} + config[section][key] = value + + try: + f = open(config_file, 'w', encoding='utf-8') + except FileNotFoundError: + os.makedirs(config_file.parent, exist_ok=True) + f = open(config_file, 'w', encoding='utf-8') + with f: + config.write(f) + + +@app.command() +def unset(parameter: str): + '''Unset a Hail configuration parameter (restore to default behavior).''' + from hailtop.config import get_user_config, get_user_config_path # pylint: disable=import-outside-toplevel + + config = get_user_config() + config_file = get_user_config_path() + section, key, _ = get_section_key_path(parameter) + if section in config and key in config[section]: + del config[section][key] + with open(config_file, 'w', encoding='utf-8') as f: config.write(f) - sys.exit(0) - if args.module == 'unset': - if section in config and key in config[section]: - del config[section][key] - with open(config_file, 'w', encoding='utf-8') as f: - config.write(f) - sys.exit(0) - if args.module == 'get': - if section in config and key in config[section]: - print(config[section][key]) - sys.exit(0) - print(f'bad module name: {args.module}') - sys.exit(1) + + +@app.command() +def get(parameter: str): + '''Get the value of a Hail configuration parameter.''' + from hailtop.config import get_user_config # pylint: disable=import-outside-toplevel + + config = get_user_config() + section, key, _ = get_section_key_path(parameter) + if section in config and key in config[section]: + print(config[section][key]) + + +@app.command(name='config-location') +def config_location(): + '''Print the location of the config file.''' + from hailtop.config import get_user_config_path # pylint: disable=import-outside-toplevel + + print(get_user_config_path()) + + +@app.command() +def list(section: Ann[Optional[str], Arg(show_default='all sections')] = None): + '''Lists every config variable in the section.''' + from hailtop.config import get_user_config # pylint: disable=import-outside-toplevel + + config = get_user_config() + if section: + for key, value in config.items(section): + print(f'{key}={value}') + else: + for sname, items in config.items(): + for key, value in items.items(): + print(f'{sname}/{key}={value}') diff --git a/hail/python/hailtop/hailctl/curl.py b/hail/python/hailtop/hailctl/curl.py deleted file mode 100644 index f2e7c2b5cd8..00000000000 --- a/hail/python/hailtop/hailctl/curl.py +++ /dev/null @@ -1,21 +0,0 @@ -import sys -import os - -from hailtop.auth import hail_credentials -from hailtop.config import get_deploy_config -from hailtop.utils import async_to_blocking - - -def main(args): - if len(args) < 3: - print('hailctl curl NAMESPACE SERVICE PATH [args] ...', file=sys.stderr) - sys.exit(1) - ns = args[0] - svc = args[1] - path = args[2] - headers_dict = async_to_blocking(hail_credentials(namespace=ns).auth_headers()) - headers = [x - for k, v in headers_dict.items() - for x in ['-H', f'{k}: {v}']] - path = get_deploy_config().url(svc, path) - os.execvp('curl', ['curl', *headers, *args[3:], path]) diff --git a/hail/python/hailtop/hailctl/dataproc/cli.py b/hail/python/hailtop/hailctl/dataproc/cli.py index 7c0cf5bf57a..393e6597f53 100644 --- a/hail/python/hailtop/hailctl/dataproc/cli.py +++ b/hail/python/hailtop/hailctl/dataproc/cli.py @@ -1,124 +1,405 @@ import sys -import asyncio -import argparse +import typer +from typer import Option as Opt, Argument as Arg -from . import connect -from . import describe -from . 
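`set` and `unset` above rewrite the user config file in place; the object returned by `get_user_config` behaves like a `configparser.ConfigParser`, and the parent directory is created on first write. A self-contained sketch of that write path:

```python
import configparser
import os
from pathlib import Path

def set_config_value(config_file: Path, section: str, key: str, value: str) -> None:
    config = configparser.ConfigParser()
    config.read(config_file)  # silently yields an empty config if the file is absent
    if section not in config:
        config[section] = {}
    config[section][key] = value
    # First-time writers may need to create the config directory.
    os.makedirs(config_file.parent, exist_ok=True)
    with open(config_file, 'w', encoding='utf-8') as f:
        config.write(f)
```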
import diagnose +from typing import List, Optional +from typing_extensions import Annotated as Ann + +from .connect import connect as dataproc_connect, DataprocConnectService +from .submit import submit as dataproc_submit +from .diagnose import diagnose as dataproc_diagnose +from .modify import modify as dataproc_modify +from .start import start as dataproc_start, VepVersion +from ..describe import describe from . import gcloud -from . import list_clusters -from . import modify -from . import start -from . import stop -from . import submit MINIMUM_REQUIRED_GCLOUD_VERSION = (285, 0, 0) -def parser(): - main_parser = argparse.ArgumentParser( - prog='hailctl dataproc', - description='Manage and monitor Hail deployments.', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - - main_parser.add_argument( - '--beta', - action='store_true', - help='Force use of `beta` in gcloud commands') - - subparsers = main_parser.add_subparsers() - - start_parser = subparsers.add_parser( - 'start', - help='Start a Dataproc cluster configured for Hail.', - description='Start a Dataproc cluster configured for Hail.') - submit_parser = subparsers.add_parser( - 'submit', - help='Submit a Python script to a running Dataproc cluster.', - description='Submit a Python script to a running Dataproc cluster. To pass arguments to the ' - 'script being submitted, just list them after the name of the script.') - connect_parser = subparsers.add_parser( - 'connect', - help='Connect to a running Dataproc cluster.', - description='Connect to a running Dataproc cluster.') - diagnose_parser = subparsers.add_parser( - 'diagnose', - help='Diagnose problems in a Dataproc cluster.', - description='Diagnose problems in a Dataproc cluster.') - stop_parser = subparsers.add_parser( - 'stop', - help='Shut down a Dataproc cluster.', - description='Shut down a Dataproc cluster.') - list_parser = subparsers.add_parser( - 'list', - help='List active Dataproc clusters.', - description='List active Dataproc clusters.') - modify_parser = subparsers.add_parser( - 'modify', - help='Modify active Dataproc clusters.', - description='Modify active Dataproc clusters.') - describe_parser = subparsers.add_parser( - 'describe', - help='DEPRECATED. Describe Hail Matrix Table and Table files.', - description='DEPRECATED. 
Describe Hail Matrix Table and Table files.') - - start_parser.set_defaults(module='start') - start.init_parser(start_parser) - - submit_parser.set_defaults(module='submit') - submit.init_parser(submit_parser) - - connect_parser.set_defaults(module='connect') - connect.init_parser(connect_parser) - - diagnose_parser.set_defaults(module='diagnose') - diagnose.init_parser(diagnose_parser) - - stop_parser.set_defaults(module='stop') - stop.init_parser(stop_parser) - - list_parser.set_defaults(module='list') - - modify_parser.set_defaults(module='modify') - modify.init_parser(modify_parser) - - describe_parser.set_defaults(module='describe') - describe.init_parser(describe_parser) - - return main_parser - - -def main(args): - p = parser() - if not args: - p.print_help() - sys.exit(0) - jmp = { - 'start': start, - 'submit': submit, - 'connect': connect, - 'diagnose': diagnose, - 'stop': stop, - 'list': list_clusters, - 'modify': modify, - 'describe': describe, - } - - args, pass_through_args = p.parse_known_args(args=args) - if "module" not in args: - p.error('positional argument required') +BetaOption = Ann[bool, Opt(help='Force use of `beta` in gcloud commands')] +use_gcloud_beta = False + +ProjectOption = Ann[ + Optional[str], Opt(help='Google Cloud project for the cluster (defaults to currently set project).') +] + +ZoneOption = Ann[ + Optional[str], + Opt('--zone', '-z', help='Compute zone for Dataproc cluster.'), +] + +DryRunOption = Ann[ + bool, + Opt(help="Print gcloud dataproc command, but don't run it."), +] + +NumWorkersOption = Ann[Optional[int], Opt('--num-workers', '--n-workers', '-w', help='Number of worker machines.')] + +NumSecondaryWorkersOption = Ann[ + Optional[int], + Opt( + '--num-secondary-workers', + '--num-preemptible-workers', + '--n-pre-workers', + '-p', + help='Number of secondary (preemptible) worker machines.', + ), +] + + +app = typer.Typer(name='dataproc', no_args_is_help=True, help='Manage Hail Dataproc clusters.') + + +@app.callback() +def check_gcloud_version(beta: BetaOption = False): + global use_gcloud_beta + use_gcloud_beta = beta try: gcloud_version = gcloud.get_version() if gcloud_version < MINIMUM_REQUIRED_GCLOUD_VERSION: - print(f"hailctl dataproc requires Google Cloud SDK (gcloud) version {'.'.join(map(str, MINIMUM_REQUIRED_GCLOUD_VERSION))} or higher", file=sys.stderr) - sys.exit(1) + sys.exit( + f"hailctl dataproc requires Google Cloud SDK (gcloud) version {'.'.join(map(str, MINIMUM_REQUIRED_GCLOUD_VERSION))} or higher", + ) except Exception: # If gcloud's output format changes in the future and the version can't be parsed, # then continue and attempt to run gcloud. print("Warning: unable to determine Google Cloud SDK version", file=sys.stderr) - asyncio.get_event_loop().run_until_complete( - jmp[args.module].main(args, pass_through_args)) + +@app.command(context_settings={"allow_extra_args": True, "ignore_unknown_options": True}) +def start( + ctx: typer.Context, + name: str, + # arguments with default parameters + master_machine_type: Ann[str, Opt('--master-machine-type', '--master', '-m')] = 'n1-highmem-8', + master_memory_fraction: Ann[ + float, + Opt( + help='Fraction of master memory allocated to the JVM. Use a smaller value to reserve more memory for Python.' 
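`check_gcloud_version` above is a Typer callback: it runs before any `dataproc` subcommand, which makes it the natural home both for the gcloud version check and for the app-wide `--beta` flag (stored in the module-level `use_gcloud_beta`). A minimal sketch of that callback-plus-global-flag pattern:

```python
import typer

app = typer.Typer()
use_beta = False

@app.callback()
def main(beta: bool = typer.Option(False, help='Force use of `beta` in gcloud commands')):
    # Callbacks run before every subcommand, so a flag captured here
    # is visible to all of them.
    global use_beta
    use_beta = beta

@app.command()
def start():
    print(f'beta mode: {use_beta}')
```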
+            ),
+    ] = 0.8,
+    master_boot_disk_size: Ann[int, Opt(help='Disk size of master machine, in GB')] = 100,
+    num_master_local_ssds: Ann[int, Opt(help='Number of local SSDs to attach to the master machine.')] = 0,
+    num_secondary_workers: NumSecondaryWorkersOption = 0,
+    num_worker_local_ssds: Ann[int, Opt(help='Number of local SSDs to attach to each worker machine.')] = 0,
+    num_workers: NumWorkersOption = 2,
+    secondary_worker_boot_disk_size: Ann[
+        int,
+        Opt(
+            '--secondary-worker-boot-disk-size',
+            '--preemptible-worker-boot-disk-size',
+            help='Disk size of secondary (preemptible) worker machines, in GB.',
+        ),
+    ] = 40,
+    worker_boot_disk_size: Ann[int, Opt(help='Disk size of worker machines, in GB.')] = 40,
+    worker_machine_type: Ann[
+        Optional[str],
+        Opt(
+            '--worker-machine-type',
+            '--worker',
+            help='Worker machine type (default: n1-standard-8, or n1-highmem-8 with --vep).',
+        ),
+    ] = None,
+    region: Ann[Optional[str], Opt(help='Compute region for the cluster.')] = None,
+    zone: ZoneOption = None,
+    properties: Ann[Optional[str], Opt(help='Additional configuration properties for the cluster.')] = None,
+    metadata: Ann[Optional[str], Opt(help='Comma-separated list of metadata to add: KEY1=VALUE1,KEY2=VALUE2')] = None,
+    packages: Ann[
+        Optional[str], Opt(help='Comma-separated list of Python packages to be installed on the master node.')
+    ] = None,
+    project: Ann[Optional[str], Opt(help='GCP project to start cluster (defaults to currently set project).')] = None,
+    configuration: Ann[
+        Optional[str],
+        Opt(help='Google Cloud configuration to start cluster (defaults to currently set configuration).'),
+    ] = None,
+    max_idle: Ann[Optional[str], Opt(help='If specified, maximum idle time before shutdown (e.g. 60m).')] = None,
+    expiration_time: Ann[
+        Optional[str], Opt(help='If specified, time at which cluster is shutdown (e.g. 2020-01-01T00:00:00Z).')
+    ] = None,
+    max_age: Ann[Optional[str], Opt(help='If specified, maximum age before shutdown (e.g. 60m).')] = None,
+    bucket: Ann[
+        Optional[str],
+        Opt(
+            help='The Google Cloud Storage bucket to use for cluster staging (just the bucket name, no gs:// prefix).'
+        ),
+    ] = None,
+    temp_bucket: Ann[
+        Optional[str],
+        Opt(
+            help='The Google Cloud Storage bucket to use for cluster temporary storage (just the bucket name, no gs:// prefix).'
+        ),
+    ] = None,
+    network: Ann[Optional[str], Opt(help='The network for all nodes in this cluster.')] = None,
+    subnet: Ann[Optional[str], Opt(help='The subnet for all nodes in this cluster.')] = None,
+    service_account: Ann[
+        Optional[str],
+        Opt(
+            help='The Google Service Account to use for cluster creation (default to the Compute Engine service account).'
+        ),
+    ] = None,
+    master_tags: Ann[
+        Optional[str], Opt(help='Comma-separated list of instance tags to apply to the master node')
+    ] = None,
+    scopes: Ann[Optional[str], Opt(help='Specifies access scopes for the node instances')] = None,
+    wheel: Ann[Optional[str], Opt(help='Non-default Hail installation. Warning: experimental.')] = None,
+    # initialization action flags
+    init: Ann[str, Opt(help='Comma-separated list of init scripts to run.')] = '',
+    init_timeout: Ann[
+        str, Opt('--init_timeout', help='Flag to specify a timeout period for the initialization action')
+    ] = '20m',
+    vep: Ann[Optional[VepVersion], Opt(help='Install VEP for the specified reference genome.')] = None,
+    dry_run: DryRunOption = False,
+    no_off_heap_memory: Ann[
+        bool, Opt('--no-off-heap-memory', help="Don't partition JVM memory between hail heap and JVM heap")
+    ] = False,
+    big_executors: Ann[
+        bool,
+        Opt(
+            help="Double memory allocated per executor, using half the cores of the cluster with an extra large memory allotment per core."
+        ),
+    ] = False,
+    off_heap_memory_fraction: Ann[
+        float, Opt(help='Minimum fraction of worker memory dedicated to off-heap Hail values.')
+    ] = 0.6,
+    off_heap_memory_hard_limit: Ann[bool, Opt(help='Limit off-heap allocations to the dedicated fraction')] = False,
+    yarn_memory_fraction: Ann[
+        float, Opt(help='Fraction of machine memory to allocate to the yarn container scheduler.')
+    ] = 0.95,
+    # requester pays
+    requester_pays_allow_all: Ann[bool, Opt(help='Allow reading from all requester-pays buckets.')] = False,
+    requester_pays_allow_buckets: Ann[
+        Optional[str], Opt(help='Comma-separated list of requester-pays buckets to allow reading from.')
+    ] = None,
+    requester_pays_allow_annotation_db: Ann[
+        bool,
+        Opt(help='Allows reading from any of the requester-pays buckets that hold data for the annotation database.'),
+    ] = False,
+    debug_mode: Ann[
+        bool, Opt(help='Enable debug features on created cluster (heap dump on out-of-memory error)')
+    ] = False,
+):
+    '''
+    Start a Dataproc cluster configured for Hail.
+    '''
+    assert num_secondary_workers is not None
+    assert num_workers is not None
+
+    dataproc_start(
+        name,
+        ctx.args,
+        master_machine_type,
+        master_memory_fraction,
+        master_boot_disk_size,
+        num_master_local_ssds,
+        num_secondary_workers,
+        num_worker_local_ssds,
+        num_workers,
+        secondary_worker_boot_disk_size,
+        worker_boot_disk_size,
+        worker_machine_type,
+        region,
+        zone,
+        properties,
+        metadata,
+        packages,
+        project,
+        configuration,
+        max_idle,
+        expiration_time,
+        max_age,
+        bucket,
+        temp_bucket,
+        network,
+        subnet,
+        service_account,
+        master_tags,
+        scopes,
+        wheel,
+        init,
+        init_timeout,
+        vep,
+        dry_run,
+        no_off_heap_memory,
+        big_executors,
+        off_heap_memory_fraction,
+        off_heap_memory_hard_limit,
+        yarn_memory_fraction,
+        requester_pays_allow_all,
+        requester_pays_allow_buckets,
+        requester_pays_allow_annotation_db,
+        debug_mode,
+        use_gcloud_beta,
+    )
+
+
+@app.command(context_settings={"allow_extra_args": True, "ignore_unknown_options": True})
+def stop(
+    ctx: typer.Context,
+    name: str,
+    asink: Ann[bool, Opt('--async/--sync', help='Do not wait for cluster deletion')] = False,
+    dry_run: DryRunOption = False,
+):
+    '''
+    Shut down a Dataproc cluster.
+    '''
+    print("Stopping cluster '{}'...".format(name))
+
+    cmd = ['dataproc', 'clusters', 'delete', '--quiet', name]
+    if asink:
+        cmd.append('--async')
+
+    cmd.extend(ctx.args)
+
+    # print underlying gcloud command
+    print('gcloud ' + ' '.join(cmd[:5]) + ' \\\n    ' + ' \\\n    '.join(cmd[5:]))
+
+    if not dry_run:
+        gcloud.run(cmd)
+
+
+@app.command(context_settings={"allow_extra_args": True, "ignore_unknown_options": True})
+def list(
+    ctx: typer.Context,
+):
+    '''
+    List active Dataproc clusters.
+    '''
+    gcloud.run(['dataproc', 'clusters', 'list', *ctx.args])
+
+
+@app.command()
+def connect(
+    name: str,
+    service: DataprocConnectService,
+    pass_through_args: Ann[Optional[List[str]], Arg()] = None,
+    project: ProjectOption = None,
+    port: Ann[str, Opt(help='Local port to use for SSH tunnel to leader (master) node')] = '10000',
+    zone: ZoneOption = None,
+    dry_run: DryRunOption = False,
+):
+    '''
+    Connect to a running Dataproc cluster with name NAME and start
+    the web service SERVICE.
+    '''
+    dataproc_connect(name, service, project, port, zone, dry_run, pass_through_args or [])
+
+
+@app.command(context_settings={"allow_extra_args": True, "ignore_unknown_options": True})
+def submit(
+    ctx: typer.Context,
+    name: str,
+    script: str,
+    files: Ann[
+        str, Opt(help='Comma-separated list of files to add to the working directory of the Hail application.')
+    ] = '',
+    pyfiles: Ann[
+        str, Opt(help='Comma-separated list of files (or directories with python files) to add to the PYTHONPATH.')
+    ] = '',
+    properties: Ann[Optional[str], Opt('--properties', '-p', help='Extra Spark properties to set.')] = None,
+    gcloud_configuration: Ann[
+        Optional[str],
+        Opt(
+            '--gcloud_configuration',
+            help='Google Cloud configuration to submit job (defaults to currently set configuration).',
+        ),
+    ] = None,
+    dry_run: DryRunOption = False,
+    region: Ann[Optional[str], Opt(help='Compute region for the cluster.')] = None,
+):
+    '''
+    Submit the Python script at path SCRIPT to a running Dataproc cluster with
+    name NAME. To pass arguments to the script being submitted, just list them
+    after the name of the script.
+    '''
+    dataproc_submit(name, script, files, pyfiles, properties, gcloud_configuration, dry_run, region, ctx.args)
+
+
+@app.command()
+def diagnose(
+    name: str,
+    dest: Ann[str, Opt('--dest', '-d', help="Directory for diagnose output -- must be local.")],
+    hail_log: Ann[str, Opt('--hail-log', '-l', help='Path for hail.log file')] = '/home/hail/hail.log',
+    overwrite: Ann[bool, Opt(help='Delete dest directory before adding new files')] = False,
+    no_diagnose: Ann[bool, Opt('--no-diagnose', help='Do not run gcloud dataproc clusters diagnose.')] = False,
+    compress: Ann[bool, Opt('--compress', '-z', help='GZIP all files')] = False,
+    workers: Ann[Optional[List[str]], Opt(help='Specific workers to get log files from.')] = None,
+    take: Ann[Optional[int], Opt(help='Only download logs from the first N workers.')] = None,
+):
+    '''
+    Diagnose problems in a Dataproc cluster with name NAME.
+    '''
+    dataproc_diagnose(name, dest, hail_log, overwrite, no_diagnose, compress, workers or [], take)
+
+
+@app.command(context_settings={"allow_extra_args": True, "ignore_unknown_options": True})
+def modify(
+    ctx: typer.Context,
+    name: str,
+    num_workers: NumWorkersOption = None,
+    num_secondary_workers: NumSecondaryWorkersOption = None,
+    graceful_decommission_timeout: Ann[
+        Optional[str],
+        Opt(
+            '--graceful-decommission-timeout',
+            '--graceful',
+            help='If set, cluster size downgrade will use graceful decommissioning with the given timeout (e.g. "60m").',
+        ),
+    ] = None,
+    max_idle: Ann[
+        Optional[str],
+        Opt(help='New maximum idle time before shutdown (e.g. "60m").'),
+    ] = None,
+    no_max_idle: Ann[bool, Opt('--no-max-idle', help='Disable auto deletion after idle time.')] = False,
+    expiration_time: Ann[
+        Optional[str],
+        Opt(
+            help=(
+                'The time when cluster will be auto-deleted. (e.g. "2020-01-01T20:00:00Z"). '
+                'Execute gcloud topic datetimes for more information.'
+            )
+        ),
+    ] = None,
+    max_age: Ann[
+        Optional[str],
+        Opt(
+            help=(
+                'If the cluster is older than this, it will be auto-deleted. (e.g. "2h"). '
+                'Execute gcloud topic datetimes for more information.'
+            )
+        ),
+    ] = None,
+    no_max_age: Ann[bool, Opt('--no-max-age', help='Disable auto-deletion due to max age or expiration time.')] = False,
+    dry_run: DryRunOption = False,
+    zone: ZoneOption = None,
+    update_hail_version: Ann[
+        bool,
+        Opt(help="Update the version of hail running on cluster to match the currently installed version."),
+    ] = False,
+    wheel: Ann[Optional[str], Opt(help='New Hail installation.')] = None,
+):
+    '''
+    Modify an active dataproc cluster with name NAME.
+    '''
+    dataproc_modify(
+        name,
+        num_workers,
+        num_secondary_workers,
+        graceful_decommission_timeout,
+        max_idle,
+        no_max_idle,
+        expiration_time,
+        max_age,
+        no_max_age,
+        dry_run,
+        zone,
+        update_hail_version,
+        wheel,
+        use_gcloud_beta,
+        ctx.args,
+    )
+
+
+app.command(help='DEPRECATED. Describe Hail Matrix Table and Table files.')(describe)
diff --git a/hail/python/hailtop/hailctl/dataproc/cluster_config.py b/hail/python/hailtop/hailctl/dataproc/cluster_config.py
index a99efa5be94..f1a991836ef 100644
--- a/hail/python/hailtop/hailctl/dataproc/cluster_config.py
+++ b/hail/python/hailtop/hailctl/dataproc/cluster_config.py
@@ -27,9 +27,4 @@ def format(self, obj):
 
     def get_command(self, name):
         flags = ['--{}={}'.format(f, self.format(v)) for f, v in self.flags.items()]
-        return ['gcloud',
-                'dataproc',
-                'clusters',
-                'create',
-                name,
-                *flags]
+        return ['gcloud', 'dataproc', 'clusters', 'create', name, *flags]
diff --git a/hail/python/hailtop/hailctl/dataproc/connect.py b/hail/python/hailtop/hailctl/dataproc/connect.py
index 23e4e185231..4da5bb66e45 100755
--- a/hail/python/hailtop/hailctl/dataproc/connect.py
+++ b/hail/python/hailtop/hailctl/dataproc/connect.py
@@ -1,24 +1,33 @@
+from enum import Enum
 import os
 import platform
 import shutil
 import subprocess
 import tempfile
 
-from hailtop.utils import secret_alnum_string
+from typing import Optional, List
+
 from . import gcloud
 
 
-def init_parser(parser):
-    parser.add_argument('name', type=str, help='Cluster name.')
-    parser.add_argument('service', type=str,
-                        choices=['notebook', 'nb', 'spark-ui', 'ui', 'spark-history', 'hist'],
-                        help='Web service to launch.')
-    parser.add_argument('--project', help='Google Cloud project for the cluster (defaults to currently set project).')
-    parser.add_argument('--port', '-p', default='10000', type=str,
-                        help='Local port to use for SSH tunnel to leader (master) node (default: %(default)s).')
-    parser.add_argument('--zone', '-z', type=str, help='Compute zone for Dataproc cluster.')
-    parser.add_argument('--dry-run', action='store_true', help="Print gcloud dataproc command, but don't run it.")
+class DataprocConnectService(str, Enum):
+    NOTEBOOK = 'notebook'
+    NB = 'nb'
+    SPARK_UI = 'spark-ui'
+    UI = 'ui'
+    SPARK_HISTORY = 'spark-history'
+    HIST = 'hist'
+
+    def shortcut(self):
+        if self == self.UI:
+            return self.SPARK_UI
+        if self == self.HIST:
+            return self.SPARK_HISTORY
+        if self == self.NB:
+            return self.NOTEBOOK
+
+        return self
 
 
 def get_chrome_path():
@@ -41,54 +50,60 @@ def get_chrome_path():
     raise ValueError(f"unsupported system: {system}, set environment variable HAILCTL_CHROME to a chrome executable")
 
 
-async def main(args, pass_through_args):  # pylint: disable=unused-argument
-    # shortcut mapping
-    shortcut = {
-        'ui': 'spark-ui',
-        'hist': 'spark-history',
-        'nb': 'notebook'
-    }
+def connect(
+    name: str,
+    service: DataprocConnectService,
+    project: Optional[str],
+    port: str,
+    zone: Optional[str],
+    dry_run: bool,
+    pass_through_args: List[str],
+):
+    from hailtop.utils import secret_alnum_string  # pylint: disable=import-outside-toplevel
 
-    service = args.service
-    service = shortcut.get(service, service)
+    service = service.shortcut()
 
     # Dataproc port mapping
     dataproc_port_and_path = {
-        'spark-ui': '18080/?showIncomplete=true',
-        'spark-history': '18080',
-        'notebook': '8123'
+        DataprocConnectService.SPARK_UI: '18080/?showIncomplete=true',
+        DataprocConnectService.SPARK_HISTORY: '18080',
+        DataprocConnectService.NOTEBOOK: '8123',
     }
     connect_port_and_path = dataproc_port_and_path[service]
 
-    zone = args.zone if args.zone else gcloud.get_config("compute/zone")
+    zone = zone if zone else gcloud.get_config("compute/zone")
     if not zone:
-        raise RuntimeError("Could not determine compute zone. Use --zone argument to hailctl, or use `gcloud config set compute/zone <zone>` to set a default.")
+        raise RuntimeError(
+            "Could not determine compute zone. Use --zone argument to hailctl, or use `gcloud config set compute/zone <zone>` to set a default."
+ ) account = gcloud.get_config("account") if account: - account = account[0:account.find('@')] - ssh_login = '{}@{}-m'.format(account, args.name) + account = account[0 : account.find('@')] + ssh_login = '{}@{}-m'.format(account, name) else: - ssh_login = '{}-m'.format(args.name) - - cmd = ['compute', - 'ssh', - ssh_login, - '--zone={}'.format(zone), - '--ssh-flag=-D {}'.format(args.port), - '--ssh-flag=-N', - '--ssh-flag=-f', - '--ssh-flag=-n', - *pass_through_args] - - if args.project: - cmd.append(f"--project={args.project}") + ssh_login = '{}-m'.format(name) + + cmd = [ + 'compute', + 'ssh', + ssh_login, + '--zone={}'.format(zone), + '--ssh-flag=-D {}'.format(port), + '--ssh-flag=-N', + '--ssh-flag=-f', + '--ssh-flag=-n', + *pass_through_args, + ] + + if project: + cmd.append(f"--project={project}") print('gcloud command:') print(' '.join(cmd[:4]) + ' \\\n ' + ' \\\n '.join([f"'{x}'" for x in cmd[4:]])) - if not args.dry_run: - print("Connecting to cluster '{}'...".format(args.name)) + if not dry_run: + print("Connecting to cluster '{}'...".format(name)) # open SSH tunnel to master node gcloud.run(cmd) @@ -96,13 +111,18 @@ async def main(args, pass_through_args): # pylint: disable=unused-argument chrome = os.environ.get('HAILCTL_CHROME') or get_chrome_path() # open Chrome with SOCKS proxy configuration - subprocess.Popen([ # pylint: disable=consider-using-with - chrome, - 'http://localhost:{}'.format(connect_port_and_path), - '--proxy-server=socks5://localhost:{}'.format(args.port), - '--host-resolver-rules=MAP * 0.0.0.0 , EXCLUDE localhost', - '--proxy-bypass-list=<-loopback>', # https://chromium.googlesource.com/chromium/src/+/da790f920bbc169a6805a4fb83b4c2ab09532d91 - '--user-data-dir={}'.format( - os.path.join(tempfile.gettempdir(), - 'hailctl-dataproc-connect-' + secret_alnum_string(6))) - ], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL) + with subprocess.Popen( + [ # pylint: disable=consider-using-with + chrome, + 'http://localhost:{}'.format(connect_port_and_path), + '--proxy-server=socks5://localhost:{}'.format(port), + '--host-resolver-rules=MAP * 0.0.0.0 , EXCLUDE localhost', + '--proxy-bypass-list=<-loopback>', # https://chromium.googlesource.com/chromium/src/+/da790f920bbc169a6805a4fb83b4c2ab09532d91 + '--user-data-dir={}'.format( + os.path.join(tempfile.gettempdir(), 'hailctl-dataproc-connect-' + secret_alnum_string(6)) + ), + ], + stdout=subprocess.DEVNULL, + stderr=subprocess.DEVNULL, + ): + pass diff --git a/hail/python/hailtop/hailctl/dataproc/deploy_metadata.py b/hail/python/hailtop/hailctl/dataproc/deploy_metadata.py index bc81c562c33..a267ca9bb44 100644 --- a/hail/python/hailtop/hailctl/dataproc/deploy_metadata.py +++ b/hail/python/hailtop/hailctl/dataproc/deploy_metadata.py @@ -3,6 +3,7 @@ def get_deploy_metadata(): import pkg_resources # pylint: disable=import-outside-toplevel + if not pkg_resources.resource_exists("hailtop.hailctl", "deploy.yaml"): raise RuntimeError("package has no 'deploy.yaml' file") diff --git a/hail/python/hailtop/hailctl/dataproc/describe.py b/hail/python/hailtop/hailctl/dataproc/describe.py deleted file mode 100644 index 6e36846f5f5..00000000000 --- a/hail/python/hailtop/hailctl/dataproc/describe.py +++ /dev/null @@ -1,9 +0,0 @@ -from .. import describe -import sys - -init_parser = describe.init_parser - - -async def main(*args, **kwargs): - await describe.main_after_parsing(*args, **kwargs) - print('!!! `hailctl dataproc describe` is DEPRECATED. Please use `hailctl describe` instead. 
!!!', file=sys.stderr)
diff --git a/hail/python/hailtop/hailctl/dataproc/diagnose.py b/hail/python/hailtop/hailctl/dataproc/diagnose.py
index 6751c54ccb3..1bbba77b6a4 100644
--- a/hail/python/hailtop/hailctl/dataproc/diagnose.py
+++ b/hail/python/hailtop/hailctl/dataproc/diagnose.py
@@ -1,75 +1,72 @@
 import re
 import json
-from subprocess import call, Popen, PIPE
-
-def init_parser(parser):
-    parser.add_argument('name', type=str, help='Cluster name.')
-    parser.add_argument('--dest', '-d', required=True, type=str, help="Directory for diagnose output -- must be local.")
-    parser.add_argument('--hail-log', '-l', required=False, type=str, default='/home/hail/hail.log',
-                        help="Path for hail.log file.")
-    parser.add_argument('--overwrite', required=False, action='store_true',
-                        help="Delete dest directory before adding new files.")
-    parser.add_argument('--no-diagnose', required=False, action='store_true',
-                        help="Do not run gcloud dataproc clusters diagnose.")
-    parser.add_argument('--compress', '-z', required=False, action='store_true', help="GZIP all files.")
-    parser.add_argument('--workers', required=False, nargs='*', help="Specific workers to get log files from.")
-    parser.add_argument('--take', required=False, type=int, default=None,
-                        help="Only download logs from the first N workers.")
+from typing import List, Optional
+from subprocess import call, Popen, PIPE
 
-async def main(args, pass_through_args):  # pylint: disable=unused-argument
-    print("Diagnosing cluster '{}'...".format(args.name))
+def diagnose(
+    name: str,
+    dest: str,
+    hail_log: str,
+    overwrite: bool,
+    no_diagnose: bool,
+    compress: bool,
+    workers: List[str],
+    take: Optional[int],
+):
+    print("Diagnosing cluster '{}'...".format(name))
 
-    is_local = not args.dest.startswith("gs://")
+    is_local = not dest.startswith("gs://")
 
-    if args.overwrite:
+    if overwrite:
         if is_local:
-            call('rm -r {dir}'.format(dir=args.dest), shell=True)
+            call('rm -r {dir}'.format(dir=dest), shell=True)
         else:
-            call('gsutil -m rm -r {dir}'.format(dir=args.dest), shell=True)
+            call('gsutil -m rm -r {dir}'.format(dir=dest), shell=True)
 
-    master_dest = args.dest.rstrip('/') + "/master/"
-    worker_dest = args.dest.rstrip('/') + "/workers/"
+    master_dest = dest.rstrip('/') + "/master/"
+    worker_dest = dest.rstrip('/') + "/workers/"
 
     if is_local:
         call('mkdir -p {dir}'.format(dir=master_dest), shell=True)
         call('mkdir -p {dir}'.format(dir=worker_dest), shell=True)
 
-    with Popen('gcloud dataproc clusters describe {name} --format json'.format(name=args.name),
-               shell=True,
-               stdout=PIPE,
-               stderr=PIPE) as process:
+    with Popen(
+        'gcloud dataproc clusters describe {name} --format json'.format(name=name), shell=True, stdout=PIPE, stderr=PIPE
+    ) as process:
         desc = json.loads(process.communicate()[0].strip())
 
     config = desc['config']
 
     master = config['masterConfig']['instanceNames'][0]
     try:
-        workers = config['workerConfig']['instanceNames'] + config['secondaryWorkerConfig']['instanceNames']
+        all_workers = config['workerConfig']['instanceNames'] + config['secondaryWorkerConfig']['instanceNames']
     except KeyError:
-        workers = config['workerConfig']['instanceNames']
+        all_workers = config['workerConfig']['instanceNames']
     zone_match = re.search(r'zones/(?P<zone>\S+)$', config['gceClusterConfig']['zoneUri'])
     assert zone_match
     zone = zone_match.group('zone')
 
-    if args.workers:
-        invalid_workers = set(args.workers).difference(set(workers))
-        assert len(invalid_workers) == 0, "Non-existent workers specified: " + ", ".join(invalid_workers)
-        workers = args.workers
+    if workers:
+        invalid_workers = set(workers).difference(set(all_workers))
+        if invalid_workers:
+            raise ValueError("Non-existent workers specified: " + ", ".join(invalid_workers))
+    else:
+        # default to all workers, preserving the pre-refactor behavior
+        workers = all_workers
 
-    if args.take:
-        assert args.take > 0 and args.take <= len(
-            workers), "Number of workers to take must be in the range of [0, nWorkers]. Found " + args.take + "."
-        workers = workers[:args.take]
+    if take:
+        if take < 0 or take > len(workers):
+            raise ValueError(f'Number of workers to take must be in the range of [0, nWorkers]. Found {take}.')
+        workers = workers[:take]
 
     def gcloud_ssh(remote, command):
-        return 'gcloud compute ssh {remote} --zone {zone} --command "{command}"'.format(remote=remote, zone=zone,
-                                                                                        command=command)
+        return 'gcloud compute ssh {remote} --zone {zone} --command "{command}"'.format(
+            remote=remote, zone=zone, command=command
+        )
 
     def gcloud_copy_files(remote, src, dest):
-        return 'gcloud compute copy-files {remote}:{src} {dest} --zone {zone}'.format(remote=remote, src=src, dest=dest,
-                                                                                      zone=zone)
+        return 'gcloud compute copy-files {remote}:{src} {dest} --zone {zone}'.format(
+            remote=remote, src=src, dest=dest, zone=zone
+        )
 
     def gsutil_cp(src, dest):
         return 'gsutil -m cp -r {src} {dest}'.format(src=src, dest=dest)
@@ -80,7 +77,7 @@ def copy_files_tmp(remote, files, dest, tmp):
         copy_tmp_cmds = ['sudo cp -r {file} {tmp}'.format(file=file, tmp=tmp) for file in files]
         copy_tmp_cmds.append('sudo chmod -R 777 {tmp}'.format(tmp=tmp))
 
-        if args.compress:
+        if compress:
            copy_tmp_cmds.append('sudo find ' + tmp + ' -type f ! -name \'*.gz\' -exec gzip "{}" \\;')
 
         call(gcloud_ssh(remote, '; '.join(init_cmd + copy_tmp_cmds)), shell=True)
@@ -92,33 +89,37 @@ def copy_files_tmp(remote, files, dest, tmp):
         call(copy_dest_cmd, shell=True)
 
-    if not args.no_diagnose:
-        with Popen('gcloud dataproc clusters diagnose {name}'.format(name=args.name),
-                   shell=True,
-                   stdout=PIPE,
-                   stderr=PIPE) as process:
+    if not no_diagnose:
+        with Popen(
+            'gcloud dataproc clusters diagnose {name}'.format(name=name), shell=True, stdout=PIPE, stderr=PIPE
+        ) as process:
             output = process.communicate()
-        diagnose_tar_path_match = re.search(r'Diagnostic results saved in: (?P<tarfile>gs://\S+diagnostic\.tar)', str(output))
+        diagnose_tar_path_match = re.search(
+            r'Diagnostic results saved in: (?P<tarfile>gs://\S+diagnostic\.tar)', str(output)
+        )
         assert diagnose_tar_path_match
         diagnose_tar_path = diagnose_tar_path_match.group('tarfile')
 
-        call(gsutil_cp(diagnose_tar_path, args.dest), shell=True)
+        call(gsutil_cp(diagnose_tar_path, dest), shell=True)
 
-    master_log_files = ['/var/log/hive/hive-*',
-                        '/var/log/google-dataproc-agent.0.log',
-                        '/var/log/dataproc-initialization-script-0.log',
-                        '/var/log/hadoop-mapreduce/mapred-mapred-historyserver*',
-                        '/var/log/hadoop-hdfs/*-m.*',
-                        '/var/log/hadoop-yarn/yarn-yarn-resourcemanager-*-m.*',
-                        args.hail_log
-                        ]
+    master_log_files = [
+        '/var/log/hive/hive-*',
+        '/var/log/google-dataproc-agent.0.log',
+        '/var/log/dataproc-initialization-script-0.log',
+        '/var/log/hadoop-mapreduce/mapred-mapred-historyserver*',
+        '/var/log/hadoop-hdfs/*-m.*',
+        '/var/log/hadoop-yarn/yarn-yarn-resourcemanager-*-m.*',
+        hail_log,
+    ]
 
     copy_files_tmp(master, master_log_files, master_dest, '/tmp/' + master + '/')
 
-    worker_log_files = ['/var/log/hadoop-hdfs/hadoop-hdfs-datanode-*.*',
-                        '/var/log/dataproc-startup-script.log',
-                        '/var/log/hadoop-yarn/yarn-yarn-nodemanager-*.*']
+    worker_log_files = [
+        '/var/log/hadoop-hdfs/hadoop-hdfs-datanode-*.*',
+        '/var/log/dataproc-startup-script.log',
+
'/var/log/hadoop-yarn/yarn-yarn-nodemanager-*.*', + ] for worker in workers: copy_files_tmp(worker, worker_log_files, worker_dest, '/tmp/' + worker + '/') - copy_files_tmp(worker, ['/var/log/hadoop-yarn/userlogs/'], args.dest, '/tmp/hadoop-yarn/') + copy_files_tmp(worker, ['/var/log/hadoop-yarn/userlogs/'], dest, '/tmp/hadoop-yarn/') diff --git a/hail/python/hailtop/hailctl/dataproc/gcloud.py b/hail/python/hailtop/hailctl/dataproc/gcloud.py index 030238c05c6..1f314daa42f 100644 --- a/hail/python/hailtop/hailctl/dataproc/gcloud.py +++ b/hail/python/hailtop/hailctl/dataproc/gcloud.py @@ -12,7 +12,11 @@ def run(command: List[str]): def get_config(setting: str) -> Optional[str]: """Get a gcloud configuration value.""" try: - return subprocess.check_output(["gcloud", "config", "get-value", setting], stderr=subprocess.DEVNULL).decode().strip() + return ( + subprocess.check_output(["gcloud", "config", "get-value", setting], stderr=subprocess.DEVNULL) + .decode() + .strip() + ) except subprocess.CalledProcessError as e: print(f"Warning: could not run 'gcloud config get-value {setting}': {e.output.decode}", file=sys.stderr) return None @@ -20,7 +24,9 @@ def get_config(setting: str) -> Optional[str]: def get_version() -> Tuple[int, int, int]: """Get gcloud version as a tuple.""" - version_output = subprocess.check_output(["gcloud", "version", "--format=json"], stderr=subprocess.DEVNULL).decode().strip() + version_output = ( + subprocess.check_output(["gcloud", "version", "--format=json"], stderr=subprocess.DEVNULL).decode().strip() + ) version_info = json.loads(version_output) v = version_info["Google Cloud SDK"].split(".") version = (int(v[0]), int(v[1]), int(v[2])) diff --git a/hail/python/hailtop/hailctl/dataproc/list_clusters.py b/hail/python/hailtop/hailctl/dataproc/list_clusters.py deleted file mode 100644 index ab1cf2ccbd9..00000000000 --- a/hail/python/hailtop/hailctl/dataproc/list_clusters.py +++ /dev/null @@ -1,5 +0,0 @@ -from . import gcloud - - -async def main(args, pass_through_args): # pylint: disable=unused-argument - gcloud.run(['dataproc', 'clusters', 'list', *pass_through_args]) diff --git a/hail/python/hailtop/hailctl/dataproc/modify.py b/hail/python/hailtop/hailctl/dataproc/modify.py index a10c4d26ede..351cda7d6ca 100644 --- a/hail/python/hailtop/hailctl/dataproc/modify.py +++ b/hail/python/hailtop/hailctl/dataproc/modify.py @@ -1,76 +1,56 @@ import os.path import sys +from typing import List, Optional + from . import gcloud from .deploy_metadata import get_deploy_metadata -def init_parser(parser): - parser.add_argument('name', type=str, help='Cluster name.') - parser.add_argument('--num-workers', '--n-workers', '-w', type=int, - help='New number of worker machines (min. 2).') - parser.add_argument('--num-secondary-workers', '--num-preemptible-workers', '--n-pre-workers', '-p', type=int, - help='New number of secondary (preemptible) worker machines.') - parser.add_argument('--graceful-decommission-timeout', '--graceful', type=str, - help='If set, cluster size downgrade will use graceful decommissioning with the given timeout (e.g. "60m").') - max_idle_group = parser.add_mutually_exclusive_group() - max_idle_group.add_argument('--max-idle', - type=str, - help='New maximum idle time before shutdown (e.g. 
"60m").') - max_idle_group.add_argument('--no-max-idle', - action='store_true', - help='Disable auto deletion after idle time.') - max_age_group = parser.add_mutually_exclusive_group() - max_age_group.add_argument( - '--expiration-time', - type=str, - help=('The time when cluster will be auto-deleted. (e.g. "2020-01-01T20:00:00Z"). ' - 'Execute gcloud topic datatimes for more information.')) - max_age_group.add_argument( - '--max-age', - type=str, - help=('If the cluster is older than this, it will be auto-deleted. (e.g. "2h")' - 'Execute gcloud topic datatimes for more information.')) - max_age_group.add_argument( - '--no-max-age', - action='store_true', - help='Disable auto-deletion due to max age or expiration time.') - parser.add_argument('--dry-run', action='store_true', help="Print gcloud dataproc command, but don't run it.") - parser.add_argument('--zone', '-z', type=str, help='Compute zone for Dataproc cluster.') - wheel_group = parser.add_mutually_exclusive_group() - wheel_group.add_argument('--update-hail-version', action='store_true', help="Update the version of hail running on cluster to match " - "the currently installed version.") - wheel_group.add_argument('--wheel', type=str, help='New Hail installation.') - - -async def main(args, pass_through_args): +def modify( + name: str, + num_workers: Optional[int], + num_secondary_workers: Optional[int], + graceful_decommission_timeout: Optional[str], + max_idle: Optional[str], + no_max_idle: bool, + expiration_time: Optional[str], + max_age: Optional[str], + no_max_age: bool, + dry_run: bool, + zone: Optional[str], + update_hail_version: bool, + wheel: Optional[str], + beta: bool, + pass_through_args: List[str], +): modify_args = [] - if args.num_workers is not None: - modify_args.append('--num-workers={}'.format(args.num_workers)) + if num_workers is not None: + modify_args.append('--num-workers={}'.format(num_workers)) - if args.num_secondary_workers is not None: - modify_args.append('--num-secondary-workers={}'.format(args.num_secondary_workers)) + if num_secondary_workers is not None: + modify_args.append('--num-secondary-workers={}'.format(num_secondary_workers)) - if args.graceful_decommission_timeout: + if graceful_decommission_timeout: if not modify_args: sys.exit("Error: Cannot use --graceful-decommission-timeout without resizing the cluster.") - modify_args.append('--graceful-decommission-timeout={}'.format(args.graceful_decommission_timeout)) + modify_args.append('--graceful-decommission-timeout={}'.format(graceful_decommission_timeout)) - if args.max_idle: - modify_args.append('--max-idle={}'.format(args.max_idle)) - if args.no_max_idle: + if max_idle: + modify_args.append('--max-idle={}'.format(max_idle)) + if no_max_idle: modify_args.append('--no-max-idle') - if args.expiration_time: - modify_args.append('--expiration_time={}'.format(args.expiration_time)) - if args.max_age: - modify_args.append('--max-age={}'.format(args.max_age)) - if args.no_max_age: + if expiration_time: + modify_args.append('--expiration_time={}'.format(expiration_time)) + if max_age: + modify_args.append('--max-age={}'.format(max_age)) + if no_max_age: modify_args.append('--no-max-age') if modify_args: - cmd = ['dataproc', 'clusters', 'update', args.name, *modify_args] + cmd = ['dataproc', 'clusters', 'update', name, *modify_args] - if args.beta: + if beta: cmd.insert(0, 'beta') cmd.extend(pass_through_args) @@ -79,66 +59,66 @@ async def main(args, pass_through_args): print('gcloud ' + ' '.join(cmd[:4]) + ' \\\n ' + ' \\\n '.join(cmd[4:])) # 
Update cluster
-        if not args.dry_run:
-            print("Updating cluster '{}'...".format(args.name))
+        if not dry_run:
+            print("Updating cluster '{}'...".format(name))
             gcloud.run(cmd)
 
-    wheel = None
-    if args.update_hail_version:
+    if update_hail_version and wheel is not None:
+        sys.exit('argument --update-hail-version: not allowed with argument --wheel')
+
+    if update_hail_version:
         deploy_metadata = get_deploy_metadata()
         wheel = deploy_metadata["wheel"]
-    else:
-        wheel = args.wheel
 
     if wheel is not None:
-        zone = args.zone if args.zone else gcloud.get_config("compute/zone")
+        zone = zone if zone else gcloud.get_config("compute/zone")
         if not zone:
-            raise RuntimeError("Could not determine compute zone. Use --zone argument to hailctl, or use `gcloud config set compute/zone <zone>` to set a default.")
+            raise RuntimeError(
+                "Could not determine compute zone. Use --zone argument to hailctl, or use `gcloud config set compute/zone <zone>` to set a default."
+            )
 
         wheelfile = os.path.basename(wheel)
         cmds = []
         if wheel.startswith("gs://"):
-            cmds.append([
-                'compute',
-                'ssh',
-                '{}-m'.format(args.name),
-                '--zone={}'.format(zone),
-                '--',
-                f'sudo gsutil cp {wheel} /tmp/ && '
-                'sudo /opt/conda/default/bin/pip uninstall -y hail && '
-                f'sudo /opt/conda/default/bin/pip install --no-dependencies /tmp/{wheelfile} && '
-                f"unzip /tmp/{wheelfile} && "
-                "requirements_file=$(mktemp) && "
-                "grep 'Requires-Dist: ' hail*dist-info/METADATA | sed 's/Requires-Dist: //' | sed 's/ (//' | sed 's/)//' | grep -v 'pyspark' >$requirements_file &&"
-                "/opt/conda/default/bin/pip install -r $requirements_file"
-            ])
-        else:
-            cmds.extend([
-                [
-                    'compute',
-                    'scp',
-                    '--zone={}'.format(zone),
-                    wheel,
-                    '{}-m:/tmp/'.format(args.name)
-                ],
+            cmds.append(
                 [
                     'compute',
                     'ssh',
-                    f'{args.name}-m',
-                    f'--zone={zone}',
+                    '{}-m'.format(name),
+                    '--zone={}'.format(zone),
                     '--',
+                    f'sudo gsutil cp {wheel} /tmp/ && '
                     'sudo /opt/conda/default/bin/pip uninstall -y hail && '
                     f'sudo /opt/conda/default/bin/pip install --no-dependencies /tmp/{wheelfile} && '
                     f"unzip /tmp/{wheelfile} && "
                     "requirements_file=$(mktemp) && "
                     "grep 'Requires-Dist: ' hail*dist-info/METADATA | sed 's/Requires-Dist: //' | sed 's/ (//' | sed 's/)//' | grep -v 'pyspark' >$requirements_file &&"
-                    "/opt/conda/default/bin/pip install -r $requirements_file"
+                    "/opt/conda/default/bin/pip install -r $requirements_file",
+                ]
+            )
+        else:
+            cmds.extend(
+                [
+                    ['compute', 'scp', '--zone={}'.format(zone), wheel, '{}-m:/tmp/'.format(name)],
+                    [
+                        'compute',
+                        'ssh',
+                        f'{name}-m',
+                        f'--zone={zone}',
+                        '--',
+                        'sudo /opt/conda/default/bin/pip uninstall -y hail && '
+                        f'sudo /opt/conda/default/bin/pip install --no-dependencies /tmp/{wheelfile} && '
+                        f"unzip /tmp/{wheelfile} && "
+                        "requirements_file=$(mktemp) && "
+                        "grep 'Requires-Dist: ' hail*dist-info/METADATA | sed 's/Requires-Dist: //' | sed 's/ (//' | sed 's/)//' | grep -v 'pyspark' >$requirements_file &&"
+                        "/opt/conda/default/bin/pip install -r $requirements_file",
+                    ],
                 ]
-            ])
+            )
 
     for cmd in cmds:
         print('gcloud ' + ' '.join(cmd))
-        if not args.dry_run:
+        if not dry_run:
             gcloud.run(cmd)
 
     if not wheel and not modify_args and pass_through_args:
diff --git a/hail/python/hailtop/hailctl/dataproc/resources/init_notebook.py b/hail/python/hailtop/hailctl/dataproc/resources/init_notebook.py
index 5bc52a84ce7..a66a0ea1b8b 100644
--- a/hail/python/hailtop/hailctl/dataproc/resources/init_notebook.py
+++ b/hail/python/hailtop/hailctl/dataproc/resources/init_notebook.py
@@ -96,14 +96,18 @@ def mkdir_if_not_exists(path):
     print('setting environment')
 
     for 
e, value in env_to_set.items(): - safe_call('/bin/sh', '-c', - 'set -ex; echo "export {}={}" | tee -a /etc/environment /usr/lib/spark/conf/spark-env.sh'.format(e, - value)) - - hail_jar = sp.check_output([ - '/bin/sh', '-c', - 'set -ex; python3 -m pip show hail | grep Location | sed "s/Location: //"' - ]).decode('ascii').strip() + '/hail/backend/hail-all-spark.jar' + safe_call( + '/bin/sh', + '-c', + 'set -ex; echo "export {}={}" | tee -a /etc/environment /usr/lib/spark/conf/spark-env.sh'.format(e, value), + ) + + hail_jar = ( + sp.check_output(['/bin/sh', '-c', 'set -ex; python3 -m pip show hail | grep Location | sed "s/Location: //"']) + .decode('ascii') + .strip() + + '/hail/backend/hail-all-spark.jar' + ) conf_to_set = [ 'spark.executorEnv.PYTHONHASHSEED=0', @@ -129,13 +133,7 @@ def mkdir_if_not_exists(path): python3_kernel = json.load(f) except: python3_kernel = { - 'argv': [ - '/opt/conda/default/bin/python', - '-m', - 'ipykernel', - '-f', - '{connection_file}' - ], + 'argv': ['/opt/conda/default/bin/python', '-m', 'ipykernel', '-f', '{connection_file}'], 'display_name': 'Python 3', 'language': 'python', } @@ -152,10 +150,7 @@ def mkdir_if_not_exists(path): json.dump(python3_kernel, f) # some old notebooks use the "Hail" kernel, so create that too - hail_kernel = { - **python3_kernel, - 'display_name': 'Hail' - } + hail_kernel = {**python3_kernel, 'display_name': 'Hail'} mkdir_if_not_exists('/opt/conda/default/share/jupyter/kernels/hail/') with open('/opt/conda/default/share/jupyter/kernels/hail/kernel.json', 'w') as f: json.dump(hail_kernel, f) @@ -169,12 +164,14 @@ def mkdir_if_not_exists(path): 'c.NotebookApp.open_browser = False', 'c.NotebookApp.port = 8123', 'c.NotebookApp.token = ""', - 'c.NotebookApp.contents_manager_class = "jgscm.GoogleStorageContentManager"' + 'c.NotebookApp.contents_manager_class = "jgscm.GoogleStorageContentManager"', ] f.write('\n'.join(opts) + '\n') print('copying spark monitor') - spark_monitor_gs = 'gs://hail-common/sparkmonitor-c1289a19ac117336fec31ec08a2b13afe7e420cf/sparkmonitor-0.0.12-py3-none-any.whl' + spark_monitor_gs = ( + 'gs://hail-common/sparkmonitor-c1289a19ac117336fec31ec08a2b13afe7e420cf/sparkmonitor-0.0.12-py3-none-any.whl' + ) spark_monitor_wheel = '/home/hail/' + spark_monitor_gs.split('/')[-1] safe_call('gcloud', 'storage', 'cp', spark_monitor_gs, spark_monitor_wheel) safe_call('pip', 'install', spark_monitor_wheel) @@ -186,7 +183,8 @@ def mkdir_if_not_exists(path): safe_call('/opt/conda/default/bin/jupyter', 'nbextension', 'enable', '--user', '--py', 'widgetsnbextension') safe_call( """ipython profile create && echo "c.InteractiveShellApp.extensions.append('sparkmonitor.kernelextension')" >> $(ipython profile locate default)/ipython_kernel_config.py""", - shell=True) + shell=True, + ) # create systemd service file for Jupyter notebook server process with open('/lib/systemd/system/jupyter.service', 'w') as f: @@ -203,7 +201,7 @@ def mkdir_if_not_exists(path): 'Restart=always', 'RestartSec=1', '[Install]', - 'WantedBy=multi-user.target' + 'WantedBy=multi-user.target', ] f.write('\n'.join(opts) + '\n') diff --git a/hail/python/hailtop/hailctl/dataproc/start.py b/hail/python/hailtop/hailctl/dataproc/start.py index 281c8aacf06..1131e049f2f 100755 --- a/hail/python/hailtop/hailctl/dataproc/start.py +++ b/hail/python/hailtop/hailctl/dataproc/start.py @@ -1,10 +1,19 @@ import re +from enum import Enum import yaml +from typing import Optional, List + from . 
import gcloud from .cluster_config import ClusterConfig + +class VepVersion(str, Enum): + GRCH37 = 'GRCh37' + GRCH38 = 'GRCh38' + + DEFAULT_PROPERTIES = { "spark:spark.task.maxFailures": "20", "spark:spark.driver.extraJavaOptions": "-Xss4M", @@ -12,7 +21,7 @@ 'spark:spark.speculation': 'true', "hdfs:dfs.replication": "1", 'dataproc:dataproc.logging.stackdriver.enable': 'false', - 'dataproc:dataproc.monitoring.stackdriver.enable': 'false' + 'dataproc:dataproc.monitoring.stackdriver.enable': 'false', } # leadre (master) machine type to memory map, used for setting @@ -130,7 +139,7 @@ 'europe-west2': 'uk', 'europe-west3': 'eu', 'europe-west4': 'eu', - 'australia-southeast1': 'aus-sydney' + 'australia-southeast1': 'aus-sydney', } ANNOTATION_DB_BUCKETS = ["hail-datasets-us", "hail-datasets-eu"] @@ -138,98 +147,52 @@ IMAGE_VERSION = '2.1.2-debian11' -def init_parser(parser): - parser.add_argument('name', type=str, help='Cluster name.') - - # arguments with default parameters - parser.add_argument('--master-machine-type', '--master', '-m', default='n1-highmem-8', type=str, - help='Master machine type (default: %(default)s).') - parser.add_argument('--master-memory-fraction', default=0.8, type=float, - help='Fraction of master memory allocated to the JVM. ' - 'Use a smaller value to reserve more memory ' - 'for Python. (default: %(default)s)') - parser.add_argument('--master-boot-disk-size', default=100, type=int, - help='Disk size of master machine, in GB (default: %(default)s).') - parser.add_argument('--num-master-local-ssds', default=0, type=int, - help='Number of local SSDs to attach to the master machine (default: %(default)s).') - parser.add_argument('--num-secondary-workers', '--num-preemptible-workers', '--n-pre-workers', '-p', default=0, type=int, - help='Number of secondary (preemptible) worker machines (default: %(default)s).') - parser.add_argument('--num-worker-local-ssds', default=0, type=int, - help='Number of local SSDs to attach to each worker machine (default: %(default)s).') - parser.add_argument('--num-workers', '--n-workers', '-w', default=2, type=int, - help='Number of worker machines (default: %(default)s).') - parser.add_argument('--secondary-worker-boot-disk-size', '--preemptible-worker-boot-disk-size', default=40, type=int, - help='Disk size of secondary (preemptible) worker machines, in GB (default: %(default)s).') - parser.add_argument('--worker-boot-disk-size', default=40, type=int, - help='Disk size of worker machines, in GB (default: %(default)s).') - parser.add_argument('--worker-machine-type', '--worker', - help='Worker machine type (default: n1-standard-8, or n1-highmem-8 with --vep).') - parser.add_argument('--region', - help='Compute region for the cluster.') - parser.add_argument('--zone', - help='Compute zone for the cluster.') - parser.add_argument('--properties', - help='Additional configuration properties for the cluster') - parser.add_argument('--metadata', - help='Comma-separated list of metadata to add: KEY1=VALUE1,KEY2=VALUE2...') - parser.add_argument('--packages', '--pkgs', - help='Comma-separated list of Python packages to be installed on the master node.') - parser.add_argument('--project', help='Google Cloud project to start cluster (defaults to currently set project).') - parser.add_argument('--configuration', - help='Google Cloud configuration to start cluster (defaults to currently set configuration).') - parser.add_argument('--max-idle', type=str, help='If specified, maximum idle time before shutdown (e.g. 
60m).') - max_age_group = parser.add_mutually_exclusive_group() - max_age_group.add_argument('--expiration-time', type=str, help='If specified, time at which cluster is shutdown (e.g. 2020-01-01T00:00:00Z).') - max_age_group.add_argument('--max-age', type=str, help='If specified, maximum age before shutdown (e.g. 60m).') - parser.add_argument('--bucket', type=str, - help='The Google Cloud Storage bucket to use for cluster staging (just the bucket name, no gs:// prefix).') - parser.add_argument('--temp-bucket', type=str, - help='The Google Cloud Storage bucket to use for cluster temporary storage (just the bucket name, no gs:// prefix).') - parser.add_argument('--network', type=str, help='the network for all nodes in this cluster') - parser.add_argument('--subnet', type=str, help='the subnetwork for all nodes in this cluster') - parser.add_argument('--service-account', type=str, help='The Google Service Account to use for cluster creation (default to the Compute Engine service account).') - parser.add_argument('--master-tags', type=str, help='comma-separated list of instance tags to apply to the mastern node') - parser.add_argument('--scopes', help='Specifies access scopes for the node instances') - - parser.add_argument('--wheel', help='Non-default Hail installation. Warning: experimental.') - - # initialization action flags - parser.add_argument('--init', default='', help='Comma-separated list of init scripts to run.') - parser.add_argument('--init_timeout', default='20m', - help='Flag to specify a timeout period for the initialization action') - parser.add_argument('--vep', - help='Install VEP for the specified reference genome.', - required=False, - choices=['GRCh37', 'GRCh38']) - parser.add_argument('--dry-run', action='store_true', help="Print gcloud dataproc command, but don't run it.") - parser.add_argument('--no-off-heap-memory', action='store_true', - help="If true, don't partition JVM memory between hail heap and JVM heap") - parser.add_argument('--big-executors', action='store_true', - help="If true, double memory allocated per executor, using half the cores of the cluster with an extra large memory allotment per core.") - parser.add_argument('--off-heap-memory-fraction', type=float, default=0.6, - help="Minimum fraction of worker memory dedicated to off-heap Hail values.") - parser.add_argument('--off-heap-memory-hard-limit', action='store_true', - help="If true, limit off-heap allocations to the dedicated fraction") - parser.add_argument('--yarn-memory-fraction', type=float, - help="Fraction of machine memory to allocate to the yarn container scheduler.", - default=0.95) - - # requester pays - parser.add_argument('--requester-pays-allow-all', - help="Allow reading from all requester-pays buckets.", - action='store_true', - required=False) - parser.add_argument('--requester-pays-allow-buckets', - help="Comma-separated list of requester-pays buckets to allow reading from.") - parser.add_argument('--requester-pays-allow-annotation-db', - action='store_true', - help="Allows reading from any of the requester-pays buckets that hold data for the annotation database.") - parser.add_argument('--debug-mode', - action='store_true', - help="Enable debug features on created cluster (heap dump on out-of-memory error)") - - -async def main(args, pass_through_args): +def start( + name: str, + pass_through_args: List[str], + master_machine_type: str, + master_memory_fraction: float, + master_boot_disk_size: int, + num_master_local_ssds: int, + num_secondary_workers: int, + 
num_worker_local_ssds: int, + num_workers: int, + secondary_worker_boot_disk_size: int, + worker_boot_disk_size: int, + worker_machine_type: Optional[str], + region: Optional[str], + zone: Optional[str], + properties: Optional[str], + metadata: Optional[str], + packages: Optional[str], + project: Optional[str], + configuration: Optional[str], + max_idle: Optional[str], + expiration_time: Optional[str], + max_age: Optional[str], + bucket: Optional[str], + temp_bucket: Optional[str], + network: Optional[str], + subnet: Optional[str], + service_account: Optional[str], + master_tags: Optional[str], + scopes: Optional[str], + wheel: Optional[str], + init: str, + init_timeout: str, + vep: Optional[VepVersion], + dry_run: bool, + no_off_heap_memory: bool, + big_executors: bool, # pylint: disable=unused-argument + off_heap_memory_fraction: float, + off_heap_memory_hard_limit: bool, + yarn_memory_fraction: float, + requester_pays_allow_all: bool, + requester_pays_allow_buckets: Optional[str], + requester_pays_allow_annotation_db: bool, + debug_mode: bool, + beta: bool, +): import pkg_resources # pylint: disable=import-outside-toplevel conf = ClusterConfig() @@ -237,173 +200,195 @@ async def main(args, pass_through_args): if not pkg_resources.resource_exists('hailtop.hailctl', "deploy.yaml"): raise RuntimeError("package has no 'deploy.yaml' file") - deploy_metadata = yaml.safe_load( - pkg_resources.resource_stream('hailtop.hailctl', "deploy.yaml"))['dataproc'] + deploy_metadata = yaml.safe_load(pkg_resources.resource_stream('hailtop.hailctl', "deploy.yaml"))['dataproc'] conf.extend_flag('properties', DEFAULT_PROPERTIES) - if args.properties: - conf.parse_and_extend('properties', args.properties) - - if args.debug_mode: - conf.extend_flag('properties', { - "spark:spark.driver.extraJavaOptions": "-Xss4M -XX:+HeapDumpOnOutOfMemoryError -XX:-OmitStackTraceInFastThrow", - "spark:spark.executor.extraJavaOptions": "-Xss4M -XX:+HeapDumpOnOutOfMemoryError -XX:-OmitStackTraceInFastThrow", - }) + if properties: + conf.parse_and_extend('properties', properties) + + if debug_mode: + conf.extend_flag( + 'properties', + { + "spark:spark.driver.extraJavaOptions": "-Xss4M -XX:+HeapDumpOnOutOfMemoryError -XX:-OmitStackTraceInFastThrow", + "spark:spark.executor.extraJavaOptions": "-Xss4M -XX:+HeapDumpOnOutOfMemoryError -XX:-OmitStackTraceInFastThrow", + }, + ) # default to highmem machines if using VEP - if not args.worker_machine_type: - args.worker_machine_type = 'n1-highmem-8' if args.vep else 'n1-standard-8' + if not worker_machine_type: + worker_machine_type = 'n1-highmem-8' if vep else 'n1-standard-8' # default initialization script to start up cluster with - conf.extend_flag('initialization-actions', - [deploy_metadata['init_notebook.py']]) + conf.extend_flag('initialization-actions', [deploy_metadata['init_notebook.py']]) # requester pays support - if args.requester_pays_allow_all or args.requester_pays_allow_buckets or args.requester_pays_allow_annotation_db: - if args.requester_pays_allow_all and args.requester_pays_allow_buckets: + if requester_pays_allow_all or requester_pays_allow_buckets or requester_pays_allow_annotation_db: + if requester_pays_allow_all and requester_pays_allow_buckets: raise RuntimeError("Cannot specify both 'requester_pays_allow_all' and 'requester_pays_allow_buckets") - if args.requester_pays_allow_all: + if requester_pays_allow_all: requester_pays_mode = "AUTO" else: requester_pays_mode = "CUSTOM" requester_pays_bucket_sources = [] - if args.requester_pays_allow_buckets: - 
requester_pays_bucket_sources.append(args.requester_pays_allow_buckets)
-        if args.requester_pays_allow_annotation_db:
+        if requester_pays_allow_buckets:
+            requester_pays_bucket_sources.append(requester_pays_allow_buckets)
+        if requester_pays_allow_annotation_db:
             requester_pays_bucket_sources.extend(ANNOTATION_DB_BUCKETS)
-        conf.extend_flag("properties", {"spark:spark.hadoop.fs.gs.requester.pays.buckets": ",".join(requester_pays_bucket_sources)})
+        conf.extend_flag(
+            "properties",
+            {"spark:spark.hadoop.fs.gs.requester.pays.buckets": ",".join(requester_pays_bucket_sources)},
+        )
 
         # Need to pick requester pays project.
-        requester_pays_project = args.project if args.project else gcloud.get_config("project")
+        requester_pays_project = project if project else gcloud.get_config("project")
 
-        conf.extend_flag("properties", {"spark:spark.hadoop.fs.gs.requester.pays.mode": requester_pays_mode,
-                                        "spark:spark.hadoop.fs.gs.requester.pays.project.id": requester_pays_project})
+        conf.extend_flag(
+            "properties",
+            {
+                "spark:spark.hadoop.fs.gs.requester.pays.mode": requester_pays_mode,
+                "spark:spark.hadoop.fs.gs.requester.pays.project.id": requester_pays_project,
+            },
+        )
 
     # gcloud version 277 and onwards requires you to specify a region. Let's just require it for all hailctl users for consistency.
-    if args.region:
-        project_region = args.region
+    if region:
+        project_region = region
     else:
-        project_region = gcloud.get_config("dataproc/region")
-
-    if not project_region:
-        raise RuntimeError("Could not determine dataproc region. Use --region argument to hailctl, or use `gcloud config set dataproc/region <region>` to set a default.")
+        maybe_project_region = gcloud.get_config("dataproc/region")
+        if not maybe_project_region:
+            raise RuntimeError(
+                "Could not determine dataproc region. Use --region argument to hailctl, or use `gcloud config set dataproc/region <region>` to set a default."
+            )
+        project_region = maybe_project_region
 
     # add VEP init script
-    if args.vep:
+    if vep:
         # VEP is too expensive if you have to pay egress charges. We must choose the right replicate.
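        # For example, with the REGION_TO_REPLICATE_MAPPING above, a cluster in
        # 'europe-west2' pulls its VEP data from the 'uk' replicate and one in
        # 'europe-west4' from 'eu'; a region with no mapping entry raises the error below.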
replicate = REGION_TO_REPLICATE_MAPPING.get(project_region) if replicate is None: - raise RuntimeError(f"The --vep argument is not currently provided in your region.\n" - f" Please contact the Hail team on https://discuss.hail.is for support.\n" - f" Your region: {project_region}\n" - f" Supported regions: {', '.join(REGION_TO_REPLICATE_MAPPING.keys())}") + raise RuntimeError( + f"The --vep argument is not currently provided in your region.\n" + f" Please contact the Hail team on https://discuss.hail.is for support.\n" + f" Your region: {project_region}\n" + f" Supported regions: {', '.join(REGION_TO_REPLICATE_MAPPING.keys())}" + ) print(f"Pulling VEP data from bucket in {replicate}.") conf.extend_flag('metadata', {"VEP_REPLICATE": replicate}) vep_config_path = "/vep_data/vep-gcloud.json" - conf.extend_flag('metadata', {"VEP_CONFIG_PATH": vep_config_path, "VEP_CONFIG_URI": f"file://{vep_config_path}"}) - conf.extend_flag('initialization-actions', [deploy_metadata[f'vep-{args.vep}.sh']]) + conf.extend_flag( + 'metadata', {"VEP_CONFIG_PATH": vep_config_path, "VEP_CONFIG_URI": f"file://{vep_config_path}"} + ) + conf.extend_flag('initialization-actions', [deploy_metadata[f'vep-{vep.value}.sh']]) # add custom init scripts - if args.init: - conf.extend_flag('initialization-actions', args.init.split(',')) + if init: + conf.extend_flag('initialization-actions', init.split(',')) - if args.metadata: - conf.parse_and_extend('metadata', args.metadata) + if metadata: + conf.parse_and_extend('metadata', metadata) - wheel = args.wheel or deploy_metadata['wheel'] + wheel = wheel or deploy_metadata['wheel'] conf.extend_flag('metadata', {'WHEEL': wheel}) # if Python packages requested, add metadata variable - packages = deploy_metadata['pip_dependencies'].strip('|').split('|||') + hail_packages = deploy_metadata['pip_dependencies'].strip('|').split('|||') metadata_pkgs = conf.flags['metadata'].get('PKGS') split_regex = r'[|,]' if metadata_pkgs: - packages.extend(re.split(split_regex, metadata_pkgs)) - if args.packages: - packages.extend(re.split(split_regex, args.packages)) - conf.extend_flag('metadata', {'PKGS': '|'.join(set(packages))}) + hail_packages.extend(re.split(split_regex, metadata_pkgs)) + if packages: + hail_packages.extend(re.split(split_regex, packages)) + conf.extend_flag('metadata', {'PKGS': '|'.join(set(hail_packages))}) def disk_size(size): - if args.vep: + if vep: size = max(size, 200) return str(size) + 'GB' - conf.extend_flag('properties', - {"spark:spark.driver.memory": "{driver_memory}g".format( - driver_memory=str(int(MACHINE_MEM[args.master_machine_type] * args.master_memory_fraction)))}) - conf.flags['master-machine-type'] = args.master_machine_type - conf.flags['master-boot-disk-size'] = '{}GB'.format(args.master_boot_disk_size) - conf.flags['num-master-local-ssds'] = args.num_master_local_ssds - conf.flags['num-secondary-workers'] = args.num_secondary_workers - conf.flags['num-worker-local-ssds'] = args.num_worker_local_ssds - conf.flags['num-workers'] = args.num_workers - conf.flags['secondary-worker-boot-disk-size'] = disk_size(args.secondary_worker_boot_disk_size) - conf.flags['worker-boot-disk-size'] = disk_size(args.worker_boot_disk_size) - conf.flags['worker-machine-type'] = args.worker_machine_type - - if not args.no_off_heap_memory: - worker_memory = MACHINE_MEM[args.worker_machine_type] + conf.extend_flag( + 'properties', + { + "spark:spark.driver.memory": "{driver_memory}g".format( + driver_memory=str(int(MACHINE_MEM[master_machine_type] * master_memory_fraction)) + ) + 
}, + ) + conf.flags['master-machine-type'] = master_machine_type + conf.flags['master-boot-disk-size'] = '{}GB'.format(master_boot_disk_size) + conf.flags['num-master-local-ssds'] = num_master_local_ssds + conf.flags['num-secondary-workers'] = num_secondary_workers + conf.flags['num-worker-local-ssds'] = num_worker_local_ssds + conf.flags['num-workers'] = num_workers + conf.flags['secondary-worker-boot-disk-size'] = disk_size(secondary_worker_boot_disk_size) + conf.flags['worker-boot-disk-size'] = disk_size(worker_boot_disk_size) + conf.flags['worker-machine-type'] = worker_machine_type + + if not no_off_heap_memory: + worker_memory = MACHINE_MEM[worker_machine_type] # A Google support engineer recommended the strategy of passing the YARN # config params, and the default value of 95% of machine memory to give to YARN. # yarn.nodemanager.resource.memory-mb - total memory per machine # yarn.scheduler.maximum-allocation-mb - max memory to allocate to each container - available_memory_fraction = args.yarn_memory_fraction + available_memory_fraction = yarn_memory_fraction available_memory_mb = int(worker_memory * available_memory_fraction * 1024) - cores_per_machine = int(args.worker_machine_type.split('-')[-1]) + cores_per_machine = int(worker_machine_type.split('-')[-1]) executor_cores = min(cores_per_machine, 4) available_memory_per_core_mb = available_memory_mb // cores_per_machine memory_per_executor_mb = int(available_memory_per_core_mb * executor_cores) - off_heap_mb = int(memory_per_executor_mb * args.off_heap_memory_fraction) + off_heap_mb = int(memory_per_executor_mb * off_heap_memory_fraction) on_heap_mb = memory_per_executor_mb - off_heap_mb - if args.off_heap_memory_hard_limit: + if off_heap_memory_hard_limit: off_heap_memory_per_core = off_heap_mb // executor_cores else: off_heap_memory_per_core = available_memory_per_core_mb - print(f"hailctl dataproc: Creating a cluster with workers of machine type {args.worker_machine_type}.\n" - f" Allocating {memory_per_executor_mb} MB of memory per executor ({executor_cores} cores),\n" - f" with at least {off_heap_mb} MB for Hail off-heap values and {on_heap_mb} MB for the JVM." - f" Using a maximum Hail memory reservation of {off_heap_memory_per_core} MB per core.") - - conf.extend_flag('properties', - { - 'yarn:yarn.nodemanager.resource.memory-mb': f'{available_memory_mb}', - 'yarn:yarn.scheduler.maximum-allocation-mb': f'{executor_cores * available_memory_per_core_mb}', - 'spark:spark.executor.cores': f'{executor_cores}', - 'spark:spark.executor.memory': f'{on_heap_mb}m', - 'spark:spark.executor.memoryOverhead': f'{off_heap_mb}m', - 'spark:spark.memory.storageFraction': '0.2', - 'spark:spark.executorEnv.HAIL_WORKER_OFF_HEAP_MEMORY_PER_CORE_MB': str( - off_heap_memory_per_core), - } - ) - - if args.region: - conf.flags['region'] = args.region - if args.zone: - conf.flags['zone'] = args.zone - conf.flags['initialization-action-timeout'] = args.init_timeout - if args.network and args.subnet: + print( + f"hailctl dataproc: Creating a cluster with workers of machine type {worker_machine_type}.\n" + f" Allocating {memory_per_executor_mb} MB of memory per executor ({executor_cores} cores),\n" + f" with at least {off_heap_mb} MB for Hail off-heap values and {on_heap_mb} MB for the JVM." + f" Using a maximum Hail memory reservation of {off_heap_memory_per_core} MB per core." 
+ ) + + conf.extend_flag( + 'properties', + { + 'yarn:yarn.nodemanager.resource.memory-mb': f'{available_memory_mb}', + 'yarn:yarn.scheduler.maximum-allocation-mb': f'{executor_cores * available_memory_per_core_mb}', + 'spark:spark.executor.cores': f'{executor_cores}', + 'spark:spark.executor.memory': f'{on_heap_mb}m', + 'spark:spark.executor.memoryOverhead': f'{off_heap_mb}m', + 'spark:spark.memory.storageFraction': '0.2', + 'spark:spark.executorEnv.HAIL_WORKER_OFF_HEAP_MEMORY_PER_CORE_MB': str(off_heap_memory_per_core), + }, + ) + + if region: + conf.flags['region'] = region + if zone: + conf.flags['zone'] = zone + conf.flags['initialization-action-timeout'] = init_timeout + if network and subnet: raise RuntimeError("Cannot define both 'network' and 'subnet' at the same time.") - if args.network: - conf.flags['network'] = args.network - if args.subnet: - conf.flags['subnet'] = args.subnet - if args.configuration: - conf.flags['configuration'] = args.configuration - if args.project: - conf.flags['project'] = args.project - if args.bucket: - conf.flags['bucket'] = args.bucket - if args.temp_bucket: - conf.flags['temp-bucket'] = args.temp_bucket - if args.scopes: - conf.flags['scopes'] = args.scopes + if network: + conf.flags['network'] = network + if subnet: + conf.flags['subnet'] = subnet + if configuration: + conf.flags['configuration'] = configuration + if project: + conf.flags['project'] = project + if bucket: + conf.flags['bucket'] = bucket + if temp_bucket: + conf.flags['temp-bucket'] = temp_bucket + if scopes: + conf.flags['scopes'] = scopes account = gcloud.get_config("account") if account: @@ -414,18 +399,18 @@ def disk_size(size): conf.flags['properties'] = '^|||^' + '|||'.join(f'{k}={v}' for k, v in conf.flags['properties'].items()) # command to start cluster - cmd = conf.get_command(args.name) + cmd = conf.get_command(name) - if args.beta: + if beta: cmd.insert(1, 'beta') - if args.max_idle: - cmd.append('--max-idle={}'.format(args.max_idle)) - if args.max_age: - cmd.append('--max-age={}'.format(args.max_age)) - if args.expiration_time: - cmd.append('--expiration_time={}'.format(args.expiration_time)) - if args.service_account: - cmd.append('--service-account={}'.format(args.service_account)) + if max_idle: + cmd.append('--max-idle={}'.format(max_idle)) + if max_age: + cmd.append('--max-age={}'.format(max_age)) + if expiration_time: + cmd.append('--expiration_time={}'.format(expiration_time)) + if service_account: + cmd.append('--service-account={}'.format(service_account)) cmd.extend(pass_through_args) @@ -433,18 +418,18 @@ def disk_size(size): print(' '.join(cmd[:5]) + ' \\\n ' + ' \\\n '.join(cmd[5:])) # spin up cluster - if not args.dry_run: - print("Starting cluster '{}'...".format(args.name)) + if not dry_run: + print("Starting cluster '{}'...".format(name)) gcloud.run(cmd[1:]) - if args.master_tags: - add_tags_command = ['compute', 'instances', 'add-tags', args.name + '-m', '--tags', args.master_tags] + if master_tags: + add_tags_command = ['compute', 'instances', 'add-tags', name + '-m', '--tags', master_tags] - if args.project: - add_tags_command.append(f"--project={args.project}") - if args.zone: - add_tags_command.append(f"--zone={args.zone}") + if project: + add_tags_command.append(f"--project={project}") + if zone: + add_tags_command.append(f"--zone={zone}") print('gcloud ' + ' '.join(add_tags_command)) - if not args.dry_run: + if not dry_run: gcloud.run(add_tags_command) diff --git a/hail/python/hailtop/hailctl/dataproc/stop.py 
b/hail/python/hailtop/hailctl/dataproc/stop.py deleted file mode 100644 index 3b58bb405f9..00000000000 --- a/hail/python/hailtop/hailctl/dataproc/stop.py +++ /dev/null @@ -1,33 +0,0 @@ -from . import gcloud - - -def init_parser(parser): - parser.add_argument('name', type=str, help='Cluster name.') - parser.add_argument('--async', action='store_true', dest='asink', - help="Do not wait for cluster deletion.") - parser.add_argument('--region', help='Region.', required=True) - parser.add_argument('--dry-run', action='store_true', - help="Print gcloud dataproc command, but don't run it.") - - -async def main(args, pass_through_args): - print("Stopping cluster '{}'...".format(args.name)) - - cmd = [ - 'dataproc', - 'clusters', - 'delete', - '--region={}'.format(args.region), - '--quiet', - args.name - ] - if args.asink: - cmd.append('--async') - - cmd.extend(pass_through_args) - - # print underlying gcloud command - print('gcloud ' + ' '.join(cmd[:5]) + ' \\\n ' + ' \\\n '.join(cmd[6:])) - - if not args.dry_run: - gcloud.run(cmd) diff --git a/hail/python/hailtop/hailctl/dataproc/submit.py b/hail/python/hailtop/hailctl/dataproc/submit.py index 0ce351cd24c..7e8ace9d80a 100644 --- a/hail/python/hailtop/hailctl/dataproc/submit.py +++ b/hail/python/hailtop/hailctl/dataproc/submit.py @@ -2,63 +2,49 @@ import tempfile import zipfile -from . import gcloud - - -def init_parser(parser): - parser.add_argument('name', type=str, help='Cluster name.') - parser.add_argument('script', type=str, help="Path to script.") - parser.add_argument('--files', required=False, type=str, help='Comma-separated list of files to add to the working directory of the Hail application.') - parser.add_argument('--pyfiles', required=False, type=str, help='Comma-separated list of files (or directories with python files) to add to the PYTHONPATH.') - parser.add_argument('--properties', '-p', required=False, type=str, help='Extra Spark properties to set.') - parser.add_argument('--gcloud_configuration', help='Google Cloud configuration to submit job (defaults to currently set configuration).') - parser.add_argument('--dry-run', action='store_true', help="Print gcloud dataproc command, but don't run it.") - parser.add_argument('--region', help='Compute region for the cluster.') +from typing import Optional, List +from . 
import gcloud -async def main(args, pass_through_args): # pylint: disable=unused-argument - print("Submitting to cluster '{}'...".format(args.name)) - # create files argument - files = '' - if args.files: - files = args.files +def submit( + name: str, + script: str, + files: str, + pyfiles: str, + properties: Optional[str], + gcloud_configuration: Optional[str], + dry_run: bool, + region: Optional[str], + pass_through_args: List[str], +): + print("Submitting to cluster '{}'...".format(name)) def _filter_pyfile(fname: str) -> bool: return not fname.endswith('.pyc') # If you only provide one (comma-sep) argument, and it's a zip file, use that file directly - if args.pyfiles and args.pyfiles.endswith('.zip') and ',' not in args.pyfiles: - # Adding the zip archive directly - pyfiles = args.pyfiles - else: - pyfiles = [] - if args.pyfiles: - pyfiles.extend(args.pyfiles.split(',')) - pyfiles.extend(os.environ.get('HAIL_SCRIPTS', '').split(':')) + if not (pyfiles and pyfiles.endswith('.zip') and ',' not in pyfiles): + pyfiles_list = [] if pyfiles: + pyfiles_list.extend(pyfiles.split(',')) + pyfiles_list.extend(os.environ.get('HAIL_SCRIPTS', '').split(':')) + if pyfiles_list: tfile = tempfile.mkstemp(suffix='.zip', prefix='pyscripts_')[1] - zipf = zipfile.ZipFile(tfile, 'w', zipfile.ZIP_DEFLATED) - for hail_script_entry in pyfiles: - if os.path.isfile(hail_script_entry) and _filter_pyfile(hail_script_entry): - zipf.write(hail_script_entry, arcname=os.path.basename(hail_script_entry)) - else: - for root, _, pyfiles_walk in os.walk(hail_script_entry): - for pyfile in pyfiles_walk: - path = os.path.join(root, pyfile) - if os.path.isfile(path) and _filter_pyfile(path): - zipf.write(path, - os.path.relpath(path, - os.path.join(hail_script_entry, '..'))) - zipf.close() + with zipfile.ZipFile(tfile, 'w', zipfile.ZIP_DEFLATED) as zipf: + for hail_script_entry in pyfiles_list: + if os.path.isfile(hail_script_entry) and _filter_pyfile(hail_script_entry): + zipf.write(hail_script_entry, arcname=os.path.basename(hail_script_entry)) + else: + for root, _, pyfiles_walk in os.walk(hail_script_entry): + for pyfile in pyfiles_walk: + path = os.path.join(root, pyfile) + if os.path.isfile(path) and _filter_pyfile(path): + zipf.write( + os.path.join(root, pyfile), + os.path.relpath(os.path.join(root, pyfile), os.path.join(hail_script_entry, '..')), + ) pyfiles = tfile - else: - pyfiles = '' - - # create properties argument - properties = '' - if args.properties: - properties = args.properties # pyspark submit command cmd = [ @@ -66,17 +52,17 @@ def _filter_pyfile(fname: str) -> bool: 'jobs', 'submit', 'pyspark', - args.script, - '--cluster={}'.format(args.name), + script, + '--cluster={}'.format(name), '--files={}'.format(files), '--py-files={}'.format(pyfiles), - '--properties={}'.format(properties) + '--properties={}'.format(properties or ''), ] - if args.gcloud_configuration: - cmd.append('--configuration={}'.format(args.gcloud_configuration)) + if gcloud_configuration: + cmd.append('--configuration={}'.format(gcloud_configuration)) - if args.region: - cmd.append('--region={}'.format(args.region)) + if region: + cmd.append('--region={}'.format(region)) # append arguments to pass to the Hail script if pass_through_args: @@ -88,5 +74,5 @@ def _filter_pyfile(fname: str) -> bool: print('gcloud ' + ' '.join(cmd[:5]) + ' \\\n ' + ' \\\n '.join(cmd[6:])) # submit job - if not args.dry_run: + if not dry_run: gcloud.run(cmd) diff --git a/hail/python/hailtop/hailctl/describe.py b/hail/python/hailtop/hailctl/describe.py 
index 0243e522922..5f1c963c2da 100644 --- a/hail/python/hailtop/hailctl/describe.py +++ b/hail/python/hailtop/hailctl/describe.py @@ -1,17 +1,13 @@ -import argparse import asyncio import orjson -import sys -from typing import List +from typing import List, Optional +from typing_extensions import Annotated as Ann from os import path from zlib import decompress, MAX_WBITS from statistics import median, mean, stdev from collections import OrderedDict - -from ..aiotools import aio_contextlib -from ..aiotools.router_fs import RouterAsyncFS - +from typer import Option as Opt SECTION_SEPARATOR = '-' * 40 IDENT = ' ' * 4 @@ -27,25 +23,25 @@ def parse_type(s: str, end_delimiter: str, element_type: str): if s[:i]: values.append(s[:i]) if element_type in ['Array', 'Set', 'Dict', 'Tuple', 'Interval']: - return {'type': element_type, 'value': values}, s[i + 1:] - return {'type': element_type, 'value': OrderedDict(zip(keys, values))}, s[i + 1:] + return {'type': element_type, 'value': values}, s[i + 1 :] + return {'type': element_type, 'value': OrderedDict(zip(keys, values))}, s[i + 1 :] if s[i] == ':': keys.append(s[:i]) - s = s[i + 1:] + s = s[i + 1 :] i = 0 elif s[i] == '{': - struct, s = parse_type(s[i + 1:], '}', s[:i]) + struct, s = parse_type(s[i + 1 :], '}', s[:i]) values.append(struct) i = 0 elif s[i] == '[': - arr, s = parse_type(s[i + 1:], ']', s[:i] if s[:i] else 'Array') + arr, s = parse_type(s[i + 1 :], ']', s[:i] if s[:i] else 'Array') values.append(arr) i = 0 elif s[i] == ',': if s[:i]: values.append(s[:i]) - s = s[i + 1:] + s = s[i + 1 :] i = 0 else: i += 1 @@ -53,32 +49,20 @@ def parse_type(s: str, end_delimiter: str, element_type: str): raise ValueError(f'End of {element_type} not found') start_schema_index = s.index('{') - return parse_type(s[start_schema_index + 1:], "}", s[:start_schema_index])[0] + return parse_type(s[start_schema_index + 1 :], "}", s[:start_schema_index])[0] def type_str(t, depth=1): - name_map = { - 'Boolean': 'bool', - 'String': 'str' - } + name_map = {'Boolean': 'bool', 'String': 'str'} def element_str(e): if isinstance(e, dict): if e['type'] == 'Struct': - return "struct {{\n{}\n{}}}".format( - type_str(e['value'], depth + 1), - (IDENT * depth) - ) - return "{}<{}>".format( - e['type'].lower(), - ", ".join([element_str(x) for x in e['value']]) - ) + return "struct {{\n{}\n{}}}".format(type_str(e['value'], depth + 1), (IDENT * depth)) + return "{}<{}>".format(e['type'].lower(), ", ".join([element_str(x) for x in e['value']])) return name_map.get(e, e).lower().replace('(', '<').replace(')', '>') - return "\n".join( - "{}'{}': {}".format(IDENT * depth, k, element_str(v)) - for k, v in t.items() - ) + return "\n".join("{}'{}': {}".format(IDENT * depth, k, element_str(v)) for k, v in t.items()) def key_str(k): @@ -92,49 +76,48 @@ def get_partitions_info_str(j): partitions_info = { 'Partitions': len(partitions), 'Rows': sum(partitions), - 'Empty partitions': len([p for p in partitions if p == 0]) + 'Empty partitions': len([p for p in partitions if p == 0]), } if partitions_info['Partitions'] > 1: - partitions_info.update({ - 'Min(rows/partition)': min(partitions), - 'Max(rows/partition)': max(partitions), - 'Median(rows/partition)': median(partitions), - 'Mean(rows/partition)': int(mean(partitions)), - 'StdDev(rows/partition)': int(stdev(partitions)) - }) + partitions_info.update( + { + 'Min(rows/partition)': min(partitions), + 'Max(rows/partition)': max(partitions), + 'Median(rows/partition)': median(partitions), + 'Mean(rows/partition)': int(mean(partitions)), + 
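# What the describe.py parser above produces: parse_type walks the flat type
# string one character at a time, splitting on ':' (field name), ',' (sibling),
# '{' / '[' (nested struct or container) and the closing delimiter. Traced on a
# minimal, invented schema string:
#
#   parse_schema('Struct{a:Int32,b:String}')
#   == {'type': 'Struct', 'value': OrderedDict([('a', 'Int32'), ('b', 'String')])}
#
# which type_str then renders, lowercasing names via name_map, as:
#
#   'a': int32
#   'b': str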
'StdDev(rows/partition)': int(stdev(partitions)), + } + ) return "\n{}".format(IDENT).join(['{}: {}'.format(k, v) for k, v in partitions_info.items()]) -def init_parser(parser): - # arguments with default parameters - parser.add_argument('file', type=str, help='Path to hail file (either MatrixTable or Table).') - parser.add_argument('--requester-pays-project-id', '-u', help='Project to be billed for GCS requests.') - +def describe( + file: str, + requester_pays_project_id: Ann[ + Optional[str], + Opt('--requester-pays-project-id', '-u', help='Project to be billed for GCS requests.'), + ] = None, +): + ''' + Describe the MatrixTable or Table at path FILE. + ''' + asyncio.get_event_loop().run_until_complete(async_describe(file, requester_pays_project_id)) -def main(args): - describe_parser = argparse.ArgumentParser( - prog='hailctl describe', - description='Describe Hail Matrix Table and Table files.', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - init_parser(describe_parser) - if not args: - describe_parser.print_help() - sys.exit(0) - asyncio.get_event_loop().run_until_complete( - main_after_parsing( - *describe_parser.parse_known_args(args=args))) +async def async_describe( + file: str, + requester_pays_project_id: Optional[str], +): + from ..aiotools import aio_contextlib # pylint: disable=import-outside-toplevel + from ..aiotools.router_fs import RouterAsyncFS # pylint: disable=import-outside-toplevel -async def main_after_parsing(args, pass_through_args): # pylint: disable=unused-argument gcs_kwargs = {} - if args.requester_pays_project_id: - gcs_kwargs['project'] = args.requester_pays_project_id + if requester_pays_project_id: + gcs_kwargs['project'] = requester_pays_project_id - async with aio_contextlib.closing( - RouterAsyncFS(gcs_kwargs=gcs_kwargs)) as fs: - j = orjson.loads(decompress(await fs.read(path.join(args.file, 'metadata.json.gz')), - 16 + MAX_WBITS)) + async with aio_contextlib.closing(RouterAsyncFS(gcs_kwargs=gcs_kwargs)) as fs: + j = orjson.loads(decompress(await fs.read(path.join(file, 'metadata.json.gz')), 16 + MAX_WBITS)) # Get the file schema file_schema = parse_schema(j[[k for k in j.keys() if k.endswith('type')][0]]) @@ -176,6 +159,7 @@ async def main_after_parsing(args, pass_through_args): # pylint: disable=unused print(SECTION_SEPARATOR) # Check for _SUCCESS - if not await fs.exists(path.join(args.file, '_SUCCESS')): + if not await fs.exists(path.join(file, '_SUCCESS')): print( - "\033[;1m\033[1;31mCould not find _SUCCESS for file: {}\nThis file will not work.\033[0m".format(args.file)) + "\033[;1m\033[1;31mCould not find _SUCCESS for file: {}\nThis file will not work.\033[0m".format(file) + ) diff --git a/hail/python/hailtop/hailctl/dev/ci_client.py b/hail/python/hailtop/hailctl/dev/ci_client.py new file mode 100644 index 00000000000..39ea8dcf7b4 --- /dev/null +++ b/hail/python/hailtop/hailctl/dev/ci_client.py @@ -0,0 +1,50 @@ +import aiohttp +import sys + +from typing import Optional + +from hailtop import httpx +from hailtop.config import get_deploy_config +from hailtop.auth import hail_credentials +from hailtop.httpx import client_session + + +class CIClient: + def __init__(self, deploy_config=None): + if not deploy_config: + deploy_config = get_deploy_config() + self._deploy_config = deploy_config + self._session: Optional[httpx.ClientSession] = None + + async def __aenter__(self): + headers = await hail_credentials().auth_headers() + self._session = client_session( + raise_for_status=False, timeout=aiohttp.ClientTimeout(total=60), 
headers=headers + ) # type: ignore + return self + + async def __aexit__(self, exc_type, exc, tb): + await self.close() + + async def close(self): + if self._session: + await self._session.close() + self._session = None + + async def dev_deploy_branch(self, branch, steps, excluded_steps, extra_config): + data = { + 'branch': branch, + 'steps': steps, + 'excluded_steps': excluded_steps, + 'extra_config': extra_config, + } + assert self._session + async with self._session.post( + self._deploy_config.url('ci', '/api/v1alpha/dev_deploy_branch'), json=data + ) as resp: + if resp.status >= 400: + print(f'HTTP Response code was {resp.status}') + print(await resp.text()) + sys.exit(1) + resp_data = await resp.json() + return resp_data['batch_id'] diff --git a/hail/python/hailtop/hailctl/dev/cli.py b/hail/python/hailtop/hailctl/dev/cli.py index bfb4181944a..23b6ed98879 100644 --- a/hail/python/hailtop/hailctl/dev/cli.py +++ b/hail/python/hailtop/hailctl/dev/cli.py @@ -1,48 +1,63 @@ -import argparse +import asyncio +import typer +import webbrowser + +from typing import List, Optional +from typing_extensions import Annotated as Ann +from typer import Option as Opt + from . import config -from . import deploy - - -def parser(): - main_parser = argparse.ArgumentParser( - prog='hailctl', - description='Manage Hail development utilities.') - # we have to set dest becuase of a rendering bug in argparse - # https://bugs.python.org/issue29298 - main_subparsers = main_parser.add_subparsers(title='hailctl subcommand', dest='hailctl subcommand', required=True) - - dev_parser = main_subparsers.add_parser( - 'dev', - help='Developer tools.', - description='Developer tools.') - subparsers = dev_parser.add_subparsers(title='hailctl dev subcommand', dest='hailctl dev subcommand', required=True) - - config_parser = subparsers.add_parser( - 'config', - help='Configure deployment', - description='Configure deployment') - - config.cli.init_parser(config_parser) - - deploy_parser = subparsers.add_parser( - 'deploy', - help='Deploy a branch', - description='Deploy a branch') - deploy_parser.set_defaults(module='deploy') - deploy.cli.init_parser(deploy_parser) - - return main_parser - - -def main(args): - p = parser() - args = p.parse_args() - if args.module == 'deploy': - from .deploy import cli as deploy_cli # pylint: disable=import-outside-toplevel - deploy_cli.main(args) - else: - prefix = 'hailctl dev config' - assert args.module[:len(prefix)] == prefix - from .config import cli as config_cli # pylint: disable=import-outside-toplevel - config_cli.main(args) + + +app = typer.Typer( + name='dev', + no_args_is_help=True, + help='Manage Hail development utilities.', +) +app.add_typer( + config.app, +) + + +@app.command() +def deploy( + branch: Ann[str, Opt('--branch', '-b', help='Fully-qualified branch, e.g., hail-is/hail:feature')], + steps: Ann[List[str], Opt('--steps', '-s', help='Comma-separated list of steps to run.')], + excluded_steps: Ann[ + Optional[List[str]], + Opt( + '--excluded_steps', + '-e', + help='Comma-separated list of steps to forcibly exclude. 
Use with caution!', + ), + ] = None, + extra_config: Ann[ + Optional[List[str]], + Opt( + '--extra-config', + '-c', + help='Comma-separated list of key=value pairs to add as extra config parameters.', + ), + ] = None, + open: Ann[bool, Opt('--open', '-o', help='Open the deploy batch page in a web browser.')] = False, +): + '''Deploy a branch.''' + asyncio.run(_deploy(branch, steps, excluded_steps or [], extra_config or [], open)) + + +async def _deploy(branch: str, steps: List[str], excluded_steps: List[str], extra_config: List[str], open: bool): + from hailtop.config import get_deploy_config # pylint: disable=import-outside-toplevel + from hailtop.utils import unpack_comma_delimited_inputs, unpack_key_value_inputs # pylint: disable=import-outside-toplevel + from .ci_client import CIClient # pylint: disable=import-outside-toplevel + + deploy_config = get_deploy_config() + steps = unpack_comma_delimited_inputs(steps) + excluded_steps = unpack_comma_delimited_inputs(excluded_steps) + extra_config_dict = unpack_key_value_inputs(extra_config) + async with CIClient(deploy_config) as ci_client: + batch_id = await ci_client.dev_deploy_branch(branch, steps, excluded_steps, extra_config_dict) + url = deploy_config.url('ci', f'/batches/{batch_id}') + print(f'Created deploy batch, see {url}') + if open: + webbrowser.open(url) diff --git a/hail/python/hailtop/hailctl/dev/config.py b/hail/python/hailtop/hailctl/dev/config.py new file mode 100644 index 00000000000..d5e8964b195 --- /dev/null +++ b/hail/python/hailtop/hailctl/dev/config.py @@ -0,0 +1,43 @@ +from enum import Enum +import os +import json +import typer + +app = typer.Typer( + name='config', + no_args_is_help=True, + help='Configure deployment.', +) + + +class DevConfigProperty(str, Enum): + LOCATION = 'location' + DEFAULT_NAMESPACE = 'default_namespace' + DOMAIN = 'domain' + + +@app.command() +def set(property: DevConfigProperty, value: str): + '''Set dev config property PROPERTY to value VALUE.''' + from hailtop.config import get_deploy_config # pylint: disable=import-outside-toplevel + + deploy_config = get_deploy_config() + config = deploy_config.get_config() + + p = property + config[p] = value + + config_file = os.environ.get('HAIL_DEPLOY_CONFIG_FILE', os.path.expanduser('~/.hail/deploy-config.json')) + with open(config_file, 'w', encoding='utf-8') as f: + json.dump(config, f) + + +@app.command() +def list(): + '''List the settings in the dev config.''' + from hailtop.config import get_deploy_config # pylint: disable=import-outside-toplevel + + deploy_config = get_deploy_config() + print(f' location: {deploy_config.location()}') + print(f' default_namespace: {deploy_config._default_namespace}') + print(f' domain: {deploy_config._domain}') diff --git a/hail/python/hailtop/hailctl/dev/config/__init__.py b/hail/python/hailtop/hailctl/dev/config/__init__.py deleted file mode 100644 index cfbdef3e080..00000000000 --- a/hail/python/hailtop/hailctl/dev/config/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from . import cli - -__all__ = ['cli'] diff --git a/hail/python/hailtop/hailctl/dev/config/cli.py b/hail/python/hailtop/hailctl/dev/config/cli.py deleted file mode 100644 index 72a292ffbf6..00000000000 --- a/hail/python/hailtop/hailctl/dev/config/cli.py +++ /dev/null @@ -1,45 +0,0 @@ -from . import set_property -from . 
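# How the typer-based deploy command above is invoked in practice (branch, step
# and config names here are invented for illustration):
#
#   hailctl dev deploy --branch hail-is/hail:feature \
#       --steps build_hail,test_hail \
#       --extra-config namespace=my-ns \
#       --open
#
# Repeatable options may be given multiple times or comma-delimited; _deploy
# flattens them with unpack_comma_delimited_inputs / unpack_key_value_inputs
# before posting to the CI dev_deploy_branch endpoint and printing the batch URL.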
import list_properties - -import warnings - - -def init_parser(config_parser): - subparsers = config_parser.add_subparsers(title='hailctl dev config subcommand', dest='hailctl dev config subcommand', required=True) - - set_parser = subparsers.add_parser( - 'set', - help='Set deploy configuration property.', - description='Set deploy configuration property.') - - set_parser.set_defaults(module='hailctl dev config set') - set_property.init_parser(set_parser) - - show_parser = subparsers.add_parser( - 'show', - help='List all dev configuration properties. Note: This subcommand is deprecated. Use `list` instead', - description='Set deploy configuration property.') - - show_parser.set_defaults(module='hailctl dev config show') - list_properties.init_parser(show_parser) - - list_parser = subparsers.add_parser( - 'list', - help='List all dev configuration properties.', - description='List all dev configuration properties.') - - list_parser.set_defaults(module='hailctl dev config list') - list_properties.init_parser(list_parser) - - -def main(args): - if args.module == 'hailctl dev config set': - set_property.main(args) - return - - if args.module == 'hailctl dev config show': - warnings.warn('The `show` subcommand is deprecated. Use `list` instead.', stacklevel=2) - else: - assert args.module == 'hailctl dev config list' - - list_properties.main(args) diff --git a/hail/python/hailtop/hailctl/dev/config/list_properties.py b/hail/python/hailtop/hailctl/dev/config/list_properties.py deleted file mode 100644 index 41219be73a6..00000000000 --- a/hail/python/hailtop/hailctl/dev/config/list_properties.py +++ /dev/null @@ -1,12 +0,0 @@ -from hailtop.config import get_deploy_config - - -def init_parser(parser): # pylint: disable=unused-argument - pass - - -def main(args): # pylint: disable=unused-argument - deploy_config = get_deploy_config() - print(f' location: {deploy_config.location()}') - print(f' default_namespace: {deploy_config._default_namespace}') - print(f' domain: {deploy_config._domain}') diff --git a/hail/python/hailtop/hailctl/dev/config/set_property.py b/hail/python/hailtop/hailctl/dev/config/set_property.py deleted file mode 100644 index d728b68041e..00000000000 --- a/hail/python/hailtop/hailctl/dev/config/set_property.py +++ /dev/null @@ -1,25 +0,0 @@ -import os -import json - -from hailtop.config import get_deploy_config - - -def init_parser(parser): - parser.add_argument("property", type=str, - help="Property to set.", - choices=['location', 'default_namespace', 'domain']) - parser.add_argument("value", type=str, - help="Value to set property to.") - - -def main(args): - deploy_config = get_deploy_config() - config = deploy_config.get_config() - - p = args.property - config[p] = args.value - - config_file = os.environ.get( - 'HAIL_DEPLOY_CONFIG_FILE', os.path.expanduser('~/.hail/deploy-config.json')) - with open(config_file, 'w', encoding='utf-8') as f: - json.dump(config, f) diff --git a/hail/python/hailtop/hailctl/dev/deploy/__init__.py b/hail/python/hailtop/hailctl/dev/deploy/__init__.py deleted file mode 100644 index cfbdef3e080..00000000000 --- a/hail/python/hailtop/hailctl/dev/deploy/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -from . 
import cli - -__all__ = ['cli'] diff --git a/hail/python/hailtop/hailctl/dev/deploy/cli.py b/hail/python/hailtop/hailctl/dev/deploy/cli.py deleted file mode 100644 index 59606fd8110..00000000000 --- a/hail/python/hailtop/hailctl/dev/deploy/cli.py +++ /dev/null @@ -1,85 +0,0 @@ -import asyncio -import aiohttp -import webbrowser -import sys - -from typing import Optional - -from hailtop import httpx -from hailtop.config import get_deploy_config -from hailtop.auth import hail_credentials -from hailtop.httpx import client_session -from hailtop.utils import unpack_comma_delimited_inputs, unpack_key_value_inputs - - -def init_parser(parser): - parser.add_argument("--branch", "-b", type=str, - help="Fully-qualified branch, e.g., hail-is/hail:feature.", required=True) - parser.add_argument("--steps", "-s", nargs='+', action='append', - help="Comma or space-separated list of steps to run.", required=True) - parser.add_argument("--excluded_steps", "-e", nargs='+', action='append', default=[], - help="Comma or space-separated list of steps to forcibly exclude. Use with caution!") - parser.add_argument("--extra-config", "-c", nargs="+", action='append', default=[], - help="Comma or space-separate list of key=value pairs to add as extra config parameters.") - parser.add_argument("--open", "-o", - action="store_true", - help="Open the deploy batch page in a web browser.") - - -class CIClient: - def __init__(self, deploy_config=None): - if not deploy_config: - deploy_config = get_deploy_config() - self._deploy_config = deploy_config - self._session: Optional[httpx.ClientSession] = None - - async def __aenter__(self): - headers = await hail_credentials().auth_headers() - self._session = client_session( - raise_for_status=False, - timeout=aiohttp.ClientTimeout(total=60), headers=headers) # type: ignore - return self - - async def __aexit__(self, exc_type, exc, tb): - await self.close() - - async def close(self): - if self._session: - await self._session.close() - self._session = None - - async def dev_deploy_branch(self, branch, steps, excluded_steps, extra_config): - data = { - 'branch': branch, - 'steps': steps, - 'excluded_steps': excluded_steps, - 'extra_config': extra_config, - } - assert self._session - async with self._session.post( - self._deploy_config.url('ci', '/api/v1alpha/dev_deploy_branch'), json=data) as resp: - if resp.status >= 400: - print(f'HTTP Response code was {resp.status}') - print(await resp.text()) - sys.exit(1) - resp_data = await resp.json() - return resp_data['batch_id'] - - -async def submit(args): - deploy_config = get_deploy_config() - steps = unpack_comma_delimited_inputs(args.steps) - excluded_steps = unpack_comma_delimited_inputs(args.excluded_steps) - extra_config = unpack_key_value_inputs(args.extra_config) - async with CIClient(deploy_config) as ci_client: - batch_id = await ci_client.dev_deploy_branch(args.branch, steps, excluded_steps, extra_config) - url = deploy_config.url('ci', f'/batches/{batch_id}') - print(f'Created deploy batch, see {url}') - if args.open: - webbrowser.open(url) - - -def main(args): - loop = asyncio.get_event_loop() - loop.run_until_complete(submit(args)) - loop.run_until_complete(loop.shutdown_asyncgens()) diff --git a/hail/python/hailtop/hailctl/hdinsight/cli.py b/hail/python/hailtop/hailctl/hdinsight/cli.py index 3a0e8b2a694..6fb9644bda0 100644 --- a/hail/python/hailtop/hailctl/hdinsight/cli.py +++ b/hail/python/hailtop/hailctl/hdinsight/cli.py @@ -1,72 +1,164 @@ -import sys - -import asyncio -import argparse - -from . import start -from . 
import stop -from . import submit -from . import list_clusters - - -def parser(): - main_parser = argparse.ArgumentParser( - prog='hailctl dataproc', - description='Manage and monitor Hail HDInsight clusters.', - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - - subparsers = main_parser.add_subparsers() - - start_parser = subparsers.add_parser( - 'start', - help='Start an HDInsight cluster configured for Hail.', - description='Start an HDInsight cluster configured for Hail.') - - start_parser.set_defaults(module='start') - start.init_parser(start_parser) - - stop_parser = subparsers.add_parser( - 'stop', - help='Stop an HDInsight cluster configured for Hail.', - description='Stop an HDInsight cluster configured for Hail.') - - stop_parser.set_defaults(module='stop') - stop.init_parser(stop_parser) - - submit_parser = subparsers.add_parser( - 'submit', - help='Submit a job to an HDInsight cluster configured for Hail.', - description='Submit a job to an HDInsight cluster configured for Hail.') - - submit_parser.set_defaults(module='submit') - submit.init_parser(submit_parser) - - list_parser = subparsers.add_parser( - 'list', - help='List HDInsight clusters configured for Hail.', - description='List HDInsight clusters configured for Hail.') - - list_parser.set_defaults(module='list') - list_clusters.init_parser(list_parser) - - return main_parser - - -def main(args): - p = parser() - if not args: - p.print_help() - sys.exit(0) - jmp = { - 'start': start, - 'stop': stop, - 'submit': submit, - 'list': list_clusters, - } - - args, pass_through_args = p.parse_known_args(args=args) - if "module" not in args: - p.error('positional argument required') - - asyncio.get_event_loop().run_until_complete( - jmp[args.module].main(args, pass_through_args)) +import subprocess + +from typing import Optional, List +from typing_extensions import Annotated as Ann + +import typer +from typer import Option as Opt, Argument as Arg + +from .start import start as hdinsight_start, VepVersion +from .submit import submit as hdinsight_submit + + +app = typer.Typer(name='hdinsight', no_args_is_help=True, help='Manage and monitor Hail HDInsight clusters.') + + +@app.command(context_settings={"allow_extra_args": True, "ignore_unknown_options": True}) +def start( + ctx: typer.Context, + cluster_name: str, + storage_account: Ann[str, Arg(help='Storage account in which to create a container for ephemeral cluster data.')], + resource_group: Ann[str, Arg(help='Resource group in which to place cluster.')], + http_password: Ann[ + Optional[str], Opt(help='Password for web access. If unspecified one will be generated.') + ] = None, + sshuser_password: Ann[ + Optional[str], Opt(help='Password for ssh access. If unspecified one will be generated.') + ] = None, + location: Ann[str, Opt(help='Azure location in which to place the cluster.')] = 'eastus', + num_workers: Ann[int, Opt(help='Initial number of workers.')] = 2, + install_hail_uri: Ann[ + Optional[str], + Opt( + help="A custom install hail bash script to use. Must be accessible by the cluster's head nodes. http(s) and wasb(s) protocols are both acceptable" + ), + ] = None, + install_native_deps_uri: Ann[ + Optional[str], + Opt( + help="A custom native deps bash script to use. Must be accessible by the cluster's nodes. http(s) and wasb(s) protocols are both acceptable" + ), + ] = None, + wheel_uri: Ann[ + Optional[str], + Opt( + help="A custom wheel file to use. Must be accessible by the cluster's head nodes. 
only http(s) protocol is acceptable" + ), + ] = None, + vep: Ann[Optional[VepVersion], Opt(help='Install VEP for the specified reference genome.')] = None, + vep_loftee_uri: Ann[ + Optional[str], + Opt( + help="(REQUIRED FOR VEP) A folder file containing the VEP loftee data files. There are tarred, requester-pays copies available at gs://hail-REGION-vep/loftee-beta/GRCh38.tar and gs://hail-REGION-vep/loftee-beta/GRCh37.tar where REGION is one of us, eu, uk, and aus-sydney. Must be accessible by the cluster's head nodes. Must be an Azure blob storage URI like https://account.blob.core.windows.net/container/foo. See the Azure-specific VEP instructions in the Hail documentation." + ), + ] = None, + vep_homo_sapiens_uri: Ann[ + Optional[str], + Opt( + help="(REQUIRED FOR VEP) A folder file containing the VEP homo sapiens data files. There are tarred, requester-pays copies available at gs://hail-REGION-vep/homo-sapiens/95_GRCh38.tar and gs://hail-REGION-vep/homo-sapiens/85_GRCh37.tar where REGION is one of us, eu, uk, and aus-sydney. Must be accessible by the cluster's head nodes. Must be an Azure blob storage URI like https://account.blob.core.windows.net/container/foo. See the Azure-specific VEP instructions in the Hail documentation." + ), + ] = None, + vep_config_uri: Ann[ + Optional[str], + Opt( + help="A VEP config to use. Must be accessible by the cluster's head nodes. Only http(s) protocol is acceptable." + ), + ] = None, + install_vep_uri: Ann[ + Optional[str], + Opt( + help="A custom VEP install script to use. Must be accessible by the cluster's nodes. http(s) and wasb(s) protocols are both acceptable" + ), + ] = None, +): + ''' + Start an HDInsight cluster configured for Hail. + ''' + from ... import pip_version # pylint: disable=import-outside-toplevel + + hail_version = pip_version() + + def default_artifact(filename: str) -> str: + return f'https://raw.githubusercontent.com/hail-is/hail/{hail_version}/hail/python/hailtop/hailctl/hdinsight/resources/{filename}' + + hdinsight_start( + cluster_name, + storage_account, + resource_group, + http_password, + sshuser_password, + location, + num_workers, + install_hail_uri or default_artifact('install-hail.sh'), + install_native_deps_uri or default_artifact('install-native-deps.sh'), + wheel_uri + or f'https://storage.googleapis.com/hail-common/azure-hdinsight-wheels/hail-{hail_version}-py3-none-any.whl', + vep, + vep_loftee_uri, + vep_homo_sapiens_uri, + vep_config_uri, + install_vep_uri or default_artifact('install-vep.sh'), + ctx.args, + ) + + +@app.command() +def stop( + name: str, + storage_account: Ann[str, Arg(help="Storage account in which the cluster's container exists.")], + resource_group: Ann[str, Arg(help='Resource group in which the cluster exists.')], + extra_hdinsight_delete_args: Optional[List[str]] = None, + extra_storage_delete_args: Optional[List[str]] = None, +): + ''' + Stop an HDInsight cluster configured for Hail. 
+ ''' + print(f"Stopping cluster '{name}'...") + + subprocess.check_call( + [ + 'az', + 'hdinsight', + 'delete', + '--name', + name, + '--resource-group', + resource_group, + *(extra_hdinsight_delete_args or []), + ] + ) + subprocess.check_call( + [ + 'az', + 'storage', + 'container', + 'delete', + '--name', + name, + '--account-name', + storage_account, + *(extra_storage_delete_args or []), + ] + ) + + +@app.command(context_settings={"allow_extra_args": True, "ignore_unknown_options": True}) +def submit( + ctx: typer.Context, + name: str, + storage_account: Ann[str, Arg(help="Storage account in which the cluster's container exists.")], + http_password: Ann[str, Arg(help='Web password for the cluster')], + script: Ann[str, Arg(help='Path to script.')], +): + ''' + Submit a job to an HDInsight cluster configured for Hail. + ''' + hdinsight_submit(name, storage_account, http_password, script, ctx.args) + + +@app.command(context_settings={"allow_extra_args": True, "ignore_unknown_options": True}) +def list(ctx: typer.Context): + ''' + List HDInsight clusters configured for Hail. + ''' + subprocess.check_call(['az', 'hdinsight', 'list', *ctx.args]) diff --git a/hail/python/hailtop/hailctl/hdinsight/list_clusters.py b/hail/python/hailtop/hailctl/hdinsight/list_clusters.py deleted file mode 100644 index 0a6817b227d..00000000000 --- a/hail/python/hailtop/hailctl/hdinsight/list_clusters.py +++ /dev/null @@ -1,10 +0,0 @@ -import subprocess - - -def init_parser(parser): - del parser - - -async def main(args, pass_through_args): - del args - subprocess.check_call(['az', 'hdinsight', 'list', *pass_through_args]) diff --git a/hail/python/hailtop/hailctl/hdinsight/start.py b/hail/python/hailtop/hailctl/hdinsight/start.py index 8f79af59547..61476c62637 100644 --- a/hail/python/hailtop/hailctl/hdinsight/start.py +++ b/hail/python/hailtop/hailctl/hdinsight/start.py @@ -1,166 +1,218 @@ import re import os +from enum import Enum import sys import time import json import subprocess -import requests from shlex import quote as shq -from ... import pip_version -from ...utils import secret_alnum_string +from typing import List, Optional def exec(*args): subprocess.check_call(args) -def init_parser(parser): - parser.add_argument('cluster_name', type=str, help='Cluster name.') - parser.add_argument('storage_account', type=str, help='Storage account in which to create a container for ephemeral cluster data.') - parser.add_argument('resource_group', type=str, help='Resource group in which to place cluster.') - parser.add_argument('--http-password', type=str, help='Password for web access. If unspecified one will be generated.') - parser.add_argument('--sshuser-password', type=str, help='Password for ssh access. If unspecified one will be generated.') - parser.add_argument('--location', type=str, default='eastus', help='Azure location in which to place the cluster.') - parser.add_argument('--num-workers', type=str, default='2', help='Initial number of workers.') - parser.add_argument('--install-hail-uri', - type=str, - default=f'https://raw.githubusercontent.com/hail-is/hail/{pip_version()}/hail/python/hailtop/hailctl/hdinsight/resources/install-hail.sh', - help='A custom install hail bash script to use. Must be accessible by the cluster\'s head nodes. 
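# The stop command above amounts to these two az invocations, run in order
# (cluster, group and account names invented for illustration):
#
#   az hdinsight delete --name my-cluster --resource-group my-rg
#   az storage container delete --name my-cluster --account-name mystorageacct
#
# Because subprocess.check_call raises on a non-zero exit, the ephemeral storage
# container is only deleted after the cluster delete itself succeeds.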
http(s) and wasb(s) protocols are both acceptable') - parser.add_argument('--install-native-deps-uri', - type=str, - default=f'https://raw.githubusercontent.com/hail-is/hail/{pip_version()}/hail/python/hailtop/hailctl/hdinsight/resources/install-native-deps.sh', - help='A custom install hail bash script to use. Must be accessible by the cluster\'s nodes. http(s) and wasb(s) protocols are both acceptable') - parser.add_argument('--wheel-uri', - type=str, - default=f'https://storage.googleapis.com/hail-common/azure-hdinsight-wheels/hail-{pip_version()}-py3-none-any.whl', - help='A custom wheel file to use. Must be accessible by the cluster\'s head nodes. only http(s) protocol is acceptable') - parser.add_argument('--vep', - help='Install VEP for the specified reference genome.', - required=False, - choices=['GRCh37', 'GRCh38']) - parser.add_argument('--vep-loftee-uri', - type=str, - default=None, - help='(REQUIRED FOR VEP) A folder file containing the VEP loftee data files. There are tarred, requester-pays copies available at gs://hail-REGION-vep/loftee-beta/GRCh38.tar and gs://hail-REGION-vep/loftee-beta/GRCh37.tar where REGION is one of us, eu, uk, and aus-sydney. Must be accessible by the cluster\'s head nodes. Must be an Azure blob storage URI like https://account.blob.core.windows.net/container/foo. See the Azure-specific VEP instructions in the Hail documentation.') - parser.add_argument('--vep-homo-sapiens-uri', - type=str, - default=None, - help='(REQUIRED FOR VEP) A folder file containing the VEP homo sapiens data files. There are tarred, requester-pays copies available at gs://hail-REGION-vep/homo-sapiens/95_GRCh38.tar and gs://hail-REGION-vep/homo-sapiens/85_GRCh37.tar where REGION is one of us, eu, uk, and aus-sydney. Must be accessible by the cluster\'s head nodes. Must be an Azure blob storage URI like https://account.blob.core.windows.net/container/foo. See the Azure-specific VEP instructions in the Hail documentation.') - parser.add_argument('--vep-config-uri', - type=str, - default=None, - help='A VEP config to use. Must be accessible by the cluster\'s head nodes. Only http(s) protocol is acceptable.') - parser.add_argument('--install-vep-uri', - type=str, - default=f'https://raw.githubusercontent.com/hail-is/hail/{pip_version()}/hail/python/hailtop/hailctl/hdinsight/resources/install-vep.sh', - help='A custom VEP install script to use. Must be accessible by the cluster\'s nodes. http(s) and wasb(s) protocols are both acceptable') - - -async def main(args, pass_through_args): - print(f'Starting the cluster {args.cluster_name}') - - sshuser_password = args.sshuser_password +class VepVersion(str, Enum): + GRCH37 = 'GRCh37' + GRCH38 = 'GRCh38' + + +def start( + cluster_name: str, + storage_account: str, + resource_group: str, + http_password: Optional[str], + sshuser_password: Optional[str], + location: str, + num_workers: int, + install_hail_uri: str, + install_native_deps_uri: str, + wheel_uri: str, + vep: Optional[VepVersion], + vep_loftee_uri: Optional[str], + vep_homo_sapiens_uri: Optional[str], + vep_config_uri: Optional[str], + install_vep_uri: str, + pass_through_args: List[str], +): + import requests # pylint: disable=import-outside-toplevel + import requests.auth # pylint: disable=import-outside-toplevel + from ...utils import secret_alnum_string # pylint: disable=import-outside-toplevel + from ... 
import pip_version # pylint: disable=import-outside-toplevel + + print(f'Starting the cluster {cluster_name}') + if sshuser_password is None: sshuser_password = secret_alnum_string(12) + '_aA0' - http_password = args.http_password if http_password is None: http_password = secret_alnum_string(12) + '_aA0' - exec('az', 'hdinsight', 'create', - '--name', args.cluster_name, - '--resource-group', args.resource_group, - '--type', 'spark', - '--component-version', 'Spark=3.0', - '--http-password', http_password, - '--http-user', 'admin', - '--location', args.location, - '--workernode-count', args.num_workers, - '--ssh-password', sshuser_password, - '--ssh-user', 'sshuser', - '--storage-account', args.storage_account, - '--storage-container', args.cluster_name, - '--version', '4.0', - *pass_through_args) - - print(f'Installing Hail on {args.cluster_name}') - wheel_pip_version_match = re.match('[^-]*-([^-]*)-.*.whl', os.path.basename(args.wheel_uri)) + exec( + 'az', + 'hdinsight', + 'create', + '--name', + cluster_name, + '--resource-group', + resource_group, + '--type', + 'spark', + '--component-version', + 'Spark=3.0', + '--http-password', + http_password, + '--http-user', + 'admin', + '--location', + location, + '--workernode-count', + num_workers, + '--ssh-password', + sshuser_password, + '--ssh-user', + 'sshuser', + '--storage-account', + storage_account, + '--storage-container', + cluster_name, + '--version', + '4.0', + *pass_through_args, + ) + + print(f'Installing Hail on {cluster_name}') + wheel_pip_version_match = re.match('[^-]*-([^-]*)-.*.whl', os.path.basename(wheel_uri)) assert wheel_pip_version_match - wheel_pip_version, = wheel_pip_version_match.groups() - exec('az', 'hdinsight', 'script-action', 'execute', '-g', args.resource_group, '-n', 'installhail', - '--cluster-name', args.cluster_name, - '--script-uri', args.install_hail_uri, - '--roles', 'headnode', 'workernode', - '--persist-on-success', - '--script-parameters', f'{args.wheel_uri} {wheel_pip_version} {args.cluster_name}') - - print(f'Installing Hail\'s native dependencies on {args.cluster_name}') - exec('az', 'hdinsight', 'script-action', 'execute', '-g', args.resource_group, '-n', 'installnativedeps', - '--cluster-name', args.cluster_name, - '--script-uri', args.install_native_deps_uri, - '--roles', 'headnode', 'workernode', - '--persist-on-success') - - if args.vep: - if args.vep == 'GRCh38': + (wheel_pip_version,) = wheel_pip_version_match.groups() + exec( + 'az', + 'hdinsight', + 'script-action', + 'execute', + '-g', + resource_group, + '-n', + 'installhail', + '--cluster-name', + cluster_name, + '--script-uri', + install_hail_uri, + '--roles', + 'headnode', + 'workernode', + '--persist-on-success', + '--script-parameters', + f'{wheel_uri} {wheel_pip_version} {cluster_name}', + ) + + print(f"Installing Hail's native dependencies on {cluster_name}") + exec( + 'az', + 'hdinsight', + 'script-action', + 'execute', + '-g', + resource_group, + '-n', + 'installnativedeps', + '--cluster-name', + cluster_name, + '--script-uri', + install_native_deps_uri, + '--roles', + 'headnode', + 'workernode', + '--persist-on-success', + ) + + if vep: + if vep == 'GRCh38': image = 'konradjk/vep95_loftee:0.2' - elif args.vep == 'GRCh37': + elif vep == 'GRCh37': image = 'konradjk/vep85_loftee:1.0.3' else: - print(f'unknown reference genome {args.vep}') + print(f'unknown reference genome {vep}') sys.exit(1) - vep_config_uri = args.vep_config_uri if vep_config_uri is None: - vep_config_uri = 
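# The wheel-version regex above in action, with an invented wheel URI; the pip
# version is the first hyphen-delimited field after the package name:

import os
import re

wheel_uri = 'https://example.com/wheels/hail-0.2.120-py3-none-any.whl'  # assumed URI
match = re.match('[^-]*-([^-]*)-.*.whl', os.path.basename(wheel_uri))
assert match is not None
(wheel_pip_version,) = match.groups()
assert wheel_pip_version == '0.2.120'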
f'https://raw.githubusercontent.com/hail-is/hail/{pip_version()}/hail/python/hailtop/hailctl/hdinsight/resources/vep-{args.vep}.json' - - print(f'Loading VEP into ABS container {args.cluster_name}') - for uri in [args.vep_loftee_uri, args.vep_homo_sapiens_uri]: - exec('az', 'storage', 'copy', '--recursive', '--source', uri, '--destination', f'https://{args.storage_account}.blob.core.windows.net/{args.cluster_name}/') - - print(f'Installing VEP on {args.cluster_name}') - exec('az', 'hdinsight', 'script-action', 'execute', '-g', args.resource_group, '-n', 'installvep', - '--cluster-name', args.cluster_name, - '--script-uri', args.install_vep_uri, - '--roles', 'headnode', 'workernode', - '--persist-on-success', - '--script-parameters', f'/{os.path.basename(args.vep_loftee_uri)} /{os.path.basename(args.vep_homo_sapiens_uri)} {args.vep} {image} {vep_config_uri}') + vep_config_uri = f'https://raw.githubusercontent.com/hail-is/hail/{pip_version()}/hail/python/hailtop/hailctl/hdinsight/resources/vep-{vep}.json' + + if vep_loftee_uri is None or vep_homo_sapiens_uri is None: + raise ValueError("Both `vep_loftee_uri` and `vep_homo_sapiens_uri` must be specified if `vep` is specified") + + print(f'Loading VEP into ABS container {cluster_name}') + for uri in [vep_loftee_uri, vep_homo_sapiens_uri]: + exec( + 'az', + 'storage', + 'copy', + '--recursive', + '--source', + uri, + '--destination', + f'https://{storage_account}.blob.core.windows.net/{cluster_name}/', + ) + + print(f'Installing VEP on {cluster_name}') + exec( + 'az', + 'hdinsight', + 'script-action', + 'execute', + '-g', + resource_group, + '-n', + 'installvep', + '--cluster-name', + cluster_name, + '--script-uri', + install_vep_uri, + '--roles', + 'headnode', + 'workernode', + '--persist-on-success', + '--script-parameters', + f'/{os.path.basename(vep_loftee_uri)} /{os.path.basename(vep_homo_sapiens_uri)} {vep} {image} {vep_config_uri}', + ) def put_jupyter(command): # I figured this out after looking at # https://docs.microsoft.com/en-us/azure/hdinsight/hdinsight-hadoop-manage-ambari-rest-api#restart-a-service-component # and doing some trial and error + assert http_password requests.put( - f'https://{shq(args.cluster_name)}.azurehdinsight.net/api/v1/clusters/{shq(args.cluster_name)}/services/JUPYTER/', + f'https://{shq(cluster_name)}.azurehdinsight.net/api/v1/clusters/{shq(cluster_name)}/services/JUPYTER/', headers={'Content-Type': 'application/json', 'X-Requested-By': 'ambari'}, json=command, - auth=requests.auth.HTTPBasicAuth('admin', args.http_password), + auth=requests.auth.HTTPBasicAuth('admin', http_password), timeout=60, ) - stop = json.dumps({ - "RequestInfo": {"context": "put services into STOPPED state"}, - "Body": {"ServiceInfo": {"state" : "INSTALLED"}} - }) - start = json.dumps({ - "RequestInfo": {"context": "put services into STARTED state"}, - "Body": {"ServiceInfo": {"state" : "STARTED"}} - }) + stop = json.dumps( + {"RequestInfo": {"context": "put services into STOPPED state"}, "Body": {"ServiceInfo": {"state": "INSTALLED"}}} + ) + start = json.dumps( + {"RequestInfo": {"context": "put services into STARTED state"}, "Body": {"ServiceInfo": {"state": "STARTED"}}} + ) print('Restarting Jupyter ...') put_jupyter(stop) time.sleep(10) put_jupyter(start) - print(f'''Your cluster is ready. + print( + f'''Your cluster is ready. 
Web username: admin Web password: {http_password} -Jupyter URL: https://{args.cluster_name}.azurehdinsight.net/jupyter/tree +Jupyter URL: https://{cluster_name}.azurehdinsight.net/jupyter/tree SSH username: sshuser SSH password: {sshuser_password} -SSH domain name: {args.cluster_name}-ssh.azurehdinsight.net +SSH domain name: {cluster_name}-ssh.azurehdinsight.net Use the "Python3 (ipykernel)" kernel. -''') +''' + ) diff --git a/hail/python/hailtop/hailctl/hdinsight/stop.py b/hail/python/hailtop/hailctl/hdinsight/stop.py deleted file mode 100644 index ef10f2e90c8..00000000000 --- a/hail/python/hailtop/hailctl/hdinsight/stop.py +++ /dev/null @@ -1,30 +0,0 @@ -import subprocess -import sys - - -def init_parser(parser): - parser.add_argument('name', type=str, help='Cluster name.') - parser.add_argument('storage_account', type=str, help='Storage account in which cluster\'s container exists.') - parser.add_argument('resource_group', type=str, help='Resource group in which cluster exists.') - parser.add_argument('--extra-hdinsight-delete-args', nargs='+', help='Storage account in which cluster\'s container exists.') - parser.add_argument('--extra-storage-delete-args', nargs='+', help='Storage account in which cluster\'s container exists.') - - -async def main(args, pass_through_args): - print("Stopping cluster '{}'...".format(args.name)) - - if len(pass_through_args) > 0: - print('Received too many arguments, did you intend to use --extra-hdinsight-delete-args ' - f'or --extra-storage-delete-args? Excess arguments were {pass_through_args}') - sys.exit(1) - - subprocess.check_call( - ['az', 'hdinsight', 'delete', - '--name', args.name, - '--resource-group', args.resource_group, - *(args.extra_hdinsight_delete_args or [])]) - subprocess.check_call( - ['az', 'storage', 'container', 'delete', - '--name', args.name, - '--account-name', args.storage_account, - *(args.extra_storage_delete_args or [])]) diff --git a/hail/python/hailtop/hailctl/hdinsight/submit.py b/hail/python/hailtop/hailctl/hdinsight/submit.py index 7a5a907126f..61a28a74b24 100644 --- a/hail/python/hailtop/hailctl/hdinsight/submit.py +++ b/hail/python/hailtop/hailctl/hdinsight/submit.py @@ -1,36 +1,48 @@ import os import subprocess -import requests -from ...utils import sync_sleep_and_backoff +from typing import List -def init_parser(parser): - parser.add_argument('name', type=str, help='Cluster name.') - parser.add_argument('storage_account', type=str, help='Storage account in which cluster\'s container exists.') - parser.add_argument('http_password', type=str, help='Web password for the cluster.') - parser.add_argument('script', type=str, nargs='?', help='Path to script.') +def submit( + name: str, + storage_account: str, + http_password: str, + script: str, + pass_through_args: List[str], +): + import requests # pylint: disable=import-outside-toplevel + import requests.auth # pylint: disable=import-outside-toplevel + from ...utils import sync_sleep_and_backoff # pylint: disable=import-outside-toplevel - -async def main(args, pass_through_args): # pylint: disable=unused-argument - print("Submitting to cluster '{}'...".format(args.name)) + print("Submitting to cluster '{}'...".format(name)) subprocess.check_call( - ['az', 'storage', 'copy', - '--source', args.script, - '--destination', f'https://{args.storage_account}.blob.core.windows.net/{args.name}/{os.path.basename(args.script)}']) + [ + 'az', + 'storage', + 'copy', + '--source', + script, + '--destination', + 
f'https://{storage_account}.blob.core.windows.net/{name}/{os.path.basename(script)}', + ] + ) resp = requests.post( - f'https://{args.name}.azurehdinsight.net/livy/batches', + f'https://{name}.azurehdinsight.net/livy/batches', headers={'Content-Type': 'application/json', 'X-Requested-By': 'admin'}, - json={'file': f'wasbs://{args.name}@{args.storage_account}.blob.core.windows.net/{os.path.basename(args.script)}', - 'conf': { - # NB: Only the local protocol is permitted, the file protocol is banned #security - 'spark.jars': 'local:/usr/bin/anaconda/envs/py37/lib/python3.7/site-packages/hail/backend/hail-all-spark.jar', - 'spark.pyspark.driver.python': '/usr/bin/anaconda/envs/py37/bin/python3', - }, - 'args': pass_through_args}, - auth=requests.auth.HTTPBasicAuth('admin', args.http_password), - timeout=60) + json={ + 'file': f'wasbs://{name}@{storage_account}.blob.core.windows.net/{os.path.basename(script)}', + 'conf': { + # NB: Only the local protocol is permitted, the file protocol is banned #security + 'spark.jars': 'local:/usr/bin/anaconda/envs/py37/lib/python3.7/site-packages/hail/backend/hail-all-spark.jar', + 'spark.pyspark.driver.python': '/usr/bin/anaconda/envs/py37/bin/python3', + }, + 'args': pass_through_args, + }, + auth=requests.auth.HTTPBasicAuth('admin', http_password), + timeout=60, + ) batch = resp.json() resp.raise_for_status() batch_id = batch['id'] @@ -38,12 +50,15 @@ async def main(args, pass_through_args): # pylint: disable=unused-argument delay = 0.01 while True: resp = requests.get( - f'https://{args.name}.azurehdinsight.net/livy/batches/{batch_id}', - auth=requests.auth.HTTPBasicAuth('admin', args.http_password), - timeout=60) + f'https://{name}.azurehdinsight.net/livy/batches/{batch_id}', + auth=requests.auth.HTTPBasicAuth('admin', http_password), + timeout=60, + ) batch = resp.json() resp.raise_for_status() if batch.get('appId'): - print(f'Job submitted. View logs at: https://{args.name}.azurehdinsight.net/yarnui/hn/cluster/app/{batch["appId"]}') + print( + f'Job submitted. 
View logs at: https://{name}.azurehdinsight.net/yarnui/hn/cluster/app/{batch["appId"]}' + ) break delay = sync_sleep_and_backoff(delay) diff --git a/hail/python/hailtop/pinned-requirements.txt b/hail/python/hailtop/pinned-requirements.txt index e79667958e3..b05e101f15a 100644 --- a/hail/python/hailtop/pinned-requirements.txt +++ b/hail/python/hailtop/pinned-requirements.txt @@ -30,9 +30,9 @@ azure-mgmt-storage==20.1.0 # via -r hail/hail/python/hailtop/requirements.txt azure-storage-blob==12.16.0 # via -r hail/hail/python/hailtop/requirements.txt -boto3==1.26.151 +boto3==1.26.152 # via -r hail/hail/python/hailtop/requirements.txt -botocore==1.29.151 +botocore==1.29.152 # via # -r hail/hail/python/hailtop/requirements.txt # boto3 @@ -51,6 +51,8 @@ charset-normalizer==3.1.0 # via # aiohttp # requests +click==8.1.3 + # via typer commonmark==0.9.1 # via rich cryptography==41.0.1 @@ -84,7 +86,7 @@ google-crc32c==1.5.0 # via google-resumable-media google-resumable-media==2.5.0 # via google-cloud-storage -googleapis-common-protos==1.59.0 +googleapis-common-protos==1.59.1 # via google-api-core humanize==1.1.0 # via -r hail/hail/python/hailtop/requirements.txt @@ -147,6 +149,8 @@ python-dateutil==2.8.2 # via botocore python-json-logger==2.0.7 # via -r hail/hail/python/hailtop/requirements.txt +pyyaml==6.0 + # via -r hail/hail/python/hailtop/requirements.txt requests==2.31.0 # via # azure-core @@ -175,12 +179,15 @@ sortedcontainers==2.4.0 # via -r hail/hail/python/hailtop/requirements.txt tabulate==0.9.0 # via -r hail/hail/python/hailtop/requirements.txt +typer==0.9.0 + # via -r hail/hail/python/hailtop/requirements.txt typing-extensions==4.6.3 # via # azure-core # azure-storage-blob # janus # rich + # typer urllib3==1.26.16 # via # botocore diff --git a/hail/python/hailtop/requirements.txt b/hail/python/hailtop/requirements.txt index eeffaee201e..47ac9b6eba7 100644 --- a/hail/python/hailtop/requirements.txt +++ b/hail/python/hailtop/requirements.txt @@ -15,7 +15,9 @@ nest_asyncio>=1.5.4,<2 orjson>=3.6.4,<4 protobuf==3.20.2 rich==12.6.0 +typer>=0.9.0,<1 python-json-logger>=2.0.2,<3 +pyyaml>=6.0,<7.0 sortedcontainers>=2.4.0,<3 tabulate>=0.8.9,<1 uvloop>=0.16.0,<1; sys_platform!='win32' diff --git a/hail/python/hailtop/utils/utils.py b/hail/python/hailtop/utils/utils.py index 114db1d9866..233a82af5cf 100644 --- a/hail/python/hailtop/utils/utils.py +++ b/hail/python/hailtop/utils/utils.py @@ -46,14 +46,13 @@ U = TypeVar('U') # pylint: disable=invalid-name -def unpack_comma_delimited_inputs(inputs): +def unpack_comma_delimited_inputs(inputs: List[str]) -> List[str]: return [s.strip() - for steps in inputs - for step in steps - for s in step.split(',') if s.strip()] + for comma_separated_steps in inputs + for s in comma_separated_steps.split(',') if s.strip()] -def unpack_key_value_inputs(inputs): +def unpack_key_value_inputs(inputs: List[str]) -> Dict[str, str]: key_values = [i.split('=') for i in unpack_comma_delimited_inputs(inputs)] return {kv[0]: kv[1] for kv in key_values} diff --git a/hail/python/pinned-requirements.txt b/hail/python/pinned-requirements.txt index bd13d69f943..423f98b89a5 100644 --- a/hail/python/pinned-requirements.txt +++ b/hail/python/pinned-requirements.txt @@ -57,11 +57,11 @@ azure-storage-blob==12.16.0 # -r hail/hail/python/hailtop/requirements.txt bokeh==3.1.1 # via -r hail/hail/python/requirements.txt -boto3==1.26.151 +boto3==1.26.152 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # -r hail/hail/python/hailtop/requirements.txt -botocore==1.29.151 
+botocore==1.29.152 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # -r hail/hail/python/hailtop/requirements.txt @@ -86,6 +86,10 @@ charset-normalizer==3.1.0 # -c hail/hail/python/hailtop/pinned-requirements.txt # aiohttp # requests +click==8.1.3 + # via + # -c hail/hail/python/hailtop/pinned-requirements.txt + # typer commonmark==0.9.1 # via # -c hail/hail/python/hailtop/pinned-requirements.txt @@ -143,7 +147,7 @@ google-resumable-media==2.5.0 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # google-cloud-storage -googleapis-common-protos==1.59.0 +googleapis-common-protos==1.59.1 # via # -c hail/hail/python/hailtop/pinned-requirements.txt # google-api-core @@ -283,7 +287,10 @@ python-json-logger==2.0.7 pytz==2023.3 # via pandas pyyaml==6.0 - # via bokeh + # via + # -c hail/hail/python/hailtop/pinned-requirements.txt + # -r hail/hail/python/hailtop/requirements.txt + # bokeh regex==2023.6.3 # via parsimonious requests==2.31.0 @@ -335,6 +342,10 @@ tenacity==8.2.2 # via plotly tornado==6.3.2 # via bokeh +typer==0.9.0 + # via + # -c hail/hail/python/hailtop/pinned-requirements.txt + # -r hail/hail/python/hailtop/requirements.txt typing-extensions==4.6.3 # via # -c hail/hail/python/hailtop/pinned-requirements.txt @@ -342,6 +353,7 @@ typing-extensions==4.6.3 # azure-storage-blob # janus # rich + # typer tzdata==2023.3 # via pandas urllib3==1.26.16 diff --git a/hail/python/test/hail/backend/test_service_backend.py b/hail/python/test/hail/backend/test_service_backend.py index 12da7814688..1a0e0fa50b5 100644 --- a/hail/python/test/hail/backend/test_service_backend.py +++ b/hail/python/test/hail/backend/test_service_backend.py @@ -9,13 +9,9 @@ @skip_unless_service_backend() def test_tiny_driver_has_tiny_memory(): try: - hl.utils.range_table(100_000_000, 50).to_pandas() - except Exception as exc: - # Sometimes the JVM properly OOMs, sometimes it just dies. 
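        # Aside (not part of this patch): the try/except/else shape used in this
        # test can also be written with pytest.raises, which fails the test
        # automatically when no exception is raised. A minimal sketch, assuming
        # the new FatalError message below stays stable:
        #
        #   import pytest
        #   with pytest.raises(hl.utils.FatalError, match='off-heap memory exceeded'):
        #       hl.eval(hl.range(1024 * 1024).map(lambda x: hl.range(1024 * 1024)))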
- assert ( - 'java.lang.OutOfMemoryError: Java heap space' in exc.args[0] or - 'batch.worker.jvm_entryway_protocol.EndOfStream' in exc.args[0] - ) + hl.eval(hl.range(1024 * 1024).map(lambda x: hl.range(1024 * 1024))) + except hl.utils.FatalError as exc: + assert "HailException: Hail off-heap memory exceeded maximum threshold: limit " in exc.args[0] else: assert False diff --git a/hail/python/test/hail/expr/test_expr.py b/hail/python/test/hail/expr/test_expr.py index 5d7f461b790..def5ba76948 100644 --- a/hail/python/test/hail/expr/test_expr.py +++ b/hail/python/test/hail/expr/test_expr.py @@ -4015,6 +4015,15 @@ def assert_unique_uids(a): assert_contains_node(a, ir.StreamScan) assert(len(set(hl.eval(a.to_array())[-1])) == 5) + # test StreamAgg + a = hl._stream_range(10) + a = a.aggregate(lambda x: hl.agg.collect(hl.rand_int64())) + assert_contains_node(a, ir.StreamAgg) + assert(len(set(hl.eval(a))) == 10) + a = hl._stream_range(10) + a = a.map(lambda x: hl._stream_range(10).aggregate(lambda y: hl.agg.count() + hl.rand_int64())) + assert_contains_node(a, ir.StreamAgg) + # test AggExplode t = hl.utils.range_table(5) t = t.annotate(a = hl.range(t.idx)) @@ -4147,3 +4156,8 @@ def test_reservoir_sampling(): mean = np.mean(sample) expected_stdev = math.sqrt(sample_variance / sample_size) assert abs(mean - sample_mean) / expected_stdev < 4 , (iteration, sample_size, abs(mean - sample_mean) / expected_stdev) + + +def test_local_agg(): + x = hl.literal([1,2,3,4]) + assert hl.eval(x.aggregate(lambda x: hl.agg.sum(x))) == 10 \ No newline at end of file diff --git a/hail/python/test/hail/expr/test_ndarrays.py b/hail/python/test/hail/expr/test_ndarrays.py index b2082adb3bc..42c740e1baa 100644 --- a/hail/python/test/hail/expr/test_ndarrays.py +++ b/hail/python/test/hail/expr/test_ndarrays.py @@ -942,6 +942,37 @@ def assert_evals_to_same_svd(nd_expr, np_array, full_matrices=True, compute_uv=T assert_evals_to_same_svd(rank_2_tall_rectangle, np_rank_2_tall_rectangle, full_matrices=False) +def test_eigh(): + def assert_evals_to_same_eig(nd_expr, np_array, eigvals_only=True): + evaled = hl.eval(hl.nd.eigh(nd_expr, eigvals_only)) + np_eig = np.linalg.eigvalsh(np_array) + + # check shapes + for h, n in zip(evaled, np_eig): + assert h.shape == n.shape + + if eigvals_only: + np.testing.assert_array_almost_equal(evaled, np_eig) + else: + he, hv = evaled + + # eigvals match + np.testing.assert_array_almost_equal(he, np_eig) + + # V is orthonormal + vvt = hv @ hv.T + np.testing.assert_array_almost_equal(vvt, np.identity(vvt.shape[0])) + + # V is eigenvectors + np.testing.assert_array_almost_equal(np_array @ hv, hv * he) + + A = np.array([[6, 3, 1, 5], [3, 0, 5, 1], [1, 5, 6, 2], [5, 1, 2, 2]]) + hA = hl.nd.array(A) + + assert_evals_to_same_eig(hA, A) + assert_evals_to_same_eig(hA, A, eigvals_only=True) + + def test_numpy_interop(): v = [2, 3] w = [3, 5] diff --git a/hail/python/test/hail/fs/test_worker_driver_fs.py b/hail/python/test/hail/fs/test_worker_driver_fs.py index 414f407b0d0..6863d72def7 100644 --- a/hail/python/test/hail/fs/test_worker_driver_fs.py +++ b/hail/python/test/hail/fs/test_worker_driver_fs.py @@ -1,9 +1,12 @@ +import asyncio +import os + import hail as hl -import pytest from hailtop.utils import secret_alnum_string from hailtop.test_utils import skip_in_azure, run_if_azure +from hailtop.aiocloud.aioazure import AzureAsyncFS -from ..helpers import fails_local_backend, hl_stop_for_test, hl_init_for_test, test_timeout +from ..helpers import fails_local_backend, hl_stop_for_test, hl_init_for_test, 
test_timeout, resource @skip_in_azure @@ -129,3 +132,17 @@ def test_can_access_public_blobs(): assert len(readme.read()) > 0 mt = hl.read_matrix_table(public_mt) mt.describe() + +@run_if_azure +@fails_local_backend +def test_qob_can_use_sas_tokens(): + vcf = resource('sample.vcf') + account = AzureAsyncFS.parse_url(vcf).account + + sub_id = os.environ['HAIL_AZURE_SUBSCRIPTION_ID'] + rg = os.environ['HAIL_AZURE_RESOURCE_GROUP'] + creds_file = os.environ['AZURE_APPLICATION_CREDENTIALS'] + sas_token = asyncio.run(AzureAsyncFS(credential_file=creds_file).generate_sas_token(sub_id, rg, account, "rl")) + + mt = hl.import_vcf(f'{vcf}?{sas_token}', min_partitions=4) + mt._force_count_rows() diff --git a/hail/python/test/hail/matrixtable/test_file_formats.py b/hail/python/test/hail/matrixtable/test_file_formats.py index 5d158ae8a4b..d4a098f72fe 100644 --- a/hail/python/test/hail/matrixtable/test_file_formats.py +++ b/hail/python/test/hail/matrixtable/test_file_formats.py @@ -73,7 +73,7 @@ def all_values_table_fixture(init_hail): @pytest.mark.parametrize("path", mt_paths) def test_backward_compatability_mt(path, all_values_matrix_table_fixture): - assert len(mt_paths) == 46, str((resource_dir, ht_paths)) + assert len(mt_paths) == 56, str((resource_dir, ht_paths)) old = hl.read_matrix_table(path) @@ -88,7 +88,7 @@ def test_backward_compatability_mt(path, all_values_matrix_table_fixture): @pytest.mark.parametrize("path", ht_paths) def test_backward_compatability_ht(path, all_values_table_fixture): - assert len(ht_paths) == 42, str((resource_dir, ht_paths)) + assert len(ht_paths) == 52, str((resource_dir, ht_paths)) old = hl.read_table(path) diff --git a/hail/python/test/hail/vds/test_vds.py b/hail/python/test/hail/vds/test_vds.py index 2eedef8664e..4912f42b9a6 100644 --- a/hail/python/test/hail/vds/test_vds.py +++ b/hail/python/test/hail/vds/test_vds.py @@ -705,3 +705,19 @@ def test_filter_intervals_table(): vds_filt = hl.vds.filter_intervals(vds, filter_intervals) assert vds_filt.variant_data.rows().select()._same(filter_vars) + + +# issue 13183 +def test_ref_block_does_not_densify_to_next_contig(): + vds = hl.vds.read_vds(os.path.join(resource('vds'), '1kg_2samples_starts.vds')) + vds = hl.vds.filter_chromosomes(vds, keep=['chr1', 'chr2']) + ref = vds.reference_data + var = vds.variant_data.filter_entries(False) + # max out all chr1 refblocks, and truncate all chr2 refblocks so that nothing in chr2 should be densified + ref = ref.annotate_entries(END=hl.if_else(ref.locus.contig == 'chr1', + hl.parse_locus_interval('chr1', reference_genome=ref.locus.dtype.reference_genome).end.position, + ref.locus.position)) + vds = hl.vds.VariantDataset(reference_data=ref, variant_data=var) + mt = hl.vds.to_dense_mt(vds) + mt = mt.filter_rows(mt.locus.contig == 'chr2') + assert mt.aggregate_entries(hl.agg.count()) == 0 diff --git a/hail/python/test/hailtop/hailctl/dataproc/test_cli.py b/hail/python/test/hailtop/hailctl/dataproc/test_cli.py index 617e49dae74..39497ca486e 100644 --- a/hail/python/test/hailtop/hailctl/dataproc/test_cli.py +++ b/hail/python/test/hailtop/hailctl/dataproc/test_cli.py @@ -1,45 +1,32 @@ from unittest.mock import Mock - -import pytest +from typer.testing import CliRunner from hailtop.hailctl.dataproc import cli -from hailtop.hailctl.dataproc import list_clusters -def test_required_gcloud_version_met(monkeypatch): - monkeypatch.setattr("hailtop.hailctl.dataproc.gcloud.get_version", Mock(return_value=cli.MINIMUM_REQUIRED_GCLOUD_VERSION)) +runner = CliRunner(mix_stderr=False) + - mock_list = 
Mock() - async def async_return_mock_list(*args, **kwargs): - return mock_list(*args, **kwargs) - monkeypatch.setattr(list_clusters, "main", async_return_mock_list) - cli.main(["list"]) +def test_required_gcloud_version_met(gcloud_run, monkeypatch): + monkeypatch.setattr("hailtop.hailctl.dataproc.gcloud.get_version", Mock(return_value=cli.MINIMUM_REQUIRED_GCLOUD_VERSION)) - assert mock_list.called + runner.invoke(cli.app, ['list']) + assert gcloud_run.call_count == 1 -def test_required_gcloud_version_unmet(monkeypatch, capsys): +def test_required_gcloud_version_unmet(gcloud_run, monkeypatch): monkeypatch.setattr("hailtop.hailctl.dataproc.gcloud.get_version", Mock(return_value=(200, 0, 0))) - mock_list = Mock() - async def async_return_mock_list(*args, **kwargs): - return mock_list(*args, **kwargs) - monkeypatch.setattr(list_clusters, "main", async_return_mock_list) - with pytest.raises(SystemExit): - cli.main(["list"]) - - assert "hailctl dataproc requires Google Cloud SDK (gcloud) version" in capsys.readouterr().err + res = runner.invoke(cli.app, ['list']) + assert res.exit_code == 1 + assert res.exception + assert "hailctl dataproc requires Google Cloud SDK (gcloud) version" in res.exception.args[0] - assert not mock_list.called + assert gcloud_run.call_count == 0 -def test_unable_to_determine_version(monkeypatch): +def test_unable_to_determine_version(gcloud_run, monkeypatch): monkeypatch.setattr("hailtop.hailctl.dataproc.gcloud.get_version", Mock(side_effect=ValueError)) - mock_list = Mock() - async def async_return_mock_list(*args, **kwargs): - return mock_list(*args, **kwargs) - monkeypatch.setattr(list_clusters, "main", async_return_mock_list) - cli.main(["list"]) - - assert mock_list.called + runner.invoke(cli.app, ['list']) + assert gcloud_run.call_count == 1 diff --git a/hail/python/test/hailtop/hailctl/dataproc/test_connect.py b/hail/python/test/hailtop/hailctl/dataproc/test_connect.py index 287f909f671..7a1dbad7eb4 100644 --- a/hail/python/test/hailtop/hailctl/dataproc/test_connect.py +++ b/hail/python/test/hailtop/hailctl/dataproc/test_connect.py @@ -1,10 +1,14 @@ from unittest.mock import Mock +from typer.testing import CliRunner import pytest from hailtop.hailctl.dataproc import cli +runner = CliRunner(mix_stderr=False) + + @pytest.fixture def subprocess(): return Mock() @@ -23,25 +27,24 @@ def patch_subprocess(monkeypatch, subprocess): def test_cluster_and_service_required(gcloud_run): - with pytest.raises(SystemExit): - cli.main(["connect"]) - + res = runner.invoke(cli.app, ['connect']) + assert res.exit_code == 2 assert gcloud_run.call_count == 0 - with pytest.raises(SystemExit): - cli.main(["connect", "notebook"]) - + res = runner.invoke(cli.app, ['connect', 'notebook']) + assert res.exit_code == 2 assert gcloud_run.call_count == 0 def test_dry_run(gcloud_run, subprocess): - cli.main(["connect", "test-cluster", "notebook", "--dry-run"]) + res = runner.invoke(cli.app, ['connect', 'test-cluster', 'notebook', '--dry-run']) + assert res.exit_code == 0 assert gcloud_run.call_count == 0 assert subprocess.Popen.call_count == 0 def test_connect(gcloud_run, subprocess): - cli.main(["connect", "test-cluster", "notebook"]) + runner.invoke(cli.app, ['connect', 'test-cluster', 'notebook']) gcloud_args = gcloud_run.call_args[0][0] assert gcloud_args[:2] == ["compute", "ssh"] @@ -70,8 +73,8 @@ def test_connect(gcloud_run, subprocess): ("notebook", "8123"), ("nb", "8123"), ]) -def test_service_port_and_path(gcloud_run, subprocess, service, expected_port_and_path): - cli.main(["connect", 
"test-cluster", service]) +def test_service_port_and_path(subprocess, service, expected_port_and_path): + runner.invoke(cli.app, ['connect', 'test-cluster', service]) popen_args = subprocess.Popen.call_args[0][0] assert popen_args[1] == f"http://localhost:{expected_port_and_path}" @@ -84,20 +87,20 @@ def test_hailctl_chrome(subprocess, monkeypatch): ) monkeypatch.setenv("HAILCTL_CHROME", "/path/to/chrome.exe") - cli.main(["connect", "test-cluster", "notebook"]) + runner.invoke(cli.app, ['connect', 'test-cluster', 'notebook']) popen_args = subprocess.Popen.call_args[0][0] assert popen_args[0] == "/path/to/chrome.exe" def test_port(gcloud_run): - cli.main(["connect", "test-cluster", "notebook", "--port=8000"]) + runner.invoke(cli.app, ['connect', 'test-cluster', 'notebook', '--port=8000']) assert "--ssh-flag=-D 8000" in gcloud_run.call_args[0][0] def test_connect_zone(gcloud_run, gcloud_config): gcloud_config["compute/zone"] = "us-central1-b" - cli.main(["connect", "test-cluster", "notebook", "--zone=us-east1-d"]) + runner.invoke(cli.app, ['connect', 'test-cluster', 'notebook', '--zone=us-east1-d']) assert "--zone=us-east1-d" in gcloud_run.call_args[0][0] @@ -105,7 +108,7 @@ def test_connect_zone(gcloud_run, gcloud_config): def test_connect_default_zone(gcloud_run, gcloud_config): gcloud_config["compute/zone"] = "us-west1-a" - cli.main(["connect", "test-cluster", "notebook"]) + runner.invoke(cli.app, ['connect', 'test-cluster', 'notebook']) assert "--zone=us-west1-a" in gcloud_run.call_args[0][0] @@ -113,13 +116,12 @@ def test_connect_default_zone(gcloud_run, gcloud_config): def test_connect_zone_required(gcloud_run, gcloud_config): gcloud_config["compute/zone"] = None - with pytest.raises(Exception): - cli.main(["connect", "test-cluster", "notebook"]) + res = runner.invoke(cli.app, ['connect', 'test-cluster', 'notebook']) + assert res.exit_code == 1 assert gcloud_run.call_count == 0 def test_connect_project(gcloud_run): - cli.main(["connect", "test-cluster", "notebook", "--project=test-project"]) - + runner.invoke(cli.app, ['connect', 'test-cluster', 'notebook', '--project=test-project']) assert "--project=test-project" in gcloud_run.call_args[0][0] diff --git a/hail/python/test/hailtop/hailctl/dataproc/test_list_clusters.py b/hail/python/test/hailtop/hailctl/dataproc/test_list_clusters.py index 635329c31af..0b1fc333066 100644 --- a/hail/python/test/hailtop/hailctl/dataproc/test_list_clusters.py +++ b/hail/python/test/hailtop/hailctl/dataproc/test_list_clusters.py @@ -1,18 +1,21 @@ -import pytest +from typer.testing import CliRunner from hailtop.hailctl.dataproc import cli +runner = CliRunner(mix_stderr=False) + + def test_list(gcloud_run): - cli.main(["list"]) + runner.invoke(cli.app, ['list']) assert gcloud_run.call_args[0][0] == ["dataproc", "clusters", "list"] def test_clusters_project(gcloud_run): - cli.main(["list", "--project=foo"]) + runner.invoke(cli.app, ['list', '--project=foo']) assert "--project=foo" in gcloud_run.call_args[0][0] def test_clusters_region(gcloud_run): - cli.main(["list", "--region=europe-north1"]) + runner.invoke(cli.app, ['list', '--region=europe-north1']) assert "--region=europe-north1" in gcloud_run.call_args[0][0] diff --git a/hail/python/test/hailtop/hailctl/dataproc/test_modify.py b/hail/python/test/hailtop/hailctl/dataproc/test_modify.py index 9d6c443df81..7468f5c03e9 100644 --- a/hail/python/test/hailtop/hailctl/dataproc/test_modify.py +++ b/hail/python/test/hailtop/hailctl/dataproc/test_modify.py @@ -1,48 +1,50 @@ import pytest +from typer.testing 
import CliRunner from hailtop.hailctl.dataproc import cli +runner = CliRunner(mix_stderr=False) + + def test_stop(gcloud_run): - cli.main(["modify", "test-cluster", "--num-workers=2"]) + runner.invoke(cli.app, ['modify', 'test-cluster', '--num-workers=2']) assert gcloud_run.call_args[0][0][:3] == ["dataproc", "clusters", "update"] def test_beta(gcloud_run): - cli.main(["--beta", "modify", "test-cluster", "--num-workers=2"]) + runner.invoke(cli.app, ['--beta', 'modify', 'test-cluster', '--num-workers=2']) assert gcloud_run.call_args[0][0][:4] == ["beta", "dataproc", "clusters", "update"] -def test_cluster_name_required(capsys, gcloud_run): - with pytest.raises(SystemExit): - cli.main(["modify"]) - - assert "arguments are required: name" in capsys.readouterr().err +def test_cluster_name_required(gcloud_run): + res = runner.invoke(cli.app, ['modify']) + assert "Missing argument 'NAME'" in res.stderr assert gcloud_run.call_count == 0 def test_cluster_project(gcloud_run): - cli.main(["modify", "--project=foo", "test-cluster", "--num-workers=2"]) + runner.invoke(cli.app, ['modify', '--project=foo', 'test-cluster', '--num-workers=2']) assert "--project=foo" in gcloud_run.call_args[0][0] def test_cluster_region(gcloud_run): - cli.main(["modify", "--region=europe-north1", "test-cluster", "--num-workers=2"]) + runner.invoke(cli.app, ['modify', '--region=europe-north1', 'test-cluster', '--num-workers=2']) assert "--region=europe-north1" in gcloud_run.call_args[0][0] def test_modify_dry_run(gcloud_run): - cli.main(["modify", "test-cluster", "--num-workers=2", "--dry-run"]) + runner.invoke(cli.app, ['modify', 'test-cluster', '--num-workers=2', '--dry-run']) assert gcloud_run.call_count == 0 @pytest.mark.parametrize("workers_arg", [ "--num-workers=2", "--n-workers=2", - "-w=2", + "-w2", ]) def test_modify_workers(gcloud_run, workers_arg): - cli.main(["modify", "test-cluster", workers_arg]) + runner.invoke(cli.app, ['modify', 'test-cluster', workers_arg]) assert "--num-workers=2" in gcloud_run.call_args[0][0] @@ -50,15 +52,15 @@ def test_modify_workers(gcloud_run, workers_arg): "--num-secondary-workers=2", "--num-preemptible-workers=2", "--n-pre-workers=2", - "-p=2", + "-p2", ]) def test_modify_secondary_workers(gcloud_run, workers_arg): - cli.main(["modify", "test-cluster", workers_arg]) + runner.invoke(cli.app, ['modify', 'test-cluster', workers_arg]) assert "--num-secondary-workers=2" in gcloud_run.call_args[0][0] def test_modify_max_idle(gcloud_run): - cli.main(["modify", "test-cluster", "--max-idle=1h"]) + runner.invoke(cli.app, ['modify', 'test-cluster', '--max-idle=1h']) assert "--max-idle=1h" in gcloud_run.call_args[0][0] @@ -67,19 +69,19 @@ def test_modify_max_idle(gcloud_run): "--num-secondary-workers=2", ]) def test_graceful_decommission_timeout(gcloud_run, workers_arg): - cli.main(["modify", "test-cluster", workers_arg, "--graceful-decommission-timeout=1h"]) + runner.invoke(cli.app, ['modify', 'test-cluster', workers_arg, '--graceful-decommission-timeout=1h']) assert workers_arg in gcloud_run.call_args[0][0] assert "--graceful-decommission-timeout=1h" in gcloud_run.call_args[0][0] def test_graceful_decommission_timeout_no_resize(gcloud_run): - with pytest.raises(SystemExit): - cli.main(["modify", "test-cluster", "--graceful-decommission-timeout=1h"]) - assert gcloud_run.call_count == 0 + res = runner.invoke(cli.app, ['modify', 'test-cluster', '--graceful-decommission-timeout=1h']) + assert res.exit_code == 1 + assert gcloud_run.call_count == 0 def test_modify_wheel_remote_wheel(gcloud_run): - 
cli.main(["modify", "test-cluster", "--wheel=gs://some-bucket/hail.whl"]) + runner.invoke(cli.app, ['modify', 'test-cluster', '--wheel=gs://some-bucket/hail.whl']) assert gcloud_run.call_count == 1 gcloud_args = gcloud_run.call_args[0][0] assert gcloud_args[:3] == ["compute", "ssh", "test-cluster-m"] @@ -95,7 +97,7 @@ def test_modify_wheel_remote_wheel(gcloud_run): def test_modify_wheel_local_wheel(gcloud_run): - cli.main(["modify", "test-cluster", "--wheel=./local-hail.whl"]) + runner.invoke(cli.app, ['modify', 'test-cluster', '--wheel=./local-hail.whl']) assert gcloud_run.call_count == 2 copy_gcloud_args = gcloud_run.call_args_list[0][0][0] @@ -121,7 +123,7 @@ def test_modify_wheel_local_wheel(gcloud_run): def test_modify_wheel_zone(gcloud_run, gcloud_config, wheel_arg): gcloud_config["compute/zone"] = "us-central1-b" - cli.main(["modify", "test-cluster", wheel_arg, "--zone=us-east1-d"]) + runner.invoke(cli.app, ['modify', 'test-cluster', wheel_arg, '--zone=us-east1-d']) for call_args in gcloud_run.call_args_list: assert "--zone=us-east1-d" in call_args[0][0] @@ -133,7 +135,7 @@ def test_modify_wheel_zone(gcloud_run, gcloud_config, wheel_arg): def test_modify_wheel_default_zone(gcloud_run, gcloud_config, wheel_arg): gcloud_config["compute/zone"] = "us-central1-b" - cli.main(["modify", "test-cluster", wheel_arg]) + runner.invoke(cli.app, ['modify', 'test-cluster', wheel_arg]) for call_args in gcloud_run.call_args_list: assert "--zone=us-central1-b" in call_args[0][0] @@ -145,9 +147,9 @@ def test_modify_wheel_default_zone(gcloud_run, gcloud_config, wheel_arg): def test_modify_wheel_zone_required(gcloud_run, gcloud_config, wheel_arg): gcloud_config["compute/zone"] = None - with pytest.raises(Exception): - cli.main(["modify", "test-cluster", wheel_arg]) - assert gcloud_run.call_count == 0 + res = runner.invoke(cli.app, ['modify', 'test-cluster', wheel_arg]) + assert res.exit_code == 1 + assert gcloud_run.call_count == 0 @pytest.mark.parametrize("wheel_arg", [ @@ -155,22 +157,22 @@ def test_modify_wheel_zone_required(gcloud_run, gcloud_config, wheel_arg): "--wheel=./hail.whl", ]) def test_modify_wheel_dry_run(gcloud_run, wheel_arg): - cli.main(["modify", "test-cluster", wheel_arg, "--dry-run"]) + runner.invoke(cli.app, ['modify', 'test-cluster', wheel_arg, '--dry-run']) assert gcloud_run.call_count == 0 -def test_wheel_and_update_hail_version_mutually_exclusive(gcloud_run, capsys): - with pytest.raises(SystemExit): - cli.main(["modify", "test-cluster", "--wheel=./hail.whl", "--update-hail-version"]) - +def test_wheel_and_update_hail_version_mutually_exclusive(gcloud_run): + res = runner.invoke(cli.app, ['modify', 'test-cluster', '--wheel=./hail.whl', '--update-hail-version']) + assert res.exit_code == 1 + assert res.exception + assert 'argument --update-hail-version: not allowed with argument --wheel' in res.exception.args[0] assert gcloud_run.call_count == 0 - assert "argument --update-hail-version: not allowed with argument --wheel" in capsys.readouterr().err def test_update_hail_version(gcloud_run, monkeypatch, deploy_metadata): monkeypatch.setattr("hailtop.hailctl.dataproc.modify.get_deploy_metadata", lambda: deploy_metadata) - cli.main(["modify", "test-cluster", "--update-hail-version"]) + runner.invoke(cli.app, ['modify', 'test-cluster', '--update-hail-version']) assert gcloud_run.call_count == 1 gcloud_args = gcloud_run.call_args[0][0] assert gcloud_args[:3] == ["compute", "ssh", "test-cluster-m"] diff --git a/hail/python/test/hailtop/hailctl/dataproc/test_start.py 
b/hail/python/test/hailtop/hailctl/dataproc/test_start.py index 402d2d34bc5..2075879ef41 100644 --- a/hail/python/test/hailtop/hailctl/dataproc/test_start.py +++ b/hail/python/test/hailtop/hailctl/dataproc/test_start.py @@ -1,23 +1,26 @@ import pytest +from typer.testing import CliRunner from hailtop.hailctl.dataproc import cli -def test_cluster_name_required(capsys, gcloud_run): - with pytest.raises(SystemExit): - cli.main(["start"]) +runner = CliRunner(mix_stderr=False) - assert "arguments are required: name" in capsys.readouterr().err + +def test_cluster_name_required(gcloud_run): + res = runner.invoke(cli.app, ['start']) + assert "Missing argument 'NAME'" in res.stderr assert gcloud_run.call_count == 0 def test_dry_run(gcloud_run): - cli.main(["start", "test-cluster", "--dry-run"]) + res = runner.invoke(cli.app, ['start', 'test-cluster', '--dry-run']) + assert res.exit_code == 0 assert gcloud_run.call_count == 0 def test_cluster_project(gcloud_run): - cli.main(["start", "--project", "foo", "test-cluster"]) + runner.invoke(cli.app, ['start', '--project', 'foo', 'test-cluster']) assert "--project=foo" in gcloud_run.call_args[0][0] @@ -26,22 +29,22 @@ def test_cluster_project(gcloud_run): "--zone=us-central1-b", ]) def test_cluster_location(gcloud_run, location_arg): - cli.main(["start", location_arg, "test-cluster"]) + runner.invoke(cli.app, ['start', location_arg, 'test-cluster']) assert location_arg in gcloud_run.call_args[0][0] def test_creator_label(gcloud_run, gcloud_config): gcloud_config["account"] = "test-user@hail.is" - cli.main(["start", "my-cluster"]) + runner.invoke(cli.app, ['start', 'my-cluster']) assert "--labels=creator=test-user_hail_is" in gcloud_run.call_args[0][0] gcloud_config["account"] = None - cli.main(["start", "my-cluster"]) + runner.invoke(cli.app, ['start', 'my-cluster']) assert not any(arg.startswith("--labels=") and "creator=" in arg for arg in gcloud_run.call_args[0][0]) def test_workers_configuration(gcloud_run): - cli.main(["start", "--num-workers=4", "test-cluster"]) + runner.invoke(cli.app, ['start', '--num-workers=4', 'test-cluster']) assert "--num-workers=4" in gcloud_run.call_args[0][0] @@ -50,7 +53,7 @@ def test_workers_configuration(gcloud_run): "--num-preemptible-workers=8" ]) def test_secondary_workers_configuration(gcloud_run, workers_arg): - cli.main(["start", workers_arg, "test-cluster"]) + runner.invoke(cli.app, ['start', workers_arg, 'test-cluster']) assert "--num-secondary-workers=8" in gcloud_run.call_args[0][0] @@ -59,7 +62,7 @@ def test_secondary_workers_configuration(gcloud_run, workers_arg): "--worker-machine-type=n1-standard-32", ]) def test_machine_type_configuration(gcloud_run, machine_arg): - cli.main(["start", machine_arg, "test-cluster"]) + runner.invoke(cli.app, ['start', machine_arg, 'test-cluster']) assert machine_arg in gcloud_run.call_args[0][0] @@ -69,17 +72,17 @@ def test_machine_type_configuration(gcloud_run, machine_arg): "--secondary-worker-boot-disk-size=100" ]) def test_boot_disk_size_configuration(gcloud_run, machine_arg): - cli.main(["start", machine_arg, "test-cluster"]) + runner.invoke(cli.app, ['start', machine_arg, 'test-cluster']) assert f"{machine_arg}GB" in gcloud_run.call_args[0][0] def test_vep_defaults_to_highmem_master_machine(gcloud_run): - cli.main(["start", "test-cluster", "--vep=GRCh37"]) + runner.invoke(cli.app, ['start', 'test-cluster', '--vep=GRCh37']) assert "--master-machine-type=n1-highmem-8" in gcloud_run.call_args[0][0] def test_vep_defaults_to_larger_worker_boot_disk(gcloud_run): - 
cli.main(["start", "test-cluster", "--vep=GRCh37"]) + runner.invoke(cli.app, ['start', 'test-cluster', '--vep=GRCh37']) assert "--worker-boot-disk-size=200GB" in gcloud_run.call_args[0][0] assert "--secondary-worker-boot-disk-size=200GB" in gcloud_run.call_args[0][0] @@ -92,11 +95,11 @@ def test_vep_defaults_to_larger_worker_boot_disk(gcloud_run): def test_requester_pays_project_configuration(gcloud_run, gcloud_config, requester_pays_arg): gcloud_config["project"] = "foo-project" - cli.main(["start", "test-cluster", requester_pays_arg]) + runner.invoke(cli.app, ['start', 'test-cluster', requester_pays_arg]) properties = next(arg for arg in gcloud_run.call_args[0][0] if arg.startswith("--properties=")) assert "spark:spark.hadoop.fs.gs.requester.pays.project.id=foo-project" in properties - cli.main(["start", "--project=bar-project", "test-cluster", requester_pays_arg]) + runner.invoke(cli.app, ['start', '--project=bar-project', 'test-cluster', requester_pays_arg]) properties = next(arg for arg in gcloud_run.call_args[0][0] if arg.startswith("--properties=")) assert "spark:spark.hadoop.fs.gs.requester.pays.project.id=bar-project" in properties @@ -106,14 +109,14 @@ def test_requester_pays_project_configuration(gcloud_run, gcloud_config, request ("--requester-pays-allow-buckets=example-bucket", "CUSTOM"), ("--requester-pays-allow-annotation-db", "CUSTOM"), ]) -def test_requester_pays_mode_configuration(gcloud_run, gcloud_config, requester_pays_arg, expected_mode): - cli.main(["start", "test-cluster", requester_pays_arg]) +def test_requester_pays_mode_configuration(gcloud_run, requester_pays_arg, expected_mode): + runner.invoke(cli.app, ['start', 'test-cluster', requester_pays_arg]) properties = next(arg for arg in gcloud_run.call_args[0][0] if arg.startswith("--properties=")) assert f"spark:spark.hadoop.fs.gs.requester.pays.mode={expected_mode}" in properties -def test_requester_pays_buckets_configuration(gcloud_run, gcloud_config): - cli.main(["start", "test-cluster", "--requester-pays-allow-buckets=foo,bar"]) +def test_requester_pays_buckets_configuration(gcloud_run): + runner.invoke(cli.app, ['start', 'test-cluster', '--requester-pays-allow-buckets=foo,bar']) properties = next(arg for arg in gcloud_run.call_args[0][0] if arg.startswith("--properties=")) assert f"spark:spark.hadoop.fs.gs.requester.pays.buckets=foo,bar" in properties @@ -123,29 +126,29 @@ def test_requester_pays_buckets_configuration(gcloud_run, gcloud_config): "--max-age=1h", ]) def test_scheduled_deletion_configuration(gcloud_run, scheduled_deletion_arg): - cli.main(["start", scheduled_deletion_arg, "test-cluster"]) + runner.invoke(cli.app, ['start', scheduled_deletion_arg, 'test-cluster']) assert scheduled_deletion_arg in gcloud_run.call_args[0][0] def test_master_tags(gcloud_run): - cli.main(["start", "test-cluster", "--master-tags=foo"]) + runner.invoke(cli.app, ['start', 'test-cluster', '--master-tags=foo']) assert gcloud_run.call_count == 2 assert gcloud_run.call_args_list[0][0][0][:4] == ["dataproc", "clusters", "create", "test-cluster"] assert gcloud_run.call_args_list[1][0][0] == ["compute", "instances", "add-tags", "test-cluster-m", "--tags", "foo"] def test_master_tags_project(gcloud_run): - cli.main(["start", "test-cluster", "--master-tags=foo", "--project=some-project"]) + runner.invoke(cli.app, ['start', 'test-cluster', '--master-tags=foo', '--project=some-project']) assert gcloud_run.call_count == 2 assert "--project=some-project" in gcloud_run.call_args_list[1][0][0] def test_master_tags_zone(gcloud_run): - 
cli.main(["start", "test-cluster", "--master-tags=foo", "--zone=us-east1-d"]) + runner.invoke(cli.app, ['start', 'test-cluster', '--master-tags=foo', '--zone=us-east1-d']) assert gcloud_run.call_count == 2 assert "--zone=us-east1-d" in gcloud_run.call_args_list[1][0][0] def test_master_tags_dry_run(gcloud_run): - cli.main(["start", "test-cluster", "--master-tags=foo", "--dry-run"]) + runner.invoke(cli.app, ['start', 'test-cluster', '--master-tags=foo', '--dry-run']) assert gcloud_run.call_count == 0 diff --git a/hail/python/test/hailtop/hailctl/dataproc/test_stop.py b/hail/python/test/hailtop/hailctl/dataproc/test_stop.py index 40ab95dca6f..cb26bc79df0 100644 --- a/hail/python/test/hailtop/hailctl/dataproc/test_stop.py +++ b/hail/python/test/hailtop/hailctl/dataproc/test_stop.py @@ -1,36 +1,39 @@ -import pytest +from typer.testing import CliRunner from hailtop.hailctl.dataproc import cli +runner = CliRunner(mix_stderr=False) + + def test_stop(gcloud_run): - cli.main(["stop", "test-cluster"]) + runner.invoke(cli.app, ['stop', 'test-cluster']) assert gcloud_run.call_args[0][0][:3] == ["dataproc", "clusters", "delete"] -def test_cluster_name_required(capsys, gcloud_run): - with pytest.raises(SystemExit): - cli.main(["stop"]) - - assert "arguments are required: name" in capsys.readouterr().err +def test_cluster_name_required(gcloud_run): + res = runner.invoke(cli.app, ['stop']) + assert res.exit_code == 2 + assert "Missing argument 'NAME'" in res.stderr assert gcloud_run.call_count == 0 def test_dry_run(gcloud_run): - cli.main(["stop", "test-cluster", "--dry-run"]) + res = runner.invoke(cli.app, ['stop', 'test-cluster', '--dry-run']) + assert res.exit_code == 0 assert gcloud_run.call_count == 0 def test_cluster_project(gcloud_run): - cli.main(["stop", "--project=foo", "test-cluster"]) + runner.invoke(cli.app, ['stop', '--project=foo', 'test-cluster']) assert "--project=foo" in gcloud_run.call_args[0][0] def test_cluster_region(gcloud_run): - cli.main(["stop", "--region=europe-north1", "test-cluster"]) + runner.invoke(cli.app, ['stop', '--region=europe-north1', 'test-cluster']) assert "--region=europe-north1" in gcloud_run.call_args[0][0] def test_async(gcloud_run): - cli.main(["stop", "test-cluster", "--async"]) + runner.invoke(cli.app, ['stop', 'test-cluster', '--async']) assert "--async" in gcloud_run.call_args[0][0] diff --git a/hail/python/test/hailtop/hailctl/dataproc/test_submit.py b/hail/python/test/hailtop/hailctl/dataproc/test_submit.py index c62e9bc3c6f..1cf45a5382d 100644 --- a/hail/python/test/hailtop/hailctl/dataproc/test_submit.py +++ b/hail/python/test/hailtop/hailctl/dataproc/test_submit.py @@ -1,52 +1,54 @@ -import pytest +from typer.testing import CliRunner from hailtop.hailctl.dataproc import cli +runner = CliRunner(mix_stderr=False) + + def test_submit(gcloud_run): - cli.main(["submit", "test-cluster", "a-script.py"]) + runner.invoke(cli.app, ['submit', 'test-cluster', 'a-script.py']) gcloud_args = gcloud_run.call_args[0][0] assert gcloud_args[:5] == ["dataproc", "jobs", "submit", "pyspark", "a-script.py"] assert "--cluster=test-cluster" in gcloud_args def test_cluster_and_script_required(gcloud_run): - with pytest.raises(SystemExit): - cli.main(["submit"]) - + res = runner.invoke(cli.app, ['submit']) + assert res.exit_code == 2 assert gcloud_run.call_count == 0 - with pytest.raises(SystemExit): - cli.main(["submit", "test-cluster"]) - + res = runner.invoke(cli.app, ['submit', 'test-cluster']) + assert res.exit_code == 2 assert gcloud_run.call_count == 0 def 
test_dry_run(gcloud_run): - cli.main(["submit", "test-cluster", "a-script.py", "--dry-run"]) + res = runner.invoke(cli.app, ['submit', 'test-cluster', 'a-script.py', '--dry-run']) + assert res.exit_code == 0 assert gcloud_run.call_count == 0 def test_script_args(gcloud_run): - cli.main(["submit", "test-cluster", "a-script.py", "--foo", "bar"]) + runner.invoke(cli.app, ['submit', 'test-cluster', 'a-script.py', '--foo', 'bar']) gcloud_args = gcloud_run.call_args[0][0] job_args = gcloud_args[gcloud_args.index("--") + 1:] assert job_args == ["--foo", "bar"] def test_files(gcloud_run): - cli.main(["submit", "test-cluster", "a-script.py", "--files=some-file.txt"]) + runner.invoke(cli.app, ['submit', 'test-cluster', 'a-script.py', '--files=some-file.txt']) assert "--" not in gcloud_run.call_args[0][0] # make sure arg is passed to gcloud and not job assert "--files=some-file.txt" in gcloud_run.call_args[0][0] def test_properties(gcloud_run): - cli.main(["submit", "test-cluster", "a-script.py", "--properties=spark:spark.task.maxFailures=3"]) + runner.invoke(cli.app, ['submit', 'test-cluster', 'a-script.py', '--properties=spark:spark.task.maxFailures=3']) assert "--" not in gcloud_run.call_args[0][0] # make sure arg is passed to gcloud and not job assert "--properties=spark:spark.task.maxFailures=3" in gcloud_run.call_args[0][0] def test_gcloud_configuration(gcloud_run): - cli.main(["submit", "test-cluster", "a-script.py", "--gcloud_configuration=some-config"]) + runner.invoke(cli.app, ['submit', 'test-cluster', 'a-script.py', '--gcloud_configuration=some-config']) assert "--" not in gcloud_run.call_args[0][0] # make sure arg is passed to gcloud and not job assert "--configuration=some-config" in gcloud_run.call_args[0][0] diff --git a/hail/src/main/scala/is/hail/asm4s/Code.scala b/hail/src/main/scala/is/hail/asm4s/Code.scala index bc72880bfdb..d947e031fc4 100644 --- a/hail/src/main/scala/is/hail/asm4s/Code.scala +++ b/hail/src/main/scala/is/hail/asm4s/Code.scala @@ -342,6 +342,21 @@ object Code { a9ct.runtimeClass, a10ct.runtimeClass, a11ct.runtimeClass, a12ct.runtimeClass, a13ct.runtimeClass, a14ct.runtimeClass, a15ct.runtimeClass, a16ct.runtimeClass), Array(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16)) + def invokeScalaObject19[A1, A2, A3, A4, A5, A6, A7, A8, A9, A10, A11, A12, A13, A14, A15, A16, A17, A18, A19, S]( + cls: Class[_], method: String, a1: Code[A1], a2: Code[A2], a3: Code[A3], a4: Code[A4], a5: Code[A5], a6: Code[A6], a7: Code[A7], a8: Code[A8], + a9: Code[A9], a10: Code[A10], a11: Code[A11], a12: Code[A12], a13: Code[A13], a14: Code[A14], a15: Code[A15], a16: Code[A16], + a17: Code[A17], a18: Code[A18], a19: Code[A19])( + implicit a1ct: ClassTag[A1], a2ct: ClassTag[A2], a3ct: ClassTag[A3], a4ct: ClassTag[A4], a5ct: ClassTag[A5], a6ct: ClassTag[A6], a7ct: ClassTag[A7], + a8ct: ClassTag[A8], a9ct: ClassTag[A9], a10ct: ClassTag[A10], a11ct: ClassTag[A11], a12ct: ClassTag[A12], a13ct: ClassTag[A13], a14ct: ClassTag[A14], + a15ct: ClassTag[A15], a16ct: ClassTag[A16], a17ct: ClassTag[A17], a18ct: ClassTag[A18], a19ct: ClassTag[A19], sct: ClassTag[S]): Code[S] = + invokeScalaObject[S]( + cls, method, + Array[Class[_]]( + a1ct.runtimeClass, a2ct.runtimeClass, a3ct.runtimeClass, a4ct.runtimeClass, a5ct.runtimeClass, a6ct.runtimeClass, a7ct.runtimeClass, a8ct.runtimeClass, + a9ct.runtimeClass, a10ct.runtimeClass, a11ct.runtimeClass, a12ct.runtimeClass, a13ct.runtimeClass, a14ct.runtimeClass, a15ct.runtimeClass, a16ct.runtimeClass, + a17ct.runtimeClass, 
a18ct.runtimeClass, a19ct.runtimeClass), + Array(a1, a2, a3, a4, a5, a6, a7, a8, a9, a10, a11, a12, a13, a14, a15, a16, a17, a18, a19)) + def invokeStatic[S](cls: Class[_], method: String, parameterTypes: Array[Class[_]], args: Array[Code[_]])(implicit sct: ClassTag[S]): Code[S] = { val m = Invokeable.lookupMethod(cls, method, parameterTypes)(sct) assert(m.isStatic) diff --git a/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala b/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala index b89f479829e..485ab124034 100644 --- a/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala +++ b/hail/src/main/scala/is/hail/backend/service/ServiceBackend.scala @@ -442,8 +442,7 @@ object ServiceBackendSocketAPI2 { DeployConfig.set(deployConfig) val userTokens = Tokens.fromFile(s"$scratchDir/secrets/user-tokens/tokens.json") Tokens.set(userTokens) - tls.setSSLConfigFromDir(s"$scratchDir/secrets/ssl-config") - log.info("TLS configured.") + sys.env.get("HAIL_SSL_CONFIG_DIR").foreach(tls.setSSLConfigFromDir(_)) val sessionId = userTokens.namespaceToken(deployConfig.defaultNamespace) log.info("Namespace token acquired.") diff --git a/hail/src/main/scala/is/hail/backend/service/Worker.scala b/hail/src/main/scala/is/hail/backend/service/Worker.scala index 1aab79fd7f1..e717dbb881c 100644 --- a/hail/src/main/scala/is/hail/backend/service/Worker.scala +++ b/hail/src/main/scala/is/hail/backend/service/Worker.scala @@ -113,7 +113,7 @@ object Worker { DeployConfig.set(deployConfig) val userTokens = Tokens.fromFile(s"$scratchDir/secrets/user-tokens/tokens.json") Tokens.set(userTokens) - tls.setSSLConfigFromDir(s"$scratchDir/secrets/ssl-config") + sys.env.get("HAIL_SSL_CONFIG_DIR").foreach(tls.setSSLConfigFromDir(_)) log.info(s"is.hail.backend.service.Worker $myRevision") log.info(s"running job $i/$n at root $root with scratch directory '$scratchDir'") diff --git a/hail/src/main/scala/is/hail/expr/ir/AbstractMatrixTableSpec.scala b/hail/src/main/scala/is/hail/expr/ir/AbstractMatrixTableSpec.scala index ff854ce4b06..0b23745d117 100644 --- a/hail/src/main/scala/is/hail/expr/ir/AbstractMatrixTableSpec.scala +++ b/hail/src/main/scala/is/hail/expr/ir/AbstractMatrixTableSpec.scala @@ -198,5 +198,5 @@ class MatrixTableSpec( } object FileFormat { - val version: SemanticVersion = SemanticVersion(1, 6, 0) + val version: SemanticVersion = SemanticVersion(1, 7, 0) } diff --git a/hail/src/main/scala/is/hail/expr/ir/Children.scala b/hail/src/main/scala/is/hail/expr/ir/Children.scala index d98e198fa17..146ef135f7e 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Children.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Children.scala @@ -170,6 +170,8 @@ object Children { Array(nd) case NDArraySVD(nd, _, _, _) => Array(nd) + case NDArrayEigh(nd, _, _) => + Array(nd) case NDArrayInv(nd, errorID) => Array(nd) case NDArrayWrite(nd, path) => diff --git a/hail/src/main/scala/is/hail/expr/ir/Copy.scala b/hail/src/main/scala/is/hail/expr/ir/Copy.scala index eb13aa65c8f..ba68a1f4786 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Copy.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Copy.scala @@ -137,6 +137,9 @@ object Copy { case NDArraySVD(_, fullMatrices, computeUV, errorID) => assert(newChildren.length == 1) NDArraySVD(newChildren(0).asInstanceOf[IR], fullMatrices, computeUV, errorID) + case NDArrayEigh(_, eigvalsOnly, errorID) => + assert(newChildren.length == 1) + NDArrayEigh(newChildren(0).asInstanceOf[IR], eigvalsOnly, errorID) case NDArrayInv(_, errorID) => assert(newChildren.length == 1) 
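        // Like the NDArrayEigh case above, rebuild the node from its single new
        // child, carrying the static errorID through unchanged. (From Python, the
        // new NDArrayEigh node is reachable as hl.nd.eigh(nd, eigvals_only), as
        // exercised by the test_eigh test added in this diff.)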
NDArrayInv(newChildren(0).asInstanceOf[IR], errorID) diff --git a/hail/src/main/scala/is/hail/expr/ir/Emit.scala b/hail/src/main/scala/is/hail/expr/ir/Emit.scala index d45be5c44e9..a6ef8cb04f7 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Emit.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Emit.scala @@ -740,7 +740,7 @@ class Emit[C]( val ns = sigs.length val deserializers = sc.states.states .slice(start, start + ns) - .map(sc => sc.deserialize(BufferSpec.defaultUncompressed)) + .map(sc => sc.deserialize(spec)) Array.range(start, start + ns).foreach(i => sc.newState(cb, i)) @@ -1874,6 +1874,35 @@ class Emit[C]( } + case NDArrayEigh(nd, eigvalsOnly, errorID) => + emitNDArrayColumnMajorStrides(nd).map(cb) { case mat: SNDArrayValue => + val n = mat.shapes(0) + val jobz = if (eigvalsOnly) "N" else "V" + val (workSize, iWorkSize) = SNDArray.syevr_query(cb, jobz, "U", cb.memoize(n.toI), region) + + val matType = PCanonicalNDArray(PFloat64Required, 2) + val vecType = PCanonicalNDArray(PFloat64Required, 1) + val intVecType = PCanonicalNDArray(PInt32Required, 1) + + val W = vecType.constructUninitialized(FastIndexedSeq(n), cb, region) + val work = vecType.constructUninitialized(FastIndexedSeq(SizeValueDyn(workSize)), cb, region) + val iWork = intVecType.constructUninitialized(FastIndexedSeq(iWorkSize), cb, region) + + if (eigvalsOnly) { + SNDArray.syevr(cb, "U", mat, W, None, work, iWork) + + W + } else { + val resultType = NDArrayEigh.pTypes(false, false).asInstanceOf[PCanonicalTuple] + val Z = matType.constructUninitialized(FastIndexedSeq(n, n), cb, region) + val iSuppZ = vecType.constructUninitialized(FastIndexedSeq(SizeValueDyn(cb.memoize(n * 2))), cb, region) + + SNDArray.syevr(cb, "U", mat, W, Some((Z, iSuppZ)), work, iWork) + + resultType.constructFromFields(cb, region, FastIndexedSeq(EmitCode.present(cb.emb, W), EmitCode.present(cb.emb, Z)), false) + } + } + case x@NDArrayQR(nd, mode, errorID) => // See here to understand different modes: https://docs.scipy.org/doc/numpy/reference/generated/numpy.linalg.qr.html emitNDArrayColumnMajorStrides(nd).map(cb) { case pndValue: SNDArrayValue => @@ -2374,7 +2403,7 @@ class Emit[C]( PCanonicalTuple(true, et.emitType.storageType).constructFromFields(cb, region, FastIndexedSeq(et), deepCopy = false) } - val bufferSpec: BufferSpec = BufferSpec.defaultUncompressed + val bufferSpec: BufferSpec = BufferSpec.blockedUncompressed val emitGlobals = EmitCode.fromI(mb)(cb => emitInNewBuilder(cb, globals)) diff --git a/hail/src/main/scala/is/hail/expr/ir/EmitClassBuilder.scala b/hail/src/main/scala/is/hail/expr/ir/EmitClassBuilder.scala index 3c8a58dc5c9..bbb8c5e9d34 100644 --- a/hail/src/main/scala/is/hail/expr/ir/EmitClassBuilder.scala +++ b/hail/src/main/scala/is/hail/expr/ir/EmitClassBuilder.scala @@ -333,7 +333,7 @@ class EmitClassBuilder[C]( private[this] def encodeLiterals(): Array[AnyRef] = { val (literals, preEncodedLiterals) = emodb.literalsResult() val litType = PCanonicalTuple(true, literals.map(_._1.canonicalPType.setRequired(true)): _*) - val spec = TypedCodecSpec(litType, BufferSpec.defaultUncompressed) + val spec = TypedCodecSpec(litType, BufferSpec.wireSpec) cb.addInterface(typeInfo[FunctionWithLiterals].iname) val mb2 = newEmitMethod("addAndDecodeLiterals", FastIndexedSeq[ParamType](typeInfo[Array[AnyRef]]), typeInfo[Unit]) diff --git a/hail/src/main/scala/is/hail/expr/ir/IR.scala b/hail/src/main/scala/is/hail/expr/ir/IR.scala index 0a33bec0672..f587dd261a5 100644 --- a/hail/src/main/scala/is/hail/expr/ir/IR.scala +++ 
b/hail/src/main/scala/is/hail/expr/ir/IR.scala @@ -111,7 +111,7 @@ object EncodedLiteral { case ts: PString => Str(ts.loadString(addr)) case _ => val etype = EType.defaultFromPType(pt) - val codec = TypedCodecSpec(etype, pt.virtualType, BufferSpec.defaultUncompressed) + val codec = TypedCodecSpec(etype, pt.virtualType, BufferSpec.wireSpec) val bytes = codec.encodeArrays(ctx, pt, addr) EncodedLiteral(codec, bytes) } @@ -521,6 +521,17 @@ final case class NDArrayQR(nd: IR, mode: String, errorID: Int) extends IR final case class NDArraySVD(nd: IR, fullMatrices: Boolean, computeUV: Boolean, errorID: Int) extends IR +object NDArrayEigh { + def pTypes(eigvalsOnly: Boolean, req: Boolean): PType = { + if (eigvalsOnly) { + PCanonicalNDArray(PFloat64Required, 1, req) + } else { + PCanonicalTuple(req, PCanonicalNDArray(PFloat64Required, 1, true), PCanonicalNDArray(PFloat64Required, 2, true)) + } + } +} +final case class NDArrayEigh(nd: IR, eigvalsOnly: Boolean, errorID: Int) extends IR + final case class NDArrayInv(nd: IR, errorID: Int) extends IR final case class AggFilter(cond: IR, aggIR: IR, isScan: Boolean) extends IR diff --git a/hail/src/main/scala/is/hail/expr/ir/InferType.scala b/hail/src/main/scala/is/hail/expr/ir/InferType.scala index fcd946d23c2..d0e789d15cb 100644 --- a/hail/src/main/scala/is/hail/expr/ir/InferType.scala +++ b/hail/src/main/scala/is/hail/expr/ir/InferType.scala @@ -222,6 +222,12 @@ object InferType { } else { TNDArray(TFloat64, Nat(1)) } + case NDArrayEigh(nd, eigvalsOnly, _) => + if (eigvalsOnly) { + TNDArray(TFloat64, Nat(1)) + } else { + TTuple(TNDArray(TFloat64, Nat(1)), TNDArray(TFloat64, Nat(2))) + } case NDArrayInv(_, _) => TNDArray(TFloat64, Nat(2)) case NDArrayWrite(_, _) => TVoid diff --git a/hail/src/main/scala/is/hail/expr/ir/Interpret.scala b/hail/src/main/scala/is/hail/expr/ir/Interpret.scala index fa7521c9e95..bb90e50f064 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Interpret.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Interpret.scala @@ -897,7 +897,7 @@ object Interpret { // TODO Is this right? where does wrapped run? 
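        // scopedExecution hands the compiled function a class loader, filesystem,
        // task context, and a region scoped to this call.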
ctx.scopedExecution((hcl, fs, htc, r) => SafeRow(rt, f(hcl, fs, htc, r).apply(r, globalsOffset))) } else { - val spec = BufferSpec.defaultUncompressed + val spec = BufferSpec.blockedUncompressed val (_, initOp) = CompileWithAggregators[AsmFunction2RegionLongUnit](ctx, extracted.states, diff --git a/hail/src/main/scala/is/hail/expr/ir/Parser.scala b/hail/src/main/scala/is/hail/expr/ir/Parser.scala index 15b36c94e8b..9a6fb167bb8 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Parser.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Parser.scala @@ -1079,6 +1079,12 @@ object IRParser { ir_value_expr(env)(it).map { nd => NDArraySVD(nd, fullMatrices, computeUV, errorID) } + case "NDArrayEigh" => + val errorID = int32_literal(it) + val eigvalsOnly = boolean_literal(it) + ir_value_expr(env)(it).map { nd => + NDArrayEigh(nd, eigvalsOnly, errorID) + } case "NDArrayInv" => val errorID = int32_literal(it) ir_value_expr(env)(it).map{ nd => NDArrayInv(nd, errorID) } diff --git a/hail/src/main/scala/is/hail/expr/ir/Pretty.scala b/hail/src/main/scala/is/hail/expr/ir/Pretty.scala index edbe1979074..2761f9f1bde 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Pretty.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Pretty.scala @@ -267,6 +267,7 @@ class Pretty(width: Int, ribbonWidth: Int, elideLiterals: Boolean, maxLen: Int, case NDArrayMatMul(_, _, errorID) => single(s"$errorID") case NDArrayQR(_, mode, errorID) => FastSeq(errorID.toString, mode) case NDArraySVD(_, fullMatrices, computeUV, errorID) => FastSeq(errorID.toString, fullMatrices.toString, computeUV.toString) + case NDArrayEigh(_, eigvalsOnly, errorID) => FastSeq(errorID.toString, eigvalsOnly.toString) case NDArrayInv(_, errorID) => single(s"$errorID") case ArraySort(_, l, r, _) if !elideBindings => FastSeq(prettyIdentifier(l), prettyIdentifier(r)) case ArrayRef(_,_, errorID) => single(s"$errorID") diff --git a/hail/src/main/scala/is/hail/expr/ir/Requiredness.scala b/hail/src/main/scala/is/hail/expr/ir/Requiredness.scala index 1c8332d36b0..371f0b8412d 100644 --- a/hail/src/main/scala/is/hail/expr/ir/Requiredness.scala +++ b/hail/src/main/scala/is/hail/expr/ir/Requiredness.scala @@ -687,6 +687,7 @@ class Requiredness(val usesAndDefs: UsesAndDefs, ctx: ExecuteContext) { requiredness.union(lookup(r).required) case NDArrayQR(child, mode, _) => requiredness.fromPType(NDArrayQR.pType(mode, lookup(child).required)) case NDArraySVD(child, _, computeUV, _) => requiredness.fromPType(NDArraySVD.pTypes(computeUV, lookup(child).required)) + case NDArrayEigh(child, eigvalsOnly, _) => requiredness.fromPType(NDArrayEigh.pTypes(eigvalsOnly, lookup(child).required)) case NDArrayInv(child, _) => requiredness.unionFrom(lookup(child)) case MakeStruct(fields) => fields.foreach { case (n, f) => diff --git a/hail/src/main/scala/is/hail/expr/ir/TableIR.scala b/hail/src/main/scala/is/hail/expr/ir/TableIR.scala index 46c0ebac13e..e06f2c0ff67 100644 --- a/hail/src/main/scala/is/hail/expr/ir/TableIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/TableIR.scala @@ -7,7 +7,7 @@ import is.hail.backend.spark.{SparkBackend, SparkTaskContext} import is.hail.backend.{ExecuteContext, HailStateManager, HailTaskContext, TaskFinalizer} import is.hail.expr.ir import is.hail.expr.ir.functions.{BlockMatrixToTableFunction, IntervalFunctions, MatrixToTableFunction, TableToTableFunction} -import is.hail.expr.ir.lowering.{DArrayLowering, LowerTableIR, LowererUnsupportedOperation, TableStage, TableStageDependency} +import is.hail.expr.ir.lowering.{DArrayLowering, LowerTableIR, 
LowerTableIRHelpers, LowererUnsupportedOperation, TableStage, TableStageDependency} import is.hail.expr.ir.streams.StreamProducer import is.hail.io._ import is.hail.io.avro.AvroTableReader @@ -57,10 +57,11 @@ abstract sealed class TableIR extends BaseIR { final def analyzeAndExecute(ctx: ExecuteContext): TableExecuteIntermediate = { val r = Requiredness(this, ctx) - execute(ctx, new TableRunContext(r)) + val d = DistinctlyKeyed(this) + execute(ctx, LoweringAnalyses(r, d)) } - protected[ir] def execute(ctx: ExecuteContext, r: TableRunContext): TableExecuteIntermediate = + protected[ir] def execute(ctx: ExecuteContext, r: LoweringAnalyses): TableExecuteIntermediate = fatal("tried to execute unexecutable IR:\n" + Pretty(ctx, this)) override def copy(newChildren: IndexedSeq[BaseIR]): TableIR @@ -75,8 +76,6 @@ abstract sealed class TableIR extends BaseIR { def pyUnpersist(): TableIR = unpersist() } -class TableRunContext(val req: RequirednessAnalysis) - object TableLiteral { def apply(value: TableValue, theHailClassLoader: HailClassLoader): TableLiteral = { TableLiteral(value.typ, value.rvd, value.globals.encoding, value.globals.encodeToByteArrays(theHailClassLoader)) @@ -93,7 +92,7 @@ case class TableLiteral(typ: TableType, rvd: RVD, enc: AbstractTypedCodecSpec, e TableLiteral(typ, rvd, enc, encodedGlobals) } - protected[ir] override def execute(ctx: ExecuteContext, r: TableRunContext): TableExecuteIntermediate = { + protected[ir] override def execute(ctx: ExecuteContext, r: LoweringAnalyses): TableExecuteIntermediate = { val (globalPType: PStruct, dec) = enc.buildDecoder(ctx, typ.globalType) val bais = new ArrayOfByteArrayInputStream(encodedGlobals) @@ -1666,7 +1665,7 @@ case class TableRead(typ: TableType, dropRows: Boolean, tr: TableReader) extends TableRead(typ, dropRows, tr) } - protected[ir] override def execute(ctx: ExecuteContext, r: TableRunContext): TableExecuteIntermediate = + protected[ir] override def execute(ctx: ExecuteContext, r: LoweringAnalyses): TableExecuteIntermediate = new TableValueIntermediate(tr.apply(ctx, typ, dropRows)) } @@ -1692,7 +1691,7 @@ case class TableParallelize(rowsAndGlobal: IR, nPartitions: Option[Int] = None) FastIndexedSeq(), globalsType) - protected[ir] override def execute(ctx: ExecuteContext, r: TableRunContext): TableExecuteIntermediate = { + protected[ir] override def execute(ctx: ExecuteContext, r: LoweringAnalyses): TableExecuteIntermediate = { val (ptype: PStruct, res) = CompileAndEvaluate._apply(ctx, rowsAndGlobal, optimize = false) match { case Right((t, off)) => (t.fields(0).typ, t.loadField(off, 0)) } @@ -1779,7 +1778,7 @@ case class TableKeyBy(child: TableIR, keys: IndexedSeq[String], isSorted: Boolea TableKeyBy(newChildren(0).asInstanceOf[TableIR], keys, isSorted) } - protected[ir] override def execute(ctx: ExecuteContext, r: TableRunContext): TableExecuteIntermediate = { + protected[ir] override def execute(ctx: ExecuteContext, r: LoweringAnalyses): TableExecuteIntermediate = { val tv = child.execute(ctx, r).asTableValue(ctx) new TableValueIntermediate(tv.copy(typ = typ, rvd = tv.rvd.enforceKey(ctx, keys, isSorted))) } @@ -1838,7 +1837,7 @@ case class TableGen(contexts: IR, override def children: IndexedSeq[BaseIR] = FastSeq(contexts, globals, body) - override protected[ir] def execute(ctx: ExecuteContext, r: TableRunContext): TableExecuteIntermediate = + override protected[ir] def execute(ctx: ExecuteContext, r: LoweringAnalyses): TableExecuteIntermediate = new TableStageIntermediate(LowerTableIR.applyTable(this, DArrayLowering.All, ctx, 
LoweringAnalyses(this, ctx))) } @@ -1864,7 +1863,7 @@ case class TableRange(n: Int, nPartitions: Int) extends TableIR { Array("idx"), TStruct.empty) - protected[ir] override def execute(ctx: ExecuteContext, r: TableRunContext): TableExecuteIntermediate = { + protected[ir] override def execute(ctx: ExecuteContext, r: LoweringAnalyses): TableExecuteIntermediate = { val localRowType = PCanonicalStruct(true, "idx" -> PInt32Required) val localPartCounts = partCounts val partStarts = partCounts.scanLeft(0)(_ + _) @@ -1906,7 +1905,7 @@ case class TableFilter(child: TableIR, pred: IR) extends TableIR { TableFilter(newChildren(0).asInstanceOf[TableIR], newChildren(1).asInstanceOf[IR]) } - protected[ir] override def execute(ctx: ExecuteContext, r: TableRunContext): TableExecuteIntermediate = { + protected[ir] override def execute(ctx: ExecuteContext, r: LoweringAnalyses): TableExecuteIntermediate = { val tv = child.execute(ctx, r).asTableValue(ctx) if (pred == True()) @@ -1951,7 +1950,7 @@ trait TableSubset extends TableIR { case None => Some(n) } - protected[ir] override def execute(ctx: ExecuteContext, r: TableRunContext): TableExecuteIntermediate = { + protected[ir] override def execute(ctx: ExecuteContext, r: LoweringAnalyses): TableExecuteIntermediate = { val prev = child.execute(ctx, r).asTableValue(ctx) new TableValueIntermediate(prev.copy(rvd = subsetKind match { case TableSubset.HEAD => prev.rvd.head(n, child.partitionCounts) @@ -1998,7 +1997,7 @@ case class TableRepartition(child: TableIR, n: Int, strategy: Int) extends Table TableRepartition(newChild, n, strategy) } - protected[ir] override def execute(ctx: ExecuteContext, r: TableRunContext): TableExecuteIntermediate = { + protected[ir] override def execute(ctx: ExecuteContext, r: LoweringAnalyses): TableExecuteIntermediate = { val prev = child.execute(ctx, r).asTableValue(ctx) val rvd = strategy match { case RepartitionStrategy.SHUFFLE => prev.rvd.coalesce(ctx, n, shuffle = true) @@ -2082,113 +2081,10 @@ case class TableJoin(left: TableIR, right: TableIR, joinType: String, joinKey: I joinKey) } - protected[ir] override def execute(ctx: ExecuteContext, r: TableRunContext): TableExecuteIntermediate = { - val leftTV = left.execute(ctx, r).asTableValue(ctx) - val rightTV = right.execute(ctx, r).asTableValue(ctx) - - val combinedRow = Row.fromSeq(leftTV.globals.javaValue.toSeq ++ rightTV.globals.javaValue.toSeq) - val newGlobals = BroadcastRow(ctx, combinedRow, newGlobalType) - - val leftRVDType = leftTV.rvd.typ.copy(key = left.typ.key.take(joinKey)) - val rightRVDType = rightTV.rvd.typ.copy(key = right.typ.key.take(joinKey)) - - val leftRowType = leftRVDType.rowType - val rightRowType = rightRVDType.rowType - val leftKeyFieldIdx = leftRVDType.kFieldIdx - val rightKeyFieldIdx = rightRVDType.kFieldIdx - val leftValueFieldIdx = leftRVDType.valueFieldIdx - val rightValueFieldIdx = rightRVDType.valueFieldIdx - - def noIndex(pfs: IndexedSeq[PField]): IndexedSeq[(String, PType)] = - pfs.map(pf => (pf.name, pf.typ)) - - def unionFieldPTypes(ps: PStruct, ps2: PStruct): IndexedSeq[(String, PType)] = - ps.fields.zip(ps2.fields).map { case (pf1, pf2) => - (pf1.name, InferPType.getCompatiblePType(Seq(pf1.typ, pf2.typ))) - } - - def castFieldRequiredeness(ps: PStruct, required: Boolean): IndexedSeq[(String, PType)] = - ps.fields.map(pf => (pf.name, pf.typ.setRequired(required))) - - val (lkT, lvT, rvT) = joinType match { - case "inner" => - val keyTypeFields = castFieldRequiredeness(leftRVDType.kType, true) - (keyTypeFields, 
noIndex(leftRVDType.valueType.fields), noIndex(rightRVDType.valueType.fields)) - case "left" => - val rValueTypeFields = castFieldRequiredeness(rightRVDType.valueType, false) - (noIndex(leftRVDType.kType.fields), noIndex(leftRVDType.valueType.fields), rValueTypeFields) - case "right" => - val keyTypeFields = leftRVDType.kType.fields.zip(rightRVDType.kType.fields).map({ - case (pf1, pf2) => { - assert(pf1.typ isOfType pf2.typ) - (pf1.name, pf2.typ) - } - }) - val lValueTypeFields = castFieldRequiredeness(leftRVDType.valueType, false) - (keyTypeFields, lValueTypeFields, noIndex(rightRVDType.valueType.fields)) - case "outer" => - val keyTypeFields = unionFieldPTypes(leftRVDType.kType, rightRVDType.kType) - val lValueTypeFields = castFieldRequiredeness(leftRVDType.valueType, false) - val rValueTypeFields = castFieldRequiredeness(rightRVDType.valueType, false) - (keyTypeFields, lValueTypeFields, rValueTypeFields) - } - - val newRowPType = PCanonicalStruct(true, lkT ++ lvT ++ rvT: _*) - - assert(newRowPType.virtualType == newRowType) - - val sm = ctx.stateManager - val rvMerger = { (_: RVDContext, it: Iterator[JoinedRegionValue]) => - val rvb = new RegionValueBuilder(sm) - val rv = RegionValue() - it.map { joined => - val lrv = joined._1 - val rrv = joined._2 - - if (lrv != null) - rvb.set(lrv.region) - else { - assert(rrv != null) - rvb.set(rrv.region) - } - - rvb.start(newRowPType) - rvb.startStruct() - - if (lrv != null) - rvb.addFields(leftRowType, lrv, leftKeyFieldIdx) - else { - assert(rrv != null) - rvb.addFields(rightRowType, rrv, rightKeyFieldIdx) - } - - if (lrv != null) - rvb.addFields(leftRowType, lrv, leftValueFieldIdx) - else - rvb.skipFields(leftValueFieldIdx.length) - - if (rrv != null) - rvb.addFields(rightRowType, rrv, rightValueFieldIdx) - else - rvb.skipFields(rightValueFieldIdx.length) - - rvb.endStruct() - rv.set(rvb.region, rvb.end()) - rv - } - } - - val leftRVD = leftTV.rvd - val rightRVD = rightTV.rvd - val joinedRVD = leftRVD.orderedJoin( - rightRVD, - joinKey, - joinType, - rvMerger, - RVDType(newRowPType, newKey), - ctx) - - new TableValueIntermediate(TableValue(ctx, typ, newGlobals, joinedRVD)) + protected[ir] override def execute(ctx: ExecuteContext, r: LoweringAnalyses): TableExecuteIntermediate = { + val leftTV = left.execute(ctx, r).asTableStage(ctx) + val rightTV = right.execute(ctx, r).asTableStage(ctx) + TableExecuteIntermediate(LowerTableIRHelpers.lowerTableJoin(ctx, r, this, leftTV, rightTV)) } } @@ -2210,7 +2106,7 @@ case class TableIntervalJoin( override def partitionCounts: Option[IndexedSeq[Long]] = left.partitionCounts - protected[ir] override def execute(ctx: ExecuteContext, r: TableRunContext): TableExecuteIntermediate = { + protected[ir] override def execute(ctx: ExecuteContext, r: LoweringAnalyses): TableExecuteIntermediate = { val leftValue = left.execute(ctx, r).asTableValue(ctx) val rightValue = right.execute(ctx, r).asTableValue(ctx) @@ -2308,7 +2204,7 @@ case class TableMultiWayZipJoin(children: IndexedSeq[TableIR], fieldName: String def copy(newChildren: IndexedSeq[BaseIR]): TableMultiWayZipJoin = TableMultiWayZipJoin(newChildren.asInstanceOf[IndexedSeq[TableIR]], fieldName, globalName) - protected[ir] override def execute(ctx: ExecuteContext, r: TableRunContext): TableExecuteIntermediate = { + protected[ir] override def execute(ctx: ExecuteContext, r: LoweringAnalyses): TableExecuteIntermediate = { val sm = ctx.stateManager val childValues = children.map(_.execute(ctx, r).asTableValue(ctx)) @@ -2401,7 +2297,7 @@ case class 
TableLeftJoinRightDistinct(left: TableIR, right: TableIR, root: Strin TableLeftJoinRightDistinct(newLeft, newRight, root) } - protected[ir] override def execute(ctx: ExecuteContext, r: TableRunContext): TableExecuteIntermediate = { + protected[ir] override def execute(ctx: ExecuteContext, r: LoweringAnalyses): TableExecuteIntermediate = { val leftValue = left.execute(ctx, r).asTableValue(ctx) val rightValue = right.execute(ctx, r).asTableValue(ctx) @@ -2444,7 +2340,7 @@ case class TableMapPartitions(child: TableIR, globalName, partitionStreamName, newChildren(1).asInstanceOf[IR], requestedKey, allowedOverlap) } - protected[ir] override def execute(ctx: ExecuteContext, r: TableRunContext): TableExecuteIntermediate = { + protected[ir] override def execute(ctx: ExecuteContext, r: LoweringAnalyses): TableExecuteIntermediate = { val tv = child.execute(ctx, r).asTableValue(ctx) val rowPType = tv.rvd.rowPType val globalPType = tv.globals.t @@ -2509,7 +2405,7 @@ case class TableMapRows(child: TableIR, newRow: IR) extends TableIR { override def partitionCounts: Option[IndexedSeq[Long]] = child.partitionCounts - protected[ir] override def execute(ctx: ExecuteContext, r: TableRunContext): TableExecuteIntermediate = { + protected[ir] override def execute(ctx: ExecuteContext, r: LoweringAnalyses): TableExecuteIntermediate = { val tv = child.execute(ctx, r).asTableValue(ctx) val fsBc = ctx.fsBc val scanRef = genUID() @@ -2562,7 +2458,7 @@ case class TableMapRows(child: TableIR, newRow: IR) extends TableIR { else null - val spec = BufferSpec.defaultUncompressed + val spec = BufferSpec.blockedUncompressed // Order of operations: // 1. init op on all aggs and serialize to byte array. @@ -2829,7 +2725,7 @@ case class TableMapGlobals(child: TableIR, newGlobals: IR) extends TableIR { override def partitionCounts: Option[IndexedSeq[Long]] = child.partitionCounts - protected[ir] override def execute(ctx: ExecuteContext, r: TableRunContext): TableExecuteIntermediate = { + protected[ir] override def execute(ctx: ExecuteContext, r: LoweringAnalyses): TableExecuteIntermediate = { val tv = child.execute(ctx, r).asTableValue(ctx) val (Some(PTypeReferenceSingleCodeType(resultPType: PStruct)), f) = Compile[AsmFunction2RegionLongLong](ctx, @@ -2886,7 +2782,7 @@ case class TableExplode(child: TableIR, path: IndexedSeq[String]) extends TableI TableExplode(newChildren(0).asInstanceOf[TableIR], path) } - protected[ir] override def execute(ctx: ExecuteContext, r: TableRunContext): TableExecuteIntermediate = { + protected[ir] override def execute(ctx: ExecuteContext, r: LoweringAnalyses): TableExecuteIntermediate = { val prev = child.execute(ctx, r).asTableValue(ctx) val (len, l) = Compile[AsmFunction2RegionLongInt](ctx, @@ -2951,7 +2847,7 @@ case class TableUnion(children: IndexedSeq[TableIR]) extends TableIR { val typ: TableType = children(0).typ - protected[ir] override def execute(ctx: ExecuteContext, r: TableRunContext): TableExecuteIntermediate = { + protected[ir] override def execute(ctx: ExecuteContext, r: LoweringAnalyses): TableExecuteIntermediate = { val tvs = children.map(_.execute(ctx, r).asTableValue(ctx)) new TableValueIntermediate( tvs(0).copy( @@ -3012,7 +2908,7 @@ case class TableDistinct(child: TableIR) extends TableIR { val typ: TableType = child.typ - protected[ir] override def execute(ctx: ExecuteContext, r: TableRunContext): TableExecuteIntermediate = { + protected[ir] override def execute(ctx: ExecuteContext, r: LoweringAnalyses): TableExecuteIntermediate = { val prev = child.execute(ctx, 
r).asTableValue(ctx) new TableValueIntermediate(prev.copy(rvd = prev.rvd.truncateKey(prev.typ.key).distinctByKey(ctx))) } @@ -3043,7 +2939,7 @@ case class TableKeyByAndAggregate( key = keyType.fieldNames ) - protected[ir] override def execute(ctx: ExecuteContext, r: TableRunContext): TableExecuteIntermediate = { + protected[ir] override def execute(ctx: ExecuteContext, r: LoweringAnalyses): TableExecuteIntermediate = { val prev = child.execute(ctx, r).asTableValue(ctx) val fsBc = ctx.fsBc val sm = ctx.stateManager @@ -3059,7 +2955,7 @@ case class TableKeyByAndAggregate( val globalsBc = prev.globals.broadcast(ctx.theHailClassLoader) - val spec = BufferSpec.defaultUncompressed + val spec = BufferSpec.blockedUncompressed val res = genUID() val extracted = agg.Extract(expr, res, Requiredness(this, ctx)) @@ -3198,7 +3094,7 @@ case class TableAggregateByKey(child: TableIR, expr: IR) extends TableIR { val typ: TableType = child.typ.copy(rowType = child.typ.keyType ++ tcoerce[TStruct](expr.typ)) - protected[ir] override def execute(ctx: ExecuteContext, r: TableRunContext): TableExecuteIntermediate = { + protected[ir] override def execute(ctx: ExecuteContext, r: LoweringAnalyses): TableExecuteIntermediate = { val prev = child.execute(ctx, r).asTableValue(ctx) val prevRVD = prev.rvd.truncateKey(child.typ.key) val fsBc = ctx.fsBc @@ -3328,7 +3224,7 @@ case class TableOrderBy(child: TableIR, sortFields: IndexedSeq[SortField]) exten val typ: TableType = child.typ.copy(key = FastIndexedSeq()) - protected[ir] override def execute(ctx: ExecuteContext, r: TableRunContext): TableExecuteIntermediate = { + protected[ir] override def execute(ctx: ExecuteContext, r: LoweringAnalyses): TableExecuteIntermediate = { val prev = child.execute(ctx, r).asTableValue(ctx) val physicalKey = prev.rvd.typ.key @@ -3403,7 +3299,7 @@ case class TableRename(child: TableIR, rowMap: Map[String, String], globalMap: M TableRename(newChild, rowMap, globalMap) } - protected[ir] override def execute(ctx: ExecuteContext, r: TableRunContext): TableExecuteIntermediate = + protected[ir] override def execute(ctx: ExecuteContext, r: LoweringAnalyses): TableExecuteIntermediate = new TableValueIntermediate( child.execute(ctx, r).asTableValue(ctx).rename(globalMap, rowMap)) } @@ -3420,7 +3316,7 @@ case class TableFilterIntervals(child: TableIR, intervals: IndexedSeq[Interval], override lazy val typ: TableType = child.typ - protected[ir] override def execute(ctx: ExecuteContext, r: TableRunContext): TableExecuteIntermediate = { + protected[ir] override def execute(ctx: ExecuteContext, r: LoweringAnalyses): TableExecuteIntermediate = { val tv = child.execute(ctx, r).asTableValue(ctx) val partitioner = RVDPartitioner.union( ctx.stateManager, @@ -3463,7 +3359,7 @@ case class TableToTableApply(child: TableIR, function: TableToTableFunction) ext lazy val rowCountUpperBound: Option[Long] = if (function.preservesPartitionCounts) child.rowCountUpperBound else None - protected[ir] override def execute(ctx: ExecuteContext, r: TableRunContext): TableExecuteIntermediate = { + protected[ir] override def execute(ctx: ExecuteContext, r: LoweringAnalyses): TableExecuteIntermediate = { new TableValueIntermediate(function.execute(ctx, child.execute(ctx, r).asTableValue(ctx))) } } @@ -3485,7 +3381,7 @@ case class BlockMatrixToTableApply( override lazy val typ: TableType = function.typ(bm.typ, aux.typ) - protected[ir] override def execute(ctx: ExecuteContext, r: TableRunContext): TableExecuteIntermediate = { + protected[ir] override def execute(ctx: 
ExecuteContext, r: LoweringAnalyses): TableExecuteIntermediate = { val b = bm.execute(ctx) val a = CompileAndEvaluate[Any](ctx, aux, optimize = false) new TableValueIntermediate(function.execute(ctx, b, a)) @@ -3507,7 +3403,7 @@ case class BlockMatrixToTable(child: BlockMatrixIR) extends TableIR { TableType(rvType, Array[String](), TStruct.empty) } - protected[ir] override def execute(ctx: ExecuteContext, r: TableRunContext): TableExecuteIntermediate = { + protected[ir] override def execute(ctx: ExecuteContext, r: LoweringAnalyses): TableExecuteIntermediate = { new TableValueIntermediate(child.execute(ctx).entriesTable(ctx)) } } diff --git a/hail/src/main/scala/is/hail/expr/ir/TypeCheck.scala b/hail/src/main/scala/is/hail/expr/ir/TypeCheck.scala index 8e66a49e10f..bae64d8ba4a 100644 --- a/hail/src/main/scala/is/hail/expr/ir/TypeCheck.scala +++ b/hail/src/main/scala/is/hail/expr/ir/TypeCheck.scala @@ -268,6 +268,10 @@ object TypeCheck { val ndType = nd.typ.asInstanceOf[TNDArray] assert(ndType.elementType == TFloat64) assert(ndType.nDims == 2) + case x@NDArrayEigh(nd, _, _) => + val ndType = nd.typ.asInstanceOf[TNDArray] + assert(ndType.elementType == TFloat64) + assert(ndType.nDims == 2) case x@NDArrayInv(nd, _) => val ndType = nd.typ.asInstanceOf[TNDArray] assert(ndType.elementType == TFloat64) diff --git a/hail/src/main/scala/is/hail/expr/ir/agg/AggregatorState.scala b/hail/src/main/scala/is/hail/expr/ir/agg/AggregatorState.scala index 5c12541defe..6873c365ec4 100644 --- a/hail/src/main/scala/is/hail/expr/ir/agg/AggregatorState.scala +++ b/hail/src/main/scala/is/hail/expr/ir/agg/AggregatorState.scala @@ -39,7 +39,7 @@ trait AggregatorState { val lazyBuffer = kb.getOrDefineLazyField[MemoryBufferWrapper](Code.newInstance[MemoryBufferWrapper](), ("AggregatorStateBufferWrapper")) cb += lazyBuffer.invoke[Array[Byte], Unit]("set", bytes.loadBytes(cb)) val ib = cb.memoize(lazyBuffer.invoke[InputBuffer]("buffer")) - deserialize(BufferSpec.defaultUncompressed)(cb, ib) + deserialize(BufferSpec.blockedUncompressed)(cb, ib) cb += lazyBuffer.invoke[Unit]("invalidate") } @@ -48,7 +48,7 @@ trait AggregatorState { val addr = kb.genFieldThisRef[Long]("addr") cb += lazyBuffer.invoke[Unit]("clear") val ob = cb.memoize(lazyBuffer.invoke[OutputBuffer]("buffer")) - serialize(BufferSpec.defaultUncompressed)(cb, ob) + serialize(BufferSpec.blockedUncompressed)(cb, ob) cb.assign(addr, t.allocate(r, lazyBuffer.invoke[Int]("length"))) t.storeLength(cb, addr, lazyBuffer.invoke[Int]("length")) cb += lazyBuffer.invoke[Long, Unit]("copyToAddress", t.bytesAddress(addr)) diff --git a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIR.scala b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIR.scala index c3affc0a473..f4a84aade17 100644 --- a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIR.scala +++ b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIR.scala @@ -1537,37 +1537,7 @@ object LowerTableIR { case tj@TableJoin(left, right, joinType, joinKey) => val loweredLeft = lower(left) val loweredRight = lower(right) - - val lKeyFields = left.typ.key.take(joinKey) - val lValueFields = left.typ.rowType.fieldNames.filter(f => !lKeyFields.contains(f)) - val rKeyFields = right.typ.key.take(joinKey) - val rValueFields = right.typ.rowType.fieldNames.filter(f => !rKeyFields.contains(f)) - val lReq = analyses.requirednessAnalysis.lookup(left).asInstanceOf[RTable] - val rReq = analyses.requirednessAnalysis.lookup(right).asInstanceOf[RTable] - val rightKeyIsDistinct = 
analyses.distinctKeyedAnalysis.contains(right) - - val joinedStage = loweredLeft.orderedJoin(ctx, - loweredRight, joinKey, joinType, - (lGlobals, rGlobals) => { - val rGlobalType = rGlobals.typ.asInstanceOf[TStruct] - val rGlobalRef = Ref(genUID(), rGlobalType) - Let(rGlobalRef.name, rGlobals, - InsertFields(lGlobals, rGlobalType.fieldNames.map(f => f -> GetField(rGlobalRef, f)))) - }, - (lEltRef, rEltRef) => { - MakeStruct( - (lKeyFields, rKeyFields).zipped.map { (lKey, rKey) => - if (joinType == "outer" && lReq.field(lKey).required && rReq.field(rKey).required) - lKey -> Coalesce(FastSeq(GetField(lEltRef, lKey), GetField(rEltRef, rKey), Die("TableJoin expected non-missing key", left.typ.rowType.fieldType(lKey), -1))) - else - lKey -> Coalesce(FastSeq(GetField(lEltRef, lKey), GetField(rEltRef, rKey))) - } - ++ lValueFields.map(f => f -> GetField(lEltRef, f)) - ++ rValueFields.map(f => f -> GetField(rEltRef, f))) - }, rightKeyIsDistinct) - - assert(joinedStage.rowType == tj.typ.rowType) - joinedStage + LowerTableIRHelpers.lowerTableJoin(ctx, analyses, tj, loweredLeft, loweredRight) case x@TableUnion(children) => val lowered = children.map(lower) diff --git a/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIRHelpers.scala b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIRHelpers.scala new file mode 100644 index 00000000000..c2b79972fc4 --- /dev/null +++ b/hail/src/main/scala/is/hail/expr/ir/lowering/LowerTableIRHelpers.scala @@ -0,0 +1,44 @@ +package is.hail.expr.ir.lowering + +import is.hail.backend.ExecuteContext +import is.hail.expr.ir._ +import is.hail.types.RTable +import is.hail.types.virtual.TStruct +import is.hail.utils.FastSeq + +object LowerTableIRHelpers { + + def lowerTableJoin(ctx: ExecuteContext, analyses: LoweringAnalyses, tj: TableJoin, loweredLeft: TableStage, loweredRight: TableStage): TableStage = { + val TableJoin(left, right, joinType, joinKey) = tj + val lKeyFields = left.typ.key.take(joinKey) + val lValueFields = left.typ.rowType.fieldNames.filter(f => !lKeyFields.contains(f)) + val rKeyFields = right.typ.key.take(joinKey) + val rValueFields = right.typ.rowType.fieldNames.filter(f => !rKeyFields.contains(f)) + val lReq = analyses.requirednessAnalysis.lookup(left).asInstanceOf[RTable] + val rReq = analyses.requirednessAnalysis.lookup(right).asInstanceOf[RTable] + val rightKeyIsDistinct = analyses.distinctKeyedAnalysis.contains(right) + + val joinedStage = loweredLeft.orderedJoin(ctx, + loweredRight, joinKey, joinType, + (lGlobals, rGlobals) => { + val rGlobalType = rGlobals.typ.asInstanceOf[TStruct] + val rGlobalRef = Ref(genUID(), rGlobalType) + Let(rGlobalRef.name, rGlobals, + InsertFields(lGlobals, rGlobalType.fieldNames.map(f => f -> GetField(rGlobalRef, f)))) + }, + (lEltRef, rEltRef) => { + MakeStruct( + (lKeyFields, rKeyFields).zipped.map { (lKey, rKey) => + if (joinType == "outer" && lReq.field(lKey).required && rReq.field(rKey).required) + lKey -> Coalesce(FastSeq(GetField(lEltRef, lKey), GetField(rEltRef, rKey), Die("TableJoin expected non-missing key", left.typ.rowType.fieldType(lKey), -1))) + else + lKey -> Coalesce(FastSeq(GetField(lEltRef, lKey), GetField(rEltRef, rKey))) + } + ++ lValueFields.map(f => f -> GetField(lEltRef, f)) + ++ rValueFields.map(f => f -> GetField(rEltRef, f))) + }, rightKeyIsDistinct) + + assert(joinedStage.rowType == tj.typ.rowType) + joinedStage + } +} diff --git a/hail/src/main/scala/is/hail/io/BufferSpecs.scala b/hail/src/main/scala/is/hail/io/BufferSpecs.scala index b98fca3aa4a..2ca8876fc89 100644 --- 
a/hail/src/main/scala/is/hail/io/BufferSpecs.scala +++ b/hail/src/main/scala/is/hail/io/BufferSpecs.scala @@ -12,24 +12,30 @@ import org.json4s.jackson.JsonMethods import org.json4s.{Extraction, JValue} object BufferSpec { - val default: BufferSpec = LEB128BufferSpec( - BlockingBufferSpec(32 * 1024, - LZ4HCBlockBufferSpec(32 * 1024, + val zstdCompressionLEB: BufferSpec = LEB128BufferSpec( + BlockingBufferSpec(64 * 1024, + ZstdBlockBufferSpec(64 * 1024, new StreamBlockBufferSpec))) - val defaultUncompressed: BufferSpec = BlockingBufferSpec(32 * 1024, - new StreamBlockBufferSpec) + val default: BufferSpec = zstdCompressionLEB + val blockedUncompressed: BufferSpec = BlockingBufferSpec(32 * 1024, + new StreamBlockBufferSpec) val unblockedUncompressed: BufferSpec = new StreamBufferSpec val wireSpec: BufferSpec = LEB128BufferSpec( - BlockingBufferSpec(32 * 1024, - LZ4SizeBasedBlockBufferSpec("fast", 32 * 1024, - 256, + BlockingBufferSpec(64 * 1024, + ZstdSizedBasedBlockBufferSpec(64 * 1024, + /*minCompressionSize=*/256, new StreamBlockBufferSpec))) - val memorySpec: BufferSpec = wireSpec + // longtime default spec + val lz4HCCompressionLEB: BufferSpec = LEB128BufferSpec( + BlockingBufferSpec(32 * 1024, + LZ4HCBlockBufferSpec(32 * 1024, + new StreamBlockBufferSpec))) + val blockSpecs: Array[BufferSpec] = Array( BlockingBufferSpec(64 * 1024, new StreamBlockBufferSpec), @@ -39,6 +45,9 @@ object BufferSpec { BlockingBufferSpec(32 * 1024, LZ4FastBlockBufferSpec(32 * 1024, new StreamBlockBufferSpec)), + BlockingBufferSpec(64 * 1024, + ZstdBlockBufferSpec(64 * 1024, + new StreamBlockBufferSpec)), new StreamBufferSpec) val specs: Array[BufferSpec] = blockSpecs.flatMap { blockSpec => @@ -61,6 +70,7 @@ object BufferSpec { classOf[LZ4HCBlockBufferSpec], classOf[LZ4FastBlockBufferSpec], classOf[LZ4SizeBasedBlockBufferSpec], + classOf[ZstdBlockBufferSpec], classOf[StreamBlockBufferSpec], classOf[BufferSpec], classOf[LEB128BufferSpec], @@ -174,6 +184,34 @@ final case class LZ4SizeBasedBlockBufferSpec(compressorType: String, blockSize: Code.newInstance[LZ4SizeBasedCompressingOutputBlockBuffer, LZ4, Int, Int, OutputBlockBuffer](stagedlz4, blockSize, minCompressionSize, child.buildCodeOutputBuffer(out)) } +final case class ZstdBlockBufferSpec(blockSize: Int, child: BlockBufferSpec) extends BlockBufferSpec { + require(blockSize <= (1 << 16)) + + def buildInputBuffer(in: InputStream): InputBlockBuffer = new ZstdInputBlockBuffer(blockSize, child.buildInputBuffer(in)) + + def buildOutputBuffer(out: OutputStream): OutputBlockBuffer = new ZstdOutputBlockBuffer(blockSize, child.buildOutputBuffer(out)) + + def buildCodeInputBuffer(in: Code[InputStream]): Code[InputBlockBuffer] = + Code.newInstance[ZstdInputBlockBuffer, Int, InputBlockBuffer](blockSize, child.buildCodeInputBuffer(in)) + + def buildCodeOutputBuffer(out: Code[OutputStream]): Code[OutputBlockBuffer] = + Code.newInstance[ZstdOutputBlockBuffer, Int, OutputBlockBuffer](blockSize, child.buildCodeOutputBuffer(out)) +} + +final case class ZstdSizedBasedBlockBufferSpec(blockSize: Int, minCompressionSize: Int, child: BlockBufferSpec) extends BlockBufferSpec { + require(blockSize <= (1 << 16)) + + def buildInputBuffer(in: InputStream): InputBlockBuffer = new ZstdSizedBasedInputBlockBuffer(blockSize, child.buildInputBuffer(in)) + + def buildOutputBuffer(out: OutputStream): OutputBlockBuffer = new ZstdSizedBasedOutputBlockBuffer(blockSize, minCompressionSize, child.buildOutputBuffer(out)) + + def buildCodeInputBuffer(in: Code[InputStream]): Code[InputBlockBuffer] = + 
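+ // staged counterpart of buildInputBuffer above: emits bytecode that constructs the same wrapper at runtime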
Code.newInstance[ZstdSizedBasedInputBlockBuffer, Int, InputBlockBuffer](blockSize, child.buildCodeInputBuffer(in)) + + def buildCodeOutputBuffer(out: Code[OutputStream]): Code[OutputBlockBuffer] = + Code.newInstance[ZstdSizedBasedOutputBlockBuffer, Int, Int, OutputBlockBuffer](blockSize, minCompressionSize, child.buildCodeOutputBuffer(out)) +} + object StreamBlockBufferSpec { def extract(jv: JValue): StreamBlockBufferSpec = new StreamBlockBufferSpec } diff --git a/hail/src/main/scala/is/hail/io/InputBuffers.scala b/hail/src/main/scala/is/hail/io/InputBuffers.scala index 60a899d850d..dd22a159112 100644 --- a/hail/src/main/scala/is/hail/io/InputBuffers.scala +++ b/hail/src/main/scala/is/hail/io/InputBuffers.scala @@ -8,6 +8,8 @@ import is.hail.annotations.{Memory, Region} import is.hail.io.compress.LZ4 import is.hail.utils._ +import com.github.luben.zstd.Zstd + trait InputBuffer extends Closeable { def close(): Unit @@ -627,3 +629,57 @@ final class LZ4SizeBasedCompressingInputBlockBuffer(lz4: LZ4, blockSize: Int, in result } } + +final class ZstdInputBlockBuffer(blockSize: Int, in: InputBlockBuffer) extends InputBlockBuffer { + private val comp = new Array[Byte](4 + Zstd.compressBound(blockSize).toInt) + + def close(): Unit = { + in.close() + } + + def seek(offset: Long): Unit = in.seek(offset) + + def readBlock(buf: Array[Byte]): Int = { + val blockLen = in.readBlock(comp) + if (blockLen == -1) { + blockLen + } else { + val compLen = blockLen - 4 + // the 4-byte block header stores the uncompressed length + val decompLen = Memory.loadInt(comp, 0) + val ret = Zstd.decompressByteArray(buf, 0, decompLen, comp, 4, compLen) + if (Zstd.isError(ret)) + throw new com.github.luben.zstd.ZstdException(ret) + decompLen + } + } +} + +final class ZstdSizedBasedInputBlockBuffer(blockSize: Int, in: InputBlockBuffer) extends InputBlockBuffer { + private val comp = new Array[Byte](4 + Zstd.compressBound(blockSize).toInt) + + def close(): Unit = { + in.close() + } + + def seek(offset: Long): Unit = in.seek(offset) + + def readBlock(buf: Array[Byte]): Int = { + val blockLen = in.readBlock(comp) + if (blockLen == -1) { + blockLen + } else { + val compLen = blockLen - 4 + val decomp = Memory.loadInt(comp, 0) + // header written by ZstdSizedBasedOutputBlockBuffer: an even value marks a block stored + // uncompressed, an odd value marks a compressed block whose uncompressed length is decomp >>> 1 + if (decomp % 2 == 0) { + System.arraycopy(comp, 4, buf, 0, compLen) + compLen + } else { + val decompLen = decomp >>> 1 + val ret = Zstd.decompressByteArray(buf, 0, decompLen, comp, 4, compLen) + if (Zstd.isError(ret)) + throw new com.github.luben.zstd.ZstdException(ret) + decompLen + } + } + } +} diff --git a/hail/src/main/scala/is/hail/io/OutputBuffers.scala b/hail/src/main/scala/is/hail/io/OutputBuffers.scala index 065665c5a03..cc942ccb871 100644 --- a/hail/src/main/scala/is/hail/io/OutputBuffers.scala +++ b/hail/src/main/scala/is/hail/io/OutputBuffers.scala @@ -8,6 +8,8 @@ import is.hail.io.compress.LZ4 import is.hail.utils._ import is.hail.utils.richUtils.ByteTrackingOutputStream +import com.github.luben.zstd.Zstd + trait OutputBuffer extends Closeable { def flush(): Unit @@ -341,4 +343,48 @@ final class LZ4SizeBasedCompressingOutputBlockBuffer(lz4: LZ4, blockSize: Int, m } def getPos(): Long = out.getPos() -} \ No newline at end of file +} + +final class ZstdOutputBlockBuffer(blockSize: Int, out: OutputBlockBuffer) extends OutputBlockBuffer { + private val comp = new Array[Byte](4 + Zstd.compressBound(blockSize).toInt) + + def flush(): Unit = out.flush() + + def close(): Unit = out.close() + + def writeBlock(buf: Array[Byte], decompLen: Int): Unit = { + val compLen = Zstd.compressByteArray(comp, 4, comp.length - 4, buf, 0, decompLen,
Zstd.defaultCompressionLevel()) + if (Zstd.isError(compLen)) + throw new com.github.luben.zstd.ZstdException(compLen) + Memory.storeInt(comp, 0, decompLen.toInt) + out.writeBlock(comp, compLen.toInt + 4) + } + + def getPos(): Long = out.getPos() +} + +final class ZstdSizedBasedOutputBlockBuffer(blockSize: Int, minCompressionSize: Int, out: OutputBlockBuffer) extends OutputBlockBuffer { + private val comp = new Array[Byte](4 + Zstd.compressBound(blockSize).toInt) + + def flush(): Unit = out.flush() + + def close(): Unit = out.close() + + def writeBlock(buf: Array[Byte], decompLen: Int): Unit = { + val compLen = if (decompLen < minCompressionSize) { + System.arraycopy(buf, 0, comp, 4, decompLen) + Memory.storeInt(comp, 0, 0) + decompLen + } else { + val compLen = Zstd.compressByteArray(comp, 4, comp.length - 4, buf, 0, decompLen, Zstd.defaultCompressionLevel()) + if (Zstd.isError(compLen)) + throw new com.github.luben.zstd.ZstdException(compLen) + Memory.storeInt(comp, 0, (decompLen << 1) + 1) + compLen.toInt + } + + out.writeBlock(comp, compLen + 4) + } + + def getPos(): Long = out.getPos() +} diff --git a/hail/src/main/scala/is/hail/io/bgen/BgenRDDPartitions.scala b/hail/src/main/scala/is/hail/io/bgen/BgenRDDPartitions.scala index 4365bf64d4f..514af6b9e12 100644 --- a/hail/src/main/scala/is/hail/io/bgen/BgenRDDPartitions.scala +++ b/hail/src/main/scala/is/hail/io/bgen/BgenRDDPartitions.scala @@ -84,7 +84,7 @@ object BgenRDDPartitions extends Logging { val nonEmptyFilesAfterFilter = sortedFiles.filter(_.nVariants > 0) - val (leafSpec, intSpec) = BgenSettings.indexCodecSpecs(rg) + val (leafSpec, intSpec) = BgenSettings.indexCodecSpecs(files.head.indexVersion, rg) val getKeysFromFile = StagedBGENReader.queryIndexByPosition(ctx, leafSpec, intSpec) nonEmptyFilesAfterFilter.zipWithIndex.map { case (file, fileIndex) => diff --git a/hail/src/main/scala/is/hail/io/bgen/BgenSettings.scala b/hail/src/main/scala/is/hail/io/bgen/BgenSettings.scala index b433837e172..e833a06365e 100644 --- a/hail/src/main/scala/is/hail/io/bgen/BgenSettings.scala +++ b/hail/src/main/scala/is/hail/io/bgen/BgenSettings.scala @@ -7,7 +7,7 @@ import is.hail.types.encoded._ import is.hail.types.physical._ import is.hail.types.virtual._ import is.hail.types.{MatrixType, TableType} -import is.hail.utils.FastIndexedSeq +import is.hail.utils._ import is.hail.variant.ReferenceGenome @@ -22,11 +22,16 @@ object BgenSettings { val indexAnnotationType: Type = TStruct.empty - def indexCodecSpecs(rg: Option[String]): (AbstractTypedCodecSpec, AbstractTypedCodecSpec) = { - val bufferSpec = LEB128BufferSpec( - BlockingBufferSpec(32 * 1024, - LZ4HCBlockBufferSpec(32 * 1024, - new StreamBlockBufferSpec))) + private def specFromVersion(indexVersion: SemanticVersion): BufferSpec = + if (indexVersion >= SemanticVersion(1, 2, 0)) { + BufferSpec.zstdCompressionLEB + } else { + BufferSpec.lz4HCCompressionLEB + } + + + def indexCodecSpecs(indexVersion: SemanticVersion, rg: Option[String]): (AbstractTypedCodecSpec, AbstractTypedCodecSpec) = { + val bufferSpec = specFromVersion(indexVersion) val keyVType = indexKeyType(rg) val keyEType = EBaseStruct(FastIndexedSeq( diff --git a/hail/src/main/scala/is/hail/io/bgen/LoadBgen.scala b/hail/src/main/scala/is/hail/io/bgen/LoadBgen.scala index 994a34fe1d5..dc7e4dba05c 100644 --- a/hail/src/main/scala/is/hail/io/bgen/LoadBgen.scala +++ b/hail/src/main/scala/is/hail/io/bgen/LoadBgen.scala @@ -39,6 +39,7 @@ case class BgenHeader( case class BgenFileMetadata( indexPath: String, + indexVersion: SemanticVersion, 
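+ // indexVersion: the SemanticVersion stamped in the accompanying .idx2 index metadata; BgenSettings.indexCodecSpecs uses it to pick the codec spec (Zstd for 1.2.0 and newer indices, LZ4HC for older ones)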
header: BgenHeader, rg: Option[String], contigRecoding: Map[String, String], @@ -195,7 +196,8 @@ object LoadBgen { case _ => None } val metadata = IndexReader.readMetadata(fs, indexFile, keyType, annotationType) - val (leafSpec, internalSpec) = BgenSettings.indexCodecSpecs(rg) + val indexVersion = SemanticVersion(metadata.fileVersion) + val (leafSpec, internalSpec) = BgenSettings.indexCodecSpecs(indexVersion, rg) val getKeys = cacheByRG.getOrElseUpdate(rg, StagedBGENReader.queryIndexByPosition(ctx, leafSpec, internalSpec)) @@ -211,6 +213,7 @@ object LoadBgen { BgenFileMetadata( indexFile, + indexVersion, h, rg, contigRecoding, @@ -340,6 +343,9 @@ object MatrixBGENReader { val indexFiles = LoadBgen.getIndexFiles(fs, allFiles, params.indexFileMap) val fileMetadata = LoadBgen.getBgenFileMetadata(ctx, allFiles, indexFiles) assert(fileMetadata.nonEmpty) + if (fileMetadata.exists(md => md.indexVersion != fileMetadata.head.indexVersion)) { + fatal("BGEN index version mismatch. All files must use the same index version; run 'index_bgen' to reindex the files so that all index versions match before calling 'import_bgen' again") + } val sampleIds = params.sampleFile.map(file => LoadBgen.readSampleFile(fs, file)) .getOrElse(LoadBgen.readSamples(fs, fileMetadata.head.path)) @@ -607,7 +613,7 @@ case class BgenPartitionReaderWithVariantFilter(fileMetadata: Array[BgenFileMeta val contigRecoding = mb.genFieldThisRef[Map[String, String]]("bgen_contig_recoding") val indexNKeys = mb.genFieldThisRef[Long]("index_nkeys") - val (leafCodec, intCodec) = BgenSettings.indexCodecSpecs(rg) + val (leafCodec, intCodec) = BgenSettings.indexCodecSpecs(fileMetadata.head.indexVersion, rg) val index = new StagedIndexReader(mb, leafCodec, intCodec) val currVariantIndex = mb.genFieldThisRef[Long]("currVariantIndex") @@ -745,7 +751,7 @@ case class BgenPartitionReader(fileMetadata: Array[BgenFileMetadata], rg: Option val currVariantIndex = mb.genFieldThisRef[Long]("bgen_currIdx") val endVariantIndex = mb.genFieldThisRef[Long]("bgen_endIdx") - val (leafCodec, intCodec) = BgenSettings.indexCodecSpecs(rg) + val (leafCodec, intCodec) = BgenSettings.indexCodecSpecs(fileMetadata.head.indexVersion, rg) val index = new StagedIndexReader(mb, leafCodec, intCodec) var out: EmitSettable = null // filled in later @@ -821,4 +827,4 @@ case class BgenPartitionReader(fileMetadata: Array[BgenFileMetadata], rg: Option } def toJValue: JValue = Extraction.decompose(this)(PartitionReader.formats) -} \ No newline at end of file +} diff --git a/hail/src/main/scala/is/hail/io/fs/AzureStorageFS.scala b/hail/src/main/scala/is/hail/io/fs/AzureStorageFS.scala index 28fc7a4bba5..e49ee180286 100644 --- a/hail/src/main/scala/is/hail/io/fs/AzureStorageFS.scala +++ b/hail/src/main/scala/is/hail/io/fs/AzureStorageFS.scala @@ -1,7 +1,7 @@ package is.hail.io.fs -import is.hail.shadedazure.com.azure.core.credential.TokenCredential -import is.hail.shadedazure.com.azure.identity.{ClientSecretCredential, ClientSecretCredentialBuilder, DefaultAzureCredential, DefaultAzureCredentialBuilder} +import is.hail.shadedazure.com.azure.core.credential.{AzureSasCredential, TokenCredential} +import is.hail.shadedazure.com.azure.identity.{ClientSecretCredential, ClientSecretCredentialBuilder, DefaultAzureCredential, DefaultAzureCredentialBuilder, ManagedIdentityCredentialBuilder} import is.hail.shadedazure.com.azure.storage.blob.models.{BlobProperties, BlobRange, ListBlobsOptions, BlobStorageException} import
is.hail.shadedazure.com.azure.storage.blob.specialized.BlockBlobClient import is.hail.shadedazure.com.azure.storage.blob.{BlobClient, BlobContainerClient, BlobServiceClient, BlobServiceClientBuilder} @@ -10,108 +10,131 @@ import is.hail.shadedazure.reactor.netty.http.client.HttpClient import is.hail.services.retryTransientErrors import is.hail.io.fs.FSUtil.{containsWildcard, dropTrailingSlash} import org.apache.log4j.Logger +import org.apache.commons.io.IOUtils import java.net.URI -import is.hail.utils.{defaultJSONFormats, fatal} +import is.hail.utils._ import org.json4s import org.json4s.jackson.JsonMethods import org.json4s.Formats import java.io.{ByteArrayInputStream, ByteArrayOutputStream, FileNotFoundException, OutputStream} -import java.nio.file.FileSystems +import java.nio.file.Paths import java.time.Duration import scala.collection.mutable import scala.collection.mutable.ArrayBuffer - +import org.json4s.{DefaultFormats, Formats, JInt, JObject, JString, JValue} abstract class AzureStorageFSURL( val account: String, val container: String, - val path: String -) { - + val path: String, + val sasToken: Option[String] +) extends FSURL[AzureStorageFSURL] { + + def addPathComponent(c: String): AzureStorageFSURL = { + if (path == "") + withPath(c) + else + withPath(s"$path/$c") + } def withPath(newPath: String): AzureStorageFSURL + def fromString(s: String): AzureStorageFSURL = AzureStorageFS.parseUrl(s) + + def prefix: String + def getPath: String = path - def withoutPath(): String + override def toString(): String = { + val pathPart = if (path == "") "" else s"/$path" + val sasTokenPart = sasToken.getOrElse("") + + prefix + pathPart + sasTokenPart + } } class AzureStorageFSHailAzURL( account: String, container: String, - path: String -) extends AzureStorageFSURL(account, container, path) { + path: String, + sasToken: Option[String] +) extends AzureStorageFSURL(account, container, path, sasToken) { override def withPath(newPath: String): AzureStorageFSHailAzURL = { - new AzureStorageFSHailAzURL(account, container, newPath) + new AzureStorageFSHailAzURL(account, container, newPath, sasToken) } - override def withoutPath(): String = s"hail-az://$account/$container" - - override def toString(): String = s"hail-az://$account/$container/$path" + override def prefix: String = s"hail-az://$account/$container" } class AzureStorageFSHttpsURL( account: String, container: String, - path: String -) extends AzureStorageFSURL(account, container, path) { + path: String, + sasToken: Option[String] +) extends AzureStorageFSURL(account, container, path, sasToken) { override def withPath(newPath: String): AzureStorageFSHttpsURL = { - new AzureStorageFSHttpsURL(account, container, newPath) + new AzureStorageFSHttpsURL(account, container, newPath, sasToken) } - override def withoutPath(): String = s"https://$account.blob.core.windows.net/$container" - override def toString(): String = s"https://$account.blob.core.windows.net/$container/$path" + override def prefix: String = s"https://$account.blob.core.windows.net/$container" } object AzureStorageFS { - private val pathRegex = "/([^/]+)(.*)".r + private val HAIL_AZ_URI_REGEX = "^hail-az:\\/\\/([a-z0-9_\\-\\.]+)\\/([a-z0-9_\\-\\.]+)(\\/.*)?".r + private val AZURE_HTTPS_URI_REGEX = "^https:\\/\\/([a-z0-9_\\-\\.]+)\\.blob\\.core\\.windows\\.net\\/([a-z0-9_\\-\\.]+)(\\/.*)?".r private val log = Logger.getLogger(getClass.getName) val schemes: Array[String] = Array("hail-az", "https") def parseUrl(filename: String): AzureStorageFSURL = { - val uri = new 
URI(filename).normalize() - - val scheme = uri.getScheme - if (scheme == null || !schemes.contains(scheme)) { + val scheme = new URI(filename).getScheme + if (scheme == "hail-az") { + parseHailAzUrl(filename) + } else if (scheme == "https") { + parseHttpsUrl(filename) + } else { throw new IllegalArgumentException(s"Invalid scheme, expected hail-az or https: $scheme") } + } - - val authority = uri.getAuthority - if (authority == null) { - throw new IllegalArgumentException(s"Invalid path: $filename") - } - val account = scheme match { - case "hail-az" => authority - case "https" => { - if (authority.endsWith(".blob.core.windows.net")) { - authority.stripSuffix(".blob.core.windows.net") - } else { - throw new IllegalArgumentException(s"Invalid domain in blob URL: $authority") - } - } - } + private[this] def parseHttpsUrl(filename: String): AzureStorageFSHttpsURL = { + AZURE_HTTPS_URI_REGEX + .findFirstMatchIn(filename) + .map(m => { + val (path, sasToken) = parsePathAndQuery(m.group(3)) + new AzureStorageFSHttpsURL(m.group(1), m.group(2), path, sasToken) + }) + .getOrElse(throw new IllegalArgumentException("ABS URI must be of the form https://<account>.blob.core.windows.net/<container>/<path>")) + } - val (container, path) = pathRegex.findFirstMatchIn(uri.getPath) match { - case Some(filenameMatch) => - val container = filenameMatch.group(1) - val path = filenameMatch.group(2) - if (path != "") { - assert(path.startsWith("/")) - (container, path.substring(1)) - } else { - (container, "") - } - case None => - fatal(s"filename $filename is not in the correct format. hail-az://account/container/blobPath") - } + private[this] def parseHailAzUrl(filename: String): AzureStorageFSHailAzURL = { + HAIL_AZ_URI_REGEX + .findFirstMatchIn(filename) + .map(m => { + val (path, sasToken) = parsePathAndQuery(m.group(3)) + new AzureStorageFSHailAzURL(m.group(1), m.group(2), path, sasToken) + }) + .getOrElse(throw new IllegalArgumentException("hail-az URI must be of the form hail-az://<account>/<container>/<path>")) + } + + private[this] def parsePathAndQuery(maybeNullPath: String): (String, Option[String]) = { + val pathAndMaybeQuery = Paths.get(if (maybeNullPath == null) "" else maybeNullPath.stripPrefix("/")).normalize.toString - + // Unfortunately it is difficult to tell the difference between a glob pattern and a SAS token, + // so we make the imperfect assumption that if the query string starts with at least one + // key-value pair we will interpret it as a SAS token and not a glob pattern + val indexOfLastQuestionMark = pathAndMaybeQuery.lastIndexOf("?") + if (indexOfLastQuestionMark == -1) { + (pathAndMaybeQuery, None) + } else { + val (path, queryString) = pathAndMaybeQuery.splitAt(indexOfLastQuestionMark) + queryString.split("&")(0).split("=") match { + case Array(_, _) => (path, Some(queryString)) + case _ => (pathAndMaybeQuery, None) + } } } } @@ -126,31 +149,39 @@ object AzureStorageFileStatus { } class AzureBlobServiceClientCache(credential: TokenCredential) { - private[this] lazy val clients = mutable.Map[(String, String), BlobServiceClient]() + private[this] lazy val clients = mutable.Map[(String, String, Option[String]), BlobServiceClient]() - def getServiceClient(account: String, container: String): BlobServiceClient = { - clients.get((account, container)) match { + def getServiceClient(url: AzureStorageFSURL): BlobServiceClient = { + val k = (url.account, url.container, url.sasToken) + 
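+ // a client built from an AzureSasCredential is scoped to that token's permissions, so the SAS token is part of the cache key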
clients.get(k) match { case Some(client) => client case None => - val blobServiceClient = new BlobServiceClientBuilder() - .credential(credential) - .endpoint(s"https://$account.blob.core.windows.net") + val clientBuilder = url.sasToken match { + case Some(sasToken) => new BlobServiceClientBuilder().credential(new AzureSasCredential(sasToken)) + case None => new BlobServiceClientBuilder().credential(credential) + } + + val blobServiceClient = clientBuilder + .endpoint(s"https://${url.account}.blob.core.windows.net") .buildClient() - clients += ((account, container) -> blobServiceClient) + clients += (k -> blobServiceClient) blobServiceClient } } - def setPublicAccessServiceClient(account: String, container: String): Unit = { + def setPublicAccessServiceClient(url: AzureStorageFSURL): Unit = { val blobServiceClient = new BlobServiceClientBuilder() - .endpoint(s"https://$account.blob.core.windows.net") + .endpoint(s"https://${url.account}.blob.core.windows.net") .buildClient() - clients += ((account, container) -> blobServiceClient) + clients += ((url.account, url.container, url.sasToken) -> blobServiceClient) } } class AzureStorageFS(val credentialsJSON: Option[String] = None) extends FS { + type URL = AzureStorageFSURL + import AzureStorageFS.log def validUrl(filename: String): Boolean = { @@ -175,8 +206,7 @@ class AzureStorageFS(val credentialsJSON: Option[String] = None) extends FS { f } catch { case e: BlobStorageException if e.getStatusCode == 401 => - val url = AzureStorageFS.parseUrl(filename) - serviceClientCache.setPublicAccessServiceClient(url.account, url.container) + serviceClientCache.setPublicAccessServiceClient(AzureStorageFS.parseUrl(filename)) f } } @@ -205,16 +235,12 @@ class AzureStorageFS(val credentialsJSON: Option[String] = None) extends FS { // https://docs.microsoft.com/en-us/rest/api/storageservices/setting-timeouts-for-blob-service-operations private val timeout = Duration.ofSeconds(30) - def getBlobServiceClient(account: String, container: String): BlobServiceClient = retryTransientErrors { - serviceClientCache.getServiceClient(account, container) - } - def getBlobClient(url: AzureStorageFSURL): BlobClient = retryTransientErrors { - getBlobServiceClient(url.account, url.container).getBlobContainerClient(url.container).getBlobClient(url.path) + serviceClientCache.getServiceClient(url).getBlobContainerClient(url.container).getBlobClient(url.path) } def getContainerClient(url: AzureStorageFSURL): BlobContainerClient = retryTransientErrors { - getBlobServiceClient(url.account, url.container).getBlobContainerClient(url.container) + serviceClientCache.getServiceClient(url).getBlobContainerClient(url.container) } def openNoCompression(filename: String, _debug: Boolean): SeekableDataInputStream = handlePublicAccessError(filename) { @@ -361,12 +387,12 @@ class AzureStorageFS(val credentialsJSON: Option[String] = None) extends FS { def glob(filename: String): Array[FileStatus] = handlePublicAccessError(filename) { val url = AzureStorageFS.parseUrl(filename) - globWithPrefix(prefix = url.withoutPath(), path = dropTrailingSlash(url.path)) + globWithPrefix(prefix = url.withPath(""), path = dropTrailingSlash(url.path)) } - def fileStatus(url: AzureStorageFSURL): FileStatus = retryTransientErrors { + override def fileStatus(url: AzureStorageFSURL): FileStatus = retryTransientErrors { if (url.path == "") { - return new BlobStorageFileStatus(url.withoutPath.toString, null, 0, true) + return new BlobStorageFileStatus(url.toString, null, 0, true) } val blobClient: BlobClient = 
getBlobClient(url) diff --git a/hail/src/main/scala/is/hail/io/fs/FS.scala b/hail/src/main/scala/is/hail/io/fs/FS.scala index 5eba0300eb0..88287d7f11f 100644 --- a/hail/src/main/scala/is/hail/io/fs/FS.scala +++ b/hail/src/main/scala/is/hail/io/fs/FS.scala @@ -57,6 +57,14 @@ class WrappedPositionOutputStream(os: OutputStream) extends OutputStream with Po def getPosition: Long = count } +trait FSURL[T <: FSURL[T]] { + def getPath: String + def addPathComponent(component: String): T + def fromString(s: String): T + + override def toString(): String +} + trait FileStatus { def getPath: String def getModificationTime: java.lang.Long @@ -257,6 +265,7 @@ object FS { } trait FS extends Serializable { + type URL <: FSURL[URL] def validUrl(filename: String): Boolean @@ -336,9 +345,11 @@ trait FS extends Serializable { def listStatus(filename: String): Array[FileStatus] + def listStatus(url: URL): Array[FileStatus] = listStatus(url.toString) + def glob(filename: String): Array[FileStatus] - def globWithPrefix(prefix: String, path: String) = { + def globWithPrefix(prefix: URL, path: String) = { val components = if (path == "") Array.empty[String] @@ -348,8 +359,8 @@ trait FS extends Serializable { val javaFS = FileSystems.getDefault val ab = new mutable.ArrayBuffer[FileStatus]() - def f(prefix: String, fs: FileStatus, i: Int): Unit = { - assert(!prefix.endsWith("/"), prefix) + def f(prefix: URL, fs: FileStatus, i: Int): Unit = { + assert(!prefix.getPath.endsWith("/"), prefix) if (i == components.length) { var t = fs @@ -370,17 +381,17 @@ trait FS extends Serializable { val m = javaFS.getPathMatcher(s"glob:$c") for (cfs <- listStatus(prefix)) { val p = dropTrailingSlash(cfs.getPath) - val d = p.drop(prefix.length + 1) + val d = p.drop(prefix.toString.length + 1) if (m.matches(javaFS.getPath(d))) { - f(p, cfs, i + 1) + f(prefix.fromString(p), cfs, i + 1) } } } else - f(s"$prefix/$c", null, i + 1) + f(prefix.addPathComponent(c), null, i + 1) } } - f(s"$prefix", null, 0) + f(prefix, null, 0) ab.toArray } @@ -391,6 +402,8 @@ trait FS extends Serializable { def fileStatus(filename: String): FileStatus + def fileStatus(url: URL): FileStatus = fileStatus(url.toString) + def makeQualified(path: String): String def deleteOnExit(filename: String): Unit = { diff --git a/hail/src/main/scala/is/hail/io/fs/GoogleStorageFS.scala b/hail/src/main/scala/is/hail/io/fs/GoogleStorageFS.scala index c0c2c5d76c5..48423b62afa 100644 --- a/hail/src/main/scala/is/hail/io/fs/GoogleStorageFS.scala +++ b/hail/src/main/scala/is/hail/io/fs/GoogleStorageFS.scala @@ -4,7 +4,7 @@ package is.hail.io.fs import java.io.{ByteArrayInputStream, FileNotFoundException, IOException} import java.net.URI import java.nio.ByteBuffer -import java.nio.file.FileSystems +import java.nio.file.Paths import java.util.concurrent._ import org.apache.log4j.Logger import com.google.auth.oauth2.ServiceAccountCredentials @@ -22,23 +22,45 @@ import scala.collection.mutable import scala.{concurrent => scalaConcurrent} import scala.reflect.ClassTag -object GoogleStorageFS { - private val log = Logger.getLogger(getClass.getName()) - def getBucketPath(filename: String): (String, String) = { - val uri = new URI(filename).normalize() +case class GoogleStorageFSURL(val bucket: String, val path: String) extends FSURL[GoogleStorageFSURL] { + def addPathComponent(c: String): GoogleStorageFSURL = { + if (path == "") + withPath(c) + else + withPath(s"$path/$c") + } + def withPath(newPath: String): GoogleStorageFSURL = GoogleStorageFSURL(bucket, newPath) + def fromString(s: 
String): GoogleStorageFSURL = GoogleStorageFS.parseUrl(s) - val scheme = uri.getScheme - assert(scheme != null && scheme == "gs", (uri.getScheme, filename)) + def getPath: String = path - val bucket = uri.getAuthority - assert(bucket != null, (filename, uri.toString(), uri.getScheme, uri.getAuthority, uri.getRawAuthority(), uri.getUserInfo())) + override def toString(): String = if (path.isEmpty) { + s"gs://$bucket" + } else { + s"gs://$bucket/$path" + } +} + + +object GoogleStorageFS { + private val log = Logger.getLogger(getClass.getName()) + private[this] val GCS_URI_REGEX = "^gs:\\/\\/([a-z0-9_\\-\\.]+)(\\/.*)?".r - var path = uri.getPath - if (path.nonEmpty && path.head == '/') - path = path.drop(1) + def parseUrl(filename: String): GoogleStorageFSURL = { + val scheme = new URI(filename).getScheme + if (scheme == null || scheme != "gs") { + throw new IllegalArgumentException(s"Invalid scheme, expected gs: $scheme") + } - (bucket, path) + GCS_URI_REGEX.findFirstMatchIn(filename) match { + case Some(m) => + val bucket = m.group(1) + val maybePath = m.group(2) + val path = Paths.get(if (maybePath == null) "" else maybePath.stripPrefix("/")) + GoogleStorageFSURL(bucket, path.normalize().toString) + case None => throw new IllegalArgumentException(s"GCS URI must be of the form: gs://bucket/path, found $filename") + } } } @@ -78,6 +100,7 @@ object RequesterPaysConfiguration { } } + case class RequesterPaysConfiguration( val project: String, val buckets: Option[Set[String]] = None @@ -87,6 +110,8 @@ class GoogleStorageFS( private[this] val serviceAccountKey: Option[String] = None, private[this] var requesterPaysConfiguration: Option[RequesterPaysConfiguration] = None ) extends FS { + type URL = GoogleStorageFSURL + import GoogleStorageFS._ def validUrl(filename: String): Boolean = { @@ -194,7 +219,7 @@ class GoogleStorageFS( def openNoCompression(filename: String, _debug: Boolean = false): SeekableDataInputStream = retryTransientErrors { assert(!_debug) - val (bucket, path) = getBucketPath(filename) + val url = parseUrl(filename) val is: SeekableInputStream = new FSSeekableInputStream { private[this] var reader: ReadChannel = null @@ -212,12 +237,12 @@ class GoogleStorageFS( } else { handleRequesterPays( { (options: Seq[BlobSourceOption]) => - reader = retryTransientErrors { storage.reader(bucket, path, options:_*) } + reader = retryTransientErrors { storage.reader(url.bucket, url.path, options:_*) } reader.seek(getPosition) retryingRead() }, BlobSourceOption.userProject _, - bucket + url.bucket ) } } @@ -259,15 +284,15 @@ class GoogleStorageFS( } override def readNoCompression(filename: String): Array[Byte] = retryTransientErrors { - val (bucket, path) = getBucketPath(filename) - storage.readAllBytes(bucket, path) + val url = parseUrl(filename) + storage.readAllBytes(url.bucket, url.path) } def createNoCompression(filename: String): PositionedDataOutputStream = retryTransientErrors { log.info(f"createNoCompression: ${filename}") - val (bucket, path) = getBucketPath(filename) + val url = parseUrl(filename) - val blobId = BlobId.of(bucket, path) + val blobId = BlobId.of(url.bucket, url.path) val blobInfo = BlobInfo.newBuilder(blobId) .build() @@ -284,7 +309,7 @@ class GoogleStorageFS( f }, BlobWriteOption.userProject _, - bucket + url.bucket ) } } @@ -319,10 +344,10 @@ class GoogleStorageFS( } override def copy(src: String, dst: String, deleteSource: Boolean = false): Unit = { - val (srcBucket, srcPath) = getBucketPath(src) - val (dstBucket, dstPath) = getBucketPath(dst) - val srcId = 
BlobId.of(srcBucket, srcPath) - val dstId = BlobId.of(dstBucket, dstPath) + val srcUrl = parseUrl(src) + val dstUrl = parseUrl(dst) + val srcId = BlobId.of(srcUrl.bucket, srcUrl.path) + val dstId = BlobId.of(dstUrl.bucket, dstUrl.path) // There is only one userProject for the whole request, the source takes precedence over the target. // https://github.com/googleapis/java-storage/blob/0bd17b1f70e47081941a44f018e3098b37ba2c47/google-cloud-storage/src/main/java/com/google/cloud/storage/spi/v1/HttpStorageRpc.java#L1016-L1019 @@ -346,14 +371,14 @@ class GoogleStorageFS( .setTarget(dstId) .build() case Some(RequesterPaysConfiguration(project, Some(buckets))) => - if (buckets.contains(srcBucket) && buckets.contains(dstBucket)) { + if (buckets.contains(srcUrl.bucket) && buckets.contains(dstUrl.bucket)) { Storage.CopyRequest.newBuilder() .setSourceOptions(BlobSourceOption.userProject(project)) .setSource(srcId) .setTarget(dstId) .build() - } else if (buckets.contains(srcBucket) || buckets.contains(dstBucket)) { - throw new RuntimeException(s"both $srcBucket and $dstBucket must be specified in the requester_pays_buckets to copy between these buckets", exc) + } else if (buckets.contains(srcUrl.bucket) || buckets.contains(dstUrl.bucket)) { + throw new RuntimeException(s"both ${srcUrl.bucket} and ${dstUrl.bucket} must be specified in the requester_pays_buckets to copy between these buckets", exc) } else { throw exc } @@ -390,13 +415,13 @@ class GoogleStorageFS( } def delete(filename: String, recursive: Boolean): Unit = retryTransientErrors { - val (bucket, path) = getBucketPath(filename) + val url = parseUrl(filename) if (recursive) { var page = retryTransientErrors { handleRequesterPays( - (options: Seq[BlobListOption]) => storage.list(bucket, (BlobListOption.prefix(path) +: options):_*), + (options: Seq[BlobListOption]) => storage.list(url.bucket, (BlobListOption.prefix(url.path) +: options):_*), BlobListOption.userProject _, - bucket + url.bucket ) } while (page != null) { @@ -412,7 +437,7 @@ class GoogleStorageFS( } }, BlobSourceOption.userProject _, - bucket + url.bucket ) } } @@ -421,30 +446,28 @@ class GoogleStorageFS( } else { // Storage.delete is idempotent. 
it returns a Boolean which is false if the file did not exist handleRequesterPays( - (options: Seq[BlobSourceOption]) => storage.delete(bucket, path, options:_*), + (options: Seq[BlobSourceOption]) => storage.delete(url.bucket, url.path, options:_*), BlobSourceOption.userProject _, - bucket + url.bucket ) } } def glob(filename: String): Array[FileStatus] = retryTransientErrors { - var (bucket, path) = getBucketPath(filename) - path = dropTrailingSlash(path) - - globWithPrefix(prefix = s"gs://$bucket", path = path) + val url = parseUrl(filename) + globWithPrefix(url.withPath(""), path = dropTrailingSlash(url.path)) } - def listStatus(filename: String): Array[FileStatus] = retryTransientErrors { - var (bucket, path) = getBucketPath(filename) - if (!path.endsWith("/")) - path = path + "/" + def listStatus(filename: String): Array[FileStatus] = listStatus(parseUrl(filename)) + + override def listStatus(url: GoogleStorageFSURL): Array[FileStatus] = retryTransientErrors { + val path = if (url.path.endsWith("/")) url.path else url.path + "/" val blobs = retryTransientErrors { handleRequesterPays( - (options: Seq[BlobListOption]) => storage.list(bucket, (BlobListOption.prefix(path) +: BlobListOption.currentDirectory() +: options):_*), + (options: Seq[BlobListOption]) => storage.list(url.bucket, (BlobListOption.prefix(path) +: BlobListOption.currentDirectory() +: options):_*), BlobListOption.userProject _, - bucket + url.bucket ) } @@ -454,18 +477,19 @@ class GoogleStorageFS( .toArray } - def fileStatus(filename: String): FileStatus = retryTransientErrors { - var (bucket, path) = getBucketPath(filename) - path = dropTrailingSlash(path) + def fileStatus(filename: String): FileStatus = fileStatus(parseUrl(filename)) - if (path == "") - return new BlobStorageFileStatus(s"gs://$bucket", null, 0, true) + override def fileStatus(url: GoogleStorageFSURL): FileStatus = retryTransientErrors { + val path = dropTrailingSlash(url.path) + + if (url.path == "") + return new BlobStorageFileStatus(s"gs://${url.bucket}", null, 0, true) val blobs = retryTransientErrors { handleRequesterPays( - (options: Seq[BlobListOption]) => storage.list(bucket, (BlobListOption.prefix(path) +: BlobListOption.currentDirectory() +: options):_*), + (options: Seq[BlobListOption]) => storage.list(url.bucket, (BlobListOption.prefix(path) +: BlobListOption.currentDirectory() +: options):_*), BlobListOption.userProject _, - bucket + url.bucket ) } @@ -479,7 +503,7 @@ class GoogleStorageFS( return GoogleStorageFileStatus(b) } - throw new FileNotFoundException(filename) + throw new FileNotFoundException(url.toString) } def makeQualified(filename: String): String = { diff --git a/hail/src/main/scala/is/hail/io/fs/HadoopFS.scala b/hail/src/main/scala/is/hail/io/fs/HadoopFS.scala index bb3e898493c..7a74c696196 100644 --- a/hail/src/main/scala/is/hail/io/fs/HadoopFS.scala +++ b/hail/src/main/scala/is/hail/io/fs/HadoopFS.scala @@ -69,7 +69,18 @@ object HadoopFS { } } + +case class LocalFSURL(val path: String) extends FSURL[LocalFSURL] { + def addPathComponent(c: String): LocalFSURL = LocalFSURL(s"$path/$c") + def getPath: String = path + def fromString(s: String): LocalFSURL = LocalFSURL(s) + override def toString(): String = path +} + + class HadoopFS(private[this] var conf: SerializableHadoopConfiguration) extends FS { + type URL = LocalFSURL + def validUrl(filename: String): Boolean = { val uri = new java.net.URI(filename) uri.getScheme == null || uri.getScheme == "file" diff --git a/hail/src/main/scala/is/hail/io/fs/RouterFS.scala 
b/hail/src/main/scala/is/hail/io/fs/RouterFS.scala index 19227b18288..aff5b27ed4b 100644 --- a/hail/src/main/scala/is/hail/io/fs/RouterFS.scala +++ b/hail/src/main/scala/is/hail/io/fs/RouterFS.scala @@ -1,6 +1,8 @@ package is.hail.io.fs class RouterFS(fss: IndexedSeq[FS]) extends FS { + // This is never actually used + type URL = LocalFSURL def lookupFS(path: String): FS = { fss.find(_.validUrl(path)) match { diff --git a/hail/src/main/scala/is/hail/io/index/IndexWriter.scala b/hail/src/main/scala/is/hail/io/index/IndexWriter.scala index be777688fe0..f89ddb44517 100644 --- a/hail/src/main/scala/is/hail/io/index/IndexWriter.scala +++ b/hail/src/main/scala/is/hail/io/index/IndexWriter.scala @@ -73,7 +73,7 @@ case class IndexMetadata( ) extends AbstractIndexMetadata object IndexWriter { - val version: SemanticVersion = SemanticVersion(1, 1, 0) + val version: SemanticVersion = SemanticVersion(1, 2, 0) val spec: BufferSpec = BufferSpec.default def builder( diff --git a/hail/src/main/scala/is/hail/linalg/LAPACK.scala b/hail/src/main/scala/is/hail/linalg/LAPACK.scala index 4e51e51a013..842e0a44b96 100644 --- a/hail/src/main/scala/is/hail/linalg/LAPACK.scala +++ b/hail/src/main/scala/is/hail/linalg/LAPACK.scala @@ -2,9 +2,8 @@ package is.hail.linalg import java.lang.reflect.Method import java.util.function._ - import com.sun.jna.{FunctionMapper, Library, Native, NativeLibrary} -import com.sun.jna.ptr.IntByReference +import com.sun.jna.ptr.{IntByReference, DoubleByReference} import scala.util.{Failure, Success, Try} import is.hail.utils._ @@ -200,6 +199,25 @@ object LAPACK { INFOref.getValue() } + def dsyevr(jobz: String, range: String, uplo: String, n: Int, A: Long, ldA: Int, vl: Double, vu: Double, il: Int, iu: Int, abstol: Double, W: Long, Z: Long, ldZ: Int, ISuppZ: Long, Work: Long, lWork: Int, IWork: Long, lIWork: Int): Int = { + val nRef = new IntByReference(n) + val ldARef = new IntByReference(ldA) + val vlRef = new DoubleByReference(vl) + val vuRef = new DoubleByReference(vu) + val ilRef = new IntByReference(il) + val iuRef = new IntByReference(iu) + val abstolRef = new DoubleByReference(abstol) + val ldZRef = new IntByReference(ldZ) + val lWorkRef = new IntByReference(lWork) + val lIWorkRef = new IntByReference(lIWork) + val INFOref = new IntByReference(1) + val mRef = new IntByReference(0) + + libraryInstance.get.dsyevr(jobz, range, uplo, nRef, A, ldARef, vlRef, vuRef, ilRef, iuRef, abstolRef, mRef, W, Z, ldZRef, ISuppZ, Work, lWorkRef, IWork, lIWorkRef, INFOref) + + INFOref.getValue() + } + def dtrtrs(UPLO: String, TRANS: String, DIAG: String, N: Int, NRHS: Int, A: Long, LDA: Int, B: Long, LDB: Int): Int = { val Nref = new IntByReference(N) @@ -254,6 +272,7 @@ trait LAPACKLibrary extends Library { def dgetrf(M: IntByReference, N: IntByReference, A: Long, LDA: IntByReference, IPIV: Long, INFO: IntByReference) def dgetri(N: IntByReference, A: Long, LDA: IntByReference, IPIV: Long, WORK: Long, LWORK: IntByReference, INFO: IntByReference) def dgesdd(JOBZ: String, M: IntByReference, N: IntByReference, A: Long, LDA: IntByReference, S: Long, U: Long, LDU: IntByReference, VT: Long, LDVT: IntByReference, WORK: Long, LWORK: IntByReference, IWORK: Long, INFO: IntByReference) + def dsyevr(jobz: String, range: String, uplo: String, n: IntByReference, A: Long, ldA: IntByReference, vl: DoubleByReference, vu: DoubleByReference, il: IntByReference, iu: IntByReference, abstol: DoubleByReference, m: IntByReference, W: Long, Z: Long, ldZ: IntByReference, ISuppZ: Long, Work: Long, lWork: IntByReference, 
  def ilaver(MAJOR: IntByReference, MINOR: IntByReference, PATCH: IntByReference)
  def ilaenv(ispec: IntByReference, name: String, opts: String, n1: IntByReference, n2: IntByReference, n3: IntByReference, n4: IntByReference): Int
  def dtrtrs(UPLO: String, TRANS: String, DIAG: String, N: IntByReference, NRHS: IntByReference, A: Long, LDA: IntByReference, B: Long, LDB: IntByReference, INFO: IntByReference)
diff --git a/hail/src/main/scala/is/hail/rvd/RVD.scala b/hail/src/main/scala/is/hail/rvd/RVD.scala
index 49583eaa809..a033d309b79 100644
--- a/hail/src/main/scala/is/hail/rvd/RVD.scala
+++ b/hail/src/main/scala/is/hail/rvd/RVD.scala
@@ -825,25 +825,6 @@ class RVD(
       typ.copy(rowType = newRowType))
   }

-  def orderedJoin(
-    right: RVD,
-    joinType: String,
-    joiner: (RVDContext, Iterator[JoinedRegionValue]) => Iterator[RegionValue],
-    joinedType: RVDType,
-    ctx: ExecuteContext
-  ): RVD =
-    orderedJoin(right, typ.key.length, joinType, joiner, joinedType, ctx)
-
-  def orderedJoin(
-    right: RVD,
-    joinKey: Int,
-    joinType: String,
-    joiner: (RVDContext, Iterator[JoinedRegionValue]) => Iterator[RegionValue],
-    joinedType: RVDType,
-    ctx: ExecuteContext
-  ): RVD =
-    keyBy(joinKey).orderedJoin(right.keyBy(joinKey), joinType, joiner, joinedType, ctx)
-
   def orderedLeftJoinDistinct(
     right: RVD,
     joinKey: Int,
diff --git a/hail/src/main/scala/is/hail/services/batch_client/BatchClient.scala b/hail/src/main/scala/is/hail/services/batch_client/BatchClient.scala
index 05c5608a04c..05e0948c2c0 100644
--- a/hail/src/main/scala/is/hail/services/batch_client/BatchClient.scala
+++ b/hail/src/main/scala/is/hail/services/batch_client/BatchClient.scala
@@ -174,6 +174,8 @@ class BatchClient(
   def waitForBatch(batchID: Long, excludeDriverJobInBatch: Boolean): JValue = {
     implicit val formats: Formats = DefaultFormats

+    Thread.sleep(600) // a batch can never finish in under 600ms, so skip the guaranteed-futile first poll
+
     val start = System.nanoTime()

     while (true) {
diff --git a/hail/src/main/scala/is/hail/services/tls/package.scala b/hail/src/main/scala/is/hail/services/tls/package.scala
index aad94e6eb54..5b45da87e62 100644
--- a/hail/src/main/scala/is/hail/services/tls/package.scala
+++ b/hail/src/main/scala/is/hail/services/tls/package.scala
@@ -55,6 +55,7 @@ package object tls {

   def setSSLConfigFromDir(configDir: String) = {
     _getSSLConfig = sslConfigFromDir(configDir)
+    log.info("TLS configured.")
   }

   def getSSLConfig(): SSLConfig = {
diff --git a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SNDArray.scala b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SNDArray.scala
index 13128749fed..405ce749f74 100644
--- a/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SNDArray.scala
+++ b/hail/src/main/scala/is/hail/types/physical/stypes/interfaces/SNDArray.scala
@@ -8,6 +8,7 @@ import is.hail.types.physical.stypes.concrete.{SNDArraySlice, SNDArraySliceValue
 import is.hail.types.physical.stypes.primitives.SInt64Value
 import is.hail.types.physical.stypes.{EmitType, SSettable, SType, SValue}
 import is.hail.types.physical.{PCanonicalNDArray, PNDArray, PNumeric, PPrimitive, PType}
+import is.hail.types.virtual.TInt32
 import is.hail.types.{RNDArray, TypeWithRequiredness}
 import is.hail.utils.{FastIndexedSeq, toRichIterable, valueToRichCodeRegion}

@@ -626,6 +627,63 @@ object SNDArray {
       work.firstDataAddress, lwork.toI))
     cb.ifx(info.cne(0), cb._fatal(s"LAPACK error DGEQRF. Error code = ", info.toS))
   }
+
+  def syevr_query(cb: EmitCodeBuilder, jobz: String, uplo: String, n: Value[Int], region: Value[Region]): (SizeValue, SizeValue) = {
+    val WorkAddress = cb.memoize(region.allocate(8L, 8L))
+    val IWorkAddress = cb.memoize(region.allocate(4L, 4L))
+    val info = cb.memoize(Code.invokeScalaObject19[String, String, String, Int, Long, Int, Double, Double, Int, Int, Double, Long, Long, Int, Long, Long, Int, Long, Int, Int](LAPACK.getClass, "dsyevr",
+      jobz, "A", uplo,
+      n, 0, n,
+      0, 0, 0, 0,
+      0,
+      0, 0, n,
+      0,
+      WorkAddress, -1,
+      IWorkAddress, -1))
+    cb.ifx(info.cne(0), cb._fatal(s"LAPACK error DSYEVR. Failed size query. Error code = ", info.toS))
+    val LWork = cb.memoize(Region.loadDouble(WorkAddress).toL)
+    val LIWork = cb.memoize(Region.loadInt(IWorkAddress).toL)
+    (SizeValueDyn(cb.memoize((LWork > 0).mux(LWork, 1))), SizeValueDyn(cb.memoize((LIWork > 0).mux(LIWork, 1))))
+  }
+
+  def syevr(cb: EmitCodeBuilder, uplo: String, A: SNDArrayValue, W: SNDArrayValue, Z: Option[(SNDArrayValue, SNDArrayValue)], Work: SNDArrayValue, IWork: SNDArrayValue): Unit = {
+    assertMatrix(A)
+    assertColMajor(cb, "syevr", A)
+    assertVector(W, Work, IWork)
+    assert(IWork.pt.elementType.virtualType == TInt32)
+
+    val n = A.shapes(0)
+    A.assertHasShape(cb, Array(n, n), "syevr: A must be square")
+    W.assertHasShape(cb, Array(n), "syevr: W has wrong size")
+
+    val ldA = A.eltStride(1).max(1)
+    val lWork = Work.shapes(0)
+    val lIWork = IWork.shapes(0)
+
+    val (jobz, zAddr: Value[Long], ldZ: Code[Int], iSuppZAddr: Value[Long]) = Z match {
+      case Some((z, iSuppZ)) =>
+        assertVector(iSuppZ)
+        assertMatrix(z)
+
+        z.assertHasShape(cb, Array(n, n), "syevr: Z has wrong size")
+        iSuppZ.assertHasShape(cb, IndexedSeq(SizeValueDyn(cb.memoize(n * 2))), "syevr: ISuppZ has wrong size")
+
+        ("V", z.firstDataAddress, z.eltStride(1).max(1), iSuppZ.firstDataAddress)
+      case None =>
+        ("N", const(0L), const(1).get, const(0L))
+    }
+
+    val info = cb.memoize(Code.invokeScalaObject19[String, String, String, Int, Long, Int, Double, Double, Int, Int, Double, Long, Long, Int, Long, Long, Int, Long, Int, Int](LAPACK.getClass, "dsyevr",
+      jobz, "A", uplo,
+      n.toI, A.firstDataAddress, ldA,
+      0, 0, 0, 0,
+      0,
+      W.firstDataAddress, zAddr, ldZ,
+      iSuppZAddr,
+      Work.firstDataAddress, lWork.toI,
+      IWork.firstDataAddress, lIWork.toI))
+    cb.ifx(info.cne(0), cb._fatal(s"LAPACK error DSYEVR. Error code = ", info.toS))
+  }
 }

@@ -751,7 +809,14 @@ trait SNDArrayValue extends SValue {

   def assertHasShape(cb: EmitCodeBuilder, otherShape: IndexedSeq[SizeValue], msg: Code[String]*) =
     if (!hasShapeStatic(otherShape))
-      cb.ifx(!hasShape(cb, otherShape), cb._fatal(msg: _*))
+      cb.ifx(!hasShape(cb, otherShape),
+        cb._fatal(
+          msg ++
+            (const("\nExpected shape ").get +:
+              otherShape.map(_.toS).intersperse[Code[String]]("(", ",", ")")) ++
+            (const(", found ").get +:
+              shapes.map(_.toS).intersperse[Code[String]]("(", ",", ")")): _*,
+        ))

   // True IFF shape can be proven equal to otherShape statically
   def hasShapeStatic(otherShape: IndexedSeq[SizeValue]): Boolean =
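For readers unfamiliar with DSYEVR (standard LAPACK semantics, not anything introduced by this change): with jobz = "V" and range = "A" it computes the full eigendecomposition of a real symmetric matrix,

A = Z \Lambda Z^{\mathsf{T}}, \qquad \Lambda = \operatorname{diag}(w_1 \le \cdots \le w_n), \qquad Z^{\mathsf{T}} Z = I_n,

and a call with lWork = lIWork = -1, as in syevr_query above, performs no computation but writes the optimal workspace lengths into the first elements of Work and IWork. That is why the query wrapper allocates a single double and a single int in the region and reads them back, clamping each length to at least 1.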
Error code = ", info.toS)) } + + def syevr_query(cb: EmitCodeBuilder, jobz: String, uplo: String, n: Value[Int], region: Value[Region]): (SizeValue, SizeValue) = { + val WorkAddress = cb.memoize(region.allocate(8L, 8L)) + val IWorkAddress = cb.memoize(region.allocate(4L, 4L)) + val info = cb.memoize(Code.invokeScalaObject19[String, String, String, Int, Long, Int, Double, Double, Int, Int, Double, Long, Long, Int, Long, Long, Int, Long, Int, Int](LAPACK.getClass, "dsyevr", + jobz, "A", uplo, + n, 0, n, + 0, 0, 0, 0, + 0, + 0, 0, n, + 0, + WorkAddress, -1, + IWorkAddress, -1)) + cb.ifx(info.cne(0), cb._fatal(s"LAPACK error DSYEVR. Failed size query. Error code = ", info.toS)) + val LWork = cb.memoize(Region.loadDouble(WorkAddress).toL) + val LIWork = cb.memoize(Region.loadInt(IWorkAddress).toL) + (SizeValueDyn(cb.memoize((LWork > 0).mux(LWork, 1))), SizeValueDyn(cb.memoize((LIWork > 0).mux(LIWork, 1)))) + } + + def syevr(cb: EmitCodeBuilder, uplo: String, A: SNDArrayValue, W: SNDArrayValue, Z: Option[(SNDArrayValue, SNDArrayValue)], Work: SNDArrayValue, IWork: SNDArrayValue): Unit = { + assertMatrix(A) + assertColMajor(cb, "orgqr", A) + assertVector(W, Work, IWork) + assert(IWork.pt.elementType.virtualType == TInt32) + + val n = A.shapes(0) + A.assertHasShape(cb, Array(n, n), "syevr: A must be square") + W.assertHasShape(cb, Array(n), "syevr: W has wrong size") + + val ldA = A.eltStride(1).max(1) + val lWork = Work.shapes(0) + val lIWork = IWork.shapes(0) + + val (jobz, zAddr: Value[Long], ldZ: Code[Int], iSuppZAddr: Value[Long]) = Z match { + case Some((z, iSuppZ)) => + assertVector(iSuppZ) + assertMatrix(z) + + z.assertHasShape(cb, Array(n, n), "syevr: Z has wrong size") + iSuppZ.assertHasShape(cb, IndexedSeq(SizeValueDyn(cb.memoize(n * 2))), "syevr: ISuppZ has wrong size") + + ("V", z.firstDataAddress, z.eltStride(1).max(1), iSuppZ.firstDataAddress) + case None => + ("N", const(0L), const(1).get, const(0L)) + } + + val info = cb.memoize(Code.invokeScalaObject19[String, String, String, Int, Long, Int, Double, Double, Int, Int, Double, Long, Long, Int, Long, Long, Int, Long, Int, Int](LAPACK.getClass, "dsyevr", + jobz, "A", uplo, + n.toI, A.firstDataAddress, ldA, + 0, 0, 0, 0, + 0, + W.firstDataAddress, zAddr, ldZ, + iSuppZAddr, + Work.firstDataAddress, lWork.toI, + IWork.firstDataAddress, lIWork.toI)) + cb.ifx(info.cne(0), cb._fatal(s"LAPACK error DSYEVR. Error code = ", info.toS)) + } } @@ -751,7 +809,14 @@ trait SNDArrayValue extends SValue { def assertHasShape(cb: EmitCodeBuilder, otherShape: IndexedSeq[SizeValue], msg: Code[String]*) = if (!hasShapeStatic(otherShape)) - cb.ifx(!hasShape(cb, otherShape), cb._fatal(msg: _*)) + cb.ifx(!hasShape(cb, otherShape), + cb._fatal( + msg ++ + (const("\nExpected shape ").get +: + shapes.map(_.toS).intersperse[Code[String]]("(", ",", ")")) ++ + (const(", found ").get +: + otherShape.map(_.toS).intersperse[Code[String]]("(", ",", ")")): _*, + )) // True IFF shape can be proven equal to otherShape statically def hasShapeStatic(otherShape: IndexedSeq[SizeValue]): Boolean = diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/README.txt new file mode 100644 index 00000000000..27ac7850704 --- /dev/null +++ b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. 
+ Written with version 0.2.115-cdba2731749a + Created at 2023/05/08 11:07:11 \ No newline at end of file diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/_SUCCESS b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/_SUCCESS new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/cols/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/cols/README.txt new file mode 100644 index 00000000000..27ac7850704 --- /dev/null +++ b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/cols/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. + Written with version 0.2.115-cdba2731749a + Created at 2023/05/08 11:07:11 \ No newline at end of file diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/cols/_SUCCESS b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/cols/_SUCCESS new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/cols/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/cols/metadata.json.gz new file mode 100644 index 00000000000..551d387e23c Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/cols/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/cols/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/cols/rows/metadata.json.gz new file mode 100644 index 00000000000..55272eaec8d Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/cols/rows/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/cols/rows/parts/part-0 b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/cols/rows/parts/part-0 new file mode 100644 index 00000000000..5bc03aad937 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/cols/rows/parts/part-0 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/entries/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/entries/README.txt new file mode 100644 index 00000000000..27ac7850704 --- /dev/null +++ b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/entries/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. 
+ Written with version 0.2.115-cdba2731749a + Created at 2023/05/08 11:07:11 \ No newline at end of file diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/entries/_SUCCESS b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/entries/_SUCCESS new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/entries/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/entries/metadata.json.gz new file mode 100644 index 00000000000..f5838df71df Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/entries/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/entries/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/entries/rows/metadata.json.gz new file mode 100644 index 00000000000..5ce9bc763d0 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/entries/rows/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/entries/rows/parts/part-0-b5ea616a-d06e-4749-a1f2-720c5590485e b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/entries/rows/parts/part-0-b5ea616a-d06e-4749-a1f2-720c5590485e new file mode 100644 index 00000000000..5ff731ea188 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/entries/rows/parts/part-0-b5ea616a-d06e-4749-a1f2-720c5590485e differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/entries/rows/parts/part-1-6a7fbc1c-d3ca-4cac-9836-692a6ce1e38d b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/entries/rows/parts/part-1-6a7fbc1c-d3ca-4cac-9836-692a6ce1e38d new file mode 100644 index 00000000000..57cb2010b3f Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/entries/rows/parts/part-1-6a7fbc1c-d3ca-4cac-9836-692a6ce1e38d differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/globals/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/globals/README.txt new file mode 100644 index 00000000000..27ac7850704 --- /dev/null +++ b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/globals/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. 
+ Written with version 0.2.115-cdba2731749a + Created at 2023/05/08 11:07:11 \ No newline at end of file diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/globals/_SUCCESS b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/globals/_SUCCESS new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/globals/globals/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/globals/globals/metadata.json.gz new file mode 100644 index 00000000000..dd8662b0072 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/globals/globals/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/globals/globals/parts/part-0 b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/globals/globals/parts/part-0 new file mode 100644 index 00000000000..a6d7bca41a8 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/globals/globals/parts/part-0 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/globals/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/globals/metadata.json.gz new file mode 100644 index 00000000000..bfcc49dabaa Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/globals/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/globals/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/globals/rows/metadata.json.gz new file mode 100644 index 00000000000..cb7abf5a23f Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/globals/rows/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/globals/rows/parts/part-0 b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/globals/rows/parts/part-0 new file mode 100644 index 00000000000..74f256d2ed2 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/globals/rows/parts/part-0 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/index/part-0-b5ea616a-d06e-4749-a1f2-720c5590485e.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/index/part-0-b5ea616a-d06e-4749-a1f2-720c5590485e.idx/index new file mode 100644 index 00000000000..f7659883d2b Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/index/part-0-b5ea616a-d06e-4749-a1f2-720c5590485e.idx/index differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/index/part-0-b5ea616a-d06e-4749-a1f2-720c5590485e.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/index/part-0-b5ea616a-d06e-4749-a1f2-720c5590485e.idx/metadata.json.gz new file mode 100644 index 00000000000..ac535f6bce6 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/index/part-0-b5ea616a-d06e-4749-a1f2-720c5590485e.idx/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/index/part-1-6a7fbc1c-d3ca-4cac-9836-692a6ce1e38d.idx/index 
b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/index/part-1-6a7fbc1c-d3ca-4cac-9836-692a6ce1e38d.idx/index new file mode 100644 index 00000000000..e88b918ac96 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/index/part-1-6a7fbc1c-d3ca-4cac-9836-692a6ce1e38d.idx/index differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/index/part-1-6a7fbc1c-d3ca-4cac-9836-692a6ce1e38d.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/index/part-1-6a7fbc1c-d3ca-4cac-9836-692a6ce1e38d.idx/metadata.json.gz new file mode 100644 index 00000000000..9749d4971c6 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/index/part-1-6a7fbc1c-d3ca-4cac-9836-692a6ce1e38d.idx/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/metadata.json.gz new file mode 100644 index 00000000000..acaa78da17e Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/rows/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/rows/README.txt new file mode 100644 index 00000000000..27ac7850704 --- /dev/null +++ b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/rows/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. + Written with version 0.2.115-cdba2731749a + Created at 2023/05/08 11:07:11 \ No newline at end of file diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/rows/_SUCCESS b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/rows/_SUCCESS new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/rows/metadata.json.gz new file mode 100644 index 00000000000..0eb70d8fecb Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/rows/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/rows/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/rows/rows/metadata.json.gz new file mode 100644 index 00000000000..35ece5dd6ec Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/rows/rows/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/rows/rows/parts/part-0-b5ea616a-d06e-4749-a1f2-720c5590485e b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/rows/rows/parts/part-0-b5ea616a-d06e-4749-a1f2-720c5590485e new file mode 100644 index 00000000000..5bc03aad937 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/rows/rows/parts/part-0-b5ea616a-d06e-4749-a1f2-720c5590485e differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/rows/rows/parts/part-1-6a7fbc1c-d3ca-4cac-9836-692a6ce1e38d 
b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/rows/rows/parts/part-1-6a7fbc1c-d3ca-4cac-9836-692a6ce1e38d new file mode 100644 index 00000000000..99a43510c4f Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/0.hmt/rows/rows/parts/part-1-6a7fbc1c-d3ca-4cac-9836-692a6ce1e38d differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/README.txt new file mode 100644 index 00000000000..0b45111a122 --- /dev/null +++ b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. + Written with version 0.2.115-cdba2731749a + Created at 2023/05/08 11:07:13 \ No newline at end of file diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/_SUCCESS b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/_SUCCESS new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/cols/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/cols/README.txt new file mode 100644 index 00000000000..0b45111a122 --- /dev/null +++ b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/cols/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. + Written with version 0.2.115-cdba2731749a + Created at 2023/05/08 11:07:13 \ No newline at end of file diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/cols/_SUCCESS b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/cols/_SUCCESS new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/cols/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/cols/metadata.json.gz new file mode 100644 index 00000000000..551d387e23c Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/cols/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/cols/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/cols/rows/metadata.json.gz new file mode 100644 index 00000000000..f6e8ec2465d Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/cols/rows/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/cols/rows/parts/part-0 b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/cols/rows/parts/part-0 new file mode 100644 index 00000000000..0cba1ba4fe4 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/cols/rows/parts/part-0 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/entries/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/entries/README.txt new file mode 100644 index 00000000000..0b45111a122 --- /dev/null +++ b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/entries/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. 
+ Written with version 0.2.115-cdba2731749a + Created at 2023/05/08 11:07:13 \ No newline at end of file diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/entries/_SUCCESS b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/entries/_SUCCESS new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/entries/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/entries/metadata.json.gz new file mode 100644 index 00000000000..f5838df71df Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/entries/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/entries/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/entries/rows/metadata.json.gz new file mode 100644 index 00000000000..eb9fcae555b Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/entries/rows/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/entries/rows/parts/part-0-14f2c855-f7e3-4c90-b453-9134ed45ff1f b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/entries/rows/parts/part-0-14f2c855-f7e3-4c90-b453-9134ed45ff1f new file mode 100644 index 00000000000..83ed60f4a38 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/entries/rows/parts/part-0-14f2c855-f7e3-4c90-b453-9134ed45ff1f differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/entries/rows/parts/part-1-7d4eaa41-9269-4f19-a96d-553c7c870f3b b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/entries/rows/parts/part-1-7d4eaa41-9269-4f19-a96d-553c7c870f3b new file mode 100644 index 00000000000..687108194e0 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/entries/rows/parts/part-1-7d4eaa41-9269-4f19-a96d-553c7c870f3b differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/globals/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/globals/README.txt new file mode 100644 index 00000000000..0b45111a122 --- /dev/null +++ b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/globals/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. 
+ Written with version 0.2.115-cdba2731749a + Created at 2023/05/08 11:07:13 \ No newline at end of file diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/globals/_SUCCESS b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/globals/_SUCCESS new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/globals/globals/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/globals/globals/metadata.json.gz new file mode 100644 index 00000000000..11c504763ad Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/globals/globals/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/globals/globals/parts/part-0 b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/globals/globals/parts/part-0 new file mode 100644 index 00000000000..a6d7bca41a8 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/globals/globals/parts/part-0 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/globals/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/globals/metadata.json.gz new file mode 100644 index 00000000000..bfcc49dabaa Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/globals/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/globals/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/globals/rows/metadata.json.gz new file mode 100644 index 00000000000..66ea919e545 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/globals/rows/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/globals/rows/parts/part-0 b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/globals/rows/parts/part-0 new file mode 100644 index 00000000000..41e9baf4516 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/globals/rows/parts/part-0 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/index/part-0-14f2c855-f7e3-4c90-b453-9134ed45ff1f.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/index/part-0-14f2c855-f7e3-4c90-b453-9134ed45ff1f.idx/index new file mode 100644 index 00000000000..7548a26c700 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/index/part-0-14f2c855-f7e3-4c90-b453-9134ed45ff1f.idx/index differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/index/part-0-14f2c855-f7e3-4c90-b453-9134ed45ff1f.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/index/part-0-14f2c855-f7e3-4c90-b453-9134ed45ff1f.idx/metadata.json.gz new file mode 100644 index 00000000000..80b4e358690 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/index/part-0-14f2c855-f7e3-4c90-b453-9134ed45ff1f.idx/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/index/part-1-7d4eaa41-9269-4f19-a96d-553c7c870f3b.idx/index 
b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/index/part-1-7d4eaa41-9269-4f19-a96d-553c7c870f3b.idx/index new file mode 100644 index 00000000000..e88b918ac96 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/index/part-1-7d4eaa41-9269-4f19-a96d-553c7c870f3b.idx/index differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/index/part-1-7d4eaa41-9269-4f19-a96d-553c7c870f3b.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/index/part-1-7d4eaa41-9269-4f19-a96d-553c7c870f3b.idx/metadata.json.gz new file mode 100644 index 00000000000..9749d4971c6 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/index/part-1-7d4eaa41-9269-4f19-a96d-553c7c870f3b.idx/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/metadata.json.gz new file mode 100644 index 00000000000..acaa78da17e Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/rows/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/rows/README.txt new file mode 100644 index 00000000000..0b45111a122 --- /dev/null +++ b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/rows/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. + Written with version 0.2.115-cdba2731749a + Created at 2023/05/08 11:07:13 \ No newline at end of file diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/rows/_SUCCESS b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/rows/_SUCCESS new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/rows/metadata.json.gz new file mode 100644 index 00000000000..0eb70d8fecb Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/rows/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/rows/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/rows/rows/metadata.json.gz new file mode 100644 index 00000000000..128080cd9ab Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/rows/rows/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/rows/rows/parts/part-0-14f2c855-f7e3-4c90-b453-9134ed45ff1f b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/rows/rows/parts/part-0-14f2c855-f7e3-4c90-b453-9134ed45ff1f new file mode 100644 index 00000000000..0cba1ba4fe4 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/rows/rows/parts/part-0-14f2c855-f7e3-4c90-b453-9134ed45ff1f differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/rows/rows/parts/part-1-7d4eaa41-9269-4f19-a96d-553c7c870f3b 
b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/rows/rows/parts/part-1-7d4eaa41-9269-4f19-a96d-553c7c870f3b new file mode 100644 index 00000000000..2a6b4eeff88 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/1.hmt/rows/rows/parts/part-1-7d4eaa41-9269-4f19-a96d-553c7c870f3b differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/README.txt new file mode 100644 index 00000000000..1373011851e --- /dev/null +++ b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. + Written with version 0.2.115-cdba2731749a + Created at 2023/05/08 11:07:16 \ No newline at end of file diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/_SUCCESS b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/_SUCCESS new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/cols/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/cols/README.txt new file mode 100644 index 00000000000..1373011851e --- /dev/null +++ b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/cols/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. + Written with version 0.2.115-cdba2731749a + Created at 2023/05/08 11:07:16 \ No newline at end of file diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/cols/_SUCCESS b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/cols/_SUCCESS new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/cols/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/cols/metadata.json.gz new file mode 100644 index 00000000000..551d387e23c Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/cols/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/cols/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/cols/rows/metadata.json.gz new file mode 100644 index 00000000000..d8619eba71e Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/cols/rows/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/cols/rows/parts/part-0 b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/cols/rows/parts/part-0 new file mode 100644 index 00000000000..cb534d6c12f Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/cols/rows/parts/part-0 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/entries/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/entries/README.txt new file mode 100644 index 00000000000..1373011851e --- /dev/null +++ b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/entries/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. 
+ Written with version 0.2.115-cdba2731749a + Created at 2023/05/08 11:07:16 \ No newline at end of file diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/entries/_SUCCESS b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/entries/_SUCCESS new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/entries/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/entries/metadata.json.gz new file mode 100644 index 00000000000..f5838df71df Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/entries/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/entries/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/entries/rows/metadata.json.gz new file mode 100644 index 00000000000..13d45d6de1a Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/entries/rows/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/entries/rows/parts/part-0-607e5a37-b8fd-4b2f-880d-5f0dc6d7e129 b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/entries/rows/parts/part-0-607e5a37-b8fd-4b2f-880d-5f0dc6d7e129 new file mode 100644 index 00000000000..9da8cdcf377 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/entries/rows/parts/part-0-607e5a37-b8fd-4b2f-880d-5f0dc6d7e129 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/entries/rows/parts/part-1-fe74d492-c36c-4b80-860e-d0090c9bdc04 b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/entries/rows/parts/part-1-fe74d492-c36c-4b80-860e-d0090c9bdc04 new file mode 100644 index 00000000000..7e26a8547c1 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/entries/rows/parts/part-1-fe74d492-c36c-4b80-860e-d0090c9bdc04 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/globals/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/globals/README.txt new file mode 100644 index 00000000000..1373011851e --- /dev/null +++ b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/globals/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. 
+ Written with version 0.2.115-cdba2731749a + Created at 2023/05/08 11:07:16 \ No newline at end of file diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/globals/_SUCCESS b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/globals/_SUCCESS new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/globals/globals/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/globals/globals/metadata.json.gz new file mode 100644 index 00000000000..b63734cc8e9 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/globals/globals/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/globals/globals/parts/part-0 b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/globals/globals/parts/part-0 new file mode 100644 index 00000000000..9168f10a03b Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/globals/globals/parts/part-0 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/globals/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/globals/metadata.json.gz new file mode 100644 index 00000000000..bfcc49dabaa Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/globals/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/globals/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/globals/rows/metadata.json.gz new file mode 100644 index 00000000000..dba604385c3 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/globals/rows/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/globals/rows/parts/part-0 b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/globals/rows/parts/part-0 new file mode 100644 index 00000000000..b787015fdbd Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/globals/rows/parts/part-0 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/index/part-0-607e5a37-b8fd-4b2f-880d-5f0dc6d7e129.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/index/part-0-607e5a37-b8fd-4b2f-880d-5f0dc6d7e129.idx/index new file mode 100644 index 00000000000..f7659883d2b Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/index/part-0-607e5a37-b8fd-4b2f-880d-5f0dc6d7e129.idx/index differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/index/part-0-607e5a37-b8fd-4b2f-880d-5f0dc6d7e129.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/index/part-0-607e5a37-b8fd-4b2f-880d-5f0dc6d7e129.idx/metadata.json.gz new file mode 100644 index 00000000000..ac535f6bce6 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/index/part-0-607e5a37-b8fd-4b2f-880d-5f0dc6d7e129.idx/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/index/part-1-fe74d492-c36c-4b80-860e-d0090c9bdc04.idx/index 
b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/index/part-1-fe74d492-c36c-4b80-860e-d0090c9bdc04.idx/index new file mode 100644 index 00000000000..e88b918ac96 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/index/part-1-fe74d492-c36c-4b80-860e-d0090c9bdc04.idx/index differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/index/part-1-fe74d492-c36c-4b80-860e-d0090c9bdc04.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/index/part-1-fe74d492-c36c-4b80-860e-d0090c9bdc04.idx/metadata.json.gz new file mode 100644 index 00000000000..9749d4971c6 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/index/part-1-fe74d492-c36c-4b80-860e-d0090c9bdc04.idx/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/metadata.json.gz new file mode 100644 index 00000000000..acaa78da17e Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/rows/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/rows/README.txt new file mode 100644 index 00000000000..1373011851e --- /dev/null +++ b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/rows/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. + Written with version 0.2.115-cdba2731749a + Created at 2023/05/08 11:07:16 \ No newline at end of file diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/rows/_SUCCESS b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/rows/_SUCCESS new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/rows/metadata.json.gz new file mode 100644 index 00000000000..0eb70d8fecb Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/rows/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/rows/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/rows/rows/metadata.json.gz new file mode 100644 index 00000000000..e4683194a66 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/rows/rows/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/rows/rows/parts/part-0-607e5a37-b8fd-4b2f-880d-5f0dc6d7e129 b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/rows/rows/parts/part-0-607e5a37-b8fd-4b2f-880d-5f0dc6d7e129 new file mode 100644 index 00000000000..cb534d6c12f Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/rows/rows/parts/part-0-607e5a37-b8fd-4b2f-880d-5f0dc6d7e129 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/rows/rows/parts/part-1-fe74d492-c36c-4b80-860e-d0090c9bdc04 
b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/rows/rows/parts/part-1-fe74d492-c36c-4b80-860e-d0090c9bdc04 new file mode 100644 index 00000000000..33bfe88213b Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/2.hmt/rows/rows/parts/part-1-fe74d492-c36c-4b80-860e-d0090c9bdc04 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/README.txt new file mode 100644 index 00000000000..12dd06508c4 --- /dev/null +++ b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. + Written with version 0.2.115-cdba2731749a + Created at 2023/05/08 11:07:19 \ No newline at end of file diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/_SUCCESS b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/_SUCCESS new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/cols/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/cols/README.txt new file mode 100644 index 00000000000..12dd06508c4 --- /dev/null +++ b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/cols/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. + Written with version 0.2.115-cdba2731749a + Created at 2023/05/08 11:07:19 \ No newline at end of file diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/cols/_SUCCESS b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/cols/_SUCCESS new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/cols/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/cols/metadata.json.gz new file mode 100644 index 00000000000..551d387e23c Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/cols/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/cols/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/cols/rows/metadata.json.gz new file mode 100644 index 00000000000..f113d57e0ae Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/cols/rows/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/cols/rows/parts/part-0 b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/cols/rows/parts/part-0 new file mode 100644 index 00000000000..2e1ca001ffe Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/cols/rows/parts/part-0 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/entries/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/entries/README.txt new file mode 100644 index 00000000000..12dd06508c4 --- /dev/null +++ b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/entries/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. 
+ Written with version 0.2.115-cdba2731749a + Created at 2023/05/08 11:07:19 \ No newline at end of file diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/entries/_SUCCESS b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/entries/_SUCCESS new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/entries/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/entries/metadata.json.gz new file mode 100644 index 00000000000..f5838df71df Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/entries/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/entries/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/entries/rows/metadata.json.gz new file mode 100644 index 00000000000..5a3b80eaa79 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/entries/rows/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/entries/rows/parts/part-0-be5db820-4710-40e4-b526-7372a2857a61 b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/entries/rows/parts/part-0-be5db820-4710-40e4-b526-7372a2857a61 new file mode 100644 index 00000000000..7e4d0da3cfb Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/entries/rows/parts/part-0-be5db820-4710-40e4-b526-7372a2857a61 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/entries/rows/parts/part-1-97037c33-4e2b-4750-821e-87f5936213d6 b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/entries/rows/parts/part-1-97037c33-4e2b-4750-821e-87f5936213d6 new file mode 100644 index 00000000000..759152ac114 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/entries/rows/parts/part-1-97037c33-4e2b-4750-821e-87f5936213d6 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/globals/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/globals/README.txt new file mode 100644 index 00000000000..12dd06508c4 --- /dev/null +++ b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/3.hmt/globals/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. 
+ Written with version 0.2.115-cdba2731749a
+ Created at 2023/05/08 11:07:19
\ No newline at end of file
[elided: a long run of repetitive new-file entries adding binary backward-compatibility test fixtures under hail/src/test/resources/backward_compatability/1.7.0/matrix_table/ for fixtures 3.hmt through 8.hmt. Each .hmt directory gains a top-level README.txt, _SUCCESS, and metadata.json.gz, plus binary metadata.json.gz, parts/part-* data files, and index files under its cols/, entries/, globals/, rows/, and index/ subdirectories. Every README.txt carries the same three lines: "This folder comprises a Hail (www.hail.is) native Table or MatrixTable.", "Written with version 0.2.115-cdba2731749a", and a "Created at" timestamp between 2023/05/08 11:07:19 and 11:07:30.]
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/8.hmt/rows/rows/parts/part-1-275b9875-8f74-4400-84d1-3a2254d81d54
b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/8.hmt/rows/rows/parts/part-1-275b9875-8f74-4400-84d1-3a2254d81d54 new file mode 100644 index 00000000000..db6ba3f18ca Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/8.hmt/rows/rows/parts/part-1-275b9875-8f74-4400-84d1-3a2254d81d54 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/README.txt new file mode 100644 index 00000000000..817296c20b3 --- /dev/null +++ b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. + Written with version 0.2.115-cdba2731749a + Created at 2023/05/08 11:07:32 \ No newline at end of file diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/_SUCCESS b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/_SUCCESS new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/cols/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/cols/README.txt new file mode 100644 index 00000000000..817296c20b3 --- /dev/null +++ b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/cols/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. + Written with version 0.2.115-cdba2731749a + Created at 2023/05/08 11:07:32 \ No newline at end of file diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/cols/_SUCCESS b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/cols/_SUCCESS new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/cols/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/cols/metadata.json.gz new file mode 100644 index 00000000000..551d387e23c Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/cols/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/cols/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/cols/rows/metadata.json.gz new file mode 100644 index 00000000000..e1d04f5717a Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/cols/rows/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/cols/rows/parts/part-0 b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/cols/rows/parts/part-0 new file mode 100644 index 00000000000..016ae0b6b87 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/cols/rows/parts/part-0 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/entries/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/entries/README.txt new file mode 100644 index 00000000000..817296c20b3 --- /dev/null +++ b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/entries/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. 
+ Written with version 0.2.115-cdba2731749a + Created at 2023/05/08 11:07:32 \ No newline at end of file diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/entries/_SUCCESS b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/entries/_SUCCESS new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/entries/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/entries/metadata.json.gz new file mode 100644 index 00000000000..f5838df71df Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/entries/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/entries/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/entries/rows/metadata.json.gz new file mode 100644 index 00000000000..005b71b135b Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/entries/rows/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/entries/rows/parts/part-0-30bcc9ce-4797-427a-a9ba-bd0c688bddce b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/entries/rows/parts/part-0-30bcc9ce-4797-427a-a9ba-bd0c688bddce new file mode 100644 index 00000000000..d910abe82aa Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/entries/rows/parts/part-0-30bcc9ce-4797-427a-a9ba-bd0c688bddce differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/entries/rows/parts/part-1-e0447c57-8794-4e14-8e4f-c996fb431427 b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/entries/rows/parts/part-1-e0447c57-8794-4e14-8e4f-c996fb431427 new file mode 100644 index 00000000000..2285ec73a51 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/entries/rows/parts/part-1-e0447c57-8794-4e14-8e4f-c996fb431427 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/globals/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/globals/README.txt new file mode 100644 index 00000000000..817296c20b3 --- /dev/null +++ b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/globals/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. 
+ Written with version 0.2.115-cdba2731749a + Created at 2023/05/08 11:07:32 \ No newline at end of file diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/globals/_SUCCESS b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/globals/_SUCCESS new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/globals/globals/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/globals/globals/metadata.json.gz new file mode 100644 index 00000000000..7fc023d42ca Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/globals/globals/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/globals/globals/parts/part-0 b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/globals/globals/parts/part-0 new file mode 100644 index 00000000000..35a038769b1 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/globals/globals/parts/part-0 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/globals/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/globals/metadata.json.gz new file mode 100644 index 00000000000..bfcc49dabaa Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/globals/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/globals/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/globals/rows/metadata.json.gz new file mode 100644 index 00000000000..b3c00f4e66c Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/globals/rows/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/globals/rows/parts/part-0 b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/globals/rows/parts/part-0 new file mode 100644 index 00000000000..32c0f9d20bf Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/globals/rows/parts/part-0 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/index/part-0-30bcc9ce-4797-427a-a9ba-bd0c688bddce.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/index/part-0-30bcc9ce-4797-427a-a9ba-bd0c688bddce.idx/index new file mode 100644 index 00000000000..7548a26c700 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/index/part-0-30bcc9ce-4797-427a-a9ba-bd0c688bddce.idx/index differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/index/part-0-30bcc9ce-4797-427a-a9ba-bd0c688bddce.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/index/part-0-30bcc9ce-4797-427a-a9ba-bd0c688bddce.idx/metadata.json.gz new file mode 100644 index 00000000000..80b4e358690 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/index/part-0-30bcc9ce-4797-427a-a9ba-bd0c688bddce.idx/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/index/part-1-e0447c57-8794-4e14-8e4f-c996fb431427.idx/index 
b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/index/part-1-e0447c57-8794-4e14-8e4f-c996fb431427.idx/index new file mode 100644 index 00000000000..e88b918ac96 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/index/part-1-e0447c57-8794-4e14-8e4f-c996fb431427.idx/index differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/index/part-1-e0447c57-8794-4e14-8e4f-c996fb431427.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/index/part-1-e0447c57-8794-4e14-8e4f-c996fb431427.idx/metadata.json.gz new file mode 100644 index 00000000000..9749d4971c6 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/index/part-1-e0447c57-8794-4e14-8e4f-c996fb431427.idx/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/metadata.json.gz new file mode 100644 index 00000000000..acaa78da17e Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/rows/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/rows/README.txt new file mode 100644 index 00000000000..817296c20b3 --- /dev/null +++ b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/rows/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. + Written with version 0.2.115-cdba2731749a + Created at 2023/05/08 11:07:32 \ No newline at end of file diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/rows/_SUCCESS b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/rows/_SUCCESS new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/rows/metadata.json.gz new file mode 100644 index 00000000000..0eb70d8fecb Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/rows/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/rows/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/rows/rows/metadata.json.gz new file mode 100644 index 00000000000..b499e32ab9c Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/rows/rows/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/rows/rows/parts/part-0-30bcc9ce-4797-427a-a9ba-bd0c688bddce b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/rows/rows/parts/part-0-30bcc9ce-4797-427a-a9ba-bd0c688bddce new file mode 100644 index 00000000000..016ae0b6b87 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/rows/rows/parts/part-0-30bcc9ce-4797-427a-a9ba-bd0c688bddce differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/rows/rows/parts/part-1-e0447c57-8794-4e14-8e4f-c996fb431427 
b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/rows/rows/parts/part-1-e0447c57-8794-4e14-8e4f-c996fb431427 new file mode 100644 index 00000000000..66115c06fbf Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/matrix_table/9.hmt/rows/rows/parts/part-1-e0447c57-8794-4e14-8e4f-c996fb431427 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/README.txt new file mode 100644 index 00000000000..aa7029394ab --- /dev/null +++ b/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. + Written with version 0.2.115-cdba2731749a + Created at 2023/05/08 11:07:08 \ No newline at end of file diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/_SUCCESS b/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/_SUCCESS new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/globals/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/globals/metadata.json.gz new file mode 100644 index 00000000000..cb7abf5a23f Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/globals/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/globals/parts/part-0 b/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/globals/parts/part-0 new file mode 100644 index 00000000000..74f256d2ed2 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/globals/parts/part-0 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/index/part-0-cfa52e60-fa23-4924-b4f9-288d0f067534.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/index/part-0-cfa52e60-fa23-4924-b4f9-288d0f067534.idx/index new file mode 100644 index 00000000000..e201511e539 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/index/part-0-cfa52e60-fa23-4924-b4f9-288d0f067534.idx/index differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/index/part-0-cfa52e60-fa23-4924-b4f9-288d0f067534.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/index/part-0-cfa52e60-fa23-4924-b4f9-288d0f067534.idx/metadata.json.gz new file mode 100644 index 00000000000..727e94f0734 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/index/part-0-cfa52e60-fa23-4924-b4f9-288d0f067534.idx/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/index/part-1-a94e1fc1-a2c4-4396-9bcc-d85841bddf8a.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/index/part-1-a94e1fc1-a2c4-4396-9bcc-d85841bddf8a.idx/index new file mode 100644 index 00000000000..5256d0558f9 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/index/part-1-a94e1fc1-a2c4-4396-9bcc-d85841bddf8a.idx/index differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/index/part-1-a94e1fc1-a2c4-4396-9bcc-d85841bddf8a.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/index/part-1-a94e1fc1-a2c4-4396-9bcc-d85841bddf8a.idx/metadata.json.gz new file 
mode 100644 index 00000000000..727e94f0734 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/index/part-1-a94e1fc1-a2c4-4396-9bcc-d85841bddf8a.idx/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/index/part-2-e6f90f08-2108-4c94-8a21-bfbb8c03891a.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/index/part-2-e6f90f08-2108-4c94-8a21-bfbb8c03891a.idx/index new file mode 100644 index 00000000000..25df4c1ef3a Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/index/part-2-e6f90f08-2108-4c94-8a21-bfbb8c03891a.idx/index differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/index/part-2-e6f90f08-2108-4c94-8a21-bfbb8c03891a.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/index/part-2-e6f90f08-2108-4c94-8a21-bfbb8c03891a.idx/metadata.json.gz new file mode 100644 index 00000000000..513d0cf58ff Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/index/part-2-e6f90f08-2108-4c94-8a21-bfbb8c03891a.idx/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/metadata.json.gz new file mode 100644 index 00000000000..2cf1a8b6a6b Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/rows/metadata.json.gz new file mode 100644 index 00000000000..3d4d1c4fbb5 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/rows/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/rows/parts/part-0-cfa52e60-fa23-4924-b4f9-288d0f067534 b/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/rows/parts/part-0-cfa52e60-fa23-4924-b4f9-288d0f067534 new file mode 100644 index 00000000000..5bc03aad937 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/rows/parts/part-0-cfa52e60-fa23-4924-b4f9-288d0f067534 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/rows/parts/part-1-a94e1fc1-a2c4-4396-9bcc-d85841bddf8a b/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/rows/parts/part-1-a94e1fc1-a2c4-4396-9bcc-d85841bddf8a new file mode 100644 index 00000000000..4e6dadb6bb4 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/rows/parts/part-1-a94e1fc1-a2c4-4396-9bcc-d85841bddf8a differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/rows/parts/part-2-e6f90f08-2108-4c94-8a21-bfbb8c03891a b/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/rows/parts/part-2-e6f90f08-2108-4c94-8a21-bfbb8c03891a new file mode 100644 index 00000000000..45115d928e6 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/0.ht/rows/parts/part-2-e6f90f08-2108-4c94-8a21-bfbb8c03891a differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/README.txt new file mode 100644 index 00000000000..5d020396120 --- /dev/null +++ 
b/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. + Written with version 0.2.115-cdba2731749a + Created at 2023/05/08 11:07:12 \ No newline at end of file diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/_SUCCESS b/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/_SUCCESS new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/globals/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/globals/metadata.json.gz new file mode 100644 index 00000000000..66ea919e545 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/globals/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/globals/parts/part-0 b/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/globals/parts/part-0 new file mode 100644 index 00000000000..41e9baf4516 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/globals/parts/part-0 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/index/part-0-38e025d7-e556-4cda-8b54-918b7380c008.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/index/part-0-38e025d7-e556-4cda-8b54-918b7380c008.idx/index new file mode 100644 index 00000000000..2181d48e1f8 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/index/part-0-38e025d7-e556-4cda-8b54-918b7380c008.idx/index differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/index/part-0-38e025d7-e556-4cda-8b54-918b7380c008.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/index/part-0-38e025d7-e556-4cda-8b54-918b7380c008.idx/metadata.json.gz new file mode 100644 index 00000000000..445ecbb4cc7 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/index/part-0-38e025d7-e556-4cda-8b54-918b7380c008.idx/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/index/part-1-492a0a6e-2f83-4cdd-ad59-0de399eff081.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/index/part-1-492a0a6e-2f83-4cdd-ad59-0de399eff081.idx/index new file mode 100644 index 00000000000..d59dc8151f9 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/index/part-1-492a0a6e-2f83-4cdd-ad59-0de399eff081.idx/index differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/index/part-1-492a0a6e-2f83-4cdd-ad59-0de399eff081.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/index/part-1-492a0a6e-2f83-4cdd-ad59-0de399eff081.idx/metadata.json.gz new file mode 100644 index 00000000000..445ecbb4cc7 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/index/part-1-492a0a6e-2f83-4cdd-ad59-0de399eff081.idx/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/index/part-2-323c15bf-a481-45da-9342-564749749d48.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/index/part-2-323c15bf-a481-45da-9342-564749749d48.idx/index new file mode 100644 index 00000000000..25df4c1ef3a Binary files /dev/null and 
b/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/index/part-2-323c15bf-a481-45da-9342-564749749d48.idx/index differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/index/part-2-323c15bf-a481-45da-9342-564749749d48.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/index/part-2-323c15bf-a481-45da-9342-564749749d48.idx/metadata.json.gz new file mode 100644 index 00000000000..513d0cf58ff Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/index/part-2-323c15bf-a481-45da-9342-564749749d48.idx/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/metadata.json.gz new file mode 100644 index 00000000000..2cf1a8b6a6b Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/rows/metadata.json.gz new file mode 100644 index 00000000000..11f21e850b9 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/rows/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/rows/parts/part-0-38e025d7-e556-4cda-8b54-918b7380c008 b/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/rows/parts/part-0-38e025d7-e556-4cda-8b54-918b7380c008 new file mode 100644 index 00000000000..0cba1ba4fe4 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/rows/parts/part-0-38e025d7-e556-4cda-8b54-918b7380c008 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/rows/parts/part-1-492a0a6e-2f83-4cdd-ad59-0de399eff081 b/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/rows/parts/part-1-492a0a6e-2f83-4cdd-ad59-0de399eff081 new file mode 100644 index 00000000000..6721d1607a8 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/rows/parts/part-1-492a0a6e-2f83-4cdd-ad59-0de399eff081 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/rows/parts/part-2-323c15bf-a481-45da-9342-564749749d48 b/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/rows/parts/part-2-323c15bf-a481-45da-9342-564749749d48 new file mode 100644 index 00000000000..5f9ed48b556 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/1.ht/rows/parts/part-2-323c15bf-a481-45da-9342-564749749d48 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/README.txt new file mode 100644 index 00000000000..d1ca86748d0 --- /dev/null +++ b/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. 
+ Written with version 0.2.115-cdba2731749a + Created at 2023/05/08 11:07:15 \ No newline at end of file diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/_SUCCESS b/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/_SUCCESS new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/globals/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/globals/metadata.json.gz new file mode 100644 index 00000000000..dba604385c3 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/globals/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/globals/parts/part-0 b/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/globals/parts/part-0 new file mode 100644 index 00000000000..b787015fdbd Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/globals/parts/part-0 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/index/part-0-d4680a7e-18cd-4a4f-9a94-01c8c8621153.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/index/part-0-d4680a7e-18cd-4a4f-9a94-01c8c8621153.idx/index new file mode 100644 index 00000000000..e201511e539 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/index/part-0-d4680a7e-18cd-4a4f-9a94-01c8c8621153.idx/index differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/index/part-0-d4680a7e-18cd-4a4f-9a94-01c8c8621153.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/index/part-0-d4680a7e-18cd-4a4f-9a94-01c8c8621153.idx/metadata.json.gz new file mode 100644 index 00000000000..727e94f0734 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/index/part-0-d4680a7e-18cd-4a4f-9a94-01c8c8621153.idx/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/index/part-1-d82a8bfc-93c4-4d8b-a44e-6f048988dd64.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/index/part-1-d82a8bfc-93c4-4d8b-a44e-6f048988dd64.idx/index new file mode 100644 index 00000000000..5256d0558f9 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/index/part-1-d82a8bfc-93c4-4d8b-a44e-6f048988dd64.idx/index differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/index/part-1-d82a8bfc-93c4-4d8b-a44e-6f048988dd64.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/index/part-1-d82a8bfc-93c4-4d8b-a44e-6f048988dd64.idx/metadata.json.gz new file mode 100644 index 00000000000..727e94f0734 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/index/part-1-d82a8bfc-93c4-4d8b-a44e-6f048988dd64.idx/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/index/part-2-a1835229-e1e3-4931-8b4b-8eff574c665a.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/index/part-2-a1835229-e1e3-4931-8b4b-8eff574c665a.idx/index new file mode 100644 index 00000000000..25df4c1ef3a Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/index/part-2-a1835229-e1e3-4931-8b4b-8eff574c665a.idx/index differ diff --git 
a/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/index/part-2-a1835229-e1e3-4931-8b4b-8eff574c665a.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/index/part-2-a1835229-e1e3-4931-8b4b-8eff574c665a.idx/metadata.json.gz new file mode 100644 index 00000000000..513d0cf58ff Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/index/part-2-a1835229-e1e3-4931-8b4b-8eff574c665a.idx/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/metadata.json.gz new file mode 100644 index 00000000000..2cf1a8b6a6b Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/rows/metadata.json.gz new file mode 100644 index 00000000000..4cfa5f77528 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/rows/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/rows/parts/part-0-d4680a7e-18cd-4a4f-9a94-01c8c8621153 b/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/rows/parts/part-0-d4680a7e-18cd-4a4f-9a94-01c8c8621153 new file mode 100644 index 00000000000..cb534d6c12f Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/rows/parts/part-0-d4680a7e-18cd-4a4f-9a94-01c8c8621153 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/rows/parts/part-1-d82a8bfc-93c4-4d8b-a44e-6f048988dd64 b/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/rows/parts/part-1-d82a8bfc-93c4-4d8b-a44e-6f048988dd64 new file mode 100644 index 00000000000..3c776c236bc Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/rows/parts/part-1-d82a8bfc-93c4-4d8b-a44e-6f048988dd64 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/rows/parts/part-2-a1835229-e1e3-4931-8b4b-8eff574c665a b/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/rows/parts/part-2-a1835229-e1e3-4931-8b4b-8eff574c665a new file mode 100644 index 00000000000..77b9c9cecd8 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/2.ht/rows/parts/part-2-a1835229-e1e3-4931-8b4b-8eff574c665a differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/README.txt new file mode 100644 index 00000000000..48b1dda6833 --- /dev/null +++ b/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. 
+ Written with version 0.2.115-cdba2731749a + Created at 2023/05/08 11:07:17 \ No newline at end of file diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/_SUCCESS b/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/_SUCCESS new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/globals/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/globals/metadata.json.gz new file mode 100644 index 00000000000..3964597b6ad Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/globals/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/globals/parts/part-0 b/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/globals/parts/part-0 new file mode 100644 index 00000000000..02bc4388516 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/globals/parts/part-0 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/index/part-0-af976401-4931-4cef-b4d7-d0c06f766250.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/index/part-0-af976401-4931-4cef-b4d7-d0c06f766250.idx/index new file mode 100644 index 00000000000..2181d48e1f8 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/index/part-0-af976401-4931-4cef-b4d7-d0c06f766250.idx/index differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/index/part-0-af976401-4931-4cef-b4d7-d0c06f766250.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/index/part-0-af976401-4931-4cef-b4d7-d0c06f766250.idx/metadata.json.gz new file mode 100644 index 00000000000..445ecbb4cc7 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/index/part-0-af976401-4931-4cef-b4d7-d0c06f766250.idx/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/index/part-1-fd44f1af-8c94-48c5-9ca0-827741096d1c.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/index/part-1-fd44f1af-8c94-48c5-9ca0-827741096d1c.idx/index new file mode 100644 index 00000000000..d59dc8151f9 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/index/part-1-fd44f1af-8c94-48c5-9ca0-827741096d1c.idx/index differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/index/part-1-fd44f1af-8c94-48c5-9ca0-827741096d1c.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/index/part-1-fd44f1af-8c94-48c5-9ca0-827741096d1c.idx/metadata.json.gz new file mode 100644 index 00000000000..445ecbb4cc7 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/index/part-1-fd44f1af-8c94-48c5-9ca0-827741096d1c.idx/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/index/part-2-24ec7f66-7e14-4ede-a049-ae384388fc90.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/index/part-2-24ec7f66-7e14-4ede-a049-ae384388fc90.idx/index new file mode 100644 index 00000000000..25df4c1ef3a Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/index/part-2-24ec7f66-7e14-4ede-a049-ae384388fc90.idx/index differ diff --git 
a/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/index/part-2-24ec7f66-7e14-4ede-a049-ae384388fc90.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/index/part-2-24ec7f66-7e14-4ede-a049-ae384388fc90.idx/metadata.json.gz new file mode 100644 index 00000000000..513d0cf58ff Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/index/part-2-24ec7f66-7e14-4ede-a049-ae384388fc90.idx/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/metadata.json.gz new file mode 100644 index 00000000000..2cf1a8b6a6b Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/rows/metadata.json.gz new file mode 100644 index 00000000000..07e7dd43e53 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/rows/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/rows/parts/part-0-af976401-4931-4cef-b4d7-d0c06f766250 b/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/rows/parts/part-0-af976401-4931-4cef-b4d7-d0c06f766250 new file mode 100644 index 00000000000..2e1ca001ffe Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/rows/parts/part-0-af976401-4931-4cef-b4d7-d0c06f766250 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/rows/parts/part-1-fd44f1af-8c94-48c5-9ca0-827741096d1c b/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/rows/parts/part-1-fd44f1af-8c94-48c5-9ca0-827741096d1c new file mode 100644 index 00000000000..c6c8abb59e7 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/rows/parts/part-1-fd44f1af-8c94-48c5-9ca0-827741096d1c differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/rows/parts/part-2-24ec7f66-7e14-4ede-a049-ae384388fc90 b/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/rows/parts/part-2-24ec7f66-7e14-4ede-a049-ae384388fc90 new file mode 100644 index 00000000000..1b8bb810c14 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/3.ht/rows/parts/part-2-24ec7f66-7e14-4ede-a049-ae384388fc90 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/README.txt new file mode 100644 index 00000000000..87c6f15dd93 --- /dev/null +++ b/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. 
+ Written with version 0.2.115-cdba2731749a + Created at 2023/05/08 11:07:20 \ No newline at end of file diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/_SUCCESS b/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/_SUCCESS new file mode 100644 index 00000000000..e69de29bb2d diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/globals/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/globals/metadata.json.gz new file mode 100644 index 00000000000..9118491072f Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/globals/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/globals/parts/part-0 b/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/globals/parts/part-0 new file mode 100644 index 00000000000..45927693ec9 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/globals/parts/part-0 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/index/part-0-abbf24cf-7a44-40b5-a096-d0491a56424b.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/index/part-0-abbf24cf-7a44-40b5-a096-d0491a56424b.idx/index new file mode 100644 index 00000000000..e201511e539 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/index/part-0-abbf24cf-7a44-40b5-a096-d0491a56424b.idx/index differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/index/part-0-abbf24cf-7a44-40b5-a096-d0491a56424b.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/index/part-0-abbf24cf-7a44-40b5-a096-d0491a56424b.idx/metadata.json.gz new file mode 100644 index 00000000000..727e94f0734 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/index/part-0-abbf24cf-7a44-40b5-a096-d0491a56424b.idx/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/index/part-1-83d59d12-ccd1-455f-974f-fe70d6934c51.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/index/part-1-83d59d12-ccd1-455f-974f-fe70d6934c51.idx/index new file mode 100644 index 00000000000..5256d0558f9 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/index/part-1-83d59d12-ccd1-455f-974f-fe70d6934c51.idx/index differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/index/part-1-83d59d12-ccd1-455f-974f-fe70d6934c51.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/index/part-1-83d59d12-ccd1-455f-974f-fe70d6934c51.idx/metadata.json.gz new file mode 100644 index 00000000000..727e94f0734 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/index/part-1-83d59d12-ccd1-455f-974f-fe70d6934c51.idx/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/index/part-2-d6dc7da5-7778-4b44-baa5-ca8f20faa6b7.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/index/part-2-d6dc7da5-7778-4b44-baa5-ca8f20faa6b7.idx/index new file mode 100644 index 00000000000..25df4c1ef3a Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/index/part-2-d6dc7da5-7778-4b44-baa5-ca8f20faa6b7.idx/index differ diff --git 
a/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/index/part-2-d6dc7da5-7778-4b44-baa5-ca8f20faa6b7.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/index/part-2-d6dc7da5-7778-4b44-baa5-ca8f20faa6b7.idx/metadata.json.gz new file mode 100644 index 00000000000..513d0cf58ff Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/index/part-2-d6dc7da5-7778-4b44-baa5-ca8f20faa6b7.idx/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/metadata.json.gz new file mode 100644 index 00000000000..2cf1a8b6a6b Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/rows/metadata.json.gz new file mode 100644 index 00000000000..4d19d090874 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/rows/metadata.json.gz differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/rows/parts/part-0-abbf24cf-7a44-40b5-a096-d0491a56424b b/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/rows/parts/part-0-abbf24cf-7a44-40b5-a096-d0491a56424b new file mode 100644 index 00000000000..9887da33bf8 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/rows/parts/part-0-abbf24cf-7a44-40b5-a096-d0491a56424b differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/rows/parts/part-1-83d59d12-ccd1-455f-974f-fe70d6934c51 b/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/rows/parts/part-1-83d59d12-ccd1-455f-974f-fe70d6934c51 new file mode 100644 index 00000000000..8005ae71613 Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/rows/parts/part-1-83d59d12-ccd1-455f-974f-fe70d6934c51 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/rows/parts/part-2-d6dc7da5-7778-4b44-baa5-ca8f20faa6b7 b/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/rows/parts/part-2-d6dc7da5-7778-4b44-baa5-ca8f20faa6b7 new file mode 100644 index 00000000000..266f13f2efa Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/4.ht/rows/parts/part-2-d6dc7da5-7778-4b44-baa5-ca8f20faa6b7 differ diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/README.txt new file mode 100644 index 00000000000..4945f1d007f --- /dev/null +++ b/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/README.txt @@ -0,0 +1,3 @@ +This folder comprises a Hail (www.hail.is) native Table or MatrixTable. 
+  Written with version 0.2.115-cdba2731749a
+  Created at 2023/05/08 11:07:22
\ No newline at end of file
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/_SUCCESS b/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/_SUCCESS
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/globals/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/globals/metadata.json.gz
new file mode 100644
index 00000000000..0fcb4a0f9f3
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/globals/metadata.json.gz differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/globals/parts/part-0 b/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/globals/parts/part-0
new file mode 100644
index 00000000000..fdff58a346a
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/globals/parts/part-0 differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/index/part-0-599f90e8-0a8e-48bb-9b07-af3f1caedf7c.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/index/part-0-599f90e8-0a8e-48bb-9b07-af3f1caedf7c.idx/index
new file mode 100644
index 00000000000..2181d48e1f8
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/index/part-0-599f90e8-0a8e-48bb-9b07-af3f1caedf7c.idx/index differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/index/part-0-599f90e8-0a8e-48bb-9b07-af3f1caedf7c.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/index/part-0-599f90e8-0a8e-48bb-9b07-af3f1caedf7c.idx/metadata.json.gz
new file mode 100644
index 00000000000..445ecbb4cc7
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/index/part-0-599f90e8-0a8e-48bb-9b07-af3f1caedf7c.idx/metadata.json.gz differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/index/part-1-dc67f12b-0b53-4acb-819a-d4ac50382449.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/index/part-1-dc67f12b-0b53-4acb-819a-d4ac50382449.idx/index
new file mode 100644
index 00000000000..d59dc8151f9
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/index/part-1-dc67f12b-0b53-4acb-819a-d4ac50382449.idx/index differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/index/part-1-dc67f12b-0b53-4acb-819a-d4ac50382449.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/index/part-1-dc67f12b-0b53-4acb-819a-d4ac50382449.idx/metadata.json.gz
new file mode 100644
index 00000000000..445ecbb4cc7
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/index/part-1-dc67f12b-0b53-4acb-819a-d4ac50382449.idx/metadata.json.gz differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/index/part-2-c8c0c1a5-e48f-4fc1-9ac1-c6a7551085d4.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/index/part-2-c8c0c1a5-e48f-4fc1-9ac1-c6a7551085d4.idx/index
new file mode 100644
index 00000000000..25df4c1ef3a
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/index/part-2-c8c0c1a5-e48f-4fc1-9ac1-c6a7551085d4.idx/index differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/index/part-2-c8c0c1a5-e48f-4fc1-9ac1-c6a7551085d4.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/index/part-2-c8c0c1a5-e48f-4fc1-9ac1-c6a7551085d4.idx/metadata.json.gz
new file mode 100644
index 00000000000..513d0cf58ff
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/index/part-2-c8c0c1a5-e48f-4fc1-9ac1-c6a7551085d4.idx/metadata.json.gz differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/metadata.json.gz
new file mode 100644
index 00000000000..2cf1a8b6a6b
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/metadata.json.gz differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/rows/metadata.json.gz
new file mode 100644
index 00000000000..29c339fb105
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/rows/metadata.json.gz differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/rows/parts/part-0-599f90e8-0a8e-48bb-9b07-af3f1caedf7c b/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/rows/parts/part-0-599f90e8-0a8e-48bb-9b07-af3f1caedf7c
new file mode 100644
index 00000000000..095f05058fd
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/rows/parts/part-0-599f90e8-0a8e-48bb-9b07-af3f1caedf7c differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/rows/parts/part-1-dc67f12b-0b53-4acb-819a-d4ac50382449 b/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/rows/parts/part-1-dc67f12b-0b53-4acb-819a-d4ac50382449
new file mode 100644
index 00000000000..07dc9a3e071
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/rows/parts/part-1-dc67f12b-0b53-4acb-819a-d4ac50382449 differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/rows/parts/part-2-c8c0c1a5-e48f-4fc1-9ac1-c6a7551085d4 b/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/rows/parts/part-2-c8c0c1a5-e48f-4fc1-9ac1-c6a7551085d4
new file mode 100644
index 00000000000..a0c86c32b9f
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/5.ht/rows/parts/part-2-c8c0c1a5-e48f-4fc1-9ac1-c6a7551085d4 differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/README.txt
new file mode 100644
index 00000000000..43f50902189
--- /dev/null
+++ b/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/README.txt
@@ -0,0 +1,3 @@
+This folder comprises a Hail (www.hail.is) native Table or MatrixTable.
+  Written with version 0.2.115-cdba2731749a
+  Created at 2023/05/08 11:07:24
\ No newline at end of file
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/_SUCCESS b/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/_SUCCESS
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/globals/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/globals/metadata.json.gz
new file mode 100644
index 00000000000..c036b89f711
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/globals/metadata.json.gz differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/globals/parts/part-0 b/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/globals/parts/part-0
new file mode 100644
index 00000000000..6fed4af7806
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/globals/parts/part-0 differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/index/part-0-0fd909e1-e54d-46a5-8c7d-fce566ec2aa5.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/index/part-0-0fd909e1-e54d-46a5-8c7d-fce566ec2aa5.idx/index
new file mode 100644
index 00000000000..e201511e539
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/index/part-0-0fd909e1-e54d-46a5-8c7d-fce566ec2aa5.idx/index differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/index/part-0-0fd909e1-e54d-46a5-8c7d-fce566ec2aa5.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/index/part-0-0fd909e1-e54d-46a5-8c7d-fce566ec2aa5.idx/metadata.json.gz
new file mode 100644
index 00000000000..727e94f0734
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/index/part-0-0fd909e1-e54d-46a5-8c7d-fce566ec2aa5.idx/metadata.json.gz differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/index/part-1-c8413971-f78a-4a31-a742-2c46a9fcdaf8.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/index/part-1-c8413971-f78a-4a31-a742-2c46a9fcdaf8.idx/index
new file mode 100644
index 00000000000..5256d0558f9
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/index/part-1-c8413971-f78a-4a31-a742-2c46a9fcdaf8.idx/index differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/index/part-1-c8413971-f78a-4a31-a742-2c46a9fcdaf8.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/index/part-1-c8413971-f78a-4a31-a742-2c46a9fcdaf8.idx/metadata.json.gz
new file mode 100644
index 00000000000..727e94f0734
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/index/part-1-c8413971-f78a-4a31-a742-2c46a9fcdaf8.idx/metadata.json.gz differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/index/part-2-7bd9039c-3649-4042-bc9c-f3a0cd170dc8.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/index/part-2-7bd9039c-3649-4042-bc9c-f3a0cd170dc8.idx/index
new file mode 100644
index 00000000000..25df4c1ef3a
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/index/part-2-7bd9039c-3649-4042-bc9c-f3a0cd170dc8.idx/index differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/index/part-2-7bd9039c-3649-4042-bc9c-f3a0cd170dc8.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/index/part-2-7bd9039c-3649-4042-bc9c-f3a0cd170dc8.idx/metadata.json.gz
new file mode 100644
index 00000000000..513d0cf58ff
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/index/part-2-7bd9039c-3649-4042-bc9c-f3a0cd170dc8.idx/metadata.json.gz differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/metadata.json.gz
new file mode 100644
index 00000000000..2cf1a8b6a6b
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/metadata.json.gz differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/rows/metadata.json.gz
new file mode 100644
index 00000000000..a55f8dd3360
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/rows/metadata.json.gz differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/rows/parts/part-0-0fd909e1-e54d-46a5-8c7d-fce566ec2aa5 b/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/rows/parts/part-0-0fd909e1-e54d-46a5-8c7d-fce566ec2aa5
new file mode 100644
index 00000000000..479a5d5a334
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/rows/parts/part-0-0fd909e1-e54d-46a5-8c7d-fce566ec2aa5 differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/rows/parts/part-1-c8413971-f78a-4a31-a742-2c46a9fcdaf8 b/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/rows/parts/part-1-c8413971-f78a-4a31-a742-2c46a9fcdaf8
new file mode 100644
index 00000000000..f77a83ee032
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/rows/parts/part-1-c8413971-f78a-4a31-a742-2c46a9fcdaf8 differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/rows/parts/part-2-7bd9039c-3649-4042-bc9c-f3a0cd170dc8 b/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/rows/parts/part-2-7bd9039c-3649-4042-bc9c-f3a0cd170dc8
new file mode 100644
index 00000000000..7163477ca24
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/6.ht/rows/parts/part-2-7bd9039c-3649-4042-bc9c-f3a0cd170dc8 differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/README.txt
new file mode 100644
index 00000000000..ebcd2a1f772
--- /dev/null
+++ b/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/README.txt
@@ -0,0 +1,3 @@
+This folder comprises a Hail (www.hail.is) native Table or MatrixTable.
+  Written with version 0.2.115-cdba2731749a
+  Created at 2023/05/08 11:07:26
\ No newline at end of file
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/_SUCCESS b/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/_SUCCESS
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/globals/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/globals/metadata.json.gz
new file mode 100644
index 00000000000..70abf7ef2b9
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/globals/metadata.json.gz differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/globals/parts/part-0 b/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/globals/parts/part-0
new file mode 100644
index 00000000000..a5eccd2d0d5
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/globals/parts/part-0 differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/index/part-0-276be97d-2a08-4f3e-b10c-b823a1669902.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/index/part-0-276be97d-2a08-4f3e-b10c-b823a1669902.idx/index
new file mode 100644
index 00000000000..2181d48e1f8
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/index/part-0-276be97d-2a08-4f3e-b10c-b823a1669902.idx/index differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/index/part-0-276be97d-2a08-4f3e-b10c-b823a1669902.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/index/part-0-276be97d-2a08-4f3e-b10c-b823a1669902.idx/metadata.json.gz
new file mode 100644
index 00000000000..445ecbb4cc7
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/index/part-0-276be97d-2a08-4f3e-b10c-b823a1669902.idx/metadata.json.gz differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/index/part-1-d2edbf0e-5390-4cc4-9d6b-654b3835985a.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/index/part-1-d2edbf0e-5390-4cc4-9d6b-654b3835985a.idx/index
new file mode 100644
index 00000000000..d59dc8151f9
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/index/part-1-d2edbf0e-5390-4cc4-9d6b-654b3835985a.idx/index differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/index/part-1-d2edbf0e-5390-4cc4-9d6b-654b3835985a.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/index/part-1-d2edbf0e-5390-4cc4-9d6b-654b3835985a.idx/metadata.json.gz
new file mode 100644
index 00000000000..445ecbb4cc7
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/index/part-1-d2edbf0e-5390-4cc4-9d6b-654b3835985a.idx/metadata.json.gz differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/index/part-2-524dd39c-d53a-452b-91ae-e5e417aa6bb8.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/index/part-2-524dd39c-d53a-452b-91ae-e5e417aa6bb8.idx/index
new file mode 100644
index 00000000000..25df4c1ef3a
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/index/part-2-524dd39c-d53a-452b-91ae-e5e417aa6bb8.idx/index differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/index/part-2-524dd39c-d53a-452b-91ae-e5e417aa6bb8.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/index/part-2-524dd39c-d53a-452b-91ae-e5e417aa6bb8.idx/metadata.json.gz
new file mode 100644
index 00000000000..513d0cf58ff
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/index/part-2-524dd39c-d53a-452b-91ae-e5e417aa6bb8.idx/metadata.json.gz differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/metadata.json.gz
new file mode 100644
index 00000000000..2cf1a8b6a6b
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/metadata.json.gz differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/rows/metadata.json.gz
new file mode 100644
index 00000000000..95ba2f074c8
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/rows/metadata.json.gz differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/rows/parts/part-0-276be97d-2a08-4f3e-b10c-b823a1669902 b/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/rows/parts/part-0-276be97d-2a08-4f3e-b10c-b823a1669902
new file mode 100644
index 00000000000..abdf4a08c65
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/rows/parts/part-0-276be97d-2a08-4f3e-b10c-b823a1669902 differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/rows/parts/part-1-d2edbf0e-5390-4cc4-9d6b-654b3835985a b/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/rows/parts/part-1-d2edbf0e-5390-4cc4-9d6b-654b3835985a
new file mode 100644
index 00000000000..eed983c9288
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/rows/parts/part-1-d2edbf0e-5390-4cc4-9d6b-654b3835985a differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/rows/parts/part-2-524dd39c-d53a-452b-91ae-e5e417aa6bb8 b/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/rows/parts/part-2-524dd39c-d53a-452b-91ae-e5e417aa6bb8
new file mode 100644
index 00000000000..2e04130a12e
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/7.ht/rows/parts/part-2-524dd39c-d53a-452b-91ae-e5e417aa6bb8 differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/README.txt
new file mode 100644
index 00000000000..223b6c48604
--- /dev/null
+++ b/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/README.txt
@@ -0,0 +1,3 @@
+This folder comprises a Hail (www.hail.is) native Table or MatrixTable.
+  Written with version 0.2.115-cdba2731749a
+  Created at 2023/05/08 11:07:28
\ No newline at end of file
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/_SUCCESS b/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/_SUCCESS
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/globals/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/globals/metadata.json.gz
new file mode 100644
index 00000000000..72cbb4d074d
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/globals/metadata.json.gz differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/globals/parts/part-0 b/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/globals/parts/part-0
new file mode 100644
index 00000000000..c413de7126f
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/globals/parts/part-0 differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/index/part-0-1e4a35b4-85b5-43a6-8c61-563d502e760b.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/index/part-0-1e4a35b4-85b5-43a6-8c61-563d502e760b.idx/index
new file mode 100644
index 00000000000..e201511e539
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/index/part-0-1e4a35b4-85b5-43a6-8c61-563d502e760b.idx/index differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/index/part-0-1e4a35b4-85b5-43a6-8c61-563d502e760b.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/index/part-0-1e4a35b4-85b5-43a6-8c61-563d502e760b.idx/metadata.json.gz
new file mode 100644
index 00000000000..727e94f0734
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/index/part-0-1e4a35b4-85b5-43a6-8c61-563d502e760b.idx/metadata.json.gz differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/index/part-1-1f07d309-0433-4b1f-9ccf-2298ea84bc85.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/index/part-1-1f07d309-0433-4b1f-9ccf-2298ea84bc85.idx/index
new file mode 100644
index 00000000000..5256d0558f9
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/index/part-1-1f07d309-0433-4b1f-9ccf-2298ea84bc85.idx/index differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/index/part-1-1f07d309-0433-4b1f-9ccf-2298ea84bc85.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/index/part-1-1f07d309-0433-4b1f-9ccf-2298ea84bc85.idx/metadata.json.gz
new file mode 100644
index 00000000000..727e94f0734
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/index/part-1-1f07d309-0433-4b1f-9ccf-2298ea84bc85.idx/metadata.json.gz differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/index/part-2-f8467099-6fa9-49b3-ad85-04c97ab02efb.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/index/part-2-f8467099-6fa9-49b3-ad85-04c97ab02efb.idx/index
new file mode 100644
index 00000000000..25df4c1ef3a
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/index/part-2-f8467099-6fa9-49b3-ad85-04c97ab02efb.idx/index differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/index/part-2-f8467099-6fa9-49b3-ad85-04c97ab02efb.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/index/part-2-f8467099-6fa9-49b3-ad85-04c97ab02efb.idx/metadata.json.gz
new file mode 100644
index 00000000000..513d0cf58ff
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/index/part-2-f8467099-6fa9-49b3-ad85-04c97ab02efb.idx/metadata.json.gz differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/metadata.json.gz
new file mode 100644
index 00000000000..2cf1a8b6a6b
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/metadata.json.gz differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/rows/metadata.json.gz
new file mode 100644
index 00000000000..8b6e4835240
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/rows/metadata.json.gz differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/rows/parts/part-0-1e4a35b4-85b5-43a6-8c61-563d502e760b b/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/rows/parts/part-0-1e4a35b4-85b5-43a6-8c61-563d502e760b
new file mode 100644
index 00000000000..00010b0d7db
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/rows/parts/part-0-1e4a35b4-85b5-43a6-8c61-563d502e760b differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/rows/parts/part-1-1f07d309-0433-4b1f-9ccf-2298ea84bc85 b/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/rows/parts/part-1-1f07d309-0433-4b1f-9ccf-2298ea84bc85
new file mode 100644
index 00000000000..cba8ebfc698
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/rows/parts/part-1-1f07d309-0433-4b1f-9ccf-2298ea84bc85 differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/rows/parts/part-2-f8467099-6fa9-49b3-ad85-04c97ab02efb b/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/rows/parts/part-2-f8467099-6fa9-49b3-ad85-04c97ab02efb
new file mode 100644
index 00000000000..ca07a9a8e15
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/8.ht/rows/parts/part-2-f8467099-6fa9-49b3-ad85-04c97ab02efb differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/README.txt b/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/README.txt
new file mode 100644
index 00000000000..ca775864606
--- /dev/null
+++ b/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/README.txt
@@ -0,0 +1,3 @@
+This folder comprises a Hail (www.hail.is) native Table or MatrixTable.
+  Written with version 0.2.115-cdba2731749a
+  Created at 2023/05/08 11:07:31
\ No newline at end of file
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/_SUCCESS b/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/_SUCCESS
new file mode 100644
index 00000000000..e69de29bb2d
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/globals/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/globals/metadata.json.gz
new file mode 100644
index 00000000000..b3c00f4e66c
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/globals/metadata.json.gz differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/globals/parts/part-0 b/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/globals/parts/part-0
new file mode 100644
index 00000000000..32c0f9d20bf
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/globals/parts/part-0 differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/index/part-0-232710bd-c6bc-410d-ac01-a5ee5565ab19.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/index/part-0-232710bd-c6bc-410d-ac01-a5ee5565ab19.idx/index
new file mode 100644
index 00000000000..2181d48e1f8
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/index/part-0-232710bd-c6bc-410d-ac01-a5ee5565ab19.idx/index differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/index/part-0-232710bd-c6bc-410d-ac01-a5ee5565ab19.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/index/part-0-232710bd-c6bc-410d-ac01-a5ee5565ab19.idx/metadata.json.gz
new file mode 100644
index 00000000000..445ecbb4cc7
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/index/part-0-232710bd-c6bc-410d-ac01-a5ee5565ab19.idx/metadata.json.gz differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/index/part-1-bc282db3-2080-42a3-87b4-4f31fc2fb9b4.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/index/part-1-bc282db3-2080-42a3-87b4-4f31fc2fb9b4.idx/index
new file mode 100644
index 00000000000..d59dc8151f9
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/index/part-1-bc282db3-2080-42a3-87b4-4f31fc2fb9b4.idx/index differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/index/part-1-bc282db3-2080-42a3-87b4-4f31fc2fb9b4.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/index/part-1-bc282db3-2080-42a3-87b4-4f31fc2fb9b4.idx/metadata.json.gz
new file mode 100644
index 00000000000..445ecbb4cc7
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/index/part-1-bc282db3-2080-42a3-87b4-4f31fc2fb9b4.idx/metadata.json.gz differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/index/part-2-20e4b7f5-0c77-4482-8dc6-8ca428297f79.idx/index b/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/index/part-2-20e4b7f5-0c77-4482-8dc6-8ca428297f79.idx/index
new file mode 100644
index 00000000000..25df4c1ef3a
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/index/part-2-20e4b7f5-0c77-4482-8dc6-8ca428297f79.idx/index differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/index/part-2-20e4b7f5-0c77-4482-8dc6-8ca428297f79.idx/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/index/part-2-20e4b7f5-0c77-4482-8dc6-8ca428297f79.idx/metadata.json.gz
new file mode 100644
index 00000000000..513d0cf58ff
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/index/part-2-20e4b7f5-0c77-4482-8dc6-8ca428297f79.idx/metadata.json.gz differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/metadata.json.gz
new file mode 100644
index 00000000000..2cf1a8b6a6b
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/metadata.json.gz differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/rows/metadata.json.gz b/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/rows/metadata.json.gz
new file mode 100644
index 00000000000..622a0daa90b
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/rows/metadata.json.gz differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/rows/parts/part-0-232710bd-c6bc-410d-ac01-a5ee5565ab19 b/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/rows/parts/part-0-232710bd-c6bc-410d-ac01-a5ee5565ab19
new file mode 100644
index 00000000000..016ae0b6b87
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/rows/parts/part-0-232710bd-c6bc-410d-ac01-a5ee5565ab19 differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/rows/parts/part-1-bc282db3-2080-42a3-87b4-4f31fc2fb9b4 b/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/rows/parts/part-1-bc282db3-2080-42a3-87b4-4f31fc2fb9b4
new file mode 100644
index 00000000000..ac2e6dd833c
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/rows/parts/part-1-bc282db3-2080-42a3-87b4-4f31fc2fb9b4 differ
diff --git a/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/rows/parts/part-2-20e4b7f5-0c77-4482-8dc6-8ca428297f79 b/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/rows/parts/part-2-20e4b7f5-0c77-4482-8dc6-8ca428297f79
new file mode 100644
index 00000000000..0f945857900
Binary files /dev/null and b/hail/src/test/resources/backward_compatability/1.7.0/table/9.ht/rows/parts/part-2-20e4b7f5-0c77-4482-8dc6-8ca428297f79 differ
diff --git a/hail/src/test/scala/is/hail/expr/ir/Aggregators2Suite.scala b/hail/src/test/scala/is/hail/expr/ir/Aggregators2Suite.scala
index 3f107b33217..a5638983080 100644
--- a/hail/src/test/scala/is/hail/expr/ir/Aggregators2Suite.scala
+++ b/hail/src/test/scala/is/hail/expr/ir/Aggregators2Suite.scala
@@ -33,7 +33,7 @@ class Aggregators2Suite extends HailSuite {
     val argT = PType.canonical(TStruct(args.map { case (n, (typ, _)) => n -> typ }: _*)).setRequired(true).asInstanceOf[PStruct]
     val argVs = Row.fromSeq(args.map { case (_, (_, v)) => v })
     val argRef = Ref(genUID(), argT.virtualType)
-    val spec = BufferSpec.defaultUncompressed
+    val spec = BufferSpec.wireSpec
 
     val (_, combAndDuplicate) = CompileWithAggregators[AsmFunction1RegionUnit](ctx,
       Array.fill(nPartitions)(aggSig.state),
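Reviewer note: the Aggregators2Suite change above swaps an uncompressed default for BufferSpec.wireSpec, presumably so the test exercises the same codec used when aggregator state crosses partitions. A minimal illustrative sketch of that idea follows; ToyBufferSpec and its `identity`/`deflate` members are invented stand-ins for is.hail.io.BufferSpec, not Hail code:

    // Toy model: a "spec" decides how serialized state is encoded on the wire.
    import java.io.{ByteArrayInputStream, ByteArrayOutputStream}
    import java.util.zip.{DeflaterOutputStream, InflaterInputStream}

    trait ToyBufferSpec {
      def compress(bytes: Array[Byte]): Array[Byte]
      def decompress(bytes: Array[Byte]): Array[Byte]
    }

    object ToyBufferSpec {
      // analogous to an uncompressed spec: bytes pass through unchanged
      val identity: ToyBufferSpec = new ToyBufferSpec {
        def compress(bytes: Array[Byte]): Array[Byte] = bytes
        def decompress(bytes: Array[Byte]): Array[Byte] = bytes
      }

      // analogous to a compressed wire spec: deflate on write, inflate on read
      val deflate: ToyBufferSpec = new ToyBufferSpec {
        def compress(bytes: Array[Byte]): Array[Byte] = {
          val bos = new ByteArrayOutputStream()
          val out = new DeflaterOutputStream(bos)
          out.write(bytes)
          out.close()
          bos.toByteArray
        }
        def decompress(bytes: Array[Byte]): Array[Byte] = {
          val in = new InflaterInputStream(new ByteArrayInputStream(bytes))
          val buf = new ByteArrayOutputStream()
          val chunk = new Array[Byte](4096)
          var n = in.read(chunk)
          while (n != -1) { buf.write(chunk, 0, n); n = in.read(chunk) }
          buf.toByteArray
        }
      }
    }

    object RoundTripCheck extends App {
      // a test that round-trips through the wire spec catches encode/decode
      // mismatches that an identity spec would silently hide
      val payload = Array.fill[Byte](1024)(7.toByte)
      for (spec <- Seq(ToyBufferSpec.identity, ToyBufferSpec.deflate)) {
        val restored = spec.decompress(spec.compress(payload))
        assert(restored.sameElements(payload))
      }
      println("both specs round-trip the same payload")
    }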
diff --git a/hail/src/test/scala/is/hail/expr/ir/IRSuite.scala b/hail/src/test/scala/is/hail/expr/ir/IRSuite.scala
index 989a075917f..b5b2de2250b 100644
--- a/hail/src/test/scala/is/hail/expr/ir/IRSuite.scala
+++ b/hail/src/test/scala/is/hail/expr/ir/IRSuite.scala
@@ -3411,7 +3411,7 @@ class IRSuite extends HailSuite {
   def testReadWriteValues(pt: SingleCodeType, value: Any): Unit = {
     implicit val execStrats = ExecStrategy.compileOnly
     val node = In(0, SingleCodeEmitParamType(true, pt))
-    val spec = TypedCodecSpec(PType.canonical(node.typ), BufferSpec.defaultUncompressed)
+    val spec = TypedCodecSpec(PType.canonical(node.typ), BufferSpec.blockedUncompressed)
     val writer = ETypeValueWriter(spec)
     val reader = ETypeValueReader(spec)
     val prefix = ctx.createTmpPath("test-read-write-values")
@@ -3425,7 +3425,7 @@ class IRSuite extends HailSuite {
   def testReadWriteValueDistributed(pt: SingleCodeType, value: Any): Unit = {
     implicit val execStrats = ExecStrategy.compileOnly
     val node = In(0, SingleCodeEmitParamType(true, pt))
-    val spec = TypedCodecSpec(PType.canonical(node.typ), BufferSpec.defaultUncompressed)
+    val spec = TypedCodecSpec(PType.canonical(node.typ), BufferSpec.blockedUncompressed)
     val writer = ETypeValueWriter(spec)
     val reader = ETypeValueReader(spec)
     val prefix = ctx.createTmpPath("test-read-write-value-dist")
diff --git a/hail/src/test/scala/is/hail/fs/FSSuite.scala b/hail/src/test/scala/is/hail/fs/FSSuite.scala
index 81458a6bd7a..b27d62581fd 100644
--- a/hail/src/test/scala/is/hail/fs/FSSuite.scala
+++ b/hail/src/test/scala/is/hail/fs/FSSuite.scala
@@ -162,6 +162,24 @@ trait FSSuite extends TestNGSuite {
       s"${statuses} ${pathsRelResourcesRoot(statuses)} ${Set("/a", "/adir", "/az")}")
   }
 
+  @Test def testGlobFilenameMatchSingleCharacter(): Unit = {
+    val statuses = fs.glob(r("/a?"))
+    assert(pathsRelResourcesRoot(statuses) == Set("/az"),
+      s"${statuses} ${pathsRelResourcesRoot(statuses)} ${Set("/az")}")
+  }
+
+  @Test def testGlobFilenameMatchSingleCharacterInMiddleOfName(): Unit = {
+    val statuses = fs.glob(r("/a?ir"))
+    assert(pathsRelResourcesRoot(statuses) == Set("/adir"),
+      s"${statuses} ${pathsRelResourcesRoot(statuses)} ${Set("/adir")}")
+  }
+
+  @Test def testGlobDirnameMatchSingleCharacterInMiddleOfName(): Unit = {
+    val statuses = fs.glob(r("/a?ir/x"))
+    assert(pathsRelResourcesRoot(statuses) == Set("/adir/x"),
+      s"${statuses} ${pathsRelResourcesRoot(statuses)} ${Set("/adir/x")}")
+  }
+
   @Test def testGlobMatchDir(): Unit = {
     val statuses = fs.glob(r("/*dir/x"))
     assert(pathsRelResourcesRoot(statuses) == Set("/adir/x", "/dir/x"),
diff --git a/hail/src/test/scala/is/hail/variant/vsm/PartitioningSuite.scala b/hail/src/test/scala/is/hail/variant/vsm/PartitioningSuite.scala
index 1ea109657cb..541aed89e76 100644
--- a/hail/src/test/scala/is/hail/variant/vsm/PartitioningSuite.scala
+++ b/hail/src/test/scala/is/hail/variant/vsm/PartitioningSuite.scala
@@ -32,19 +32,4 @@ class PartitioningSuite extends HailSuite {
       ctx, optimize = false)
       .rvd.count()
   }
-
-  @Test def testEmptyRDDOrderedJoin() {
-    val tv = Interpret.apply(TableRange(100, 6), ctx)
-
-    val nonEmptyRVD = tv.rvd
-    val rvdType = nonEmptyRVD.typ
-
-    ExecuteContext.scoped() { ctx =>
-      val emptyRVD = RVD.empty(ctx, rvdType)
-      emptyRVD.orderedJoin(nonEmptyRVD, "left", (_, it) => it.map(_._1), rvdType, ctx).count()
-      emptyRVD.orderedJoin(nonEmptyRVD, "inner", (_, it) => it.map(_._1), rvdType, ctx).count()
-      nonEmptyRVD.orderedJoin(emptyRVD, "left", (_, it) => it.map(_._1), rvdType, ctx).count()
-      nonEmptyRVD.orderedJoin(emptyRVD, "inner", (_, it) => it.map(_._1), rvdType, ctx).count()
-    }
-  }
 }
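Reviewer note: the new FSSuite tests pin down glob semantics where `?` matches exactly one character within a path segment and `*` matches any run of non-separator characters. A self-contained sketch of that matching rule follows, as an illustrative translation to java.util.regex rather than Hail's actual glob implementation:

    object GlobSketch extends App {
      // translate a glob pattern into an equivalent regular expression;
      // `?` -> exactly one non-separator char, `*` -> any run of them
      def globToRegex(glob: String): String =
        glob.flatMap {
          case '?' => "[^/]"
          case '*' => "[^/]*"
          case c if "\\.[]{}()+-^$|".contains(c) => "\\" + c
          case c => c.toString
        }

      def matches(glob: String, path: String): Boolean =
        path.matches(globToRegex(glob))

      // mirrors testGlobFilenameMatchSingleCharacter: /a? matches /az, not /adir
      assert(matches("/a?", "/az"))
      assert(!matches("/a?", "/adir"))
      // mirrors testGlobDirnameMatchSingleCharacterInMiddleOfName
      assert(matches("/a?ir/x", "/adir/x"))
      println("`?` matches exactly one non-separator character")
    }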
diff --git a/infra/gcp-broad/main.tf b/infra/gcp-broad/main.tf
index f1bd40a3e4a..cc36c4d550c 100644
--- a/infra/gcp-broad/main.tf
+++ b/infra/gcp-broad/main.tf
@@ -174,10 +174,6 @@ resource "google_container_cluster" "vdc" {
     }
   }
 
-  workload_identity_config {
-    workload_pool = "hail-vdc.svc.id.goog"
-  }
-
   timeouts {}
 }
 
@@ -226,10 +222,6 @@ resource "google_container_node_pool" "vdc_preemptible_pool" {
       enable_integrity_monitoring = true
      enable_secure_boot          = false
     }
-
-    workload_metadata_config {
-      mode = "GKE_METADATA"
-    }
   }
 
   timeouts {}
@@ -285,10 +277,6 @@ resource "google_container_node_pool" "vdc_nonpreemptible_pool" {
       enable_integrity_monitoring = true
       enable_secure_boot          = false
     }
-
-    workload_metadata_config {
-      mode = "GKE_METADATA"
-    }
   }
 
   timeouts {}
@@ -529,6 +517,18 @@ resource "google_storage_bucket_iam_member" "test_bucket_admin" {
   member = "serviceAccount:${module.test_gsa_secret.email}"
 }
 
+module "test_dev_gsa_secret" {
+  source  = "./gsa"
+  name    = "test-dev"
+  project = var.gcp_project
+}
+
+resource "google_storage_bucket_iam_member" "test_dev_bucket_admin" {
+  bucket = google_storage_bucket.hail_test_bucket.name
+  role   = "roles/storage.admin"
+  member = "serviceAccount:${module.test_dev_gsa_secret.email}"
+}
+
 resource "google_service_account" "batch_agent" {
   description  = "Delete instances and pull images"
   display_name = "batch2-agent"
diff --git a/notebook/Makefile b/notebook/Makefile
index a4057d45ecd..53a9feb9448 100644
--- a/notebook/Makefile
+++ b/notebook/Makefile
@@ -8,8 +8,8 @@ build-notebook:
 
 .PHONY: build-nginx
 build-nginx:
-	$(MAKE) -C ../docker hail-ubuntu
-	python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"'$$(cat ../docker/hail-ubuntu-image-ref)'"}}' Dockerfile.nginx Dockerfile.nginx.out
+	$(MAKE) -C .. hail-ubuntu-image
+	python3 ../ci/jinja2_render.py '{"hail_ubuntu_image":{"image":"'$$(cat ../hail-ubuntu-image)'"}}' Dockerfile.nginx Dockerfile.nginx.out
 	python3 ../ci/jinja2_render.py '{"deploy": $(DEPLOY), "default_ns": {"name": "$(NAMESPACE)"}}' nginx.conf nginx.conf.out
 	../docker-build.sh . Dockerfile.nginx.out $(NOTEBOOK_NGINX_IMAGE)
 
diff --git a/tls/config.yaml b/tls/config.yaml
index 3670d8ba0bc..0d8f751231a 100644
--- a/tls/config.yaml
+++ b/tls/config.yaml
@@ -56,34 +56,14 @@ principals:
   - internal-gateway
   unmanged: True
   kind: nginx
-- name: batch-tests
-  domains:
-  - batch-tests
-  kind: json
-- name: ci-tests
-  domains:
-  - ci-tests
-  kind: json
 - name: memory
   domains:
   - memory
   kind: json
-- name: memory-tests
-  domains:
-  - memory-tests
-  kind: json
-- name: services-java-tests
-  domains:
-  - services-java-tests
-  kind: json
 - name: monitoring
   domains:
   - monitoring
   kind: json
-- name: monitoring-tests
-  domains:
-  - monitoring-tests
-  kind: json
 - name: batch-user-code
   domains:
   - batch-user-code
diff --git a/tls/create_test_db_config.sh b/tls/create_test_db_config.sh
index b5bde7a2e0c..31d5b04ec26 100644
--- a/tls/create_test_db_config.sh
+++ b/tls/create_test_db_config.sh
@@ -29,6 +29,7 @@ cd $dir
 
 # Create the MySQL server CA
 openssl req -new -x509 \
     -subj /CN=db-root -nodes -newkey rsa:4096 \
+    -days 365 \
     -keyout server-ca-key.pem -out server-ca.pem
 create_key_and_cert server
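Reviewer note: the create_test_db_config.sh change above pins the generated MySQL server CA to a 365-day validity window instead of the openssl default. A quick way to confirm a certificate's validity window using only the JDK is sketched below; the path is hypothetical (wherever the script wrote the CA), and this is not part of the change itself:

    import java.io.FileInputStream
    import java.security.cert.{CertificateFactory, X509Certificate}

    object CertExpiryCheck extends App {
      val pemPath = "server-ca.pem" // hypothetical location of the generated CA
      val in = new FileInputStream(pemPath)
      try {
        // CertificateFactory accepts PEM- or DER-encoded certificates
        val cert = CertificateFactory.getInstance("X.509")
          .generateCertificate(in).asInstanceOf[X509Certificate]
        println(s"notBefore=${cert.getNotBefore} notAfter=${cert.getNotAfter}")
        cert.checkValidity() // throws CertificateExpiredException once past notAfter
      } finally in.close()
    }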
diff --git a/website/Makefile b/website/Makefile
index a62f29b543b..303b625d7f0 100644
--- a/website/Makefile
+++ b/website/Makefile
@@ -17,7 +17,6 @@ run-docker: build
 
 deploy: build
 	! [ -z $(NAMESPACE) ]  # call this like: make deploy NAMESPACE=default
-	$(MAKE) -C ../docker hail-ubuntu
 	python3 ../ci/jinja2_render.py '{"default_ns":{"name":"$(NAMESPACE)"},"code":{"sha":"$(shell git rev-parse --short=12 HEAD)"},"deploy":$(DEPLOY),"scope":"$(SCOPE)","website_image":{"image":"$(WEBSITE_IMAGE)"}}' deployment.yaml deployment.yaml.out
 	kubectl -n $(NAMESPACE) apply -f deployment.yaml.out
 
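Reviewer note: the deploy target keeps its `! [ -z $(NAMESPACE) ]` guard, which aborts the recipe before anything is rendered or applied when NAMESPACE is unset. The same fail-fast pattern, sketched in Scala for illustration only (the variable name mirrors the Makefile's):

    object RequireNamespace extends App {
      // refuse to proceed rather than deploy to an unintended namespace
      val namespace = sys.env.get("NAMESPACE")
      require(namespace.exists(_.nonEmpty), "NAMESPACE must be set, e.g. NAMESPACE=default")
      println(s"deploying to namespace ${namespace.get}")
    }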