diff --git a/.github/workflows/condarise.yaml b/.github/workflows/condarise.yaml deleted file mode 100644 index 13eddd5f..00000000 --- a/.github/workflows/condarise.yaml +++ /dev/null @@ -1,40 +0,0 @@ -name: Condarise -on: - push: - branches: - - main -jobs: - build-publish: - # Run on merge to main, where the commit name starts with "Bump version:" (for bump2version) - if: "startsWith(github.event.head_commit.message, 'Bump version:')" - runs-on: ubuntu-latest - defaults: - run: - shell: bash -l {0} - steps: - - uses: actions/checkout@main - - - uses: conda-incubator/setup-miniconda@v2 - with: - activate-environment: buildenv - channels: cpg,conda-forge,bioconda,defaults - channel-priority: true - # Set in case if the project will be using Hail: - python-version: 3.7 - - - name: Setup build env - run: conda install pip conda-build anaconda-client - - - name: Run-tests - run: | - conda env create --name test-env -f environment-dev.yml - conda activate test-env - python -m unittest test/test_analysis_runner.py - - - name: Build package - run: conda build conda/$(basename $GITHUB_REPOSITORY) - - - name: Upload to anaconda package repository - run: | - anaconda -t ${{ secrets.ANACONDA_TOKEN }} \ - upload ${CONDA_PREFIX}/conda-bld/**/*.tar.bz2 diff --git a/.github/workflows/hail_update.yaml b/.github/workflows/hail_update.yaml index 0ccfd022..f9627126 100644 --- a/.github/workflows/hail_update.yaml +++ b/.github/workflows/hail_update.yaml @@ -1,12 +1,12 @@ # This workflow is triggered after a new version of Hail has been built, with a -# corresponding conda package. It leads to new driver and server Docker images being +# corresponding pip package. It leads to new driver and server Docker images being # built, followed by the deployment of the server. 
name: Deploy analysis-runner server after Hail update on: workflow_dispatch: inputs: hail_version: - description: 'Hail version (as uploaded to Anaconda)' + description: 'Hail version (as uploaded to PyPI)' required: true jobs: diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml index cdbe7d22..6b5b8c1c 100644 --- a/.github/workflows/lint.yaml +++ b/.github/workflows/lint.yaml @@ -1,4 +1,4 @@ -name: CI +name: Lint on: [push, pull_request] jobs: @@ -11,18 +11,18 @@ jobs: steps: - uses: actions/checkout@v2 - - uses: conda-incubator/setup-miniconda@v2 + - uses: actions/setup-python@v2 + with: + python-version: '3.10' + cache: 'pip' - name: Install packages run: | - conda env create --name test-env -f environment-dev.yml + pip install . + pip install -r requirements-dev.txt - name: pre-commit - run: | - conda activate test-env - pre-commit run --all-files + run: pre-commit run --all-files - name: Run-tests - run: | - conda activate test-env - python -m unittest test/test_analysis_runner.py + run: python -m unittest test/test_analysis_runner.py diff --git a/.github/workflows/package.yaml b/.github/workflows/package.yaml new file mode 100644 index 00000000..486157e1 --- /dev/null +++ b/.github/workflows/package.yaml @@ -0,0 +1,42 @@ +name: Package +on: + # Building on pull-requests, manual dispatch, and pushes to main. 
But restricting + # publishing only to main pushes and manual dispatch with `if`s in specific steps + pull_request: + workflow_dispatch: + push: + branches: + - main + +jobs: + package: + runs-on: ubuntu-latest + defaults: + run: + shell: bash -l {0} + steps: + - uses: actions/checkout@main + + - uses: actions/setup-python@v2 + with: + python-version: '3.10' + + - name: Build + run: python setup.py sdist + + - name: Test install + run: pip install dist/* + + - name: Run tests + run: python -m unittest test/test_analysis_runner.py + + # `skip_existing: true` makes sure that the package will be published + # only when new version is created + - name: Publish the wheel to PyPI + uses: pypa/gh-action-pypi-publish@release/v1 + if: ${{ github.event_name != 'pull_request' }} + with: + user: __token__ + password: ${{ secrets.PYPI_API_TOKEN }} + packages_dir: dist/ + skip_existing: true diff --git a/.pylintrc b/.pylintrc index 8ac8dd9d..c74e827f 100644 --- a/.pylintrc +++ b/.pylintrc @@ -2,20 +2,26 @@ # We disable the following inspections: # 1. f-string-without-interpolation (we allow f-strings that don't do any # formatting for consistent looks and for future safety) -# 2. inherit-non-class ("Inheriting 'NamedTuple', which is not a class" false +# 2. logging-fstring-interpolation ("Use lazy % formatting in logging functions") +# 3. inherit-non-class ("Inheriting 'NamedTuple', which is not a class" false # positive, see: https://github.com/PyCQA/pylint/issues/3876) -# 3. too-few-public-methods (produces false positives) -# 4. inspections incompatible with Black (see +# 4. too-few-public-methods (produces false positives) +# 5. inspections incompatible with Black (see https://github.com/psf/black/blob/master/docs/compatible_configs.md#why-those-options-above-2): # C0330: Wrong hanging indentation before block (add 4 spaces) # C0326: Bad whitespace -# 5. fixme (left 'TODO' lines) -# 6. line-too-long -# 7. similar lines +# 6. fixme (left 'TODO' lines) +# 7. 
line-too-long +# 8. too-many-statements +# 9. too-many-lines +# 10. R0801 (duplicate-code) +# 11. similar lines +# 12. import-error -disable=f-string-without-interpolation,inherit-non-class,too-few-public-methods,C0330, - C0326,fixme,logging-fstring-interpolation,line-too-long,too-many-statements, - R0801,too-many-lines +disable=f-string-without-interpolation,logging-fstring-interpolation, + inherit-non-class,too-few-public-methods,C0330, + C0326,fixme,line-too-long,too-many-statements,too-many-lines, + R0801,import-error # Overriding variable name patterns to allow short 1- or 2-letter variables attr-rgx=[a-z_][a-z0-9_]{0,30}$ diff --git a/README.md b/README.md index c42d71a4..414a945b 100644 --- a/README.md +++ b/README.md @@ -31,10 +31,10 @@ The analysis-runner is also integrated with our Cromwell server to run WDL based ## CLI The analysis-runner CLI can be used to start pipelines based on a GitHub repository, -commit, and command to run. To install it, use mamba: +commit, and command to run. To install it, use pip: ```bash -mamba install -c cpg -c conda-forge analysis-runner +pip install analysis-runner ``` Run `analysis-runner --help` to see usage information. @@ -139,15 +139,13 @@ To bring up a stack corresponding to a dataset as described in the [storage policies](https://github.com/populationgenomics/team-docs/tree/main/storage_policies), see the [stack](stack) directory. -To set up a development environment for the analysis runner using mamba, run +To set up a development environment for the analysis runner using pip, run the following: ```bash -mamba env create --file environment-dev.yml +pip install -r requirements-dev.txt -conda activate analysis-runner - -pre-commit install +pre-commit install --install-hooks pip install --editable . ``` @@ -156,21 +154,21 @@ pip install --editable . 1. Add a Hail Batch service account for all supported datasets. 1. [Copy the Hail tokens](tokens) to the Secret Manager. -1. 
Deploy the [server](server) by invoking the [`hail_update` workflow](https://github.com/populationgenomics/analysis-runner/blob/main/.github/workflows/hail_update.yaml) manually, specifying the Hail package version in conda. +1. Deploy the [server](server) by invoking the [`hail_update` workflow](https://github.com/populationgenomics/analysis-runner/blob/main/.github/workflows/hail_update.yaml) manually, specifying the Hail package version. 1. Deploy the [Airtable](airtable) publisher. -1. Publish the [CLI tool and library](analysis_runner) to conda. +1. Publish the [CLI tool and library](analysis_runner) to PyPI. -Note that the [`hail_update` workflow](https://github.com/populationgenomics/analysis-runner/blob/main/.github/workflows/hail_update.yaml) gets invoked whenever a new Hail package is published to conda. You can test this manually as follows: +Note that the [`hail_update` workflow](https://github.com/populationgenomics/analysis-runner/blob/main/.github/workflows/hail_update.yaml) gets invoked whenever a new Hail package is published to PyPI. You can test this manually as follows: ```bash curl \ -X POST \ -H "Authorization: token $GITHUB_TOKEN" -H "Accept: application/vnd.github.v3+json" \ https://api.github.com/repos/populationgenomics/analysis-runner/actions/workflows/6364059/dispatches \ - -d '{"ref": "main", "inputs": {"hail_version": "0.2.63.deveb7251e548b1"}}' + -d '{"ref": "main", "inputs": {"hail_version": "0.2.84"}}' ``` -The CLI tool is shipped as a conda package. To build a new version, +The CLI tool is shipped as a pip package. To build a new version, we use [bump2version](https://pypi.org/project/bump2version/). 
For example, to increment the patch section of the version tag 1.0.0 and make it 1.0.1, run: @@ -185,5 +183,4 @@ open "https://github.com/populationgenomics/analysis-runner/pull/new/add-new-ver It's important the pull request name start with "Bump version:" (which should happen by default). Once this is merged into `main`, a GitHub action workflow will build a -new conda package that will be uploaded to the conda [CPG -channel](https://anaconda.org/cpg/), and become available to install with `mamba install -c cpg -c conda-forge ...` +new package that will be uploaded to PyPI, and become available to install with `pip install`. diff --git a/access_group_cache/Dockerfile b/access_group_cache/Dockerfile index 85afafb2..f9a99e47 100644 --- a/access_group_cache/Dockerfile +++ b/access_group_cache/Dockerfile @@ -1,18 +1,12 @@ -FROM python:3.9-slim +FROM python:3.10-slim ENV PYTHONUNBUFFERED True -ENV MAMBA_ROOT_PREFIX /root/micromamba -ENV PATH $MAMBA_ROOT_PREFIX/bin:$PATH - -RUN apt-get update && apt-get install -y wget bzip2 && \ - rm -r /var/lib/apt/lists/* && \ - rm -r /var/cache/apt/* && \ - wget -qO- https://api.anaconda.org/download/conda-forge/micromamba/0.8.2/linux-64/micromamba-0.8.2-he9b6cbd_0.tar.bz2 | tar -xvj -C /usr/local bin/micromamba && \ - mkdir $MAMBA_ROOT_PREFIX && \ - micromamba install -y --prefix $MAMBA_ROOT_PREFIX -c cpg -c conda-forge \ - cpg-utils==2.1.2 aiohttp flask gunicorn && \ - rm -r /root/micromamba/pkgs +RUN pip3 install \ + cpg-utils==2.1.2 \ + aiohttp \ + flask \ + gunicorn COPY main.py ./ diff --git a/analysis_runner/util.py b/analysis_runner/util.py index 282b516d..6cbd037a 100644 --- a/analysis_runner/util.py +++ b/analysis_runner/util.py @@ -150,7 +150,7 @@ def _perform_version_check(): f'{current_version} != {latest_version} (current vs latest).\n' f'Your analysis will still be submitted, but may not work as expected.' 
f' You can update the analysis-runner by running ' - f'"conda install -c cpg analysis-runner={latest_version}".' + f'"pip install analysis-runner=={latest_version}".' ) logger.warning(message) return diff --git a/conda/analysis-runner/build.sh b/conda/analysis-runner/build.sh deleted file mode 100644 index e148bc8b..00000000 --- a/conda/analysis-runner/build.sh +++ /dev/null @@ -1,4 +0,0 @@ -# `--single-version-externally-managed --root=/` is added to make setuptools avoid downloading dependencies. This resolves the conda-build -# error "Setuptools downloading is disabled in conda build. Be sure to add all dependencies in the meta.yaml" -$PYTHON setup.py install --single-version-externally-managed --root=/ -chmod -R o+r $PREFIX/lib/python*/site-packages/* diff --git a/conda/analysis-runner/meta.yaml b/conda/analysis-runner/meta.yaml deleted file mode 100644 index 412a24ec..00000000 --- a/conda/analysis-runner/meta.yaml +++ /dev/null @@ -1,30 +0,0 @@ -{% set data=load_setup_py_data() %} - -package: - name: analysis-runner - version: {{ data['version'] }} - -source: - path: ../../ - -build: - number: 0 - noarch: python - -requirements: - host: - - python - run: - - google-auth ==1.24.0 - - click ==7.1.2 - - requests - - tabulate - -test: - commands: - - analysis-runner --version - -about: - home: {{ data['url'] }} - license: {{ data['license'] }} - summary: {{ data['description'] }} diff --git a/driver/Dockerfile b/driver/Dockerfile index af05b605..baf5a4b7 100644 --- a/driver/Dockerfile +++ b/driver/Dockerfile @@ -14,19 +14,20 @@ RUN apt-get update && apt-get install -y git wget bash bzip2 zip && \ mkdir $MAMBA_ROOT_PREFIX && \ micromamba install -y --prefix $MAMBA_ROOT_PREFIX \ -c cpg -c bioconda -c conda-forge \ - analysis-runner \ bokeh \ - cpg-utils \ phantomjs \ r-argparser \ r-base=4.1.1 \ r-essentials \ r-tidyverse \ - sample-metadata=4.1.1 \ selenium \ skopeo \ statsmodels && \ rm -r /root/micromamba/pkgs && \ - pip3 install cpg-hail=$HAIL_VERSION && \ + pip3
install \ + cpg-hail==$HAIL_VERSION \ + cpg-utils==2.1.1 \ + analysis-runner \ + sample-metadata==4.1.2 && \ # hailctl dataproc uses gcloud beta dataproc. gcloud -q components install beta diff --git a/environment-dev.yml b/environment-dev.yml deleted file mode 100644 index 386a45a6..00000000 --- a/environment-dev.yml +++ /dev/null @@ -1,29 +0,0 @@ -name: analysis-runner -channels: - - cpg - - bioconda - - conda-forge -dependencies: - - python=3.9 - - pip - - cpg-utils # Not yet in pypi. - - pip: - - airtable-python-wrapper - - black - - bump2version - - cpg-hail - - flake8 - - flake8-bugbear - - flask - - google-api-python-client==2.10.0 - - google-auth==1.27.0 - - google-cloud-logging==3.0.0 - - google-cloud-pubsub==2.3.0 - - google-cloud-secret-manager==2.2.0 - - google-cloud-storage==1.25.0 - - kubernetes - - pre-commit - - pulumi-gcp - - pylint - - requests - - tabulate diff --git a/examples/dataproc/README.md b/examples/dataproc/README.md index 47a9e1d8..a9cdf09b 100644 --- a/examples/dataproc/README.md +++ b/examples/dataproc/README.md @@ -1,6 +1,6 @@ # Dataproc example -This example shows how to run a Hail query script in Dataproc using Hail Batch. After installing the conda package for the analysis runner, you can run this as follows: +This example shows how to run a Hail query script in Dataproc using Hail Batch.
After installing the pip package for the analysis runner, you can run this as follows: ```bash cd examples/dataproc diff --git a/requirements-dev.txt b/requirements-dev.txt new file mode 100644 index 00000000..9338371c --- /dev/null +++ b/requirements-dev.txt @@ -0,0 +1,4 @@ +black +bump2version +pre-commit +pylint diff --git a/server/Dockerfile b/server/Dockerfile index 92fb59a3..9e5f3369 100644 --- a/server/Dockerfile +++ b/server/Dockerfile @@ -2,15 +2,13 @@ ARG DRIVER_IMAGE FROM ${DRIVER_IMAGE} -RUN micromamba install --prefix $MAMBA_ROOT_PREFIX -c cpg -c conda-forge \ - cpg-utils==2.1.1 \ +RUN pip install \ google-api-python-client==2.10.0 \ google-cloud-secret-manager==2.2.0 \ google-cloud-pubsub==2.3.0 \ google-cloud-logging==2.7.0 \ gunicorn \ - requests && \ - rm -r /root/micromamba/pkgs + requests # Allow statements and log messages to immediately appear in the Knative logs. ENV PYTHONUNBUFFERED 1 diff --git a/setup.py b/setup.py index a655b4fc..48282b31 100644 --- a/setup.py +++ b/setup.py @@ -1,9 +1,7 @@ #!/usr/bin/env python """ -Setup script for the Python package -- Used for development setup with `pip install --editable .` -- Parsed by conda-build to extract version and metainfo +Setup script for the Python package. 
""" import setuptools @@ -29,6 +27,25 @@ def read_file(filename: str) -> str: packages=['analysis_runner'], include_package_data=True, zip_safe=False, + install_requires=[ + 'cpg-utils', + 'click', + 'airtable-python-wrapper', + 'cpg-hail', + 'flake8', + 'flake8-bugbear', + 'flask', + 'google-api-python-client==2.10.0', + 'google-auth==1.27.0', + 'google-cloud-logging==3.0.0', + 'google-cloud-pubsub==2.3.0', + 'google-cloud-secret-manager==2.2.0', + 'google-cloud-storage==1.25.0', + 'kubernetes', + 'pulumi-gcp', + 'requests', + 'tabulate==0.8.9', # https://github.com/Azure/azure-cli/issues/20887 + ], entry_points={ 'console_scripts': ['analysis-runner=analysis_runner.cli:main_from_args'] }, diff --git a/tokens/README.md b/tokens/README.md index 9863d9d8..d2d56f98 100644 --- a/tokens/README.md +++ b/tokens/README.md @@ -11,7 +11,7 @@ gcloud config set project hail-295901 gcloud container clusters get-credentials vdc ``` -See the [main readme file](../README.md) about how to set up a conda +See the [main readme file](../README.md) about how to set up environment. The list of projects is hardcoded in `main.py`. 
To update the secret stored in Secret Manager: diff --git a/web/Dockerfile b/web/Dockerfile index 39f74055..1a6c417a 100644 --- a/web/Dockerfile +++ b/web/Dockerfile @@ -1,18 +1,14 @@ -FROM python:3.9-slim +FROM python:3.10-slim ENV PYTHONUNBUFFERED True -ENV MAMBA_ROOT_PREFIX /root/micromamba -ENV PATH $MAMBA_ROOT_PREFIX/bin:$PATH - -RUN apt-get update && apt-get install -y wget bzip2 && \ - rm -r /var/lib/apt/lists/* && \ - rm -r /var/cache/apt/* && \ - wget -qO- https://api.anaconda.org/download/conda-forge/micromamba/0.8.2/linux-64/micromamba-0.8.2-he9b6cbd_0.tar.bz2 | tar -xvj -C /usr/local bin/micromamba && \ - mkdir $MAMBA_ROOT_PREFIX && \ - micromamba install -y --prefix $MAMBA_ROOT_PREFIX -c cpg -c conda-forge \ - cpg-utils==2.1.1 flask gunicorn google-api-python-client==2.10.0 google-cloud-storage==1.38.0 google-cloud-secret-manager==2.2.0 && \ - rm -r /root/micromamba/pkgs +RUN pip3 install \ + cpg-utils==2.1.2 \ + flask \ + gunicorn \ + google-api-python-client==2.10.0 \ + google-cloud-storage==1.38.0 \ + google-cloud-secret-manager==2.2.0 COPY main.py ./