From 4fa308ab8f1963708c9f210d3f7a6e81d157e638 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Thu, 16 Nov 2023 17:40:36 +0100 Subject: [PATCH 01/26] drop 3.7 support --- pyproject.toml | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 3b877aa5..5a902d1d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,7 @@ requires = [ [project] name = "deepTools" -version = "3.5.4" +version = "3.5.5" authors = [ {name="Fidel Ramirez"}, {name="Devon P Ryan"}, @@ -19,7 +19,7 @@ authors = [ {name="Thomas Manke"}, {email="bioinfo-core@ie-freiburg.mpg.de"} ] -requires-python = ">=3.7" +requires-python = ">=3.8" dependencies = [ "numpy >= 1.9.0", "scipy >= 0.17.0", @@ -29,8 +29,7 @@ dependencies = [ "pyBigWig >= 0.2.1", "py2bit >= 0.2.0", "plotly >= 4.9", - "deeptoolsintervals >= 0.1.8", - "importlib-metadata" # python 3.7 support + "deeptoolsintervals >= 0.1.8" ] description = "Useful tools for exploring deep sequencing data." license = {file = "LICENSE.txt"} From 65d55c365bd40b70806b9c5f6d033ee6abeb5dcf Mon Sep 17 00:00:00 2001 From: WardDeb Date: Thu, 16 Nov 2023 17:41:05 +0100 Subject: [PATCH 02/26] doc content update --- README.md | 32 ++++++----------- docs/content/installation.rst | 65 ++++++++++------------------------ docs/content/list_of_tools.rst | 34 +++++++++--------- docs/index.rst | 6 ++-- 4 files changed, 49 insertions(+), 88 deletions(-) diff --git a/README.md b/README.md index 84d46be8..f3f614c2 100644 --- a/README.md +++ b/README.md @@ -4,7 +4,6 @@ [![install with bioconda](https://img.shields.io/badge/install%20with-bioconda-brightgreen.svg?style=flat)](http://bioconda.github.io/recipes/deeptools/README.html) [![European Galaxy server](https://img.shields.io/badge/usegalaxy-.eu-brightgreen?logo=data:image/png;base64,iVBORw0KGgoAAAANSUhEUgAAABgAAAASCAYAAABB7B6eAAAABGdBTUEAALGPC/xhBQAAACBjSFJNAAB6JgAAgIQAAPoAAACA6AAAdTAAAOpgAAA6mAAAF3CculE8AAAACXBIWXMAAAsTAAALEwEAmpwYAAACC2lUWHRYTUw6Y29tLmFkb2JlLnhtcAAAAAAAPHg6eG1wbWV0YSB4bWxuczp4PSJhZG9iZTpuczptZXRhLyIgeDp4bXB0az0iWE1QIENvcmUgNS40LjAiPgogICA8cmRmOlJERiB4bWxuczpyZGY9Imh0dHA6Ly93d3cudzMub3JnLzE5OTkvMDIvMjItcmRmLXN5bnRheC1ucyMiPgogICAgICA8cmRmOkRlc2NyaXB0aW9uIHJkZjphYm91dD0iIgogICAgICAgICAgICB4bWxuczp0aWZmPSJodHRwOi8vbnMuYWRvYmUuY29tL3RpZmYvMS4wLyI+CiAgICAgICAgIDx0aWZmOlJlc29sdXRpb25Vbml0PjI8L3RpZmY6UmVzb2x1dGlvblVuaXQ+CiAgICAgICAgIDx0aWZmOkNvbXByZXNzaW9uPjE8L3RpZmY6Q29tcHJlc3Npb24+CiAgICAgICAgIDx0aWZmOk9yaWVudGF0aW9uPjE8L3RpZmY6T3JpZW50YXRpb24+CiAgICAgICAgIDx0aWZmOlBob3RvbWV0cmljSW50ZXJwcmV0YXRpb24+MjwvdGlmZjpQaG90b21ldHJpY0ludGVycHJldGF0aW9uPgogICAgICA8L3JkZjpEZXNjcmlwdGlvbj4KICAgPC9yZGY6UkRGPgo8L3g6eG1wbWV0YT4KD0UqkwAAAn9JREFUOBGlVEuLE0EQruqZiftwDz4QYT1IYM8eFkHFw/4HYX+GB3/B4l/YP+CP8OBNTwpCwFMQXAQPKtnsg5nJZpKdni6/6kzHvAYDFtRUT71f3UwAEbkLch9ogQxcBwRKMfAnM1/CBwgrbxkgPAYqlBOy1jfovlaPsEiWPROZmqmZKKzOYCJb/AbdYLso9/9B6GppBRqCrjSYYaquZq20EUKAzVpjo1FzWRDVrNay6C/HDxT92wXrAVCH3ASqq5VqEtv1WZ13Mdwf8LFyyKECNbgHHAObWhScf4Wnj9CbQpPzWYU3UFoX3qkhlG8AY2BTQt5/EA7qaEPQsgGLWied0A8VKrHAsCC1eJ6EFoUd1v6GoPOaRAtDPViUr/wPzkIFV9AaAZGtYB568VyJfijV+ZBzlVZJ3W7XHB2RESGe4opXIGzRTdjcAupOK09RA6kzr1NTrTj7V1ugM4VgPGWEw+e39CxO6JUw5XhhKihmaDacU2GiR0Ohcc4cZ+Kq3AjlEnEeRSazLs6/9b/kh4eTC+hngE3QQD7Yyclxsrf3cpxsPXn+cFdenF9aqlBXMXaDiEyfyfawBz2RqC/O9WF1ysacOpytlUSoqNrtfbS642+4D4CS9V3xb4u8P/ACI4O810efRu6KsC0QnjHJGaq4IOGUjWTo/YDZDB3xSIxcGyNlWcTucb4T3in/3IaueNrZyX0lGOrWndstOr+w21UlVFokILjJLFhPukbVY8OmwNQ3nZgNJNmKDccusSb4UIe+gtkI+9/bSLJDjqn763f5CQ5TLApmICkqwR0QnUPKZFIUnoozWcQuRbC0Km02knj0tPYx63furGs3x/iPnz83zJDVNtdP3QAAAABJRU
5ErkJggg==)](https://usegalaxy.eu/root?tool_id=deeptools_compute_matrix) ![test](https://github.com/deeptools/deepTools/actions/workflows/test.yml/badge.svg) -![planemo](https://github.com/deeptools/deepTools/actions/workflows/planemo.yml/badge.svg) ## User-friendly tools for exploring deep-sequencing data @@ -34,35 +33,26 @@ Our [Gallery](http://deeptools.readthedocs.org/en/latest/content/example_gallery deepTools are available for: -* Command line usage (via pip/anaconda/github) +* Command line usage (via pip / conda / github) * Integration into Galaxy servers (via toolshed/API/web-browser) -There are many easy ways to install deepTools. Details can be found [here](https://deeptools.readthedocs.io/en/latest/content/installation.html) +There are many easy ways to install deepTools. More details can be found [here](https://deeptools.readthedocs.io/en/latest/content/installation.html). -**Install by cloning this repository:** +In Brief: -You can install any one of the deepTools branches on command line (linux/mac) by cloning this git repository : +**Install through pypi** - $ git clone https://github.com/deeptools/deepTools - $ cd deepTools - $ python setup.py install - -By default, the script will install the python library and executable -codes globally, which means you need to be root or administrator of -the machine to complete the installation. If you need to -provide a nonstandard install prefix, or any other nonstandard -options, you can provide many command line options to the install -script. + $ pip install deeptools - $ python setup.py --help +**Install via conda** -For example, to install under a specific location use: + $ conda install -c bioconda deeptools - $ python setup.py install --prefix +**Install by cloning the repository** -To install into your home directory, use: - - $ python setup.py install --user + $ git clone https://github.com/deeptools/deepTools + $ cd deepTools + $ pip install . ### Galaxy Installation diff --git a/docs/content/installation.rst b/docs/content/installation.rst index a8fcaaad..f0ce0ef5 100644 --- a/docs/content/installation.rst +++ b/docs/content/installation.rst @@ -2,7 +2,7 @@ Installation ============= Remember -- deepTools are available for **command line usage** as well as for -**integration into Galaxy servers**! +**integration into Galaxy servers** ! .. contents:: :local: @@ -10,68 +10,53 @@ Remember -- deepTools are available for **command line usage** as well as for Command line installation using ``conda`` ----------------------------------------- -DeepTools (including the requirements) can be installed with conda: +The recommended way to install deepTools (including its requirements) is via `miniconda `_ or `anaconda `_. .. code:: bash $ conda install -c bioconda deeptools -Command line installation using ``pip`` from pypi --------------------------------------------------- +Command line installation using ``pip`` +--------------------------------------- -Install deepTools using the following command: -:: +deepTools can also be installed using `pip `_. +You can either install the latest release from `pypi `_: - $ pip install deeptools +.. code:: bash -All python requirements should be automatically installed. + $ pip install deeptools -If you need to specify a specific path for the installation of the tools, make use of `pip install`'s numerous options: +or a specific version with: .. 
code:: bash - $ pip install --install-option="--prefix=/MyPath/Tools/deepTools2.0" git+https://github.com/deeptools/deepTools.git - + $ pip install deeptools==3.5.3 -Command line installation using ``pip`` from source ---------------------------------------------------- +In case you would like to install an unreleased or development version, deepTools can also be installed from the repository: -You are highly recommended to use the 'pypi installation' rather than these more complicated steps. - -1. Download source code -:: +.. code:: bash $ git clone https://github.com/deeptools/deepTools.git - -or if you want a particular release, choose one from https://github.com/deeptools/deepTools/releases: -:: - - $ wget https://github.com/deeptools/deepTools/archive/1.5.12.tar.gz - $ tar -xzvf - -3. install the source code -:: - - $ python -m build - $ pip install dist/*whl + $ cd deepTools + $ pip install . Galaxy installation -------------------- -deepTools can be easily integrated into a local `Galaxy `_. +deepTools can be easily integrated into a local `Galaxy `_. All wrappers and dependencies are available in the `Galaxy Tool -Shed `_. +Shed `_. Installation via Galaxy API (recommended) ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -First generate an `API Key `_ +First generate an `API Key `_ for your admin user and run the the installation script: :: $ python ./scripts/api/install_tool_shed_repositories.py \ --api YOUR_API_KEY -l http://localhost/ \ - --url http://toolshed.g2.bx.psu.edu/ \ + --url https://toolshed.g2.bx.psu.edu/ \ -o bgruening -r --name suite_deeptools \ --tool-deps --repository-deps --panel-section-name deepTools @@ -80,7 +65,7 @@ latest revision number from the test tool shed or with the following command: :: - $ hg identify http://toolshed.g2.bx.psu.edu/repos/bgruening/suite_deeptools + $ hg identify https://toolshed.g2.bx.psu.edu/repos/bgruening/suite_deeptools You can watch the installation status under: Top Panel --> Admin --> Manage installed tool shed repositories @@ -92,15 +77,3 @@ Installation via web browser - select *Search and browse tool sheds* - Galaxy tool shed --> Sequence Analysis --> deeptools - install deeptools - -Installation with Docker -^^^^^^^^^^^^^^^^^^^^^^^^ - -The deepTools Galaxy instance is also available as a docker container, for those wishing to use the Galaxy framework but who also prefer a virtualized solution. This container is quite simple to install: -:: - - $ sudo docker pull quay.io/bgruening/galaxy-deeptools - -To start and otherwise modify this container, please see the instructions on `the docker-galaxy-stable github repository `__. Note that you must use `bgruening/galaxy-deeptools` in place of `bgruening/galaxy-stable` in the examples, as the deepTools Galaxy container is built on top of the galaxy-stable container. - -.. tip:: For support or questions please make a post on `Biostars `__. For feature requests or bug reports please open an issue `on github `__. diff --git a/docs/content/list_of_tools.rst b/docs/content/list_of_tools.rst index 4a874099..2191f3c2 100644 --- a/docs/content/list_of_tools.rst +++ b/docs/content/list_of_tools.rst @@ -1,16 +1,6 @@ The tools ========= -.. 
note:: With the release of deepTools 2.0, we renamed a couple of tools: - - * **heatmapper** to :doc:`tools/plotHeatmap` - * **profiler** to :doc:`tools/plotProfile` - * **bamCorrelate** to :doc:`tools/multiBamSummary` - * **bigwigCorrelate** to :doc:`tools/multiBigwigSummary` - * **bamFingerprint** to :doc:`tools/plotFingerprint`. - - For more, see :doc:`changelog`. - .. contents:: :local: @@ -68,11 +58,18 @@ A typical deepTools command could look like this: --ignoreDuplicates \ --scaleFactor 0.5 -You can always see all available command-line options via --help: +You can always see all available command-line options via --help or -h: .. code:: bash $ bamCoverage --help + $ bamCoverage -h + +And a minimal usage example can be shown by running a command without any arguments: + +.. code:: bash + + $ bamCoverage - Output format of plots should be indicated by the file ending, e.g. ``MyPlot.pdf`` will return a pdf file, ``MyPlot.png`` a png-file - All tools that produce plots can also output the underlying data - this can be useful in cases where you don't like the deepTools visualization, as you can then use the data matrices produced by deepTools with your favorite plotting tool, such as R @@ -82,14 +79,15 @@ Parameters to decrease the run time """"""""""""""""""""""""""""""""""" - ``numberOfProcessors`` - Number of processors to be used - For example, setting ``--numberOfProcessors 10`` will split up the - workload internally into 10 chunks, which will be - processed in parallel. + +For example, setting ``--numberOfProcessors 10`` will split up the workload internally into 10 chunks, which will be processed in parallel. +Note that for highly fragmented assemblies (> 1000 contigs) the runtime increases drastically. Consider to include only canonical chromosomes in cases like this. + - ``region`` - Process only a single genomic region. - This is particularly useful when you're still trying to figure out the best parameter setting. You can focus on a certain genomic region by setting, e.g., ``--region chr2`` or - ``--region chr2:100000-200000`` -These parameters are optional and available throughout almost all deepTools. +This is particularly useful when you're still trying to figure out the best parameter setting. You can focus on a certain genomic region by setting, e.g., ``--region chr2`` or ``--region chr2:100000-200000`` + +Both parameters are optional and available throughout almost all deepTools. Filtering BAMs while processing """"""""""""""""""""""""""""""" @@ -103,7 +101,7 @@ We offer several ways to filter those BAM files on the fly so that you don't nee Only reads with a mapping quality score of at least this are considered - ``samFlagInclude`` Include reads based on the SAM flag, e.g. ``--samFlagInclude 64`` gets reads that are first in a pair. For translating SAM flags into English, go to: `https://broadinstitute.github.io/picard/explain-flags.html `_ -- ``samFlagExclude`` +- ` `samFlagExclude`` Exclude reads based on the SAM flags - see previous explanation. These parameters are optional and available throughout deepTools. diff --git a/docs/index.rst b/docs/index.rst index a512e8c1..1d739da7 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -56,7 +56,7 @@ that fulfills the following criteria: we can add more and more modules and make use of established methods) -.. tip:: For support or questions please post to `Biostars `__. For bug reports and feature requests please open an issue ``__. +.. tip:: For support or questions please post to `Biostars `__. 
For bug reports and feature requests please open an issue `on github `__. Please cite deepTools2 as follows: @@ -67,6 +67,6 @@ Steffen Heyne, Friederike Dündar, and Thomas Manke. .. image:: images/logo_mpi-ie.jpg -This tool suite is developed by the `Bioinformatics Facility `_ at the +This tool suite is developed by the `Bioinformatics Facility `_ at the `Max Planck Institute for Immunobiology and Epigenetics, -Freiburg `_. +Freiburg `_. From 530b8975f08ba7aab485dd64af976f4a69cfa1d0 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Thu, 16 Nov 2023 17:41:25 +0100 Subject: [PATCH 03/26] pin doc versions --- docs/requirements.txt | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 0a9300aa..56d41a58 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,4 +1,4 @@ -sphinx -mock -sphinx_rtd_theme -sphinx-argparse \ No newline at end of file +sphinx==7.2.6 +mock==5.1.0 +sphinx_rtd_theme==1.3.0 +sphinx-argparse==0.4.0 \ No newline at end of file From 7fa48fd35fd4c509650057d1029bb5ad1cbf9820 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Thu, 16 Nov 2023 17:42:14 +0100 Subject: [PATCH 04/26] drop py3.7 support, include 3.12 --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 8a478907..182885b1 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -73,7 +73,7 @@ jobs: needs: build-linux strategy: matrix: - python-version: ['3.7','3.8','3.9','3.10', '3.11'] + python-version: ['3.8','3.9','3.10', '3.11', '3.12'] steps: - uses: actions/checkout@v3 - uses: actions/download-artifact@v3 From cf307c6fbe9ff24631e8e84b6d57b1bee49948f7 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Thu, 16 Nov 2023 17:42:36 +0100 Subject: [PATCH 05/26] galaxy rapper version boost --- galaxy/wrapper/deepTools_macros.xml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/galaxy/wrapper/deepTools_macros.xml b/galaxy/wrapper/deepTools_macros.xml index 85cbcb2a..49c8dd9e 100755 --- a/galaxy/wrapper/deepTools_macros.xml +++ b/galaxy/wrapper/deepTools_macros.xml @@ -1,7 +1,7 @@ --numberOfProcessors "\${GALAXY_SLOTS:-4}" - 3.5.4 + 3.5.5 22.05 From 02867014fb957531d1fcb06feb39467520775478 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Thu, 16 Nov 2023 17:42:47 +0100 Subject: [PATCH 06/26] importlib simplified --- deeptools/alignmentSieve.py | 6 +----- deeptools/bamPEFragmentSize.py | 6 +----- deeptools/computeMatrix.py | 6 +----- deeptools/computeMatrixOperations.py | 6 +----- deeptools/deeptools_list_tools.py | 6 +----- deeptools/estimateReadFiltering.py | 6 +----- deeptools/estimateScaleFactor.py | 6 +----- deeptools/getFragmentAndReadSize.py | 3 ++- deeptools/multiBamSummary.py | 6 +----- deeptools/multiBigwigSummary.py | 5 +---- deeptools/parserCommon.py | 6 +----- deeptools/plotCorrelation.py | 6 +----- deeptools/plotCoverage.py | 6 +----- deeptools/plotPCA.py | 6 +----- 14 files changed, 15 insertions(+), 65 deletions(-) diff --git a/deeptools/alignmentSieve.py b/deeptools/alignmentSieve.py index 4f2aa187..feb40d10 100644 --- a/deeptools/alignmentSieve.py +++ b/deeptools/alignmentSieve.py @@ -7,12 +7,8 @@ from deeptools import parserCommon from deeptools.bamHandler import openBam from deeptools.mapReduce import mapReduce -try: # keep python 3.7 support. 
- from importlib.metadata import version -except ModuleNotFoundError: - from importlib_metadata import version from deeptools.utilities import getTLen, smartLabels, getTempFileName - +from importlib.metadata import version def parseArguments(): parser = argparse.ArgumentParser( diff --git a/deeptools/bamPEFragmentSize.py b/deeptools/bamPEFragmentSize.py index ad63fa14..118b83c9 100755 --- a/deeptools/bamPEFragmentSize.py +++ b/deeptools/bamPEFragmentSize.py @@ -18,11 +18,7 @@ # own tools from deeptools.parserCommon import writableFile from deeptools.getFragmentAndReadSize import get_read_and_fragment_length -try: # keep python 3.7 support. - from importlib.metadata import version -except ModuleNotFoundError: - from importlib_metadata import version - +from importlib.metadata import version def parse_arguments(): parser = argparse.ArgumentParser( diff --git a/deeptools/computeMatrix.py b/deeptools/computeMatrix.py index 440358c9..8287d1dd 100644 --- a/deeptools/computeMatrix.py +++ b/deeptools/computeMatrix.py @@ -7,15 +7,11 @@ import multiprocessing from deeptools.parserCommon import writableFile, numberOfProcessors -try: # keep python 3.7 support. - from importlib.metadata import version -except ModuleNotFoundError: - from importlib_metadata import version from deeptools import parserCommon from deeptools import heatmapper import deeptools.computeMatrixOperations as cmo import deeptools.deepBlue as db - +from importlib.metadata import version def parse_arguments(args=None): parser = \ diff --git a/deeptools/computeMatrixOperations.py b/deeptools/computeMatrixOperations.py index b246b9ce..978d7eb2 100755 --- a/deeptools/computeMatrixOperations.py +++ b/deeptools/computeMatrixOperations.py @@ -6,11 +6,7 @@ import sys import os import csv -try: # keep python 3.7 support. - from importlib.metadata import version -except ModuleNotFoundError: - from importlib_metadata import version - +from importlib.metadata import version def parse_arguments(): parser = argparse.ArgumentParser( diff --git a/deeptools/deeptools_list_tools.py b/deeptools/deeptools_list_tools.py index 0e4b6a38..53983093 100644 --- a/deeptools/deeptools_list_tools.py +++ b/deeptools/deeptools_list_tools.py @@ -3,11 +3,7 @@ import argparse import sys -try: # keep python 3.7 support. - from importlib.metadata import version -except ModuleNotFoundError: - from importlib_metadata import version - +from importlib.metadata import version def parse_arguments(args=None): parser = argparse.ArgumentParser( diff --git a/deeptools/estimateReadFiltering.py b/deeptools/estimateReadFiltering.py index 52fded53..13d52599 100644 --- a/deeptools/estimateReadFiltering.py +++ b/deeptools/estimateReadFiltering.py @@ -5,11 +5,7 @@ from deeptools import parserCommon, bamHandler, utilities from deeptools.mapReduce import mapReduce from deeptools.utilities import smartLabels -try: # keep python 3.7 support. - from importlib.metadata import version -except ModuleNotFoundError: - from importlib_metadata import version - +from importlib.metadata import version def parseArguments(): parser = argparse.ArgumentParser( diff --git a/deeptools/estimateScaleFactor.py b/deeptools/estimateScaleFactor.py index 31acea3f..549ecf78 100644 --- a/deeptools/estimateScaleFactor.py +++ b/deeptools/estimateScaleFactor.py @@ -6,11 +6,7 @@ from deeptools.SES_scaleFactor import estimateScaleFactor from deeptools.parserCommon import numberOfProcessors -try: # keep python 3.7 support. 
- from importlib.metadata import version -except ModuleNotFoundError: - from importlib_metadata import version - +from importlib.metadata import version debug = 0 diff --git a/deeptools/getFragmentAndReadSize.py b/deeptools/getFragmentAndReadSize.py index 427d5308..0cf2dc8e 100644 --- a/deeptools/getFragmentAndReadSize.py +++ b/deeptools/getFragmentAndReadSize.py @@ -76,7 +76,8 @@ def get_read_and_fragment_length(bamFile, return_lengths=False, blackListFileNam ------- d : dict tuple of two dictionaries, one for the fragment length and the other - for the read length. The dictionaries summarise the mean, median etc. values +for the read length. The dictionaries summarise the mean, median etc. values + """ bam_handle = bamHandler.openBam(bamFile) diff --git a/deeptools/multiBamSummary.py b/deeptools/multiBamSummary.py index b010001f..981a99e3 100644 --- a/deeptools/multiBamSummary.py +++ b/deeptools/multiBamSummary.py @@ -9,11 +9,7 @@ import deeptools.countReadsPerBin as countR from deeptools import parserCommon from deeptools.utilities import smartLabels -try: # keep python 3.7 support. - from importlib.metadata import version -except ModuleNotFoundError: - from importlib_metadata import version - +from importlib.metadata import version old_settings = np.seterr(all='ignore') diff --git a/deeptools/multiBigwigSummary.py b/deeptools/multiBigwigSummary.py index 50f40bee..22789647 100644 --- a/deeptools/multiBigwigSummary.py +++ b/deeptools/multiBigwigSummary.py @@ -10,10 +10,7 @@ from deeptools.utilities import smartLabels import deeptools.getScorePerBigWigBin as score_bw import deeptools.deepBlue as db -try: # keep python 3.7 support. - from importlib.metadata import version -except ModuleNotFoundError: - from importlib_metadata import version +from importlib.metadata import version old_settings = np.seterr(all='ignore') diff --git a/deeptools/parserCommon.py b/deeptools/parserCommon.py index 37e9f359..7a94b0f5 100755 --- a/deeptools/parserCommon.py +++ b/deeptools/parserCommon.py @@ -1,10 +1,6 @@ import argparse import os -try: # keep python 3.7 support. - from importlib.metadata import version -except ModuleNotFoundError: - from importlib_metadata import version - +from importlib.metadata import version def check_float_0_1(value): v = float(value) diff --git a/deeptools/plotCorrelation.py b/deeptools/plotCorrelation.py index 2b8d9f79..988cf559 100644 --- a/deeptools/plotCorrelation.py +++ b/deeptools/plotCorrelation.py @@ -10,13 +10,9 @@ matplotlib.rcParams['svg.fonttype'] = 'none' from deeptools import cm # noqa: F401 import matplotlib.pyplot as plt - +from importlib.metadata import version from deeptools.correlation import Correlation from deeptools.parserCommon import writableFile -try: # keep python 3.7 support. - from importlib.metadata import version -except ModuleNotFoundError: - from importlib_metadata import version old_settings = np.seterr(all='ignore') diff --git a/deeptools/plotCoverage.py b/deeptools/plotCoverage.py index e233dcb7..a3235955 100755 --- a/deeptools/plotCoverage.py +++ b/deeptools/plotCoverage.py @@ -14,14 +14,10 @@ import matplotlib.pyplot as plt import plotly.offline as py import plotly.graph_objs as go - +from importlib.metadata import version import deeptools.countReadsPerBin as countR from deeptools import parserCommon from deeptools.utilities import smartLabels -try: # keep python 3.7 support. 
- from importlib.metadata import version -except ModuleNotFoundError: - from importlib_metadata import version old_settings = np.seterr(all='ignore') diff --git a/deeptools/plotPCA.py b/deeptools/plotPCA.py index c43942b8..bc17ed32 100644 --- a/deeptools/plotPCA.py +++ b/deeptools/plotPCA.py @@ -8,13 +8,9 @@ matplotlib.rcParams['pdf.fonttype'] = 42 matplotlib.rcParams['svg.fonttype'] = 'none' from deeptools import cm # noqa: F401 - +from importlib.metadata import version from deeptools.correlation import Correlation from deeptools.parserCommon import writableFile -try: # keep python 3.7 support. - from importlib.metadata import version -except ModuleNotFoundError: - from importlib_metadata import version def parse_arguments(args=None): From 9574e7606dc3c87a1be7b7f60866ba858284b6d2 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Thu, 16 Nov 2023 17:48:13 +0100 Subject: [PATCH 07/26] include repo pip install --- .readthedocs.yaml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index d87c2e6f..a0f0821e 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -3,7 +3,7 @@ version: 2 build: os: ubuntu-22.04 tools: - python: "3.11" + python: "3.12" sphinx: configuration: docs/conf.py @@ -11,3 +11,5 @@ sphinx: python: install: - requirements: docs/requirements.txt + - method: pip + path: . From 711c12c705b9477d4e11813fddacaaa59698eefa Mon Sep 17 00:00:00 2001 From: WardDeb Date: Thu, 16 Nov 2023 20:46:31 +0100 Subject: [PATCH 08/26] switch pip&docreqs build --- .readthedocs.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.readthedocs.yaml b/.readthedocs.yaml index a0f0821e..d95161e3 100644 --- a/.readthedocs.yaml +++ b/.readthedocs.yaml @@ -10,6 +10,6 @@ sphinx: python: install: - - requirements: docs/requirements.txt - method: pip path: . + - requirements: docs/requirements.txt From 73d03aa39d959b54128ba6430ae5ac94d2cabf1b Mon Sep 17 00:00:00 2001 From: WardDeb Date: Thu, 16 Nov 2023 20:56:32 +0100 Subject: [PATCH 09/26] flake fix --- deeptools/alignmentSieve.py | 1 + deeptools/bamPEFragmentSize.py | 1 + deeptools/computeMatrix.py | 1 + deeptools/computeMatrixOperations.py | 1 + deeptools/deeptools_list_tools.py | 1 + deeptools/estimateReadFiltering.py | 1 + deeptools/parserCommon.py | 1 + 7 files changed, 7 insertions(+) diff --git a/deeptools/alignmentSieve.py b/deeptools/alignmentSieve.py index feb40d10..73a24734 100644 --- a/deeptools/alignmentSieve.py +++ b/deeptools/alignmentSieve.py @@ -10,6 +10,7 @@ from deeptools.utilities import getTLen, smartLabels, getTempFileName from importlib.metadata import version + def parseArguments(): parser = argparse.ArgumentParser( formatter_class=argparse.RawDescriptionHelpFormatter, diff --git a/deeptools/bamPEFragmentSize.py b/deeptools/bamPEFragmentSize.py index 118b83c9..91380517 100755 --- a/deeptools/bamPEFragmentSize.py +++ b/deeptools/bamPEFragmentSize.py @@ -20,6 +20,7 @@ from deeptools.getFragmentAndReadSize import get_read_and_fragment_length from importlib.metadata import version + def parse_arguments(): parser = argparse.ArgumentParser( description='This tool calculates the fragment sizes for read pairs given a BAM file from paired-end sequencing.' 
diff --git a/deeptools/computeMatrix.py b/deeptools/computeMatrix.py index 8287d1dd..2202c1d1 100644 --- a/deeptools/computeMatrix.py +++ b/deeptools/computeMatrix.py @@ -13,6 +13,7 @@ import deeptools.deepBlue as db from importlib.metadata import version + def parse_arguments(args=None): parser = \ argparse.ArgumentParser( diff --git a/deeptools/computeMatrixOperations.py b/deeptools/computeMatrixOperations.py index 978d7eb2..6b3272d4 100755 --- a/deeptools/computeMatrixOperations.py +++ b/deeptools/computeMatrixOperations.py @@ -8,6 +8,7 @@ import csv from importlib.metadata import version + def parse_arguments(): parser = argparse.ArgumentParser( formatter_class=argparse.RawDescriptionHelpFormatter, diff --git a/deeptools/deeptools_list_tools.py b/deeptools/deeptools_list_tools.py index 53983093..32dcf702 100644 --- a/deeptools/deeptools_list_tools.py +++ b/deeptools/deeptools_list_tools.py @@ -5,6 +5,7 @@ import sys from importlib.metadata import version + def parse_arguments(args=None): parser = argparse.ArgumentParser( formatter_class=argparse.RawDescriptionHelpFormatter, diff --git a/deeptools/estimateReadFiltering.py b/deeptools/estimateReadFiltering.py index 13d52599..8c46a384 100644 --- a/deeptools/estimateReadFiltering.py +++ b/deeptools/estimateReadFiltering.py @@ -7,6 +7,7 @@ from deeptools.utilities import smartLabels from importlib.metadata import version + def parseArguments(): parser = argparse.ArgumentParser( formatter_class=argparse.RawDescriptionHelpFormatter, diff --git a/deeptools/parserCommon.py b/deeptools/parserCommon.py index 7a94b0f5..8e726ea0 100755 --- a/deeptools/parserCommon.py +++ b/deeptools/parserCommon.py @@ -2,6 +2,7 @@ import os from importlib.metadata import version + def check_float_0_1(value): v = float(value) if v < 0.0 or v > 1.0: From 80b646b88890cb600f24a097932f9dc46e56eb61 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Thu, 23 Nov 2023 16:07:02 +0100 Subject: [PATCH 10/26] drop deepblue support --- CHANGES.txt | 11 ++ deeptools/bigwigAverage.py | 38 +--- deeptools/bigwigCompare.py | 46 +---- deeptools/computeMatrix.py | 40 +--- deeptools/deepBlue.py | 286 ----------------------------- deeptools/multiBigwigSummary.py | 46 +---- deeptools/parserCommon.py | 38 ---- docs/content/advanced_features.rst | 1 - docs/content/feature/deepBlue.rst | 16 -- docs/source/deeptools.rst | 8 - 10 files changed, 23 insertions(+), 507 deletions(-) delete mode 100644 deeptools/deepBlue.py delete mode 100644 docs/content/feature/deepBlue.rst diff --git a/CHANGES.txt b/CHANGES.txt index 335dbc80..7e81a92a 100755 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,3 +1,14 @@ +3.5.5 +* drop support for python 3.7 +* doc fixes (argparse properly displayed, minor changes in installation instructions) +* deepblue support drop + +3.5.4 +* error handling and cases for bwAverage with >2 samples +* Tick.label deprecation for mpl 3.8 +* minimal mpl version is 3.5 +* cicd update for pypi push + 3.5.3 * requirement cap for matplotlib lifted (changes in plotting can occur) * nose has been deprecated in favor of pytests diff --git a/deeptools/bigwigAverage.py b/deeptools/bigwigAverage.py index 7153d98f..4e2bb58f 100644 --- a/deeptools/bigwigAverage.py +++ b/deeptools/bigwigAverage.py @@ -7,7 +7,6 @@ import numpy as np from deeptools import parserCommon from deeptools import writeBedGraph_bam_and_bw -import deeptools.deepBlue as db debug = 0 @@ -15,9 +14,8 @@ def parse_arguments(args=None): parentParser = parserCommon.getParentArgParse() outputParser = parserCommon.output() - dbParser = 
parserCommon.deepBlueOptionalArgs() parser = argparse.ArgumentParser( - parents=[parentParser, outputParser, dbParser], + parents=[parentParser, outputParser], formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='This tool average multiple bigWig files based on the number ' 'of mapped reads. To average the bigWig files, the genome is ' @@ -59,7 +57,7 @@ def parse_arguments(args=None): def getType(fname): """ - Tries to determine if a file is a wiggle file from deepBlue or a bigWig file. + Tries to determine if a file is a wiggle file a bigWig file. Returns 'wiggle' if the file name ends with .wig, otherwise 'bigwig' """ if fname.endswith(".wig") or fname.endswith(".wiggle"): @@ -119,29 +117,6 @@ def main(args=None): FUNC = average function_args = {'scaleFactors': scaleFactors} - # Preload deepBlue files, which need to then be deleted - deepBlueFiles = [] - for idx, fname in enumerate(args.bigwigs): - if db.isDeepBlue(fname): - deepBlueFiles.append([fname, idx]) - if len(deepBlueFiles) > 0: - sys.stderr.write("Preloading the following deepBlue files: {}\n".format(",".join([x[0] for x in deepBlueFiles]))) - foo = db.deepBlue(deepBlueFiles[0][0], url=args.deepBlueURL, userKey=args.userKey) - regs = db.makeChromTiles(foo) - for x in deepBlueFiles: - x.extend([args, regs]) - if len(deepBlueFiles) > 1 and args.numberOfProcessors > 1: - pool = multiprocessing.Pool(args.numberOfProcessors) - res = pool.map_async(db.preloadWrapper, deepBlueFiles).get(9999999) - else: - res = list(map(db.preloadWrapper, deepBlueFiles)) - - # substitute the file names with the temp files - for (ftuple, r) in zip(deepBlueFiles, res): - args.bigwigs[ftuple[1]] = r - deepBlueFiles = [[x[0], x[1]] for x in deepBlueFiles] - del regs - writeBedGraph_bam_and_bw.writeBedGraph( [(b, getType(b)) for b in args.bigwigs], args.outFileName, 0, FUNC, @@ -154,12 +129,3 @@ def main(args=None): smoothLength=False, missingDataAsZero=not args.skipNonCoveredRegions, extendPairedEnds=False) - - # Clean up temporary bigWig files, if applicable - if not args.deepBlueKeepTemp: - for k, v in deepBlueFiles: - os.remove(args.bigwigs[v]) - else: - for k, v in deepBlueFiles: - foo = args.bigwigs[v] - print("{} is stored in {}".format(k, foo)) diff --git a/deeptools/bigwigCompare.py b/deeptools/bigwigCompare.py index 4e15c7df..9f0863d2 100644 --- a/deeptools/bigwigCompare.py +++ b/deeptools/bigwigCompare.py @@ -7,7 +7,6 @@ from deeptools import parserCommon from deeptools.getRatio import getRatio from deeptools import writeBedGraph_bam_and_bw -import deeptools.deepBlue as db debug = 0 @@ -15,9 +14,8 @@ def parse_arguments(args=None): parentParser = parserCommon.getParentArgParse() outputParser = parserCommon.output() - dbParser = parserCommon.deepBlueOptionalArgs() parser = argparse.ArgumentParser( - parents=[parentParser, outputParser, dbParser], + parents=[parentParser, outputParser], formatter_class=argparse.ArgumentDefaultsHelpFormatter, description='This tool compares two bigWig files based on the number ' 'of mapped reads. To compare the bigWig files, the genome is ' @@ -104,7 +102,7 @@ def parse_arguments(args=None): def getType(fname): """ - Tries to determine if a file is a wiggle file from deepBlue or a bigWig file. + Tries to determine if a file is a wiggle file or a bigWig file. 
Returns 'wiggle' if the file name ends with .wig, otherwise 'bigwig' """ if fname.endswith(".wig") or fname.endswith(".wiggle"): @@ -136,32 +134,6 @@ def main(args=None): 'scaleFactors': scaleFactors, 'pseudocount': args.pseudocount} - # Preload deepBlue files, which need to then be deleted - deepBlueFiles = [] - for idx, fname in enumerate([args.bigwig1, args.bigwig2]): - if db.isDeepBlue(fname): - deepBlueFiles.append([fname, idx]) - if len(deepBlueFiles) > 0: - sys.stderr.write("Preloading the following deepBlue files: {}\n".format(",".join([x[0] for x in deepBlueFiles]))) - foo = db.deepBlue(deepBlueFiles[0][0], url=args.deepBlueURL, userKey=args.userKey) - regs = db.makeChromTiles(foo) - for x in deepBlueFiles: - x.extend([args, regs]) - if len(deepBlueFiles) > 1 and args.numberOfProcessors > 1: - pool = multiprocessing.Pool(args.numberOfProcessors) - res = pool.map_async(db.preloadWrapper, deepBlueFiles).get(9999999) - else: - res = list(map(db.preloadWrapper, deepBlueFiles)) - - # substitute the file names with the temp files - for (ftuple, r) in zip(deepBlueFiles, res): - if ftuple[1] == 0: - args.bigwig1 = r - else: - args.bigwig2 = r - deepBlueFiles = [[x[0], x[1]] for x in deepBlueFiles] - del regs - writeBedGraph_bam_and_bw.writeBedGraph( [(args.bigwig1, getType(args.bigwig1)), (args.bigwig2, getType(args.bigwig2))], @@ -176,17 +148,3 @@ def main(args=None): missingDataAsZero=not args.skipNonCoveredRegions, extendPairedEnds=False, fixedStep=args.fixedStep) - - # Clean up temporary bigWig files, if applicable - if not args.deepBlueKeepTemp: - for k, v in deepBlueFiles: - if v == 0: - os.remove(args.bigwig1) - else: - os.remove(args.bigwig2) - else: - for k, v in deepBlueFiles: - foo = args.bigwig1 - if v == 1: - foo = args.bigwig2 - print("{} is stored in {}".format(k, foo)) diff --git a/deeptools/computeMatrix.py b/deeptools/computeMatrix.py index 2202c1d1..4a52dd69 100644 --- a/deeptools/computeMatrix.py +++ b/deeptools/computeMatrix.py @@ -10,7 +10,6 @@ from deeptools import parserCommon from deeptools import heatmapper import deeptools.computeMatrixOperations as cmo -import deeptools.deepBlue as db from importlib.metadata import version @@ -44,8 +43,6 @@ def parse_arguments(args=None): dest='command', metavar='') - dbParser = parserCommon.deepBlueOptionalArgs() - # scale-regions mode options subparsers.add_parser( 'scale-regions', @@ -53,8 +50,8 @@ def parse_arguments(args=None): parents=[computeMatrixRequiredArgs(), computeMatrixOutputArgs(), computeMatrixOptArgs(case='scale-regions'), - parserCommon.gtf_options(), - dbParser], + parserCommon.gtf_options() + ], help="In the scale-regions mode, all regions in the BED file are " "stretched or shrunken to the length (in bases) indicated by the user.", usage='An example usage is:\n computeMatrix scale-regions -S ' @@ -67,8 +64,8 @@ def parse_arguments(args=None): parents=[computeMatrixRequiredArgs(), computeMatrixOutputArgs(), computeMatrixOptArgs(case='reference-point'), - parserCommon.gtf_options(), - dbParser], + parserCommon.gtf_options() + ], help="Reference-point refers to a position within a BED region " "(e.g., the starting point). 
In this mode, only those genomic" "positions before (upstream) and/or after (downstream) of the " @@ -399,28 +396,6 @@ def main(args=None): hm = heatmapper.heatmapper() - # Preload deepBlue files, which need to then be deleted - deepBlueFiles = [] - for idx, fname in enumerate(args.scoreFileName): - if db.isDeepBlue(fname): - deepBlueFiles.append([fname, idx]) - if len(deepBlueFiles) > 0: - sys.stderr.write("Preloading the following deepBlue files: {}\n".format(",".join([x[0] for x in deepBlueFiles]))) - regs = db.makeRegions(args.regionsFileName, args) - for x in deepBlueFiles: - x.extend([args, regs]) - if len(deepBlueFiles) > 1 and args.numberOfProcessors > 1: - pool = multiprocessing.Pool(args.numberOfProcessors) - res = pool.map_async(db.preloadWrapper, deepBlueFiles).get(9999999) - else: - res = list(map(db.preloadWrapper, deepBlueFiles)) - - # substitute the file names with the temp files - for (ftuple, r) in zip(deepBlueFiles, res): - args.scoreFileName[ftuple[1]] = r - deepBlueFiles = [[x[0], x[1]] for x in deepBlueFiles] - del regs - scores_file_list = args.scoreFileName hm.computeMatrix(scores_file_list, args.regionsFileName, parameters, blackListFileName=args.blackListFileName, verbose=args.verbose, allArgs=args) if args.sortRegions not in ['no', 'keep']: @@ -447,10 +422,3 @@ def main(args=None): if args.outFileSortedRegions: hm.save_BED(args.outFileSortedRegions) - # Clean up temporary bigWig files, if applicable - if not args.deepBlueKeepTemp: - for k, v in deepBlueFiles: - os.remove(args.scoreFileName[v]) - else: - for k, v in deepBlueFiles: - print("{} is stored in {}".format(k, args.scoreFileName[v])) diff --git a/deeptools/deepBlue.py b/deeptools/deepBlue.py deleted file mode 100644 index 86439339..00000000 --- a/deeptools/deepBlue.py +++ /dev/null @@ -1,286 +0,0 @@ -#!/usr/bin/env python -try: - # python 2 - import xmlrpclib -except: - # python 3 - import xmlrpc.client as xmlrpclib -import time -import tempfile -import os.path -import sys -import pyBigWig -from deeptools.utilities import mungeChromosome -from deeptoolsintervals import GTF -import datetime - - -def isDeepBlue(fname): - """ - Returns true if the file ends in .wig, .wiggle, or .bedgraph, since these indicate a file on the deepBlue server - """ - if fname.endswith(".wig"): - return True - if fname.endswith(".wiggle"): - return True - if fname.endswith(".bedgraph"): - return True - if fname.startswith("http") or fname.startswith("ftp"): - return False - # For ENCODE samples, the "Name" is just the ENCODE sample ID, so as a fallback check for files that aren't there. - if not os.path.exists(fname): - return True - return False - - -def mergeRegions(regions): - """ - Given a list of [(chrom, start, end), ...], merge all overlapping regions - - This returns a dict, where values are sorted lists of [start, end]. 
- """ - bar = sorted(regions) - out = dict() - last = [None, None, None] - for reg in bar: - if reg[0] == last[0] and reg[1] <= last[2]: - if reg[2] > last[2]: - last[2] = reg[2] - continue - else: - if last[0]: - if last[0] not in out: - out[last[0]] = list() - out[last[0]].append([last[1], last[2]]) - last = [reg[0], reg[1], reg[2]] - if last[0] not in out: - out[last[0]] = list() - out[last[0]].append([last[1], last[2]]) - return out - - -def makeTiles(db, args): - """ - Given a deepBlue object, return a list of regions that will be queried - """ - out = [] - for (k, v) in db.chromsTuple: - start = 0 - while start <= v: - end = start + args.binSize - if end > v: - end = v - out.append([k, start, end]) - start += end + args.distanceBetweenBins - return out - - -def makeChromTiles(db): - """ - Make a region for each chromosome - """ - out = [] - for (k, v) in db.chromsTuple: - out.append([k, 0, v]) - return out - - -def makeRegions(BED, args): - """ - Given a list of BED/GTF files, make a list of regions. - These are vaguely extended as appropriate. For simplicity, the maximum of --beforeRegionStartLength - and --afterRegionStartLength are tacked on to each end and transcripts are used for GTF files. - """ - itree = GTF(BED, transcriptID=args.transcriptID, transcript_id_designator=args.transcript_id_designator) - o = [] - extend = 0 - # The before/after stuff is specific to computeMatrix - if "beforeRegionStartLength" in args: - extend = max(args.beforeRegionStartLength, args.afterRegionStartLength) - for chrom in itree.chroms: - regs = itree.findOverlaps(chrom, 0, 4294967295) # bigWig files use 32 bit coordinates - for reg in regs: - o.append([chrom, max(0, reg[0] - extend), reg[1] + extend]) - del itree - return o - - -def preloadWrapper(foo): - """ - This is a wrapper around the preload function for multiprocessing - """ - args = foo[2] - regs = foo[3] - res = deepBlue(foo[0], url=args.deepBlueURL, userKey=args.userKey) - return res.preload(regs, tmpDir=args.deepBlueTempDir) - - -class deepBlue(object): - def __init__(self, sample, url="http://deepblue.mpi-inf.mpg.de/xmlrpc", userKey="anonymous_key"): - """ - Connect to the requested deepblue server with the given user key and request the specifed sample from it. - - >>> sample = "S002R5H1.ERX300721.H3K4me3.bwa.GRCh38.20150528.bedgraph" - >>> db = deepBlue(sample) # doctest: +SKIP - >>> assert db.chroms("chr1") == 248956422 # doctest: +SKIP - """ - self.sample = sample - self.url = url - self.userKey = userKey - self.server = xmlrpclib.Server(url, allow_none=True) - self.info = None - self.experimentID = None - self.genome = None - self.chromsDict = None - self.chromsTuple = None - - # Set self.experimentID - experimentID = self.getEID() - if not experimentID: - raise RuntimeError("The requested sample({}) has no associated experiment! 
If you did not intend to use samples on deepBlue, then it appears either you misspelled a file name or (if you're using BAM files for input) one of your BAM files is lacking a valid index.".format(sample)) - - # Set self.info - (status, resp) = self.server.info(self.experimentID, userKey) - if status != "okay": - raise RuntimeError("Received the following error while fetching information about '{}': {}".format(resp, sample)) - self.info = resp[0] - - # Set self.genome - genome = self.getGenome() - if not genome: - raise RuntimeError("Unable to determine an appropriate genome for '{}'".format(sample)) - - # Set self.chroms - chroms = self.getChroms() - if not chroms: - raise RuntimeError("Unable to determine chromosome names/sizes for '{}'".format(sample)) - - def getEID(self): - """ - Given a sample name, return its associated experiment ID (or None on error). - - self.experimentID is then the internal ID (e.g., e52525) - """ - (status, resps) = self.server.search(self.sample, "experiments", self.userKey) - if status != "okay": - raise RuntimeError("Received an error ({}) while searching for the experiment associated with '{}'".format(resps, self.sample)) - for resp in resps: - if resp[1] == self.sample: - self.experimentID = resp[0] - return resp[0] - return None - - def getGenome(self): - """ - Determines and sets the genome assigned to a given sample. On error, this raises a runtime exception. - - self.genome is then the internal genome ID. - """ - if "genome" in self.info.keys(): - self.genome = self.info["genome"] - return self.genome - - def getChroms(self): - """ - Determines and sets the chromosome names/sizes for a given sample. On error, this raises a runtime exception. - - self.chroms is then a dictionary of chromosome:length pairs - """ - (status, resp) = self.server.chromosomes(self.genome, self.userKey) - if status != "okay": - raise RuntimeError("Received an error while fetching chromosome information for '{}': {}".format(self.sample, resp)) - self.chromsDict = {k: v for k, v in resp} - self.chromsTuple = [(k, v) for k, v in resp] - return resp - - def chroms(self, chrom=None): - """ - Like the chroms() function in pyBigWig, returns either chromsDict (chrom is None) or the length of a given chromosome - """ - if chrom is None: - return self.chromsDict - elif chrom in self.chromsDict: - return self.chromsDict[chrom] - return None - - def close(self): - pass - - def preload(self, regions, tmpDir=None): - """ - Given a sample and a set of regions, write a bigWig file containing the underlying signal. - - This function returns the file name, which needs to be deleted by the calling function at some point. - - This sends queries one chromosome at a time, due to memory limits on deepBlue - """ - startTime = datetime.datetime.now() - regions2 = mergeRegions(regions) - - # Make a temporary file - f = tempfile.NamedTemporaryFile(delete=False, dir=tmpDir) - fname = f.name - f.close() - - # Start with the bigWig file - bw = pyBigWig.open(fname, "w") - bw.addHeader(self.chromsTuple, maxZooms=0) # This won't work in IGV! 
- - # Make a string out of everything in a resonable order - for k, v in self.chromsTuple: - # Munge chromosome names as appropriate - chrom = mungeChromosome(k, regions2.keys()) - if not chrom: - continue - if chrom not in regions2 or len(regions2) == 0: - continue - regionsStr = "\n".join(["{}\t{}\t{}".format(k, reg[0], reg[1]) for reg in regions2[chrom]]) - regionsStr += "\n" - - # Send the regions - (status, regionsID) = self.server.input_regions(self.genome, regionsStr, self.userKey) - if status != "okay": - raise RuntimeError("Received the following error while sending regions for '{}': {}".format(regionsID, self.sample)) - - # Get the experiment information - (status, queryID) = self.server.select_experiments(self.sample, k, None, None, self.userKey) - if status != "okay": - raise RuntimeError("Received the following error while running select_experiments on file '{}': {}".format(self.sample, queryID)) - if not queryID: - raise RuntimeError("Somehow, we received None as a query ID (file '{}')".format(self.sample)) - - # Intersect - (status, intersectID) = self.server.intersection(queryID, regionsID, self.userKey) - if status != "okay": - raise RuntimeError("Received the following error while running intersection on file '{}': {}".format(self.sample, intersectID)) - if not intersectID: - raise RuntimeError("Somehow, we received None as an intersect ID (file '{}')".format(self.sample)) - - # Query the regions - (status, reqID) = self.server.get_regions(intersectID, "START,END,VALUE", self.userKey) - if status != "okay": - raise RuntimeError("Received the following error while fetching regions in file '{}': {}".format(self.sample, reqID)) - - # Wait for the server to process the data - (status, info) = self.server.info(reqID, self.userKey) - request_status = info[0]["state"] - while request_status != "done" and request_status != "failed": - time.sleep(0.1) - (status, info) = self.server.info(reqID, self.userKey) - request_status = info[0]["state"] - - # Get the actual data - (status, resp) = self.server.get_request_data(reqID, self.userKey) - if status != "okay": - raise RuntimeError("Received the following error while fetching data in file '{}': {}".format(self.sample, resp)) - - for intervals in resp.split("\n"): - interval = intervals.split("\t") - if interval[0] == '': - continue - bw.addEntries([k], [int(interval[0]) - 1], ends=[int(interval[1]) - 1], values=[float(interval[2])]) - bw.close() - sys.stderr.write("{} done (took {})\n".format(self.sample, datetime.datetime.now() - startTime)) - sys.stderr.flush() - - return fname diff --git a/deeptools/multiBigwigSummary.py b/deeptools/multiBigwigSummary.py index 22789647..5a6004c0 100644 --- a/deeptools/multiBigwigSummary.py +++ b/deeptools/multiBigwigSummary.py @@ -9,7 +9,6 @@ from deeptools import parserCommon from deeptools.utilities import smartLabels import deeptools.getScorePerBigWigBin as score_bw -import deeptools.deepBlue as db from importlib.metadata import version old_settings = np.seterr(all='ignore') @@ -50,7 +49,6 @@ def parse_arguments(args=None): metavar='') parent_parser = parserCommon.getParentArgParse(binSize=False) - dbParser = parserCommon.deepBlueOptionalArgs() # bins mode options subparsers.add_parser( @@ -58,9 +56,8 @@ def parse_arguments(args=None): formatter_class=argparse.ArgumentDefaultsHelpFormatter, parents=[multiBigwigSummaryArgs(case='bins'), parent_parser, - parserCommon.gtf_options(suppress=True), - dbParser - ], + parserCommon.gtf_options(suppress=True) + ], help="The average score is based on 
equally sized bins " "(10 kilobases by default), which consecutively cover the " "entire genome. The only exception is the last bin of a chromosome, which " @@ -78,9 +75,8 @@ def parse_arguments(args=None): formatter_class=argparse.ArgumentDefaultsHelpFormatter, parents=[multiBigwigSummaryArgs(case='BED-file'), parent_parser, - parserCommon.gtf_options(), - dbParser - ], + parserCommon.gtf_options() + ], help="The user provides a BED file that contains all regions " "that should be considered for the analysis. A " "common use is to compare scores (e.g. ChIP-seq scores) between " @@ -227,33 +223,6 @@ def main(args=None): "--outRawCounts. The resulting output will NOT be " "useful with any deepTools program!\n") - # Preload deepBlue files, which need to then be deleted - deepBlueFiles = [] - for idx, fname in enumerate(args.bwfiles): - if db.isDeepBlue(fname): - deepBlueFiles.append([fname, idx]) - if len(deepBlueFiles) > 0: - sys.stderr.write("Preloading the following deepBlue files: {}\n".format(",".join([x[0] for x in deepBlueFiles]))) - if 'BED' in args: - regs = db.makeRegions(args.BED, args) - else: - foo = db.deepBlue(deepBlueFiles[0][0], url=args.deepBlueURL, userKey=args.userKey) - regs = db.makeTiles(foo, args) - del foo - for x in deepBlueFiles: - x.extend([args, regs]) - if len(deepBlueFiles) > 1 and args.numberOfProcessors > 1: - pool = multiprocessing.Pool(args.numberOfProcessors) - res = pool.map_async(db.preloadWrapper, deepBlueFiles).get(9999999) - else: - res = list(map(db.preloadWrapper, deepBlueFiles)) - - # substitute the file names with the temp files - for (ftuple, r) in zip(deepBlueFiles, res): - args.bwfiles[ftuple[1]] = r - deepBlueFiles = [[x[0], x[1]] for x in deepBlueFiles] - del regs - num_reads_per_bin = score_bw.getScorePerBin( args.bwfiles, args.binSize, @@ -310,10 +279,3 @@ def main(args=None): """ f.close() - # Clean up temporary bigWig files, if applicable - if not args.deepBlueKeepTemp: - for k, v in deepBlueFiles: - os.remove(args.bwfiles[v]) - else: - for k, v in deepBlueFiles: - print("{} is stored in {}".format(k, args.bwfiles[v])) diff --git a/deeptools/parserCommon.py b/deeptools/parserCommon.py index 8e726ea0..03000243 100755 --- a/deeptools/parserCommon.py +++ b/deeptools/parserCommon.py @@ -861,44 +861,6 @@ def heatmapperOptionalArgs(mode=['heatmap', 'profile'][0]): action='store_true') return parser - -def deepBlueOptionalArgs(): - - parser = argparse.ArgumentParser(add_help=False) - dbo = parser.add_argument_group('deepBlue arguments', 'Options used only for remote bedgraph/wig files hosted on deepBlue') - dbo.add_argument( - '--deepBlueURL', - help='For remote files bedgraph/wiggle files hosted on deepBlue, this ' - 'specifies the server URL. The default is ' - '"http://deepblue.mpi-inf.mpg.de/xmlrpc", which should not be ' - 'changed without good reason.', - default='http://deepblue.mpi-inf.mpg.de/xmlrpc') - dbo.add_argument( - '--userKey', - help='For remote files bedgraph/wiggle files hosted on deepBlue, this ' - 'specifies the user key to use for access. The default is ' - '"anonymous_key", which suffices for public datasets. If you need ' - 'access to a restricted access/private dataset, then request a ' - 'key from deepBlue and specify it here.', - default='anonymous_key') - dbo.add_argument( - '--deepBlueTempDir', - help='If specified, temporary files from preloading datasets from ' - 'deepBlue will be written here (note, this directory must exist). 
' - 'If not specified, where ever temporary files would normally be written ' - 'on your system is used.', - default=None) - dbo.add_argument( - '--deepBlueKeepTemp', - action='store_true', - help='If specified, temporary bigWig files from preloading deepBlue ' - 'datasets are not deleted. A message will be printed noting where these ' - 'files are and what sample they correspond to. These can then be used ' - 'if you wish to analyse the same sample with the same regions again.') - - return parser - - def requiredLength(minL, maxL): """ This is an optional action that can be given to argparse.add_argument(..., nargs='+') diff --git a/docs/content/advanced_features.rst b/docs/content/advanced_features.rst index db325a6e..ea914304 100644 --- a/docs/content/advanced_features.rst +++ b/docs/content/advanced_features.rst @@ -8,7 +8,6 @@ Some of the features of deepTools are not self-explanatory. Below, we provide li * :doc:`feature/read_extension` * :doc:`feature/unscaled_regions` * :doc:`feature/read_offsets` - * :doc:`feature/deepBlue` * :doc:`feature/plotFingerprint_QC_metrics` * :doc:`feature/plotly` * :doc:`feature/effectiveGenomeSize` diff --git a/docs/content/feature/deepBlue.rst b/docs/content/feature/deepBlue.rst deleted file mode 100644 index 1fd7230c..00000000 --- a/docs/content/feature/deepBlue.rst +++ /dev/null @@ -1,16 +0,0 @@ -Accessing datasets hosted on deepBlue -===================================== - -`deepBlue `__ is an epigenome dataset server hosting many ENCODE, ROADMAP, BLUEPRINT, and DEEP samples. These are often hosted as normalized signal tracks that can be used with `bigwigCompare`, `bigwigAverage`, `multiBigwigSummary`, and `computeMatrix`. As of version 2.4.0, the aforementioned tools can now access signal files hosted on deepBlue. To do so, simply specify the "experiment name" from deepBlue, such as: - -.. code:: bash - - $ bigwigCompare -b1 S002R5H1.ERX300721.H3K4me3.bwa.GRCh38.20150528.bedgraph -b2 S002R5H1.ERX337057.Input.bwa.GRCh38.20150528.bedgraph -p 10 -o bwCompare.bw - -The file names given to the aforementioned commands are in the "Name" column in deepBlue. Any file ending in ".wig", ".wiggle", ".bedgraph" or otherwise not present on the file system (and not beginning with "http" or "ftp") is assumed to be hosted on deepBlue. This means that for ENCODE samples, one can simply use the ENCODE ID (e.g., "ENCFF721EKA"). - -Internally, deepTools queries deepBlue and creates a temporary bigWig file including signal in all of the regions that deepTools will use. By default, these temporary files are deleted after the command finishes. This can be prevented by specifying `--deepBlueKeepTemp`. The directory to which the temporary files are written can be specified by `--deepBlueTempDir`. If you intend to use the same sample multiple times with the same basic command (e.g., computeMatrix with the same regions or bigwigCompare with different samples), then considerable time can be saved by keeping the temporary bigWig file and simply specifying it in subsequent runs (i.e., deepTools won't magically find the previous file, you need to specify it). - -Note that some datasets may be restricted access. In such cases, you can request an account and will receive a "user key". You can then provide that to `bigwigCompare`, `multiBigwigSummary`, or `computeMatrix` using the `--userKey` option. In the off-chance that you have access to other deepBlue servers aside from the main one (http://deepblue.mpi-inf.mpg.de/xmlrpc), you can specify that with the `--deepBlueURL` option. 
- -.. warning:: bigwigCompare can be incredibly slow due to essentially downloading entire samples. It's faster to simply download bigWig files from the original source. diff --git a/docs/source/deeptools.rst b/docs/source/deeptools.rst index e85e7c75..97d96899 100644 --- a/docs/source/deeptools.rst +++ b/docs/source/deeptools.rst @@ -44,14 +44,6 @@ deeptools.countReadsPerBin module :undoc-members: :show-inheritance: -deeptools.deepBlue ------------------- - -.. automodule:: deeptools.deepBlue - :members: - :undoc-members: - :show-inheritance: - deeptools.getFragmentAndReadSize module --------------------------------------- From eb4ea284de1a56eef64192edd158cd5793a248dc Mon Sep 17 00:00:00 2001 From: WardDeb Date: Fri, 1 Dec 2023 16:31:40 +0100 Subject: [PATCH 11/26] drop unused imports due to deepblue drop, flake fixes --- deeptools/bigwigAverage.py | 4 +--- deeptools/bigwigCompare.py | 5 +---- deeptools/computeMatrix.py | 15 ++++++--------- deeptools/multiBigwigSummary.py | 20 ++++++++++---------- deeptools/parserCommon.py | 1 + 5 files changed, 19 insertions(+), 26 deletions(-) diff --git a/deeptools/bigwigAverage.py b/deeptools/bigwigAverage.py index 4e2bb58f..9dd12acd 100644 --- a/deeptools/bigwigAverage.py +++ b/deeptools/bigwigAverage.py @@ -1,9 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -import argparse # to parse command line arguments +import argparse import sys -import multiprocessing -import os import numpy as np from deeptools import parserCommon from deeptools import writeBedGraph_bam_and_bw diff --git a/deeptools/bigwigCompare.py b/deeptools/bigwigCompare.py index 9f0863d2..a4501d45 100644 --- a/deeptools/bigwigCompare.py +++ b/deeptools/bigwigCompare.py @@ -1,9 +1,6 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -import argparse # to parse command line arguments -import sys -import multiprocessing -import os +import argparse from deeptools import parserCommon from deeptools.getRatio import getRatio from deeptools import writeBedGraph_bam_and_bw diff --git a/deeptools/computeMatrix.py b/deeptools/computeMatrix.py index 4a52dd69..62a95657 100644 --- a/deeptools/computeMatrix.py +++ b/deeptools/computeMatrix.py @@ -3,9 +3,6 @@ import argparse import sys -import os -import multiprocessing - from deeptools.parserCommon import writableFile, numberOfProcessors from deeptools import parserCommon from deeptools import heatmapper @@ -47,11 +44,12 @@ def parse_arguments(args=None): subparsers.add_parser( 'scale-regions', formatter_class=argparse.ArgumentDefaultsHelpFormatter, - parents=[computeMatrixRequiredArgs(), - computeMatrixOutputArgs(), - computeMatrixOptArgs(case='scale-regions'), - parserCommon.gtf_options() - ], + parents=[ + computeMatrixRequiredArgs(), + computeMatrixOutputArgs(), + computeMatrixOptArgs(case='scale-regions'), + parserCommon.gtf_options() + ], help="In the scale-regions mode, all regions in the BED file are " "stretched or shrunken to the length (in bases) indicated by the user.", usage='An example usage is:\n computeMatrix scale-regions -S ' @@ -421,4 +419,3 @@ def main(args=None): if args.outFileSortedRegions: hm.save_BED(args.outFileSortedRegions) - diff --git a/deeptools/multiBigwigSummary.py b/deeptools/multiBigwigSummary.py index 5a6004c0..f7231921 100644 --- a/deeptools/multiBigwigSummary.py +++ b/deeptools/multiBigwigSummary.py @@ -5,7 +5,6 @@ import argparse import os.path import numpy as np -import multiprocessing from deeptools import parserCommon from deeptools.utilities import smartLabels import deeptools.getScorePerBigWigBin as 
score_bw @@ -54,10 +53,11 @@ def parse_arguments(args=None): subparsers.add_parser( 'bins', formatter_class=argparse.ArgumentDefaultsHelpFormatter, - parents=[multiBigwigSummaryArgs(case='bins'), - parent_parser, - parserCommon.gtf_options(suppress=True) - ], + parents=[ + multiBigwigSummaryArgs(case='bins'), + parent_parser, + parserCommon.gtf_options(suppress=True) + ], help="The average score is based on equally sized bins " "(10 kilobases by default), which consecutively cover the " "entire genome. The only exception is the last bin of a chromosome, which " @@ -73,10 +73,11 @@ def parse_arguments(args=None): subparsers.add_parser( 'BED-file', formatter_class=argparse.ArgumentDefaultsHelpFormatter, - parents=[multiBigwigSummaryArgs(case='BED-file'), - parent_parser, - parserCommon.gtf_options() - ], + parents=[ + multiBigwigSummaryArgs(case='BED-file'), + parent_parser, + parserCommon.gtf_options() + ], help="The user provides a BED file that contains all regions " "that should be considered for the analysis. A " "common use is to compare scores (e.g. ChIP-seq scores) between " @@ -278,4 +279,3 @@ def main(args=None): args.outRawCounts.write(fmt.format(*tuple(row))) """ f.close() - diff --git a/deeptools/parserCommon.py b/deeptools/parserCommon.py index 03000243..3022404c 100755 --- a/deeptools/parserCommon.py +++ b/deeptools/parserCommon.py @@ -861,6 +861,7 @@ def heatmapperOptionalArgs(mode=['heatmap', 'profile'][0]): action='store_true') return parser + def requiredLength(minL, maxL): """ This is an optional action that can be given to argparse.add_argument(..., nargs='+') From 611095ada7cccd279dd670d13df032961b895d20 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Fri, 1 Dec 2023 18:51:55 +0100 Subject: [PATCH 12/26] remove tight_layout deprecation, change default to constrained layout --- deeptools/plotHeatmap.py | 34 +++++++++++++++------------------- 1 file changed, 15 insertions(+), 19 deletions(-) diff --git a/deeptools/plotHeatmap.py b/deeptools/plotHeatmap.py index ad666998..a2149f82 100755 --- a/deeptools/plotHeatmap.py +++ b/deeptools/plotHeatmap.py @@ -62,7 +62,7 @@ def process_args(args=None): return args -def prepare_layout(hm_matrix, heatmapsize, showSummaryPlot, showColorbar, perGroup, colorbar_position): +def prepare_layout(hm_matrix, heatmapsize, showSummaryPlot, showColorbar, perGroup, colorbar_position, fig): """ prepare the plot layout as a grid having as many rows @@ -113,7 +113,7 @@ def prepare_layout(hm_matrix, heatmapsize, showSummaryPlot, showColorbar, perGro # numbers to heatmapheigt fractions height_ratio = np.concatenate([[sumplot_height, spacer_height], height_ratio]) - grids = gridspec.GridSpec(numrows, numcols, height_ratios=height_ratio, width_ratios=width_ratio) + grids = gridspec.GridSpec(numrows, numcols, height_ratios=height_ratio, width_ratios=width_ratio, figure=fig) return grids @@ -498,9 +498,6 @@ def plotMatrix(hm, outFileName, else: colorbar_position = 'side' - grids = prepare_layout(hm.matrix, (heatmapWidth, heatmapHeight), - showSummaryPlot, showColorbar, perGroup, colorbar_position) - # figsize: w,h tuple in inches figwidth = heatmapWidth / 2.54 figheight = heatmapHeight / 2.54 @@ -521,9 +518,19 @@ def plotMatrix(hm, outFileName, else: total_figwidth += 1 / 2.54 - fig = plt.figure(figsize=(total_figwidth, figheight)) + fig = plt.figure(figsize=(total_figwidth, figheight), constrained_layout=True) fig.suptitle(plotTitle, y=1 - (0.06 / figheight)) + grids = prepare_layout( + hm.matrix, + (heatmapWidth, heatmapHeight), + showSummaryPlot, + 
showColorbar, + perGroup, + colorbar_position, + fig + ) + # color map for the summary plot (profile) on top of the heatmap cmap_plot = plt.get_cmap('jet') numgroups = hm.matrix.get_num_groups() @@ -582,17 +589,6 @@ def plotMatrix(hm, outFileName, iterNum = hm.matrix.get_num_samples() iterNum2 = numgroups ax_list = addProfilePlot(hm, plt, fig, grids, iterNum, iterNum2, perGroup, averageType, plot_type, yAxisLabel, color_list, yMin, yMax, None, None, colorbar_position, label_rotation) - if len(yMin) > 1 or len(yMax) > 1: - # replot with a tight layout - import matplotlib.tight_layout as tl - specList = tl.get_subplotspec_list(fig.axes, grid_spec=grids) - renderer = tl.get_renderer(fig) - kwargs = tl.get_tight_layout_figure(fig, fig.axes, specList, renderer, pad=1.08) - - for ax in ax_list: - fig.delaxes(ax) - - ax_list = addProfilePlot(hm, plt, fig, grids, iterNum, iterNum2, perGroup, averageType, plot_type, yAxisLabel, color_list, yMin, yMax, kwargs['wspace'], kwargs['hspace'], colorbar_position, label_rotation) if legend_location != 'none': ax_list[-1].legend(loc=legend_location.replace('-', ' '), ncol=1, prop=fontP, @@ -764,10 +760,10 @@ def plotMatrix(hm, outFileName, fig.colorbar(img, cax=ax) if box_around_heatmaps: - plt.subplots_adjust(wspace=0.10, hspace=0.025, top=0.85, bottom=0, left=0.04, right=0.96) + fig.get_layout_engine().set(wspace=0.10, hspace=0.025, rect=(0.04, 0, 0.96, 0.85)) else: # When no box is plotted the space between heatmaps is reduced - plt.subplots_adjust(wspace=0.05, hspace=0.01, top=0.85, bottom=0, left=0.04, right=0.96) + fig.get_layout_engine().set(wspace=0.05, hspace=0.01, rect=(0.04, 0, 0.96, 0.85)) plt.savefig(outFileName, bbox_inches='tight', pad_inches=0.1, dpi=dpi, format=image_format) plt.close() From 1e3fa9f1e161a9e0ab47aa74601cd982f8f401f8 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Fri, 1 Dec 2023 19:00:37 +0100 Subject: [PATCH 13/26] include in changes --- CHANGES.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.txt b/CHANGES.txt index 7e81a92a..f9bff7ab 100755 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -1,7 +1,8 @@ 3.5.5 * drop support for python 3.7 * doc fixes (argparse properly displayed, minor changes in installation instructions) -* deepblue support drop +* deepblue support stops +* initiate deprecation of tight_layout in plotheatmap, in favor of constrained_layout. Minor changes in paddings, etc can occur (but for the better). 
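The layout change above can be illustrated with a standalone matplotlib sketch (assumes matplotlib >= 3.6; this is not deepTools code): the figure is created with ``constrained_layout=True``, the ``GridSpec`` is attached to that figure, and spacing is tuned through the layout engine instead of ``tight_layout()`` or ``plt.subplots_adjust()``:

.. code:: python

   import numpy as np
   import matplotlib.pyplot as plt
   from matplotlib import gridspec

   # constrained_layout replaces the removed tight_layout-based replotting
   fig = plt.figure(figsize=(6, 4), constrained_layout=True)
   grids = gridspec.GridSpec(2, 2, figure=fig, height_ratios=[1, 3], width_ratios=[3, 1])

   for row in range(2):
       for col in range(2):
           ax = fig.add_subplot(grids[row, col])
           ax.plot(np.arange(10), np.arange(10) ** (row + 1))

   # margins and spacing are now set on the layout engine, mirroring the calls in the diff
   fig.get_layout_engine().set(wspace=0.10, hspace=0.025, rect=(0.04, 0, 0.96, 0.85))
   fig.savefig("layout_demo.png", dpi=150)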
3.5.4 * error handling and cases for bwAverage with >2 samples From c7049a0c6f23ce74fb36f27b0840257665d1f52e Mon Sep 17 00:00:00 2001 From: WardDeb Date: Tue, 9 Jan 2024 17:21:49 +0100 Subject: [PATCH 14/26] purge table layout forcing to keep scrolleable tables --- docs/_static/fix_tables.css | 7 ------- docs/source/_templates/layout.html | 2 +- 2 files changed, 1 insertion(+), 8 deletions(-) delete mode 100644 docs/_static/fix_tables.css diff --git a/docs/_static/fix_tables.css b/docs/_static/fix_tables.css deleted file mode 100644 index 189a7c2a..00000000 --- a/docs/_static/fix_tables.css +++ /dev/null @@ -1,7 +0,0 @@ -.wy-table-responsive table td { - white-space: normal !important; -} - -.wy-table-responsive { - overflow: visible !important; -} diff --git a/docs/source/_templates/layout.html b/docs/source/_templates/layout.html index f6fc75dd..32e73053 100644 --- a/docs/source/_templates/layout.html +++ b/docs/source/_templates/layout.html @@ -1,3 +1,3 @@ {% extends "!layout.html" %} {% set script_files = script_files + ["_static/welcome_owl.carousel.min.js"] %} -{% set css_files = css_files + ["_static/welcome_owl.carousel.css", "_static/welcome_owl.carousel.theme.css", "_static/fix_tables.css"] %} +{% set css_files = css_files + ["_static/welcome_owl.carousel.css", "_static/welcome_owl.carousel.theme.css"] %} From 23523e7c0e028800a99a7bab900812d31d5e389b Mon Sep 17 00:00:00 2001 From: WardDeb Date: Tue, 9 Jan 2024 17:22:10 +0100 Subject: [PATCH 15/26] docs rtd theme to 2.0.0 --- docs/requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/requirements.txt b/docs/requirements.txt index 56d41a58..f330fe4e 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,4 +1,4 @@ sphinx==7.2.6 mock==5.1.0 -sphinx_rtd_theme==1.3.0 +sphinx_rtd_theme==2.0.0 sphinx-argparse==0.4.0 \ No newline at end of file From 7cdab5604b0b1be23e91f0c691f3252946f0d358 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Tue, 9 Jan 2024 17:23:33 +0100 Subject: [PATCH 16/26] include ESS for T2T + GRCz11, recheck khmer generated ones --- docs/content/feature/effectiveGenomeSize.rst | 80 +++++++++++++------- 1 file changed, 53 insertions(+), 27 deletions(-) diff --git a/docs/content/feature/effectiveGenomeSize.rst b/docs/content/feature/effectiveGenomeSize.rst index 4cbbb2dd..e988b18e 100644 --- a/docs/content/feature/effectiveGenomeSize.rst +++ b/docs/content/feature/effectiveGenomeSize.rst @@ -6,30 +6,56 @@ A number of tools can accept an "effective genome size". This is defined as the 1. The number of non-N bases in the genome. 2. The number of regions (of some size) in the genome that are uniquely mappable (possibly given some maximal edit distance). -Option 1 can be computed using ``faCount`` from `Kent's tools `__. The effective genome size for a number of genomes using this method is given below: - -======== ============== -Genome Effective size -======== ============== -GRCh37 2864785220 -GRCh38 2913022398 -GRCm37 2620345972 -GRCm38 2652783500 -dm3 162367812 -dm6 142573017 -GRCz10 1369631918 -WBcel235 100286401 -TAIR10 119481543 -======== ============== - -These values only appropriate if multimapping reads are included. If they are excluded (or there's any MAPQ filter applied), then values derived from option 2 are more appropriate. These are then based on the read length. We can approximate these values for various read lengths using the `khmer program `__ program and ``unique-kmers.py`` in particular. 
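As a rough illustration of "option 1" above (the count of non-N bases, which ``faCount`` reports), the same number can be computed directly from a FASTA file; a minimal sketch, not part of deepTools, with ``genome.fa`` as a placeholder path:

.. code:: python

   def effective_genome_size(fasta_path):
       """Count non-N bases in a FASTA file (option 1 above)."""
       total = 0
       with open(fasta_path) as fh:
           for line in fh:
               if line.startswith(">"):
                   continue
               # soft-masked (lowercase) bases still count; only Ns are excluded
               total += sum(base != "N" for base in line.strip().upper())
       return total

   print(effective_genome_size("genome.fa"))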
A table of effective genome sizes given a read length using this method is provided below: - -=========== ========== ========== ========== ========== ========= ========= ========== ======== -Read length GRCh37 GRCh38 GRCm37 GRCm38 dm3 dm6 GRCz10 WBcel235 -=========== ========== ========== ========== ========== ========= ========= ========== ======== -50 2685511504 2701495761 2304947926 2308125349 130428560 125464728 1195445591 95159452 -75 2736124973 2747877777 2404646224 2407883318 135004462 127324632 1251132686 96945445 -100 2776919808 2805636331 2462481010 2467481108 139647232 129789873 1280189044 98259998 -150 2827437033 2862010578 2489384235 2494787188 144307808 129941135 1312207169 98721253 -200 2855464000 2887553303 2513019276 2520869189 148524010 132509163 1321355241 98672758 -=========== ========== ========== ========== ========== ========= ========= ========== ======== +Option 1 can be computed using ``faCount`` from `Kent's tools `__. +The effective genome size for a number of genomes using this method is given below: + + ++---------------+------------------+ +| Genome | Effective size | ++===============+==================+ +|GRCh37 | 2864785220 | ++---------------+------------------+ +|GRCh38 | 2913022398 | ++---------------+------------------+ +|T2T/CHM13CAT_v2| 3117292070 | ++---------------+------------------+ +|GRCm37 | 2620345972 | ++---------------+------------------+ +|GRCm38 | 2652783500 | ++---------------+------------------+ +|dm3 | 162367812 | ++---------------+------------------+ +|dm6 | 142573017 | ++---------------+------------------+ +|GRCz10 | 1369631918 | ++---------------+------------------+ +|GRCz11 | 1368780147 | ++---------------+------------------+ +|WBcel235 | 100286401 | ++---------------+------------------+ +|TAIR10 | 119482012 | ++---------------+------------------+ + + + +These values are only appropriate if multimapping reads are included. If they are excluded (or there's any MAPQ filter applied), +then values derived from option 2 are more appropriate. +These are then based on the read length. +We can approximate these values for various read lengths using the `khmer program `__ program and ``unique-kmers.py`` in particular.
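The read-length dependent values in the table that follows correspond to "option 2": roughly the number of distinct sequences of read length that occur in the genome, which is the quantity ``unique-kmers.py`` estimates without holding every k-mer in memory. A naive sketch of that quantity for a small sequence (illustrative only; the exact counting conventions of ``unique-kmers.py``, e.g. strand handling, may differ):

.. code:: python

   def count_unique_kmers(seq, k):
       """Number of distinct k-mers of length k that contain no N."""
       kmers = {seq[i:i + k] for i in range(len(seq) - k + 1)}
       return len({kmer for kmer in kmers if "N" not in kmer})

   # toy example; a real genome needs a streaming estimator such as unique-kmers.py
   print(count_unique_kmers("ACGTACGTTTACGT", 4))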
+A table of effective genome sizes given a read length using this method is provided below: + ++-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+ +|Read length | GRCh37 | GRCh38 | T2T/CHM13CAT_v2 | GRCm37 | GRCm38 | dm3 | dm6 | GRCz10 | GRCz11 | WBcel235 | TAIR10 | ++=================+=================+=================+=================+=================+=================+=================+=================+=================+=================+=================+=================+ +|50 | 2685511454 | 2701495711 | 2725240337 | 2304947876 | 2308125299 | 130428510 | 125464678 | 1195445541 | 1197575653 | 95159402 | 114339094 | ++-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+ +|75 | 2736124898 | 2747877702 | 2786136059 | 2404646149 | 2407883243 | 135004387 | 127324557 | 1251132611 | 1250812288 | 96945370 | 115317469 | ++-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+ +|100 | 2776919708 | 2805636231 | 2814334875 | 2462480910 | 2467481008 | 139647132 | 129789773 | 1280188944 | 1280354977 | 98259898 | 118459858 | ++-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+ +|150 | 2827436883 | 2862010428 | 2931551487 | 2489384085 | 2494787038 | 144307658 | 129940985 | 1312207019 | 1311832909 | 98721103 | 118504138 | ++-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+ +|200 | 2855463800 | 2887553103 | 2936403235 | 2513019076 | 2520868989 | 148523810 | 132508963 | 1321355041 | 1322366338 | 98672558 | 117723393 | ++-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+ +|250 | 2855044784 | 2898802627 | 2960856300 | 2528988583 | 2538590322 | 151901455 | 132900923 | 1339205109 | 1342093482 | 101271756 | 119585546 | ++-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+-----------------+ From 7b6a4339ab72e51c928cb920aee041d5b511977a Mon Sep 17 00:00:00 2001 From: WardDeb Date: Tue, 9 Jan 2024 17:25:33 +0100 Subject: [PATCH 17/26] include doc changes in CHANGES --- CHANGES.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.txt b/CHANGES.txt index f9bff7ab..21ba877e 100755 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -3,6 +3,7 @@ * doc fixes (argparse properly displayed, minor changes in installation instructions) * deepblue support stops * initiate deprecation of tight_layout in plotheatmap, in favor of constrained_layout. Minor changes in paddings, etc can occur (but for the better). 
+* documentation changes to improve ESS tab, table constraints have been lifted & sphinx_rtd_theme to v2.0.0 3.5.4 * error handling and cases for bwAverage with >2 samples From 4c3545de934387d1a96cdd6d5fec5ad8f25ff101 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Tue, 9 Jan 2024 17:41:49 +0100 Subject: [PATCH 18/26] revert back to upload_artifact v3 for now --- .github/workflows/test.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 182885b1..9bc28697 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -63,9 +63,9 @@ jobs: micromamba activate test_and_build rm -f dist/* python -m build - - uses: actions/upload-artifact@master + - uses: actions/upload-artifact@v3 with: - name: "Dist files" + name: "distfiles" path: "dist" test-wheels: name: test wheel From 11343cb765168b417f6f8f7cb7873bf0eb2a3519 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Tue, 9 Jan 2024 17:46:02 +0100 Subject: [PATCH 19/26] namechange of artifact in test --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 9bc28697..07ffd664 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -78,7 +78,7 @@ jobs: - uses: actions/checkout@v3 - uses: actions/download-artifact@v3 with: - name: "Dist files" + name: "distfiles" path: ~/dist/ - uses: actions/setup-python@v4 with: From 9dd5c031457a2b4591f302f0b4639c88651a790a Mon Sep 17 00:00:00 2001 From: WardDeb Date: Tue, 9 Jan 2024 17:51:27 +0100 Subject: [PATCH 20/26] pin upload artifact --- CHANGES.txt | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGES.txt b/CHANGES.txt index 21ba877e..ea5b937c 100755 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -4,6 +4,7 @@ * deepblue support stops * initiate deprecation of tight_layout in plotheatmap, in favor of constrained_layout. Minor changes in paddings, etc can occur (but for the better). * documentation changes to improve ESS tab, table constraints have been lifted & sphinx_rtd_theme to v2.0.0 +* upload artifact in gh test runner pinned to 3 3.5.4 * error handling and cases for bwAverage with >2 samples From 11d48b6412959c19812cd9a9dc89d7fa26d47e68 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Wed, 17 Jan 2024 11:09:30 +0100 Subject: [PATCH 21/26] sched_getaffinity inclusion for availProcessors --- CHANGES.txt | 1 + deeptools/parserCommon.py | 10 +++++++--- 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/CHANGES.txt b/CHANGES.txt index ea5b937c..28780ef0 100755 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -5,6 +5,7 @@ * initiate deprecation of tight_layout in plotheatmap, in favor of constrained_layout. Minor changes in paddings, etc can occur (but for the better). * documentation changes to improve ESS tab, table constraints have been lifted & sphinx_rtd_theme to v2.0.0 * upload artifact in gh test runner pinned to 3 +* Try to get the number of processors from sched_getaffinity, to avoid using too many in job submissions for example.
#1199 3.5.4 * error handling and cases for bwAverage with >2 samples diff --git a/deeptools/parserCommon.py b/deeptools/parserCommon.py index 3022404c..a4e7604b 100755 --- a/deeptools/parserCommon.py +++ b/deeptools/parserCommon.py @@ -1,7 +1,7 @@ import argparse import os from importlib.metadata import version - +import multiprocessing def check_float_0_1(value): v = float(value) if v < 0.0 or v > 1.0: @@ -341,8 +341,12 @@ def getParentArgParse(args=None, binSize=True, blackList=True): def numberOfProcessors(string): - import multiprocessing - availProc = multiprocessing.cpu_count() + try: + # won't work on macOS or windows + # limit threads to what is available (e.g. grid submissions, issue #1199) + availProc = len(os.sched_getaffinity(0)) + except AttributeError: + availProc = multiprocessing.cpu_count() if string == "max/2": # default case # by default half of the available processors are used From ae7aed430bab53ed948bb5002a65c33c3c8b1879 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Wed, 17 Jan 2024 11:16:22 +0100 Subject: [PATCH 22/26] flake parser --- deeptools/parserCommon.py | 1 + 1 file changed, 1 insertion(+) diff --git a/deeptools/parserCommon.py b/deeptools/parserCommon.py index a4e7604b..9849d9c4 100755 --- a/deeptools/parserCommon.py +++ b/deeptools/parserCommon.py @@ -3,6 +3,7 @@ from importlib.metadata import version import multiprocessing + +def check_float_0_1(value): v = float(value) if v < 0.0 or v > 1.0: From 8fc7c7921ffffa59c2e5342d79f18463feb0fcc2 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Wed, 17 Jan 2024 11:16:42 +0100 Subject: [PATCH 23/26] include parser fix scalefacestimation --- CHANGES.txt | 1 + deeptools/estimateScaleFactor.py | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/CHANGES.txt b/CHANGES.txt index 28780ef0..5f0bf0f0 100755 --- a/CHANGES.txt +++ b/CHANGES.txt @@ -6,6 +6,7 @@ * documentation changes to improve ESS tab, table constraints have been lifted & sphinx_rtd_theme to v2.0.0 * upload artifact in gh test runner pinned to 3 * Try to get the number of processors from sched_getaffinity, to avoid using too many in job submissions for example. #1199 +* Fix typo in estimateScaleFactor that fixes broken argparsing.
#1286 3.5.4 * error handling and cases for bwAverage with >2 samples diff --git a/deeptools/estimateScaleFactor.py b/deeptools/estimateScaleFactor.py index 549ecf78..97869a7b 100644 --- a/deeptools/estimateScaleFactor.py +++ b/deeptools/estimateScaleFactor.py @@ -98,7 +98,7 @@ def main(args=None): between to samples """ - args = parseArguments().parse_args(args) + args = parseArguments(args) if len(args.bamfiles) > 2: print("SES method to estimate scale factors only works for two samples") exit(0) From 7c148159720036640cc1bf7a7e873b1a8fd1228a Mon Sep 17 00:00:00 2001 From: WardDeb Date: Fri, 8 Mar 2024 16:06:19 +0100 Subject: [PATCH 24/26] omit travis tests in contributing doc --- .github/CONTRIBUTING.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/CONTRIBUTING.md b/.github/CONTRIBUTING.md index 1a1fbf18..de8e31b1 100644 --- a/.github/CONTRIBUTING.md +++ b/.github/CONTRIBUTING.md @@ -17,4 +17,4 @@ for contributing to the repository :** ## Testing -* Please make sure that travis tests are passing +* Please make sure that github actions are passing From cbf623b4b2b4287df4a8b36eb6a01f9af9f21075 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Fri, 8 Mar 2024 16:06:38 +0100 Subject: [PATCH 25/26] drop python 3.7 as lowest version in test env --- .github/test_and_build.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/test_and_build.yml b/.github/test_and_build.yml index e2dcd08b..0d1c77b2 100644 --- a/.github/test_and_build.yml +++ b/.github/test_and_build.yml @@ -2,7 +2,7 @@ channels: - conda-forge - bioconda dependencies: - - python >= 3.7 + - python > 3.7 - numpy - scipy - flake8 From c0bac6d88b47feac47a68d5a53f42948b8447263 Mon Sep 17 00:00:00 2001 From: WardDeb Date: Fri, 8 Mar 2024 16:06:52 +0100 Subject: [PATCH 26/26] planemo test 3.8 minimal version --- .github/workflows/planemo.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/planemo.yml b/.github/workflows/planemo.yml index bc7e4b0b..067a303c 100644 --- a/.github/workflows/planemo.yml +++ b/.github/workflows/planemo.yml @@ -49,7 +49,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python-version: ['3.7', '3.11'] + python-version: ['3.8', '3.11'] steps: - uses: actions/download-artifact@v3 with:
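Patch 21 above changes how the available processor count is derived: ``os.sched_getaffinity(0)`` (Linux only) returns just the CPUs the process is allowed to run on, i.e. the affinity mask that cluster schedulers typically set for a job, while ``multiprocessing.cpu_count()`` reports every CPU in the machine. A self-contained sketch of that pattern follows; the handling of "max" and of plain integers is an assumption here, since only the "max/2" branch is visible in the hunk:

.. code:: python

   import multiprocessing
   import os

   def available_processors(requested="max/2"):
       """Simplified sketch of the numberOfProcessors() logic shown above."""
       try:
           # Linux only: honours the affinity mask set by e.g. SLURM/SGE (issue #1199)
           avail = len(os.sched_getaffinity(0))
       except AttributeError:
           # macOS and Windows lack sched_getaffinity; fall back to the raw CPU count
           avail = multiprocessing.cpu_count()
       if requested == "max/2":           # default: use half of the usable CPUs
           return max(1, avail // 2)
       if requested == "max":             # assumption: "max" means all usable CPUs
           return avail
       return min(int(requested), avail)  # assumption: cap explicit requests at what is usable

   print(available_processors())
   print(available_processors("max"))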