Skip to content

Commit

Permalink
Bigwig average (#1169)
Browse files Browse the repository at this point in the history
* Fixes #1159

* Revert "dendogram of plotCorrelation now matches each cell correctly"

* add bigwigAverage

* linting

* remove forgotten prints

* make bigwigAverage executable

* fix galaxy tests

Co-authored-by: A.s <62971995+adRn-s@users.noreply.github.com>
Co-authored-by: LeilyR <25766687+LeilyR@users.noreply.github.com>
  • Loading branch information
3 people authored Jan 5, 2023
1 parent 4c0c4fa commit 8d9be32
Show file tree
Hide file tree
Showing 38 changed files with 509 additions and 196 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -50,3 +50,6 @@ _sources
#os X
.DS_Store
._.DS_Store

# Planemo
tool_test*
1 change: 1 addition & 0 deletions .planemo.sh
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ if [[ $1 == "1" ]] ; then
galaxy/wrapper/bamCoverage.xml \
galaxy/wrapper/bamPEFragmentSize.xml \
galaxy/wrapper/bigwigCompare.xml \
galaxy/wrapper/bigwigAverage.xml \
galaxy/wrapper/computeGCBias.xml"
elif [[ $1 == "2" ]] ; then
wrappers="galaxy/wrapper/computeMatrix.xml \
Expand Down
3 changes: 3 additions & 0 deletions CHANGES.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
3.6.0
* Add bigwigAverage

3.5.1
* cmp usage is updated to fit the recent mpl updates.
* The requirements.txt is updated.
Expand Down
12 changes: 12 additions & 0 deletions bin/bigwigAverage
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
#!/usr/bin/env python
#-*- coding: utf-8 -*-

import deeptools.misc
from deeptools.bigwigAverage import main
import sys

if __name__ == "__main__":
args = None
if len(sys.argv) == 1:
args = ["--help"]
main(args)
2 changes: 1 addition & 1 deletion bin/estimateScaleFactor
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,7 @@ def parseArguments(args=None):
args.ignoreForNormalization=[x.strip() for x in args.ignoreForNormalization.split(',')]
else:
args.ignoreForNormalization = []
return(args)
return args

def main(args):
"""
Expand Down
2 changes: 1 addition & 1 deletion deeptools/SES_scaleFactor.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,7 @@ def estimateScaleFactor(bamFilesList, binLength, numberOfSamples,
# Take a lower rank to move to a region with probably
# less peaks and more background.
maxIndex = int(maxIndex * 0.8)
while(maxIndex < len(p)):
while maxIndex < len(p):
# in rare cases the maxIndex maps to a zero value.
# In such cases, the next index is used until
# a non zero value appears.
Expand Down
2 changes: 1 addition & 1 deletion deeptools/bamHandler.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ def openBam(bamFile, returnStats=False, nThreads=1, minimalDecoding=True):
sys.exit("The file '{}' does not have BAM or CRAM format ".format(bamFile))

try:
assert(bam.check_index() is not False)
assert bam.check_index() is not False
except:
sys.exit("'{}' does not appear to have an index. You MUST index the file first!".format(bamFile))

Expand Down
160 changes: 160 additions & 0 deletions deeptools/bigwigAverage.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,160 @@
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import argparse # to parse command line arguments
import sys
import multiprocessing
import os
import numpy as np
from deeptools import parserCommon
from deeptools import writeBedGraph_bam_and_bw
import deeptools.deepBlue as db

debug = 0


def parse_arguments(args=None):
parentParser = parserCommon.getParentArgParse()
outputParser = parserCommon.output()
dbParser = parserCommon.deepBlueOptionalArgs()
parser = argparse.ArgumentParser(
parents=[parentParser, outputParser, dbParser],
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
description='This tool average multiple bigWig files based on the number '
'of mapped reads. To average the bigWig files, the genome is '
'partitioned into bins of equal size, then the scores '
'in each bigwig file are computed per bin.'
'These scores are averaged and scaleFactors can be applied before the average.')

# define the arguments
parser.add_argument('--bigwigs', '-b',
metavar='Bigwig files',
help='Bigwig files separated by space.',
nargs='+',
required=True)

parser.add_argument('--scaleFactors',
help='Set this parameter to multipy the bigwig values '
'by a constant. The format is '
'scaleFactor1:scaleFactor2:scaleFactor3 etc. '
'For example 0.7:1 to scale the first bigwig file '
'by 0.7 while not scaling the second bigwig file',
default=None,
required=False)

parser.add_argument('--skipNonCoveredRegions', '--skipNAs',
help='This parameter determines if non-covered regions (regions without a score) '
'in the bigWig files should be skipped. The default is to treat those '
'regions as having a value of zero. '
'The decision to skip non-covered regions '
'depends on the interpretation of the data. Non-covered regions '
'in a bigWig file may represent repetitive regions that should '
'be skipped. Alternatively, the interpretation of non-covered regions as '
'zeros may be wrong and this option should be used ',
action='store_true')

return parser


def getType(fname):
"""
Tries to determine if a file is a wiggle file from deepBlue or a bigWig file.
Returns 'wiggle' if the file name ends with .wig, otherwise 'bigwig'
"""
if fname.endswith(".wig") or fname.endswith(".wiggle"):
return "wiggle"
elif fname.lower().endswith(".bedgraph") or fname.endswith(".bdg"):
return "bedgraph"
else:
return "bigwig"


def average(tileCoverage, args):
r"""
The mapreduce method calls this function
for each tile. The parameters (args) are fixed
in the main method.
>>> funcArgs= {'scaleFactors': (1,1)}
>>> average([1, 2], funcArgs)
1.5
>>> funcArgs= {'scaleFactors': (1,0.5)}
>>> average([1, 2], funcArgs)
1.0
>>> funcArgs= {'scaleFactors': (1,0.5,0.1,0.2)}
>>> average([1, 2, 3, 12], funcArgs)
1.175
>>> average([1, 2, 3, np.nan], funcArgs)
nan
"""

norm_values = [args['scaleFactors'][i] * cov for i, cov in enumerate(tileCoverage)]

return np.mean(norm_values)


def main(args=None):
args = parse_arguments().parse_args(args)

nFiles = len(args.bigwigs)

if args.scaleFactors:
scaleFactors = [float(x) for x in args.scaleFactors.split(":")]
if len(scaleFactors) == 1:
scaleFactors = scaleFactors * nFiles
elif len(scaleFactors) != nFiles:
raise argparse.ArgumentTypeError(
"Format of scaleFactors is factor or factor1:factor2... as many as bigwig files. "
"There are {} bigwigs and {} factors."
"The value given ( {} ) is not valid".format(nFiles, len(scaleFactors), args.scaleFactors))
else:
scaleFactors = [1] * nFiles

# the average function is called and receives
# the function_args per each tile that is considered
FUNC = average
function_args = {'scaleFactors': scaleFactors}

# Preload deepBlue files, which need to then be deleted
deepBlueFiles = []
for idx, fname in enumerate(args.bigwigs):
if db.isDeepBlue(fname):
deepBlueFiles.append([fname, idx])
if len(deepBlueFiles) > 0:
sys.stderr.write("Preloading the following deepBlue files: {}\n".format(",".join([x[0] for x in deepBlueFiles])))
foo = db.deepBlue(deepBlueFiles[0][0], url=args.deepBlueURL, userKey=args.userKey)
regs = db.makeChromTiles(foo)
for x in deepBlueFiles:
x.extend([args, regs])
if len(deepBlueFiles) > 1 and args.numberOfProcessors > 1:
pool = multiprocessing.Pool(args.numberOfProcessors)
res = pool.map_async(db.preloadWrapper, deepBlueFiles).get(9999999)
else:
res = list(map(db.preloadWrapper, deepBlueFiles))

# substitute the file names with the temp files
for (ftuple, r) in zip(deepBlueFiles, res):
args.bigwigs[ftuple[1]] = r
deepBlueFiles = [[x[0], x[1]] for x in deepBlueFiles]
del regs

writeBedGraph_bam_and_bw.writeBedGraph(
[(b, getType(b)) for b in args.bigwigs],
args.outFileName, 0, FUNC,
function_args, tileSize=args.binSize, region=args.region,
blackListFileName=args.blackListFileName,
verbose=args.verbose,
numberOfProcessors=args.numberOfProcessors,
skipZeroOverZero=False,
format=args.outFileFormat,
smoothLength=False,
missingDataAsZero=not args.skipNonCoveredRegions,
extendPairedEnds=False)

# Clean up temporary bigWig files, if applicable
if not args.deepBlueKeepTemp:
for k, v in deepBlueFiles:
os.remove(args.bigwigs[v])
else:
for k, v in deepBlueFiles:
foo = args.bigwigs[v]
print("{} is stored in {}".format(k, foo))
2 changes: 1 addition & 1 deletion deeptools/computeGCBias.py
Original file line number Diff line number Diff line change
Expand Up @@ -390,7 +390,7 @@ def tabulateGCcontent_worker(chromNameBam, start, end, stepSize,
print("%s total time %.1f @ %s:%s-%s %s" % (multiprocessing.current_process().name,
(endTime - startTime), chromNameBit, start, end, stepSize))

return(subN_gc, subF_gc)
return subN_gc, subF_gc


def tabulateGCcontent(fragmentLength, chrNameBitToBam, stepSize,
Expand Down
2 changes: 1 addition & 1 deletion deeptools/computeMatrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ def process_args(args=None):
"set to 0. Nothing to output. Maybe you want to "
"use the scale-regions mode?\n")

return(args)
return args


def main(args=None):
Expand Down
2 changes: 1 addition & 1 deletion deeptools/correctGCBias.py
Original file line number Diff line number Diff line change
Expand Up @@ -363,7 +363,7 @@ def writeCorrectedSam_worker(chrNameBam, chrNameBit, start, end,
try:
copies = matePairs[read.qname]['copies']
gc = matePairs[read.qname]['gc']
del(matePairs[read.qname])
del matePairs[read.qname]
except:
# this exception happens when a mate is
# not present. This could
Expand Down
8 changes: 4 additions & 4 deletions deeptools/countReadsPerBin.py
Original file line number Diff line number Diff line change
Expand Up @@ -860,9 +860,9 @@ def get_fragment_from_read(self, read):
>>> c = CountReadsPerBin([], 1, 1, 200, extendReads=True, center_read=True)
>>> c.defaultFragmentLength = 100
>>> assert(c.get_fragment_from_read(test.getRead("paired-forward")) == [(5000032, 5000068)])
>>> assert c.get_fragment_from_read(test.getRead("paired-forward")) == [(5000032, 5000068)]
>>> c.defaultFragmentLength = 200
>>> assert(c.get_fragment_from_read(test.getRead("single-reverse")) == [(5001618, 5001654)])
>>> assert c.get_fragment_from_read(test.getRead("single-reverse")) == [(5001618, 5001654)]
"""
# if no extension is needed, use pysam get_blocks
# to identify start and end reference positions.
Expand Down Expand Up @@ -977,10 +977,10 @@ def estimateSizeFactors(m):
>>> m = np.array([[0, 1, 2], [3, 4, 5], [6, 7, 8], [0, 10, 0], [10, 5, 100]])
>>> sf = estimateSizeFactors(m)
>>> assert(np.all(np.abs(sf - [1.305, 0.9932, 0.783]) < 1e-4))
>>> assert np.all(np.abs(sf - [1.305, 0.9932, 0.783]) < 1e-4)
>>> m = np.array([[0, 0], [0, 1], [1, 1], [1, 2]])
>>> sf = estimateSizeFactors(m)
>>> assert(np.all(np.abs(sf - [1.1892, 0.8409]) < 1e-4))
>>> assert np.all(np.abs(sf - [1.1892, 0.8409]) < 1e-4)
"""
loggeomeans = np.sum(np.log(m), axis=1) / m.shape[1]
# Mask after computing the geometric mean
Expand Down
2 changes: 1 addition & 1 deletion deeptools/deepBlue.py
Original file line number Diff line number Diff line change
Expand Up @@ -122,7 +122,7 @@ def __init__(self, sample, url="http://deepblue.mpi-inf.mpg.de/xmlrpc", userKey=
>>> sample = "S002R5H1.ERX300721.H3K4me3.bwa.GRCh38.20150528.bedgraph"
>>> db = deepBlue(sample) # doctest: +SKIP
>>> assert(db.chroms("chr1") == 248956422) # doctest: +SKIP
>>> assert db.chroms("chr1") == 248956422 # doctest: +SKIP
"""
self.sample = sample
self.url = url
Expand Down
1 change: 1 addition & 0 deletions deeptools/deeptools_list_tools.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def parse_arguments(args=None):
bamCoverage computes read coverage per bins or regions
bamCompare computes log2 ratio and other operations of read coverage of two samples per bins or regions
bigwigCompare computes log2 ratio and other operations from bigwig scores of two samples per bins or regions
bigwigAverage computes average from bigwig scores of multiple samples per bins or regions
computeMatrix prepares the data from bigwig scores for plotting with plotHeatmap or plotProfile
alignmentSieve filters BAM alignments according to specified parameters, optionally producing a BEDPE file
Expand Down
2 changes: 1 addition & 1 deletion deeptools/getScorePerBigWigBin.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,7 +144,7 @@ def getChromSizes(bigwigFilesList):
>>> test = Tester()
Chromosome name(s) and size(s).
>>> assert(getChromSizes([test.bwFile1, test.bwFile2]) == ([('3R', 200)], set([])))
>>> assert getChromSizes([test.bwFile1, test.bwFile2]) == ([('3R', 200)], set([]))
"""
def print_chr_names_and_size(chr_set):
sys.stderr.write("chromosome\tlength\n")
Expand Down
4 changes: 2 additions & 2 deletions deeptools/plotProfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,11 +55,11 @@ def process_args(args=None):

# Ensure that yMin/yMax are there and a list
try:
assert(args.yMin is not None)
assert args.yMin is not None
except:
args.yMin = [None]
try:
assert(args.yMax is not None)
assert args.yMax is not None
except:
args.yMax = [None]

Expand Down
8 changes: 4 additions & 4 deletions deeptools/test/test_bamCoverage_and_bamCompare.py
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ def test_bam_coverage_offset1():
filecmp.clear_cache()
except:
pass
assert(filecmp.cmp(outfile, "{}testA_offset1.bw".format(ROOT)) is True)
assert filecmp.cmp(outfile, "{}testA_offset1.bw".format(ROOT)) is True
unlink(outfile)


Expand All @@ -394,7 +394,7 @@ def test_bam_coverage_offset1_10():
filecmp.clear_cache()
except:
pass
assert(filecmp.cmp(outfile, "{}testA_offset1_10.bw".format(ROOT)) is True)
assert filecmp.cmp(outfile, "{}testA_offset1_10.bw".format(ROOT)) is True
unlink(outfile)


Expand All @@ -412,7 +412,7 @@ def test_bam_coverage_offset_minus1():
filecmp.clear_cache()
except:
pass
assert(filecmp.cmp(outfile, "{}testA_offset-1.bw".format(ROOT)) is True)
assert filecmp.cmp(outfile, "{}testA_offset-1.bw".format(ROOT)) is True
unlink(outfile)


Expand All @@ -430,7 +430,7 @@ def test_bam_coverage_offset20_minus4():
filecmp.clear_cache()
except:
pass
assert(filecmp.cmp(outfile, "{}testA_offset20_-4.bw".format(ROOT)) is True)
assert filecmp.cmp(outfile, "{}testA_offset20_-4.bw".format(ROOT)) is True
unlink(outfile)


Expand Down
Loading

0 comments on commit 8d9be32

Please sign in to comment.