-
Notifications
You must be signed in to change notification settings - Fork 6
/
sam_pileup.xml
213 lines (188 loc) · 8.72 KB
/
sam_pileup.xml
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
<tool id="sam_pileup" name="Generate pileup" version="1.1.2">
<description>from BAM dataset</description>
<!--
  Dependencies: the tool declares the "sam_pileup" Docker container.
  The package requirement below is inactive and kept only for reference:
    <requirements>
      <requirement type="package" version="0.1.16">samtools</requirement>
    </requirements>
-->
<requirements>
  <container type="docker">sam_pileup</container>
</requirements>
<!--
  Cheetah template that assembles the sam_pileup.py command line from the form values.
  * input1 lives inside the refOrHistory conditional, so it is referenced by its
    fully-qualified name (refOrHistory.input1), including for the BAM index metadata.
  * Dataset and index paths are double-quoted so that a path containing whitespace
    still reaches the script as a single argument.
  * The consensus arguments are always passed; the literal "None" is sent for each of
    them when consensus calling is switched off.
  * The MAPQ threshold is only passed when the MAPQ filter is enabled.
  Do not place XML comments inside the command element: its text is the template.
-->
<command interpreter="python">
  sam_pileup.py
    --input1="${refOrHistory.input1}"
    --output1="${output1}"
    --ref=$refOrHistory.reference
    #if $refOrHistory.reference == "history":
      --ownFile="${refOrHistory.ownFile}"
    #else:
      --index="${refOrHistory.index.fields.path}"
    #end if
    --bamIndex="${refOrHistory.input1.metadata.bam_index}"
    --lastCol=$lastCol
    --indels=$indels
    --nobaq=$nobaq
    --consensus=$c.consensus
    #if $c.consensus == "yes":
      --theta=$c.theta
      --hapNum=$c.hapNum
      --fraction=$c.fraction
      --phredProb=$c.phredProb
    #else:
      --theta="None"
      --hapNum="None"
      --fraction="None"
      --phredProb="None"
    #end if
    #if $mapq.filter == "yes":
      --mapqMin=${mapq.min}
    #end if
</command>
<inputs>
  <!--
    Reference genome source.
    Only the "history" source is offered. Previously the "indexed" option was still
    selectable (and was the default) although its when-block was commented out, which
    produced a form without any BAM input and a command template that failed on the
    missing refOrHistory.index value.
    NOTE(review): presumably the fasta_indexes data table is not available where this
    tool is deployed; confirm, then restore the option and the when-block together.
  -->
  <conditional name="refOrHistory">
    <param name="reference" type="select" label="Will you select a reference genome from your history or use a built-in index?">
      <!-- Disabled together with its when-block: <option value="indexed">Use a built-in index</option> -->
      <option value="history" selected="true">Use one from the history</option>
    </param>
    <!--
      Disabled built-in index branch, kept for reference:
      <when value="indexed">
        <param name="input1" type="data" format="bam" label="Select the BAM file to generate the pileup file for">
          <validator type="unspecified_build" />
          <validator type="dataset_metadata_in_data_table" table_name="fasta_indexes" metadata_name="dbkey" metadata_column="1" message="Sequences are not currently available for the specified build." />
        </param>
        <param name="index" type="select" label="Using reference genome">
          <options from_data_table="fasta_indexes">
            <filter type="data_meta" ref="input1" key="dbkey" column="1" />
            <validator type="no_options" message="No reference genome is available for the build associated with the selected input dataset" />
          </options>
        </param>
      </when>
    -->
    <when value="history">
      <param name="input1" type="data" format="bam" label="Select the BAM file to generate the pileup file for" />
      <param name="ownFile" type="data" format="fasta" metadata_name="dbkey" label="Select a reference genome" />
    </when>
  </conditional>

  <!-- Output formatting switches; each value is forwarded verbatim to sam_pileup.py. -->
  <param name="lastCol" type="select" label="Whether or not to print the mapping quality as the last column" help="Makes the output easier to parse, but is space inefficient">
    <option value="no">Do not print the mapping quality as the last column</option>
    <option value="yes">Print the mapping quality as the last column</option>
  </param>
  <param name="indels" type="select" label="Whether or not to print only output pileup lines containing indels">
    <option value="no">Print all lines</option>
    <option value="yes">Print only lines containing indels</option>
  </param>
  <!-- Note the inverted sense: the value "yes" means BAQ is disabled. -->
  <param name="nobaq" type="select" label="BAQ computation">
    <option value="no">Enabled</option>
    <option value="yes">Disabled</option>
  </param>

  <!-- Optional minimum mapping quality; the threshold is only passed on when the filter is "yes". -->
  <conditional name="mapq">
    <param name="filter" type="select" label="Filter by MAPQ?">
      <option selected="true" value="no">No</option>
      <option value="yes">Yes</option>
    </param>
    <when value="no" />
    <when value="yes">
      <param name="min" type="integer" value="1" label="Min MAPQ" />
    </when>
  </conditional>

  <!-- Consensus calling; the four model parameters only exist when consensus is "yes". -->
  <conditional name="c">
    <param name="consensus" type="select" label="Call consensus according to MAQ model?">
      <option selected="true" value="no">No</option>
      <option value="yes">Yes</option>
    </param>
    <when value="no" />
    <when value="yes">
      <param name="theta" type="float" value="0.85" label="Theta parameter (error dependency coefficient) in the MAQ consensus calling model" />
      <!-- min enforces the constraint that the help text already states. -->
      <param name="hapNum" type="integer" value="2" min="2" label="Number of haplotypes in the sample" help="Greater than or equal to 2" />
      <param name="fraction" type="float" value="0.001" label="Expected fraction of differences between a pair of haplotypes" />
      <param name="phredProb" type="integer" value="40" label="Phred probability of an indel in sequencing/prep" />
    </when>
  </conditional>
</inputs>
<!-- Single result dataset in pileup format; the command template writes to it via $output1. -->
<outputs>
  <data name="output1"
        format="pileup"
        label="${tool.name} on ${on_string}: converted pileup" />
</outputs>
<!--
  Functional tests. Both use a FASTA reference from the history because the built-in
  index branch is disabled in the inputs section. The tool form declares no "mapCap"
  input, so the former mapCap test parameters have been dropped.
-->
<tests>
  <!-- Simple pileup without consensus calling. -->
  <test>
    <!--
      Bam to pileup command:
      samtools faidx chr_m.fasta
      samtools pileup -M 60 -f chr_m.fasta test-data/sam_pileup_in1.bam > sam_pileup_out1.pileup
      chr_m.fasta is the prefix of the index
    -->
    <param name="reference" value="history" />
    <param name="input1" value="sam_pileup_in1.bam" ftype="bam" />
    <param name="ownFile" value="chr_m.fasta" ftype="fasta" dbkey="equCab2" />
    <param name="lastCol" value="no" />
    <param name="indels" value="no" />
    <param name="consensus" value="no" />
    <output name="output1" file="sam_pileup_out1.pileup" />
  </test>
  <!-- Pileup with consensus calling and explicit model parameters. -->
  <test>
    <!--
      Bam to pileup command:
      samtools pileup -M 60 -c -T 0.85 -N 2 -r 0.001 -I 40 -f chr_m.fasta test-data/sam_pileup_in1.bam > sam_pileup_out2.pileup
      chr_m.fasta is the prefix of the index
      NOTE(review): the expected output is assumed to be identical when chr_m.fasta is
      supplied from the history instead of the built-in index; confirm on first run.
    -->
    <param name="reference" value="history" />
    <param name="input1" value="sam_pileup_in1.bam" ftype="bam" />
    <param name="ownFile" value="chr_m.fasta" ftype="fasta" dbkey="equCab2" />
    <param name="lastCol" value="no" />
    <param name="indels" value="no" />
    <param name="consensus" value="yes" />
    <param name="theta" value="0.85" />
    <param name="hapNum" value="2" />
    <param name="fraction" value="0.001" />
    <param name="phredProb" value="40" />
    <output name="output1" file="sam_pileup_out2.pileup" />
  </test>
</tests>
<!--
  User-facing help, written in reStructuredText. The body is wrapped in CDATA so that
  the angle brackets of the citation link do not have to be entity-escaped. Blank lines
  and literal-block indentation are significant to the RST renderer; keep them intact.
  Do not place XML comments inside the help element: its text is the RST source.
-->
<help><![CDATA[
**What it does**

Uses the SAMTools_ pileup command to produce a pileup dataset from a provided BAM dataset. It generates two types of pileup datasets depending on the specified options. If the *Call consensus according to MAQ model?* option is set to **No**, the tool produces a simple six-column pileup. If the option is set to **Yes**, a ten-column pileup dataset with consensus is generated. Both types of datasets are briefly summarized below.

.. _SAMTools: http://samtools.sourceforge.net/samtools.shtml

------

**Types of pileup datasets**

The description of the pileup format below is largely based on information that can be found on the SAMTools Pileup_ documentation page. The 6- and 10-column variants are described below.

.. _Pileup: http://samtools.sourceforge.net/pileup.shtml

**Six column pileup**::

    1     2    3  4  5     6
    ---------------------------------
    chrM  412  A  2  .,    II
    chrM  413  G  4  ..t,  IIIH
    chrM  414  C  4  ...a  III2
    chrM  415  C  4  TTTt  III7

where::

     Column Definition
    ------- ----------------------------
          1 Chromosome
          2 Position (1-based)
          3 Reference base at that position
          4 Coverage (# reads aligning over that position)
          5 Bases within reads (see Galaxy wiki for more info)
          6 Quality values (phred33 scale, see Galaxy wiki for more)

**Ten column pileup**

The `ten-column` (consensus_) pileup incorporates additional consensus information generated with the *-c* option of the *samtools pileup* command::

    1     2    3  4  5   6   7   8  9     10
    ------------------------------------------------
    chrM  412  A  A  75  0   25  2  .,    II
    chrM  413  G  G  72  0   25  4  ..t,  IIIH
    chrM  414  C  C  75  0   25  4  ...a  III2
    chrM  415  C  T  75  75  25  4  TTTt  III7

where::

     Column Definition
    ------- --------------------------------------------------------
          1 Chromosome
          2 Position (1-based)
          3 Reference base at that position
          4 Consensus bases
          5 Consensus quality
          6 SNP quality
          7 Maximum mapping quality
          8 Coverage (# reads aligning over that position)
          9 Bases within reads (see Galaxy wiki for more info)
         10 Quality values (phred33 scale, see Galaxy wiki for more)

.. _consensus: http://samtools.sourceforge.net/cns0.shtml

------

**Citation**

For the underlying tool, please cite `Li H, Handsaker B, Wysoker A, Fennell T, Ruan J, Homer N, Marth G, Abecasis G, Durbin R; 1000 Genome Project Data Processing Subgroup. The Sequence Alignment/Map format and SAMtools. Bioinformatics. 2009 Aug 15;25(16):2078-9. <http://www.ncbi.nlm.nih.gov/pubmed/19505943>`_
]]></help>
</tool>