-
Notifications
You must be signed in to change notification settings - Fork 0
/
TwoBitNucleotideConverter.java
66 lines (56 loc) · 2.26 KB
/
TwoBitNucleotideConverter.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
package edu.rit.flick.genetics;
import static edu.rit.flick.genetics.nucleotide.Nucleotide.A;
import static edu.rit.flick.genetics.nucleotide.Nucleotide.C;
import static edu.rit.flick.genetics.nucleotide.Nucleotide.G;
import static edu.rit.flick.genetics.nucleotide.Nucleotide.T;

import com.google.common.collect.BiMap;
import com.google.common.collect.ImmutableBiMap;
import java.nio.charset.StandardCharsets;
/**
 * Converts between four-base nucleotide strings (tetramers) and a packed
 * representation that stores each base in two bits, so that a whole tetramer
 * fits in a single byte.
 * <p>
 * The two-bit codes are {@code A = 00}, {@code C = 01}, {@code G = 10} and
 * {@code T = 11}. The first base of the tetramer occupies the two most
 * significant bits of the byte and the last base the two least significant
 * bits, e.g. a tetramer spelling A, C, G, T packs to {@code 0b00_01_10_11}.
 * <p>
 * Nucleotides are identified by the byte constants statically imported from
 * {@code Nucleotide} (presumably the ASCII letter codes - confirm against that
 * class).
 */
public class TwoBitNucleotideConverter
{
    /** Number of bases in a tetramer. */
    private static final int  TETRAMER_LENGTH = 4;

    /** Number of bits used to encode a single base. */
    private static final int  BITS_PER_BASE   = 2;

    /** Mask selecting the two-bit code of a single base. */
    private static final int  BASE_MASK       = 0b11;

    /** Nucleotide byte -> two-bit code. Immutable, so one shared copy suffices. */
    private static final BiMap<Byte, Byte> NUCLEOTIDE_MAP = ImmutableBiMap.<Byte, Byte> builder()
            .put( A, (byte) 0b00 )
            .put( C, (byte) 0b01 )
            .put( G, (byte) 0b10 )
            .put( T, (byte) 0b11 )
            .build();

    /** Two-bit code -> nucleotide byte; a view of {@link #NUCLEOTIDE_MAP}. */
    private static final BiMap<Byte, Byte> CODE_TO_NUCLEOTIDE = NUCLEOTIDE_MAP.inverse();

    /**
     * Packs a tetramer into a single byte, two bits per base, first base in the
     * most significant bits.
     *
     * @param tetramer
     *            a string of exactly four nucleotide characters
     * @return the packed tetramer; note the value is negative when the first
     *         base encodes to {@code 10} or {@code 11}, because {@code byte} is
     *         signed
     * @throws IllegalArgumentException
     *             if {@code tetramer} is not four characters long or contains a
     *             character that is not a mapped nucleotide
     */
    public byte convert( final String tetramer )
    {
        if ( tetramer.length() != TETRAMER_LENGTH )
        {
            throw new IllegalArgumentException(
                String.format( "%s is not a tetramer.", tetramer ) );
        }

        int packed = 0;
        for ( int base = 0; base < TETRAMER_LENGTH; base++ )
        {
            final char nucleotide = tetramer.charAt( base );

            // Characters above 0xFF must be rejected before narrowing to byte:
            // the cast keeps only the low 8 bits, so e.g. U+0141 would
            // otherwise be mistaken for the base whose code is 0x41.
            final Byte code = nucleotide > 0xFF ? null : NUCLEOTIDE_MAP.get( (byte) nucleotide );
            if ( code == null )
            {
                // Fail with a descriptive error instead of a NullPointerException
                // from auto-unboxing a missing map entry.
                throw new IllegalArgumentException( String.format(
                    "%s contains an invalid nucleotide '%c' at position %d.", tetramer,
                    nucleotide, base ) );
            }

            // Make room for the next base, then append its two-bit code.
            packed = ( packed << BITS_PER_BASE ) | code;
        }
        return (byte) packed;
    }

    /**
     * Unpacks a byte produced by {@link #convert(String)} back into its
     * four-character tetramer. Every possible byte value decodes to some
     * tetramer, since all four two-bit codes are mapped.
     *
     * @param twoBitTetramer
     *            the packed tetramer, first base in the most significant bits
     * @return the four-character nucleotide string
     */
    public String convert( final byte twoBitTetramer )
    {
        final byte [] tetramer = new byte [TETRAMER_LENGTH];
        for ( int base = 0; base < TETRAMER_LENGTH; base++ )
        {
            // The first base sits in the top two bits, so it needs the largest
            // shift. Masking after the shift also discards the sign extension
            // introduced when the byte is promoted to int.
            final int shift = ( TETRAMER_LENGTH - 1 - base ) * BITS_PER_BASE;
            final int code = ( twoBitTetramer >> shift ) & BASE_MASK;
            tetramer[base] = CODE_TO_NUCLEOTIDE.get( (byte) code );
        }

        // ISO-8859-1 maps each byte to the char with the same value, which is
        // the exact inverse of the char -> byte narrowing used when encoding and
        // does not depend on the platform default charset.
        return new String( tetramer, StandardCharsets.ISO_8859_1 );
    }

    /**
     * Small demonstration: prints the nucleotide mapping, then round-trips a
     * few sample tetramers, showing the packed value in decimal and binary.
     *
     * @param args
     *            ignored
     */
    public static void main( final String [] args )
    {
        final TwoBitNucleotideConverter tbnc = new TwoBitNucleotideConverter();
        System.out.println( NUCLEOTIDE_MAP );

        final String [] tests = new String [] { "GTCA", "ACGT", "CGTC", "AAAA" };
        for ( final String test : tests )
        {
            final byte tet = tbnc.convert( test );
            final String back = tbnc.convert( tet );

            // Left-pad the binary form to a full 8 bits.
            final String bits = String
                    .format( "%8s", Integer.toBinaryString( Byte.toUnsignedInt( tet ) ) )
                    .replace( ' ', '0' );

            System.out.printf( " %s -> (%03d) %s -> %s\n", test, tet, bits, back );
        }
    }
}