Skip to content

Commit

Permalink
Merge pull request #60 from marbl/v3.1.0
Browse files Browse the repository at this point in the history
V3.1.0
  • Loading branch information
bkille committed Aug 21, 2023
2 parents 4f4df5d + 8fb01ab commit 3ce0cd3
Show file tree
Hide file tree
Showing 15 changed files with 437 additions and 182 deletions.
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
name: Build
name: Build and test mapping coverage

on:
push:
Expand All @@ -12,7 +12,7 @@ on:

env:
# Customize the CMake build type here (Release, Debug, RelWithDebInfo, etc.)
BUILD_TYPE: RelWithDebInfo
BUILD_TYPE: DEBUG

jobs:
linux:
Expand All @@ -33,36 +33,43 @@ jobs:
uses: actions/checkout@v2
with:
submodules: 'true'
- name: Install requirements
run: |
- name: Setup apt
run: |
sudo apt install build-essential manpages-dev software-properties-common
sudo add-apt-repository ppa:ubuntu-toolchain-r/test
sudo apt update && sudo apt install ${{ matrix.compiler.CC }} ${{ matrix.compiler.CXX }}
sudo apt install libhts-dev
sudo apt install libjemalloc-dev
cmake --version

- name: Configure CMake
env:
CC: ${{ matrix.compiler.CC }}
CXX: ${{ matrix.compiler.CXX }}
# Configure CMake in a 'build' subdirectory. `CMAKE_BUILD_TYPE` is only required if you are using a single-configuration generator such as make.
# See https://cmake.org/cmake/help/latest/variable/CMAKE_BUILD_TYPE.html?highlight=cmake_build_type
run: cmake ./

sudo apt update && sudo apt install
- name: Install Requirements
run: sudo apt install -y
${{ matrix.compiler.CC }}
${{ matrix.compiler.CXX }}
libhts-dev
libjemalloc-dev
libgsl-dev
zlib1g-dev
libhts-dev
samtools
bedtools

- name: Build
env:
CC: ${{ matrix.compiler.CC }}
CXX: ${{ matrix.compiler.CXX }}
# Build your program with the given configuration
run: cmake -H. -Bbuild -DCMAKE_BUILD_TYPE=Release && cmake --build build --
run: cmake -H. -Bbuild -DCMAKE_BUILD_TYPE=${BUILD_TYPE} && cmake --build build --

- name: Run
- name: Basic version test
env:
CC: ${{ matrix.compiler.CC }}
CXX: ${{ matrix.compiler.CXX }}
# Smoke test: print the version of the freshly built mashmap binary
run: build/bin/mashmap -v

- name: Align yeast genomes
run: ASAN_OPTIONS=detect_leaks=1:symbolize=1 LSAN_OPTIONS=verbosity=0:log_threads=1 build/bin/mashmap
-r data/scerevisiae8.fa.gz
-q data/scerevisiae8.fa.gz
--pi 95 -n 1 -Y '#'
--threads $(nproc --all)
-o scerevisiae8.paf;
- name: Test yeast coverage
run: scripts/test.sh data/scerevisiae8.fa.gz.fai scerevisiae8.paf 0.92

6 changes: 6 additions & 0 deletions CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,10 @@ endif()
message(STATUS "CMAKE_BUILD_TYPE: ${CMAKE_BUILD_TYPE}")

option(OPTIMIZE_FOR_NATIVE "Build with -march=native" ON)
# LARGE_CONTIG (default OFF): opt-in switch for 64-bit sequence coordinates.
# When enabled, -DLARGE_CONTIG is passed to the compiler through the
# directory-scoped add_definitions(), so it applies to targets created in this
# directory after this point.
# NOTE(review): presumably the sources pick the coordinate type with
# `#ifdef LARGE_CONTIG` -- confirm against the C++ headers.
option(LARGE_CONTIG "Use 64-bit integers instead of 32 bit for sequence coordinates" OFF)
if (LARGE_CONTIG)
add_definitions(-DLARGE_CONTIG)
endif()
option(PROFILE "Prevent inlining and add debug symbols" OFF)

if (${CMAKE_BUILD_TYPE} MATCHES Release)
Expand Down Expand Up @@ -78,6 +82,7 @@ target_link_libraries(mashmap
gslcblas
m
pthread
hts
#rt
z
#assert
Expand All @@ -88,6 +93,7 @@ target_link_libraries(mashmap-align
gslcblas
m
pthread
hts
#rt
z
#assert
Expand Down
Binary file added data/scerevisiae8.fa.gz
Binary file not shown.
136 changes: 136 additions & 0 deletions data/scerevisiae8.fa.gz.fai
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
SGDref#1#chrI 230218 15 80 81
SGDref#1#chrII 813184 233127 80 81
SGDref#1#chrIII 316620 1056493 80 81
SGDref#1#chrIV 1531933 1377087 80 81
SGDref#1#chrV 576874 2928185 80 81
SGDref#1#chrVI 270161 3512286 80 81
SGDref#1#chrVII 1090940 3785842 80 81
SGDref#1#chrVIII 562643 4890437 80 81
SGDref#1#chrIX 439888 5460130 80 81
SGDref#1#chrX 745751 5905532 80 81
SGDref#1#chrXI 666816 6660621 80 81
SGDref#1#chrXII 1078177 7335790 80 81
SGDref#1#chrXIII 924431 8427463 80 81
SGDref#1#chrXIV 784333 9363467 80 81
SGDref#1#chrXV 1091291 10157621 80 81
SGDref#1#chrXVI 948066 11262571 80 81
SGDref#1#chrMT 85779 12222504 80 81
S288C#1#chrI 219929 12309370 219929 219930
S288C#1#chrII 813597 12529315 813597 813598
S288C#1#chrIII 341580 13342929 341580 341581
S288C#1#chrIV 1566853 13684525 1566853 1566854
S288C#1#chrV 583092 15251393 583092 583093
S288C#1#chrVI 271539 15834501 271539 271540
S288C#1#chrVII 1091538 16106057 1091538 1091539
S288C#1#chrVIII 581049 17197613 581049 581050
S288C#1#chrIX 440036 17778678 440036 440037
S288C#1#chrX 751611 18218729 751611 751612
S288C#1#chrXI 666862 18970356 666862 666863
S288C#1#chrXII 1075542 19637235 1075542 1075543
S288C#1#chrXIII 930506 20712795 930506 930507
S288C#1#chrXIV 777615 21643318 777615 777616
S288C#1#chrXV 1091343 22420949 1091343 1091344
S288C#1#chrXVI 954457 23512309 954457 954458
S288C#1#chrMT 85793 24466782 60 61
DBVPG6765#1#chrI 215496 24554023 215496 215497
DBVPG6765#1#chrII 795659 24769539 795659 795660
DBVPG6765#1#chrIII 327289 25565219 327289 327290
DBVPG6765#1#chrIV 1488087 25892528 1488087 1488088
DBVPG6765#1#chrV 576784 27380634 576784 576785
DBVPG6765#1#chrVI 257436 27957438 257436 257437
DBVPG6765#1#chrVII 1070236 28214895 1070236 1070237
DBVPG6765#1#chrVIII 533397 29285153 533397 533398
DBVPG6765#1#chrIX 419821 29818570 419821 419822
DBVPG6765#1#chrX 730957 30238410 730957 730958
DBVPG6765#1#chrXI 657417 30969387 657417 657418
DBVPG6765#1#chrXII 1022186 31626825 1022186 1022187
DBVPG6765#1#chrXIII 913017 32649033 913017 913018
DBVPG6765#1#chrXIV 765110 33562071 765110 765111
DBVPG6765#1#chrXV 1120088 34327201 1120088 1120089
DBVPG6765#1#chrXVI 920208 35447310 920208 920209
DBVPG6765#1#chrMT 81722 36367538 60 61
Y12#1#chrI 197190 36450635 197190 197191
Y12#1#chrII 800619 36647839 800619 800620
Y12#1#chrIII 322503 37448473 322503 322504
Y12#1#chrIV 1497473 37770990 1497473 1497474
Y12#1#chrV 575802 39268476 575802 575803
Y12#1#chrVI 285938 39844292 285938 285939
Y12#1#chrVII 1107723 40130245 1107723 1107724
Y12#1#chrVIII 547529 41237984 547529 547530
Y12#1#chrIX 431184 41785527 431184 431185
Y12#1#chrX 725652 42216724 725652 725653
Y12#1#chrXI 692235 42942390 692235 692236
Y12#1#chrXII 1043178 43634640 1043178 1043179
Y12#1#chrXIII 903361 44677834 903361 903362
Y12#1#chrXIV 800685 45581210 800685 800686
Y12#1#chrXV 1048295 46381909 1048295 1048296
Y12#1#chrXVI 901780 47430219 901780 901781
Y12#1#chrMT 82868 48332013 60 61
YPS128#1#chrI 237661 48416278 237661 237662
YPS128#1#chrII 808481 48653956 808481 808482
YPS128#1#chrIII 319304 49462455 319304 319305
YPS128#1#chrIV 1495051 49781776 1495051 1495052
YPS128#1#chrV 575962 51276843 575962 575963
YPS128#1#chrVI 289276 51852822 289276 289277
YPS128#1#chrVII 1082279 52142116 1082279 1082280
YPS128#1#chrVIII 543240 53224414 543240 543241
YPS128#1#chrIX 440919 53767671 440919 440920
YPS128#1#chrX 723414 54208606 723414 723415
YPS128#1#chrXI 667446 54932037 667446 667447
YPS128#1#chrXII 1027157 55599501 1027157 1027158
YPS128#1#chrXIII 928527 56626677 928527 928528
YPS128#1#chrXIV 766731 57555222 766731 766732
YPS128#1#chrXV 1072763 58321970 1072763 1072764
YPS128#1#chrXVI 932263 59394751 932263 932264
YPS128#1#chrMT 77479 60327031 60 61
SK1#1#chrI 228861 60405814 228861 228862
SK1#1#chrII 829469 60634689 829469 829470
SK1#1#chrIII 340914 61464173 340914 340915
SK1#1#chrIV 1486921 61805101 1486921 1486922
SK1#1#chrV 589812 63292035 589812 589813
SK1#1#chrVI 299318 63881861 299318 299319
SK1#1#chrVII 1080440 64181194 1080440 1080441
SK1#1#chrVIII 542723 65261650 542723 542724
SK1#1#chrIX 449612 65804387 449612 449613
SK1#1#chrX 753937 66254012 753937 753938
SK1#1#chrXI 690901 67007963 690901 690902
SK1#1#chrXII 1054145 67698879 1054145 1054146
SK1#1#chrXIII 923535 68753040 923535 923536
SK1#1#chrXIV 791982 69676590 791982 791983
SK1#1#chrXV 1053869 70468586 1053869 1053870
SK1#1#chrXVI 946846 71522470 946846 946847
SK1#1#chrMT 84638 72469330 60 61
DBVPG6044#1#chrI 217365 72555397 217365 217366
DBVPG6044#1#chrII 815565 72772782 815565 815566
DBVPG6044#1#chrIII 332771 73588368 332771 332772
DBVPG6044#1#chrIV 1486873 73921159 1486873 1486874
DBVPG6044#1#chrV 572248 75408051 572248 572249
DBVPG6044#1#chrVI 298678 75980319 298678 298679
DBVPG6044#1#chrVII 1078455 76279018 1078455 1078456
DBVPG6044#1#chrVIII 535633 77357495 535633 535634
DBVPG6044#1#chrIX 443501 77893148 443501 443502
DBVPG6044#1#chrX 728645 78336668 728645 728646
DBVPG6044#1#chrXI 695907 79065333 695907 695908
DBVPG6044#1#chrXII 1035507 79761261 1035507 1035508
DBVPG6044#1#chrXIII 930256 80796790 930256 930257
DBVPG6044#1#chrXIV 784343 81727067 784343 784344
DBVPG6044#1#chrXV 1062383 82511430 1062383 1062384
DBVPG6044#1#chrXVI 939911 83573834 939911 939912
DBVPG6044#1#chrMT 81093 84513765 60 61
UWOPS034614#1#chrI 214332 84596230 214332 214333
UWOPS034614#1#chrII 815151 84810584 815151 815152
UWOPS034614#1#chrIII 309137 85625758 309137 309138
UWOPS034614#1#chrIV 1468232 85934917 1468232 1468233
UWOPS034614#1#chrV 555692 87403170 555692 555693
UWOPS034614#1#chrVI 290867 87958884 290867 290868
UWOPS034614#1#chrVII 632616 88249774 632616 632617
UWOPS034614#1#chrVIII 738767 88882414 738767 738768
UWOPS034614#1#chrIX 428968 89621203 428968 428969
UWOPS034614#1#chrX 1092164 90050192 1092164 1092165
UWOPS034614#1#chrXI 792116 91142378 792116 792117
UWOPS034614#1#chrXII 1010127 91934517 1010127 1010128
UWOPS034614#1#chrXIII 662343 92944668 662343 662344
UWOPS034614#1#chrXIV 765143 93607034 765143 765144
UWOPS034614#1#chrXV 1062502 94372199 1062502 1062503
UWOPS034614#1#chrXVI 909189 95434724 909189 909190
UWOPS034614#1#chrMT 74179 96343935 60 61
Binary file added data/scerevisiae8.fa.gz.gzi
Binary file not shown.
36 changes: 36 additions & 0 deletions scripts/test.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
#!/bin/bash
#
# Check how much of every sequence is covered by the mappings in a PAF file.
#
# Usage: test.sh <fasta.fai> <mappings.paf> <min_coverage>
#
#   <fasta.fai>     faidx index; column 1 is read as the sequence name and
#                   column 2 as the sequence length.
#   <mappings.paf>  PAF mappings. Its path is also the prefix of every
#                   intermediate file written by this script.
#   <min_coverage>  Threshold; every sequence whose covered fraction is below
#                   this value is reported.
#
# Files written: <paf>.sequences.bed, <paf>.query+target.bed,
#                <paf>.overlap.bed, <paf>.coverage.txt
#
# Exit status: 0 when all sequences reach the threshold, 1 when at least one
# does not, 2 on wrong usage. Any failing pipeline stage aborts the script.

# Abort on errors, on unset variables and on a failure anywhere in a pipeline,
# so that a broken bedtools/awk stage cannot yield a misleading result.
set -euo pipefail

if [ "$#" -lt 3 ]; then
    echo "Usage: $0 <fasta.fai> <mappings.paf> <min_coverage>" >&2
    exit 2
fi

FASTA_FAI=$1
PAF=$2
COVERAGE=$3

# One BED interval per sequence, spanning the whole sequence: name, 0, length.
awk -v OFS='\t' '{print($1,"0",$2)}' < "$FASTA_FAI" > "$PAF.sequences.bed"

# Collect the query intervals (PAF columns 1,3,4 with strand from column 5) and
# the target intervals (PAF columns 6,8,9, strand fixed to "+"), then sort and
# merge them so overlapping mappings are counted only once.
cat \
    <(awk -v OFS='\t' '{print $1, $3, $4, "", "", $5}' < "$PAF") \
    <(awk -v OFS='\t' '{print $6, $8, $9, "", "", "+"}' < "$PAF") \
    | bedtools sort | bedtools merge > "$PAF.query+target.bed"

# Header line of the coverage table.
printf '#seq.name\tcoverage\n' > "$PAF.coverage.txt"

# -wo appends the number of overlapping bases as the last column.
bedtools intersect -a "$PAF.sequences.bed" -b "$PAF.query+target.bed" -wo > "$PAF.overlap.bed"

# First file (NR==FNR): remember each sequence length and start its coverage
# at 0. Second file: add up the overlap sizes (last column) per sequence.
# Finally print covered fraction = covered bases / sequence length.
awk 'BEGIN{FS=OFS="\t"}{
    if(NR==FNR){
        len[$1]=$3-$2; coverage[$1]=0;
    } else {
        coverage[$1]+=$NF
    }
} END{
    for(seq in len){
        printf("%s\t%f\n", seq, coverage[seq] / len[seq])
    }
}' "$PAF.sequences.bed" "$PAF.overlap.bed" >> "$PAF.coverage.txt"

# Show the table: header first, then rows by descending coverage, then name.
cat \
    <(head -n 1 "$PAF.coverage.txt") \
    <(sed '1d' "$PAF.coverage.txt" | sort -k 2,2nr -k 1,1) | column -t

# Report every sequence below the threshold and fail if there was at least one.
awk -v threshold="$COVERAGE" 'NR > 1 && $2 < threshold {
    print "Low coverage for sequence " $1 " with coverage " $2;
    flag = 1
} END {
    if (flag) exit 1
}' "$PAF.coverage.txt"
52 changes: 0 additions & 52 deletions src/common/rkmh.hpp

This file was deleted.

Loading

0 comments on commit 3ce0cd3

Please sign in to comment.