Skip to content

Commit

Permalink
Limit filtering DP table size
Browse files Browse the repository at this point in the history
  • Loading branch information
bkille committed Aug 21, 2023
1 parent 93d12ad commit 8fb01ab
Show file tree
Hide file tree
Showing 2 changed files with 12 additions and 8 deletions.
19 changes: 11 additions & 8 deletions src/map/include/computeMap.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,7 @@ namespace skch
param(p),
refSketch(refsketch),
processMappingResults(f),
sketchCutoffs(p.sketchSize + 1, 1),
sketchCutoffs(std::min<double>(p.sketchSize, skch::fixed::ss_table_max) + 1, 1),
refIdGroup(refsketch.metadata.size())
{
if (p.stage1_topANI_filter) {
Expand Down Expand Up @@ -179,12 +179,12 @@ namespace skch

float deltaANI = param.ANIDiff;
float min_p = 1 - param.ANIDiffConf;
int ss = param.sketchSize;
int ss = std::min<double>(param.sketchSize, skch::fixed::ss_table_max);

// Cache hg pmf results
std::vector<std::vector<double>> sketchProbs(
param.sketchSize + 1,
std::vector<double>(param.sketchSize + 1.0)
ss + 1,
std::vector<double>(ss + 1.0)
);
for (auto ci = 0; ci <= ss; ci++)
{
Expand All @@ -203,9 +203,9 @@ namespace skch

// yi_cutoff is minimum jaccard numerator required to be within deltaANI of ymax
double yi_cutoff = deltaANI == 0 ? ymax : (std::floor(skch::Stat::md2j(
skch::Stat::j2md(ymax / param.sketchSize, param.kmerSize) + deltaANI,
skch::Stat::j2md(ymax / ss, param.kmerSize) + deltaANI,
param.kmerSize
) * param.sketchSize));
) * ss));

// Pr Y_i < yi_cutoff
//std::cerr << "CMF " << yi_cutoff - 1 << " " << ss << " " << ss-ci << " " << ci << std::endl;
Expand All @@ -225,7 +225,7 @@ namespace skch
};

// Helper vector for binary search
std::vector<int> ss_range(param.sketchSize+1);
std::vector<int> ss_range(ss+1);
std::iota (ss_range.begin(), ss_range.end(), 0);

for (auto cmax = 1; cmax <= ss; cmax++)
Expand Down Expand Up @@ -944,7 +944,10 @@ namespace skch
} else
{
minimumHits = std::max(
sketchCutoffs[std::min(bestIntersectionSize, Q.sketchSize)],
sketchCutoffs[
int(std::min(bestIntersectionSize, Q.sketchSize)
/ std::max<double>(1, param.sketchSize / skch::fixed::ss_table_max))
],
minimumHits);
}
}
Expand Down
1 change: 1 addition & 0 deletions src/map/include/map_parameters.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -92,6 +92,7 @@ namespace fixed
//int max_best_mappings_per_position = 25; // At a particular position, if algorithm finds more than a certain best
//mappings, it doesn't mark them as best anymore

double ss_table_max = 1000.0; // Maximum size of dp table for filtering
double pval_cutoff = 1e-3; // p-value cutoff for determining window size
float confidence_interval = 0.95; // Confidence interval to relax jaccard cutoff for mapping (0-1)
float percentage_identity = 0.85; // Percent identity in the mapping step
Expand Down

0 comments on commit 8fb01ab

Please sign in to comment.