Skip to content

Commit

Permalink
Handle legacy stats arrays (#761)
Browse files Browse the repository at this point in the history
  • Loading branch information
gspowley committed Aug 20, 2024
1 parent 6282fc3 commit 867f130
Show file tree
Hide file tree
Showing 3 changed files with 51 additions and 7 deletions.
28 changes: 28 additions & 0 deletions libtiledbvcf/src/stats/sample_stats.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@
namespace tiledb::vcf {

SampleStats::~SampleStats() {
if (!enabled_) {
return;
}

// Flush any remaining stats
flush(true);

Expand Down Expand Up @@ -174,6 +178,30 @@ void SampleStats::init(
return;
}

// Check array version
{
auto array = Array(*ctx, uri, TILEDB_READ);

// Read the "version" metadata entry from the opened stats array.
// Returns the stored value when the entry exists and is a single int32;
// returns std::nullopt when the entry is missing or has another type/count.
auto get_version = [&]() -> std::optional<int> {
  // Give the out-parameters defined values up front so nothing indeterminate
  // is read if the lookup does not write to all of them.
  tiledb_datatype_t value_type{};
  uint32_t value_num = 0;
  const void* value = nullptr;
  array.get_metadata("version", &value_type, &value_num, &value);

  // A null value pointer means the key is absent. Test it first so the
  // type/count checks never lead to dereferencing a null pointer.
  if (value == nullptr) {
    return std::nullopt;
  }
  if (value_type != TILEDB_INT32 || value_num != 1) {
    return std::nullopt;
  }
  return *static_cast<const int32_t*>(value);
};

auto version = get_version();
if (version == std::nullopt) {
LOG_WARN(
"[SampleStats] Sample stats are deprecated for this array version.");
enabled_ = false;
return;
}
}

LOG_DEBUG("[SampleStats] Open array '{}'", uri);

// Open array
Expand Down
15 changes: 13 additions & 2 deletions libtiledbvcf/src/stats/variant_stats.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ namespace tiledb::vcf {
// Maximum allele length encountered; starts at 0.
int32_t VariantStats::max_length_ = 0;

// Version of the variant stats array; defaults to VARIANT_STATS_MIN_VERSION.
uint32_t VariantStats::array_version_ = VariantStats::VARIANT_STATS_MIN_VERSION;

// True when the array schema has an "an" attribute. Set in init() and
// consulted in flush() before attaching the "an" data buffer.
bool VariantStats::an_present_ = false;

//===================================================================
//= public static functions
//===================================================================
Expand Down Expand Up @@ -213,6 +216,10 @@ void VariantStats::init(std::shared_ptr<Context> ctx, const Group& group) {
throw std::runtime_error(
"encountered variant stats array version out of range while writing");

// Check for presence of "an" attribute
auto schema = fetch_version.schema();
an_present_ = schema.has_attribute("an");

// Open array
array_ = std::make_unique<Array>(*ctx, uri, TILEDB_WRITE);
enabled_ = true;
Expand Down Expand Up @@ -421,7 +428,9 @@ void VariantStats::flush(bool finalize) {
}

query_->set_data_buffer("ac", ac_buffer_);
query_->set_data_buffer("an", an_buffer_);
if (an_present_) {
query_->set_data_buffer("an", an_buffer_);
}
query_->set_data_buffer("n_hom", n_hom_buffer_);
if (array_version_ >= 3) {
query_->set_data_buffer("max_length", max_length_buffer_);
Expand Down Expand Up @@ -469,7 +478,9 @@ void VariantStats::flush(bool finalize) {
}

query_->set_data_buffer("ac", ac_buffer_);
query_->set_data_buffer("an", an_buffer_);
if (an_present_) {
query_->set_data_buffer("an", an_buffer_);
}
query_->set_data_buffer("n_hom", n_hom_buffer_);
if (array_version_ >= 3) {
query_->set_data_buffer("max_length", max_length_buffer_);
Expand Down
15 changes: 10 additions & 5 deletions libtiledbvcf/src/stats/variant_stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -241,15 +241,20 @@ class VariantStats {
// Sample names included in the fragment
inline static std::set<std::string> fragment_sample_names_;

//===================================================================
//= private non-static
//===================================================================

// maximum allele length ecountered
// maximum allele length encountered
static int32_t max_length_;

// Array version
static uint32_t array_version_;

// Flag to indicate if AN is present in the schema, needed to handle
// the case where AN could be missing in a version 2 schema
static bool an_present_;

//===================================================================
//= private non-static
//===================================================================

// Count delta is +1 in ingest mode, -1 in delete mode
int count_delta_ = 1;

Expand Down

0 comments on commit 867f130

Please sign in to comment.