diff --git a/libtiledbvcf/src/stats/sample_stats.cc b/libtiledbvcf/src/stats/sample_stats.cc index a20b0a1e4..d3ee27764 100644 --- a/libtiledbvcf/src/stats/sample_stats.cc +++ b/libtiledbvcf/src/stats/sample_stats.cc @@ -34,6 +34,10 @@ namespace tiledb::vcf { SampleStats::~SampleStats() { + if (!enabled_) { + return; + } + // Flush any remaining stats flush(true); @@ -174,6 +178,30 @@ void SampleStats::init( return; } + // Check array version + { + auto array = Array(*ctx, uri, TILEDB_READ); + + auto get_version = [&]() -> std::optional { + tiledb_datatype_t value_type; + uint32_t value_num; + const void* value; + array.get_metadata("version", &value_type, &value_num, &value); + if (value_type == TILEDB_INT32 && value_num == 1) { + return *static_cast(value); + } + return std::nullopt; + }; + + auto version = get_version(); + if (version == std::nullopt) { + LOG_WARN( + "[SampleStats] Sample stats are deprecated for this array version."); + enabled_ = false; + return; + } + } + LOG_DEBUG("[SampleStats] Open array '{}'", uri); // Open array diff --git a/libtiledbvcf/src/stats/variant_stats.cc b/libtiledbvcf/src/stats/variant_stats.cc index c3b67cfd5..7ae140ec2 100644 --- a/libtiledbvcf/src/stats/variant_stats.cc +++ b/libtiledbvcf/src/stats/variant_stats.cc @@ -38,6 +38,9 @@ namespace tiledb::vcf { int32_t VariantStats::max_length_ = 0; uint32_t VariantStats::array_version_ = VariantStats::VARIANT_STATS_MIN_VERSION; + +bool VariantStats::an_present_ = false; + //=================================================================== //= public static functions //=================================================================== @@ -213,6 +216,10 @@ void VariantStats::init(std::shared_ptr ctx, const Group& group) { throw std::runtime_error( "encountered variant stats array version out of range while writing"); + // Check for presence of "an" attribute + auto schema = fetch_version.schema(); + an_present_ = schema.has_attribute("an"); + // Open array array_ = std::make_unique(*ctx, uri, TILEDB_WRITE); enabled_ = true; @@ -421,7 +428,9 @@ void VariantStats::flush(bool finalize) { } query_->set_data_buffer("ac", ac_buffer_); - query_->set_data_buffer("an", an_buffer_); + if (an_present_) { + query_->set_data_buffer("an", an_buffer_); + } query_->set_data_buffer("n_hom", n_hom_buffer_); if (array_version_ >= 3) { query_->set_data_buffer("max_length", max_length_buffer_); @@ -469,7 +478,9 @@ void VariantStats::flush(bool finalize) { } query_->set_data_buffer("ac", ac_buffer_); - query_->set_data_buffer("an", an_buffer_); + if (an_present_) { + query_->set_data_buffer("an", an_buffer_); + } query_->set_data_buffer("n_hom", n_hom_buffer_); if (array_version_ >= 3) { query_->set_data_buffer("max_length", max_length_buffer_); diff --git a/libtiledbvcf/src/stats/variant_stats.h b/libtiledbvcf/src/stats/variant_stats.h index f113aff66..a4c60b014 100644 --- a/libtiledbvcf/src/stats/variant_stats.h +++ b/libtiledbvcf/src/stats/variant_stats.h @@ -241,15 +241,20 @@ class VariantStats { // Sample names included in the fragment inline static std::set fragment_sample_names_; - //=================================================================== - //= private non-static - //=================================================================== - - // maximum allele length ecountered + // maximum allele length encountered static int32_t max_length_; + // Array version static uint32_t array_version_; + // Flag to indicate if AN is present in the schema, needed to handle + // the case where AN could be missing in a version 2 schema + static bool an_present_; + + //=================================================================== + //= private non-static + //=================================================================== + // Count delta is +1 in ingest mode, -1 in delete mode int count_delta_ = 1;