Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle legacy stats arrays #761

Merged
merged 1 commit into from
Aug 20, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
28 changes: 28 additions & 0 deletions libtiledbvcf/src/stats/sample_stats.cc
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,10 @@
namespace tiledb::vcf {

SampleStats::~SampleStats() {
if (!enabled_) {
return;
}

// Flush any remaining stats
flush(true);

Expand Down Expand Up @@ -174,6 +178,30 @@ void SampleStats::init(
return;
}

// Check array version
{
auto array = Array(*ctx, uri, TILEDB_READ);

// Read the "version" metadata entry from `array`.
// Returns the stored value when the entry exists and holds exactly one
// TILEDB_INT32; returns std::nullopt otherwise (missing key, wrong type,
// or wrong element count).
auto get_version = [&]() -> std::optional<int> {
  // Give every out-parameter a defined value before the call, so a lookup
  // that fills in nothing cannot be mistaken for a valid INT32 entry.
  tiledb_datatype_t value_type = TILEDB_ANY;
  uint32_t value_num = 0;
  const void* value = nullptr;
  array.get_metadata("version", &value_type, &value_num, &value);

  // A null value pointer means there is nothing to read; check it first so
  // the pointer is never dereferenced for a missing key.
  if (value == nullptr || value_type != TILEDB_INT32 || value_num != 1) {
    return std::nullopt;
  }
  return *static_cast<const int*>(value);
};

auto version = get_version();
if (version == std::nullopt) {
LOG_WARN(
"[SampleStats] Sample stats are deprecated for this array version.");
enabled_ = false;
return;
}
}

LOG_DEBUG("[SampleStats] Open array '{}'", uri);

// Open array
Expand Down
15 changes: 13 additions & 2 deletions libtiledbvcf/src/stats/variant_stats.cc
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,9 @@ namespace tiledb::vcf {
// Maximum allele length encountered; starts at 0.
int32_t VariantStats::max_length_ = 0;

// Variant stats array version; starts at VARIANT_STATS_MIN_VERSION.
uint32_t VariantStats::array_version_ = VariantStats::VARIANT_STATS_MIN_VERSION;

// Whether the "an" attribute is present in the array schema; starts false
// and is assigned from the schema in VariantStats::init().
bool VariantStats::an_present_ = false;

//===================================================================
//= public static functions
//===================================================================
Expand Down Expand Up @@ -213,6 +216,10 @@ void VariantStats::init(std::shared_ptr<Context> ctx, const Group& group) {
throw std::runtime_error(
"encountered variant stats array version out of range while writing");

// Check for presence of "an" attribute
auto schema = fetch_version.schema();
an_present_ = schema.has_attribute("an");
awenocur marked this conversation as resolved.
Show resolved Hide resolved

// Open array
array_ = std::make_unique<Array>(*ctx, uri, TILEDB_WRITE);
enabled_ = true;
Expand Down Expand Up @@ -421,7 +428,9 @@ void VariantStats::flush(bool finalize) {
}

query_->set_data_buffer("ac", ac_buffer_);
query_->set_data_buffer("an", an_buffer_);
if (an_present_) {
query_->set_data_buffer("an", an_buffer_);
}
query_->set_data_buffer("n_hom", n_hom_buffer_);
if (array_version_ >= 3) {
query_->set_data_buffer("max_length", max_length_buffer_);
Expand Down Expand Up @@ -469,7 +478,9 @@ void VariantStats::flush(bool finalize) {
}

query_->set_data_buffer("ac", ac_buffer_);
query_->set_data_buffer("an", an_buffer_);
if (an_present_) {
query_->set_data_buffer("an", an_buffer_);
}
query_->set_data_buffer("n_hom", n_hom_buffer_);
if (array_version_ >= 3) {
query_->set_data_buffer("max_length", max_length_buffer_);
Expand Down
15 changes: 10 additions & 5 deletions libtiledbvcf/src/stats/variant_stats.h
Original file line number Diff line number Diff line change
Expand Up @@ -241,15 +241,20 @@ class VariantStats {
// Sample names included in the fragment
inline static std::set<std::string> fragment_sample_names_;

//===================================================================
//= private non-static
//===================================================================

// maximum allele length ecountered
// maximum allele length encountered
static int32_t max_length_;

// Array version
static uint32_t array_version_;

// Flag to indicate if AN is present in the schema, needed to handle
// the case where AN could be missing in a version 2 schema
static bool an_present_;

//===================================================================
//= private non-static
//===================================================================

// Count delta is +1 in ingest mode, -1 in delete mode
int count_delta_ = 1;

Expand Down
Loading