Skip to content

Commit

Permalink
Prevent constructing or setting attribute with invalid cell_val_num. (#…
Browse files Browse the repository at this point in the history
…4952)

While working on the Rust API I observed that the following code would
error when `datatype == Datatype::Any`:
```
            AttributeBuilder::new(context, field.name(), datatype)?
                .nullability(field.is_nullable())?
                .cell_val_num(cell_val_num)?
```
the error arises in the call to core function `set_cell_val_num`, which
errors out if the datatype is `ANY`, even if you set to
`constants::var_num` which is required for `ANY`.

From there I noticed that it is also possible to construct an Attribute
with an invalid cell val num, e.g. construct an `ANY` attribute with a
non-var cell val num.

This pull request adds the `check_cell_val_num` function and moves all
associated error-checking logic into it, then calls this function from
all of the `Attribute` constructors as well as `set_cell_val_num`.

---
TYPE:  BUG
DESC: Prevent constructing attribute with invalid cell_val_num.
  • Loading branch information
rroelke committed Jun 6, 2024
1 parent dfe84f6 commit 2593e54
Show file tree
Hide file tree
Showing 4 changed files with 417 additions and 32 deletions.
62 changes: 35 additions & 27 deletions tiledb/sm/array_schema/attribute.cc
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
*
* The MIT License
*
* @copyright Copyright (c) 2017-2023 TileDB, Inc.
* @copyright Copyright (c) 2017-2024 TileDB, Inc.
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -57,6 +57,8 @@ class AttributeStatusException : public StatusException {
}
};

using AttributeException = AttributeStatusException;

/* ********************************* */
/* CONSTRUCTORS & DESTRUCTORS */
/* ********************************* */
Expand All @@ -81,25 +83,11 @@ Attribute::Attribute(
, name_(name)
, type_(type)
, order_(order) {
set_default_fill_value();

// If ordered, check the number of values of cells is supported.
if (order_ != DataOrder::UNORDERED_DATA) {
ensure_ordered_attribute_datatype_is_valid(type_);
if (type == Datatype::STRING_ASCII) {
if (cell_val_num_ != constants::var_num) {
throw std::invalid_argument(
"Ordered attributes with datatype '" + datatype_str(type_) +
"' must have cell_val_num=1.");
}
} else {
if (cell_val_num_ != 1) {
throw std::invalid_argument(
"Ordered attributes with datatype '" + datatype_str(type_) +
"' must have cell_val_num=1.");
}
}
}
validate_cell_val_num(cell_val_num_);
set_default_fill_value();
}

Attribute::Attribute(
Expand All @@ -121,6 +109,7 @@ Attribute::Attribute(
, fill_value_validity_(fill_value_validity)
, order_(order)
, enumeration_name_(enumeration_name) {
validate_cell_val_num(cell_val_num_);
}

/* ********************************* */
Expand Down Expand Up @@ -303,22 +292,41 @@ void Attribute::serialize(
}

void Attribute::set_cell_val_num(unsigned int cell_val_num) {
if (type_ == Datatype::ANY) {
throw AttributeStatusException(
validate_cell_val_num(cell_val_num);
cell_val_num_ = cell_val_num;
set_default_fill_value();
}

void Attribute::validate_cell_val_num(unsigned int cell_val_num) const {
if (type_ == Datatype::ANY && cell_val_num != constants::var_num) {
throw AttributeException(
"Cannot set number of values per cell; Attribute datatype `ANY` is "
"always variable-sized");
}

if (order_ != DataOrder::UNORDERED_DATA && type_ != Datatype::STRING_ASCII &&
cell_val_num != 1) {
throw AttributeStatusException(
"Cannot set number of values per cell; An ordered attribute with "
"datatype '" +
datatype_str(type_) + "' can only have cell_val_num=1.");
// If ordered, check the number of values of cells is supported.
if (order_ != DataOrder::UNORDERED_DATA) {
if (type_ == Datatype::STRING_ASCII) {
if (cell_val_num != constants::var_num) {
throw AttributeException(
"Cannot set number of values per cell; Ordered attributes with "
"datatype '" +
datatype_str(type_) +
"' must have `cell_val_num=constants::var_num`.");
}
} else {
if (cell_val_num != 1) {
throw AttributeException(
"Ordered attributes with datatype '" + datatype_str(type_) +
"' must have `cell_val_num=1`.");
}
}
}

cell_val_num_ = cell_val_num;
set_default_fill_value();
// check zero last so we get the more informative error first
if (cell_val_num == 0) {
throw AttributeException("Cannot set zero values per cell");
}
}

void Attribute::set_nullable(const bool nullable) {
Expand Down
28 changes: 24 additions & 4 deletions tiledb/sm/array_schema/attribute.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,7 +56,17 @@ class Enumeration;
enum class Compressor : uint8_t;
enum class Datatype : uint8_t;

/** Manipulates a TileDB attribute. */
/**
* Manipulates a TileDB attribute.
*
* @invariant
*
* A valid `cell_val_num` depends on the Attribute datatype and ordering.
* For `Datatype::ANY`, the only valid value is `constants::var_num`.
* If the attribute is unordered, then all other datatypes support any value.
* If the attribute is ordered, then an Attribute of `Datatype::STRING_ASCII`
* must have `constants::var_num`, and all other datatypes must have 1.
*/
class Attribute {
public:
/* ********************************* */
Expand Down Expand Up @@ -84,6 +94,8 @@ class Attribute {
* @param type The type of the attribute.
* @param cell_val_num The cell number of the attribute.
* @param order The ordering of the attribute.
*
* @throws if `cell_val_num` is invalid. See `set_cell_val_num`.
*/
Attribute(
const std::string& name,
Expand All @@ -102,6 +114,8 @@ class Attribute {
* @param fill_value The fill value of the attribute.
* @param fill_value_validity The validity of fill_value.
* @param order The order of the data stored in the attribute.
*
* @throws if `cell_val_num` is invalid. See `set_cell_val_num`.
*/
Attribute(
const std::string& name,
Expand Down Expand Up @@ -237,9 +251,13 @@ class Attribute {
void serialize(Serializer& serializer, uint32_t version) const;

/**
* Sets the attribute number of values per cell. Note that if the attribute
* datatype is `ANY` this function returns an error, since `ANY` datatype
* must always be variable-sized.
* Sets the attribute number of values per cell.
*
* @throws AttributeException if `cell_val_num` is invalid. See class
* documentation.
*
* @post `this->cell_val_num() == cell_val_num` if `cell_val_num` is
* valid, and `this->cell_val_num()` is unchanged otherwise.
*/
void set_cell_val_num(unsigned int cell_val_num);

Expand Down Expand Up @@ -306,6 +324,8 @@ class Attribute {
/* ********************************* */
/* PRIVATE METHODS */
/* ********************************* */
/** Called to validate a cell val num for this attribute */
void validate_cell_val_num(unsigned cell_val_num) const;

/** Sets the default fill value. */
void set_default_fill_value();
Expand Down
3 changes: 2 additions & 1 deletion tiledb/sm/array_schema/test/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
#
# The MIT License
#
# Copyright (c) 2021-2022 TileDB, Inc.
# Copyright (c) 2021-2024 TileDB, Inc.
#
# Permission is hereby granted, free of charge, to any person obtaining a copy
# of this software and associated documentation files (the "Software"), to deal
Expand Down Expand Up @@ -33,6 +33,7 @@ commence(unit_test array_schema)
this_target_sources(
main.cc
unit_array_schema.cc
unit_attribute.cc
unit_dimension.cc
unit_dimension_label.cc
unit_domain_data.cc
Expand Down
Loading

0 comments on commit 2593e54

Please sign in to comment.