Skip to content

Commit

Permalink
fix: escaped columns in dataskippingstatscolumns
Browse files Browse the repository at this point in the history
  • Loading branch information
ion-elgreco committed Sep 7, 2024
1 parent 40b2b27 commit 3127c81
Show file tree
Hide file tree
Showing 2 changed files with 25 additions and 5 deletions.
22 changes: 22 additions & 0 deletions crates/core/src/writer/stats.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ use std::{collections::HashMap, ops::AddAssign};

use delta_kernel::expressions::Scalar;
use indexmap::IndexMap;
use itertools::Itertools;
use parquet::file::metadata::ParquetMetaData;
use parquet::format::FileMetaData;
use parquet::schema::types::{ColumnDescriptor, SchemaDescriptor};
Expand Down Expand Up @@ -130,8 +131,29 @@ fn stats_from_metadata(
let mut min_values: HashMap<String, ColumnValueStat> = HashMap::new();
let mut max_values: HashMap<String, ColumnValueStat> = HashMap::new();
let mut null_count: HashMap<String, ColumnCountStat> = HashMap::new();
let dialect = sqlparser::dialect::GenericDialect {};

let idx_to_iterate = if let Some(stats_cols) = stats_columns {
let stats_cols = stats_cols
.into_iter()
.map(|v| {
match sqlparser::parser::Parser::new(&dialect)
.try_with_sql(v)
.map_err(|e| DeltaTableError::generic(e.to_string()))?
.parse_multipart_identifier()
{
Ok(parts) => Ok(parts.into_iter().map(|v| v.value).join(".")),
Err(e) => {
return Err(DeltaWriterError::DeltaTable(
DeltaTableError::GenericError {
source: Box::new(e),
},
))
}
}
})
.collect::<Result<Vec<String>, DeltaWriterError>>()?;

schema_descriptor
.columns()
.iter()
Expand Down
8 changes: 3 additions & 5 deletions python/tests/test_writer.py
Original file line number Diff line number Diff line change
Expand Up @@ -1699,8 +1699,7 @@ def _check_stats(dt: DeltaTable):
_check_stats(dt)


@pytest.mark.parametrize("engine", ["pyarrow", "rust"])
def test_write_stats_columns_stats_provided(tmp_path: pathlib.Path, engine):
def test_write_stats_columns_stats_provided(tmp_path: pathlib.Path):
def _check_stats(dt: DeltaTable):
add_actions_table = dt.get_add_actions(flatten=True)
stats = add_actions_table.to_pylist()[0]
Expand All @@ -1726,15 +1725,14 @@ def _check_stats(dt: DeltaTable):
tmp_path,
data,
mode="append",
engine=engine,
configuration={"delta.dataSkippingStatsColumns": "foo,baz"},
configuration={"delta.dataSkippingStatsColumns": "foo,`baz`"},
)

dt = DeltaTable(tmp_path)
_check_stats(dt)

# Check if it properly takes skippingNumIndexCols from the config in the table
write_deltalake(tmp_path, data, mode="overwrite", engine=engine)
write_deltalake(tmp_path, data, mode="overwrite")

dt = DeltaTable(tmp_path)
assert dt.version() == 1
Expand Down

0 comments on commit 3127c81

Please sign in to comment.