Skip to content

Commit

Permalink
fix (issue-1079): allow update_column to set doc as '' (#1083)
Browse files Browse the repository at this point in the history
* fix (issue-1079): allow update_column to set doc as ''

* fix (issue-1079): add test for required and fix version

* Apply suggestions from code review

Co-authored-by: Sung Yun <107272191+sungwy@users.noreply.github.com>

* Apply suggestions from code review

---------

Co-authored-by: Tiansu Yu <tiansu.yu@icloud>
Co-authored-by: Sung Yun <107272191+sungwy@users.noreply.github.com>
  • Loading branch information
3 people committed Aug 26, 2024
1 parent 87d52d3 commit 0b487f2
Show file tree
Hide file tree
Showing 2 changed files with 39 additions and 3 deletions.
7 changes: 4 additions & 3 deletions pyiceberg/table/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2492,20 +2492,21 @@ def update_column(
except ResolveError as e:
raise ValidationError(f"Cannot change column type: {full_name}: {field.field_type} -> {field_type}") from e

# if other updates for the same field exist in one transaction:
if updated := self._updates.get(field.field_id):
self._updates[field.field_id] = NestedField(
field_id=updated.field_id,
name=updated.name,
field_type=field_type or updated.field_type,
doc=doc or updated.doc,
doc=doc if doc is not None else updated.doc,
required=updated.required,
)
else:
self._updates[field.field_id] = NestedField(
field_id=field.field_id,
name=field.name,
field_type=field_type or field.field_type,
doc=doc or field.doc,
doc=doc if doc is not None else field.doc,
required=field.required,
)

Expand Down Expand Up @@ -2878,7 +2879,7 @@ def _update_column(self, field: NestedField, existing_field: NestedField) -> Non
if field.field_type.is_primitive and field.field_type != existing_field.field_type:
self.update_schema.update_column(full_name, field_type=field.field_type)

if field.doc is not None and not field.doc != existing_field.doc:
if field.doc is not None and field.doc != existing_field.doc:
self.update_schema.update_column(full_name, doc=field.doc)

def _find_field_type(self, field_id: int) -> IcebergType:
Expand Down
35 changes: 35 additions & 0 deletions tests/table/test_init.py
Original file line number Diff line number Diff line change
Expand Up @@ -512,6 +512,41 @@ def test_add_column(table_v2: Table) -> None:
assert apply_schema.highest_field_id == 4


def test_update_column(table_v1: Table, table_v2: Table) -> None:
"""
Table should be able to update existing property `doc`
Table should also be able to update property `required`, if the field is not an identifier field.
"""
COMMENT2 = "comment2"
for table in [table_v1, table_v2]:
original_schema = table.schema()
# update existing doc to a new doc
assert original_schema.find_field("y").doc == "comment"
new_schema = table.transaction().update_schema().update_column("y", doc=COMMENT2)._apply()
assert new_schema.find_field("y").doc == COMMENT2, "failed to update existing field doc"

# update existing doc to an emtpy string
assert new_schema.find_field("y").doc == COMMENT2
new_schema2 = table.transaction().update_schema().update_column("y", doc="")._apply()
assert new_schema2.find_field("y").doc == "", "failed to remove existing field doc"

# update required to False
assert original_schema.find_field("z").required is True
new_schema3 = table.transaction().update_schema().update_column("z", required=False)._apply()
assert new_schema3.find_field("z").required is False, "failed to update existing field required"

# assert the above two updates also works with union_by_name
assert (
table.update_schema().union_by_name(new_schema)._apply() == new_schema
), "failed to update existing field doc with union_by_name"
assert (
table.update_schema().union_by_name(new_schema2)._apply() == new_schema2
), "failed to remove existing field doc with union_by_name"
assert (
table.update_schema().union_by_name(new_schema3)._apply() == new_schema3
), "failed to update existing field required with union_by_name"


def test_add_primitive_type_column(table_v2: Table) -> None:
primitive_type: Dict[str, PrimitiveType] = {
"boolean": BooleanType(),
Expand Down

0 comments on commit 0b487f2

Please sign in to comment.