From 0b487f27ca4bb5427f096a1dec738659e0bdd0b7 Mon Sep 17 00:00:00 2001 From: Tiansu Date: Mon, 26 Aug 2024 16:38:29 +0200 Subject: [PATCH] fix (issue-1079): allow update_column to set doc as '' (#1083) * fix (issue-1079): allow update_column to set doc as '' * fix (issue-1079): add test for required and fix version * Apply suggestions from code review Co-authored-by: Sung Yun <107272191+sungwy@users.noreply.github.com> * Apply suggestions from code review --------- Co-authored-by: Tiansu Yu Co-authored-by: Sung Yun <107272191+sungwy@users.noreply.github.com> --- pyiceberg/table/__init__.py | 7 ++++--- tests/table/test_init.py | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 39 insertions(+), 3 deletions(-) diff --git a/pyiceberg/table/__init__.py b/pyiceberg/table/__init__.py index 479448318..8ec44c6b8 100644 --- a/pyiceberg/table/__init__.py +++ b/pyiceberg/table/__init__.py @@ -2492,12 +2492,13 @@ def update_column( except ResolveError as e: raise ValidationError(f"Cannot change column type: {full_name}: {field.field_type} -> {field_type}") from e + # if other updates for the same field exist in one transaction: if updated := self._updates.get(field.field_id): self._updates[field.field_id] = NestedField( field_id=updated.field_id, name=updated.name, field_type=field_type or updated.field_type, - doc=doc or updated.doc, + doc=doc if doc is not None else updated.doc, required=updated.required, ) else: @@ -2505,7 +2506,7 @@ def update_column( field_id=field.field_id, name=field.name, field_type=field_type or field.field_type, - doc=doc or field.doc, + doc=doc if doc is not None else field.doc, required=field.required, ) @@ -2878,7 +2879,7 @@ def _update_column(self, field: NestedField, existing_field: NestedField) -> Non if field.field_type.is_primitive and field.field_type != existing_field.field_type: self.update_schema.update_column(full_name, field_type=field.field_type) - if field.doc is not None and not field.doc != existing_field.doc: + if field.doc is not None and field.doc != existing_field.doc: self.update_schema.update_column(full_name, doc=field.doc) def _find_field_type(self, field_id: int) -> IcebergType: diff --git a/tests/table/test_init.py b/tests/table/test_init.py index e5458677f..484abc24c 100644 --- a/tests/table/test_init.py +++ b/tests/table/test_init.py @@ -512,6 +512,41 @@ def test_add_column(table_v2: Table) -> None: assert apply_schema.highest_field_id == 4 +def test_update_column(table_v1: Table, table_v2: Table) -> None: + """ + Table should be able to update existing property `doc` + Table should also be able to update property `required`, if the field is not an identifier field. + """ + COMMENT2 = "comment2" + for table in [table_v1, table_v2]: + original_schema = table.schema() + # update existing doc to a new doc + assert original_schema.find_field("y").doc == "comment" + new_schema = table.transaction().update_schema().update_column("y", doc=COMMENT2)._apply() + assert new_schema.find_field("y").doc == COMMENT2, "failed to update existing field doc" + + # update existing doc to an emtpy string + assert new_schema.find_field("y").doc == COMMENT2 + new_schema2 = table.transaction().update_schema().update_column("y", doc="")._apply() + assert new_schema2.find_field("y").doc == "", "failed to remove existing field doc" + + # update required to False + assert original_schema.find_field("z").required is True + new_schema3 = table.transaction().update_schema().update_column("z", required=False)._apply() + assert new_schema3.find_field("z").required is False, "failed to update existing field required" + + # assert the above two updates also works with union_by_name + assert ( + table.update_schema().union_by_name(new_schema)._apply() == new_schema + ), "failed to update existing field doc with union_by_name" + assert ( + table.update_schema().union_by_name(new_schema2)._apply() == new_schema2 + ), "failed to remove existing field doc with union_by_name" + assert ( + table.update_schema().union_by_name(new_schema3)._apply() == new_schema3 + ), "failed to update existing field required with union_by_name" + + def test_add_primitive_type_column(table_v2: Table) -> None: primitive_type: Dict[str, PrimitiveType] = { "boolean": BooleanType(),