From 61af76978e97d94c1c9c7297fc73900d7827b433 Mon Sep 17 00:00:00 2001 From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com> Date: Wed, 25 Sep 2024 16:48:51 -1000 Subject: [PATCH] Add io/timezone APIs to pylibcudf (#16771) Contributes to https://github.com/rapidsai/cudf/issues/15162 Authors: - Matthew Roeschke (https://github.com/mroeschke) - Vyas Ramasubramani (https://github.com/vyasr) - GALI PREM SAGAR (https://github.com/galipremsagar) Approvers: - Vyas Ramasubramani (https://github.com/vyasr) - GALI PREM SAGAR (https://github.com/galipremsagar) URL: https://github.com/rapidsai/cudf/pull/16771 --- .../api_docs/pylibcudf/io/index.rst | 1 + .../api_docs/pylibcudf/io/timezone.rst | 6 +++ python/cudf/cudf/_lib/timezone.pyx | 27 ++---------- python/pylibcudf/pylibcudf/io/CMakeLists.txt | 4 +- python/pylibcudf/pylibcudf/io/__init__.pxd | 2 +- python/pylibcudf/pylibcudf/io/__init__.py | 2 +- python/pylibcudf/pylibcudf/io/timezone.pxd | 6 +++ python/pylibcudf/pylibcudf/io/timezone.pyx | 43 +++++++++++++++++++ .../pylibcudf/tests/io/test_timezone.py | 16 +++++++ 9 files changed, 81 insertions(+), 26 deletions(-) create mode 100644 docs/cudf/source/user_guide/api_docs/pylibcudf/io/timezone.rst create mode 100644 python/pylibcudf/pylibcudf/io/timezone.pxd create mode 100644 python/pylibcudf/pylibcudf/io/timezone.pyx create mode 100644 python/pylibcudf/pylibcudf/tests/io/test_timezone.py diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/io/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/index.rst index c8933981736..53638f071cc 100644 --- a/docs/cudf/source/user_guide/api_docs/pylibcudf/io/index.rst +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/index.rst @@ -19,3 +19,4 @@ I/O Functions csv json parquet + timezone diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/io/timezone.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/timezone.rst new file mode 100644 index 00000000000..20c1ffc2e93 --- /dev/null +++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/io/timezone.rst @@ -0,0 +1,6 @@ +======== +Timezone +======== + +.. automodule:: pylibcudf.io.timezone + :members: diff --git a/python/cudf/cudf/_lib/timezone.pyx b/python/cudf/cudf/_lib/timezone.pyx index bff3b2c4ce4..54624a5a2fd 100644 --- a/python/cudf/cudf/_lib/timezone.pyx +++ b/python/cudf/cudf/_lib/timezone.pyx @@ -1,29 +1,10 @@ # Copyright (c) 2023-2024, NVIDIA CORPORATION. -from libcpp.memory cimport unique_ptr -from libcpp.optional cimport make_optional -from libcpp.string cimport string -from libcpp.utility cimport move +import pylibcudf as plc -from pylibcudf.libcudf.io.timezone cimport ( - make_timezone_transition_table as cpp_make_timezone_transition_table, -) -from pylibcudf.libcudf.table.table cimport table - -from cudf._lib.utils cimport columns_from_unique_ptr +from cudf._lib.column cimport Column def make_timezone_transition_table(tzdir, tzname): - cdef unique_ptr[table] c_result - cdef string c_tzdir = tzdir.encode() - cdef string c_tzname = tzname.encode() - - with nogil: - c_result = move( - cpp_make_timezone_transition_table( - make_optional[string](c_tzdir), - c_tzname - ) - ) - - return columns_from_unique_ptr(move(c_result)) + plc_table = plc.io.timezone.make_timezone_transition_table(tzdir, tzname) + return [Column.from_pylibcudf(col) for col in plc_table.columns()] diff --git a/python/pylibcudf/pylibcudf/io/CMakeLists.txt b/python/pylibcudf/pylibcudf/io/CMakeLists.txt index 529a71a48ce..965724a47b1 100644 --- a/python/pylibcudf/pylibcudf/io/CMakeLists.txt +++ b/python/pylibcudf/pylibcudf/io/CMakeLists.txt @@ -12,7 +12,9 @@ # the License. # ============================================================================= -set(cython_sources avro.pyx csv.pyx datasource.pyx json.pyx orc.pyx parquet.pyx types.pyx) +set(cython_sources avro.pyx csv.pyx datasource.pyx json.pyx orc.pyx parquet.pyx timezone.pyx + types.pyx +) set(linked_libraries cudf::cudf) rapids_cython_create_modules( diff --git a/python/pylibcudf/pylibcudf/io/__init__.pxd b/python/pylibcudf/pylibcudf/io/__init__.pxd index 5927a19dc69..1bcc0a3f963 100644 --- a/python/pylibcudf/pylibcudf/io/__init__.pxd +++ b/python/pylibcudf/pylibcudf/io/__init__.pxd @@ -1,5 +1,5 @@ # Copyright (c) 2024, NVIDIA CORPORATION. # CSV is removed since it is def not cpdef (to force kw-only arguments) -from . cimport avro, datasource, json, orc, parquet, types +from . cimport avro, datasource, json, orc, parquet, timezone, types from .types cimport SourceInfo, TableWithMetadata diff --git a/python/pylibcudf/pylibcudf/io/__init__.py b/python/pylibcudf/pylibcudf/io/__init__.py index 5d899ee0808..2e4f215b12c 100644 --- a/python/pylibcudf/pylibcudf/io/__init__.py +++ b/python/pylibcudf/pylibcudf/io/__init__.py @@ -1,4 +1,4 @@ # Copyright (c) 2024, NVIDIA CORPORATION. -from . import avro, csv, datasource, json, orc, parquet, types +from . import avro, csv, datasource, json, orc, parquet, timezone, types from .types import SinkInfo, SourceInfo, TableWithMetadata diff --git a/python/pylibcudf/pylibcudf/io/timezone.pxd b/python/pylibcudf/pylibcudf/io/timezone.pxd new file mode 100644 index 00000000000..2aa755dbbd8 --- /dev/null +++ b/python/pylibcudf/pylibcudf/io/timezone.pxd @@ -0,0 +1,6 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from ..table cimport Table + + +cpdef Table make_timezone_transition_table(str tzif_dir, str timezone_name) diff --git a/python/pylibcudf/pylibcudf/io/timezone.pyx b/python/pylibcudf/pylibcudf/io/timezone.pyx new file mode 100644 index 00000000000..e02239d7252 --- /dev/null +++ b/python/pylibcudf/pylibcudf/io/timezone.pyx @@ -0,0 +1,43 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. + +from libcpp.memory cimport unique_ptr +from libcpp.optional cimport make_optional +from libcpp.string cimport string +from libcpp.utility cimport move +from pylibcudf.libcudf.io.timezone cimport ( + make_timezone_transition_table as cpp_make_timezone_transition_table, +) +from pylibcudf.libcudf.table.table cimport table + +from ..table cimport Table + + +cpdef Table make_timezone_transition_table(str tzif_dir, str timezone_name): + """ + Creates a transition table to convert ORC timestamps to UTC. + + Parameters + ---------- + tzif_dir : str + The directory where the TZif files are located + timezone_name : str + standard timezone name + + Returns + ------- + Table + The transition table for the given timezone. + """ + cdef unique_ptr[table] c_result + cdef string c_tzdir = tzif_dir.encode() + cdef string c_tzname = timezone_name.encode() + + with nogil: + c_result = move( + cpp_make_timezone_transition_table( + make_optional[string](c_tzdir), + c_tzname + ) + ) + + return Table.from_libcudf(move(c_result)) diff --git a/python/pylibcudf/pylibcudf/tests/io/test_timezone.py b/python/pylibcudf/pylibcudf/tests/io/test_timezone.py new file mode 100644 index 00000000000..76b0424b2af --- /dev/null +++ b/python/pylibcudf/pylibcudf/tests/io/test_timezone.py @@ -0,0 +1,16 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. +import zoneinfo + +import pylibcudf as plc +import pytest + + +def test_make_timezone_transition_table(): + if len(zoneinfo.TZPATH) == 0: + pytest.skip("No TZPATH available.") + tz_path = zoneinfo.TZPATH[0] + result = plc.io.timezone.make_timezone_transition_table( + tz_path, "America/Los_Angeles" + ) + assert isinstance(result, plc.Table) + assert result.num_rows() > 0