From 31b33b90430a4f2496fcf1a42778bcd8e070c87c Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Thu, 13 Jun 2024 08:58:02 +0100
Subject: [PATCH 01/25] Add tests of implemented StringFunctions (#16007)

Additionally, assert that we raise during translation for an unhandled function.

Authors:
  - Lawrence Mitchell (https://github.com/wence-)

Approvers:
  - https://github.com/brandon-b-miller

URL: https://github.com/rapidsai/cudf/pull/16007
---
 python/cudf_polars/cudf_polars/dsl/expr.py    |  4 +-
 .../tests/expressions/test_stringfunction.py  | 41 +++++++++++++++++++
 2 files changed, 44 insertions(+), 1 deletion(-)
 create mode 100644 python/cudf_polars/tests/expressions/test_stringfunction.py

diff --git a/python/cudf_polars/cudf_polars/dsl/expr.py b/python/cudf_polars/cudf_polars/dsl/expr.py
index 377a905aed6..298ef5ab070 100644
--- a/python/cudf_polars/cudf_polars/dsl/expr.py
+++ b/python/cudf_polars/cudf_polars/dsl/expr.py
@@ -691,7 +691,9 @@ def do_evaluate(
                 )
             )
         else:
-            raise NotImplementedError(f"StringFunction {self.name}")
+            raise NotImplementedError(
+                f"StringFunction {self.name}"
+            )  # pragma: no cover; handled by init raising
 
 
 class Sort(Expr):
diff --git a/python/cudf_polars/tests/expressions/test_stringfunction.py b/python/cudf_polars/tests/expressions/test_stringfunction.py
new file mode 100644
index 00000000000..198f35d376b
--- /dev/null
+++ b/python/cudf_polars/tests/expressions/test_stringfunction.py
@@ -0,0 +1,41 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+from __future__ import annotations
+
+import pytest
+
+import polars as pl
+
+from cudf_polars import translate_ir
+from cudf_polars.testing.asserts import assert_gpu_result_equal
+
+
+def test_supported_stringfunction_expression():
+    ldf = pl.LazyFrame(
+        {
+            "a": ["a", "b", "cdefg", "h", "Wıth ünιcοde"],  # noqa: RUF001
+            "b": [0, 3, 1, -1, None],
+        }
+    )
+
+    query = ldf.select(
+        pl.col("a").str.starts_with("Z"),
+        pl.col("a").str.ends_with("h").alias("endswith_h"),
+        pl.col("a").str.to_lowercase().alias("lower"),
+        pl.col("a").str.to_uppercase().alias("upper"),
+    )
+    assert_gpu_result_equal(query)
+
+
+def test_unsupported_stringfunction():
+    ldf = pl.LazyFrame(
+        {
+            "a": ["a", "b", "cdefg", "h", "Wıth ünιcοde"],  # noqa: RUF001
+            "b": [0, 3, 1, -1, None],
+        }
+    )
+
+    q = ldf.select(pl.col("a").str.count_matches("e", literal=True))
+
+    with pytest.raises(NotImplementedError):
+        _ = translate_ir(q._ldf.visit())

From 8bbc5121b2dec93d24337d399ff6616bbb971a06 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Thu, 13 Jun 2024 08:58:27 +0100
Subject: [PATCH 02/25] Add coverage selecting len from a dataframe (number of
 rows) (#16005)

Fix bug (and report a polars issue) for the case that the dataframe is empty, and therefore we cannot ask a column for its length.

Authors:
  - Lawrence Mitchell (https://github.com/wence-)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/16005
---
 .../cudf_polars/containers/dataframe.py       |  2 +-
 .../cudf_polars/tests/expressions/test_len.py | 26 +++++++++++++++++++
 2 files changed, 27 insertions(+), 1 deletion(-)
 create mode 100644 python/cudf_polars/tests/expressions/test_len.py

diff --git a/python/cudf_polars/cudf_polars/containers/dataframe.py b/python/cudf_polars/cudf_polars/containers/dataframe.py
index d1f7a9ed2cf..ec8d00c3123 100644
--- a/python/cudf_polars/cudf_polars/containers/dataframe.py
+++ b/python/cudf_polars/cudf_polars/containers/dataframe.py
@@ -70,7 +70,7 @@ def num_columns(self) -> int:
     @cached_property
     def num_rows(self) -> int:
         """Number of rows."""
-        return self.table.num_rows()
+        return 0 if len(self.columns) == 0 else self.table.num_rows()
 
     @classmethod
     def from_cudf(cls, df: cudf.DataFrame) -> Self:
diff --git a/python/cudf_polars/tests/expressions/test_len.py b/python/cudf_polars/tests/expressions/test_len.py
new file mode 100644
index 00000000000..03b30928184
--- /dev/null
+++ b/python/cudf_polars/tests/expressions/test_len.py
@@ -0,0 +1,26 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+from __future__ import annotations
+
+import pytest
+
+import polars as pl
+
+from cudf_polars.testing.asserts import assert_gpu_result_equal
+
+
+@pytest.mark.parametrize("dtype", [pl.UInt32, pl.Int32, None])
+@pytest.mark.parametrize("empty", [False, True])
+def test_len(dtype, empty):
+    if empty:
+        df = pl.LazyFrame({})
+    else:
+        df = pl.LazyFrame({"a": [1, 2, 3]})
+
+    if dtype is None:
+        q = df.select(pl.len())
+    else:
+        q = df.select(pl.len().cast(dtype))
+
+    # Workaround for https://github.com/pola-rs/polars/issues/16904
+    assert_gpu_result_equal(q, collect_kwargs={"projection_pushdown": False})

From af09d3e60e4ac4c86602e4e47e58cdb47a02b22c Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Thu, 13 Jun 2024 08:58:46 +0100
Subject: [PATCH 03/25] Raise early on unhandled PythonScan node (#15992)

Add test of the behaviour.

Authors:
  - Lawrence Mitchell (https://github.com/wence-)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/15992
---
 python/cudf_polars/cudf_polars/dsl/ir.py     |  4 ++++
 python/cudf_polars/tests/test_python_scan.py | 20 ++++++++++++++++++++
 2 files changed, 24 insertions(+)
 create mode 100644 python/cudf_polars/tests/test_python_scan.py

diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py
index 46241ab8e71..9fb2468e4e9 100644
--- a/python/cudf_polars/cudf_polars/dsl/ir.py
+++ b/python/cudf_polars/cudf_polars/dsl/ir.py
@@ -165,6 +165,10 @@ class PythonScan(IR):
     predicate: expr.NamedExpr | None
     """Filter to apply to the constructed dataframe before returning it."""
 
+    def __post_init__(self):
+        """Validate preconditions."""
+        raise NotImplementedError("PythonScan not implemented")
+
 
 @dataclasses.dataclass(slots=True)
 class Scan(IR):
diff --git a/python/cudf_polars/tests/test_python_scan.py b/python/cudf_polars/tests/test_python_scan.py
new file mode 100644
index 00000000000..c03474e3dc8
--- /dev/null
+++ b/python/cudf_polars/tests/test_python_scan.py
@@ -0,0 +1,20 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+from __future__ import annotations
+
+import pytest
+
+import polars as pl
+
+from cudf_polars import translate_ir
+
+
+def test_python_scan():
+    def source(with_columns, predicate, nrows):
+        return pl.DataFrame({"a": pl.Series([1, 2, 3], dtype=pl.Int8())})
+
+    q = pl.LazyFrame._scan_python_function({"a": pl.Int8}, source, pyarrow=False)
+    with pytest.raises(NotImplementedError):
+        _ = translate_ir(q._ldf.visit())
+
+    assert q.collect().equals(source(None, None, None))

From 246d017669cbeca3570106b4bb52a92f931ea2c1 Mon Sep 17 00:00:00 2001
From: brandon-b-miller <53796099+brandon-b-miller@users.noreply.github.com>
Date: Thu, 13 Jun 2024 09:33:43 -0500
Subject: [PATCH 04/25] Plumb pylibcudf strings `contains_re` through
 cudf_polars (#15918)

This PR adds cudf-polars code for evaluating the `StringFunction.Contains` expression node.

Depends on https://github.com/rapidsai/cudf/pull/15880/

Authors:
  - https://github.com/brandon-b-miller
  - Lawrence Mitchell (https://github.com/wence-)

Approvers:
  - Lawrence Mitchell (https://github.com/wence-)

URL: https://github.com/rapidsai/cudf/pull/15918
---
 python/cudf_polars/cudf_polars/dsl/expr.py | 51 ++++++++++++++++++
 python/cudf_polars/tests/test_string.py    | 61 ++++++++++++++++++++++
 2 files changed, 112 insertions(+)
 create mode 100644 python/cudf_polars/tests/test_string.py

diff --git a/python/cudf_polars/cudf_polars/dsl/expr.py b/python/cudf_polars/cudf_polars/dsl/expr.py
index 298ef5ab070..03c1db68dbd 100644
--- a/python/cudf_polars/cudf_polars/dsl/expr.py
+++ b/python/cudf_polars/cudf_polars/dsl/expr.py
@@ -644,13 +644,28 @@ def __init__(
         self.options = options
         self.name = name
         self.children = children
+        self._validate_input()
+
+    def _validate_input(self):  # fail fast on calls we cannot translate
         if self.name not in (
             pl_expr.StringFunction.Lowercase,
             pl_expr.StringFunction.Uppercase,
             pl_expr.StringFunction.EndsWith,
             pl_expr.StringFunction.StartsWith,
+            pl_expr.StringFunction.Contains,
         ):
             raise NotImplementedError(f"String function {self.name}")
+        if self.name == pl_expr.StringFunction.Contains:
+            literal, strict = self.options
+            if not literal:  # non-literal pattern, i.e. regex contains
+                if not strict:
+                    raise NotImplementedError(
+                        f"{strict=} is not supported for regex contains"
+                    )
+                if not isinstance(self.children[1], Literal):
+                    raise NotImplementedError(
+                        "Regex contains only supports a scalar pattern"
+                    )
 
     def do_evaluate(
         self,
@@ -660,6 +675,26 @@ def do_evaluate(
         mapping: Mapping[Expr, Column] | None = None,
     ) -> Column:
         """Evaluate this expression given a dataframe for context."""
+        if self.name == pl_expr.StringFunction.Contains:
+            child, arg = self.children
+            column = child.evaluate(df, context=context, mapping=mapping)
+
+            literal, _ = self.options
+            if literal:
+                pat = arg.evaluate(df, context=context, mapping=mapping)
+                pattern = (
+                    pat.obj_scalar
+                    if pat.is_scalar and pat.obj.size() != column.obj.size()
+                    else pat.obj
+                )
+                return Column(plc.strings.find.contains(column.obj, pattern))
+            else:
+                assert isinstance(arg, Literal)
+                prog = plc.strings.regex_program.RegexProgram.create(
+                    arg.value.as_py(),
+                    flags=plc.strings.regex_flags.RegexFlags.DEFAULT,
+                )
+                return Column(plc.strings.contains.contains_re(column.obj, prog))
         columns = [
             child.evaluate(df, context=context, mapping=mapping)
             for child in self.children
@@ -691,6 +726,22 @@ def do_evaluate(
                 )
             )
         else:
+            columns = [
+                child.evaluate(df, context=context, mapping=mapping)
+                for child in self.children
+            ]
+            if self.name == pl_expr.StringFunction.Lowercase:
+                (column,) = columns
+                return Column(plc.strings.case.to_lower(column.obj))
+            elif self.name == pl_expr.StringFunction.Uppercase:
+                (column,) = columns
+                return Column(plc.strings.case.to_upper(column.obj))
+            elif self.name == pl_expr.StringFunction.EndsWith:
+                column, suffix = columns
+                return Column(plc.strings.find.ends_with(column.obj, suffix.obj))
+            elif self.name == pl_expr.StringFunction.StartsWith:
+                column, suffix = columns
+                return Column(plc.strings.find.starts_with(column.obj, suffix.obj))
             raise NotImplementedError(
                 f"StringFunction {self.name}"
             )  # pragma: no cover; handled by init raising
diff --git a/python/cudf_polars/tests/test_string.py b/python/cudf_polars/tests/test_string.py
new file mode 100644
index 00000000000..f1a080d040f
--- /dev/null
+++ b/python/cudf_polars/tests/test_string.py
@@ -0,0 +1,61 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+from __future__ import annotations
+
+from functools import partial
+
+import pytest
+
+import polars as pl
+
+from cudf_polars.callback import execute_with_cudf
+from cudf_polars.testing.asserts import assert_gpu_result_equal
+
+
+@pytest.fixture
+def ldf():
+    return pl.DataFrame(
+        {"a": ["AbC", "de", "FGHI", "j", "kLm", "nOPq", None, "RsT", None, "uVw"]}
+    ).lazy()
+
+
+@pytest.mark.parametrize(
+    "substr",
+    [
+        "A",
+        "de",
+        ".*",
+        "^a",
+        "^A",
+        "[^a-z]",
+        "[a-z]{3,}",
+        "^[A-Z]{2,}",
+        "j|u",
+    ],
+)
+def test_contains_regex(ldf, substr):
+    query = ldf.select(pl.col("a").str.contains(substr))
+    assert_gpu_result_equal(query)
+
+
+@pytest.mark.parametrize(
+    "literal", ["A", "de", "FGHI", "j", "kLm", "nOPq", "RsT", "uVw"]
+)
+def test_contains_literal(ldf, literal):
+    query = ldf.select(pl.col("a").str.contains(pl.lit(literal), literal=True))
+    assert_gpu_result_equal(query)
+
+
+def test_contains_column(ldf):
+    query = ldf.select(pl.col("a").str.contains(pl.col("a"), literal=True))
+    assert_gpu_result_equal(query)
+
+
+@pytest.mark.parametrize("pat", ["["])
+def test_contains_invalid(ldf, pat):
+    query = ldf.select(pl.col("a").str.contains(pat))
+
+    with pytest.raises(pl.exceptions.ComputeError):
+        query.collect()
+    with pytest.raises(pl.exceptions.ComputeError):
+        query.collect(post_opt_callback=partial(execute_with_cudf, raise_on_fail=True))

From f651f12471edda51bf4c4071d74ff6720bd037fc Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Thu, 13 Jun 2024 16:05:44 +0100
Subject: [PATCH 05/25] Port start of datetime.hpp to pylibcudf (#15916)

Start exposing datetime extraction functions.

Authors:
  - Lawrence Mitchell (https://github.com/wence-)

Approvers:
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/15916
---
 .../api_docs/pylibcudf/datetime.rst           |  6 ++++
 .../user_guide/api_docs/pylibcudf/index.rst   |  1 +
 .../cudf/cudf/_lib/pylibcudf/CMakeLists.txt   |  1 +
 python/cudf/cudf/_lib/pylibcudf/__init__.pxd  |  4 ++-
 python/cudf/cudf/_lib/pylibcudf/__init__.py   |  4 ++-
 python/cudf/cudf/_lib/pylibcudf/datetime.pxd  |  8 +++++
 python/cudf/cudf/_lib/pylibcudf/datetime.pyx  | 33 +++++++++++++++++++
 .../_lib/pylibcudf/libcudf/CMakeLists.txt     |  2 +-
 python/cudf/cudf/pylibcudf_tests/conftest.py  |  5 +++
 .../cudf/pylibcudf_tests/test_datetime.py     | 30 +++++++++++++++++
 .../cudf/cudf/pylibcudf_tests/test_round.py   |  9 ++---
 11 files changed, 93 insertions(+), 10 deletions(-)
 create mode 100644 docs/cudf/source/user_guide/api_docs/pylibcudf/datetime.rst
 create mode 100644 python/cudf/cudf/_lib/pylibcudf/datetime.pxd
 create mode 100644 python/cudf/cudf/_lib/pylibcudf/datetime.pyx
 create mode 100644 python/cudf/cudf/pylibcudf_tests/test_datetime.py

diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/datetime.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/datetime.rst
new file mode 100644
index 00000000000..ebf5fab3052
--- /dev/null
+++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/datetime.rst
@@ -0,0 +1,6 @@
+========
+datetime
+========
+
+.. automodule:: cudf._lib.pylibcudf.datetime
+   :members:
diff --git a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
index 1e03fa80bb5..f98298ff052 100644
--- a/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
+++ b/docs/cudf/source/user_guide/api_docs/pylibcudf/index.rst
@@ -14,6 +14,7 @@ This page provides API documentation for pylibcudf.
     column_factories
     concatenate
     copying
+    datetime
     filling
     gpumemoryview
     groupby
diff --git a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
index ed396208f98..0a198f431a7 100644
--- a/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/pylibcudf/CMakeLists.txt
@@ -19,6 +19,7 @@ set(cython_sources
     column_factories.pyx
     concatenate.pyx
     copying.pyx
+    datetime.pyx
     filling.pyx
     gpumemoryview.pyx
     groupby.pyx
diff --git a/python/cudf/cudf/_lib/pylibcudf/__init__.pxd b/python/cudf/cudf/_lib/pylibcudf/__init__.pxd
index a628ecdb038..5131df9a5cd 100644
--- a/python/cudf/cudf/_lib/pylibcudf/__init__.pxd
+++ b/python/cudf/cudf/_lib/pylibcudf/__init__.pxd
@@ -7,6 +7,7 @@ from . cimport (
     column_factories,
     concatenate,
     copying,
+    datetime,
     filling,
     groupby,
     join,
@@ -40,9 +41,10 @@ __all__ = [
     "Table",
     "aggregation",
     "binaryop",
+    "column_factories",
     "concatenate",
     "copying",
-    "column_factories",
+    "datetime",
     "filling",
     "gpumemoryview",
     "groupby",
diff --git a/python/cudf/cudf/_lib/pylibcudf/__init__.py b/python/cudf/cudf/_lib/pylibcudf/__init__.py
index 46d0fe13cd1..43a9e2aca31 100644
--- a/python/cudf/cudf/_lib/pylibcudf/__init__.py
+++ b/python/cudf/cudf/_lib/pylibcudf/__init__.py
@@ -6,6 +6,7 @@
     column_factories,
     concatenate,
     copying,
+    datetime,
     filling,
     groupby,
     interop,
@@ -39,9 +40,10 @@
     "TypeId",
     "aggregation",
     "binaryop",
+    "column_factories",
     "concatenate",
     "copying",
-    "column_factories",
+    "datetime",
     "filling",
     "gpumemoryview",
     "groupby",
diff --git a/python/cudf/cudf/_lib/pylibcudf/datetime.pxd b/python/cudf/cudf/_lib/pylibcudf/datetime.pxd
new file mode 100644
index 00000000000..2fce48cf1b4
--- /dev/null
+++ b/python/cudf/cudf/_lib/pylibcudf/datetime.pxd
@@ -0,0 +1,8 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+from .column cimport Column
+
+
+cpdef Column extract_year(
+    Column col
+)
diff --git a/python/cudf/cudf/_lib/pylibcudf/datetime.pyx b/python/cudf/cudf/_lib/pylibcudf/datetime.pyx
new file mode 100644
index 00000000000..82351327de6
--- /dev/null
+++ b/python/cudf/cudf/_lib/pylibcudf/datetime.pyx
@@ -0,0 +1,33 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+from libcpp.memory cimport unique_ptr
+from libcpp.utility cimport move
+
+from cudf._lib.pylibcudf.libcudf.column.column cimport column
+from cudf._lib.pylibcudf.libcudf.datetime cimport (
+    extract_year as cpp_extract_year,
+)
+
+from .column cimport Column
+
+
+cpdef Column extract_year(
+    Column values
+):
+    """
+    Extract the year from a datetime column.
+
+    Parameters
+    ----------
+    values : Column
+        The column to extract the year from.
+
+    Returns
+    -------
+    Column
+        Column with the extracted years.
+    """
+    cdef unique_ptr[column] result
+
+    with nogil:
+        result = move(cpp_extract_year(values.view()))
+    return Column.from_libcudf(move(result))
diff --git a/python/cudf/cudf/_lib/pylibcudf/libcudf/CMakeLists.txt b/python/cudf/cudf/_lib/pylibcudf/libcudf/CMakeLists.txt
index ac56d42dda8..6c66d01ca57 100644
--- a/python/cudf/cudf/_lib/pylibcudf/libcudf/CMakeLists.txt
+++ b/python/cudf/cudf/_lib/pylibcudf/libcudf/CMakeLists.txt
@@ -12,7 +12,7 @@
 # the License.
 # =============================================================================
 
-set(cython_sources aggregation.pyx binaryop.pyx copying.pyx replace.pyx reduce.pxd round.pyx
+set(cython_sources aggregation.pyx binaryop.pyx copying.pyx reduce.pyx replace.pyx round.pyx
                    stream_compaction.pyx types.pyx unary.pyx
 )
 
diff --git a/python/cudf/cudf/pylibcudf_tests/conftest.py b/python/cudf/cudf/pylibcudf_tests/conftest.py
index f3c6584ef8c..b169bbdee5b 100644
--- a/python/cudf/cudf/pylibcudf_tests/conftest.py
+++ b/python/cudf/cudf/pylibcudf_tests/conftest.py
@@ -58,3 +58,8 @@ def interp_opt(request):
 )
 def sorted_opt(request):
     return request.param
+
+
+@pytest.fixture(scope="session", params=[False, True])
+def has_nulls(request):
+    return request.param
diff --git a/python/cudf/cudf/pylibcudf_tests/test_datetime.py b/python/cudf/cudf/pylibcudf_tests/test_datetime.py
new file mode 100644
index 00000000000..75af0fa6ca1
--- /dev/null
+++ b/python/cudf/cudf/pylibcudf_tests/test_datetime.py
@@ -0,0 +1,30 @@
+# Copyright (c) 2024, NVIDIA CORPORATION.
+
+import datetime
+
+import pyarrow as pa
+import pytest
+from utils import assert_column_eq
+
+import cudf._lib.pylibcudf as plc
+
+
+@pytest.fixture
+def column(has_nulls):
+    values = [
+        datetime.date(1999, 1, 1),
+        datetime.date(2024, 10, 12),
+        datetime.date(1, 1, 1),
+        datetime.date(9999, 1, 1),
+    ]
+    if has_nulls:
+        values[2] = None
+    return plc.interop.from_arrow(pa.array(values, type=pa.date32()))
+
+
+def test_extract_year(column):
+    got = plc.datetime.extract_year(column)
+    # libcudf produces an int16, arrow produces an int64
+    expect = pa.compute.year(plc.interop.to_arrow(column)).cast(pa.int16())
+
+    assert_column_eq(expect, got)
diff --git a/python/cudf/cudf/pylibcudf_tests/test_round.py b/python/cudf/cudf/pylibcudf_tests/test_round.py
index a234860477f..991e6ed310d 100644
--- a/python/cudf/cudf/pylibcudf_tests/test_round.py
+++ b/python/cudf/cudf/pylibcudf_tests/test_round.py
@@ -7,16 +7,11 @@
 import cudf._lib.pylibcudf as plc
 
 
-@pytest.fixture(params=[False, True])
-def nullable(request):
-    return request.param
-
-
 @pytest.fixture(params=["float32", "float64"])
-def column(request, nullable):
+def column(request, has_nulls):
     values = [2.5, 2.49, 1.6, 8, -1.5, -1.7, -0.5, 0.5]
     typ = {"float32": pa.float32(), "float64": pa.float64()}[request.param]
-    if nullable:
+    if has_nulls:
         values[2] = None
     return plc.interop.from_arrow(pa.array(values, type=typ))
 

From cb564da1204f0da7eaeb8a0e636a0f23c97c314f Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Thu, 13 Jun 2024 05:11:37 -1000
Subject: [PATCH 06/25] Move some misc Frame methods to appropriate locations
 (#15963)

* Move `Frame._is_sorted` to `MultiIndex._is_sorted` (the only class that uses this method)
* Move `_apply_inverse_column` helper function to define `Column.__invert__`

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Lawrence Mitchell (https://github.com/wence-)

URL: https://github.com/rapidsai/cudf/pull/15963
---
 python/cudf/cudf/core/column/column.py    |  5 ++
 python/cudf/cudf/core/column/numerical.py |  8 +++
 python/cudf/cudf/core/frame.py            | 61 +----------------------
 python/cudf/cudf/core/multiindex.py       | 49 +++++++++++++++++-
 4 files changed, 62 insertions(+), 61 deletions(-)

diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 001e8996c19..75fc31ddbce 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -1118,6 +1118,11 @@ def __cuda_array_interface__(self) -> abc.Mapping[str, Any]:
     def __array_ufunc__(self, ufunc, method, *inputs, **kwargs):
         return _array_ufunc(self, ufunc, method, inputs, kwargs)
 
+    def __invert__(self):
+        raise TypeError(
+            f"Operation `~` not supported on {self.dtype.type.__name__}"
+        )
+
     def searchsorted(
         self,
         value,
diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
index 6fb4f17b76d..1952d7eeb71 100644
--- a/python/cudf/cudf/core/column/numerical.py
+++ b/python/cudf/cudf/core/column/numerical.py
@@ -194,6 +194,14 @@ def unary_operator(self, unaryop: Union[str, Callable]) -> ColumnBase:
         unaryop = pylibcudf.unary.UnaryOperator[unaryop]
         return libcudf.unary.unary_operation(self, unaryop)
 
+    def __invert__(self):
+        if self.dtype.kind in "ui":
+            return self.unary_operator("invert")
+        elif self.dtype.kind == "b":
+            return self.unary_operator("not")
+        else:
+            return super().__invert__()
+
     def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
         int_float_dtype_mapping = {
             np.int8: np.float32,
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index af8886a44a6..01b56f1edc4 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -32,7 +32,7 @@
 import cudf
 from cudf import _lib as libcudf
 from cudf._typing import Dtype
-from cudf.api.types import is_bool_dtype, is_dtype_equal, is_scalar
+from cudf.api.types import is_dtype_equal, is_scalar
 from cudf.core.buffer import acquire_spill_lock
 from cudf.core.column import (
     ColumnBase,
@@ -1455,51 +1455,6 @@ def _get_sorted_inds(
             stable=True,
         )
 
-    @_cudf_nvtx_annotate
-    def _is_sorted(self, ascending=None, null_position=None):
-        """
-        Returns a boolean indicating whether the data of the Frame are sorted
-        based on the parameters given. Does not account for the index.
-
-        Parameters
-        ----------
-        self : Frame
-            Frame whose columns are to be checked for sort order
-        ascending : None or list-like of booleans
-            None or list-like of boolean values indicating expected sort order
-            of each column. If list-like, size of list-like must be
-            len(columns). If None, all columns expected sort order is set to
-            ascending. False (0) - ascending, True (1) - descending.
-        null_position : None or list-like of booleans
-            None or list-like of boolean values indicating desired order of
-            nulls compared to other elements. If list-like, size of list-like
-            must be len(columns). If None, null order is set to before. False
-            (0) - before, True (1) - after.
-
-        Returns
-        -------
-        returns : boolean
-            Returns True, if sorted as expected by ``ascending`` and
-            ``null_position``, False otherwise.
-        """
-        if ascending is not None and not cudf.api.types.is_list_like(
-            ascending
-        ):
-            raise TypeError(
-                f"Expected a list-like or None for `ascending`, got "
-                f"{type(ascending)}"
-            )
-        if null_position is not None and not cudf.api.types.is_list_like(
-            null_position
-        ):
-            raise TypeError(
-                f"Expected a list-like or None for `null_position`, got "
-                f"{type(null_position)}"
-            )
-        return libcudf.sort.is_sorted(
-            [*self._columns], ascending=ascending, null_position=null_position
-        )
-
     @_cudf_nvtx_annotate
     def _split(self, splits):
         """Split a frame with split points in ``splits``. Returns a list of
@@ -1920,7 +1875,7 @@ def __invert__(self):
         """Bitwise invert (~) for integral dtypes, logical NOT for bools."""
         return self._from_data_like_self(
             self._data._from_columns_like_self(
-                (_apply_inverse_column(col) for col in self._data.columns)
+                (~col for col in self._data.columns)
             )
         )
 
@@ -1970,15 +1925,3 @@ def __dask_tokenize__(self):
             str(dict(self._dtypes)),
             normalize_token(self.to_pandas()),
         ]
-
-
-def _apply_inverse_column(col: ColumnBase) -> ColumnBase:
-    """Bitwise invert (~) for integral dtypes, logical NOT for bools."""
-    if np.issubdtype(col.dtype, np.integer):
-        return col.unary_operator("invert")
-    elif is_bool_dtype(col.dtype):
-        return col.unary_operator("not")
-    else:
-        raise TypeError(
-            f"Operation `~` not supported on {col.dtype.type.__name__}"
-        )
diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index 11b4b9154a2..6d3520e33cf 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -1636,9 +1636,54 @@ def is_unique(self):
     def dtype(self):
         return np.dtype("O")
 
+    @_cudf_nvtx_annotate
+    def _is_sorted(self, ascending=None, null_position=None) -> bool:
+        """
+        Returns a boolean indicating whether the data of the MultiIndex are sorted
+        based on the parameters given. Does not account for the index.
+
+        Parameters
+        ----------
+        self : MultiIndex
+            MultiIndex whose columns are to be checked for sort order
+        ascending : None or list-like of booleans
+            None or list-like of boolean values indicating expected sort order
+            of each column. If list-like, size of list-like must be
+            len(columns). If None, all columns expected sort order is set to
+            ascending. False (0) - ascending, True (1) - descending.
+        null_position : None or list-like of booleans
+            None or list-like of boolean values indicating desired order of
+            nulls compared to other elements. If list-like, size of list-like
+            must be len(columns). If None, null order is set to before. False
+            (0) - before, True (1) - after.
+
+        Returns
+        -------
+        returns : boolean
+            Returns True, if sorted as expected by ``ascending`` and
+            ``null_position``, False otherwise.
+        """
+        if ascending is not None and not cudf.api.types.is_list_like(
+            ascending
+        ):
+            raise TypeError(
+                f"Expected a list-like or None for `ascending`, got "
+                f"{type(ascending)}"
+            )
+        if null_position is not None and not cudf.api.types.is_list_like(
+            null_position
+        ):
+            raise TypeError(
+                f"Expected a list-like or None for `null_position`, got "
+                f"{type(null_position)}"
+            )
+        return libcudf.sort.is_sorted(
+            [*self._columns], ascending=ascending, null_position=null_position
+        )
+
     @cached_property  # type: ignore
     @_cudf_nvtx_annotate
-    def is_monotonic_increasing(self):
+    def is_monotonic_increasing(self) -> bool:
         """
         Return if the index is monotonic increasing
         (only equal or increasing) values.
@@ -1647,7 +1692,7 @@ def is_monotonic_increasing(self):
 
     @cached_property  # type: ignore
     @_cudf_nvtx_annotate
-    def is_monotonic_decreasing(self):
+    def is_monotonic_decreasing(self) -> bool:
         """
         Return if the index is monotonic decreasing
         (only equal or decreasing) values.

From 3cb3df3255efaec4a5ebb6cb7606067f753e3554 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Thu, 13 Jun 2024 11:54:55 -0500
Subject: [PATCH 07/25] Add ability to enable rmm pool on `cudf.pandas` import
 (#15628)

This PR enables allocating an RMM memory pool on `cudf.pandas` import, controlled by the following environment variable:

```
export CUDF_PANDAS_RMM_MODE="pool"
```

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Mark Harris (https://github.com/harrism)
  - Mads R. B. Kristensen (https://github.com/madsbk)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/15628
---
 python/cudf/cudf/pandas/__init__.py           | 43 +++++++++++++++++++
 .../cudf_pandas_tests/test_cudf_pandas.py     | 28 ++++++++++++
 2 files changed, 71 insertions(+)

diff --git a/python/cudf/cudf/pandas/__init__.py b/python/cudf/cudf/pandas/__init__.py
index 5b3785531d3..59a88f85dda 100644
--- a/python/cudf/cudf/pandas/__init__.py
+++ b/python/cudf/cudf/pandas/__init__.py
@@ -2,6 +2,9 @@
 # All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
+
+import warnings
+
 from .fast_slow_proxy import is_proxy_object
 from .magics import load_ipython_extension
 from .profiler import Profiler
@@ -19,6 +22,46 @@ def install():
     loader = ModuleAccelerator.install("pandas", "cudf", "pandas")
     global LOADED
     LOADED = loader is not None
+    import os
+
+    if (rmm_mode := os.getenv("CUDF_PANDAS_RMM_MODE", None)) is not None:
+        import rmm.mr
+        from rmm.mr import available_device_memory
+
+        # Check if a non-default memory resource is set
+        current_mr = rmm.mr.get_current_device_resource()
+        if not isinstance(current_mr, rmm.mr.CudaMemoryResource):
+            warnings.warn(
+                f"cudf.pandas detected an already configured memory resource, ignoring 'CUDF_PANDAS_RMM_MODE'={str(rmm_mode)}",
+                UserWarning,
+            )
+        free_memory, _ = available_device_memory()
+        free_memory = int(round(float(free_memory) * 0.80 / 256) * 256)
+
+        if rmm_mode == "cuda":
+            mr = rmm.mr.CudaMemoryResource()
+            rmm.mr.set_current_device_resource(mr)
+        elif rmm_mode == "pool":
+            rmm.mr.set_current_device_resource(
+                rmm.mr.PoolMemoryResource(
+                    rmm.mr.get_current_device_resource(),
+                    initial_pool_size=free_memory,
+                )
+            )
+        elif rmm_mode == "async":
+            mr = rmm.mr.CudaAsyncMemoryResource(initial_pool_size=free_memory)
+            rmm.mr.set_current_device_resource(mr)
+        elif rmm_mode == "managed":
+            mr = rmm.mr.ManagedMemoryResource()
+            rmm.mr.set_current_device_resource(mr)
+        elif rmm_mode == "managed_pool":
+            rmm.reinitialize(
+                managed_memory=True,
+                pool_allocator=True,
+                initial_pool_size=free_memory,
+            )
+        else:
+            raise TypeError(f"Unsupported rmm mode: {rmm_mode}")
 
 
 def pytest_load_initial_conftests(early_config, parser, args):
diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
index 515a4714a5a..c251e4a197e 100644
--- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
+++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
@@ -9,6 +9,7 @@
 import os
 import pathlib
 import pickle
+import subprocess
 import tempfile
 import types
 from io import BytesIO, StringIO
@@ -1425,6 +1426,33 @@ def test_holidays_within_dates(holiday, start, expected):
     ) == [utc.localize(dt) for dt in expected]
 
 
+@pytest.mark.parametrize(
+    "env_value",
+    ["", "cuda", "pool", "async", "managed", "managed_pool", "abc"],
+)
+def test_rmm_option_on_import(env_value):
+    data_directory = os.path.dirname(os.path.abspath(__file__))
+    # Create a copy of the current environment variables
+    env = os.environ.copy()
+    env["CUDF_PANDAS_RMM_MODE"] = env_value
+
+    sp_completed = subprocess.run(
+        [
+            "python",
+            "-m",
+            "cudf.pandas",
+            data_directory + "/data/profile_basic.py",
+        ],
+        capture_output=True,
+        text=True,
+        env=env,
+    )
+    if env_value in {"cuda", "pool", "async", "managed", "managed_pool"}:
+        assert sp_completed.returncode == 0
+    else:
+        assert sp_completed.returncode == 1
+
+
 def test_cudf_pandas_debugging_different_results(monkeypatch):
     cudf_mean = cudf.Series.mean
 

From 3f8f2149129f97947223611e2709d235e889389b Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Thu, 13 Jun 2024 17:04:45 -0500
Subject: [PATCH 08/25] Refactor rmm usage in `cudf.pandas` (#16021)

This PR addresses review comments made by @bdice here: https://github.com/rapidsai/cudf/pull/15628#pullrequestreview-2116067037

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/16021
---
 python/cudf/cudf/pandas/__init__.py | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/python/cudf/cudf/pandas/__init__.py b/python/cudf/cudf/pandas/__init__.py
index 59a88f85dda..ff445a63f74 100644
--- a/python/cudf/cudf/pandas/__init__.py
+++ b/python/cudf/cudf/pandas/__init__.py
@@ -2,9 +2,11 @@
 # All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
 
-
+import os
 import warnings
 
+import rmm.mr
+
 from .fast_slow_proxy import is_proxy_object
 from .magics import load_ipython_extension
 from .profiler import Profiler
@@ -22,12 +24,8 @@ def install():
     loader = ModuleAccelerator.install("pandas", "cudf", "pandas")
     global LOADED
     LOADED = loader is not None
-    import os
 
     if (rmm_mode := os.getenv("CUDF_PANDAS_RMM_MODE", None)) is not None:
-        import rmm.mr
-        from rmm.mr import available_device_memory
-
         # Check if a non-default memory resource is set
         current_mr = rmm.mr.get_current_device_resource()
         if not isinstance(current_mr, rmm.mr.CudaMemoryResource):
@@ -35,7 +33,7 @@ def install():
                 f"cudf.pandas detected an already configured memory resource, ignoring 'CUDF_PANDAS_RMM_MODE'={str(rmm_mode)}",
                 UserWarning,
             )
-        free_memory, _ = available_device_memory()
+        free_memory, _ = rmm.mr.available_device_memory()
         free_memory = int(round(float(free_memory) * 0.80 / 256) * 256)
 
         if rmm_mode == "cuda":
@@ -55,13 +53,13 @@ def install():
             mr = rmm.mr.ManagedMemoryResource()
             rmm.mr.set_current_device_resource(mr)
         elif rmm_mode == "managed_pool":
-            rmm.reinitialize(
-                managed_memory=True,
-                pool_allocator=True,
+            mr = rmm.mr.PoolMemoryResource(
+                rmm.mr.ManagedMemoryResource(),
                 initial_pool_size=free_memory,
             )
+            rmm.mr.set_current_device_resource(mr)
         else:
-            raise TypeError(f"Unsupported rmm mode: {rmm_mode}")
+            raise ValueError(f"Unsupported rmm mode: {rmm_mode}")
 
 
 def pytest_load_initial_conftests(early_config, parser, args):

From 31d909b0af9bcf9cf804ca1c3893ea71fbd5d765 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Thu, 13 Jun 2024 13:27:05 -1000
Subject: [PATCH 09/25] Support IntervalDtype in cudf.from_pandas (#16014)

Noticed while running the pandas test suite against `cudf.pandas`.

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Lawrence Mitchell (https://github.com/wence-)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

URL: https://github.com/rapidsai/cudf/pull/16014
---
 python/cudf/cudf/core/dataframe.py      | 6 +++---
 python/cudf/cudf/tests/test_interval.py | 7 +++++++
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index e1b6cc45dd3..7438b0237d5 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -8072,11 +8072,11 @@ def from_pandas(obj, nan_as_null=no_default):
         return cudf.Index.from_pandas(obj, nan_as_null=nan_as_null)
     elif isinstance(obj, pd.CategoricalDtype):
         return cudf.CategoricalDtype.from_pandas(obj)
+    elif isinstance(obj, pd.IntervalDtype):
+        return cudf.IntervalDtype.from_pandas(obj)
     else:
         raise TypeError(
-            "from_pandas only accepts Pandas Dataframes, Series, "
-            "Index, RangeIndex and MultiIndex objects. "
-            "Got %s" % type(obj)
+            f"from_pandas unsupported for object of type {type(obj).__name__}"
         )
 
 
diff --git a/python/cudf/cudf/tests/test_interval.py b/python/cudf/cudf/tests/test_interval.py
index 7b923af1f75..013f4439ad5 100644
--- a/python/cudf/cudf/tests/test_interval.py
+++ b/python/cudf/cudf/tests/test_interval.py
@@ -181,3 +181,10 @@ def test_interval_with_datetime(tz, box):
     else:
         with pytest.raises(NotImplementedError):
             cudf.from_pandas(pobj)
+
+
+def test_from_pandas_intervaldtype():
+    dtype = pd.IntervalDtype("int64", closed="left")
+    result = cudf.from_pandas(dtype)
+    expected = cudf.IntervalDtype("int64", closed="left")
+    assert_eq(result, expected)

From 987879ca4bdcae0d959266fd39196123007fa45e Mon Sep 17 00:00:00 2001
From: Yunsong Wang <yunsongw@nvidia.com>
Date: Thu, 13 Jun 2024 19:27:11 -0700
Subject: [PATCH 10/25] Fix the pool size alignment issue (#16024)

This PR fixes a pool size alignment bug.

Authors:
  - Yunsong Wang (https://github.com/PointKernel)

Approvers:
  - Mark Harris (https://github.com/harrism)
  - Vukasin Milovanovic (https://github.com/vuule)
  - David Wendt (https://github.com/davidwendt)

URL: https://github.com/rapidsai/cudf/pull/16024
---
 cpp/src/utilities/pinned_memory.cpp | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/cpp/src/utilities/pinned_memory.cpp b/cpp/src/utilities/pinned_memory.cpp
index 5d2e3ac332a..e90b7969b4d 100644
--- a/cpp/src/utilities/pinned_memory.cpp
+++ b/cpp/src/utilities/pinned_memory.cpp
@@ -43,9 +43,11 @@ class fixed_pinned_pool_memory_resource {
 
  public:
   fixed_pinned_pool_memory_resource(size_t size)
-    : pool_size_{size}, pool_{new host_pooled_mr(upstream_mr_, size, size)}
+    :  // rmm requires the pool size to be a multiple of 256 bytes
+      pool_size_{rmm::align_up(size, rmm::CUDA_ALLOCATION_ALIGNMENT)},
+      pool_{new host_pooled_mr(upstream_mr_, pool_size_, pool_size_)}
   {
-    if (pool_size_ == 0) { return; }
+    CUDF_LOG_INFO("Pinned pool size = {}", pool_size_);
 
     // Allocate full size from the pinned pool to figure out the beginning and end address
     pool_begin_ = pool_->allocate_async(pool_size_, stream_);
@@ -145,12 +147,8 @@ CUDF_EXPORT rmm::host_device_async_resource_ref& make_default_pinned_mr(
       return std::min(total / 200, size_t{100} * 1024 * 1024);
     }();
 
-    // rmm requires the pool size to be a multiple of 256 bytes
-    auto const aligned_size = rmm::align_up(size, rmm::RMM_DEFAULT_HOST_ALIGNMENT);
-    CUDF_LOG_INFO("Pinned pool size = {}", aligned_size);
-
     // make the pool with max size equal to the initial size
-    return fixed_pinned_pool_memory_resource{aligned_size};
+    return fixed_pinned_pool_memory_resource{size};
   }();
 
   static rmm::host_device_async_resource_ref mr_ref{mr};

From 829b3a959cc5f0d41fe51dca9a4335dba0da69a5 Mon Sep 17 00:00:00 2001
From: Muhammad Haseeb <14217455+mhaseeb123@users.noreply.github.com>
Date: Thu, 13 Jun 2024 20:40:56 -0700
Subject: [PATCH 11/25] Fix the int32 overflow when computing page fragment
 sizes for large string columns (#16028)

This PR fixes a possible `int32` overflow when computing page fragment sizes for large (2B+ char) string columns.

Authors:
  - Muhammad Haseeb (https://github.com/mhaseeb123)

Approvers:
  - Vukasin Milovanovic (https://github.com/vuule)
  - Nghia Truong (https://github.com/ttnghia)

URL: https://github.com/rapidsai/cudf/pull/16028
---
 cpp/src/io/parquet/writer_impl.cu | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/cpp/src/io/parquet/writer_impl.cu b/cpp/src/io/parquet/writer_impl.cu
index 6d466748c17..ca15b532d07 100644
--- a/cpp/src/io/parquet/writer_impl.cu
+++ b/cpp/src/io/parquet/writer_impl.cu
@@ -1763,10 +1763,10 @@ auto convert_table_to_parquet_data(table_input_metadata& table_meta,
     // for multiple fragments per page to smooth things out. using 2 was too
     // unbalanced in final page sizes, so using 4 which seems to be a good
     // compromise at smoothing things out without getting fragment sizes too small.
-    auto frag_size_fn = [&](auto const& col, size_type col_size) {
+    auto frag_size_fn = [&](auto const& col, size_t col_size) {
       int const target_frags_per_page = is_col_fixed_width(col) ? 1 : 4;
       auto const avg_len =
-        target_frags_per_page * util::div_rounding_up_safe<size_type>(col_size, input.num_rows());
+        target_frags_per_page * util::div_rounding_up_safe<size_t>(col_size, input.num_rows());
       if (avg_len > 0) {
         auto const frag_size = util::div_rounding_up_safe<size_type>(max_page_size_bytes, avg_len);
         return std::min<size_type>(max_page_fragment_size, frag_size);

From 34227d3cb687d465f1d4a5f12cbb37a47b97866e Mon Sep 17 00:00:00 2001
From: Zach Puller <zach.puller@gmail.com>
Date: Thu, 13 Jun 2024 23:45:35 -0700
Subject: [PATCH 12/25] orc multithreaded benchmark (#16009)

Addresses: https://github.com/rapidsai/cudf/issues/15973

Adds multithreaded benchmarks for the ORC reader. Based on the Parquet equivalent in https://github.com/rapidsai/cudf/pull/15585

```
# Benchmark Results

## orc_multithreaded_read_decode_mixed

### [0] NVIDIA RTX 5880 Ada Generation

| cardinality | total_data_size | num_threads | num_cols | run_length | Samples | CPU Time  | Noise | GPU Time  | Noise | bytes_per_second | peak_memory_usage | encoded_file_size |
|-------------|-----------------|-------------|----------|------------|---------|-----------|-------|-----------|-------|------------------|-------------------|-------------------|
|        1000 |       536870912 |           1 |        4 |          8 |    338x | 44.348 ms | 1.18% | 44.343 ms | 1.18% |      12107185968 |       939.341 MiB |        39.557 MiB |
|        1000 |      1073741824 |           1 |        4 |          8 |     80x | 77.634 ms | 0.65% | 77.629 ms | 0.65% |      13831742649 |         1.834 GiB |        79.072 MiB |
|        1000 |       536870912 |           2 |        4 |          8 |    341x | 43.921 ms | 1.20% | 43.916 ms | 1.20% |      12224889363 |       825.333 MiB |        39.568 MiB |
|        1000 |      1073741824 |           2 |        4 |          8 |     80x | 75.418 ms | 0.70% | 75.414 ms | 0.70% |      14237999015 |         1.611 GiB |        79.113 MiB |
|        1000 |       536870912 |           4 |        4 |          8 |     80x | 42.682 ms | 1.18% | 42.678 ms | 1.18% |      12579566132 |       883.436 MiB |        39.587 MiB |
|        1000 |      1073741824 |           4 |        4 |          8 |      9x | 74.056 ms | 0.48% | 74.052 ms | 0.48% |      14499873867 |         1.724 GiB |        79.136 MiB |
|        1000 |       536870912 |           8 |        4 |          8 |     25x | 42.198 ms | 0.50% | 42.194 ms | 0.49% |      12723960975 |       940.562 MiB |        39.600 MiB |
|        1000 |      1073741824 |           8 |        4 |          8 |      8x | 73.933 ms | 0.49% | 73.929 ms | 0.49% |      14524042443 |         1.781 GiB |        79.175 MiB |

## orc_multithreaded_read_decode_fixed_width

### [0] NVIDIA RTX 5880 Ada Generation

| cardinality | total_data_size | num_threads | num_cols | run_length | Samples | CPU Time  | Noise | GPU Time  | Noise | bytes_per_second | peak_memory_usage | encoded_file_size |
|-------------|-----------------|-------------|----------|------------|---------|-----------|-------|-----------|-------|------------------|-------------------|-------------------|
|        1000 |       536870912 |           1 |        4 |          8 |     13x | 40.149 ms | 0.04% | 40.144 ms | 0.04% |      13373482726 |       643.390 MiB |        59.821 MiB |
|        1000 |      1073741824 |           1 |        4 |          8 |    211x | 71.216 ms | 0.67% | 71.211 ms | 0.67% |      15078297784 |         1.257 GiB |       119.650 MiB |
|        1000 |       536870912 |           2 |        4 |          8 |    378x | 39.662 ms | 1.31% | 39.658 ms | 1.31% |      13537590893 |       643.392 MiB |        59.833 MiB |
|        1000 |      1073741824 |           2 |        4 |          8 |    209x | 71.693 ms | 0.71% | 71.688 ms | 0.71% |      14978085376 |         1.257 GiB |       119.642 MiB |
|        1000 |       536870912 |           4 |        4 |          8 |    377x | 39.731 ms | 1.30% | 39.726 ms | 1.30% |      13514305239 |       643.394 MiB |        59.856 MiB |
|        1000 |      1073741824 |           4 |        4 |          8 |      8x | 70.766 ms | 0.08% | 70.761 ms | 0.08% |      15174115364 |         1.030 GiB |       119.665 MiB |
|        1000 |       536870912 |           8 |        4 |          8 |    379x | 39.486 ms | 1.27% | 39.482 ms | 1.27% |      13597888468 |       647.399 MiB |        59.928 MiB |
|        1000 |      1073741824 |           8 |        4 |          8 |    207x | 72.686 ms | 2.04% | 72.681 ms | 2.04% |      14773317833 |         1.143 GiB |       119.711 MiB |

## orc_multithreaded_read_decode_string

### [0] NVIDIA RTX 5880 Ada Generation

| cardinality | total_data_size | num_threads | num_cols | run_length | Samples | CPU Time  | Noise | GPU Time  | Noise | bytes_per_second | peak_memory_usage | encoded_file_size |
|-------------|-----------------|-------------|----------|------------|---------|-----------|-------|-----------|-------|------------------|-------------------|-------------------|
|        1000 |       536870912 |           1 |        4 |          8 |     80x | 22.933 ms | 2.13% | 22.928 ms | 2.13% |      23415352877 |       661.948 MiB |        10.879 MiB |
|        1000 |      1073741824 |           1 |        4 |          8 |    160x | 34.167 ms | 1.41% | 34.162 ms | 1.41% |      31430436877 |         1.293 GiB |        21.757 MiB |
|        1000 |       536870912 |           2 |        4 |          8 |    560x | 22.533 ms | 2.18% | 22.528 ms | 2.18% |      23830839172 |       609.407 MiB |        10.941 MiB |
|        1000 |      1073741824 |           2 |        4 |          8 |     80x | 34.311 ms | 1.54% | 34.307 ms | 1.54% |      31298288990 |         1.188 GiB |        21.758 MiB |
|        1000 |       536870912 |           4 |        4 |          8 |     23x | 22.179 ms | 0.11% | 22.175 ms | 0.11% |      24211151047 |       624.177 MiB |        10.947 MiB |
|        1000 |      1073741824 |           4 |        4 |          8 |     15x | 33.793 ms | 0.08% | 33.789 ms | 0.08% |      31777989791 |         1.190 GiB |        21.881 MiB |
|        1000 |       536870912 |           8 |        4 |          8 |    679x | 22.006 ms | 1.74% | 22.002 ms | 1.74% |      24401381631 |       624.524 MiB |        10.951 MiB |
|        1000 |      1073741824 |           8 |        4 |          8 |    160x | 33.320 ms | 1.57% | 33.316 ms | 1.57% |      32229227026 |         1.207 GiB |        21.894 MiB |

## orc_multithreaded_read_decode_list

### [0] NVIDIA RTX 5880 Ada Generation

| cardinality | total_data_size | num_threads | num_cols | run_length | Samples |  CPU Time  | Noise  |  GPU Time  | Noise  | bytes_per_second | peak_memory_usage | encoded_file_size |
|-------------|-----------------|-------------|----------|------------|---------|------------|--------|------------|--------|------------------|-------------------|-------------------|
|        1000 |       536870912 |           1 |        4 |          8 |     96x |  74.437 ms |  0.68% |  74.433 ms |  0.68% |       7212831148 |       600.751 MiB |        60.245 MiB |
|        1000 |      1073741824 |           1 |        4 |          8 |      7x |  80.994 ms |  0.49% |  80.990 ms |  0.49% |      13257745936 |         1.173 GiB |       120.549 MiB |
|        1000 |       536870912 |           2 |        4 |          8 |     80x |  79.234 ms |  4.57% |  79.229 ms |  4.57% |       6776190522 |       600.950 MiB |        60.250 MiB |
|        1000 |      1073741824 |           2 |        4 |          8 |    166x |  90.437 ms | 17.19% |  90.432 ms | 17.19% |      11873413959 |         1.173 GiB |       120.489 MiB |
|        1000 |       536870912 |           4 |        4 |          8 |     80x |  78.613 ms |  2.98% |  78.608 ms |  2.98% |       6829702014 |       602.764 MiB |        60.323 MiB |
|        1000 |      1073741824 |           4 |        4 |          8 |    127x | 118.629 ms | 22.67% | 118.624 ms | 22.67% |       9051644873 |         1.174 GiB |       120.499 MiB |
|        1000 |       536870912 |           8 |        4 |          8 |    112x | 133.950 ms |  4.45% | 133.945 ms |  4.45% |       4008135293 |       603.471 MiB |        60.353 MiB |
|        1000 |      1073741824 |           8 |        4 |          8 |     90x | 167.850 ms | 15.93% | 167.844 ms | 15.93% |       6397248426 |         1.177 GiB |       120.646 MiB |

## orc_multithreaded_read_decode_chunked_mixed

### [0] NVIDIA RTX 5880 Ada Generation

| cardinality | total_data_size | num_threads | num_cols | run_length | input_limit | output_limit | Samples | CPU Time  | Noise | GPU Time  | Noise | bytes_per_second | peak_memory_usage | encoded_file_size |
|-------------|-----------------|-------------|----------|------------|-------------|--------------|---------|-----------|-------|-----------|-------|------------------|-------------------|-------------------|
|        1000 |       536870912 |           1 |        4 |          8 |   671088640 |    671088640 |    333x | 45.009 ms | 1.10% | 45.005 ms | 1.10% |      11929261073 |       939.341 MiB |        39.557 MiB |
|        1000 |      1073741824 |           1 |        4 |          8 |   671088640 |    671088640 |     96x | 81.524 ms | 0.61% | 81.519 ms | 0.61% |      13171640865 |         1.834 GiB |        79.072 MiB |
|        1000 |       536870912 |           2 |        4 |          8 |   671088640 |    671088640 |    339x | 44.183 ms | 0.96% | 44.179 ms | 0.96% |      12152252271 |       825.333 MiB |        39.568 MiB |
|        1000 |      1073741824 |           2 |        4 |          8 |   671088640 |    671088640 |      7x | 79.051 ms | 0.02% | 79.046 ms | 0.02% |      13583676002 |         1.611 GiB |        79.113 MiB |
|        1000 |       536870912 |           4 |        4 |          8 |   671088640 |    671088640 |     12x | 43.276 ms | 0.09% | 43.272 ms | 0.09% |      12407024794 |       883.436 MiB |        39.587 MiB |
|        1000 |      1073741824 |           4 |        4 |          8 |   671088640 |    671088640 |     19x | 78.019 ms | 0.49% | 78.014 ms | 0.49% |      13763433041 |         1.724 GiB |        79.136 MiB |
|        1000 |       536870912 |           8 |        4 |          8 |   671088640 |    671088640 |     80x | 42.803 ms | 1.22% | 42.799 ms | 1.22% |      12543864010 |       911.993 MiB |        39.600 MiB |
|        1000 |      1073741824 |           8 |        4 |          8 |   671088640 |    671088640 |    193x | 77.856 ms | 0.59% | 77.852 ms | 0.59% |      13792063986 |         1.837 GiB |        79.175 MiB |

## orc_multithreaded_read_decode_chunked_fixed_width

### [0] NVIDIA RTX 5880 Ada Generation

| cardinality | total_data_size | num_threads | num_cols | run_length | input_limit | output_limit | Samples | CPU Time  | Noise | GPU Time  | Noise | bytes_per_second | peak_memory_usage | encoded_file_size |
|-------------|-----------------|-------------|----------|------------|-------------|--------------|---------|-----------|-------|-----------|-------|------------------|-------------------|-------------------|
|        1000 |       536870912 |           1 |        4 |          8 |   671088640 |    671088640 |    112x | 40.497 ms | 1.23% | 40.493 ms | 1.23% |      13258480947 |       643.390 MiB |        59.821 MiB |
|        1000 |      1073741824 |           1 |        4 |          8 |   671088640 |    671088640 |      7x | 75.440 ms | 0.09% | 75.435 ms | 0.09% |      14234033611 |         1.648 GiB |       119.651 MiB |
|        1000 |       536870912 |           2 |        4 |          8 |   671088640 |    671088640 |     80x | 39.793 ms | 1.36% | 39.789 ms | 1.36% |      13493067216 |       643.392 MiB |        59.833 MiB |
|        1000 |      1073741824 |           2 |        4 |          8 |   671088640 |    671088640 |     69x | 74.499 ms | 0.50% | 74.494 ms | 0.50% |      14413864845 |         1.336 GiB |       119.642 MiB |
|        1000 |       536870912 |           4 |        4 |          8 |   671088640 |    671088640 |    381x | 39.273 ms | 1.11% | 39.269 ms | 1.11% |      13671742653 |       643.394 MiB |        59.856 MiB |
|        1000 |      1073741824 |           4 |        4 |          8 |   671088640 |    671088640 |    204x | 73.755 ms | 0.60% | 73.751 ms | 0.60% |      14559012350 |         1.648 GiB |       119.665 MiB |
|        1000 |       536870912 |           8 |        4 |          8 |   671088640 |    671088640 |     80x | 39.490 ms | 1.31% | 39.486 ms | 1.31% |      13596333864 |       631.980 MiB |        59.928 MiB |
|        1000 |      1073741824 |           8 |        4 |          8 |   671088640 |    671088640 |    203x | 73.907 ms | 1.34% | 73.903 ms | 1.34% |      14529071322 |         1.454 GiB |       119.711 MiB |

## orc_multithreaded_read_decode_chunked_string

### [0] NVIDIA RTX 5880 Ada Generation

| cardinality | total_data_size | num_threads | num_cols | run_length | input_limit | output_limit | Samples | CPU Time  | Noise | GPU Time  | Noise | bytes_per_second | peak_memory_usage | encoded_file_size |
|-------------|-----------------|-------------|----------|------------|-------------|--------------|---------|-----------|-------|-----------|-------|------------------|-------------------|-------------------|
|        1000 |       536870912 |           1 |        4 |          8 |   671088640 |    671088640 |     80x | 23.022 ms | 1.96% | 23.017 ms | 1.96% |      23324556592 |       661.948 MiB |        10.879 MiB |
|        1000 |      1073741824 |           1 |        4 |          8 |   671088640 |    671088640 |     80x | 37.687 ms | 1.37% | 37.682 ms | 1.37% |      28494755419 |         1.659 GiB |        21.757 MiB |
|        1000 |       536870912 |           2 |        4 |          8 |   671088640 |    671088640 |     80x | 22.703 ms | 2.30% | 22.699 ms | 2.30% |      23652118769 |       609.407 MiB |        10.941 MiB |
|        1000 |      1073741824 |           2 |        4 |          8 |   671088640 |    671088640 |     80x | 37.581 ms | 1.42% | 37.577 ms | 1.42% |      28574723179 |         1.658 GiB |        21.758 MiB |
|        1000 |       536870912 |           4 |        4 |          8 |   671088640 |    671088640 |    544x | 22.296 ms | 1.56% | 22.293 ms | 1.56% |      24082840350 |       631.319 MiB |        10.947 MiB |
|        1000 |      1073741824 |           4 |        4 |          8 |   671088640 |    671088640 |     14x | 36.990 ms | 0.14% | 36.985 ms | 0.14% |      29031484389 |         1.554 GiB |        21.881 MiB |
|        1000 |       536870912 |           8 |        4 |          8 |   671088640 |    671088640 |    676x | 22.114 ms | 1.22% | 22.110 ms | 1.22% |      24281965280 |       627.616 MiB |        10.951 MiB |
|        1000 |      1073741824 |           8 |        4 |          8 |   671088640 |    671088640 |     80x | 37.409 ms | 1.40% | 37.405 ms | 1.40% |      28706077426 |         1.562 GiB |        21.894 MiB |

## orc_multithreaded_read_decode_chunked_list

### [0] NVIDIA RTX 5880 Ada Generation

| cardinality | total_data_size | num_threads | num_cols | run_length | input_limit | output_limit | Samples |  CPU Time  | Noise  |  GPU Time  | Noise  | bytes_per_second | peak_memory_usage | encoded_file_size |
|-------------|-----------------|-------------|----------|------------|-------------|--------------|---------|------------|--------|------------|--------|------------------|-------------------|-------------------|
|        1000 |       536870912 |           1 |        4 |          8 |   671088640 |    671088640 |     80x |  74.780 ms |  0.67% |  74.776 ms |  0.67% |       7179747067 |       600.751 MiB |        60.245 MiB |
|        1000 |      1073741824 |           1 |        4 |          8 |   671088640 |    671088640 |    175x |  86.040 ms |  0.56% |  86.035 ms |  0.56% |      12480222210 |         1.576 GiB |       120.549 MiB |
|        1000 |       536870912 |           2 |        4 |          8 |   671088640 |    671088640 |    186x |  80.668 ms |  4.14% |  80.664 ms |  4.14% |       6655685080 |       600.951 MiB |        60.250 MiB |
|        1000 |      1073741824 |           2 |        4 |          8 |   671088640 |    671088640 |    143x | 105.217 ms | 21.56% | 105.212 ms | 21.56% |      10205531345 |         1.576 GiB |       120.489 MiB |
|        1000 |       536870912 |           4 |        4 |          8 |   671088640 |    671088640 |    128x |  80.087 ms |  3.05% |  80.082 ms |  3.05% |       6704042147 |       602.764 MiB |        60.323 MiB |
|        1000 |      1073741824 |           4 |        4 |          8 |   671088640 |    671088640 |    135x | 111.556 ms | 21.88% | 111.551 ms | 21.88% |       9625546746 |         1.489 GiB |       120.499 MiB |
|        1000 |       536870912 |           8 |        4 |          8 |   671088640 |    671088640 |    112x | 134.677 ms |  4.14% | 134.672 ms |  4.14% |       3986513604 |       603.471 MiB |        60.353 MiB |
|        1000 |      1073741824 |           8 |        4 |          8 |   671088640 |    671088640 |     80x | 178.735 ms | 14.17% | 178.730 ms | 14.17% |       6007630497 |         1.520 GiB |       120.646 MiB |

```

Authors:
  - Zach Puller (https://github.com/zpuller)
  - Vukasin Milovanovic (https://github.com/vuule)
  - MithunR (https://github.com/mythrocks)

Approvers:
  - Yunsong Wang (https://github.com/PointKernel)
  - MithunR (https://github.com/mythrocks)

URL: https://github.com/rapidsai/cudf/pull/16009
---
 cpp/benchmarks/CMakeLists.txt                 |   5 +
 .../io/orc/orc_reader_multithreaded.cpp       | 335 ++++++++++++++++++
 2 files changed, 340 insertions(+)
 create mode 100644 cpp/benchmarks/io/orc/orc_reader_multithreaded.cpp

diff --git a/cpp/benchmarks/CMakeLists.txt b/cpp/benchmarks/CMakeLists.txt
index 49504e53424..8a48126e195 100644
--- a/cpp/benchmarks/CMakeLists.txt
+++ b/cpp/benchmarks/CMakeLists.txt
@@ -267,6 +267,11 @@ ConfigureNVBench(PARQUET_MULTITHREAD_READER_NVBENCH io/parquet/parquet_reader_mu
 # * orc reader benchmark --------------------------------------------------------------------------
 ConfigureNVBench(ORC_READER_NVBENCH io/orc/orc_reader_input.cpp io/orc/orc_reader_options.cpp)
 
+# ##################################################################################################
+# * orc multithreaded benchmark
+# --------------------------------------------------------------------------
+ConfigureNVBench(ORC_MULTITHREADED_NVBENCH io/orc/orc_reader_multithreaded.cpp)
+
 # ##################################################################################################
 # * csv reader benchmark --------------------------------------------------------------------------
 ConfigureNVBench(CSV_READER_NVBENCH io/csv/csv_reader_input.cpp io/csv/csv_reader_options.cpp)
diff --git a/cpp/benchmarks/io/orc/orc_reader_multithreaded.cpp b/cpp/benchmarks/io/orc/orc_reader_multithreaded.cpp
new file mode 100644
index 00000000000..ffbbc6f8464
--- /dev/null
+++ b/cpp/benchmarks/io/orc/orc_reader_multithreaded.cpp
@@ -0,0 +1,335 @@
+/*
+ * Copyright (c) 2024, NVIDIA CORPORATION.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include <benchmarks/common/generate_input.hpp>
+#include <benchmarks/fixture/benchmark_fixture.hpp>
+#include <benchmarks/io/cuio_common.hpp>
+#include <benchmarks/io/nvbench_helpers.hpp>
+
+#include <cudf/detail/nvtx/ranges.hpp>
+#include <cudf/detail/utilities/stream_pool.hpp>
+#include <cudf/io/orc.hpp>
+#include <cudf/utilities/default_stream.hpp>
+#include <cudf/utilities/pinned_memory.hpp>
+#include <cudf/utilities/thread_pool.hpp>
+
+#include <nvbench/nvbench.cuh>
+
+#include <vector>
+
+size_t get_num_read_threads(nvbench::state const& state) { return state.get_int64("num_threads"); }
+
+size_t get_read_size(nvbench::state const& state)
+{
+  auto const num_reads = get_num_read_threads(state);
+  return state.get_int64("total_data_size") / num_reads;
+}
+
+std::string get_label(std::string const& test_name, nvbench::state const& state)
+{
+  auto const num_cols       = state.get_int64("num_cols");
+  size_t const read_size_mb = get_read_size(state) / (1024 * 1024);
+  return {test_name + ", " + std::to_string(num_cols) + " columns, " +
+          std::to_string(get_num_read_threads(state)) + " threads " + " (" +
+          std::to_string(read_size_mb) + " MB each)"};
+}
+
+std::tuple<std::vector<cuio_source_sink_pair>, size_t, size_t> write_file_data(
+  nvbench::state& state, std::vector<cudf::type_id> const& d_types)
+{
+  auto const cardinality = state.get_int64("cardinality");
+  auto const run_length  = state.get_int64("run_length");
+  auto const num_cols    = state.get_int64("num_cols");
+  size_t const num_files            = get_num_read_threads(state);
+  size_t const per_file_data_size   = get_read_size(state);
+
+  std::vector<cuio_source_sink_pair> source_sink_vector;
+
+  size_t total_file_size = 0;
+
+  for (size_t i = 0; i < num_files; ++i) {
+    cuio_source_sink_pair source_sink{io_type::HOST_BUFFER};
+
+    auto const tbl = create_random_table(
+      cycle_dtypes(d_types, num_cols),
+      table_size_bytes{per_file_data_size},
+      data_profile_builder().cardinality(cardinality).avg_run_length(run_length));
+    auto const view = tbl->view();
+
+    cudf::io::orc_writer_options const write_opts =
+      cudf::io::orc_writer_options::builder(source_sink.make_sink_info(), view)
+        .compression(cudf::io::compression_type::SNAPPY);
+
+    cudf::io::write_orc(write_opts);
+    total_file_size += source_sink.size();
+
+    source_sink_vector.push_back(std::move(source_sink));
+  }
+
+  return {std::move(source_sink_vector), total_file_size, num_files};
+}
+
+void BM_orc_multithreaded_read_common(nvbench::state& state,
+                                      std::vector<cudf::type_id> const& d_types,
+                                      std::string const& label)
+{
+  auto const data_size = state.get_int64("total_data_size");
+  auto const num_threads = state.get_int64("num_threads");
+
+  auto streams = cudf::detail::fork_streams(cudf::get_default_stream(), num_threads);
+  cudf::detail::thread_pool threads(num_threads);
+
+  auto [source_sink_vector, total_file_size, num_files] = write_file_data(state, d_types);
+  std::vector<cudf::io::source_info> source_info_vector;
+  std::transform(source_sink_vector.begin(),
+                 source_sink_vector.end(),
+                 std::back_inserter(source_info_vector),
+                 [](auto& source_sink) { return source_sink.make_source_info(); });
+
+  auto mem_stats_logger = cudf::memory_stats_logger();
+
+  {
+    cudf::scoped_range range{("(read) " + label).c_str()};
+    state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer,
+              [&](nvbench::launch& launch, auto& timer) {
+                auto read_func = [&](int index) {
+                  auto const stream = streams[index % num_threads];
+                  cudf::io::orc_reader_options read_opts =
+                    cudf::io::orc_reader_options::builder(source_info_vector[index]);
+                  cudf::io::read_orc(read_opts, stream, rmm::mr::get_current_device_resource());
+                };
+
+                threads.paused = true;
+                for (size_t i = 0; i < num_files; ++i) {
+                  threads.submit(read_func, i);
+                }
+                timer.start();
+                threads.paused = false;
+                threads.wait_for_tasks();
+                cudf::detail::join_streams(streams, cudf::get_default_stream());
+                timer.stop();
+              });
+  }
+
+  auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
+  state.add_element_count(static_cast<double>(data_size) / time, "bytes_per_second");
+  state.add_buffer_size(
+    mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage");
+  state.add_buffer_size(total_file_size, "encoded_file_size", "encoded_file_size");
+}
+
+void BM_orc_multithreaded_read_mixed(nvbench::state& state)
+{
+  auto label = get_label("mixed", state);
+  cudf::scoped_range range{label.c_str()};
+  BM_orc_multithreaded_read_common(
+    state, {cudf::type_id::INT32, cudf::type_id::DECIMAL64, cudf::type_id::STRING}, label);
+}
+
+void BM_orc_multithreaded_read_fixed_width(nvbench::state& state)
+{
+  auto label = get_label("fixed width", state);
+  cudf::scoped_range range{label.c_str()};
+  BM_orc_multithreaded_read_common(state, {cudf::type_id::INT32}, label);
+}
+
+void BM_orc_multithreaded_read_string(nvbench::state& state)
+{
+  auto label = get_label("string", state);
+  cudf::scoped_range range{label.c_str()};
+  BM_orc_multithreaded_read_common(state, {cudf::type_id::STRING}, label);
+}
+
+void BM_orc_multithreaded_read_list(nvbench::state& state)
+{
+  auto label = get_label("list", state);
+  cudf::scoped_range range{label.c_str()};
+  BM_orc_multithreaded_read_common(state, {cudf::type_id::LIST}, label);
+}
+
+void BM_orc_multithreaded_read_chunked_common(nvbench::state& state,
+                                              std::vector<cudf::type_id> const& d_types,
+                                              std::string const& label)
+{
+  size_t const data_size    = state.get_int64("total_data_size");
+  auto const num_threads    = state.get_int64("num_threads");
+  size_t const input_limit  = state.get_int64("input_limit");
+  size_t const output_limit = state.get_int64("output_limit");
+
+  auto streams = cudf::detail::fork_streams(cudf::get_default_stream(), num_threads);
+  cudf::detail::thread_pool threads(num_threads);
+  auto [source_sink_vector, total_file_size, num_files] = write_file_data(state, d_types);
+  std::vector<cudf::io::source_info> source_info_vector;
+  std::transform(source_sink_vector.begin(),
+                 source_sink_vector.end(),
+                 std::back_inserter(source_info_vector),
+                 [](auto& source_sink) { return source_sink.make_source_info(); });
+
+  auto mem_stats_logger = cudf::memory_stats_logger();
+
+  {
+    cudf::scoped_range range{("(read) " + label).c_str()};
+    std::vector<cudf::io::table_with_metadata> chunks;
+    state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer,
+              [&](nvbench::launch& launch, auto& timer) {
+                auto read_func = [&](int index) {
+                  auto const stream = streams[index % num_threads];
+                  cudf::io::orc_reader_options read_opts =
+                    cudf::io::orc_reader_options::builder(source_info_vector[index]);
+                  // divide chunk limits by number of threads so the number of chunks produced is the
+                  // same for all cases. this seems better than the alternative, which is to keep the
+                  // limits the same. if we do that, as the number of threads goes up, the number of
+                  // chunks goes down - so are actually benchmarking the same thing in that case?
+                  auto reader = cudf::io::chunked_orc_reader(
+                    output_limit / num_threads, input_limit / num_threads, read_opts, stream);
+
+                  // read all the chunks
+                  do {
+                    auto table = reader.read_chunk();
+                  } while (reader.has_next());
+                };
+
+                threads.paused = true;
+                for (size_t i = 0; i < num_files; ++i) {
+                  threads.submit(read_func, i);
+                }
+                timer.start();
+                threads.paused = false;
+                threads.wait_for_tasks();
+                cudf::detail::join_streams(streams, cudf::get_default_stream());
+                timer.stop();
+              });
+  }
+
+  auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
+  state.add_element_count(static_cast<double>(data_size) / time, "bytes_per_second");
+  state.add_buffer_size(
+    mem_stats_logger.peak_memory_usage(), "peak_memory_usage", "peak_memory_usage");
+  state.add_buffer_size(total_file_size, "encoded_file_size", "encoded_file_size");
+}
+
+void BM_orc_multithreaded_read_chunked_mixed(nvbench::state& state)
+{
+  auto label = get_label("mixed", state);
+  cudf::scoped_range range{label.c_str()};
+  BM_orc_multithreaded_read_chunked_common(
+    state, {cudf::type_id::INT32, cudf::type_id::DECIMAL64, cudf::type_id::STRING}, label);
+}
+
+void BM_orc_multithreaded_read_chunked_fixed_width(nvbench::state& state)
+{
+  auto label = get_label("fixed width", state);
+  cudf::scoped_range range{label.c_str()};
+  BM_orc_multithreaded_read_chunked_common(state, {cudf::type_id::INT32}, label);
+}
+
+void BM_orc_multithreaded_read_chunked_string(nvbench::state& state)
+{
+  auto label = get_label("string", state);
+  cudf::scoped_range range{label.c_str()};
+  BM_orc_multithreaded_read_chunked_common(state, {cudf::type_id::STRING}, label);
+}
+
+void BM_orc_multithreaded_read_chunked_list(nvbench::state& state)
+{
+  auto label = get_label("list", state);
+  cudf::scoped_range range{label.c_str()};
+  BM_orc_multithreaded_read_chunked_common(state, {cudf::type_id::LIST}, label);
+}
+auto const thread_range  = std::vector<nvbench::int64_t>{1, 2, 4, 8};
+auto const total_data_size = std::vector<nvbench::int64_t>{512 * 1024 * 1024, 1024 * 1024 * 1024};
+
+// mixed data types: fixed width and strings
+NVBENCH_BENCH(BM_orc_multithreaded_read_mixed)
+  .set_name("orc_multithreaded_read_decode_mixed")
+  .set_min_samples(4)
+  .add_int64_axis("cardinality", {1000})
+  .add_int64_axis("total_data_size", total_data_size)
+  .add_int64_axis("num_threads", thread_range)
+  .add_int64_axis("num_cols", {4})
+  .add_int64_axis("run_length", {8});
+
+NVBENCH_BENCH(BM_orc_multithreaded_read_fixed_width)
+  .set_name("orc_multithreaded_read_decode_fixed_width")
+  .set_min_samples(4)
+  .add_int64_axis("cardinality", {1000})
+  .add_int64_axis("total_data_size", total_data_size)
+  .add_int64_axis("num_threads", thread_range)
+  .add_int64_axis("num_cols", {4})
+  .add_int64_axis("run_length", {8});
+
+NVBENCH_BENCH(BM_orc_multithreaded_read_string)
+  .set_name("orc_multithreaded_read_decode_string")
+  .set_min_samples(4)
+  .add_int64_axis("cardinality", {1000})
+  .add_int64_axis("total_data_size", total_data_size)
+  .add_int64_axis("num_threads", thread_range)
+  .add_int64_axis("num_cols", {4})
+  .add_int64_axis("run_length", {8});
+
+NVBENCH_BENCH(BM_orc_multithreaded_read_list)
+  .set_name("orc_multithreaded_read_decode_list")
+  .set_min_samples(4)
+  .add_int64_axis("cardinality", {1000})
+  .add_int64_axis("total_data_size", total_data_size)
+  .add_int64_axis("num_threads", thread_range)
+  .add_int64_axis("num_cols", {4})
+  .add_int64_axis("run_length", {8});
+
+// mixed data types: fixed width, strings
+NVBENCH_BENCH(BM_orc_multithreaded_read_chunked_mixed)
+  .set_name("orc_multithreaded_read_decode_chunked_mixed")
+  .set_min_samples(4)
+  .add_int64_axis("cardinality", {1000})
+  .add_int64_axis("total_data_size", total_data_size)
+  .add_int64_axis("num_threads", thread_range)
+  .add_int64_axis("num_cols", {4})
+  .add_int64_axis("run_length", {8})
+  .add_int64_axis("input_limit", {640 * 1024 * 1024})
+  .add_int64_axis("output_limit", {640 * 1024 * 1024});
+
+NVBENCH_BENCH(BM_orc_multithreaded_read_chunked_fixed_width)
+  .set_name("orc_multithreaded_read_decode_chunked_fixed_width")
+  .set_min_samples(4)
+  .add_int64_axis("cardinality", {1000})
+  .add_int64_axis("total_data_size", total_data_size)
+  .add_int64_axis("num_threads", thread_range)
+  .add_int64_axis("num_cols", {4})
+  .add_int64_axis("run_length", {8})
+  .add_int64_axis("input_limit", {640 * 1024 * 1024})
+  .add_int64_axis("output_limit", {640 * 1024 * 1024});
+
+NVBENCH_BENCH(BM_orc_multithreaded_read_chunked_string)
+  .set_name("orc_multithreaded_read_decode_chunked_string")
+  .set_min_samples(4)
+  .add_int64_axis("cardinality", {1000})
+  .add_int64_axis("total_data_size", total_data_size)
+  .add_int64_axis("num_threads", thread_range)
+  .add_int64_axis("num_cols", {4})
+  .add_int64_axis("run_length", {8})
+  .add_int64_axis("input_limit", {640 * 1024 * 1024})
+  .add_int64_axis("output_limit", {640 * 1024 * 1024});
+
+NVBENCH_BENCH(BM_orc_multithreaded_read_chunked_list)
+  .set_name("orc_multithreaded_read_decode_chunked_list")
+  .set_min_samples(4)
+  .add_int64_axis("cardinality", {1000})
+  .add_int64_axis("total_data_size", total_data_size)
+  .add_int64_axis("num_threads", thread_range)
+  .add_int64_axis("num_cols", {4})
+  .add_int64_axis("run_length", {8})
+  .add_int64_axis("input_limit", {640 * 1024 * 1024})
+  .add_int64_axis("output_limit", {640 * 1024 * 1024});

From 24fe359425b080594b05bab040699a1468483474 Mon Sep 17 00:00:00 2001
From: Robert Maynard <rmaynard@nvidia.com>
Date: Fri, 14 Jun 2024 09:35:13 -0400
Subject: [PATCH 13/25] Remove CCCL 2.2 patches as we now always use 2.5+
 (#15969)

Now that https://github.com/rapidsai/rapids-cmake/pull/607 has been merged, we can drop support for patching CCCL 2.2.

Authors:
  - Robert Maynard (https://github.com/robertmaynard)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Paul Taylor (https://github.com/trxcllnt)

URL: https://github.com/rapidsai/cudf/pull/15969
---
 .../thirdparty/patches/cccl_override.json     | 35 --------------
 .../patches/revert_pr_211_cccl_2.5.0.diff     | 47 -------------------
 .../thrust_disable_64bit_dispatching.diff     | 38 +++++++--------
 ..._disable_64bit_dispatching_cccl_2.5.0.diff | 25 ----------
 .../thrust_faster_scan_compile_times.diff     | 30 ++++++------
 ..._faster_scan_compile_times_cccl_2.5.0.diff | 39 ---------------
 .../thrust_faster_sort_compile_times.diff     | 32 ++++++-------
 ..._faster_sort_compile_times_cccl_2.5.0.diff | 39 ---------------
 8 files changed, 50 insertions(+), 235 deletions(-)
 delete mode 100644 cpp/cmake/thirdparty/patches/revert_pr_211_cccl_2.5.0.diff
 delete mode 100644 cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching_cccl_2.5.0.diff
 delete mode 100644 cpp/cmake/thirdparty/patches/thrust_faster_scan_compile_times_cccl_2.5.0.diff
 delete mode 100644 cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times_cccl_2.5.0.diff

diff --git a/cpp/cmake/thirdparty/patches/cccl_override.json b/cpp/cmake/thirdparty/patches/cccl_override.json
index 059f713e7a5..e61102dffac 100644
--- a/cpp/cmake/thirdparty/patches/cccl_override.json
+++ b/cpp/cmake/thirdparty/patches/cccl_override.json
@@ -3,60 +3,25 @@
   "packages" : {
     "CCCL" : {
       "patches" : [
-        {
-          "file" : "cccl/bug_fixes.diff",
-          "issue" : "CCCL installs header-search.cmake files in nondeterministic order and has a typo in checking target creation that leads to duplicates",
-          "fixed_in" : "2.3"
-        },
-        {
-          "file" : "cccl/hide_kernels.diff",
-          "issue" : "Mark all cub and thrust kernels with hidden visibility [https://github.com/nvidia/cccl/pulls/443]",
-          "fixed_in" : "2.3"
-        },
         {
           "file" : "cccl/revert_pr_211.diff",
           "issue" : "thrust::copy introduced a change in behavior that causes failures with cudaErrorInvalidValue.",
           "fixed_in" : ""
         },
-        {
-          "file" : "${current_json_dir}/revert_pr_211_cccl_2.5.0.diff",
-          "issue" : "thrust::copy introduced a change in behavior that causes failures with cudaErrorInvalidValue.",
-          "fixed_in" : ""
-        },
-        {
-          "file": "cccl/kernel_pointer_hiding.diff",
-          "issue": "Hide APIs that accept kernel pointers [https://github.com/NVIDIA/cccl/pull/1395]",
-          "fixed_in": "2.4"
-        },
         {
           "file" : "${current_json_dir}/thrust_disable_64bit_dispatching.diff",
           "issue" : "Remove 64bit dispatching as not needed by libcudf and results in compiling twice as many kernels [https://github.com/rapidsai/cudf/pull/11437]",
           "fixed_in" : ""
         },
-        {
-          "file" : "${current_json_dir}/thrust_disable_64bit_dispatching_cccl_2.5.0.diff",
-          "issue" : "Remove 64bit dispatching as not needed by libcudf and results in compiling twice as many kernels [https://github.com/rapidsai/cudf/pull/11437]",
-          "fixed_in" : ""
-        },
         {
           "file" : "${current_json_dir}/thrust_faster_sort_compile_times.diff",
           "issue" : "Improve Thrust sort compile times by not unrolling loops for inlined comparators [https://github.com/rapidsai/cudf/pull/10577]",
           "fixed_in" : ""
         },
-        {
-          "file" : "${current_json_dir}/thrust_faster_sort_compile_times_cccl_2.5.0.diff",
-          "issue" : "Improve Thrust sort compile times by not unrolling loops for inlined comparators [https://github.com/rapidsai/cudf/pull/10577]",
-          "fixed_in" : ""
-        },
         {
           "file" : "${current_json_dir}/thrust_faster_scan_compile_times.diff",
           "issue" : "Improve Thrust scan compile times by reducing the number of kernels generated [https://github.com/rapidsai/cudf/pull/8183]",
           "fixed_in" : ""
-        },
-        {
-          "file" : "${current_json_dir}/thrust_faster_scan_compile_times_cccl_2.5.0.diff",
-          "issue" : "Improve Thrust scan compile times by reducing the number of kernels generated [https://github.com/rapidsai/cudf/pull/8183]",
-          "fixed_in" : ""
         }
       ]
     }
diff --git a/cpp/cmake/thirdparty/patches/revert_pr_211_cccl_2.5.0.diff b/cpp/cmake/thirdparty/patches/revert_pr_211_cccl_2.5.0.diff
deleted file mode 100644
index 27ff16744f5..00000000000
--- a/cpp/cmake/thirdparty/patches/revert_pr_211_cccl_2.5.0.diff
+++ /dev/null
@@ -1,47 +0,0 @@
-diff --git a/thrust/thrust/system/cuda/detail/internal/copy_device_to_device.h b/thrust/thrust/system/cuda/detail/internal/copy_device_to_device.h
-index 046eb83c0..8047c9701 100644
---- a/thrust/thrust/system/cuda/detail/internal/copy_device_to_device.h
-+++ b/thrust/thrust/system/cuda/detail/internal/copy_device_to_device.h
-@@ -53,41 +53,15 @@ namespace cuda_cub
- 
- namespace __copy
- {
--template <class Derived, class InputIt, class OutputIt>
--OutputIt THRUST_RUNTIME_FUNCTION device_to_device(
--  execution_policy<Derived>& policy, InputIt first, InputIt last, OutputIt result, thrust::detail::true_type)
--{
--  typedef typename thrust::iterator_traits<InputIt>::value_type InputTy;
--  const auto n = thrust::distance(first, last);
--  if (n > 0)
--  {
--    cudaError status;
--    status = trivial_copy_device_to_device(
--      policy,
--      reinterpret_cast<InputTy*>(thrust::raw_pointer_cast(&*result)),
--      reinterpret_cast<InputTy const*>(thrust::raw_pointer_cast(&*first)),
--      n);
--    cuda_cub::throw_on_error(status, "__copy:: D->D: failed");
--  }
--
--  return result + n;
--}
- 
- template <class Derived, class InputIt, class OutputIt>
- OutputIt THRUST_RUNTIME_FUNCTION device_to_device(
--  execution_policy<Derived>& policy, InputIt first, InputIt last, OutputIt result, thrust::detail::false_type)
-+  execution_policy<Derived>& policy, InputIt first, InputIt last, OutputIt result)
- {
-   typedef typename thrust::iterator_traits<InputIt>::value_type InputTy;
-   return cuda_cub::transform(policy, first, last, result, thrust::identity<InputTy>());
- }
- 
--template <class Derived, class InputIt, class OutputIt>
--OutputIt THRUST_RUNTIME_FUNCTION
--device_to_device(execution_policy<Derived>& policy, InputIt first, InputIt last, OutputIt result)
--{
--  return device_to_device(
--    policy, first, last, result, typename is_indirectly_trivially_relocatable_to<InputIt, OutputIt>::type());
--}
- } // namespace __copy
- 
- } // namespace cuda_cub
diff --git a/cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching.diff b/cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching.diff
index d3f1a26781f..6ae1e1c917b 100644
--- a/cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching.diff
+++ b/cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching.diff
@@ -1,25 +1,25 @@
 diff --git a/thrust/thrust/system/cuda/detail/dispatch.h b/thrust/thrust/system/cuda/detail/dispatch.h
-index d0e3f94ec..5c32a9c60 100644
+index 2a3cc4e33..8fb337b26 100644
 --- a/thrust/thrust/system/cuda/detail/dispatch.h
 +++ b/thrust/thrust/system/cuda/detail/dispatch.h
-@@ -32,8 +32,7 @@
-         status = call arguments; \
-     } \
-     else { \
--        auto THRUST_PP_CAT2(count, _fixed) = static_cast<thrust::detail::int64_t>(count); \
--        status = call arguments; \
-+        throw std::runtime_error("THRUST_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
-     }
-
+@@ -44,8 +44,7 @@
+   }                                                                                   \
+   else                                                                                \
+   {                                                                                   \
+-    auto THRUST_PP_CAT2(count, _fixed) = static_cast<thrust::detail::int64_t>(count); \
+-    status                             = call arguments;                              \
++    throw std::runtime_error("THRUST_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
+   }
+ 
  /**
-@@ -52,9 +51,7 @@
-         status = call arguments; \
-     } \
-     else { \
--        auto THRUST_PP_CAT2(count1, _fixed) = static_cast<thrust::detail::int64_t>(count1); \
--        auto THRUST_PP_CAT2(count2, _fixed) = static_cast<thrust::detail::int64_t>(count2); \
--        status = call arguments; \
-+        throw std::runtime_error("THRUST_DOUBLE_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
-     }
+@@ -66,9 +65,7 @@
+   }                                                                                          \
+   else                                                                                       \
+   {                                                                                          \
+-    auto THRUST_PP_CAT2(count1, _fixed) = static_cast<thrust::detail::int64_t>(count1);      \
+-    auto THRUST_PP_CAT2(count2, _fixed) = static_cast<thrust::detail::int64_t>(count2);      \
+-    status                              = call arguments;                                    \
++    throw std::runtime_error("THRUST_DOUBLE_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
+   }
  /**
   * Dispatch between 32-bit and 64-bit index based versions of the same algorithm
diff --git a/cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching_cccl_2.5.0.diff b/cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching_cccl_2.5.0.diff
deleted file mode 100644
index 6ae1e1c917b..00000000000
--- a/cpp/cmake/thirdparty/patches/thrust_disable_64bit_dispatching_cccl_2.5.0.diff
+++ /dev/null
@@ -1,25 +0,0 @@
-diff --git a/thrust/thrust/system/cuda/detail/dispatch.h b/thrust/thrust/system/cuda/detail/dispatch.h
-index 2a3cc4e33..8fb337b26 100644
---- a/thrust/thrust/system/cuda/detail/dispatch.h
-+++ b/thrust/thrust/system/cuda/detail/dispatch.h
-@@ -44,8 +44,7 @@
-   }                                                                                   \
-   else                                                                                \
-   {                                                                                   \
--    auto THRUST_PP_CAT2(count, _fixed) = static_cast<thrust::detail::int64_t>(count); \
--    status                             = call arguments;                              \
-+    throw std::runtime_error("THRUST_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
-   }
- 
- /**
-@@ -66,9 +65,7 @@
-   }                                                                                          \
-   else                                                                                       \
-   {                                                                                          \
--    auto THRUST_PP_CAT2(count1, _fixed) = static_cast<thrust::detail::int64_t>(count1);      \
--    auto THRUST_PP_CAT2(count2, _fixed) = static_cast<thrust::detail::int64_t>(count2);      \
--    status                              = call arguments;                                    \
-+    throw std::runtime_error("THRUST_DOUBLE_INDEX_TYPE_DISPATCH 64-bit count is unsupported in libcudf"); \
-   }
- /**
-  * Dispatch between 32-bit and 64-bit index based versions of the same algorithm
diff --git a/cpp/cmake/thirdparty/patches/thrust_faster_scan_compile_times.diff b/cpp/cmake/thirdparty/patches/thrust_faster_scan_compile_times.diff
index a606e21b92d..fee46046194 100644
--- a/cpp/cmake/thirdparty/patches/thrust_faster_scan_compile_times.diff
+++ b/cpp/cmake/thirdparty/patches/thrust_faster_scan_compile_times.diff
@@ -1,23 +1,23 @@
 diff --git a/cub/cub/device/dispatch/dispatch_radix_sort.cuh b/cub/cub/device/dispatch/dispatch_radix_sort.cuh
-index 84b6ccffd..25a237f93 100644
+index 0606485bb..dbb99ff13 100644
 --- a/cub/cub/device/dispatch/dispatch_radix_sort.cuh
 +++ b/cub/cub/device/dispatch/dispatch_radix_sort.cuh
-@@ -808,7 +808,7 @@ struct DeviceRadixSortPolicy
-
-
-     /// SM60 (GP100)
--    struct Policy600 : ChainedPolicy<600, Policy600, Policy500>
-+    struct Policy600 : ChainedPolicy<600, Policy600, Policy600>
+@@ -1085,7 +1085,7 @@ struct DeviceRadixSortPolicy
+   };
+ 
+   /// SM60 (GP100)
+-  struct Policy600 : ChainedPolicy<600, Policy600, Policy500>
++  struct Policy600 : ChainedPolicy<600, Policy600, Policy600>
+   {
+     enum
      {
-         enum {
-             PRIMARY_RADIX_BITS      = (sizeof(KeyT) > 1) ? 7 : 5,    // 6.9B 32b keys/s (Quadro P100)
 diff --git a/cub/cub/device/dispatch/dispatch_reduce.cuh b/cub/cub/device/dispatch/dispatch_reduce.cuh
-index 994adc095..d3e6719a7 100644
+index f39613adb..75bd16ff9 100644
 --- a/cub/cub/device/dispatch/dispatch_reduce.cuh
 +++ b/cub/cub/device/dispatch/dispatch_reduce.cuh
-@@ -479,7 +479,7 @@ struct DeviceReducePolicy
+@@ -488,7 +488,7 @@ struct DeviceReducePolicy
    };
-
+ 
    /// SM60
 -  struct Policy600 : ChainedPolicy<600, Policy600, Policy350>
 +  struct Policy600 : ChainedPolicy<600, Policy600, Policy600>
@@ -25,15 +25,15 @@ index 994adc095..d3e6719a7 100644
      static constexpr int threads_per_block  = 256;
      static constexpr int items_per_thread   = 16;
 diff --git a/cub/cub/device/dispatch/tuning/tuning_scan.cuh b/cub/cub/device/dispatch/tuning/tuning_scan.cuh
-index 0ea5c41ad..1bcd8a111 100644
+index 419908c4e..6ab0840e1 100644
 --- a/cub/cub/device/dispatch/tuning/tuning_scan.cuh
 +++ b/cub/cub/device/dispatch/tuning/tuning_scan.cuh
-@@ -303,7 +303,7 @@ struct DeviceScanPolicy
+@@ -339,7 +339,7 @@ struct DeviceScanPolicy
    /// SM600
    struct Policy600
        : DefaultTuning
 -      , ChainedPolicy<600, Policy600, Policy520>
 +      , ChainedPolicy<600, Policy600, Policy600>
    {};
-
+ 
    /// SM800
diff --git a/cpp/cmake/thirdparty/patches/thrust_faster_scan_compile_times_cccl_2.5.0.diff b/cpp/cmake/thirdparty/patches/thrust_faster_scan_compile_times_cccl_2.5.0.diff
deleted file mode 100644
index fee46046194..00000000000
--- a/cpp/cmake/thirdparty/patches/thrust_faster_scan_compile_times_cccl_2.5.0.diff
+++ /dev/null
@@ -1,39 +0,0 @@
-diff --git a/cub/cub/device/dispatch/dispatch_radix_sort.cuh b/cub/cub/device/dispatch/dispatch_radix_sort.cuh
-index 0606485bb..dbb99ff13 100644
---- a/cub/cub/device/dispatch/dispatch_radix_sort.cuh
-+++ b/cub/cub/device/dispatch/dispatch_radix_sort.cuh
-@@ -1085,7 +1085,7 @@ struct DeviceRadixSortPolicy
-   };
- 
-   /// SM60 (GP100)
--  struct Policy600 : ChainedPolicy<600, Policy600, Policy500>
-+  struct Policy600 : ChainedPolicy<600, Policy600, Policy600>
-   {
-     enum
-     {
-diff --git a/cub/cub/device/dispatch/dispatch_reduce.cuh b/cub/cub/device/dispatch/dispatch_reduce.cuh
-index f39613adb..75bd16ff9 100644
---- a/cub/cub/device/dispatch/dispatch_reduce.cuh
-+++ b/cub/cub/device/dispatch/dispatch_reduce.cuh
-@@ -488,7 +488,7 @@ struct DeviceReducePolicy
-   };
- 
-   /// SM60
--  struct Policy600 : ChainedPolicy<600, Policy600, Policy350>
-+  struct Policy600 : ChainedPolicy<600, Policy600, Policy600>
-   {
-     static constexpr int threads_per_block  = 256;
-     static constexpr int items_per_thread   = 16;
-diff --git a/cub/cub/device/dispatch/tuning/tuning_scan.cuh b/cub/cub/device/dispatch/tuning/tuning_scan.cuh
-index 419908c4e..6ab0840e1 100644
---- a/cub/cub/device/dispatch/tuning/tuning_scan.cuh
-+++ b/cub/cub/device/dispatch/tuning/tuning_scan.cuh
-@@ -339,7 +339,7 @@ struct DeviceScanPolicy
-   /// SM600
-   struct Policy600
-       : DefaultTuning
--      , ChainedPolicy<600, Policy600, Policy520>
-+      , ChainedPolicy<600, Policy600, Policy600>
-   {};
- 
-   /// SM800
diff --git a/cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times.diff b/cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times.diff
index c34b6433d10..cb0cc55f4d2 100644
--- a/cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times.diff
+++ b/cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times.diff
@@ -1,39 +1,39 @@
 diff --git a/cub/cub/block/block_merge_sort.cuh b/cub/cub/block/block_merge_sort.cuh
-index dc07ef6c2..a066c14da 100644
+index eb76ebb0b..c6c529a50 100644
 --- a/cub/cub/block/block_merge_sort.cuh
 +++ b/cub/cub/block/block_merge_sort.cuh
-@@ -91,7 +91,7 @@ __device__ __forceinline__ void SerialMerge(KeyT *keys_shared,
+@@ -95,7 +95,7 @@ _CCCL_DEVICE _CCCL_FORCEINLINE void SerialMerge(
    KeyT key1 = keys_shared[keys1_beg];
    KeyT key2 = keys_shared[keys2_beg];
-
+ 
 -#pragma unroll
 +#pragma unroll 1
    for (int item = 0; item < ITEMS_PER_THREAD; ++item)
    {
-     bool p = (keys2_beg < keys2_end) &&
-@@ -383,7 +383,7 @@ public:
+     bool p = (keys2_beg < keys2_end) && ((keys1_beg >= keys1_end) || compare_op(key2, key1));
+@@ -376,7 +376,7 @@ public:
        //
        KeyT max_key = oob_default;
-
--      #pragma unroll
-+      #pragma unroll 1
+ 
+-#pragma unroll
++#pragma unroll 1
        for (int item = 1; item < ITEMS_PER_THREAD; ++item)
        {
          if (ITEMS_PER_THREAD * linear_tid + item < valid_items)
 diff --git a/cub/cub/thread/thread_sort.cuh b/cub/cub/thread/thread_sort.cuh
-index 5d4867896..b42fb5f00 100644
+index 7d9e8622f..da5627306 100644
 --- a/cub/cub/thread/thread_sort.cuh
 +++ b/cub/cub/thread/thread_sort.cuh
-@@ -83,10 +83,10 @@ StableOddEvenSort(KeyT (&keys)[ITEMS_PER_THREAD],
+@@ -87,10 +87,10 @@ StableOddEvenSort(KeyT (&keys)[ITEMS_PER_THREAD], ValueT (&items)[ITEMS_PER_THRE
  {
-   constexpr bool KEYS_ONLY = std::is_same<ValueT, NullType>::value;
-
--  #pragma unroll
-+  #pragma unroll 1
+   constexpr bool KEYS_ONLY = ::cuda::std::is_same<ValueT, NullType>::value;
+ 
+-#pragma unroll
++#pragma unroll 1
    for (int i = 0; i < ITEMS_PER_THREAD; ++i)
    {
--  #pragma unroll
-+  #pragma unroll 1
+-#pragma unroll
++#pragma unroll 1
      for (int j = 1 & i; j < ITEMS_PER_THREAD - 1; j += 2)
      {
        if (compare_op(keys[j + 1], keys[j]))
diff --git a/cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times_cccl_2.5.0.diff b/cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times_cccl_2.5.0.diff
deleted file mode 100644
index cb0cc55f4d2..00000000000
--- a/cpp/cmake/thirdparty/patches/thrust_faster_sort_compile_times_cccl_2.5.0.diff
+++ /dev/null
@@ -1,39 +0,0 @@
-diff --git a/cub/cub/block/block_merge_sort.cuh b/cub/cub/block/block_merge_sort.cuh
-index eb76ebb0b..c6c529a50 100644
---- a/cub/cub/block/block_merge_sort.cuh
-+++ b/cub/cub/block/block_merge_sort.cuh
-@@ -95,7 +95,7 @@ _CCCL_DEVICE _CCCL_FORCEINLINE void SerialMerge(
-   KeyT key1 = keys_shared[keys1_beg];
-   KeyT key2 = keys_shared[keys2_beg];
- 
--#pragma unroll
-+#pragma unroll 1
-   for (int item = 0; item < ITEMS_PER_THREAD; ++item)
-   {
-     bool p = (keys2_beg < keys2_end) && ((keys1_beg >= keys1_end) || compare_op(key2, key1));
-@@ -376,7 +376,7 @@ public:
-       //
-       KeyT max_key = oob_default;
- 
--#pragma unroll
-+#pragma unroll 1
-       for (int item = 1; item < ITEMS_PER_THREAD; ++item)
-       {
-         if (ITEMS_PER_THREAD * linear_tid + item < valid_items)
-diff --git a/cub/cub/thread/thread_sort.cuh b/cub/cub/thread/thread_sort.cuh
-index 7d9e8622f..da5627306 100644
---- a/cub/cub/thread/thread_sort.cuh
-+++ b/cub/cub/thread/thread_sort.cuh
-@@ -87,10 +87,10 @@ StableOddEvenSort(KeyT (&keys)[ITEMS_PER_THREAD], ValueT (&items)[ITEMS_PER_THRE
- {
-   constexpr bool KEYS_ONLY = ::cuda::std::is_same<ValueT, NullType>::value;
- 
--#pragma unroll
-+#pragma unroll 1
-   for (int i = 0; i < ITEMS_PER_THREAD; ++i)
-   {
--#pragma unroll
-+#pragma unroll 1
-     for (int j = 1 & i; j < ITEMS_PER_THREAD - 1; j += 2)
-     {
-       if (compare_op(keys[j + 1], keys[j]))

From 374ee13adaf18503ee671b652f76a3ccb9dc118b Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Fri, 14 Jun 2024 15:28:53 +0100
Subject: [PATCH 14/25] Fix exclude regex in pre-commit clang-format hook
 (#16030)

The clang-tidy changes in #15894 introduced a new exclude regex list to the pre-commit clang-format hook. However, the list was a single character too long: it ended with a trailing |. Consequently, the exclude regex matched the empty string, and hence excluded every C++ file.

Fix this, and apply formatting changes to the files that were modified in the interim and were not clang-format compatible.

Authors:
  - Lawrence Mitchell (https://github.com/wence-)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Kyle Edwards (https://github.com/KyleFromNVIDIA)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/16030
---
 .pre-commit-config.yaml                       |   2 +-
 .../io/orc/orc_reader_multithreaded.cpp       | 107 +++++++++---------
 cpp/tests/interop/from_arrow_test.cpp         |   5 +-
 3 files changed, 58 insertions(+), 56 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index cc08b832e69..f8c4f4b9143 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -60,7 +60,7 @@ repos:
           (?x)^(
             ^cpp/src/io/parquet/ipc/Schema_generated.h|
             ^cpp/src/io/parquet/ipc/Message_generated.h|
-            ^cpp/include/cudf_test/cxxopts.hpp|
+            ^cpp/include/cudf_test/cxxopts.hpp
           )
   - repo: https://github.com/sirosen/texthooks
     rev: 0.6.6
diff --git a/cpp/benchmarks/io/orc/orc_reader_multithreaded.cpp b/cpp/benchmarks/io/orc/orc_reader_multithreaded.cpp
index ffbbc6f8464..aa0ee39a179 100644
--- a/cpp/benchmarks/io/orc/orc_reader_multithreaded.cpp
+++ b/cpp/benchmarks/io/orc/orc_reader_multithreaded.cpp
@@ -50,11 +50,11 @@ std::string get_label(std::string const& test_name, nvbench::state const& state)
 std::tuple<std::vector<cuio_source_sink_pair>, size_t, size_t> write_file_data(
   nvbench::state& state, std::vector<cudf::type_id> const& d_types)
 {
-  auto const cardinality = state.get_int64("cardinality");
-  auto const run_length  = state.get_int64("run_length");
-  auto const num_cols    = state.get_int64("num_cols");
-  size_t const num_files            = get_num_read_threads(state);
-  size_t const per_file_data_size   = get_read_size(state);
+  auto const cardinality          = state.get_int64("cardinality");
+  auto const run_length           = state.get_int64("run_length");
+  auto const num_cols             = state.get_int64("num_cols");
+  size_t const num_files          = get_num_read_threads(state);
+  size_t const per_file_data_size = get_read_size(state);
 
   std::vector<cuio_source_sink_pair> source_sink_vector;
 
@@ -86,7 +86,7 @@ void BM_orc_multithreaded_read_common(nvbench::state& state,
                                       std::vector<cudf::type_id> const& d_types,
                                       std::string const& label)
 {
-  auto const data_size = state.get_int64("total_data_size");
+  auto const data_size   = state.get_int64("total_data_size");
   auto const num_threads = state.get_int64("num_threads");
 
   auto streams = cudf::detail::fork_streams(cudf::get_default_stream(), num_threads);
@@ -104,24 +104,24 @@ void BM_orc_multithreaded_read_common(nvbench::state& state,
   {
     cudf::scoped_range range{("(read) " + label).c_str()};
     state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer,
-              [&](nvbench::launch& launch, auto& timer) {
-                auto read_func = [&](int index) {
-                  auto const stream = streams[index % num_threads];
-                  cudf::io::orc_reader_options read_opts =
-                    cudf::io::orc_reader_options::builder(source_info_vector[index]);
-                  cudf::io::read_orc(read_opts, stream, rmm::mr::get_current_device_resource());
-                };
-
-                threads.paused = true;
-                for (size_t i = 0; i < num_files; ++i) {
-                  threads.submit(read_func, i);
-                }
-                timer.start();
-                threads.paused = false;
-                threads.wait_for_tasks();
-                cudf::detail::join_streams(streams, cudf::get_default_stream());
-                timer.stop();
-              });
+               [&](nvbench::launch& launch, auto& timer) {
+                 auto read_func = [&](int index) {
+                   auto const stream = streams[index % num_threads];
+                   cudf::io::orc_reader_options read_opts =
+                     cudf::io::orc_reader_options::builder(source_info_vector[index]);
+                   cudf::io::read_orc(read_opts, stream, rmm::mr::get_current_device_resource());
+                 };
+
+                 threads.paused = true;
+                 for (size_t i = 0; i < num_files; ++i) {
+                   threads.submit(read_func, i);
+                 }
+                 timer.start();
+                 threads.paused = false;
+                 threads.wait_for_tasks();
+                 cudf::detail::join_streams(streams, cudf::get_default_stream());
+                 timer.stop();
+               });
   }
 
   auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
@@ -184,34 +184,35 @@ void BM_orc_multithreaded_read_chunked_common(nvbench::state& state,
     cudf::scoped_range range{("(read) " + label).c_str()};
     std::vector<cudf::io::table_with_metadata> chunks;
     state.exec(nvbench::exec_tag::sync | nvbench::exec_tag::timer,
-              [&](nvbench::launch& launch, auto& timer) {
-                auto read_func = [&](int index) {
-                  auto const stream = streams[index % num_threads];
-                  cudf::io::orc_reader_options read_opts =
-                    cudf::io::orc_reader_options::builder(source_info_vector[index]);
-                  // divide chunk limits by number of threads so the number of chunks produced is the
-                  // same for all cases. this seems better than the alternative, which is to keep the
-                  // limits the same. if we do that, as the number of threads goes up, the number of
-                  // chunks goes down - so are actually benchmarking the same thing in that case?
-                  auto reader = cudf::io::chunked_orc_reader(
-                    output_limit / num_threads, input_limit / num_threads, read_opts, stream);
-
-                  // read all the chunks
-                  do {
-                    auto table = reader.read_chunk();
-                  } while (reader.has_next());
-                };
-
-                threads.paused = true;
-                for (size_t i = 0; i < num_files; ++i) {
-                  threads.submit(read_func, i);
-                }
-                timer.start();
-                threads.paused = false;
-                threads.wait_for_tasks();
-                cudf::detail::join_streams(streams, cudf::get_default_stream());
-                timer.stop();
-              });
+               [&](nvbench::launch& launch, auto& timer) {
+                 auto read_func = [&](int index) {
+                   auto const stream = streams[index % num_threads];
+                   cudf::io::orc_reader_options read_opts =
+                     cudf::io::orc_reader_options::builder(source_info_vector[index]);
+                   // divide chunk limits by number of threads so the number of chunks produced is
+                   // the same for all cases. this seems better than the alternative, which is to
+                   // keep the limits the same. if we do that, as the number of threads goes up, the
+                   // number of chunks goes down - so are actually benchmarking the same thing in
+                   // that case?
+                   auto reader = cudf::io::chunked_orc_reader(
+                     output_limit / num_threads, input_limit / num_threads, read_opts, stream);
+
+                   // read all the chunks
+                   do {
+                     auto table = reader.read_chunk();
+                   } while (reader.has_next());
+                 };
+
+                 threads.paused = true;
+                 for (size_t i = 0; i < num_files; ++i) {
+                   threads.submit(read_func, i);
+                 }
+                 timer.start();
+                 threads.paused = false;
+                 threads.wait_for_tasks();
+                 cudf::detail::join_streams(streams, cudf::get_default_stream());
+                 timer.stop();
+               });
   }
 
   auto const time = state.get_summary("nv/cold/time/gpu/mean").get_float64("value");
@@ -249,7 +250,7 @@ void BM_orc_multithreaded_read_chunked_list(nvbench::state& state)
   cudf::scoped_range range{label.c_str()};
   BM_orc_multithreaded_read_chunked_common(state, {cudf::type_id::LIST}, label);
 }
-auto const thread_range  = std::vector<nvbench::int64_t>{1, 2, 4, 8};
+auto const thread_range    = std::vector<nvbench::int64_t>{1, 2, 4, 8};
 auto const total_data_size = std::vector<nvbench::int64_t>{512 * 1024 * 1024, 1024 * 1024 * 1024};
 
 // mixed data types: fixed width and strings
diff --git a/cpp/tests/interop/from_arrow_test.cpp b/cpp/tests/interop/from_arrow_test.cpp
index af20a5c772f..6eaa1a07e08 100644
--- a/cpp/tests/interop/from_arrow_test.cpp
+++ b/cpp/tests/interop/from_arrow_test.cpp
@@ -50,7 +50,8 @@ std::unique_ptr<cudf::table> get_cudf_table()
                                                               {true, false, true, true, true});
   columns.emplace_back(std::move(cudf::dictionary::encode(col4)));
   columns.emplace_back(cudf::test::fixed_width_column_wrapper<bool>(
-                         {true, false, true, false, true}, {true, false, true, true, false}).release());
+                         {true, false, true, false, true}, {true, false, true, true, false})
+                         .release());
   columns.emplace_back(cudf::test::strings_column_wrapper(
                          {
                            "",
@@ -338,7 +339,7 @@ TEST_F(FromArrowTest, ChunkedArray)
     std::vector<std::shared_ptr<arrow::Array>>{dict_array1, dict_array2});
   auto boolean_array =
     get_arrow_array<bool>({true, false, true, false, true}, {true, false, true, true, false});
-  auto boolean_chunked_array = std::make_shared<arrow::ChunkedArray>(boolean_array);
+  auto boolean_chunked_array      = std::make_shared<arrow::ChunkedArray>(boolean_array);
   auto large_string_chunked_array = std::make_shared<arrow::ChunkedArray>(
     std::vector<std::shared_ptr<arrow::Array>>{large_string_array_1});
 

From 2297f9a61e2f4153ab2e8a0631f7cfe7971ead14 Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Fri, 14 Jun 2024 17:43:17 +0100
Subject: [PATCH 15/25] Fix initialization error in to_arrow for empty string
 views (#16033)

When converting an empty string view to arrow, we don't bother with copies from device, but rather create the arrow arrays directly. The offset buffer is therefore a singleton int32 array with zero in it.

Previously, the initialization of this array was incorrect, since mutable_data() returns a uint8_t pointer, and so setting the single element could leave 24 of the 32 bits uninitialized.

Fix this by using memset instead to zero out the full buffer.

Authors:
  - Lawrence Mitchell (https://github.com/wence-)

Approvers:
  - David Wendt (https://github.com/davidwendt)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/16033
---
 cpp/src/interop/to_arrow.cu | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/cpp/src/interop/to_arrow.cu b/cpp/src/interop/to_arrow.cu
index 47aee982c32..2b3aa2f08f1 100644
--- a/cpp/src/interop/to_arrow.cu
+++ b/cpp/src/interop/to_arrow.cu
@@ -292,9 +292,9 @@ std::shared_ptr<arrow::Array> dispatch_to_arrow::operator()<cudf::string_view>(
   auto child_arrays      = fetch_child_array(input_view, {{}, {}}, ar_mr, stream);
   if (child_arrays.empty()) {
     // Empty string will have only one value in offset of 4 bytes
-    auto tmp_offset_buffer               = allocate_arrow_buffer(4, ar_mr);
-    auto tmp_data_buffer                 = allocate_arrow_buffer(0, ar_mr);
-    tmp_offset_buffer->mutable_data()[0] = 0;
+    auto tmp_offset_buffer = allocate_arrow_buffer(sizeof(int32_t), ar_mr);
+    auto tmp_data_buffer   = allocate_arrow_buffer(0, ar_mr);
+    memset(tmp_offset_buffer->mutable_data(), 0, sizeof(int32_t));
 
     return std::make_shared<arrow::StringArray>(
       0, std::move(tmp_offset_buffer), std::move(tmp_data_buffer));

From 5facc8cde15cc8301adb0c06fc682f558828fbc8 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri, 14 Jun 2024 07:12:09 -1000
Subject: [PATCH 16/25] Enable ruff TCH: typing imports under if TYPE_CHECKING
 (#16015)

Reduces some unnecessary imports when running cudf and clearly delineates which imports are meant only for typing purposes.

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/16015
---
 docs/cudf/source/conf.py                      |  6 ++++
 pyproject.toml                                |  2 +-
 python/cudf/cudf/_typing.py                   |  3 +-
 python/cudf/cudf/core/_base_index.py          |  9 ++++--
 python/cudf/cudf/core/buffer/spill_manager.py |  6 ++--
 python/cudf/cudf/core/column/categorical.py   | 20 +++++++++----
 python/cudf/cudf/core/column/column.py        |  8 +++--
 python/cudf/cudf/core/column/datetime.py      | 16 +++++-----
 python/cudf/cudf/core/column/decimal.py       |  6 ++--
 python/cudf/cudf/core/column/lists.py         |  6 ++--
 python/cudf/cudf/core/column/numerical.py     | 29 +++++++++++++------
 .../cudf/cudf/core/column/numerical_base.py   |  6 ++--
 python/cudf/cudf/core/column/string.py        |  9 +++---
 python/cudf/cudf/core/column/struct.py        |  5 +++-
 python/cudf/cudf/core/column/timedelta.py     |  6 ++--
 python/cudf/cudf/core/dataframe.py            |  5 +++-
 python/cudf/cudf/core/dtypes.py               |  6 ++--
 python/cudf/cudf/core/frame.py                | 10 +++++--
 python/cudf/cudf/core/index.py                |  5 +++-
 python/cudf/cudf/core/indexed_frame.py        | 15 ++++++----
 python/cudf/cudf/core/multiindex.py           |  9 ++++--
 python/cudf/cudf/core/series.py               | 15 ++++++----
 python/cudf/cudf/core/single_column_frame.py  | 13 +++++----
 23 files changed, 143 insertions(+), 72 deletions(-)

diff --git a/docs/cudf/source/conf.py b/docs/cudf/source/conf.py
index e9c760e288e..108f12bc099 100644
--- a/docs/cudf/source/conf.py
+++ b/docs/cudf/source/conf.py
@@ -554,6 +554,12 @@ def on_missing_reference(app, env, node, contnode):
 nitpick_ignore = [
     ("py:class", "SeriesOrIndex"),
     ("py:class", "Dtype"),
+    # The following are erroneously warned due to
+    # https://github.com/sphinx-doc/sphinx/issues/11225
+    ("py:class", "pa.Array"),
+    ("py:class", "ScalarLike"),
+    ("py:class", "ParentType"),
+    ("py:class", "ColumnLike"),
     # TODO: Remove this when we figure out why typing_extensions doesn't seem
     # to map types correctly for intersphinx
     ("py:class", "typing_extensions.Self"),
diff --git a/pyproject.toml b/pyproject.toml
index d343b237ee7..c602240a0b7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,7 +26,7 @@ quiet-level = 3
 line-length = 79
 
 [tool.ruff.lint]
-select = ["E", "F", "W", "D201", "D204", "D206", "D207", "D208", "D209", "D210", "D211", "D214", "D215", "D300", "D301", "D403", "D405", "D406", "D407", "D408", "D409", "D410", "D411", "D412", "D414", "D418"]
+select = ["E", "F", "W", "D201", "D204", "D206", "D207", "D208", "D209", "D210", "D211", "D214", "D215", "D300", "D301", "D403", "D405", "D406", "D407", "D408", "D409", "D410", "D411", "D412", "D414", "D418", "TCH"]
 ignore = [
     # whitespace before :
     "E203",
diff --git a/python/cudf/cudf/_typing.py b/python/cudf/cudf/_typing.py
index 206173919e1..34c96cc8cb3 100644
--- a/python/cudf/cudf/_typing.py
+++ b/python/cudf/cudf/_typing.py
@@ -5,9 +5,10 @@
 
 import numpy as np
 from pandas import Period, Timedelta, Timestamp
-from pandas.api.extensions import ExtensionDtype
 
 if TYPE_CHECKING:
+    from pandas.api.extensions import ExtensionDtype
+
     import cudf
 
 # Backwards compat: mypy >= 0.790 rejects Type[NotImplemented], but
diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py
index 5d0f7c4ede4..b29fc475b29 100644
--- a/python/cudf/cudf/core/_base_index.py
+++ b/python/cudf/cudf/core/_base_index.py
@@ -4,9 +4,8 @@
 
 import pickle
 import warnings
-from collections.abc import Generator
 from functools import cached_property
-from typing import Any, Literal, Set, Tuple
+from typing import TYPE_CHECKING, Any, Literal, Set, Tuple
 
 import pandas as pd
 from typing_extensions import Self
@@ -31,12 +30,16 @@
 )
 from cudf.core.abc import Serializable
 from cudf.core.column import ColumnBase, column
-from cudf.core.column_accessor import ColumnAccessor
 from cudf.errors import MixedTypeError
 from cudf.utils import ioutils
 from cudf.utils.dtypes import can_convert_to_column, is_mixed_with_object_dtype
 from cudf.utils.utils import _is_same_name
 
+if TYPE_CHECKING:
+    from collections.abc import Generator
+
+    from cudf.core.column_accessor import ColumnAccessor
+
 
 class BaseIndex(Serializable):
     """Base class for all cudf Index types."""
diff --git a/python/cudf/cudf/core/buffer/spill_manager.py b/python/cudf/cudf/core/buffer/spill_manager.py
index cd81149bdb8..7bcf97302aa 100644
--- a/python/cudf/cudf/core/buffer/spill_manager.py
+++ b/python/cudf/cudf/core/buffer/spill_manager.py
@@ -13,15 +13,17 @@
 from contextlib import contextmanager
 from dataclasses import dataclass
 from functools import partial
-from typing import Dict, List, Optional, Tuple
+from typing import TYPE_CHECKING, Dict, List, Optional, Tuple
 
 import rmm.mr
 
-from cudf.core.buffer.spillable_buffer import SpillableBufferOwner
 from cudf.options import get_option
 from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
 from cudf.utils.string import format_bytes
 
+if TYPE_CHECKING:
+    from cudf.core.buffer.spillable_buffer import SpillableBufferOwner
+
 _spill_cudf_nvtx_annotate = partial(
     _cudf_nvtx_annotate, domain="cudf_python-spill"
 )
diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py
index de20b2ace1d..97c2ce5cf1f 100644
--- a/python/cudf/cudf/core/column/categorical.py
+++ b/python/cudf/cudf/core/column/categorical.py
@@ -3,21 +3,17 @@
 from __future__ import annotations
 
 import warnings
-from collections import abc
 from functools import cached_property
 from typing import TYPE_CHECKING, Any, Mapping, Optional, Sequence, Tuple, cast
 
 import numpy as np
 import pandas as pd
 import pyarrow as pa
-from numba import cuda
 from typing_extensions import Self
 
 import cudf
 from cudf import _lib as libcudf
 from cudf._lib.transform import bools_to_mask
-from cudf._typing import ColumnBinaryOperand, ColumnLike, Dtype, ScalarLike
-from cudf.core.buffer import Buffer
 from cudf.core.column import column
 from cudf.core.column.methods import ColumnMethods
 from cudf.core.dtypes import CategoricalDtype, IntervalDtype
@@ -29,7 +25,19 @@
 )
 
 if TYPE_CHECKING:
-    from cudf._typing import SeriesOrIndex, SeriesOrSingleColumnIndex
+    from collections import abc
+
+    import numba.cuda
+
+    from cudf._typing import (
+        ColumnBinaryOperand,
+        ColumnLike,
+        Dtype,
+        ScalarLike,
+        SeriesOrIndex,
+        SeriesOrSingleColumnIndex,
+    )
+    from cudf.core.buffer import Buffer
     from cudf.core.column import (
         ColumnBase,
         DatetimeColumn,
@@ -868,7 +876,7 @@ def clip(self, lo: ScalarLike, hi: ScalarLike) -> "column.ColumnBase":
 
     def data_array_view(
         self, *, mode="write"
-    ) -> cuda.devicearray.DeviceNDArray:
+    ) -> numba.cuda.devicearray.DeviceNDArray:
         return self.codes.data_array_view(mode=mode)
 
     def unique(self) -> CategoricalColumn:
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index 75fc31ddbce..dc937dc0469 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -2,13 +2,13 @@
 
 from __future__ import annotations
 
-import builtins
 import pickle
 from collections import abc
 from functools import cached_property
 from itertools import chain
 from types import SimpleNamespace
 from typing import (
+    TYPE_CHECKING,
     Any,
     Dict,
     List,
@@ -49,7 +49,6 @@
 )
 from cudf._lib.transform import bools_to_mask
 from cudf._lib.types import size_type_dtype
-from cudf._typing import ColumnLike, Dtype, ScalarLike
 from cudf.api.types import (
     _is_non_decimal_numeric_dtype,
     _is_pandas_nullable_extension_dtype,
@@ -89,6 +88,11 @@
 )
 from cudf.utils.utils import _array_ufunc, mask_dtype
 
+if TYPE_CHECKING:
+    import builtins
+
+    from cudf._typing import ColumnLike, Dtype, ScalarLike
+
 if PANDAS_GE_210:
     NumpyExtensionArray = pd.arrays.NumpyExtensionArray
 else:
diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index 057169aa7e1..e24d85bfedf 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -19,22 +19,22 @@
 from cudf import _lib as libcudf
 from cudf._lib.labeling import label_bins
 from cudf._lib.search import search_sorted
-from cudf._typing import (
-    ColumnBinaryOperand,
-    DatetimeLikeScalar,
-    Dtype,
-    DtypeObj,
-    ScalarLike,
-)
 from cudf.api.types import is_datetime64_dtype, is_scalar, is_timedelta64_dtype
 from cudf.core._compat import PANDAS_GE_220
-from cudf.core.buffer import Buffer
 from cudf.core.column import ColumnBase, as_column, column, string
 from cudf.core.column.timedelta import _unit_to_nanoseconds_conversion
 from cudf.utils.dtypes import _get_base_dtype
 from cudf.utils.utils import _all_bools_with_nulls
 
 if TYPE_CHECKING:
+    from cudf._typing import (
+        ColumnBinaryOperand,
+        DatetimeLikeScalar,
+        Dtype,
+        DtypeObj,
+        ScalarLike,
+    )
+    from cudf.core.buffer import Buffer
     from cudf.core.column.numerical import NumericalColumn
 
 if PANDAS_GE_220:
diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py
index 3a0f6649e21..9c1bedc9926 100644
--- a/python/cudf/cudf/core/column/decimal.py
+++ b/python/cudf/cudf/core/column/decimal.py
@@ -4,7 +4,7 @@
 
 import warnings
 from decimal import Decimal
-from typing import Any, Optional, Sequence, Union, cast
+from typing import TYPE_CHECKING, Any, Optional, Sequence, Union, cast
 
 import cupy as cp
 import numpy as np
@@ -16,7 +16,6 @@
 from cudf._lib.strings.convert.convert_fixed_point import (
     from_decimal as cpp_from_decimal,
 )
-from cudf._typing import ColumnBinaryOperand, Dtype
 from cudf.api.types import is_integer_dtype, is_scalar
 from cudf.core.buffer import as_buffer
 from cudf.core.column import ColumnBase
@@ -31,6 +30,9 @@
 
 from .numerical_base import NumericalBaseColumn
 
+if TYPE_CHECKING:
+    from cudf._typing import ColumnBinaryOperand, Dtype
+
 
 class DecimalBaseColumn(NumericalBaseColumn):
     """Base column for decimal32, decimal64 or decimal128 columns"""
diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py
index 8f8ee46c796..080ba949d62 100644
--- a/python/cudf/cudf/core/column/lists.py
+++ b/python/cudf/cudf/core/column/lists.py
@@ -3,7 +3,7 @@
 from __future__ import annotations
 
 from functools import cached_property
-from typing import List, Optional, Sequence, Tuple, Union
+from typing import TYPE_CHECKING, List, Optional, Sequence, Tuple, Union
 
 import numpy as np
 import pandas as pd
@@ -26,13 +26,15 @@
 )
 from cudf._lib.strings.convert.convert_lists import format_list_column
 from cudf._lib.types import size_type_dtype
-from cudf._typing import ColumnBinaryOperand, ColumnLike, Dtype, ScalarLike
 from cudf.api.types import _is_non_decimal_numeric_dtype, is_scalar
 from cudf.core.column import ColumnBase, as_column, column
 from cudf.core.column.methods import ColumnMethods, ParentType
 from cudf.core.dtypes import ListDtype
 from cudf.core.missing import NA
 
+if TYPE_CHECKING:
+    from cudf._typing import ColumnBinaryOperand, ColumnLike, Dtype, ScalarLike
+
 
 class ListColumn(ColumnBase):
     dtype: ListDtype
diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
index 1952d7eeb71..6af67e02bb4 100644
--- a/python/cudf/cudf/core/column/numerical.py
+++ b/python/cudf/cudf/core/column/numerical.py
@@ -3,7 +3,16 @@
 from __future__ import annotations
 
 import functools
-from typing import Any, Callable, Optional, Sequence, Tuple, Union, cast
+from typing import (
+    TYPE_CHECKING,
+    Any,
+    Callable,
+    Optional,
+    Sequence,
+    Tuple,
+    Union,
+    cast,
+)
 
 import cupy as cp
 import numpy as np
@@ -14,13 +23,6 @@
 from cudf import _lib as libcudf
 from cudf._lib import pylibcudf
 from cudf._lib.types import size_type_dtype
-from cudf._typing import (
-    ColumnBinaryOperand,
-    ColumnLike,
-    Dtype,
-    DtypeObj,
-    ScalarLike,
-)
 from cudf.api.types import (
     is_bool_dtype,
     is_float_dtype,
@@ -28,7 +30,6 @@
     is_integer_dtype,
     is_scalar,
 )
-from cudf.core.buffer import Buffer
 from cudf.core.column import (
     ColumnBase,
     as_column,
@@ -48,6 +49,16 @@
 
 from .numerical_base import NumericalBaseColumn
 
+if TYPE_CHECKING:
+    from cudf._typing import (
+        ColumnBinaryOperand,
+        ColumnLike,
+        Dtype,
+        DtypeObj,
+        ScalarLike,
+    )
+    from cudf.core.buffer import Buffer
+
 _unaryop_map = {
     "ASIN": "ARCSIN",
     "ACOS": "ARCCOS",
diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py
index d38ec9cf30f..bd48054a951 100644
--- a/python/cudf/cudf/core/column/numerical_base.py
+++ b/python/cudf/cudf/core/column/numerical_base.py
@@ -3,17 +3,19 @@
 
 from __future__ import annotations
 
-from typing import Optional, cast
+from typing import TYPE_CHECKING, Optional, cast
 
 import numpy as np
 
 import cudf
 from cudf import _lib as libcudf
-from cudf._typing import ScalarLike
 from cudf.core.column import ColumnBase
 from cudf.core.missing import NA
 from cudf.core.mixins import Scannable
 
+if TYPE_CHECKING:
+    from cudf._typing import ScalarLike
+
 
 class NumericalBaseColumn(ColumnBase, Scannable):
     """A column composed of numerical data.
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index ad7dbe5e52e..87df2d2f1f1 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -16,11 +16,9 @@
     overload,
 )
 
-import cupy
 import numpy as np
 import pandas as pd
 import pyarrow as pa
-from numba import cuda
 from typing_extensions import Self
 
 import cudf
@@ -30,7 +28,6 @@
 from cudf._lib.column import Column
 from cudf._lib.types import size_type_dtype
 from cudf.api.types import is_integer, is_scalar, is_string_dtype
-from cudf.core.buffer import Buffer
 from cudf.core.column import column, datetime
 from cudf.core.column.column import ColumnBase
 from cudf.core.column.methods import ColumnMethods
@@ -46,6 +43,9 @@ def str_to_boolean(column: StringColumn):
 
 
 if TYPE_CHECKING:
+    import cupy
+    import numba.cuda
+
     from cudf._typing import (
         ColumnBinaryOperand,
         ColumnLike,
@@ -53,6 +53,7 @@ def str_to_boolean(column: StringColumn):
         ScalarLike,
         SeriesOrIndex,
     )
+    from cudf.core.buffer import Buffer
 
 
 _str_to_numeric_typecast_functions = {
@@ -5598,7 +5599,7 @@ def any(self, skipna: bool = True) -> bool:
 
     def data_array_view(
         self, *, mode="write"
-    ) -> cuda.devicearray.DeviceNDArray:
+    ) -> numba.cuda.devicearray.DeviceNDArray:
         raise ValueError("Cannot get an array view of a StringColumn")
 
     @property
diff --git a/python/cudf/cudf/core/column/struct.py b/python/cudf/cudf/core/column/struct.py
index 6dd35570b95..c2ce787eeae 100644
--- a/python/cudf/cudf/core/column/struct.py
+++ b/python/cudf/cudf/core/column/struct.py
@@ -2,17 +2,20 @@
 from __future__ import annotations
 
 from functools import cached_property
+from typing import TYPE_CHECKING
 
 import pandas as pd
 import pyarrow as pa
 
 import cudf
-from cudf._typing import Dtype
 from cudf.core.column import ColumnBase
 from cudf.core.column.methods import ColumnMethods
 from cudf.core.dtypes import StructDtype
 from cudf.core.missing import NA
 
+if TYPE_CHECKING:
+    from cudf._typing import Dtype
+
 
 class StructColumn(ColumnBase):
     """
diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py
index c6af052b56f..0af847f38af 100644
--- a/python/cudf/cudf/core/column/timedelta.py
+++ b/python/cudf/cudf/core/column/timedelta.py
@@ -4,7 +4,7 @@
 
 import datetime
 import functools
-from typing import Any, Optional, Sequence, cast
+from typing import TYPE_CHECKING, Any, Optional, Sequence, cast
 
 import numpy as np
 import pandas as pd
@@ -13,13 +13,15 @@
 
 import cudf
 from cudf import _lib as libcudf
-from cudf._typing import ColumnBinaryOperand, DatetimeLikeScalar, Dtype
 from cudf.api.types import is_scalar, is_timedelta64_dtype
 from cudf.core.buffer import Buffer, acquire_spill_lock
 from cudf.core.column import ColumnBase, column, string
 from cudf.utils.dtypes import np_to_pa_dtype
 from cudf.utils.utils import _all_bools_with_nulls
 
+if TYPE_CHECKING:
+    from cudf._typing import ColumnBinaryOperand, DatetimeLikeScalar, Dtype
+
 _unit_to_nanoseconds_conversion = {
     "ns": 1,
     "us": 1_000,
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 7438b0237d5..70820fa8e00 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -15,6 +15,7 @@
 from collections import abc, defaultdict
 from collections.abc import Iterator
 from typing import (
+    TYPE_CHECKING,
     Any,
     Callable,
     Dict,
@@ -41,7 +42,6 @@
 import cudf
 import cudf.core.common
 from cudf import _lib as libcudf
-from cudf._typing import ColumnLike, Dtype, NotImplementedType
 from cudf.api.extensions import no_default
 from cudf.api.types import (
     _is_scalar_or_zero_d_array,
@@ -99,6 +99,9 @@
 from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
 from cudf.utils.utils import GetAttrGetItemMixin, _external_only_api
 
+if TYPE_CHECKING:
+    from cudf._typing import ColumnLike, Dtype, NotImplementedType
+
 _cupy_nan_methods_map = {
     "min": "nanmin",
     "max": "nanmax",
diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py
index 4729233ee6e..b1282040e60 100644
--- a/python/cudf/cudf/core/dtypes.py
+++ b/python/cudf/cudf/core/dtypes.py
@@ -6,7 +6,7 @@
 import textwrap
 import warnings
 from functools import cached_property
-from typing import Any, Callable, Dict, List, Tuple, Type, Union
+from typing import TYPE_CHECKING, Any, Callable, Dict, List, Tuple, Type, Union
 
 import numpy as np
 import pandas as pd
@@ -19,9 +19,11 @@
 from cudf._typing import Dtype
 from cudf.core._compat import PANDAS_LT_300
 from cudf.core.abc import Serializable
-from cudf.core.buffer import Buffer
 from cudf.utils.docutils import doc_apply
 
+if TYPE_CHECKING:
+    from cudf.core.buffer import Buffer
+
 
 def dtype(arbitrary):
     """
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 01b56f1edc4..ffaa90ef915 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -6,10 +6,10 @@
 import itertools
 import operator
 import pickle
-import types
 import warnings
 from collections import abc
 from typing import (
+    TYPE_CHECKING,
     Any,
     Callable,
     Dict,
@@ -31,7 +31,6 @@
 
 import cudf
 from cudf import _lib as libcudf
-from cudf._typing import Dtype
 from cudf.api.types import is_dtype_equal, is_scalar
 from cudf.core.buffer import acquire_spill_lock
 from cudf.core.column import (
@@ -48,6 +47,11 @@
 from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
 from cudf.utils.utils import _array_ufunc, _warn_no_dask_cudf
 
+if TYPE_CHECKING:
+    from types import ModuleType
+
+    from cudf._typing import Dtype
+
 
 # TODO: It looks like Frame is missing a declaration of `copy`, need to add
 class Frame(BinaryOperand, Scannable):
@@ -410,7 +414,7 @@ def __arrow_array__(self, type=None):
     def _to_array(
         self,
         get_array: Callable,
-        module: types.ModuleType,
+        module: ModuleType,
         copy: bool,
         dtype: Union[Dtype, None] = None,
         na_value=None,
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 732e5cdb01a..655f7607b37 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -5,10 +5,10 @@
 import operator
 import pickle
 import warnings
-from collections.abc import Generator
 from functools import cache, cached_property
 from numbers import Number
 from typing import (
+    TYPE_CHECKING,
     Any,
     List,
     Literal,
@@ -71,6 +71,9 @@
 from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
 from cudf.utils.utils import _warn_no_dask_cudf, search_range
 
+if TYPE_CHECKING:
+    from collections.abc import Generator
+
 
 class IndexMeta(type):
     """Custom metaclass for Index that overrides instance/subclass tests."""
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index fdc78005996..75614fa46c7 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -9,6 +9,7 @@
 import warnings
 from collections import Counter, abc
 from typing import (
+    TYPE_CHECKING,
     Any,
     Callable,
     Dict,
@@ -31,12 +32,6 @@
 
 import cudf
 import cudf._lib as libcudf
-from cudf._typing import (
-    ColumnLike,
-    DataFrameOrSeries,
-    Dtype,
-    NotImplementedType,
-)
 from cudf.api.extensions import no_default
 from cudf.api.types import (
     _is_non_decimal_numeric_dtype,
@@ -70,6 +65,14 @@
 from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
 from cudf.utils.utils import _warn_no_dask_cudf
 
+if TYPE_CHECKING:
+    from cudf._typing import (
+        ColumnLike,
+        DataFrameOrSeries,
+        Dtype,
+        NotImplementedType,
+    )
+
 doc_reset_index_template = """
         Reset the index of the {klass}, or a level of it.
 
diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index 6d3520e33cf..865d9660b1d 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -8,10 +8,9 @@
 import pickle
 import warnings
 from collections import abc
-from collections.abc import Generator
 from functools import cached_property
 from numbers import Integral
-from typing import Any, List, MutableMapping, Tuple, Union
+from typing import TYPE_CHECKING, Any, List, MutableMapping, Tuple, Union
 
 import cupy as cp
 import numpy as np
@@ -20,7 +19,6 @@
 import cudf
 import cudf._lib as libcudf
 from cudf._lib.types import size_type_dtype
-from cudf._typing import DataFrameOrSeries
 from cudf.api.extensions import no_default
 from cudf.api.types import is_integer, is_list_like, is_object_dtype
 from cudf.core import column
@@ -36,6 +34,11 @@
 from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
 from cudf.utils.utils import NotIterable, _external_only_api, _is_same_name
 
+if TYPE_CHECKING:
+    from collections.abc import Generator
+
+    from cudf._typing import DataFrameOrSeries
+
 
 def _maybe_indices_to_slice(indices: cp.ndarray) -> Union[slice, cp.ndarray]:
     """Makes best effort to convert an array of indices into a python slice.
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index a52b583d3b4..1b1e82333cf 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -10,6 +10,7 @@
 from collections import abc
 from shutil import get_terminal_size
 from typing import (
+    TYPE_CHECKING,
     Any,
     Dict,
     Literal,
@@ -27,12 +28,6 @@
 
 import cudf
 from cudf import _lib as libcudf
-from cudf._typing import (
-    ColumnLike,
-    DataFrameOrSeries,
-    NotImplementedType,
-    ScalarLike,
-)
 from cudf.api.extensions import no_default
 from cudf.api.types import (
     _is_non_decimal_numeric_dtype,
@@ -85,6 +80,14 @@
 )
 from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
 
+if TYPE_CHECKING:
+    from cudf._typing import (
+        ColumnLike,
+        DataFrameOrSeries,
+        NotImplementedType,
+        ScalarLike,
+    )
+
 
 def _format_percentile_names(percentiles):
     return [f"{int(x * 100)}%" for x in percentiles]
diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py
index acc74129a29..6fd4e857e02 100644
--- a/python/cudf/cudf/core/single_column_frame.py
+++ b/python/cudf/cudf/core/single_column_frame.py
@@ -3,15 +3,11 @@
 
 from __future__ import annotations
 
-from typing import Any, Dict, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
 
-import cupy
-import numpy
-import pyarrow as pa
 from typing_extensions import Self
 
 import cudf
-from cudf._typing import NotImplementedType, ScalarLike
 from cudf.api.extensions import no_default
 from cudf.api.types import (
     _is_scalar_or_zero_d_array,
@@ -25,6 +21,13 @@
 from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
 from cudf.utils.utils import NotIterable
 
+if TYPE_CHECKING:
+    import cupy
+    import numpy
+    import pyarrow as pa
+
+    from cudf._typing import NotImplementedType, ScalarLike
+
 
 class SingleColumnFrame(Frame, NotIterable):
     """A one-dimensional frame.

From 9225633e83ca09592c5a144c523f46e95c6e9d75 Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri, 14 Jun 2024 07:13:00 -1000
Subject: [PATCH 17/25] Avoid redefining Frame._get_columns_by_label in
 subclasses (#15912)

`Frame._get_columns_by_label` was redefined in `Series` and `DataFrame` to handle some special edge cases in `DataFrame.__getitem__` and empty `Series`.

By making `_from_data_like_self` more consistent in preserving external properties and moving the special casing to the callers, we only need to define `Frame._get_columns_by_label` once.

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Charles Blackmon-Luca (https://github.com/charlesbluca)

URL: https://github.com/rapidsai/cudf/pull/15912
---
 python/cudf/cudf/core/dataframe.py     | 36 +++++++-------------------
 python/cudf/cudf/core/frame.py         | 28 +++++++++++---------
 python/cudf/cudf/core/indexed_frame.py |  4 +--
 python/cudf/cudf/core/series.py        | 20 +++++---------
 4 files changed, 34 insertions(+), 54 deletions(-)

diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 70820fa8e00..80260c7699b 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -1348,7 +1348,16 @@ def __getitem__(self, arg):
         8  8  8  8
         """
         if _is_scalar_or_zero_d_array(arg) or isinstance(arg, tuple):
-            return self._get_columns_by_label(arg, downcast=True)
+            out = self._get_columns_by_label(arg)
+            if is_scalar(arg):
+                nlevels = 1
+            elif isinstance(arg, tuple):
+                nlevels = len(arg)
+            if self._data.multiindex is False or nlevels == self._data.nlevels:
+                out = self._constructor_sliced._from_data(out._data)
+                out.index = self.index
+                out.name = arg
+            return out
 
         elif isinstance(arg, slice):
             return self._slice(arg)
@@ -1993,31 +2002,6 @@ def _repr_html_(self):
     def _repr_latex_(self):
         return self._get_renderable_dataframe().to_pandas()._repr_latex_()
 
-    @_cudf_nvtx_annotate
-    def _get_columns_by_label(
-        self, labels, *, downcast=False
-    ) -> Self | Series:
-        """
-        Return columns of dataframe by `labels`
-
-        If downcast is True, try and downcast from a DataFrame to a Series
-        """
-        ca = self._data.select_by_label(labels)
-        if downcast:
-            if is_scalar(labels):
-                nlevels = 1
-            elif isinstance(labels, tuple):
-                nlevels = len(labels)
-            if self._data.multiindex is False or nlevels == self._data.nlevels:
-                out = self._constructor_sliced._from_data(
-                    ca, index=self.index, name=labels
-                )
-                return out
-        out = self.__class__._from_data(
-            ca, index=self.index, columns=ca.to_pandas_index()
-        )
-        return out
-
     def _make_operands_and_index_for_binop(
         self,
         other: Any,
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index ffaa90ef915..ee310cfcb58 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -136,12 +136,19 @@ def deserialize(cls, header, frames):
     @classmethod
     @_cudf_nvtx_annotate
     def _from_data(cls, data: MutableMapping) -> Self:
+        """
+        Construct cls from a ColumnAccessor-like mapping.
+        """
         obj = cls.__new__(cls)
         Frame.__init__(obj, data)
         return obj
 
     @_cudf_nvtx_annotate
     def _from_data_like_self(self, data: MutableMapping) -> Self:
+        """
+        Return type(self) from a ColumnAccessor-like mapping but
+        with the external properties, e.g. .index, .name, of self.
+        """
         return self._from_data(data)
 
     @_cudf_nvtx_annotate
@@ -355,12 +362,13 @@ def equals(self, other) -> bool:
         )
 
     @_cudf_nvtx_annotate
-    def _get_columns_by_label(self, labels, *, downcast=False) -> Self:
+    def _get_columns_by_label(self, labels) -> Self:
         """
-        Returns columns of the Frame specified by `labels`
+        Returns columns of the Frame specified by `labels`.
 
+        Akin to cudf.DataFrame(...).loc[:, labels]
         """
-        return self.__class__._from_data(self._data.select_by_label(labels))
+        return self._from_data_like_self(self._data.select_by_label(labels))
 
     @property
     @_cudf_nvtx_annotate
@@ -1438,14 +1446,10 @@ def _get_sorted_inds(
         Get the indices required to sort self according to the columns
         specified in by.
         """
-
-        to_sort = [
-            *(
-                self
-                if by is None
-                else self._get_columns_by_label(list(by), downcast=False)
-            )._columns
-        ]
+        if by is None:
+            to_sort = self._columns
+        else:
+            to_sort = self._get_columns_by_label(list(by))._columns
 
         if is_scalar(ascending):
             ascending_lst = [ascending] * len(to_sort)
@@ -1453,7 +1457,7 @@ def _get_sorted_inds(
             ascending_lst = list(ascending)
 
         return libcudf.sort.order_by(
-            to_sort,
+            list(to_sort),
             ascending_lst,
             na_position,
             stable=True,
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 75614fa46c7..3a4f4874e35 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -309,8 +309,8 @@ def _from_data(
 
     @_cudf_nvtx_annotate
     def _from_data_like_self(self, data: MutableMapping):
-        out = self._from_data(data, self.index)
-        out._data._level_names = self._data._level_names
+        out = super()._from_data_like_self(data)
+        out.index = self.index
         return out
 
     @_cudf_nvtx_annotate
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index 1b1e82333cf..ebf6910ca5f 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -685,6 +685,12 @@ def _from_data(
             out.name = name
         return out
 
+    @_cudf_nvtx_annotate
+    def _from_data_like_self(self, data: MutableMapping):
+        out = super()._from_data_like_self(data)
+        out.name = self.name
+        return out
+
     @_cudf_nvtx_annotate
     def __contains__(self, item):
         return item in self.index
@@ -859,20 +865,6 @@ def deserialize(cls, header, frames):
 
         return obj
 
-    def _get_columns_by_label(self, labels, *, downcast=False) -> Self:
-        """Return the column specified by `labels`
-
-        For cudf.Series, either the column, or an empty series is returned.
-        Parameter `downcast` does not have effects.
-        """
-        ca = self._data.select_by_label(labels)
-
-        return (
-            self.__class__._from_data(data=ca, index=self.index)
-            if len(ca) > 0
-            else self.__class__(dtype=self.dtype, name=self.name)
-        )
-
     @_cudf_nvtx_annotate
     def drop(
         self,

From 9dc5e8c2836fa2e54831d25b7f051e031bf553b9 Mon Sep 17 00:00:00 2001
From: Ben Jarmak <104460670+jarmak-nv@users.noreply.github.com>
Date: Fri, 14 Jun 2024 13:31:29 -0400
Subject: [PATCH 18/25] Project automation update: skip if not in project
 (#16035)

This PR adds another condition for running the project automation jobs. PRs aren't always in the cuDF Python project; when a PR is not in the project, we should skip the job rather than attempting to run it and having it throw an error.

Authors:
  - Ben Jarmak (https://github.com/jarmak-nv)

Approvers:
  - James Lamb (https://github.com/jameslamb)
  - Vyas Ramasubramani (https://github.com/vyasr)

URL: https://github.com/rapidsai/cudf/pull/16035
---
 .github/workflows/pr_issue_status_automation.yml | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.github/workflows/pr_issue_status_automation.yml b/.github/workflows/pr_issue_status_automation.yml
index 837963c3286..8ca971dc28d 100644
--- a/.github/workflows/pr_issue_status_automation.yml
+++ b/.github/workflows/pr_issue_status_automation.yml
@@ -35,7 +35,7 @@ jobs:
     update-status:
       # This job sets the PR and its linked issues to "In Progress" status
       uses: rapidsai/shared-workflows/.github/workflows/project-get-set-single-select-field.yaml@branch-24.08
-      if: github.event.pull_request.state == 'open'
+      if: ${{ github.event.pull_request.state == 'open' && needs.get-project-id.outputs.ITEM_PROJECT_ID != '' }}
       needs: get-project-id
       with:
         PROJECT_ID: "PVT_kwDOAp2shc4AiNzl"
@@ -51,7 +51,7 @@ jobs:
     update-sprint:
       # This job sets the PR and its linked issues to the current "Weekly Sprint"
       uses: rapidsai/shared-workflows/.github/workflows/project-get-set-iteration-field.yaml@branch-24.08
-      if: github.event.pull_request.state == 'open'
+      if: ${{ github.event.pull_request.state == 'open' && needs.get-project-id.outputs.ITEM_PROJECT_ID != '' }}
       needs: get-project-id
       with:
         PROJECT_ID: "PVT_kwDOAp2shc4AiNzl"

From f89cc07b50d3f89e7da8f98afb5fe8f9d9cf33c6 Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Fri, 14 Jun 2024 13:22:49 -0500
Subject: [PATCH 19/25] Add `codecov` coverage for `pandas_tests` (#14513)

Fixes: #14496

This PR enables code-coverage for `pandas` tests that are run in cudf CI in pandas accelerator mode.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - Lawrence Mitchell (https://github.com/wence-)
  - Vyas Ramasubramani (https://github.com/vyasr)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Lawrence Mitchell (https://github.com/wence-)

URL: https://github.com/rapidsai/cudf/pull/14513
---
 ci/cudf_pandas_scripts/run_tests.sh               | 11 ++++++++++-
 python/cudf/cudf_pandas_tests/test_cudf_pandas.py |  3 +++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/ci/cudf_pandas_scripts/run_tests.sh b/ci/cudf_pandas_scripts/run_tests.sh
index 78945d37f22..1c3b99953fb 100755
--- a/ci/cudf_pandas_scripts/run_tests.sh
+++ b/ci/cudf_pandas_scripts/run_tests.sh
@@ -5,6 +5,10 @@
 
 set -eoxu pipefail
 
+RAPIDS_TESTS_DIR=${RAPIDS_TESTS_DIR:-"${PWD}/test-results"}
+RAPIDS_COVERAGE_DIR=${RAPIDS_COVERAGE_DIR:-"${PWD}/coverage-results"}
+mkdir -p "${RAPIDS_TESTS_DIR}" "${RAPIDS_COVERAGE_DIR}"
+
 # Function to display script usage
 function display_usage {
     echo "Usage: $0 [--no-cudf]"
@@ -36,4 +40,9 @@ else
     python -m pip install $(ls ./local-cudf-dep/cudf*.whl)[test,cudf-pandas-tests]
 fi
 
-python -m pytest -p cudf.pandas ./python/cudf/cudf_pandas_tests/
+python -m pytest -p cudf.pandas \
+    --cov-config=./python/cudf/.coveragerc \
+    --cov=cudf \
+    --cov-report=xml:"${RAPIDS_COVERAGE_DIR}/cudf-pandas-coverage.xml" \
+    --cov-report=term \
+    ./python/cudf/cudf_pandas_tests/
diff --git a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
index c251e4a197e..5be4d350c0b 100644
--- a/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
+++ b/python/cudf/cudf_pandas_tests/test_cudf_pandas.py
@@ -464,6 +464,9 @@ def test_options_mode():
     assert xpd.options.mode.copy_on_write == pd.options.mode.copy_on_write
 
 
+# Codecov and Profiler interfere with each-other,
+# hence we don't want to run code-cov on this test.
+@pytest.mark.no_cover
 def test_profiler():
     pytest.importorskip("cudf")
 

From 2ad502efe5f9c927b5bc0e5a80820b99f6630e1b Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Fri, 14 Jun 2024 10:50:41 -1000
Subject: [PATCH 20/25] Fix nunique for `MultiIndex`, `DataFrame`, and all NA
 case with `dropna=False` (#15962)

Fixes 3 bugs with `nunique`

* `MultiIndex.nunique` returning a `dict` instead of an `int`
* `.nunique(dropna=False)` with all `NA`s returning 0 instead of 1
* `DataFrame.nunique` not preserving column class and type in the resulting `Series.index`

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - GALI PREM SAGAR (https://github.com/galipremsagar)
  - David Wendt (https://github.com/davidwendt)
  - Bradley Dice (https://github.com/bdice)
  - Shruti Shivakumar (https://github.com/shrshi)

URL: https://github.com/rapidsai/cudf/pull/15962
---
 cpp/src/stream_compaction/distinct_count.cu  |  6 +++++-
 python/cudf/cudf/core/dataframe.py           |  8 +++++---
 python/cudf/cudf/core/frame.py               |  7 +++----
 python/cudf/cudf/core/index.py               |  2 +-
 python/cudf/cudf/core/multiindex.py          |  5 +++++
 python/cudf/cudf/core/single_column_frame.py |  2 --
 python/cudf/cudf/tests/test_dataframe.py     | 14 ++++++++++++++
 python/cudf/cudf/tests/test_multiindex.py    | 11 +++++++++++
 python/cudf/cudf/tests/test_series.py        | 10 ++++++++++
 9 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/cpp/src/stream_compaction/distinct_count.cu b/cpp/src/stream_compaction/distinct_count.cu
index b7aadbe14fa..99ca89cc021 100644
--- a/cpp/src/stream_compaction/distinct_count.cu
+++ b/cpp/src/stream_compaction/distinct_count.cu
@@ -187,7 +187,11 @@ cudf::size_type distinct_count(column_view const& input,
                                nan_policy nan_handling,
                                rmm::cuda_stream_view stream)
 {
-  if (0 == input.size() or input.null_count() == input.size()) { return 0; }
+  if (0 == input.size()) { return 0; }
+
+  if (input.null_count() == input.size()) {
+    return static_cast<size_type>(null_handling == null_policy::INCLUDE);
+  }
 
   auto count = detail::distinct_count(table_view{{input}}, null_equality::EQUAL, stream);
 
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index 80260c7699b..d8d46a6df73 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -7462,7 +7462,7 @@ def __dataframe__(
             self, nan_as_null=nan_as_null, allow_copy=allow_copy
         )
 
-    def nunique(self, axis=0, dropna=True):
+    def nunique(self, axis=0, dropna: bool = True) -> Series:
         """
         Count number of distinct elements in specified axis.
         Return Series with number of distinct elements. Can ignore NaN values.
@@ -7490,8 +7490,10 @@ def nunique(self, axis=0, dropna=True):
         """
         if axis != 0:
             raise NotImplementedError("axis parameter is not supported yet.")
-
-        return cudf.Series(super().nunique(dropna=dropna))
+        counts = [col.distinct_count(dropna=dropna) for col in self._columns]
+        return self._constructor_sliced(
+            counts, index=self._data.to_pandas_index()
+        )
 
     def _sample_axis_1(
         self,
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index ee310cfcb58..6a1ef05b1f9 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -1903,10 +1903,9 @@ def nunique(self, dropna: bool = True):
         dict
             Name and unique value counts of each column in frame.
         """
-        return {
-            name: col.distinct_count(dropna=dropna)
-            for name, col in self._data.items()
-        }
+        raise NotImplementedError(
+            f"{type(self).__name__} does not implement nunique"
+        )
 
     @staticmethod
     @_cudf_nvtx_annotate
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 655f7607b37..11d09e470ff 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -898,7 +898,7 @@ def __array__(self, dtype=None):
         )
 
     @_cudf_nvtx_annotate
-    def nunique(self) -> int:
+    def nunique(self, dropna: bool = True) -> int:
         return len(self)
 
     @_cudf_nvtx_annotate
diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index 865d9660b1d..91488e06f4e 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -1749,6 +1749,11 @@ def fillna(self, value):
     def unique(self):
         return self.drop_duplicates(keep="first")
 
+    @_cudf_nvtx_annotate
+    def nunique(self, dropna: bool = True) -> int:
+        mi = self.dropna(how="all") if dropna else self
+        return len(mi.unique())
+
     def _clean_nulls_from_index(self):
         """
         Convert all na values(if any) in MultiIndex object
diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py
index 6fd4e857e02..43b5dc76f13 100644
--- a/python/cudf/cudf/core/single_column_frame.py
+++ b/python/cudf/cudf/core/single_column_frame.py
@@ -338,8 +338,6 @@ def nunique(self, dropna: bool = True) -> int:
         int
             Number of unique values in the column.
         """
-        if self._column.null_count == len(self):
-            return 0
         return self._column.distinct_count(dropna=dropna)
 
     def _get_elements_from_column(self, arg) -> Union[ScalarLike, ColumnBase]:
diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py
index 98e9f9881c7..649821b9b7c 100644
--- a/python/cudf/cudf/tests/test_dataframe.py
+++ b/python/cudf/cudf/tests/test_dataframe.py
@@ -9966,6 +9966,20 @@ def test_dataframe_nunique(data):
     assert_eq(expected, actual)
 
 
+@pytest.mark.parametrize(
+    "columns",
+    [
+        pd.RangeIndex(2, name="foo"),
+        pd.MultiIndex.from_arrays([[1, 2], [2, 3]], names=["foo", 1]),
+        pd.Index([3, 5], dtype=np.int8, name="foo"),
+    ],
+)
+def test_nunique_preserve_column_in_index(columns):
+    df = cudf.DataFrame([[1, 2]], columns=columns)
+    result = df.nunique().index.to_pandas()
+    assert_eq(result, columns, exact=True)
+
+
 @pytest.mark.parametrize(
     "data",
     [{"key": [0, 1, 1, 0, 0, 1], "val": [1, 8, 3, 9, -3, 8]}],
diff --git a/python/cudf/cudf/tests/test_multiindex.py b/python/cudf/cudf/tests/test_multiindex.py
index f143112a45f..7b95e4f9a44 100644
--- a/python/cudf/cudf/tests/test_multiindex.py
+++ b/python/cudf/cudf/tests/test_multiindex.py
@@ -2162,3 +2162,14 @@ def test_multi_index_contains_hashable():
         lfunc_args_and_kwargs=((),),
         rfunc_args_and_kwargs=((),),
     )
+
+
+@pytest.mark.parametrize("array", [[1, 2], [1, None], [None, None]])
+@pytest.mark.parametrize("dropna", [True, False])
+def test_nunique(array, dropna):
+    arrays = [array, [3, 4]]
+    gidx = cudf.MultiIndex.from_arrays(arrays)
+    pidx = pd.MultiIndex.from_arrays(arrays)
+    result = gidx.nunique(dropna=dropna)
+    expected = pidx.nunique(dropna=dropna)
+    assert result == expected
diff --git a/python/cudf/cudf/tests/test_series.py b/python/cudf/cudf/tests/test_series.py
index 30189e1ac8a..52956c230ba 100644
--- a/python/cudf/cudf/tests/test_series.py
+++ b/python/cudf/cudf/tests/test_series.py
@@ -2851,3 +2851,13 @@ def test_nans_to_nulls_noop_copies_column(value):
     ser1 = cudf.Series([value])
     ser2 = ser1.nans_to_nulls()
     assert ser1._column is not ser2._column
+
+
+@pytest.mark.parametrize("dropna", [False, True])
+def test_nunique_all_null(dropna):
+    data = [None, None]
+    pd_ser = pd.Series(data)
+    cudf_ser = cudf.Series(data)
+    result = pd_ser.nunique(dropna=dropna)
+    expected = cudf_ser.nunique(dropna=dropna)
+    assert result == expected

From 74b382637e69d39df292c59938b5911d9ca3bdf9 Mon Sep 17 00:00:00 2001
From: Paul Mattione <156858817+pmattione-nvidia@users.noreply.github.com>
Date: Fri, 14 Jun 2024 17:01:35 -0500
Subject: [PATCH 21/25] Fix decimal -> float cast in ast code (#16038)

Fix decimal -> float cast in ast code that was missed during the earlier code refactoring for making the cast explicit.

This closes [issue 16023](https://github.com/rapidsai/cudf/issues/16023)

Authors:
  - Paul Mattione (https://github.com/pmattione-nvidia)

Approvers:
  - Muhammad Haseeb (https://github.com/mhaseeb123)
  - Bradley Dice (https://github.com/bdice)

URL: https://github.com/rapidsai/cudf/pull/16038
---
 cpp/include/cudf/ast/detail/operators.hpp | 13 ++++++++++++-
 1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/cpp/include/cudf/ast/detail/operators.hpp b/cpp/include/cudf/ast/detail/operators.hpp
index b618f33a6e5..c483d459833 100644
--- a/cpp/include/cudf/ast/detail/operators.hpp
+++ b/cpp/include/cudf/ast/detail/operators.hpp
@@ -17,6 +17,7 @@
 
 #include <cudf/ast/expressions.hpp>
 #include <cudf/types.hpp>
+#include <cudf/unary.hpp>
 #include <cudf/utilities/error.hpp>
 #include <cudf/utilities/type_dispatcher.hpp>
 
@@ -819,7 +820,17 @@ struct operator_functor<ast_operator::NOT, false> {
 template <typename To>
 struct cast {
   static constexpr auto arity{1};
-  template <typename From>
+  template <typename From, typename std::enable_if_t<is_fixed_point<From>()>* = nullptr>
+  __device__ inline auto operator()(From f) -> To
+  {
+    if constexpr (cuda::std::is_floating_point_v<To>) {
+      return convert_fixed_to_floating<To>(f);
+    } else {
+      return static_cast<To>(f);
+    }
+  }
+
+  template <typename From, typename cuda::std::enable_if_t<!is_fixed_point<From>()>* = nullptr>
   __device__ inline auto operator()(From f) -> decltype(static_cast<To>(f))
   {
     return static_cast<To>(f);

From e9ebdea49d24f645a6ca5ff6d79e0525a114f5fc Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Mon, 17 Jun 2024 12:29:54 +0100
Subject: [PATCH 22/25] Delete unused code from stringfunction evaluator
 (#16032)

When introducing the handling of regex contains, we replicated the handlers for some other supported string functions. This means we can delete some code.

Additionally, migrate the contains tests to live with the other string function tests, and add coverage of exceptional cases.

Authors:
  - Lawrence Mitchell (https://github.com/wence-)

Approvers:
  - https://github.com/brandon-b-miller

URL: https://github.com/rapidsai/cudf/pull/16032
---
 python/cudf_polars/cudf_polars/dsl/expr.py    | 36 ++-----
 python/cudf_polars/tests/conftest.py          | 10 ++
 .../cudf_polars/tests/expressions/test_agg.py |  5 -
 .../tests/expressions/test_distinct.py        |  9 +-
 .../tests/expressions/test_numeric_binops.py  |  5 -
 .../tests/expressions/test_stringfunction.py  | 97 ++++++++++++++++---
 python/cudf_polars/tests/test_string.py       | 61 ------------
 7 files changed, 102 insertions(+), 121 deletions(-)
 create mode 100644 python/cudf_polars/tests/conftest.py
 delete mode 100644 python/cudf_polars/tests/test_string.py

diff --git a/python/cudf_polars/cudf_polars/dsl/expr.py b/python/cudf_polars/cudf_polars/dsl/expr.py
index 03c1db68dbd..0605bba6642 100644
--- a/python/cudf_polars/cudf_polars/dsl/expr.py
+++ b/python/cudf_polars/cudf_polars/dsl/expr.py
@@ -688,13 +688,12 @@ def do_evaluate(
                     else pat.obj
                 )
                 return Column(plc.strings.find.contains(column.obj, pattern))
-            else:
-                assert isinstance(arg, Literal)
-                prog = plc.strings.regex_program.RegexProgram.create(
-                    arg.value.as_py(),
-                    flags=plc.strings.regex_flags.RegexFlags.DEFAULT,
-                )
-                return Column(plc.strings.contains.contains_re(column.obj, prog))
+            assert isinstance(arg, Literal)
+            prog = plc.strings.regex_program.RegexProgram.create(
+                arg.value.as_py(),
+                flags=plc.strings.regex_flags.RegexFlags.DEFAULT,
+            )
+            return Column(plc.strings.contains.contains_re(column.obj, prog))
         columns = [
             child.evaluate(df, context=context, mapping=mapping)
             for child in self.children
@@ -725,26 +724,9 @@ def do_evaluate(
                     else prefix.obj,
                 )
             )
-        else:
-            columns = [
-                child.evaluate(df, context=context, mapping=mapping)
-                for child in self.children
-            ]
-            if self.name == pl_expr.StringFunction.Lowercase:
-                (column,) = columns
-                return Column(plc.strings.case.to_lower(column.obj))
-            elif self.name == pl_expr.StringFunction.Uppercase:
-                (column,) = columns
-                return Column(plc.strings.case.to_upper(column.obj))
-            elif self.name == pl_expr.StringFunction.EndsWith:
-                column, suffix = columns
-                return Column(plc.strings.find.ends_with(column.obj, suffix.obj))
-            elif self.name == pl_expr.StringFunction.StartsWith:
-                column, suffix = columns
-                return Column(plc.strings.find.starts_with(column.obj, suffix.obj))
-            raise NotImplementedError(
-                f"StringFunction {self.name}"
-            )  # pragma: no cover; handled by init raising
+        raise NotImplementedError(
+            f"StringFunction {self.name}"
+        )  # pragma: no cover; handled by init raising
 
 
 class Sort(Expr):
diff --git a/python/cudf_polars/tests/conftest.py b/python/cudf_polars/tests/conftest.py
new file mode 100644
index 00000000000..9bbce6bc080
--- /dev/null
+++ b/python/cudf_polars/tests/conftest.py
@@ -0,0 +1,10 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+from __future__ import annotations
+
+import pytest
+
+
+@pytest.fixture(params=[False, True], ids=["no_nulls", "nulls"], scope="session")
+def with_nulls(request):
+    return request.param
diff --git a/python/cudf_polars/tests/expressions/test_agg.py b/python/cudf_polars/tests/expressions/test_agg.py
index 79018c80bf3..b044bbb2885 100644
--- a/python/cudf_polars/tests/expressions/test_agg.py
+++ b/python/cudf_polars/tests/expressions/test_agg.py
@@ -20,11 +20,6 @@ def dtype(request):
     return request.param
 
 
-@pytest.fixture(params=[False, True], ids=["no-nulls", "with-nulls"])
-def with_nulls(request):
-    return request.param
-
-
 @pytest.fixture(
     params=[
         False,
diff --git a/python/cudf_polars/tests/expressions/test_distinct.py b/python/cudf_polars/tests/expressions/test_distinct.py
index 22865a7ce22..143dd7e9f0f 100644
--- a/python/cudf_polars/tests/expressions/test_distinct.py
+++ b/python/cudf_polars/tests/expressions/test_distinct.py
@@ -9,11 +9,6 @@
 from cudf_polars.testing.asserts import assert_gpu_result_equal
 
 
-@pytest.fixture(params=[False, True], ids=["no-nulls", "nulls"])
-def nullable(request):
-    return request.param
-
-
 @pytest.fixture(
     params=["is_first_distinct", "is_last_distinct", "is_unique", "is_duplicated"]
 )
@@ -22,9 +17,9 @@ def op(request):
 
 
 @pytest.fixture
-def df(nullable):
+def df(with_nulls):
     values: list[int | None] = [1, 2, 3, 1, 1, 7, 3, 2, 7, 8, 1]
-    if nullable:
+    if with_nulls:
         values[1] = None
         values[4] = None
     return pl.LazyFrame({"a": values})
diff --git a/python/cudf_polars/tests/expressions/test_numeric_binops.py b/python/cudf_polars/tests/expressions/test_numeric_binops.py
index 548aebf0875..7eefc59d927 100644
--- a/python/cudf_polars/tests/expressions/test_numeric_binops.py
+++ b/python/cudf_polars/tests/expressions/test_numeric_binops.py
@@ -29,11 +29,6 @@ def rtype(request):
     return request.param
 
 
-@pytest.fixture(params=[False, True], ids=["no_nulls", "nulls"])
-def with_nulls(request):
-    return request.param
-
-
 @pytest.fixture(
     params=[
         pl.Expr.eq,
diff --git a/python/cudf_polars/tests/expressions/test_stringfunction.py b/python/cudf_polars/tests/expressions/test_stringfunction.py
index 198f35d376b..3c498fe7286 100644
--- a/python/cudf_polars/tests/expressions/test_stringfunction.py
+++ b/python/cudf_polars/tests/expressions/test_stringfunction.py
@@ -2,22 +2,39 @@
 # SPDX-License-Identifier: Apache-2.0
 from __future__ import annotations
 
+from functools import partial
+
 import pytest
 
 import polars as pl
 
-from cudf_polars import translate_ir
+from cudf_polars import execute_with_cudf, translate_ir
 from cudf_polars.testing.asserts import assert_gpu_result_equal
 
 
-def test_supported_stringfunction_expression():
-    ldf = pl.LazyFrame(
-        {
-            "a": ["a", "b", "cdefg", "h", "Wıth ünιcοde"],  # noqa: RUF001
-            "b": [0, 3, 1, -1, None],
-        }
-    )
+@pytest.fixture
+def ldf(with_nulls):
+    a = [
+        "AbC",
+        "de",
+        "FGHI",
+        "j",
+        "kLm",
+        "nOPq",
+        "",
+        "RsT",
+        "sada",
+        "uVw",
+        "h",
+        "Wıth ünιcοde",  # noqa: RUF001
+    ]
+    if with_nulls:
+        a[4] = None
+        a[-3] = None
+    return pl.LazyFrame({"a": a, "b": range(len(a))})
 
+
+def test_supported_stringfunction_expression(ldf):
     query = ldf.select(
         pl.col("a").str.starts_with("Z"),
         pl.col("a").str.ends_with("h").alias("endswith_h"),
@@ -27,15 +44,63 @@ def test_supported_stringfunction_expression():
     assert_gpu_result_equal(query)
 
 
-def test_unsupported_stringfunction():
-    ldf = pl.LazyFrame(
-        {
-            "a": ["a", "b", "cdefg", "h", "Wıth ünιcοde"],  # noqa: RUF001
-            "b": [0, 3, 1, -1, None],
-        }
-    )
-
+def test_unsupported_stringfunction(ldf):
     q = ldf.select(pl.col("a").str.count_matches("e", literal=True))
 
     with pytest.raises(NotImplementedError):
         _ = translate_ir(q._ldf.visit())
+
+
+def test_contains_re_non_strict_raises(ldf):
+    q = ldf.select(pl.col("a").str.contains(".", strict=False))
+
+    with pytest.raises(NotImplementedError):
+        _ = translate_ir(q._ldf.visit())
+
+
+def test_contains_re_non_literal_raises(ldf):
+    q = ldf.select(pl.col("a").str.contains(pl.col("b"), literal=False))
+
+    with pytest.raises(NotImplementedError):
+        _ = translate_ir(q._ldf.visit())
+
+
+@pytest.mark.parametrize(
+    "substr",
+    [
+        "A",
+        "de",
+        ".*",
+        "^a",
+        "^A",
+        "[^a-z]",
+        "[a-z]{3,}",
+        "^[A-Z]{2,}",
+        "j|u",
+    ],
+)
+def test_contains_regex(ldf, substr):
+    query = ldf.select(pl.col("a").str.contains(substr))
+    assert_gpu_result_equal(query)
+
+
+@pytest.mark.parametrize(
+    "literal", ["A", "de", "FGHI", "j", "kLm", "nOPq", "RsT", "uVw"]
+)
+def test_contains_literal(ldf, literal):
+    query = ldf.select(pl.col("a").str.contains(pl.lit(literal), literal=True))
+    assert_gpu_result_equal(query)
+
+
+def test_contains_column(ldf):
+    query = ldf.select(pl.col("a").str.contains(pl.col("a"), literal=True))
+    assert_gpu_result_equal(query)
+
+
+def test_contains_invalid(ldf):
+    query = ldf.select(pl.col("a").str.contains("["))
+
+    with pytest.raises(pl.exceptions.ComputeError):
+        query.collect()
+    with pytest.raises(pl.exceptions.ComputeError):
+        query.collect(post_opt_callback=partial(execute_with_cudf, raise_on_fail=True))
diff --git a/python/cudf_polars/tests/test_string.py b/python/cudf_polars/tests/test_string.py
deleted file mode 100644
index f1a080d040f..00000000000
--- a/python/cudf_polars/tests/test_string.py
+++ /dev/null
@@ -1,61 +0,0 @@
-# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
-# SPDX-License-Identifier: Apache-2.0
-from __future__ import annotations
-
-from functools import partial
-
-import pytest
-
-import polars as pl
-
-from cudf_polars.callback import execute_with_cudf
-from cudf_polars.testing.asserts import assert_gpu_result_equal
-
-
-@pytest.fixture
-def ldf():
-    return pl.DataFrame(
-        {"a": ["AbC", "de", "FGHI", "j", "kLm", "nOPq", None, "RsT", None, "uVw"]}
-    ).lazy()
-
-
-@pytest.mark.parametrize(
-    "substr",
-    [
-        "A",
-        "de",
-        ".*",
-        "^a",
-        "^A",
-        "[^a-z]",
-        "[a-z]{3,}",
-        "^[A-Z]{2,}",
-        "j|u",
-    ],
-)
-def test_contains_regex(ldf, substr):
-    query = ldf.select(pl.col("a").str.contains(substr))
-    assert_gpu_result_equal(query)
-
-
-@pytest.mark.parametrize(
-    "literal", ["A", "de", "FGHI", "j", "kLm", "nOPq", "RsT", "uVw"]
-)
-def test_contains_literal(ldf, literal):
-    query = ldf.select(pl.col("a").str.contains(pl.lit(literal), literal=True))
-    assert_gpu_result_equal(query)
-
-
-def test_contains_column(ldf):
-    query = ldf.select(pl.col("a").str.contains(pl.col("a"), literal=True))
-    assert_gpu_result_equal(query)
-
-
-@pytest.mark.parametrize("pat", ["["])
-def test_contains_invalid(ldf, pat):
-    query = ldf.select(pl.col("a").str.contains(pat))
-
-    with pytest.raises(pl.exceptions.ComputeError):
-        query.collect()
-    with pytest.raises(pl.exceptions.ComputeError):
-        query.collect(post_opt_callback=partial(execute_with_cudf, raise_on_fail=True))

From a023d5fd189b52996c00a4b3132171bb3f41a02d Mon Sep 17 00:00:00 2001
From: GALI PREM SAGAR <sagarprem75@gmail.com>
Date: Mon, 17 Jun 2024 09:31:01 -0500
Subject: [PATCH 23/25] Return `FrozenList` for `Index.names` (#16047)

Fixes: #16046

This PR returns `FrozenList` for `Index.names` instead of `tuple`.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Lawrence Mitchell (https://github.com/wence-)

URL: https://github.com/rapidsai/cudf/pull/16047
---
 python/cudf/cudf/core/_base_index.py     | 4 ++--
 python/dask_cudf/dask_cudf/io/parquet.py | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py
index b29fc475b29..e5945f8860e 100644
--- a/python/cudf/cudf/core/_base_index.py
+++ b/python/cudf/cudf/core/_base_index.py
@@ -342,9 +342,9 @@ def deserialize(cls, header, frames):
     @property
     def names(self):
         """
-        Returns a tuple containing the name of the Index.
+        Returns a FrozenList containing the name of the Index.
         """
-        return (self.name,)
+        return pd.core.indexes.frozen.FrozenList([self.name])
 
     @names.setter
     def names(self, values):
diff --git a/python/dask_cudf/dask_cudf/io/parquet.py b/python/dask_cudf/dask_cudf/io/parquet.py
index ba8b1e89721..810a804e428 100644
--- a/python/dask_cudf/dask_cudf/io/parquet.py
+++ b/python/dask_cudf/dask_cudf/io/parquet.py
@@ -316,7 +316,7 @@ def read_partition(
 
             if index and (index[0] in df.columns):
                 df = df.set_index(index[0])
-            elif index is False and df.index.names != (None,):
+            elif index is False and df.index.names != [None]:
                 # If index=False, we shouldn't have a named index
                 df.reset_index(inplace=True)
 

From 107753ccaacdb62287c4dd4351e5caf3bf8bc62a Mon Sep 17 00:00:00 2001
From: Lawrence Mitchell <lmitchell@nvidia.com>
Date: Mon, 17 Jun 2024 15:43:13 +0100
Subject: [PATCH 24/25] Remove mapfunction nodes that don't exist/aren't
 supported (#15991)

We can't correctly implement merge_sorted to match polars because libcudf's implementation is not stable with respect to input order. drop_nulls is no longer implemented as a MapFunction, but instead as a boolean filter.

Finally, add coverage of the mapfunctions we do handle.

Authors:
  - Lawrence Mitchell (https://github.com/wence-)

Approvers:
  - Thomas Li (https://github.com/lithomas1)

URL: https://github.com/rapidsai/cudf/pull/15991
---
 python/cudf_polars/cudf_polars/dsl/ir.py     | 56 ++++++--------------
 python/cudf_polars/tests/test_mapfunction.py | 43 +++++++++++++++
 2 files changed, 58 insertions(+), 41 deletions(-)
 create mode 100644 python/cudf_polars/tests/test_mapfunction.py

diff --git a/python/cudf_polars/cudf_polars/dsl/ir.py b/python/cudf_polars/cudf_polars/dsl/ir.py
index 9fb2468e4e9..7f0920e1b57 100644
--- a/python/cudf_polars/cudf_polars/dsl/ir.py
+++ b/python/cudf_polars/cudf_polars/dsl/ir.py
@@ -286,13 +286,18 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
         pdf = pl.DataFrame._from_pydf(self.df)
         if self.projection is not None:
             pdf = pdf.select(self.projection)
-        # TODO: goes away when libcudf supports large strings
         table = pdf.to_arrow()
         schema = table.schema
         for i, field in enumerate(schema):
+            # TODO: Nested types
             if field.type == pa.large_string():
-                # TODO: Nested types
+                # TODO: goes away when libcudf supports large strings
                 schema = schema.set(i, pa.field(field.name, pa.string()))
+            elif isinstance(field.type, pa.LargeListType):
+                # TODO: goes away when libcudf supports large lists
+                schema = schema.set(
+                    i, pa.field(field.name, pa.list_(field.type.field(0)))
+                )
         table = table.cast(schema)
         df = DataFrame.from_table(
             plc.interop.from_arrow(table), list(self.schema.keys())
@@ -850,9 +855,11 @@ class MapFunction(IR):
 
     _NAMES: ClassVar[frozenset[str]] = frozenset(
         [
-            "drop_nulls",
             "rechunk",
-            "merge_sorted",
+            # libcudf merge is not stable wrt order of inputs, since
+            # it uses a priority queue to manage the tables it produces.
+            # See: https://github.com/rapidsai/cudf/issues/16010
+            # "merge_sorted",
             "rename",
             "explode",
         ]
@@ -869,46 +876,13 @@ def __post_init__(self) -> None:
                 # polars requires that all to-explode columns have the
                 # same sub-shapes
                 raise NotImplementedError("Explode with more than one column")
-        elif self.name == "merge_sorted":
-            assert isinstance(self.df, Union)
-            (key_column,) = self.options
-            if key_column not in self.df.dfs[0].schema:
-                raise ValueError(f"Key column {key_column} not found")
 
     def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
         """Evaluate and return a dataframe."""
-        if self.name == "merge_sorted":
-            # merge_sorted operates on Union inputs
-            # but if we evaluate the Union then we can't unpick the
-            # pieces, so we dive inside and evaluate the pieces by hand
-            assert isinstance(self.df, Union)
-            first, *rest = (c.evaluate(cache=cache) for c in self.df.dfs)
-            (key_column,) = self.options
-            if not all(first.column_names == r.column_names for r in rest):
-                raise ValueError("DataFrame shapes/column names don't match")
-            # Already validated that key_column is in column names
-            index = first.column_names.index(key_column)
-            return DataFrame.from_table(
-                plc.merge.merge_sorted(
-                    [first.table, *(df.table for df in rest)],
-                    [index],
-                    [plc.types.Order.ASCENDING],
-                    [plc.types.NullOrder.BEFORE],
-                ),
-                first.column_names,
-            ).sorted_like(first, subset={key_column})
-        elif self.name == "rechunk":
+        if self.name == "rechunk":
             # No-op in our data model
-            return self.df.evaluate(cache=cache)
-        elif self.name == "drop_nulls":
-            df = self.df.evaluate(cache=cache)
-            (subset,) = self.options
-            subset = set(subset)
-            indices = [i for i, name in enumerate(df.column_names) if name in subset]
-            return DataFrame.from_table(
-                plc.stream_compaction.drop_nulls(df.table, indices, len(indices)),
-                df.column_names,
-            ).sorted_like(df)
+            # Don't think this appears in a plan tree from python
+            return self.df.evaluate(cache=cache)  # pragma: no cover
         elif self.name == "rename":
             df = self.df.evaluate(cache=cache)
             # final tag is "swapping" which is useful for the
@@ -924,7 +898,7 @@ def evaluate(self, *, cache: MutableMapping[int, DataFrame]) -> DataFrame:
                 plc.lists.explode_outer(df.table, index), df.column_names
             ).sorted_like(df, subset=subset)
         else:
-            raise AssertionError("Should never be reached")
+            raise AssertionError("Should never be reached")  # pragma: no cover
 
 
 @dataclasses.dataclass(slots=True)
diff --git a/python/cudf_polars/tests/test_mapfunction.py b/python/cudf_polars/tests/test_mapfunction.py
new file mode 100644
index 00000000000..ec6b3f3fc0a
--- /dev/null
+++ b/python/cudf_polars/tests/test_mapfunction.py
@@ -0,0 +1,43 @@
+# SPDX-FileCopyrightText: Copyright (c) 2024 NVIDIA CORPORATION & AFFILIATES.
+# SPDX-License-Identifier: Apache-2.0
+from __future__ import annotations
+
+import pytest
+
+import polars as pl
+
+from cudf_polars import translate_ir
+from cudf_polars.testing.asserts import assert_gpu_result_equal
+
+
+def test_merge_sorted_raises():
+    df1 = pl.LazyFrame({"a": [1, 6, 9], "b": [1, -10, 4]})
+    df2 = pl.LazyFrame({"a": [-1, 5, 11, 20], "b": [2, 7, -4, None]})
+    df3 = pl.LazyFrame({"a": [-10, 20, 21], "b": [1, 2, 3]})
+
+    q = df1.merge_sorted(df2, key="a").merge_sorted(df3, key="a")
+
+    with pytest.raises(NotImplementedError):
+        _ = translate_ir(q._ldf.visit())
+
+
+def test_explode_multiple_raises():
+    df = pl.LazyFrame({"a": [[1, 2], [3, 4]], "b": [[5, 6], [7, 8]]})
+    q = df.explode("a", "b")
+
+    with pytest.raises(NotImplementedError):
+        _ = translate_ir(q._ldf.visit())
+
+
+@pytest.mark.parametrize("column", ["a", "b"])
+def test_explode_single(column):
+    df = pl.LazyFrame(
+        {
+            "a": [[1, 2], [3, 4], None],
+            "b": [[5, 6], [7, 8], [9, 10]],
+            "c": [None, 11, 12],
+        }
+    )
+    q = df.explode(column)
+
+    assert_gpu_result_equal(q)

From 87f6a7e15bb7d8dc0d8733392567fb647074b2fd Mon Sep 17 00:00:00 2001
From: Matthew Roeschke <10647082+mroeschke@users.noreply.github.com>
Date: Mon, 17 Jun 2024 06:21:10 -1000
Subject: [PATCH 25/25] Add ruff rules to avoid importing from typing (#16040)

Enabled the following ruff rules to update typing annotations according to PEP 585 and PEP 604:

https://docs.astral.sh/ruff/rules/future-rewritable-type-annotation/
https://docs.astral.sh/ruff/rules/non-pep604-annotation/
https://docs.astral.sh/ruff/rules/non-pep585-annotation/

The changes were made by running `pre-commit run ruff --all-files` with `fix = True` and `unsafe-fixes = True` locally

Authors:
  - Matthew Roeschke (https://github.com/mroeschke)

Approvers:
  - Lawrence Mitchell (https://github.com/wence-)
  - Mike Sarahan (https://github.com/msarahan)

URL: https://github.com/rapidsai/cudf/pull/16040
---
 pyproject.toml                                |   2 +-
 python/cudf/cudf/_lib/column.pyi              |  46 ++++----
 python/cudf/cudf/api/types.py                 |   4 +-
 python/cudf/cudf/core/_base_index.py          |   6 +-
 .../cudf/cudf/core/_internals/expressions.py  |  12 +-
 python/cudf/cudf/core/_internals/timezones.py |  19 +--
 python/cudf/cudf/core/_internals/where.py     |  15 ++-
 python/cudf/cudf/core/buffer/buffer.py        |  14 +--
 .../core/buffer/exposure_tracked_buffer.py    |   4 +-
 python/cudf/cudf/core/buffer/spill_manager.py |  20 ++--
 .../cudf/cudf/core/buffer/spillable_buffer.py |  18 +--
 python/cudf/cudf/core/buffer/utils.py         |  20 ++--
 python/cudf/cudf/core/column/categorical.py   |  46 ++++----
 python/cudf/cudf/core/column/column.py        |  94 +++++++--------
 python/cudf/cudf/core/column/datetime.py      |  22 ++--
 python/cudf/cudf/core/column/decimal.py       |   8 +-
 python/cudf/cudf/core/column/lists.py         |  10 +-
 python/cudf/cudf/core/column/methods.py       |   4 +-
 python/cudf/cudf/core/column/numerical.py     |  35 +++---
 .../cudf/cudf/core/column/numerical_base.py   |  16 +--
 python/cudf/cudf/core/column/string.py        | 109 ++++++++----------
 python/cudf/cudf/core/column/timedelta.py     |  18 +--
 python/cudf/cudf/core/column_accessor.py      |  25 ++--
 python/cudf/cudf/core/dataframe.py            |  51 +++-----
 python/cudf/cudf/core/df_protocol.py          |  44 +++----
 python/cudf/cudf/core/dtypes.py               |  27 ++---
 python/cudf/cudf/core/frame.py                |  49 +++-----
 python/cudf/cudf/core/groupby/groupby.py      |  19 +--
 python/cudf/cudf/core/index.py                |  28 ++---
 python/cudf/cudf/core/indexed_frame.py        |  58 ++++------
 python/cudf/cudf/core/indexing_utils.py       |   8 +-
 python/cudf/cudf/core/join/_join_helpers.py   |   6 +-
 python/cudf/cudf/core/join/join.py            |   6 +-
 python/cudf/cudf/core/mixins/binops.pyi       |   6 +-
 python/cudf/cudf/core/mixins/reductions.pyi   |   4 +-
 python/cudf/cudf/core/mixins/scans.pyi        |   4 +-
 python/cudf/cudf/core/multiindex.py           |  18 +--
 python/cudf/cudf/core/reshape.py              |  15 ++-
 python/cudf/cudf/core/series.py               |  30 ++---
 python/cudf/cudf/core/single_column_frame.py  |  12 +-
 python/cudf/cudf/core/subword_tokenizer.py    |   3 +-
 python/cudf/cudf/core/tools/datetimes.py      |  13 ++-
 python/cudf/cudf/core/udf/groupby_typing.py   |   8 +-
 python/cudf/cudf/core/udf/utils.py            |   5 +-
 python/cudf/cudf/io/parquet.py                |  18 +--
 python/cudf/cudf/options.py                   |  11 +-
 python/cudf/cudf/pandas/fast_slow_proxy.py    |  40 +++----
 python/cudf/cudf/pandas/module_accelerator.py |   6 +-
 python/cudf/cudf/pandas/profiler.py           |  12 +-
 .../cudf/cudf/pylibcudf_tests/common/utils.py |   7 +-
 .../test_avro_reader_fastavro_integration.py  |   5 +-
 python/cudf/cudf/tests/test_df_protocol.py    |   5 +-
 python/cudf/cudf/tests/test_spilling.py       |   8 +-
 python/cudf/cudf/utils/applyutils.py          |   5 +-
 python/cudf/cudf/utils/queryutils.py          |   7 +-
 python/cudf/cudf/utils/utils.py               |   4 +-
 .../cudf_pandas_tests/test_fast_slow_proxy.py |   1 +
 python/dask_cudf/dask_cudf/groupby.py         |   4 +-
 58 files changed, 504 insertions(+), 610 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index c602240a0b7..2f59864894b 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -26,7 +26,7 @@ quiet-level = 3
 line-length = 79
 
 [tool.ruff.lint]
-select = ["E", "F", "W", "D201", "D204", "D206", "D207", "D208", "D209", "D210", "D211", "D214", "D215", "D300", "D301", "D403", "D405", "D406", "D407", "D408", "D409", "D410", "D411", "D412", "D414", "D418", "TCH"]
+select = ["E", "F", "W", "D201", "D204", "D206", "D207", "D208", "D209", "D210", "D211", "D214", "D215", "D300", "D301", "D403", "D405", "D406", "D407", "D408", "D409", "D410", "D411", "D412", "D414", "D418", "TCH", "FA", "UP006", "UP007"]
 ignore = [
     # whitespace before :
     "E203",
diff --git a/python/cudf/cudf/_lib/column.pyi b/python/cudf/cudf/_lib/column.pyi
index c667286fc16..bcab009c102 100644
--- a/python/cudf/cudf/_lib/column.pyi
+++ b/python/cudf/cudf/_lib/column.pyi
@@ -2,8 +2,6 @@
 
 from __future__ import annotations
 
-from typing import Dict, Optional, Tuple
-
 from typing_extensions import Self
 
 from cudf._typing import Dtype, DtypeObj, ScalarLike
@@ -11,27 +9,27 @@ from cudf.core.buffer import Buffer
 from cudf.core.column import ColumnBase
 
 class Column:
-    _data: Optional[Buffer]
-    _mask: Optional[Buffer]
-    _base_data: Optional[Buffer]
-    _base_mask: Optional[Buffer]
+    _data: Buffer | None
+    _mask: Buffer | None
+    _base_data: Buffer | None
+    _base_mask: Buffer | None
     _dtype: DtypeObj
     _size: int
     _offset: int
     _null_count: int
-    _children: Tuple[ColumnBase, ...]
-    _base_children: Tuple[ColumnBase, ...]
-    _distinct_count: Dict[bool, int]
+    _children: tuple[ColumnBase, ...]
+    _base_children: tuple[ColumnBase, ...]
+    _distinct_count: dict[bool, int]
 
     def __init__(
         self,
-        data: Optional[Buffer],
+        data: Buffer | None,
         size: int,
         dtype: Dtype,
-        mask: Optional[Buffer] = None,
-        offset: Optional[int] = None,
-        null_count: Optional[int] = None,
-        children: Tuple[ColumnBase, ...] = (),
+        mask: Buffer | None = None,
+        offset: int | None = None,
+        null_count: int | None = None,
+        children: tuple[ColumnBase, ...] = (),
     ) -> None: ...
     @property
     def base_size(self) -> int: ...
@@ -40,9 +38,9 @@ class Column:
     @property
     def size(self) -> int: ...
     @property
-    def base_data(self) -> Optional[Buffer]: ...
+    def base_data(self) -> Buffer | None: ...
     @property
-    def data(self) -> Optional[Buffer]: ...
+    def data(self) -> Buffer | None: ...
     @property
     def data_ptr(self) -> int: ...
     def set_base_data(self, value: Buffer) -> None: ...
@@ -50,25 +48,25 @@ class Column:
     def nullable(self) -> bool: ...
     def has_nulls(self, include_nan: bool = False) -> bool: ...
     @property
-    def base_mask(self) -> Optional[Buffer]: ...
+    def base_mask(self) -> Buffer | None: ...
     @property
-    def mask(self) -> Optional[Buffer]: ...
+    def mask(self) -> Buffer | None: ...
     @property
     def mask_ptr(self) -> int: ...
-    def set_base_mask(self, value: Optional[Buffer]) -> None: ...
-    def set_mask(self, value: Optional[Buffer]) -> Self: ...
+    def set_base_mask(self, value: Buffer | None) -> None: ...
+    def set_mask(self, value: Buffer | None) -> Self: ...
     @property
     def null_count(self) -> int: ...
     @property
     def offset(self) -> int: ...
     @property
-    def base_children(self) -> Tuple[ColumnBase, ...]: ...
+    def base_children(self) -> tuple[ColumnBase, ...]: ...
     @property
-    def children(self) -> Tuple[ColumnBase, ...]: ...
-    def set_base_children(self, value: Tuple[ColumnBase, ...]) -> None: ...
+    def children(self) -> tuple[ColumnBase, ...]: ...
+    def set_base_children(self, value: tuple[ColumnBase, ...]) -> None: ...
     def _mimic_inplace(
         self, other_col: ColumnBase, inplace=False
-    ) -> Optional[Self]: ...
+    ) -> Self | None: ...
 
     # TODO: The val parameter should be Scalar, not ScalarLike
     @staticmethod
diff --git a/python/cudf/cudf/api/types.py b/python/cudf/cudf/api/types.py
index 42b1524bd76..d97e9c815b6 100644
--- a/python/cudf/cudf/api/types.py
+++ b/python/cudf/cudf/api/types.py
@@ -8,7 +8,7 @@
 from collections import abc
 from functools import wraps
 from inspect import isclass
-from typing import List, Union, cast
+from typing import cast
 
 import cupy as cp
 import numpy as np
@@ -219,7 +219,7 @@ def wrapped_func(obj):
 
 
 def _union_categoricals(
-    to_union: List[Union[cudf.Series, cudf.CategoricalIndex]],
+    to_union: list[cudf.Series | cudf.CategoricalIndex],
     sort_categories: bool = False,
     ignore_order: bool = False,
 ):
diff --git a/python/cudf/cudf/core/_base_index.py b/python/cudf/cudf/core/_base_index.py
index e5945f8860e..e71e45e410e 100644
--- a/python/cudf/cudf/core/_base_index.py
+++ b/python/cudf/cudf/core/_base_index.py
@@ -5,7 +5,7 @@
 import pickle
 import warnings
 from functools import cached_property
-from typing import TYPE_CHECKING, Any, Literal, Set, Tuple
+from typing import TYPE_CHECKING, Any, Literal
 
 import pandas as pd
 from typing_extensions import Self
@@ -44,11 +44,11 @@
 class BaseIndex(Serializable):
     """Base class for all cudf Index types."""
 
-    _accessors: Set[Any] = set()
+    _accessors: set[Any] = set()
     _data: ColumnAccessor
 
     @property
-    def _columns(self) -> Tuple[Any, ...]:
+    def _columns(self) -> tuple[Any, ...]:
         raise NotImplementedError
 
     @cached_property
diff --git a/python/cudf/cudf/core/_internals/expressions.py b/python/cudf/cudf/core/_internals/expressions.py
index 5cb9f0363e0..393a68dd844 100644
--- a/python/cudf/cudf/core/_internals/expressions.py
+++ b/python/cudf/cudf/core/_internals/expressions.py
@@ -1,8 +1,8 @@
-# Copyright (c) 2022-2023, NVIDIA CORPORATION.
+# Copyright (c) 2022-2024, NVIDIA CORPORATION.
+from __future__ import annotations
 
 import ast
 import functools
-from typing import List, Tuple
 
 from cudf._lib.expressions import (
     ASTOperator,
@@ -98,9 +98,9 @@ class libcudfASTVisitor(ast.NodeVisitor):
         The column names used to map the names in an expression.
     """
 
-    def __init__(self, col_names: Tuple[str]):
-        self.stack: List[Expression] = []
-        self.nodes: List[Expression] = []
+    def __init__(self, col_names: tuple[str]):
+        self.stack: list[Expression] = []
+        self.nodes: list[Expression] = []
         self.col_names = col_names
 
     @property
@@ -218,7 +218,7 @@ def visit_Call(self, node):
 
 
 @functools.lru_cache(256)
-def parse_expression(expr: str, col_names: Tuple[str]):
+def parse_expression(expr: str, col_names: tuple[str]):
     visitor = libcudfASTVisitor(col_names)
     visitor.visit(ast.parse(expr))
     return visitor
diff --git a/python/cudf/cudf/core/_internals/timezones.py b/python/cudf/cudf/core/_internals/timezones.py
index f04cae719c2..269fcf3e37f 100644
--- a/python/cudf/cudf/core/_internals/timezones.py
+++ b/python/cudf/cudf/core/_internals/timezones.py
@@ -1,20 +1,23 @@
 # Copyright (c) 2023-2024, NVIDIA CORPORATION.
+from __future__ import annotations
 
 import os
 import zoneinfo
 from functools import lru_cache
-from typing import Literal, Tuple
+from typing import TYPE_CHECKING, Literal
 
 import numpy as np
 
 from cudf._lib.timezone import make_timezone_transition_table
 from cudf.core.column.column import as_column
-from cudf.core.column.datetime import DatetimeColumn
-from cudf.core.column.timedelta import TimeDeltaColumn
+
+if TYPE_CHECKING:
+    from cudf.core.column.datetime import DatetimeColumn
+    from cudf.core.column.timedelta import TimeDeltaColumn
 
 
 @lru_cache(maxsize=20)
-def get_tz_data(zone_name: str) -> Tuple[DatetimeColumn, TimeDeltaColumn]:
+def get_tz_data(zone_name: str) -> tuple[DatetimeColumn, TimeDeltaColumn]:
     """
     Return timezone data (transition times and UTC offsets) for the
     given IANA time zone.
@@ -40,7 +43,7 @@ def get_tz_data(zone_name: str) -> Tuple[DatetimeColumn, TimeDeltaColumn]:
 
 def _find_and_read_tzfile_tzpath(
     zone_name: str,
-) -> Tuple[DatetimeColumn, TimeDeltaColumn]:
+) -> tuple[DatetimeColumn, TimeDeltaColumn]:
     for search_path in zoneinfo.TZPATH:
         if os.path.isfile(os.path.join(search_path, zone_name)):
             return _read_tzfile_as_columns(search_path, zone_name)
@@ -49,7 +52,7 @@ def _find_and_read_tzfile_tzpath(
 
 def _find_and_read_tzfile_tzdata(
     zone_name: str,
-) -> Tuple[DatetimeColumn, TimeDeltaColumn]:
+) -> tuple[DatetimeColumn, TimeDeltaColumn]:
     import importlib.resources
 
     package_base = "tzdata.zoneinfo"
@@ -78,7 +81,7 @@ def _find_and_read_tzfile_tzdata(
 
 def _read_tzfile_as_columns(
     tzdir, zone_name: str
-) -> Tuple[DatetimeColumn, TimeDeltaColumn]:
+) -> tuple[DatetimeColumn, TimeDeltaColumn]:
     transition_times_and_offsets = make_timezone_transition_table(
         tzdir, zone_name
     )
@@ -92,7 +95,7 @@ def _read_tzfile_as_columns(
 
 def check_ambiguous_and_nonexistent(
     ambiguous: Literal["NaT"], nonexistent: Literal["NaT"]
-) -> Tuple[Literal["NaT"], Literal["NaT"]]:
+) -> tuple[Literal["NaT"], Literal["NaT"]]:
     if ambiguous != "NaT":
         raise NotImplementedError(
             "Only ambiguous='NaT' is currently supported"
diff --git a/python/cudf/cudf/core/_internals/where.py b/python/cudf/cudf/core/_internals/where.py
index ef6b10f66c1..44ce0ddef25 100644
--- a/python/cudf/cudf/core/_internals/where.py
+++ b/python/cudf/cudf/core/_internals/where.py
@@ -1,18 +1,17 @@
-# Copyright (c) 2021-2023, NVIDIA CORPORATION.
+# Copyright (c) 2021-2024, NVIDIA CORPORATION.
+from __future__ import annotations
 
 import warnings
-from typing import Tuple, Union
+from typing import TYPE_CHECKING
 
 import numpy as np
 
 import cudf
-from cudf._typing import ScalarLike
 from cudf.api.types import (
     _is_non_decimal_numeric_dtype,
     is_bool_dtype,
     is_scalar,
 )
-from cudf.core.column import ColumnBase
 from cudf.core.dtypes import CategoricalDtype
 from cudf.utils.dtypes import (
     _can_cast,
@@ -21,6 +20,10 @@
     is_mixed_with_object_dtype,
 )
 
+if TYPE_CHECKING:
+    from cudf._typing import ScalarLike
+    from cudf.core.column import ColumnBase
+
 
 def _normalize_categorical(input_col, other):
     if isinstance(input_col, cudf.core.column.CategoricalColumn):
@@ -41,9 +44,9 @@ def _normalize_categorical(input_col, other):
 
 def _check_and_cast_columns_with_other(
     source_col: ColumnBase,
-    other: Union[ScalarLike, ColumnBase],
+    other: ScalarLike | ColumnBase,
     inplace: bool,
-) -> Tuple[ColumnBase, Union[ScalarLike, ColumnBase]]:
+) -> tuple[ColumnBase, ScalarLike | ColumnBase]:
     # Returns type-casted `source_col` & `other` based on `inplace`.
     source_dtype = source_col.dtype
     if isinstance(source_dtype, CategoricalDtype):
diff --git a/python/cudf/cudf/core/buffer/buffer.py b/python/cudf/cudf/core/buffer/buffer.py
index bf6f9f1a3c1..80dbbe4c048 100644
--- a/python/cudf/cudf/core/buffer/buffer.py
+++ b/python/cudf/cudf/core/buffer/buffer.py
@@ -6,7 +6,7 @@
 import pickle
 import weakref
 from types import SimpleNamespace
-from typing import Any, Dict, Literal, Mapping, Optional, Tuple
+from typing import Any, Literal, Mapping
 
 import numpy
 from typing_extensions import Self
@@ -42,7 +42,7 @@ def host_memory_allocation(nbytes: int) -> memoryview:
 def cuda_array_interface_wrapper(
     ptr: int,
     size: int,
-    owner: Optional[object] = None,
+    owner: object | None = None,
     readonly=False,
     typestr="|u1",
     version=0,
@@ -278,7 +278,7 @@ def get_ptr(self, *, mode: Literal["read", "write"]) -> int:
         return self._ptr
 
     def memoryview(
-        self, *, offset: int = 0, size: Optional[int] = None
+        self, *, offset: int = 0, size: int | None = None
     ) -> memoryview:
         """Read-only access to the buffer through host memory."""
         size = self._size if size is None else size
@@ -319,7 +319,7 @@ def __init__(
         *,
         owner: BufferOwner,
         offset: int = 0,
-        size: Optional[int] = None,
+        size: int | None = None,
     ) -> None:
         size = owner.size if size is None else size
         if size < 0:
@@ -414,7 +414,7 @@ def __cuda_array_interface__(self) -> Mapping:
             "version": 0,
         }
 
-    def serialize(self) -> Tuple[dict, list]:
+    def serialize(self) -> tuple[dict, list]:
         """Serialize the buffer into header and frames.
 
         The frames can be a mixture of memoryview, Buffer, and BufferOwner
@@ -427,7 +427,7 @@ def serialize(self) -> Tuple[dict, list]:
             serializable metadata required to reconstruct the object. The
             second element is a list containing single frame.
         """
-        header: Dict[str, Any] = {}
+        header: dict[str, Any] = {}
         header["type-serialized"] = pickle.dumps(type(self))
         header["owner-type-serialized"] = pickle.dumps(type(self._owner))
         header["frame_count"] = 1
@@ -480,7 +480,7 @@ def __str__(self) -> str:
         )
 
 
-def get_ptr_and_size(array_interface: Mapping) -> Tuple[int, int]:
+def get_ptr_and_size(array_interface: Mapping) -> tuple[int, int]:
     """Retrieve the pointer and size from an array interface.
 
     Raises ValueError if array isn't C-contiguous.
diff --git a/python/cudf/cudf/core/buffer/exposure_tracked_buffer.py b/python/cudf/cudf/core/buffer/exposure_tracked_buffer.py
index 15f00fc670d..0bd8d6054b3 100644
--- a/python/cudf/cudf/core/buffer/exposure_tracked_buffer.py
+++ b/python/cudf/cudf/core/buffer/exposure_tracked_buffer.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from typing import Literal, Mapping, Optional
+from typing import Literal, Mapping
 
 from typing_extensions import Self
 
@@ -27,7 +27,7 @@ def __init__(
         self,
         owner: BufferOwner,
         offset: int = 0,
-        size: Optional[int] = None,
+        size: int | None = None,
     ) -> None:
         super().__init__(owner=owner, offset=offset, size=size)
         self.owner._slices.add(self)
diff --git a/python/cudf/cudf/core/buffer/spill_manager.py b/python/cudf/cudf/core/buffer/spill_manager.py
index 7bcf97302aa..762cd7f9e86 100644
--- a/python/cudf/cudf/core/buffer/spill_manager.py
+++ b/python/cudf/cudf/core/buffer/spill_manager.py
@@ -13,7 +13,7 @@
 from contextlib import contextmanager
 from dataclasses import dataclass
 from functools import partial
-from typing import TYPE_CHECKING, Dict, List, Optional, Tuple
+from typing import TYPE_CHECKING
 
 import rmm.mr
 
@@ -39,7 +39,7 @@ def get_traceback() -> str:
 
 def get_rmm_memory_resource_stack(
     mr: rmm.mr.DeviceMemoryResource,
-) -> List[rmm.mr.DeviceMemoryResource]:
+) -> list[rmm.mr.DeviceMemoryResource]:
     """Get the RMM resource stack
 
     Parameters
@@ -99,14 +99,14 @@ class Expose:
         total_nbytes: int = 0
         spilled_nbytes: int = 0
 
-    spill_totals: Dict[Tuple[str, str], Tuple[int, float]]
+    spill_totals: dict[tuple[str, str], tuple[int, float]]
 
     def __init__(self, level) -> None:
         self.lock = threading.Lock()
         self.level = level
         self.spill_totals = defaultdict(lambda: (0, 0))
         # Maps each traceback to a Expose
-        self.exposes: Dict[str, SpillStatistics.Expose] = {}
+        self.exposes: dict[str, SpillStatistics.Expose] = {}
 
     def log_spill(self, src: str, dst: str, nbytes: int, time: float) -> None:
         """Log a (un-)spilling event
@@ -227,7 +227,7 @@ class SpillManager:
     def __init__(
         self,
         *,
-        device_memory_limit: Optional[int] = None,
+        device_memory_limit: int | None = None,
         statistic_level: int = 0,
     ) -> None:
         self._lock = threading.Lock()
@@ -298,7 +298,7 @@ def add(self, buffer: SpillableBufferOwner) -> None:
 
     def buffers(
         self, order_by_access_time: bool = False
-    ) -> Tuple[SpillableBufferOwner, ...]:
+    ) -> tuple[SpillableBufferOwner, ...]:
         """Get all managed buffers
 
         Parameters
@@ -347,7 +347,7 @@ def spill_device_memory(self, nbytes: int) -> int:
                     buf.lock.release()
         return spilled
 
-    def spill_to_device_limit(self, device_limit: Optional[int] = None) -> int:
+    def spill_to_device_limit(self, device_limit: int | None = None) -> int:
         """Try to spill device memory until device limit
 
         Notice, by default this is a no-op.
@@ -402,10 +402,10 @@ def __repr__(self) -> str:
 #   - Initialized to None (spilling disabled)
 #   - Initialized to a SpillManager instance (spilling enabled)
 _global_manager_uninitialized: bool = True
-_global_manager: Optional[SpillManager] = None
+_global_manager: SpillManager | None = None
 
 
-def set_global_manager(manager: Optional[SpillManager]) -> None:
+def set_global_manager(manager: SpillManager | None) -> None:
     """Set the global manager, which if None disables spilling"""
 
     global _global_manager, _global_manager_uninitialized
@@ -419,7 +419,7 @@ def set_global_manager(manager: Optional[SpillManager]) -> None:
     _global_manager_uninitialized = False
 
 
-def get_global_manager() -> Optional[SpillManager]:
+def get_global_manager() -> SpillManager | None:
     """Get the global manager or None if spilling is disabled"""
     global _global_manager_uninitialized
     if _global_manager_uninitialized:
diff --git a/python/cudf/cudf/core/buffer/spillable_buffer.py b/python/cudf/cudf/core/buffer/spillable_buffer.py
index 49258fea9ab..eb57a371965 100644
--- a/python/cudf/cudf/core/buffer/spillable_buffer.py
+++ b/python/cudf/cudf/core/buffer/spillable_buffer.py
@@ -7,7 +7,7 @@
 import time
 import weakref
 from threading import RLock
-from typing import TYPE_CHECKING, Any, Dict, List, Literal, Optional, Tuple
+from typing import TYPE_CHECKING, Any, Literal
 
 import numpy
 from typing_extensions import Self
@@ -88,10 +88,10 @@ class SpillableBufferOwner(BufferOwner):
     lock: RLock
     _spill_locks: weakref.WeakSet
     _last_accessed: float
-    _ptr_desc: Dict[str, Any]
+    _ptr_desc: dict[str, Any]
     _manager: SpillManager
 
-    def _finalize_init(self, ptr_desc: Dict[str, Any]) -> None:
+    def _finalize_init(self, ptr_desc: dict[str, Any]) -> None:
         """Finish initialization of the spillable buffer
 
         This implements the common initialization that `from_device_memory`
@@ -297,7 +297,7 @@ def get_ptr(self, *, mode: Literal["read", "write"]) -> int:
             self._last_accessed = time.monotonic()
         return self._ptr
 
-    def memory_info(self) -> Tuple[int, int, str]:
+    def memory_info(self) -> tuple[int, int, str]:
         """Get pointer, size, and device type of this buffer.
 
         Warning, it is not safe to access the pointer value without
@@ -341,7 +341,7 @@ def __cuda_array_interface__(self) -> dict:
         }
 
     def memoryview(
-        self, *, offset: int = 0, size: Optional[int] = None
+        self, *, offset: int = 0, size: int | None = None
     ) -> memoryview:
         size = self._size if size is None else size
         with self.lock:
@@ -388,11 +388,11 @@ def spillable(self) -> bool:
     def spill_lock(self, spill_lock: SpillLock) -> None:
         self._owner.spill_lock(spill_lock=spill_lock)
 
-    def memory_info(self) -> Tuple[int, int, str]:
+    def memory_info(self) -> tuple[int, int, str]:
         (ptr, _, device_type) = self._owner.memory_info()
         return (ptr + self._offset, self.nbytes, device_type)
 
-    def serialize(self) -> Tuple[dict, list]:
+    def serialize(self) -> tuple[dict, list]:
         """Serialize the Buffer
 
         Normally, we would use `[self]` as the frames. This would work but
@@ -411,8 +411,8 @@ def serialize(self) -> Tuple[dict, list]:
         given to `.deserialize()`, otherwise we would have a `Buffer` pointing
         to memory already owned by an existing `SpillableBufferOwner`.
         """
-        header: Dict[str, Any] = {}
-        frames: List[Buffer | memoryview]
+        header: dict[str, Any] = {}
+        frames: list[Buffer | memoryview]
         with self._owner.lock:
             header["type-serialized"] = pickle.dumps(self.__class__)
             header["owner-type-serialized"] = pickle.dumps(type(self._owner))
diff --git a/python/cudf/cudf/core/buffer/utils.py b/python/cudf/cudf/core/buffer/utils.py
index 3346d05ed4a..42a1501c914 100644
--- a/python/cudf/cudf/core/buffer/utils.py
+++ b/python/cudf/cudf/core/buffer/utils.py
@@ -4,7 +4,7 @@
 
 import threading
 from contextlib import ContextDecorator
-from typing import Any, Dict, Optional, Tuple, Type, Union
+from typing import Any
 
 from cudf.core.buffer.buffer import (
     Buffer,
@@ -22,7 +22,7 @@
 from cudf.options import get_option
 
 
-def get_buffer_owner(data: Any) -> Optional[BufferOwner]:
+def get_buffer_owner(data: Any) -> BufferOwner | None:
     """Get the owner of `data`, if one exists
 
     Search through the stack of data owners in order to find an
@@ -47,10 +47,10 @@ def get_buffer_owner(data: Any) -> Optional[BufferOwner]:
 
 
 def as_buffer(
-    data: Union[int, Any],
+    data: int | Any,
     *,
-    size: Optional[int] = None,
-    owner: Optional[object] = None,
+    size: int | None = None,
+    owner: object | None = None,
     exposed: bool = False,
 ) -> Buffer:
     """Factory function to wrap `data` in a Buffer object.
@@ -117,8 +117,8 @@ def as_buffer(
         )
 
     # Find the buffer types to return based on the current config
-    owner_class: Type[BufferOwner]
-    buffer_class: Type[Buffer]
+    owner_class: type[BufferOwner]
+    buffer_class: type[Buffer]
     if get_global_manager() is not None:
         owner_class = SpillableBufferOwner
         buffer_class = SpillableBuffer
@@ -161,7 +161,7 @@ def as_buffer(
     return buffer_class(owner=owner, offset=ptr - base_ptr, size=size)
 
 
-_thread_spill_locks: Dict[int, Tuple[Optional[SpillLock], int]] = {}
+_thread_spill_locks: dict[int, tuple[SpillLock | None, int]] = {}
 
 
 def _push_thread_spill_lock() -> None:
@@ -193,7 +193,7 @@ class acquire_spill_lock(ContextDecorator):
     pushing and popping from `_thread_spill_locks` using its thread ID.
     """
 
-    def __enter__(self) -> Optional[SpillLock]:
+    def __enter__(self) -> SpillLock | None:
         _push_thread_spill_lock()
         return get_spill_lock()
 
@@ -201,7 +201,7 @@ def __exit__(self, *exc):
         _pop_thread_spill_lock()
 
 
-def get_spill_lock() -> Union[SpillLock, None]:
+def get_spill_lock() -> SpillLock | None:
     """Return a spill lock within the context of `acquire_spill_lock` or None
 
     Returns None, if spilling is disabled.
diff --git a/python/cudf/cudf/core/column/categorical.py b/python/cudf/cudf/core/column/categorical.py
index 97c2ce5cf1f..f538180805b 100644
--- a/python/cudf/cudf/core/column/categorical.py
+++ b/python/cudf/cudf/core/column/categorical.py
@@ -4,7 +4,7 @@
 
 import warnings
 from functools import cached_property
-from typing import TYPE_CHECKING, Any, Mapping, Optional, Sequence, Tuple, cast
+from typing import TYPE_CHECKING, Any, Mapping, Sequence, cast
 
 import numpy as np
 import pandas as pd
@@ -139,7 +139,7 @@ def ordered(self) -> bool:
         """
         return self._column.ordered
 
-    def as_ordered(self) -> Optional[SeriesOrIndex]:
+    def as_ordered(self) -> SeriesOrIndex | None:
         """
         Set the Categorical to be ordered.
 
@@ -175,7 +175,7 @@ def as_ordered(self) -> Optional[SeriesOrIndex]:
         """
         return self._return_or_inplace(self._column.as_ordered(ordered=True))
 
-    def as_unordered(self) -> Optional[SeriesOrIndex]:
+    def as_unordered(self) -> SeriesOrIndex | None:
         """
         Set the Categorical to be unordered.
 
@@ -222,7 +222,7 @@ def as_unordered(self) -> Optional[SeriesOrIndex]:
         """
         return self._return_or_inplace(self._column.as_ordered(ordered=False))
 
-    def add_categories(self, new_categories: Any) -> Optional[SeriesOrIndex]:
+    def add_categories(self, new_categories: Any) -> SeriesOrIndex | None:
         """
         Add new categories.
 
@@ -294,7 +294,7 @@ def add_categories(self, new_categories: Any) -> Optional[SeriesOrIndex]:
     def remove_categories(
         self,
         removals: Any,
-    ) -> Optional[SeriesOrIndex]:
+    ) -> SeriesOrIndex | None:
         """
         Remove the specified categories.
 
@@ -370,7 +370,7 @@ def set_categories(
         new_categories: Any,
         ordered: bool = False,
         rename: bool = False,
-    ) -> Optional[SeriesOrIndex]:
+    ) -> SeriesOrIndex | None:
         """
         Set the categories to the specified new_categories.
 
@@ -443,7 +443,7 @@ def reorder_categories(
         self,
         new_categories: Any,
         ordered: bool = False,
-    ) -> Optional[SeriesOrIndex]:
+    ) -> SeriesOrIndex | None:
         """
         Reorder categories as specified in new_categories.
 
@@ -521,8 +521,8 @@ class CategoricalColumn(column.ColumnBase):
     """
 
     dtype: cudf.core.dtypes.CategoricalDtype
-    _codes: Optional[NumericalColumn]
-    _children: Tuple[NumericalColumn]
+    _codes: NumericalColumn | None
+    _children: tuple[NumericalColumn]
     _VALID_REDUCTIONS = {
         "max",
         "min",
@@ -539,11 +539,11 @@ class CategoricalColumn(column.ColumnBase):
     def __init__(
         self,
         dtype: CategoricalDtype,
-        mask: Optional[Buffer] = None,
-        size: Optional[int] = None,
+        mask: Buffer | None = None,
+        size: int | None = None,
         offset: int = 0,
-        null_count: Optional[int] = None,
-        children: Tuple["column.ColumnBase", ...] = (),
+        null_count: int | None = None,
+        children: tuple["column.ColumnBase", ...] = (),
     ):
         if size is None:
             for child in children:
@@ -590,23 +590,23 @@ def set_base_data(self, value):
 
     def _process_values_for_isin(
         self, values: Sequence
-    ) -> Tuple[ColumnBase, ColumnBase]:
+    ) -> tuple[ColumnBase, ColumnBase]:
         lhs = self
         # We need to convert values to same type as self,
         # hence passing dtype=self.dtype
         rhs = cudf.core.column.as_column(values, dtype=self.dtype)
         return lhs, rhs
 
-    def set_base_mask(self, value: Optional[Buffer]):
+    def set_base_mask(self, value: Buffer | None):
         super().set_base_mask(value)
         self._codes = None
 
-    def set_base_children(self, value: Tuple[ColumnBase, ...]):
+    def set_base_children(self, value: tuple[ColumnBase, ...]):
         super().set_base_children(value)
         self._codes = None
 
     @property
-    def children(self) -> Tuple[NumericalColumn]:
+    def children(self) -> tuple[NumericalColumn]:
         if self._children is None:
             codes_column = self.base_children[0]
             start = self.offset * codes_column.dtype.itemsize
@@ -693,9 +693,7 @@ def _fill(
         libcudf.filling.fill_in_place(result.codes, begin, end, fill_scalar)
         return result
 
-    def slice(
-        self, start: int, stop: int, stride: Optional[int] = None
-    ) -> Self:
+    def slice(self, start: int, stop: int, stride: int | None = None) -> Self:
         codes = self.codes.slice(start, stop, stride)
         return cast(
             Self,
@@ -714,7 +712,7 @@ def slice(
     def _reduce(
         self,
         op: str,
-        skipna: Optional[bool] = None,
+        skipna: bool | None = None,
         min_count: int = 0,
         *args,
         **kwargs,
@@ -1073,7 +1071,7 @@ def notnull(self) -> ColumnBase:
     def fillna(
         self,
         fill_value: Any = None,
-        method: Optional[str] = None,
+        method: str | None = None,
     ) -> Self:
         """
         Fill null values with *fill_value*
@@ -1207,7 +1205,7 @@ def memory_usage(self) -> int:
 
     def _mimic_inplace(
         self, other_col: ColumnBase, inplace: bool = False
-    ) -> Optional[Self]:
+    ) -> Self | None:
         out = super()._mimic_inplace(other_col, inplace=inplace)
         if inplace and isinstance(other_col, CategoricalColumn):
             self._codes = other_col._codes
@@ -1468,7 +1466,7 @@ def _create_empty_categorical_column(
 
 
 def pandas_categorical_as_column(
-    categorical: ColumnLike, codes: Optional[ColumnLike] = None
+    categorical: ColumnLike, codes: ColumnLike | None = None
 ) -> CategoricalColumn:
     """Creates a CategoricalColumn from a pandas.Categorical
 
diff --git a/python/cudf/cudf/core/column/column.py b/python/cudf/cudf/core/column/column.py
index dc937dc0469..c4e715aeb45 100644
--- a/python/cudf/cudf/core/column/column.py
+++ b/python/cudf/cudf/core/column/column.py
@@ -7,19 +7,7 @@
 from functools import cached_property
 from itertools import chain
 from types import SimpleNamespace
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Dict,
-    List,
-    Literal,
-    MutableSequence,
-    Optional,
-    Sequence,
-    Tuple,
-    Union,
-    cast,
-)
+from typing import TYPE_CHECKING, Any, Literal, MutableSequence, Sequence, cast
 
 import cupy
 import numpy as np
@@ -394,7 +382,7 @@ def _fill(
         begin: int,
         end: int,
         inplace: bool = False,
-    ) -> Optional[Self]:
+    ) -> Self | None:
         if end <= begin or begin >= self.size:
             return self if inplace else self.copy()
 
@@ -532,9 +520,7 @@ def element_indexing(self, index: int):
             raise IndexError("single positional indexer is out-of-bounds")
         return libcudf.copying.get_element(self, idx).value
 
-    def slice(
-        self, start: int, stop: int, stride: Optional[int] = None
-    ) -> Self:
+    def slice(self, start: int, stop: int, stride: int | None = None) -> Self:
         stride = 1 if stride is None else stride
         if start < 0:
             start = start + len(self)
@@ -570,7 +556,7 @@ def __setitem__(self, key: Any, value: Any):
             else as_column(value, dtype=self.dtype)
         )
 
-        out: Optional[ColumnBase]  # If None, no need to perform mimic inplace.
+        out: ColumnBase | None  # If None, no need to perform mimic inplace.
         if isinstance(key, slice):
             out = self._scatter_by_slice(key, value_normalized)
         else:
@@ -593,8 +579,8 @@ def _wrap_binop_normalization(self, other):
     def _scatter_by_slice(
         self,
         key: builtins.slice,
-        value: Union[cudf.core.scalar.Scalar, ColumnBase],
-    ) -> Optional[Self]:
+        value: cudf.core.scalar.Scalar | ColumnBase,
+    ) -> Self | None:
         """If this function returns None, it's either a no-op (slice is empty),
         or the inplace replacement is already performed (fill-in-place).
         """
@@ -630,7 +616,7 @@ def _scatter_by_slice(
     def _scatter_by_column(
         self,
         key: cudf.core.column.NumericalColumn,
-        value: Union[cudf.core.scalar.Scalar, ColumnBase],
+        value: cudf.core.scalar.Scalar | ColumnBase,
     ) -> Self:
         if is_bool_dtype(key.dtype):
             # `key` is boolean mask
@@ -667,7 +653,7 @@ def _scatter_by_column(
             ]._with_type_metadata(self.dtype)
 
     def _check_scatter_key_length(
-        self, num_keys: int, value: Union[cudf.core.scalar.Scalar, ColumnBase]
+        self, num_keys: int, value: cudf.core.scalar.Scalar | ColumnBase
     ) -> None:
         """`num_keys` is the number of keys to scatter. Should equal to the
         number of rows in ``value`` if ``value`` is a column.
@@ -682,7 +668,7 @@ def _check_scatter_key_length(
     def fillna(
         self,
         fill_value: Any = None,
-        method: Optional[str] = None,
+        method: str | None = None,
     ) -> Self:
         """Fill null values with ``value``.
 
@@ -740,7 +726,7 @@ def indices_of(
             [as_column(range(0, len(self)), dtype=size_type_dtype)], mask
         )[0]
 
-    def _find_first_and_last(self, value: ScalarLike) -> Tuple[int, int]:
+    def _find_first_and_last(self, value: ScalarLike) -> tuple[int, int]:
         indices = self.indices_of(value)
         if n := len(indices):
             return (
@@ -856,7 +842,7 @@ def isin(self, values: Sequence) -> ColumnBase:
 
     def _process_values_for_isin(
         self, values: Sequence
-    ) -> Tuple[ColumnBase, ColumnBase]:
+    ) -> tuple[ColumnBase, ColumnBase]:
         """
         Helper function for `isin` which pre-process `values` based on `self`.
         """
@@ -868,7 +854,7 @@ def _process_values_for_isin(
             rhs = rhs.astype(lhs.dtype)
         return lhs, rhs
 
-    def _isin_earlystop(self, rhs: ColumnBase) -> Union[ColumnBase, None]:
+    def _isin_earlystop(self, rhs: ColumnBase) -> ColumnBase | None:
         """
         Helper function for `isin` which determines possibility of
         early-stopping or not.
@@ -1070,7 +1056,7 @@ def as_string_column(
 
     def as_decimal_column(
         self, dtype: Dtype
-    ) -> Union["cudf.core.column.decimal.DecimalBaseColumn"]:
+    ) -> "cudf.core.column.decimal.DecimalBaseColumn":
         raise NotImplementedError
 
     def apply_boolean_mask(self, mask) -> ColumnBase:
@@ -1154,7 +1140,7 @@ def unique(self) -> ColumnBase:
             self.dtype
         )
 
-    def serialize(self) -> Tuple[dict, list]:
+    def serialize(self) -> tuple[dict, list]:
         # data model:
 
         # Serialization produces a nested metadata "header" and a flattened
@@ -1167,7 +1153,7 @@ def serialize(self) -> Tuple[dict, list]:
         # cudf native or foreign some special-casing is required here for
         # serialization.
 
-        header: Dict[Any, Any] = {}
+        header: dict[Any, Any] = {}
         frames = []
         header["type-serialized"] = pickle.dumps(type(self))
         try:
@@ -1200,7 +1186,7 @@ def serialize(self) -> Tuple[dict, list]:
 
     @classmethod
     def deserialize(cls, header: dict, frames: list) -> ColumnBase:
-        def unpack(header, frames) -> Tuple[Any, list]:
+        def unpack(header, frames) -> tuple[Any, list]:
             count = header["frame_count"]
             klass = pickle.loads(header["type-serialized"])
             obj = klass.deserialize(header, frames[:count])
@@ -1247,13 +1233,13 @@ def nans_to_nulls(self: Self) -> Self:
 
     def normalize_binop_value(
         self, other: ScalarLike
-    ) -> Union[ColumnBase, ScalarLike]:
+    ) -> ColumnBase | ScalarLike:
         raise NotImplementedError
 
     def _reduce(
         self,
         op: str,
-        skipna: Optional[bool] = None,
+        skipna: bool | None = None,
         min_count: int = 0,
         *args,
         **kwargs,
@@ -1274,8 +1260,8 @@ def _reduce(
         return preprocessed
 
     def _process_for_reduction(
-        self, skipna: Optional[bool] = None, min_count: int = 0
-    ) -> Union[ColumnBase, ScalarLike]:
+        self, skipna: bool | None = None, min_count: int = 0
+    ) -> ColumnBase | ScalarLike:
         if skipna is None:
             skipna = True
 
@@ -1315,8 +1301,8 @@ def _with_type_metadata(self: ColumnBase, dtype: Dtype) -> ColumnBase:
     def _label_encoding(
         self,
         cats: ColumnBase,
-        dtype: Optional[Dtype] = None,
-        na_sentinel: Optional[ScalarLike] = None,
+        dtype: Dtype | None = None,
+        na_sentinel: ScalarLike | None = None,
     ):
         """
         Convert each value in `self` into an integer code, with `cats`
@@ -1389,9 +1375,9 @@ def _return_sentinel_column():
 
 def column_empty_like(
     column: ColumnBase,
-    dtype: Optional[Dtype] = None,
+    dtype: Dtype | None = None,
     masked: bool = False,
-    newsize: Optional[int] = None,
+    newsize: int | None = None,
 ) -> ColumnBase:
     """Allocate a new column like the given *column*"""
     if dtype is None:
@@ -1446,7 +1432,7 @@ def column_empty(
 ) -> ColumnBase:
     """Allocate a new column like the given row_count and dtype."""
     dtype = cudf.dtype(dtype)
-    children = ()  # type: Tuple[ColumnBase, ...]
+    children: tuple[ColumnBase, ...] = ()
 
     if isinstance(dtype, StructDtype):
         data = None
@@ -1496,14 +1482,14 @@ def column_empty(
 
 
 def build_column(
-    data: Union[Buffer, None],
+    data: Buffer | None,
     dtype: Dtype,
     *,
-    size: Optional[int] = None,
-    mask: Optional[Buffer] = None,
+    size: int | None = None,
+    mask: Buffer | None = None,
     offset: int = 0,
-    null_count: Optional[int] = None,
-    children: Tuple[ColumnBase, ...] = (),
+    null_count: int | None = None,
+    children: tuple[ColumnBase, ...] = (),
 ) -> ColumnBase:
     """
     Build a Column of the appropriate type from the given parameters
@@ -1665,10 +1651,10 @@ def build_column(
 def build_categorical_column(
     categories: ColumnBase,
     codes: ColumnBase,
-    mask: Optional[Buffer] = None,
-    size: Optional[int] = None,
+    mask: Buffer | None = None,
+    size: int | None = None,
     offset: int = 0,
-    null_count: Optional[int] = None,
+    null_count: int | None = None,
     ordered: bool = False,
 ) -> "cudf.core.column.CategoricalColumn":
     """
@@ -1715,7 +1701,7 @@ def check_invalid_array(shape: tuple, dtype):
         raise TypeError("Unsupported type float16")
 
 
-def as_memoryview(arbitrary: Any) -> Optional[memoryview]:
+def as_memoryview(arbitrary: Any) -> memoryview | None:
     try:
         return memoryview(arbitrary)
     except TypeError:
@@ -1724,9 +1710,9 @@ def as_memoryview(arbitrary: Any) -> Optional[memoryview]:
 
 def as_column(
     arbitrary: Any,
-    nan_as_null: Optional[bool] = None,
-    dtype: Optional[Dtype] = None,
-    length: Optional[int] = None,
+    nan_as_null: bool | None = None,
+    dtype: Dtype | None = None,
+    length: int | None = None,
 ):
     """Create a Column from an arbitrary object
 
@@ -2199,7 +2185,7 @@ def _mask_from_cuda_array_interface_desc(obj, cai_mask) -> Buffer:
         raise NotImplementedError(f"Cannot infer mask from typestr {typestr}")
 
 
-def serialize_columns(columns: list[ColumnBase]) -> Tuple[List[dict], List]:
+def serialize_columns(columns: list[ColumnBase]) -> tuple[list[dict], list]:
     """
     Return the headers and frames resulting
     from serializing a list of Column
@@ -2216,7 +2202,7 @@ def serialize_columns(columns: list[ColumnBase]) -> Tuple[List[dict], List]:
     frames : list
         list of frames
     """
-    headers: List[Dict[Any, Any]] = []
+    headers: list[dict[Any, Any]] = []
     frames = []
 
     if len(columns) > 0:
@@ -2228,7 +2214,7 @@ def serialize_columns(columns: list[ColumnBase]) -> Tuple[List[dict], List]:
     return headers, frames
 
 
-def deserialize_columns(headers: List[dict], frames: List) -> List[ColumnBase]:
+def deserialize_columns(headers: list[dict], frames: list) -> list[ColumnBase]:
     """
     Construct a list of Columns from a list of headers
     and frames.
diff --git a/python/cudf/cudf/core/column/datetime.py b/python/cudf/cudf/core/column/datetime.py
index e24d85bfedf..7fdebda7d76 100644
--- a/python/cudf/cudf/core/column/datetime.py
+++ b/python/cudf/cudf/core/column/datetime.py
@@ -8,7 +8,7 @@
 import locale
 import re
 from locale import nl_langinfo
-from typing import TYPE_CHECKING, Any, Literal, Optional, Sequence, Tuple, cast
+from typing import TYPE_CHECKING, Any, Literal, Sequence, cast
 
 import numpy as np
 import pandas as pd
@@ -242,10 +242,10 @@ def __init__(
         self,
         data: Buffer,
         dtype: DtypeObj,
-        mask: Optional[Buffer] = None,
-        size: Optional[int] = None,  # TODO: make non-optional
+        mask: Buffer | None = None,
+        size: int | None = None,  # TODO: make non-optional
         offset: int = 0,
-        null_count: Optional[int] = None,
+        null_count: int | None = None,
     ):
         dtype = cudf.dtype(dtype)
         if dtype.kind != "M":
@@ -499,7 +499,7 @@ def mean(
 
     def std(
         self,
-        skipna: Optional[bool] = None,
+        skipna: bool | None = None,
         min_count: int = 0,
         dtype: Dtype = np.float64,
         ddof: int = 1,
@@ -511,7 +511,7 @@ def std(
             * _unit_to_nanoseconds_conversion[self.time_unit],
         ).as_unit(self.time_unit)
 
-    def median(self, skipna: Optional[bool] = None) -> pd.Timestamp:
+    def median(self, skipna: bool | None = None) -> pd.Timestamp:
         return pd.Timestamp(
             self.as_numerical_column("int64").median(skipna=skipna),
             unit=self.time_unit,
@@ -631,7 +631,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str) -> ColumnBase:
     def fillna(
         self,
         fill_value: Any = None,
-        method: Optional[str] = None,
+        method: str | None = None,
     ) -> Self:
         if fill_value is not None:
             if cudf.utils.utils._isnat(fill_value):
@@ -703,7 +703,7 @@ def _with_type_metadata(self, dtype):
 
     def _find_ambiguous_and_nonexistent(
         self, zone_name: str
-    ) -> Tuple[NumericalColumn, NumericalColumn] | Tuple[bool, bool]:
+    ) -> tuple[NumericalColumn, NumericalColumn] | tuple[bool, bool]:
         """
         Recognize ambiguous and nonexistent timestamps for the given timezone.
 
@@ -822,10 +822,10 @@ def __init__(
         self,
         data: Buffer,
         dtype: pd.DatetimeTZDtype,
-        mask: Optional[Buffer] = None,
-        size: Optional[int] = None,
+        mask: Buffer | None = None,
+        size: int | None = None,
         offset: int = 0,
-        null_count: Optional[int] = None,
+        null_count: int | None = None,
     ):
         super().__init__(
             data=data,
diff --git a/python/cudf/cudf/core/column/decimal.py b/python/cudf/cudf/core/column/decimal.py
index 9c1bedc9926..e9d9b4933e5 100644
--- a/python/cudf/cudf/core/column/decimal.py
+++ b/python/cudf/cudf/core/column/decimal.py
@@ -4,7 +4,7 @@
 
 import warnings
 from decimal import Decimal
-from typing import TYPE_CHECKING, Any, Optional, Sequence, Union, cast
+from typing import TYPE_CHECKING, Any, Sequence, cast
 
 import cupy as cp
 import numpy as np
@@ -49,7 +49,7 @@ def __cuda_array_interface__(self):
     def as_decimal_column(
         self,
         dtype: Dtype,
-    ) -> Union["DecimalBaseColumn"]:
+    ) -> "DecimalBaseColumn":
         if (
             isinstance(dtype, cudf.core.dtypes.DecimalDtype)
             and dtype.scale < self.dtype.scale
@@ -138,7 +138,7 @@ def _binaryop(self, other: ColumnBinaryOperand, op: str):
     def fillna(
         self,
         fill_value: Any = None,
-        method: Optional[str] = None,
+        method: str | None = None,
     ) -> Self:
         """Fill null values with ``value``.
 
@@ -199,7 +199,7 @@ def normalize_binop_value(self, other):
         return NotImplemented
 
     def _decimal_quantile(
-        self, q: Union[float, Sequence[float]], interpolation: str, exact: bool
+        self, q: float | Sequence[float], interpolation: str, exact: bool
     ) -> ColumnBase:
         quant = [float(q)] if not isinstance(q, (Sequence, np.ndarray)) else q
         # get sorted indices and exclude nulls
diff --git a/python/cudf/cudf/core/column/lists.py b/python/cudf/cudf/core/column/lists.py
index 080ba949d62..c548db67344 100644
--- a/python/cudf/cudf/core/column/lists.py
+++ b/python/cudf/cudf/core/column/lists.py
@@ -3,7 +3,7 @@
 from __future__ import annotations
 
 from functools import cached_property
-from typing import TYPE_CHECKING, List, Optional, Sequence, Tuple, Union
+from typing import TYPE_CHECKING, Sequence
 
 import numpy as np
 import pandas as pd
@@ -167,7 +167,7 @@ def set_base_data(self, value):
         else:
             super().set_base_data(value)
 
-    def set_base_children(self, value: Tuple[ColumnBase, ...]):
+    def set_base_children(self, value: tuple[ColumnBase, ...]):
         super().set_base_children(value)
         _, values = value
         self._dtype = cudf.ListDtype(element_type=values.dtype)
@@ -269,7 +269,7 @@ def _transform_leaves(self, func, *args, **kwargs) -> Self:
         # as ``self``, but with the leaf column transformed
         # by applying ``func`` to it
 
-        cc: List[ListColumn] = []
+        cc: list[ListColumn] = []
         c: ColumnBase = self
 
         while isinstance(c, ListColumn):
@@ -320,7 +320,7 @@ def __init__(self, parent: ParentType):
     def get(
         self,
         index: int,
-        default: Optional[Union[ScalarLike, ColumnLike]] = None,
+        default: ScalarLike | ColumnLike | None = None,
     ) -> ParentType:
         """
         Extract element at the given index from each list in a Series of lists.
@@ -424,7 +424,7 @@ def contains(self, search_key: ScalarLike) -> ParentType:
             contains_scalar(self._column, cudf.Scalar(search_key))
         )
 
-    def index(self, search_key: Union[ScalarLike, ColumnLike]) -> ParentType:
+    def index(self, search_key: ScalarLike | ColumnLike) -> ParentType:
         """
         Returns integers representing the index of the search key for each row.
 
diff --git a/python/cudf/cudf/core/column/methods.py b/python/cudf/cudf/core/column/methods.py
index 7f7355c571a..7c6f4e05577 100644
--- a/python/cudf/cudf/core/column/methods.py
+++ b/python/cudf/cudf/core/column/methods.py
@@ -2,7 +2,7 @@
 
 from __future__ import annotations
 
-from typing import Optional, Union, overload
+from typing import Union, overload
 
 from typing_extensions import Literal
 
@@ -52,7 +52,7 @@ def _return_or_inplace(
         inplace: bool = False,
         expand: bool = False,
         retain_index: bool = True,
-    ) -> Optional[ParentType]: ...
+    ) -> ParentType | None: ...
 
     def _return_or_inplace(
         self, new_col, inplace=False, expand=False, retain_index=True
diff --git a/python/cudf/cudf/core/column/numerical.py b/python/cudf/cudf/core/column/numerical.py
index 6af67e02bb4..098cf43421b 100644
--- a/python/cudf/cudf/core/column/numerical.py
+++ b/python/cudf/cudf/core/column/numerical.py
@@ -3,16 +3,7 @@
 from __future__ import annotations
 
 import functools
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Callable,
-    Optional,
-    Sequence,
-    Tuple,
-    Union,
-    cast,
-)
+from typing import TYPE_CHECKING, Any, Callable, Sequence, cast
 
 import cupy as cp
 import numpy as np
@@ -85,10 +76,10 @@ def __init__(
         self,
         data: Buffer,
         dtype: DtypeObj,
-        mask: Optional[Buffer] = None,
-        size: Optional[int] = None,  # TODO: make this non-optional
+        mask: Buffer | None = None,
+        size: int | None = None,  # TODO: make this non-optional
         offset: int = 0,
-        null_count: Optional[int] = None,
+        null_count: int | None = None,
     ):
         dtype = cudf.dtype(dtype)
 
@@ -179,7 +170,7 @@ def __setitem__(self, key: Any, value: Any):
         else:
             device_value = device_value.astype(self.dtype)
 
-        out: Optional[ColumnBase]  # If None, no need to perform mimic inplace.
+        out: ColumnBase | None  # If None, no need to perform mimic inplace.
         if isinstance(key, slice):
             out = self._scatter_by_slice(key, device_value)
         else:
@@ -196,7 +187,7 @@ def __setitem__(self, key: Any, value: Any):
         if out:
             self._mimic_inplace(out, inplace=True)
 
-    def unary_operator(self, unaryop: Union[str, Callable]) -> ColumnBase:
+    def unary_operator(self, unaryop: str | Callable) -> ColumnBase:
         if callable(unaryop):
             return libcudf.transform.transform(self, unaryop)
 
@@ -302,7 +293,7 @@ def nans_to_nulls(self: Self) -> Self:
 
     def normalize_binop_value(
         self, other: ScalarLike
-    ) -> Union[ColumnBase, cudf.Scalar]:
+    ) -> ColumnBase | cudf.Scalar:
         if isinstance(other, ColumnBase):
             if not isinstance(other, NumericalColumn):
                 return NotImplemented
@@ -422,7 +413,7 @@ def nan_count(self) -> int:
 
     def _process_values_for_isin(
         self, values: Sequence
-    ) -> Tuple[ColumnBase, ColumnBase]:
+    ) -> tuple[ColumnBase, ColumnBase]:
         lhs = cast("cudf.core.column.ColumnBase", self)
         try:
             rhs = as_column(values, nan_as_null=False)
@@ -456,12 +447,12 @@ def _process_values_for_isin(
 
         return lhs, rhs
 
-    def _can_return_nan(self, skipna: Optional[bool] = None) -> bool:
+    def _can_return_nan(self, skipna: bool | None = None) -> bool:
         return not skipna and self.has_nulls(include_nan=True)
 
     def _process_for_reduction(
-        self, skipna: Optional[bool] = None, min_count: int = 0
-    ) -> Union[NumericalColumn, ScalarLike]:
+        self, skipna: bool | None = None, min_count: int = 0
+    ) -> NumericalColumn | ScalarLike:
         skipna = True if skipna is None else skipna
 
         if self._can_return_nan(skipna=skipna):
@@ -544,7 +535,7 @@ def find_and_replace(
     def fillna(
         self,
         fill_value: Any = None,
-        method: Optional[str] = None,
+        method: str | None = None,
     ) -> Self:
         """
         Fill null values with *fill_value*
@@ -730,7 +721,7 @@ def _reduction_result_dtype(self, reduction_op: str) -> Dtype:
 
 
 def _normalize_find_and_replace_input(
-    input_column_dtype: DtypeObj, col_to_normalize: Union[ColumnBase, list]
+    input_column_dtype: DtypeObj, col_to_normalize: ColumnBase | list
 ) -> ColumnBase:
     normalized_column = column.as_column(
         col_to_normalize,
diff --git a/python/cudf/cudf/core/column/numerical_base.py b/python/cudf/cudf/core/column/numerical_base.py
index bd48054a951..95c78c5efcb 100644
--- a/python/cudf/cudf/core/column/numerical_base.py
+++ b/python/cudf/cudf/core/column/numerical_base.py
@@ -3,7 +3,7 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Optional, cast
+from typing import TYPE_CHECKING, cast
 
 import numpy as np
 
@@ -42,10 +42,10 @@ class NumericalBaseColumn(ColumnBase, Scannable):
         "cummax",
     }
 
-    def _can_return_nan(self, skipna: Optional[bool] = None) -> bool:
+    def _can_return_nan(self, skipna: bool | None = None) -> bool:
         return not skipna and self.has_nulls()
 
-    def kurtosis(self, skipna: Optional[bool] = None) -> float:
+    def kurtosis(self, skipna: bool | None = None) -> float:
         skipna = True if skipna is None else skipna
 
         if len(self) == 0 or self._can_return_nan(skipna=skipna):
@@ -70,7 +70,7 @@ def kurtosis(self, skipna: Optional[bool] = None) -> float:
         kurt = term_one_section_one * term_one_section_two - 3 * term_two
         return kurt
 
-    def skew(self, skipna: Optional[bool] = None) -> ScalarLike:
+    def skew(self, skipna: bool | None = None) -> ScalarLike:
         skipna = True if skipna is None else skipna
 
         if len(self) == 0 or self._can_return_nan(skipna=skipna):
@@ -142,7 +142,7 @@ def quantile(
 
     def mean(
         self,
-        skipna: Optional[bool] = None,
+        skipna: bool | None = None,
         min_count: int = 0,
         dtype=np.float64,
     ):
@@ -152,7 +152,7 @@ def mean(
 
     def var(
         self,
-        skipna: Optional[bool] = None,
+        skipna: bool | None = None,
         min_count: int = 0,
         dtype=np.float64,
         ddof=1,
@@ -163,7 +163,7 @@ def var(
 
     def std(
         self,
-        skipna: Optional[bool] = None,
+        skipna: bool | None = None,
         min_count: int = 0,
         dtype=np.float64,
         ddof=1,
@@ -172,7 +172,7 @@ def std(
             "std", skipna=skipna, min_count=min_count, dtype=dtype, ddof=ddof
         )
 
-    def median(self, skipna: Optional[bool] = None) -> NumericalBaseColumn:
+    def median(self, skipna: bool | None = None) -> NumericalBaseColumn:
         skipna = True if skipna is None else skipna
 
         if self._can_return_nan(skipna=skipna):
diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py
index 87df2d2f1f1..2451a9cc0af 100644
--- a/python/cudf/cudf/core/column/string.py
+++ b/python/cudf/cudf/core/column/string.py
@@ -5,16 +5,7 @@
 import re
 import warnings
 from functools import cached_property
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Optional,
-    Sequence,
-    Tuple,
-    Union,
-    cast,
-    overload,
-)
+from typing import TYPE_CHECKING, Any, Sequence, cast, overload
 
 import numpy as np
 import pandas as pd
@@ -257,13 +248,13 @@ def byte_count(self) -> SeriesOrIndex:
 
     @overload
     def cat(
-        self, sep: Optional[str] = None, na_rep: Optional[str] = None
+        self, sep: str | None = None, na_rep: str | None = None
     ) -> str: ...
 
     @overload
     def cat(
-        self, others, sep: Optional[str] = None, na_rep: Optional[str] = None
-    ) -> Union[SeriesOrIndex, "cudf.core.column.string.StringColumn"]: ...
+        self, others, sep: str | None = None, na_rep: str | None = None
+    ) -> SeriesOrIndex | "cudf.core.column.string.StringColumn": ...
 
     def cat(self, others=None, sep=None, na_rep=None):
         """
@@ -641,7 +632,7 @@ def extract(
 
     def contains(
         self,
-        pat: Union[str, Sequence],
+        pat: str | Sequence,
         case: bool = True,
         flags: int = 0,
         na=np.nan,
@@ -792,7 +783,7 @@ def contains(
             result_col = libstrings.contains_multiple(input_column, pat)
         return self._return_or_inplace(result_col)
 
-    def like(self, pat: str, esc: Optional[str] = None) -> SeriesOrIndex:
+    def like(self, pat: str, esc: str | None = None) -> SeriesOrIndex:
         """
         Test if a like pattern matches a string of a Series or Index.
 
@@ -863,7 +854,7 @@ def like(self, pat: str, esc: Optional[str] = None) -> SeriesOrIndex:
 
     def repeat(
         self,
-        repeats: Union[int, Sequence],
+        repeats: int | Sequence,
     ) -> SeriesOrIndex:
         """
         Duplicate each string in the Series or Index.
@@ -920,8 +911,8 @@ def repeat(
 
     def replace(
         self,
-        pat: Union[str, Sequence],
-        repl: Union[str, Sequence],
+        pat: str | Sequence,
+        repl: str | Sequence,
         n: int = -1,
         case=None,
         flags: int = 0,
@@ -1074,9 +1065,9 @@ def replace_with_backrefs(self, pat: str, repl: str) -> SeriesOrIndex:
 
     def slice(
         self,
-        start: Optional[int] = None,
-        stop: Optional[int] = None,
-        step: Optional[int] = None,
+        start: int | None = None,
+        stop: int | None = None,
+        step: int | None = None,
     ) -> SeriesOrIndex:
         """
         Slice substrings from each element in the Series or Index.
@@ -2051,7 +2042,7 @@ def istitle(self) -> SeriesOrIndex:
         return self._return_or_inplace(libstrings.is_title(self._column))
 
     def filter_alphanum(
-        self, repl: Optional[str] = None, keep: bool = True
+        self, repl: str | None = None, keep: bool = True
     ) -> SeriesOrIndex:
         """
         Remove non-alphanumeric characters from strings in this column.
@@ -2138,9 +2129,9 @@ def slice_from(
 
     def slice_replace(
         self,
-        start: Optional[int] = None,
-        stop: Optional[int] = None,
-        repl: Optional[str] = None,
+        start: int | None = None,
+        stop: int | None = None,
+        repl: str | None = None,
     ) -> SeriesOrIndex:
         """
         Replace the specified section of each string with a new string.
@@ -2228,9 +2219,7 @@ def slice_replace(
             ),
         )
 
-    def insert(
-        self, start: int = 0, repl: Optional[str] = None
-    ) -> SeriesOrIndex:
+    def insert(self, start: int = 0, repl: str | None = None) -> SeriesOrIndex:
         """
         Insert the specified string into each string in the specified
         position.
@@ -2410,10 +2399,10 @@ def get_json_object(
 
     def split(
         self,
-        pat: Optional[str] = None,
+        pat: str | None = None,
         n: int = -1,
         expand: bool = False,
-        regex: Optional[bool] = None,
+        regex: bool | None = None,
     ) -> SeriesOrIndex:
         """
         Split strings around given separator/delimiter.
@@ -2578,10 +2567,10 @@ def split(
 
     def rsplit(
         self,
-        pat: Optional[str] = None,
+        pat: str | None = None,
         n: int = -1,
         expand: bool = False,
-        regex: Optional[bool] = None,
+        regex: bool | None = None,
     ) -> SeriesOrIndex:
         """
         Split strings around given separator/delimiter.
@@ -3233,7 +3222,7 @@ def rjust(self, width: int, fillchar: str = " ") -> SeriesOrIndex:
             libstrings.rjust(self._column, width, fillchar)
         )
 
-    def strip(self, to_strip: Optional[str] = None) -> SeriesOrIndex:
+    def strip(self, to_strip: str | None = None) -> SeriesOrIndex:
         r"""
         Remove leading and trailing characters.
 
@@ -3292,7 +3281,7 @@ def strip(self, to_strip: Optional[str] = None) -> SeriesOrIndex:
             libstrings.strip(self._column, cudf.Scalar(to_strip, "str"))
         )
 
-    def lstrip(self, to_strip: Optional[str] = None) -> SeriesOrIndex:
+    def lstrip(self, to_strip: str | None = None) -> SeriesOrIndex:
         r"""
         Remove leading and trailing characters.
 
@@ -3339,7 +3328,7 @@ def lstrip(self, to_strip: Optional[str] = None) -> SeriesOrIndex:
             libstrings.lstrip(self._column, cudf.Scalar(to_strip, "str"))
         )
 
-    def rstrip(self, to_strip: Optional[str] = None) -> SeriesOrIndex:
+    def rstrip(self, to_strip: str | None = None) -> SeriesOrIndex:
         r"""
         Remove leading and trailing characters.
 
@@ -3844,7 +3833,7 @@ def endswith(self, pat: str) -> SeriesOrIndex:
 
         return self._return_or_inplace(result_col)
 
-    def startswith(self, pat: Union[str, Sequence]) -> SeriesOrIndex:
+    def startswith(self, pat: str | Sequence) -> SeriesOrIndex:
         """
         Test if the start of each string element matches a pattern.
 
@@ -3996,7 +3985,7 @@ def removeprefix(self, prefix: str) -> SeriesOrIndex:
         return self._return_or_inplace(result)
 
     def find(
-        self, sub: str, start: int = 0, end: Optional[int] = None
+        self, sub: str, start: int = 0, end: int | None = None
     ) -> SeriesOrIndex:
         """
         Return lowest indexes in each strings in the Series/Index
@@ -4053,7 +4042,7 @@ def find(
         return self._return_or_inplace(result_col)
 
     def rfind(
-        self, sub: str, start: int = 0, end: Optional[int] = None
+        self, sub: str, start: int = 0, end: int | None = None
     ) -> SeriesOrIndex:
         """
         Return highest indexes in each strings in the Series/Index
@@ -4114,7 +4103,7 @@ def rfind(
         return self._return_or_inplace(result_col)
 
     def index(
-        self, sub: str, start: int = 0, end: Optional[int] = None
+        self, sub: str, start: int = 0, end: int | None = None
     ) -> SeriesOrIndex:
         """
         Return lowest indexes in each strings where the substring
@@ -4176,7 +4165,7 @@ def index(
             return result
 
     def rindex(
-        self, sub: str, start: int = 0, end: Optional[int] = None
+        self, sub: str, start: int = 0, end: int | None = None
     ) -> SeriesOrIndex:
         """
         Return highest indexes in each strings where the substring
@@ -4443,7 +4432,7 @@ def translate(self, table: dict) -> SeriesOrIndex:
         )
 
     def filter_characters(
-        self, table: dict, keep: bool = True, repl: Optional[str] = None
+        self, table: dict, keep: bool = True, repl: str | None = None
     ) -> SeriesOrIndex:
         """
         Remove characters from each string using the character ranges
@@ -4924,7 +4913,7 @@ def ngrams_tokenize(
         )
 
     def replace_tokens(
-        self, targets, replacements, delimiter: Optional[str] = None
+        self, targets, replacements, delimiter: str | None = None
     ) -> SeriesOrIndex:
         """
         The targets tokens are searched for within each string in the series
@@ -5009,8 +4998,8 @@ def replace_tokens(
     def filter_tokens(
         self,
         min_token_length: int,
-        replacement: Optional[str] = None,
-        delimiter: Optional[str] = None,
+        replacement: str | None = None,
+        delimiter: str | None = None,
     ) -> SeriesOrIndex:
         """
         Remove tokens from within each string in the series that are
@@ -5279,7 +5268,7 @@ def edit_distance_matrix(self) -> SeriesOrIndex:
         )
 
     def minhash(
-        self, seeds: Optional[ColumnLike] = None, width: int = 4
+        self, seeds: ColumnLike | None = None, width: int = 4
     ) -> SeriesOrIndex:
         """
         Compute the minhash of a strings column.
@@ -5322,7 +5311,7 @@ def minhash(
         )
 
     def minhash64(
-        self, seeds: Optional[ColumnLike] = None, width: int = 4
+        self, seeds: ColumnLike | None = None, width: int = 4
     ) -> SeriesOrIndex:
         """
         Compute the minhash of a strings column.
@@ -5436,8 +5425,8 @@ class StringColumn(column.ColumnBase):
         respectively
     """
 
-    _start_offset: Optional[int]
-    _end_offset: Optional[int]
+    _start_offset: int | None
+    _end_offset: int | None
 
     _VALID_BINARY_OPERATIONS = {
         "__eq__",
@@ -5461,12 +5450,12 @@ class StringColumn(column.ColumnBase):
 
     def __init__(
         self,
-        data: Optional[Buffer] = None,
-        mask: Optional[Buffer] = None,
-        size: Optional[int] = None,  # TODO: make non-optional
+        data: Buffer | None = None,
+        mask: Buffer | None = None,
+        size: int | None = None,  # TODO: make non-optional
         offset: int = 0,
-        null_count: Optional[int] = None,
-        children: Tuple["column.ColumnBase", ...] = (),
+        null_count: int | None = None,
+        children: tuple["column.ColumnBase", ...] = (),
     ):
         dtype = cudf.api.types.dtype("object")
 
@@ -5634,8 +5623,8 @@ def to_arrow(self) -> pa.Array:
 
     def sum(
         self,
-        skipna: Optional[bool] = None,
-        dtype: Optional[Dtype] = None,
+        skipna: bool | None = None,
+        dtype: Dtype | None = None,
         min_count: int = 0,
     ):
         result_col = self._process_for_reduction(
@@ -5852,7 +5841,7 @@ def find_and_replace(
     def fillna(
         self,
         fill_value: Any = None,
-        method: Optional[str] = None,
+        method: str | None = None,
     ) -> Self:
         if fill_value is not None:
             if not is_scalar(fill_value):
@@ -5864,9 +5853,7 @@ def fillna(
                 fill_value = cudf.Scalar(fill_value, dtype=self.dtype)
         return super().fillna(fill_value, method=method)
 
-    def normalize_binop_value(
-        self, other
-    ) -> Union[column.ColumnBase, cudf.Scalar]:
+    def normalize_binop_value(self, other) -> column.ColumnBase | cudf.Scalar:
         if (
             isinstance(other, (column.ColumnBase, cudf.Scalar))
             and other.dtype == "object"
@@ -5930,8 +5917,8 @@ def _binaryop(
 
                 # Explicit types are necessary because mypy infers ColumnBase
                 # rather than StringColumn and sometimes forgets Scalar.
-                lhs: Union[cudf.Scalar, StringColumn]
-                rhs: Union[cudf.Scalar, StringColumn]
+                lhs: cudf.Scalar | StringColumn
+                rhs: cudf.Scalar | StringColumn
                 lhs, rhs = (other, self) if reflect else (self, other)
 
                 return cast(
diff --git a/python/cudf/cudf/core/column/timedelta.py b/python/cudf/cudf/core/column/timedelta.py
index 0af847f38af..8eec84b64f7 100644
--- a/python/cudf/cudf/core/column/timedelta.py
+++ b/python/cudf/cudf/core/column/timedelta.py
@@ -4,7 +4,7 @@
 
 import datetime
 import functools
-from typing import TYPE_CHECKING, Any, Optional, Sequence, cast
+from typing import TYPE_CHECKING, Any, Sequence, cast
 
 import numpy as np
 import pandas as pd
@@ -77,10 +77,10 @@ def __init__(
         self,
         data: Buffer,
         dtype: Dtype,
-        size: Optional[int] = None,  # TODO: make non-optional
-        mask: Optional[Buffer] = None,
+        size: int | None = None,  # TODO: make non-optional
+        mask: Buffer | None = None,
         offset: int = 0,
-        null_count: Optional[int] = None,
+        null_count: int | None = None,
     ):
         dtype = cudf.dtype(dtype)
         if dtype.kind != "m":
@@ -255,7 +255,7 @@ def time_unit(self) -> str:
     def fillna(
         self,
         fill_value: Any = None,
-        method: Optional[str] = None,
+        method: str | None = None,
     ) -> Self:
         if fill_value is not None:
             if cudf.utils.utils._isnat(fill_value):
@@ -316,7 +316,7 @@ def mean(self, skipna=None, dtype: Dtype = np.float64) -> pd.Timedelta:
             unit=self.time_unit,
         ).as_unit(self.time_unit)
 
-    def median(self, skipna: Optional[bool] = None) -> pd.Timedelta:
+    def median(self, skipna: bool | None = None) -> pd.Timedelta:
         return pd.Timedelta(
             self.as_numerical_column("int64").median(skipna=skipna),
             unit=self.time_unit,
@@ -346,9 +346,9 @@ def quantile(
 
     def sum(
         self,
-        skipna: Optional[bool] = None,
+        skipna: bool | None = None,
         min_count: int = 0,
-        dtype: Optional[Dtype] = None,
+        dtype: Dtype | None = None,
     ) -> pd.Timedelta:
         return pd.Timedelta(
             # Since sum isn't overridden in Numerical[Base]Column, mypy only
@@ -362,7 +362,7 @@ def sum(
 
     def std(
         self,
-        skipna: Optional[bool] = None,
+        skipna: bool | None = None,
         min_count: int = 0,
         dtype: Dtype = np.float64,
         ddof: int = 1,
diff --git a/python/cudf/cudf/core/column_accessor.py b/python/cudf/cudf/core/column_accessor.py
index 9f3de061ee8..1bf9a393566 100644
--- a/python/cudf/cudf/core/column_accessor.py
+++ b/python/cudf/cudf/core/column_accessor.py
@@ -6,16 +6,7 @@
 import sys
 from collections import abc
 from functools import cached_property, reduce
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Callable,
-    Dict,
-    Mapping,
-    Optional,
-    Tuple,
-    Union,
-)
+from typing import TYPE_CHECKING, Any, Callable, Mapping
 
 import numpy as np
 import pandas as pd
@@ -98,13 +89,13 @@ class ColumnAccessor(abc.MutableMapping):
         column length and type
     """
 
-    _data: "Dict[Any, ColumnBase]"
+    _data: "dict[Any, ColumnBase]"
     multiindex: bool
-    _level_names: Tuple[Any, ...]
+    _level_names: tuple[Any, ...]
 
     def __init__(
         self,
-        data: Union[abc.MutableMapping, ColumnAccessor, None] = None,
+        data: abc.MutableMapping | ColumnAccessor | None = None,
         multiindex: bool = False,
         level_names=None,
         rangeindex: bool = False,
@@ -210,7 +201,7 @@ def _from_columns_like_self(
         )
 
     @property
-    def level_names(self) -> Tuple[Any, ...]:
+    def level_names(self) -> tuple[Any, ...]:
         if self._level_names is None or len(self._level_names) == 0:
             return tuple((None,) * max(1, self.nlevels))
         else:
@@ -237,11 +228,11 @@ def nrows(self) -> int:
             return len(next(iter(self.values())))
 
     @cached_property
-    def names(self) -> Tuple[Any, ...]:
+    def names(self) -> tuple[Any, ...]:
         return tuple(self.keys())
 
     @cached_property
-    def columns(self) -> Tuple[ColumnBase, ...]:
+    def columns(self) -> tuple[ColumnBase, ...]:
         return tuple(self.values())
 
     @cached_property
@@ -610,7 +601,7 @@ def _pad_key(self, key: Any, pad_value="") -> Any:
         return key + (pad_value,) * (self.nlevels - len(key))
 
     def rename_levels(
-        self, mapper: Union[Mapping[Any, Any], Callable], level: Optional[int]
+        self, mapper: Mapping[Any, Any] | Callable, level: int | None
     ) -> ColumnAccessor:
         """
         Rename the specified levels of the given ColumnAccessor
diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py
index d8d46a6df73..065b13561ab 100644
--- a/python/cudf/cudf/core/dataframe.py
+++ b/python/cudf/cudf/core/dataframe.py
@@ -14,20 +14,7 @@
 import warnings
 from collections import abc, defaultdict
 from collections.abc import Iterator
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Callable,
-    Dict,
-    List,
-    Literal,
-    MutableMapping,
-    Optional,
-    Set,
-    Tuple,
-    Union,
-    cast,
-)
+from typing import TYPE_CHECKING, Any, Callable, Literal, MutableMapping, cast
 
 import cupy
 import numba
@@ -684,7 +671,7 @@ class DataFrame(IndexedFrame, Serializable, GetAttrGetItemMixin):
     """
 
     _PROTECTED_KEYS = frozenset(("_data", "_index"))
-    _accessors: Set[Any] = set()
+    _accessors: set[Any] = set()
     _loc_indexer_type = _DataFrameLocIndexer
     _iloc_indexer_type = _DataFrameIlocIndexer
     _groupby = DataFrameGroupBy
@@ -1123,7 +1110,7 @@ def _init_from_dict_like(
     def _from_data(
         cls,
         data: MutableMapping,
-        index: Optional[BaseIndex] = None,
+        index: BaseIndex | None = None,
         columns: Any = None,
     ) -> DataFrame:
         out = super()._from_data(data=data, index=index)
@@ -1553,7 +1540,7 @@ def _get_numeric_data(self):
         return self[columns]
 
     @_cudf_nvtx_annotate
-    def assign(self, **kwargs: Union[Callable[[Self], Any], Any]):
+    def assign(self, **kwargs: Callable[[Self], Any] | Any):
         """
         Assign columns to DataFrame from keyword arguments.
 
@@ -2009,12 +1996,10 @@ def _make_operands_and_index_for_binop(
         fill_value: Any = None,
         reflect: bool = False,
         can_reindex: bool = False,
-    ) -> Tuple[
-        Union[
-            Dict[Optional[str], Tuple[ColumnBase, Any, bool, Any]],
-            NotImplementedType,
-        ],
-        Optional[BaseIndex],
+    ) -> tuple[
+        dict[str | None, tuple[ColumnBase, Any, bool, Any]]
+        | NotImplementedType,
+        BaseIndex | None,
         bool,
     ]:
         lhs, rhs = self._data, other
@@ -2119,8 +2104,8 @@ def from_dict(
         cls,
         data: dict,
         orient: str = "columns",
-        dtype: Optional[Dtype] = None,
-        columns: Optional[list] = None,
+        dtype: Dtype | None = None,
+        columns: list | None = None,
     ) -> DataFrame:
         """
         Construct DataFrame from dict of array-like or dicts.
@@ -4584,7 +4569,7 @@ def apply(
     def applymap(
         self,
         func: Callable[[Any], Any],
-        na_action: Union[str, None] = None,
+        na_action: str | None = None,
         **kwargs,
     ) -> DataFrame:
         """
@@ -4617,7 +4602,7 @@ def applymap(
     def map(
         self,
         func: Callable[[Any], Any],
-        na_action: Union[str, None] = None,
+        na_action: str | None = None,
         **kwargs,
     ) -> DataFrame:
         """
@@ -7498,7 +7483,7 @@ def nunique(self, axis=0, dropna: bool = True) -> Series:
     def _sample_axis_1(
         self,
         n: int,
-        weights: Optional[ColumnLike],
+        weights: ColumnLike | None,
         replace: bool,
         random_state: np.random.RandomState,
         ignore_index: bool,
@@ -7523,11 +7508,11 @@ def _sample_axis_1(
 
     def _from_columns_like_self(
         self,
-        columns: List[ColumnBase],
-        column_names: Optional[abc.Iterable[str]] = None,
-        index_names: Optional[List[str]] = None,
+        columns: list[ColumnBase],
+        column_names: abc.Iterable[str] | None = None,
+        index_names: list[str] | None = None,
         *,
-        override_dtypes: Optional[abc.Iterable[Optional[Dtype]]] = None,
+        override_dtypes: abc.Iterable[Dtype | None] | None = None,
     ) -> DataFrame:
         result = super()._from_columns_like_self(
             columns,
@@ -8128,7 +8113,7 @@ def _setitem_with_dataframe(
     input_df: DataFrame,
     replace_df: DataFrame,
     input_cols: Any = None,
-    mask: Optional[ColumnBase] = None,
+    mask: ColumnBase | None = None,
     ignore_index: bool = False,
 ):
     """
diff --git a/python/cudf/cudf/core/df_protocol.py b/python/cudf/cudf/core/df_protocol.py
index 62ded8ac6f1..9cd573aceb9 100644
--- a/python/cudf/cudf/core/df_protocol.py
+++ b/python/cudf/cudf/core/df_protocol.py
@@ -1,17 +1,9 @@
 # Copyright (c) 2021-2024, NVIDIA CORPORATION.
+from __future__ import annotations
 
 import enum
 from collections import abc
-from typing import (
-    Any,
-    Dict,
-    Iterable,
-    Mapping,
-    Optional,
-    Sequence,
-    Tuple,
-    cast,
-)
+from typing import Any, Iterable, Mapping, Sequence, Tuple, cast
 
 import cupy as cp
 import numpy as np
@@ -109,7 +101,7 @@ def __dlpack__(self):
         except ValueError:
             raise TypeError(f"dtype {self._dtype} unsupported by `dlpack`")
 
-    def __dlpack_device__(self) -> Tuple[_Device, int]:
+    def __dlpack_device__(self) -> tuple[_Device, int]:
         """
         _Device type and _Device ID for where the data in the buffer resides.
         """
@@ -265,7 +257,7 @@ def _dtype_from_cudfdtype(self, dtype) -> ProtoDtype:
         return (kind, bitwidth, format_str, endianness)
 
     @property
-    def describe_categorical(self) -> Tuple[bool, bool, Dict[int, Any]]:
+    def describe_categorical(self) -> tuple[bool, bool, dict[int, Any]]:
         """
         If the dtype is categorical, there are two options:
 
@@ -298,7 +290,7 @@ def describe_categorical(self) -> Tuple[bool, bool, Dict[int, Any]]:
         return ordered, is_dictionary, mapping
 
     @property
-    def describe_null(self) -> Tuple[int, Any]:
+    def describe_null(self) -> tuple[int, Any]:
         """
         Return the missing value (or "null") representation the column dtype
         uses, as a tuple ``(kind, value)``.
@@ -338,7 +330,7 @@ def null_count(self) -> int:
         return self._col.null_count
 
     @property
-    def metadata(self) -> Dict[str, Any]:
+    def metadata(self) -> dict[str, Any]:
         """
         Store specific metadata of the column.
         """
@@ -351,7 +343,7 @@ def num_chunks(self) -> int:
         return 1
 
     def get_chunks(
-        self, n_chunks: Optional[int] = None
+        self, n_chunks: int | None = None
     ) -> Iterable["_CuDFColumn"]:
         """
         Return an iterable yielding the chunks.
@@ -362,7 +354,7 @@ def get_chunks(
 
     def get_buffers(
         self,
-    ) -> Mapping[str, Optional[Tuple[_CuDFBuffer, ProtoDtype]]]:
+    ) -> Mapping[str, tuple[_CuDFBuffer, ProtoDtype] | None]:
         """
         Return a dictionary containing the underlying buffers.
 
@@ -400,7 +392,7 @@ def get_buffers(
 
     def _get_validity_buffer(
         self,
-    ) -> Optional[Tuple[_CuDFBuffer, ProtoDtype]]:
+    ) -> tuple[_CuDFBuffer, ProtoDtype] | None:
         """
         Return the buffer containing the mask values
         indicating missing data and the buffer's associated dtype.
@@ -433,7 +425,7 @@ def _get_validity_buffer(
 
     def _get_offsets_buffer(
         self,
-    ) -> Optional[Tuple[_CuDFBuffer, ProtoDtype]]:
+    ) -> tuple[_CuDFBuffer, ProtoDtype] | None:
         """
         Return the buffer containing the offset values for
         variable-size binary data (e.g., variable-length strings)
@@ -461,7 +453,7 @@ def _get_offsets_buffer(
 
     def _get_data_buffer(
         self,
-    ) -> Tuple[_CuDFBuffer, ProtoDtype]:
+    ) -> tuple[_CuDFBuffer, ProtoDtype]:
         """
         Return the buffer containing the data and
                the buffer's associated dtype.
@@ -588,7 +580,7 @@ def select_columns_by_name(self, names: Sequence[str]) -> "_CuDFDataFrame":
         )
 
     def get_chunks(
-        self, n_chunks: Optional[int] = None
+        self, n_chunks: int | None = None
     ) -> Iterable["_CuDFDataFrame"]:
         """
         Return an iterator yielding the chunks.
@@ -745,9 +737,9 @@ def from_dataframe(
 
 def _protocol_to_cudf_column_numeric(
     col, allow_copy: bool
-) -> Tuple[
+) -> tuple[
     cudf.core.column.ColumnBase,
-    Mapping[str, Optional[Tuple[_CuDFBuffer, ProtoDtype]]],
+    Mapping[str, tuple[_CuDFBuffer, ProtoDtype] | None],
 ]:
     """
     Convert an int, uint, float or bool protocol column
@@ -822,9 +814,9 @@ def protocol_dtype_to_cupy_dtype(_dtype: ProtoDtype) -> cp.dtype:
 
 def _protocol_to_cudf_column_categorical(
     col, allow_copy: bool
-) -> Tuple[
+) -> tuple[
     cudf.core.column.ColumnBase,
-    Mapping[str, Optional[Tuple[_CuDFBuffer, ProtoDtype]]],
+    Mapping[str, tuple[_CuDFBuffer, ProtoDtype] | None],
 ]:
     """
     Convert a categorical column to a Series instance
@@ -857,9 +849,9 @@ def _protocol_to_cudf_column_categorical(
 
 def _protocol_to_cudf_column_string(
     col, allow_copy: bool
-) -> Tuple[
+) -> tuple[
     cudf.core.column.ColumnBase,
-    Mapping[str, Optional[Tuple[_CuDFBuffer, ProtoDtype]]],
+    Mapping[str, tuple[_CuDFBuffer, ProtoDtype] | None],
 ]:
     """
     Convert a string ColumnObject to cudf Column object.
diff --git a/python/cudf/cudf/core/dtypes.py b/python/cudf/cudf/core/dtypes.py
index b1282040e60..034849d0e71 100644
--- a/python/cudf/cudf/core/dtypes.py
+++ b/python/cudf/cudf/core/dtypes.py
@@ -1,4 +1,5 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
+from __future__ import annotations
 
 import decimal
 import operator
@@ -6,7 +7,7 @@
 import textwrap
 import warnings
 from functools import cached_property
-from typing import TYPE_CHECKING, Any, Callable, Dict, List, Tuple, Type, Union
+from typing import TYPE_CHECKING, Any, Callable
 
 import numpy as np
 import pandas as pd
@@ -16,12 +17,12 @@
 from pandas.core.arrays.arrow.extension_types import ArrowIntervalType
 
 import cudf
-from cudf._typing import Dtype
 from cudf.core._compat import PANDAS_LT_300
 from cudf.core.abc import Serializable
 from cudf.utils.docutils import doc_apply
 
 if TYPE_CHECKING:
+    from cudf._typing import Dtype
     from cudf.core.buffer import Buffer
 
 
@@ -84,11 +85,11 @@ def dtype(arbitrary):
 
 
 def _decode_type(
-    cls: Type,
+    cls: type,
     header: dict,
     frames: list,
-    is_valid_class: Callable[[Type, Type], bool] = operator.is_,
-) -> Tuple[dict, list, Type]:
+    is_valid_class: Callable[[type, type], bool] = operator.is_,
+) -> tuple[dict, list, type]:
     """Decode metadata-encoded type and check validity
 
     Parameters
@@ -481,8 +482,8 @@ def __repr__(self):
     def __hash__(self):
         return hash(self._typ)
 
-    def serialize(self) -> Tuple[dict, list]:
-        header: Dict[str, Dtype] = {}
+    def serialize(self) -> tuple[dict, list]:
+        header: dict[str, Dtype] = {}
         header["type-serialized"] = pickle.dumps(type(self))
 
         frames = []
@@ -627,13 +628,13 @@ def __repr__(self):
     def __hash__(self):
         return hash(self._typ)
 
-    def serialize(self) -> Tuple[dict, list]:
-        header: Dict[str, Any] = {}
+    def serialize(self) -> tuple[dict, list]:
+        header: dict[str, Any] = {}
         header["type-serialized"] = pickle.dumps(type(self))
 
-        frames: List[Buffer] = []
+        frames: list[Buffer] = []
 
-        fields: Dict[str, Union[bytes, Tuple[Any, Tuple[int, int]]]] = {}
+        fields: dict[str, bytes | tuple[Any, tuple[int, int]]] = {}
 
         for k, dtype in self.fields.items():
             if isinstance(dtype, _BaseDtype):
@@ -823,7 +824,7 @@ def _from_decimal(cls, decimal):
         precision = max(len(metadata.digits), -metadata.exponent)
         return cls(precision, -metadata.exponent)
 
-    def serialize(self) -> Tuple[dict, list]:
+    def serialize(self) -> tuple[dict, list]:
         return (
             {
                 "type-serialized": pickle.dumps(type(self)),
@@ -946,7 +947,7 @@ def __eq__(self, other):
     def __hash__(self):
         return hash((self.subtype, self.closed))
 
-    def serialize(self) -> Tuple[dict, list]:
+    def serialize(self) -> tuple[dict, list]:
         header = {
             "type-serialized": pickle.dumps(type(self)),
             "fields": pickle.dumps((self.subtype, self.closed)),
diff --git a/python/cudf/cudf/core/frame.py b/python/cudf/cudf/core/frame.py
index 6a1ef05b1f9..c58a0161ee0 100644
--- a/python/cudf/cudf/core/frame.py
+++ b/python/cudf/cudf/core/frame.py
@@ -8,18 +8,7 @@
 import pickle
 import warnings
 from collections import abc
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Callable,
-    Dict,
-    List,
-    Literal,
-    MutableMapping,
-    Optional,
-    Tuple,
-    Union,
-)
+from typing import TYPE_CHECKING, Any, Callable, Literal, MutableMapping
 
 # TODO: The `numpy` import is needed for typing purposes during doc builds
 # only, need to figure out why the `np` alias is insufficient then remove.
@@ -83,11 +72,11 @@ def _num_rows(self) -> int:
         return self._data.nrows
 
     @property
-    def _column_names(self) -> Tuple[Any, ...]:
+    def _column_names(self) -> tuple[Any, ...]:
         return self._data.names
 
     @property
-    def _columns(self) -> Tuple[ColumnBase, ...]:
+    def _columns(self) -> tuple[ColumnBase, ...]:
         return self._data.columns
 
     @property
@@ -154,10 +143,10 @@ def _from_data_like_self(self, data: MutableMapping) -> Self:
     @_cudf_nvtx_annotate
     def _from_columns_like_self(
         self,
-        columns: List[ColumnBase],
-        column_names: Optional[abc.Iterable[str]] = None,
+        columns: list[ColumnBase],
+        column_names: abc.Iterable[str] | None = None,
         *,
-        override_dtypes: Optional[abc.Iterable[Optional[Dtype]]] = None,
+        override_dtypes: abc.Iterable[Dtype | None] | None = None,
     ):
         """Construct a Frame from a list of columns with metadata from self.
 
@@ -172,7 +161,7 @@ def _from_columns_like_self(
     @_cudf_nvtx_annotate
     def _mimic_inplace(
         self, result: Self, inplace: bool = False
-    ) -> Optional[Self]:
+    ) -> Self | None:
         if inplace:
             for col in self._data:
                 if col in result._data:
@@ -424,15 +413,15 @@ def _to_array(
         get_array: Callable,
         module: ModuleType,
         copy: bool,
-        dtype: Union[Dtype, None] = None,
+        dtype: Dtype | None = None,
         na_value=None,
-    ) -> Union[cupy.ndarray, numpy.ndarray]:
+    ) -> cupy.ndarray | numpy.ndarray:
         # Internal function to implement to_cupy and to_numpy, which are nearly
         # identical except for the attribute they access to generate values.
 
         def to_array(
             col: ColumnBase, dtype: np.dtype
-        ) -> Union[cupy.ndarray, numpy.ndarray]:
+        ) -> cupy.ndarray | numpy.ndarray:
             if na_value is not None:
                 col = col.fillna(na_value)
             array = get_array(col)
@@ -485,7 +474,7 @@ def to_array(
     @_cudf_nvtx_annotate
     def to_cupy(
         self,
-        dtype: Union[Dtype, None] = None,
+        dtype: Dtype | None = None,
         copy: bool = False,
         na_value=None,
     ) -> cupy.ndarray:
@@ -519,7 +508,7 @@ def to_cupy(
     @_cudf_nvtx_annotate
     def to_numpy(
         self,
-        dtype: Union[Dtype, None] = None,
+        dtype: Dtype | None = None,
         copy: bool = True,
         na_value=None,
     ) -> numpy.ndarray:
@@ -552,7 +541,7 @@ def to_numpy(
         )
 
     @_cudf_nvtx_annotate
-    def where(self, cond, other=None, inplace: bool = False) -> Optional[Self]:
+    def where(self, cond, other=None, inplace: bool = False) -> Self | None:
         """
         Replace values where the condition is False.
 
@@ -628,11 +617,11 @@ def where(self, cond, other=None, inplace: bool = False) -> Optional[Self]:
     def fillna(
         self,
         value=None,
-        method: Optional[Literal["ffill", "bfill", "pad", "backfill"]] = None,
+        method: Literal["ffill", "bfill", "pad", "backfill"] | None = None,
         axis=None,
         inplace: bool = False,
         limit=None,
-    ) -> Optional[Self]:
+    ) -> Self | None:
         """Fill null values with ``value`` or specified ``method``.
 
         Parameters
@@ -1047,7 +1036,7 @@ def _copy_type_metadata(
         self,
         other: Self,
         *,
-        override_dtypes: Optional[abc.Iterable[Optional[Dtype]]] = None,
+        override_dtypes: abc.Iterable[Dtype | None] | None = None,
     ) -> Self:
         """
         Copy type metadata from each column of `other` to the corresponding
@@ -1495,7 +1484,7 @@ def _unaryop(self, op):
     @_cudf_nvtx_annotate
     def _colwise_binop(
         cls,
-        operands: Dict[Optional[str], Tuple[ColumnBase, Any, bool, Any]],
+        operands: dict[str | None, tuple[ColumnBase, Any, bool, Any]],
         fn: str,
     ):
         """Implement binary ops between two frame-like objects.
@@ -1910,8 +1899,8 @@ def nunique(self, dropna: bool = True):
     @staticmethod
     @_cudf_nvtx_annotate
     def _repeat(
-        columns: List[ColumnBase], repeats, axis=None
-    ) -> List[ColumnBase]:
+        columns: list[ColumnBase], repeats, axis=None
+    ) -> list[ColumnBase]:
         if axis is not None:
             raise NotImplementedError(
                 "Only axis=`None` supported at this time."
diff --git a/python/cudf/cudf/core/groupby/groupby.py b/python/cudf/cudf/core/groupby/groupby.py
index aa96051ea51..d08268eea3a 100644
--- a/python/cudf/cudf/core/groupby/groupby.py
+++ b/python/cudf/cudf/core/groupby/groupby.py
@@ -1,4 +1,5 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
+from __future__ import annotations
 
 import copy
 import itertools
@@ -7,7 +8,7 @@
 import warnings
 from collections import abc
 from functools import cached_property
-from typing import Any, Iterable, List, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any, Iterable
 
 import cupy as cp
 import numpy as np
@@ -20,7 +21,6 @@
 from cudf._lib.reshape import interleave_columns
 from cudf._lib.sort import segmented_sort_by_key
 from cudf._lib.types import size_type_dtype
-from cudf._typing import AggType, DataFrameOrSeries, MultiColumnAggType
 from cudf.api.extensions import no_default
 from cudf.api.types import is_bool_dtype, is_list_like, is_numeric_dtype
 from cudf.core._compat import PANDAS_LT_300
@@ -34,6 +34,9 @@
 from cudf.utils.nvtx_annotation import _cudf_nvtx_annotate
 from cudf.utils.utils import GetAttrGetItemMixin
 
+if TYPE_CHECKING:
+    from cudf._typing import AggType, DataFrameOrSeries, MultiColumnAggType
+
 
 def _deprecate_collect():
     warnings.warn(
@@ -1033,11 +1036,11 @@ def ngroup(self, ascending=True):
 
     def sample(
         self,
-        n: Optional[int] = None,
-        frac: Optional[float] = None,
+        n: int | None = None,
+        frac: float | None = None,
         replace: bool = False,
-        weights: Union[abc.Sequence, "cudf.Series", None] = None,
-        random_state: Union[np.random.RandomState, int, None] = None,
+        weights: abc.Sequence | "cudf.Series" | None = None,
+        random_state: np.random.RandomState | int | None = None,
     ):
         """Return a random sample of items in each group.
 
@@ -1222,7 +1225,7 @@ def _grouped(self, *, include_groups: bool = True):
 
     def _normalize_aggs(
         self, aggs: MultiColumnAggType
-    ) -> Tuple[Iterable[Any], Tuple[ColumnBase, ...], List[List[AggType]]]:
+    ) -> tuple[Iterable[Any], tuple[ColumnBase, ...], list[list[AggType]]]:
         """
         Normalize aggs to a list of list of aggregations, where `out[i]`
         is a list of aggregations for column `self.obj[i]`. We support three
@@ -1237,7 +1240,7 @@ def _normalize_aggs(
         Each agg can be string or lambda functions.
         """
 
-        aggs_per_column: Iterable[Union[AggType, Iterable[AggType]]]
+        aggs_per_column: Iterable[AggType | Iterable[AggType]]
         if isinstance(aggs, dict):
             column_names, aggs_per_column = aggs.keys(), aggs.values()
             columns = tuple(self.obj._data[col] for col in column_names)
diff --git a/python/cudf/cudf/core/index.py b/python/cudf/cudf/core/index.py
index 11d09e470ff..13fa187842d 100644
--- a/python/cudf/cudf/core/index.py
+++ b/python/cudf/cudf/core/index.py
@@ -7,17 +7,7 @@
 import warnings
 from functools import cache, cached_property
 from numbers import Number
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    List,
-    Literal,
-    MutableMapping,
-    Optional,
-    Tuple,
-    Union,
-    cast,
-)
+from typing import TYPE_CHECKING, Any, Literal, MutableMapping, cast
 
 import cupy
 import numpy as np
@@ -101,10 +91,10 @@ def __subclasscheck__(self, subclass):
 
 
 def _lexsorted_equal_range(
-    idx: Union[Index, cudf.MultiIndex],
+    idx: Index | cudf.MultiIndex,
     key_as_table: Frame,
     is_sorted: bool,
-) -> Tuple[int, int, Optional[ColumnBase]]:
+) -> tuple[int, int, ColumnBase | None]:
     """Get equal range for key in lexicographically sorted index. If index
     is not sorted when called, a sort will take place and `sort_inds` is
     returned. Otherwise `None` is returned in that position.
@@ -2858,7 +2848,7 @@ class IntervalIndex(Index):
     def __init__(
         self,
         data,
-        closed: Optional[Literal["left", "right", "neither", "both"]] = None,
+        closed: Literal["left", "right", "neither", "both"] | None = None,
         dtype=None,
         copy: bool = False,
         name=None,
@@ -2917,9 +2907,7 @@ def closed(self):
     def from_breaks(
         cls,
         breaks,
-        closed: Optional[
-            Literal["left", "right", "neither", "both"]
-        ] = "right",
+        closed: Literal["left", "right", "neither", "both"] | None = "right",
         name=None,
         copy: bool = False,
         dtype=None,
@@ -3106,7 +3094,7 @@ def _getdefault_name(values, name):
 
 
 @_cudf_nvtx_annotate
-def _concat_range_index(indexes: List[RangeIndex]) -> BaseIndex:
+def _concat_range_index(indexes: list[RangeIndex]) -> BaseIndex:
     """
     An internal Utility function to concat RangeIndex objects.
     """
@@ -3147,7 +3135,7 @@ def _concat_range_index(indexes: List[RangeIndex]) -> BaseIndex:
 
 
 @_cudf_nvtx_annotate
-def _extended_gcd(a: int, b: int) -> Tuple[int, int, int]:
+def _extended_gcd(a: int, b: int) -> tuple[int, int, int]:
     """
     Extended Euclidean algorithms to solve Bezout's identity:
        a*x + b*y = gcd(x, y)
@@ -3197,7 +3185,7 @@ def _get_nearest_indexer(
     index: Index,
     positions: cudf.Series,
     target_col: cudf.core.column.ColumnBase,
-    tolerance: Union[int, float],
+    tolerance: int | float,
 ):
     """
     Get the indexer for the nearest index labels; requires an index with
diff --git a/python/cudf/cudf/core/indexed_frame.py b/python/cudf/cudf/core/indexed_frame.py
index 3a4f4874e35..06da62306e8 100644
--- a/python/cudf/cudf/core/indexed_frame.py
+++ b/python/cudf/cudf/core/indexed_frame.py
@@ -12,15 +12,9 @@
     TYPE_CHECKING,
     Any,
     Callable,
-    Dict,
-    List,
     Literal,
     MutableMapping,
-    Optional,
-    Tuple,
-    Type,
     TypeVar,
-    Union,
     cast,
 )
 from uuid import uuid4
@@ -258,8 +252,8 @@ class IndexedFrame(Frame):
     """
 
     # mypy can't handle bound type variables as class members
-    _loc_indexer_type: Type[_LocIndexerClass]  # type: ignore
-    _iloc_indexer_type: Type[_IlocIndexerClass]  # type: ignore
+    _loc_indexer_type: type[_LocIndexerClass]  # type: ignore
+    _iloc_indexer_type: type[_IlocIndexerClass]  # type: ignore
     _index: cudf.core.index.BaseIndex
     _groupby = GroupBy
     _resampler = _Resampler
@@ -294,14 +288,14 @@ def _num_rows(self) -> int:
         return len(self.index)
 
     @property
-    def _index_names(self) -> Tuple[Any, ...]:  # TODO: Tuple[str]?
+    def _index_names(self) -> tuple[Any, ...]:  # TODO: Tuple[str]?
         return self.index._data.names
 
     @classmethod
     def _from_data(
         cls,
         data: MutableMapping,
-        index: Optional[BaseIndex] = None,
+        index: BaseIndex | None = None,
     ):
         out = super()._from_data(data)
         out._index = RangeIndex(out._data.nrows) if index is None else index
@@ -316,11 +310,11 @@ def _from_data_like_self(self, data: MutableMapping):
     @_cudf_nvtx_annotate
     def _from_columns_like_self(
         self,
-        columns: List[ColumnBase],
-        column_names: Optional[abc.Iterable[str]] = None,
-        index_names: Optional[List[str]] = None,
+        columns: list[ColumnBase],
+        column_names: abc.Iterable[str] | None = None,
+        index_names: list[str] | None = None,
         *,
-        override_dtypes: Optional[abc.Iterable[Optional[Dtype]]] = None,
+        override_dtypes: abc.Iterable[Dtype | None] | None = None,
     ) -> Self:
         """Construct a `Frame` from a list of columns with metadata from self.
 
@@ -368,7 +362,7 @@ def __round__(self, digits=0):
 
     def _mimic_inplace(
         self, result: Self, inplace: bool = False
-    ) -> Optional[Self]:
+    ) -> Self | None:
         if inplace:
             self._index = result.index
         return super()._mimic_inplace(result, inplace)
@@ -1788,7 +1782,7 @@ def skew(self, axis=0, skipna=True, numeric_only=False, **kwargs):
         )
 
     @_cudf_nvtx_annotate
-    def mask(self, cond, other=None, inplace: bool = False) -> Optional[Self]:
+    def mask(self, cond, other=None, inplace: bool = False) -> Self | None:
         """
         Replace values where the condition is True.
 
@@ -1924,7 +1918,7 @@ def _copy_type_metadata(
         other: Self,
         include_index: bool = True,
         *,
-        override_dtypes: Optional[abc.Iterable[Optional[Dtype]]] = None,
+        override_dtypes: abc.Iterable[Dtype | None] | None = None,
     ) -> Self:
         """
         Copy type metadata from each column of `other` to the corresponding
@@ -4670,9 +4664,9 @@ def sample(
     def _sample_axis_0(
         self,
         n: int,
-        weights: Optional[ColumnLike],
+        weights: ColumnLike | None,
         replace: bool,
-        random_state: Union[np.random.RandomState, cp.random.RandomState],
+        random_state: np.random.RandomState | cp.random.RandomState,
         ignore_index: bool,
     ):
         try:
@@ -4695,7 +4689,7 @@ def _sample_axis_0(
     def _sample_axis_1(
         self,
         n: int,
-        weights: Optional[ColumnLike],
+        weights: ColumnLike | None,
         replace: bool,
         random_state: np.random.RandomState,
         ignore_index: bool,
@@ -4742,12 +4736,10 @@ def _make_operands_and_index_for_binop(
         fill_value: Any = None,
         reflect: bool = False,
         can_reindex: bool = False,
-    ) -> Tuple[
-        Union[
-            Dict[Optional[str], Tuple[ColumnBase, Any, bool, Any]],
-            NotImplementedType,
-        ],
-        Optional[cudf.BaseIndex],
+    ) -> tuple[
+        dict[str | None, tuple[ColumnBase, Any, bool, Any]]
+        | NotImplementedType,
+        cudf.BaseIndex | None,
         bool,
     ]:
         raise NotImplementedError(
@@ -6328,8 +6320,8 @@ def _check_duplicate_level_names(specified, level_names):
 
 @_cudf_nvtx_annotate
 def _get_replacement_values_for_columns(
-    to_replace: Any, value: Any, columns_dtype_map: Dict[Any, Any]
-) -> Tuple[Dict[Any, bool], Dict[Any, Any], Dict[Any, Any]]:
+    to_replace: Any, value: Any, columns_dtype_map: dict[Any, Any]
+) -> tuple[dict[Any, bool], dict[Any, Any], dict[Any, Any]]:
     """
     Returns a per column mapping for the values to be replaced, new
     values to be replaced with and if all the values are empty.
@@ -6354,9 +6346,9 @@ def _get_replacement_values_for_columns(
         A dict mapping of all columns and the corresponding values
         to be replaced with.
     """
-    to_replace_columns: Dict[Any, Any] = {}
-    values_columns: Dict[Any, Any] = {}
-    all_na_columns: Dict[Any, Any] = {}
+    to_replace_columns: dict[Any, Any] = {}
+    values_columns: dict[Any, Any] = {}
+    all_na_columns: dict[Any, Any] = {}
 
     if is_scalar(to_replace) and is_scalar(value):
         to_replace_columns = {col: [to_replace] for col in columns_dtype_map}
@@ -6496,8 +6488,8 @@ def _is_series(obj):
 @_cudf_nvtx_annotate
 def _drop_rows_by_labels(
     obj: DataFrameOrSeries,
-    labels: Union[ColumnLike, abc.Iterable, str],
-    level: Union[int, str],
+    labels: ColumnLike | abc.Iterable | str,
+    level: int | str,
     errors: str,
 ) -> DataFrameOrSeries:
     """Remove rows specified by `labels`.
diff --git a/python/cudf/cudf/core/indexing_utils.py b/python/cudf/cudf/core/indexing_utils.py
index 7242de9964f..73a1cd26367 100644
--- a/python/cudf/cudf/core/indexing_utils.py
+++ b/python/cudf/cudf/core/indexing_utils.py
@@ -1,9 +1,9 @@
-# Copyright (c) 2023, NVIDIA CORPORATION.
+# Copyright (c) 2023-2024, NVIDIA CORPORATION.
 
 from __future__ import annotations
 
 from dataclasses import dataclass
-from typing import Any, List, Tuple, Union
+from typing import Any, List, Union
 
 from typing_extensions import TypeAlias
 
@@ -59,7 +59,7 @@ class ScalarIndexer:
 
 
 def destructure_iloc_key(
-    key: Any, frame: Union[cudf.Series, cudf.DataFrame]
+    key: Any, frame: cudf.Series | cudf.DataFrame
 ) -> tuple[Any, ...]:
     """
     Destructure a potentially tuple-typed key into row and column indexers.
@@ -124,7 +124,7 @@ def destructure_iloc_key(
 
 def destructure_dataframe_iloc_indexer(
     key: Any, frame: cudf.DataFrame
-) -> Tuple[Any, Tuple[bool, ColumnLabels]]:
+) -> tuple[Any, tuple[bool, ColumnLabels]]:
     """Destructure an index key for DataFrame iloc getitem.
 
     Parameters
diff --git a/python/cudf/cudf/core/join/_join_helpers.py b/python/cudf/cudf/core/join/_join_helpers.py
index 05cbb4429b9..dd0a4f666a1 100644
--- a/python/cudf/cudf/core/join/_join_helpers.py
+++ b/python/cudf/cudf/core/join/_join_helpers.py
@@ -4,7 +4,7 @@
 
 import warnings
 from collections import abc
-from typing import TYPE_CHECKING, Any, Tuple, cast
+from typing import TYPE_CHECKING, Any, cast
 
 import numpy as np
 
@@ -51,7 +51,7 @@ def set(self, obj: cudf.DataFrame, value: ColumnBase, validate=False):
 
 def _match_join_keys(
     lcol: ColumnBase, rcol: ColumnBase, how: str
-) -> Tuple[ColumnBase, ColumnBase]:
+) -> tuple[ColumnBase, ColumnBase]:
     # Casts lcol and rcol to a common dtype for use as join keys. If no casting
     # is necessary, they are returned as is.
 
@@ -133,7 +133,7 @@ def _match_join_keys(
 
 def _match_categorical_dtypes_both(
     lcol: CategoricalColumn, rcol: CategoricalColumn, how: str
-) -> Tuple[ColumnBase, ColumnBase]:
+) -> tuple[ColumnBase, ColumnBase]:
     ltype, rtype = lcol.dtype, rcol.dtype
 
     # when both are ordered and both have the same categories,
diff --git a/python/cudf/cudf/core/join/join.py b/python/cudf/cudf/core/join/join.py
index da999441ca3..ce81c1fc5b1 100644
--- a/python/cudf/cudf/core/join/join.py
+++ b/python/cudf/cudf/core/join/join.py
@@ -2,7 +2,7 @@
 from __future__ import annotations
 
 import itertools
-from typing import Any, ClassVar, List, Optional
+from typing import Any, ClassVar
 
 import cudf
 from cudf import _lib as libcudf
@@ -370,7 +370,7 @@ def _merge_results(
         else:
             multiindex_columns = False
 
-        index: Optional[cudf.BaseIndex]
+        index: cudf.BaseIndex | None
         if self._using_right_index:
             # right_index and left_on
             index = left_result.index
@@ -398,7 +398,7 @@ def _sort_result(self, result: cudf.DataFrame) -> cudf.DataFrame:
         # This is taken care of by using a stable sort here, and (in
         # pandas-compat mode) reordering the gather maps before
         # producing the input result.
-        by: List[Any] = []
+        by: list[Any] = []
         if self._using_left_index and self._using_right_index:
             by.extend(result.index._data.columns)
         if not self._using_left_index:
diff --git a/python/cudf/cudf/core/mixins/binops.pyi b/python/cudf/cudf/core/mixins/binops.pyi
index 8587b2dea48..6be73e25332 100644
--- a/python/cudf/cudf/core/mixins/binops.pyi
+++ b/python/cudf/cudf/core/mixins/binops.pyi
@@ -1,12 +1,12 @@
 # Copyright (c) 2022, NVIDIA CORPORATION.
 
-from typing import Any, Set, Tuple, TypeVar
+from typing import Any, TypeVar
 
 # Note: It may be possible to define a narrower bound here eventually.
 BinaryOperandType = TypeVar("BinaryOperandType", bound="Any")
 
 class BinaryOperand:
-    _SUPPORTED_BINARY_OPERATIONS: Set
+    _SUPPORTED_BINARY_OPERATIONS: set
 
     def _binaryop(self, other: BinaryOperandType, op: str): ...
     def __add__(self, other): ...
@@ -36,4 +36,4 @@ class BinaryOperand:
     def __gt__(self, other): ...
     def __ge__(self, other): ...
     @staticmethod
-    def _check_reflected_op(op) -> Tuple[bool, str]: ...
+    def _check_reflected_op(op) -> tuple[bool, str]: ...
diff --git a/python/cudf/cudf/core/mixins/reductions.pyi b/python/cudf/cudf/core/mixins/reductions.pyi
index dbaafdb5cd2..1c2126002ad 100644
--- a/python/cudf/cudf/core/mixins/reductions.pyi
+++ b/python/cudf/cudf/core/mixins/reductions.pyi
@@ -1,9 +1,7 @@
 # Copyright (c) 2022, NVIDIA CORPORATION.
 
-from typing import Set
-
 class Reducible:
-    _SUPPORTED_REDUCTIONS: Set
+    _SUPPORTED_REDUCTIONS: set
 
     def sum(self): ...
     def product(self): ...
diff --git a/python/cudf/cudf/core/mixins/scans.pyi b/python/cudf/cudf/core/mixins/scans.pyi
index 37995241b1f..5190750c698 100644
--- a/python/cudf/cudf/core/mixins/scans.pyi
+++ b/python/cudf/cudf/core/mixins/scans.pyi
@@ -1,9 +1,7 @@
 # Copyright (c) 2022, NVIDIA CORPORATION.
 
-from typing import Set
-
 class Scannable:
-    _SUPPORTED_SCANS: Set
+    _SUPPORTED_SCANS: set
 
     def cumsum(self): ...
     def cumprod(self): ...
diff --git a/python/cudf/cudf/core/multiindex.py b/python/cudf/cudf/core/multiindex.py
index 91488e06f4e..832cc003d2e 100644
--- a/python/cudf/cudf/core/multiindex.py
+++ b/python/cudf/cudf/core/multiindex.py
@@ -10,7 +10,7 @@
 from collections import abc
 from functools import cached_property
 from numbers import Integral
-from typing import TYPE_CHECKING, Any, List, MutableMapping, Tuple, Union
+from typing import TYPE_CHECKING, Any, MutableMapping
 
 import cupy as cp
 import numpy as np
@@ -40,7 +40,7 @@
     from cudf._typing import DataFrameOrSeries
 
 
-def _maybe_indices_to_slice(indices: cp.ndarray) -> Union[slice, cp.ndarray]:
+def _maybe_indices_to_slice(indices: cp.ndarray) -> slice | cp.ndarray:
     """Makes best effort to convert an array of indices into a python slice.
     If the conversion is not possible, return input. `indices` are expected
     to be valid.
@@ -849,9 +849,10 @@ def _index_and_downcast(self, result, index, index_key):
     def _get_row_major(
         self,
         df: DataFrameOrSeries,
-        row_tuple: Union[
-            numbers.Number, slice, Tuple[Any, ...], List[Tuple[Any, ...]]
-        ],
+        row_tuple: numbers.Number
+        | slice
+        | tuple[Any, ...]
+        | list[tuple[Any, ...]],
     ) -> DataFrameOrSeries:
         if pd.api.types.is_bool_dtype(
             list(row_tuple) if isinstance(row_tuple, tuple) else row_tuple
@@ -874,9 +875,10 @@ def _get_row_major(
     @_cudf_nvtx_annotate
     def _validate_indexer(
         self,
-        indexer: Union[
-            numbers.Number, slice, Tuple[Any, ...], List[Tuple[Any, ...]]
-        ],
+        indexer: numbers.Number
+        | slice
+        | tuple[Any, ...]
+        | list[tuple[Any, ...]],
     ):
         if isinstance(indexer, numbers.Number):
             return
diff --git a/python/cudf/cudf/core/reshape.py b/python/cudf/cudf/core/reshape.py
index 53239cb7ea0..903c4fe7df5 100644
--- a/python/cudf/cudf/core/reshape.py
+++ b/python/cudf/cudf/core/reshape.py
@@ -1,8 +1,9 @@
 # Copyright (c) 2018-2024, NVIDIA CORPORATION.
+from __future__ import annotations
 
 import itertools
 import warnings
-from typing import Dict, Optional
+from typing import TYPE_CHECKING
 
 import numpy as np
 import pandas as pd
@@ -10,13 +11,15 @@
 import cudf
 from cudf._lib.transform import one_hot_encode
 from cudf._lib.types import size_type_dtype
-from cudf._typing import Dtype
 from cudf.api.extensions import no_default
 from cudf.core._compat import PANDAS_LT_300
 from cudf.core.column import ColumnBase, as_column, column_empty_like
 from cudf.core.column.categorical import CategoricalColumn
 from cudf.utils.dtypes import min_unsigned_type
 
+if TYPE_CHECKING:
+    from cudf._typing import Dtype
+
 _AXIS_MAP = {0: 0, 1: 1, "index": 0, "columns": 1}
 
 
@@ -1217,10 +1220,10 @@ def _get_unique(column, dummy_na):
 def _one_hot_encode_column(
     column: ColumnBase,
     categories: ColumnBase,
-    prefix: Optional[str],
-    prefix_sep: Optional[str],
-    dtype: Optional[Dtype],
-) -> Dict[str, ColumnBase]:
+    prefix: str | None,
+    prefix_sep: str | None,
+    dtype: Dtype | None,
+) -> dict[str, ColumnBase]:
     """Encode a single column with one hot encoding. The return dictionary
     contains pairs of (category, encodings). The keys may be prefixed with
     `prefix`, separated with category name with `prefix_sep`. The encoding
diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py
index ebf6910ca5f..e532948fd11 100644
--- a/python/cudf/cudf/core/series.py
+++ b/python/cudf/cudf/core/series.py
@@ -9,17 +9,7 @@
 import warnings
 from collections import abc
 from shutil import get_terminal_size
-from typing import (
-    TYPE_CHECKING,
-    Any,
-    Dict,
-    Literal,
-    MutableMapping,
-    Optional,
-    Set,
-    Tuple,
-    Union,
-)
+from typing import TYPE_CHECKING, Any, Literal, MutableMapping
 
 import cupy
 import numpy as np
@@ -285,7 +275,7 @@ class _SeriesLocIndexer(_FrameIndexer):
     """
 
     @_cudf_nvtx_annotate
-    def __getitem__(self, arg: Any) -> Union[ScalarLike, DataFrameOrSeries]:
+    def __getitem__(self, arg: Any) -> ScalarLike | DataFrameOrSeries:
         if isinstance(arg, pd.MultiIndex):
             arg = cudf.from_pandas(arg)
 
@@ -464,7 +454,7 @@ class Series(SingleColumnFrame, IndexedFrame, Serializable):
         If ``False``, leaves ``np.nan`` values as is.
     """
 
-    _accessors: Set[Any] = set()
+    _accessors: set[Any] = set()
     _loc_indexer_type = _SeriesLocIndexer
     _iloc_indexer_type = _SeriesIlocIndexer
     _groupby = SeriesGroupBy
@@ -677,7 +667,7 @@ def __init__(
     def _from_data(
         cls,
         data: MutableMapping,
-        index: Optional[BaseIndex] = None,
+        index: BaseIndex | None = None,
         name: Any = no_default,
     ) -> Series:
         out = super()._from_data(data=data, index=index)
@@ -1311,7 +1301,7 @@ def map(self, arg, na_action=None) -> "Series":
     def _getitem_preprocessed(
         self,
         spec: indexing_utils.IndexingSpec,
-    ) -> Union[Self, ScalarLike]:
+    ) -> Self | ScalarLike:
         """Get subset of entries given structured data
 
         Parameters
@@ -1473,12 +1463,10 @@ def _make_operands_and_index_for_binop(
         fill_value: Any = None,
         reflect: bool = False,
         can_reindex: bool = False,
-    ) -> Tuple[
-        Union[
-            Dict[Optional[str], Tuple[ColumnBase, Any, bool, Any]],
-            NotImplementedType,
-        ],
-        Optional[BaseIndex],
+    ) -> tuple[
+        dict[str | None, tuple[ColumnBase, Any, bool, Any]]
+        | NotImplementedType,
+        BaseIndex | None,
         bool,
     ]:
         # Specialize binops to align indices.
diff --git a/python/cudf/cudf/core/single_column_frame.py b/python/cudf/cudf/core/single_column_frame.py
index 43b5dc76f13..23a2c828a04 100644
--- a/python/cudf/cudf/core/single_column_frame.py
+++ b/python/cudf/cudf/core/single_column_frame.py
@@ -3,7 +3,7 @@
 
 from __future__ import annotations
 
-from typing import TYPE_CHECKING, Any, Dict, Optional, Tuple, Union
+from typing import TYPE_CHECKING, Any
 
 from typing_extensions import Self
 
@@ -274,10 +274,10 @@ def _make_operands_for_binop(
         other: Any,
         fill_value: Any = None,
         reflect: bool = False,
-    ) -> Union[
-        Dict[Optional[str], Tuple[ColumnBase, Any, bool, Any]],
-        NotImplementedType,
-    ]:
+    ) -> (
+        dict[str | None, tuple[ColumnBase, Any, bool, Any]]
+        | NotImplementedType
+    ):
         """Generate the dictionary of operands used for a binary operation.
 
         Parameters
@@ -340,7 +340,7 @@ def nunique(self, dropna: bool = True) -> int:
         """
         return self._column.distinct_count(dropna=dropna)
 
-    def _get_elements_from_column(self, arg) -> Union[ScalarLike, ColumnBase]:
+    def _get_elements_from_column(self, arg) -> ScalarLike | ColumnBase:
         # A generic method for getting elements from a column that supports a
         # wide range of different inputs. This method should only used where
         # _absolutely_ necessary, since in almost all cases a more specific
diff --git a/python/cudf/cudf/core/subword_tokenizer.py b/python/cudf/cudf/core/subword_tokenizer.py
index 24c49e3662a..9e59b134b73 100644
--- a/python/cudf/cudf/core/subword_tokenizer.py
+++ b/python/cudf/cudf/core/subword_tokenizer.py
@@ -3,7 +3,6 @@
 from __future__ import annotations
 
 import warnings
-from typing import Union
 
 import cupy as cp
 
@@ -60,7 +59,7 @@ def __call__(
         max_num_rows: int,
         add_special_tokens: bool = True,
         padding: str = "max_length",
-        truncation: Union[bool, str] = False,
+        truncation: bool | str = False,
         stride: int = 0,
         return_tensors: str = "cp",
         return_token_type_ids: bool = False,
diff --git a/python/cudf/cudf/core/tools/datetimes.py b/python/cudf/cudf/core/tools/datetimes.py
index f002a838fa9..29130130732 100644
--- a/python/cudf/cudf/core/tools/datetimes.py
+++ b/python/cudf/cudf/core/tools/datetimes.py
@@ -1,9 +1,10 @@
 # Copyright (c) 2019-2024, NVIDIA CORPORATION.
+from __future__ import annotations
 
 import math
 import re
 import warnings
-from typing import Literal, Optional, Sequence, Union
+from typing import Literal, Sequence
 
 import cupy as cp
 import numpy as np
@@ -61,7 +62,7 @@ def to_datetime(
     dayfirst: bool = False,
     yearfirst: bool = False,
     utc: bool = False,
-    format: Optional[str] = None,
+    format: str | None = None,
     exact: bool = True,
     unit: str = "ns",
     infer_datetime_format: bool = True,
@@ -313,7 +314,7 @@ def _process_col(
     unit: str,
     dayfirst: bool,
     infer_datetime_format: bool,
-    format: Optional[str],
+    format: str | None,
     utc: bool,
 ):
     if col.dtype.kind == "f":
@@ -707,7 +708,7 @@ def _from_freqstr(cls, freqstr: str) -> Self:
     @classmethod
     def _from_pandas_ticks_or_weeks(
         cls,
-        tick: Union[pd.tseries.offsets.Tick, pd.tseries.offsets.Week],
+        tick: pd.tseries.offsets.Tick | pd.tseries.offsets.Week,
     ) -> Self:
         return cls(**{cls._TICK_OR_WEEK_TO_UNITS[type(tick)]: tick.n})
 
@@ -725,7 +726,7 @@ def _maybe_as_fast_pandas_offset(self):
 
 
 def _isin_datetimelike(
-    lhs: Union[column.TimeDeltaColumn, column.DatetimeColumn], values: Sequence
+    lhs: column.TimeDeltaColumn | column.DatetimeColumn, values: Sequence
 ) -> column.ColumnBase:
     """
     Check whether values are contained in the
@@ -784,7 +785,7 @@ def date_range(
     name=None,
     closed: Literal["left", "right", "both", "neither"] = "both",
     *,
-    unit: Optional[str] = None,
+    unit: str | None = None,
 ):
     """Return a fixed frequency DatetimeIndex.
 
diff --git a/python/cudf/cudf/core/udf/groupby_typing.py b/python/cudf/cudf/core/udf/groupby_typing.py
index 72088493074..dffd7db2f71 100644
--- a/python/cudf/cudf/core/udf/groupby_typing.py
+++ b/python/cudf/cudf/core/udf/groupby_typing.py
@@ -1,5 +1,7 @@
-# Copyright (c) 2020-2023, NVIDIA CORPORATION.
-from typing import Any, Dict
+# Copyright (c) 2020-2024, NVIDIA CORPORATION.
+from __future__ import annotations
+
+from typing import Any
 
 import numba
 from numba import cuda, types
@@ -124,7 +126,7 @@ def __init__(self, dmm, fe_type):
         super().__init__(dmm, fe_type, members)
 
 
-call_cuda_functions: Dict[Any, Any] = {}
+call_cuda_functions: dict[Any, Any] = {}
 
 
 def _register_cuda_binary_reduction_caller(funcname, lty, rty, retty):
diff --git a/python/cudf/cudf/core/udf/utils.py b/python/cudf/cudf/core/udf/utils.py
index bc1f4f2557e..f1704e4ea78 100644
--- a/python/cudf/cudf/core/udf/utils.py
+++ b/python/cudf/cudf/core/udf/utils.py
@@ -1,8 +1,9 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
+from __future__ import annotations
 
 import functools
 import os
-from typing import Any, Callable, Dict
+from typing import Any, Callable
 
 import cachetools
 import cupy as cp
@@ -57,7 +58,7 @@
 MASK_BITSIZE = np.dtype("int32").itemsize * 8
 
 precompiled: cachetools.LRUCache = cachetools.LRUCache(maxsize=32)
-launch_arg_getters: Dict[Any, Any] = {}
+launch_arg_getters: dict[Any, Any] = {}
 
 
 @functools.cache
diff --git a/python/cudf/cudf/io/parquet.py b/python/cudf/cudf/io/parquet.py
index dbdb2093b72..58b104b84e9 100644
--- a/python/cudf/cudf/io/parquet.py
+++ b/python/cudf/cudf/io/parquet.py
@@ -10,7 +10,7 @@
 from collections import defaultdict
 from contextlib import ExitStack
 from functools import partial, reduce
-from typing import Callable, Dict, List, Optional, Tuple
+from typing import Callable
 from uuid import uuid4
 
 import numpy as np
@@ -679,7 +679,7 @@ def read_parquet(
     return df
 
 
-def _normalize_filters(filters: list | None) -> List[List[tuple]] | None:
+def _normalize_filters(filters: list | None) -> list[list[tuple]] | None:
     # Utility to normalize and validate the `filters`
     # argument to `read_parquet`
     if not filters:
@@ -709,7 +709,7 @@ def _validate_predicate(item):
 
 
 def _apply_post_filters(
-    df: cudf.DataFrame, filters: List[List[tuple]] | None
+    df: cudf.DataFrame, filters: list[list[tuple]] | None
 ) -> cudf.DataFrame:
     """Apply DNF filters to an in-memory DataFrame
 
@@ -738,7 +738,7 @@ def _handle_is(column: cudf.Series, value, *, negate) -> cudf.Series:
             )
         return ~column.isna() if negate else column.isna()
 
-    handlers: Dict[str, Callable] = {
+    handlers: dict[str, Callable] = {
         "==": operator.eq,
         "!=": operator.ne,
         "<": operator.lt,
@@ -1311,7 +1311,7 @@ def __init__(
     ) -> None:
         if isinstance(path, str) and path.startswith("s3://"):
             self.fs_meta = {"is_s3": True, "actual_path": path}
-            self.dir_: Optional[tempfile.TemporaryDirectory] = (
+            self.dir_: tempfile.TemporaryDirectory | None = (
                 tempfile.TemporaryDirectory()
             )
             self.path = self.dir_.name
@@ -1328,12 +1328,12 @@ def __init__(
         self.partition_cols = partition_cols
         # Collection of `ParquetWriter`s, and the corresponding
         # partition_col values they're responsible for
-        self._chunked_writers: List[
-            Tuple[libparquet.ParquetWriter, List[str], str]
+        self._chunked_writers: list[
+            tuple[libparquet.ParquetWriter, list[str], str]
         ] = []
         # Map of partition_col values to their ParquetWriter's index
         # in self._chunked_writers for reverse lookup
-        self.path_cw_map: Dict[str, int] = {}
+        self.path_cw_map: dict[str, int] = {}
         self.storage_options = storage_options
         self.filename = file_name_prefix
         self.max_file_size = max_file_size
@@ -1345,7 +1345,7 @@ def __init__(
                 )
             self.max_file_size = _parse_bytes(max_file_size)
 
-        self._file_sizes: Dict[str, int] = {}
+        self._file_sizes: dict[str, int] = {}
 
     @_cudf_nvtx_annotate
     def write_table(self, df):
diff --git a/python/cudf/cudf/options.py b/python/cudf/cudf/options.py
index efa8eabd8b8..fb5a963f008 100644
--- a/python/cudf/cudf/options.py
+++ b/python/cudf/cudf/options.py
@@ -1,11 +1,14 @@
 # Copyright (c) 2022-2024, NVIDIA CORPORATION.
+from __future__ import annotations
 
 import os
 import textwrap
-from collections.abc import Container
 from contextlib import ContextDecorator
 from dataclasses import dataclass
-from typing import Any, Callable, Dict, Optional
+from typing import TYPE_CHECKING, Any, Callable
+
+if TYPE_CHECKING:
+    from collections.abc import Container
 
 
 @dataclass
@@ -16,7 +19,7 @@ class Option:
     validator: Callable
 
 
-_OPTIONS: Dict[str, Option] = {}
+_OPTIONS: dict[str, Option] = {}
 
 
 def _env_get_int(name, default):
@@ -123,7 +126,7 @@ def _build_option_description(name, opt):
     )
 
 
-def describe_option(name: Optional[str] = None):
+def describe_option(name: str | None = None):
     """Prints the description of an option.
 
     If `name` is unspecified, prints the description of all available options.
diff --git a/python/cudf/cudf/pandas/fast_slow_proxy.py b/python/cudf/cudf/pandas/fast_slow_proxy.py
index 128913e5746..1540c6850e7 100644
--- a/python/cudf/cudf/pandas/fast_slow_proxy.py
+++ b/python/cudf/cudf/pandas/fast_slow_proxy.py
@@ -12,17 +12,7 @@
 import warnings
 from collections.abc import Iterator
 from enum import IntEnum
-from typing import (
-    Any,
-    Callable,
-    Dict,
-    Literal,
-    Mapping,
-    Optional,
-    Set,
-    Tuple,
-    Type,
-)
+from typing import Any, Callable, Literal, Mapping
 
 import numpy as np
 
@@ -118,12 +108,12 @@ def make_final_proxy_type(
     *,
     fast_to_slow: Callable,
     slow_to_fast: Callable,
-    module: Optional[str] = None,
+    module: str | None = None,
     additional_attributes: Mapping[str, Any] | None = None,
     postprocess: Callable[[_FinalProxy, Any, Any], Any] | None = None,
-    bases: Tuple = (),
-    metaclasses: Tuple = (),
-) -> Type[_FinalProxy]:
+    bases: tuple = (),
+    metaclasses: tuple = (),
+) -> type[_FinalProxy]:
     """
     Defines a fast-slow proxy type for a pair of "final" fast and slow
     types. Final types are types for which known operations exist for
@@ -270,8 +260,8 @@ def make_intermediate_proxy_type(
     fast_type: type,
     slow_type: type,
     *,
-    module: Optional[str] = None,
-) -> Type[_IntermediateProxy]:
+    module: str | None = None,
+) -> type[_IntermediateProxy]:
     """
     Defines a proxy type for a pair of "intermediate" fast and slow
     types. Intermediate types are the types of the results of
@@ -613,13 +603,13 @@ class _IntermediateProxy(_FastSlowProxy):
     `make_intermediate_proxy_type` to create subtypes.
     """
 
-    _method_chain: Tuple[Callable, Tuple, Dict]
+    _method_chain: tuple[Callable, tuple, dict]
 
     @classmethod
     def _fsproxy_wrap(
         cls,
         obj: Any,
-        method_chain: Tuple[Callable, Tuple, Dict],
+        method_chain: tuple[Callable, tuple, dict],
     ):
         """
         Parameters
@@ -955,7 +945,7 @@ def _fast_slow_function_call(
 def _transform_arg(
     arg: Any,
     attribute_name: Literal["_fsproxy_slow", "_fsproxy_fast"],
-    seen: Set[int],
+    seen: set[int],
 ) -> Any:
     """
     Transform "arg" into its corresponding slow (or fast) type.
@@ -1052,7 +1042,7 @@ def _fast_arg(arg: Any) -> Any:
     """
     Transform "arg" into its corresponding fast type.
     """
-    seen: Set[int] = set()
+    seen: set[int] = set()
     return _transform_arg(arg, "_fsproxy_fast", seen)
 
 
@@ -1060,7 +1050,7 @@ def _slow_arg(arg: Any) -> Any:
     """
     Transform "arg" into its corresponding slow type.
     """
-    seen: Set[int] = set()
+    seen: set[int] = set()
     return _transform_arg(arg, "_fsproxy_slow", seen)
 
 
@@ -1137,7 +1127,7 @@ def _is_function_or_method(obj: Any) -> bool:
 def _replace_closurevars(
     f: types.FunctionType,
     attribute_name: Literal["_fsproxy_slow", "_fsproxy_fast"],
-    seen: Set[int],
+    seen: set[int],
 ) -> Callable[..., Any]:
     """
     Return a copy of `f` with its closure variables replaced with
@@ -1199,10 +1189,10 @@ def is_proxy_object(obj: Any) -> bool:
     return False
 
 
-NUMPY_TYPES: Set[str] = set(np.sctypeDict.values())
+NUMPY_TYPES: set[str] = set(np.sctypeDict.values())
 
 
-_SPECIAL_METHODS: Set[str] = {
+_SPECIAL_METHODS: set[str] = {
     "__abs__",
     "__add__",
     "__and__",
diff --git a/python/cudf/cudf/pandas/module_accelerator.py b/python/cudf/cudf/pandas/module_accelerator.py
index 1d431c6d882..f82e300e83d 100644
--- a/python/cudf/cudf/pandas/module_accelerator.py
+++ b/python/cudf/cudf/pandas/module_accelerator.py
@@ -17,7 +17,7 @@
 from abc import abstractmethod
 from importlib._bootstrap import _ImportLockContext as ImportLock
 from types import ModuleType
-from typing import Any, ContextManager, Dict, NamedTuple, Tuple
+from typing import Any, ContextManager, NamedTuple
 
 from typing_extensions import Self
 
@@ -377,7 +377,7 @@ class ModuleAccelerator(ModuleAcceleratorBase):
     attempts to call the fast version first).
     """
 
-    _denylist: Tuple[str]
+    _denylist: tuple[str]
     _use_fast_lib: bool
     _use_fast_lib_lock: threading.RLock
     _module_cache_prefix: str = "_slow_lib_"
@@ -519,7 +519,7 @@ def disabled(self):
     def getattr_real_or_wrapped(
         name: str,
         *,
-        real: Dict[str, Any],
+        real: dict[str, Any],
         wrapped_objs,
         loader: ModuleAccelerator,
     ) -> Any:
diff --git a/python/cudf/cudf/pandas/profiler.py b/python/cudf/cudf/pandas/profiler.py
index 0dbd333ce4f..0fb41fc0b26 100644
--- a/python/cudf/cudf/pandas/profiler.py
+++ b/python/cudf/cudf/pandas/profiler.py
@@ -1,6 +1,7 @@
 # SPDX-FileCopyrightText: Copyright (c) 2023-2024, NVIDIA CORPORATION & AFFILIATES.
 # All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
+from __future__ import annotations
 
 import inspect
 import operator
@@ -8,7 +9,6 @@
 import sys
 import time
 from collections import defaultdict
-from typing import Union
 
 from rich.console import Console
 from rich.syntax import Syntax
@@ -119,12 +119,10 @@ def __exit__(self, *args, **kwargs):
 
     @staticmethod
     def get_namespaced_function_name(
-        func_obj: Union[
-            _FunctionProxy,
-            _MethodProxy,
-            type[_FinalProxy],
-            type[_IntermediateProxy],
-        ],
+        func_obj: _FunctionProxy
+        | _MethodProxy
+        | type[_FinalProxy]
+        | type[_IntermediateProxy],
     ):
         if isinstance(func_obj, _MethodProxy):
             return func_obj._fsproxy_slow.__qualname__
diff --git a/python/cudf/cudf/pylibcudf_tests/common/utils.py b/python/cudf/cudf/pylibcudf_tests/common/utils.py
index 54d38f1a8cf..bf927e661fe 100644
--- a/python/cudf/cudf/pylibcudf_tests/common/utils.py
+++ b/python/cudf/cudf/pylibcudf_tests/common/utils.py
@@ -1,6 +1,5 @@
 # Copyright (c) 2024, NVIDIA CORPORATION.
-
-from typing import Optional, Union
+from __future__ import annotations
 
 import pyarrow as pa
 import pytest
@@ -10,7 +9,7 @@
 
 def metadata_from_arrow_array(
     pa_array: pa.Array,
-) -> Optional[plc.interop.ColumnMetadata]:
+) -> plc.interop.ColumnMetadata | None:
     metadata = None
     if pa.types.is_list(dtype := pa_array.type) or pa.types.is_struct(dtype):
         metadata = plc.interop.ColumnMetadata(
@@ -25,7 +24,7 @@ def metadata_from_arrow_array(
 
 
 def assert_column_eq(
-    lhs: Union[pa.Array, plc.Column], rhs: Union[pa.Array, plc.Column]
+    lhs: pa.Array | plc.Column, rhs: pa.Array | plc.Column
 ) -> None:
     """Verify that a pylibcudf array and PyArrow array are equal."""
     # Nested types require children metadata to be passed to the conversion function.
diff --git a/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py b/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py
index 0e38b10ed52..238e8d990cc 100644
--- a/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py
+++ b/python/cudf/cudf/tests/test_avro_reader_fastavro_integration.py
@@ -11,10 +11,11 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+from __future__ import annotations
+
 import datetime
 import io
 import pathlib
-from typing import Optional
 
 import fastavro
 import numpy as np
@@ -292,7 +293,7 @@ def test_can_detect_dtypes_from_avro_logical_type(
     assert_eq(expected, actual)
 
 
-def get_days_from_epoch(date: Optional[datetime.date]) -> Optional[int]:
+def get_days_from_epoch(date: datetime.date | None) -> int | None:
     if date is None:
         return None
     return (date - datetime.date(1970, 1, 1)).days
diff --git a/python/cudf/cudf/tests/test_df_protocol.py b/python/cudf/cudf/tests/test_df_protocol.py
index a22b678ebe6..8ce4da792a4 100644
--- a/python/cudf/cudf/tests/test_df_protocol.py
+++ b/python/cudf/cudf/tests/test_df_protocol.py
@@ -1,6 +1,7 @@
 # Copyright (c) 2021-2024, NVIDIA CORPORATION.
+from __future__ import annotations
 
-from typing import Any, Tuple
+from typing import Any
 
 import cupy as cp
 import pandas as pd
@@ -64,7 +65,7 @@ def assert_validity_equal(protocol_buffer, cudf_buffer, size, null, valid):
         raise NotImplementedError()
 
 
-def assert_buffer_equal(buffer_and_dtype: Tuple[_CuDFBuffer, Any], cudfcol):
+def assert_buffer_equal(buffer_and_dtype: tuple[_CuDFBuffer, Any], cudfcol):
     buf, dtype = buffer_and_dtype
     device_id = cp.asarray(cudfcol.data).device.id
     assert buf.__dlpack_device__() == (2, device_id)
diff --git a/python/cudf/cudf/tests/test_spilling.py b/python/cudf/cudf/tests/test_spilling.py
index 913a958b4c2..59b8e6d2e70 100644
--- a/python/cudf/cudf/tests/test_spilling.py
+++ b/python/cudf/cudf/tests/test_spilling.py
@@ -1,4 +1,5 @@
 # Copyright (c) 2022-2024, NVIDIA CORPORATION.
+from __future__ import annotations
 
 import contextlib
 import importlib
@@ -7,7 +8,6 @@
 import warnings
 import weakref
 from concurrent.futures import ThreadPoolExecutor
-from typing import List, Tuple
 
 import cupy
 import numpy as np
@@ -107,7 +107,7 @@ def single_column_df_base_data(df: cudf.DataFrame) -> SpillableBuffer:
 gen_df_data_nbytes = single_column_df()._data._data["a"].data.nbytes
 
 
-def spilled_and_unspilled(manager: SpillManager) -> Tuple[int, int]:
+def spilled_and_unspilled(manager: SpillManager) -> tuple[int, int]:
     """Get bytes spilled and unspilled known by the manager"""
     spilled = sum(buf.size for buf in manager.buffers() if buf.is_spilled)
     unspilled = sum(
@@ -661,7 +661,7 @@ def test_statistics(manager: SpillManager):
 def test_statistics_expose(manager: SpillManager):
     assert len(manager.statistics.spill_totals) == 0
 
-    buffers: List[SpillableBuffer] = [
+    buffers: list[SpillableBuffer] = [
         as_buffer(data=rmm.DeviceBuffer(size=10), exposed=False)
         for _ in range(10)
     ]
@@ -687,7 +687,7 @@ def test_statistics_expose(manager: SpillManager):
     assert stat.spilled_nbytes == 0
 
     # Create and spill 10 new buffers
-    buffers: List[SpillableBuffer] = [
+    buffers: list[SpillableBuffer] = [
         as_buffer(data=rmm.DeviceBuffer(size=10), exposed=False)
         for _ in range(10)
     ]
diff --git a/python/cudf/cudf/utils/applyutils.py b/python/cudf/cudf/utils/applyutils.py
index d57303ca122..cd7fe5ee023 100644
--- a/python/cudf/cudf/utils/applyutils.py
+++ b/python/cudf/cudf/utils/applyutils.py
@@ -1,7 +1,8 @@
 # Copyright (c) 2018-2024, NVIDIA CORPORATION.
+from __future__ import annotations
 
 import functools
-from typing import Any, Dict
+from typing import Any
 
 import cupy as cp
 from numba import cuda
@@ -339,7 +340,7 @@ def chunk_wise_kernel(nrows, chunks, {args}):
     return kernel
 
 
-_cache: Dict[Any, Any] = dict()
+_cache: dict[Any, Any] = dict()
 
 
 @functools.wraps(_make_row_wise_kernel)
diff --git a/python/cudf/cudf/utils/queryutils.py b/python/cudf/cudf/utils/queryutils.py
index 239438afd24..78aeac425f7 100644
--- a/python/cudf/cudf/utils/queryutils.py
+++ b/python/cudf/cudf/utils/queryutils.py
@@ -1,8 +1,9 @@
-# Copyright (c) 2018-2023, NVIDIA CORPORATION.
+# Copyright (c) 2018-2024, NVIDIA CORPORATION.
+from __future__ import annotations
 
 import ast
 import datetime
-from typing import Any, Dict
+from typing import Any
 
 import numpy as np
 from numba import cuda
@@ -114,7 +115,7 @@ def _check_error(tree):
         raise QuerySyntaxError("too many expressions")
 
 
-_cache: Dict[Any, Any] = {}
+_cache: dict[Any, Any] = {}
 
 
 def query_compile(expr):
diff --git a/python/cudf/cudf/utils/utils.py b/python/cudf/cudf/utils/utils.py
index 95621cf9519..2e4dfc4bb14 100644
--- a/python/cudf/cudf/utils/utils.py
+++ b/python/cudf/cudf/utils/utils.py
@@ -1,11 +1,11 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
+from __future__ import annotations
 
 import decimal
 import functools
 import os
 import traceback
 import warnings
-from typing import FrozenSet, Set, Union
 
 import numpy as np
 import pandas as pd
@@ -218,7 +218,7 @@ class GetAttrGetItemMixin:
     # `__setstate__`, but this class may be used in complex multiple
     # inheritance hierarchies that might also override serialization.  The
     # solution here is a minimally invasive change that avoids such conflicts.
-    _PROTECTED_KEYS: Union[FrozenSet[str], Set[str]] = frozenset()
+    _PROTECTED_KEYS: frozenset[str] | set[str] = frozenset()
 
     def __getattr__(self, key):
         if key in self._PROTECTED_KEYS:
diff --git a/python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py b/python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py
index 39bf07c49de..a75a20a4681 100644
--- a/python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py
+++ b/python/cudf/cudf_pandas_tests/test_fast_slow_proxy.py
@@ -1,6 +1,7 @@
 # SPDX-FileCopyrightText: Copyright (c) 2023-2024 NVIDIA CORPORATION & AFFILIATES.
 # All rights reserved.
 # SPDX-License-Identifier: Apache-2.0
+from __future__ import annotations
 
 import inspect
 from functools import partial
diff --git a/python/dask_cudf/dask_cudf/groupby.py b/python/dask_cudf/dask_cudf/groupby.py
index ef47ea436c7..2e72461b43d 100644
--- a/python/dask_cudf/dask_cudf/groupby.py
+++ b/python/dask_cudf/dask_cudf/groupby.py
@@ -1,7 +1,7 @@
 # Copyright (c) 2020-2024, NVIDIA CORPORATION.
+from __future__ import annotations
 
 from functools import wraps
-from typing import Set
 
 import numpy as np
 import pandas as pd
@@ -695,7 +695,7 @@ def _aggs_optimized(arg, supported: set):
     """Check that aggregations in `arg` are a subset of `supported`"""
     if isinstance(arg, (list, dict)):
         if isinstance(arg, dict):
-            _global_set: Set[str] = set()
+            _global_set: set[str] = set()
             for col in arg:
                 if isinstance(arg[col], list):
                     _global_set = _global_set.union(set(arg[col]))