Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Align Index APIs with pandas 2.x #16361

Merged
merged 12 commits into from
Jul 29, 2024
4 changes: 4 additions & 0 deletions docs/cudf/source/conf.py
Original file line number Diff line number Diff line change
Expand Up @@ -561,6 +561,10 @@ def on_missing_reference(app, env, node, contnode):
("py:class", "ScalarLike"),
("py:class", "ParentType"),
("py:class", "ColumnLike"),
("py:class", "ColumnLike"),
("py:obj", "cudf.Index.transpose"),
("py:obj", "cudf.Index.to_flat_index"),
("py:obj", "cudf.MultiIndex.to_flat_index"),
# TODO: Remove this when we figure out why typing_extensions doesn't seem
# to map types correctly for intersphinx
("py:class", "typing_extensions.Self"),
Expand Down
25 changes: 23 additions & 2 deletions python/cudf/cudf/core/_base_index.py
Original file line number Diff line number Diff line change
Expand Up @@ -868,6 +868,24 @@ def to_numpy(self):
"""Convert to a numpy array."""
raise NotImplementedError

def to_flat_index(self) -> Self:
    """
    Identity method.

    This is implemented for compatibility with subclass implementations
    when chaining.

    Returns
    -------
    Index
        Caller.

    See Also
    --------
    MultiIndex.to_flat_index : Subclass implementation.
    """
    # The very same object is handed back; no copy is made.
    return self

def any(self):
"""
Return whether any elements is True in Index.
Expand Down Expand Up @@ -945,7 +963,7 @@ def to_pandas(self, *, nullable: bool = False, arrow_type: bool = False):
"""
raise NotImplementedError

def isin(self, values):
def isin(self, values, level=None):
"""Return a boolean array where the index values are in values.

Compute boolean array of whether each index value is found in
Expand All @@ -956,6 +974,9 @@ def isin(self, values):
----------
values : set, list-like, Index
Sought values.
level : str or int, optional
Name or position of the index level to use (if the index is a
`MultiIndex`).

Returns
-------
Expand All @@ -979,7 +1000,7 @@ def isin(self, values):
# ColumnBase.isin).
raise NotImplementedError

def unique(self):
def unique(self, level: int | None = None):
"""
Return unique values in the index.

Expand Down
24 changes: 20 additions & 4 deletions python/cudf/cudf/core/index.py
Original file line number Diff line number Diff line change
Expand Up @@ -540,8 +540,12 @@ def memory_usage(self, deep: bool = False) -> int:
)
return 0

def unique(self) -> Self:
def unique(self, level: int | None = None) -> Self:
    """
    Return the unique values of this index.

    Parameters
    ----------
    level : int, optional
        Level to take the unique values from. This index has a single
        level, so only ``None``, ``0`` and ``-1`` are accepted.

    Returns
    -------
    Self
        A copy of the caller.

    Raises
    ------
    IndexError
        If ``level`` is greater than 0 or less than -1.
    """
    if level is not None:
        if level > 0:
            raise IndexError(
                f"Too many levels: Index has only 1 level, not {level + 1}"
            )
        if level < -1:
            # -1 addresses the only level from the end; anything lower is
            # out of range, which is what pandas reports as well.
            raise IndexError(
                "Too many levels: Index has only 1 level, "
                f"{level} is not a valid level number"
            )
    # RangeIndex always has unique values
    return self.copy()

@_performance_tracking
Expand Down Expand Up @@ -964,7 +968,11 @@ def _indices_of(self, value) -> cudf.core.column.NumericalColumn:
i = []
return as_column(i, dtype=size_type_dtype)

def isin(self, values):
def isin(self, values, level=None):
if level is not None and level > 0:
raise IndexError(
f"Too many levels: Index has only 1 level, not {level + 1}"
)
if is_scalar(values):
raise TypeError(
"only list-like objects are allowed to be passed "
Expand Down Expand Up @@ -1616,12 +1624,20 @@ def append(self, other):

return self._concat(to_concat)

def unique(self):
def unique(self, level: int | None = None) -> Self:
    """
    Return the unique values of this index.

    Parameters
    ----------
    level : int, optional
        Level to take the unique values from. This index has a single
        level, so only ``None``, ``0`` and ``-1`` are accepted.

    Returns
    -------
    Self
        Index built from ``self._values.unique()``, keeping the name of
        the caller.

    Raises
    ------
    IndexError
        If ``level`` is greater than 0 or less than -1.
    """
    if level is not None:
        if level > 0:
            raise IndexError(
                f"Too many levels: Index has only 1 level, not {level + 1}"
            )
        if level < -1:
            # -1 addresses the only level from the end; anything lower is
            # out of range, which is what pandas reports as well.
            raise IndexError(
                "Too many levels: Index has only 1 level, "
                f"{level} is not a valid level number"
            )
    return cudf.core.index._index_from_data(
        {self.name: self._values.unique()}, name=self.name
    )

def isin(self, values):
def isin(self, values, level=None):
if level is not None and level > 0:
raise IndexError(
f"Too many levels: Index has only 1 level, not {level + 1}"
)
if is_scalar(values):
raise TypeError(
"only list-like objects are allowed to be passed "
Expand Down
16 changes: 14 additions & 2 deletions python/cudf/cudf/core/multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -1149,6 +1149,15 @@ def from_tuples(cls, tuples, names=None):
def to_numpy(self):
return self.values_host

def to_flat_index(self):
    """
    Convert a MultiIndex to an Index of tuples holding the level values.

    This conversion is not supported yet, so every call raises.

    Raises
    ------
    NotImplementedError
        Always.
    """
    # TODO: Could implement as Index of ListDtype?
    message = "to_flat_index is not currently supported."
    raise NotImplementedError(message)

@property # type: ignore
@_performance_tracking
def values_host(self):
Expand Down Expand Up @@ -1712,8 +1721,11 @@ def fillna(self, value):
return super().fillna(value=value)

@_performance_tracking
def unique(self):
return self.drop_duplicates(keep="first")
def unique(self, level: int | None = None) -> Self | cudf.Index:
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Could we add tests covering level?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Sure, added a test.

if level is None:
return self.drop_duplicates(keep="first")
else:
return self.get_level_values(level).unique()

@_performance_tracking
def nunique(self, dropna: bool = True) -> int:
Expand Down
8 changes: 0 additions & 8 deletions python/cudf/cudf/core/series.py
Original file line number Diff line number Diff line change
Expand Up @@ -2775,14 +2775,6 @@ def cov(self, other, min_periods=None, ddof: int | None = None):
f"{other.dtype}"
)

@_performance_tracking
def transpose(self):
"""Return the transpose, which is by definition self."""

return self

T = property(transpose, doc=transpose.__doc__)

@_performance_tracking
def duplicated(self, keep="first"):
"""
Expand Down
7 changes: 7 additions & 0 deletions python/cudf/cudf/core/single_column_frame.py
Original file line number Diff line number Diff line change
Expand Up @@ -389,3 +389,10 @@ def where(self, cond, other=None, inplace=False):
result = cudf._lib.copying.copy_if_else(input_col, other, cond)

return _make_categorical_like(result, self_column)

@_performance_tracking
def transpose(self):
    """Return the transpose, which is by definition self."""
    # The caller itself is returned, not a copy.
    return self

# Attribute-style access to transpose(); the property reuses the method's
# docstring as its own documentation.
T = property(transpose, doc=transpose.__doc__)
9 changes: 9 additions & 0 deletions python/cudf/cudf/tests/test_multiindex.py
Original file line number Diff line number Diff line change
Expand Up @@ -2170,3 +2170,12 @@ def test_bool_raises():
lfunc_args_and_kwargs=[[cudf.MultiIndex.from_arrays([range(1)])]],
rfunc_args_and_kwargs=[[pd.MultiIndex.from_arrays([range(1)])]],
)


def test_unique_level():
    """MultiIndex.unique(level=...) should give the same result as pandas."""
    pandas_index = pd.MultiIndex.from_arrays([[1, 1, 2], [3, 3, 2]])
    cudf_index = cudf.MultiIndex.from_pandas(pandas_index)

    # pandas provides the reference value; cudf is the implementation
    # under test.
    expected = pandas_index.unique(level=1)
    got = cudf_index.unique(level=1)
    assert_eq(expected, got)
Loading