Skip to content

Commit

Permalink
Reindex in DataFrame.__setitem__ (#7957)
Browse files Browse the repository at this point in the history
* Reindex in `DataFrame.__setitem__` (#7948)

This PR fixes missing reindexing in `DataFrame.__setitem__` when the `value` argument is a `DataFrame`. Currently, we align the index if `value` is a `Series` and `arg` is already a column name.

This change is necessary to continue with the upgrade to pandas `1.2.4`. However, pandas has confirmed this as a regression only in versions `1.2.0` through `1.2.2`, so the pytest has been corrected to xfail only in those versions of pandas.

Authors:
  - GALI PREM SAGAR (https://github.com/galipremsagar)

Approvers:
  - Ashwin Srinath (https://github.com/shwina)

URL: #7948

* pin max pandas to 1.2.4
  • Loading branch information
galipremsagar authored Apr 16, 2021
1 parent 6ac5cd6 commit 8e1ffd4
Show file tree
Hide file tree
Showing 17 changed files with 27 additions and 19 deletions.
2 changes: 1 addition & 1 deletion conda/environments/cudf_dev_cuda10.1.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ dependencies:
- python>=3.7,<3.9
- numba>=0.49.0,!=0.51.0
- numpy
- pandas>=1.0,<1.3.0dev0
- pandas>=1.0,<=1.2.4
- pyarrow=1.0.1
- fastavro>=0.22.9
- notebook>=0.5.0
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/cudf_dev_cuda10.2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ dependencies:
- python>=3.7,<3.9
- numba>=0.49,!=0.51.0
- numpy
- pandas>=1.0,<1.3.0dev0
- pandas>=1.0,<=1.2.4
- pyarrow=1.0.1
- fastavro>=0.22.9
- notebook>=0.5.0
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/cudf_dev_cuda11.0.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ dependencies:
- python>=3.7,<3.9
- numba>=0.49,!=0.51.0
- numpy
- pandas>=1.0,<1.3.0dev0
- pandas>=1.0,<=1.2.4
- pyarrow=1.0.1
- fastavro>=0.22.9
- notebook>=0.5.0
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/cudf_dev_cuda11.1.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ dependencies:
- python>=3.7,<3.9
- numba>=0.49,!=0.51.0
- numpy
- pandas>=1.0,<1.3.0dev0
- pandas>=1.0,<=1.2.4
- pyarrow=1.0.1
- fastavro>=0.22.9
- notebook>=0.5.0
Expand Down
2 changes: 1 addition & 1 deletion conda/environments/cudf_dev_cuda11.2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ dependencies:
- python>=3.7,<3.9
- numba>=0.49,!=0.51.0
- numpy
- pandas>=1.0,<1.3.0dev0
- pandas>=1.0,<=1.2.4
- pyarrow=1.0.1
- fastavro>=0.22.9
- notebook>=0.5.0
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ requirements:
- protobuf
- python
- typing_extensions
- pandas >=1.0,<1.3.0dev0
- pandas >=1.0,<=1.2.4
- cupy >7.1.0,<9.0.0a0
- numba >=0.49.0
- numpy
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/cudf/core/_compat.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@
PANDAS_GE_100 = PANDAS_VERSION >= version.parse("1.0")
PANDAS_GE_110 = PANDAS_VERSION >= version.parse("1.1")
PANDAS_GE_120 = PANDAS_VERSION >= version.parse("1.2")
PANDAS_EQ_123 = PANDAS_VERSION == version.parse("1.2.3")
PANDAS_LE_122 = PANDAS_VERSION <= version.parse("1.2.2")
10 changes: 9 additions & 1 deletion python/cudf/cudf/core/dataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -7931,7 +7931,12 @@ def _align_indices(lhs, rhs):
return lhs_out, rhs_out


def _setitem_with_dataframe(input_df, replace_df, input_cols=None, mask=None):
def _setitem_with_dataframe(
input_df: DataFrame,
replace_df: DataFrame,
input_cols: Any = None,
mask: Optional[cudf.core.column.ColumnBase] = None,
):
"""
This function sets item dataframes relevant columns with replacement df
:param input_df: Dataframe to be modified inplace
Expand All @@ -7948,6 +7953,9 @@ def _setitem_with_dataframe(input_df, replace_df, input_cols=None, mask=None):
"Number of Input Columns must be same replacement Dataframe"
)

if not input_df.index.equals(replace_df.index):
replace_df = replace_df.reindex(input_df.index)

for col_1, col_2 in zip(input_cols, replace_df.columns):
if col_1 in input_df.columns:
if mask is not None:
Expand Down
4 changes: 2 additions & 2 deletions python/cudf/cudf/tests/test_setitem.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
import pytest

import cudf
from cudf.core._compat import PANDAS_EQ_123, PANDAS_GE_120
from cudf.core._compat import PANDAS_GE_120, PANDAS_LE_122
from cudf.tests.utils import assert_eq, assert_exceptions_equal


Expand All @@ -21,7 +21,7 @@ def test_dataframe_setitem_bool_mask_scaler(df, arg, value):


@pytest.mark.xfail(
condition=PANDAS_EQ_123 or not PANDAS_GE_120,
condition=PANDAS_GE_120 and PANDAS_LE_122,
reason="https://github.com/pandas-dev/pandas/issues/40204",
)
def test_dataframe_setitem_scaler_bool():
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/requirements/cuda-10.1/dev_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ numpy
numpydoc
nvtx>=0.2.1
packaging
pandas>=1.0,<1.3.0dev0
pandas>=1.0,<=1.2.4
pandoc==2.0a4
protobuf
pyorc
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/requirements/cuda-10.2/dev_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ numpy
numpydoc
nvtx>=0.2.1
packaging
pandas>=1.0,<1.3.0dev0
pandas>=1.0,<=1.2.4
pandoc==2.0a4
protobuf
pyorc
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/requirements/cuda-11.0/dev_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ numpy
numpydoc
nvtx>=0.2.1
packaging
pandas>=1.0,<1.3.0dev0
pandas>=1.0,<=1.2.4
pandoc==2.0a4
protobuf
pyorc
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/requirements/cuda-11.1/dev_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ numpy
numpydoc
nvtx>=0.2.1
packaging
pandas>=1.0,<1.3.0dev0
pandas>=1.0,<=1.2.4
pandoc==2.0a4
protobuf
pyorc
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/requirements/cuda-11.2/dev_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@ numpy
numpydoc
nvtx>=0.2.1
packaging
pandas>=1.0,<1.3.0dev0
pandas>=1.0,<=1.2.4
pandoc==2.0a4
protobuf
pyorc
Expand Down
2 changes: 1 addition & 1 deletion python/cudf/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
"fastavro>=0.22.9",
"fsspec>=0.6.0",
"numpy",
"pandas>=1.0,<1.3.0dev0",
"pandas>=1.0,<=1.2.4",
"typing_extensions",
"protobuf",
"nvtx>=0.2.1",
Expand Down
2 changes: 1 addition & 1 deletion python/dask_cudf/dev_requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ distributed>=2.22.0,<=2021.4.0
fsspec>=0.6.0
numba>=0.49.0,!=0.51.0
numpy
pandas>=1.0,<1.3.0dev0
pandas>=1.0,<=1.2.4
pytest
setuptools
wheel
Expand Down
4 changes: 2 additions & 2 deletions python/dask_cudf/setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,13 @@
"distributed>=2.22.0,<=2021.4.0",
"fsspec>=0.6.0",
"numpy",
"pandas>=1.0,<1.3.0dev0",
"pandas>=1.0,<=1.2.4",
]

extras_require = {
"test": [
"numpy",
"pandas>=1.0,<1.3.0dev0",
"pandas>=1.0,<=1.2.4",
"pytest",
"numba>=0.49.0,!=0.51.0",
"dask==2021.4.0",
Expand Down

0 comments on commit 8e1ffd4

Please sign in to comment.