Skip to content

Commit

Permalink
Update GeoDataFrame to Use the Structured GatherMap Class (#1219)
Browse files: browse the repository at this point in the history
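As the title says, this adapts cuspatial to the upstream cuDF change rapidsai/cudf#13534: `_gather` and `_apply_boolean_mask` now accept the structured `GatherMap` and `BooleanMask` objects from `cudf.core.copy_types`.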

Fixes #1222

Authors:
  - Michael Wang (https://github.com/isVoid)

Approvers:
  - Lawrence Mitchell (https://github.com/wence-)
  - Mark Harris (https://github.com/harrism)
  - H. Thomson Comer (https://github.com/thomcom)

URL: #1219
  • Loading branch information
isVoid authored Jul 18, 2023
1 parent 29fb208 commit 1e0e357
Show file tree
Hide file tree
Showing 2 changed files with 16 additions and 15 deletions.
22 changes: 11 additions & 11 deletions python/cuspatial/cuspatial/core/geodataframe.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from geopandas.geoseries import is_geometry_type as gp_is_geometry_type

import cudf
from cudf.core.copy_types import BooleanMask, GatherMap

from cuspatial.core._column.geocolumn import GeoColumn, GeoMeta
from cuspatial.core.geoseries import GeoSeries
Expand Down Expand Up @@ -181,31 +182,30 @@ def _slice(self: T, arg: slice) -> T:
)
return self.__class__(result)

def _apply_boolean_mask(self, mask) -> T:
def _apply_boolean_mask(self, mask: BooleanMask, keep_index=True) -> T:
geo_columns, data_columns = self._split_out_geometry_columns()
data = data_columns._apply_boolean_mask(mask)
data = data_columns._apply_boolean_mask(mask, keep_index)

geo = GeoDataFrame(
{name: geo_columns[name][mask] for name in geo_columns}
{name: geo_columns[name][mask.column] for name in geo_columns}
)

res = self.__class__._from_data(self._recombine_columns(geo, data))
res.index = data.index
if keep_index:
res.index = data.index
return res

def _gather(
self, gather_map, keep_index=True, nullify=False, check_bounds=True
):
def _gather(self, gather_map: GatherMap, keep_index=True):
geo_data, cudf_data = self._split_out_geometry_columns()
# gather cudf columns
df = cudf.DataFrame._from_data(data=cudf_data, index=self.index)
cudf_gathered = cudf.DataFrame._gather(
df, gather_map, keep_index, nullify, check_bounds
)

cudf_gathered = df._gather(gather_map, keep_index=keep_index)

# gather GeoColumns
gathered = {
geo: geo_data[geo].iloc[gather_map] for geo in geo_data.keys()
geo: geo_data[geo].iloc[gather_map.column]
for geo in geo_data.keys()
}
geo_gathered = GeoDataFrame(gathered)

Expand Down
9 changes: 5 additions & 4 deletions python/cuspatial/cuspatial/core/geoseries.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
import cudf
from cudf._typing import ColumnLike
from cudf.core.column.column import as_column
from cudf.core.copy_types import GatherMap

import cuspatial.io.pygeoarrow as pygeoarrow
from cuspatial.core._column.geocolumn import ColumnType, GeoColumn
Expand Down Expand Up @@ -922,10 +923,10 @@ def align(self, other):
aligned_right,
)

def _gather(
self, gather_map, keep_index=True, nullify=False, check_bounds=True
):
return self.iloc[gather_map]
def _gather(self, gather_map: GatherMap, keep_index=True):
# TODO: This could use the information to avoid reprocessing
# in iloc
return self.iloc[gather_map.column]

# def reset_index(self, drop=False, inplace=False, name=None):
def reset_index(
Expand Down

0 comments on commit 1e0e357

Please sign in to comment.