Skip to content

Commit

Permalink
Implementation of first and last reduction (#1093)
Browse files Browse the repository at this point in the history
* implement append, create and finalize methods for first() reduction

* implement append, create, finalize for last() reduction

* add numba.jit() decorator

* add cuda=False parameter to last finalize() method

* fix missing whitespace after ","

* update NotImplementedError message for _combine() method in first/last reduction

* added tests for first/last reduction on pandas DF

* Update datashader/reductions.py

add whitespace characters

Co-authored-by: Ian Thomas <ianthomas23@gmail.com>

* Update datashader/tests/test_pandas.py

compare the numpy arrays directly in testing

Co-authored-by: Ian Thomas <ianthomas23@gmail.com>

* Update datashader/reductions.py

add whitespace character

Co-authored-by: Ian Thomas <ianthomas23@gmail.com>

* compare the numpy arrays directly for first() and last() reduction's tests

Co-authored-by: Ian Thomas <ianthomas23@gmail.com>
  • Loading branch information
tselea and ianthomas23 committed Jun 18, 2022
1 parent 2557fae commit d5b5635
Show file tree
Hide file tree
Showing 2 changed files with 47 additions and 12 deletions.
28 changes: 16 additions & 12 deletions datashader/reductions.py
Original file line number Diff line number Diff line change
Expand Up @@ -875,20 +875,22 @@ class first(Reduction):
_dshape = dshape(Option(ct.float64))

@staticmethod
def _append(x, y, agg):
raise NotImplementedError("first is currently implemented only for rasters")
@ngjit
def _append(x, y, agg, field):
# "first" semantics: write `field` into cell (y, x) only when `field` is
# not null and the cell itself is still null, so a value already stored
# in the cell is never overwritten by a later call.
# NOTE(review): `isnull` is defined outside this view; presumably it tests
# for the missing-value marker (NaN) -- confirm.
if not isnull(field) and isnull(agg[y, x]):
agg[y, x] = field

@staticmethod
def _create(shape, array_module):
raise NotImplementedError("first is currently implemented only for rasters")
return array_module.full(shape, np.nan)

@staticmethod
def _combine(aggs):
raise NotImplementedError("first is currently implemented only for rasters")
raise NotImplementedError("first is not implemented for dask DataFrames")

@staticmethod
def _finalize(bases, **kwargs):
raise NotImplementedError("first is currently implemented only for rasters")
def _finalize(bases, cuda=False, **kwargs):
# Wrap the first entry of `bases` in an xarray DataArray, forwarding the
# remaining keyword arguments unchanged. `cuda` is accepted but is not
# read anywhere in this body.
return xr.DataArray(bases[0], **kwargs)



Expand All @@ -909,20 +911,22 @@ class last(Reduction):
_dshape = dshape(Option(ct.float64))

@staticmethod
def _append(x, y, agg):
raise NotImplementedError("last is currently implemented only for rasters")
@ngjit
def _append(x, y, agg, field):
# "last" semantics: every non-null `field` overwrites cell (y, x), so the
# cell ends up holding the value from the most recent call that supplied
# a non-null field. Null fields leave the cell untouched.
# NOTE(review): `isnull` is defined outside this view; presumably it tests
# for the missing-value marker (NaN) -- confirm.
if not isnull(field):
agg[y, x] = field

@staticmethod
def _create(shape, array_module):
raise NotImplementedError("last is currently implemented only for rasters")
return array_module.full(shape, np.nan)

@staticmethod
def _combine(aggs):
raise NotImplementedError("last is currently implemented only for rasters")
raise NotImplementedError("last is not implemented for dask DataFrames")

@staticmethod
def _finalize(bases, **kwargs):
raise NotImplementedError("last is currently implemented only for rasters")
def _finalize(bases, cuda=False, **kwargs):
# Wrap the first entry of `bases` in an xarray DataArray, forwarding the
# remaining keyword arguments unchanged. `cuda` is accepted but is not
# read anywhere in this body.
return xr.DataArray(bases[0], **kwargs)



Expand Down
31 changes: 31 additions & 0 deletions datashader/tests/test_pandas.py
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,37 @@ def test_categorical_std(df):
assert_eq_xr(agg, out)


def test_first():
    """Check ``ds.first('z')`` on a line glyph against a hand-written grid.

    A six-vertex line built from a pandas DataFrame is aggregated on a
    5x5 canvas and the result is compared with the expected float64 array
    via ``assert_eq_ndarray``.
    """
    nan = np.nan
    expected = np.array(
        [
            [nan, nan, nan, nan, 100.],
            [nan, nan, 102., 100., 103.],
            [nan, nan, 100., 104., nan],
            [nan, 100., 102., nan, nan],
            [100., 101., 101., nan, nan],
        ],
        dtype='float64',
    )

    points = pd.DataFrame({
        'x': [4, 0, 2, 2, 5, 2],
        'y': [0, 4, 5, 1, 1, 3],
        'z': [100, 101, 102, 103, 104, 105],
    })
    canvas = ds.Canvas(plot_height=5, plot_width=5)
    result = canvas.line(points, 'x', 'y', agg=ds.first('z'))

    assert_eq_ndarray(result, expected)


def test_last():
    """Check ``ds.last('z')`` on a line glyph against a hand-written grid.

    A six-vertex line built from a pandas DataFrame is aggregated on a
    5x5 canvas and the result is compared with the expected float64 array
    via ``assert_eq_ndarray``.
    """
    nan = np.nan
    expected = np.array(
        [
            [nan, nan, nan, nan, 100.],
            [nan, nan, 102., 103., 103.],
            [nan, nan, 102., 104., nan],
            [nan, 100., 104., nan, nan],
            [100., 101., 101., nan, nan],
        ],
        dtype='float64',
    )

    points = pd.DataFrame({
        'x': [4, 0, 2, 2, 5, 2],
        'y': [0, 4, 5, 1, 1, 3],
        'z': [100, 101, 102, 103, 104, 105],
    })
    canvas = ds.Canvas(plot_height=5, plot_width=5)
    result = canvas.line(points, 'x', 'y', agg=ds.last('z'))

    assert_eq_ndarray(result, expected)

@pytest.mark.parametrize('df', dfs)
def test_multiple_aggregates(df):
agg = c.points(df, 'x', 'y',
Expand Down

0 comments on commit d5b5635

Please sign in to comment.