From d5b5635c4836bfcce1df44fcb331dfb2b4f683d6 Mon Sep 17 00:00:00 2001 From: Teodora Date: Sat, 18 Jun 2022 16:46:08 +0300 Subject: [PATCH] Implementation of first and last reduction (#1093) * implement append, create and finalize methods for first() reduction * implement append, create, finalize for last() reduction * add numba.jit() decorator * add cuda=False parameter to last finalize() method * fix missing whitespace after "," * update NotImplementedError message for _combine() method in first/last reduction * added tests for first/last reduction on pandas DF * Update datashader/reductions.py add whitespace characters Co-authored-by: Ian Thomas * Update datashader/tests/test_pandas.py compare directly the numpy arrays in testing Co-authored-by: Ian Thomas * Update datashader/reductions.py add whitespace character Co-authored-by: Ian Thomas * compare the numpy arrays directly for first() and last() reduction's tests Co-authored-by: Ian Thomas --- datashader/reductions.py | 28 ++++++++++++++++------------ datashader/tests/test_pandas.py | 31 +++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+), 12 deletions(-) diff --git a/datashader/reductions.py b/datashader/reductions.py index 942784144..766b3d647 100644 --- a/datashader/reductions.py +++ b/datashader/reductions.py @@ -875,20 +875,22 @@ class first(Reduction): _dshape = dshape(Option(ct.float64)) @staticmethod - def _append(x, y, agg): - raise NotImplementedError("first is currently implemented only for rasters") + @ngjit + def _append(x, y, agg, field): + if not isnull(field) and isnull(agg[y, x]): + agg[y, x] = field @staticmethod def _create(shape, array_module): - raise NotImplementedError("first is currently implemented only for rasters") + return array_module.full(shape, np.nan) @staticmethod def _combine(aggs): - raise NotImplementedError("first is currently implemented only for rasters") + raise NotImplementedError("first is not implemented for dask DataFrames") @staticmethod - def _finalize(bases, **kwargs): - raise NotImplementedError("first is currently implemented only for rasters") + def _finalize(bases, cuda=False, **kwargs): + return xr.DataArray(bases[0], **kwargs) @@ -909,20 +911,22 @@ class last(Reduction): _dshape = dshape(Option(ct.float64)) @staticmethod - def _append(x, y, agg): - raise NotImplementedError("last is currently implemented only for rasters") + @ngjit + def _append(x, y, agg, field): + if not isnull(field): + agg[y, x] = field @staticmethod def _create(shape, array_module): - raise NotImplementedError("last is currently implemented only for rasters") + return array_module.full(shape, np.nan) @staticmethod def _combine(aggs): - raise NotImplementedError("last is currently implemented only for rasters") + raise NotImplementedError("last is not implemented for dask DataFrames") @staticmethod - def _finalize(bases, **kwargs): - raise NotImplementedError("last is currently implemented only for rasters") + def _finalize(bases, cuda=False, **kwargs): + return xr.DataArray(bases[0], **kwargs) diff --git a/datashader/tests/test_pandas.py b/datashader/tests/test_pandas.py index ca41ca29a..dd24d1a0f 100644 --- a/datashader/tests/test_pandas.py +++ b/datashader/tests/test_pandas.py @@ -546,6 +546,37 @@ def test_categorical_std(df): assert_eq_xr(agg, out) +def test_first(): + df = pd.DataFrame({'x': [4, 0, 2, 2, 5, 2], + 'y': [0, 4, 5, 1, 1, 3], + 'z': [100, 101, 102, 103, 104, 105]}) + cvs = ds.Canvas(plot_height=5, plot_width=5) + agg = cvs.line(df, 'x', 'y', agg=ds.first('z')) + sol = np.array([ + [np.nan, np.nan, np.nan, np.nan, 100.], + [np.nan, np.nan, 102., 100., 103.], + [np.nan, np.nan, 100., 104., np.nan], + [np.nan, 100., 102., np.nan, np.nan], + [100., 101., 101., np.nan, np.nan]], dtype='float64') + + assert_eq_ndarray(agg, sol) + + +def test_last(): + df = pd.DataFrame({'x': [4, 0, 2, 2, 5, 2], + 'y': [0, 4, 5, 1, 1, 3], + 'z': [100, 101, 102, 103, 104, 105]}) + cvs = ds.Canvas(plot_height=5, plot_width=5) + agg = cvs.line(df, 'x', 'y', agg=ds.last('z')) + sol = np.array([ + [np.nan, np.nan, np.nan, np.nan, 100.], + [np.nan, np.nan, 102., 103., 103.], + [np.nan, np.nan, 102., 104., np.nan], + [np.nan, 100., 104., np.nan, np.nan], + [100., 101., 101., np.nan, np.nan]], dtype='float64') + + assert_eq_ndarray(agg, sol) + @pytest.mark.parametrize('df', dfs) def test_multiple_aggregates(df): agg = c.points(df, 'x', 'y',