Antialiased line support for where reductions (#1269)
* Dynamically create aa_stage_2_clear rather than use parallel_fill

* Move combine function definitions out of where._build_combine for reuse

* Antialiased where reductions on CPU

* Add tests
ianthomas23 committed Aug 16, 2023
1 parent fe567d0 commit ea163e9
Showing 7 changed files with 373 additions and 150 deletions.
58 changes: 41 additions & 17 deletions datashader/compiler.py
@@ -9,7 +9,7 @@

from .antialias import AntialiasCombination
from .reductions import SpecialColumn, UsesCudaMutex, by, category_codes, summary
from .utils import (isnull, ngjit, parallel_fill,
from .utils import (isnull, ngjit,
nanmax_in_place, nanmin_in_place, nansum_in_place, nanfirst_in_place, nanlast_in_place,
nanmax_n_in_place_3d, nanmax_n_in_place_4d, nanmin_n_in_place_3d, nanmin_n_in_place_4d,
nanfirst_n_in_place_3d, nanfirst_n_in_place_4d, nanlast_n_in_place_3d, nanlast_n_in_place_4d,
@@ -113,7 +113,7 @@ def compile_components(agg, schema, glyph, *, antialias=False, cuda=False, parti
else:
array_module = np
antialias_stage_2 = antialias_stage_2(array_module)
antialias_stage_2_funcs = make_antialias_stage_2_functions(antialias_stage_2)
antialias_stage_2_funcs = make_antialias_stage_2_functions(antialias_stage_2, bases, cuda, partitioned)
else:
self_intersect = False
antialias_stage_2 = False
@@ -148,9 +148,9 @@ def _get_antialias_stage_2_combine_func(combination: AntialiasCombination, zero:
n_reduction: bool, categorical: bool):
if n_reduction:
if zero == -1:
if combination == AntialiasCombination.MAX:
if combination in (AntialiasCombination.MAX, AntialiasCombination.LAST):
return row_max_n_in_place_4d if categorical else row_max_n_in_place_3d
elif combination == AntialiasCombination.MIN:
elif combination in (AntialiasCombination.MIN, AntialiasCombination.FIRST):
return row_min_n_in_place_4d if categorical else row_min_n_in_place_3d
else:
raise NotImplementedError
@@ -170,9 +170,9 @@ def _get_antialias_stage_2_combine_func(combination: AntialiasCombination, zero:
# 2D (ny, nx) if categorical is False. The same combination functions can be for both
# as all elements are independent.
if zero == -1:
if combination == AntialiasCombination.MAX:
if combination in (AntialiasCombination.MAX, AntialiasCombination.LAST):
return row_max_in_place
elif combination == AntialiasCombination.MIN:
elif combination in (AntialiasCombination.MIN, AntialiasCombination.FIRST):
return row_min_in_place
else:
raise NotImplementedError
@@ -189,18 +189,25 @@ def _get_antialias_stage_2_combine_func(combination: AntialiasCombination, zero:
return nansum_in_place


def make_antialias_stage_2_functions(antialias_stage_2):
def make_antialias_stage_2_functions(antialias_stage_2, bases, cuda, partitioned):
aa_combinations, aa_zeroes, aa_n_reductions, aa_categorical = antialias_stage_2

# Accumulate functions.
funcs = [_get_antialias_stage_2_combine_func(comb, zero, n_red, cat) for comb, zero, n_red, cat
in zip(aa_combinations, aa_zeroes, aa_n_reductions, aa_categorical)]

base_is_where = [b.is_where() for b in bases]
next_base_is_where = base_is_where[1:] + [False]

namespace = {}
namespace["literal_unroll"] = literal_unroll
for func in set(funcs):
namespace[func.__name__] = func

# Generator of unique names for combine functions
names = (f"combine{i}" for i in count())

# aa_stage_2_accumulate
lines = [
"def aa_stage_2_accumulate(aggs_and_copies, first_pass):",
# Don't need to accumulate if first_pass, just copy (opposite of aa_stage_2_copy_back)
@@ -209,21 +216,38 @@ def make_antialias_stage_2_functions(antialias_stage_2):
" a[1][:] = a[0][:]",
" else:",
]
for i, func in enumerate(funcs):
lines.append(f" {func.__name__}(aggs_and_copies[{i}][1], aggs_and_copies[{i}][0])")

for i, (func, is_where, next_is_where) in enumerate(zip(funcs, base_is_where, next_base_is_where)):
if is_where:
where_reduction = bases[i]
if isinstance(where_reduction, by):
where_reduction = where_reduction.reduction

combine = where_reduction._combine_callback(cuda, partitioned, aa_categorical[i])
name = next(names) # Unique name
namespace[name] = combine

lines.append(f" {name}(aggs_and_copies[{i}][::-1], aggs_and_copies[{i-1}][::-1])")
elif next_is_where:
# This is dealt with as part of the following base which is a where reduction.
pass
else:
lines.append(f" {func.__name__}(aggs_and_copies[{i}][1], aggs_and_copies[{i}][0])")
code = "\n".join(lines)
exec(code, namespace)
aa_stage_2_accumulate = ngjit(namespace["aa_stage_2_accumulate"])

@ngjit
def aa_stage_2_clear(aggs_and_copies):
k = 0
# Numba access to heterogeneous tuples is only permitted using literal_unroll.
for agg_and_copy in literal_unroll(aggs_and_copies):
parallel_fill(agg_and_copy[0], aa_zeroes[k])
k += 1
# aa_stage_2_clear
if np.any(np.isnan(aa_zeroes)):
namespace["nan"] = np.nan

lines = ["def aa_stage_2_clear(aggs_and_copies):"]
for i, aa_zero in enumerate(aa_zeroes):
lines.append(f" aggs_and_copies[{i}][0].fill({aa_zero})")
code = "\n".join(lines)
exec(code, namespace)
aa_stage_2_clear = ngjit(namespace["aa_stage_2_clear"])

# aa_stage_2_copy_back
@ngjit
def aa_stage_2_copy_back(aggs_and_copies):
# Numba access to heterogeneous tuples is only permitted using literal_unroll.
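
Illustration (not part of the commit): a minimal, Numba-free sketch of the code-generation pattern introduced above for aa_stage_2_clear, assuming a hypothetical antialias_stage_2 whose zero values are (0, nan). aa_stage_2_accumulate is generated the same way, emitting one combine call per reduction.

import numpy as np

# Hypothetical zero values for two reductions, e.g. a count (zero 0) and a max (zero nan).
aa_zeroes = (0, np.nan)

namespace = {}
if np.any(np.isnan(aa_zeroes)):
    namespace["nan"] = np.nan   # so the generated "fill(nan)" resolves at exec time

lines = ["def aa_stage_2_clear(aggs_and_copies):"]
for i, aa_zero in enumerate(aa_zeroes):
    lines.append(f"    aggs_and_copies[{i}][0].fill({aa_zero})")
exec("\n".join(lines), namespace)
aa_stage_2_clear = namespace["aa_stage_2_clear"]   # the real code additionally wraps this in ngjit

# The generated source is effectively:
#     def aa_stage_2_clear(aggs_and_copies):
#         aggs_and_copies[0][0].fill(0)
#         aggs_and_copies[1][0].fill(nan)

# Each tuple pairs an agg with its copy; only the agg (index 0) is cleared.
agg0, agg1 = np.ones((4, 4)), np.ones((4, 4))
aa_stage_2_clear(((agg0, agg0.copy()), (agg1, agg1.copy())))
assert (agg0 == 0).all() and np.isnan(agg1).all()

Because the per-reduction fills are unrolled into generated source, there is no loop over a heterogeneous tuple, so neither literal_unroll nor parallel_fill is needed here.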
2 changes: 1 addition & 1 deletion datashader/core.py
@@ -441,7 +441,7 @@ def line(self, source, x=None, y=None, agg=None, axis=0, geometry=None,
if not isinstance(non_cat_agg, (
rd.any, rd.count, rd.max, rd.min, rd.sum, rd.summary, rd._sum_zero,
rd._first_or_last, rd.mean, rd.max_n, rd.min_n, rd._first_n_or_last_n,
rd._max_or_min_row_index, rd._max_n_or_min_n_row_index
rd._max_or_min_row_index, rd._max_n_or_min_n_row_index, rd.where,
)):
raise NotImplementedError(
f"{type(non_cat_agg)} reduction not implemented for antialiased lines")
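
Hedged usage sketch (not taken from this commit): with rd.where now accepted for antialiased lines, a call along the following lines should no longer raise NotImplementedError. The DataFrame columns and values are made up; ds.where(ds.max("value"), "other") returns, per pixel, the "other" value of the row with the largest "value".

import pandas as pd
import datashader as ds

# Made-up line data: "value" decides which row wins a pixel, "other" is the value returned.
df = pd.DataFrame({
    "x":     [0.0, 1.0, 2.0, 3.0],
    "y":     [0.0, 1.0, 0.0, 1.0],
    "value": [1.0, 1.0, 2.0, 2.0],
    "other": [10.0, 10.0, 20.0, 20.0],
})

cvs = ds.Canvas(plot_width=100, plot_height=100)
# line_width > 0 requests an antialiased line; before this change the check above
# rejected where reductions for antialiased lines.
agg = cvs.line(df, "x", "y", agg=ds.where(ds.max("value"), "other"), line_width=2)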
74 changes: 37 additions & 37 deletions datashader/reductions.py
@@ -424,7 +424,7 @@ def _build_append(self, dshape, schema, cuda, antialias, self_intersect):
else:
return self._append

def _build_combine(self, dshape, antialias, cuda, partitioned):
def _build_combine(self, dshape, antialias, cuda, partitioned, categorical = False):
return self._combine

def _build_finalize(self, dshape):
@@ -627,7 +627,7 @@ def _append_no_field_cuda(x, y, agg):
nb_cuda.atomic.add(agg, (y, x), 1)
return 0

def _build_combine(self, dshape, antialias, cuda, partitioned):
def _build_combine(self, dshape, antialias, cuda, partitioned, categorical = False):
if antialias:
return self._combine_antialias
else:
@@ -751,8 +751,8 @@ def _build_bases(self, cuda, partitioned):
def _build_append(self, dshape, schema, cuda, antialias, self_intersect):
return self.reduction._build_append(dshape, schema, cuda, antialias, self_intersect)

def _build_combine(self, dshape, antialias, cuda, partitioned):
return self.reduction._build_combine(dshape, antialias, cuda, partitioned)
def _build_combine(self, dshape, antialias, cuda, partitioned, categorical = False):
return self.reduction._build_combine(dshape, antialias, cuda, partitioned, True)

def _build_combine_temps(self, cuda, partitioned):
return self.reduction._build_combine_temps(cuda, partitioned)
@@ -820,7 +820,7 @@ def _append_no_field_antialias(x, y, agg, aa_factor):
_append_cuda =_append
_append_no_field_cuda = _append_no_field

def _build_combine(self, dshape, antialias, cuda, partitioned):
def _build_combine(self, dshape, antialias, cuda, partitioned, categorical = False):
if antialias:
return self._combine_antialias
else:
@@ -1596,7 +1596,7 @@ def _append_cuda(x, y, agg, field):
return i
return -1

def _build_combine(self, dshape, antialias, cuda, partitioned):
def _build_combine(self, dshape, antialias, cuda, partitioned, categorical = False):
if cuda:
return self._combine_cuda
else:
@@ -1676,7 +1676,7 @@ def _append_cuda(x, y, agg, field):
return i
return -1

def _build_combine(self, dshape, antialias, cuda, partitioned):
def _build_combine(self, dshape, antialias, cuda, partitioned, categorical = False):
if cuda:
return self._combine_cuda
else:
@@ -1883,7 +1883,12 @@ def _build_bases(self, cuda, partitioned):
else:
return selector._build_bases(cuda, partitioned) + super()._build_bases(cuda, partitioned)

def _build_combine(self, dshape, antialias, cuda, partitioned):
def _combine_callback(self, cuda, partitioned, categorical):
# Used by:
# 1) where._build_combine()) below, the usual mechanism for combining aggs from
# different dask partitions.
# 2) make_antialias_stage_2_functions() in compiler.py to perform stage 2 combine
# of antialiased aggs.
selector = self.selector
is_n_reduction = isinstance(selector, FloatingNReduction)
if cuda:
@@ -1989,38 +1994,33 @@ def combine_cuda_n_4d(aggs, selector_aggs):
break
cuda_shift_and_insert(aggs[0][y, x, cat], aggs[1][y, x, cat, i], update_index)

def wrapped_combine(aggs, selector_aggs):
ret = aggs[0], selector_aggs[0]
ndim = aggs[0].ndim
if is_n_reduction:
# ndim is either 3 (ny, nx, n) or 4 (ny, nx, ncat, n)
if cuda:
return combine_cuda_n_4d if categorical else combine_cuda_n_3d
else:
return combine_cpu_n_4d if categorical else combine_cpu_n_3d
else:
# ndim is either 2 (ny, nx) or 3 (ny, nx, ncat)
if cuda:
return combine_cuda_3d if categorical else combine_cuda_2d
else:
return combine_cpu_3d if categorical else combine_cpu_2d

def _build_combine(self, dshape, antialias, cuda, partitioned, categorical = False):
combine = self._combine_callback(cuda, partitioned, categorical)

def wrapped_combine(aggs, selector_aggs):
if len(aggs) == 1:
pass
elif is_n_reduction:
# ndim is either 3 (ny, nx, n) or 4 (ny, nx, ncat, n)
if cuda:
if ndim == 3:
combine_cuda_n_3d[cuda_args(aggs[0].shape[:2])](aggs, selector_aggs)
else:
combine_cuda_n_4d[cuda_args(aggs[0].shape[:3])](aggs, selector_aggs)
else:
if ndim == 3:
combine_cpu_n_3d(aggs, selector_aggs)
else:
combine_cpu_n_4d(aggs, selector_aggs)
elif cuda:
is_n_reduction = isinstance(self.selector, FloatingNReduction)
shape = aggs[0].shape[:-1] if is_n_reduction else aggs[0].shape
combine[cuda_args(shape)](aggs, selector_aggs)
else:
# ndim is either 2 (ny, nx) or 3 (ny, nx, ncat)
if cuda:
if ndim == 2:
combine_cuda_2d[cuda_args(aggs[0].shape)](aggs, selector_aggs)
else:
combine_cuda_3d[cuda_args(aggs[0].shape)](aggs, selector_aggs)
else:
if ndim == 2:
combine_cpu_2d(aggs, selector_aggs)
else:
combine_cpu_3d(aggs, selector_aggs)
combine(aggs, selector_aggs)

return ret
return aggs[0], selector_aggs[0]

return wrapped_combine

@@ -2226,7 +2226,7 @@ def _append_cuda(x, y, agg, field):
return 0
return -1

def _build_combine(self, dshape, antialias, cuda, partitioned):
def _build_combine(self, dshape, antialias, cuda, partitioned, categorical = False):
if cuda:
return self._combine_cuda
else:
@@ -2270,7 +2270,7 @@ def uses_cuda_mutex(self) -> UsesCudaMutex:
def uses_row_index(self, cuda, partitioned):
return True

def _build_combine(self, dshape, antialias, cuda, partitioned):
def _build_combine(self, dshape, antialias, cuda, partitioned, categorical = False):
if cuda:
return self._combine_cuda
else:
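
Illustration (hypothetical names, not the datashader implementation): a conceptual, pure-NumPy sketch of what a 2D CPU combine for a where reduction with a max selector does. This is the kind of callback that _combine_callback now returns, so the same function can be reused both by wrapped_combine (combining dask partitions) and by the generated antialias stage-2 accumulate in compiler.py; the real functions are Numba-jitted and go through the selector's append logic.

import numpy as np

def combine_where_2d_max(aggs, selector_aggs):
    # aggs[0]/selector_aggs[0] are the running results; index 1 holds the other
    # partition (or the agg copy, in the antialias stage-2 case).
    incoming = selector_aggs[1]
    current = selector_aggs[0]
    wins = ~np.isnan(incoming) & (np.isnan(current) | (incoming > current))
    # Where the incoming selector value wins, take both the selector value and
    # the corresponding looked-up value.
    current[wins] = incoming[wins]
    aggs[0][wins] = aggs[1][wins]

# Usage sketch
sel_a = np.array([[1.0, np.nan], [3.0, 2.0]])
sel_b = np.array([[2.0, 5.0], [np.nan, 1.0]])
val_a = np.array([[10.0, np.nan], [30.0, 20.0]])
val_b = np.array([[-2.0, -5.0], [np.nan, -1.0]])
combine_where_2d_max((val_a, val_b), (sel_a, sel_b))
# val_a is now [[-2., -5.], [30., 20.]]; sel_a is [[2., 5.], [3., 2.]]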
Diffs for the remaining 4 changed files are not shown here.
