Add option to rescale_discrete_levels for how='eq_hist' (#1055)
ianthomas23 committed Apr 5, 2022
1 parent 1d3cafd commit 7b52f94
Showing 2 changed files with 86 additions and 12 deletions.
49 changes: 43 additions & 6 deletions datashader/tests/test_transfer_functions.py
@@ -25,7 +25,10 @@ def build_agg(array_module=np):
c = array_module.arange(10, 19, dtype='f8').reshape((3, 3))
c[[0, 1, 2], [0, 1, 2]] = array_module.nan
s_c = xr.DataArray(c, coords=coords, dims=dims)
agg = xr.Dataset(dict(a=s_a, b=s_b, c=s_c))
d = array_module.arange(10, 19, dtype='u4').reshape((3, 3))
d[[0, 1, 2, 2], [0, 1, 2, 1]] = 1
s_d = xr.DataArray(d, coords=coords, dims=dims)
agg = xr.Dataset(dict(a=s_a, b=s_b, c=s_c, d=s_d))
return agg


@@ -79,6 +82,15 @@ def create_dask_array_np(*args, **kwargs):
[4281281791, 4278190335, 0]], dtype='u4')}
eq_hist_sol['c'] = eq_hist_sol['b']

eq_hist_sol_rescale_discrete_levels = {
'a': np.array([[0, 4289306879, 4287070463],
[4284834047, 0, 4282597631],
[4280361215, 4278190335, 0]], dtype='u4'),
'b': np.array([[0, 4289306879, 4287070463],
[4285228543, 0, 4282597631],
[4280755711, 4278190335, 0]], dtype='u4')}
eq_hist_sol_rescale_discrete_levels['c'] = eq_hist_sol_rescale_discrete_levels['b']


def check_span(x, cmap, how, sol):
# Copy inputs that will be modified
@@ -153,9 +165,14 @@ def test_shade(agg, attr, span):
assert_eq_xr(img, sol)

# span option not supported with how='eq_hist'
img = tf.shade(x, cmap=cmap, how='eq_hist')
sol = tf.Image(eq_hist_sol[attr], coords=coords, dims=dims)
assert_eq_xr(img, sol)
if span is None:
img = tf.shade(x, cmap=cmap, how='eq_hist', rescale_discrete_levels=False)
sol = tf.Image(eq_hist_sol[attr], coords=coords, dims=dims)
assert_eq_xr(img, sol)

img = tf.shade(x, cmap=cmap, how='eq_hist', rescale_discrete_levels=True)
sol = tf.Image(eq_hist_sol_rescale_discrete_levels[attr], coords=coords, dims=dims)
assert_eq_xr(img, sol)

img = tf.shade(x, cmap=cmap,
how=lambda x, mask: np.where(mask, np.nan, x ** 2))
@@ -461,6 +478,7 @@ def test_shade_category(array):
assert ((img.data[1,0] >> 24) & 0xFF) == 20 # min alpha
assert ((img.data[1,1] >> 24) & 0xFF) == 20 # min alpha


@pytest.mark.parametrize('array', arrays)
def test_shade_zeros(array):
coords = [np.array([0, 1]), np.array([2, 5])]
@@ -478,6 +496,25 @@ def test_shade_zeros(array):
assert_eq_xr(img, sol)


@pytest.mark.parametrize('agg', aggs)
@pytest.mark.parametrize('attr', ['d'])
@pytest.mark.parametrize('rescale', [False, True])
def test_shade_rescale_discrete_levels(agg, attr, rescale):
x = getattr(agg, attr)
cmap = ['pink', 'red']
img = tf.shade(x, cmap=cmap, how='eq_hist', rescale_discrete_levels=rescale)
if rescale:
sol = np.array([[0xff8d85ff, 0xff716bff, 0xff5450ff],
[0xff3835ff, 0xff8d85ff, 0xff1c1aff],
[0xff0000ff, 0xff8d85ff, 0xff8d85ff]], dtype='uint32')
else:
sol = np.array([[0xffcbc0ff, 0xffa299ff, 0xff7973ff],
[0xff514cff, 0xffcbc0ff, 0xff2826ff],
[0xff0000ff, 0xffcbc0ff, 0xffcbc0ff]], dtype='uint32')
sol = tf.Image(sol, coords=coords, dims=dims)
assert_eq_xr(img, sol)


coords2 = [np.array([0, 2]), np.array([3, 5])]
img1 = tf.Image(np.array([[0xff00ffff, 0x00000000],
[0x00000000, 0xff00ff7d]], dtype='uint32'),
@@ -1034,13 +1071,13 @@ def test_eq_hist():
data[np.random.randint(300**2, size=100)] = np.nan
data = (data - np.nanmin(data)).reshape((300, 300))
mask = np.isnan(data)
eq = tf.eq_hist(data, mask)
eq, _ = tf.eq_hist(data, mask)
check_eq_hist_cdf_slope(eq)
assert (np.isnan(eq) == mask).all()
# Integer
data = np.random.normal(scale=100, size=(300, 300)).astype('i8')
data = data - data.min()
eq = tf.eq_hist(data)
eq, _ = tf.eq_hist(data)
check_eq_hist_cdf_slope(eq)


49 changes: 43 additions & 6 deletions datashader/transfer_functions/__init__.py
@@ -181,7 +181,8 @@ def eq_hist(data, mask=None, nbins=256*256):
cdf = hist.cumsum()
cdf = cdf / float(cdf[-1])
out = interp(data, bin_centers, cdf).reshape(data.shape)
return out if mask is None else np.where(mask, np.nan, out)
return out if mask is None else np.where(mask, np.nan, out), data2.max()



_interpolate_lookup = {'log': lambda d, m: np.log1p(np.where(m, np.nan, d)),
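
Since eq_hist now returns a pair rather than a bare array (the equalized values plus the quantity _interpolate uses as discrete_levels), callers unpack two values, as the updated test above does. A minimal standalone sketch with hypothetical input data:

import numpy as np
import datashader.transfer_functions as tf

data = np.arange(9, dtype='f8').reshape((3, 3))  # hypothetical input
mask = np.isnan(data)
# Unpack both values; use `_` for the second when only the equalized array is needed.
eq, discrete_levels = tf.eq_hist(data, mask)
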
@@ -198,7 +199,7 @@ def _normalize_interpolate_how(how):
raise ValueError("Unknown interpolation method: {0}".format(how))


def _interpolate(agg, cmap, how, alpha, span, min_alpha, name):
def _interpolate(agg, cmap, how, alpha, span, min_alpha, name, rescale_discrete_levels):
if cupy and isinstance(agg.data, cupy.ndarray):
from ._cuda_utils import masked_clip_2d, interp
else:
@@ -245,14 +246,30 @@ def _interpolate(agg, cmap, how, alpha, span, min_alpha, name):
with np.errstate(invalid="ignore", divide="ignore"):
# Transform data (log, eq_hist, etc.)
data = interpolater(data, mask)
discrete_levels = None
if isinstance(data, (list, tuple)):
data, discrete_levels = data

# Transform span
if span is None:
masked_data = np.where(~mask, data, np.nan)
span = np.nanmin(masked_data), np.nanmax(masked_data)

if rescale_discrete_levels: # Only valid for how='eq_hist'
if discrete_levels is None:
raise ValueError("interpolator did not return a valid discrete_levels")

# Straight line y = mx + c through (2, 1.5) and (100, 1) where
# x is number of discrete_levels and y is lower span limit.
m = -0.5/98.0 # (y[1] - y[0]) / (x[1] - x[0])
c = 1.5 - 2*m # y[0] - m*x[0]
multiple = m*discrete_levels + c
if multiple > 1:
lower_span = max(span[1] - multiple*(span[1] - span[0]), 0)
span = (lower_span, 1)
else:
if how == 'eq_hist':
# For eq_hist to work with span, we'll need to compute the histogram
# For eq_hist to work with span, we'd need to compute the histogram
# only on the specified span's range.
raise ValueError("span is not (yet) valid to use with eq_hist")

@@ -408,7 +425,10 @@ def _interpolate_alpha(data, total, mask, how, alpha, span, min_alpha):
if not np.all(mask):
offset = total[total > 0].min()
total = np.where(~mask, total, np.nan)

a_scaled = _normalize_interpolate_how(how)(total - offset, mask)
if isinstance(a_scaled, (list, tuple)):
a_scaled = a_scaled[0] # Ignore discrete_levels

# All-NaN objects (e.g. chunks of arrays with no data) are valid in Datashader
with np.warnings.catch_warnings():
@@ -427,8 +447,15 @@ def _interpolate_alpha(data, total, mask, how, alpha, span, min_alpha):
mask = mask | (total <= 0)
total = np.where(~mask, total, np.nan)
masked_clip_2d(total, mask, *span)

a_scaled = _normalize_interpolate_how(how)(total - offset, mask)
if isinstance(a_scaled, (list, tuple)):
a_scaled = a_scaled[0] # Ignore discrete_levels

norm_span = _normalize_interpolate_how(how)([0, span[1] - span[0]], 0)
if isinstance(norm_span, (list, tuple)):
norm_span = norm_span[0] # Ignore discrete_levels

# Interpolate the alpha values
a = interp(a_scaled, array(norm_span), array([min_alpha, alpha]),
left=0, right=255).astype(np.uint8)
@@ -518,7 +545,7 @@ def _apply_discrete_colorkey(agg, color_key, alpha, name, color_baseline):

def shade(agg, cmap=["lightblue", "darkblue"], color_key=Sets1to3,
how='eq_hist', alpha=255, min_alpha=40, span=None, name=None,
color_baseline=None):
color_baseline=None, rescale_discrete_levels=False):
"""Convert a DataArray to an image by choosing an RGBA pixel color for each value.
Requires a DataArray with a single data dimension, here called the
Expand Down Expand Up @@ -615,6 +642,13 @@ def shade(agg, cmap=["lightblue", "darkblue"], color_key=Sets1to3,
color will be an evenly weighted average of all such
categories with data (to avoid the color being undefined in
this case).
rescale_discrete_levels : boolean, optional
If ``how='eq_hist'`` and there are only a few discrete values,
then ``rescale_discrete_levels=True`` decreases the lower
limit of the autoranged span so that the values are rendered
towards the (more visible) top of the ``cmap`` range, thus
avoiding washout of the lower values. Has no effect if
``how!='eq_hist'``. Default is False.
"""
if not isinstance(agg, xr.DataArray):
raise TypeError("agg must be instance of DataArray")
@@ -623,13 +657,16 @@ def shade(agg, cmap=["lightblue", "darkblue"], color_key=Sets1to3,
if not ((0 <= min_alpha <= 255) and (0 <= alpha <= 255)):
raise ValueError("min_alpha ({}) and alpha ({}) must be between 0 and 255".format(min_alpha,alpha))

if rescale_discrete_levels and how != 'eq_hist':
rescale_discrete_levels = False

if agg.ndim == 2:
if color_key is not None and isinstance(color_key, dict):
return _apply_discrete_colorkey(
agg, color_key, alpha, name, color_baseline
)
else:
return _interpolate(agg, cmap, how, alpha, span, min_alpha, name)
return _interpolate(agg, cmap, how, alpha, span, min_alpha, name, rescale_discrete_levels)
elif agg.ndim == 3:
return _colorize(agg, color_key, how, alpha, span, min_alpha, name, color_baseline)
else:
@@ -872,7 +909,7 @@ def dynspread(img, threshold=0.5, max_px=3, shape='circle', how=None, name=None)
if density > threshold:
px_=px_-1
break

if px_>=1:
return spread(img, px_, shape=shape, how=how, name=name)
else:
