radiocosmology · jrs65 · Mar 11, 2022 · Oct 19, 2021 · Oct 28, 2021 · Mar 11, 2022
diff --git a/draco/util/_fast_tools.pyx b/draco/util/_fast_tools.pyx
@@ -9,6 +9,7 @@ cimport numpy as np
 from libc.stdint cimport int16_t, int32_t
 from libc.math cimport sin
 from libc.math cimport cos
+from libc.math cimport fabs
 
 cdef inline int int_max(int a, int b) nogil: return a if a >= b else b
 
@@ -251,3 +252,36 @@ cpdef beamform(float complex [:, :, ::1] vis,
 
     return np.asarray(formed_beam)
 
+
+cdef extern from "float.h" nogil:
+    double DBL_MAX
+    double FLT_MAX
+
+ctypedef fused real_or_complex:
+    double
+    double complex
+    float
+    float complex
+
+@cython.wraparound(False)
+@cython.boundscheck(False)
+@cython.cdivision(True)
+cpdef _invert_no_zero(real_or_complex [:] array, real_or_complex [:] out):
+
+    cdef bint cond
+    cdef Py_ssize_t i = 0
+    cdef Py_ssize_t n = array.shape[0]
+    cdef double thresh, ar, ai
+    if (real_or_complex is cython.doublecomplex) or (real_or_complex is cython.double):
+        thresh = 1.0 / DBL_MAX
+    else:
+        thresh = 1.0 / FLT_MAX
+
+    if (real_or_complex is cython.doublecomplex) or (real_or_complex is cython.floatcomplex):
+        for i in prange(n, nogil=True):
+            cond = (fabs(array[i].real) < thresh) and (fabs(array[i].imag) < thresh)
+            out[i] = 0.0 if cond else 1.0 / array[i]
+    else:
+        for i in prange(n, nogil=True):
+            cond = fabs(array[i]) < thresh
+            out[i] = 0.0 if cond else 1.0 / array[i]
diff --git a/draco/util/tools.py b/draco/util/tools.py
@@ -6,7 +6,7 @@
 import numpy as np
 from numpy.lib.recfunctions import structured_to_unstructured
 
-from ._fast_tools import _calc_redundancy
+from ._fast_tools import _calc_redundancy, _invert_no_zero
 
 
 def cmap(i, j, n):
@@ -223,7 +223,7 @@ def apply_gain(vis, gain, axis=1, out=None, prod_map=None):
     return out
 
 
-def invert_no_zero(x):
+def invert_no_zero(x, out=None):
     """Return the reciprocal, but ignoring zeros.
 
     Where `x != 0` return 1/x, or just return 0. Importantly this routine does
@@ -232,23 +232,30 @@ def invert_no_zero(x):
     Parameters
     ----------
     x : np.ndarray
+    out : np.ndarray
 
     Returns
     -------
     r : np.ndarray
         Return the reciprocal of x.
     """
-    if not isinstance(x, (np.generic, np.ndarray)):
-        cond = x == 0
-    elif np.iscomplexobj(x):
-        tol = 1.0 / np.finfo(x.real.dtype).max
-        cond = np.logical_and(np.abs(x.real) < tol, np.abs(x.imag) < tol)
+    if not isinstance(x, (np.generic, np.ndarray)) or np.issubdtype(
+        x.dtype, np.integer
+    ):
+        with np.errstate(divide="ignore", invalid="ignore", over="ignore"):
+            return np.where(x == 0, 0.0, 1.0 / x)
+
+    if out is not None:
+        if x.shape != out.shape:
+            raise ValueError(
+                f"Input and output arrays don't have same shape: {x.shape} != {out.shape}."
+            )
     else:
-        tol = 1.0 / np.finfo(x.dtype).max
-        cond = np.abs(x) < tol
+        out = np.zeros_like(x)
+
+    _invert_no_zero(x.reshape(-1), out.reshape(-1))
 
-    with np.errstate(divide="ignore", invalid="ignore", over="ignore"):
-        return np.where(cond, 0.0, 1.0 / x)
+    return out
 
 
 def extract_diagonal(utmat, axis=1):

diff --git a/test/test_tools.py b/test/test_tools.py
@@ -0,0 +1,43 @@
+import numpy as np
+import pytest
+
+from draco.util.random import _default_bitgen
+from draco.util.tools import invert_no_zero
+
+ARRAY_SIZE = (100, 111)
+SEED = 12345
+ATOL = 0.0
+rng = np.random.Generator(_default_bitgen(SEED))
+
+random_float_array = rng.standard_normal(size=ARRAY_SIZE, dtype=np.float32)
+random_double_array = rng.standard_normal(size=ARRAY_SIZE, dtype=np.float64)
+random_complex_array = rng.standard_normal(
+    size=ARRAY_SIZE
+) + 1.0j * rng.standard_normal(size=ARRAY_SIZE)
+
+
+@pytest.mark.parametrize(
+    "a", [random_complex_array, random_float_array, random_double_array]
+)
+def test_invert_no_zero(a):
+
+    zero_ind = ((0, 10, 12), (56, 34, 78))
+    good_ind = np.ones(a.shape, dtype=bool)
+    good_ind[zero_ind] = False
+
+    # set up some invalid values for inverse
+    a[zero_ind[0][0], zero_ind[1][0]] = 0.0
+    a[zero_ind[0][1], zero_ind[1][1]] = 0.5 / np.finfo(a.real.dtype).max
+
+    if np.iscomplexobj(a):
+        # these should be inverted fine
+        a[10, 0] = 1.0
+        a[10, 1] = 1.0j
+        # also test invalid in the imaginary part
+        a[zero_ind[0][2], zero_ind[1][2]] = 0.5j / np.finfo(a.real.dtype).max
+    else:
+        a[zero_ind[0][2], zero_ind[1][2]] = -0.5 / np.finfo(a.real.dtype).max
+
+    b = invert_no_zero(a)
+    assert np.allclose(b[good_ind], 1.0 / a[good_ind], atol=ATOL)
+    assert (b[zero_ind] == 0).all()