diff --git a/sparse/__init__.py b/sparse/__init__.py index 55b8b495..0b0e894d 100644 --- a/sparse/__init__.py +++ b/sparse/__init__.py @@ -49,11 +49,11 @@ roll, tril, triu, - where, unique_all, unique_counts, unique_inverse, unique_values, + where, ) from ._dok import DOK from ._io import load_npz, save_npz diff --git a/sparse/_coo/__init__.py b/sparse/_coo/__init__.py index f42d05a3..f213a6f5 100644 --- a/sparse/_coo/__init__.py +++ b/sparse/_coo/__init__.py @@ -20,11 +20,11 @@ stack, tril, triu, - where, unique_all, unique_counts, unique_inverse, unique_values, + where, ) from .core import COO, as_coo diff --git a/sparse/_coo/common.py b/sparse/_coo/common.py index 2ea63349..9de529be 100644 --- a/sparse/_coo/common.py +++ b/sparse/_coo/common.py @@ -2,7 +2,7 @@ import warnings from collections.abc import Iterable from functools import reduce -from typing import Optional, Tuple +from typing import NamedTuple, Optional, Tuple import numba @@ -1059,20 +1059,56 @@ def clip(a, a_min=None, a_max=None, out=None): return a.clip(a_min, a_max) +# Array API set functions + +class UniqueAllResult(NamedTuple): + values: np.ndarray + indices: np.ndarray + inverse_indices: np.ndarray + counts: np.ndarray + + +class UniqueCountsResult(NamedTuple): + values: np.ndarray + counts: np.ndarray + + +class UniqueInverseResult(NamedTuple): + values: np.ndarray + inverse_indices: np.ndarray + + def unique_all(x, /): """ - Returns the unique elements of an input array x, the first occurring indices - for each unique element in x, the indices from the set of unique elements that - reconstruct x, and the corresponding counts for each unique element in x. + Returns the unique elements of an input array `x`, the first occurring + indices for each unique element in `x`, the indices from the set of unique + elements that reconstruct `x`, and the corresponding counts for each + unique element in `x`. + + + Parameters + ---------- + x : COO + Input COO array. It will be flattened if it is not already 1-D. + + Returns + ------- + out : namedtuple + The result containing: + * values - The unique elements of an input array. + * indices - The first occurring indices for each unique element. + * inverse_indices - The indices from the set of unique elements + that reconstruct `x`. + * counts - The corresponding counts for each unique element. + """ from .core import COO + if not isinstance(x, COO): raise ValueError(f"Only COO arrays are supported but {type(x)} was passed.") x = x.flatten() - values, index, inverse, counts = np.unique( - x.data, return_index=True, return_inverse=True, return_counts=True - ) + values, index, inverse, counts = np.unique(x.data, return_index=True, return_inverse=True, return_counts=True) index = x.coords.squeeze()[index] if x.nnz < x.size: # find the first occurence of the fill value @@ -1091,18 +1127,34 @@ def unique_all(x, /): counts = np.concatenate([[x.size - x.nnz], counts]) from .._dok import DOK + result_inverse = DOK(shape=x.size, dtype=np.intp, fill_value=np.intp(0)) result_inverse[x.coords.squeeze()] = inverse - return values, index, result_inverse, counts + return UniqueAllResult(values, index, result_inverse, counts) def unique_counts(x, /): """ - Returns the unique elements of an input array x and the corresponding - counts for each unique element in x. + Returns the unique elements of an input array `x`, and the corresponding + counts for each unique element in `x`. + + + Parameters + ---------- + x : COO + Input COO array. It will be flattened if it is not already 1-D. + + Returns + ------- + out : namedtuple + The result containing: + * values - The unique elements of an input array. + * counts - The corresponding counts for each unique element. + """ from .core import COO + if not isinstance(x, COO): raise ValueError(f"Only COO arrays are supported but {type(x)} was passed.") @@ -1111,15 +1163,31 @@ def unique_counts(x, /): if x.nnz < x.size: values = np.concatenate([[x.fill_value], values]) counts = np.concatenate([[x.size - x.nnz], counts]) - return values, counts + + return UniqueCountsResult(values, counts) def unique_inverse(x, /): """ - Returns the unique elements of an input array x and the indices from - the set of unique elements that reconstruct x. + Returns the unique elements of an input array `x` and the indices + from the set of unique elements that reconstruct `x`. + + Parameters + ---------- + x : COO + Input COO array. It will be flattened if it is not already 1-D. + + Returns + ------- + out : namedtuple + The result containing: + * values - The unique elements of an input array. + * inverse_indices - The indices from the set of unique elements + that reconstruct `x`. + """ from .core import COO + if not isinstance(x, COO): raise ValueError(f"Only COO arrays are supported but {type(x)} was passed.") @@ -1130,17 +1198,30 @@ def unique_inverse(x, /): inverse = inverse + 1 from .._dok import DOK + result_inverse = DOK(shape=x.size, dtype=np.intp, fill_value=np.intp(0)) result_inverse[x.coords.squeeze()] = inverse - return values, result_inverse + return UniqueInverseResult(values, result_inverse) def unique_values(x, /): """ - Returns the unique elements of an input array x. + Returns the unique elements of an input array `x`. + + Parameters + ---------- + x : COO + Input COO array. It will be flattened if it is not already 1-D. + + Returns + ------- + out : ndarray + The unique elements of an input array. + """ from .core import COO + if not isinstance(x, COO): raise ValueError(f"Only COO arrays are supported but {type(x)} was passed.") diff --git a/sparse/tests/test_coo.py b/sparse/tests/test_coo.py index 50a93ffd..9f579362 100644 --- a/sparse/tests/test_coo.py +++ b/sparse/tests/test_coo.py @@ -1748,22 +1748,15 @@ def test_squeeze_validation(self): class TestUnique: - arr = np.array( - [[0, 0, 1, 5, 3, 0], - [1, 0, 4, 0, 3, 0], - [0, 1, 0, 1, 1, 0]], - dtype=np.int64 - ) - arr_empty = np.zeros((5,5)) + arr = np.array([[0, 0, 1, 5, 3, 0], [1, 0, 4, 0, 3, 0], [0, 1, 0, 1, 1, 0]], dtype=np.int64) + arr_empty = np.zeros((5, 5)) arr_full = np.arange(1, 10) @pytest.mark.parametrize("arr", [arr, arr_empty, arr_full]) def test_unique_all(self, arr): s_arr = sparse.COO.from_numpy(arr) - result_values, result_indices, result_inverse, result_count = ( - sparse.unique_all(s_arr) - ) + result_values, result_indices, result_inverse, result_count = sparse.unique_all(s_arr) expected_values, expected_indices, expected_inverse, expected_count = np.unique( arr, return_index=True, return_inverse=True, return_counts=True ) @@ -1801,3 +1794,16 @@ def test_unique_values(self, arr): expected = np.unique(arr) np.testing.assert_equal(result, expected) + + @pytest.mark.parametrize( + "func", + [ + sparse.unique_all, + sparse.unique_counts, + sparse.unique_inverse, + sparse.unique_values + ] + ) + def test_input_validation(self, func): + with pytest.raises(ValueError, match=r"Only COO arrays are supported"): + func(self.arr)