class SparseArray:
    """
    Marker base class for PyData/Sparse.

    The class body declares no attributes or methods of its own; it exists
    only so that other classes can list it as a base.
    """
def to_scipy_sparse(self):
    """
    Convert this tensor to a ``scipy.sparse`` matrix.

    Only 2-dimensional tensors whose fill value equals 0 are accepted.
    The outermost level's type name selects the result class:

    - ``SparseCOOLevel`` gives ``scipy.sparse.coo_matrix``
    - ``DenseLevel`` wrapping ``SparseListLevel`` gives ``csr_matrix``
      when ``self.get_order() == (1, 0)``, otherwise ``csc_matrix``

    Raises
    ------
    ValueError
        If the tensor is not 2-dimensional, its fill value is non-zero,
        or its level structure is none of the layouts listed above.
    """
    import scipy.sparse as sp

    if self.ndim != 2:
        raise ValueError(
            "Can only convert a 2-dimensional array to a Scipy sparse matrix."
        )
    if self.fill_value != 0:
        raise ValueError(
            "Can only convert arrays with 0 fill value to a Scipy sparse matrix."
        )

    order = self.get_order()
    body = self._obj.body
    outer_lvl = body.lvl
    outer_name = str(jl.typeof(outer_lvl).name.name)

    if outer_name == "SparseCOOLevel":
        values = np.asarray(outer_lvl.lvl.val)
        coords = outer_lvl.tbl
        if order == (1, 0):
            # Swap the coordinate arrays for the (1, 0) ordering.
            coords = coords[::-1]
        rows, cols = coords
        # Stored indices are shifted down by one (presumably 1-based on the
        # Julia side; SciPy expects 0-based).
        rows = np.asarray(rows) - 1
        cols = np.asarray(cols) - 1
        return sp.coo_matrix((values, (rows, cols)), shape=self.shape)

    if outer_name == "DenseLevel":
        inner_lvl = outer_lvl.lvl
        if str(jl.typeof(inner_lvl).name.name) == "SparseListLevel":
            values = np.asarray(inner_lvl.lvl.val)
            indices = np.asarray(inner_lvl.idx) - 1
            indptr = np.asarray(inner_lvl.ptr) - 1
            matrix_cls = sp.csr_matrix if order == (1, 0) else sp.csc_matrix
            return matrix_cls((values, indices, indptr), shape=self.shape)

    raise ValueError("Invalid format. Tensor should be a COO, CSR or CSC.")
convert a 2-dimensional array"): + finch_arr.to_scipy_sparse() + + finch_arr = finch.asarray(np.ones((3,4)), format="dense") + + with pytest.raises(ValueError, match="Invalid format. Tensor should be a COO, CSR or CSC."): + finch_arr.to_scipy_sparse() diff --git a/tests/test_sparse.py b/tests/test_sparse.py index 0a40b92..cfbfa7a 100644 --- a/tests/test_sparse.py +++ b/tests/test_sparse.py @@ -153,14 +153,20 @@ def test_asarray(arr2d, arr3d, order, format): arr = np.array(arr, order=order) arr_finch = finch.Tensor(arr) - result = finch.asarray(arr_finch, format=format) - - assert_equal(result.todense(), arr) + if (format, order) in [("csr", "F"), ("csc", "C")]: + with pytest.raises(ValueError, match="Invalid order for (csr|csc)"): + finch.asarray(arr_finch, format=format) + else: + result = finch.asarray(arr_finch, format=format) + assert_equal(result.todense(), arr) -@pytest.mark.parametrize("order", ["C", "F"]) -@pytest.mark.parametrize("format", [None, "coo", "csc"]) -def test_where(order, format): +@pytest.mark.parametrize( + "order_and_format", + [("C", None), ("F", None), ("C", "coo"), ("F", "coo"),("F", "csc")], +) +def test_where(order_and_format): + order, format = order_and_format cond = np.array( [[True, False, False, False], [False, True, True, False], @@ -187,6 +193,8 @@ def test_where(order, format): ) def test_nonzero(order, format_shape): format, shape = format_shape + if (format, order) in [("csr", "F"), ("csc", "C")]: + pytest.skip("invalid format+order") rng = np.random.default_rng(0) arr = rng.random(shape) arr = np.array(arr, order=order) From 228c4733c4eab60924c1ca1236754712be65f4d1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Mateusz=20Sok=C3=B3=C5=82?= Date: Mon, 15 Apr 2024 18:23:39 +0200 Subject: [PATCH 2/2] Apply review comments --- src/finch/levels.py | 13 +++++++++++++ src/finch/tensor.py | 23 ++++++++++++++++++++--- tests/test_scipy_constructors.py | 20 +++++++++++++++++++- 3 files changed, 52 insertions(+), 4 deletions(-) diff --git 
@classmethod
def from_scipy_sparse(cls, x) -> "Tensor":
    """
    Build a ``Tensor`` from a SciPy sparse object.

    Inputs whose ``format`` is not one of ``"coo"``, ``"csr"`` or ``"csc"``
    are first converted with ``x.asformat("coo")``.

    Parameters
    ----------
    x
        Object to convert; it must satisfy ``_is_scipy_sparse_obj``.

    Returns
    -------
    Tensor
        The result of ``Tensor(x)`` on the (possibly converted) input.

    Raises
    ------
    ValueError
        If ``x`` is not recognised as a SciPy sparse object.
    """
    if not _is_scipy_sparse_obj(x):
        # Must be an f-string: without the prefix the message contained the
        # literal text "{x}" instead of the rejected object.
        raise ValueError(f"{x} is not a SciPy sparse object.")
    if x.format not in ("coo", "csr", "csc"):
        x = x.asformat("coo")
    return Tensor(x)
@pytest.mark.parametrize(
    ("sp_format", "pattern"),
    [
        ("coo", "SparseCOO"),
        ("csr", "SparseList"),
        ("csc", "SparseList"),
        ("bsr", "SparseCOO"),
        ("dok", "SparseCOO"),
    ],
)
def test_from_scipy_sparse(sp_format, pattern):
    """
    Check the level name shown for a tensor built from a SciPy sparse matrix.

    For each SciPy format, ``str()`` of the tensor returned by
    ``Tensor.from_scipy_sparse`` must contain the expected level name.
    """
    # Seeded so that a failure can be reproduced; `sp_format` avoids
    # shadowing the `format` builtin.
    sp_arr = sp.random(10, 5, density=0.1, format=sp_format, random_state=0)

    result = finch.Tensor.from_scipy_sparse(sp_arr)
    assert pattern in str(result)