Skip to content

Commit

Permalink
Merge pull request #31 from willow-ahrens/to_scipy_sparse
Browse files Browse the repository at this point in the history
API: Add `Tensor.to_scipy_sparse`
  • Loading branch information
willow-ahrens committed Apr 16, 2024
2 parents ee02fde + 228c473 commit f281b7b
Show file tree
Hide file tree
Showing 7 changed files with 148 additions and 12 deletions.
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "finch-tensor"
version = "0.1.9"
version = "0.1.10"
description = ""
authors = ["Willow Ahrens <willow.marie.ahrens@gmail.com>"]
readme = "README.md"
Expand Down
2 changes: 2 additions & 0 deletions src/finch/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
)
from .tensor import (
Tensor,
SparseArray,
asarray,
astype,
random,
Expand Down Expand Up @@ -72,6 +73,7 @@

__all__ = [
"Tensor",
"SparseArray",
"Dense",
"Element",
"Pattern",
Expand Down
2 changes: 1 addition & 1 deletion src/finch/julia.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import juliapkg

_FINCH_VERSION = "0.6.20"
_FINCH_VERSION = "0.6.21"
_FINCH_HASH = "9177782c-1635-4eb9-9bfb-d9dfa25e6bce"

deps = juliapkg.deps.load_cur_deps()
Expand Down
13 changes: 13 additions & 0 deletions src/finch/levels.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,19 @@ def __init__(self, ndim, lvl):
self._obj = jl.SparseHash[ndim](lvl._obj)


# Names of the sparse level formats. `Tensor.to_scipy_sparse` tests a tensor's
# level type names against this tuple to decide whether the tensor should be
# re-stored as COO before conversion.
# NOTE(review): the Julia level type names compared elsewhere in this change
# carry a "Level" suffix (e.g. "SparseCOOLevel", "SparseListLevel"), while the
# entries below do not -- confirm that the membership test accounts for this.
sparse_formats_names = (
    "SparseList",
    "Sparse",
    "SparseHash",
    "SparseCOO",
    "SparseRLE",
    "SparseVBL",
    "SparseBand",
    "SparsePoint",
    "SparseInterval",
)


# STORAGE

class Storage:
Expand Down
70 changes: 66 additions & 4 deletions src/finch/tensor.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,26 @@

from .dtypes import bool as finch_bool
from .julia import jl
from .levels import _Display, Dense, Element, Storage, DenseStorage, SparseCOO, SparseList
from .levels import (
_Display,
Dense,
Element,
Storage,
DenseStorage,
SparseCOO,
SparseList,
sparse_formats_names,
)
from .typing import OrderType, JuliaObj, spmatrix, TupleOf3Arrays, DType


class Tensor(_Display):
class SparseArray:
    """
    PyData/Sparse marker class.

    The class defines no attributes or methods of its own; ``Tensor`` lists it
    as a base class. Presumably this lets PyData/Sparse recognise Finch
    tensors through ``isinstance`` checks -- TODO confirm against the
    PyData/Sparse side.
    """


class Tensor(_Display, SparseArray):
"""
A wrapper class for Finch.Tensor and Finch.SwizzleArray.
Expand Down Expand Up @@ -304,6 +319,14 @@ def _from_numpy(cls, arr: np.ndarray, fill_value: np.number) -> JuliaObj:
lvl = Dense(lvl, arr.shape[i])
return jl.swizzle(jl.Tensor(lvl._obj), *order)

@classmethod
def from_scipy_sparse(cls, x) -> "Tensor":
    """Create a ``Tensor`` from a SciPy sparse object.

    Objects already in ``coo``, ``csr`` or ``csc`` format are passed to the
    ``Tensor`` constructor unchanged; every other SciPy format is first
    converted to ``coo`` with ``x.asformat("coo")``.

    Parameters
    ----------
    x
        A SciPy sparse object.

    Returns
    -------
    Tensor
        The tensor built from ``x``.

    Raises
    ------
    ValueError
        If ``x`` is not recognised as a SciPy sparse object.
    """
    if not _is_scipy_sparse_obj(x):
        # The message must be an f-string, otherwise the literal text "{x}"
        # is reported instead of the offending object.
        raise ValueError(f"{x} is not a SciPy sparse object.")
    if x.format not in ("coo", "csr", "csc"):
        x = x.asformat("coo")
    return Tensor(x)

@classmethod
def _from_scipy_sparse(cls, x) -> JuliaObj:
if x.format == "coo":
Expand Down Expand Up @@ -407,6 +430,41 @@ def construct_csf_jl_object(
def construct_csf(cls, arg: TupleOf3Arrays, shape: tuple[int, ...]) -> "Tensor":
return Tensor(cls.construct_csf_jl_object(arg, shape))

def to_scipy_sparse(self):
    """Convert this 2-dimensional tensor to a SciPy sparse matrix.

    The result type depends on the tensor's storage:

    * a ``SparseCOO`` outer level gives ``scipy.sparse.coo_matrix``;
    * ``Dense`` over ``SparseList`` gives ``scipy.sparse.csr_matrix`` when
      the order is ``(1, 0)`` and ``scipy.sparse.csc_matrix`` otherwise;
    * any other storage whose first or second level is one of
      ``sparse_formats_names`` is re-stored as COO and then converted.

    Returns
    -------
    scipy.sparse.coo_matrix, scipy.sparse.csr_matrix or scipy.sparse.csc_matrix

    Raises
    ------
    ValueError
        If the tensor is not 2-dimensional, if its fill value is not 0, or
        if none of its first two levels is a sparse format.
    """
    import scipy.sparse as sp

    if self.ndim != 2:
        raise ValueError("Can only convert a 2-dimensional array to a Scipy sparse matrix.")
    if self.fill_value != 0:
        raise ValueError("Can only convert arrays with 0 fill value to a Scipy sparse matrix.")
    order = self.get_order()
    body = self._obj.body

    def level_type_name(lvl) -> str:
        # Name of the Julia level type as a Python string,
        # e.g. "SparseCOOLevel".
        return str(jl.typeof(lvl).name.name)

    def is_sparse_format(lvl) -> bool:
        # `sparse_formats_names` holds names without the "Level" suffix that
        # the Julia type names carry, so strip it before the lookup.
        name = level_type_name(lvl)
        if name.endswith("Level"):
            name = name[: -len("Level")]
        return name in sparse_formats_names

    outer_name = level_type_name(body.lvl)

    if outer_name == "SparseCOOLevel":
        data = np.asarray(body.lvl.lvl.val)
        coords = body.lvl.tbl
        # Swap the two coordinate arrays when the order is (1, 0).
        row, col = coords[::-1] if order == (1, 0) else coords
        # Julia indices are 1-based; SciPy expects 0-based.
        row, col = np.asarray(row) - 1, np.asarray(col) - 1
        return sp.coo_matrix((data, (row, col)), shape=self.shape)

    if (
        outer_name == "DenseLevel"
        and level_type_name(body.lvl.lvl) == "SparseListLevel"
    ):
        data = np.asarray(body.lvl.lvl.lvl.val)
        # Shift the 1-based Julia index and pointer arrays to 0-based.
        indices = np.asarray(body.lvl.lvl.idx) - 1
        indptr = np.asarray(body.lvl.lvl.ptr) - 1
        sp_class = sp.csr_matrix if order == (1, 0) else sp.csc_matrix
        return sp_class((data, indices, indptr), shape=self.shape)

    if is_sparse_format(body.lvl) or is_sparse_format(body.lvl.lvl):
        # Some other sparse layout: re-store as COO, which is handled above.
        storage = Storage(SparseCOO(self.ndim, Element(self.fill_value)), order)
        return self.to_device(storage).to_scipy_sparse()

    raise ValueError("Tensor can't be converted to scipy.sparse object.")


def random(shape, density=0.01, random_state=None):
args = [*shape, density]
Expand All @@ -430,9 +488,13 @@ def asarray(obj, /, *, dtype=None, format=None):
if format == "coo":
storage = Storage(SparseCOO(tensor.ndim, Element(tensor.fill_value)), order)
elif format == "csr":
storage = Storage(Dense(SparseList(Element(tensor.fill_value))), order)
if order != (1, 0):
raise ValueError("Invalid order for csr")
storage = Storage(Dense(SparseList(Element(tensor.fill_value))), (2, 1))
elif format == "csc":
storage = Storage(Dense(SparseList(Element(tensor.fill_value))), order)
if order != (0, 1):
raise ValueError("Invalid order for csc")
storage = Storage(Dense(SparseList(Element(tensor.fill_value))), (1, 2))
elif format == "csf":
storage = Element(tensor.fill_value)
for _ in range(tensor.ndim - 1):
Expand Down
51 changes: 51 additions & 0 deletions tests/test_scipy_constructors.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,3 +33,54 @@ def test_scipy_compressed2d(arr2d, cls):
assert_equal(finch_arr.todense(), sp_arr.todense())
new_arr = finch.permute_dims(finch_arr, (1, 0))
assert_equal(new_arr.todense(), sp_arr.todense().transpose())


@pytest.mark.parametrize(
    "format_with_cls_with_order",
    [
        ("coo", sp.coo_matrix, "C"),
        ("coo", sp.coo_matrix, "F"),
        ("csc", sp.csc_matrix, "F"),
        ("csr", sp.csr_matrix, "C"),
    ],
)
def test_to_scipy_sparse(format_with_cls_with_order):
    """Check the SciPy class and contents produced for each format/order."""
    fmt, expected_cls, memory_order = format_with_cls_with_order

    # Seeded generator so every run sees the same 4x5 input.
    rng = np.random.default_rng(0)
    dense = np.array(rng.random((4, 5)), order=memory_order)

    converted = finch.asarray(dense, format=fmt).to_scipy_sparse()

    assert isinstance(converted, expected_cls)
    assert_equal(converted.todense(), dense)


def test_to_scipy_sparse_invalid_input():
    """Dense tensors must be rejected with the matching ValueError message."""
    # (shape of the all-ones dense input, expected error message pattern)
    rejected_inputs = (
        ((3,3,3), "Can only convert a 2-dimensional array"),
        ((3,4), "Tensor can't be converted to scipy.sparse object"),
    )

    for shape, message in rejected_inputs:
        dense_tensor = finch.asarray(np.ones(shape), format="dense")

        with pytest.raises(ValueError, match=message):
            dense_tensor.to_scipy_sparse()


@pytest.mark.parametrize(
    "format_with_pattern",
    [
        ("coo", "SparseCOO"),
        ("csr", "SparseList"),
        ("csc", "SparseList"),
        ("bsr", "SparseCOO"),
        ("dok", "SparseCOO"),
    ],
)
def test_from_scipy_sparse(format_with_pattern):
    """Each SciPy format must map to the expected Finch level name."""
    scipy_format, expected_level = format_with_pattern
    scipy_matrix = sp.random(10, 5, density=0.1, format=scipy_format)

    tensor = finch.Tensor.from_scipy_sparse(scipy_matrix)

    # The level name is looked up in the tensor's string representation.
    assert expected_level in str(tensor)
20 changes: 14 additions & 6 deletions tests/test_sparse.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,14 +153,20 @@ def test_asarray(arr2d, arr3d, order, format):
arr = np.array(arr, order=order)
arr_finch = finch.Tensor(arr)

result = finch.asarray(arr_finch, format=format)

assert_equal(result.todense(), arr)
if (format, order) in [("csr", "F"), ("csc", "C")]:
with pytest.raises(ValueError, match="Invalid order for (csr|csc)"):
finch.asarray(arr_finch, format=format)
else:
result = finch.asarray(arr_finch, format=format)
assert_equal(result.todense(), arr)


@pytest.mark.parametrize("order", ["C", "F"])
@pytest.mark.parametrize("format", [None, "coo", "csc"])
def test_where(order, format):
@pytest.mark.parametrize(
"order_and_format",
[("C", None), ("F", None), ("C", "coo"), ("F", "coo"),("F", "csc")],
)
def test_where(order_and_format):
order, format = order_and_format
cond = np.array(
[[True, False, False, False],
[False, True, True, False],
Expand All @@ -187,6 +193,8 @@ def test_where(order, format):
)
def test_nonzero(order, format_shape):
format, shape = format_shape
if (format, order) in [("csr", "F"), ("csc", "C")]:
pytest.skip("invalid format+order")
rng = np.random.default_rng(0)
arr = rng.random(shape)
arr = np.array(arr, order=order)
Expand Down

0 comments on commit f281b7b

Please sign in to comment.