Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

wip: add units implementation #2545

Closed
wants to merge 4 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion src/awkward/_broadcasting.py
Original file line number Diff line number Diff line change
Expand Up @@ -411,7 +411,7 @@ def apply_step(
# This whole function is one big switch statement.
def broadcast_any_record():
if not options["allow_records"]:
raise ValueError(f"cannot broadcast records {in_function(options)}")
raise TypeError(f"cannot broadcast records{in_function(options)}")

fields, length, istuple = UNSET, UNSET, UNSET
nextparameters = []
Expand Down
108 changes: 91 additions & 17 deletions src/awkward/_connect/numpy.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
from awkward._typing import Any, Iterator, Mapping
from awkward._util import Sentinel
from awkward.contents.numpyarray import NumpyArray
from awkward.units import get_unit_registry

# NumPy 1.13.1 introduced NEP13, without which Awkward ufuncs won't work, which
# would be worse than lacking a feature: it would cause unexpected output.
Expand Down Expand Up @@ -274,6 +275,52 @@ def _array_ufunc_categorical(
return tuple(ak.to_layout(x, allow_other=True) for x in out)


def _array_ufunc_custom_units(ufunc, inputs, kwargs, behavior):
    """
    Apply `ufunc` to leaf inputs that carry units, delegating the unit
    arithmetic to the `Quantity` type of the active unit registry.

    Args:
        ufunc: The NumPy ufunc being applied.
        inputs: The ufunc operands; any #ak.contents.Content among them is
            expected to be an #ak.contents.NumpyArray.
        kwargs (dict): Keyword arguments forwarded to `ufunc`.
        behavior: Accepted for signature parity with the other
            `_array_ufunc_*` helpers; not used here.

    Returns:
        None if there is no unit registry or none of the inputs carries
        units (so the caller can fall through to its other strategies);
        otherwise a tuple with one #ak.contents.NumpyArray per ufunc output.
        Outputs that come back as a `Quantity` carry a `"__units__"`
        parameter; outputs that come back without units are wrapped as
        plain arrays.
    """
    registry = get_unit_registry()
    if registry is None:
        return None

    def has_units(x):
        # A layout carries units through its "__units__" parameter; any other
        # operand carries units only if it already is a registry Quantity.
        if isinstance(x, ak.contents.Content):
            return bool(x.parameter("__units__"))
        return isinstance(x, registry.Quantity)

    # Exit now if nothing in the mix has units
    if not any(has_units(x) for x in inputs):
        return None

    # Wrap underlying data buffers with the registry's `Quantity`
    nextinputs = []
    for x in inputs:
        if isinstance(x, ak.contents.Content):
            # Explicit raise (rather than `assert`) so the invariant is still
            # enforced when Python runs with -O.
            if not isinstance(x, ak.contents.NumpyArray):
                raise AssertionError(
                    f"expected NumpyArray at the leaves, found {type(x).__name__}"
                )
            nextinputs.append(registry.Quantity(x.data, x.parameter("__units__")))
        else:
            nextinputs.append(x)

    # Apply ufunc to wrapped NEP-13 aware arrays
    out = ufunc(*nextinputs, **kwargs)
    if not isinstance(out, tuple):
        out = (out,)

    # Rebuild `NumpyArray` with correct units of result
    nextout = []
    for result in out:
        if isinstance(result, registry.Quantity):
            nextout.append(
                ak.contents.NumpyArray(
                    result.magnitude, parameters={"__units__": str(result.units)}
                )
            )
        else:
            # Not every ufunc yields a Quantity: comparisons and predicates
            # typically come back as bare arrays. Those results simply have
            # no units to record.
            nextout.append(ak.contents.NumpyArray(result))
    return tuple(nextout)


def _array_ufunc_string_likes(
ufunc, method: str, inputs, kwargs: dict[str, Any], behavior: Mapping | None
):
Expand Down Expand Up @@ -334,14 +381,38 @@ def array_ufunc(ufunc, method: str, inputs, kwargs: dict[str, Any]):
inputs = _array_ufunc_custom_cast(inputs, behavior, backend)

def action(inputs, **ignore):
contents = [x for x in inputs if isinstance(x, ak.contents.Content)]
is_at_leaves = all(
isinstance(x, NumpyArray) or not isinstance(x, ak.contents.Content)
for x in inputs
)
if is_at_leaves:
# Do we have any units in the mix? If so, delegate to `pint` to perform
# the ufunc dispatch. This will re-enter `array_ufunc`, but without units
# NOTE: there's nothing preventing us from handling units for non-NumpyArray
# contents, but for now we restrict ourselves to NumpyArray (in the
# NumpyArray constructor). By running _before_ the custom machinery,
# custom user ufuncs can avoid needing to worry about units
out = _array_ufunc_custom_units(ufunc, inputs, kwargs, behavior)
if out is not None:
return out

signature = _array_ufunc_signature(ufunc, inputs)
# Do we have a custom ufunc (an override of the given ufunc)?
# Do we have a custom (specific) ufunc (an override of the given ufunc)?
custom = find_ufunc(behavior, signature)
if custom is not None:
return _array_ufunc_adjust(custom, inputs, kwargs, behavior)

# Do we have a custom generic ufunc override (a function that accepts _all_ ufuncs)?
contents = [x for x in inputs if isinstance(x, ak.contents.Content)]
for x in contents:
apply_ufunc = find_ufunc_generic(ufunc, x, behavior)
if apply_ufunc is not None:
out = _array_ufunc_adjust_apply(
apply_ufunc, ufunc, method, inputs, kwargs, behavior
)
if out is not None:
return out

# Do we have any categoricals?
if any(
x.is_indexed and x.parameter("__array__") == "categorical" for x in contents
Expand Down Expand Up @@ -376,20 +447,7 @@ def action(inputs, **ignore):
"matrix multiplication (`@` or `np.matmul`) is not yet implemented for Awkward Arrays"
)

# Do we have a custom generic ufunc override (a function that accepts _all_ ufuncs)?
for x in contents:
apply_ufunc = find_ufunc_generic(ufunc, x, behavior)
if apply_ufunc is not None:
out = _array_ufunc_adjust_apply(
apply_ufunc, ufunc, method, inputs, kwargs, behavior
)
if out is not None:
return out

if all(
isinstance(x, NumpyArray) or not isinstance(x, ak.contents.Content)
for x in inputs
):
if is_at_leaves:
nplike = backend.nplike

# Broadcast parameters against one another
Expand All @@ -412,8 +470,24 @@ def action(inputs, **ignore):

return (NumpyArray(result, backend=backend, parameters=parameters),)

# If we aren't at the leaves, and we have any records, broadcasting will subsequently fail
# Let's present a nicer error message
elif all(x.is_record for x in inputs if isinstance(x, ak.contents.Content)):
error_message = []
for x, y in zip(inputs, signature[1:]):
if y is None:
error_message.append(
f"<unnamed-{'record' if x.is_record else 'array'}>"
)
else:
error_message.append(str(y))
raise TypeError(
"no {}.{} overloads for signature: {}".format(
type(ufunc).__module__, ufunc.__name__, ", ".join(error_message)
)
)
# Do we have exclusively nominal types without custom overloads?
if all(
elif all(
x.parameter("__list__") is not None or x.parameter("__record__") is not None
for x in contents
):
Expand Down
59 changes: 33 additions & 26 deletions src/awkward/contents/content.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,36 +101,43 @@ def _init(self, parameters: dict[str, Any] | None, backend: Backend):
type(self).__name__, repr(parameters)
)
)
# Validate built-in `__array__`
elif parameters.get("__array__") is not None:
array_name = parameters["__array__"]
if not self.is_list and array_name in (
"string",
"bytestring",
):
raise TypeError(
'{} is not allowed to have parameters["__array__"] = "{}"'.format(
type(self).__name__, parameters["__array__"]
else:
# Validate built-in `__array__`
if parameters.get("__array__") is not None:
array_name = parameters["__array__"]
if not self.is_list and array_name in (
"string",
"bytestring",
):
raise TypeError(
'{} is not allowed to have parameters["__array__"] = "{}"'.format(
type(self).__name__, parameters["__array__"]
)
)
)
if not isinstance(self, ak.contents.NumpyArray) and parameters.get(
"__array__"
) in ("char", "byte"):
raise TypeError(
'{} is not allowed to have parameters["__array__"] = "{}"'.format(
type(self).__name__, parameters["__array__"]
if not isinstance(self, ak.contents.NumpyArray) and parameters.get(
"__array__"
) in ("char", "byte"):
raise TypeError(
'{} is not allowed to have parameters["__array__"] = "{}"'.format(
type(self).__name__, parameters["__array__"]
)
)
)
if not self.is_indexed and array_name == "categorical":
raise TypeError(
'{} is not allowed to have parameters["__array__"] = "{}"'.format(
type(self).__name__, parameters["__array__"]
if not self.is_indexed and array_name == "categorical":
raise TypeError(
'{} is not allowed to have parameters["__array__"] = "{}"'.format(
type(self).__name__, parameters["__array__"]
)
)
)
if not self.is_record and array_name == "sorted_map":
if not self.is_record and array_name == "sorted_map":
raise TypeError(
'{} is not allowed to have parameters["__array__"] = "{}"'.format(
type(self).__name__, parameters["__array__"]
)
)
if not self.is_numpy and parameters.get("__units__") is not None:
raise TypeError(
'{} is not allowed to have parameters["__array__"] = "{}"'.format(
type(self).__name__, parameters["__array__"]
'{} is not allowed to have parameters["__units__"] != None'.format(
type(self).__name__,
)
)
# TODO: enable this once we can guarantee this doesn't happen during broadcasting
Expand Down
16 changes: 12 additions & 4 deletions src/awkward/contents/recordarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -900,11 +900,19 @@ def _reduce_next(
):
reducer_recordclass = find_record_reducer(reducer, self, behavior)
if reducer_recordclass is None:
raise TypeError(
"no ak.{} overloads for custom types: {}".format(
reducer.name, ", ".join(self.fields)
nominal_type = self.parameter("__record__")
if nominal_type is None:
raise TypeError(
"no ak.{} overloads for unnamed record type: {}".format(
reducer.name, str(self.form.type)
)
)
else:
raise TypeError(
"no ak.{} overloads for record named {!r}".format(
reducer.name, nominal_type
)
)
)
else:
# Positional reducers ultimately need to do more work when rebuilding the result
# so asking for a mask doesn't help us!
Expand Down
2 changes: 2 additions & 0 deletions src/awkward/operations/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,7 @@
from awkward.operations.ak_from_json import *
from awkward.operations.ak_from_numpy import *
from awkward.operations.ak_from_parquet import *
from awkward.operations.ak_from_pint import *
from awkward.operations.ak_from_rdataframe import *
from awkward.operations.ak_from_regular import *
from awkward.operations.ak_full_like import *
Expand Down Expand Up @@ -86,6 +87,7 @@
from awkward.operations.ak_to_numpy import *
from awkward.operations.ak_to_packed import *
from awkward.operations.ak_to_parquet import *
from awkward.operations.ak_to_pint import *
from awkward.operations.ak_to_rdataframe import *
from awkward.operations.ak_to_regular import *
from awkward.operations.ak_transform import *
Expand Down
56 changes: 56 additions & 0 deletions src/awkward/operations/ak_from_pint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,56 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
__all__ = ("from_pint",)
from awkward._dispatch import high_level_function
from awkward._do import recursively_apply
from awkward._layout import from_arraylib, wrap_layout
from awkward.operations.ak_with_parameter import with_parameter


@high_level_function
def from_pint(
    array, *, regulararray=False, recordarray=True, highlevel=True, behavior=None
):
    """
    Args:
        array (pint.Quantity): The Pint quantity whose magnitude is converted
            into an Awkward Array. The magnitude may be an np.ma.MaskedArray.
        regulararray (bool): If True, each dimension of a multidimensional
            array becomes a nested #ak.contents.RegularArray node; if False,
            the dimensions are kept in a multivalued
            #ak.contents.NumpyArray.shape. One-dimensional arrays are
            unaffected.
        recordarray (bool): If True, a NumPy structured array (one whose
            dtype.names is not None) has its fields represented by an
            #ak.contents.RecordArray; if False, the structure stays in the
            #ak.contents.NumpyArray `format`, which some functions do not
            recognize.
        highlevel (bool): If True, return an #ak.Array; otherwise, return
            a low-level #ak.contents.Content subclass.
        behavior (None or dict): Custom #ak.behavior for the output array, if
            high-level.

    Converts a Pint.Quantity array into an Awkward Array, recording the
    quantity's units as the `"__units__"` parameter of every
    #ak.contents.NumpyArray node in the result.

    The resulting layout can only involve the following #ak.contents.Content types:

    * #ak.contents.NumpyArray
    * #ak.contents.ByteMaskedArray or #ak.contents.UnmaskedArray if the
      magnitude of `array` is an np.ma.MaskedArray.
    * #ak.contents.RegularArray if `regulararray=True`.
    * #ak.contents.RecordArray if `recordarray=True`.

    See also #ak.to_pint and #ak.from_numpy.
    """
    # Build the unit-less layout from the bare magnitude first, then note the
    # unit string that has to be attached to its leaves.
    bare_layout = from_arraylib(array.magnitude, regulararray, recordarray)
    unit_name = str(array.units)

    def attach_units(layout, **_unused):
        # Only the NumpyArray leaves receive units; returning None for any
        # other node lets the traversal continue into its children.
        if not layout.is_numpy:
            return None
        return with_parameter(layout, "__units__", unit_name, highlevel=False)

    return wrap_layout(
        recursively_apply(bare_layout, attach_units, behavior=behavior),
        highlevel=highlevel,
        behavior=behavior,
    )
57 changes: 57 additions & 0 deletions src/awkward/operations/ak_to_pint.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,57 @@
# BSD 3-Clause License; see https://github.com/scikit-hep/awkward-1.0/blob/main/LICENSE
__all__ = ("to_pint",)
import awkward as ak
from awkward._dispatch import high_level_function
from awkward.units import get_unit_registry


@high_level_function
def to_pint(array, *, allow_missing=True):
    """
    Args:
        array: Array-like data (anything #ak.to_layout recognizes).
        allow_missing (bool): allow missing (None) values.

    Converts `array` (many types supported, including all Awkward Arrays and
    Records) into a rectilinear Pint.Quantity, if possible. The units of the
    result are taken from the `"__units__"` parameter of the array.

    Numerical, regular data (nested lists of equal length in every dimension,
    as described by the #ak.Array.type) convert losslessly, and the function
    returns without an error.

    Any other data make the function raise an error. It never falls back to a
    NumPy array with dtype `"O"` for `np.object_` (see the
    [note on object_ type](https://docs.scipy.org/doc/numpy/reference/arrays.scalars.html#arrays-scalars-built-in)):
    a silent conversion to dtype `"O"` would be a significant performance hit
    and would also break functionality, because nested lists in a NumPy `"O"`
    array are severed from the array and cannot be sliced as dimensions.

    If `array` is not an Awkward Array, then this function is equivalent
    to calling `pint.Quantity` on it.

    If `allow_missing` is True, NumPy
    [masked arrays](https://docs.scipy.org/doc/numpy/reference/maskedarray.html)
    are a possible result; otherwise, missing values (None) cause this
    function to raise an error.

    See also #ak.from_pint and #ak.to_numpy.
    """
    # Hand the array-like arguments to the dispatch machinery first
    yield (array,)

    # No dispatch target took over: run the built-in conversion
    return _impl(array, allow_missing)


def _impl(array, allow_missing):
    """
    Convert `array` into a `Quantity` of the active unit registry.

    Args:
        array: Array-like data (anything #ak.to_layout recognizes, except
            records).
        allow_missing (bool): Forwarded to `to_backend_array`; controls
            whether missing values are permitted in the converted buffer.

    Returns:
        A `registry.Quantity` built from the backend array of the layout and
        the layout's `"__units__"` parameter (which may be None).

    Raises:
        RuntimeError: If `get_unit_registry()` returns None.
    """
    import numpy  # noqa: TID251

    with numpy.errstate(invalid="ignore"):
        layout: ak.contents.Content = ak.to_layout(array, allow_record=False)

        content = layout.to_backend_array(allow_missing)
        unit = layout.purelist_parameter("__units__")

        registry = get_unit_registry()
        if registry is None:
            # Without this guard the failure would surface as an opaque
            # "'NoneType' object has no attribute 'Quantity'".
            # NOTE(review): presumably None means no unit registry is
            # available/configured — confirm against `awkward.units`.
            raise RuntimeError(
                "ak.to_pint requires a unit registry, but none is available"
            )
        return registry.Quantity(content, unit)
Loading
Loading