Add logic to recognize non-standard keys in DataCoordinate.
This reimplements some of the special handling of non-standard keys
from Butler._findDatasetRef, in the hope of eventually moving it all
down to Registry (and thus making it work on many more interfaces).
It's just a start at that, though: while trying to make
Butler._findDatasetRef use the new code, I realized that we really
need to make queryDatasets work on CALIBRATION collections first. But
I think what I've done so far will still be useful eventually, so I'm
keeping it.
TallJimbo authored and timj committed Jan 12, 2023
1 parent 8ecd2fa commit ac6121c
Showing 1 changed file with 113 additions and 9 deletions.
122 changes: 113 additions & 9 deletions python/lsst/daf/butler/core/dimensions/_coordinate.py
@@ -36,6 +36,7 @@
)

from abc import abstractmethod
import logging
import numbers
from typing import (
AbstractSet,
@@ -63,6 +64,8 @@
from .._containers import HeterogeneousDimensionRecordAbstractSet
from ...registry import Registry

log = logging.getLogger(__name__)

DataIdKey = Union[str, Dimension]
"""Type annotation alias for the keys that can be used to index a
DataCoordinate.
@@ -164,6 +167,9 @@ def standardize(
universe: Optional[DimensionUniverse] = None,
defaults: Optional[DataCoordinate] = None,
records: Optional[HeterogeneousDimensionRecordAbstractSet] = None,
unused_dimensions: Optional[Dict[str, DataIdValue]] = None,
unused_constraints: Optional[NameLookupMapping[DimensionElement, Dict[str, Any]]] = None,
check_types: Optional[bool] = None,
**kwargs: Any
) -> DataCoordinate:
"""Standardize the supplied dataId.
@@ -192,6 +198,44 @@ def standardize(
Container of `DimensionRecord` instances that may be used to
fill in missing keys and/or attach records. If provided, the
returned object is guaranteed to have `hasRecords` return `True`.
unused_dimensions : `dict`, optional
A mapping that will be populated with any given key-value pairs
that identify unrelated dimensions, or implied dimensions that
could not be included in the result (i.e. when some implied
dimensions are missing, and hence `hasFull` cannot be `True` on
the returned object).
unused_constraints : `dict`, optional
A mapping that will be populated with any given key-value pairs
that cannot be included in the returned `DataCoordinate`. These
fall into three categories:
- Fully-qualified constraints on `DimensionRecord` fields (e.g.
``exposure.day_obs=20250101``).
- Unqualified constraints on `DimensionRecord` fields (e.g.
``day_obs=20250101``). These are expanded to constraints on
all matching elements in the dimension universe.
- Values for dimensions that have the wrong type for the primary
key, but can be transformed into constraints on an alternate key
(e.g. ``detector='S11R11'`` ->
``detector.full_name='S11R11'``). These are only considered if
``check_types`` is `True`.
If this argument is not `None`, it should be a mapping that
supports lookups for all `DimensionElement` names in the universe,
returning a `dict` to be populated with ``field: value``
constraints. ``defaultdict(dict)`` is a convenient way to
construct such a mapping.
If this argument is `None` (the default), it is assumed that
calling code cannot make use of attribute constraints, and
`ValueError` will be raised if they appear. Key-value pairs for
dimensions that are not part of the graph are silently ignored,
as are those for implied dimensions when some implied dimensions
are missing.
check_types : `bool`, optional
If `True` (the default), check each value's type against the
expected type for its key, and transform ``numpy`` integer types
to `int`.
**kwargs
Additional keyword arguments are treated like additional key-value
pairs in ``mapping``.
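
As a minimal usage sketch of the new arguments (not part of the commit;
the data ID values are hypothetical, echoing the docstring examples
above, and an existing ``butler`` object is assumed):

    from collections import defaultdict

    from lsst.daf.butler import DataCoordinate

    unused_constraints = defaultdict(dict)  # element name -> {field: value}
    unused_dimensions = {}                  # leftover dimension key-value pairs

    data_id = DataCoordinate.standardize(
        {"instrument": "LSSTCam", "detector": "S11R11", "day_obs": 20250101},
        universe=butler.registry.dimensions,
        unused_constraints=unused_constraints,
        unused_dimensions=unused_dimensions,
        check_types=True,
    )
    # Per the docstring, unused_constraints could now hold something like
    # {"detector": {"full_name": "S11R11"}, "exposure": {"day_obs": 20250101}}
    # (plus day_obs entries for any other matching elements), while plain
    # dimension values stay in the returned data_id.
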
@@ -204,7 +248,9 @@ def standardize(
Raises
------
TypeError
Raised if the set of optional arguments provided is not supported.
Raised if the set of optional arguments provided is not supported,
or if a value has the wrong type and cannot be transformed to an
alternate constraint.
KeyError
Raised if a key-value pair for a required dimension is missing.
"""
@@ -234,15 +280,75 @@ def standardize(
universe = defaults.universe
else:
raise TypeError("universe must be provided if graph and defaults are not.")
if not (d.keys() <= universe.getStaticDimensions().names):
non_dimension_keys = set(d.keys() - universe.getStaticDimensions().names)
if non_dimension_keys and unused_constraints is not None:
for key in non_dimension_keys:
element_name, sep, attr_name = key.partition(".")
if sep:
try:
element = universe[element_name]
if attr_name in element.RecordClass.fields.names:
unused_constraints[element_name][attr_name] = d.pop(key)
except LookupError:
# If this doesn't work, we just leave this key in
# non_dimension_keys, and later exception-raising code
# will take care of it.
pass
else:
# This isn't a dimension name, and it isn't something like
# 'element.attribute'; maybe it's an element attribute
# where we have to infer the element(s).
value = d[key]
for element in universe.getStaticElements():
if key in element.RecordClass.fields.names:
unused_constraints[element.name][key] = value
log.debug("Creating constraint %s.%s=%s from data ID key %s.",
element.name, key, value, key)
d.pop(key, None) # drop from dict the first time we use it.
# Drop keys that we put into `unused_constraints` and removed from `d`.
non_dimension_keys.intersection_update(d.keys())
if non_dimension_keys:
# We still have some keys we don't recognize.
# We silently ignore keys that aren't relevant for this particular
# data ID, but keys that aren't relevant for any possible data ID
# are a bug that we want to report to the user. This is especially
# important because other code frequently forwards unrecognized
# kwargs here.
raise ValueError(
f"Unrecognized key(s) for data ID: {d.keys() - universe.getStaticDimensions().names}."
f"Unrecognized key(s) for data ID: {non_dimension_keys}. "
"Note that non-dimension column constraints may only appear "
"without the table/dimension name if it can be inferred from "
"the set of dimensions to be constrained."
)
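# Illustration (hypothetical values, following the docstring examples):
# "exposure.day_obs".partition(".") yields ("exposure", ".", "day_obs"),
# which becomes unused_constraints["exposure"]["day_obs"] = 20250101,
# while a bare "day_obs" is checked against every static element's
# record fields and produces one constraint per matching element.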
if check_types:
for key, value in list(d.items()): # copy so we can remove in loop
if isinstance(value, numbers.Integral): # type: ignore
# Some backends cannot handle numpy integer types, which are
# subclasses of numbers.Integral; coerce those to plain int.
value = int(value)
d[key] = value
dimension = universe.getStaticDimensions()[key]
if not isinstance(value, dimension.primaryKey.getPythonType()):
if unused_constraints is not None:
for alternate in dimension.alternateKeys:
if isinstance(value, alternate.getPythonType()):
unused_constraints[key][alternate.name] = value
del d[key]
log.debug("Converting dimension %s to %s.%s=%s",
key, key, alternate.name, value)
break
else:
expected = [str(dimension.primaryKey.getPythonType())]
expected.extend(
f"{alternate.getPythonType()} ({alternate.name})"
for alternate in dimension.alternateKeys
)
raise TypeError(
f"Wrong type for {key}={value}; expected one of "
f"{expected}, got {type(value)}."
)
else:
raise TypeError(
f"Wrong type for {key}={value}; expected "
f"{dimension.primaryKey.getPythonType()}, got {type(value)}."
)
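# Illustration (hypothetical, from the docstring): detector="S11R11"
# fails the int primary-key check, but matches the str type of the
# full_name alternate key, so it is recorded as
# unused_constraints["detector"]["full_name"] = "S11R11" instead of
# raising TypeError.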
if graph is None:
graph = DimensionGraph(universe, names=d.keys())
if not graph.dimensions:
@@ -295,16 +401,14 @@ def standardize(
d.setdefault(dimension.name, None)
r.setdefault(dimension.name, None)
if d.keys() >= graph.dimensions.names:
values = tuple(d[name] for name in graph._dataCoordinateIndices.keys())
values = tuple(d.pop(name) for name in graph._dataCoordinateIndices.keys())
else:
try:
values = tuple(d[name] for name in graph.required.names)
values = tuple(d.pop(name) for name in graph.required.names)
except KeyError as err:
raise KeyError(f"No value in data ID ({mapping}) for required dimension {err}.") from err
# Some backends cannot handle numpy.int64 type which is a subclass of
# numbers.Integral; convert that to int.
values = tuple(int(val) if isinstance(val, numbers.Integral) # type: ignore
else val for val in values)
if unused_dimensions is not None:
unused_dimensions.update(d)
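# (Whatever remained in `d` at this point identified dimensions outside
# the graph, or implied dimensions that could not be filled in; per the
# docstring these are handed back to the caller via unused_dimensions
# rather than raising.)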
result: DataCoordinate = _BasicTupleDataCoordinate(graph, values)
if r.keys() >= graph.elements.names:
result = result.expanded(r)
