Add logic to recognize non-standard keys in DataCoordinate.
This reimplements some of the special handling of non-standard keys
from Butler._findDatasetRef, in the hope of eventually moving it all
down to Registry (and thus making it work on many more interfaces).
It's just a start at that, though: while trying to make
Butler._findDatasetRef use the new code, I realized that we really
need to make queryDatasets work on CALIBRATION collections first. But
I think what I've done so far will still be useful eventually, so I'm
keeping it.
TallJimbo authored and timj committed Jan 12, 2023
1 parent 8ecd2fa commit ac6121c
Showing 1 changed file with 113 additions and 9 deletions.
122 changes: 113 additions & 9 deletions python/lsst/daf/butler/core/dimensions/_coordinate.py
@@ -36,6 +36,7 @@
)

from abc import abstractmethod
import logging
import numbers
from typing import (
AbstractSet,
@@ -63,6 +64,8 @@
from .._containers import HeterogeneousDimensionRecordAbstractSet
from ...registry import Registry

log = logging.getLogger(__name__)

DataIdKey = Union[str, Dimension]
"""Type annotation alias for the keys that can be used to index a
DataCoordinate.
@@ -164,6 +167,9 @@ def standardize(
universe: Optional[DimensionUniverse] = None,
defaults: Optional[DataCoordinate] = None,
records: Optional[HeterogeneousDimensionRecordAbstractSet] = None,
unused_dimensions: Optional[Dict[str, DataIdValue]] = None,
unused_constraints: Optional[NameLookupMapping[DimensionElement, Dict[str, Any]]] = None,
check_types: Optional[bool] = None,
**kwargs: Any
) -> DataCoordinate:
"""Standardize the supplied dataId.
@@ -192,6 +198,44 @@ def standardize(
Container of `DimensionRecord` instances that may be used to
fill in missing keys and/or attach records. If provided, the
returned object is guaranteed to have `hasRecords` return `True`.
unused_dimensions : `dict`, optional
A mapping that will be populated with any given key-value pairs
that identify unrelated dimensions, or implied dimensions that
could not be included in the result (i.e. when some implied
dimensions are missing, and hence `hasFull` cannot be `True` on
the returned object).
unused_constraints : `dict`, optional
A mapping that will be populated with any given key-value pairs
that cannot be included in the returned `DataCoordinate`. These
fall into three categories:
- Fully-qualified constraints on `DimensionRecord` fields (e.g.
``exposure.day_obs=20250101``).
- Unqualified constraints on `DimensionRecord` fields (e.g.
``day_obs=20250101``). These are expanded to constraints on
all matching elements in the dimension universe.
- Values for dimensions that have the wrong type for the primary
key, but can be transformed into constraints on an alternate key
(e.g. ``detector='S11R11'`` ->
``detector.full_name='S11R11'``). These are only considered if
``check_types`` is `True`.
If this argument is not `None`, it should be a mapping that
supports lookups for all `DimensionElement` names in the universe,
returning a `dict` to be populated with ``field: value``
constraints. ``defaultdict(dict)`` is a convenient way to
construct such a mapping.
If this argument is `None` (the default), it is assumed that
calling code cannot make use of attribute constraints, and
`ValueError` will be raised if they appear. Key-value pairs for
dimensions that are not part of the graph are silently ignored,
as are those for implied dimensions when some implied dimensions
are missing.
check_types : `bool`, optional
If `True` (the default), check each value's type against the
expected type for its key, and transform ``numpy`` integer types
to `int`.
**kwargs
Additional keyword arguments are treated like additional key-value
pairs in ``mapping``.
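
As a minimal usage sketch of the new arguments (not part of the commit;
the data ID values are hypothetical, echoing the docstring examples
above, and an existing ``butler`` object is assumed):

    from collections import defaultdict

    from lsst.daf.butler import DataCoordinate

    unused_constraints = defaultdict(dict)  # element name -> {field: value}
    unused_dimensions = {}                  # leftover dimension key-value pairs

    data_id = DataCoordinate.standardize(
        {"instrument": "LSSTCam", "detector": "S11R11", "day_obs": 20250101},
        universe=butler.registry.dimensions,
        unused_constraints=unused_constraints,
        unused_dimensions=unused_dimensions,
        check_types=True,
    )
    # Per the docstring, unused_constraints could now hold something like
    # {"detector": {"full_name": "S11R11"}, "exposure": {"day_obs": 20250101}}
    # (plus day_obs entries for any other matching elements), while plain
    # dimension values stay in the returned data_id.
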
@@ -204,7 +248,9 @@ def standardize(
Raises
------
TypeError
Raised if the set of optional arguments provided is not supported.
Raised if the set of optional arguments provided is not supported,
or if a value has the wrong type and cannot be transformed to an
alternate constraint.
KeyError
Raised if a key-value pair for a required dimension is missing.
"""
@@ -234,15 +280,75 @@ def standardize(
universe = defaults.universe
else:
raise TypeError("universe must be provided if graph and defaults are not.")
if not (d.keys() <= universe.getStaticDimensions().names):
non_dimension_keys = set(d.keys() - universe.getStaticDimensions().names)
if non_dimension_keys and unused_constraints is not None:
for key in non_dimension_keys:
element_name, sep, attr_name = key.partition(".")
if sep:
try:
element = universe[element_name]
if attr_name in element.RecordClass.fields.names:
unused_constraints[element_name][attr_name] = d.pop(key)
except LookupError:
# If this doesn't work, we just leave this key in
# non_dimension_keys, and later exception-raising code
# will take care of it.
pass
else:
# This isn't a dimension name, and it isn't something like
# 'element.attribute'; maybe it's an element attribute
# where we have to infer the element(s).
value = d[key]
for element in universe.getStaticElements():
if key in element.RecordClass.fields.names:
unused_constraints[element.name][key] = value
log.debug("Creating constraint %s.%s=%s from data ID key %s.",
element.name, key, value, key)
d.pop(key, None) # drop from dict the first time we use it.
# Drop keys that we put into `unused_constraints` and removed from `d`.
non_dimension_keys.intersection_update(d.keys())
if non_dimension_keys:
# We still have some keys we don't recognize.
# We silently ignore keys that aren't relevant for this particular
# data ID, but keys that aren't relevant for any possible data ID
# are a bug that we want to report to the user. This is especially
# important because other code frequently forwards unrecognized
# kwargs here.
raise ValueError(
f"Unrecognized key(s) for data ID: {d.keys() - universe.getStaticDimensions().names}."
f"Unrecognized key(s) for data ID: {non_dimension_keys}. "
"Note that non-dimension column constraints may only appear "
"without the table/dimension name if it can be inferred from "
"the set of dimensions to be constrained."
)
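# Illustration (hypothetical values, following the docstring examples):
# "exposure.day_obs".partition(".") yields ("exposure", ".", "day_obs"),
# which becomes unused_constraints["exposure"]["day_obs"] = 20250101,
# while a bare "day_obs" is checked against every static element's
# record fields and produces one constraint per matching element.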
if check_types:
for key, value in list(d.items()): # copy so we can remove in loop
if isinstance(value, numbers.Integral): # type: ignore
# Some backends cannot handle numpy integer types, which are
# subclasses of numbers.Integral; coerce those to plain int.
value = int(value)
d[key] = value
dimension = universe.getStaticDimensions()[key]
if not isinstance(value, dimension.primaryKey.getPythonType()):
if unused_constraints is not None:
for alternate in dimension.alternateKeys:
if isinstance(value, alternate.getPythonType()):
unused_constraints[key][alternate.name] = value
del d[key]
log.debug("Converting dimension %s to %s.%s=%s",
key, key, alternate.name, value)
break
else:
expected = [str(dimension.primaryKey.getPythonType())]
expected.extend(
f"{alternate.getPythonType()} ({alternate.name})"
for alternate in dimension.alternateKeys
)
raise TypeError(
f"Wrong type for {key}={value}; expected one of "
f"{expected}, got {type(value)}."
)
else:
raise TypeError(
f"Wrong type for {key}={value}; expected "
f"{dimension.primaryKey.getPythonType()}, got {type(value)}."
)
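# Illustration (hypothetical, from the docstring): detector="S11R11"
# fails the int primary-key check, but matches the str type of the
# full_name alternate key, so it is recorded as
# unused_constraints["detector"]["full_name"] = "S11R11" instead of
# raising TypeError.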
if graph is None:
graph = DimensionGraph(universe, names=d.keys())
if not graph.dimensions:
@@ -295,16 +401,14 @@ def standardize(
d.setdefault(dimension.name, None)
r.setdefault(dimension.name, None)
if d.keys() >= graph.dimensions.names:
values = tuple(d[name] for name in graph._dataCoordinateIndices.keys())
values = tuple(d.pop(name) for name in graph._dataCoordinateIndices.keys())
else:
try:
values = tuple(d[name] for name in graph.required.names)
values = tuple(d.pop(name) for name in graph.required.names)
except KeyError as err:
raise KeyError(f"No value in data ID ({mapping}) for required dimension {err}.") from err
# Some backends cannot handle numpy.int64 type which is a subclass of
# numbers.Integral; convert that to int.
values = tuple(int(val) if isinstance(val, numbers.Integral) # type: ignore
else val for val in values)
if unused_dimensions is not None:
unused_dimensions.update(d)
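# (Whatever remained in `d` at this point identified dimensions outside
# the graph, or implied dimensions that could not be filled in; per the
# docstring these are handed back to the caller via unused_dimensions
# rather than raising.)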
result: DataCoordinate = _BasicTupleDataCoordinate(graph, values)
if r.keys() >= graph.elements.names:
result = result.expanded(r)
