fixing RDT/CRDT bug with off-mom files

pylhc · Sep 13, 2024 · 9bb2bb7 · 9bb2bb7
1 parent 26617b5
commit 9bb2bb7
Show file tree

Hide file tree

Showing 4 changed files with 99 additions and 36 deletions.
diff --git a/omc3/optics_measurements/crdt.py b/omc3/optics_measurements/crdt.py
@@ -7,6 +7,7 @@
 It provides functions to compute combined resonance driving terms following the derivations in
 https://arxiv.org/pdf/1402.1461.pdf.
 """
+from collections.abc import Sequence
 from pathlib import Path
 from generic_parser import DotDict
 import numpy as np
@@ -25,7 +26,6 @@
 
 CRDT_COLUMNS = [AMPLITUDE, f'{ERR}{AMPLITUDE}', PHASE, f'{ERR}{PHASE}']
 
-
 CRDTS = [
     {'order': "skew_quadrupole", 'term': "F_XY", 'plane': 'X', 'line': [0, 1]},
     {'order': "skew_quadrupole", 'term': "F_YX", 'plane': 'Y', 'line': [1, 0]},
@@ -50,22 +50,33 @@
 
 
 def calculate(measure_input: DotDict, input_files: InputFiles, invariants, header):
-
+    """ Calculate the CRDT values. 
+    
+    Todo: https://github.com/pylhc/omc3/issues/456 
+    """
     LOGGER.info("Start of CRDT analysis")
+
+    # Todo: only on-momentum required? If not, remove this or set `dpp_value=None`, see #456
+    dpp_value = 0  
+    invariants = {plane: inv[input_files.dpp_frames_indices(plane, dpp_value)] for plane, inv in invariants.items()}
+
     assert len(input_files['X']) == len(input_files['Y'])
     bpm_names = input_files.bpms(dpp_value=0)
     for crdt in CRDTS:
         LOGGER.debug(f"Processing CRDT {crdt['term']}")
-        result_df = generic_dataframe(input_files, measure_input, bpm_names)
+        result_df = generic_dataframe(input_files, measure_input, bpm_names, dpp_value=dpp_value)
         phase_sign, line_suffix = get_line_sign_and_suffix(crdt["line"], input_files, crdt["plane"])
         lines_and_phases = get_column_names(line_suffix)
         nqx, nqy = crdt['line']
         crdt_order = abs(nqx) + abs(nqy)
         # don't know the exact way to get to this via line indices so for now only via hacky way
         signflip = -1 if crdt['term'] in ("F_NO2", "F_NO1") else 1
 
-        result_df = pd.merge(result_df, input_files.joined_frame(crdt["plane"], lines_and_phases.values()),
-                             how='inner', left_index=True, right_index=True)
+        result_df = pd.merge(
+            result_df, 
+            input_files.joined_frame(crdt["plane"], lines_and_phases.values(), dpp_value=dpp_value),
+            how='inner', left_index=True, right_index=True
+        )
 
         amplitudes = input_files.get_data(result_df, lines_and_phases[AMPLITUDE])
         err_amplitudes = input_files.get_data(result_df, lines_and_phases[f"{ERR}{AMPLITUDE}"])
@@ -89,12 +100,13 @@ def calculate(measure_input: DotDict, input_files: InputFiles, invariants, heade
               add_line_and_freq_to_header(header, crdt), measure_input, crdt['order'], crdt['term'])
 
 
-def generic_dataframe(input_files, measure_input, bpm_names):
+def generic_dataframe(input_files: InputFiles, measure_input: DotDict, bpm_names: Sequence[str], dpp_value: int = 0):
+    """ Generate a dataframe based on the MU-MDL columns from each measuement. """
     result_df = pd.DataFrame(measure_input.accelerator.model).loc[bpm_names, ["S", "MUX", "MUY"]]
     result_df.rename(columns={f"{COL_MU}X": f"{COL_MU}X{MDL}", f"{COL_MU}Y": f"{COL_MU}Y{MDL}"}, inplace=True)
     for plane in PLANES:
         result_df = pd.merge(
-            result_df, input_files.joined_frame(plane, [f"MU{plane}", f"{ERR}MU{plane}"], dpp_value=0),
+            result_df, input_files.joined_frame(plane, [f"MU{plane}", f"{ERR}MU{plane}"], dpp_value=dpp_value),
             how='inner', left_index=True, right_index=True)
     return result_df
 

diff --git a/omc3/optics_measurements/data_models.py b/omc3/optics_measurements/data_models.py
@@ -18,6 +18,8 @@
 
 LOGGER = logging_tools.get_logger(__name__)
 
+DPP_TOLERANCE = 1e-6
+
 class InputFiles(dict):
     """
     Stores the input files, provides methods to gather quantity specific data
@@ -75,12 +77,20 @@ def dpps(self, plane: str) -> np.ndarray:
         return np.array([df.DPP for df in self[plane]])
 
     def dpp_frames(self, plane, dpp_value):
-        dpp_dfs = []
-        for i in np.argwhere(np.abs(self.dpps(plane) - dpp_value) < 1e-6).T[0]:
-            dpp_dfs.append(self[plane][i])
+        if dpp_value is None:
+            return self._all_frames(plane)
+
+        dpp_dfs = [self[plane][i] for i in self.dpp_frames_indices(plane, dpp_value)]
         if len(dpp_dfs) == 0:
             raise ValueError(f"No data found for dp/p {dpp}")
         return dpp_dfs
+
+    def dpp_frames_indices(self, plane, dpp_value):
+        """ Return the indices of the frames that match the dpp. """
+        if dpp_value is None:
+            return list(range(len(self[plane])))
+
+        return np.argwhere(np.abs(self.dpps(plane) - dpp_value) < DPP_TOLERANCE).T[0]
 
     def _all_frames(self, plane):
         return self[plane]
@@ -101,15 +111,20 @@ def joined_frame(self, plane, columns, dpp_value=None, dpp_amp=False, how='inner
                    or to `None` to avoid conversion.
 
         Returns:
-            A merged `DataFrame` from `InputFiles`.
+            A merged `TfsDataFrame` from `InputFiles`.
         """
         if how not in ['inner', 'outer']:
             raise RuntimeWarning("'how' should be either 'inner' or 'outer', 'inner' will be used.")
-        frames_to_join = self.dpp_frames(plane, dpp_value) if dpp_value is not None else self._all_frames(plane)
+
+        # select frames ---
+        frames_to_join = self.dpp_frames(plane, dpp_value)
         if dpp_amp:
             frames_to_join = [df for df in frames_to_join if df.DPPAMP > 0]
+
         if len(frames_to_join) == 0:
             raise ValueError("No data found for non-zero |dp/p|")
+
+        # join frames ---
         joined_frame = frames_to_join[0].reindex(columns=columns, fill_value=np.nan)
         if len(frames_to_join) > 1:
             for i, df in enumerate(frames_to_join[1:]):

diff --git a/omc3/optics_measurements/rdt.py b/omc3/optics_measurements/rdt.py
@@ -5,6 +5,7 @@
 This module contains RDT calculations related functionality of ``optics_measurements``.
 It provides functions to compute global resonance driving terms **f_jklm**.
 """
+from __future__ import annotations
 from copy import deepcopy
 from os.path import join
 
@@ -14,6 +15,7 @@
 import tfs
 from scipy.optimize import curve_fit
 from scipy.sparse import diags
+from contextlib import contextmanager
 
 from omc3.definitions.constants import PLANES
 from omc3.optics_measurements.constants import AMPLITUDE, ERR, EXT, IMAG, PHASE, REAL
@@ -26,8 +28,10 @@
 NBPMS_FOR_90 = 3
 LOGGER = logging_tools.get_logger(__name__)
 
+RDTTuple = tuple[int, int, int, int]
 
-def _generate_plane_rdts(order):
+
+def _generate_plane_rdts(order: int) -> tuple[dict[str, list[RDTTuple]], dict[str, list[RDTTuple]]]:
     """
     This helper function generates two dictionnaries representing on what plane(s)
     a RDT can be seen and which tune it is a multiple of.
@@ -98,23 +102,17 @@ def calculate(
         for_rdts = _best_90_degree_phases(meas_input, bpm_names, phases, tunes, plane)
         LOGGER.info(f"Average phase advance between BPM pairs: {for_rdts.loc[:,'MEAS'].mean()}")
         for rdt in single_plane_rdts[plane]:
-            try:
+            with _check_amp_error(rdt):
                 df = _process_rdt(meas_input, input_files, for_rdts, invariants, plane, rdt)
-            except ValueError as e:  # catch the AMP line not being found in the lin file
-                LOGGER.warning(f"RDT calculation failed for {jklm2str(*rdt)}: {str(e)}")
-                continue
-            write(df, add_freq_to_header(header, plane, rdt), meas_input, plane, rdt)
+                write(df, add_freq_to_header(header, plane, rdt), meas_input, plane, rdt)
     for plane in PLANES:
         bpm_names = input_files.bpms(dpp_value=0)
         for_rdts = _best_90_degree_phases(meas_input, bpm_names, phases, tunes, plane)
         LOGGER.info(f"Average phase advance between BPM pairs: {for_rdts.loc[:, 'MEAS'].mean()}")
         for rdt in double_plane_rdts[plane]:
-            try:
+            with _check_amp_error(rdt):
                 df = _process_rdt(meas_input, input_files, for_rdts, invariants, plane, rdt)
-            except ValueError as e:
-                LOGGER.warning(f"RDT calculation failed for {jklm2str(*rdt)}: {str(e)}")
-                continue
-            write(df, add_freq_to_header(header, plane, rdt), meas_input, plane, rdt)
+                write(df, add_freq_to_header(header, plane, rdt), meas_input, plane, rdt)
 
 
 def write(df, header, meas_input, plane, rdt):
@@ -124,12 +122,30 @@ def write(df, header, meas_input, plane, rdt):
               save_index="NAME")
 
 
-def _rdt_to_str(rdt):
+@contextmanager
+def _check_amp_error(rdt: RDTTuple):
+    """ Context manager to catch a specific type of ValueError, regarding the AMP_ column not found in the output,
+    as raised by get_line_sign_and_suffix. 
+    
+    Alternatively, we could just raise a custom Error Type? """
+    try:
+        yield 
+    except ValueError as e:
+        error_str = str(e)
+        message  = f"RDT calculation failed for {jklm2str(*rdt)}."
+
+        if "AMP_" not in error_str:  # raise unexpected Value errors 
+            raise ValueError(message) from e
+
+        LOGGER.warning(f"{message}: {error_str}")  
+
+
+def _rdt_to_str(rdt: RDTTuple):
     j, k, l, m = rdt  # noqa: E741
     return f"{j}{k}{l}{m}"
 
 
-def _rdt_to_order_and_type(rdt):
+def _rdt_to_order_and_type(rdt: RDTTuple):
     j, k, l, m = rdt  # noqa: E741
     rdt_type = "normal" if (l + m) % 2 == 0 else "skew"
     orders = dict(((1, "dipole"), 
@@ -165,7 +181,7 @@ def _get_n_upper_diagonals(n, shape):
     return diags(np.ones((n, shape[0])), np.arange(n)+1, shape=shape).toarray()
 
 
-def _determine_line(rdt, plane):
+def _determine_line(rdt: RDTTuple, plane: str):
     j, k, l, m = rdt  # noqa: E741
     lines = dict(X=(1 - j + k, m - l, 0),
                  Y=(k - j, 1 - l + m, 0))
@@ -180,21 +196,28 @@ def add_freq_to_header(header, plane, rdt):
     return mod_header
 
 
-def _process_rdt(meas_input: DotDict, input_files: InputFiles, phase_data, invariants, plane, rdt):
+def _process_rdt(meas_input: DotDict, input_files: InputFiles, phase_data, invariants, plane, rdt: RDTTuple):
+    # Todo: only on-momentum required? If not, remove this or set `dpp_value=None`, see #456
+    dpp_value = 0  
+    invariants = {plane: inv[input_files.dpp_frames_indices(plane, dpp_value)] for plane, inv in invariants.items()}
+
     df = pd.DataFrame(phase_data)
     second_bpms = df.loc[:, "NAME2"].to_numpy()
     df["S2"] = df.loc[second_bpms, "S"].to_numpy()
-    df["COUNT"] = len(input_files.dpp_frames(plane, 0))
+    df["COUNT"] = len(input_files.dpp_frames(plane, dpp_value=dpp_value))
     line = _determine_line(rdt, plane)
     phase_sign, suffix = get_line_sign_and_suffix(line, input_files, plane)
-    comp_coeffs1 = to_complex(
-        input_files.joined_frame(plane, [f"AMP{suffix}"], dpp_value=0).loc[df.index, :].to_numpy(),
-        phase_sign * input_files.joined_frame(plane, [f"PHASE{suffix}"], dpp_value=0).loc[df.index, :].to_numpy())
+
+    df_all_amps = input_files.joined_frame(plane, [f"AMP{suffix}"], dpp_value=dpp_value)
+    df_all_phases = phase_sign * input_files.joined_frame(plane, [f"PHASE{suffix}"], dpp_value=dpp_value)
+
+    comp_coeffs1 = to_complex(df_all_amps.loc[df.index, :], df_all_phases.loc[df.index, :])
     # Multiples of tunes needs to be added to phase at second BPM if that is in second turn
-    phase2 = phase_sign * input_files.joined_frame(plane, [f"PHASE{suffix}"], dpp_value=0).loc[second_bpms, :].to_numpy()
     comp_coeffs2 = to_complex(
-        input_files.joined_frame(plane, [f"AMP{suffix}"], dpp_value=0).loc[second_bpms, :].to_numpy(),
-        _add_tunes_if_in_second_turn(df, input_files, line, phase2))
+        df_all_amps.loc[second_bpms, :], 
+        _add_tunes_if_in_second_turn(df, input_files, line, df_all_phases.loc[second_bpms, :].to_numpy())
+    )
+
     # Get amplitude and phase of the line from linx/liny file
     line_amp, line_phase, line_amp_e, line_phase_e = complex_secondary_lines(  # TODO use the errors
         df.loc[:, "MEAS"].to_numpy()[:, np.newaxis] * meas_input.accelerator.beam_direction,
@@ -246,7 +269,7 @@ def fitting(x, f):
     return amps, err_amps
 
 
-def get_linearized_problem(invs, plane, rdt):
+def get_linearized_problem(invs: dict[str, np.ndarray], plane: str, rdt: RDTTuple):
     """
     2 * j * f_jklm * (powers of 2Jx and 2Jy) : f_jklm is later a parameter of a fit
     we use sqrt(2J): unit is sqrt(m).
@@ -296,4 +319,14 @@ def complex_secondary_lines(phase_adv, err_padv, sig1, sig2):
 
 
 def to_complex(amplitudes, phases, period=1):
+    try:
+        amplitudes = amplitudes.to_numpy()
+    except AttributeError:
+        pass
+
+    try:
+        phases = phases.to_numpy()
+    except AttributeError:
+        pass
+
     return amplitudes * np.exp(2j * np.pi * phases / period)
diff --git a/tests/unit/test_hole_in_one.py b/tests/unit/test_hole_in_one.py
@@ -15,6 +15,9 @@
 
     - Pandas to numpy dtype conversions for the lin files went wrong and numpy had 'obj' arrays.
     https://github.com/pylhc/omc3/issues/453
+    
+    - RDT and CRDT dimensions mismatch when off-momentum files were analysed.
+    https://github.com/pylhc/omc3/issues/456
 
 """
 from __future__ import annotations
@@ -78,7 +81,7 @@ def test_hole_in_one(tmp_path, clean, which_files):
         accel="lhc",
         year="2022",
         model_dir=MODEL_DIR,
-        files=sdds_files,
+        files=[analysis_output / sdds_file.name for sdds_file in sdds_files],
         nonlinear=['rdt', 'crdt'],
         outputdir=optics_output,
     )