diff --git a/qlib/data/base.py b/qlib/data/base.py index ef345d1225..1c32c949f3 100644 --- a/qlib/data/base.py +++ b/qlib/data/base.py @@ -8,6 +8,8 @@ import abc import pandas as pd +from ..log import get_module_logger + class Expression(abc.ABC): """Expression base class""" @@ -150,7 +152,14 @@ def load(self, instrument, start_index, end_index, freq): return H["f"][args] if start_index is None or end_index is None or start_index > end_index: raise ValueError("Invalid index range: {} {}".format(start_index, end_index)) - series = self._load_internal(instrument, start_index, end_index, freq) + try: + series = self._load_internal(instrument, start_index, end_index, freq) + except Exception: + get_module_logger("data").error( + f"Loading data error: instrument={instrument}, expression={str(self)}, " + f"start_index={start_index}, end_index={end_index}, freq={freq}" + ) + raise series.name = str(self) H["f"][args] = series return series diff --git a/qlib/data/data.py b/qlib/data/data.py index 115d381703..9f27b5dadf 100644 --- a/qlib/data/data.py +++ b/qlib/data/data.py @@ -724,7 +724,14 @@ def expression(self, instrument, field, start_time=None, end_time=None, freq="da end_time = pd.Timestamp(end_time) _, _, start_index, end_index = Cal.locate_index(start_time, end_time, freq=freq, future=False) lft_etd, rght_etd = expression.get_extended_window_size() - series = expression.load(instrument, max(0, start_index - lft_etd), end_index + rght_etd, freq) + try: + series = expression.load(instrument, max(0, start_index - lft_etd), end_index + rght_etd, freq) + except Exception: + get_module_logger("data").error( + f"Loading expression error: " + f"instrument={instrument}, field=({field}), start_time={start_time}, end_time={end_time}, freq={freq}" + ) + raise # Ensure that each column type is consistent # FIXME: # 1) The stock data is currently float. If there is other types of data, this part needs to be re-implemented. 
diff --git a/qlib/data/ops.py b/qlib/data/ops.py index fc69e2e2f4..4a859f345c 100644 --- a/qlib/data/ops.py +++ b/qlib/data/ops.py @@ -305,7 +305,29 @@ def _load_internal(self, instrument, start_index, end_index, freq): series_right = self.feature_right.load(instrument, start_index, end_index, freq) else: series_right = self.feature_right - return getattr(np, self.func)(series_left, series_right) + check_length = isinstance(series_left, (np.ndarray, pd.Series)) and isinstance( + series_right, (np.ndarray, pd.Series) + ) + if check_length: + warning_info = ( + f"Loading {instrument}: {str(self)}; np.{self.func}(series_left, series_right), " + f"The length of series_left and series_right is different: ({len(series_left)}, {len(series_right)}), " + f"series_left is {str(self.feature_left)}, series_right is {str(self.feature_right)}. Please check the data" + ) + else: + warning_info = ( + f"Loading {instrument}: {str(self)}; np.{self.func}(series_left, series_right), " + f"series_left is {str(self.feature_left)}, series_right is {str(self.feature_right)}. Please check the data" + ) + try: + res = getattr(np, self.func)(series_left, series_right) + except ValueError as e: + get_module_logger("ops").error(warning_info) + raise ValueError(f"{str(e)}. \n\t{warning_info}") + else: + if check_length and len(series_left) != len(series_right): + get_module_logger("ops").warning(warning_info) + return res class Add(NpPairOperator):