Ouranosinc · juliettelavoie · Sep 12, 2022 · Sep 12, 2022 · Sep 12, 2022 · Sep 12, 2022
diff --git a/HISTORY.rst b/HISTORY.rst
@@ -23,6 +23,7 @@ Internal changes
 * Small bugfixes in aggregate.py (:pull:`55`, :pull:`56`).
 * Default method of `xs.extract.resample` now depends on frequency. (:issue:`57`, :pull:`58`).
 * Bugfix for `_restrict_by_resolution` with CMIP6 datasets (:pull:`71`).
+* More complete check of coverage in ``_subset_file_coverage`` (:issue:`70`, :pull:`72`).
 
 v0.3.0 (2022-08-23)
 -------------------

diff --git a/xscen/extract.py b/xscen/extract.py
@@ -998,6 +998,7 @@ def _subset_file_coverage(
 
         # Very rough guess of the coverage relative to the requested period,
         # without having to open the files or checking day-by-day
+        # This only checks that the first and last time points are covered, not that everything in between is present.
         guessed_nb_hrs = np.min(
             [
                 df[files_in_range]["date_end"].max(),
@@ -1009,12 +1010,22 @@ def _subset_file_coverage(
                 date_parser(str(period[0]), freq="H"),
             ]
         )
+
+        # This checks the sum of hours in all selected files
+        guessed_nb_hrs_sum = (
+            df[files_in_range]["date_end"] - df[files_in_range]["date_start"]
+        ).sum()
+
         period_nb_hrs = date_parser(
             str(period[1]), end_of_period=True, freq="H"
         ) - date_parser(str(period[0]), freq="H")
 
         # 'coverage' adds some leeway, for example to take different calendars into account or missing 2100-12-31
-        if guessed_nb_hrs / period_nb_hrs < coverage or len(df[files_in_range]) == 0:
+        if (
+            guessed_nb_hrs / period_nb_hrs < coverage
+            or len(df[files_in_range]) == 0
+            or guessed_nb_hrs_sum / period_nb_hrs < coverage
+        ):
             logging.warning(
                 f"{df['id'].iloc[0] + ': ' if 'id' in df.columns else ''}Insufficient coverage."
             )