Merge pull request #172 from swiss-seismological-service/feature/a-pos
Feature/a pos
aronsho committed Aug 19, 2024
2 parents 4d617d6 + 2f5991a commit 1f85761
Showing 5 changed files with 207 additions and 22 deletions.
1 change: 1 addition & 0 deletions seismostats/analysis/__init__.py
@@ -13,3 +13,4 @@
b_value_to_beta
)
from seismostats.analysis.estimate_mc import mc_ks, mc_max_curvature
from seismostats.analysis.estimate_a import estimate_a_positive, estimate_a_classic
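With this line, estimate_a_classic and estimate_a_positive become importable directly from seismostats.analysis. A minimal import sketch (the magnitude values below are made up for illustration):

import numpy as np

from seismostats.analysis import estimate_a_classic, estimate_a_positive

# five synthetic events, all at or above mc = 1.0
mags = np.array([1.0, 2.0, 3.0, 4.0, 5.0])
a = estimate_a_classic(mags, mc=1.0, delta_m=1.0)  # log10(5) ≈ 0.7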
175 changes: 163 additions & 12 deletions seismostats/analysis/estimate_a.py
@@ -7,22 +7,62 @@


def estimate_a(magnitudes: np.ndarray,
times=None,
mc: float | None = None,
delta_m: float | None = None,
m_ref: float | None = None,
b_value: float | None = None,
scaling_factor: float | None = None,
method: str = "classic",
) -> float:
"""Estimate the a-value of the Gutenberg-Richter (GR) law.
Args:
magnitudes: Magnitude sample
times: Times of the events, in any format (datetime, float, etc.)
mc: Completeness magnitude. If None, the lowest magnitude is
used as completeness magnitude.
delta_m: Discretization of the magnitudes.
m_ref: Reference magnitude for which the a-value is estimated. If
None, the a-value is estimated at mc.
b_value: b-value of the Gutenberg-Richter distribution. Only needed
if m_ref is given.
scaling_factor: Scaling factor. This should be chosen such that the
number of events observed can be normalized. For example:
Relative length of the time interval in which the events
occurred (relative to the time unit of interest, e.g., years).
method: Method to estimate the a-value. Options are "classic" and
"positive".
"""
if method == "classic":
return estimate_a_classic(
magnitudes, mc, delta_m, m_ref, b_value, scaling_factor)
elif method == "positive":
return estimate_a_positive(
magnitudes,
times,
mc,
delta_m,
m_ref,
b_value,
scaling_factor,
correction=False)
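For orientation, a minimal usage sketch of the dispatcher (the magnitudes and times below are invented; times are only required for the "positive" method):

import numpy as np

from seismostats.analysis.estimate_a import estimate_a

mags = np.array([1.0, 1.2, 2.1, 1.5, 3.0, 1.1])
times = np.array([0.0, 0.5, 1.0, 1.5, 2.0, 2.5])  # e.g. in years

# classic estimate: counts all events at or above mc
a_classic = estimate_a(mags, mc=1.0, delta_m=0.1, method="classic")

# positive estimate: uses only pairs with m_i >= m_(i-1) + dmc and therefore needs the event times
a_positive = estimate_a(mags, times=times, mc=1.0, delta_m=0.1, method="positive")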


def estimate_a_classic(magnitudes: np.ndarray,
mc: float | None = None,
delta_m: float | None = None,
m_ref: float | None = None,
b_value: float | None = None,
scaling_factor: float | None = None,
) -> float:
"""Estimate the a-value of the Gutenberg-Richter (GR) law.
N = 10 ** (a - b * (m_ref - mc)) (1)
where N is the number of events with magnitude greater than m_ref, which
occurred in the timeframe of the catalogue. The scaling_factor should be
given as a float- to be precise, and most of the time stands for the time
interval scaled to the time-unit of interest. E.g., if the number of events
per year are of interest, scaling_factor should be the number of years in
which the events occurred. The a-value can also be scaled by volume or area
using scaling_factor.
occurred in the timeframe of the catalogue.
If only the magnitudes are given, the a-value is estimated at the lowest
magnitude in the sample, with mc = min(magnitudes). Eq. (1) then simplifies
@@ -32,24 +72,31 @@ def estimate_a(magnitudes: np.ndarray,
magnitudes: Magnitude sample
mc: Completeness magnitude. If None, the lowest magnitude is
used as completeness magnitude.
delta_m: Discretization of the magnitudes. This is needed solely to
avoid rounding errors. By default, rounding errors are not
considered. This is adequate if the magnitudes are either
continuous or do not contain rounding errors.
m_ref: Reference magnitude for which the a-value is estimated. If
None, the a-value is estimated at mc.
b_value: b-value of the Gutenberg-Richter distribution
scaling_factor: Scaling factor. For example: Relative length of the
time interval in which the events occurred (relative to the
time unit of interest, e.g., years)
scaling_factor: Scaling factor. This should be chosen such that the
number of events observed can be normalized. For example:
Relative length of the time interval in which the events
occurred (relative to the time unit of interest, e.g., years).
Returns:
a: a-value of the Gutenberg-Richter distribution
"""
if delta_m is None:
delta_m = 0
if mc is None:
mc = magnitudes.min()
elif magnitudes.min() < mc:
elif magnitudes.min() < mc - delta_m / 2:
if get_option("warnings") is True:
warnings.warn(
"Completeness magnitude is higher than the lowest magnitude."
"Cutting the magnitudes to the completeness magnitude.")
magnitudes = magnitudes[magnitudes >= mc]
magnitudes = magnitudes[magnitudes >= mc - delta_m / 2]

a = np.log10(len(magnitudes))

@@ -60,7 +107,111 @@ def estimate_a(magnitudes: np.ndarray,
"b_value must be provided if m_ref is given")
a = a - b_value * (m_ref - mc)

# scale to reference time-interval / volume / area ...
# scale to reference time-interval or volume of interest
if scaling_factor is not None:
a = a - np.log10(scaling_factor)

return a
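As a worked sketch of Eq. (1), with numbers invented for illustration: 100 events at or above mc = 1.0 give a = log10(100) = 2 at mc; rescaling to m_ref = 0 with b_value = 1 gives 2 - 1 * (0 - 1) = 3, and normalizing by a 10-year observation period (scaling_factor = 10) gives 3 - log10(10) = 2, i.e. an expected 10**2 = 100 events per year at or above magnitude 0. In code:

import numpy as np

from seismostats.analysis import estimate_a_classic

mags = np.full(100, 1.5)  # 100 synthetic events, all above mc = 1.0

a_mc = estimate_a_classic(mags, mc=1.0)  # log10(100) = 2.0
a_ref = estimate_a_classic(mags, mc=1.0, m_ref=0.0, b_value=1.0)  # 2 + 1 = 3.0
a_rate = estimate_a_classic(mags, mc=1.0, m_ref=0.0, b_value=1.0, scaling_factor=10.0)  # 3 - 1 = 2.0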


def estimate_a_positive(
magnitudes: np.ndarray,
times: np.ndarray,
delta_m: float,
mc: float | None = None,
dmc: float | None = None,
m_ref: float | None = None,
b_value: float | None = None,
scaling_factor: float | None = None,
correction: bool = False,
) -> float:
"""Estimate the a-value of the Gutenberg-Richter (GR) law using only the
earthquakes whose magnitude m_i >= m_i-1 + dmc.
Args:
magnitudes: Magnitude sample
times: Times of the events. They should be sorted in ascending
order and scaled to the time unit of interest (e.g., given as
floats representing the time in years).
delta_m: Discretization of the magnitudes.
dmc: Minimum magnitude difference between consecutive events.
If None, the default value is delta_m.
mc: Completeness magnitude. If None, the lowest magnitude is
used as completeness magnitude.
m_ref: Reference magnitude for which the a-value is estimated. If
None, the a-value is estimated at mc.
b_value: b-value of the Gutenberg-Richter distribution. Only needed
if m_ref is given.
scaling_factor: Scaling factor. This should be chosen such that the
number of events observed can be normalized. For example:
Relative length of the time interval in which the events
occurred (relative to the time unit of interest, e.g., years).
correction: If True, the a-value is corrected for the bias introduced
by the observation period being larger than the time interval
between the first and last event. This is only relevant if the
sample is very small (< 30 events). The assumption is that the
blind time at the beginning and end of the catalogue has a
similar distribution as the rest of the catalogue. We expect this
to improve the a-value estimate for small samples, although we
have no formal proof.
Returns:
a_pos: a-value of the Gutenberg-Richter distribution
"""

if mc is None:
mc = magnitudes.min()
elif magnitudes.min() < mc:
if get_option("warnings") is True:
warnings.warn(
"Completeness magnitude is higher than the lowest magnitude."
"Cutting the magnitudes to the completeness magnitude.")
idx = magnitudes >= mc - delta_m / 2
magnitudes = magnitudes[idx]
times = times[idx]
# TODO: check for correct binning

if dmc is None:
dmc = delta_m
elif dmc < 0:
raise ValueError("dmc must be larger or equal to 0")
elif dmc < delta_m and get_option("warnings") is True:
warnings.warn("dmc is smaller than delta_m, not recommended")

# order the magnitudes and times
idx = np.argsort(times)
magnitudes = magnitudes[idx]
times = times[idx]

# differences
mag_diffs = np.diff(magnitudes)
time_diffs = np.diff(times)

# only consider events with magnitude difference >= dmc
idx = mag_diffs > dmc - delta_m / 2
mag_diffs = mag_diffs[idx]
time_diffs = time_diffs[idx]

# estimate the number of events within the full time interval: the
# counted positive-difference pairs are extrapolated by the fraction
# of the observation time that their inter-event times cover
total_time = times[-1] - times[0]
total_time_pos = sum(time_diffs / total_time)
if correction:
total_time += 2 * np.mean(np.diff(times) / total_time)
total_time_pos += np.mean(time_diffs / total_time)
n_pos = 1 / total_time_pos * len(mag_diffs)

# estimate a-value
a = np.log10(n_pos)

# scale to reference magnitude
if m_ref is not None:
if b_value is None:
raise ValueError(
"b_value must be provided if m_ref is given")
a = a - b_value * (m_ref - mc)

# scale to reference time-interval or volume of interest
if scaling_factor is not None:
a = a - np.log10(scaling_factor)

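A small usage sketch of the positive estimator (synthetic, strictly increasing magnitudes chosen so the result is easy to check; dmc defaults to delta_m):

import numpy as np

from seismostats.analysis import estimate_a_positive

# synthetic catalogue: 11 events spread over one year, times given as floats in years
mags = np.array([1.0, 2.0, 3.0, 4.0, 5.0, 6.0, 7.0, 8.0, 9.0, 10.0, 11.0])
times = np.arange(11) / 10.0

# every consecutive pair increases by 1.0 >= dmc, so all 10 pairs are kept
# and together they cover the full observation time, hence a = log10(10) = 1.0
a_pos = estimate_a_positive(mags, times, delta_m=1.0)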
4 changes: 2 additions & 2 deletions seismostats/analysis/estimate_beta.py
@@ -293,7 +293,7 @@ def estimate_b_positive(
dmc = delta_m
elif dmc < 0:
raise ValueError("dmc must be larger or equal to 0")
elif dmc < delta_m:
elif dmc < delta_m and get_option("warnings") is True:
warnings.warn("dmc is smaller than delta_m, not recommended")

mag_diffs = np.diff(magnitudes)
@@ -361,7 +361,7 @@ def estimate_b_more_positive(
dmc = delta_m
elif dmc < 0:
raise ValueError("dmc must be larger or equal to 0")
elif dmc < delta_m:
elif dmc < delta_m and get_option("warnings") is True:
warnings.warn("dmc is smaller than delta_m, not recommended")

mag_diffs = - np.ones(len(magnitudes) - 1) * delta_m
47 changes: 40 additions & 7 deletions seismostats/analysis/tests/test_estimate_a.py
@@ -1,28 +1,61 @@
import numpy as np
import warnings
from seismostats.analysis.estimate_a import estimate_a
from numpy.testing import assert_almost_equal
from datetime import datetime, timedelta

from seismostats.analysis.estimate_a import (
estimate_a_classic, estimate_a_positive)

def test_estimate_a():

def test_estimate_a_classic():
mags = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 10])
a = estimate_a(mags)
a = estimate_a_classic(mags)
assert a == 1.0

# reference magnitude is given and b-value given
a = estimate_a(mags, mc=1, m_ref=0, b_value=1)
a = estimate_a_classic(mags, mc=1, m_ref=0, b_value=1)
assert a == 2.0

# reference magnitude but no b-value
try:
a = estimate_a(mags, mc=1, m_ref=0)
a = estimate_a_classic(mags, mc=1, m_ref=0)
except ValueError as e:
assert str(e) == "b_value must be provided if m_ref is given"

# reference time is given
a = estimate_a(mags, scaling_factor=10)
a = estimate_a_classic(mags, scaling_factor=10)
assert a == 0.0

# magnitudes not cut at mc
with warnings.catch_warnings(record=True) as w:
estimate_a(mags, mc=2)
estimate_a_classic(mags, mc=2)
assert w[-1].category == UserWarning


def test_estimate_a_positive():
mags = np.array([1, 2, 3, 4, 5, 6, 7, 8, 9, 11, 10])
# array of dates from 1.1.2000 to 11.1.2000
times = np.arange(datetime(2000, 1, 1), datetime(
2000, 1, 12), timedelta(days=1)).astype(datetime)

a = estimate_a_positive(mags, times, delta_m=1)
assert_almost_equal(10**a, 10.0)

# reference magnitude is given and b-value given
a = estimate_a_positive(mags, times, delta_m=1, mc=1, m_ref=0, b_value=1)
assert_almost_equal(10**a, 100.0)

# reference magnitude but no b-value
try:
a = estimate_a_positive(mags, times, delta_m=1, mc=1, m_ref=0)
except ValueError as e:
assert str(e) == "b_value must be provided if m_ref is given"

# reference time is given
a = estimate_a_positive(mags, times, delta_m=1, scaling_factor=10)
assert_almost_equal(10**a, 1.0)

# magnitudes not cut at mc
with warnings.catch_warnings(record=True) as w:
estimate_a_positive(mags, times, delta_m=1, mc=2)
assert w[-1].category == UserWarning
2 changes: 1 addition & 1 deletion seismostats/analysis/tests/test_estimate_beta.py
@@ -177,7 +177,7 @@ def test_estimate_b_positive(
@pytest.mark.parametrize(
"n,b,mc,delta_m,b_parameter,precision",
[
(1000, 1.2 * np.log(10), 3, 0, "beta", 0.15),
(1000, 1.2 * np.log(10), 3, 0, "beta", 0.6),
(1000, 1, 3, 0.1, "b_value", 0.2),
],
)
