Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Handle some edge cases in scores function #148

Merged
merged 9 commits into from
Apr 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
70 changes: 47 additions & 23 deletions monet/util/stats.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
import numpy as np
from pandas import DataFrame, crosstab


def STDO(obs, mod, axis=None):
Expand Down Expand Up @@ -1774,39 +1773,64 @@ def CSI(obs, mod, minval, maxval):


def scores(obs, mod, minval, maxval=1.0e5):
    """Calculate 2x2 contingency-table counts for an interval event.

    An "event" is a value lying strictly between `minval` and `maxval`.
    Each (obs, mod) pair is classified by whether the event occurred in the
    observation and/or in the model, and the four outcomes are counted.

    Parameters
    ----------
    obs : array-like
        Observation values ("truth").
    mod : array-like
        Model values ("prediction").
        Should be the same size as `obs`.
    minval, maxval : float
        Interval to test (exclusive on both sides).
        `maxval` defaults to ``1.0e5``.

    Returns
    -------
    a, b, c, d : float
        Counts of hits (event in both), misses (event in `obs` only),
        false alarms (event in `mod` only), and correct negatives
        (event in neither). Pairs where either value is NaN are not counted.
    """
    # Imported locally so that the module itself does not need pandas at
    # import time.
    import pandas as pd

    df = pd.DataFrame({"obs": obs, "mod": mod})

    # Any comparison involving NaN is False, which would silently count the
    # pair as a "non-event"; skip incomplete pairs instead.
    df = df.dropna(subset=["obs", "mod"], how="any")

    if df.empty:
        zero = np.float64(0)
        return zero, zero, zero, zero

    mod_event = (df["mod"] > minval) & (df["mod"] < maxval)
    obs_event = (df["obs"] > minval) & (df["obs"] < maxval)

    # Counting each combination directly always yields all four cells, even
    # when only one outcome is present in the data (a cross-tabulation would
    # omit the missing rows/columns in that case).
    a = np.float64((mod_event & obs_event).sum())  # hit
    b = np.float64((~mod_event & obs_event).sum())  # miss
    c = np.float64((mod_event & ~obs_event).sum())  # false alarm
    d = np.float64((~mod_event & ~obs_event).sum())  # correct negative

    return a, b, c, d


Expand Down
49 changes: 49 additions & 0 deletions tests/test_stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
import numpy as np

from monet.util.stats import scores


def test_scores():
    """Check the (a, b, c, d) counts returned by `scores` for several inputs."""
    nan = np.nan
    truth = np.linspace(0, 1, 21)

    # Each entry is (obs, mod, minval, expected (a, b, c, d)).
    cases = [
        # Due to our low bias, we miss one
        (truth, truth - 0.1, 0.5, (9, 1, 0, 11)),
        # All good
        ([1, 1], [1, 1], 0, (2, 0, 0, 0)),
        # All miss
        ([1, 1], [-1, -1], 0, (0, 2, 0, 0)),
        # All false alarm
        ([-1, -1], [1, 1], 0, (0, 0, 2, 0)),
        # All correct negative
        ([-1, -1], [-1, -1], 0, (0, 0, 0, 2)),
        # Same but mix
        ([-1, 1], [-1, 1], 0, (1, 0, 0, 1)),
        ([1, -1], [1, -1], 0, (1, 0, 0, 1)),
        # Opposite
        ([1, -1], [-1, 1], 0, (0, 1, 1, 0)),
        ([-1, 1], [1, -1], 0, (0, 1, 1, 0)),
        # No pairs
        ([nan, nan], [nan, nan], 0, (0, 0, 0, 0)),
        ([nan, 1], [1, nan], 0, (0, 0, 0, 0)),
        # Some pairs after NaN dropping
        ([nan, 1], [1, 1], 0, (1, 0, 0, 0)),
    ]

    for obs, mod, lower, (want_a, want_b, want_c, want_d) in cases:
        a, b, c, d = scores(obs, mod, minval=lower)
        assert a == want_a and b == want_b and c == want_c and d == want_d