From 7e2c54867dde1c48934ece15660050a64c441185 Mon Sep 17 00:00:00 2001 From: Evgeny Ivanov Date: Sun, 11 Aug 2024 23:24:42 +0500 Subject: [PATCH] FDR (WIP) --- src/tea_tasting/multiplicity.py | 50 +++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/src/tea_tasting/multiplicity.py b/src/tea_tasting/multiplicity.py index a4ddee8..b76c20c 100644 --- a/src/tea_tasting/multiplicity.py +++ b/src/tea_tasting/multiplicity.py @@ -4,6 +4,7 @@ import abc from collections import UserDict +from collections.abc import Sequence from typing import TYPE_CHECKING, Any import tea_tasting.config @@ -12,7 +13,10 @@ if TYPE_CHECKING: - from collections.abc import Callable, Sequence + from collections.abc import Callable + + +NULL_COMPARISON = "-" class MultipleComparisonsResults( @@ -33,7 +37,7 @@ class MultipleComparisonsResults( def to_dicts(self) -> tuple[dict[str, Any], ...]: """Convert the result to a sequence of dictionaries.""" return tuple( - {"comparison": str(comparison)} | metric_result + {"comparison": _to_str(comparison)} | metric_result for comparison, experiment_result in self.items() for metric_result in experiment_result.to_dicts() ) @@ -47,6 +51,40 @@ def adjust_fdr( alpha: float | None = None, arbitrary_dependence: bool = True, ) -> MultipleComparisonsResults: + """Adjust p-value and alpha to control the false discovery rate. + + The number of hypotheses tested is the total number of metrics included in + the comparison in all experiment results. For example, if there are + 3 experiments with 2 metrics in each, the number of hypotheses is 6. + + The function performs one of the following corrections, depending on parameters: + + - Benjamini-Yekutieli procedure, assuming arbitrary dependence between + hypotheses (`arbitrary_dependence=True`). + - Benjamini-Hochberg procedure, assuming non-negative correlation between + hypotheses (`arbitrary_dependence=False`). 
+ + The function adds the following attributes to the results: `pvalue_adj`: The adjusted p-value, which should be compared with the unadjusted FDR (`alpha`). `alpha_adj`: The adjusted FDR, which should be compared with the unadjusted p-value (`pvalue`). `null_rejected`: A binary indicator (`0` or `1`) that shows whether the null hypothesis is rejected. + + Args: + experiment_results: Experiment results. + metrics: Metrics included in the comparison. + If `None`, all metrics are included. + alpha: Significance level. If `None`, the value from global settings is used. + arbitrary_dependence: If `True`, arbitrary dependence between hypotheses + is assumed and Benjamini-Yekutieli procedure is performed. + If `False`, non-negative correlation between hypotheses is assumed + and Benjamini-Hochberg procedure is performed. + + Returns: + The experiment results with adjusted p-values and alpha. + """ alpha = ( tea_tasting.utils.auto_check(alpha, "alpha") if alpha is not None @@ -101,7 +139,7 @@ def _copy_results( list[dict[str, Any]], ]: if not isinstance(experiment_results, dict): - experiment_results = {"-": experiment_results} + experiment_results = {NULL_COMPARISON: experiment_results} if metrics is not None: if isinstance(metrics, str): @@ -128,6 +166,12 @@ def _copy_results( return copy_of_experiment_results, copy_of_metric_results +def _to_str(x: Any, seq_sep: str = ", ") -> str: + if not isinstance(x, str) and isinstance(x, Sequence): + return seq_sep.join(str(v) for v in x) + return str(x) + + class _Adjustment(abc.ABC): @abc.abstractmethod def adjust(self, pvalue: float, k: int) -> tuple[float, float]: