From 7e2c54867dde1c48934ece15660050a64c441185 Mon Sep 17 00:00:00 2001 From: Evgeny Ivanov Date: Sun, 11 Aug 2024 23:24:42 +0500 Subject: [PATCH] FDR (WIP) --- src/tea_tasting/multiplicity.py | 50 +++++++++++++++++++++++++++++++-- 1 file changed, 47 insertions(+), 3 deletions(-) diff --git a/src/tea_tasting/multiplicity.py b/src/tea_tasting/multiplicity.py index a4ddee8..b76c20c 100644 --- a/src/tea_tasting/multiplicity.py +++ b/src/tea_tasting/multiplicity.py @@ -4,6 +4,7 @@ import abc from collections import UserDict +from collections.abc import Sequence from typing import TYPE_CHECKING, Any import tea_tasting.config @@ -12,7 +13,10 @@ if TYPE_CHECKING: - from collections.abc import Callable, Sequence + from collections.abc import Callable + + +NULL_COMPARISON = "-" class MultipleComparisonsResults( @@ -33,7 +37,7 @@ class MultipleComparisonsResults( def to_dicts(self) -> tuple[dict[str, Any], ...]: """Convert the result to a sequence of dictionaries.""" return tuple( - {"comparison": str(comparison)} | metric_result + {"comparison": _to_str(comparison)} | metric_result for comparison, experiment_result in self.items() for metric_result in experiment_result.to_dicts() ) @@ -47,6 +51,40 @@ def adjust_fdr( alpha: float | None = None, arbitrary_dependence: bool = True, ) -> MultipleComparisonsResults: + """Adjust p-value and alpha to control the false discovery rate. + + The number of hypotheses tested is the total number of metrics included in + the comparison in all experiment results. For example, if there are + 3 experiments with 2 metrics in each, the number of hypotheses is 6. + + The function performs one of the following corrections, depending on parameters: + + - Benjamini-Yekutieli procedure, assuming arbitrary dependence between + hypotheses (`arbitrary_dependence=True`). + - Benjamini-Hochberg procedure, assuming non-negative correlation between + hypotheses (`arbitrary_dependence=False`). 
+ + The function adds the following attributes to the results: `pvalue_adj`: The adjusted p-value, which should be compared with the unadjusted FDR (`alpha`). `alpha_adj`: The adjusted FDR, which should be compared with the unadjusted p-value (`pvalue`). `null_rejected`: A binary indicator (`0` or `1`) that shows whether the null hypothesis is rejected. + + Args: + experiment_results: Experiment results. + metrics: Metrics included in the comparison. + If `None`, all metrics are included. + alpha: Significance level. If `None`, the value from global settings is used. + arbitrary_dependence: If `True`, arbitrary dependence between hypotheses + is assumed and Benjamini-Yekutieli procedure is performed. + If `False`, non-negative correlation between hypotheses is assumed + and Benjamini-Hochberg procedure is performed. + + Returns: + The experiment results with adjusted p-values and alpha. + """ alpha = ( tea_tasting.utils.auto_check(alpha, "alpha") if alpha is not None @@ -101,7 +139,7 @@ def _copy_results( list[dict[str, Any]], ]: if not isinstance(experiment_results, dict): - experiment_results = {"-": experiment_results} + experiment_results = {NULL_COMPARISON: experiment_results} if metrics is not None: if isinstance(metrics, str): @@ -128,6 +166,12 @@ def _copy_results( return copy_of_experiment_results, copy_of_metric_results +def _to_str(x: Any, seq_sep: str = ", ") -> str: + if not isinstance(x, str) and isinstance(x, Sequence): + return seq_sep.join(str(v) for v in x) + return str(x) + + class _Adjustment(abc.ABC): @abc.abstractmethod def adjust(self, pvalue: float, k: int) -> tuple[float, float]: