Skip to content

Commit

Permalink
FDR (WIP)
Browse files Browse the repository at this point in the history
  • Loading branch information
e10v committed Aug 11, 2024
1 parent f2ac134 commit 7e2c548
Showing 1 changed file with 47 additions and 3 deletions.
50 changes: 47 additions & 3 deletions src/tea_tasting/multiplicity.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

import abc
from collections import UserDict
from collections.abc import Sequence
from typing import TYPE_CHECKING, Any

import tea_tasting.config
Expand All @@ -12,7 +13,10 @@


if TYPE_CHECKING:
from collections.abc import Callable, Sequence
from collections.abc import Callable


# Placeholder comparison key: `_copy_results` stores the experiment results
# under this key when they are passed as a single object rather than a dict.
NULL_COMPARISON = "-"


class MultipleComparisonsResults(
Expand All @@ -33,7 +37,7 @@ class MultipleComparisonsResults(
def to_dicts(self) -> tuple[dict[str, Any], ...]:
"""Convert the result to a sequence of dictionaries."""
return tuple(
{"comparison": str(comparison)} | metric_result
{"comparison": _to_str(comparison)} | metric_result
for comparison, experiment_result in self.items()
for metric_result in experiment_result.to_dicts()
)
Expand All @@ -47,6 +51,40 @@ def adjust_fdr(
alpha: float | None = None,
arbitrary_dependence: bool = True,
) -> MultipleComparisonsResults:
"""Adjust p-value and alpha to control the false discovery rate.
The number of hypotheses tested is the total number of metrics included in
the comparison in all experiment results. For example, if there are
3 experiments with 2 metrics in each, the number of hypotheses is 6.
The function performs one of the following corrections, depending on parameters:
- Benjamini-Yekutieli procedure, assuming arbitrary dependence between
hypotheses (`arbitrary_dependence=True`).
- Benjamini-Hochberg procedure, assuming non-negative correlation between
hypotheses (`arbitrary_dependence=False`).
The function adds the following attributes to the results:
`pvalue_adj`: The adjusted p-value, which should be compared with the unadjusted
FDR (`alpha`).
`alpha_adj`: The adjusted FDR, which should be compared with the unadjusted
p-value (`pvalue`).
`null_rejected`: A binary indicator (`0` or `1`) that shows whether
the null hypothesis is rejected.
Args:
experiment_results: Experiment results.
metrics: Metrics included in the comparison.
If `None`, all metrics are included.
alpha: Significance level. If `None`, the value from global settings is used.
arbitrary_dependence: If `True`, arbitrary dependence between hypotheses
is assumed and Benjamini-Yekutieli procedure is performed.
If `False`, non-negative correlation between hypotheses is assumed
and Benjamini-Hochberg procedure is performed.
Returns:
The experiment results with adjusted p-values and alpha.
"""
alpha = (
tea_tasting.utils.auto_check(alpha, "alpha")
if alpha is not None
Expand Down Expand Up @@ -101,7 +139,7 @@ def _copy_results(
list[dict[str, Any]],
]:
if not isinstance(experiment_results, dict):
experiment_results = {"-": experiment_results}
experiment_results = {NULL_COMPARISON: experiment_results}

if metrics is not None:
if isinstance(metrics, str):
Expand All @@ -128,6 +166,12 @@ def _copy_results(
return copy_of_experiment_results, copy_of_metric_results


def _to_str(x: Any, seq_sep: str = ", ") -> str:
if not isinstance(x, str) and isinstance(x, Sequence):
return seq_sep.join(str(v) for v in x)
return str(x)


class _Adjustment(abc.ABC):
@abc.abstractmethod
def adjust(self, pvalue: float, k: int) -> tuple[float, float]:
Expand Down

0 comments on commit 7e2c548

Please sign in to comment.