# Extracted from patch de16b39b57157c067f8e18289869c688be7c31de
# From: Yan Wong
# Date: Sun, 30 Oct 2022 13:17:58 +0000
# Subject: [PATCH] Add `genotype_values()` method
# See https://github.com/tskit-dev/tsinfer/issues/739
#
# Diffstat: python/tests/test_genotypes.py (+27), python/tskit/genotypes.py (+13);
# 2 files changed, 40 insertions(+).
#
# The test below is the addition to python/tests/test_genotypes.py
# (index 1443e40061..bfa7c18d91). In the patch it is inserted after
# test_snipped_tree_sequence_mutations_over_isolated and before
# class TestLimitInterval. The second file touched by the patch is
# python/tskit/genotypes.py (index d0abfb3835..b0c3a5ae6f).
def test_genotype_values(self):
    """
    Check the dtype and contents of ``Variant.genotype_values()`` for two
    sites on a two-sample tree sequence, both with and without isolated
    samples being treated as missing data.
    """
    # Two sample nodes and no edges. Site 0 has a mutation above each sample;
    # site 1 (empty ancestral state) has a mutation above sample 0 only.
    tc = tskit.TableCollection(1.0)
    for _ in range(2):
        tc.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)
    first_site = tc.sites.add_row(0, "C")
    tc.mutations.add_row(site=first_site, derived_state="G", node=0)
    tc.mutations.add_row(site=first_site, derived_state="T", node=1)
    second_site = tc.sites.add_row(0.5, "")
    tc.mutations.add_row(site=second_site, derived_state="A long string", node=0)
    ts = tc.tree_sequence()

    # (isolated_as_missing, [(expected dtype type, expected values) per site])
    scenarios = [
        (False, [(np.str_, ["G", "T"]), (np.str_, ["A long string", ""])]),
        (True, [(np.str_, ["G", "T"]), (np.object_, ["A long string", None])]),
    ]
    for as_missing, per_site in scenarios:
        variant_iter = ts.variants(isolated_as_missing=as_missing)
        for want_type, want_values in per_site:
            observed = next(variant_iter).genotype_values()
            assert observed.dtype.type == want_type
            assert np.array_equal(observed, np.array(want_values))
# Addition to python/tskit/genotypes.py: in the patch (hunk @@ -245,6 +245,19 @@)
# this method is inserted between the existing ``copy()`` and ``counts()`` methods.
def genotype_values(self) -> np.ndarray:
    """
    Returns the genotypes at this site as a numpy array of allele values,
    rather than an array of integer indexes into :attr:`.alleles`.

    The result is an array of strings if there is no missing data, or an
    array of objects if the genotypes contain missing data, in which case
    some elements will be equal to ``None``. Note that this is inefficient
    compared to working with the underlying integer representation as
    returned by the :attr:`.genotypes` property.

    :return: An array with one element per entry of :attr:`.genotypes`,
        containing strings or objects.
    """
    # NOTE(review): presumably missing genotypes are encoded as -1 and
    # ``alleles`` then ends with ``None``, so ordinary negative indexing
    # yields ``None`` for them -- confirm against Variant.alleles.
    allele_lookup = np.array(self.alleles)
    return allele_lookup[self.genotypes]