Skip to content

Commit

Permalink
Add genotype_values() method
Browse files Browse the repository at this point in the history
  • Loading branch information
hyanwong committed Oct 30, 2022
1 parent 3d6ea1b commit de16b39
Show file tree
Hide file tree
Showing 2 changed files with 40 additions and 0 deletions.
27 changes: 27 additions & 0 deletions python/tests/test_genotypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -655,6 +655,33 @@ def test_snipped_tree_sequence_mutations_over_isolated(self):
assert non_missing_found
assert missing_found

def test_genotype_values(self):
    """
    Check ``genotype_values()`` on a two-sample, two-site tree sequence.

    With ``isolated_as_missing=False`` both sites must give string arrays;
    with ``isolated_as_missing=True`` the second site must give an object
    array whose second element is ``None``.
    """
    tables = tskit.TableCollection(1.0)
    # Two nodes flagged as samples; no edges are added.
    for _ in range(2):
        tables.nodes.add_row(tskit.NODE_IS_SAMPLE, 0)
    # First site: ancestral state "C", one mutation over each node.
    site_id = tables.sites.add_row(0, "C")
    for node_id, state in enumerate(("G", "T")):
        tables.mutations.add_row(site=site_id, derived_state=state, node=node_id)
    # Second site: empty ancestral state, a single mutation over node 0.
    site_id = tables.sites.add_row(0.5, "")
    tables.mutations.add_row(site=site_id, derived_state="A long string", node=0)
    ts = tables.tree_sequence()

    # isolated_as_missing -> expected (dtype, values) for each site in order.
    expected = {
        False: [
            (np.str_, ["G", "T"]),
            (np.str_, ["A long string", ""]),
        ],
        True: [
            (np.str_, ["G", "T"]),
            (np.object_, ["A long string", None]),
        ],
    }
    for as_missing, per_site in expected.items():
        variants = ts.variants(isolated_as_missing=as_missing)
        for dtype, states in per_site:
            # Consume each variant immediately, one next() per expected site.
            vals = next(variants).genotype_values()
            assert vals.dtype.type == dtype
            assert np.array_equal(vals, np.array(states))


class TestLimitInterval:
def test_simple_case(self, ts_fixture):
Expand Down
13 changes: 13 additions & 0 deletions python/tskit/genotypes.py
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,19 @@ def copy(self) -> Variant:
variant_copy._ll_variant = self._ll_variant.restricted_copy()
return variant_copy

def genotype_values(self) -> np.ndarray:
    """
    Returns the genotypes at this site as allelic states rather than as
    integer indexes into :attr:`.alleles`. The result is a numpy array of
    strings if there is no missing data, or a numpy array of objects if the
    genotypes contain missing data (in which case the missing elements
    will be equal to ``None``). Note that this is inefficient compared to
    working with the underlying integer representation as returned by the
    :attr:`.genotypes` property.

    :return: An array with one element per entry of :attr:`.genotypes`,
        containing strings or objects.
    """
    # Build the lookup table once, then index it with the integer genotypes.
    allele_states = np.array(self.alleles)
    return allele_states[self.genotypes]

def counts(self) -> typing.Counter[str | None]:
"""
Returns a :class:`python:collections.Counter` object providing counts for each
Expand Down

0 comments on commit de16b39

Please sign in to comment.