Skip to content

Commit

Permalink
Update simplify semantics and refactor
Browse files Browse the repository at this point in the history
Revert to old implementation

Simple test

Update changelog and add C tests

Done?

Low level tests

More tests

Fix error-handling error in simplifier_init

We were clearing the input tables before checking sample errors

Fixup broken tests

Add test for mutations

Don't clear the node table when not filtering nodes

Refactor

Modernise simplify test

Fix provenance bug

Removed unused samples member

Implement filter-populations with no-touch semantics

updates

Refactor finalise references path

Finished no-touch semantics on the non-filter cases

Remove unused simplify_t struct member

Make simplify thread-safe in no-filter case

Update changelog
  • Loading branch information
jeromekelleher committed Jan 9, 2023
1 parent b3e65d6 commit 649b271
Show file tree
Hide file tree
Showing 12 changed files with 832 additions and 451 deletions.
7 changes: 7 additions & 0 deletions c/CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,13 @@
``tsk_treeseq_get_min_time`` and ``tsk_treeseq_get_max_time``, respectively.
(:user:`szhan`, :pr:`2612`, :issue:`2271`)

- Add the ``TSK_SIMPLIFY_NO_FILTER_NODES`` option to simplify to allow unreferenced
nodes to be kept in the output (:user:`jeromekelleher`, :user:`hyanwong`,
:issue:`2606`, :pr:`2619`).

- Guarantee that unfiltered tables are not written to unnecessarily
during simplify (:user:`jeromekelleher`, :pr:`2619`).

--------------------
[1.1.1] - 2022-07-29
--------------------
Expand Down
22 changes: 22 additions & 0 deletions c/tests/test_tables.c
Original file line number Diff line number Diff line change
Expand Up @@ -345,10 +345,32 @@ test_table_collection_simplify_errors(void)
tsk_id_t samples[] = { 0, 1 };
tsk_id_t ret_id;
const char *individuals = "1 0.25 -2\n";

ret = tsk_table_collection_init(&tables, 0);
CU_ASSERT_EQUAL_FATAL(ret, 0);
tables.sequence_length = 1;

ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, TSK_NULL, TSK_NULL, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_node_table_add_row(&tables.nodes, 0, 0, TSK_NULL, TSK_NULL, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);

/* Bad samples */
samples[0] = -1;
ret = tsk_table_collection_simplify(&tables, samples, 2, 0, NULL);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
samples[0] = 10;
ret = tsk_table_collection_simplify(&tables, samples, 2, 0, NULL);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_NODE_OUT_OF_BOUNDS);
samples[0] = 0;

/* Duplicate samples */
samples[0] = 0;
samples[1] = 0;
ret = tsk_table_collection_simplify(&tables, samples, 2, 0, NULL);
CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);
samples[0] = 0;

ret_id = tsk_site_table_add_row(&tables.sites, 0, "A", 1, NULL, 0);
CU_ASSERT_FATAL(ret_id >= 0);
ret_id = tsk_site_table_add_row(&tables.sites, 0, "A", 1, NULL, 0);
Expand Down
67 changes: 67 additions & 0 deletions c/tests/test_trees.c
Original file line number Diff line number Diff line change
Expand Up @@ -3257,6 +3257,72 @@ test_simplest_individual_filter(void)
tsk_table_collection_free(&tables);
}

/*
 * Checks simplify with TSK_SIMPLIFY_NO_FILTER_NODES on a tiny tree sequence
 * built from four node rows and one edge row; the last node is not referenced
 * by that edge.
 *
 * Cases covered, in order:
 *  - a NULL sample list with a count of 0, both samples, and both samples in
 *    reverse order each yield tables equal to the input tables;
 *  - a single sample keeps all four nodes, leaves no edges and fills the
 *    node map with the identity mapping;
 *  - a single sample combined with KEEP_INPUT_ROOTS and KEEP_UNARY keeps the
 *    four nodes and one edge;
 *  - a sample list containing the same ID twice is rejected with
 *    TSK_ERR_DUPLICATE_SAMPLE.
 */
static void
test_simplest_no_node_filter(void)
{
    int ret;
    tsk_id_t k;
    tsk_id_t samples[] = { 0, 1 };
    tsk_id_t id_map[] = { -1, -1, -1, -1 };
    tsk_treeseq_t ts, simplified;
    const char *edges = "0 1 2 0,1\n";
    const char *nodes = "1 0 0\n"
                        "1 0 0\n"
                        "0 1 0\n"
                        "0 1 0"; /* unreferenced node */

    tsk_treeseq_from_text(&ts, 1, nodes, edges, NULL, NULL, NULL, NULL, NULL, 0);

    /* No sample list supplied: output tables must match the input. */
    ret = tsk_treeseq_simplify(
        &ts, NULL, 0, TSK_SIMPLIFY_NO_FILTER_NODES, &simplified, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));
    tsk_treeseq_free(&simplified);

    /* Both samples listed explicitly: still identical tables. */
    ret = tsk_treeseq_simplify(
        &ts, samples, 2, TSK_SIMPLIFY_NO_FILTER_NODES, &simplified, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));
    tsk_treeseq_free(&simplified);

    /* Reversing sample order makes no difference */
    samples[0] = 1;
    samples[1] = 0;
    ret = tsk_treeseq_simplify(
        &ts, samples, 2, TSK_SIMPLIFY_NO_FILTER_NODES, &simplified, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_TRUE(tsk_table_collection_equals(ts.tables, simplified.tables, 0));
    tsk_treeseq_free(&simplified);

    /* Only the first entry of the (reversed) list is used as a sample. */
    ret = tsk_treeseq_simplify(
        &ts, samples, 1, TSK_SIMPLIFY_NO_FILTER_NODES, &simplified, id_map);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&simplified), 4);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_edges(&simplified), 0);
    /* Every node must map to itself. */
    k = 0;
    while (k < 4) {
        CU_ASSERT_EQUAL(id_map[k], k);
        k++;
    }
    tsk_treeseq_free(&simplified);

    /* Same single sample, additionally keeping input roots and unary nodes. */
    ret = tsk_treeseq_simplify(&ts, samples, 1,
        TSK_SIMPLIFY_NO_FILTER_NODES | TSK_SIMPLIFY_KEEP_INPUT_ROOTS
            | TSK_SIMPLIFY_KEEP_UNARY,
        &simplified, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, 0);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_nodes(&simplified), 4);
    CU_ASSERT_EQUAL(tsk_treeseq_get_num_edges(&simplified), 1);
    tsk_treeseq_free(&simplified);

    /* The same ID listed twice must be reported as a duplicate sample. */
    samples[0] = 0;
    samples[1] = 0;
    ret = tsk_treeseq_simplify(
        &ts, samples, 2, TSK_SIMPLIFY_NO_FILTER_NODES, &simplified, NULL);
    CU_ASSERT_EQUAL_FATAL(ret, TSK_ERR_DUPLICATE_SAMPLE);
    tsk_treeseq_free(&simplified);

    tsk_treeseq_free(&ts);
}

static void
test_simplest_map_mutations(void)
{
Expand Down Expand Up @@ -8026,6 +8092,7 @@ main(int argc, char **argv)
{ "test_simplest_simplify_defragment", test_simplest_simplify_defragment },
{ "test_simplest_population_filter", test_simplest_population_filter },
{ "test_simplest_individual_filter", test_simplest_individual_filter },
{ "test_simplest_no_node_filter", test_simplest_no_node_filter },
{ "test_simplest_map_mutations", test_simplest_map_mutations },
{ "test_simplest_nonbinary_map_mutations",
test_simplest_nonbinary_map_mutations },
Expand Down
Loading

0 comments on commit 649b271

Please sign in to comment.