Skip to content

Commit

Permalink
pllmod_tree: fix SPR constraint check
Browse files Browse the repository at this point in the history
  • Loading branch information
amkozlov committed Oct 19, 2023
1 parent ab5bf73 commit 623e364
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 5 deletions.
40 changes: 37 additions & 3 deletions src/algorithm/algo_search.c
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,9 @@ Schloss-Wolfsbrunnenweg 35, D-69118 Heidelberg, Germany
#include "pllmod_algorithm.h"
#include "../pllmod_common.h"

/* constraint tree debugging */
//#define CONS_DEBUG

/* if not defined, branch length optimization will use
* the same starting set of branch lengths for every topology */
#define PLLMOD_SEARCH_GREEDY_BLO
Expand Down Expand Up @@ -733,6 +736,7 @@ static int best_reinsert_edge(pllmod_treeinfo_t * treeinfo,
/* do not re-insert if resulting tree would contradict the constraint */
if (check_cons && !pllmod_treeinfo_constraint_check_spr(treeinfo, p_edge, r_edge))
{
DBG("SKIP incompatible: %u %u\n", j, r_edge->clv_index);
++j;
continue;
}
Expand All @@ -749,6 +753,18 @@ static int best_reinsert_edge(pllmod_treeinfo_t * treeinfo,
retval = algo_utree_regraft(treeinfo, params, p_edge, r_edge);
assert(retval == PLL_SUCCESS);

#ifdef CONS_DEBUG
if (!pllmod_treeinfo_constraint_check_current(treeinfo))
{
pll_utree_show_ascii(treeinfo->root, PLL_UTREE_SHOW_LABEL | PLL_UTREE_SHOW_BRANCH_LENGTH |
PLL_UTREE_SHOW_CLV_INDEX );
printf("Constraint check failed after REGRAFT: %u %u\n", p_edge->clv_index, r_edge->clv_index);
pllmod_set_error(PLLMOD_TREE_ERROR_INVALID_TREE,
"Constraint check failed after applying SPR!");
return PLL_FAILURE;
}
#endif

/* place root at the pruning branch and invalidate CLV at the new root */
pllmod_treeinfo_set_root(treeinfo, p_edge);
pllmod_treeinfo_invalidate_clv(treeinfo, p_edge);
Expand Down Expand Up @@ -947,12 +963,14 @@ static double reinsert_nodes(pllmod_treeinfo_t * treeinfo, pll_unode_t ** nodes,
if (!retval)
return PLL_FAILURE;

#ifdef DEBUG
#ifdef CONS_DEBUG
if (!pllmod_treeinfo_constraint_check_current(treeinfo))
{
// pll_utree_show_ascii(treeinfo->root, PLL_UTREE_SHOW_LABEL | PLL_UTREE_SHOW_BRANCH_LENGTH |
// PLL_UTREE_SHOW_CLV_INDEX );
pll_utree_show_ascii(treeinfo->root, PLL_UTREE_SHOW_LABEL | PLL_UTREE_SHOW_BRANCH_LENGTH |
PLL_UTREE_SHOW_CLV_INDEX );
printf("Constraint check failed after applying SPR: %u %u\n", p_edge->clv_index, best_r_edge->clv_index);
pllmod_set_error(PLLMOD_TREE_ERROR_INVALID_TREE,
"Constraint check failed after applying SPR!");
return PLL_FAILURE;
}
#endif
Expand Down Expand Up @@ -1092,6 +1110,14 @@ PLL_EXPORT double pllmod_algo_spr_round(pllmod_treeinfo_t * treeinfo,
/* reset error */
pll_errno = 0;

/* make sure initial topology is compatible with constraint */
if (!pllmod_treeinfo_constraint_check_current(treeinfo))
{
pllmod_set_error(PLLMOD_TREE_ERROR_INVALID_TREE,
"Constraint check failed before SPR round!");
return PLL_FAILURE;
}

/* allocate brlen buffers */
for (i = 0; i < BRLEN_BUF_COUNT; ++i)
{
Expand Down Expand Up @@ -1155,6 +1181,14 @@ PLL_EXPORT double pllmod_algo_spr_round(pllmod_treeinfo_t * treeinfo,
goto error_exit;
}

/* make sure intermediate topology is compatible with constraint */
if (!pllmod_treeinfo_constraint_check_current(treeinfo))
{
pllmod_set_error(PLLMOD_TREE_ERROR_INVALID_TREE,
"Constraint check failed after reinsert_nodes() in SPR round!");
return PLL_FAILURE;
}

/* in FAST mode, we re-insert a subset of best-scoring subtrees with BLO
* (i.e., in SLOW mode) */
if (!params.thorough && bestnode_list->current > 0)
Expand Down
37 changes: 35 additions & 2 deletions src/tree/utree_constraint.c
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,36 @@

#include "../pllmod_common.h"

/**
 * Count the set bits in a split bit vector, excluding padding bits.
 *
 * The popcount of all `split_len` elements is summed first; if `bit_count`
 * is not a multiple of the element width, any bits set above position
 * `bit_count % width` in the last element are subtracted again.
 *
 * @param bitv       bit vector to inspect (read only)
 * @param bit_count  number of meaningful bits in `bitv`
 * @param split_len  number of elements in `bitv`; if 0, it is derived from
 *                   `bit_count` via bitv_length()
 *
 * @return number of set bits among the meaningful positions of `bitv`
 *
 * NOTE(review): PLL_POPCNT32 is applied to every element, so this presumably
 * relies on pll_split_base_t being 32 bits wide -- confirm against pll.h.
 */
static inline unsigned int split_popcount(const pll_split_t bitv,
                                          unsigned int bit_count,
                                          unsigned int split_len)
{
  unsigned int split_size = sizeof(pll_split_base_t) * 8;
  unsigned int setb = 0;
  unsigned int i;

  if (!split_len)
    split_len = bitv_length(bit_count);

  for (i = 0; i < split_len; ++i)
  {
    setb += (unsigned int) PLL_POPCNT32(bitv[i]);
  }

  /* IMPORTANT: correct for padding bits in the last element! */
  unsigned int split_offset = bit_count % split_size;
  if (split_offset)
  {
    /* unsigned literal on purpose: with split_offset == 31 the expression
     * (1 << 31) would overflow a signed int, which is undefined behaviour */
    unsigned int mask = (1u << split_offset) - 1u;
    unsigned int last = bitv[split_len - 1];
    /* keep only the padding part of the last element and discount its bits */
    last &= ~mask;
    setb -= (unsigned int) PLL_POPCNT32(last);
  }

  return setb;
}

static inline void invert_split(pll_split_t bitv, unsigned int bit_count)
{
unsigned int split_size = sizeof(pll_split_base_t) * 8;
Expand Down Expand Up @@ -385,7 +415,10 @@ PLL_EXPORT int pllmod_utree_constraint_check_spr(pll_split_set_t * cons_splits,
const pll_split_t prune_split = get_node_split(splits, p_edge->back);
pll_split_t new_split = (pll_split_t) calloc(1, cons_split_len * sizeof(pll_split_base_t));

if (bitv_popcount(prune_split, cons_tip_count, cons_split_len) < cons_tip_count-1)
unsigned int pruned_count = split_popcount(prune_split, cons_tip_count, cons_split_len);
assert(pruned_count <= cons_tip_count);

if (pruned_count < cons_tip_count-1)
{
/* remaining subtree contains at least 2 constrained taxa -> traverse into regraft subtree */
regraft_split = find_nonempty_regraft_split(splits, cons_split_len, prune_split, r_edge);
Expand All @@ -399,7 +432,7 @@ PLL_EXPORT int pllmod_utree_constraint_check_spr(pll_split_set_t * cons_splits,
{
/* remaining subtree contains just 1 constrained taxon -> traverse into pruned subtree */
copy_split(new_split, prune_split, cons_split_len);
invert_split(new_split, cons_split_len);
invert_split(new_split, cons_tip_count);

regraft_split = find_nonempty_regraft_split(splits, cons_split_len, new_split, p_edge);

Expand Down

0 comments on commit 623e364

Please sign in to comment.