/**
 * Counts matched/skipped boundary crossings on the edges of @p sequence and
 * requests deletion of skipped ranges whose boundary edges have been crossed
 * too often.
 *
 * Pass 1 walks every site. For a skipped site (xskipped/yskipped) whose nearest
 * predecessor is matched (or the start site), the connecting backward edge gets
 * its "skipped edge" branch count increased. Likewise, for a skipped site whose
 * nearest successor is matched (or the end site), the connecting forward edge
 * gets its count increased.
 *
 * Pass 2 walks the sites again (from index 1). When a run of skipped sites is
 * entered through an edge whose count exceeds max_allowed_match_skip_branches,
 * the first skipped site is remembered. If the run is then closed by a matched
 * site that has an incoming edge above the same limit, delete_edge_range() is
 * called with that edge's index and the remembered start site.
 *
 * @param sequence  sequence whose sites and edges are inspected and updated.
 */
void Basic_alignment::check_skipped_boundaries(Sequence *sequence)
{
    vector<Site> *sites = sequence->get_sites();

    // First, find 'Match/Skipped' and 'Skipped/Matched' boundaries and update the counts
    //
    for(unsigned int i=0;i<sites->size();i++)
    {
        Site *tsite = &sites->at(i);

        const int  tsite_state   = tsite->get_path_state();
        const bool tsite_skipped = ( tsite_state==Site::xskipped || tsite_state==Site::yskipped );

        if( tsite->has_bwd_edge() )
        {
            // Of all incoming edges, take the one starting closest to this site
            // (largest start index).
            Edge *edge = tsite->get_first_bwd_edge();

            while( tsite->has_next_bwd_edge() )
            {
                Edge *another = tsite->get_next_bwd_edge();
                if( another->get_start_site_index() > edge->get_start_site_index() )
                    edge = another;
            }

            Site *psite = &sites->at( edge->get_start_site_index() );
            const int pstate = psite->get_path_state();

            // matched (or start) -> skipped boundary
            if( ( pstate==Site::matched || pstate==Site::start_site ) && tsite_skipped )
            {
                edge->increase_branch_count_as_skipped_edge();
            }
        }

        if( tsite->has_fwd_edge() )
        {
            // Of all outgoing edges, take the one ending closest to this site
            // (smallest end index). All forward edges of a site share the same
            // start index, so the comparison has to be made on the END index;
            // comparing start indices (as was done before) never selects
            // anything but the first edge.
            Edge *edge = tsite->get_first_fwd_edge();

            while( tsite->has_next_fwd_edge() )
            {
                Edge *another = tsite->get_next_fwd_edge();
                if( another->get_end_site_index() < edge->get_end_site_index() )
                    edge = another;
            }

            Site *nsite = &sites->at( edge->get_end_site_index() );
            const int nstate = nsite->get_path_state();

            // skipped -> matched (or end) boundary
            if( tsite_skipped && ( nstate==Site::matched || nstate==Site::ends_site ) )
            {
                edge->increase_branch_count_as_skipped_edge();
            }
        }
    }

    // Then, see if any pair of boundaries (covering a skipped gap) is above the limit. Delete the range.
    //
    bool non_skipped = true;   // true while we are outside a run of skipped sites
    int skip_start = -1;       // first site of the current run, if it was entered via an over-limit edge

    for(unsigned int i=1;i<sites->size();i++)
    {
        Site *tsite = &sites->at(i);
        int tstate = tsite->get_path_state();

        // Entering a run of skipped sites: remember its start only if the nearest
        // incoming edge has been counted as a skipped boundary too many times.
        if( non_skipped && ( tstate == Site::xskipped || tstate == Site::yskipped ) )
        {
            if( tsite->has_bwd_edge() )
            {
                Edge *edge = tsite->get_first_bwd_edge();

                while( tsite->has_next_bwd_edge() )
                {
                    Edge *another = tsite->get_next_bwd_edge();
                    if( another->get_start_site_index() > edge->get_start_site_index() )
                        edge = another;
                }

                if(edge->get_branch_count_as_skipped_edge()>max_allowed_match_skip_branches)
                {
                    skip_start = i;
                }
            }

            non_skipped = false;
        }

        // Leaving a remembered run at a matched site: look for an incoming edge
        // that is also above the limit. If several qualify, the last one visited wins.
        if(!non_skipped && skip_start>=0 && tstate == Site::matched)
        {
            int edge_ind = -1;

            if( tsite->has_bwd_edge() )
            {
                Edge *edge = tsite->get_first_bwd_edge();

                if(edge->get_branch_count_as_skipped_edge()>max_allowed_match_skip_branches)
                    edge_ind = edge->get_index();

                while( tsite->has_next_bwd_edge() )
                {
                    edge = tsite->get_next_bwd_edge();

                    if(edge->get_branch_count_as_skipped_edge()>max_allowed_match_skip_branches)
                        edge_ind = edge->get_index();
                }
            }

            if(edge_ind>=0)
            {
                Log_output::write_out("Basic_alignemnmt: delete range: "+Log_output::itos(edge_ind)+" "+Log_output::itos(skip_start)+" "+Log_output::itos(i)+"\n",4);
                this->delete_edge_range(sequence,edge_ind,skip_start);
            }

            non_skipped = true;
            skip_start = -1;
        }

        // Any real gap or match terminates the current run of skipped sites.
        if(tstate == Site::xgapped || tstate == Site::ygapped || tstate == Site::matched)
        {
            non_skipped = true;
            skip_start = -1;
        }
    }
}
/**
 * Creates the edges of the ancestral (parent) @p sequence from the edges of the
 * two child sequences `left` and `right`.
 *
 * Step 1 builds, per child, a list of the parent site indices that have a
 * counterpart in that child (in parent order). These lists are handed to
 * transfer_child_edge() and are also used to look up the parent index of a
 * child site.
 * NOTE(review): looking entries up by child site index assumes the child sites
 * appear in the parent in increasing order without omissions - confirm.
 *
 * Step 2 walks the parent sites from index 1 and, for each child that has a
 * site there:
 *   - transfers every backward edge of the child site to the parent;
 *   - if edges_for_skipped_flanked_by_gaps is set, adds extra edges from/to
 *     skipped sites that are flanked by gaps, using an existing child edge as
 *     the template;
 *   - adds a plain edge (i-1 -> i, weight argument 1.0) when a gapped/skipped
 *     site of one child directly follows a gapped/skipped site of the other;
 *   - records the child's skipped/real site index (and the last matched parent
 *     site) in the running Edge_history.
 *
 * The order of the calls is significant: edges are appended to @p sequence as
 * they are created.
 *
 * @param sequence  the parent sequence that receives the new edges.
 */
void Basic_alignment::create_ancestral_edges(Sequence *sequence)
{
    vector<Site> *sites = sequence->get_sites();

    vector<int> left_child_index;
    vector<int> right_child_index;

    // First create an index for the child's sites in the parent
    //
    for(unsigned int i=0;i<sites->size();i++)
    {
        Site_children *offspring = sites->at(i).get_children();

        if(offspring->left_index>=0)
        {
            left_child_index.push_back(i);
        }

        if(offspring->right_index>=0)
        {
            right_child_index.push_back(i);
        }
    }

    if(Settings::noise>4)
    {
        // Debug dump of both index lists, one line per child.
        stringstream ss;
        ss<<"Child sequence site indeces:"<<'\n';

        for(unsigned int i=0;i<left_child_index.size();i++)
            ss<<left_child_index.at(i)<<" ";
        ss<<'\n';

        for(unsigned int i=0;i<right_child_index.size();i++)
            ss<<right_child_index.at(i)<<" ";
        ss<<'\n';

        Log_output::write_out(ss.str(),5);
    }

    // Then copy the edges of child sequences in their parent.
    // Additionally, create edges for cases where skipped gap is flanked by a new gap.
    //
    Edge_history prev(-1,-1);

    for(unsigned int i=1;i<sites->size();i++)
    {
        Site *psite = &sites->at(i);
        int pstate = psite->get_path_state();

        Site_children *offspring = psite->get_children();

        // the parent site has a counterpart in the left child
        if(offspring->left_index>=0)
        {
            Site *tsite = left->get_site_at(offspring->left_index);

            // copy all edges arriving at the child site
            if( tsite->has_bwd_edge() )
            {
                Edge *child = tsite->get_first_bwd_edge();
                this->transfer_child_edge(sequence, child, &left_child_index, left_branch_length );

                while( tsite->has_next_bwd_edge() )
                {
                    child = tsite->get_next_bwd_edge();
                    this->transfer_child_edge(sequence, child, &left_child_index, left_branch_length );
                }
            }

            // these create edges to/from skipped sites flanked by gaps.
            if( (pstate == Site::matched || pstate == Site::ends_site ) && prev.left_skip_site_index >= 0 && edges_for_skipped_flanked_by_gaps)
            {
                // edge from the skipped site to the *next* site
                // as no better info is available, *this* edge is "copied" to one coming to the current site
                Edge query(prev.left_skip_site_index,prev.left_skip_site_index+1);
                int ind = left->get_fwd_edge_index_at_site(prev.left_skip_site_index,&query);

                if(ind>=0)
                {
                    Edge *child = &left->get_edges()->at(ind);
                    Edge edge( left_child_index.at(prev.left_skip_site_index), i );
                    this->transfer_child_edge(sequence, edge, child, left_branch_length );
                }

                prev.left_skip_site_index = -1;
            }
            else if(pstate == Site::xskipped && ( prev.path_state == Site::xgapped || prev.path_state == Site::ygapped ) && edges_for_skipped_flanked_by_gaps)
            {
                // the same here: use this as a template for an extra edge
                Edge query(offspring->left_index-1, offspring->left_index);
                int ind = left->get_bwd_edge_index_at_site(offspring->left_index,&query);

                if(ind>=0)
                {
                    Edge *child = &left->get_edges()->at(ind);
                    Edge edge( prev.match_site_index, i );
                    this->transfer_child_edge(sequence, edge, child, left_branch_length );
                }
            }

            // a left-only site directly after a right-only site: link the two neighbours
            if((pstate == Site::xgapped || pstate == Site::xskipped )&& (prev.path_state == Site::ygapped || prev.path_state == Site::yskipped ))
            {
                Edge edge(i-1,i,1.0);
                sequence->push_back_edge(edge);

                sequence->get_site_at( edge.get_start_site_index() )->add_new_fwd_edge_index( sequence->get_current_edge_index() );
                sequence->get_site_at( edge.get_end_site_index() )->add_new_bwd_edge_index( sequence->get_current_edge_index() );
            }

            if(pstate == Site::xskipped)
                prev.left_skip_site_index = offspring->left_index;
            else
                prev.left_real_site_index = offspring->left_index;

            if(pstate == Site::matched)
                prev.match_site_index = i;
        }

        // the parent site has a counterpart in the right child
        if(offspring->right_index>=0)
        {
            Site *tsite = right->get_site_at(offspring->right_index);

            // copy all edges arriving at the child site
            if( tsite->has_bwd_edge() )
            {
                Edge *child = tsite->get_first_bwd_edge();
                this->transfer_child_edge(sequence, child, &right_child_index, right_branch_length );

                while( tsite->has_next_bwd_edge() )
                {
                    child = tsite->get_next_bwd_edge();
                    this->transfer_child_edge(sequence, child, &right_child_index, right_branch_length );
                }
            }

            // mirror of the left-hand case: edges to/from skipped sites flanked by gaps
            if( (pstate == Site::matched || pstate == Site::ends_site ) && prev.right_skip_site_index >= 0 && edges_for_skipped_flanked_by_gaps)
            {
                Edge query(prev.right_skip_site_index,prev.right_skip_site_index+1);
                int ind = right->get_fwd_edge_index_at_site(prev.right_skip_site_index,&query);

                if(ind>=0)
                {
                    Edge *child = &right->get_edges()->at(ind);
                    Edge edge( right_child_index.at(prev.right_skip_site_index), i );
                    this->transfer_child_edge(sequence,edge, child, right_branch_length );
                }

                prev.right_skip_site_index = -1;
            }
            else if(pstate == Site::yskipped && ( prev.path_state == Site::xgapped || prev.path_state == Site::ygapped ) && edges_for_skipped_flanked_by_gaps)
            {
                Edge query(offspring->right_index-1, offspring->right_index);
                int ind = right->get_bwd_edge_index_at_site(offspring->right_index,&query);

                if(ind>=0)
                {
                    Edge *child = &right->get_edges()->at(ind);
                    Edge edge( prev.match_site_index, i );
                    this->transfer_child_edge(sequence, edge, child, right_branch_length );
                }
            }

            // a right-only site directly after a left-only site: link the two neighbours
            if((pstate == Site::ygapped || pstate == Site::yskipped) && (prev.path_state == Site::xgapped || prev.path_state == Site::xskipped))
            {
                Edge edge(i-1,i,1.0);
                sequence->push_back_edge(edge);

                sequence->get_site_at( edge.get_start_site_index() )->add_new_fwd_edge_index( sequence->get_current_edge_index() );
                sequence->get_site_at( edge.get_end_site_index() )->add_new_bwd_edge_index( sequence->get_current_edge_index() );
            }

            if(pstate == Site::yskipped)
                prev.right_skip_site_index = offspring->right_index;
            else
                prev.right_real_site_index = offspring->right_index;
        }

        // remembered for the "flanked by gaps" tests of the next site
        prev.path_state = pstate;
    }
}