Exemplo n.º 1
0
/**
 * Counts edges that border skipped regions of a sequence and deletes edge
 * ranges around skipped regions that have been skipped too many times.
 *
 * The work is done in two passes over the sites of the sequence:
 *  1. For every site, the backward edge with the largest start-site index and
 *     the forward edge with the smallest end-site index are looked up. If that
 *     edge joins a matched/start site to a skipped site, or a skipped site to a
 *     matched/end site, its "skipped edge" branch count is increased.
 *  2. Runs of skipped sites are scanned. If the edge entering the first site of
 *     a run has a count above max_allowed_match_skip_branches, and the first
 *     matched site reached afterwards (with no gapped site in between) also has
 *     a backward edge above that limit, delete_edge_range() is called.
 *
 * @param sequence  sequence whose sites and edges are inspected and updated in place
 */
void Basic_alignment::check_skipped_boundaries(Sequence *sequence)
{

    vector<Site> *sites = sequence->get_sites();

    // First, find 'Match/Skipped' and 'Skipped/Matched' boundaries and update the counts
    //
    for(unsigned int i=0;i<sites->size();i++)
    {
        Site *tsite = &sites->at(i);

        if( tsite->has_bwd_edge() )
        {
            // Pick the backward edge that starts at the highest site index
            // (presumably the closest preceding site -- assumes indices grow along the sequence).
            Edge *edge = tsite->get_first_bwd_edge();

            while( tsite->has_next_bwd_edge() )
            {
                Edge *another = tsite->get_next_bwd_edge();
                if( another->get_start_site_index() > edge->get_start_site_index() )
                    edge = another;
            }

            Site *psite = &sites->at( edge->get_start_site_index() );

            // Boundary 'matched (or start) -> skipped'
            if( ( psite->get_path_state()==Site::matched || psite->get_path_state()==Site::start_site )
                && ( tsite->get_path_state()==Site::xskipped || tsite->get_path_state()==Site::yskipped ) )
            {
                edge->increase_branch_count_as_skipped_edge();
            }
        }

        if( tsite->has_fwd_edge() )
        {
            // Mirror of the backward case: pick the forward edge that ends at the
            // lowest site index. The edges are compared by their END site; comparing
            // start sites cannot tell them apart.
            // NOTE(review): assumes every forward edge of a site starts at that site -- confirm.
            Edge *edge = tsite->get_first_fwd_edge();

            while( tsite->has_next_fwd_edge() )
            {
                Edge *another = tsite->get_next_fwd_edge();
                if( another->get_end_site_index() < edge->get_end_site_index() )
                    edge = another;
            }

            Site *nsite = &sites->at( edge->get_end_site_index() );

            // Boundary 'skipped -> matched (or end)'
            if( ( tsite->get_path_state()==Site::xskipped || tsite->get_path_state()==Site::yskipped )
                && ( nsite->get_path_state()==Site::matched || nsite->get_path_state()==Site::ends_site ) )
            {
                edge->increase_branch_count_as_skipped_edge();
            }
        }
    }

    // Then, see if any pair of boundaries (covering a skipped gap) is above the limit. Delete the range.
    //
    bool non_skipped = true;   // true while the scan is outside a run of skipped sites
    int skip_start = -1;       // first site of the current run, set only if its entering edge is above the limit
    for(unsigned int i=1;i<sites->size();i++)
    {
        Site *tsite = &sites->at(i);
        int tstate = tsite->get_path_state();

        // Entering a run of skipped sites
        if( non_skipped && ( tstate == Site::xskipped || tstate == Site::yskipped ) )
        {

            if( tsite->has_bwd_edge() )
            {
                // Same selection as in the first pass: backward edge with the highest start index
                Edge *edge = tsite->get_first_bwd_edge();
                while( tsite->has_next_bwd_edge() )
                {
                    Edge *another = tsite->get_next_bwd_edge();
                    if( another->get_start_site_index() > edge->get_start_site_index() )
                        edge = another;
                }
                if(edge->get_branch_count_as_skipped_edge()>max_allowed_match_skip_branches)
                {
                    skip_start = i;
                }
            }

            non_skipped = false;
        }

        // Leaving a flagged run at a matched site
        if(!non_skipped && skip_start>=0 && tstate == Site::matched)
        {

            // Index of the last backward edge of this site whose count is above the limit (-1 if none)
            int edge_ind = -1;
            if( tsite->has_bwd_edge() )
            {
                Edge *edge = tsite->get_first_bwd_edge();

                if(edge->get_branch_count_as_skipped_edge()>max_allowed_match_skip_branches)
                    edge_ind = edge->get_index();

                while( tsite->has_next_bwd_edge() )
                {
                    edge = tsite->get_next_bwd_edge();

                    if(edge->get_branch_count_as_skipped_edge()>max_allowed_match_skip_branches)
                        edge_ind = edge->get_index();

                }
            }

            if(edge_ind>=0)
            {
                Log_output::write_out("Basic_alignemnmt: delete range: "+Log_output::itos(edge_ind)+" "+Log_output::itos(skip_start)+" "+Log_output::itos(i)+"\n",4);
                this->delete_edge_range(sequence,edge_ind,skip_start);
            }

            // The run state is reset just below, as for every matched site.
        }

        // Any gapped or matched site ends the current run of skipped sites
        if(tstate == Site::xgapped || tstate == Site::ygapped || tstate == Site::matched)
        {
            non_skipped = true;
            skip_start = -1;
        }
    }
}
Exemplo n.º 2
0
/**
 * Creates the edges of an ancestral (parent) sequence from the edges of its
 * two child sequences (the members `left` and `right`).
 *
 * Steps:
 *  1. Build, for each child, a list of the parent site indices that have a
 *     site in that child.
 *  2. Optionally (Settings::noise > 4) log those lists.
 *  3. Walk the parent sites from index 1 on. For every parent site that has a
 *     child site, pass each backward edge of the child site to
 *     transfer_child_edge(). Additional edges are created around skipped
 *     sites flanked by gaps (when edges_for_skipped_flanked_by_gaps is set)
 *     and between adjacent sites where a gap/skip in one child directly
 *     follows a gap/skip in the other.
 *
 * @param sequence  parent sequence; its edges and the edge indices of its sites are modified
 */
void Basic_alignment::create_ancestral_edges(Sequence *sequence)
{

    vector<Site> *sites = sequence->get_sites();

    vector<int> left_child_index;
    vector<int> right_child_index;

    // First create an index for the child's sites in the parent
    //
    // Each parent site with a left (right) child appends its own index, so entry k
    // is the k-th such parent site.
    // NOTE(review): used below as "child site index -> parent site index"; this presumably
    // relies on child indices appearing in increasing order without gaps -- confirm.
    for(unsigned int i=0;i<sites->size();i++)
    {
        Site_children *offspring = sites->at(i).get_children();

        if(offspring->left_index>=0)
        {
            left_child_index.push_back(i);
        }
        if(offspring->right_index>=0)
        {
            right_child_index.push_back(i);
        }
    }

    if(Settings::noise>4)
    {
        stringstream ss;
        ss<<"Child sequence site indeces:"<<'\n';
        for(unsigned int i=0;i<left_child_index.size();i++)
            ss<<left_child_index.at(i)<<" ";

        ss<<'\n';
        for(unsigned int i=0;i<right_child_index.size();i++)
            ss<<right_child_index.at(i)<<" ";
        ss<<'\n';

        Log_output::write_out(ss.str(),5);
    }

    // Then copy the edges of child sequences in their parent.
    // Additionally, create edges for cases where skipped gap is flanked by a new gap.
    //
    // 'prev' carries information about the sites visited so far: the last path state,
    // the last skipped/real child site indices and the last matched parent site.
    Edge_history prev(-1,-1);

    for(unsigned int i=1;i<sites->size();i++)
    {
        Site *psite = &sites->at(i);
        int pstate = psite->get_path_state();


        Site_children *offspring = psite->get_children();

        // left sequence is matched
        if(offspring->left_index>=0)
        {
            Site *tsite = left->get_site_at(offspring->left_index);

            // Transfer every backward edge of the left child site to the parent
            if( tsite->has_bwd_edge() )
            {
                Edge *child = tsite->get_first_bwd_edge();

                this->transfer_child_edge(sequence, child, &left_child_index, left_branch_length );

                while( tsite->has_next_bwd_edge() )
                {
                    child = tsite->get_next_bwd_edge();
                    this->transfer_child_edge(sequence, child, &left_child_index, left_branch_length );
                }
            }

            // these create edges to/from skipped sites flanked by gaps.
            if( (pstate == Site::matched || pstate == Site::ends_site ) && prev.left_skip_site_index >= 0 && edges_for_skipped_flanked_by_gaps)
            {
                // edge from the skipped site to the *next* site
                // as no better info is available, *this* edge is "copied" to one coming to the current site
                Edge query(prev.left_skip_site_index,prev.left_skip_site_index+1);
                int ind = left->get_fwd_edge_index_at_site(prev.left_skip_site_index,&query);

                if(ind>=0)
                {
                    Edge *child = &left->get_edges()->at(ind);
                    Edge edge( left_child_index.at(prev.left_skip_site_index), i );
                    this->transfer_child_edge(sequence, edge, child, left_branch_length );
                }

                // the pending skipped site has been handled
                prev.left_skip_site_index = -1;
            }
            else if(pstate == Site::xskipped && ( prev.path_state == Site::xgapped || prev.path_state == Site::ygapped ) && edges_for_skipped_flanked_by_gaps)
            {
                // the same here: use this as a template for an extra edge
                Edge query(offspring->left_index-1, offspring->left_index);
                int ind = left->get_bwd_edge_index_at_site(offspring->left_index,&query);

                if(ind>=0)
                {
                    Edge *child = &left->get_edges()->at(ind);
                    // extra edge from the last matched parent site to the current site
                    Edge edge( prev.match_site_index, i );
                    this->transfer_child_edge(sequence, edge, child, left_branch_length );
                }

            }

            // An x-gap/skip directly after a y-gap/skip: join the two adjacent parent sites
            // with a new edge and register it on both of them.
            // NOTE(review): the third constructor argument (1.0) is presumably the edge weight -- confirm.
            if((pstate == Site::xgapped || pstate == Site::xskipped )&& (prev.path_state == Site::ygapped || prev.path_state == Site::yskipped ))
            {

                Edge edge(i-1,i,1.0);
                sequence->push_back_edge(edge);

                sequence->get_site_at( edge.get_start_site_index() )->add_new_fwd_edge_index( sequence->get_current_edge_index() );
                sequence->get_site_at( edge.get_end_site_index()   )->add_new_bwd_edge_index( sequence->get_current_edge_index() );

            }

            // Remember what was seen in the left child for the following sites
            if(pstate == Site::xskipped)
                prev.left_skip_site_index = offspring->left_index;
            else
                prev.left_real_site_index = offspring->left_index;

            if(pstate == Site::matched)
                prev.match_site_index = i;
        }

        // right sequence is matched: same procedure as for the left child,
        // with the x/y states swapped
        if(offspring->right_index>=0)
        {
            Site *tsite = right->get_site_at(offspring->right_index);

            // Transfer every backward edge of the right child site to the parent
            if( tsite->has_bwd_edge() )
            {
                Edge *child = tsite->get_first_bwd_edge();
                this->transfer_child_edge(sequence, child, &right_child_index, right_branch_length );

                while( tsite->has_next_bwd_edge() )
                {
                    child = tsite->get_next_bwd_edge();
                    this->transfer_child_edge(sequence, child, &right_child_index, right_branch_length );
                }
            }

            if( (pstate == Site::matched || pstate == Site::ends_site ) && prev.right_skip_site_index >= 0 && edges_for_skipped_flanked_by_gaps)
            {
                // edge from the previously skipped right-child site, used as a template
                // for an edge coming to the current site
                Edge query(prev.right_skip_site_index,prev.right_skip_site_index+1);
                int ind = right->get_fwd_edge_index_at_site(prev.right_skip_site_index,&query);

                if(ind>=0)
                {
                    Edge *child = &right->get_edges()->at(ind);
                    Edge edge( right_child_index.at(prev.right_skip_site_index), i );
                    this->transfer_child_edge(sequence,edge, child, right_branch_length );
                }

                // the pending skipped site has been handled
                prev.right_skip_site_index = -1;
            }
            else if(pstate == Site::yskipped && ( prev.path_state == Site::xgapped || prev.path_state == Site::ygapped ) && edges_for_skipped_flanked_by_gaps)
            {
                // edge entering the skipped right-child site, used as a template for an extra edge
                Edge query(offspring->right_index-1, offspring->right_index);
                int ind = right->get_bwd_edge_index_at_site(offspring->right_index,&query);

                if(ind>=0)
                {
                    Edge *child = &right->get_edges()->at(ind);
                    // extra edge from the last matched parent site to the current site
                    Edge edge( prev.match_site_index, i );
                    this->transfer_child_edge(sequence, edge, child, right_branch_length );
                }

            }

            // A y-gap/skip directly after an x-gap/skip: join the two adjacent parent sites
            if((pstate == Site::ygapped || pstate == Site::yskipped) && (prev.path_state == Site::xgapped || prev.path_state == Site::xskipped))
            {

                Edge edge(i-1,i,1.0);
                sequence->push_back_edge(edge);

                sequence->get_site_at( edge.get_start_site_index() )->add_new_fwd_edge_index( sequence->get_current_edge_index() );
                sequence->get_site_at( edge.get_end_site_index()   )->add_new_bwd_edge_index( sequence->get_current_edge_index() );
            }

            // Remember what was seen in the right child for the following sites
            if(pstate == Site::yskipped)
                prev.right_skip_site_index = offspring->right_index;
            else
                prev.right_real_site_index = offspring->right_index;

        }
        // Updated only after both children are processed, so both branches above
        // see the state of the previous parent site.
        prev.path_state = pstate;
    }
}