Ejemplo n.º 1
0
// Simulate bisulfite (BS) treatment of a fragment.
//
// methFragment receives a copy of frag.  Every position is then visited: on the forward strand only 'C' is a
// conversion candidate, on the reverse strand only 'G'.  For a candidate, one random draw against the methylation
// level at that position decides whether the base is methylated (and therefore left untouched); otherwise a second
// draw against bsSeqOptions.bsConversionRate decides whether the base is converted (C->T forward, G->A reverse).
//
// methFragment  Output; overwritten with the treated copy of frag.
// frag          The source fragment; beginPosition(frag) is used as the offset into levels.
// levels        Methylation levels, queried through levelF() (forward) or levelR() (reverse).
// reverse       Selects the reverse-strand rules (G->A, levelR) instead of the forward ones (C->T, levelF).
void SequencingSimulator::_simulateBSTreatment(seqan::Dna5String & methFragment,
                                               TFragment const & frag,
                                               MethylationLevels const & levels,
                                               bool reverse)
{
    methFragment = frag;

    // The distribution does not depend on the position, so build it once instead of once per base.
    seqan::Pdf<seqan::Uniform<double> > pdf(0, 1);

    for (unsigned pos = 0; pos != length(frag); ++pos)
    {
        double level = reverse ? levels.levelR(pos + beginPosition(frag)) : levels.levelF(pos + beginPosition(frag));
        if ((!reverse && methFragment[pos] != 'C') || (reverse && methFragment[pos] != 'G'))  // Skip all non-cytosine chars
        {
            // The arguments are cast so that they match the %d / %u conversions of the message exactly.
            SEQAN_ASSERT_EQ_MSG(level, 0.0,
                                "Methylation for non-C should be 0 (pos+beginPosition(frag)=%d, reverse=%u)",
                                static_cast<int>(pos + beginPosition(frag)), static_cast<unsigned>(reverse));
            continue;
        }

        // Decide whether methFragment[pos] is methylated.  If this is the case then we leave it untouched.
        if (pickRandomNumber(methRng, pdf) < level)
            continue;

        // Otherwise, pick whether we will convert.
        if (pickRandomNumber(methRng, pdf) < seqOptions->bsSeqOptions.bsConversionRate)
            methFragment[pos] = reverse ? 'A' : 'T';
    }
}
Ejemplo n.º 2
0
/**
 * Registers a cut fragment as a chain and links it to the reads that contain it.
 *
 * Nothing happens unless frag is longer than min_length. Every entry of links whose
 * read sequence contains frag is classified:
 *   - "left" when the match begins less than min_length characters from the read start;
 *   - "right" when the match ends less than min_length characters from the read end.
 * Only when at least one read of each class exists is the fragment stored in chains
 * (unless its key is already present) and attached to the left reads via push_D_link
 * and to the right reads via push_A_link.
 *
 * The chain key is the fingerprint of frag padded with 'A' up to 2*min_length characters.
 *
 * @param frag        Fragment to register.
 * @param links       Link table; matching entries receive a D or A link to the fragment.
 * @param chains      Fingerprint -> chain sequence map; may gain one entry.
 * @param min_length  Minimum fragment length and distance threshold from the read ends.
 */
void check_cutted_frags(CharString frag, std::vector<table_entry*> &links, 
                        map<unsigned long long, string> &chains, unsigned int min_length){
    if(length(frag) <= min_length){
        return;
    }

    std::queue<int> l_link;
    std::queue<int> r_link;
    Pattern<CharString, ShiftOr > pattern(frag);
    for(unsigned int i=0; i<links.size(); ++i){
        CharString text = links[i]->get_short_read()->get_RNA_seq_sequence();
        Finder<CharString> finder(text);
        // The finder positions are only meaningful after a successful search, so
        // reads that do not contain the fragment must not be classified at all.
        if(!find(finder,pattern)){
            continue;
        }
        if(beginPosition(finder) < min_length){
            l_link.push(i);
        }
        // Written as an addition so that reads shorter than min_length cannot make
        // the unsigned subtraction "length(text) - min_length" wrap around.
        if(endPosition(finder) + min_length > length(text)){
            r_link.push(i);
        }
    }

    if(l_link.empty() || r_link.empty()){
        return;
    }

    string head;
    assign(head,frag);
    // Pad only when the fragment is shorter than the target length; an unconditional
    // "2*min_length - length" would underflow for long fragments and never terminate.
    const string::size_type target_length = static_cast<string::size_type>(min_length)*2;
    if(head.length() < target_length){
        head.append(target_length - head.length(), 'A');
    }

    // The key is needed for the lookup and for every link, so compute it once.
    const unsigned long long head_fp = fingerprint(head);
    if(chains.find(head_fp) == chains.end()){
        chains[head_fp] = toCString(frag);
    }

    while(!l_link.empty()){
        links[l_link.front()]->push_D_link(head_fp);
        l_link.pop();
    }
    while(!r_link.empty()){
        links[r_link.front()]->push_A_link(head_fp);
        r_link.pop();
    }
}
Ejemplo n.º 3
0
 // Rebuilds the stored begin iterator and hands out a reference to it.
 //
 // A fresh iterator positioned at beginPosition() is copied into itbegin on
 // every call, so a reference obtained from an earlier call refers to the
 // same object and observes the refreshed value.
 // NOTE(review): itbegin is assigned inside a const method, so it is
 // presumably declared mutable (or is not a per-object member) - confirm.
 iterator& begin() const { 
   iterator first(this, beginPosition());
   itbegin = first;
   return itbegin;
 }
Ejemplo n.º 4
0
void check_overlapping_nodes(::std::vector<table_entry*> & links, map<unsigned long long, string> & chains, int len,
                             ::std::map<unsigned long long, unsigned long long>& mapping, unsigned int min_overlap,
                             int ov_perc){
    ::std::map<unsigned long long, string>::iterator chain_it;
    ::std::map<unsigned long long, string>::iterator chain_it_2;
    ::std::vector<small_frag> short_blocks;
    stack<unsigned int> s;
    queue<unsigned long long> q;
    for(chain_it = chains.begin(); chain_it != chains.end(); ++chain_it){
        for(chain_it_2 = chains.begin(); chain_it_2 != chains.end(); ++chain_it_2){
            unsigned int ov = overlappedStringLength(chain_it->second,chain_it_2->second);
            if(chain_it != chain_it_2 && ov < (ov_perc*chain_it->second.length())/100 &&
               (ov_perc*ov < chain_it_2->second.length())/100 && ov > min_overlap){
                bool new_node = false;
                CharString pat_text=prefix(chain_it_2->second,ov);
                //::std::cout << chain_it->second << ::std::endl;
                //::std::cout << chain_it_2->second << ::std::endl;
                //::std::cout << ov << ::std::endl;
                Pattern<CharString, ShiftAnd> pattern(pat_text);
                for(unsigned int i=0; i<links.size();++i){
                    CharString link_read = links[i]->get_short_read()->get_RNA_seq_sequence();
                    Finder<CharString> finder(link_read);
                    if(find(finder,pattern) && (
                       prefix(link_read,beginPosition(finder)) == infix(chain_it->second,chain_it->second.length()-ov-beginPosition(finder),chain_it->second.length()-ov) ||
                       suffix(link_read,length(link_read) - endPosition(finder)) == infix(chain_it_2->second,ov,ov+endPosition(finder)))){
                        //::std::cout << link_read << ::std::endl;
                        //::std::cout << prefix(link_read,beginPosition(finder)) << ::std::endl;
                        //::std::cout << infix(chain_it->second,chain_it->second.length()-ov-beginPosition(finder),chain_it->second.length()-ov) << ::std::endl;
                        //::std::cout << suffix(link_read,length(link_read) - endPosition(finder)) << ::std::endl;
                        //::std::cout << infix(chain_it_2->second,ov,ov+endPosition(finder)) << ::std::endl;
                      
                        new_node = true;
                    }
                }
                if(new_node){
                    small_frag f;
                    f.frag_links.D_chain = chain_it->first;
                    f.frag_links.A_chain = chain_it_2->first;
                    f.frag = prefix(chain_it_2->second,ov);
                    short_blocks.push_back(f);
                }
            }else{
                if(chain_it != chain_it_2 && ov>=(ov_perc*chain_it->second.length())/100){
                    //::std::cout << "Chain_it sub-node of Chain_it_2" << ::std::endl;
                    //::std::cout << "Chain_it " << chain_it->second << ::std::endl;
                    //::std::cout << "Chain_it_2 " << chain_it_2->second << ::std::endl;
                    //::std::cout << ov << ::std::endl;
                    q.push(chain_it->first);
                }else{
                    if(chain_it != chain_it_2 && ov>=(ov_perc*chain_it_2->second.length())/100){
                        //::std::cout << "Chain_it_2 sub-node of Chain_it" << ::std::endl;
                        //::std::cout << "Chain_it " << chain_it->second << ::std::endl;
                        //::std::cout << "Chain_it_2 " <<chain_it_2->second << ::std::endl;
                        //::std::cout << ov << ::std::endl;
                        q.push(chain_it_2->first);
                    }
                }
            }
        }
    }

    for(unsigned int i=0; i<short_blocks.size(); ++i){
        bool sub_seq = false;
        for(unsigned int k=0; k<short_blocks.size(); ++k){
            if(short_blocks[i].frag == short_blocks[k].frag && i<k){
                links_pair erased_links;
                erased_links.D_chain = short_blocks[i].frag_links.D_chain;
                erased_links.A_chain = short_blocks[i].frag_links.A_chain;
                short_blocks[k].other_links.push_back(erased_links);
                sub_seq = true;
            }
            if(i!=k && (::seqan::length(short_blocks[i].frag)) < (::seqan::length(short_blocks[k].frag))){
                Finder<CharString> finder(short_blocks[k].frag);
                Pattern<CharString, ShiftAnd> pattern(short_blocks[i].frag);
                if(find(finder,pattern)){
                    links_pair erased_links;
                    erased_links.D_chain = short_blocks[i].frag_links.D_chain;
                    erased_links.A_chain = short_blocks[i].frag_links.A_chain;
                    //::std::cout << i << k << " - " << beginPosition(finder) << " " << endPosition(finder) << ::std::endl;
                    short_blocks[k].other_links.push_back(erased_links);
                    sub_seq = true;
                }
            }
        }
        if(sub_seq){
            s.push(i);
        }
    }

    while(!s.empty()){
        short_blocks.erase(short_blocks.begin()+s.top());
        s.pop();
    }
    while(!q.empty()){
        chains.erase(q.front());
        q.pop();
    }

    for(unsigned int i=0; i<short_blocks.size(); ++i){
        //::std::cout << short_blocks[i].frag << " " << length(short_blocks[i].frag) << ::std::endl; 
        string ch = "";
        for(unsigned int z = 0; z<len-length(short_blocks[i].frag); ++z){
            ch.append("A");
        }
        ch.append(toCString(short_blocks[i].frag));
        //if(chains.find(fingerprint(ch)) == chains.end()){//Start_If_5
            //chains[fingerprint(ch)] = ::seqan::toCString(short_blocks[i].frag);
            //::std::cout << ::seqan::toCString(short_blocks[i].frag) <<" "<< length(short_blocks[i].frag)<<::std::endl;
            //mapping[fingerprint(ch)] = fingerprint(ch);
            //Add the first link
            string first_half;
            assign(first_half,prefix(chains[short_blocks[i].frag_links.D_chain],len));
            string new_link_1 = first_half;
            new_link_1.append(ch);
            table_entry* link_1 = new table_entry(new_link_1,fingerprint(first_half),fingerprint(ch));
            link_1->push_D_link(short_blocks[i].frag_links.D_chain);
            link_1->push_A_link(short_blocks[i].frag_links.A_chain);
            links.push_back(link_1);
            /*
            //Add the second link
            string second_half;
            assign(second_half,prefix(chains[short_blocks[i].frag_links.A_chain],len));
            string new_link_2 = ch;
            new_link_2.append(second_half);
            table_entry* link_2 = new table_entry(new_link_2,fingerprint(ch),fingerprint(second_half));
            link_2->push_D_link(short_blocks[i].frag_links.D_chain);
            link_2->push_A_link(short_blocks[i].frag_links.A_chain);
            links.push_back(link_2);
            */
            //::std::cout<<links[short_blocks[i].frag_links.D_chain]->get_short_read()->get_RNA_seq_sequence()<<::std::endl;
            //::std::cout<<links[short_blocks[i].frag_links.A_chain]->get_short_read()->get_RNA_seq_sequence()<<::std::endl;

            for(unsigned int j=0; j<short_blocks[i].other_links.size(); ++j){//Start_For_6
                string second_half;
                assign(first_half,prefix(chains[short_blocks[i].other_links[j].D_chain],len));
                string new_link_2 = second_half;
                new_link_2.append(ch);
                table_entry* link_2 = new table_entry(new_link_2,fingerprint(second_half),fingerprint(ch));
                link_2->push_D_link(short_blocks[i].other_links[j].D_chain);
                link_2->push_A_link(short_blocks[i].other_links[j].A_chain);
                links.push_back(link_1);
            }//End_For_6
            //}//End_If_5
    }
}
Ejemplo n.º 5
0
// Completes link entries that are attached to a chain on one side only.
//
// For every entry of links that has A links but no D link (first section), or
// D links but no A link (second section), the chains are searched for part of
// the entry's read. A chain containing the pattern is attached to the entry;
// when the match leaves enough of the chain beyond it, the chain is split in
// two at beginPosition + len, the second part is registered in chains and
// mapping under the fingerprint of its (possibly 'A'-padded) first len
// characters, existing D links to the split chain are redirected to the new
// chain, and a new link entry joining the two parts is appended to links.
//
// links    Link table; entries gain D/A links and new entries are appended
//          while the table is being traversed.
// chains   Fingerprint -> chain sequence map; chains may be shortened and new
//          chains inserted while the map is being traversed.
// len      Length used for the read pattern, the split offset and the padding.
// mapping  Receives an identity entry for every newly created chain key.
void linking_refinement(::std::vector<table_entry*> & links, map<unsigned long long, string> & chains, unsigned int len,
                        ::std::map<unsigned long long, unsigned long long> & mapping){
    // links.size() is re-read on every iteration, so entries appended below are visited too.
    for(unsigned int i=0; i<links.size(); ++i){
        // Linked only on the right: the entry has A links but no D link.
        if(links[i]->size_D_link() == 0 && links[i]->size_A_link() != 0){
            //::std::cout << "D link" << ::std::endl;
            // Search pattern: the first len characters of the read.
            CharString p = ::seqan::prefix(links[i]->get_short_read()->get_RNA_seq_sequence(),len);
            Pattern<CharString, ShiftOr > pattern(p);
            ::std::map<unsigned long long, string>::iterator chain_it;
            // Keys of chains already split (or created) for this entry; they are not searched again.
            ::std::set<unsigned long long> modif_chains;
            for(chain_it = chains.begin(); chain_it != chains.end(); ++chain_it){ 
                
                CharString text = chain_it->second;
                Finder<CharString> finder(text);
                
                if(modif_chains.find(chain_it->first) == modif_chains.end() && find(finder,pattern)){
                    links[i]->push_D_link(chain_it->first);
                    // Split only when more than len characters follow the match.
                    if(chain_it->second.length()- endPosition(finder) > len){
                        //::std::cout << "D " << (i+1) << " " << beginPosition(finder) << ::std::endl;
                        CharString pre = ::seqan::prefix(chain_it->second, beginPosition(finder) + len);
                        string str_pre = ::seqan::toCString(pre);
                        CharString suf = ::seqan::suffix(chain_it->second, beginPosition(finder) + len);
                        string str_suf = ::seqan::toCString(suf);
                        //::std::cout << chain_it->second << " - " << chain_it->second.length() << ::std::endl;
                        // The first part is certainly at least len long, because it
                        // ends with a match of a prefix of length len...
                        chains[chain_it->first] = str_pre;
                        //::std::cout << str_pre << " - " << str_pre.length() << ::std::endl;
                        modif_chains.insert(chain_it->first);
                        // ...but the second part could be shorter than len.
                        string head;
                        if(str_suf.length() >= len){
                            head = ::seqan::toCString(::seqan::prefix(suf,len));
                            chains[fingerprint(head)] = str_suf;
                            mapping[fingerprint(head)] = fingerprint(head);
                        }else{
                            // Pad the key string with 'A' up to len characters; the stored chain stays unpadded.
                            head = str_suf;
                            for(unsigned int z=0; z<len-str_suf.length();++z){
                                head.append("A");
                            }
                            chains[fingerprint(head)] = str_suf;
                            mapping[fingerprint(head)] = fingerprint(head);
                        }
                        //::std::cout << str_suf << " - " << str_suf.length() << ::std::endl << ::std::endl;
                        modif_chains.insert(fingerprint(head));
                        // Redirect every D link that pointed at the split chain to the new chain.
                        // NOTE(review): this also redirects the D link pushed onto links[i]
                        // just above - confirm that this is intended.
                        for(unsigned int z=0; z<links.size();++z){
                            for(int k=0; k<links[z]->size_D_link();++k){
                                if(links[z]->at_D_link(k) == chain_it->first){
                                    links[z]->at_D_link(k) = fingerprint(head);
                                }
                            }
                        }
                        // Add a link between the two chains just created: the last len
                        // characters of the first part followed by head.
                        CharString l_part = chains[chain_it->first];
                        string new_link = ::seqan::toCString(::seqan::suffix(l_part,length(l_part) - len));
                        unsigned long long f_l = fingerprint(new_link);
                        new_link.append(head);
                        table_entry* t_new = new table_entry(new_link,f_l,fingerprint(head));
                        t_new->push_D_link(chain_it->first);
                        t_new->push_A_link(fingerprint(head));
                        links.push_back(t_new);
                    }
                }
            }
        }
        
        // Linked only on the left: the entry has D links but no A link.
        if(links[i]->size_A_link() == 0 && links[i]->size_D_link() != 0){
            //::std::cout << "A link" << ::std::endl;
            // Search pattern: the read from position len to its end (SeqAn's suffix()
            // takes the begin position as its second argument).
            CharString p = ::seqan::suffix(links[i]->get_short_read()->get_RNA_seq_sequence(),len);
            Pattern<CharString, ShiftOr > pattern(p);
            ::std::map<unsigned long long, string>::iterator chain_it;
            // Keys of chains already split (or created) for this entry; they are not searched again.
            ::std::set<unsigned long long> modif_chains;
            for(chain_it = chains.begin(); chain_it != chains.end(); ++chain_it){ 
                CharString text = chain_it->second;
                Finder<CharString> finder(text);
            
                if(modif_chains.find(chain_it->first) == modif_chains.end() && find(finder,pattern)){
                //if(find(finder,pattern)){
                    //::std::cout << "1 - if " << beginPosition(finder) << " " << endPosition(finder) << ::std::endl;
                    // A match at the very start of the chain links the chain as it is.
                    if(beginPosition(finder) == 0){
                        links[i]->push_A_link(chain_it->first);
                    }
                    // NOTE(review): this tests the match end against len, while the split
                    // below happens at beginPosition + len; for a match starting at 0 both
                    // this block and the one above can run - confirm the intended condition.
                    if(endPosition(finder) > len){
                        //::std::cout << "A " << (i+1) << " " << beginPosition(finder) << ::std::endl;
                        CharString pre = ::seqan::prefix(chain_it->second, beginPosition(finder) + len);
                        string str_pre = ::seqan::toCString(pre);
                        CharString suf = ::seqan::suffix(chain_it->second, beginPosition(finder) + len);
                        string str_suf = ::seqan::toCString(suf);
                        chains[chain_it->first] = str_pre;
                        //::std::cout << str_pre << " - " << str_pre.length() << ::std::endl;
                        modif_chains.insert(chain_it->first);
                        string head;
                        if(str_suf.length() >= len){
                            head = ::seqan::toCString(::seqan::prefix(suf,len));
                            chains[fingerprint(head)] = str_suf;
                            mapping[fingerprint(head)] = fingerprint(head);
                        }else{
                            // Pad the key string with 'A' up to len characters; the stored chain stays unpadded.
                            head = str_suf;
                            for(unsigned int z=0; z<len-str_suf.length();++z){
                                head.append("A");
                            }
                            chains[fingerprint(head)] = str_suf;
                            mapping[fingerprint(head)] = fingerprint(head);
                        }
                        //::std::cout << str_suf << " - " << str_suf.length() << ::std::endl << ::std::endl;
                        modif_chains.insert(fingerprint(head));
                        // Redirect every D link that pointed at the split chain to the new chain.
                        // NOTE(review): identical to the first section; it is not visible
                        // from here whether A links should be updated instead - confirm.
                        for(unsigned int z=0; z<links.size();++z){
                            for(int k=0; k<links[z]->size_D_link();++k){
                                if(links[z]->at_D_link(k) == chain_it->first){
                                    links[z]->at_D_link(k) = fingerprint(head);
                                }
                            }
                        }
                        // Add a link between the two chains just created: the last len
                        // characters of the first part followed by head.
                        CharString l_part = chains[chain_it->first];
                        string new_link = ::seqan::toCString(::seqan::suffix(l_part,length(l_part) - len));
                        unsigned long long f_l = fingerprint(new_link);
                        new_link.append(head);
                        table_entry* t_new = new table_entry(new_link,f_l,fingerprint(head));
                        t_new->push_D_link(chain_it->first);
                        t_new->push_A_link(fingerprint(head));
                        links.push_back(t_new);

                        // The entry itself is attached to the newly created chain.
                        links[i]->push_A_link(fingerprint(head));
                    }
                }
            }
        }
    }
    //::std::cout << chains.size() << ::std::endl;
}