// Simulate single-end sequencing from a fragment. void SequencingSimulator::_simulateBSTreatment(seqan::Dna5String & methFragment, TFragment const & frag, MethylationLevels const & levels, bool reverse) { methFragment = frag; for (unsigned pos = 0; pos != length(frag); ++pos) { double level = reverse ? levels.levelR(pos + beginPosition(frag)) : levels.levelF(pos + beginPosition(frag)); if ((!reverse && methFragment[pos] != 'C') || (reverse && methFragment[pos] != 'G')) // Skip all non-cyteline chars { SEQAN_ASSERT_EQ_MSG(level, 0.0, "Methylation for non-C should be 0 (pos+beginPosition(frag)=%d, reverse=%u", pos + beginPosition(frag), reverse); continue; } // Decide whether methFragment[pos] is methylated. If this is the case then we leave it untouched. seqan::Pdf<seqan::Uniform<double> > pdf(0, 1); if (pickRandomNumber(methRng, pdf) < level) continue; // Otherwise, pick whether we will convert. if (pickRandomNumber(methRng, pdf) < seqOptions->bsSeqOptions.bsConversionRate) methFragment[pos] = reverse ? 'A' : 'T'; } }
void check_cutted_frags(CharString frag, std::vector<table_entry*> &links, map<unsigned long long, string> &chains, unsigned int min_length){ if(length(frag) > min_length){ std::queue<int> l_link; std::queue<int> r_link; Pattern<CharString, ShiftOr > pattern(frag); for(unsigned int i=0; i<links.size(); ++i){ CharString text = links[i]->get_short_read()->get_RNA_seq_sequence(); Finder<CharString> finder(text); find(finder,pattern); if(beginPosition(finder) < min_length){ //std::cout << "L link " << i << ::std::endl; l_link.push(i); } if(endPosition(finder) > length(text) - min_length){ //std::cout << "R link" << ::std::endl; r_link.push(i); } } if(l_link.size() != 0 && r_link.size() != 0){ string head; assign(head,frag); for(unsigned int z=0; z<min_length*2 - length(frag);++z){ head.append("A"); } if(chains.find(fingerprint(head)) == chains.end()){ chains[fingerprint(head)] = toCString(frag); //std::cerr << "CUT: " << frag << " " << length(frag) << std::endl; }else{ //std::cerr << "Problem:" << std::endl; //std::cerr << chains[fingerprint(head)] << std::endl; //std::cerr << toCString(frag) << std::endl; } //::std::cout << toCString(frag) << ::std::endl; while(!l_link.empty()){ links[l_link.front()]->push_D_link(fingerprint(head)); l_link.pop(); } while(!r_link.empty()){ links[r_link.front()]->push_A_link(fingerprint(head)); r_link.pop(); } } } }
// Returns a reference to the member iterator itbegin after repositioning it at
// beginPosition() of this object.
// The returned reference always denotes the same member, so its value is overwritten by
// the next call to begin(). The method is const yet assigns to itbegin, so itbegin is
// presumably declared mutable - verify against the class definition.
iterator& begin() const
{
    iterator first(this, beginPosition());
    itbegin = first;
    return itbegin;
}
void check_overlapping_nodes(::std::vector<table_entry*> & links, map<unsigned long long, string> & chains, int len, ::std::map<unsigned long long, unsigned long long>& mapping, unsigned int min_overlap, int ov_perc){ ::std::map<unsigned long long, string>::iterator chain_it; ::std::map<unsigned long long, string>::iterator chain_it_2; ::std::vector<small_frag> short_blocks; stack<unsigned int> s; queue<unsigned long long> q; for(chain_it = chains.begin(); chain_it != chains.end(); ++chain_it){ for(chain_it_2 = chains.begin(); chain_it_2 != chains.end(); ++chain_it_2){ unsigned int ov = overlappedStringLength(chain_it->second,chain_it_2->second); if(chain_it != chain_it_2 && ov < (ov_perc*chain_it->second.length())/100 && (ov_perc*ov < chain_it_2->second.length())/100 && ov > min_overlap){ bool new_node = false; CharString pat_text=prefix(chain_it_2->second,ov); //::std::cout << chain_it->second << ::std::endl; //::std::cout << chain_it_2->second << ::std::endl; //::std::cout << ov << ::std::endl; Pattern<CharString, ShiftAnd> pattern(pat_text); for(unsigned int i=0; i<links.size();++i){ CharString link_read = links[i]->get_short_read()->get_RNA_seq_sequence(); Finder<CharString> finder(link_read); if(find(finder,pattern) && ( prefix(link_read,beginPosition(finder)) == infix(chain_it->second,chain_it->second.length()-ov-beginPosition(finder),chain_it->second.length()-ov) || suffix(link_read,length(link_read) - endPosition(finder)) == infix(chain_it_2->second,ov,ov+endPosition(finder)))){ //::std::cout << link_read << ::std::endl; //::std::cout << prefix(link_read,beginPosition(finder)) << ::std::endl; //::std::cout << infix(chain_it->second,chain_it->second.length()-ov-beginPosition(finder),chain_it->second.length()-ov) << ::std::endl; //::std::cout << suffix(link_read,length(link_read) - endPosition(finder)) << ::std::endl; //::std::cout << infix(chain_it_2->second,ov,ov+endPosition(finder)) << ::std::endl; new_node = true; } } if(new_node){ small_frag f; 
f.frag_links.D_chain = chain_it->first; f.frag_links.A_chain = chain_it_2->first; f.frag = prefix(chain_it_2->second,ov); short_blocks.push_back(f); } }else{ if(chain_it != chain_it_2 && ov>=(ov_perc*chain_it->second.length())/100){ //::std::cout << "Chain_it sub-node of Chain_it_2" << ::std::endl; //::std::cout << "Chain_it " << chain_it->second << ::std::endl; //::std::cout << "Chain_it_2 " << chain_it_2->second << ::std::endl; //::std::cout << ov << ::std::endl; q.push(chain_it->first); }else{ if(chain_it != chain_it_2 && ov>=(ov_perc*chain_it_2->second.length())/100){ //::std::cout << "Chain_it_2 sub-node of Chain_it" << ::std::endl; //::std::cout << "Chain_it " << chain_it->second << ::std::endl; //::std::cout << "Chain_it_2 " <<chain_it_2->second << ::std::endl; //::std::cout << ov << ::std::endl; q.push(chain_it_2->first); } } } } } for(unsigned int i=0; i<short_blocks.size(); ++i){ bool sub_seq = false; for(unsigned int k=0; k<short_blocks.size(); ++k){ if(short_blocks[i].frag == short_blocks[k].frag && i<k){ links_pair erased_links; erased_links.D_chain = short_blocks[i].frag_links.D_chain; erased_links.A_chain = short_blocks[i].frag_links.A_chain; short_blocks[k].other_links.push_back(erased_links); sub_seq = true; } if(i!=k && (::seqan::length(short_blocks[i].frag)) < (::seqan::length(short_blocks[k].frag))){ Finder<CharString> finder(short_blocks[k].frag); Pattern<CharString, ShiftAnd> pattern(short_blocks[i].frag); if(find(finder,pattern)){ links_pair erased_links; erased_links.D_chain = short_blocks[i].frag_links.D_chain; erased_links.A_chain = short_blocks[i].frag_links.A_chain; //::std::cout << i << k << " - " << beginPosition(finder) << " " << endPosition(finder) << ::std::endl; short_blocks[k].other_links.push_back(erased_links); sub_seq = true; } } } if(sub_seq){ s.push(i); } } while(!s.empty()){ short_blocks.erase(short_blocks.begin()+s.top()); s.pop(); } while(!q.empty()){ chains.erase(q.front()); q.pop(); } for(unsigned int i=0; 
i<short_blocks.size(); ++i){ //::std::cout << short_blocks[i].frag << " " << length(short_blocks[i].frag) << ::std::endl; string ch = ""; for(unsigned int z = 0; z<len-length(short_blocks[i].frag); ++z){ ch.append("A"); } ch.append(toCString(short_blocks[i].frag)); //if(chains.find(fingerprint(ch)) == chains.end()){//Start_If_5 //chains[fingerprint(ch)] = ::seqan::toCString(short_blocks[i].frag); //::std::cout << ::seqan::toCString(short_blocks[i].frag) <<" "<< length(short_blocks[i].frag)<<::std::endl; //mapping[fingerprint(ch)] = fingerprint(ch); //Add the first link string first_half; assign(first_half,prefix(chains[short_blocks[i].frag_links.D_chain],len)); string new_link_1 = first_half; new_link_1.append(ch); table_entry* link_1 = new table_entry(new_link_1,fingerprint(first_half),fingerprint(ch)); link_1->push_D_link(short_blocks[i].frag_links.D_chain); link_1->push_A_link(short_blocks[i].frag_links.A_chain); links.push_back(link_1); /* //Add the second link string second_half; assign(second_half,prefix(chains[short_blocks[i].frag_links.A_chain],len)); string new_link_2 = ch; new_link_2.append(second_half); table_entry* link_2 = new table_entry(new_link_2,fingerprint(ch),fingerprint(second_half)); link_2->push_D_link(short_blocks[i].frag_links.D_chain); link_2->push_A_link(short_blocks[i].frag_links.A_chain); links.push_back(link_2); */ //::std::cout<<links[short_blocks[i].frag_links.D_chain]->get_short_read()->get_RNA_seq_sequence()<<::std::endl; //::std::cout<<links[short_blocks[i].frag_links.A_chain]->get_short_read()->get_RNA_seq_sequence()<<::std::endl; for(unsigned int j=0; j<short_blocks[i].other_links.size(); ++j){//Start_For_6 string second_half; assign(first_half,prefix(chains[short_blocks[i].other_links[j].D_chain],len)); string new_link_2 = second_half; new_link_2.append(ch); table_entry* link_2 = new table_entry(new_link_2,fingerprint(second_half),fingerprint(ch)); link_2->push_D_link(short_blocks[i].other_links[j].D_chain); 
link_2->push_A_link(short_blocks[i].other_links[j].A_chain); links.push_back(link_1); }//End_For_6 //}//End_If_5 } }
void linking_refinement(::std::vector<table_entry*> & links, map<unsigned long long, string> & chains, unsigned int len, ::std::map<unsigned long long, unsigned long long> & mapping){ for(unsigned int i=0; i<links.size(); ++i){ //Linkato solo a dx if(links[i]->size_D_link() == 0 && links[i]->size_A_link() != 0){ //::std::cout << "D link" << ::std::endl; CharString p = ::seqan::prefix(links[i]->get_short_read()->get_RNA_seq_sequence(),len); Pattern<CharString, ShiftOr > pattern(p); ::std::map<unsigned long long, string>::iterator chain_it; ::std::set<unsigned long long> modif_chains; for(chain_it = chains.begin(); chain_it != chains.end(); ++chain_it){ CharString text = chain_it->second; Finder<CharString> finder(text); if(modif_chains.find(chain_it->first) == modif_chains.end() && find(finder,pattern)){ links[i]->push_D_link(chain_it->first); if(chain_it->second.length()- endPosition(finder) > len){ //::std::cout << "D " << (i+1) << " " << beginPosition(finder) << ::std::endl; CharString pre = ::seqan::prefix(chain_it->second, beginPosition(finder) + len); string str_pre = ::seqan::toCString(pre); CharString suf = ::seqan::suffix(chain_it->second, beginPosition(finder) + len); string str_suf = ::seqan::toCString(suf); //::std::cout << chain_it->second << " - " << chain_it->second.length() << ::std::endl; //Sono sicuro che sia > len dato che la estraggo da un prefisso //di lunghezza len... 
chains[chain_it->first] = str_pre; //::std::cout << str_pre << " - " << str_pre.length() << ::std::endl; modif_chains.insert(chain_it->first); //...ma il suffissopotrebbe essere piu' corto di len string head; if(str_suf.length() >= len){ head = ::seqan::toCString(::seqan::prefix(suf,len)); chains[fingerprint(head)] = str_suf; mapping[fingerprint(head)] = fingerprint(head); }else{ head = str_suf; for(unsigned int z=0; z<len-str_suf.length();++z){ head.append("A"); } chains[fingerprint(head)] = str_suf; mapping[fingerprint(head)] = fingerprint(head); } //::std::cout << str_suf << " - " << str_suf.length() << ::std::endl << ::std::endl; modif_chains.insert(fingerprint(head)); for(unsigned int z=0; z<links.size();++z){ for(int k=0; k<links[z]->size_D_link();++k){ if(links[z]->at_D_link(k) == chain_it->first){ links[z]->at_D_link(k) = fingerprint(head); } } } //Aggiungere un link tra le due catene create CharString l_part = chains[chain_it->first]; string new_link = ::seqan::toCString(::seqan::suffix(l_part,length(l_part) - len)); unsigned long long f_l = fingerprint(new_link); new_link.append(head); table_entry* t_new = new table_entry(new_link,f_l,fingerprint(head)); t_new->push_D_link(chain_it->first); t_new->push_A_link(fingerprint(head)); links.push_back(t_new); } } } } //Linkato solo a sx if(links[i]->size_A_link() == 0 && links[i]->size_D_link() != 0){ //::std::cout << "A link" << ::std::endl; CharString p = ::seqan::suffix(links[i]->get_short_read()->get_RNA_seq_sequence(),len); Pattern<CharString, ShiftOr > pattern(p); ::std::map<unsigned long long, string>::iterator chain_it; ::std::set<unsigned long long> modif_chains; for(chain_it = chains.begin(); chain_it != chains.end(); ++chain_it){ CharString text = chain_it->second; Finder<CharString> finder(text); if(modif_chains.find(chain_it->first) == modif_chains.end() && find(finder,pattern)){ //if(find(finder,pattern)){ //::std::cout << "1 - if " << beginPosition(finder) << " " << endPosition(finder) << 
::std::endl; if(beginPosition(finder) == 0){ links[i]->push_A_link(chain_it->first); } if(endPosition(finder) > len){ //::std::cout << "A " << (i+1) << " " << beginPosition(finder) << ::std::endl; CharString pre = ::seqan::prefix(chain_it->second, beginPosition(finder) + len); string str_pre = ::seqan::toCString(pre); CharString suf = ::seqan::suffix(chain_it->second, beginPosition(finder) + len); string str_suf = ::seqan::toCString(suf); chains[chain_it->first] = str_pre; //::std::cout << str_pre << " - " << str_pre.length() << ::std::endl; modif_chains.insert(chain_it->first); string head; if(str_suf.length() >= len){ head = ::seqan::toCString(::seqan::prefix(suf,len)); chains[fingerprint(head)] = str_suf; mapping[fingerprint(head)] = fingerprint(head); }else{ head = str_suf; for(unsigned int z=0; z<len-str_suf.length();++z){ head.append("A"); } chains[fingerprint(head)] = str_suf; mapping[fingerprint(head)] = fingerprint(head); } //::std::cout << str_suf << " - " << str_suf.length() << ::std::endl << ::std::endl; modif_chains.insert(fingerprint(head)); for(unsigned int z=0; z<links.size();++z){ for(int k=0; k<links[z]->size_D_link();++k){ if(links[z]->at_D_link(k) == chain_it->first){ links[z]->at_D_link(k) = fingerprint(head); } } } //Aggiungere un link tra le due catene create CharString l_part = chains[chain_it->first]; string new_link = ::seqan::toCString(::seqan::suffix(l_part,length(l_part) - len)); unsigned long long f_l = fingerprint(new_link); new_link.append(head); table_entry* t_new = new table_entry(new_link,f_l,fingerprint(head)); t_new->push_D_link(chain_it->first); t_new->push_A_link(fingerprint(head)); links.push_back(t_new); links[i]->push_A_link(fingerprint(head)); } } } } } //::std::cout << chains.size() << ::std::endl; }