int main(int argc, char *argv[]) { if (argc == 1) usage(); // Print usage for (int i=0; i<argc; i++) { cerr << argv[i] << ' '; cerr << endl; } Initial_Time(); cerr<<"Start at: "<<Curr_Time()<<endl; int noptions = mGetOptions(argc, argv); // Mutltithreads part #if defined (_OPENMP) if (param.ncpu) omp_set_num_threads(param.ncpu); #endif fin_db.open(ref_file.c_str()); if (!fin_db) { cerr << "fatal error: failed to open ref file\n"; exit(1); } ref.Run_ConvertBinseq(fin_db); cerr << "Load in " << ref.total_num << " reference seqs, total size " << ref.sum_length << " bp. " << Cal_AllTime() << " secs passed" << endl; ref.CreateIndex(); // Qgram_Index(); cerr << "Create refseq k-mer index table. " << Cal_AllTime() << " secs passed" << endl; RunProcess(); return 0; }
// Can be either unique or multi-mapping reads inline void PROBerReadModel_iCLIP::update(AlignmentGroup& ag) { int size = ag.size(); BamAlignment *ba = NULL; char dir; if (size > 1) { assert(model_type >= 2); double frac = 1.0 / size; for (int i = 0; i < size; ++i) { ba = ag.getAlignment(i); fld->update(ba->getInsertSize(), frac); } return; } assert(ag.getSEQ(seq)); if (model_type & 1) assert(ag.getQUAL(qual)); for (int i = 0; i < size; ++i) { ba = ag.getAlignment(i); dir = ba->getMateDir(); assert(ba->getCIGAR(cigar)); assert(ba->getMD(mdstr)); refseq.setUp(dir, cigar, mdstr, seq); seqmodel->update(1.0, dir, 0, &refseq, &cigar, &seq, ((model_type & 1) ? &qual : NULL)); } if (model_type >= 2) { assert(ag.getSEQ(seq, 2)); if (model_type & 1) assert(ag.getQUAL(qual, 2)); for (int i = 0; i < size; ++i) { ba = ag.getAlignment(i); dir = ba->getMateDir(2); assert(ba->getCIGAR(cigar, 2)); assert(ba->getMD(mdstr, 2)); refseq.setUp(dir, cigar, mdstr, seq); seqmodel->update(1.0, dir, 0, &refseq, &cigar, &seq, ((model_type & 1) ? &qual : NULL)); } } }
// Computes, for every alignment in `ag`, its normalised conditional
// probability and stores it in conprbs[i].
//
// ag      - alignment group to score.
// conprbs - output array; entries 0 .. ag.size()-1 are written, so it must
//           hold at least ag.size() values.
//
// Each entry starts as the sequence-model probability of mate 1. When
// model_type >= 2 it is multiplied by the mate-2 probability and by the
// fragment length probability of the alignment's insert size. Quality strings
// are used only when bit 0 of model_type is set. Finally the entries are
// divided by their sum; if that sum is not positive they are left unscaled.
//
// The accessor calls (getSEQ/getQUAL/getCIGAR/getMD) fill the member buffers
// used below, so they are executed unconditionally and only their result is
// asserted. Keeping them inside assert() would remove the calls themselves
// when the code is compiled with NDEBUG.
inline void PROBerReadModel_iCLIP::calcProbs(AlignmentGroup& ag, double* conprbs) {
    const int size = ag.size();
    BamAlignment *ba = NULL;
    char dir;
    const bool use_qual = (model_type & 1) != 0;
    bool ok;

    // Mate 1
    ok = ag.getSEQ(seq);
    assert(ok);
    if (use_qual) {
        ok = ag.getQUAL(qual);
        assert(ok);
    }
    for (int i = 0; i < size; ++i) {
        ba = ag.getAlignment(i);
        dir = ba->getMateDir();
        ok = ba->getCIGAR(cigar);
        assert(ok);
        ok = ba->getMD(mdstr);
        assert(ok);
        refseq.setUp(dir, cigar, mdstr, seq);
        conprbs[i] = seqmodel->getProb(dir, 0, &refseq, &cigar, &seq, (use_qual ? &qual : NULL));
    }

    // Mate 2
    if (model_type >= 2) {
        ok = ag.getSEQ(seq, 2);
        assert(ok);
        if (use_qual) {
            ok = ag.getQUAL(qual, 2);
            assert(ok);
        }
        for (int i = 0; i < size; ++i) {
            ba = ag.getAlignment(i);
            dir = ba->getMateDir(2);
            ok = ba->getCIGAR(cigar, 2);
            assert(ok);
            ok = ba->getMD(mdstr, 2);
            assert(ok);
            refseq.setUp(dir, cigar, mdstr, seq);
            conprbs[i] *= seqmodel->getProb(dir, 0, &refseq, &cigar, &seq, (use_qual ? &qual : NULL));
            conprbs[i] *= fld->getProb(ba->getInsertSize()); // fragment length distribution
        }
    }

    (void)ok; // only read by assert(); silences the NDEBUG "set but unused" warning

    // Normalise; a non-positive total is replaced by 1.0 so the division
    // below leaves the values untouched instead of dividing by zero.
    double sum = 0.0;
    for (int i = 0; i < size; ++i) sum += conprbs[i];
    //assert(sum > 0.0);
    if (sum <= 0.0) sum = 1.0;
    for (int i = 0; i < size; ++i) conprbs[i] /= sum;
}
void Do_Formatdb() { ref.InitialIndex(); pthread_t pids_ab, pids_ac, pids_ad; //cal kmer freq pthread_create(&pids_ab, NULL, t_SeedFreq_ab, NULL); pthread_create(&pids_ac, NULL, t_SeedFreq_ac, NULL); pthread_create(&pids_ad, NULL, t_SeedFreq_ad, NULL); pthread_join(pids_ab, NULL); pthread_join(pids_ac, NULL); pthread_join(pids_ad, NULL); ref.AllocIndex(); //record kmer locations pthread_create(&pids_ab, NULL, t_Index_ab, NULL); pthread_create(&pids_ac, NULL, t_Index_ac, NULL); pthread_create(&pids_ad, NULL, t_Index_ad, NULL); pthread_join(pids_ab, NULL); pthread_join(pids_ac, NULL); pthread_join(pids_ad, NULL); // ref._blocks.clear(); cout<<"Create seed table. "<<Cal_AllTime()<<" secs passed"<<endl; };
void SingleAlign::SnpAlign(RefSeq &ref, bit32_t mode) { bit32_t i,j,m, modeindex, cmodeindex, mc, h, read_chain_index_mask; Hit prefetch_hit, _hHit; //cout<<_pread->seq<<endl; if(param.RRBS_flag){ //RRBS mode for(read_chain_index=0; read_chain_index<2;read_chain_index++){ if(!xflag_chain[read_chain_index]) continue; cmodeindex=modeindex=xseedindex[read_chain_index][mode].second; if(read_chain_index) cmodeindex=map_readlen/param.seed_size-1-modeindex; _seed=xseeds[read_chain_index][modeindex][0]; //cout<<"mode:"<<mode<<" m="<<ref.index[_seed].n1<<" seed:"<<_seed; //cout<<" "; disp_bfa(param.map3to4(_seed), param.seed_size); cout<<endl; if((m=ref.index[_seed].n1)==0) continue; _refloc=ref.index[_seed].loc1; //list of seeded reference locations h=param.profile[modeindex][0]+cseed_offset*read_chain_index; read_chain_index_mask=read_chain_index<<24; for(j=0; j!=m; j++) { _hHit=_refloc[j]; if(((_hHit.chr^read_chain_index_mask)>>16)!=cmodeindex) continue; // mode or strand not match _hit.chr=_hHit.chr&0xffff; if(_hHit.loc<h) continue; //underflow the start of refseq _hit.loc=_hHit.loc-h; //cout<<" j="<<j<<" chr"<<(int)_hit.chr<<":"<<_hit.loc<<endl; CountMismatch(xseq[read_chain_index], (_hit.loc%SEGLEN)<<1, ref.bfa[_hit.chr].s+_hit.loc/SEGLEN); //cout<<" mis:"<<tmp_snp<<endl; if(tmp_snp<=snp_thres) { _ghit=int2hit(ref,_hit,0,0); if(!param.pairend){ seg_info=ref.CCGG_seglen(_ghit.chr, _ghit.loc, map_readlen); //get fragment information //cout<<"seg1:"<<seg_info.first<<" seg2:"<<seg_info.second<<endl; //if(seg_info.second>param.max_insert) continue; // fragment too large } if(AddHit(ref, tmp_snp, mode)) return; } if(param.gap>0) { if(GapAlign(ref, mode, h)) return; } } } } else{ //WGBS mode for(read_chain_index=0; read_chain_index<2;read_chain_index++){
int main(int argc, char *argv[]) { //print usage if (argc == 1) { usage(); } Initial_Time(); cout<<"Start at: "<<Curr_Time()<<endl; int noptions; noptions=mGetOptions(argc, argv); fin_db.open(ref_file.c_str()); if(!fin_db) { cerr<<"fatal error: failed to open ref file\n"; exit(1); } ref.Run_ConvertBinseq(fin_db); cout<<"Load in "<<ref.total_num<<" db seqs, total size "<<ref.sum_length<<" bp. "<<Cal_AllTime()<<" secs passed"<<endl; //single command: if(noptions==argc) { Do_Formatdb(); RunProcess(); } else { int old_seed_size=0; char * margv[1000]; for(int i=0; i<1000; i++) { margv[i] = new char[1000]; } char ch[10000]; ifstream fin_batch(argv[noptions]); while(!fin_batch.eof()) { fin_batch.getline(ch, 10000); if(fin_batch.eof()) break; cout<<"Line of options: "<<ch<<endl; bool is_word=0; int margc=0; char *q=margv[margc]; for(int i=0; ch[i]!='\0'; i++) { if((ch[i]>=33)&&(ch[i]<=126)) { if(!is_word) { *q='\0'; margc++; q=margv[margc]; } is_word=1; *q++=ch[i]; } else { is_word=0; } } *q='\0'; margc++; mGetOptions(margc, margv); if(param.seed_size!=old_seed_size) { if(ref.total_kmers>0) ref.ReleaseIndex(); Do_Formatdb(); old_seed_size=param.seed_size; } RunProcess(); } fin_batch.close(); } return 0; }
// Builds the index of the global reference `ref` and then reports the elapsed
// time on stdout.
void Do_Formatdb()
{
    ref.CreateIndex();

    cout << "Create seed table. ";
    cout << Cal_AllTime();
    cout << " secs passed" << endl;
}
// Thread start routine: runs ref.t_CreateIndex_ad() on the global reference.
// The argument is unused. Always returns NULL; the explicit return is
// required because flowing off the end of a non-void function is undefined
// behaviour.
void *t_Index_ad(void *)
{
    ref.t_CreateIndex_ad();
    return NULL;
}
// Thread start routine: runs ref.t_CalKmerFreq_ad() on the global reference.
// The argument is unused. Always returns NULL; the explicit return is
// required because flowing off the end of a non-void function is undefined
// behaviour.
void *t_SeedFreq_ad(void *)
{
    ref.t_CalKmerFreq_ad();
    return NULL;
}