Esempio n. 1
0
/**
 * Program entry point.
 *
 * Echoes the command line to stderr, parses the options, opens and loads the
 * reference file, builds the reference k-mer index and finally hands control
 * to RunProcess(). Exits with status 1 if the reference file cannot be opened.
 */
int main(int argc, char *argv[])
{
    // No arguments at all: show the usage text.
    if (argc == 1)
    {
        usage();
    }

    // Echo the command line: every argument goes on its own line,
    // followed by a single space.
    int arg_idx = 0;
    while (arg_idx < argc)
    {
        cerr << argv[arg_idx] << ' ' << endl;
        ++arg_idx;
    }

    Initial_Time();
    cerr << "Start at:  " << Curr_Time() << endl;

    // The value returned by the option parser is not needed in this variant.
    (void)mGetOptions(argc, argv);

    // Multithreading setup (only compiled in when OpenMP is enabled).
#if defined (_OPENMP)
    if (param.ncpu)
    {
        omp_set_num_threads(param.ncpu);
    }
#endif

    // Open the reference file; nothing can be done without it.
    fin_db.open(ref_file.c_str());
    if (!fin_db)
    {
        cerr << "fatal error: failed to open ref file\n";
        exit(1);
    }

    // Load the reference sequences and report what was read.
    ref.Run_ConvertBinseq(fin_db);
    cerr << "Load in " << ref.total_num
         << " reference seqs, total size " << ref.sum_length
         << " bp. " << Cal_AllTime()
         << " secs passed" << endl;

    // Build the index over the reference sequences.
    ref.CreateIndex();
    cerr << "Create refseq k-mer index table. " << Cal_AllTime()
         << " secs passed" << endl;

    RunProcess();

    return 0;
}
// Can be either unique or multi-mapping reads
/**
 * Updates the model from one alignment group.
 *
 * - Groups with more than one alignment only update the fragment length
 *   distribution (fld), each alignment weighted by 1/size; this path requires
 *   model_type >= 2.
 * - Otherwise every alignment updates the sequence model with weight 1.0:
 *   first with mate 1, and, when model_type >= 2, also with mate 2.
 *   Quality strings are passed to the sequence model only when the lowest bit
 *   of model_type is set.
 *
 * The accessor calls (getSEQ/getQUAL/getCIGAR/getMD) fill member buffers and
 * are therefore evaluated unconditionally; only the check of their result is
 * an assert. Keeping them inside assert() would remove the calls entirely in
 * NDEBUG builds and leave the buffers stale.
 *
 * @param ag alignment group to learn from
 */
inline void PROBerReadModel_iCLIP::update(AlignmentGroup& ag) {
	const int size = ag.size();
	BamAlignment *ba = NULL;
	char dir;

	if (size > 1) {
		assert(model_type >= 2);
		const double frac = 1.0 / size;
		for (int i = 0; i < size; ++i) {
			ba = ag.getAlignment(i);
			fld->update(ba->getInsertSize(), frac);
		}
		return;
	}

	const bool use_qual = (model_type & 1) != 0;
	bool ok = true;
	(void)ok; // only inspected by assert(); avoids unused warnings under NDEBUG

	// Mate 1
	ok = ag.getSEQ(seq);
	assert(ok);
	if (use_qual) {
		ok = ag.getQUAL(qual);
		assert(ok);
	}
	for (int i = 0; i < size; ++i) {
		ba = ag.getAlignment(i);
		dir = ba->getMateDir();
		ok = ba->getCIGAR(cigar);
		assert(ok);
		ok = ba->getMD(mdstr);
		assert(ok);
		refseq.setUp(dir, cigar, mdstr, seq);
		seqmodel->update(1.0, dir, 0, &refseq, &cigar, &seq, (use_qual ? &qual : NULL));
	}

	// Mate 2
	if (model_type >= 2) {
		ok = ag.getSEQ(seq, 2);
		assert(ok);
		if (use_qual) {
			ok = ag.getQUAL(qual, 2);
			assert(ok);
		}
		for (int i = 0; i < size; ++i) {
			ba = ag.getAlignment(i);
			dir = ba->getMateDir(2);
			ok = ba->getCIGAR(cigar, 2);
			assert(ok);
			ok = ba->getMD(mdstr, 2);
			assert(ok);
			refseq.setUp(dir, cigar, mdstr, seq);
			seqmodel->update(1.0, dir, 0, &refseq, &cigar, &seq, (use_qual ? &qual : NULL));
		}
	}
}
/**
 * Computes normalized conditional probabilities for every alignment in a group.
 *
 * For alignment i, conprbs[i] is set to the sequence-model probability of
 * mate 1. When model_type >= 2 it is additionally multiplied by the
 * sequence-model probability of mate 2 and by the fragment length probability
 * of the alignment's insert size. Finally all entries are divided by their
 * sum; if the sum is not positive the entries are left unscaled (divided by 1).
 *
 * The accessor calls (getSEQ/getQUAL/getCIGAR/getMD) fill member buffers and
 * are therefore evaluated unconditionally; only the check of their result is
 * an assert. Keeping them inside assert() would remove the calls entirely in
 * NDEBUG builds and leave the buffers stale.
 *
 * @param ag      alignment group to score
 * @param conprbs output array; must hold at least ag.size() entries
 */
inline void PROBerReadModel_iCLIP::calcProbs(AlignmentGroup& ag, double* conprbs) {
	const int size = ag.size();
	BamAlignment *ba = NULL;
	char dir;

	const bool use_qual = (model_type & 1) != 0;
	bool ok = true;
	(void)ok; // only inspected by assert(); avoids unused warnings under NDEBUG

	// Mate 1
	ok = ag.getSEQ(seq);
	assert(ok);
	if (use_qual) {
		ok = ag.getQUAL(qual);
		assert(ok);
	}
	for (int i = 0; i < size; ++i) {
		ba = ag.getAlignment(i);
		dir = ba->getMateDir();
		ok = ba->getCIGAR(cigar);
		assert(ok);
		ok = ba->getMD(mdstr);
		assert(ok);
		refseq.setUp(dir, cigar, mdstr, seq);
		conprbs[i] = seqmodel->getProb(dir, 0, &refseq, &cigar, &seq, (use_qual ? &qual : NULL));
	}

	// Mate 2 and fragment length
	if (model_type >= 2) {
		ok = ag.getSEQ(seq, 2);
		assert(ok);
		if (use_qual) {
			ok = ag.getQUAL(qual, 2);
			assert(ok);
		}
		for (int i = 0; i < size; ++i) {
			ba = ag.getAlignment(i);
			dir = ba->getMateDir(2);
			ok = ba->getCIGAR(cigar, 2);
			assert(ok);
			ok = ba->getMD(mdstr, 2);
			assert(ok);
			refseq.setUp(dir, cigar, mdstr, seq);
			conprbs[i] *= seqmodel->getProb(dir, 0, &refseq, &cigar, &seq, (use_qual ? &qual : NULL));

			conprbs[i] *= fld->getProb(ba->getInsertSize()); // fragment length distribution
		}
	}

	// Normalize so the entries sum to 1 whenever the total is positive.
	double sum = 0.0;
	for (int i = 0; i < size; ++i) sum += conprbs[i];

	// A non-positive total is tolerated rather than asserted on:
	// dividing by 1.0 leaves the values as they are.
	if (sum <= 0.0) sum = 1.0;

	for (int i = 0; i < size; ++i) conprbs[i] /= sum;
}
Esempio n. 4
0
void Do_Formatdb()
{
	ref.InitialIndex();
	pthread_t pids_ab, pids_ac, pids_ad;
	//cal kmer freq
	pthread_create(&pids_ab, NULL, t_SeedFreq_ab, NULL);
	pthread_create(&pids_ac, NULL, t_SeedFreq_ac, NULL);
	pthread_create(&pids_ad, NULL, t_SeedFreq_ad, NULL);
	pthread_join(pids_ab, NULL);
	pthread_join(pids_ac, NULL);
	pthread_join(pids_ad, NULL);
	ref.AllocIndex();
	//record kmer locations
	pthread_create(&pids_ab, NULL, t_Index_ab, NULL);
	pthread_create(&pids_ac, NULL, t_Index_ac, NULL);
	pthread_create(&pids_ad, NULL, t_Index_ad, NULL);
	pthread_join(pids_ab, NULL);
	pthread_join(pids_ac, NULL);
	pthread_join(pids_ad, NULL);
	//
	ref._blocks.clear();
	cout<<"Create seed table. "<<Cal_AllTime()<<" secs passed"<<endl;	
};
Esempio n. 5
0
// Seed-based alignment pass for one seed mode.
// NOTE: this definition is truncated in the visible source; only the RRBS
// branch and the first line of the WGBS branch can be seen.
//
// Visible RRBS behavior: for each read chain (0 and 1) whose xflag_chain entry
// is set, the chain's seed is looked up in ref.index, the seeded reference
// locations are filtered, mismatches are counted against the reference, and
// candidates with tmp_snp <= snp_thres are passed to AddHit(). The function
// returns as soon as AddHit() or GapAlign() returns non-zero.
//   ref  - reference data (ref.index and ref.bfa are read here)
//   mode - selects which entry of xseedindex[chain] is used
void SingleAlign::SnpAlign(RefSeq &ref, bit32_t mode) {
   	bit32_t i,j,m, modeindex, cmodeindex, mc, h, read_chain_index_mask;
   	Hit prefetch_hit, _hHit;

   	//cout<<_pread->seq<<endl;
    if(param.RRBS_flag){ //RRBS mode
    	for(read_chain_index=0; read_chain_index<2;read_chain_index++){
            // skip chains that are not flagged for processing
            if(!xflag_chain[read_chain_index]) continue;
            cmodeindex=modeindex=xseedindex[read_chain_index][mode].second;
            // for chain 1 the mode index to compare against is mirrored
            if(read_chain_index) cmodeindex=map_readlen/param.seed_size-1-modeindex;
       		_seed=xseeds[read_chain_index][modeindex][0];
    		//cout<<"mode:"<<mode<<" m="<<ref.index[_seed].n1<<" seed:"<<_seed;
            //cout<<"    "; disp_bfa(param.map3to4(_seed), param.seed_size); cout<<endl;    		
    		// m = number of index entries for this seed; nothing to do when it is zero
    		if((m=ref.index[_seed].n1)==0) continue; 
 	    	_refloc=ref.index[_seed].loc1;  //list of seeded reference locations
       		// h is subtracted from each seeded location below; cseed_offset only
       		// contributes for chain 1
       		h=param.profile[modeindex][0]+cseed_offset*read_chain_index;
       		// chain index moved to bit 24 so it can be XORed with the stored chr field
       		read_chain_index_mask=read_chain_index<<24;
       		for(j=0; j!=m; j++) {
   	    		_hHit=_refloc[j];
   	    		// bits 16 and up of the stored chr (with bit 24 flipped for chain 1)
   	    		// must equal cmodeindex
   	    		if(((_hHit.chr^read_chain_index_mask)>>16)!=cmodeindex) continue; // mode or strand not match
   	    		// the low 16 bits are kept as the chr value of the candidate hit
   	    		_hit.chr=_hHit.chr&0xffff;
   	    		if(_hHit.loc<h) continue; //underflow the start of refseq
   	    		_hit.loc=_hHit.loc-h;
   	    		//cout<<" j="<<j<<" chr"<<(int)_hit.chr<<":"<<_hit.loc<<endl;
                // NOTE(review): presumably CountMismatch() sets tmp_snp, which is
                // read right after this call - confirm against its definition
                CountMismatch(xseq[read_chain_index], (_hit.loc%SEGLEN)<<1, ref.bfa[_hit.chr].s+_hit.loc/SEGLEN);
   	    		//cout<<" mis:"<<tmp_snp<<endl;
   	    	 	if(tmp_snp<=snp_thres) {
   	    	 	    _ghit=int2hit(ref,_hit,0,0);
                    if(!param.pairend){
                        // seg_info is computed here but not checked: the
                        // max_insert filter below is commented out
                        seg_info=ref.CCGG_seglen(_ghit.chr, _ghit.loc, map_readlen); //get fragment information
                        //cout<<"seg1:"<<seg_info.first<<" seg2:"<<seg_info.second<<endl;
                        //if(seg_info.second>param.max_insert) continue; // fragment too large
                    }
          			// a non-zero result from AddHit() ends the whole search
          			if(AddHit(ref, tmp_snp, mode)) return;
                }
                // gapped alignment is attempted for every location that reaches this
                // point when param.gap>0, whether or not the mismatch test passed
                if(param.gap>0) {
                    if(GapAlign(ref, mode, h)) return;
                }                             
    		}
    	}
    }
    else{ //WGBS mode
        for(read_chain_index=0; read_chain_index<2;read_chain_index++){
Esempio n. 6
0
int main(int argc, char *argv[])
{
	//print usage
	if (argc == 1)
	{
		usage();
	}
	Initial_Time();
	cout<<"Start at:  "<<Curr_Time()<<endl;
	int noptions;
	noptions=mGetOptions(argc, argv);
	fin_db.open(ref_file.c_str());
	if(!fin_db) {
		cerr<<"fatal error: failed to open ref file\n";
		exit(1);
	}
	ref.Run_ConvertBinseq(fin_db);
	cout<<"Load in "<<ref.total_num<<" db seqs, total size "<<ref.sum_length<<" bp. "<<Cal_AllTime()<<" secs passed"<<endl;			
	//single command:
	if(noptions==argc) {
		Do_Formatdb();
		RunProcess();
	}
  else {
  	int old_seed_size=0;
  	char * margv[1000];
  	for(int i=0; i<1000; i++) {
  		margv[i] = new char[1000];
  	}
  	char ch[10000];
  	ifstream fin_batch(argv[noptions]);
  	while(!fin_batch.eof()) {
  		fin_batch.getline(ch, 10000);
  		if(fin_batch.eof())
  			break;
  		cout<<"Line of options:  "<<ch<<endl;
  		bool is_word=0;
  		int margc=0;
  		char *q=margv[margc];
  		for(int i=0; ch[i]!='\0'; i++) {
  			if((ch[i]>=33)&&(ch[i]<=126)) {
  				if(!is_word) {
  					*q='\0';
  					margc++;
  					q=margv[margc];
  				}
  				is_word=1;
  				*q++=ch[i];
  			}
  			else {
  				is_word=0;
  			}
  		}
  		*q='\0';
  		margc++;
  		mGetOptions(margc, margv);
  		if(param.seed_size!=old_seed_size) {
  			if(ref.total_kmers>0)
  				ref.ReleaseIndex();
  			Do_Formatdb();
  			old_seed_size=param.seed_size;
  		}
  		RunProcess();
  	}
  	fin_batch.close();
  }
	return 0;
}
Esempio n. 7
0
// Builds the index of the reference via ref.CreateIndex() and then reports
// the elapsed time on stdout.
void Do_Formatdb()
{
	ref.CreateIndex();

	cout << "Create seed table. "
	     << Cal_AllTime()
	     << " secs passed"
	     << endl;
}
Esempio n. 8
0
// Thread entry point with the signature expected by pthread_create():
// runs ref.t_CreateIndex_ad(). The argument is unused and no result is
// passed back to the joiner.
void *t_Index_ad(void *)
{
	ref.t_CreateIndex_ad();
	// A function returning void* must return a value; flowing off the end
	// is undefined behavior.
	return 0; // null pointer
}
Esempio n. 9
0
// Thread entry point with the signature expected by pthread_create():
// runs ref.t_CalKmerFreq_ad(). The argument is unused and no result is
// passed back to the joiner.
void *t_SeedFreq_ad(void *)
{
	ref.t_CalKmerFreq_ad();
	// A function returning void* must return a value; flowing off the end
	// is undefined behavior.
	return 0; // null pointer
}