//each thread will run this. void *slave(void *ptr){ tpars *tp = (tpars *)ptr; // fprintf(stderr,"from:%d to:%d \n",tp->from,tp->to); for(int i=tp->from;i<tp->to;i++){ tp->skip=i; tp->val[i] = kmin_brent(myfun,1e-6,0.5,tp,1e-6,&tp->thetas[i]); assert(tp->thetas[i]!=1e-6); } pthread_exit(0); }
/* The following function combines EM and Brent's method. When the signal from * the data is strong, EM is faster but sometimes, EM may converge very slowly. * When this happens, we switch to Brent's method. The idea is learned from * Rasmus Nielsen. */ static double freqml(double f0, int beg, int end, const double *pdg) { int i; double f; for (i = 0, f = f0; i < ITER_TRY; ++i) if (freq_iter(&f, pdg, beg, end) < EPS) break; if (i == ITER_TRY) { // haven't converged yet; try Brent's method minaux1_t a; a.beg = beg; a.end = end; a.pdg = pdg; kmin_brent(prob1, f0 == f? .5*f0 : f0, f, (void*)&a, EPS, &f); } return f; }
double jackML(allPars *ap,int nthreads,char *fname,int nJack) { if(nJack==-1) nJack = ap->len; else nJack = std::min(ap->len,nJack); assert(nJack>0); double *thetas =new double[nJack]; double *val = new double[nJack]; if(nthreads>1){ pthread_t *thd = new pthread_t[nthreads]; tpars *tp = new tpars[nthreads]; int block = nJack/nthreads; for(int i=0;i<nthreads;i++){ tp[i].thetas = thetas; tp[i].val = val; tp[i].ap = ap; tp[i].from = i==0?0:tp[i-1].to; tp[i].to = tp[i].from+block; } tp[nthreads-1].to = nJack; for(int i=0;i<nthreads;i++) pthread_create(&thd[i],NULL,slave,&tp[i]); for(int i=0;i<nthreads;i++) pthread_join(thd[i],NULL); }else{ //if we do not threads tpars tp; tp.ap=ap; for(int i=0;i<nJack;i++){ tp.skip=i; val[i]=kmin_brent(myfun,1e-6,0.5-1e-6,&ap,0.0001,thetas+i); } } double esd = sd(thetas,nJack); if(fname){ FILE *fp =fopen(fname,"w"); for(int i=0;i<nJack;i++){ fprintf(fp,"%e\t%f\t%e\n",thetas[i],val[i],thetas[i]-1e6); } fclose(fp); } delete [] thetas; delete [] val; return esd; }
void analysis(dat &d,int nThreads,int nJack) { int *rowSum = new int[d.cn.size()]; int *rowMax = new int[d.cn.size()]; int *rowMaxW = new int[d.cn.size()]; int *error1 = new int[d.cn.size()];//number of non most frequent observed bases int *error2 = new int[d.cn.size()];//sampled size_t mat1[4]={0,0,0,0};//matrix used for fisher for method 1 size_t mat2[4]={0,0,0,0};//matrix used for fisher for method 2 size_t tab[2] = {0,0};//used for debug for(int i=0;i<d.cn.size();i++) { int s =d.cn[i][0]; int max=s; int which=0; for(int j=1;j<4;j++){ s += d.cn[i][j]; if(d.cn[i][j]>max){ max=d.cn[i][j]; which=j; } } rowSum[i] = s; rowMax[i]=max; rowMaxW[i]=which; aMap::iterator it= d.myMap.find(d.pos[i]); if(it!=d.myMap.end()){//if site is hapmap site // fprintf(stderr,"posi:%d wmax:%d all1:%d freq:%f\n",it->first,rowMaxW[i],it->second.allele1,it->second.freq); //if maximum occuring bases is the same as allele1 from hapmap, then set freq to 1-freq if(rowMaxW[i]==it->second.allele1) //it->first C++ syntax for getting key of iterator //it->second C++ syntax for getting value of key of iterator, key->value: key=pos,value=hapSite it->second.freq=1-it->second.freq; else it->second.freq=it->second.freq; // fprintf(stderr,"posi:%d wmax:%d all1:%d freq:%f\n",it->first,rowMaxW[i],it->second.allele1,it->second.freq); // exit(0); } error1[i] = rowSum[i]-rowMax[i]; error2[i] = simrbinom((1.0*error1[i])/(1.0*rowSum[i])); // fprintf(stdout,"simrbinom\t%d %d %d %d %d %d\n",rowSum[i],rowMax[i],rowMaxW[i],error1[i],error2[i],d.dist[i]); if(error1[i]>0) tab[1]++; else tab[0]++; if(d.dist[i]==0){//this is a snpsite mat1[0] +=error1[i]; mat1[1] +=rowSum[i]-error1[i]; mat2[0] +=error2[i]; mat2[1] +=1-error2[i]; // fprintf(stdout,"rs %d %d %d %d %d %d %d %f %d %d\n",d.pos[i],rowSum[i],rowMax[i],rowMaxW[i],error1[i],error2[i],d.dist[i],it->second.freq,it->second.allele1,it->second.allele2); }else{ mat1[2] +=error1[i]; mat1[3] +=rowSum[i]-error1[i]; mat2[2] += error2[i]; mat2[3] += 1-error2[i]; } } #if 0 fprintf(stderr,"tab:%lu %lu\n",tab[0],tab[1]); fprintf(stderr,"mat: %lu %lu %lu %lu\n",mat1[0],mat1[1],mat1[2],mat1[3]); fprintf(stderr,"mat2: %lu %lu %lu %lu\n",mat2[0],mat2[1],mat2[2],mat2[3]); #endif int n11, n12, n21, n22; double left, right, twotail, prob; // fprintf(stderr,"--------\nMAIN RESULTS: Fisher exact test:\n"); n11=mat1[0];n12=mat1[2];n21=mat1[1];n22=mat1[3]; prob = kt_fisher_exact(n11, n12, n21, n22, &left, &right, &twotail); // fprintf(stdout,"Method\t n11 n12 n21 n22 prob left right twotail\n"); //fprintf(stdout,"%s\t%d\t%d\t%d\t%d\t%.6g\t%.6g\t%.6g\t%.6g\n", "method1", n11, n12, n21, n22, // prob, left, right, twotail); n11=mat2[0];n12=mat2[2];n21=mat2[1];n22=mat2[3]; prob = kt_fisher_exact(n11, n12, n21, n22, &left, &right, &twotail); //fprintf(stdout,"%s\t%d\t%d\t%d\t%d\t%.6g\t%.6g\t%.6g\t%.6g\n", "method2", n11, n12, n21, n22, // prob, left, right, twotail); //estimate how much contamination double c= mat1[2]/(1.0*(mat1[2]+mat1[3]));//this is error for flanking site double err= mat1[0]/(1.0*(mat1[0]+mat1[1]));//this is error for snpsite fprintf(stderr,"Mismatch_rate_for_flanking:%f MisMatch_rate_for_snpsite:%f \n",c,err); int *err0 =new int[d.cn.size()/9];//<-nbases of non frequent most occuring at snpsite int *err1 =new int[d.cn.size()/9];//<-nbases of non frequent most occuring at flanking int *d0 =new int[d.cn.size()/9];//<-seqdepth for snpsite int *d1 =new int[d.cn.size()/9];//<-seqdepth for flanking double *freq =new double[d.cn.size()/9];//<- freq for snpsite for(int i=0;i<d.cn.size()/9;i++){ int adj=0; int dep=0; for(int j=0;j<9;j++){ if(d.dist[i*9+j]!=0){//<- flanking adj += error1[i*9+j]; dep += rowSum[i*9+j]; }else{ //snpsite err0[i] = error1[i*9+j]; d0[i] = rowSum[i*9+j]; freq[i] =d.myMap.find(d.pos[i*9+j])->second.freq; } } err1[i] =adj; d1[i] = dep; #if 0 if(it==d.myMap.end()){ fprintf(stderr,"Problem finding:%d\n",d.pos[i]); exit(0); } #endif // fprintf(stdout,"cont\t%d\t%d\t%d\t%d\t%f\n",err0[i],err1[i],d0[i],d1[i],freq[i]); } allPars ap; ap.len=d.cn.size()/9; ap.seqDepth = d0; ap.nonMajor = err0; ap.freq = freq; ap.eps = c; ap.newllh =0; ap.e1 = err1; ap.d1=d1; double mom,momJack,ML,mlJack,val; ap.newllh =0; mom= likeOldMom(d.cn.size()/9,d0,err0,freq,c,-1); momJack = jackMom(&ap,nJack); tpars tp;tp.ap=≈tp.skip=-1; // print(tp.ap,"asdff1"); kmin_brent(myfun,1e-6,0.5-1e-6,&tp,0.0001,&ML); mlJack= jackML(&ap,nThreads,NULL,nJack); fprintf(stderr,"\nMethod1: old_llh Version: MoM:%f SE(MoM):%e ML:%f SE(ML):%e",mom,momJack,ML,mlJack); ap.newllh =1; mom=likeNewMom(d.cn.size()/9,d0,err0,freq,c,-1); momJack= jackMom(&ap,nJack); //marshall(&ap,"prem1"); val=kmin_brent(myfun,1e-6,0.5-1e-6,&tp,0.0001,&ML); // fprintf(stderr,"\nM1: ML:%f VAL:%f\n",ML,val); mlJack= jackML(&ap,nThreads,NULL,nJack); fprintf(stderr,"\nMethod1: new_llh Version: MoM:%f SE(MoM):%e ML:%f SE(ML):%e",mom,momJack,ML,mlJack); // fread(error2,sizeof(int),d.cn.size(),fopen("error2.bin","rb")); //for(int i=0;0&&i<d.cn.size();i++) // fprintf(stdout,"pik\t%d\n",error2[i]); //exit(0); for(int i=0;i<d.cn.size()/9;i++){ int adj=0; for(int j=0;j<9;j++){ if(d.dist[i*9+j]!=0){ adj += error2[i*9+j]; }else{ err0[i] = error2[i*9+j]; freq[i] =d.myMap.find(d.pos[i*9+j])->second.freq; // fprintf(stderr,"freq:%f\n",freq[i]); } } err1[i] =adj; d0[i] = 1; d1[i] = 8; #if 0 if(it==d.myMap.end()){ fprintf(stderr,"Problem finding:%d\n",d.pos[i]); exit(0); } #endif // fprintf(stdout,"cont\t%d\t%d\t%d\t%d\t%f\n",err0[i],err1[i],d0[i],d1[i],freq[i]); } ap.seqDepth = d0; ap.nonMajor = err0; ap.e1=err1; ap.d1=d1; ap.freq = freq; ap.newllh =0; mom= likeOldMom(d.cn.size()/9,d0,err0,freq,c,-1); momJack = jackMom(&ap,nJack); //print(tp.ap,"asdff2"); // exit(0); val = kmin_brent(myfun,1e-6,0.5-1e-6,&tp,0.0001,&ML); //fprintf(stderr,"\nML2:%f VAL:%f\n",ML,val); // exit(0); //FILE *fp = fopen("heyaa","w"); print(&ap,fp);fclose(fp); //return; mlJack= jackML(&ap,nThreads,NULL,nJack); fprintf(stderr,"\nMethod2: old_llh Version: MoM:%f SE(MoM):%e ML:%f SE(ML):%e",mom,momJack,ML,mlJack); ap.newllh =1; mom=likeNewMom(d.cn.size()/9,d0,err0,freq,c,-1); momJack= jackMom(&ap,nJack); kmin_brent(myfun,1e-6,0.5-1e-6,&tp,0.0001,&ML); mlJack= jackML(&ap,nThreads,NULL,nJack); fprintf(stderr,"\nMethod2: new_llh Version: MoM:%f SE(MoM):%e ML:%f SE(ML):%e\n",mom,momJack,ML,mlJack); delete [] rowSum; delete [] rowMax; delete [] rowMaxW; delete [] error1; delete [] error2; delete [] err0; delete [] err1; delete [] d0; delete [] d1; delete [] freq; }