コード例 #1
0
ファイル: contamination2.cpp プロジェクト: ANGSD/angsd
//each thread will run this.
void *slave(void *ptr){
  tpars *tp = (tpars *)ptr; 
  //  fprintf(stderr,"from:%d to:%d \n",tp->from,tp->to);
  for(int i=tp->from;i<tp->to;i++){
    tp->skip=i;
    tp->val[i] = kmin_brent(myfun,1e-6,0.5,tp,1e-6,&tp->thetas[i]);
    assert(tp->thetas[i]!=1e-6);
  }
  pthread_exit(0);
}
コード例 #2
0
ファイル: em.c プロジェクト: xied75/samtools
/* Frequency estimation that combines EM with Brent's method. EM is the faster
 * choice when the signal in the data is strong, but it can occasionally
 * converge very slowly; when that happens the search is handed over to
 * Brent's method. The idea is learned from Rasmus Nielsen.
 *
 * f0       initial frequency
 * beg,end  range passed to freq_iter and stored in the Brent helper struct
 * pdg      data array, forwarded unchanged
 * returns  the final frequency estimate
 */
static double freqml(double f0, int beg, int end, const double *pdg)
{
	double est = f0;
	int n_iter = 0;
	/* EM phase: stop as soon as freq_iter returns a value below EPS */
	while (n_iter < ITER_TRY) {
		if (freq_iter(&est, pdg, beg, end) < EPS) break;
		++n_iter;
	}
	if (n_iter == ITER_TRY) { /* all EM attempts used up: switch to Brent's method */
		minaux1_t aux;
		double lo;
		aux.beg = beg;
		aux.end = end;
		aux.pdg = pdg;
		/* when EM left the estimate equal to f0, start from half of f0 instead */
		lo = (f0 == est) ? .5 * f0 : f0;
		kmin_brent(prob1, lo, est, (void*)&aux, EPS, &est);
	}
	return est;
}
コード例 #3
0
ファイル: contamination2.cpp プロジェクト: ANGSD/angsd
/*
 * Runs nJack kmin_brent optimisations of myfun, one per skip index
 * 0..nJack-1, and returns sd() of the resulting estimates.
 *
 * ap       - shared parameter block handed to every optimisation via tpars.
 * nthreads - when >1 the skip indices are split over that many pthreads
 *            (each running slave); otherwise everything runs in the caller.
 * fname    - optional output path; when non-NULL one line per replicate
 *            (estimate, returned function value, shifted estimate) is written.
 * nJack    - number of replicates; -1 means ap->len, larger values are
 *            capped at ap->len. Must end up > 0.
 *
 * Returns sd(thetas,nJack).
 */
double jackML(allPars *ap,int nthreads,char *fname,int nJack) {

  if(nJack==-1)
    nJack = ap->len;
  else
    nJack = std::min(ap->len,nJack);
  assert(nJack>0);
  double *thetas =new double[nJack];
  double *val = new double[nJack];
  if(nthreads>1){
    pthread_t *thd = new pthread_t[nthreads];
    tpars *tp = new tpars[nthreads];
    int block = nJack/nthreads;

    // Hand out contiguous [from,to) slices of equal size ...
    for(int i=0;i<nthreads;i++){
      tp[i].thetas = thetas;
      tp[i].val = val;
      tp[i].ap = ap;
      tp[i].from = i==0?0:tp[i-1].to;
      tp[i].to = tp[i].from+block;
    }
    // ... and let the last worker absorb the remainder of the division.
    tp[nthreads-1].to = nJack;
    for(int i=0;i<nthreads;i++)
      pthread_create(&thd[i],NULL,slave,&tp[i]);

    for(int i=0;i<nthreads;i++)
      pthread_join(thd[i],NULL);

    // All workers have been joined, so nobody references these any more.
    delete [] thd;
    delete [] tp;
  }else{    //if we do not use threads
    tpars tp;
    tp.ap=ap;
    for(int i=0;i<nJack;i++){
      tp.skip=i;
      // myfun receives a tpars* (as in slave), so pass &tp; passing &ap
      // would hand it an allPars** and silently ignore tp.skip.
      val[i]=kmin_brent(myfun,1e-6,0.5-1e-6,&tp,0.0001,thetas+i);
    }
  }
  double esd = sd(thetas,nJack);
  if(fname){
    FILE *fp =fopen(fname,"w");
    if(fp==NULL){
      // Do not write through a NULL stream; the estimate is still returned.
      fprintf(stderr,"Problem opening file: %s for writing\n",fname);
    }else{
      for(int i=0;i<nJack;i++){
        // NOTE(review): the third column subtracts 1e6; slave compares against
        // 1e-6, so this may have been meant as 1e-6 - confirm before changing.
        fprintf(fp,"%e\t%f\t%e\n",thetas[i],val[i],thetas[i]-1e6);
      }
      fclose(fp);
    }
  }
  delete [] thetas;
  delete [] val;
  return esd;
}
コード例 #4
0
ファイル: contamination2.cpp プロジェクト: ANGSD/angsd
/*
 * Tallies per-site base counts from d, runs two Fisher exact tests and prints
 * four sets of estimates (MoM, ML and the values returned by jackMom/jackML,
 * labelled SE in the output) to stderr.
 *
 * d        - input data; reads d.cn (four counts per site), d.pos, d.dist and
 *            d.myMap. The freq field of d.myMap entries is modified in place
 *            (see the flip in the first loop).
 * nThreads - forwarded to jackML.
 * nJack    - forwarded to jackMom and jackML.
 *
 * "Method1" works on the observed mismatch counts (error1), "Method2" on the
 * values drawn by simrbinom (error2). Sites are consumed in consecutive groups
 * of 9 (d.cn.size()/9 groups); within a group the entry with d.dist==0 is the
 * SNP site and the remaining entries are flanking sites.
 */
void analysis(dat &d,int nThreads,int nJack) {
  int *rowSum = new int[d.cn.size()];
  int *rowMax = new int[d.cn.size()];
  int *rowMaxW = new int[d.cn.size()];
  int *error1 = new int[d.cn.size()];//number of non most frequent observed bases 
  int *error2 = new int[d.cn.size()];//sampled 
  size_t mat1[4]={0,0,0,0};//matrix used for fisher for method 1
  size_t mat2[4]={0,0,0,0};//matrix used for fisher for method 2
  size_t tab[2] = {0,0};//used for debug

  // Pass 1: per-site totals, most frequent base, mismatch counts and the
  // 2x2 tables (SNP site vs. flanking site) for both methods.
  for(int i=0;i<d.cn.size();i++) {
    int s =d.cn[i][0];
    int max=s;
    int which=0;
    for(int j=1;j<4;j++){
      s += d.cn[i][j];
      if(d.cn[i][j]>max){
	max=d.cn[i][j];
	which=j;
      }
    }
    rowSum[i] = s;
    rowMax[i]=max;
    rowMaxW[i]=which;
    aMap::iterator it= d.myMap.find(d.pos[i]);
    if(it!=d.myMap.end()){//if site is hapmap site
      // fprintf(stderr,"posi:%d wmax:%d all1:%d freq:%f\n",it->first,rowMaxW[i],it->second.allele1,it->second.freq);
      //if the most frequently occurring base is the same as allele1 from hapmap, then set freq to 1-freq
      if(rowMaxW[i]==it->second.allele1)
	//it->first C++ syntax for getting key of iterator
	//it->second C++ syntax for getting value of key of iterator, key->value: key=pos,value=hapSite
 	it->second.freq=1-it->second.freq;
      else
	it->second.freq=it->second.freq;
      // NOTE(review): the else branch above is a self-assignment and has no effect.
      // NOTE(review): the flip is written back into d.myMap, so it persists after this call.
      // fprintf(stderr,"posi:%d wmax:%d all1:%d freq:%f\n",it->first,rowMaxW[i],it->second.allele1,it->second.freq);
      // exit(0);
    }

    error1[i] = rowSum[i]-rowMax[i];
    // NOTE(review): a site with rowSum[i]==0 would make this argument 0/0 - confirm such sites cannot occur.
    error2[i] = simrbinom((1.0*error1[i])/(1.0*rowSum[i]));
    //  fprintf(stdout,"simrbinom\t%d %d %d %d %d %d\n",rowSum[i],rowMax[i],rowMaxW[i],error1[i],error2[i],d.dist[i]);
    if(error1[i]>0)
      tab[1]++;
    else
      tab[0]++;
    // mat*[0..1] collect SNP-site counts, mat*[2..3] flanking-site counts.
    // NOTE(review): 1-error2[i] is added to an unsigned total; presumably simrbinom returns 0 or 1 - confirm.
    if(d.dist[i]==0){//this is a snpsite
      mat1[0] +=error1[i];
      mat1[1] +=rowSum[i]-error1[i];
      mat2[0] +=error2[i];
      mat2[1] +=1-error2[i];
      // fprintf(stdout,"rs %d %d %d %d %d %d %d %f %d %d\n",d.pos[i],rowSum[i],rowMax[i],rowMaxW[i],error1[i],error2[i],d.dist[i],it->second.freq,it->second.allele1,it->second.allele2);
    }else{
      
      mat1[2] +=error1[i];
      mat1[3] +=rowSum[i]-error1[i];
      mat2[2] += error2[i];
      mat2[3] += 1-error2[i];
    }
  }
#if 0
  fprintf(stderr,"tab:%lu %lu\n",tab[0],tab[1]);
  fprintf(stderr,"mat: %lu %lu %lu %lu\n",mat1[0],mat1[1],mat1[2],mat1[3]);
  fprintf(stderr,"mat2: %lu %lu %lu %lu\n",mat2[0],mat2[1],mat2[2],mat2[3]);
#endif
  // Fisher exact test on each 2x2 table. The results are only consumed by the
  // commented-out prints below.
  int n11, n12, n21, n22;
  double left, right, twotail, prob;
  //  fprintf(stderr,"--------\nMAIN RESULTS: Fisher exact test:\n");
  n11=mat1[0];n12=mat1[2];n21=mat1[1];n22=mat1[3];
  prob = kt_fisher_exact(n11, n12, n21, n22, &left, &right, &twotail);
  //  fprintf(stdout,"Method\t n11 n12 n21 n22 prob left right twotail\n");
  //fprintf(stdout,"%s\t%d\t%d\t%d\t%d\t%.6g\t%.6g\t%.6g\t%.6g\n", "method1", n11, n12, n21, n22,
  //		prob, left, right, twotail);

  n11=mat2[0];n12=mat2[2];n21=mat2[1];n22=mat2[3];
  prob = kt_fisher_exact(n11, n12, n21, n22, &left, &right, &twotail);
//fprintf(stdout,"%s\t%d\t%d\t%d\t%d\t%.6g\t%.6g\t%.6g\t%.6g\n", "method2", n11, n12, n21, n22,
  //			prob, left, right, twotail);

  //estimate how much contamination
  double c= mat1[2]/(1.0*(mat1[2]+mat1[3]));//this is error for flanking site
  double err= mat1[0]/(1.0*(mat1[0]+mat1[1]));//this is error for snpsite
  fprintf(stderr,"Mismatch_rate_for_flanking:%f MisMatch_rate_for_snpsite:%f \n",c,err);

  // Per-group arrays, one slot per block of 9 consecutive sites.
  int *err0 =new int[d.cn.size()/9];//<-number of bases differing from the most frequent base at snpsite
  int *err1 =new int[d.cn.size()/9];//<-number of bases differing from the most frequent base at flanking sites
  int *d0 =new int[d.cn.size()/9];//<-seqdepth for snpsite
  int *d1 =new int[d.cn.size()/9];//<-seqdepth for flanking
  double *freq =new double[d.cn.size()/9];//<- freq for snpsite

  // Pass 2 (method 1): collapse each group of 9 into SNP-site values and
  // summed flanking values, using the observed mismatches (error1).
  // NOTE(review): if a group contains no d.dist==0 entry, err0/d0/freq keep
  // whatever the fresh allocation held for that slot.
  for(int i=0;i<d.cn.size()/9;i++){
    int adj=0;
    int dep=0;
    for(int j=0;j<9;j++){
      
      if(d.dist[i*9+j]!=0){//<- flanking
	adj += error1[i*9+j];
	dep += rowSum[i*9+j];
      }else{ //snpsite
	err0[i] = error1[i*9+j];
	d0[i] = rowSum[i*9+j];
	
	// NOTE(review): the find() result is dereferenced without comparing it to d.myMap.end().
	freq[i] =d.myMap.find(d.pos[i*9+j])->second.freq;
	
      }
    }
    err1[i] =adj;
    d1[i] = dep;
#if 0
    if(it==d.myMap.end()){
      fprintf(stderr,"Problem finding:%d\n",d.pos[i]);
      exit(0);
    }
#endif
    //    fprintf(stdout,"cont\t%d\t%d\t%d\t%d\t%f\n",err0[i],err1[i],d0[i],d1[i],freq[i]);
    
  }


  // ap only stores pointers to the arrays above, so later in-place changes to
  // those arrays are visible through ap (and through tp.ap below).
  allPars ap;
  ap.len=d.cn.size()/9;
  ap.seqDepth = d0;
  ap.nonMajor = err0;
  ap.freq = freq;
  ap.eps = c;
  ap.newllh =0;
  ap.e1 = err1;
  ap.d1=d1;

  double mom,momJack,ML,mlJack,val;

  // Method 1, newllh=0. tp.skip=-1 is used for the estimate on all groups,
  // whereas the jackML code paths set skip to each group index in turn.
  ap.newllh =0;
  mom= likeOldMom(d.cn.size()/9,d0,err0,freq,c,-1);
  momJack = jackMom(&ap,nJack);
  tpars tp;tp.ap=&ap;tp.skip=-1;
  //  print(tp.ap,"asdff1");
  kmin_brent(myfun,1e-6,0.5-1e-6,&tp,0.0001,&ML);
  mlJack= jackML(&ap,nThreads,NULL,nJack);
  fprintf(stderr,"\nMethod1: old_llh Version: MoM:%f SE(MoM):%e ML:%f SE(ML):%e",mom,momJack,ML,mlJack);
  
  // Method 1, newllh=1. val is only read by the commented-out print.
  ap.newllh =1;
  mom=likeNewMom(d.cn.size()/9,d0,err0,freq,c,-1);
  momJack= jackMom(&ap,nJack);
  //marshall(&ap,"prem1");
  val=kmin_brent(myfun,1e-6,0.5-1e-6,&tp,0.0001,&ML);
  //  fprintf(stderr,"\nM1: ML:%f VAL:%f\n",ML,val);
  mlJack= jackML(&ap,nThreads,NULL,nJack);
  fprintf(stderr,"\nMethod1: new_llh Version: MoM:%f SE(MoM):%e ML:%f SE(ML):%e",mom,momJack,ML,mlJack);
  // fread(error2,sizeof(int),d.cn.size(),fopen("error2.bin","rb"));
  //for(int i=0;0&&i<d.cn.size();i++)
  //  fprintf(stdout,"pik\t%d\n",error2[i]);
  //exit(0);
  // Pass 3 (method 2): refill the same per-group arrays from the sampled
  // values (error2), with depth fixed to 1 for the SNP site and 8 for the
  // flanking sites.
  for(int i=0;i<d.cn.size()/9;i++){
    int adj=0;
    for(int j=0;j<9;j++){
      if(d.dist[i*9+j]!=0){
	adj += error2[i*9+j];
      }else{
	err0[i] = error2[i*9+j];
	// NOTE(review): same unchecked find() dereference as in pass 2.
	freq[i] =d.myMap.find(d.pos[i*9+j])->second.freq;
	//	fprintf(stderr,"freq:%f\n",freq[i]);
      }
    }
    err1[i] =adj;
    d0[i] = 1; 
    d1[i] = 8;
#if 0
    if(it==d.myMap.end()){
      fprintf(stderr,"Problem finding:%d\n",d.pos[i]);
      exit(0);
    }
#endif
    //    fprintf(stdout,"cont\t%d\t%d\t%d\t%d\t%f\n",err0[i],err1[i],d0[i],d1[i],freq[i]);
    
  }

  // These pointers already referred to the same arrays; the assignments
  // restate the association after the refill.
  ap.seqDepth = d0;
  ap.nonMajor = err0;
  ap.e1=err1;
  ap.d1=d1;
  ap.freq = freq;

  // Method 2, newllh=0.
  ap.newllh =0;
  mom= likeOldMom(d.cn.size()/9,d0,err0,freq,c,-1);
  momJack = jackMom(&ap,nJack);
  //print(tp.ap,"asdff2");
  //  exit(0);
  val = kmin_brent(myfun,1e-6,0.5-1e-6,&tp,0.0001,&ML);
  //fprintf(stderr,"\nML2:%f VAL:%f\n",ML,val);
  // exit(0);
  //FILE *fp = fopen("heyaa","w");  print(&ap,fp);fclose(fp);
  //return;
  mlJack= jackML(&ap,nThreads,NULL,nJack);
  fprintf(stderr,"\nMethod2: old_llh Version: MoM:%f SE(MoM):%e ML:%f SE(ML):%e",mom,momJack,ML,mlJack);

  // Method 2, newllh=1.
  ap.newllh =1;
  mom=likeNewMom(d.cn.size()/9,d0,err0,freq,c,-1);
  momJack= jackMom(&ap,nJack);
  kmin_brent(myfun,1e-6,0.5-1e-6,&tp,0.0001,&ML);
  mlJack= jackML(&ap,nThreads,NULL,nJack);
  fprintf(stderr,"\nMethod2: new_llh Version: MoM:%f SE(MoM):%e ML:%f SE(ML):%e\n",mom,momJack,ML,mlJack);
 

  // Release the per-site arrays ...
  delete [] rowSum;
  delete [] rowMax;
  delete [] rowMaxW;
  delete [] error1;
  delete [] error2;
  

  // ... and the per-group arrays.
  delete [] err0;
  delete [] err1;
  delete [] d0;
  delete [] d1;
  delete [] freq;



}