// Builds the per-site missingness mask and log allele frequencies for pc.
//
// Reads : pc.nSites, pc.nInd and pc.gl, where pc.gl[i][s*3+o] is the o-th of
//         three values for individual i at site s.
// Writes (all allocated here with new[]; nothing in this function frees them):
//   pc.keeps[s][i] - 0 when the three values of individual i at site s lie
//                    within misTol of each other (treated as missing), else 1
//   pc.keepInd[s]  - number of individuals with keeps[s][i]==1 at site s
//   pc.freq[s]     - log(af), af being the value returned by emFrequency
//   pc.qerf[s]     - log(1-af)
// Aborts through assert if emFrequency returns a value outside [0,1].
void setFreq(perChr &pc){

  pc.keeps=new char*[pc.nSites]; // array nSites x nInd 0 if missing info
  pc.keepInd = new int[pc.nSites];

  for(int s=0;s<pc.nSites;s++) {
    pc.keeps[s] = new char[pc.nInd];
    int nKeep =0;
    for(int i=0;i<pc.nInd;i++){
      const double *g = &pc.gl[i][s*3];
      double mmin=std::min(g[0],std::min(g[1],g[2]));
      double mmax=std::max(g[0],std::max(g[1],g[2]));
      // All three values (nearly) equal: this individual carries no information here.
      if(fabs(mmax-mmin)<misTol)
        pc.keeps[s][i] =0;
      else{
        pc.keeps[s][i] =1;
        nKeep++;
      }
    }
    pc.keepInd[s] = nKeep;
  }

  // Scratch buffer holding one site laid out as nInd x 3, the layout handed to
  // emFrequency. Heap allocated: the original used a variable-length array,
  // which is not standard C++ and places an input-sized buffer on the stack.
  double *tmp = new double[3*pc.nInd];
  pc.freq = new double[ pc.nSites];
  pc.qerf = new double [pc.nSites];
  for(int s=0;s<pc.nSites;s++){
    for(int i=0;i<pc.nInd;i++)
      for(int o=0;o<3;o++)
        tmp[i*3+o] = pc.gl[i][s*3+o];
    double af = emFrequency(tmp,pc.nInd,MAF_ITER,MAF_START,pc.keeps[s],pc.keepInd[s]);
    assert(af<=1 && af>=0);
    pc.freq[s] = log(af);
    pc.qerf[s] = log(1-af);
  }
  delete [] tmp;

}
// Example #2
// 0
// Estimates per-site allele frequencies, and optionally per-site likelihood
// ratio statistics, with every estimator enabled in the doMaf bit mask:
//   doMaf&1 -> likeFixedMinor_bfgs       (stored in results->freq->pml)
//   doMaf&2 -> emFrequency               (stored in results->freq->pEM)
//   doMaf&4 -> likeNoFixedMinor_bfgs     (stored in results->freq->pmlun)
//   doMaf&8 -> emFrequencyNoFixed        (stored in results->freq->pEMun)
// When doSNP is set, the matching *SNP array receives
//   2*like(0.0) - 2*like(estimate)
// computed with likeFixedMinor / likeNoFixedMinor.
//
// Side effects on pars:
//   - pars->keepSites[s] is overwritten, for every site that was non-zero on
//     entry, with the number of individuals that have data at that site.
//   - pars->results->freq->* receive the arrays above (NULL when not selected).
//   - pars->results->asso->freq / ->lrt_snp receive copies of the estimate and
//     statistic of the highest selected doMaf bit.
// All result arrays are allocated with new[] and handed over to pars->results;
// they are not freed here. Every array is zero-initialised so that sites that
// are skipped for lack of data read as 0.0 rather than as indeterminate values.
void frequency::likeFreq(funkyPars *pars){//method=1: bfgs_known ;method=2 em;method=4 bfgs_unknown

  //here only the likelihoods for the three genotypes are used.
  double **loglike = NULL;
  if(inputIsBeagle==1)
    loglike= pars->likes;
  else
    loglike=angsd::get3likesRescale(pars);
  assert(loglike!=NULL);

  //the pml frequencies
  double *pml=NULL;
  double *pEM =NULL;
  double *pmlun =NULL;
  double *pEMun =NULL;
  double *pmlSNP=NULL;
  double *pEMSNP =NULL;
  double *pmlunSNP =NULL;
  double *pEMunSNP =NULL;
  // The trailing () value-initialises every element to 0.0.
  double *returnFreq = new double[pars->numSites](); //return value not to be deleted
  double *lrt_snp = new double[pars->numSites](); //return value not to be deleted

  if(doMaf &1){
    pml = new double[pars->numSites](); // numeric optimisation
    if(doSNP)
      pmlSNP = new double[pars->numSites](); // numeric optimisation
  }
  if(doMaf &2){
    pEM = new double[pars->numSites](); //em algorithm
    if(doSNP)
      pEMSNP = new double[pars->numSites](); //em algorithm
  }
  if(doMaf &4){
    pmlun = new double[pars->numSites](); // numeric optimisation
    if(doSNP)
      pmlunSNP = new double[pars->numSites](); // numeric optimisation
  }
  if(doMaf &8){
    pEMun = new double[pars->numSites](); // EM optimisation
    if(doSNP)
      pEMunSNP = new double[pars->numSites](); // EM optimisation
  }

  // number of individuals with data
  int *keepInd = pars->keepSites;
  // Per-individual 0/1 mask for the current site. Heap allocated: the original
  // used a variable-length array, which is not standard C++.
  int *keepList = new int[pars->nInd];

  //loop though all sites and check if we have data.
  for(int s=0;s<pars->numSites;s++) {
    if(keepInd[s]==0)//if we dont have any information
      continue;
    keepInd[s]=0;// recount below
    for(int i=0 ; i<pars->nInd ;i++) {//DRAGON CHECK THIS
      // An individual whose three log-likelihoods sum to (almost) zero is
      // treated as having no data at this site.
      if(loglike[s][i*3+0]+loglike[s][i*3+1]+loglike[s][i*3+2]>-0.0001){
        keepList[i]=0;
      }
      else{
        keepList[i]=1;
        keepInd[s]++;
      }
    }

    if(doMaf &1) {
      pml[s]= likeFixedMinor_bfgs(loglike[s],pars->nInd);
      // NOTE(review): this branch tests doSNP&1 whereas the allocation above
      // and the other estimators test doSNP!=0; kept as in the original -
      // confirm which is intended.
      if(doSNP &1)
        pmlSNP[s] = 2*likeFixedMinor(0.0,loglike[s],pars->nInd)-2*likeFixedMinor(pml[s],loglike[s],pars->nInd);
    }

    if( doMaf &2) {
      // Start the EM from the supplied estimate when one is available.
      if(pars->phat!=NULL)
        pEM[s]=emFrequency(loglike[s],pars->nInd,emIter,pars->phat[s],keepList,keepInd[s]);
      else
        pEM[s]=emFrequency(loglike[s],pars->nInd,emIter,EM_START,keepList,keepInd[s]);

      if(doSNP)
        pEMSNP[s] = 2*likeFixedMinor(0.0,loglike[s],pars->nInd)-2*likeFixedMinor(pEM[s],loglike[s],pars->nInd);
    }

    if( doMaf &4) {
      pmlun[s]= likeNoFixedMinor_bfgs(pars->likes[s],pars->nInd,pars->major[s]);
      if(doSNP)
        pmlunSNP[s]= 2*likeNoFixedMinor(0.0,pars->likes[s],pars->nInd,pars->major[s])-2*likeNoFixedMinor(pmlun[s],pars->likes[s],pars->nInd,pars->major[s]);
    }

    if( doMaf &8 ){
      if(pars->phat!=NULL)
        pEMun[s]=emFrequencyNoFixed(pars->likes[s],pars->nInd,emIter,pars->phat[s],keepList,keepInd[s],pars->major[s],pars->minor[s]);
      else
        pEMun[s]=emFrequencyNoFixed(pars->likes[s],pars->nInd,emIter,EM_START,keepList,keepInd[s],pars->major[s],pars->minor[s]);
      if(doSNP)
        pEMunSNP[s] = 2*likeNoFixedMinor(0.0,pars->likes[s],pars->nInd,pars->major[s])-2*likeNoFixedMinor(pEMun[s],pars->likes[s],pars->nInd,pars->major[s]);
    }

  }
  delete [] keepList;

  pars->results->freq->pml=pml;
  pars->results->freq->pEM=pEM;
  pars->results->freq->pmlun=pmlun;
  pars->results->freq->pEMun=pEMun;
  pars->results->freq->pmlSNP=pmlSNP;
  pars->results->freq->pEMSNP=pEMSNP;
  pars->results->freq->pmlunSNP=pmlunSNP;
  pars->results->freq->pEMunSNP=pEMunSNP;

  // loglike is only owned by this function when it was produced by
  // get3likesRescale; in the beagle case it aliases pars->likes.
  if(inputIsBeagle!=1){
    for(int i=0;i<pars->numSites;i++)
      delete [] loglike[i];
    delete [] loglike;
  }

  //anders: this is less stupid but slower
  //this is abit stupid, but in the case where multiple MAFS has been selected, we use the max(method)
  //thorfinn: this is very stupid
  // The choice of estimator does not depend on the site, so pick it once.
  const double *chosenFreq = NULL;
  if(doMaf &8)
    chosenFreq = pEMun;
  else if(doMaf &4)
    chosenFreq = pmlun;
  else if(doMaf &2)
    chosenFreq = pEM;
  else if(doMaf &1)
    chosenFreq = pml;
  if(chosenFreq!=NULL)
    for(int s=0;s<pars->numSites;s++)
      returnFreq[s]=chosenFreq[s];

  //thorfinn april 16 2012
  const double *chosenLrt = NULL;
  if(doSNP){
    if(doMaf &8)
      chosenLrt = pEMunSNP;
    else if(doMaf &4)
      chosenLrt = pmlunSNP;
    else if(doMaf &2)
      chosenLrt = pEMSNP;
    else if(doMaf &1)
      chosenLrt = pmlSNP;
  }
  if(chosenLrt!=NULL)
    for(int s=0;s<pars->numSites;s++)
      lrt_snp[s]=chosenLrt[s];

  pars->results->asso->freq=returnFreq;
  pars->results->asso->lrt_snp = lrt_snp;//thorfinn add 16april 2012

}