/*
  Builds the per-site missing-data mask and the per-site allele-frequency
  estimates for one chromosome.

  Reads:   pc.nSites, pc.nInd, pc.gl (indexed gl[individual][site*3 + genotype]).
  Writes (all freshly allocated with new[] and NOT freed here):
    pc.keeps[s][i]  1 if individual i is used at site s, 0 otherwise
    pc.keepInd[s]   number of individuals with keeps[s][i]==1
    pc.freq[s]      log(af)   where af is the value returned by emFrequency
    pc.qerf[s]      log(1-af)

  An individual is flagged as missing at a site when the spread between the
  largest and the smallest of its three values is below misTol.

  NOTE: the assert allows af==0 and af==1, in which case one of freq/qerf
  becomes -inf.
*/
void setFreq(perChr &pc){
  pc.keeps = new char*[pc.nSites]; // array nSites x nInd, 0 if missing info
  pc.keepInd = new int[pc.nSites];

  for(int s=0;s<pc.nSites;s++){
    pc.keeps[s] = new char[pc.nInd];
    int nKeep = 0;
    for(int i=0;i<pc.nInd;i++){
      double mmin = std::min(pc.gl[i][s*3],std::min(pc.gl[i][s*3+1],pc.gl[i][s*3+2]));
      double mmax = std::max(pc.gl[i][s*3],std::max(pc.gl[i][s*3+1],pc.gl[i][s*3+2]));
      if(fabs(mmax-mmin)<misTol)
        pc.keeps[s][i] = 0; // all three values (almost) equal: treated as missing
      else{
        pc.keeps[s][i] = 1;
        nKeep++;
      }
    }
    pc.keepInd[s] = nKeep;
  }

  // Scratch buffer holding the values of one site laid out as
  // [ind0 g0,g1,g2, ind1 g0,g1,g2, ...]. Allocated on the heap: a
  // variable-length array is not standard C++ and its size depends on the
  // input, so it does not belong on the stack.
  double *tmp = new double[3*pc.nInd];

  pc.freq = new double[pc.nSites];
  pc.qerf = new double[pc.nSites];
  for(int s=0;s<pc.nSites;s++){
    // pc.gl is stored per individual; gather the values of site s.
    for(int i=0;i<pc.nInd;i++)
      for(int o=0;o<3;o++)
        tmp[i*3+o] = pc.gl[i][s*3+o];

    double af = emFrequency(tmp,pc.nInd,MAF_ITER,MAF_START,pc.keeps[s],pc.keepInd[s]);
    assert(af<=1 && af>=0);
    pc.freq[s] = log(af);
    pc.qerf[s] = log(1-af);
  }

  delete [] tmp;
}
/*
  Estimates per-site allele frequencies (and, when doSNP is set, the
  likelihood-ratio statistic against a frequency of 0.0) for every site in
  pars.

  doMaf is a bit mask selecting the estimators that are run:
    1: likeFixedMinor_bfgs     -> results->freq->pml    / pmlSNP
    2: emFrequency             -> results->freq->pEM    / pEMSNP
    4: likeNoFixedMinor_bfgs   -> results->freq->pmlun  / pmlunSNP
    8: emFrequencyNoFixed      -> results->freq->pEMun  / pEMunSNP
  Estimators 1 and 2 work on the three-genotype likelihoods (loglike);
  estimators 4 and 8 work on pars->likes together with pars->major/minor.

  Side effects:
    - pars->keepSites[s] is overwritten, for every site that was non-zero on
      entry, with the number of individuals that have data at that site.
    - pars->results->freq->* and pars->results->asso->{freq,lrt_snp} are set to
      arrays allocated here with new[]; they are not freed by this function.
      Arrays of estimators that were not requested are left as NULL.
    - results->asso->freq / lrt_snp take the values of the highest selected
      doMaf bit (8 > 4 > 2 > 1).

  All result arrays are zero-initialised, so sites that are skipped
  (keepSites[s]==0) report 0 instead of indeterminate values.
*/
void frequency::likeFreq(funkyPars *pars){
  // Here only the likelihoods for the three genotypes are used.
  double **loglike = NULL;
  if(inputIsBeagle==1)
    loglike = pars->likes;
  else
    loglike = angsd::get3likesRescale(pars);
  assert(loglike!=NULL);

  // Per-estimator result arrays; stay NULL unless the estimator is selected.
  double *pml=NULL;
  double *pEM=NULL;
  double *pmlun=NULL;
  double *pEMun=NULL;
  double *pmlSNP=NULL;
  double *pEMSNP=NULL;
  double *pmlunSNP=NULL;
  double *pEMunSNP=NULL;

  // The trailing () value-initialises (zeroes) the arrays: skipped sites are
  // never written in the main loop but are still copied/published below.
  double *returnFreq = new double[pars->numSites](); // return value not to be deleted
  double *lrt_snp = new double[pars->numSites]();    // return value not to be deleted

  if(doMaf &1){
    pml = new double[pars->numSites](); // numeric optimisation
    if(doSNP)
      pmlSNP = new double[pars->numSites]();
  }
  if(doMaf &2){
    pEM = new double[pars->numSites](); // EM algorithm
    if(doSNP)
      pEMSNP = new double[pars->numSites]();
  }
  if(doMaf &4){
    pmlun = new double[pars->numSites](); // numeric optimisation
    if(doSNP)
      pmlunSNP = new double[pars->numSites]();
  }
  if(doMaf &8){
    pEMun = new double[pars->numSites](); // EM optimisation
    if(doSNP)
      pEMunSNP = new double[pars->numSites]();
  }

  // Number of individuals with data, per site. This aliases pars->keepSites,
  // so the counts written below are visible to the caller.
  int *keepInd = pars->keepSites;

  // Per-individual keep flags for the current site. Heap allocated: a
  // variable-length array is not standard C++.
  int *keepList = new int[pars->nInd];

  // Loop through all sites and check if we have data.
  for(int s=0;s<pars->numSites;s++){
    if(keepInd[s]==0) // no information at this site
      continue;

    // Recount the individuals that carry information at this site.
    // NOTE: the original author flagged this check for review ("DRAGON CHECK THIS").
    keepInd[s]=0;
    for(int i=0;i<pars->nInd;i++){
      keepList[i]=1;
      // A sum of the three log-likelihoods this close to zero is treated as missing.
      if(loglike[s][i*3+0]+loglike[s][i*3+1]+loglike[s][i*3+2]>-0.0001)
        keepList[i]=0;
      else
        keepInd[s]++;
    }

    if(doMaf &1){
      pml[s] = likeFixedMinor_bfgs(loglike[s],pars->nInd);
      // Guard matches the allocation of pmlSNP and the other three estimators
      // (was "doSNP &1", which could leave pmlSNP unfilled yet still published).
      if(doSNP)
        pmlSNP[s] = 2*likeFixedMinor(0.0,loglike[s],pars->nInd)-2*likeFixedMinor(pml[s],loglike[s],pars->nInd);
    }

    if(doMaf &2){
      // Start the EM from the supplied estimate when one is available.
      if(pars->phat!=NULL)
        pEM[s] = emFrequency(loglike[s],pars->nInd,emIter,pars->phat[s],keepList,keepInd[s]);
      else
        pEM[s] = emFrequency(loglike[s],pars->nInd,emIter,EM_START,keepList,keepInd[s]);
      if(doSNP)
        pEMSNP[s] = 2*likeFixedMinor(0.0,loglike[s],pars->nInd)-2*likeFixedMinor(pEM[s],loglike[s],pars->nInd);
    }

    if(doMaf &4){
      pmlun[s] = likeNoFixedMinor_bfgs(pars->likes[s],pars->nInd,pars->major[s]);
      if(doSNP)
        pmlunSNP[s] = 2*likeNoFixedMinor(0.0,pars->likes[s],pars->nInd,pars->major[s])-2*likeNoFixedMinor(pmlun[s],pars->likes[s],pars->nInd,pars->major[s]);
    }

    if(doMaf &8){
      if(pars->phat!=NULL)
        pEMun[s] = emFrequencyNoFixed(pars->likes[s],pars->nInd,emIter,pars->phat[s],keepList,keepInd[s],pars->major[s],pars->minor[s]);
      else
        pEMun[s] = emFrequencyNoFixed(pars->likes[s],pars->nInd,emIter,EM_START,keepList,keepInd[s],pars->major[s],pars->minor[s]);
      if(doSNP)
        pEMunSNP[s] = 2*likeNoFixedMinor(0.0,pars->likes[s],pars->nInd,pars->major[s])-2*likeNoFixedMinor(pEMun[s],pars->likes[s],pars->nInd,pars->major[s]);
    }
  }

  delete [] keepList;

  pars->results->freq->pml=pml;
  pars->results->freq->pEM=pEM;
  pars->results->freq->pmlun=pmlun;
  pars->results->freq->pEMun=pEMun;
  pars->results->freq->pmlSNP=pmlSNP;
  pars->results->freq->pEMSNP=pEMSNP;
  pars->results->freq->pmlunSNP=pmlunSNP;
  pars->results->freq->pEMunSNP=pEMunSNP;

  // loglike is only owned by this function when it was produced by
  // get3likesRescale; in beagle mode it is pars->likes itself.
  if(inputIsBeagle!=1){
    for(int i=0;i<pars->numSites;i++)
      delete [] loglike[i];
    delete [] loglike;
  }

  // When multiple MAF estimators have been selected, the one with the highest
  // doMaf bit is reported. The choice does not depend on the site, so it is
  // made once instead of inside the copy loop.
  double *freqSrc = NULL;
  if(doMaf &8)
    freqSrc = pEMun;
  else if(doMaf &4)
    freqSrc = pmlun;
  else if(doMaf &2)
    freqSrc = pEM;
  else if(doMaf &1)
    freqSrc = pml;
  if(freqSrc!=NULL)
    for(int s=0;s<pars->numSites;s++)
      returnFreq[s] = freqSrc[s];

  // Same priority for the SNP likelihood-ratio statistic (only with doSNP).
  double *lrtSrc = NULL;
  if(doSNP){
    if(doMaf &8)
      lrtSrc = pEMunSNP;
    else if(doMaf &4)
      lrtSrc = pmlunSNP;
    else if(doMaf &2)
      lrtSrc = pEMSNP;
    else if(doMaf &1)
      lrtSrc = pmlSNP;
  }
  if(lrtSrc!=NULL)
    for(int s=0;s<pars->numSites;s++)
      lrt_snp[s] = lrtSrc[s];

  pars->results->asso->freq = returnFreq;
  pars->results->asso->lrt_snp = lrt_snp;
}