예제 #1
0
/*
 * Iteratively estimate a frequency p from per-individual log-scale triples.
 *
 * loglike  : array read as loglike[i*3 + g] for i in [0, numInds), g in {0,1,2}.
 *            Each value is added to log-scale weights 2*log(1-p),
 *            log(2)+log(p)+log(1-p) and 2*log(p), so it is treated as a log
 *            value for "genotype" g of individual i.
 * numInds  : number of individuals (triples) in loglike.
 * iter     : maximum number of update rounds; with iter <= 0 no update is done
 *            and `start` is returned (unless it is NaN).
 * start    : initial value of p.
 * keep     : optional mask; when non-NULL, individuals with keep[i]==0 are
 *            skipped. May be NULL to use everyone.
 * keepInd  : divisor used for the update (p = sum / keepInd).
 *            NOTE(review): presumably the number of kept individuals; the
 *            function does not verify this against `keep`.
 *
 * Returns 0.0 immediately when keepInd == 0, -999 when the estimate ends up
 * NaN (a diagnostic is written to stderr), otherwise the last computed p.
 *
 * addProtect2/addProtect3 are defined elsewhere; they are presumably
 * underflow-safe log(exp(a)+exp(b)[+exp(c)]) helpers -- TODO confirm.  Under
 * that reading each individual contributes (P1 + 2*P2) / (2*(P0+P1+P2)).
 */
double emFrequency(double *loglike,int numInds, int iter,double start,char *keep,int keepInd){

  if(keepInd == 0)
    return 0.0;

  /* Stop once p moves by less than this (absolute) between two rounds. */
  const double accu=0.00001;

  double p=start;
  double temp_p=start;   /* value of p from the previous round */

  for(int it=0;it<iter;it++){
    const double pl = log(p);
    const double mpl = log(1-p);
    double sum = 0;

    for(int i=0;i<numInds;i++){
      if(keep!=NULL && keep[i]==0)
        continue;
      const double W0=loglike[i*3+0]+2*mpl;
      const double W1=loglike[i*3+1]+M_LN2+pl+mpl;
      const double W2=loglike[i*3+2]+2*pl;
      sum+= exp(addProtect2(W1,M_LN2+W2)-M_LN2-addProtect3(W0,W1,W2));
    }

    p=sum/keepInd;

    /* |p - temp_p| < accu, written so a NaN p never satisfies it and the
       loop simply runs out of iterations before the NaN check below. */
    const double delta = p-temp_p;
    if(delta<accu && -delta<accu)
      break;
    temp_p=p;
  }

  if(isnan(p)){
    /* Report the failure and hand back the -999 sentinel instead of exiting. */
    fprintf(stderr,"[emFrequency] caught nan will not exit\n");
    fprintf(stderr,"logLike (3*nInd). nInd=%d\n",numInds);
    fprintf(stderr,"keepList (nInd)\n");
    fprintf(stderr,"used logLike (3*length(keep))=%d\n",keepInd);
    p=-999;
  }

  return(p);
}
예제 #2
0
파일: EM.cpp 프로젝트: fgvieira/ngsF
/*
 * One EM pass over a chunk of sites, accumulating per-site frequency sums and
 * per-individual F (inbreeding) sums into `output`.
 *
 * pars                : run parameters; this function reads n_ind, approx_EM,
 *                       verbose and locks/unlocks _F_lock.
 * chunk_data          : chunk_data[s][i*3 + g] for site s of the chunk,
 *                       individual i, g in {0,1,2}. Values are added to
 *                       log(prior), i.e. they are used as log-scale values.
 * chunk_abs_start_pos : offset added to the chunk-local site index to index
 *                       the per-site arrays in `output`.
 * chunk_size          : number of sites in chunk_data.
 * output              : reads site_freq[] and indF[]; updates
 *                       site_tmpprob_var[], site_freq_num[], site_freq_den[],
 *                       indF_num[] and indF_den[].
 * iter                : iteration number; iter == 0 is the initial guess and
 *                       uses uniform priors (p0 = p1 = p2 = 1).
 *
 * Sites whose current frequency is exactly 0 are skipped.
 *
 * NOTE(review): a frequency of exactly 1 is not skipped, and several
 * denominators below contain (1-p)*p -- confirm the caller keeps p < 1.
 * addProtect3 and check_interv are defined elsewhere; addProtect3 is
 * presumably log(exp(a)+exp(b)+exp(c)) and check_interv presumably clamps its
 * argument to a valid interval -- TODO confirm.
 */
void EM_iter(params *pars, double **chunk_data, uint64_t chunk_abs_start_pos, uint64_t chunk_size, out_data *output, int iter) {

	// Loop over all sites
	for(uint64_t s = 0; s < chunk_size; s++) {
		uint64_t abs_s = s + chunk_abs_start_pos;
		double p = output->site_freq[abs_s];

		// Skip site if freq == 0
		if (p == 0) continue;

		// Loop over all individuals
		for(uint64_t i = 0; i < pars->n_ind; i++) {
			double F = output->indF[i];
			// Genotype priors given frequency p and individual F
			double p0 = pow(1-p,2) + p*(1-p)*F;
			double p1 = 2*p*(1-p) * (1 - F);
			double p2 = pow(p,2) + p*(1-p)*F;

			// If initial guess assumes uniform priors
			if(iter == 0) p0 = p1 = p2 = 1;

			// Normalised posterior of each genotype (prior * exp(log value - log norm))
			double norm = addProtect3(log(p0)+chunk_data[s][i*3+0], log(p1)+chunk_data[s][i*3+1], log(p2)+chunk_data[s][i*3+2]);
			double pp0 = p0 * exp(chunk_data[s][i*3+0]-norm);
			double pp1 = p1 * exp(chunk_data[s][i*3+1]-norm);
			double pp2 = p2 * exp(chunk_data[s][i*3+2]-norm);

			// Track the largest (1 - pp0) seen at this site
			output->site_tmpprob_var[abs_s] = fmax(output->site_tmpprob_var[abs_s], 1 - pp0);

			double IBD = 0;
			double indF_num = 0;
			double indF_den = 0;
			// P(IBD)
			if(iter == 0) { //if initial guess do not use any prior
				IBD = check_interv(1 - (pp1/(2*(1-p)*p)), false);
				indF_num = IBD;
				indF_den = 1;
			} else {
				if(pars->approx_EM) { // Vieira et al. algorithm
					double a0 = pp0*(1-p)*p;
					double b0 = pow(1-p,2);
					double c0 = (1-p)*p;
					double a1 = pp1*2*(1-p)*p;
					double b1 = 2*(1-p)*p;
					double c1 = -2*(1-p)*p;
					double a2 = pp2*(1-p)*p;
					double b2 = pow(p,2);
					double c2 = (1-p)*p;

					double s_num0 = a0/(b0+F*c0) + (a0*c0*F)/pow(b0+F*c0,2);
					double s_num1 = a1/(b1+F*c1) + (a1*c1*F)/pow(b1+F*c1,2);
					double s_num2 = a2/(b2+F*c2) + (a2*c2*F)/pow(b2+F*c2,2);
					double s_den0 = (a0*c0)/pow(b0+F*c0,2);
					double s_den1 = (a1*c1)/pow(b1+F*c1,2);
					double s_den2 = (a2*c2)/pow(b2+F*c2,2);

					indF_num = s_num0 - s_num1 + s_num2;
					indF_den = s_den0 - s_den1 + s_den2;
					// x != x is true only for NaN: dump every intermediate when the
					// ratio went NaN (or when very verbose).
					// uint64_t is cast to unsigned long long so the %llu specifier
					// matches on every platform.
					if( indF_num != indF_num || indF_den != indF_den || indF_num/indF_den != indF_num/indF_den || pars->verbose >= 7 )
					  printf("site IBD: %llu %llu %f %f / %f %f %f %f %f %f %f %f %f / %f %f %f %f %f %f %f %f %f / %f %f %f %f %f %f / %f %f\n", 
						 (unsigned long long) abs_s, (unsigned long long) i, p, F, 
						 chunk_data[s][i*3+0], chunk_data[s][i*3+1], chunk_data[s][i*3+2], p0, p1, p2, pp0, pp1, pp2, 
						 a0, b0, c0, a1, b1, c1, a2, b2, c2, 
						 s_num0, s_num1, s_num2, s_den0, s_den1, s_den2, 
						 indF_num, indF_den);
					IBD = check_interv(indF_num/indF_den, false);

				} else { // Hall et al. algorithm
					IBD = pp0 * (1-p) * F / ((1-p) * F + pow(1-p,2) * (1-F)) + pp2 * p * F / (p * F + pow(p,2) * (1-F));
					indF_num = IBD;
					indF_den = 1;
				}
			}

			// Update site freq
			output->site_freq_num[abs_s] += pp1 + pp2 * (2-IBD);
			output->site_freq_den[abs_s] += pp1 + pp2 * (2-IBD) + pp1 + pp0*(2-IBD);

			// Update indiv F.  The per-individual sums are guarded by _F_lock, so
			// the running totals needed for the trace below are captured while
			// the lock is still held rather than re-read unlocked afterwards.
			pthread_mutex_lock(&pars->_F_lock);
			output->indF_num[i] += indF_num;// * pow(output->site_prob_var[abs_s], 100);
			output->indF_den[i] += indF_den;// * pow(output->site_prob_var[abs_s], 100);
			double cum_F_num = output->indF_num[i];
			double cum_F_den = output->indF_den[i];
			pthread_mutex_unlock(&pars->_F_lock);

			if( pars->verbose >= 7 )
			  printf("Ind: %llu\t%.10f %.10f %.10f\tmaf: %f\tindF: %f\tp: %f %f %f\tpp: %f %f %f\tCum_freq: %f (%f/%f)\tCumF: %f (%f/%f)\n",
				 (unsigned long long) (i+1), chunk_data[s][i*3+0], chunk_data[s][i*3+1], chunk_data[s][i*3+2],
				 p, F, p0, p1, p2, pp0, pp1, pp2,
				 output->site_freq_num[abs_s]/output->site_freq_den[abs_s], output->site_freq_num[abs_s], output->site_freq_den[abs_s],
				 cum_F_num/cum_F_den, cum_F_num, cum_F_den);
		}

		if( pars->verbose >= 6 ) printf("\t\t%llu\t%f (%f / %f) %f\n", (unsigned long long) (abs_s+1), output->site_freq_num[abs_s]/output->site_freq_den[abs_s], output->site_freq_num[abs_s], output->site_freq_den[abs_s], output->site_prob_var[abs_s]);
	}
}