Esempio n. 1
0
double probnew(double ***MendelM,cmatrix loci, vector r, int i, int j,char c,char crosstype,int JorC,int ADJ,int start){
	if(start){
		return start_prob(crosstype,loci[j][i]);
	}
	char compareto;
	int index;
	int Nrecom;
	
	if(JorC==1){
		//Rprintf("C %d %d\n",i,j);
		compareto = c;
	}else{
		//Rprintf("loci[j+1][i] %d\n",j);
		compareto = loci[j+1][i];
	}
	if ((crosstype=='F')&&(loci[j][i]=='1')&&(compareto=='1')){
		index = 1;
	}else{
		index = 0;
	}
	Nrecom = absdouble((double)loci[j][i]-(double)compareto);
	return MendelM[j+ADJ][Nrecom][index];
}
Esempio n. 2
0
/* ML estimation of parameters in mixture model via EM; maximum-likelihood
 * estimation of parameters in the mixture model via the EM algorithm, using
 * multilocus information, but assuming known recombination frequencies
*/
double QTLmixture(MQMMarkerMatrix loci, cvector cofactor, vector r, cvector position,
                  vector y, ivector ind, int Nind, int Naug,
                  int Nloci,
                  double *variance, int em, vector *weight, const bool useREML,const bool fitQTL,const bool dominance, MQMCrossType crosstype, int verbose) {
                  
  //debug_trace("QTLmixture called Nloci=%d Nind=%d Naug=%d, REML=%d em=%d fit=%d domi=%d cross=%c\n",Nloci,Nind,Naug,useREML,em,fitQTL,dominance,crosstype);
  //for (int i=0; i<Nloci; i++){ debug_trace("loci %d : recombfreq=%f\n",i,r[i]); }
  int iem= 0, i, j;
  bool warnZeroDist=false;
  bool biasadj=false;
  double oldlogL=-10000, delta=1.0, calc_i, Pscale=1.75;

  vector indweight  = newvector(Nind);
  int newNaug       = ((!fitQTL) ? Naug : 3*Naug);
  vector Fy         = newvector(newNaug);
  double logP       = Nloci*log(Pscale);                          // only for computational accuracy
  bool varknown     = (((*variance)==-1.0) ? false : true );
  vector Ploci      = newvector(newNaug);
  #ifndef STANDALONE
    R_CheckUserInterrupt(); /* check for ^C */
    R_FlushConsole();
  #endif
  if (!useREML) {
    varknown=false;
    biasadj=false;
  }
  for (i=0; i<newNaug; i++) { Ploci[i]= 1.0; }

  if (!fitQTL) {
    for (j=0; j<Nloci; j++) {
      for (i=0; i<Naug; i++)
        Ploci[i]*= Pscale;
      if ((position[j]==MLEFT)||(position[j]==MUNLINKED)) {
        for (i=0; i<Naug; i++) {
          calc_i = start_prob(crosstype, loci[j][i]);   // calc_i= prob(loci, r, i, j, MH, crosstype, 0, 1);
          Ploci[i]*= calc_i;
          //Als Ploci > 0 en calc_i > 0 then we want to assert Ploci[] != 0
        }
      }
      if ((position[j]==MLEFT)||(position[j]==MMIDDLE)) {
        for (i=0; i<Naug; i++) {
          calc_i =left_prob(r[j],loci[j][i],loci[j+1][i],crosstype); //calc_i = prob(loci, r, i, j, loci[j+1][i], crosstype, 0);
          if(calc_i == 0.0){calc_i=1.0;warnZeroDist=true;}
          Ploci[i]*= calc_i;
        }
      }
    }
  } else {
    for (j=0; j<Nloci; j++) {
      for (i=0; i<Naug; i++) {
        Ploci[i]*= Pscale;           // only for computational accuracy; see use of logP
        Ploci[i+Naug]*= Pscale;      // only for computational accuracy; see use of logP
        Ploci[i+2*Naug]*= Pscale;    // only for computational accuracy; see use of logP
      }
      if ((position[j]==MLEFT)||(position[j]==MUNLINKED)) {
        if (cofactor[j]<=MCOF){
          for (i=0; i<Naug; i++) {
            calc_i = start_prob(crosstype, loci[j][i]);  // calc_i= prob(loci, r, i, j, MH, crosstype, 0, 1);
            Ploci[i] *= calc_i;
            Ploci[i+Naug] *= calc_i;
            Ploci[i+2*Naug] *= calc_i;
          }
        }else{
          for (i=0; i<Naug; i++) {
            Ploci[i]*= start_prob(crosstype, MAA);          //startvalue for MAA for new chromosome
            Ploci[i+Naug]*= start_prob(crosstype, MH);      //startvalue for MH for new chromosome
            Ploci[i+2*Naug] *= start_prob(crosstype, MBB);  //startvalue for MBB for new chromosome
          }
        }
      }
      if ((position[j]==MLEFT)||(position[j]==MMIDDLE)) {
        if ((cofactor[j]<=MCOF)&&(cofactor[j+1]<=MCOF))
          for (i=0; i<Naug; i++) {
            calc_i =left_prob(r[j],loci[j][i],loci[j+1][i],crosstype);  //calc_i = prob(loci, r, i, j, loci[j+1][i], crosstype, 0);
            if(calc_i == 0.0){calc_i=1.0;warnZeroDist=true;}
            Ploci[i]*= calc_i;
            Ploci[i+Naug]*= calc_i;
            Ploci[i+2*Naug]*= calc_i;
          }
        else if (cofactor[j]<=MCOF) // locus j+1 == QTL
          for (i=0; i<Naug; i++) { // QTL==MAA || MH || MBB means: What is the prob of finding an MAA at J=1    
            calc_i =left_prob(r[j],loci[j][i],MAA,crosstype);     //calc_i = prob(loci, r, i, j, MAA, crosstype, 0);
            Ploci[i]*= calc_i;      
            calc_i = left_prob(r[j],loci[j][i],MH,crosstype);     //calc_i = prob(loci, r, i, j, MH, crosstype, 0);
            Ploci[i+Naug]*= calc_i;
            calc_i = left_prob(r[j],loci[j][i],MBB,crosstype);    //calc_i = prob(loci, r, i, j, MBB, crosstype, 0);
            Ploci[i+2*Naug]*= calc_i;
          }
        else // locus j == QTL
          for (i=0; i<Naug; i++) { // QTL==MQTL
            calc_i = left_prob(r[j],MAA,loci[j+1][i],crosstype);  //calc_i = prob(loci, r, i, j+1, MAA, crosstype, -1);
            Ploci[i]*= calc_i;
            calc_i = left_prob(r[j],MH,loci[j+1][i],crosstype);   //calc_i = prob(loci, r, i, j+1, MH, crosstype, -1);
            Ploci[i+Naug]*= calc_i;
            calc_i = left_prob(r[j],MBB,loci[j+1][i],crosstype);  //calc_i = prob(loci, r, i, j+1, MBB, crosstype, -1);
            Ploci[i+2*Naug]*= calc_i;
          }
      }
    }
  }
  if(warnZeroDist)info("!!! 0.0 from Prob !!! Markers at same Cm but different genotype !!!"); 
//	Rprintf("INFO: Done fitting QTL's\n");
  if ((*weight)[0]== -1.0) {
    for (i=0; i<Nind; i++) indweight[i]= 0.0;
    if (!fitQTL) {
      for (i=0; i<Naug; i++) indweight[ind[i]]+=Ploci[i];
      for (i=0; i<Naug; i++) (*weight)[i]= Ploci[i]/indweight[ind[i]];
    } else {
      for (i=0; i<Naug; i++) indweight[ind[i]]+=Ploci[i]+Ploci[i+Naug]+Ploci[i+2*Naug];
      for (i=0; i<Naug; i++) {
        (*weight)[i]       = Ploci[i]/indweight[ind[i]];
        (*weight)[i+Naug]  = Ploci[i+Naug]/indweight[ind[i]];
        (*weight)[i+2*Naug]= Ploci[i+2*Naug]/indweight[ind[i]];
      }
    }
  }
  debug_trace("Weights done\n");
  debug_trace("Individual->trait,indweight weight Ploci\n");
  //for (int j=0; j<Nind; j++){
  //  debug_trace("%d->%f,%f %f %f\n", j, y[j],indweight[i], (*weight)[j], Ploci[j]);
  //}
  double logL = 0;
  vector indL = newvector(Nind);
  while ((iem<em)&&(delta>1.0e-5)) {
    iem+=1;
    if (!varknown) *variance=-1.0;
    logL = regression(Nind, Nloci, cofactor, loci, y, weight, ind, Naug, variance, Fy, biasadj, fitQTL, dominance);
    logL = 0.0;
    for (i=0; i<Nind; i++) indL[i]= 0.0;
    if (!fitQTL) // no QTL fitted
      for (i=0; i<Naug; i++) {
        (*weight)[i]= Ploci[i]*Fy[i];
        indL[ind[i]]= indL[ind[i]] + (*weight)[i];
      }
    else // QTL moved along the chromosomes
      for (i=0; i<Naug; i++) {
        (*weight)[i]= Ploci[i]*Fy[i];
        (*weight)[i+Naug]  = Ploci[i+Naug]*  Fy[i+Naug];
        (*weight)[i+2*Naug]= Ploci[i+2*Naug]*Fy[i+2*Naug];
        indL[ind[i]]+=(*weight)[i]+(*weight)[i+Naug]+(*weight)[i+2*Naug];
      }
    for (i=0; i<Nind; i++) logL+=log(indL[i])-logP;
    for (i=0; i<Nind; i++) indweight[i]= 0.0;
    if (!fitQTL) {
      for (i=0; i<Naug; i++) indweight[ind[i]]+=(*weight)[i];
      for (i=0; i<Naug; i++) (*weight)[i]/=indweight[ind[i]];
    } else {
      for (i=0; i<Naug; i++)
        indweight[ind[i]]+=(*weight)[i]+(*weight)[i+Naug]+(*weight)[i+2*Naug];
      for (i=0; i<Naug; i++) {
        (*weight)[i]       /=indweight[ind[i]];
        (*weight)[i+Naug]  /=indweight[ind[i]];
        (*weight)[i+2*Naug]/=indweight[ind[i]];
      }
    }
    delta= fabs(logL-oldlogL);
    oldlogL= logL;
  }
  
  if ((useREML)&&(!varknown)) { // Bias adjustment after finished ML estimation via EM
    *variance=-1.0;
    biasadj=true;
    logL = regression(Nind, Nloci, cofactor, loci, y, weight, ind, Naug, variance, Fy, biasadj, fitQTL, dominance);
    logL = 0.0;
    for (int _i=0; _i<Nind; _i++) indL[_i]= 0.0;
    if (!fitQTL)
      for (i=0; i<Naug; i++) {
        (*weight)[i]= Ploci[i]*Fy[i];
        indL[ind[i]]+=(*weight)[i];
      }
    else
      for (i=0; i<Naug; i++) {
        (*weight)[i]= Ploci[i]*Fy[i];
        (*weight)[i+Naug]= Ploci[i+Naug]*Fy[i+Naug];
        (*weight)[i+2*Naug]= Ploci[i+2*Naug]*Fy[i+2*Naug];
        indL[ind[i]]+=(*weight)[i];
        indL[ind[i]]+=(*weight)[i+Naug];
        indL[ind[i]]+=(*weight)[i+2*Naug];
      }
    for (i=0; i<Nind; i++) logL+=log(indL[i])-logP;
    for (i=0; i<Nind; i++) indweight[i]= 0.0;
    if (!fitQTL) {
      for (i=0; i<Naug; i++) indweight[ind[i]]+=(*weight)[i];
      for (i=0; i<Naug; i++) (*weight)[i]/=indweight[ind[i]];
    } else {
      for (i=0; i<Naug; i++) {
        indweight[ind[i]]+=(*weight)[i];
        indweight[ind[i]]+=(*weight)[i+Naug];
        indweight[ind[i]]+=(*weight)[i+2*Naug];
      }
      for (i=0; i<Naug; i++) {
        (*weight)[i]       /=indweight[ind[i]];
        (*weight)[i+Naug]  /=indweight[ind[i]];
        (*weight)[i+2*Naug]/=indweight[ind[i]];
      }
    }
  }
  //for (i=0; i<Nind; i++){ debug_trace("IND %d Ploci: %f Fy: %f UNLOG:%f LogL:%f LogL-LogP: %f\n", i, Ploci[i], Fy[i], indL[i], log(indL[i]), log(indL[i])-logP); }
  Free(Fy);
  Free(Ploci);
  Free(indweight);
  Free(indL);
  return logL;
}
Esempio n. 3
0
/* ML estimation of parameters in mixture model via EM;
*/
double QTLmixture(cmatrix loci, cvector cofactor, vector r, cvector position,
              vector y, ivector ind, int Nind, int Naug,
              int Nloci,
              double *variance, int em, vector *weight,char REMLorML,char fitQTL,char dominance,char crosstype,Mmatrix MendelM,int verbose){
	//if(verbose==1){Rprintf("QTLmixture called\n");}
    int iem= 0, newNaug, i, j;
    char varknown, biasadj='n';
	double oldlogL=-10000, delta=1.0, calc_i, logP=0.0, Pscale=1.75;
    double calc_ii;
	vector indweight, Ploci, Fy;
    
	indweight= newvector(Nind);
    newNaug= (fitQTL=='n' ? Naug : 3*Naug);
    Fy= newvector(newNaug);
    logP= Nloci*log(Pscale); // only for computational accuracy
	varknown= (((*variance)==-1.0) ? 'n' : 'y' );
    Ploci= newvector(newNaug);	
	#ifndef ALONE
		R_CheckUserInterrupt(); /* check for ^C */
		R_ProcessEvents();
		R_FlushConsole();
	#endif	
    if ((REMLorML=='0')&&(varknown=='n')){ 
//		Rprintf("INFO: REML\n");
	}
    if (REMLorML=='1') { 
//		Rprintf("INFO: ML\n");                       
		varknown='n'; biasadj='n'; 
	}
    for (i=0; i<newNaug; i++){ 
		Ploci[i]= 1.0;
	}
    if (fitQTL=='n'){
	    //Rprintf("FitQTL=N\n");	
		for (j=0; j<Nloci; j++){
		    for (i=0; i<Naug; i++) 
			Ploci[i]*= Pscale;
			//Here we have ProbLeft
		    if ((position[j]=='L')||(position[j]=='U')){
				for (i=0; i<Naug; i++){
					calc_i= prob(loci,r,i,j,'1',crosstype,1,0,1);
					//calc_ii= probnew(MendelM,loci,r,i,j,'1',crosstype,1,0,1);
					Ploci[i]*= calc_i;
					//Rprintf("DEBUG: Prob vs ProbNew: %f %f\n",calc_i,calc_ii);
				}
			}
		    if ((position[j]=='L')||(position[j]=='M')){
				for (i=0; i<Naug; i++){
					calc_i = prob(loci,r,i,j,loci[j+1][i],'F',0,0,0);
					//calc_ii = probnew(MendelM,loci,r,i,j,loci[j+1][i],'F',0,0,0);
					Ploci[i]*= calc_i;
					//Rprintf("DEBUG: Prob vs ProbNew: %f %f\n",calc_i,calc_ii);
				}
			}
		}
	}else{
//	Rprintf("FitQTL=Y\n");	
     for (j=0; j<Nloci; j++)
     {    for (i=0; i<Naug; i++)
          {   Ploci[i]*= Pscale; Ploci[i+Naug]*= Pscale; Ploci[i+2*Naug]*= Pscale;
              // only for computational accuracy; see use of logP
          }
          if ((position[j]=='L')||(position[j]=='U'))
          {  
			//Here we don't have any f2 dependancies anymore by using the prob function
			if (cofactor[j]<='1')
             for (i=0; i<Naug; i++)
             {  
				calc_i= prob(loci,r,i,j,'1',crosstype,1,0,1);
				//calc_ii= probnew(MendelM,loci,r,i,j,'1',crosstype,1,0,1);
                Ploci[i]*= calc_i; Ploci[i+Naug]*= calc_i; Ploci[i+2*Naug]*= calc_i;
				//Rprintf("DEBUG: Prob vs ProbNew: %f %f\n",calc_i,calc_ii);
             }
             else
             for (i=0; i<Naug; i++)
             {
				//startvalues for each new chromosome
				Ploci[i]*= start_prob(crosstype,'0');
				Ploci[i+Naug]*= start_prob(crosstype,'1'); 
				Ploci[i+2*Naug] *= start_prob(crosstype,'2');
			 }
                 // QTL='0', '1' or'2'
          }
          if ((position[j]=='L')||(position[j]=='M'))
          {  if ((cofactor[j]<='1')&&(cofactor[j+1]<='1'))
             for (i=0; i<Naug; i++){  
				calc_i = prob(loci,r,i,j,loci[j+1][i],crosstype,0,0,0);
				//calc_ii = probnew(MendelM,loci,r,i,j,loci[j+1][i],crosstype,0,0,0);
                Ploci[i]*= calc_i; Ploci[i+Naug]*= calc_i; Ploci[i+2*Naug]*= calc_i;
				//Rprintf("DEBUG: Prob vs ProbNew: %f %f\n",calc_i,calc_ii);
             }
             else if (cofactor[j]<='1') // locus j+1 == QTL
             for (i=0; i<Naug; i++)
             {  // QTL=='0' What is the prob of finding an '0' at J=1
				calc_i = prob(loci,r,i,j,'0',crosstype,1,0,0);
				//calc_ii = probnew(MendelM,loci,r,i,j,'0',crosstype,1,0,0);
                Ploci[i]*= calc_i;
                // QTL=='1'
                calc_i = prob(loci,r,i,j,'1',crosstype,1,0,0);
				//calc_ii = probnew(MendelM,loci,r,i,j,'1',crosstype,1,0,0);
                Ploci[i+Naug]*= calc_i;
                // QTL=='2'
                calc_i = prob(loci,r,i,j,'2',crosstype,1,0,0);
				//calc_ii = probnew(MendelM,loci,r,i,j,'2',crosstype,1,0,0);
                Ploci[i+2*Naug]*= calc_i;
				//Rprintf("DEBUG: Prob vs ProbNew: %f %f\n",calc_i,calc_ii);
             }
             else // locus j == QTL
             for (i=0; i<Naug; i++)
             {  // QTL=='0'
                calc_i = prob(loci,r,i,j+1,'0',crosstype,1,-1,0);
				//calc_ii = probnew(MendelM,loci,r,i,j+1,'0',crosstype,1,-1,0);
                Ploci[i]*= calc_i;
                // QTL=='1'
				calc_i = prob(loci,r,i,j+1,'1',crosstype,1,-1,0);
				//calc_ii = probnew(MendelM,loci,r,i,j+1,'1',crosstype,1,-1,0);
                Ploci[i+Naug]*= calc_i;
                // QTL=='2'
                calc_i = prob(loci,r,i,j+1,'2',crosstype,1,-1,0);
				//calc_ii = probnew(MendelM,loci,r,i,j+1,'2',crosstype,1,-1,0);
                Ploci[i+2*Naug]*= calc_i;
				//Rprintf("DEBUG: Prob vs ProbNew: %f %f\n",calc_i,calc_ii);
             }
          }
	 }
	 }
//	Rprintf("INFO: Done fitting QTL's\n");
     if ((*weight)[0]== -1.0)
     {  for (i=0; i<Nind; i++) indweight[i]= 0.0;
		if (fitQTL=='n')
        {  for (i=0; i<Naug; i++) indweight[ind[i]]+=Ploci[i];
           for (i=0; i<Naug; i++) (*weight)[i]= Ploci[i]/indweight[ind[i]];
        }
        else
        {  for (i=0; i<Naug; i++) indweight[ind[i]]+=Ploci[i]+Ploci[i+Naug]+Ploci[i+2*Naug];
           for (i=0; i<Naug; i++)
           {   (*weight)[i]       = Ploci[i]/indweight[ind[i]];
               (*weight)[i+Naug]  = Ploci[i+Naug]/indweight[ind[i]];
               (*weight)[i+2*Naug]= Ploci[i+2*Naug]/indweight[ind[i]];
           }
        }
     }
	//Rprintf("Weights done\n");
    //Rprintf("Individual->trait->cofactor->weight\n");
    //for (int j=0; j<Nind; j++){
	//  Rprintf("%d->%f,%d,%f %f\n",j,y[j],cofactor[j],(*weight)[j],Ploci[j]);
	//}	
     double logL=0;
     vector indL;
     indL= newvector(Nind);
     while ((iem<em)&&(delta>1.0e-5))
     {  
           iem+=1;
           if (varknown=='n') *variance=-1.0;
			//Rprintf("Checkpoint_b\n");           
           logL= regression(Nind, Nloci, cofactor, loci, y,
                 weight, ind, Naug, variance, Fy, biasadj,fitQTL,dominance);
           logL=0.0;
        //Rprintf("regression ready\n");
           for (i=0; i<Nind; i++) indL[i]= 0.0;
           if (fitQTL=='n') // no QTL fitted
           for (i=0; i<Naug; i++)
           {   (*weight)[i]= Ploci[i]*Fy[i];
               indL[ind[i]]= indL[ind[i]] + (*weight)[i];
           }
           else // QTL moved along the chromosomes
           for (i=0; i<Naug; i++)
           {  (*weight)[i]= Ploci[i]*Fy[i];
              (*weight)[i+Naug]  = Ploci[i+Naug]*  Fy[i+Naug];
              (*weight)[i+2*Naug]= Ploci[i+2*Naug]*Fy[i+2*Naug];
              indL[ind[i]]+=(*weight)[i]+(*weight)[i+Naug]+(*weight)[i+2*Naug];
           }
           for (i=0; i<Nind; i++) logL+=log(indL[i])-logP;
           for (i=0; i<Nind; i++) indweight[i]= 0.0;
           if (fitQTL=='n')
           {  for (i=0; i<Naug; i++) indweight[ind[i]]+=(*weight)[i];
              for (i=0; i<Naug; i++) (*weight)[i]/=indweight[ind[i]];
           }
           else
           {  for (i=0; i<Naug; i++)
                  indweight[ind[i]]+=(*weight)[i]+(*weight)[i+Naug]+(*weight)[i+2*Naug];
              for (i=0; i<Naug; i++)
              {   (*weight)[i]       /=indweight[ind[i]];
                  (*weight)[i+Naug]  /=indweight[ind[i]];
                  (*weight)[i+2*Naug]/=indweight[ind[i]];
              }
           }
           delta= absdouble(logL-oldlogL);
           oldlogL= logL;
     }
     //Rprintf("EM Finished\n");
     // bias adjustment after finished ML estimation via EM
     if ((REMLorML=='0')&&(varknown=='n'))
     {  
       // RRprintf("Checkpoint_c\n");
        *variance=-1.0;
        biasadj='y';
        logL= regression(Nind, Nloci, cofactor, loci, y,
              weight, ind, Naug, variance, Fy, biasadj,fitQTL,dominance);
        logL=0.0;
        for (int _i=0; _i<Nind; _i++) indL[_i]= 0.0;
        if (fitQTL=='n')
        for (i=0; i<Naug; i++)
        {   (*weight)[i]= Ploci[i]*Fy[i];
            indL[ind[i]]+=(*weight)[i];
        }
        else
        for (i=0; i<Naug; i++)
        {   (*weight)[i]= Ploci[i]*Fy[i];
            (*weight)[i+Naug]= Ploci[i+Naug]*Fy[i+Naug];
            (*weight)[i+2*Naug]= Ploci[i+2*Naug]*Fy[i+2*Naug];
            indL[ind[i]]+=(*weight)[i];
            indL[ind[i]]+=(*weight)[i+Naug];
            indL[ind[i]]+=(*weight)[i+2*Naug];
        }
        for (i=0; i<Nind; i++) logL+=log(indL[i])-logP;
        for (i=0; i<Nind; i++) indweight[i]= 0.0;
        if (fitQTL=='n')
        {  for (i=0; i<Naug; i++) indweight[ind[i]]+=(*weight)[i];
           for (i=0; i<Naug; i++) (*weight)[i]/=indweight[ind[i]];
        }
        else
        {  for (i=0; i<Naug; i++)
           {   indweight[ind[i]]+=(*weight)[i];
               indweight[ind[i]]+=(*weight)[i+Naug];
               indweight[ind[i]]+=(*weight)[i+2*Naug];
           }
           for (i=0; i<Naug; i++)
           {   (*weight)[i]       /=indweight[ind[i]];
               (*weight)[i+Naug]  /=indweight[ind[i]];
               (*weight)[i+2*Naug]/=indweight[ind[i]];
           }
        }
     }
	//for (i=0; i<Nind; i++){
    //    Rprintf("IND %d Ploci: %f Fy: %f UNLOG:%f LogL:%f LogL-LogP: %f\n",i,Ploci[i],Fy[i],indL[i],log(indL[i]),log(indL[i])-logP);
    //}
	Free(Fy);
	Free(Ploci);
	Free(indweight);
	Free(indL);
    return logL;
}
Esempio n. 4
0
int mqmaugment(const MQMMarkerMatrix marker, const vector y, 
               MQMMarkerMatrix* augmarker, vector *augy, 
               ivector* augind, ivector* sucind, int *Nind, int *Naug, const int Nmark, 
               const cvector position, vector r, const int maxNaug, 
               const int imaxNaug, const double minprob, 
               const MQMCrossType crosstype, const int verbose) 
{
  int retvalue = 1;     //[Danny] Assume everything will go right, (it never returned a 1 OK, initialization to 0 and return
  int jj;
  const int nind0 = *Nind;              //Original number of individuals
  (*Naug) = maxNaug;     // sets and returns the maximum size of augmented dataset
  // new variables sized to maxNaug:
  MQMMarkerMatrix newmarker;
  vector newy;
  MQMMarkerVector imarker;
  ivector newind;
  ivector succesind;
  
  double minprobratio = (1.0f/minprob);
  if(minprob!=1){
    minprobratio += 0.00001;
  }
  newmarker = newMQMMarkerMatrix(Nmark+1, maxNaug);  // augmented marker matrix
  newy      = newvector(maxNaug);            // phenotypes
  newind    = newivector(maxNaug);           // individuals index
  succesind = newivector(nind0);              // Tracks if the augmentation is a succes
  imarker   = newMQMMarkerVector(Nmark);             

  int iaug     = 0;     // iaug keeps track of current augmented individual
  double prob0, prob1, prob2, sumprob,
  prob0left, prob1left, prob2left,
  prob0right=0.0, prob1right=0.0, prob2right = 0.0f;
  vector newprob = newvector(maxNaug);
  vector newprobmax = newvector(maxNaug);
  if (verbose) Rprintf("INFO: Crosstype determined by the algorithm: %c\n", crosstype);
  if (verbose) Rprintf("INFO: Augmentation parameters: Maximum augmentation=%d, Maximum augmentation per individual=%d, Minprob=%f\n", maxNaug, imaxNaug, minprob);
  // ---- foreach individual create one in the newmarker matrix
 
  int newNind = nind0;                  //Number of unique individuals
  int previaug = 0;                     // previous index in newmarkers
  for (int i=0; i<nind0; i++) {
    //Loop through individuals
    succesind[i] = 1;                   //Assume we succeed in augmentation
    #ifndef STANDALONE
      //R_ProcessEvents(); /*  Try not to crash windows */
      R_FlushConsole();
    #endif
    const int dropped = nind0-newNind;  //How many are dropped
    const int iidx = i - dropped;       //Individuals I's new individual number based on dropped individuals
    newind[iaug]   = iidx;              // iidx corrects for dropped individuals
    newy[iaug]     = y[i];              // cvariance (phenotype)
    newprob[iaug]  = 1.0;               //prop
    double probmax = 1.0;               //current maximum probability

    for (int j=0; j<Nmark; j++){ 
      newmarker[j][iaug]=marker[j][i];    // copy markers into newmarkers for the new indidivudal under investigation
    }
    for (int j=0; j<Nmark; j++) {
      //Loop through markers:
      const int maxiaug = iaug;          // fixate maxiaug
      if ((maxiaug-previaug)<=imaxNaug)  // within bounds for individual?
        for (int ii=previaug; ii<=maxiaug; ii++) {
          #ifndef STANDALONE
            R_CheckUserInterrupt(); /* check for ^C */
          #endif
          debug_trace("i=%d ii=%d iidx=%d maxiaug=%d previaug=%d,imaxNaug=%d\n",i,ii,iidx,maxiaug,previaug,imaxNaug);
          // ---- walk from previous augmented to current augmented genotype
          //WE HAVE 3 SPECIAL CASES: (1) NOTAA, (2) NOTBB and (3)UNKNOWN, and the std case of a next known marker
          if (newmarker[j][ii]==MNOTAA) {
            //NOTAA augment data to contain AB and BB
            for (jj=0; jj<Nmark; jj++) imarker[jj] = newmarker[jj][ii];

            if ((position[j]==MLEFT||position[j]==MUNLINKED)) {
              prob1left= start_prob(crosstype, MH);
              prob2left= start_prob(crosstype, MBB);
            } else {
              prob1left= left_prob(r[j-1],newmarker[j-1][ii],MH,crosstype);      //prob1left= prob(newmarker, r, ii, j-1, MH, crosstype, 0);
              prob2left= left_prob(r[j-1],newmarker[j-1][ii],MBB,crosstype);     //prob2left= prob(newmarker, r, ii, j-1, MBB, crosstype, 0);
            }
            switch (crosstype) {
              case CF2:
                prob1right= right_prob_F2(MH, j, imarker, r, position);          //prob1right= probright(MH, j, imarker, r, position, crosstype);
                prob2right= right_prob_F2(MBB, j, imarker, r, position);         //prob2right= probright(MBB, j, imarker, r, position, crosstype);
              break;
              case CBC:
                prob1right= right_prob_BC(MH, j, imarker, r, position);
                prob2right= right_prob_BC(MBB, j, imarker, r, position);                
              break;
              case CRIL:
                prob1right= right_prob_RIL(MH, j, imarker, r, position);
                prob2right= right_prob_RIL(MBB, j, imarker, r, position);                
              break;
              case CUNKNOWN:
                fatal("Strange: unknown crosstype in mqm augment()", "");
              break;
            }
            prob1= prob1left*prob1right;
            prob2= prob2left*prob2right;

            if (ii==previaug) probmax = (prob2>prob1 ? newprob[ii]*prob2 : newprob[ii]*prob1);
            if (prob1>prob2) {
              if (probmax/(newprob[ii]*prob2)<minprobratio) {
                if (++iaug >= maxNaug) goto bailout;
                newmarker[j][iaug]= MBB;
                newprob[iaug]= newprob[ii]*prob2left;
                newprobmax[iaug]= newprob[iaug]*prob2right;
                for (jj=0; jj<Nmark; jj++) {
                  if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii];
                }
                newind[iaug]=iidx;
                newy[iaug]=y[i];
              }
              newmarker[j][ii]= MH;
              newprobmax[ii]= newprob[ii]*prob1;
              newprob[ii]= newprob[ii]*prob1left;
            } else {
              if (probmax/(newprob[ii]*prob1)<minprobratio) {
                if (++iaug >= maxNaug) goto bailout;
                newmarker[j][iaug]= MH;
                newprob[iaug]= newprob[ii]*prob1left;
                newprobmax[iaug]= newprob[iaug]*prob1right;
                for (jj=0; jj<Nmark; jj++) {
                  if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii];
                }
                newind[iaug]=iidx;
                newy[iaug]=y[i];
              }
              newmarker[j][ii]= MBB;
              newprobmax[ii]= newprob[ii]*prob2;
              newprob[ii]*= prob2left;
            }
            probmax = (probmax>newprobmax[ii] ? probmax : newprobmax[ii]);
          } else if (newmarker[j][ii]==MNOTBB) {
            //NOTBB: augment data can contain MH and MAA 
            for (jj=0; jj<Nmark; jj++) imarker[jj]= newmarker[jj][ii];

            if ((position[j]==MLEFT||position[j]==MUNLINKED)) {
              prob0left= start_prob(crosstype, MAA);
              prob1left= start_prob(crosstype, MH);
            } else {
              prob0left= left_prob(r[j-1],newmarker[j-1][ii],MAA,crosstype);  //prob0left= prob(newmarker, r, ii, j-1, MAA, crosstype, 0);
              prob1left= left_prob(r[j-1],newmarker[j-1][ii],MH,crosstype);   //prob1left= prob(newmarker, r, ii, j-1, MH, crosstype, 0);
            }
            switch (crosstype) {
              case CF2:
                prob0right= right_prob_F2(MAA, j, imarker, r, position);      //prob0right= probright(MAA, j, imarker, r, position, crosstype);
                prob1right= right_prob_F2(MH, j, imarker, r, position);       //prob1right= probright(MH, j, imarker, r, position, crosstype);
              break;
              case CBC:
                prob0right= right_prob_BC(MAA, j, imarker, r, position);
                prob1right= right_prob_BC(MH, j, imarker, r, position);               
              break;
              case CRIL:
                prob0right= right_prob_RIL(MAA, j, imarker, r, position);
                prob1right= right_prob_RIL(MH, j, imarker, r, position);              
              break;
              case CUNKNOWN:
                fatal("Strange: unknown crosstype in mqm augment()", "");
              break;
            }
            prob0= prob0left*prob0right;
            prob1= prob1left*prob1right;

            if (ii==previaug) probmax= (prob0>prob1 ? newprob[ii]*prob0 : newprob[ii]*prob1);
            if (prob1>prob0) {
              if (probmax/(newprob[ii]*prob0)<minprobratio) {
                if (++iaug >= maxNaug) goto bailout;
                newmarker[j][iaug]= MAA;
                newprob[iaug]= newprob[ii]*prob0left;
                newprobmax[iaug]= newprob[iaug]*prob0right;
                for (jj=0; jj<Nmark; jj++) {
                  if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii];
                }
                newind[iaug]=iidx;
                newy[iaug]=y[i];
              }
              newmarker[j][ii]= MH;
              newprobmax[ii]= newprob[ii]*prob1;
              newprob[ii]*= prob1left;
            } else {
              if (probmax/(newprob[ii]*prob1)<minprobratio) {
                if (++iaug >= maxNaug) goto bailout;
                newmarker[j][iaug]= MH;
                newprob[iaug]= newprob[ii]*prob1left;
                newprobmax[iaug]= newprob[iaug]*prob1right;
                for (jj=0; jj<Nmark; jj++) {
                  if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii];
                }
                newind[iaug]=iidx;
                newy[iaug]=y[i];
              }
              newmarker[j][ii]= MAA;
              newprobmax[ii]= newprob[ii]*prob0;
              newprob[ii]*= prob0left;
            }
            probmax= (probmax>newprobmax[ii] ? probmax : newprobmax[ii]);
          } else if (newmarker[j][ii]==MMISSING) {
            //UNKNOWN: augment data to contain AB, AA and BB
            for (jj=0; jj<Nmark; jj++) imarker[jj]= newmarker[jj][ii];

            if ((position[j]==MLEFT||position[j]==MUNLINKED)) {
              prob0left= start_prob(crosstype, MAA);
              prob1left= start_prob(crosstype, MH);
              prob2left= start_prob(crosstype, MBB);
            } else {
              prob0left= left_prob(r[j-1],newmarker[j-1][ii],MAA,crosstype);  //prob0left= prob(newmarker, r, ii, j-1, MAA, crosstype, 0);
              prob1left= left_prob(r[j-1],newmarker[j-1][ii],MH,crosstype);   //prob1left= prob(newmarker, r, ii, j-1, MH, crosstype, 0);
              prob2left= left_prob(r[j-1],newmarker[j-1][ii],MBB,crosstype);  //prob2left= prob(newmarker, r, ii, j-1, MBB, crosstype, 0);
            }
            switch (crosstype) {
              case CF2:
                prob0right= right_prob_F2(MAA, j, imarker, r, position); //prob0right= probright(MAA, j, imarker, r, position, crosstype);
                prob1right= right_prob_F2(MH, j, imarker, r, position);  //prob1right= probright(MH, j, imarker, r, position, crosstype);
                prob2right= right_prob_F2(MBB, j, imarker, r, position); //prob2right= probright(MBB, j, imarker, r, position, crosstype);
              break;
              case CBC:
                prob0right= right_prob_BC(MAA, j, imarker, r, position);
                prob1right= right_prob_BC(MH, j, imarker, r, position);
                prob2right= 0.0;              
              break;
              case CRIL:
                prob0right= right_prob_RIL(MAA, j, imarker, r, position);
                prob1right= 0.0;
                prob2right= right_prob_RIL(MBB, j, imarker, r, position);              
              break;
              case CUNKNOWN:
                fatal("Strange: unknown crosstype in mqm augment()", "");
              break;
            }            
            prob0= prob0left*prob0right;
            prob1= prob1left*prob1right;
            prob2= prob2left*prob2right;
            if (ii==previaug) {
              if ((prob2>prob1)&&(prob2>prob0)) probmax= newprob[ii]*prob2;
              else if ((prob1>prob0)&&(prob1>prob2)) probmax= newprob[ii]*prob1;
              else probmax= newprob[ii]*prob0;
            }
            if ((prob2>prob1)&&(prob2>prob0)) {
              if (probmax/(newprob[ii]*prob1)<minprobratio) {
                if (++iaug >= maxNaug) goto bailout;
                newmarker[j][iaug]= MH;
                newprob[iaug]= newprob[ii]*prob1left;
                newprobmax[iaug]= newprob[iaug]*prob1right;
                for (jj=0; jj<Nmark; jj++) {
                  if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii];
                }
                newind[iaug]=iidx;
                newy[iaug]=y[i];
              }
              if (probmax/(newprob[ii]*prob0)<minprobratio) {
                if (++iaug >= maxNaug) goto bailout;
                newmarker[j][iaug]= MAA;
                newprob[iaug]= newprob[ii]*prob0left;
                newprobmax[iaug]= newprob[iaug]*prob0right;
                for (jj=0; jj<Nmark; jj++) {
                  if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii];
                }
                newind[iaug]=iidx;
                newy[iaug]=y[i];
              }
              newmarker[j][ii]= MBB;
              newprobmax[ii]= newprob[ii]*prob2;
              newprob[ii]*= prob2left;

            } else if ((prob1>prob2)&&(prob1>prob0)) {
              if (probmax/(newprob[ii]*prob2)<minprobratio) {
                if (++iaug >= maxNaug) goto bailout;
                newmarker[j][iaug]= MBB;
                newprob[iaug]= newprob[ii]*prob2left;
                newprobmax[iaug]= newprob[iaug]*prob2right;
                for (jj=0; jj<Nmark; jj++) {
                  if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii];
                }
                newind[iaug]=iidx;
                newy[iaug]=y[i];
              }
              if (probmax/(newprob[ii]*prob0)<minprobratio) {
                if (++iaug >= maxNaug) goto bailout;
                newmarker[j][iaug]= MAA;
                newprob[iaug]= newprob[ii]*prob0left;
                newprobmax[iaug]= newprob[iaug]*prob0right;
                for (jj=0; jj<Nmark; jj++) {
                  if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii];
                }
                newind[iaug]=iidx;
                newy[iaug]=y[i];
              }
              newmarker[j][ii]= MH;
              newprobmax[ii]= newprob[ii]*prob1;
              newprob[ii]*= prob1left;
            } else {
              if (probmax/(newprob[ii]*prob1)<minprobratio) {
                if (++iaug >= maxNaug) goto bailout;
                newmarker[j][iaug]= MH;
                newprob[iaug]= newprob[ii]*prob1left;
                newprobmax[iaug]= newprob[iaug]*prob1right;
                for (jj=0; jj<Nmark; jj++) {
                  if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii];
                }
                newind[iaug]=iidx;
                newy[iaug]=y[i];
              }
              if (probmax/(newprob[ii]*prob2)<minprobratio) {
                if (++iaug >= maxNaug) goto bailout;
                newmarker[j][iaug]= MBB;
                newprob[iaug]= newprob[ii]*prob2left;
                newprobmax[iaug]= newprob[iaug]*prob2right;
                for (jj=0; jj<Nmark; jj++) {
                  if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii];
                }
                newind[iaug]=iidx;
                newy[iaug]=y[i];
              }
              newmarker[j][ii]= MAA;
              newprobmax[ii]= newprob[ii]*prob0;
              newprob[ii]*= prob0left;
            }
            probmax= (probmax>newprobmax[ii] ? probmax : newprobmax[ii]);
          } else {
            //STD case we know what the next marker is nou use probleft to estimate the likelihood of the current location
            if ((position[j]==MLEFT||position[j]==MUNLINKED)) {
              prob0left= start_prob(crosstype, newmarker[j][ii]);
            } else {
              prob0left= left_prob(r[j-1],newmarker[j-1][ii],newmarker[j][ii],crosstype); //prob0left= prob(newmarker, r, ii, j-1, newmarker[j][ii], crosstype, 0);
            }
            newprob[ii]*= prob0left;
          }

          if (iaug+3>maxNaug) {
            Rprintf("ERROR: augmentation (this code should not be reached)\n");  
            goto bailout;
          }
        }
      if ((iaug-previaug+1)>imaxNaug) {
        newNind-= 1;
        iaug= previaug-1;
        succesind[i]=0;
        //for(int x=previaug;x<previaug+imaxNaug;x++){
        //  Rprintf("INFO: Individual: %d, variant: %d, prob: %f",i,x,newprob[x]);
        //}
        if (verbose) Rprintf("INFO: Individual %d moved to second augmentation round\n", i);
      }
      sumprob= 0.0;
      for (int ii=previaug; ii<=iaug; ii++) sumprob+= newprob[ii];
      for (int ii=previaug; ii<=iaug; ii++) newprob[ii]/= sumprob;
    }
    if (++iaug >= maxNaug) goto bailout;
    previaug=iaug;
  }
  *Naug = iaug;
  *Nind = newNind;
  *augmarker = newMQMMarkerMatrix(Nmark, *Naug);
  *augy = newvector(*Naug);
  *augind = newivector(*Naug);
  *sucind = newivector(nind0);
  for (int i=0; i<nind0; i++) {
    (*sucind)[i] = succesind[i];
  }
  for (int i=0; i<(*Naug); i++) {
    (*augy)[i]= newy[i];
    (*augind)[i]= newind[i];
    for (int j=0; j<Nmark; j++) (*augmarker)[j][i]= newmarker[j][i];
  }
  goto cleanup;
bailout:
  Rprintf("INFO: Dataset too large after augmentation\n");
  if (verbose) fatal("Recall procedure with larger value for augmentation parameters or increase the parameter minprob\n");
  retvalue = 0;
cleanup:
  Free(newy);
  delMQMMarkerMatrix(newmarker, Nmark+1); //Free(newmarker);
  Free(newind);
  Free(newprob);
  Free(newprobmax);
  Free(imarker);
  return retvalue;
}