Exemplo n.º 1
0
/* ML estimation of parameters in mixture model via EM; maximum-likelihood
 * estimation of parameters in the mixture model via the EM algorithm, using
 * multilocus information, but assuming known recombination frequencies
*/
double QTLmixture(MQMMarkerMatrix loci, cvector cofactor, vector r, cvector position,
                  vector y, ivector ind, int Nind, int Naug,
                  int Nloci,
                  double *variance, int em, vector *weight, const bool useREML,const bool fitQTL,const bool dominance, MQMCrossType crosstype, int verbose) {
                  
  //debug_trace("QTLmixture called Nloci=%d Nind=%d Naug=%d, REML=%d em=%d fit=%d domi=%d cross=%c\n",Nloci,Nind,Naug,useREML,em,fitQTL,dominance,crosstype);
  //for (int i=0; i<Nloci; i++){ debug_trace("loci %d : recombfreq=%f\n",i,r[i]); }
  int iem= 0, i, j;
  bool warnZeroDist=false;
  bool biasadj=false;
  double oldlogL=-10000, delta=1.0, calc_i, Pscale=1.75;

  vector indweight  = newvector(Nind);
  int newNaug       = ((!fitQTL) ? Naug : 3*Naug);
  vector Fy         = newvector(newNaug);
  double logP       = Nloci*log(Pscale);                          // only for computational accuracy
  bool varknown     = (((*variance)==-1.0) ? false : true );
  vector Ploci      = newvector(newNaug);
  #ifndef STANDALONE
    R_CheckUserInterrupt(); /* check for ^C */
    R_FlushConsole();
  #endif
  if (!useREML) {
    varknown=false;
    biasadj=false;
  }
  for (i=0; i<newNaug; i++) { Ploci[i]= 1.0; }

  if (!fitQTL) {
    for (j=0; j<Nloci; j++) {
      for (i=0; i<Naug; i++)
        Ploci[i]*= Pscale;
      if ((position[j]==MLEFT)||(position[j]==MUNLINKED)) {
        for (i=0; i<Naug; i++) {
          calc_i = start_prob(crosstype, loci[j][i]);   // calc_i= prob(loci, r, i, j, MH, crosstype, 0, 1);
          Ploci[i]*= calc_i;
          //Als Ploci > 0 en calc_i > 0 then we want to assert Ploci[] != 0
        }
      }
      if ((position[j]==MLEFT)||(position[j]==MMIDDLE)) {
        for (i=0; i<Naug; i++) {
          calc_i =left_prob(r[j],loci[j][i],loci[j+1][i],crosstype); //calc_i = prob(loci, r, i, j, loci[j+1][i], crosstype, 0);
          if(calc_i == 0.0){calc_i=1.0;warnZeroDist=true;}
          Ploci[i]*= calc_i;
        }
      }
    }
  } else {
    for (j=0; j<Nloci; j++) {
      for (i=0; i<Naug; i++) {
        Ploci[i]*= Pscale;           // only for computational accuracy; see use of logP
        Ploci[i+Naug]*= Pscale;      // only for computational accuracy; see use of logP
        Ploci[i+2*Naug]*= Pscale;    // only for computational accuracy; see use of logP
      }
      if ((position[j]==MLEFT)||(position[j]==MUNLINKED)) {
        if (cofactor[j]<=MCOF){
          for (i=0; i<Naug; i++) {
            calc_i = start_prob(crosstype, loci[j][i]);  // calc_i= prob(loci, r, i, j, MH, crosstype, 0, 1);
            Ploci[i] *= calc_i;
            Ploci[i+Naug] *= calc_i;
            Ploci[i+2*Naug] *= calc_i;
          }
        }else{
          for (i=0; i<Naug; i++) {
            Ploci[i]*= start_prob(crosstype, MAA);          //startvalue for MAA for new chromosome
            Ploci[i+Naug]*= start_prob(crosstype, MH);      //startvalue for MH for new chromosome
            Ploci[i+2*Naug] *= start_prob(crosstype, MBB);  //startvalue for MBB for new chromosome
          }
        }
      }
      if ((position[j]==MLEFT)||(position[j]==MMIDDLE)) {
        if ((cofactor[j]<=MCOF)&&(cofactor[j+1]<=MCOF))
          for (i=0; i<Naug; i++) {
            calc_i =left_prob(r[j],loci[j][i],loci[j+1][i],crosstype);  //calc_i = prob(loci, r, i, j, loci[j+1][i], crosstype, 0);
            if(calc_i == 0.0){calc_i=1.0;warnZeroDist=true;}
            Ploci[i]*= calc_i;
            Ploci[i+Naug]*= calc_i;
            Ploci[i+2*Naug]*= calc_i;
          }
        else if (cofactor[j]<=MCOF) // locus j+1 == QTL
          for (i=0; i<Naug; i++) { // QTL==MAA || MH || MBB means: What is the prob of finding an MAA at J=1    
            calc_i =left_prob(r[j],loci[j][i],MAA,crosstype);     //calc_i = prob(loci, r, i, j, MAA, crosstype, 0);
            Ploci[i]*= calc_i;      
            calc_i = left_prob(r[j],loci[j][i],MH,crosstype);     //calc_i = prob(loci, r, i, j, MH, crosstype, 0);
            Ploci[i+Naug]*= calc_i;
            calc_i = left_prob(r[j],loci[j][i],MBB,crosstype);    //calc_i = prob(loci, r, i, j, MBB, crosstype, 0);
            Ploci[i+2*Naug]*= calc_i;
          }
        else // locus j == QTL
          for (i=0; i<Naug; i++) { // QTL==MQTL
            calc_i = left_prob(r[j],MAA,loci[j+1][i],crosstype);  //calc_i = prob(loci, r, i, j+1, MAA, crosstype, -1);
            Ploci[i]*= calc_i;
            calc_i = left_prob(r[j],MH,loci[j+1][i],crosstype);   //calc_i = prob(loci, r, i, j+1, MH, crosstype, -1);
            Ploci[i+Naug]*= calc_i;
            calc_i = left_prob(r[j],MBB,loci[j+1][i],crosstype);  //calc_i = prob(loci, r, i, j+1, MBB, crosstype, -1);
            Ploci[i+2*Naug]*= calc_i;
          }
      }
    }
  }
  if(warnZeroDist)info("!!! 0.0 from Prob !!! Markers at same Cm but different genotype !!!"); 
//	Rprintf("INFO: Done fitting QTL's\n");
  if ((*weight)[0]== -1.0) {
    for (i=0; i<Nind; i++) indweight[i]= 0.0;
    if (!fitQTL) {
      for (i=0; i<Naug; i++) indweight[ind[i]]+=Ploci[i];
      for (i=0; i<Naug; i++) (*weight)[i]= Ploci[i]/indweight[ind[i]];
    } else {
      for (i=0; i<Naug; i++) indweight[ind[i]]+=Ploci[i]+Ploci[i+Naug]+Ploci[i+2*Naug];
      for (i=0; i<Naug; i++) {
        (*weight)[i]       = Ploci[i]/indweight[ind[i]];
        (*weight)[i+Naug]  = Ploci[i+Naug]/indweight[ind[i]];
        (*weight)[i+2*Naug]= Ploci[i+2*Naug]/indweight[ind[i]];
      }
    }
  }
  debug_trace("Weights done\n");
  debug_trace("Individual->trait,indweight weight Ploci\n");
  //for (int j=0; j<Nind; j++){
  //  debug_trace("%d->%f,%f %f %f\n", j, y[j],indweight[i], (*weight)[j], Ploci[j]);
  //}
  double logL = 0;
  vector indL = newvector(Nind);
  while ((iem<em)&&(delta>1.0e-5)) {
    iem+=1;
    if (!varknown) *variance=-1.0;
    logL = regression(Nind, Nloci, cofactor, loci, y, weight, ind, Naug, variance, Fy, biasadj, fitQTL, dominance);
    logL = 0.0;
    for (i=0; i<Nind; i++) indL[i]= 0.0;
    if (!fitQTL) // no QTL fitted
      for (i=0; i<Naug; i++) {
        (*weight)[i]= Ploci[i]*Fy[i];
        indL[ind[i]]= indL[ind[i]] + (*weight)[i];
      }
    else // QTL moved along the chromosomes
      for (i=0; i<Naug; i++) {
        (*weight)[i]= Ploci[i]*Fy[i];
        (*weight)[i+Naug]  = Ploci[i+Naug]*  Fy[i+Naug];
        (*weight)[i+2*Naug]= Ploci[i+2*Naug]*Fy[i+2*Naug];
        indL[ind[i]]+=(*weight)[i]+(*weight)[i+Naug]+(*weight)[i+2*Naug];
      }
    for (i=0; i<Nind; i++) logL+=log(indL[i])-logP;
    for (i=0; i<Nind; i++) indweight[i]= 0.0;
    if (!fitQTL) {
      for (i=0; i<Naug; i++) indweight[ind[i]]+=(*weight)[i];
      for (i=0; i<Naug; i++) (*weight)[i]/=indweight[ind[i]];
    } else {
      for (i=0; i<Naug; i++)
        indweight[ind[i]]+=(*weight)[i]+(*weight)[i+Naug]+(*weight)[i+2*Naug];
      for (i=0; i<Naug; i++) {
        (*weight)[i]       /=indweight[ind[i]];
        (*weight)[i+Naug]  /=indweight[ind[i]];
        (*weight)[i+2*Naug]/=indweight[ind[i]];
      }
    }
    delta= fabs(logL-oldlogL);
    oldlogL= logL;
  }
  
  if ((useREML)&&(!varknown)) { // Bias adjustment after finished ML estimation via EM
    *variance=-1.0;
    biasadj=true;
    logL = regression(Nind, Nloci, cofactor, loci, y, weight, ind, Naug, variance, Fy, biasadj, fitQTL, dominance);
    logL = 0.0;
    for (int _i=0; _i<Nind; _i++) indL[_i]= 0.0;
    if (!fitQTL)
      for (i=0; i<Naug; i++) {
        (*weight)[i]= Ploci[i]*Fy[i];
        indL[ind[i]]+=(*weight)[i];
      }
    else
      for (i=0; i<Naug; i++) {
        (*weight)[i]= Ploci[i]*Fy[i];
        (*weight)[i+Naug]= Ploci[i+Naug]*Fy[i+Naug];
        (*weight)[i+2*Naug]= Ploci[i+2*Naug]*Fy[i+2*Naug];
        indL[ind[i]]+=(*weight)[i];
        indL[ind[i]]+=(*weight)[i+Naug];
        indL[ind[i]]+=(*weight)[i+2*Naug];
      }
    for (i=0; i<Nind; i++) logL+=log(indL[i])-logP;
    for (i=0; i<Nind; i++) indweight[i]= 0.0;
    if (!fitQTL) {
      for (i=0; i<Naug; i++) indweight[ind[i]]+=(*weight)[i];
      for (i=0; i<Naug; i++) (*weight)[i]/=indweight[ind[i]];
    } else {
      for (i=0; i<Naug; i++) {
        indweight[ind[i]]+=(*weight)[i];
        indweight[ind[i]]+=(*weight)[i+Naug];
        indweight[ind[i]]+=(*weight)[i+2*Naug];
      }
      for (i=0; i<Naug; i++) {
        (*weight)[i]       /=indweight[ind[i]];
        (*weight)[i+Naug]  /=indweight[ind[i]];
        (*weight)[i+2*Naug]/=indweight[ind[i]];
      }
    }
  }
  //for (i=0; i<Nind; i++){ debug_trace("IND %d Ploci: %f Fy: %f UNLOG:%f LogL:%f LogL-LogP: %f\n", i, Ploci[i], Fy[i], indL[i], log(indL[i]), log(indL[i])-logP); }
  Free(Fy);
  Free(Ploci);
  Free(indweight);
  Free(indL);
  return logL;
}
Exemplo n.º 2
0
/*
 * ML estimation of recombination frequencies via EM; calculation of multilocus
 * genotype probabilities; ignorance of unlikely genotypes. Called by the
 * mqmscan.  maximum-likelihood estimation of recombination frequencies via the
 * EM algorithm, using multilocus information (default: the recombination
 * frequencies are not estimated but taken from mqm.in)
 *
 * When reestimate is 'n' the method is skipped
 */
double rmixture(MQMMarkerMatrix marker, vector weight, vector r,
                cvector position, ivector ind, int Nind, int Naug, int Nmark,
                vector *mapdistance, char reestimate, MQMCrossType crosstype,
                int verbose) {
  int i, j;
  int iem= 0;
  double Nrecom, oldr=0.0, newr, rdelta=1.0;
  double maximum = 0.0;
  float last_step = 0.0;
  vector indweight;
  indweight = newvector(Nind);
  vector distance;
  distance = newvector(Nmark+1);

  if (reestimate=='n') {
    if (verbose==1) {
      Rprintf("INFO: recombination parameters are not re-estimated\n");
    }
    for (j=0; j<Nmark; j++) {
      if (maximum < (*mapdistance)[j]) {
        maximum = (*mapdistance)[j];
      }
    }
  } else {
    if (verbose==1) {
      Rprintf("INFO: recombination parameters are re-estimated\n");
    }
    //Reestimation of map now works
    while ((iem<1000)&&(rdelta>0.0001)) {
      iem+=1;
      rdelta= 0.0;
      /* calculate weights = conditional genotype probabilities */
      for (i=0; i<Naug; i++) weight[i]=1.0;
      for (j=0; j<Nmark; j++) {
        if ((position[j]==MLEFT)||(position[j]==MUNLINKED))
          for (i=0; i<Naug; i++)
            if (marker[j][i]==MH) weight[i]*= 0.5;
            else weight[i]*= 0.25;
        if ((position[j]==MLEFT)||(position[j]==MMIDDLE))
          for (i=0; i<Naug; i++) {
            double calc_i = left_prob(r[j],marker[j][i],marker[j+1][i],crosstype);            //double calc_i = prob(marker, r, i, j, marker[j+1][i], crosstype, 0);
            weight[i]*=calc_i;
          }
      }
      for (i=0; i<Nind; i++) {
        indweight[i]= 0.0;
      }
      for (i=0; i<Naug; i++) {
        indweight[ind[i]]+=weight[i];
      }
      for (i=0; i<Naug; i++) {
        weight[i]/=indweight[ind[i]];
      }
      for (j=0; j<Nmark; j++) {
        if ((position[j]==MLEFT)||(position[j]==MMIDDLE)) {
          newr= 0.0;
          for (i=0; i<Naug; i++) {
            Nrecom= fabs((double)(marker[j][i]-marker[j+1][i]));
            if ((marker[j][i]==MH)&&(marker[j+1][i]==MH))
              Nrecom= 2.0*r[j]*r[j]/(r[j]*r[j]+(1-r[j])*(1-r[j]));
            newr+= Nrecom*weight[i];
          }
          if (reestimate=='y' && position[j]!=MRIGHT) { //only update if it isn't the last marker of a chromosome ;)
            oldr=r[j];
            r[j]= newr/(2.0*Nind);
            rdelta+=pow(r[j]-oldr, 2.0);
          } else rdelta+=0.0;
        }
      }
    }
    /*   print new estimates of recombination frequencies */
    //Rprintf("INFO: Reestimate? %c\n", reestimate);
    //Rprintf("INFO: looping over all markers %d\n", Nmark);
    for (j=0; j<Nmark; j++) {
      if (position[j+1]==MRIGHT) {
        last_step = (*mapdistance)[j+1]-(*mapdistance)[j];
      }
      if (position[j]!=MLEFT) {
        if (position[j]!=MRIGHT) {
          (*mapdistance)[j]= -50*log(1-2.0*r[j])+(*mapdistance)[j-1];
        } else {
          (*mapdistance)[j]= (*mapdistance)[j-1]+last_step;
        }
      } else {
        (*mapdistance)[j]= -50*log(1-2.0*r[j]);
      }
      if (maximum < (*mapdistance)[j]) {
        maximum = (*mapdistance)[j];
      }
      //Rprintf("r(%d)= %f -> %f\n", j, r[j], (*mapdistance)[j]);
    }
  }
  if (verbose==1) {
    Rprintf("INFO: Re-estimation of the genetic map took %d iterations, to reach a rdelta of %f\n", iem, rdelta);
  }
  Free(indweight);
  freevector(distance);
  return maximum;
}
Exemplo n.º 3
0
int mqmaugment(const MQMMarkerMatrix marker, const vector y, 
               MQMMarkerMatrix* augmarker, vector *augy, 
               ivector* augind, ivector* sucind, int *Nind, int *Naug, const int Nmark, 
               const cvector position, vector r, const int maxNaug, 
               const int imaxNaug, const double minprob, 
               const MQMCrossType crosstype, const int verbose) 
{
  int retvalue = 1;     //[Danny] Assume everything will go right, (it never returned a 1 OK, initialization to 0 and return
  int jj;
  const int nind0 = *Nind;              //Original number of individuals
  (*Naug) = maxNaug;     // sets and returns the maximum size of augmented dataset
  // new variables sized to maxNaug:
  MQMMarkerMatrix newmarker;
  vector newy;
  MQMMarkerVector imarker;
  ivector newind;
  ivector succesind;
  
  double minprobratio = (1.0f/minprob);
  if(minprob!=1){
    minprobratio += 0.00001;
  }
  newmarker = newMQMMarkerMatrix(Nmark+1, maxNaug);  // augmented marker matrix
  newy      = newvector(maxNaug);            // phenotypes
  newind    = newivector(maxNaug);           // individuals index
  succesind = newivector(nind0);              // Tracks if the augmentation is a succes
  imarker   = newMQMMarkerVector(Nmark);             

  int iaug     = 0;     // iaug keeps track of current augmented individual
  double prob0, prob1, prob2, sumprob,
  prob0left, prob1left, prob2left,
  prob0right=0.0, prob1right=0.0, prob2right = 0.0f;
  vector newprob = newvector(maxNaug);
  vector newprobmax = newvector(maxNaug);
  if (verbose) Rprintf("INFO: Crosstype determined by the algorithm: %c\n", crosstype);
  if (verbose) Rprintf("INFO: Augmentation parameters: Maximum augmentation=%d, Maximum augmentation per individual=%d, Minprob=%f\n", maxNaug, imaxNaug, minprob);
  // ---- foreach individual create one in the newmarker matrix
 
  int newNind = nind0;                  //Number of unique individuals
  int previaug = 0;                     // previous index in newmarkers
  for (int i=0; i<nind0; i++) {
    //Loop through individuals
    succesind[i] = 1;                   //Assume we succeed in augmentation
    #ifndef STANDALONE
      //R_ProcessEvents(); /*  Try not to crash windows */
      R_FlushConsole();
    #endif
    const int dropped = nind0-newNind;  //How many are dropped
    const int iidx = i - dropped;       //Individuals I's new individual number based on dropped individuals
    newind[iaug]   = iidx;              // iidx corrects for dropped individuals
    newy[iaug]     = y[i];              // cvariance (phenotype)
    newprob[iaug]  = 1.0;               //prop
    double probmax = 1.0;               //current maximum probability

    for (int j=0; j<Nmark; j++){ 
      newmarker[j][iaug]=marker[j][i];    // copy markers into newmarkers for the new indidivudal under investigation
    }
    for (int j=0; j<Nmark; j++) {
      //Loop through markers:
      const int maxiaug = iaug;          // fixate maxiaug
      if ((maxiaug-previaug)<=imaxNaug)  // within bounds for individual?
        for (int ii=previaug; ii<=maxiaug; ii++) {
          #ifndef STANDALONE
            R_CheckUserInterrupt(); /* check for ^C */
          #endif
          debug_trace("i=%d ii=%d iidx=%d maxiaug=%d previaug=%d,imaxNaug=%d\n",i,ii,iidx,maxiaug,previaug,imaxNaug);
          // ---- walk from previous augmented to current augmented genotype
          //WE HAVE 3 SPECIAL CASES: (1) NOTAA, (2) NOTBB and (3)UNKNOWN, and the std case of a next known marker
          if (newmarker[j][ii]==MNOTAA) {
            //NOTAA augment data to contain AB and BB
            for (jj=0; jj<Nmark; jj++) imarker[jj] = newmarker[jj][ii];

            if ((position[j]==MLEFT||position[j]==MUNLINKED)) {
              prob1left= start_prob(crosstype, MH);
              prob2left= start_prob(crosstype, MBB);
            } else {
              prob1left= left_prob(r[j-1],newmarker[j-1][ii],MH,crosstype);      //prob1left= prob(newmarker, r, ii, j-1, MH, crosstype, 0);
              prob2left= left_prob(r[j-1],newmarker[j-1][ii],MBB,crosstype);     //prob2left= prob(newmarker, r, ii, j-1, MBB, crosstype, 0);
            }
            switch (crosstype) {
              case CF2:
                prob1right= right_prob_F2(MH, j, imarker, r, position);          //prob1right= probright(MH, j, imarker, r, position, crosstype);
                prob2right= right_prob_F2(MBB, j, imarker, r, position);         //prob2right= probright(MBB, j, imarker, r, position, crosstype);
              break;
              case CBC:
                prob1right= right_prob_BC(MH, j, imarker, r, position);
                prob2right= right_prob_BC(MBB, j, imarker, r, position);                
              break;
              case CRIL:
                prob1right= right_prob_RIL(MH, j, imarker, r, position);
                prob2right= right_prob_RIL(MBB, j, imarker, r, position);                
              break;
              case CUNKNOWN:
                fatal("Strange: unknown crosstype in mqm augment()", "");
              break;
            }
            prob1= prob1left*prob1right;
            prob2= prob2left*prob2right;

            if (ii==previaug) probmax = (prob2>prob1 ? newprob[ii]*prob2 : newprob[ii]*prob1);
            if (prob1>prob2) {
              if (probmax/(newprob[ii]*prob2)<minprobratio) {
                if (++iaug >= maxNaug) goto bailout;
                newmarker[j][iaug]= MBB;
                newprob[iaug]= newprob[ii]*prob2left;
                newprobmax[iaug]= newprob[iaug]*prob2right;
                for (jj=0; jj<Nmark; jj++) {
                  if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii];
                }
                newind[iaug]=iidx;
                newy[iaug]=y[i];
              }
              newmarker[j][ii]= MH;
              newprobmax[ii]= newprob[ii]*prob1;
              newprob[ii]= newprob[ii]*prob1left;
            } else {
              if (probmax/(newprob[ii]*prob1)<minprobratio) {
                if (++iaug >= maxNaug) goto bailout;
                newmarker[j][iaug]= MH;
                newprob[iaug]= newprob[ii]*prob1left;
                newprobmax[iaug]= newprob[iaug]*prob1right;
                for (jj=0; jj<Nmark; jj++) {
                  if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii];
                }
                newind[iaug]=iidx;
                newy[iaug]=y[i];
              }
              newmarker[j][ii]= MBB;
              newprobmax[ii]= newprob[ii]*prob2;
              newprob[ii]*= prob2left;
            }
            probmax = (probmax>newprobmax[ii] ? probmax : newprobmax[ii]);
          } else if (newmarker[j][ii]==MNOTBB) {
            //NOTBB: augment data can contain MH and MAA 
            for (jj=0; jj<Nmark; jj++) imarker[jj]= newmarker[jj][ii];

            if ((position[j]==MLEFT||position[j]==MUNLINKED)) {
              prob0left= start_prob(crosstype, MAA);
              prob1left= start_prob(crosstype, MH);
            } else {
              prob0left= left_prob(r[j-1],newmarker[j-1][ii],MAA,crosstype);  //prob0left= prob(newmarker, r, ii, j-1, MAA, crosstype, 0);
              prob1left= left_prob(r[j-1],newmarker[j-1][ii],MH,crosstype);   //prob1left= prob(newmarker, r, ii, j-1, MH, crosstype, 0);
            }
            switch (crosstype) {
              case CF2:
                prob0right= right_prob_F2(MAA, j, imarker, r, position);      //prob0right= probright(MAA, j, imarker, r, position, crosstype);
                prob1right= right_prob_F2(MH, j, imarker, r, position);       //prob1right= probright(MH, j, imarker, r, position, crosstype);
              break;
              case CBC:
                prob0right= right_prob_BC(MAA, j, imarker, r, position);
                prob1right= right_prob_BC(MH, j, imarker, r, position);               
              break;
              case CRIL:
                prob0right= right_prob_RIL(MAA, j, imarker, r, position);
                prob1right= right_prob_RIL(MH, j, imarker, r, position);              
              break;
              case CUNKNOWN:
                fatal("Strange: unknown crosstype in mqm augment()", "");
              break;
            }
            prob0= prob0left*prob0right;
            prob1= prob1left*prob1right;

            if (ii==previaug) probmax= (prob0>prob1 ? newprob[ii]*prob0 : newprob[ii]*prob1);
            if (prob1>prob0) {
              if (probmax/(newprob[ii]*prob0)<minprobratio) {
                if (++iaug >= maxNaug) goto bailout;
                newmarker[j][iaug]= MAA;
                newprob[iaug]= newprob[ii]*prob0left;
                newprobmax[iaug]= newprob[iaug]*prob0right;
                for (jj=0; jj<Nmark; jj++) {
                  if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii];
                }
                newind[iaug]=iidx;
                newy[iaug]=y[i];
              }
              newmarker[j][ii]= MH;
              newprobmax[ii]= newprob[ii]*prob1;
              newprob[ii]*= prob1left;
            } else {
              if (probmax/(newprob[ii]*prob1)<minprobratio) {
                if (++iaug >= maxNaug) goto bailout;
                newmarker[j][iaug]= MH;
                newprob[iaug]= newprob[ii]*prob1left;
                newprobmax[iaug]= newprob[iaug]*prob1right;
                for (jj=0; jj<Nmark; jj++) {
                  if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii];
                }
                newind[iaug]=iidx;
                newy[iaug]=y[i];
              }
              newmarker[j][ii]= MAA;
              newprobmax[ii]= newprob[ii]*prob0;
              newprob[ii]*= prob0left;
            }
            probmax= (probmax>newprobmax[ii] ? probmax : newprobmax[ii]);
          } else if (newmarker[j][ii]==MMISSING) {
            //UNKNOWN: augment data to contain AB, AA and BB
            for (jj=0; jj<Nmark; jj++) imarker[jj]= newmarker[jj][ii];

            if ((position[j]==MLEFT||position[j]==MUNLINKED)) {
              prob0left= start_prob(crosstype, MAA);
              prob1left= start_prob(crosstype, MH);
              prob2left= start_prob(crosstype, MBB);
            } else {
              prob0left= left_prob(r[j-1],newmarker[j-1][ii],MAA,crosstype);  //prob0left= prob(newmarker, r, ii, j-1, MAA, crosstype, 0);
              prob1left= left_prob(r[j-1],newmarker[j-1][ii],MH,crosstype);   //prob1left= prob(newmarker, r, ii, j-1, MH, crosstype, 0);
              prob2left= left_prob(r[j-1],newmarker[j-1][ii],MBB,crosstype);  //prob2left= prob(newmarker, r, ii, j-1, MBB, crosstype, 0);
            }
            switch (crosstype) {
              case CF2:
                prob0right= right_prob_F2(MAA, j, imarker, r, position); //prob0right= probright(MAA, j, imarker, r, position, crosstype);
                prob1right= right_prob_F2(MH, j, imarker, r, position);  //prob1right= probright(MH, j, imarker, r, position, crosstype);
                prob2right= right_prob_F2(MBB, j, imarker, r, position); //prob2right= probright(MBB, j, imarker, r, position, crosstype);
              break;
              case CBC:
                prob0right= right_prob_BC(MAA, j, imarker, r, position);
                prob1right= right_prob_BC(MH, j, imarker, r, position);
                prob2right= 0.0;              
              break;
              case CRIL:
                prob0right= right_prob_RIL(MAA, j, imarker, r, position);
                prob1right= 0.0;
                prob2right= right_prob_RIL(MBB, j, imarker, r, position);              
              break;
              case CUNKNOWN:
                fatal("Strange: unknown crosstype in mqm augment()", "");
              break;
            }            
            prob0= prob0left*prob0right;
            prob1= prob1left*prob1right;
            prob2= prob2left*prob2right;
            if (ii==previaug) {
              if ((prob2>prob1)&&(prob2>prob0)) probmax= newprob[ii]*prob2;
              else if ((prob1>prob0)&&(prob1>prob2)) probmax= newprob[ii]*prob1;
              else probmax= newprob[ii]*prob0;
            }
            if ((prob2>prob1)&&(prob2>prob0)) {
              if (probmax/(newprob[ii]*prob1)<minprobratio) {
                if (++iaug >= maxNaug) goto bailout;
                newmarker[j][iaug]= MH;
                newprob[iaug]= newprob[ii]*prob1left;
                newprobmax[iaug]= newprob[iaug]*prob1right;
                for (jj=0; jj<Nmark; jj++) {
                  if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii];
                }
                newind[iaug]=iidx;
                newy[iaug]=y[i];
              }
              if (probmax/(newprob[ii]*prob0)<minprobratio) {
                if (++iaug >= maxNaug) goto bailout;
                newmarker[j][iaug]= MAA;
                newprob[iaug]= newprob[ii]*prob0left;
                newprobmax[iaug]= newprob[iaug]*prob0right;
                for (jj=0; jj<Nmark; jj++) {
                  if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii];
                }
                newind[iaug]=iidx;
                newy[iaug]=y[i];
              }
              newmarker[j][ii]= MBB;
              newprobmax[ii]= newprob[ii]*prob2;
              newprob[ii]*= prob2left;

            } else if ((prob1>prob2)&&(prob1>prob0)) {
              if (probmax/(newprob[ii]*prob2)<minprobratio) {
                if (++iaug >= maxNaug) goto bailout;
                newmarker[j][iaug]= MBB;
                newprob[iaug]= newprob[ii]*prob2left;
                newprobmax[iaug]= newprob[iaug]*prob2right;
                for (jj=0; jj<Nmark; jj++) {
                  if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii];
                }
                newind[iaug]=iidx;
                newy[iaug]=y[i];
              }
              if (probmax/(newprob[ii]*prob0)<minprobratio) {
                if (++iaug >= maxNaug) goto bailout;
                newmarker[j][iaug]= MAA;
                newprob[iaug]= newprob[ii]*prob0left;
                newprobmax[iaug]= newprob[iaug]*prob0right;
                for (jj=0; jj<Nmark; jj++) {
                  if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii];
                }
                newind[iaug]=iidx;
                newy[iaug]=y[i];
              }
              newmarker[j][ii]= MH;
              newprobmax[ii]= newprob[ii]*prob1;
              newprob[ii]*= prob1left;
            } else {
              if (probmax/(newprob[ii]*prob1)<minprobratio) {
                if (++iaug >= maxNaug) goto bailout;
                newmarker[j][iaug]= MH;
                newprob[iaug]= newprob[ii]*prob1left;
                newprobmax[iaug]= newprob[iaug]*prob1right;
                for (jj=0; jj<Nmark; jj++) {
                  if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii];
                }
                newind[iaug]=iidx;
                newy[iaug]=y[i];
              }
              if (probmax/(newprob[ii]*prob2)<minprobratio) {
                if (++iaug >= maxNaug) goto bailout;
                newmarker[j][iaug]= MBB;
                newprob[iaug]= newprob[ii]*prob2left;
                newprobmax[iaug]= newprob[iaug]*prob2right;
                for (jj=0; jj<Nmark; jj++) {
                  if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii];
                }
                newind[iaug]=iidx;
                newy[iaug]=y[i];
              }
              newmarker[j][ii]= MAA;
              newprobmax[ii]= newprob[ii]*prob0;
              newprob[ii]*= prob0left;
            }
            probmax= (probmax>newprobmax[ii] ? probmax : newprobmax[ii]);
          } else {
            //STD case we know what the next marker is nou use probleft to estimate the likelihood of the current location
            if ((position[j]==MLEFT||position[j]==MUNLINKED)) {
              prob0left= start_prob(crosstype, newmarker[j][ii]);
            } else {
              prob0left= left_prob(r[j-1],newmarker[j-1][ii],newmarker[j][ii],crosstype); //prob0left= prob(newmarker, r, ii, j-1, newmarker[j][ii], crosstype, 0);
            }
            newprob[ii]*= prob0left;
          }

          if (iaug+3>maxNaug) {
            Rprintf("ERROR: augmentation (this code should not be reached)\n");  
            goto bailout;
          }
        }
      if ((iaug-previaug+1)>imaxNaug) {
        newNind-= 1;
        iaug= previaug-1;
        succesind[i]=0;
        //for(int x=previaug;x<previaug+imaxNaug;x++){
        //  Rprintf("INFO: Individual: %d, variant: %d, prob: %f",i,x,newprob[x]);
        //}
        if (verbose) Rprintf("INFO: Individual %d moved to second augmentation round\n", i);
      }
      sumprob= 0.0;
      for (int ii=previaug; ii<=iaug; ii++) sumprob+= newprob[ii];
      for (int ii=previaug; ii<=iaug; ii++) newprob[ii]/= sumprob;
    }
    if (++iaug >= maxNaug) goto bailout;
    previaug=iaug;
  }
  *Naug = iaug;
  *Nind = newNind;
  *augmarker = newMQMMarkerMatrix(Nmark, *Naug);
  *augy = newvector(*Naug);
  *augind = newivector(*Naug);
  *sucind = newivector(nind0);
  for (int i=0; i<nind0; i++) {
    (*sucind)[i] = succesind[i];
  }
  for (int i=0; i<(*Naug); i++) {
    (*augy)[i]= newy[i];
    (*augind)[i]= newind[i];
    for (int j=0; j<Nmark; j++) (*augmarker)[j][i]= newmarker[j][i];
  }
  goto cleanup;
bailout:
  Rprintf("INFO: Dataset too large after augmentation\n");
  if (verbose) fatal("Recall procedure with larger value for augmentation parameters or increase the parameter minprob\n");
  retvalue = 0;
cleanup:
  Free(newy);
  delMQMMarkerMatrix(newmarker, Nmark+1); //Free(newmarker);
  Free(newind);
  Free(newprob);
  Free(newprobmax);
  Free(imarker);
  return retvalue;
}