Exemplo n.º 1
0
/*
 * Backcross (BC) probability of the marker data to the right of marker j,
 * given that marker j carries genotype markerL.
 *
 *   markerL  - genotype assumed at marker j
 *   j        - index of the current marker
 *   imarker  - marker vector of the individual; imarker[j+1] is the right neighbour
 *   rs       - recombination frequencies; rs[j] is used between marker j and j+1
 *   position - per-marker position codes; MRIGHT and MUNLINKED end the walk
 *
 * Returns 1.0 when marker j has no right neighbour to account for, 0.0 for a
 * BB genotype, the (non-)recombination probability when the right neighbour is
 * a known BC genotype, and otherwise the sum over both BC genotypes (AA and H)
 * of transition probability times the recursive result for marker j+1.
 */
double right_prob_BC(const char markerL, const int j, const MQMMarkerVector imarker, const vector rs, const cvector position){
  R_CheckUserInterrupt(); /* let the user abort with ^C */

  // Nothing to the right (or unlinked): the right-hand side contributes a factor 1.
  if (position[j] == MRIGHT || position[j] == MUNLINKED) {
    return 1.0;
  }
  // A BB genotype is given probability zero here.
  if (markerL == MBB) {
    return 0.0;
  }

  const char   neighbour   = imarker[j+1];     // marker directly to the right of j
  const double recomb      = rs[j];            // recombination frequency between j and j+1
  const double no_recomb   = 1.0 - recomb;     // probability of no recombination

  if (is_knownMarker(neighbour, CBC)) {
    // Known neighbour: identical genotype means no recombination happened.
    return (markerL == neighbour) ? no_recomb : recomb;
  }

  // (Semi) unknown neighbour: weigh both possible BC genotypes at j+1.
  const double weightAA = (markerL == MAA) ? no_recomb : recomb;
  const double weightH  = (markerL == MAA) ? recomb : no_recomb;
  return weightAA*right_prob_BC(MAA, j+1, imarker, rs, position) + weightH*right_prob_BC(MH, j+1, imarker, rs, position);
}
Exemplo n.º 2
0
/*
 * Data augmentation of incomplete marker data.
 *
 * Every individual is copied into a working marker matrix. Markers are then
 * visited left to right; whenever a genotype is MNOTAA, MNOTBB or MMISSING the
 * row is set to the most likely compatible genotype and extra rows are
 * appended for the alternative genotypes whose probability is high enough
 * (see minprob). After each marker the row weights of the current individual
 * are normalised to sum to one.
 *
 * Parameters:
 *   marker    - input marker matrix, indexed [marker][individual]
 *   y         - phenotype per individual
 *   augmarker - out: newly allocated Nmark x *Naug augmented marker matrix
 *   augy      - out: newly allocated phenotype per augmented row
 *   augind    - out: newly allocated individual index per augmented row,
 *               renumbered so that dropped individuals leave no gap
 *   sucind    - out: newly allocated vector of length (input) *Nind holding
 *               1 for individuals that were augmented, 0 for dropped ones
 *   Nind      - in: number of individuals; out: number of retained individuals
 *   Naug      - out: set to maxNaug on entry; on success the number of
 *               augmented rows
 *   Nmark     - number of markers
 *   position  - per-marker position code; MLEFT / MUNLINKED select start_prob()
 *               instead of left_prob() for the left-hand probability
 *   r         - recombination frequencies; r[j-1] is used between marker j-1
 *               and marker j
 *   maxNaug   - capacity (rows) of the working buffers
 *   imaxNaug  - maximum number of extra rows per individual; an individual
 *               exceeding it is dropped (its sucind entry becomes 0)
 *   minprob   - an alternative genotype is added when
 *               probmax / P(alternative) < 1/minprob (plus 0.00001 when
 *               minprob != 1)
 *   crosstype - cross type (CF2, CBC or CRIL); CUNKNOWN is reported via fatal()
 *   verbose   - non-zero prints progress information
 *
 * Returns 1 on success. Returns 0 when the working buffers overflow; in that
 * case *augmarker, *augy, *augind, *sucind and *Nind are left untouched.
 * The four output arrays are allocated here and owned by the caller.
 */
int mqmaugment(const MQMMarkerMatrix marker, const vector y, 
               MQMMarkerMatrix* augmarker, vector *augy, 
               ivector* augind, ivector* sucind, int *Nind, int *Naug, const int Nmark, 
               const cvector position, vector r, const int maxNaug, 
               const int imaxNaug, const double minprob, 
               const MQMCrossType crosstype, const int verbose) 
{
  int retvalue = 1;     // Assume success; only the bailout path resets this to 0
  int jj;
  const int nind0 = *Nind;              // Original number of individuals
  (*Naug) = maxNaug;     // sets and returns the maximum size of augmented dataset
  // Working buffers sized to maxNaug:
  MQMMarkerMatrix newmarker;
  vector newy;
  MQMMarkerVector imarker;
  ivector newind;
  ivector succesind;
  
  // Threshold on probmax/P(alternative); the small offset is only added when minprob != 1
  double minprobratio = (1.0f/minprob);
  if(minprob!=1){
    minprobratio += 0.00001;
  }
  newmarker = newMQMMarkerMatrix(Nmark+1, maxNaug);  // augmented marker matrix
  newy      = newvector(maxNaug);            // phenotypes
  newind    = newivector(maxNaug);           // individuals index
  succesind = newivector(nind0);              // Tracks if the augmentation is a success
  imarker   = newMQMMarkerVector(Nmark);             

  int iaug     = 0;     // iaug keeps track of current augmented individual
  double prob0, prob1, prob2, sumprob,
  prob0left, prob1left, prob2left,
  prob0right=0.0, prob1right=0.0, prob2right = 0.0f;
  vector newprob = newvector(maxNaug);       // weight of each augmented row (left-hand probabilities only)
  vector newprobmax = newvector(maxNaug);    // weight of each row including the right-hand probability
  if (verbose) Rprintf("INFO: Crosstype determined by the algorithm: %c\n", crosstype);
  if (verbose) Rprintf("INFO: Augmentation parameters: Maximum augmentation=%d, Maximum augmentation per individual=%d, Minprob=%f\n", maxNaug, imaxNaug, minprob);
  // ---- foreach individual create one in the newmarker matrix
 
  int newNind = nind0;                  // Number of unique individuals
  int previaug = 0;                     // previous index in newmarkers
  for (int i=0; i<nind0; i++) {
    // Loop through individuals
    succesind[i] = 1;                   // Assume we succeed in augmentation
    #ifndef STANDALONE
      //R_ProcessEvents(); /*  Try not to crash windows */
      R_FlushConsole();
    #endif
    const int dropped = nind0-newNind;  // How many are dropped
    const int iidx = i - dropped;       // Individual i's new number, corrected for dropped individuals
    newind[iaug]   = iidx;              // iidx corrects for dropped individuals
    newy[iaug]     = y[i];              // phenotype
    newprob[iaug]  = 1.0;               // weight of the initial row
    double probmax = 1.0;               // current maximum probability

    for (int j=0; j<Nmark; j++){ 
      newmarker[j][iaug]=marker[j][i];    // copy markers into newmarkers for the new individual under investigation
    }
    for (int j=0; j<Nmark; j++) {
      // Loop through markers:
      const int maxiaug = iaug;          // fixate maxiaug: rows added during this marker are not revisited for it
      if ((maxiaug-previaug)<=imaxNaug)  // within bounds for individual?
        for (int ii=previaug; ii<=maxiaug; ii++) {
          #ifndef STANDALONE
            R_CheckUserInterrupt(); /* check for ^C */
          #endif
          debug_trace("i=%d ii=%d iidx=%d maxiaug=%d previaug=%d,imaxNaug=%d\n",i,ii,iidx,maxiaug,previaug,imaxNaug);
          // ---- walk from previous augmented to current augmented genotype
          // WE HAVE 3 SPECIAL CASES: (1) NOTAA, (2) NOTBB and (3) UNKNOWN, and the std case of a next known marker
          if (newmarker[j][ii]==MNOTAA) {
            // NOTAA: augment data to contain AB and BB
            for (jj=0; jj<Nmark; jj++) imarker[jj] = newmarker[jj][ii];

            if ((position[j]==MLEFT||position[j]==MUNLINKED)) {
              prob1left= start_prob(crosstype, MH);
              prob2left= start_prob(crosstype, MBB);
            } else {
              prob1left= left_prob(r[j-1],newmarker[j-1][ii],MH,crosstype);
              prob2left= left_prob(r[j-1],newmarker[j-1][ii],MBB,crosstype);
            }
            switch (crosstype) {
              case CF2:
                prob1right= right_prob_F2(MH, j, imarker, r, position);
                prob2right= right_prob_F2(MBB, j, imarker, r, position);
              break;
              case CBC:
                prob1right= right_prob_BC(MH, j, imarker, r, position);
                prob2right= right_prob_BC(MBB, j, imarker, r, position);                
              break;
              case CRIL:
                prob1right= right_prob_RIL(MH, j, imarker, r, position);
                prob2right= right_prob_RIL(MBB, j, imarker, r, position);                
              break;
              case CUNKNOWN:
                fatal("Strange: unknown crosstype in mqm augment()", "");
              break;
            }
            prob1= prob1left*prob1right;
            prob2= prob2left*prob2right;

            // The first row of an individual (re)defines the reference maximum
            if (ii==previaug) probmax = (prob2>prob1 ? newprob[ii]*prob2 : newprob[ii]*prob1);
            if (prob1>prob2) {
              // H is the most likely genotype; add a BB row only if it is likely enough
              if (probmax/(newprob[ii]*prob2)<minprobratio) {
                if (++iaug >= maxNaug) goto bailout;
                newmarker[j][iaug]= MBB;
                newprob[iaug]= newprob[ii]*prob2left;
                newprobmax[iaug]= newprob[iaug]*prob2right;
                for (jj=0; jj<Nmark; jj++) {
                  if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii];
                }
                newind[iaug]=iidx;
                newy[iaug]=y[i];
              }
              newmarker[j][ii]= MH;
              newprobmax[ii]= newprob[ii]*prob1;
              newprob[ii]= newprob[ii]*prob1left;
            } else {
              // BB is the most likely genotype; add an H row only if it is likely enough
              if (probmax/(newprob[ii]*prob1)<minprobratio) {
                if (++iaug >= maxNaug) goto bailout;
                newmarker[j][iaug]= MH;
                newprob[iaug]= newprob[ii]*prob1left;
                newprobmax[iaug]= newprob[iaug]*prob1right;
                for (jj=0; jj<Nmark; jj++) {
                  if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii];
                }
                newind[iaug]=iidx;
                newy[iaug]=y[i];
              }
              newmarker[j][ii]= MBB;
              newprobmax[ii]= newprob[ii]*prob2;
              newprob[ii]*= prob2left;
            }
            probmax = (probmax>newprobmax[ii] ? probmax : newprobmax[ii]);
          } else if (newmarker[j][ii]==MNOTBB) {
            // NOTBB: augment data can contain MH and MAA 
            for (jj=0; jj<Nmark; jj++) imarker[jj]= newmarker[jj][ii];

            if ((position[j]==MLEFT||position[j]==MUNLINKED)) {
              prob0left= start_prob(crosstype, MAA);
              prob1left= start_prob(crosstype, MH);
            } else {
              prob0left= left_prob(r[j-1],newmarker[j-1][ii],MAA,crosstype);
              prob1left= left_prob(r[j-1],newmarker[j-1][ii],MH,crosstype);
            }
            switch (crosstype) {
              case CF2:
                prob0right= right_prob_F2(MAA, j, imarker, r, position);
                prob1right= right_prob_F2(MH, j, imarker, r, position);
              break;
              case CBC:
                prob0right= right_prob_BC(MAA, j, imarker, r, position);
                prob1right= right_prob_BC(MH, j, imarker, r, position);               
              break;
              case CRIL:
                prob0right= right_prob_RIL(MAA, j, imarker, r, position);
                prob1right= right_prob_RIL(MH, j, imarker, r, position);              
              break;
              case CUNKNOWN:
                fatal("Strange: unknown crosstype in mqm augment()", "");
              break;
            }
            prob0= prob0left*prob0right;
            prob1= prob1left*prob1right;

            // The first row of an individual (re)defines the reference maximum
            if (ii==previaug) probmax= (prob0>prob1 ? newprob[ii]*prob0 : newprob[ii]*prob1);
            if (prob1>prob0) {
              // H is the most likely genotype; add an AA row only if it is likely enough
              if (probmax/(newprob[ii]*prob0)<minprobratio) {
                if (++iaug >= maxNaug) goto bailout;
                newmarker[j][iaug]= MAA;
                newprob[iaug]= newprob[ii]*prob0left;
                newprobmax[iaug]= newprob[iaug]*prob0right;
                for (jj=0; jj<Nmark; jj++) {
                  if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii];
                }
                newind[iaug]=iidx;
                newy[iaug]=y[i];
              }
              newmarker[j][ii]= MH;
              newprobmax[ii]= newprob[ii]*prob1;
              newprob[ii]*= prob1left;
            } else {
              // AA is the most likely genotype; add an H row only if it is likely enough
              if (probmax/(newprob[ii]*prob1)<minprobratio) {
                if (++iaug >= maxNaug) goto bailout;
                newmarker[j][iaug]= MH;
                newprob[iaug]= newprob[ii]*prob1left;
                newprobmax[iaug]= newprob[iaug]*prob1right;
                for (jj=0; jj<Nmark; jj++) {
                  if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii];
                }
                newind[iaug]=iidx;
                newy[iaug]=y[i];
              }
              newmarker[j][ii]= MAA;
              newprobmax[ii]= newprob[ii]*prob0;
              newprob[ii]*= prob0left;
            }
            probmax= (probmax>newprobmax[ii] ? probmax : newprobmax[ii]);
          } else if (newmarker[j][ii]==MMISSING) {
            // UNKNOWN: augment data to contain AB, AA and BB
            for (jj=0; jj<Nmark; jj++) imarker[jj]= newmarker[jj][ii];

            if ((position[j]==MLEFT||position[j]==MUNLINKED)) {
              prob0left= start_prob(crosstype, MAA);
              prob1left= start_prob(crosstype, MH);
              prob2left= start_prob(crosstype, MBB);
            } else {
              prob0left= left_prob(r[j-1],newmarker[j-1][ii],MAA,crosstype);
              prob1left= left_prob(r[j-1],newmarker[j-1][ii],MH,crosstype);
              prob2left= left_prob(r[j-1],newmarker[j-1][ii],MBB,crosstype);
            }
            switch (crosstype) {
              case CF2:
                prob0right= right_prob_F2(MAA, j, imarker, r, position);
                prob1right= right_prob_F2(MH, j, imarker, r, position);
                prob2right= right_prob_F2(MBB, j, imarker, r, position);
              break;
              case CBC:
                prob0right= right_prob_BC(MAA, j, imarker, r, position);
                prob1right= right_prob_BC(MH, j, imarker, r, position);
                prob2right= 0.0;              // BB is given zero probability for this cross type
              break;
              case CRIL:
                prob0right= right_prob_RIL(MAA, j, imarker, r, position);
                prob1right= 0.0;              // H is given zero probability for this cross type
                prob2right= right_prob_RIL(MBB, j, imarker, r, position);              
              break;
              case CUNKNOWN:
                fatal("Strange: unknown crosstype in mqm augment()", "");
              break;
            }            
            prob0= prob0left*prob0right;
            prob1= prob1left*prob1right;
            prob2= prob2left*prob2right;
            // The first row of an individual (re)defines the reference maximum
            if (ii==previaug) {
              if ((prob2>prob1)&&(prob2>prob0)) probmax= newprob[ii]*prob2;
              else if ((prob1>prob0)&&(prob1>prob2)) probmax= newprob[ii]*prob1;
              else probmax= newprob[ii]*prob0;
            }
            if ((prob2>prob1)&&(prob2>prob0)) {
              // BB is the most likely genotype; H and AA rows are added when likely enough
              if (probmax/(newprob[ii]*prob1)<minprobratio) {
                if (++iaug >= maxNaug) goto bailout;
                newmarker[j][iaug]= MH;
                newprob[iaug]= newprob[ii]*prob1left;
                newprobmax[iaug]= newprob[iaug]*prob1right;
                for (jj=0; jj<Nmark; jj++) {
                  if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii];
                }
                newind[iaug]=iidx;
                newy[iaug]=y[i];
              }
              if (probmax/(newprob[ii]*prob0)<minprobratio) {
                if (++iaug >= maxNaug) goto bailout;
                newmarker[j][iaug]= MAA;
                newprob[iaug]= newprob[ii]*prob0left;
                newprobmax[iaug]= newprob[iaug]*prob0right;
                for (jj=0; jj<Nmark; jj++) {
                  if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii];
                }
                newind[iaug]=iidx;
                newy[iaug]=y[i];
              }
              newmarker[j][ii]= MBB;
              newprobmax[ii]= newprob[ii]*prob2;
              newprob[ii]*= prob2left;

            } else if ((prob1>prob2)&&(prob1>prob0)) {
              // H is the most likely genotype; BB and AA rows are added when likely enough
              if (probmax/(newprob[ii]*prob2)<minprobratio) {
                if (++iaug >= maxNaug) goto bailout;
                newmarker[j][iaug]= MBB;
                newprob[iaug]= newprob[ii]*prob2left;
                newprobmax[iaug]= newprob[iaug]*prob2right;
                for (jj=0; jj<Nmark; jj++) {
                  if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii];
                }
                newind[iaug]=iidx;
                newy[iaug]=y[i];
              }
              if (probmax/(newprob[ii]*prob0)<minprobratio) {
                if (++iaug >= maxNaug) goto bailout;
                newmarker[j][iaug]= MAA;
                newprob[iaug]= newprob[ii]*prob0left;
                newprobmax[iaug]= newprob[iaug]*prob0right;
                for (jj=0; jj<Nmark; jj++) {
                  if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii];
                }
                newind[iaug]=iidx;
                newy[iaug]=y[i];
              }
              newmarker[j][ii]= MH;
              newprobmax[ii]= newprob[ii]*prob1;
              newprob[ii]*= prob1left;
            } else {
              // AA is taken as the most likely genotype (also on ties); H and BB rows are added when likely enough
              if (probmax/(newprob[ii]*prob1)<minprobratio) {
                if (++iaug >= maxNaug) goto bailout;
                newmarker[j][iaug]= MH;
                newprob[iaug]= newprob[ii]*prob1left;
                newprobmax[iaug]= newprob[iaug]*prob1right;
                for (jj=0; jj<Nmark; jj++) {
                  if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii];
                }
                newind[iaug]=iidx;
                newy[iaug]=y[i];
              }
              if (probmax/(newprob[ii]*prob2)<minprobratio) {
                if (++iaug >= maxNaug) goto bailout;
                newmarker[j][iaug]= MBB;
                newprob[iaug]= newprob[ii]*prob2left;
                newprobmax[iaug]= newprob[iaug]*prob2right;
                for (jj=0; jj<Nmark; jj++) {
                  if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii];
                }
                newind[iaug]=iidx;
                newy[iaug]=y[i];
              }
              newmarker[j][ii]= MAA;
              newprobmax[ii]= newprob[ii]*prob0;
              newprob[ii]*= prob0left;
            }
            probmax= (probmax>newprobmax[ii] ? probmax : newprobmax[ii]);
          } else {
            // STD case: the marker is known, so only the left-hand probability updates the row weight
            if ((position[j]==MLEFT||position[j]==MUNLINKED)) {
              prob0left= start_prob(crosstype, newmarker[j][ii]);
            } else {
              prob0left= left_prob(r[j-1],newmarker[j-1][ii],newmarker[j][ii],crosstype);
            }
            newprob[ii]*= prob0left;
          }

          // Safety margin: one marker can add at most two rows per existing row
          if (iaug+3>maxNaug) {
            Rprintf("ERROR: augmentation (this code should not be reached)\n");  
            goto bailout;
          }
        }
      if ((iaug-previaug+1)>imaxNaug) {
        // Too many rows for this individual: discard all of them and flag the individual as dropped
        newNind-= 1;
        iaug= previaug-1;
        succesind[i]=0;
        if (verbose) Rprintf("INFO: Individual %d moved to second augmentation round\n", i);
      }
      // Normalise the weights of this individual's rows (no rows remain if it was just dropped)
      sumprob= 0.0;
      for (int ii=previaug; ii<=iaug; ii++) sumprob+= newprob[ii];
      for (int ii=previaug; ii<=iaug; ii++) newprob[ii]/= sumprob;
    }
    if (++iaug >= maxNaug) goto bailout;
    previaug=iaug;
  }
  // Success: copy the used part of the working buffers into freshly allocated outputs
  *Naug = iaug;
  *Nind = newNind;
  *augmarker = newMQMMarkerMatrix(Nmark, *Naug);
  *augy = newvector(*Naug);
  *augind = newivector(*Naug);
  *sucind = newivector(nind0);
  for (int i=0; i<nind0; i++) {
    (*sucind)[i] = succesind[i];
  }
  for (int i=0; i<(*Naug); i++) {
    (*augy)[i]= newy[i];
    (*augind)[i]= newind[i];
    for (int j=0; j<Nmark; j++) (*augmarker)[j][i]= newmarker[j][i];
  }
  goto cleanup;
bailout:
  Rprintf("INFO: Dataset too large after augmentation\n");
  // NOTE(review): if fatal() does not return, the working buffers below are not released on this path - confirm
  if (verbose) fatal("Recall procedure with larger value for augmentation parameters or increase the parameter minprob\n");
  retvalue = 0;
cleanup:
  Free(newy);
  delMQMMarkerMatrix(newmarker, Nmark+1); //Free(newmarker);
  Free(newind);
  Free(succesind);   // its contents were copied to *sucind on success; previously this buffer was leaked
  Free(newprob);
  Free(newprobmax);
  Free(imarker);
  return retvalue;
}