double right_prob_RIL(const char markerL, const int j, const MQMMarkerVector imarker, const vector rs, const cvector position){ R_CheckUserInterrupt(); /* check for ^C */ if(position[j] == MRIGHT||position[j] == MUNLINKED){ return 1.0; //END of chromosome or only 1 marker on a chromosome } if (markerL == MH) { return 0.0; //info("Strange: encountered H genotype in RIL"); } const char markerR = imarker[j+1]; //Next marker at the right side const double r = rs[j]; //Recombination freq beween markerL and markerR double prob0 = 0.0; double prob2 = 0.0; const double rr = 1.0-r; const int recombinations = abs(markerL-markerR); if (is_knownMarker(markerR, CRIL)) { return ((recombinations==0) ? rr : r); } else { //Next marker is semi unknown if (markerL==MAA) { prob0= rr; prob2= r; } else { // MBB prob0= r; prob2= rr; } return prob0*right_prob_RIL(MAA, j+1, imarker, rs, position) + prob2*right_prob_RIL(MBB, j+1, imarker, rs, position); } }
int mqmaugment(const MQMMarkerMatrix marker, const vector y, MQMMarkerMatrix* augmarker, vector *augy, ivector* augind, ivector* sucind, int *Nind, int *Naug, const int Nmark, const cvector position, vector r, const int maxNaug, const int imaxNaug, const double minprob, const MQMCrossType crosstype, const int verbose) { int retvalue = 1; //[Danny] Assume everything will go right, (it never returned a 1 OK, initialization to 0 and return int jj; const int nind0 = *Nind; //Original number of individuals (*Naug) = maxNaug; // sets and returns the maximum size of augmented dataset // new variables sized to maxNaug: MQMMarkerMatrix newmarker; vector newy; MQMMarkerVector imarker; ivector newind; ivector succesind; double minprobratio = (1.0f/minprob); if(minprob!=1){ minprobratio += 0.00001; } newmarker = newMQMMarkerMatrix(Nmark+1, maxNaug); // augmented marker matrix newy = newvector(maxNaug); // phenotypes newind = newivector(maxNaug); // individuals index succesind = newivector(nind0); // Tracks if the augmentation is a succes imarker = newMQMMarkerVector(Nmark); int iaug = 0; // iaug keeps track of current augmented individual double prob0, prob1, prob2, sumprob, prob0left, prob1left, prob2left, prob0right=0.0, prob1right=0.0, prob2right = 0.0f; vector newprob = newvector(maxNaug); vector newprobmax = newvector(maxNaug); if (verbose) Rprintf("INFO: Crosstype determined by the algorithm: %c\n", crosstype); if (verbose) Rprintf("INFO: Augmentation parameters: Maximum augmentation=%d, Maximum augmentation per individual=%d, Minprob=%f\n", maxNaug, imaxNaug, minprob); // ---- foreach individual create one in the newmarker matrix int newNind = nind0; //Number of unique individuals int previaug = 0; // previous index in newmarkers for (int i=0; i<nind0; i++) { //Loop through individuals succesind[i] = 1; //Assume we succeed in augmentation #ifndef STANDALONE //R_ProcessEvents(); /* Try not to crash windows */ R_FlushConsole(); #endif const int dropped = nind0-newNind; //How many are dropped const int iidx = i - dropped; //Individuals I's new individual number based on dropped individuals newind[iaug] = iidx; // iidx corrects for dropped individuals newy[iaug] = y[i]; // cvariance (phenotype) newprob[iaug] = 1.0; //prop double probmax = 1.0; //current maximum probability for (int j=0; j<Nmark; j++){ newmarker[j][iaug]=marker[j][i]; // copy markers into newmarkers for the new indidivudal under investigation } for (int j=0; j<Nmark; j++) { //Loop through markers: const int maxiaug = iaug; // fixate maxiaug if ((maxiaug-previaug)<=imaxNaug) // within bounds for individual? for (int ii=previaug; ii<=maxiaug; ii++) { #ifndef STANDALONE R_CheckUserInterrupt(); /* check for ^C */ #endif debug_trace("i=%d ii=%d iidx=%d maxiaug=%d previaug=%d,imaxNaug=%d\n",i,ii,iidx,maxiaug,previaug,imaxNaug); // ---- walk from previous augmented to current augmented genotype //WE HAVE 3 SPECIAL CASES: (1) NOTAA, (2) NOTBB and (3)UNKNOWN, and the std case of a next known marker if (newmarker[j][ii]==MNOTAA) { //NOTAA augment data to contain AB and BB for (jj=0; jj<Nmark; jj++) imarker[jj] = newmarker[jj][ii]; if ((position[j]==MLEFT||position[j]==MUNLINKED)) { prob1left= start_prob(crosstype, MH); prob2left= start_prob(crosstype, MBB); } else { prob1left= left_prob(r[j-1],newmarker[j-1][ii],MH,crosstype); //prob1left= prob(newmarker, r, ii, j-1, MH, crosstype, 0); prob2left= left_prob(r[j-1],newmarker[j-1][ii],MBB,crosstype); //prob2left= prob(newmarker, r, ii, j-1, MBB, crosstype, 0); } switch (crosstype) { case CF2: prob1right= right_prob_F2(MH, j, imarker, r, position); //prob1right= probright(MH, j, imarker, r, position, crosstype); prob2right= right_prob_F2(MBB, j, imarker, r, position); //prob2right= probright(MBB, j, imarker, r, position, crosstype); break; case CBC: prob1right= right_prob_BC(MH, j, imarker, r, position); prob2right= right_prob_BC(MBB, j, imarker, r, position); break; case CRIL: prob1right= right_prob_RIL(MH, j, imarker, r, position); prob2right= right_prob_RIL(MBB, j, imarker, r, position); break; case CUNKNOWN: fatal("Strange: unknown crosstype in mqm augment()", ""); break; } prob1= prob1left*prob1right; prob2= prob2left*prob2right; if (ii==previaug) probmax = (prob2>prob1 ? newprob[ii]*prob2 : newprob[ii]*prob1); if (prob1>prob2) { if (probmax/(newprob[ii]*prob2)<minprobratio) { if (++iaug >= maxNaug) goto bailout; newmarker[j][iaug]= MBB; newprob[iaug]= newprob[ii]*prob2left; newprobmax[iaug]= newprob[iaug]*prob2right; for (jj=0; jj<Nmark; jj++) { if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii]; } newind[iaug]=iidx; newy[iaug]=y[i]; } newmarker[j][ii]= MH; newprobmax[ii]= newprob[ii]*prob1; newprob[ii]= newprob[ii]*prob1left; } else { if (probmax/(newprob[ii]*prob1)<minprobratio) { if (++iaug >= maxNaug) goto bailout; newmarker[j][iaug]= MH; newprob[iaug]= newprob[ii]*prob1left; newprobmax[iaug]= newprob[iaug]*prob1right; for (jj=0; jj<Nmark; jj++) { if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii]; } newind[iaug]=iidx; newy[iaug]=y[i]; } newmarker[j][ii]= MBB; newprobmax[ii]= newprob[ii]*prob2; newprob[ii]*= prob2left; } probmax = (probmax>newprobmax[ii] ? probmax : newprobmax[ii]); } else if (newmarker[j][ii]==MNOTBB) { //NOTBB: augment data can contain MH and MAA for (jj=0; jj<Nmark; jj++) imarker[jj]= newmarker[jj][ii]; if ((position[j]==MLEFT||position[j]==MUNLINKED)) { prob0left= start_prob(crosstype, MAA); prob1left= start_prob(crosstype, MH); } else { prob0left= left_prob(r[j-1],newmarker[j-1][ii],MAA,crosstype); //prob0left= prob(newmarker, r, ii, j-1, MAA, crosstype, 0); prob1left= left_prob(r[j-1],newmarker[j-1][ii],MH,crosstype); //prob1left= prob(newmarker, r, ii, j-1, MH, crosstype, 0); } switch (crosstype) { case CF2: prob0right= right_prob_F2(MAA, j, imarker, r, position); //prob0right= probright(MAA, j, imarker, r, position, crosstype); prob1right= right_prob_F2(MH, j, imarker, r, position); //prob1right= probright(MH, j, imarker, r, position, crosstype); break; case CBC: prob0right= right_prob_BC(MAA, j, imarker, r, position); prob1right= right_prob_BC(MH, j, imarker, r, position); break; case CRIL: prob0right= right_prob_RIL(MAA, j, imarker, r, position); prob1right= right_prob_RIL(MH, j, imarker, r, position); break; case CUNKNOWN: fatal("Strange: unknown crosstype in mqm augment()", ""); break; } prob0= prob0left*prob0right; prob1= prob1left*prob1right; if (ii==previaug) probmax= (prob0>prob1 ? newprob[ii]*prob0 : newprob[ii]*prob1); if (prob1>prob0) { if (probmax/(newprob[ii]*prob0)<minprobratio) { if (++iaug >= maxNaug) goto bailout; newmarker[j][iaug]= MAA; newprob[iaug]= newprob[ii]*prob0left; newprobmax[iaug]= newprob[iaug]*prob0right; for (jj=0; jj<Nmark; jj++) { if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii]; } newind[iaug]=iidx; newy[iaug]=y[i]; } newmarker[j][ii]= MH; newprobmax[ii]= newprob[ii]*prob1; newprob[ii]*= prob1left; } else { if (probmax/(newprob[ii]*prob1)<minprobratio) { if (++iaug >= maxNaug) goto bailout; newmarker[j][iaug]= MH; newprob[iaug]= newprob[ii]*prob1left; newprobmax[iaug]= newprob[iaug]*prob1right; for (jj=0; jj<Nmark; jj++) { if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii]; } newind[iaug]=iidx; newy[iaug]=y[i]; } newmarker[j][ii]= MAA; newprobmax[ii]= newprob[ii]*prob0; newprob[ii]*= prob0left; } probmax= (probmax>newprobmax[ii] ? probmax : newprobmax[ii]); } else if (newmarker[j][ii]==MMISSING) { //UNKNOWN: augment data to contain AB, AA and BB for (jj=0; jj<Nmark; jj++) imarker[jj]= newmarker[jj][ii]; if ((position[j]==MLEFT||position[j]==MUNLINKED)) { prob0left= start_prob(crosstype, MAA); prob1left= start_prob(crosstype, MH); prob2left= start_prob(crosstype, MBB); } else { prob0left= left_prob(r[j-1],newmarker[j-1][ii],MAA,crosstype); //prob0left= prob(newmarker, r, ii, j-1, MAA, crosstype, 0); prob1left= left_prob(r[j-1],newmarker[j-1][ii],MH,crosstype); //prob1left= prob(newmarker, r, ii, j-1, MH, crosstype, 0); prob2left= left_prob(r[j-1],newmarker[j-1][ii],MBB,crosstype); //prob2left= prob(newmarker, r, ii, j-1, MBB, crosstype, 0); } switch (crosstype) { case CF2: prob0right= right_prob_F2(MAA, j, imarker, r, position); //prob0right= probright(MAA, j, imarker, r, position, crosstype); prob1right= right_prob_F2(MH, j, imarker, r, position); //prob1right= probright(MH, j, imarker, r, position, crosstype); prob2right= right_prob_F2(MBB, j, imarker, r, position); //prob2right= probright(MBB, j, imarker, r, position, crosstype); break; case CBC: prob0right= right_prob_BC(MAA, j, imarker, r, position); prob1right= right_prob_BC(MH, j, imarker, r, position); prob2right= 0.0; break; case CRIL: prob0right= right_prob_RIL(MAA, j, imarker, r, position); prob1right= 0.0; prob2right= right_prob_RIL(MBB, j, imarker, r, position); break; case CUNKNOWN: fatal("Strange: unknown crosstype in mqm augment()", ""); break; } prob0= prob0left*prob0right; prob1= prob1left*prob1right; prob2= prob2left*prob2right; if (ii==previaug) { if ((prob2>prob1)&&(prob2>prob0)) probmax= newprob[ii]*prob2; else if ((prob1>prob0)&&(prob1>prob2)) probmax= newprob[ii]*prob1; else probmax= newprob[ii]*prob0; } if ((prob2>prob1)&&(prob2>prob0)) { if (probmax/(newprob[ii]*prob1)<minprobratio) { if (++iaug >= maxNaug) goto bailout; newmarker[j][iaug]= MH; newprob[iaug]= newprob[ii]*prob1left; newprobmax[iaug]= newprob[iaug]*prob1right; for (jj=0; jj<Nmark; jj++) { if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii]; } newind[iaug]=iidx; newy[iaug]=y[i]; } if (probmax/(newprob[ii]*prob0)<minprobratio) { if (++iaug >= maxNaug) goto bailout; newmarker[j][iaug]= MAA; newprob[iaug]= newprob[ii]*prob0left; newprobmax[iaug]= newprob[iaug]*prob0right; for (jj=0; jj<Nmark; jj++) { if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii]; } newind[iaug]=iidx; newy[iaug]=y[i]; } newmarker[j][ii]= MBB; newprobmax[ii]= newprob[ii]*prob2; newprob[ii]*= prob2left; } else if ((prob1>prob2)&&(prob1>prob0)) { if (probmax/(newprob[ii]*prob2)<minprobratio) { if (++iaug >= maxNaug) goto bailout; newmarker[j][iaug]= MBB; newprob[iaug]= newprob[ii]*prob2left; newprobmax[iaug]= newprob[iaug]*prob2right; for (jj=0; jj<Nmark; jj++) { if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii]; } newind[iaug]=iidx; newy[iaug]=y[i]; } if (probmax/(newprob[ii]*prob0)<minprobratio) { if (++iaug >= maxNaug) goto bailout; newmarker[j][iaug]= MAA; newprob[iaug]= newprob[ii]*prob0left; newprobmax[iaug]= newprob[iaug]*prob0right; for (jj=0; jj<Nmark; jj++) { if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii]; } newind[iaug]=iidx; newy[iaug]=y[i]; } newmarker[j][ii]= MH; newprobmax[ii]= newprob[ii]*prob1; newprob[ii]*= prob1left; } else { if (probmax/(newprob[ii]*prob1)<minprobratio) { if (++iaug >= maxNaug) goto bailout; newmarker[j][iaug]= MH; newprob[iaug]= newprob[ii]*prob1left; newprobmax[iaug]= newprob[iaug]*prob1right; for (jj=0; jj<Nmark; jj++) { if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii]; } newind[iaug]=iidx; newy[iaug]=y[i]; } if (probmax/(newprob[ii]*prob2)<minprobratio) { if (++iaug >= maxNaug) goto bailout; newmarker[j][iaug]= MBB; newprob[iaug]= newprob[ii]*prob2left; newprobmax[iaug]= newprob[iaug]*prob2right; for (jj=0; jj<Nmark; jj++) { if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii]; } newind[iaug]=iidx; newy[iaug]=y[i]; } newmarker[j][ii]= MAA; newprobmax[ii]= newprob[ii]*prob0; newprob[ii]*= prob0left; } probmax= (probmax>newprobmax[ii] ? probmax : newprobmax[ii]); } else { //STD case we know what the next marker is nou use probleft to estimate the likelihood of the current location if ((position[j]==MLEFT||position[j]==MUNLINKED)) { prob0left= start_prob(crosstype, newmarker[j][ii]); } else { prob0left= left_prob(r[j-1],newmarker[j-1][ii],newmarker[j][ii],crosstype); //prob0left= prob(newmarker, r, ii, j-1, newmarker[j][ii], crosstype, 0); } newprob[ii]*= prob0left; } if (iaug+3>maxNaug) { Rprintf("ERROR: augmentation (this code should not be reached)\n"); goto bailout; } } if ((iaug-previaug+1)>imaxNaug) { newNind-= 1; iaug= previaug-1; succesind[i]=0; //for(int x=previaug;x<previaug+imaxNaug;x++){ // Rprintf("INFO: Individual: %d, variant: %d, prob: %f",i,x,newprob[x]); //} if (verbose) Rprintf("INFO: Individual %d moved to second augmentation round\n", i); } sumprob= 0.0; for (int ii=previaug; ii<=iaug; ii++) sumprob+= newprob[ii]; for (int ii=previaug; ii<=iaug; ii++) newprob[ii]/= sumprob; } if (++iaug >= maxNaug) goto bailout; previaug=iaug; } *Naug = iaug; *Nind = newNind; *augmarker = newMQMMarkerMatrix(Nmark, *Naug); *augy = newvector(*Naug); *augind = newivector(*Naug); *sucind = newivector(nind0); for (int i=0; i<nind0; i++) { (*sucind)[i] = succesind[i]; } for (int i=0; i<(*Naug); i++) { (*augy)[i]= newy[i]; (*augind)[i]= newind[i]; for (int j=0; j<Nmark; j++) (*augmarker)[j][i]= newmarker[j][i]; } goto cleanup; bailout: Rprintf("INFO: Dataset too large after augmentation\n"); if (verbose) fatal("Recall procedure with larger value for augmentation parameters or increase the parameter minprob\n"); retvalue = 0; cleanup: Free(newy); delMQMMarkerMatrix(newmarker, Nmark+1); //Free(newmarker); Free(newind); Free(newprob); Free(newprobmax); Free(imarker); return retvalue; }