double probnew(double ***MendelM,cmatrix loci, vector r, int i, int j,char c,char crosstype,int JorC,int ADJ,int start){ if(start){ return start_prob(crosstype,loci[j][i]); } char compareto; int index; int Nrecom; if(JorC==1){ //Rprintf("C %d %d\n",i,j); compareto = c; }else{ //Rprintf("loci[j+1][i] %d\n",j); compareto = loci[j+1][i]; } if ((crosstype=='F')&&(loci[j][i]=='1')&&(compareto=='1')){ index = 1; }else{ index = 0; } Nrecom = absdouble((double)loci[j][i]-(double)compareto); return MendelM[j+ADJ][Nrecom][index]; }
/* ML estimation of parameters in mixture model via EM; maximum-likelihood * estimation of parameters in the mixture model via the EM algorithm, using * multilocus information, but assuming known recombination frequencies */ double QTLmixture(MQMMarkerMatrix loci, cvector cofactor, vector r, cvector position, vector y, ivector ind, int Nind, int Naug, int Nloci, double *variance, int em, vector *weight, const bool useREML,const bool fitQTL,const bool dominance, MQMCrossType crosstype, int verbose) { //debug_trace("QTLmixture called Nloci=%d Nind=%d Naug=%d, REML=%d em=%d fit=%d domi=%d cross=%c\n",Nloci,Nind,Naug,useREML,em,fitQTL,dominance,crosstype); //for (int i=0; i<Nloci; i++){ debug_trace("loci %d : recombfreq=%f\n",i,r[i]); } int iem= 0, i, j; bool warnZeroDist=false; bool biasadj=false; double oldlogL=-10000, delta=1.0, calc_i, Pscale=1.75; vector indweight = newvector(Nind); int newNaug = ((!fitQTL) ? Naug : 3*Naug); vector Fy = newvector(newNaug); double logP = Nloci*log(Pscale); // only for computational accuracy bool varknown = (((*variance)==-1.0) ? false : true ); vector Ploci = newvector(newNaug); #ifndef STANDALONE R_CheckUserInterrupt(); /* check for ^C */ R_FlushConsole(); #endif if (!useREML) { varknown=false; biasadj=false; } for (i=0; i<newNaug; i++) { Ploci[i]= 1.0; } if (!fitQTL) { for (j=0; j<Nloci; j++) { for (i=0; i<Naug; i++) Ploci[i]*= Pscale; if ((position[j]==MLEFT)||(position[j]==MUNLINKED)) { for (i=0; i<Naug; i++) { calc_i = start_prob(crosstype, loci[j][i]); // calc_i= prob(loci, r, i, j, MH, crosstype, 0, 1); Ploci[i]*= calc_i; //Als Ploci > 0 en calc_i > 0 then we want to assert Ploci[] != 0 } } if ((position[j]==MLEFT)||(position[j]==MMIDDLE)) { for (i=0; i<Naug; i++) { calc_i =left_prob(r[j],loci[j][i],loci[j+1][i],crosstype); //calc_i = prob(loci, r, i, j, loci[j+1][i], crosstype, 0); if(calc_i == 0.0){calc_i=1.0;warnZeroDist=true;} Ploci[i]*= calc_i; } } } } else { for (j=0; j<Nloci; j++) { for (i=0; i<Naug; i++) { Ploci[i]*= Pscale; // only for computational accuracy; see use of logP Ploci[i+Naug]*= Pscale; // only for computational accuracy; see use of logP Ploci[i+2*Naug]*= Pscale; // only for computational accuracy; see use of logP } if ((position[j]==MLEFT)||(position[j]==MUNLINKED)) { if (cofactor[j]<=MCOF){ for (i=0; i<Naug; i++) { calc_i = start_prob(crosstype, loci[j][i]); // calc_i= prob(loci, r, i, j, MH, crosstype, 0, 1); Ploci[i] *= calc_i; Ploci[i+Naug] *= calc_i; Ploci[i+2*Naug] *= calc_i; } }else{ for (i=0; i<Naug; i++) { Ploci[i]*= start_prob(crosstype, MAA); //startvalue for MAA for new chromosome Ploci[i+Naug]*= start_prob(crosstype, MH); //startvalue for MH for new chromosome Ploci[i+2*Naug] *= start_prob(crosstype, MBB); //startvalue for MBB for new chromosome } } } if ((position[j]==MLEFT)||(position[j]==MMIDDLE)) { if ((cofactor[j]<=MCOF)&&(cofactor[j+1]<=MCOF)) for (i=0; i<Naug; i++) { calc_i =left_prob(r[j],loci[j][i],loci[j+1][i],crosstype); //calc_i = prob(loci, r, i, j, loci[j+1][i], crosstype, 0); if(calc_i == 0.0){calc_i=1.0;warnZeroDist=true;} Ploci[i]*= calc_i; Ploci[i+Naug]*= calc_i; Ploci[i+2*Naug]*= calc_i; } else if (cofactor[j]<=MCOF) // locus j+1 == QTL for (i=0; i<Naug; i++) { // QTL==MAA || MH || MBB means: What is the prob of finding an MAA at J=1 calc_i =left_prob(r[j],loci[j][i],MAA,crosstype); //calc_i = prob(loci, r, i, j, MAA, crosstype, 0); Ploci[i]*= calc_i; calc_i = left_prob(r[j],loci[j][i],MH,crosstype); //calc_i = prob(loci, r, i, j, MH, crosstype, 0); Ploci[i+Naug]*= calc_i; calc_i = left_prob(r[j],loci[j][i],MBB,crosstype); //calc_i = prob(loci, r, i, j, MBB, crosstype, 0); Ploci[i+2*Naug]*= calc_i; } else // locus j == QTL for (i=0; i<Naug; i++) { // QTL==MQTL calc_i = left_prob(r[j],MAA,loci[j+1][i],crosstype); //calc_i = prob(loci, r, i, j+1, MAA, crosstype, -1); Ploci[i]*= calc_i; calc_i = left_prob(r[j],MH,loci[j+1][i],crosstype); //calc_i = prob(loci, r, i, j+1, MH, crosstype, -1); Ploci[i+Naug]*= calc_i; calc_i = left_prob(r[j],MBB,loci[j+1][i],crosstype); //calc_i = prob(loci, r, i, j+1, MBB, crosstype, -1); Ploci[i+2*Naug]*= calc_i; } } } } if(warnZeroDist)info("!!! 0.0 from Prob !!! Markers at same Cm but different genotype !!!"); // Rprintf("INFO: Done fitting QTL's\n"); if ((*weight)[0]== -1.0) { for (i=0; i<Nind; i++) indweight[i]= 0.0; if (!fitQTL) { for (i=0; i<Naug; i++) indweight[ind[i]]+=Ploci[i]; for (i=0; i<Naug; i++) (*weight)[i]= Ploci[i]/indweight[ind[i]]; } else { for (i=0; i<Naug; i++) indweight[ind[i]]+=Ploci[i]+Ploci[i+Naug]+Ploci[i+2*Naug]; for (i=0; i<Naug; i++) { (*weight)[i] = Ploci[i]/indweight[ind[i]]; (*weight)[i+Naug] = Ploci[i+Naug]/indweight[ind[i]]; (*weight)[i+2*Naug]= Ploci[i+2*Naug]/indweight[ind[i]]; } } } debug_trace("Weights done\n"); debug_trace("Individual->trait,indweight weight Ploci\n"); //for (int j=0; j<Nind; j++){ // debug_trace("%d->%f,%f %f %f\n", j, y[j],indweight[i], (*weight)[j], Ploci[j]); //} double logL = 0; vector indL = newvector(Nind); while ((iem<em)&&(delta>1.0e-5)) { iem+=1; if (!varknown) *variance=-1.0; logL = regression(Nind, Nloci, cofactor, loci, y, weight, ind, Naug, variance, Fy, biasadj, fitQTL, dominance); logL = 0.0; for (i=0; i<Nind; i++) indL[i]= 0.0; if (!fitQTL) // no QTL fitted for (i=0; i<Naug; i++) { (*weight)[i]= Ploci[i]*Fy[i]; indL[ind[i]]= indL[ind[i]] + (*weight)[i]; } else // QTL moved along the chromosomes for (i=0; i<Naug; i++) { (*weight)[i]= Ploci[i]*Fy[i]; (*weight)[i+Naug] = Ploci[i+Naug]* Fy[i+Naug]; (*weight)[i+2*Naug]= Ploci[i+2*Naug]*Fy[i+2*Naug]; indL[ind[i]]+=(*weight)[i]+(*weight)[i+Naug]+(*weight)[i+2*Naug]; } for (i=0; i<Nind; i++) logL+=log(indL[i])-logP; for (i=0; i<Nind; i++) indweight[i]= 0.0; if (!fitQTL) { for (i=0; i<Naug; i++) indweight[ind[i]]+=(*weight)[i]; for (i=0; i<Naug; i++) (*weight)[i]/=indweight[ind[i]]; } else { for (i=0; i<Naug; i++) indweight[ind[i]]+=(*weight)[i]+(*weight)[i+Naug]+(*weight)[i+2*Naug]; for (i=0; i<Naug; i++) { (*weight)[i] /=indweight[ind[i]]; (*weight)[i+Naug] /=indweight[ind[i]]; (*weight)[i+2*Naug]/=indweight[ind[i]]; } } delta= fabs(logL-oldlogL); oldlogL= logL; } if ((useREML)&&(!varknown)) { // Bias adjustment after finished ML estimation via EM *variance=-1.0; biasadj=true; logL = regression(Nind, Nloci, cofactor, loci, y, weight, ind, Naug, variance, Fy, biasadj, fitQTL, dominance); logL = 0.0; for (int _i=0; _i<Nind; _i++) indL[_i]= 0.0; if (!fitQTL) for (i=0; i<Naug; i++) { (*weight)[i]= Ploci[i]*Fy[i]; indL[ind[i]]+=(*weight)[i]; } else for (i=0; i<Naug; i++) { (*weight)[i]= Ploci[i]*Fy[i]; (*weight)[i+Naug]= Ploci[i+Naug]*Fy[i+Naug]; (*weight)[i+2*Naug]= Ploci[i+2*Naug]*Fy[i+2*Naug]; indL[ind[i]]+=(*weight)[i]; indL[ind[i]]+=(*weight)[i+Naug]; indL[ind[i]]+=(*weight)[i+2*Naug]; } for (i=0; i<Nind; i++) logL+=log(indL[i])-logP; for (i=0; i<Nind; i++) indweight[i]= 0.0; if (!fitQTL) { for (i=0; i<Naug; i++) indweight[ind[i]]+=(*weight)[i]; for (i=0; i<Naug; i++) (*weight)[i]/=indweight[ind[i]]; } else { for (i=0; i<Naug; i++) { indweight[ind[i]]+=(*weight)[i]; indweight[ind[i]]+=(*weight)[i+Naug]; indweight[ind[i]]+=(*weight)[i+2*Naug]; } for (i=0; i<Naug; i++) { (*weight)[i] /=indweight[ind[i]]; (*weight)[i+Naug] /=indweight[ind[i]]; (*weight)[i+2*Naug]/=indweight[ind[i]]; } } } //for (i=0; i<Nind; i++){ debug_trace("IND %d Ploci: %f Fy: %f UNLOG:%f LogL:%f LogL-LogP: %f\n", i, Ploci[i], Fy[i], indL[i], log(indL[i]), log(indL[i])-logP); } Free(Fy); Free(Ploci); Free(indweight); Free(indL); return logL; }
/* ML estimation of parameters in mixture model via EM; */ double QTLmixture(cmatrix loci, cvector cofactor, vector r, cvector position, vector y, ivector ind, int Nind, int Naug, int Nloci, double *variance, int em, vector *weight,char REMLorML,char fitQTL,char dominance,char crosstype,Mmatrix MendelM,int verbose){ //if(verbose==1){Rprintf("QTLmixture called\n");} int iem= 0, newNaug, i, j; char varknown, biasadj='n'; double oldlogL=-10000, delta=1.0, calc_i, logP=0.0, Pscale=1.75; double calc_ii; vector indweight, Ploci, Fy; indweight= newvector(Nind); newNaug= (fitQTL=='n' ? Naug : 3*Naug); Fy= newvector(newNaug); logP= Nloci*log(Pscale); // only for computational accuracy varknown= (((*variance)==-1.0) ? 'n' : 'y' ); Ploci= newvector(newNaug); #ifndef ALONE R_CheckUserInterrupt(); /* check for ^C */ R_ProcessEvents(); R_FlushConsole(); #endif if ((REMLorML=='0')&&(varknown=='n')){ // Rprintf("INFO: REML\n"); } if (REMLorML=='1') { // Rprintf("INFO: ML\n"); varknown='n'; biasadj='n'; } for (i=0; i<newNaug; i++){ Ploci[i]= 1.0; } if (fitQTL=='n'){ //Rprintf("FitQTL=N\n"); for (j=0; j<Nloci; j++){ for (i=0; i<Naug; i++) Ploci[i]*= Pscale; //Here we have ProbLeft if ((position[j]=='L')||(position[j]=='U')){ for (i=0; i<Naug; i++){ calc_i= prob(loci,r,i,j,'1',crosstype,1,0,1); //calc_ii= probnew(MendelM,loci,r,i,j,'1',crosstype,1,0,1); Ploci[i]*= calc_i; //Rprintf("DEBUG: Prob vs ProbNew: %f %f\n",calc_i,calc_ii); } } if ((position[j]=='L')||(position[j]=='M')){ for (i=0; i<Naug; i++){ calc_i = prob(loci,r,i,j,loci[j+1][i],'F',0,0,0); //calc_ii = probnew(MendelM,loci,r,i,j,loci[j+1][i],'F',0,0,0); Ploci[i]*= calc_i; //Rprintf("DEBUG: Prob vs ProbNew: %f %f\n",calc_i,calc_ii); } } } }else{ // Rprintf("FitQTL=Y\n"); for (j=0; j<Nloci; j++) { for (i=0; i<Naug; i++) { Ploci[i]*= Pscale; Ploci[i+Naug]*= Pscale; Ploci[i+2*Naug]*= Pscale; // only for computational accuracy; see use of logP } if ((position[j]=='L')||(position[j]=='U')) { //Here we don't have any f2 dependancies anymore by using the prob function if (cofactor[j]<='1') for (i=0; i<Naug; i++) { calc_i= prob(loci,r,i,j,'1',crosstype,1,0,1); //calc_ii= probnew(MendelM,loci,r,i,j,'1',crosstype,1,0,1); Ploci[i]*= calc_i; Ploci[i+Naug]*= calc_i; Ploci[i+2*Naug]*= calc_i; //Rprintf("DEBUG: Prob vs ProbNew: %f %f\n",calc_i,calc_ii); } else for (i=0; i<Naug; i++) { //startvalues for each new chromosome Ploci[i]*= start_prob(crosstype,'0'); Ploci[i+Naug]*= start_prob(crosstype,'1'); Ploci[i+2*Naug] *= start_prob(crosstype,'2'); } // QTL='0', '1' or'2' } if ((position[j]=='L')||(position[j]=='M')) { if ((cofactor[j]<='1')&&(cofactor[j+1]<='1')) for (i=0; i<Naug; i++){ calc_i = prob(loci,r,i,j,loci[j+1][i],crosstype,0,0,0); //calc_ii = probnew(MendelM,loci,r,i,j,loci[j+1][i],crosstype,0,0,0); Ploci[i]*= calc_i; Ploci[i+Naug]*= calc_i; Ploci[i+2*Naug]*= calc_i; //Rprintf("DEBUG: Prob vs ProbNew: %f %f\n",calc_i,calc_ii); } else if (cofactor[j]<='1') // locus j+1 == QTL for (i=0; i<Naug; i++) { // QTL=='0' What is the prob of finding an '0' at J=1 calc_i = prob(loci,r,i,j,'0',crosstype,1,0,0); //calc_ii = probnew(MendelM,loci,r,i,j,'0',crosstype,1,0,0); Ploci[i]*= calc_i; // QTL=='1' calc_i = prob(loci,r,i,j,'1',crosstype,1,0,0); //calc_ii = probnew(MendelM,loci,r,i,j,'1',crosstype,1,0,0); Ploci[i+Naug]*= calc_i; // QTL=='2' calc_i = prob(loci,r,i,j,'2',crosstype,1,0,0); //calc_ii = probnew(MendelM,loci,r,i,j,'2',crosstype,1,0,0); Ploci[i+2*Naug]*= calc_i; //Rprintf("DEBUG: Prob vs ProbNew: %f %f\n",calc_i,calc_ii); } else // locus j == QTL for (i=0; i<Naug; i++) { // QTL=='0' calc_i = prob(loci,r,i,j+1,'0',crosstype,1,-1,0); //calc_ii = probnew(MendelM,loci,r,i,j+1,'0',crosstype,1,-1,0); Ploci[i]*= calc_i; // QTL=='1' calc_i = prob(loci,r,i,j+1,'1',crosstype,1,-1,0); //calc_ii = probnew(MendelM,loci,r,i,j+1,'1',crosstype,1,-1,0); Ploci[i+Naug]*= calc_i; // QTL=='2' calc_i = prob(loci,r,i,j+1,'2',crosstype,1,-1,0); //calc_ii = probnew(MendelM,loci,r,i,j+1,'2',crosstype,1,-1,0); Ploci[i+2*Naug]*= calc_i; //Rprintf("DEBUG: Prob vs ProbNew: %f %f\n",calc_i,calc_ii); } } } } // Rprintf("INFO: Done fitting QTL's\n"); if ((*weight)[0]== -1.0) { for (i=0; i<Nind; i++) indweight[i]= 0.0; if (fitQTL=='n') { for (i=0; i<Naug; i++) indweight[ind[i]]+=Ploci[i]; for (i=0; i<Naug; i++) (*weight)[i]= Ploci[i]/indweight[ind[i]]; } else { for (i=0; i<Naug; i++) indweight[ind[i]]+=Ploci[i]+Ploci[i+Naug]+Ploci[i+2*Naug]; for (i=0; i<Naug; i++) { (*weight)[i] = Ploci[i]/indweight[ind[i]]; (*weight)[i+Naug] = Ploci[i+Naug]/indweight[ind[i]]; (*weight)[i+2*Naug]= Ploci[i+2*Naug]/indweight[ind[i]]; } } } //Rprintf("Weights done\n"); //Rprintf("Individual->trait->cofactor->weight\n"); //for (int j=0; j<Nind; j++){ // Rprintf("%d->%f,%d,%f %f\n",j,y[j],cofactor[j],(*weight)[j],Ploci[j]); //} double logL=0; vector indL; indL= newvector(Nind); while ((iem<em)&&(delta>1.0e-5)) { iem+=1; if (varknown=='n') *variance=-1.0; //Rprintf("Checkpoint_b\n"); logL= regression(Nind, Nloci, cofactor, loci, y, weight, ind, Naug, variance, Fy, biasadj,fitQTL,dominance); logL=0.0; //Rprintf("regression ready\n"); for (i=0; i<Nind; i++) indL[i]= 0.0; if (fitQTL=='n') // no QTL fitted for (i=0; i<Naug; i++) { (*weight)[i]= Ploci[i]*Fy[i]; indL[ind[i]]= indL[ind[i]] + (*weight)[i]; } else // QTL moved along the chromosomes for (i=0; i<Naug; i++) { (*weight)[i]= Ploci[i]*Fy[i]; (*weight)[i+Naug] = Ploci[i+Naug]* Fy[i+Naug]; (*weight)[i+2*Naug]= Ploci[i+2*Naug]*Fy[i+2*Naug]; indL[ind[i]]+=(*weight)[i]+(*weight)[i+Naug]+(*weight)[i+2*Naug]; } for (i=0; i<Nind; i++) logL+=log(indL[i])-logP; for (i=0; i<Nind; i++) indweight[i]= 0.0; if (fitQTL=='n') { for (i=0; i<Naug; i++) indweight[ind[i]]+=(*weight)[i]; for (i=0; i<Naug; i++) (*weight)[i]/=indweight[ind[i]]; } else { for (i=0; i<Naug; i++) indweight[ind[i]]+=(*weight)[i]+(*weight)[i+Naug]+(*weight)[i+2*Naug]; for (i=0; i<Naug; i++) { (*weight)[i] /=indweight[ind[i]]; (*weight)[i+Naug] /=indweight[ind[i]]; (*weight)[i+2*Naug]/=indweight[ind[i]]; } } delta= absdouble(logL-oldlogL); oldlogL= logL; } //Rprintf("EM Finished\n"); // bias adjustment after finished ML estimation via EM if ((REMLorML=='0')&&(varknown=='n')) { // RRprintf("Checkpoint_c\n"); *variance=-1.0; biasadj='y'; logL= regression(Nind, Nloci, cofactor, loci, y, weight, ind, Naug, variance, Fy, biasadj,fitQTL,dominance); logL=0.0; for (int _i=0; _i<Nind; _i++) indL[_i]= 0.0; if (fitQTL=='n') for (i=0; i<Naug; i++) { (*weight)[i]= Ploci[i]*Fy[i]; indL[ind[i]]+=(*weight)[i]; } else for (i=0; i<Naug; i++) { (*weight)[i]= Ploci[i]*Fy[i]; (*weight)[i+Naug]= Ploci[i+Naug]*Fy[i+Naug]; (*weight)[i+2*Naug]= Ploci[i+2*Naug]*Fy[i+2*Naug]; indL[ind[i]]+=(*weight)[i]; indL[ind[i]]+=(*weight)[i+Naug]; indL[ind[i]]+=(*weight)[i+2*Naug]; } for (i=0; i<Nind; i++) logL+=log(indL[i])-logP; for (i=0; i<Nind; i++) indweight[i]= 0.0; if (fitQTL=='n') { for (i=0; i<Naug; i++) indweight[ind[i]]+=(*weight)[i]; for (i=0; i<Naug; i++) (*weight)[i]/=indweight[ind[i]]; } else { for (i=0; i<Naug; i++) { indweight[ind[i]]+=(*weight)[i]; indweight[ind[i]]+=(*weight)[i+Naug]; indweight[ind[i]]+=(*weight)[i+2*Naug]; } for (i=0; i<Naug; i++) { (*weight)[i] /=indweight[ind[i]]; (*weight)[i+Naug] /=indweight[ind[i]]; (*weight)[i+2*Naug]/=indweight[ind[i]]; } } } //for (i=0; i<Nind; i++){ // Rprintf("IND %d Ploci: %f Fy: %f UNLOG:%f LogL:%f LogL-LogP: %f\n",i,Ploci[i],Fy[i],indL[i],log(indL[i]),log(indL[i])-logP); //} Free(Fy); Free(Ploci); Free(indweight); Free(indL); return logL; }
int mqmaugment(const MQMMarkerMatrix marker, const vector y, MQMMarkerMatrix* augmarker, vector *augy, ivector* augind, ivector* sucind, int *Nind, int *Naug, const int Nmark, const cvector position, vector r, const int maxNaug, const int imaxNaug, const double minprob, const MQMCrossType crosstype, const int verbose) { int retvalue = 1; //[Danny] Assume everything will go right, (it never returned a 1 OK, initialization to 0 and return int jj; const int nind0 = *Nind; //Original number of individuals (*Naug) = maxNaug; // sets and returns the maximum size of augmented dataset // new variables sized to maxNaug: MQMMarkerMatrix newmarker; vector newy; MQMMarkerVector imarker; ivector newind; ivector succesind; double minprobratio = (1.0f/minprob); if(minprob!=1){ minprobratio += 0.00001; } newmarker = newMQMMarkerMatrix(Nmark+1, maxNaug); // augmented marker matrix newy = newvector(maxNaug); // phenotypes newind = newivector(maxNaug); // individuals index succesind = newivector(nind0); // Tracks if the augmentation is a succes imarker = newMQMMarkerVector(Nmark); int iaug = 0; // iaug keeps track of current augmented individual double prob0, prob1, prob2, sumprob, prob0left, prob1left, prob2left, prob0right=0.0, prob1right=0.0, prob2right = 0.0f; vector newprob = newvector(maxNaug); vector newprobmax = newvector(maxNaug); if (verbose) Rprintf("INFO: Crosstype determined by the algorithm: %c\n", crosstype); if (verbose) Rprintf("INFO: Augmentation parameters: Maximum augmentation=%d, Maximum augmentation per individual=%d, Minprob=%f\n", maxNaug, imaxNaug, minprob); // ---- foreach individual create one in the newmarker matrix int newNind = nind0; //Number of unique individuals int previaug = 0; // previous index in newmarkers for (int i=0; i<nind0; i++) { //Loop through individuals succesind[i] = 1; //Assume we succeed in augmentation #ifndef STANDALONE //R_ProcessEvents(); /* Try not to crash windows */ R_FlushConsole(); #endif const int dropped = nind0-newNind; //How many are dropped const int iidx = i - dropped; //Individuals I's new individual number based on dropped individuals newind[iaug] = iidx; // iidx corrects for dropped individuals newy[iaug] = y[i]; // cvariance (phenotype) newprob[iaug] = 1.0; //prop double probmax = 1.0; //current maximum probability for (int j=0; j<Nmark; j++){ newmarker[j][iaug]=marker[j][i]; // copy markers into newmarkers for the new indidivudal under investigation } for (int j=0; j<Nmark; j++) { //Loop through markers: const int maxiaug = iaug; // fixate maxiaug if ((maxiaug-previaug)<=imaxNaug) // within bounds for individual? for (int ii=previaug; ii<=maxiaug; ii++) { #ifndef STANDALONE R_CheckUserInterrupt(); /* check for ^C */ #endif debug_trace("i=%d ii=%d iidx=%d maxiaug=%d previaug=%d,imaxNaug=%d\n",i,ii,iidx,maxiaug,previaug,imaxNaug); // ---- walk from previous augmented to current augmented genotype //WE HAVE 3 SPECIAL CASES: (1) NOTAA, (2) NOTBB and (3)UNKNOWN, and the std case of a next known marker if (newmarker[j][ii]==MNOTAA) { //NOTAA augment data to contain AB and BB for (jj=0; jj<Nmark; jj++) imarker[jj] = newmarker[jj][ii]; if ((position[j]==MLEFT||position[j]==MUNLINKED)) { prob1left= start_prob(crosstype, MH); prob2left= start_prob(crosstype, MBB); } else { prob1left= left_prob(r[j-1],newmarker[j-1][ii],MH,crosstype); //prob1left= prob(newmarker, r, ii, j-1, MH, crosstype, 0); prob2left= left_prob(r[j-1],newmarker[j-1][ii],MBB,crosstype); //prob2left= prob(newmarker, r, ii, j-1, MBB, crosstype, 0); } switch (crosstype) { case CF2: prob1right= right_prob_F2(MH, j, imarker, r, position); //prob1right= probright(MH, j, imarker, r, position, crosstype); prob2right= right_prob_F2(MBB, j, imarker, r, position); //prob2right= probright(MBB, j, imarker, r, position, crosstype); break; case CBC: prob1right= right_prob_BC(MH, j, imarker, r, position); prob2right= right_prob_BC(MBB, j, imarker, r, position); break; case CRIL: prob1right= right_prob_RIL(MH, j, imarker, r, position); prob2right= right_prob_RIL(MBB, j, imarker, r, position); break; case CUNKNOWN: fatal("Strange: unknown crosstype in mqm augment()", ""); break; } prob1= prob1left*prob1right; prob2= prob2left*prob2right; if (ii==previaug) probmax = (prob2>prob1 ? newprob[ii]*prob2 : newprob[ii]*prob1); if (prob1>prob2) { if (probmax/(newprob[ii]*prob2)<minprobratio) { if (++iaug >= maxNaug) goto bailout; newmarker[j][iaug]= MBB; newprob[iaug]= newprob[ii]*prob2left; newprobmax[iaug]= newprob[iaug]*prob2right; for (jj=0; jj<Nmark; jj++) { if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii]; } newind[iaug]=iidx; newy[iaug]=y[i]; } newmarker[j][ii]= MH; newprobmax[ii]= newprob[ii]*prob1; newprob[ii]= newprob[ii]*prob1left; } else { if (probmax/(newprob[ii]*prob1)<minprobratio) { if (++iaug >= maxNaug) goto bailout; newmarker[j][iaug]= MH; newprob[iaug]= newprob[ii]*prob1left; newprobmax[iaug]= newprob[iaug]*prob1right; for (jj=0; jj<Nmark; jj++) { if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii]; } newind[iaug]=iidx; newy[iaug]=y[i]; } newmarker[j][ii]= MBB; newprobmax[ii]= newprob[ii]*prob2; newprob[ii]*= prob2left; } probmax = (probmax>newprobmax[ii] ? probmax : newprobmax[ii]); } else if (newmarker[j][ii]==MNOTBB) { //NOTBB: augment data can contain MH and MAA for (jj=0; jj<Nmark; jj++) imarker[jj]= newmarker[jj][ii]; if ((position[j]==MLEFT||position[j]==MUNLINKED)) { prob0left= start_prob(crosstype, MAA); prob1left= start_prob(crosstype, MH); } else { prob0left= left_prob(r[j-1],newmarker[j-1][ii],MAA,crosstype); //prob0left= prob(newmarker, r, ii, j-1, MAA, crosstype, 0); prob1left= left_prob(r[j-1],newmarker[j-1][ii],MH,crosstype); //prob1left= prob(newmarker, r, ii, j-1, MH, crosstype, 0); } switch (crosstype) { case CF2: prob0right= right_prob_F2(MAA, j, imarker, r, position); //prob0right= probright(MAA, j, imarker, r, position, crosstype); prob1right= right_prob_F2(MH, j, imarker, r, position); //prob1right= probright(MH, j, imarker, r, position, crosstype); break; case CBC: prob0right= right_prob_BC(MAA, j, imarker, r, position); prob1right= right_prob_BC(MH, j, imarker, r, position); break; case CRIL: prob0right= right_prob_RIL(MAA, j, imarker, r, position); prob1right= right_prob_RIL(MH, j, imarker, r, position); break; case CUNKNOWN: fatal("Strange: unknown crosstype in mqm augment()", ""); break; } prob0= prob0left*prob0right; prob1= prob1left*prob1right; if (ii==previaug) probmax= (prob0>prob1 ? newprob[ii]*prob0 : newprob[ii]*prob1); if (prob1>prob0) { if (probmax/(newprob[ii]*prob0)<minprobratio) { if (++iaug >= maxNaug) goto bailout; newmarker[j][iaug]= MAA; newprob[iaug]= newprob[ii]*prob0left; newprobmax[iaug]= newprob[iaug]*prob0right; for (jj=0; jj<Nmark; jj++) { if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii]; } newind[iaug]=iidx; newy[iaug]=y[i]; } newmarker[j][ii]= MH; newprobmax[ii]= newprob[ii]*prob1; newprob[ii]*= prob1left; } else { if (probmax/(newprob[ii]*prob1)<minprobratio) { if (++iaug >= maxNaug) goto bailout; newmarker[j][iaug]= MH; newprob[iaug]= newprob[ii]*prob1left; newprobmax[iaug]= newprob[iaug]*prob1right; for (jj=0; jj<Nmark; jj++) { if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii]; } newind[iaug]=iidx; newy[iaug]=y[i]; } newmarker[j][ii]= MAA; newprobmax[ii]= newprob[ii]*prob0; newprob[ii]*= prob0left; } probmax= (probmax>newprobmax[ii] ? probmax : newprobmax[ii]); } else if (newmarker[j][ii]==MMISSING) { //UNKNOWN: augment data to contain AB, AA and BB for (jj=0; jj<Nmark; jj++) imarker[jj]= newmarker[jj][ii]; if ((position[j]==MLEFT||position[j]==MUNLINKED)) { prob0left= start_prob(crosstype, MAA); prob1left= start_prob(crosstype, MH); prob2left= start_prob(crosstype, MBB); } else { prob0left= left_prob(r[j-1],newmarker[j-1][ii],MAA,crosstype); //prob0left= prob(newmarker, r, ii, j-1, MAA, crosstype, 0); prob1left= left_prob(r[j-1],newmarker[j-1][ii],MH,crosstype); //prob1left= prob(newmarker, r, ii, j-1, MH, crosstype, 0); prob2left= left_prob(r[j-1],newmarker[j-1][ii],MBB,crosstype); //prob2left= prob(newmarker, r, ii, j-1, MBB, crosstype, 0); } switch (crosstype) { case CF2: prob0right= right_prob_F2(MAA, j, imarker, r, position); //prob0right= probright(MAA, j, imarker, r, position, crosstype); prob1right= right_prob_F2(MH, j, imarker, r, position); //prob1right= probright(MH, j, imarker, r, position, crosstype); prob2right= right_prob_F2(MBB, j, imarker, r, position); //prob2right= probright(MBB, j, imarker, r, position, crosstype); break; case CBC: prob0right= right_prob_BC(MAA, j, imarker, r, position); prob1right= right_prob_BC(MH, j, imarker, r, position); prob2right= 0.0; break; case CRIL: prob0right= right_prob_RIL(MAA, j, imarker, r, position); prob1right= 0.0; prob2right= right_prob_RIL(MBB, j, imarker, r, position); break; case CUNKNOWN: fatal("Strange: unknown crosstype in mqm augment()", ""); break; } prob0= prob0left*prob0right; prob1= prob1left*prob1right; prob2= prob2left*prob2right; if (ii==previaug) { if ((prob2>prob1)&&(prob2>prob0)) probmax= newprob[ii]*prob2; else if ((prob1>prob0)&&(prob1>prob2)) probmax= newprob[ii]*prob1; else probmax= newprob[ii]*prob0; } if ((prob2>prob1)&&(prob2>prob0)) { if (probmax/(newprob[ii]*prob1)<minprobratio) { if (++iaug >= maxNaug) goto bailout; newmarker[j][iaug]= MH; newprob[iaug]= newprob[ii]*prob1left; newprobmax[iaug]= newprob[iaug]*prob1right; for (jj=0; jj<Nmark; jj++) { if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii]; } newind[iaug]=iidx; newy[iaug]=y[i]; } if (probmax/(newprob[ii]*prob0)<minprobratio) { if (++iaug >= maxNaug) goto bailout; newmarker[j][iaug]= MAA; newprob[iaug]= newprob[ii]*prob0left; newprobmax[iaug]= newprob[iaug]*prob0right; for (jj=0; jj<Nmark; jj++) { if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii]; } newind[iaug]=iidx; newy[iaug]=y[i]; } newmarker[j][ii]= MBB; newprobmax[ii]= newprob[ii]*prob2; newprob[ii]*= prob2left; } else if ((prob1>prob2)&&(prob1>prob0)) { if (probmax/(newprob[ii]*prob2)<minprobratio) { if (++iaug >= maxNaug) goto bailout; newmarker[j][iaug]= MBB; newprob[iaug]= newprob[ii]*prob2left; newprobmax[iaug]= newprob[iaug]*prob2right; for (jj=0; jj<Nmark; jj++) { if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii]; } newind[iaug]=iidx; newy[iaug]=y[i]; } if (probmax/(newprob[ii]*prob0)<minprobratio) { if (++iaug >= maxNaug) goto bailout; newmarker[j][iaug]= MAA; newprob[iaug]= newprob[ii]*prob0left; newprobmax[iaug]= newprob[iaug]*prob0right; for (jj=0; jj<Nmark; jj++) { if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii]; } newind[iaug]=iidx; newy[iaug]=y[i]; } newmarker[j][ii]= MH; newprobmax[ii]= newprob[ii]*prob1; newprob[ii]*= prob1left; } else { if (probmax/(newprob[ii]*prob1)<minprobratio) { if (++iaug >= maxNaug) goto bailout; newmarker[j][iaug]= MH; newprob[iaug]= newprob[ii]*prob1left; newprobmax[iaug]= newprob[iaug]*prob1right; for (jj=0; jj<Nmark; jj++) { if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii]; } newind[iaug]=iidx; newy[iaug]=y[i]; } if (probmax/(newprob[ii]*prob2)<minprobratio) { if (++iaug >= maxNaug) goto bailout; newmarker[j][iaug]= MBB; newprob[iaug]= newprob[ii]*prob2left; newprobmax[iaug]= newprob[iaug]*prob2right; for (jj=0; jj<Nmark; jj++) { if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii]; } newind[iaug]=iidx; newy[iaug]=y[i]; } newmarker[j][ii]= MAA; newprobmax[ii]= newprob[ii]*prob0; newprob[ii]*= prob0left; } probmax= (probmax>newprobmax[ii] ? probmax : newprobmax[ii]); } else { //STD case we know what the next marker is nou use probleft to estimate the likelihood of the current location if ((position[j]==MLEFT||position[j]==MUNLINKED)) { prob0left= start_prob(crosstype, newmarker[j][ii]); } else { prob0left= left_prob(r[j-1],newmarker[j-1][ii],newmarker[j][ii],crosstype); //prob0left= prob(newmarker, r, ii, j-1, newmarker[j][ii], crosstype, 0); } newprob[ii]*= prob0left; } if (iaug+3>maxNaug) { Rprintf("ERROR: augmentation (this code should not be reached)\n"); goto bailout; } } if ((iaug-previaug+1)>imaxNaug) { newNind-= 1; iaug= previaug-1; succesind[i]=0; //for(int x=previaug;x<previaug+imaxNaug;x++){ // Rprintf("INFO: Individual: %d, variant: %d, prob: %f",i,x,newprob[x]); //} if (verbose) Rprintf("INFO: Individual %d moved to second augmentation round\n", i); } sumprob= 0.0; for (int ii=previaug; ii<=iaug; ii++) sumprob+= newprob[ii]; for (int ii=previaug; ii<=iaug; ii++) newprob[ii]/= sumprob; } if (++iaug >= maxNaug) goto bailout; previaug=iaug; } *Naug = iaug; *Nind = newNind; *augmarker = newMQMMarkerMatrix(Nmark, *Naug); *augy = newvector(*Naug); *augind = newivector(*Naug); *sucind = newivector(nind0); for (int i=0; i<nind0; i++) { (*sucind)[i] = succesind[i]; } for (int i=0; i<(*Naug); i++) { (*augy)[i]= newy[i]; (*augind)[i]= newind[i]; for (int j=0; j<Nmark; j++) (*augmarker)[j][i]= newmarker[j][i]; } goto cleanup; bailout: Rprintf("INFO: Dataset too large after augmentation\n"); if (verbose) fatal("Recall procedure with larger value for augmentation parameters or increase the parameter minprob\n"); retvalue = 0; cleanup: Free(newy); delMQMMarkerMatrix(newmarker, Nmark+1); //Free(newmarker); Free(newind); Free(newprob); Free(newprobmax); Free(imarker); return retvalue; }