/* * mapQTL moves a QTL along the chromosome and calculated at each map position * the QTL likelihood. Uses either all cofactors, or selected cofactors only */ double mapQTL(int Nind, int Nmark, cvector cofactor, cvector selcofactor, MQMMarkerMatrix marker, cvector position, vector mapdistance, vector y, vector r, ivector ind, int Naug, double variance, char printoutput, vector *informationcontent, matrix *Frun, int run, char REMLorML, bool fitQTL, bool dominance, int em, double windowsize, double stepsize, double stepmin, double stepmax, MQMCrossType crosstype, int verbose) { //Rprintf("INFO: mapQTL function called.\n"); int j, jj, jjj=0; int Nloci = Nmark+1; vector Fy = newvector(Naug); cvector QTLcofactor = newcvector(Nloci); cvector saveQTLcofactor = newcvector(Nloci); double infocontent; vector info0 = newvector(Nind); vector info1 = newvector(Nind); vector info2 = newvector(Nind); vector weight = newvector(Naug); weight[0]= -1.0; /* fit QTL on top of markers (but: should also be done with routine QTLmixture() for exact ML) */ cvector newcofactor= newcvector(Nmark); cvector direction = newcvector(Nmark); vector cumdistance = newvector(Nmark+1); double QTLlikelihood=0.0; for (j=0; j<Nmark; j++) { if (position[j]==MLEFT) cumdistance[j]= -50*log(1-2.0*r[j]); else if (position[j]==MMIDDLE) cumdistance[j]= cumdistance[j-1]-50*log(1-2.0*r[j]); } double savelogL=0.0; // log-likelihood of model with all selected cofactors /* fit QTL on top of markers (full ML) fit QTL between markers (full ML) */ // cout << "please wait (mixture calculus may take quite a lot of time)" << endl; /* estimate variance in mixture model with all marker cofactors */ // cout << "estimate variance in mixture model with all cofactors" << endl; variance= -1.0; savelogL= 2.0*QTLmixture(marker, cofactor, r, position, y, ind, Nind, Naug, Nmark, &variance, em, &weight, REMLorML, fitQTL, dominance, crosstype, verbose); if (verbose==1){ info("INFO: log-likelihood of full model= %f\n", savelogL/2); } // augment data for missing QTL observations (x 3) fitQTL=true; int newNaug = 3 * Naug; Free(weight); weight = newvector(newNaug); weight[0] = 1.0; vector weight0 = newvector(newNaug); weight0[0] = -1.0; vector QTLr = newvector(Nloci); vector QTLmapdistance = newvector(Nloci); cvector QTLposition = newcvector(Nloci); MQMMarkerMatrix QTLloci = (MQMMarkerMatrix)Calloc(Nloci, MQMMarkerVector); double moveQTL = stepmin; char nextinterval= 'n', firsttime='y'; double maxF=0.0, savebaseNoQTLModel=0.0; int baseNoQTLModel=0, step=0; for (j=0; j<Nmark; j++) { /* fit a QTL in two steps: 1. move QTL along marker interval j -> j+1 with steps of stepsize=20 cM, starting from -20 cM up to 220 cM 2. all marker-cofactors in the neighborhood of the QTL are dropped by using cM='windows' as criterium */ nextinterval= 'n'; #ifndef STANDALONE R_CheckUserInterrupt(); /* check for ^C */ R_FlushConsole(); #endif while (nextinterval=='n') { // step 1: // Rprintf("DEBUG testing STEP 1"); if (position[j]==MLEFT) { if (moveQTL<=mapdistance[j]) { QTLposition[j]= position[j]; QTLposition[j+1]= MMIDDLE; QTLr[j]= recombination_frequentie((mapdistance[j]-moveQTL)); QTLr[j+1]= r[j]; QTLloci[j+1]= marker[j]; QTLloci[j]= marker[Nloci-1]; QTLmapdistance[j]= moveQTL; QTLmapdistance[j+1]= mapdistance[j]; if (firsttime=='y') weight[0]= -1.0; moveQTL+= stepsize; } else if (moveQTL<=mapdistance[j+1]) { QTLposition[j]= position[j]; QTLposition[j+1]= MMIDDLE; QTLr[j]= recombination_frequentie((moveQTL-mapdistance[j])); QTLr[j+1]= recombination_frequentie((mapdistance[j+1]-moveQTL)); //r[j]; QTLloci[j]= marker[j]; QTLloci[j+1]= marker[Nloci-1]; QTLmapdistance[j]= mapdistance[j]; QTLmapdistance[j+1]= moveQTL; moveQTL+= stepsize; } else nextinterval= 'y'; } else if (position[j]==MMIDDLE) { if (moveQTL<=mapdistance[j+1]) { QTLposition[j]= position[j]; QTLposition[j+1]= MMIDDLE; QTLr[j]= recombination_frequentie((moveQTL-mapdistance[j])); //0.0; QTLr[j+1]= recombination_frequentie((mapdistance[j+1]-moveQTL)); //r[j]; QTLloci[j]= marker[j]; QTLloci[j+1]= marker[Nloci-1]; QTLmapdistance[j]= mapdistance[j]; QTLmapdistance[j+1]= moveQTL; moveQTL+= stepsize; } else nextinterval= 'y'; } else if (position[j]==MRIGHT) { if (moveQTL<=stepmax) { QTLposition[j]= MMIDDLE; QTLposition[j+1]= MRIGHT; QTLr[j]= recombination_frequentie((moveQTL-mapdistance[j])); //0.0; QTLr[j+1]= r[j]; // note r[j]=999.0 QTLloci[j]= marker[j]; QTLloci[j+1]= marker[Nloci-1]; QTLmapdistance[j]= mapdistance[j]; QTLmapdistance[j+1]= moveQTL; moveQTL+= stepsize; } else { nextinterval= 'y'; moveQTL= stepmin; } } else if (position[j]==MUNLINKED) { QTLposition[j]= MLEFT; QTLposition[j+1]= MRIGHT; //position[j] ?? MRIGHT ? QTLr[j]= 0.0; QTLr[j+1]= r[j]; QTLloci[j+1]= marker[j]; QTLloci[j]= marker[Nloci-1]; QTLmapdistance[j]= mapdistance[j]; QTLmapdistance[j+1]= mapdistance[j]; if (firsttime=='y') weight[0]= -1.0; nextinterval= 'y'; moveQTL= stepmin; } if (nextinterval=='n') { // QTLcofactor[j]= MAA; // QTLcofactor[j+1]= MAA; for (jj=0; jj<j; jj++) { QTLposition[jj]= position[jj]; QTLr[jj]= r[jj]; QTLloci[jj]= marker[jj]; QTLmapdistance[jj]= mapdistance[jj]; QTLcofactor[jj]= selcofactor[jj]; } for (jj=j+1; jj<Nmark; jj++) { QTLposition[jj+1]= position[jj]; QTLr[jj+1]= r[jj]; QTLloci[jj+1]= marker[jj]; QTLcofactor[jj+1]= selcofactor[jj]; QTLmapdistance[jj+1]= mapdistance[jj]; QTLcofactor[jj+1]= selcofactor[jj]; } // step 2: // Rprintf("DEBUG testing STEP 2"); if ((position[j]==MLEFT)&&((moveQTL-stepsize)<=mapdistance[j])) { QTLcofactor[j]= MNOCOF; QTLcofactor[j+1]= (((QTLmapdistance[j+1]-QTLmapdistance[j])<windowsize) ? MNOCOF : selcofactor[j]); } else { QTLcofactor[j+1]= MNOCOF; QTLcofactor[j]= (((QTLmapdistance[j+1]-QTLmapdistance[j])<windowsize) ? MNOCOF : selcofactor[j]); } if ((position[j]==MLEFT)||(position[j]==MMIDDLE)) { jjj=j+2; while (QTLposition[jjj]==MMIDDLE) { if ((position[j]==MLEFT)&&((moveQTL-stepsize)<=mapdistance[j])) QTLcofactor[jjj]= (((QTLmapdistance[jjj]-QTLmapdistance[j])<windowsize) ? MNOCOF : QTLcofactor[jjj]); else QTLcofactor[jjj]= (((QTLmapdistance[jjj]-QTLmapdistance[j+1])<windowsize) ? MNOCOF : QTLcofactor[jjj]); jjj++; } QTLcofactor[jjj]= (((QTLmapdistance[jjj]-QTLmapdistance[j+1])<windowsize) ? MNOCOF : QTLcofactor[jjj]); } if ((position[j]==MMIDDLE)||(position[j]==MRIGHT)) { jjj=j-1; while (QTLposition[jjj]==MMIDDLE) { QTLcofactor[jjj]= (((QTLmapdistance[j+1]-QTLmapdistance[jjj])<windowsize) ? MNOCOF : QTLcofactor[jjj]); jjj--; } QTLcofactor[jjj]= (((QTLmapdistance[j+1]-QTLmapdistance[jjj])<windowsize) ? MNOCOF : QTLcofactor[jjj]); } // fit no-QTL model at current map position (cofactors only) if (firsttime=='y') { for (jj=0; jj<Nloci; jj++) saveQTLcofactor[jj]= QTLcofactor[jj]; baseNoQTLModel=1; firsttime='n'; } else { baseNoQTLModel=0; for (jj=0; jj<Nloci; jj++) baseNoQTLModel+= (saveQTLcofactor[jj]==QTLcofactor[jj] ? 0 : 1); } // Rprintf("fitting NO-QTL model\n"); if (baseNoQTLModel!=0) { // new base no-QTL model if ((position[j]==MLEFT)&&((moveQTL-stepsize)<=mapdistance[j])) QTLcofactor[j]= MSEX; else QTLcofactor[j+1]= MSEX; // Rprintf("INFO: Before base model\n", QTLlikelihood/-2); QTLlikelihood= -2.0*QTLmixture(QTLloci, QTLcofactor, QTLr, QTLposition, y, ind, Nind, Naug, Nloci, &variance, em, &weight0, REMLorML, fitQTL, dominance, crosstype, verbose); // Rprintf("INFO: log-likelihood of NO QTL model= %f\n", QTLlikelihood/-2); weight0[0]= -1.0; savebaseNoQTLModel= QTLlikelihood; if ((position[j]==MLEFT)&&((moveQTL-stepsize)<=mapdistance[j])) QTLcofactor[j]= MNOCOF; else QTLcofactor[j+1]= MNOCOF; for (jj=0; jj<Nloci; jj++) saveQTLcofactor[jj]= QTLcofactor[jj]; } else QTLlikelihood= savebaseNoQTLModel; // fit QTL-model (plus cofactors) at current map position // MNOTAA= QTL if ((position[j]==MLEFT)&&((moveQTL-stepsize)<=mapdistance[j])) QTLcofactor[j]= MQTL; else QTLcofactor[j+1]= MQTL; if (REMLorML==MH) weight[0]= -1.0; QTLlikelihood+=2.0*QTLmixture(QTLloci, QTLcofactor, QTLr, QTLposition, y, ind, Nind, Naug, Nloci, &variance, em, &weight, REMLorML, fitQTL, dominance, crosstype, verbose); //this is the place we error at, because the likelihood is not correct. if (QTLlikelihood<-0.05) { Rprintf("WARNING: Negative QTLlikelihood=%f versus BASE MODEL: %f\nThis applies to the QTL at %d\n", QTLlikelihood, (savebaseNoQTLModel/-2), j); //return 0;} } maxF= (maxF<QTLlikelihood ? QTLlikelihood : maxF); if (run>0) (*Frun)[step][run]+= QTLlikelihood; else (*Frun)[step][0]+= QTLlikelihood; /* Each individual has condition multilocus probabilities for being 0, 1 or 2 at the QTL. Calculate the maximum per individu. Calculate the mean of this maximum, averaging over all individuals This is the information content plotted. */ infocontent= 0.0; for (int i=0; i<Nind; i++) { info0[i]= 0.0; // qq info1[i]= 0.0; // Qq info2[i]= 0.0; // QQ } for (int i=0; i<Naug; i++) { info0[ind[i]]+= weight[i]; info1[ind[i]]+= weight[i+Naug]; info2[ind[i]]+= weight[i+2*Naug]; } for (int i=0; i<Nind; i++) if (info0[i]<info1[i]) infocontent+= (info1[i]<info2[i] ? info2[i] : info1[i]); else infocontent+= (info0[i]<info2[i] ? info2[i] : info0[i]); (*informationcontent)[step]+=infocontent/Nind; step++; } } } fitQTL=false; freevector(direction); Free(info0); Free(info1); Free(info2); Free(weight); Free(weight0); Free(QTLr); Free(QTLposition); Free(Fy); Free(newcofactor); Free(QTLcofactor); Free(cumdistance); Free(QTLmapdistance); Free(QTLloci); Free(saveQTLcofactor); return maxF; //QTLlikelihood; }
double analyseF2(int Nind, int *nummark, cvector *cofactor, MQMMarkerMatrix marker, vector y, int Backwards, double **QTL,vector *mapdistance, int **Chromo, int Nrun, int RMLorML, double windowsize, double stepsize, double stepmin, double stepmax, double alfa, int em, int out_Naug, int **INDlist, char reestimate, MQMCrossType crosstype, bool dominance, int verbose) { if (verbose) Rprintf("INFO: Starting C-part of the MQM analysis\n"); int Naug, Nmark = (*nummark), run = 0; bool useREML = true, fitQTL = false; bool warned = false; ivector chr = newivector(Nmark); // The chr vector contains the chromosome number for every marker for(int i = 0; i < Nmark; i++){ // Rprintf("INFO: Receiving the chromosome matrix from R"); chr[i] = Chromo[0][i]; } if(RMLorML == 1) useREML=false; // use ML instead // Create an array of marker positions - and calculate R[f] based on these locations cvector position = relative_marker_position(Nmark,chr); vector r = recombination_frequencies(Nmark, position, (*mapdistance)); //Rprintf("INFO: Initialize Frun and informationcontent to 0.0"); const int Nsteps = (int)(chr[Nmark-1]*((stepmax-stepmin)/stepsize+1)); matrix Frun = newmatrix(Nsteps,Nrun+1); vector informationcontent = newvector(Nsteps); for (int i = 0; i < (Nrun+1); i++) { for (int ii = 0; ii < Nsteps; ii++) { if(i==0) informationcontent[ii] = 0.0; Frun[ii][i]= 0.0; } } bool dropj = false; int jj=0; // Rprintf("any triple of non-segregating markers is considered to be the result of:\n"); // Rprintf("identity-by-descent (IBD) instead of identity-by-state (IBS)\n"); // Rprintf("no (segregating!) cofactors are fitted in such non-segregating IBD regions\n"); for (int j=0; j < Nmark; j++) { // WRONG: (Nmark-1) Should fix the out of bound in mapdistance, it does fix, but created problems for the last marker dropj = false; if(j+1 < Nmark){ // Check if we can look ahead if(((*mapdistance)[j+1]-(*mapdistance)[j])==0.0){ dropj=true; } } if (!dropj) { marker[jj] = marker[j]; (*cofactor)[jj] = (*cofactor)[j]; (*mapdistance)[jj] = (*mapdistance)[j]; chr[jj] = chr[j]; r[jj] = r[j]; position[jj] = position[j]; jj++; } else{ if (verbose) Rprintf("INFO: Marker %d at chr %d is dropped\n",j,chr[j]); if ((*cofactor)[j]==MCOF) { if (verbose) Rprintf("INFO: Cofactor at chr %d is dropped\n",chr[j]); } } } //if(verbose) Rprintf("INFO: Number of markers: %d -> %d\n",Nmark,jj); Nmark = jj; (*nummark) = jj; // Update the array of marker positions - and calculate R[f] based on these new locations position = relative_marker_position(Nmark,chr); r = recombination_frequencies(Nmark, position, (*mapdistance)); debug_trace("After dropping of uninformative cofactors\n"); ivector newind; // calculate Traits mean and variance vector newy; MQMMarkerMatrix newmarker; double ymean = 0.0, yvari = 0.0; //Rprintf("INFO: Number of individuals: %d Number Aug: %d",Nind,out_Naug); int cur = -1; for (int i=0; i < Nind; i++){ if(INDlist[0][i] != cur){ ymean += y[i]; cur = INDlist[0][i]; } } ymean/= out_Naug; for (int i=0; i < Nind; i++){ if(INDlist[0][i] != cur){ yvari += pow(y[i]-ymean, 2); cur = INDlist[0][i]; } } yvari /= (out_Naug-1); Naug = Nind; // Fix for not doing dataaugmentation, we just copy the current as the augmented and set Naug to Nind Nind = out_Naug; newind = newivector(Naug); newy = newvector(Naug); newmarker = newMQMMarkerMatrix(Nmark,Naug); for (int i=0; i<Naug; i++) { newy[i]= y[i]; newind[i]= INDlist[0][i]; for (int j=0; j<Nmark; j++) { newmarker[j][i]= marker[j][i]; } } // End fix vector newweight = newvector(Naug); double max = rmixture(newmarker, newweight, r, position, newind,Nind, Naug, Nmark, mapdistance,reestimate,crosstype,verbose); //Re-estimation of mapdistances if reestimate=TRUE if(max > stepmax){ fatal("ERROR: Re-estimation of the map put markers at: %f Cm, run the algorithm with a step.max larger than %f Cm", max, max); } //Check if everything still is correct positions and R[f] position = relative_marker_position(Nmark,chr); r = recombination_frequencies(Nmark, position, (*mapdistance)); /* eliminate individuals with missing trait values */ //We can skip this part iirc because R throws out missing phenotypes beforehand int oldNind = Nind; for (int i=0; i<oldNind; i++) { Nind -= ((y[i]==TRAITUNKNOWN) ? 1 : 0); } int oldNaug = Naug; for (int i=0; i<oldNaug; i++) { Naug -= ((newy[i]==TRAITUNKNOWN) ? 1 : 0); } marker = newMQMMarkerMatrix(Nmark+1,Naug); y = newvector(Naug); ivector ind = newivector(Naug); vector weight = newvector(Naug); int newi = 0; for (int i=0; i < oldNaug; i++) if (newy[i]!=TRAITUNKNOWN) { y[newi]= newy[i]; ind[newi]= newind[i]; weight[newi]= newweight[i]; for (int j=0; j<Nmark; j++) marker[j][newi]= newmarker[j][i]; newi++; } int diff; for (int i=0; i < (Naug-1); i++) { diff = ind[i+1]-ind[i]; if (diff>1) { for (int ii=i+1; ii<Naug; ii++){ ind[ii]=ind[ii]-diff+1; } } } //END throwing out missing phenotypes double variance=-1.0; cvector selcofactor = newcvector(Nmark); /* selected cofactors */ int dimx = designmatrixdimensions((*cofactor),Nmark,dominance); double F1 = inverseF(1,Nind-dimx,alfa,verbose); double F2 = inverseF(2,Nind-dimx,alfa,verbose); if (verbose) { Rprintf("INFO: dimX: %d, nInd: %d\n",dimx,Nind); Rprintf("INFO: F(Threshold, Degrees of freedom 1, Degrees of freedom 2) = Alfa\n"); Rprintf("INFO: F(%.3f, 1, %d) = %f\n",ftruncate3(F1),(Nind-dimx),alfa); Rprintf("INFO: F(%.3f, 2, %d) = %f\n",ftruncate3(F2),(Nind-dimx),alfa); } F2 = 2.0* F2; // 9-6-1998 using threshold x*F(x,df,alfa) weight[0]= -1.0; double logL = QTLmixture(marker,(*cofactor),r,position,y,ind,Nind,Naug,Nmark,&variance,em,&weight,useREML,fitQTL,dominance,crosstype, &warned, verbose); if(verbose){ if (!R_finite(logL)) { Rprintf("WARNING: Log-likelihood of full model = INFINITE\n"); }else{ if (R_IsNaN(logL)) { Rprintf("WARNING: Log-likelihood of full model = NOT A NUMBER (NAN)\n"); }else{ Rprintf("INFO: Log-likelihood of full model = %.3f\n",ftruncate3(logL)); } } Rprintf("INFO: Residual variance = %.3f\n",ftruncate3(variance)); Rprintf("INFO: Trait mean= %.3f; Trait variation = %.3f\n",ftruncate3(ymean),ftruncate3(yvari)); } if (R_finite(logL) && !R_IsNaN(logL)) { if(Backwards==1){ // use only selected cofactors logL = backward(Nind, Nmark, (*cofactor), marker, y, weight, ind, Naug, logL,variance, F1, F2, &selcofactor, r, position, &informationcontent, mapdistance,&Frun,run,useREML,fitQTL,dominance, em, windowsize, stepsize, stepmin, stepmax,crosstype,verbose); }else{ // use all cofactors logL = mapQTL(Nind, Nmark, (*cofactor), (*cofactor), marker, position,(*mapdistance), y, r, ind, Naug, variance, 'n', &informationcontent,&Frun,run,useREML,fitQTL,dominance, em, windowsize, stepsize, stepmin, stepmax,crosstype,verbose); // printout=='n' } } // Write output and/or send it back to R // Cofactors that made it to the final model for (int j=0; j<Nmark; j++) { if (selcofactor[j]==MCOF) { (*cofactor)[j]=MCOF; }else{ (*cofactor)[j]=MNOCOF; } } if (verbose) Rprintf("INFO: Number of output datapoints: %d\n", Nsteps); // QTL likelihood for each location for (int ii=0; ii<Nsteps; ii++) { //Convert LR to LOD before sending back QTL[0][ii] = Frun[ii][0] / 4.60517; QTL[0][Nsteps+ii] = informationcontent[ii]; } return logL; }