double multivariateregression(uint nvariables, uint nsamples, dmatrix x, dvector w, dvector y, dvector Fy){ int d=0; double xtwj; dmatrix Xt = newdmatrix(nvariables,nsamples); dmatrix XtWX = newdmatrix(nvariables, nvariables); dvector XtWY = newdvector(nvariables); ivector indx = newivector(nvariables); //cout << "calculating Xt" << endl; for(uint i=0; i<nsamples; i++){ for(uint j=0; j<nvariables; j++){ Xt[j][i] = x[i][j]; } } //cout << "calculating XtWX and XtWY" << endl; for(uint i=0; i<nsamples; i++){ for(uint j=0; j<nvariables; j++){ xtwj = Xt[j][i] * w[i]; XtWY[j] += xtwj * y[i]; for(uint jj=0; jj<=j; jj++){ XtWX[j][jj] += xtwj * Xt[jj][i]; } } } LUdecomposition(XtWX, nvariables, indx, &d); LUsolve(XtWX, nvariables, indx, XtWY); //cout << "Estimated parameters:" << endl; //for (uint i=0; i < nvariables; i++){ // cout << "Parameter " << i << " = " << XtWY[i] << endl; //} dvector fit = newdvector(nsamples); dvector residual = newdvector(nsamples); dvector indL = newdvector(nsamples); double variance= 0.0; double logL=0.0; for (uint i=0; i<nsamples; i++){ fit[i]= 0.0; for (uint j=0; j<nvariables; j++){ fit[i] += Xt[j][i] * XtWY[j]; residual[i] = y[i]-fit[i]; variance += w[i]*pow(residual[i],2.0); } Fy[i] = Lnormal(residual[i],variance); indL[i] += w[i]*Fy[i]; logL += log(indL[i]); } //cout << "Estimated response:" << endl; //printdvector(fit,nsamples); //cout << "Residuals:" << endl; //printdvector(residual,nsamples); //cout << "Estimated Fy:" << endl; //printdvector(Fy,nsamples); //cout << "Variance: " << variance << endl; //cout << "Loglikelihood: " << logL << endl; freematrix((void**)Xt,nvariables); freematrix((void**)XtWX, nvariables); freevector((void*)XtWY); freevector((void*)fit); freevector((void*)residual); freevector((void*)indL); return logL; }
double regression(int Nind, int Nmark, cvector cofactor, MQMMarkerMatrix marker, vector y, vector *weight, ivector ind, int Naug, double *variance, vector Fy, bool biasadj, bool fitQTL, bool dominance, bool verbose) { debug_trace("regression IN\n"); /* cofactor[j] at locus j: MNOCOF: no cofactor at locus j MCOF: cofactor at locus j MSEX: QTL at locus j, but QTL effect is not included in the model MQTL: QTL at locu j and QTL effect is included in the model */ //Calculate the dimensions of the designMatrix int dimx=designmatrixdimensions(cofactor,Nmark,dominance); int j, jj; const int dimx_alloc = dimx+2; //Allocate structures matrix XtWX = newmatrix(dimx_alloc, dimx_alloc); cmatrix Xt = newcmatrix(dimx_alloc, Naug); vector XtWY = newvector(dimx_alloc); //Reset dimension designmatrix dimx = 1; for (j=0; j<Nmark; j++){ if ((cofactor[j]==MCOF)||(cofactor[j]==MQTL)) dimx+= (dominance ? 2 : 1); } cvector xtQTL = newcvector(dimx); int jx=0; for (int i=0; i<Naug; i++) Xt[jx][i]= MH; xtQTL[jx]= MNOCOF; for (j=0; j<Nmark; j++) if (cofactor[j]==MCOF) { // cofactor (not a QTL moving along the chromosome) jx++; xtQTL[jx]= MCOF; if (dominance) { for (int i=0; i<Naug; i++) if (marker[j][i]==MH) { Xt[jx][i]=48; //ASCII code 47, 48 en 49 voor -1, 0, 1; Xt[jx+1][i]=49; } else if (marker[j][i]==MAA) { Xt[jx][i]=47; // '/' stands for -1 Xt[jx+1][i]=48; } else { Xt[jx][i]=49; Xt[jx+1][i]=48; } jx++; xtQTL[jx]= MCOF; } else { for (int i=0; i<Naug; i++) { if (marker[j][i]==MH) { Xt[jx][i]=48; //ASCII code 47, 48 en 49 voor -1, 0, 1; } else if (marker[j][i]==MAA) { Xt[jx][i]=47; // '/' stands for -1 } else { Xt[jx][i]=49; } } } } else if (cofactor[j]==MQTL) { // QTL jx++; xtQTL[jx]= MSEX; if (dominance) { jx++; xtQTL[jx]= MQTL; } } //Rprintf("calculate xtwx and xtwy\n"); /* calculate xtwx and xtwy */ double xtwj, yi, wi, calc_i; for (j=0; j<dimx; j++) { XtWY[j]= 0.0; for (jj=0; jj<dimx; jj++) XtWX[j][jj]= 0.0; } if (!fitQTL){ for (int i=0; i<Naug; i++) { yi= y[i]; wi= (*weight)[i]; //in the original version when we enable Dominance , we crash around here for (j=0; j<dimx; j++) { xtwj= ((double)Xt[j][i]-48.0)*wi; XtWY[j]+= xtwj*yi; for (jj=0; jj<=j; jj++) XtWX[j][jj]+= xtwj*((double)Xt[jj][i]-48.0); } } }else{ // QTL is moving along the chromosomes for (int i=0; i<Naug; i++) { wi= (*weight)[i]+ (*weight)[i+Naug]+ (*weight)[i+2*Naug]; yi= y[i]; //Changed <= to < to prevent chrashes, this could make calculations a tad different then before for (j=0; j<dimx; j++){ if (xtQTL[j]<=MCOF) { xtwj= ((double)Xt[j][i]-48.0)*wi; XtWY[j]+= xtwj*yi; for (jj=0; jj<=j; jj++) if (xtQTL[jj]<=MCOF) XtWX[j][jj]+= xtwj*((double)Xt[jj][i]-48.0); else if (xtQTL[jj]==MSEX) // QTL: additive effect if QTL=MCOF or MSEX { // QTL==MAA XtWX[j][jj]+= ((double)(Xt[j][i]-48.0))*(*weight)[i]*(47.0-48.0); // QTL==MBB XtWX[j][jj]+= ((double)(Xt[j][i]-48.0))*(*weight)[i+2*Naug]*(49.0-48.0); } else // (xtQTL[jj]==MNOTAA) QTL: dominance effect only if QTL=MCOF { // QTL==MH XtWX[j][jj]+= ((double)(Xt[j][i]-48.0))*(*weight)[i+Naug]*(49.0-48.0); } } else if (xtQTL[j]==MSEX) { // QTL: additive effect if QTL=MCOF or MSEX xtwj= -1.0*(*weight)[i]; // QTL==MAA XtWY[j]+= xtwj*yi; for (jj=0; jj<j; jj++) XtWX[j][jj]+= xtwj*((double)Xt[jj][i]-48.0); XtWX[j][j]+= xtwj*-1.0; xtwj= 1.0*(*weight)[i+2*Naug]; // QTL==MBB XtWY[j]+= xtwj*yi; for (jj=0; jj<j; jj++) XtWX[j][jj]+= xtwj*((double)Xt[jj][i]-48.0); XtWX[j][j]+= xtwj*1.0; } else { // (xtQTL[j]==MQTL) QTL: dominance effect only if QTL=MCOF xtwj= 1.0*(*weight)[i+Naug]; // QTL==MCOF XtWY[j]+= xtwj*yi; // j-1 is for additive effect, which is orthogonal to dominance effect for (jj=0; jj<j-1; jj++) XtWX[j][jj]+= xtwj*((double)Xt[jj][i]-48.0); XtWX[j][j]+= xtwj*1.0; } } } } for (j=0; j<dimx; j++){ for (jj=j+1; jj<dimx; jj++){ XtWX[j][jj]= XtWX[jj][j]; } } int d; ivector indx= newivector(dimx); /* solve equations */ ludcmp(XtWX, dimx, indx, &d); lusolve(XtWX, dimx, indx, XtWY); double* indL = (double *)R_alloc(Nind, sizeof(double)); int newNaug = ((!fitQTL) ? Naug : 3*Naug); vector fit = newvector(newNaug); vector resi = newvector(newNaug); debug_trace("Calculate residuals\n"); if (*variance<0) { *variance= 0.0; if (!fitQTL) for (int i=0; i<Naug; i++) { fit[i]= 0.0; for (j=0; j<dimx; j++) fit[i]+=((double)Xt[j][i]-48.0)*XtWY[j]; resi[i]= y[i]-fit[i]; *variance += (*weight)[i]*pow(resi[i], 2.0); } else for (int i=0; i<Naug; i++) { fit[i]= 0.0; fit[i+Naug]= 0.0; fit[i+2*Naug]= 0.0; for (j=0; j<dimx; j++) if (xtQTL[j]<=MCOF) { calc_i =((double)Xt[j][i]-48.0)*XtWY[j]; fit[i]+= calc_i; fit[i+Naug]+= calc_i; fit[i+2*Naug]+= calc_i; } else if (xtQTL[j]==MSEX) { fit[i]+=-1.0*XtWY[j]; fit[i+2*Naug]+=1.0*XtWY[j]; } else fit[i+Naug]+=1.0*XtWY[j]; resi[i]= y[i]-fit[i]; resi[i+Naug]= y[i]-fit[i+Naug]; resi[i+2*Naug]= y[i]-fit[i+2*Naug]; *variance +=(*weight)[i]*pow(resi[i], 2.0); *variance +=(*weight)[i+Naug]*pow(resi[i+Naug], 2.0); *variance +=(*weight)[i+2*Naug]*pow(resi[i+2*Naug], 2.0); } *variance/= (!biasadj ? Nind : Nind-dimx); // to compare results with Johan; variance/=Nind; if (!fitQTL) for (int i=0; i<Naug; i++) Fy[i]= Lnormal(resi[i], *variance); else for (int i=0; i<Naug; i++) { Fy[i] = Lnormal(resi[i], *variance); Fy[i+Naug] = Lnormal(resi[i+Naug], *variance); Fy[i+2*Naug]= Lnormal(resi[i+2*Naug], *variance); } } else { if (!fitQTL) for (int i=0; i<Naug; i++) { fit[i]= 0.0; for (j=0; j<dimx; j++) fit[i]+=((double)Xt[j][i]-48.0)*XtWY[j]; resi[i]= y[i]-fit[i]; Fy[i] = Lnormal(resi[i], *variance); // ???? } else for (int i=0; i<Naug; i++) { fit[i]= 0.0; fit[i+Naug]= 0.0; fit[i+2*Naug]= 0.0; for (j=0; j<dimx; j++) if (xtQTL[j]<=MCOF) { calc_i =((double)Xt[j][i]-48.0)*XtWY[j]; fit[i]+= calc_i; fit[i+Naug]+= calc_i; fit[i+2*Naug]+= calc_i; } else if (xtQTL[j]==MSEX) { fit[i]+=-1.0*XtWY[j]; fit[i+2*Naug]+=1.0*XtWY[j]; } else fit[i+Naug]+=1.0*XtWY[j]; resi[i]= y[i]-fit[i]; resi[i+Naug]= y[i]-fit[i+Naug]; resi[i+2*Naug]= y[i]-fit[i+2*Naug]; Fy[i] = Lnormal(resi[i], *variance); Fy[i+Naug] = Lnormal(resi[i+Naug], *variance); Fy[i+2*Naug]= Lnormal(resi[i+2*Naug], *variance); } } /* calculation of logL */ debug_trace("calculate logL\n"); double logL=0.0; for (int i=0; i<Nind; i++) { indL[i]= 0.0; } if (!fitQTL) { for (int i=0; i<Naug; i++) indL[ind[i]]+=(*weight)[i]*Fy[i]; } else { for (int i=0; i<Naug; i++) { indL[ind[i]]+=(*weight)[i]* Fy[i]; indL[ind[i]]+=(*weight)[i+Naug]* Fy[i+Naug]; indL[ind[i]]+=(*weight)[i+2*Naug]*Fy[i+2*Naug]; } } for (int i=0; i<Nind; i++) { //Sum up log likelihoods for each individual logL+= log(indL[i]); } return (double)logL; }
void R_mqmaugment(int *geno, double *dist, double *pheno, int *auggeno, double *augPheno, int *augIND, int *Nind, int *Naug, int *Nmark, int *Npheno, int *maxind, int *maxiaug, double *minprob, int *chromo, int *rqtlcrosstypep, int *augment_strategy, int *verbosep) { int **Geno; double **Pheno; double **Dist; int **NEW; //Holds the output for the augmentdata function int **Chromo; double **NEWPheno; //New phenotype vector int **NEWIND; //New list of individuals const int nind0 = *Nind; //Individuals we start with const int verbose = *verbosep; const RqtlCrossType rqtlcrosstype = (RqtlCrossType) *rqtlcrosstypep; if(verbose) Rprintf("INFO: Starting C-part of the data augmentation routine\n"); ivector new_ind; MQMMarkerMatrix markers = newMQMMarkerMatrix(*Nmark, nind0); vector mapdistance = newvector(*Nmark); ivector chr = newivector(*Nmark); //Reorganise the pointers into arrays, Singletons are just cast into the function reorg_geno(nind0, *Nmark, geno, &Geno); reorg_int(*Nmark, 1, chromo, &Chromo); reorg_pheno(nind0, *Npheno, pheno, &Pheno); reorg_pheno(*Nmark, 1, dist, &Dist); reorg_int(*maxind, *Nmark, auggeno, &NEW); reorg_int((*maxiaug)*nind0, 1, augIND, &NEWIND); reorg_pheno((*maxiaug)*nind0, 1, augPheno, &NEWPheno); MQMCrossType crosstype = determine_MQMCross(*Nmark, *Nind, (const int **)Geno, rqtlcrosstype); // Determine cross change_coding(Nmark, Nind, Geno, markers, crosstype); // Change all the markers from R/qtl format to MQM internal if(verbose) Rprintf("INFO: Filling the chromosome matrix\n"); for (int i=0; i<(*Nmark); i++) { //Set some general information structures per marker mapdistance[i] = POSITIONUNKNOWN; mapdistance[i] = Dist[0][i]; chr[i] = Chromo[0][i]; } if(mqmaugmentfull(&markers,Nind,Naug,&new_ind,*minprob, *maxind, *maxiaug,&Pheno,*Nmark,chr,mapdistance,*augment_strategy,crosstype,verbose)){ //Data augmentation finished succesfully, encode it back into RQTL format for (int i = 0; i<(*Nmark); i++) { for (int j = 0; j<(*Naug); j++) { //Rprintf("INFO: Phenotype after return: %f",NEWPheno[0][j]); NEWPheno[0][j] = Pheno[0][j]; NEWIND[0][j] = new_ind[j]; NEW[i][j] = 9; if (markers[i][j] == MAA) { NEW[i][j] = 1; } if (markers[i][j] == MH) { NEW[i][j] = 2; } if (markers[i][j] == MBB) { // [karl:] this might need to be changed for RIL crosstype==CRIL ? NEW[i][j]=2 : NEW[i][j] = 3; //[Danny:] This should solve it } if (markers[i][j] == MNOTAA) { NEW[i][j] = 5; } if (markers[i][j] == MNOTBB) { NEW[i][j] = 4; } } } if (verbose) { Rprintf("# Unique individuals before augmentation:%d\n", nind0); Rprintf("# Unique selected individuals:%d\n", *Nind); Rprintf("# Marker p individual:%d\n", *Nmark); Rprintf("# Individuals after augmentation:%d\n", *Naug); Rprintf("INFO: Data augmentation succesfull\n"); } } else { //Unsuccessfull data augmentation exit Rprintf("INFO: This code should not be reached, data corruption could have occured. Please re-run this analysis.\n"); *Naug = nind0; for (int i=0; i<(*Nmark); i++) { for (int j=0; j<(*Naug); j++) { NEWPheno[0][j] = Pheno[0][j]; NEW[i][j] = 9; if (markers[i][j] == MAA) { NEW[i][j] = 1; } if (markers[i][j] == MH) { NEW[i][j] = 2; } if (markers[i][j] == MBB) { // [Karl:] this might need to be changed for RIL crosstype==CRIL ? NEW[i][j]=2 : NEW[i][j] = 3; // [Danny:] This should solve it } if (markers[i][j] == MNOTAA) { NEW[i][j] = 5; } if (markers[i][j] == MNOTBB) { NEW[i][j] = 4; } } } fatal("Data augmentation failed", ""); } delMQMMarkerMatrix(markers,*Nmark); // [Danny:] This looked suspicious, we were leaking memory here because we didn't clean it Free(mapdistance); Free(chr); return; }
int mqmaugment(const MQMMarkerMatrix marker, const vector y, MQMMarkerMatrix* augmarker, vector *augy, ivector* augind, ivector* sucind, int *Nind, int *Naug, const int Nmark, const cvector position, vector r, const int maxNaug, const int imaxNaug, const double minprob, const MQMCrossType crosstype, const int verbose) { int retvalue = 1; //[Danny] Assume everything will go right, (it never returned a 1 OK, initialization to 0 and return int jj; const int nind0 = *Nind; //Original number of individuals (*Naug) = maxNaug; // sets and returns the maximum size of augmented dataset // new variables sized to maxNaug: MQMMarkerMatrix newmarker; vector newy; MQMMarkerVector imarker; ivector newind; ivector succesind; double minprobratio = (1.0f/minprob); if(minprob!=1){ minprobratio += 0.00001; } newmarker = newMQMMarkerMatrix(Nmark+1, maxNaug); // augmented marker matrix newy = newvector(maxNaug); // phenotypes newind = newivector(maxNaug); // individuals index succesind = newivector(nind0); // Tracks if the augmentation is a succes imarker = newMQMMarkerVector(Nmark); int iaug = 0; // iaug keeps track of current augmented individual double prob0, prob1, prob2, sumprob, prob0left, prob1left, prob2left, prob0right=0.0, prob1right=0.0, prob2right = 0.0f; vector newprob = newvector(maxNaug); vector newprobmax = newvector(maxNaug); if (verbose) Rprintf("INFO: Crosstype determined by the algorithm: %c\n", crosstype); if (verbose) Rprintf("INFO: Augmentation parameters: Maximum augmentation=%d, Maximum augmentation per individual=%d, Minprob=%f\n", maxNaug, imaxNaug, minprob); // ---- foreach individual create one in the newmarker matrix int newNind = nind0; //Number of unique individuals int previaug = 0; // previous index in newmarkers for (int i=0; i<nind0; i++) { //Loop through individuals succesind[i] = 1; //Assume we succeed in augmentation #ifndef STANDALONE //R_ProcessEvents(); /* Try not to crash windows */ R_FlushConsole(); #endif const int dropped = nind0-newNind; //How many are dropped const int iidx = i - dropped; //Individuals I's new individual number based on dropped individuals newind[iaug] = iidx; // iidx corrects for dropped individuals newy[iaug] = y[i]; // cvariance (phenotype) newprob[iaug] = 1.0; //prop double probmax = 1.0; //current maximum probability for (int j=0; j<Nmark; j++){ newmarker[j][iaug]=marker[j][i]; // copy markers into newmarkers for the new indidivudal under investigation } for (int j=0; j<Nmark; j++) { //Loop through markers: const int maxiaug = iaug; // fixate maxiaug if ((maxiaug-previaug)<=imaxNaug) // within bounds for individual? for (int ii=previaug; ii<=maxiaug; ii++) { #ifndef STANDALONE R_CheckUserInterrupt(); /* check for ^C */ #endif debug_trace("i=%d ii=%d iidx=%d maxiaug=%d previaug=%d,imaxNaug=%d\n",i,ii,iidx,maxiaug,previaug,imaxNaug); // ---- walk from previous augmented to current augmented genotype //WE HAVE 3 SPECIAL CASES: (1) NOTAA, (2) NOTBB and (3)UNKNOWN, and the std case of a next known marker if (newmarker[j][ii]==MNOTAA) { //NOTAA augment data to contain AB and BB for (jj=0; jj<Nmark; jj++) imarker[jj] = newmarker[jj][ii]; if ((position[j]==MLEFT||position[j]==MUNLINKED)) { prob1left= start_prob(crosstype, MH); prob2left= start_prob(crosstype, MBB); } else { prob1left= left_prob(r[j-1],newmarker[j-1][ii],MH,crosstype); //prob1left= prob(newmarker, r, ii, j-1, MH, crosstype, 0); prob2left= left_prob(r[j-1],newmarker[j-1][ii],MBB,crosstype); //prob2left= prob(newmarker, r, ii, j-1, MBB, crosstype, 0); } switch (crosstype) { case CF2: prob1right= right_prob_F2(MH, j, imarker, r, position); //prob1right= probright(MH, j, imarker, r, position, crosstype); prob2right= right_prob_F2(MBB, j, imarker, r, position); //prob2right= probright(MBB, j, imarker, r, position, crosstype); break; case CBC: prob1right= right_prob_BC(MH, j, imarker, r, position); prob2right= right_prob_BC(MBB, j, imarker, r, position); break; case CRIL: prob1right= right_prob_RIL(MH, j, imarker, r, position); prob2right= right_prob_RIL(MBB, j, imarker, r, position); break; case CUNKNOWN: fatal("Strange: unknown crosstype in mqm augment()", ""); break; } prob1= prob1left*prob1right; prob2= prob2left*prob2right; if (ii==previaug) probmax = (prob2>prob1 ? newprob[ii]*prob2 : newprob[ii]*prob1); if (prob1>prob2) { if (probmax/(newprob[ii]*prob2)<minprobratio) { if (++iaug >= maxNaug) goto bailout; newmarker[j][iaug]= MBB; newprob[iaug]= newprob[ii]*prob2left; newprobmax[iaug]= newprob[iaug]*prob2right; for (jj=0; jj<Nmark; jj++) { if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii]; } newind[iaug]=iidx; newy[iaug]=y[i]; } newmarker[j][ii]= MH; newprobmax[ii]= newprob[ii]*prob1; newprob[ii]= newprob[ii]*prob1left; } else { if (probmax/(newprob[ii]*prob1)<minprobratio) { if (++iaug >= maxNaug) goto bailout; newmarker[j][iaug]= MH; newprob[iaug]= newprob[ii]*prob1left; newprobmax[iaug]= newprob[iaug]*prob1right; for (jj=0; jj<Nmark; jj++) { if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii]; } newind[iaug]=iidx; newy[iaug]=y[i]; } newmarker[j][ii]= MBB; newprobmax[ii]= newprob[ii]*prob2; newprob[ii]*= prob2left; } probmax = (probmax>newprobmax[ii] ? probmax : newprobmax[ii]); } else if (newmarker[j][ii]==MNOTBB) { //NOTBB: augment data can contain MH and MAA for (jj=0; jj<Nmark; jj++) imarker[jj]= newmarker[jj][ii]; if ((position[j]==MLEFT||position[j]==MUNLINKED)) { prob0left= start_prob(crosstype, MAA); prob1left= start_prob(crosstype, MH); } else { prob0left= left_prob(r[j-1],newmarker[j-1][ii],MAA,crosstype); //prob0left= prob(newmarker, r, ii, j-1, MAA, crosstype, 0); prob1left= left_prob(r[j-1],newmarker[j-1][ii],MH,crosstype); //prob1left= prob(newmarker, r, ii, j-1, MH, crosstype, 0); } switch (crosstype) { case CF2: prob0right= right_prob_F2(MAA, j, imarker, r, position); //prob0right= probright(MAA, j, imarker, r, position, crosstype); prob1right= right_prob_F2(MH, j, imarker, r, position); //prob1right= probright(MH, j, imarker, r, position, crosstype); break; case CBC: prob0right= right_prob_BC(MAA, j, imarker, r, position); prob1right= right_prob_BC(MH, j, imarker, r, position); break; case CRIL: prob0right= right_prob_RIL(MAA, j, imarker, r, position); prob1right= right_prob_RIL(MH, j, imarker, r, position); break; case CUNKNOWN: fatal("Strange: unknown crosstype in mqm augment()", ""); break; } prob0= prob0left*prob0right; prob1= prob1left*prob1right; if (ii==previaug) probmax= (prob0>prob1 ? newprob[ii]*prob0 : newprob[ii]*prob1); if (prob1>prob0) { if (probmax/(newprob[ii]*prob0)<minprobratio) { if (++iaug >= maxNaug) goto bailout; newmarker[j][iaug]= MAA; newprob[iaug]= newprob[ii]*prob0left; newprobmax[iaug]= newprob[iaug]*prob0right; for (jj=0; jj<Nmark; jj++) { if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii]; } newind[iaug]=iidx; newy[iaug]=y[i]; } newmarker[j][ii]= MH; newprobmax[ii]= newprob[ii]*prob1; newprob[ii]*= prob1left; } else { if (probmax/(newprob[ii]*prob1)<minprobratio) { if (++iaug >= maxNaug) goto bailout; newmarker[j][iaug]= MH; newprob[iaug]= newprob[ii]*prob1left; newprobmax[iaug]= newprob[iaug]*prob1right; for (jj=0; jj<Nmark; jj++) { if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii]; } newind[iaug]=iidx; newy[iaug]=y[i]; } newmarker[j][ii]= MAA; newprobmax[ii]= newprob[ii]*prob0; newprob[ii]*= prob0left; } probmax= (probmax>newprobmax[ii] ? probmax : newprobmax[ii]); } else if (newmarker[j][ii]==MMISSING) { //UNKNOWN: augment data to contain AB, AA and BB for (jj=0; jj<Nmark; jj++) imarker[jj]= newmarker[jj][ii]; if ((position[j]==MLEFT||position[j]==MUNLINKED)) { prob0left= start_prob(crosstype, MAA); prob1left= start_prob(crosstype, MH); prob2left= start_prob(crosstype, MBB); } else { prob0left= left_prob(r[j-1],newmarker[j-1][ii],MAA,crosstype); //prob0left= prob(newmarker, r, ii, j-1, MAA, crosstype, 0); prob1left= left_prob(r[j-1],newmarker[j-1][ii],MH,crosstype); //prob1left= prob(newmarker, r, ii, j-1, MH, crosstype, 0); prob2left= left_prob(r[j-1],newmarker[j-1][ii],MBB,crosstype); //prob2left= prob(newmarker, r, ii, j-1, MBB, crosstype, 0); } switch (crosstype) { case CF2: prob0right= right_prob_F2(MAA, j, imarker, r, position); //prob0right= probright(MAA, j, imarker, r, position, crosstype); prob1right= right_prob_F2(MH, j, imarker, r, position); //prob1right= probright(MH, j, imarker, r, position, crosstype); prob2right= right_prob_F2(MBB, j, imarker, r, position); //prob2right= probright(MBB, j, imarker, r, position, crosstype); break; case CBC: prob0right= right_prob_BC(MAA, j, imarker, r, position); prob1right= right_prob_BC(MH, j, imarker, r, position); prob2right= 0.0; break; case CRIL: prob0right= right_prob_RIL(MAA, j, imarker, r, position); prob1right= 0.0; prob2right= right_prob_RIL(MBB, j, imarker, r, position); break; case CUNKNOWN: fatal("Strange: unknown crosstype in mqm augment()", ""); break; } prob0= prob0left*prob0right; prob1= prob1left*prob1right; prob2= prob2left*prob2right; if (ii==previaug) { if ((prob2>prob1)&&(prob2>prob0)) probmax= newprob[ii]*prob2; else if ((prob1>prob0)&&(prob1>prob2)) probmax= newprob[ii]*prob1; else probmax= newprob[ii]*prob0; } if ((prob2>prob1)&&(prob2>prob0)) { if (probmax/(newprob[ii]*prob1)<minprobratio) { if (++iaug >= maxNaug) goto bailout; newmarker[j][iaug]= MH; newprob[iaug]= newprob[ii]*prob1left; newprobmax[iaug]= newprob[iaug]*prob1right; for (jj=0; jj<Nmark; jj++) { if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii]; } newind[iaug]=iidx; newy[iaug]=y[i]; } if (probmax/(newprob[ii]*prob0)<minprobratio) { if (++iaug >= maxNaug) goto bailout; newmarker[j][iaug]= MAA; newprob[iaug]= newprob[ii]*prob0left; newprobmax[iaug]= newprob[iaug]*prob0right; for (jj=0; jj<Nmark; jj++) { if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii]; } newind[iaug]=iidx; newy[iaug]=y[i]; } newmarker[j][ii]= MBB; newprobmax[ii]= newprob[ii]*prob2; newprob[ii]*= prob2left; } else if ((prob1>prob2)&&(prob1>prob0)) { if (probmax/(newprob[ii]*prob2)<minprobratio) { if (++iaug >= maxNaug) goto bailout; newmarker[j][iaug]= MBB; newprob[iaug]= newprob[ii]*prob2left; newprobmax[iaug]= newprob[iaug]*prob2right; for (jj=0; jj<Nmark; jj++) { if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii]; } newind[iaug]=iidx; newy[iaug]=y[i]; } if (probmax/(newprob[ii]*prob0)<minprobratio) { if (++iaug >= maxNaug) goto bailout; newmarker[j][iaug]= MAA; newprob[iaug]= newprob[ii]*prob0left; newprobmax[iaug]= newprob[iaug]*prob0right; for (jj=0; jj<Nmark; jj++) { if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii]; } newind[iaug]=iidx; newy[iaug]=y[i]; } newmarker[j][ii]= MH; newprobmax[ii]= newprob[ii]*prob1; newprob[ii]*= prob1left; } else { if (probmax/(newprob[ii]*prob1)<minprobratio) { if (++iaug >= maxNaug) goto bailout; newmarker[j][iaug]= MH; newprob[iaug]= newprob[ii]*prob1left; newprobmax[iaug]= newprob[iaug]*prob1right; for (jj=0; jj<Nmark; jj++) { if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii]; } newind[iaug]=iidx; newy[iaug]=y[i]; } if (probmax/(newprob[ii]*prob2)<minprobratio) { if (++iaug >= maxNaug) goto bailout; newmarker[j][iaug]= MBB; newprob[iaug]= newprob[ii]*prob2left; newprobmax[iaug]= newprob[iaug]*prob2right; for (jj=0; jj<Nmark; jj++) { if (jj!=j) newmarker[jj][iaug]=newmarker[jj][ii]; } newind[iaug]=iidx; newy[iaug]=y[i]; } newmarker[j][ii]= MAA; newprobmax[ii]= newprob[ii]*prob0; newprob[ii]*= prob0left; } probmax= (probmax>newprobmax[ii] ? probmax : newprobmax[ii]); } else { //STD case we know what the next marker is nou use probleft to estimate the likelihood of the current location if ((position[j]==MLEFT||position[j]==MUNLINKED)) { prob0left= start_prob(crosstype, newmarker[j][ii]); } else { prob0left= left_prob(r[j-1],newmarker[j-1][ii],newmarker[j][ii],crosstype); //prob0left= prob(newmarker, r, ii, j-1, newmarker[j][ii], crosstype, 0); } newprob[ii]*= prob0left; } if (iaug+3>maxNaug) { Rprintf("ERROR: augmentation (this code should not be reached)\n"); goto bailout; } } if ((iaug-previaug+1)>imaxNaug) { newNind-= 1; iaug= previaug-1; succesind[i]=0; //for(int x=previaug;x<previaug+imaxNaug;x++){ // Rprintf("INFO: Individual: %d, variant: %d, prob: %f",i,x,newprob[x]); //} if (verbose) Rprintf("INFO: Individual %d moved to second augmentation round\n", i); } sumprob= 0.0; for (int ii=previaug; ii<=iaug; ii++) sumprob+= newprob[ii]; for (int ii=previaug; ii<=iaug; ii++) newprob[ii]/= sumprob; } if (++iaug >= maxNaug) goto bailout; previaug=iaug; } *Naug = iaug; *Nind = newNind; *augmarker = newMQMMarkerMatrix(Nmark, *Naug); *augy = newvector(*Naug); *augind = newivector(*Naug); *sucind = newivector(nind0); for (int i=0; i<nind0; i++) { (*sucind)[i] = succesind[i]; } for (int i=0; i<(*Naug); i++) { (*augy)[i]= newy[i]; (*augind)[i]= newind[i]; for (int j=0; j<Nmark; j++) (*augmarker)[j][i]= newmarker[j][i]; } goto cleanup; bailout: Rprintf("INFO: Dataset too large after augmentation\n"); if (verbose) fatal("Recall procedure with larger value for augmentation parameters or increase the parameter minprob\n"); retvalue = 0; cleanup: Free(newy); delMQMMarkerMatrix(newmarker, Nmark+1); //Free(newmarker); Free(newind); Free(newprob); Free(newprobmax); Free(imarker); return retvalue; }
int mqmaugmentfull(MQMMarkerMatrix* markers,int* nind, int* augmentednind, ivector* INDlist, double neglect_unlikely, int max_totalaugment, int max_indaugment, const matrix* pheno_value, const int nmark, const ivector chr, const vector mapdistance, const int augment_strategy, const MQMCrossType crosstype,const int verbose){ //Prepare for the first augmentation if (verbose) Rprintf("INFO: Augmentation routine\n"); const int nind0 = *nind; const vector originalpheno = (*pheno_value)[0]; MQMMarkerMatrix newmarkerset; // [Danny:] This LEAKS MEMORY the Matrices and vectors are not cleaned at ALL vector new_y; // Because we do a phenotype matrix, we optimize by storing original the R-individual ivector new_ind; // numbers inside the trait-values, ands use new_ind etc for inside C ivector succes_ind; cvector position = relative_marker_position(nmark,chr); vector r = recombination_frequencies(nmark, position, mapdistance); if(verbose) Rprintf("INFO: Step 1: Augmentation"); mqmaugment((*markers), (*pheno_value)[0], &newmarkerset, &new_y, &new_ind, &succes_ind, nind, augmentednind, nmark, position, r, max_totalaugment, max_indaugment, neglect_unlikely, crosstype, verbose); //First round of augmentation, check if there are still individuals we need to do int ind_still_left=0; int ind_done=0; for(int i=0; i<nind0; i++){ debug_trace("Individual:%d Succesfull?:%d",i,succes_ind[i]); if(succes_ind[i]==0){ ind_still_left++; }else{ ind_done++; } } if(ind_still_left && verbose) Rprintf("INFO: Step 2: Unaugmented individuals\n"); if(ind_still_left && augment_strategy != 3){ //Second round we augment dropped individuals from the first augmentation MQMMarkerMatrix left_markerset; matrix left_y_input = newmatrix(1,ind_still_left); vector left_y; ivector left_ind; if(verbose) Rprintf("INFO: Done with: %d/%d individuals still need to do %d\n",ind_done,nind0,ind_still_left); //Create a new markermatrix for the individuals MQMMarkerMatrix indleftmarkers= newMQMMarkerMatrix(nmark,ind_still_left); int current_leftover_ind=0; for(int i=0;i<nind0;i++){ if(succes_ind[i]==0){ debug_trace("IND %d -> %d",i,current_leftover_ind); left_y_input[0][current_leftover_ind] = originalpheno[i]; for(int j=0;j<nmark;j++){ indleftmarkers[j][current_leftover_ind] = (*markers)[j][i]; } current_leftover_ind++; } } mqmaugment(indleftmarkers, left_y_input[0], &left_markerset, &left_y, &left_ind, &succes_ind, ¤t_leftover_ind, ¤t_leftover_ind, nmark, position, r, max_totalaugment, max_indaugment, 1, crosstype, verbose); if(verbose) Rprintf("INFO: Augmentation step 2 returned most likely for %d individuals\n", current_leftover_ind); //Data augmentation done, we need to return both matrices to R int numimputations=1; if(augment_strategy==2){ numimputations=max_indaugment; //If we do imputation, we should generate enough to not increase likelihood for the 'unlikely genotypes' } MQMMarkerMatrix newmarkerset_all = newMQMMarkerMatrix(nmark,(*augmentednind)+numimputations*current_leftover_ind); vector new_y_all = newvector((*augmentednind)+numimputations*current_leftover_ind); ivector new_ind_all = newivector((*augmentednind)+numimputations*current_leftover_ind);; for(int i=0;i<(*augmentednind)+current_leftover_ind;i++){ int currentind; double currentpheno; if(i < (*augmentednind)){ // Results from first augmentation step currentind = new_ind[i]; currentpheno = new_y[i]; for(int j=0;j<nmark;j++){ newmarkerset_all[j][i] = newmarkerset[j][i]; } new_ind_all[i]= currentind; new_y_all[i]= currentpheno; }else{ // Results from second augmentation step currentind = ind_done+(i-(*augmentednind)); currentpheno = left_y[(i-(*augmentednind))]; debug_trace("Imputation of individual %d %d",currentind,numimputations); for(int a=0;a<numimputations;a++){ int newindex = (*augmentednind)+a+((i-(*augmentednind))*numimputations); debug_trace("i=%d,s=%d,i-s=%d index=%d/%d",i,(*augmentednind),(i-(*augmentednind)),newindex,(*augmentednind)+numimputations*current_leftover_ind); if(augment_strategy == 2 && a > 0){ for(int j=0;j<nmark;j++){ // Imputed genotype at 1 ... max_indaugment if(indleftmarkers[j][(i-(*augmentednind))]==MMISSING){ newmarkerset_all[j][newindex] = randommarker(crosstype); }else{ newmarkerset_all[j][newindex] = left_markerset[j][(i-(*augmentednind))]; } } }else{ for(int j=0;j<nmark;j++){ // Most likely genotype at 0 newmarkerset_all[j][newindex] = left_markerset[j][(i-(*augmentednind))]; } } new_ind_all[newindex]= currentind; new_y_all[newindex]= currentpheno; debug_trace("Individual: %d OriginalID:%f Variant:%d",currentind,currentpheno,a); } } } //Everything is added together so lets set out return pointers (*pheno_value)[0] = new_y_all; (*INDlist) = new_ind_all; (*markers) = newmarkerset_all; (*augmentednind)=(*augmentednind)+(numimputations*current_leftover_ind); (*nind)= (*nind)+(current_leftover_ind); debug_trace("nind:%d,naugmented:%d",(*nind)+(current_leftover_ind),(*augmentednind)+(current_leftover_ind)); Rprintf("INFO: VALGRIND MEMORY DEBUG BARRIERE TRIGGERED\n", ""); delMQMMarkerMatrix(newmarkerset, nmark); // Free the newmarkerset, this can only be done here since: (*markers) = newmarkerset_all; // Free(new_y_all); // Free(new_ind_all); }else{ if(ind_still_left && augment_strategy == 3){ if(verbose) Rprintf("INFO: Dropping %d augment_strategy individuals from further analysis\n",ind_still_left); } //We augmented all individuals in the first go so lets use those (*pheno_value)[0] = new_y; (*INDlist) = new_ind; (*markers) = newmarkerset; } if(verbose) Rprintf("INFO: Done with augmentation\n"); // Free(new_y); // Free vector indicating the new phenotypes // Free(new_ind); // Free vector indicating the new individuals Free(succes_ind); // Free vector indicating the result of round 1 - augmentation Free(position); // Free the positions of the markers Free(r); // Free the recombination frequencies return 1; }
imatrix newimatrix(uint rows, uint cols){ imatrix m = (imatrix)mycalloc(rows, sizeof(ivector)); if(m==NULL){ Rprintf("Not enough memory for new matrix\n"); } for(size_t i = 0; i < rows; i++){ m[i]= newivector(cols); } return m; }
double analyseF2(int Nind, int *nummark, cvector *cofactor, MQMMarkerMatrix marker, vector y, int Backwards, double **QTL,vector *mapdistance, int **Chromo, int Nrun, int RMLorML, double windowsize, double stepsize, double stepmin, double stepmax, double alfa, int em, int out_Naug, int **INDlist, char reestimate, MQMCrossType crosstype, bool dominance, int verbose) { if (verbose) Rprintf("INFO: Starting C-part of the MQM analysis\n"); int Naug, Nmark = (*nummark), run = 0; bool useREML = true, fitQTL = false; bool warned = false; ivector chr = newivector(Nmark); // The chr vector contains the chromosome number for every marker for(int i = 0; i < Nmark; i++){ // Rprintf("INFO: Receiving the chromosome matrix from R"); chr[i] = Chromo[0][i]; } if(RMLorML == 1) useREML=false; // use ML instead // Create an array of marker positions - and calculate R[f] based on these locations cvector position = relative_marker_position(Nmark,chr); vector r = recombination_frequencies(Nmark, position, (*mapdistance)); //Rprintf("INFO: Initialize Frun and informationcontent to 0.0"); const int Nsteps = (int)(chr[Nmark-1]*((stepmax-stepmin)/stepsize+1)); matrix Frun = newmatrix(Nsteps,Nrun+1); vector informationcontent = newvector(Nsteps); for (int i = 0; i < (Nrun+1); i++) { for (int ii = 0; ii < Nsteps; ii++) { if(i==0) informationcontent[ii] = 0.0; Frun[ii][i]= 0.0; } } bool dropj = false; int jj=0; // Rprintf("any triple of non-segregating markers is considered to be the result of:\n"); // Rprintf("identity-by-descent (IBD) instead of identity-by-state (IBS)\n"); // Rprintf("no (segregating!) cofactors are fitted in such non-segregating IBD regions\n"); for (int j=0; j < Nmark; j++) { // WRONG: (Nmark-1) Should fix the out of bound in mapdistance, it does fix, but created problems for the last marker dropj = false; if(j+1 < Nmark){ // Check if we can look ahead if(((*mapdistance)[j+1]-(*mapdistance)[j])==0.0){ dropj=true; } } if (!dropj) { marker[jj] = marker[j]; (*cofactor)[jj] = (*cofactor)[j]; (*mapdistance)[jj] = (*mapdistance)[j]; chr[jj] = chr[j]; r[jj] = r[j]; position[jj] = position[j]; jj++; } else{ if (verbose) Rprintf("INFO: Marker %d at chr %d is dropped\n",j,chr[j]); if ((*cofactor)[j]==MCOF) { if (verbose) Rprintf("INFO: Cofactor at chr %d is dropped\n",chr[j]); } } } //if(verbose) Rprintf("INFO: Number of markers: %d -> %d\n",Nmark,jj); Nmark = jj; (*nummark) = jj; // Update the array of marker positions - and calculate R[f] based on these new locations position = relative_marker_position(Nmark,chr); r = recombination_frequencies(Nmark, position, (*mapdistance)); debug_trace("After dropping of uninformative cofactors\n"); ivector newind; // calculate Traits mean and variance vector newy; MQMMarkerMatrix newmarker; double ymean = 0.0, yvari = 0.0; //Rprintf("INFO: Number of individuals: %d Number Aug: %d",Nind,out_Naug); int cur = -1; for (int i=0; i < Nind; i++){ if(INDlist[0][i] != cur){ ymean += y[i]; cur = INDlist[0][i]; } } ymean/= out_Naug; for (int i=0; i < Nind; i++){ if(INDlist[0][i] != cur){ yvari += pow(y[i]-ymean, 2); cur = INDlist[0][i]; } } yvari /= (out_Naug-1); Naug = Nind; // Fix for not doing dataaugmentation, we just copy the current as the augmented and set Naug to Nind Nind = out_Naug; newind = newivector(Naug); newy = newvector(Naug); newmarker = newMQMMarkerMatrix(Nmark,Naug); for (int i=0; i<Naug; i++) { newy[i]= y[i]; newind[i]= INDlist[0][i]; for (int j=0; j<Nmark; j++) { newmarker[j][i]= marker[j][i]; } } // End fix vector newweight = newvector(Naug); double max = rmixture(newmarker, newweight, r, position, newind,Nind, Naug, Nmark, mapdistance,reestimate,crosstype,verbose); //Re-estimation of mapdistances if reestimate=TRUE if(max > stepmax){ fatal("ERROR: Re-estimation of the map put markers at: %f Cm, run the algorithm with a step.max larger than %f Cm", max, max); } //Check if everything still is correct positions and R[f] position = relative_marker_position(Nmark,chr); r = recombination_frequencies(Nmark, position, (*mapdistance)); /* eliminate individuals with missing trait values */ //We can skip this part iirc because R throws out missing phenotypes beforehand int oldNind = Nind; for (int i=0; i<oldNind; i++) { Nind -= ((y[i]==TRAITUNKNOWN) ? 1 : 0); } int oldNaug = Naug; for (int i=0; i<oldNaug; i++) { Naug -= ((newy[i]==TRAITUNKNOWN) ? 1 : 0); } marker = newMQMMarkerMatrix(Nmark+1,Naug); y = newvector(Naug); ivector ind = newivector(Naug); vector weight = newvector(Naug); int newi = 0; for (int i=0; i < oldNaug; i++) if (newy[i]!=TRAITUNKNOWN) { y[newi]= newy[i]; ind[newi]= newind[i]; weight[newi]= newweight[i]; for (int j=0; j<Nmark; j++) marker[j][newi]= newmarker[j][i]; newi++; } int diff; for (int i=0; i < (Naug-1); i++) { diff = ind[i+1]-ind[i]; if (diff>1) { for (int ii=i+1; ii<Naug; ii++){ ind[ii]=ind[ii]-diff+1; } } } //END throwing out missing phenotypes double variance=-1.0; cvector selcofactor = newcvector(Nmark); /* selected cofactors */ int dimx = designmatrixdimensions((*cofactor),Nmark,dominance); double F1 = inverseF(1,Nind-dimx,alfa,verbose); double F2 = inverseF(2,Nind-dimx,alfa,verbose); if (verbose) { Rprintf("INFO: dimX: %d, nInd: %d\n",dimx,Nind); Rprintf("INFO: F(Threshold, Degrees of freedom 1, Degrees of freedom 2) = Alfa\n"); Rprintf("INFO: F(%.3f, 1, %d) = %f\n",ftruncate3(F1),(Nind-dimx),alfa); Rprintf("INFO: F(%.3f, 2, %d) = %f\n",ftruncate3(F2),(Nind-dimx),alfa); } F2 = 2.0* F2; // 9-6-1998 using threshold x*F(x,df,alfa) weight[0]= -1.0; double logL = QTLmixture(marker,(*cofactor),r,position,y,ind,Nind,Naug,Nmark,&variance,em,&weight,useREML,fitQTL,dominance,crosstype, &warned, verbose); if(verbose){ if (!R_finite(logL)) { Rprintf("WARNING: Log-likelihood of full model = INFINITE\n"); }else{ if (R_IsNaN(logL)) { Rprintf("WARNING: Log-likelihood of full model = NOT A NUMBER (NAN)\n"); }else{ Rprintf("INFO: Log-likelihood of full model = %.3f\n",ftruncate3(logL)); } } Rprintf("INFO: Residual variance = %.3f\n",ftruncate3(variance)); Rprintf("INFO: Trait mean= %.3f; Trait variation = %.3f\n",ftruncate3(ymean),ftruncate3(yvari)); } if (R_finite(logL) && !R_IsNaN(logL)) { if(Backwards==1){ // use only selected cofactors logL = backward(Nind, Nmark, (*cofactor), marker, y, weight, ind, Naug, logL,variance, F1, F2, &selcofactor, r, position, &informationcontent, mapdistance,&Frun,run,useREML,fitQTL,dominance, em, windowsize, stepsize, stepmin, stepmax,crosstype,verbose); }else{ // use all cofactors logL = mapQTL(Nind, Nmark, (*cofactor), (*cofactor), marker, position,(*mapdistance), y, r, ind, Naug, variance, 'n', &informationcontent,&Frun,run,useREML,fitQTL,dominance, em, windowsize, stepsize, stepmin, stepmax,crosstype,verbose); // printout=='n' } } // Write output and/or send it back to R // Cofactors that made it to the final model for (int j=0; j<Nmark; j++) { if (selcofactor[j]==MCOF) { (*cofactor)[j]=MCOF; }else{ (*cofactor)[j]=MNOCOF; } } if (verbose) Rprintf("INFO: Number of output datapoints: %d\n", Nsteps); // QTL likelihood for each location for (int ii=0; ii<Nsteps; ii++) { //Convert LR to LOD before sending back QTL[0][ii] = Frun[ii][0] / 4.60517; QTL[0][Nsteps+ii] = informationcontent[ii]; } return logL; }