bool LUdecomposition(dmatrix m, int dim, ivector ndx, int *d) { int r, c, rowmax, i; double max, temp, sum; dvector swap = newdvector(dim); dvector scale = newdvector(dim); *d=1; for (r=0; r<dim; r++) { for (max=0.0, c=0; c<dim; c++){ if ((temp=fabs(m[r][c])) > max){ max=temp; } } if (max==0.0){ cout << "Singular matrix" << endl; return false; } scale[r]=1.0/max; } for (c=0; c<dim; c++) { for (r=0; r<c; r++) { for (sum=m[r][c], i=0; i<r; i++) sum-= m[r][i]*m[i][c]; m[r][c]=sum; } for (max=0.0, rowmax=c, r=c; r<dim; r++) { for (sum=m[r][c], i=0; i<c; i++) sum-= m[r][i]*m[i][c]; m[r][c]=sum; if ((temp=scale[r]*fabs(sum)) > max) { max=temp; rowmax=r; } } if (max==0.0){ cout << "Singular matrix" << endl; return false; } if (rowmax!=c) { swap=m[rowmax]; m[rowmax]=m[c]; m[c]=swap; scale[rowmax]=scale[c]; (*d)= -(*d); } ndx[c]=rowmax; temp=1.0/m[c][c]; for(r=c+1; r<dim; r++){ m[r][c]*=temp; } } freevector((void*)scale); freevector((void*)swap); return true; }
double likelihoodbyem(uint nvariables,uint nsamples, dmatrix x, dvector w, dvector y){ uint maxemcycles = 1000; uint emcycle = 0; double delta = 1.0f; double logL = 0.0f; double logLprev = 0.0f; dvector Fy = newdvector(nsamples); //printdmatrix(x,nsamples,nvariables); while((emcycle<maxemcycles) && (delta>1.0e-5)){ logL = multivariateregression(nvariables,nsamples,x,w,y,Fy); for(uint s=0;s<nsamples;s++){ w[s] = (w[s]+Fy[s])/w[s]; } delta= abs(logL-logLprev); logLprev=logL; emcycle++; } freevector((void*)Fy); cout << "[EM algorithm]\tFinished with "<< logL <<" after " << emcycle << "/" << maxemcycles << " cycles" << endl; return logL; }
void LUinvert(dmatrix lu, dmatrix inv, int dim, int *ndx){ int r,c; dvector b = newdvector(dim); for (c=0; c<dim; c++){ b[c]=1.0; LUsolve(lu,dim,ndx,b); for (r=0; r<dim; r++) inv[r][c]= b[r]; } freevector((void*)b); }
double multivariateregression(uint nvariables, uint nsamples, dmatrix x, dvector w, dvector y, dvector Fy, bool nullmodel, ivector nullmodellayout,int verbose){ dmatrix Xt = translatematrix(nvariables,nsamples,x,verbose); dvector XtWY = calculateparameters(nvariables,nsamples,Xt,w,y,verbose); if(nullmodel){ for (uint i=1; i < nvariables; i++){ if(nullmodellayout[(i-1)] == 1){ //SHIFTED Because the nullmodel has always 1 parameter less (The first parameter estimated mean) XtWY[i] = 0.0; } } } if(verbose){ Rprintf("Estimated parameters:\n"); printdvector(XtWY,nvariables); } dvector fit = newdvector(nsamples); dvector residual = newdvector(nsamples); double variance = calculatestatistics(nvariables, nsamples, Xt, XtWY, y, w, &fit, &residual,verbose); double logLQTL = calculateloglikelihood(nsamples, residual, w, variance, &Fy, verbose); if(verbose){ Rprintf("Estimated response:\n"); printdvector(fit,nsamples); Rprintf("Residuals:\n"); printdvector(residual,nsamples); Rprintf("Estimated Fy:\n"); printdvector(Fy,nsamples); Rprintf("Variance: %f\n",variance); Rprintf("Loglikelihood QTL: %f\n",logLQTL); } freematrix((void**)Xt,nvariables); freevector((void*)XtWY); freevector((void*)fit); freevector((void*)residual); return logLQTL; }
void backwardelimination(uint nvariables,uint nsamples, dmatrix x, dvector w, dvector y){ bool finished = false; uint leastinterestingmodel; double logLfull = likelihoodbyem(nvariables,nsamples,x,w,y); bvector model = newbvector(nvariables); double dropneeded = 2*inverseF(2,nsamples-nvariables,0.005); cout << "Likelihood of the full model: " << logLfull << endl; while((!finished) && modelsize(nvariables,model) > 1){ cout << "modelsize(model) = " << modelsize(nvariables,model) << "Drop " << dropneeded <<endl; dvector logL = newdvector(modelsize(nvariables,model)); for(uint todrop=0;todrop<modelsize(nvariables,model);todrop++){ bvector tempmodel = newbvector(nvariables); copybvector(nvariables,model,tempmodel); dropterm(nvariables,tempmodel,todrop); dmatrix designmatrix = createdesignmatrix(nvariables,nsamples,x,tempmodel); logL[todrop] = likelihoodbyem(modelsize(nvariables,tempmodel),nsamples,designmatrix,w,y); freematrix((void**)designmatrix,nsamples); freevector((void*)tempmodel); } leastinterestingmodel = lowestindex(modelsize(nvariables,model),logL); cout << "Least interesting model:" << leastinterestingmodel << " Difference to fullmodel:" << (logLfull - logL[leastinterestingmodel]) << endl; if(dropneeded > fabs(logLfull - logL[leastinterestingmodel])){ dropterm(nvariables,model,leastinterestingmodel); logLfull = logL[leastinterestingmodel]; cout << "Drop variable" << leastinterestingmodel << endl; cout << "Likelihood of the new full model: " << logLfull<< endl; }else{ for(uint x=0;x<nvariables;x++){ if(model[x]) cout << "Variable" << x << "In Model" << endl; } finished=true; } } }
/* * ML estimation of recombination frequencies via EM; calculation of multilocus * genotype probabilities; ignorance of unlikely genotypes. Called by the * mqmscan. maximum-likelihood estimation of recombination frequencies via the * EM algorithm, using multilocus information (default: the recombination * frequencies are not estimated but taken from mqm.in) * * When reestimate is 'n' the method is skipped */ double rmixture(MQMMarkerMatrix marker, vector weight, vector r, cvector position, ivector ind, int Nind, int Naug, int Nmark, vector *mapdistance, char reestimate, MQMCrossType crosstype, int verbose) { int i, j; int iem= 0; double Nrecom, oldr=0.0, newr, rdelta=1.0; double maximum = 0.0; float last_step = 0.0; vector indweight; indweight = newvector(Nind); vector distance; distance = newvector(Nmark+1); if (reestimate=='n') { if (verbose==1) { Rprintf("INFO: recombination parameters are not re-estimated\n"); } for (j=0; j<Nmark; j++) { if (maximum < (*mapdistance)[j]) { maximum = (*mapdistance)[j]; } } } else { if (verbose==1) { Rprintf("INFO: recombination parameters are re-estimated\n"); } //Reestimation of map now works while ((iem<1000)&&(rdelta>0.0001)) { iem+=1; rdelta= 0.0; /* calculate weights = conditional genotype probabilities */ for (i=0; i<Naug; i++) weight[i]=1.0; for (j=0; j<Nmark; j++) { if ((position[j]==MLEFT)||(position[j]==MUNLINKED)) for (i=0; i<Naug; i++) if (marker[j][i]==MH) weight[i]*= 0.5; else weight[i]*= 0.25; if ((position[j]==MLEFT)||(position[j]==MMIDDLE)) for (i=0; i<Naug; i++) { double calc_i = left_prob(r[j],marker[j][i],marker[j+1][i],crosstype); //double calc_i = prob(marker, r, i, j, marker[j+1][i], crosstype, 0); weight[i]*=calc_i; } } for (i=0; i<Nind; i++) { indweight[i]= 0.0; } for (i=0; i<Naug; i++) { indweight[ind[i]]+=weight[i]; } for (i=0; i<Naug; i++) { weight[i]/=indweight[ind[i]]; } for (j=0; j<Nmark; j++) { if ((position[j]==MLEFT)||(position[j]==MMIDDLE)) { newr= 0.0; for (i=0; i<Naug; i++) { Nrecom= fabs((double)(marker[j][i]-marker[j+1][i])); if ((marker[j][i]==MH)&&(marker[j+1][i]==MH)) Nrecom= 2.0*r[j]*r[j]/(r[j]*r[j]+(1-r[j])*(1-r[j])); newr+= Nrecom*weight[i]; } if (reestimate=='y' && position[j]!=MRIGHT) { //only update if it isn't the last marker of a chromosome ;) oldr=r[j]; r[j]= newr/(2.0*Nind); rdelta+=pow(r[j]-oldr, 2.0); } else rdelta+=0.0; } } } /* print new estimates of recombination frequencies */ //Rprintf("INFO: Reestimate? %c\n", reestimate); //Rprintf("INFO: looping over all markers %d\n", Nmark); for (j=0; j<Nmark; j++) { if (position[j+1]==MRIGHT) { last_step = (*mapdistance)[j+1]-(*mapdistance)[j]; } if (position[j]!=MLEFT) { if (position[j]!=MRIGHT) { (*mapdistance)[j]= -50*log(1-2.0*r[j])+(*mapdistance)[j-1]; } else { (*mapdistance)[j]= (*mapdistance)[j-1]+last_step; } } else { (*mapdistance)[j]= -50*log(1-2.0*r[j]); } if (maximum < (*mapdistance)[j]) { maximum = (*mapdistance)[j]; } //Rprintf("r(%d)= %f -> %f\n", j, r[j], (*mapdistance)[j]); } } if (verbose==1) { Rprintf("INFO: Re-estimation of the genetic map took %d iterations, to reach a rdelta of %f\n", iem, rdelta); } Free(indweight); freevector(distance); return maximum; }
/* * mapQTL moves a QTL along the chromosome and calculated at each map position * the QTL likelihood. Uses either all cofactors, or selected cofactors only */ double mapQTL(int Nind, int Nmark, cvector cofactor, cvector selcofactor, MQMMarkerMatrix marker, cvector position, vector mapdistance, vector y, vector r, ivector ind, int Naug, double variance, char printoutput, vector *informationcontent, matrix *Frun, int run, char REMLorML, bool fitQTL, bool dominance, int em, double windowsize, double stepsize, double stepmin, double stepmax, MQMCrossType crosstype, int verbose) { //Rprintf("INFO: mapQTL function called.\n"); int j, jj, jjj=0; int Nloci = Nmark+1; vector Fy = newvector(Naug); cvector QTLcofactor = newcvector(Nloci); cvector saveQTLcofactor = newcvector(Nloci); double infocontent; vector info0 = newvector(Nind); vector info1 = newvector(Nind); vector info2 = newvector(Nind); vector weight = newvector(Naug); weight[0]= -1.0; /* fit QTL on top of markers (but: should also be done with routine QTLmixture() for exact ML) */ cvector newcofactor= newcvector(Nmark); cvector direction = newcvector(Nmark); vector cumdistance = newvector(Nmark+1); double QTLlikelihood=0.0; for (j=0; j<Nmark; j++) { if (position[j]==MLEFT) cumdistance[j]= -50*log(1-2.0*r[j]); else if (position[j]==MMIDDLE) cumdistance[j]= cumdistance[j-1]-50*log(1-2.0*r[j]); } double savelogL=0.0; // log-likelihood of model with all selected cofactors /* fit QTL on top of markers (full ML) fit QTL between markers (full ML) */ // cout << "please wait (mixture calculus may take quite a lot of time)" << endl; /* estimate variance in mixture model with all marker cofactors */ // cout << "estimate variance in mixture model with all cofactors" << endl; variance= -1.0; savelogL= 2.0*QTLmixture(marker, cofactor, r, position, y, ind, Nind, Naug, Nmark, &variance, em, &weight, REMLorML, fitQTL, dominance, crosstype, verbose); if (verbose==1){ info("INFO: log-likelihood of full model= %f\n", savelogL/2); } // augment data for missing QTL observations (x 3) fitQTL=true; int newNaug = 3 * Naug; Free(weight); weight = newvector(newNaug); weight[0] = 1.0; vector weight0 = newvector(newNaug); weight0[0] = -1.0; vector QTLr = newvector(Nloci); vector QTLmapdistance = newvector(Nloci); cvector QTLposition = newcvector(Nloci); MQMMarkerMatrix QTLloci = (MQMMarkerMatrix)Calloc(Nloci, MQMMarkerVector); double moveQTL = stepmin; char nextinterval= 'n', firsttime='y'; double maxF=0.0, savebaseNoQTLModel=0.0; int baseNoQTLModel=0, step=0; for (j=0; j<Nmark; j++) { /* fit a QTL in two steps: 1. move QTL along marker interval j -> j+1 with steps of stepsize=20 cM, starting from -20 cM up to 220 cM 2. all marker-cofactors in the neighborhood of the QTL are dropped by using cM='windows' as criterium */ nextinterval= 'n'; #ifndef STANDALONE R_CheckUserInterrupt(); /* check for ^C */ R_FlushConsole(); #endif while (nextinterval=='n') { // step 1: // Rprintf("DEBUG testing STEP 1"); if (position[j]==MLEFT) { if (moveQTL<=mapdistance[j]) { QTLposition[j]= position[j]; QTLposition[j+1]= MMIDDLE; QTLr[j]= recombination_frequentie((mapdistance[j]-moveQTL)); QTLr[j+1]= r[j]; QTLloci[j+1]= marker[j]; QTLloci[j]= marker[Nloci-1]; QTLmapdistance[j]= moveQTL; QTLmapdistance[j+1]= mapdistance[j]; if (firsttime=='y') weight[0]= -1.0; moveQTL+= stepsize; } else if (moveQTL<=mapdistance[j+1]) { QTLposition[j]= position[j]; QTLposition[j+1]= MMIDDLE; QTLr[j]= recombination_frequentie((moveQTL-mapdistance[j])); QTLr[j+1]= recombination_frequentie((mapdistance[j+1]-moveQTL)); //r[j]; QTLloci[j]= marker[j]; QTLloci[j+1]= marker[Nloci-1]; QTLmapdistance[j]= mapdistance[j]; QTLmapdistance[j+1]= moveQTL; moveQTL+= stepsize; } else nextinterval= 'y'; } else if (position[j]==MMIDDLE) { if (moveQTL<=mapdistance[j+1]) { QTLposition[j]= position[j]; QTLposition[j+1]= MMIDDLE; QTLr[j]= recombination_frequentie((moveQTL-mapdistance[j])); //0.0; QTLr[j+1]= recombination_frequentie((mapdistance[j+1]-moveQTL)); //r[j]; QTLloci[j]= marker[j]; QTLloci[j+1]= marker[Nloci-1]; QTLmapdistance[j]= mapdistance[j]; QTLmapdistance[j+1]= moveQTL; moveQTL+= stepsize; } else nextinterval= 'y'; } else if (position[j]==MRIGHT) { if (moveQTL<=stepmax) { QTLposition[j]= MMIDDLE; QTLposition[j+1]= MRIGHT; QTLr[j]= recombination_frequentie((moveQTL-mapdistance[j])); //0.0; QTLr[j+1]= r[j]; // note r[j]=999.0 QTLloci[j]= marker[j]; QTLloci[j+1]= marker[Nloci-1]; QTLmapdistance[j]= mapdistance[j]; QTLmapdistance[j+1]= moveQTL; moveQTL+= stepsize; } else { nextinterval= 'y'; moveQTL= stepmin; } } else if (position[j]==MUNLINKED) { QTLposition[j]= MLEFT; QTLposition[j+1]= MRIGHT; //position[j] ?? MRIGHT ? QTLr[j]= 0.0; QTLr[j+1]= r[j]; QTLloci[j+1]= marker[j]; QTLloci[j]= marker[Nloci-1]; QTLmapdistance[j]= mapdistance[j]; QTLmapdistance[j+1]= mapdistance[j]; if (firsttime=='y') weight[0]= -1.0; nextinterval= 'y'; moveQTL= stepmin; } if (nextinterval=='n') { // QTLcofactor[j]= MAA; // QTLcofactor[j+1]= MAA; for (jj=0; jj<j; jj++) { QTLposition[jj]= position[jj]; QTLr[jj]= r[jj]; QTLloci[jj]= marker[jj]; QTLmapdistance[jj]= mapdistance[jj]; QTLcofactor[jj]= selcofactor[jj]; } for (jj=j+1; jj<Nmark; jj++) { QTLposition[jj+1]= position[jj]; QTLr[jj+1]= r[jj]; QTLloci[jj+1]= marker[jj]; QTLcofactor[jj+1]= selcofactor[jj]; QTLmapdistance[jj+1]= mapdistance[jj]; QTLcofactor[jj+1]= selcofactor[jj]; } // step 2: // Rprintf("DEBUG testing STEP 2"); if ((position[j]==MLEFT)&&((moveQTL-stepsize)<=mapdistance[j])) { QTLcofactor[j]= MNOCOF; QTLcofactor[j+1]= (((QTLmapdistance[j+1]-QTLmapdistance[j])<windowsize) ? MNOCOF : selcofactor[j]); } else { QTLcofactor[j+1]= MNOCOF; QTLcofactor[j]= (((QTLmapdistance[j+1]-QTLmapdistance[j])<windowsize) ? MNOCOF : selcofactor[j]); } if ((position[j]==MLEFT)||(position[j]==MMIDDLE)) { jjj=j+2; while (QTLposition[jjj]==MMIDDLE) { if ((position[j]==MLEFT)&&((moveQTL-stepsize)<=mapdistance[j])) QTLcofactor[jjj]= (((QTLmapdistance[jjj]-QTLmapdistance[j])<windowsize) ? MNOCOF : QTLcofactor[jjj]); else QTLcofactor[jjj]= (((QTLmapdistance[jjj]-QTLmapdistance[j+1])<windowsize) ? MNOCOF : QTLcofactor[jjj]); jjj++; } QTLcofactor[jjj]= (((QTLmapdistance[jjj]-QTLmapdistance[j+1])<windowsize) ? MNOCOF : QTLcofactor[jjj]); } if ((position[j]==MMIDDLE)||(position[j]==MRIGHT)) { jjj=j-1; while (QTLposition[jjj]==MMIDDLE) { QTLcofactor[jjj]= (((QTLmapdistance[j+1]-QTLmapdistance[jjj])<windowsize) ? MNOCOF : QTLcofactor[jjj]); jjj--; } QTLcofactor[jjj]= (((QTLmapdistance[j+1]-QTLmapdistance[jjj])<windowsize) ? MNOCOF : QTLcofactor[jjj]); } // fit no-QTL model at current map position (cofactors only) if (firsttime=='y') { for (jj=0; jj<Nloci; jj++) saveQTLcofactor[jj]= QTLcofactor[jj]; baseNoQTLModel=1; firsttime='n'; } else { baseNoQTLModel=0; for (jj=0; jj<Nloci; jj++) baseNoQTLModel+= (saveQTLcofactor[jj]==QTLcofactor[jj] ? 0 : 1); } // Rprintf("fitting NO-QTL model\n"); if (baseNoQTLModel!=0) { // new base no-QTL model if ((position[j]==MLEFT)&&((moveQTL-stepsize)<=mapdistance[j])) QTLcofactor[j]= MSEX; else QTLcofactor[j+1]= MSEX; // Rprintf("INFO: Before base model\n", QTLlikelihood/-2); QTLlikelihood= -2.0*QTLmixture(QTLloci, QTLcofactor, QTLr, QTLposition, y, ind, Nind, Naug, Nloci, &variance, em, &weight0, REMLorML, fitQTL, dominance, crosstype, verbose); // Rprintf("INFO: log-likelihood of NO QTL model= %f\n", QTLlikelihood/-2); weight0[0]= -1.0; savebaseNoQTLModel= QTLlikelihood; if ((position[j]==MLEFT)&&((moveQTL-stepsize)<=mapdistance[j])) QTLcofactor[j]= MNOCOF; else QTLcofactor[j+1]= MNOCOF; for (jj=0; jj<Nloci; jj++) saveQTLcofactor[jj]= QTLcofactor[jj]; } else QTLlikelihood= savebaseNoQTLModel; // fit QTL-model (plus cofactors) at current map position // MNOTAA= QTL if ((position[j]==MLEFT)&&((moveQTL-stepsize)<=mapdistance[j])) QTLcofactor[j]= MQTL; else QTLcofactor[j+1]= MQTL; if (REMLorML==MH) weight[0]= -1.0; QTLlikelihood+=2.0*QTLmixture(QTLloci, QTLcofactor, QTLr, QTLposition, y, ind, Nind, Naug, Nloci, &variance, em, &weight, REMLorML, fitQTL, dominance, crosstype, verbose); //this is the place we error at, because the likelihood is not correct. if (QTLlikelihood<-0.05) { Rprintf("WARNING: Negative QTLlikelihood=%f versus BASE MODEL: %f\nThis applies to the QTL at %d\n", QTLlikelihood, (savebaseNoQTLModel/-2), j); //return 0;} } maxF= (maxF<QTLlikelihood ? QTLlikelihood : maxF); if (run>0) (*Frun)[step][run]+= QTLlikelihood; else (*Frun)[step][0]+= QTLlikelihood; /* Each individual has condition multilocus probabilities for being 0, 1 or 2 at the QTL. Calculate the maximum per individu. Calculate the mean of this maximum, averaging over all individuals This is the information content plotted. */ infocontent= 0.0; for (int i=0; i<Nind; i++) { info0[i]= 0.0; // qq info1[i]= 0.0; // Qq info2[i]= 0.0; // QQ } for (int i=0; i<Naug; i++) { info0[ind[i]]+= weight[i]; info1[ind[i]]+= weight[i+Naug]; info2[ind[i]]+= weight[i+2*Naug]; } for (int i=0; i<Nind; i++) if (info0[i]<info1[i]) infocontent+= (info1[i]<info2[i] ? info2[i] : info1[i]); else infocontent+= (info0[i]<info2[i] ? info2[i] : info0[i]); (*informationcontent)[step]+=infocontent/Nind; step++; } } } fitQTL=false; freevector(direction); Free(info0); Free(info1); Free(info2); Free(weight); Free(weight0); Free(QTLr); Free(QTLposition); Free(Fy); Free(newcofactor); Free(QTLcofactor); Free(cumdistance); Free(QTLmapdistance); Free(QTLloci); Free(saveQTLcofactor); return maxF; //QTLlikelihood; }
main() { FILE *fpt; // define a pointer to a file for reading*/ FILE *fmodelmat; // define a pointer to a file for writing the array modelmat int i,j,jseed,k,l,nrow = 100, p=15, ncolumn, dimgam, prop, itno = p*3276, g=100, *indices, ok, *varin, *varout ; double Xfull[nrow][p+1], XTfull[p+1][nrow], XTXfull[p+1][p+1], y[nrow], XTyfull[p+1]; double *XTXg, *XTyg; double meany,TSS,SSRgamma,Rsqgamma, logMHratio,logmarggammaold,logmarggammanew,dif; int modelmat[p], tempvec[p]; //counter[(int)(pow(2,p))], totalunique, modelunique[(int)(pow(2,p))], tempint; time_t start, end; struct timeval start_time, end_time; double total_usecs; char filename[50]; for (jseed=1; jseed<101; jseed++) { srand(jseed); sprintf (filename,"sim-rs-thin.%d.dat",jseed); //printf ("Filename is %s \n",filename); fmodelmat = fopen(filename,"w"); /* First, call gettimeofday() to get start time */ gettimeofday(&start_time, (struct timeval*)0); fpt = fopen("simcen-x.txt","r"); // open file for reading only for (i=0; i<nrow; i++) { for (j=0; j<p; j++) { fscanf(fpt,"%lf",& Xfull[i][j+1]); } } fclose(fpt); // close the data file fpt = fopen("simcen-y.txt","r"); // open file for reading only for (i=0; i<nrow; i++) { fscanf(fpt,"%lf",& y[i]); } fclose(fpt); // close the data file for (k=0; k<nrow; k++) {Xfull[k][0] = 1; } for (j=0; j<(p+1); j++) { for(k=0; k<nrow; k++) {XTfull[j][k] = Xfull[k][j];} // 1. find XTfull } for (j=0; j<(p+1); j++) { for (k=0; k<(p+1); k++) { XTXfull[j][k] = 0.0; for(l=0; l<nrow; l++) { XTXfull[j][k] += XTfull[j][l]*Xfull[l][k]; // 2. find XTXfull } } } for (j=0; j<(p+1); j++) { XTyfull[j] = 0.0; for (k=0; k<nrow; k++) { XTyfull[j] += XTfull[j][k]*y[k]; // 3. find XTyfull } } double sumy = 0.0; for (j=0; j<nrow; j++) {sumy += y[j];} meany = sumy/(double)(nrow); double sumysq = 0.0; for (j=0; j<nrow; j++) {sumysq += y[j]*y[j];} TSS = sumysq - (double)(nrow)*(meany*meany); // printf("Total Sum of squares is %lf \n",TSS); for (j=0; j<p; j++) { modelmat[j] = 0 ;// initializing modelmat } for (j=0; j<p; j++) { fprintf(fmodelmat,"%d \t",modelmat[j]) ; } logmarggammaold = 0.0; fprintf(fmodelmat,"%lf \t",logmarggammaold); fprintf(fmodelmat,"\n"); for (i=1; i<itno; i++) // starting iterations for MH Algorithm { dimgam = 0; for (j=0; j<p; j++) {dimgam += modelmat[j];} // dimension of current model for (j=0; j<p; j++) // tempvec initially is the model from the previous iteration { tempvec[j] = modelmat[j]; } if (dimgam==0) {prop = 0;} else if (dimgam==p) {prop = 0;} else {prop = (rand())%2;} // prop: picks randomly from the i)add/delete or ii)swap proposal // printf("prop is %d \n",prop); if (prop==0) { int index = (rand())%p; // index :corresponds to the indicator gamma_j chosen by the proposal q in MH tempvec[index] = 1 - tempvec[index]; // now tempvec is changed by one bit as in MC^3 for the proposed move } else if (prop==1) { varin = ivector(dimgam) ;//vector of included variables in current model (don't consider intercept) j = 0; k = 0; while (j<p && k<dimgam) { if (modelmat[j]==1) {varin[k] = j; k++;} j++ ; } varout = ivector(p-dimgam) ;//vector of excluded variables in current model (don't consider intercept) j = 0; k = 0; while (j<p && k<(p-dimgam)) { if (modelmat[j]==0) {varout[k] = j; k++;} j++ ; } int swapin = (rand())%dimgam; // swapin :corresponds to position of randomly chosen included variable int swapout = (rand())%(p-dimgam);// swapout :corresponds to position of randomly chosen excluded variable tempvec[varin[swapin]] = 0; tempvec[varout[swapout]] =1; } ncolumn = 0; for (j=0; j<p; j++) {ncolumn += tempvec[j];} // printf("Dimesion of model is %d \n",ncolumn); indices = ivector(ncolumn) ;//indices indicates the position of the nonzero gamma_j's, it always has 0 as the first argument j = 0; k = 0; while (j<p && k<ncolumn) { if (tempvec[j]==1) {indices[k] = j+1; k++;} j++ ; } // need to compute R^2_gamma and eventually the marg lik under g-prior // First trying to compute betahat_gamma using lapack // 5. Compute Rsqgamma as follows: // a. Transpose (Xgamma^T*y ) to get y^T* Xgamma // b. Multiply y^T* Xgamma by betagammahat // 6. Calculate marggamma (marginal likelihood under model gamma) XTXg = vector(ncolumn*ncolumn); XTyg = vector(ncolumn); // allocate memory for XTXg,XTyg for (j=0; j<(ncolumn); j++) { XTyg[j] = XTyfull[indices[j]]; //printf("XTyg[%d] is %lf \n",j,XTyg[j]); for (k=0; k<(ncolumn); k++) { XTXg[j*ncolumn+k] = XTXfull[indices[k]][indices[j]]; } } int c2 = 1; int *pivot; if (ncolumn == 0) { logmarggammanew = 0.0; } else { pivot = ivector(ncolumn); //allocate memory //printf("%d \n",ncolumn); dgesv_(&ncolumn, &c2, XTXg, &ncolumn, pivot, XTyg, &ncolumn, &ok); // replaces XTyg by the soln i.e.(XTXg)^(-1)*(XTyg) // 5b. Multiply yTX by betagammahat to get SSRgamma(SS due to Regression) SSRgamma = 0.0; for (j=0; j<(ncolumn); j++){SSRgamma += XTyg[j]*XTyfull[indices[j]];} Rsqgamma = SSRgamma/ TSS; logmarggammanew = .5*((double)(log(1 + g)) * (double) (nrow - ncolumn - 1) - log(1.0 + (double)(g)*(1.0 - Rsqgamma)) * (double)(nrow-1)); freevector(pivot); } // MH step if (dimgam==0 | dimgam==p) {logMHratio = (double)log(0.5) + logmarggammanew - logmarggammaold;} else if (ncolumn==0 | ncolumn==p) {logMHratio = (double)log(2.0) + logmarggammanew - logmarggammaold;} else {logMHratio = logmarggammanew - logmarggammaold;} double randnum = (double)(rand())/RAND_MAX; if (log(randnum) <= logMHratio) { for (j=0; j<p; j++) { modelmat[j] = tempvec[j]; // accepting the move with prob MHratio } logmarggammaold = logmarggammanew; } if(i%p == 0) // storing every pth { for (j=0; j<p; j++) { fprintf(fmodelmat,"%d \t",modelmat[j]); } fprintf(fmodelmat,"%lf \t",logmarggammaold); fprintf(fmodelmat,"\n") ; if (i%(p*10000)==0){printf("%d \n",i/p);} } //printf("%d \n",bintoint(p,modelmat)); freevector(varin); freevector(varout); freevector(indices); // free memory freevector(XTXg); freevector(XTyg); //////////////////////////////////////////////////////////////////////////// } // end of the MCMC i-iterations loop for the MH Algorithm /* Now call gettimeofday() to get end time */ gettimeofday(&end_time, (struct timeval*)0); /* after time */ /* Print the execution time */ total_usecs = (end_time.tv_sec-start_time.tv_sec) + (end_time.tv_usec-start_time.tv_usec)/1000000.0; //printf("Total time was %lf Sec.\n", total_usecs); fclose(fmodelmat); // finished writing the file printf("Finished replicate %d \n",jseed) ; } // end jseed }
int transform(const char * source, const char * destination, const char * output){ char src_pts_name[256]; char dest_pts_name[256]; char out_param_name[256]; int n=3; int m=0; int m2=0; int k,l; double **src_mat=NULL; double **dest_mat=NULL; double **dest_mat_T=NULL; double **src_mat_T=NULL; double **E_mat=NULL; double **C_mat=NULL; double **C_mat_interm=NULL; double **D_mat_interm=NULL; double **P_mat=NULL; double *D_vec=NULL; double *T_vec=NULL; double *one_vec=NULL; double **D_mat=NULL; double **Q_mat=NULL; double **P_mat_T=NULL; double **R_mat=NULL; double trace1=0.0; double trace2=0.0; double scal=0.0; double ppm=0.0; FILE *outfile; printf("\n*******************************\n"); printf( "* helmparms3d v%1.2f *\n",VERS); printf( "* (c) U. Niethammer 2011 *\n"); printf( "* http://helmparms3d.sf.net *\n"); printf( "*******************************\n"); memset(src_pts_name,0,sizeof(src_pts_name)); memset(dest_pts_name,0,sizeof(dest_pts_name)); memset(out_param_name,0,sizeof(out_param_name)); strcpy(src_pts_name, source); strcpy(dest_pts_name, destination); strcpy(out_param_name, output); m=get_m_size(src_pts_name); m2=get_m_size(dest_pts_name); if(m2!=m){ printf("Error, number of source and destination points is not equal!\n"); } else { src_mat=matrix(m,m, src_mat); dest_mat=matrix(m,m, dest_mat); read_points(src_pts_name, src_mat); read_points(dest_pts_name, dest_mat); D_vec=vector(n, D_vec); E_mat=matrix(m, m, E_mat); P_mat=matrix(m, m, P_mat); D_mat=matrix(m, m, D_mat); Q_mat=matrix(m, m, Q_mat); P_mat_T=matrix(m, m, P_mat_T); R_mat=matrix(m, m, R_mat); dest_mat_T=matrix(m, m, dest_mat_T); C_mat=matrix(m, m, C_mat); C_mat_interm=matrix(m, m, C_mat_interm); src_mat_T=matrix(m, m, src_mat_T); D_mat_interm=matrix(m, m, D_mat_interm); transpose_matrix(m, m, dest_mat, dest_mat_T); if(debug)printf("%s_T:\n",dest_pts_name); if(debug)plot_matrix(stdout, n, m, dest_mat_T); for(k=0;k<m;k++){ for(l=0;l<m;l++){ if(k!=l){ E_mat[k][l]=-1.0/(double)m; } else{ E_mat[k][l]=1.0-1.0/(double)m; } } } if(debug)printf("E:\n"); if(debug)plot_matrix(stdout, m, m, E_mat); if(debug)printf("dest_mat_T:\n"); if(debug)plot_matrix(stdout, n, m, dest_mat_T); matmult(dest_mat_T, m, m, E_mat, m, m, C_mat_interm, m, n); if(debug)printf("C_interm:\n"); if(debug)plot_matrix(stdout, n, m, C_mat_interm); matmult(C_mat_interm, n, m, src_mat, m, n, C_mat, n, n); if(debug)printf("C:\n"); if(debug)plot_matrix(stdout, n, n, C_mat); copy_matrix(n,n,C_mat,P_mat); if(debug)printf("P:\n"); if(debug)plot_matrix(stdout, n, n, P_mat); //Given matrix C[m][n], m>=n, using svd decomposition C = P D Q' to get P[m][n], diag D[n] and Q[n][n]. svd(n, n, C_mat, P_mat, D_vec, Q_mat); transpose_matrix(n, n, P_mat, P_mat_T); if(debug)printf("P\n"); if(debug)plot_matrix(stdout, n, n, P_mat); if(debug)printf("P_T\n"); if(debug)plot_matrix(stdout, n, n, P_mat_T); if(debug)printf("D_vec\n"); if(debug)plot_vector(stdout, n, D_vec); for(k=0;k<n;k++){ for(l=0;l<n;l++){ D_mat[k][l]=0.0; D_mat[l][l]=D_vec[l]; } } if(debug)printf("D\n"); if(debug)plot_matrix(stdout, n, n, D_mat); matmult(Q_mat, n, n, P_mat_T, n, n, R_mat, n, n); if(debug)printf("R_trans:\n"); if(debug)plot_matrix(stdout, n, n, R_mat); matmult(C_mat, m, n, R_mat, n, m, C_mat_interm, m, n); if(debug)printf("C_interm:\n"); if(debug)plot_matrix(stdout, n, n, C_mat_interm); trace1=trace(n,n,C_mat_interm); if(debug)printf("\ntra=%lf\n\n",trace1); transpose_matrix(m, m, src_mat, src_mat_T); if(debug)printf("%s_T:\n",src_pts_name); if(debug)plot_matrix(stdout, n, m, src_mat_T); init_matrix(m,m,C_mat); init_matrix(m,m,C_mat_interm); matmult(src_mat_T, m, m, E_mat, m, m, C_mat_interm, n, n); if(debug)printf("C_interm:\n"); if(debug)plot_matrix(stdout, n, m, C_mat_interm); matmult(C_mat_interm, n, m, src_mat, m, n, C_mat, n, n); if(debug)printf("C:\n"); if(debug)plot_matrix(stdout, n, n, C_mat); trace2=trace(n,n,C_mat); if(debug)printf("\ntra=%lf\n\n",trace2); scal=trace1/trace2; ppm=scal-1.0; if(debug)printf("\nscal = %10.10lf\nscal = %10.10lf ppm\n\n",scal, ppm); init_matrix(m,m,C_mat); init_matrix(m,m,C_mat_interm); matmult(src_mat, m, n, R_mat, n,m, D_mat_interm, m, n); if(debug)printf("C_mat_interm:\n"); if(debug)plot_matrix(stdout, m, n, D_mat_interm); scal_matrix(m, n, scal, D_mat_interm, C_mat_interm); if(debug)printf("C_mat_interm:\n"); if(debug)plot_matrix(stdout, m, n, C_mat_interm); subtract_matrix(m, n, dest_mat, C_mat_interm, D_mat_interm); if(debug)plot_matrix(stdout, m, n, D_mat_interm); scal_matrix(m, n, 1.0/m, D_mat_interm, C_mat_interm); if(debug)plot_matrix(stdout, m, n, C_mat_interm); init_matrix(m,m,src_mat_T); transpose_matrix(m, m, C_mat_interm, src_mat_T); if(debug)plot_matrix(stdout, n, m, src_mat_T); T_vec=vector(m, T_vec); one_vec=vector(m, one_vec); for(k=0;k<m;k++){ one_vec[k]=1.0; } matrix_multiply(n, m, src_mat_T, one_vec, T_vec); if(debug)printf("T:\n"); if(debug)plot_vector(stdout, 3, T_vec); outfile = fopen(out_param_name, "w"); if(outfile == NULL){ printf("Error writing %s\r\n",out_param_name); exit(-1); } init_matrix(m,m,src_mat_T); transpose_matrix(m, m, R_mat, src_mat_T); plot_matrix(outfile, n, n, src_mat_T); printf("R =\n");fflush(stdout); plot_matrix(stdout, n, n, src_mat_T); printf("\n");fflush(stdout); plot_vector(outfile, 3, T_vec); printf("T =\n");fflush(stdout); plot_vector(stdout, 3, T_vec); printf("\n");fflush(stdout); fprintf(outfile, "%10.10lf\n", scal); printf("s = %10.10lf (= %10.10lf ppm)\n\n",scal, ppm);fflush(stdout); fclose(outfile); freevector(D_vec); freevector(T_vec); freevector(one_vec); freematrix(m, src_mat); freematrix(m, dest_mat); freematrix(m, E_mat); freematrix(m, P_mat); freematrix(m, D_mat); freematrix(m, Q_mat); freematrix(m, P_mat_T); freematrix(m, R_mat); freematrix(m, dest_mat_T); freematrix(m, C_mat); freematrix(m, C_mat_interm); freematrix(m, src_mat_T); freematrix(m, D_mat_interm); printf("\n...done\n"); } }
double multivariateregression(uint nvariables, uint nsamples, dmatrix x, dvector w, dvector y, dvector Fy){ int d=0; double xtwj; dmatrix Xt = newdmatrix(nvariables,nsamples); dmatrix XtWX = newdmatrix(nvariables, nvariables); dvector XtWY = newdvector(nvariables); ivector indx = newivector(nvariables); //cout << "calculating Xt" << endl; for(uint i=0; i<nsamples; i++){ for(uint j=0; j<nvariables; j++){ Xt[j][i] = x[i][j]; } } //cout << "calculating XtWX and XtWY" << endl; for(uint i=0; i<nsamples; i++){ for(uint j=0; j<nvariables; j++){ xtwj = Xt[j][i] * w[i]; XtWY[j] += xtwj * y[i]; for(uint jj=0; jj<=j; jj++){ XtWX[j][jj] += xtwj * Xt[jj][i]; } } } LUdecomposition(XtWX, nvariables, indx, &d); LUsolve(XtWX, nvariables, indx, XtWY); //cout << "Estimated parameters:" << endl; //for (uint i=0; i < nvariables; i++){ // cout << "Parameter " << i << " = " << XtWY[i] << endl; //} dvector fit = newdvector(nsamples); dvector residual = newdvector(nsamples); dvector indL = newdvector(nsamples); double variance= 0.0; double logL=0.0; for (uint i=0; i<nsamples; i++){ fit[i]= 0.0; for (uint j=0; j<nvariables; j++){ fit[i] += Xt[j][i] * XtWY[j]; residual[i] = y[i]-fit[i]; variance += w[i]*pow(residual[i],2.0); } Fy[i] = Lnormal(residual[i],variance); indL[i] += w[i]*Fy[i]; logL += log(indL[i]); } //cout << "Estimated response:" << endl; //printdvector(fit,nsamples); //cout << "Residuals:" << endl; //printdvector(residual,nsamples); //cout << "Estimated Fy:" << endl; //printdvector(Fy,nsamples); //cout << "Variance: " << variance << endl; //cout << "Loglikelihood: " << logL << endl; freematrix((void**)Xt,nvariables); freematrix((void**)XtWX, nvariables); freevector((void*)XtWY); freevector((void*)fit); freevector((void*)residual); freevector((void*)indL); return logL; }
void freematrix(void **m,uint rows) { for(size_t i = 0; i < rows; i++){ freevector(m[i]); } if(m != NULL) Free(m); }
double nullmodel(uint nvariables, uint nsamples, dmatrix x, dvector w, dvector y,ivector nullmodellayout,int verbose){ dvector Fy = newdvector(nsamples); double logL = multivariateregression(nvariables,nsamples,x,w,y,Fy,true,nullmodellayout,verbose);; freevector((void*)Fy); return logL; }