Пример #1
0
bool LUdecomposition(dmatrix m, int dim, ivector ndx, int *d) {
  int r, c, rowmax, i;
  double max, temp, sum;
  dvector swap = newdvector(dim);
  dvector scale = newdvector(dim);
  *d=1;
  for (r=0; r<dim; r++) {
    for (max=0.0, c=0; c<dim; c++){
      if ((temp=fabs(m[r][c])) > max){
        max=temp;
      }
    }
    if (max==0.0){
      cout << "Singular matrix" << endl;
      return false;
    }
    scale[r]=1.0/max;
  }
  for (c=0; c<dim; c++) {
    for (r=0; r<c; r++) {
      for (sum=m[r][c], i=0; i<r; i++) sum-= m[r][i]*m[i][c];
      m[r][c]=sum;
    }
    for (max=0.0, rowmax=c, r=c; r<dim; r++) {
      for (sum=m[r][c], i=0; i<c; i++) sum-= m[r][i]*m[i][c];
      m[r][c]=sum;
      if ((temp=scale[r]*fabs(sum)) > max) {
        max=temp;
        rowmax=r;
      }
    }
    if (max==0.0){
      cout << "Singular matrix" << endl;
      return false;
    }
    if (rowmax!=c) {
      swap=m[rowmax];
      m[rowmax]=m[c];
      m[c]=swap;
      scale[rowmax]=scale[c];
      (*d)= -(*d);
    }
    ndx[c]=rowmax;
    temp=1.0/m[c][c];
    for(r=c+1; r<dim; r++){
      m[r][c]*=temp;
    }
  }
  freevector((void*)scale);
  freevector((void*)swap);
  return true;
}
Пример #2
0
double likelihoodbyem(uint nvariables,uint nsamples, dmatrix x, dvector w, dvector y){
  uint   maxemcycles = 1000;
  uint   emcycle     = 0;
  double delta       = 1.0f;
  double logL        = 0.0f;
  double logLprev    = 0.0f;

  dvector Fy = newdvector(nsamples);
  //printdmatrix(x,nsamples,nvariables);
  while((emcycle<maxemcycles) && (delta>1.0e-5)){
    logL = multivariateregression(nvariables,nsamples,x,w,y,Fy);

    for(uint s=0;s<nsamples;s++){
      w[s] = (w[s]+Fy[s])/w[s];
    }

    delta= abs(logL-logLprev);
    logLprev=logL;
    emcycle++;
  }

  freevector((void*)Fy);

  cout << "[EM algorithm]\tFinished with "<< logL <<" after " << emcycle << "/" << maxemcycles << " cycles" << endl;
  return logL;
}
Пример #3
0
void LUinvert(dmatrix lu, dmatrix inv, int dim, int *ndx){
  int r,c;
  dvector b = newdvector(dim);
  for (c=0; c<dim; c++){
     b[c]=1.0;
     LUsolve(lu,dim,ndx,b);
     for (r=0; r<dim; r++) inv[r][c]= b[r];
  }
  freevector((void*)b);
} 
Пример #4
0
double multivariateregression(uint nvariables, uint nsamples, dmatrix x, dvector w, dvector y, dvector Fy, bool nullmodel, ivector nullmodellayout,int verbose){
  dmatrix Xt   = translatematrix(nvariables,nsamples,x,verbose);
  dvector XtWY = calculateparameters(nvariables,nsamples,Xt,w,y,verbose);

  if(nullmodel){
    for (uint i=1; i < nvariables; i++){
      if(nullmodellayout[(i-1)] == 1){ //SHIFTED Because the nullmodel has always 1 parameter less (The first parameter estimated mean)
        XtWY[i] = 0.0;
      }
    }
  }
  
  if(verbose){
    Rprintf("Estimated parameters:\n");
    printdvector(XtWY,nvariables);
  }
  
  dvector fit       = newdvector(nsamples);
  dvector residual  = newdvector(nsamples);
  double  variance  = calculatestatistics(nvariables, nsamples, Xt, XtWY, y, w, &fit, &residual,verbose);
  double  logLQTL   = calculateloglikelihood(nsamples, residual, w, variance, &Fy, verbose);
  
  if(verbose){
    Rprintf("Estimated response:\n");
    printdvector(fit,nsamples);

    Rprintf("Residuals:\n");
    printdvector(residual,nsamples);

    Rprintf("Estimated Fy:\n");
    printdvector(Fy,nsamples);

    Rprintf("Variance: %f\n",variance);
    Rprintf("Loglikelihood QTL: %f\n",logLQTL);
  }
  
  freematrix((void**)Xt,nvariables);
  freevector((void*)XtWY);
  freevector((void*)fit);
  freevector((void*)residual);

  return logLQTL;
}
Пример #5
0
void backwardelimination(uint nvariables,uint nsamples, dmatrix x, dvector w, dvector y){
  bool    finished   = false;
  uint    leastinterestingmodel;
  double  logLfull   = likelihoodbyem(nvariables,nsamples,x,w,y);
  bvector model      = newbvector(nvariables);
  double  dropneeded = 2*inverseF(2,nsamples-nvariables,0.005);
  cout << "Likelihood of the full model: " << logLfull << endl;
  while((!finished) && modelsize(nvariables,model) > 1){

    cout << "modelsize(model) = " << modelsize(nvariables,model) << "Drop " << dropneeded <<endl;
    dvector logL = newdvector(modelsize(nvariables,model));
    for(uint todrop=0;todrop<modelsize(nvariables,model);todrop++){
      bvector tempmodel = newbvector(nvariables);
      copybvector(nvariables,model,tempmodel);
      dropterm(nvariables,tempmodel,todrop);
      dmatrix designmatrix = createdesignmatrix(nvariables,nsamples,x,tempmodel);
      logL[todrop] = likelihoodbyem(modelsize(nvariables,tempmodel),nsamples,designmatrix,w,y);
      freematrix((void**)designmatrix,nsamples);
      freevector((void*)tempmodel);
    }

    leastinterestingmodel = lowestindex(modelsize(nvariables,model),logL);
    cout << "Least interesting model:" << leastinterestingmodel << " Difference to fullmodel:" << (logLfull - logL[leastinterestingmodel]) << endl;
    if(dropneeded > fabs(logLfull - logL[leastinterestingmodel])){
      dropterm(nvariables,model,leastinterestingmodel);
      logLfull = logL[leastinterestingmodel];
      cout << "Drop variable" << leastinterestingmodel << endl;
      cout << "Likelihood of the new full model: " << logLfull<< endl;
    }else{
      for(uint x=0;x<nvariables;x++){
        if(model[x]) cout << "Variable" << x << "In Model" << endl;
      }
      finished=true;
    }
  }

}
Пример #6
0
/*
 * ML estimation of recombination frequencies via EM; calculation of multilocus
 * genotype probabilities; ignorance of unlikely genotypes. Called by the
 * mqmscan.  maximum-likelihood estimation of recombination frequencies via the
 * EM algorithm, using multilocus information (default: the recombination
 * frequencies are not estimated but taken from mqm.in)
 *
 * When reestimate is 'n' the method is skipped
 */
double rmixture(MQMMarkerMatrix marker, vector weight, vector r,
                cvector position, ivector ind, int Nind, int Naug, int Nmark,
                vector *mapdistance, char reestimate, MQMCrossType crosstype,
                int verbose) {
  int i, j;
  int iem= 0;
  double Nrecom, oldr=0.0, newr, rdelta=1.0;
  double maximum = 0.0;
  float last_step = 0.0;
  vector indweight;
  indweight = newvector(Nind);
  vector distance;
  distance = newvector(Nmark+1);

  if (reestimate=='n') {
    if (verbose==1) {
      Rprintf("INFO: recombination parameters are not re-estimated\n");
    }
    for (j=0; j<Nmark; j++) {
      if (maximum < (*mapdistance)[j]) {
        maximum = (*mapdistance)[j];
      }
    }
  } else {
    if (verbose==1) {
      Rprintf("INFO: recombination parameters are re-estimated\n");
    }
    //Reestimation of map now works
    while ((iem<1000)&&(rdelta>0.0001)) {
      iem+=1;
      rdelta= 0.0;
      /* calculate weights = conditional genotype probabilities */
      for (i=0; i<Naug; i++) weight[i]=1.0;
      for (j=0; j<Nmark; j++) {
        if ((position[j]==MLEFT)||(position[j]==MUNLINKED))
          for (i=0; i<Naug; i++)
            if (marker[j][i]==MH) weight[i]*= 0.5;
            else weight[i]*= 0.25;
        if ((position[j]==MLEFT)||(position[j]==MMIDDLE))
          for (i=0; i<Naug; i++) {
            double calc_i = left_prob(r[j],marker[j][i],marker[j+1][i],crosstype);            //double calc_i = prob(marker, r, i, j, marker[j+1][i], crosstype, 0);
            weight[i]*=calc_i;
          }
      }
      for (i=0; i<Nind; i++) {
        indweight[i]= 0.0;
      }
      for (i=0; i<Naug; i++) {
        indweight[ind[i]]+=weight[i];
      }
      for (i=0; i<Naug; i++) {
        weight[i]/=indweight[ind[i]];
      }
      for (j=0; j<Nmark; j++) {
        if ((position[j]==MLEFT)||(position[j]==MMIDDLE)) {
          newr= 0.0;
          for (i=0; i<Naug; i++) {
            Nrecom= fabs((double)(marker[j][i]-marker[j+1][i]));
            if ((marker[j][i]==MH)&&(marker[j+1][i]==MH))
              Nrecom= 2.0*r[j]*r[j]/(r[j]*r[j]+(1-r[j])*(1-r[j]));
            newr+= Nrecom*weight[i];
          }
          if (reestimate=='y' && position[j]!=MRIGHT) { //only update if it isn't the last marker of a chromosome ;)
            oldr=r[j];
            r[j]= newr/(2.0*Nind);
            rdelta+=pow(r[j]-oldr, 2.0);
          } else rdelta+=0.0;
        }
      }
    }
    /*   print new estimates of recombination frequencies */
    //Rprintf("INFO: Reestimate? %c\n", reestimate);
    //Rprintf("INFO: looping over all markers %d\n", Nmark);
    for (j=0; j<Nmark; j++) {
      if (position[j+1]==MRIGHT) {
        last_step = (*mapdistance)[j+1]-(*mapdistance)[j];
      }
      if (position[j]!=MLEFT) {
        if (position[j]!=MRIGHT) {
          (*mapdistance)[j]= -50*log(1-2.0*r[j])+(*mapdistance)[j-1];
        } else {
          (*mapdistance)[j]= (*mapdistance)[j-1]+last_step;
        }
      } else {
        (*mapdistance)[j]= -50*log(1-2.0*r[j]);
      }
      if (maximum < (*mapdistance)[j]) {
        maximum = (*mapdistance)[j];
      }
      //Rprintf("r(%d)= %f -> %f\n", j, r[j], (*mapdistance)[j]);
    }
  }
  if (verbose==1) {
    Rprintf("INFO: Re-estimation of the genetic map took %d iterations, to reach a rdelta of %f\n", iem, rdelta);
  }
  Free(indweight);
  freevector(distance);
  return maximum;
}
Пример #7
0
/* 
 * mapQTL moves a QTL along the chromosome and calculated at each map position
 * the QTL likelihood. Uses either all cofactors, or selected cofactors only
 */
double mapQTL(int Nind, int Nmark, cvector cofactor, cvector selcofactor, 
              MQMMarkerMatrix marker, cvector position, vector mapdistance, vector y,
              vector r, ivector ind, int Naug, double variance, char
              printoutput, vector *informationcontent, matrix *Frun, int run,
              char REMLorML, bool fitQTL, bool dominance, int em, double
              windowsize, double stepsize, double stepmin, double stepmax, 
              MQMCrossType crosstype, int verbose) {
  //Rprintf("INFO: mapQTL function called.\n");
  int j, jj, jjj=0;
  int Nloci = Nmark+1;
  vector Fy = newvector(Naug);
  cvector QTLcofactor       = newcvector(Nloci);
  cvector saveQTLcofactor   = newcvector(Nloci);
  double infocontent;

  vector info0 = newvector(Nind);
  vector info1 = newvector(Nind);
  vector info2 = newvector(Nind);

  vector weight = newvector(Naug);
  weight[0]= -1.0;

  /* fit QTL on top of markers (but: should also be done with routine QTLmixture() for exact ML) */
  cvector newcofactor= newcvector(Nmark);
  cvector direction = newcvector(Nmark);
  vector cumdistance = newvector(Nmark+1);
  double QTLlikelihood=0.0;

  for (j=0; j<Nmark; j++) {
    if (position[j]==MLEFT)
      cumdistance[j]= -50*log(1-2.0*r[j]);
    else if (position[j]==MMIDDLE)
      cumdistance[j]= cumdistance[j-1]-50*log(1-2.0*r[j]);
  }
  double savelogL=0.0; // log-likelihood of model with all selected cofactors

  /* fit QTL on top of markers (full ML)   fit QTL between markers (full ML) */
  // cout << "please wait (mixture calculus may take quite a lot of time)" << endl;
  /* estimate variance in mixture model with all marker cofactors */
  // cout << "estimate variance in mixture model with all cofactors" << endl;

  variance= -1.0;
  savelogL= 2.0*QTLmixture(marker, cofactor, r, position, y, ind, Nind, Naug, Nmark, &variance, em, &weight, REMLorML, fitQTL, dominance, crosstype, verbose);
  if (verbose==1){ info("INFO: log-likelihood of full model= %f\n", savelogL/2); }

  // augment data for missing QTL observations (x 3)
  fitQTL=true;
  int newNaug = 3 * Naug;
  Free(weight);
  weight           = newvector(newNaug);
  weight[0]        = 1.0;
  vector weight0   = newvector(newNaug);
  weight0[0]       = -1.0;

  vector QTLr              = newvector(Nloci);
  vector QTLmapdistance    = newvector(Nloci);
  cvector QTLposition      = newcvector(Nloci);
  MQMMarkerMatrix QTLloci  = (MQMMarkerMatrix)Calloc(Nloci, MQMMarkerVector);

  double moveQTL = stepmin;
  char nextinterval= 'n', firsttime='y';
  double maxF=0.0, savebaseNoQTLModel=0.0;
  int baseNoQTLModel=0, step=0;

  for (j=0; j<Nmark; j++) {
    /* 	fit a QTL in two steps:
    1. move QTL along marker interval j -> j+1 with steps of stepsize=20 cM, starting from -20 cM up to 220 cM
    2. all marker-cofactors in the neighborhood of the QTL are dropped by using cM='windows' as criterium
    */
    nextinterval= 'n';
#ifndef STANDALONE
    R_CheckUserInterrupt(); /* check for ^C */
    R_FlushConsole();
#endif
    while (nextinterval=='n') { // step 1:
      // Rprintf("DEBUG testing STEP 1");
      if (position[j]==MLEFT) {
        if (moveQTL<=mapdistance[j]) {
          QTLposition[j]= position[j];
          QTLposition[j+1]= MMIDDLE;
          QTLr[j]= recombination_frequentie((mapdistance[j]-moveQTL));
          QTLr[j+1]= r[j];
          QTLloci[j+1]= marker[j];
          QTLloci[j]= marker[Nloci-1];
          QTLmapdistance[j]= moveQTL;
          QTLmapdistance[j+1]= mapdistance[j];
          if (firsttime=='y') weight[0]= -1.0;
          moveQTL+= stepsize;
        } else if (moveQTL<=mapdistance[j+1]) {
          QTLposition[j]= position[j];
          QTLposition[j+1]= MMIDDLE;
          QTLr[j]= recombination_frequentie((moveQTL-mapdistance[j]));
          QTLr[j+1]= recombination_frequentie((mapdistance[j+1]-moveQTL)); //r[j];
          QTLloci[j]= marker[j];
          QTLloci[j+1]= marker[Nloci-1];
          QTLmapdistance[j]= mapdistance[j];
          QTLmapdistance[j+1]= moveQTL;
          moveQTL+= stepsize;
        } else nextinterval= 'y';
      } else if (position[j]==MMIDDLE) {
        if (moveQTL<=mapdistance[j+1]) {
          QTLposition[j]= position[j];
          QTLposition[j+1]= MMIDDLE;
          QTLr[j]= recombination_frequentie((moveQTL-mapdistance[j])); //0.0;
          QTLr[j+1]= recombination_frequentie((mapdistance[j+1]-moveQTL)); //r[j];
          QTLloci[j]= marker[j];
          QTLloci[j+1]= marker[Nloci-1];
          QTLmapdistance[j]= mapdistance[j];
          QTLmapdistance[j+1]= moveQTL;
          moveQTL+= stepsize;
        } else nextinterval= 'y';
      } else if (position[j]==MRIGHT) {
        if (moveQTL<=stepmax) {
          QTLposition[j]= MMIDDLE;
          QTLposition[j+1]= MRIGHT;
          QTLr[j]= recombination_frequentie((moveQTL-mapdistance[j])); //0.0;
          QTLr[j+1]= r[j]; // note r[j]=999.0
          QTLloci[j]= marker[j];
          QTLloci[j+1]= marker[Nloci-1];
          QTLmapdistance[j]= mapdistance[j];
          QTLmapdistance[j+1]= moveQTL;
          moveQTL+= stepsize;
        } else {
          nextinterval= 'y';
          moveQTL= stepmin;
        }
      } else if (position[j]==MUNLINKED) {
        QTLposition[j]= MLEFT;
        QTLposition[j+1]= MRIGHT; //position[j] ?? MRIGHT ?
        QTLr[j]= 0.0;
        QTLr[j+1]= r[j];
        QTLloci[j+1]= marker[j];
        QTLloci[j]= marker[Nloci-1];
        QTLmapdistance[j]= mapdistance[j];
        QTLmapdistance[j+1]= mapdistance[j];
        if (firsttime=='y') weight[0]= -1.0;
        nextinterval= 'y';
        moveQTL= stepmin;
      }
      if (nextinterval=='n') { // QTLcofactor[j]= MAA;
        // QTLcofactor[j+1]= MAA;
        for (jj=0; jj<j; jj++) {
          QTLposition[jj]= position[jj];
          QTLr[jj]= r[jj];
          QTLloci[jj]= marker[jj];
          QTLmapdistance[jj]= mapdistance[jj];
          QTLcofactor[jj]= selcofactor[jj];
        }
        for (jj=j+1; jj<Nmark; jj++) {
          QTLposition[jj+1]= position[jj];
          QTLr[jj+1]= r[jj];
          QTLloci[jj+1]= marker[jj];
          QTLcofactor[jj+1]= selcofactor[jj];
          QTLmapdistance[jj+1]= mapdistance[jj];
          QTLcofactor[jj+1]= selcofactor[jj];
        }
        // step 2:
        //  Rprintf("DEBUG testing STEP 2");
        if ((position[j]==MLEFT)&&((moveQTL-stepsize)<=mapdistance[j])) {
          QTLcofactor[j]= MNOCOF;
          QTLcofactor[j+1]=
            (((QTLmapdistance[j+1]-QTLmapdistance[j])<windowsize) ? MNOCOF : selcofactor[j]);
        } else {
          QTLcofactor[j+1]= MNOCOF;
          QTLcofactor[j]=
            (((QTLmapdistance[j+1]-QTLmapdistance[j])<windowsize) ? MNOCOF : selcofactor[j]);
        }
        if ((position[j]==MLEFT)||(position[j]==MMIDDLE)) {
          jjj=j+2;
          while (QTLposition[jjj]==MMIDDLE) {
            if ((position[j]==MLEFT)&&((moveQTL-stepsize)<=mapdistance[j]))
              QTLcofactor[jjj]=
                (((QTLmapdistance[jjj]-QTLmapdistance[j])<windowsize) ? MNOCOF : QTLcofactor[jjj]);
            else
              QTLcofactor[jjj]=
                (((QTLmapdistance[jjj]-QTLmapdistance[j+1])<windowsize) ? MNOCOF : QTLcofactor[jjj]);
            jjj++;
          }
          QTLcofactor[jjj]=
            (((QTLmapdistance[jjj]-QTLmapdistance[j+1])<windowsize) ? MNOCOF : QTLcofactor[jjj]);
        }
        if ((position[j]==MMIDDLE)||(position[j]==MRIGHT)) {
          jjj=j-1;
          while (QTLposition[jjj]==MMIDDLE) {
            QTLcofactor[jjj]= (((QTLmapdistance[j+1]-QTLmapdistance[jjj])<windowsize) ? MNOCOF : QTLcofactor[jjj]);
            jjj--;
          }
          QTLcofactor[jjj]= (((QTLmapdistance[j+1]-QTLmapdistance[jjj])<windowsize) ? MNOCOF : QTLcofactor[jjj]);
        }

        // fit no-QTL model at current map position (cofactors only)
        if (firsttime=='y') {
          for (jj=0; jj<Nloci; jj++) saveQTLcofactor[jj]= QTLcofactor[jj];
          baseNoQTLModel=1;
          firsttime='n';
        } else {
          baseNoQTLModel=0;
          for (jj=0; jj<Nloci; jj++) baseNoQTLModel+= (saveQTLcofactor[jj]==QTLcofactor[jj] ? 0 : 1);
        }
        //  Rprintf("fitting NO-QTL model\n");
        if (baseNoQTLModel!=0) { // new base no-QTL model
          if ((position[j]==MLEFT)&&((moveQTL-stepsize)<=mapdistance[j])) QTLcofactor[j]= MSEX;
          else QTLcofactor[j+1]= MSEX;
          // Rprintf("INFO: Before base model\n", QTLlikelihood/-2);
          QTLlikelihood= -2.0*QTLmixture(QTLloci, QTLcofactor, QTLr, QTLposition, y, ind, Nind, Naug, Nloci, &variance, em, &weight0, REMLorML, fitQTL, dominance, crosstype, verbose);
          // Rprintf("INFO: log-likelihood of NO QTL model= %f\n", QTLlikelihood/-2);
          weight0[0]= -1.0;
          savebaseNoQTLModel= QTLlikelihood;
          if ((position[j]==MLEFT)&&((moveQTL-stepsize)<=mapdistance[j])) QTLcofactor[j]= MNOCOF;
          else QTLcofactor[j+1]= MNOCOF;
          for (jj=0; jj<Nloci; jj++) saveQTLcofactor[jj]= QTLcofactor[jj];
        } else
          QTLlikelihood= savebaseNoQTLModel;

        // fit QTL-model (plus cofactors) at current map position
        // MNOTAA= QTL
        if ((position[j]==MLEFT)&&((moveQTL-stepsize)<=mapdistance[j])) QTLcofactor[j]= MQTL;
        else QTLcofactor[j+1]= MQTL;
        if (REMLorML==MH) weight[0]= -1.0;
        QTLlikelihood+=2.0*QTLmixture(QTLloci, QTLcofactor, QTLr, QTLposition, y, ind, Nind, Naug, Nloci, &variance, em, &weight, REMLorML, fitQTL, dominance, crosstype, verbose);
        //this is the place we error at, because the likelihood is not correct.
        if (QTLlikelihood<-0.05) {
          Rprintf("WARNING: Negative QTLlikelihood=%f versus BASE MODEL: %f\nThis applies to the QTL at %d\n", QTLlikelihood, (savebaseNoQTLModel/-2), j); //return 0;}
        }
        maxF= (maxF<QTLlikelihood ? QTLlikelihood : maxF);
        if (run>0) (*Frun)[step][run]+= QTLlikelihood;
        else (*Frun)[step][0]+= QTLlikelihood;

        /* 	Each individual has condition multilocus probabilities for being 0, 1 or 2 at the QTL.
        Calculate the maximum per individu. Calculate the mean of this maximum, averaging over all individuals
        This is the information content plotted.
        */
        infocontent= 0.0;
        for (int i=0; i<Nind; i++) {
          info0[i]= 0.0; // qq
          info1[i]= 0.0; // Qq
          info2[i]= 0.0; // QQ
        }
        for (int i=0; i<Naug; i++) {
          info0[ind[i]]+= weight[i];
          info1[ind[i]]+= weight[i+Naug];
          info2[ind[i]]+= weight[i+2*Naug];
        }
        for (int i=0; i<Nind; i++)
          if (info0[i]<info1[i]) infocontent+= (info1[i]<info2[i] ? info2[i] : info1[i]);
          else infocontent+= (info0[i]<info2[i] ? info2[i] : info0[i]);
        (*informationcontent)[step]+=infocontent/Nind;

        step++;
      }
    }
  }
  fitQTL=false;

  freevector(direction);
  Free(info0);
  Free(info1);
  Free(info2);
  Free(weight);
  Free(weight0);
  Free(QTLr);
  Free(QTLposition);
  Free(Fy);
  Free(newcofactor);
  Free(QTLcofactor);
  Free(cumdistance);
  Free(QTLmapdistance);
  Free(QTLloci);
  Free(saveQTLcofactor);
  return maxF; //QTLlikelihood;
}
Пример #8
0
main()
{
  FILE *fpt;         // define a pointer to a file for reading*/
  FILE *fmodelmat;   // define a pointer to a file for writing the array modelmat
  int i,j,jseed,k,l,nrow = 100, p=15, ncolumn,  dimgam, prop, itno = p*3276, g=100, *indices, ok, *varin, *varout ;               
  double Xfull[nrow][p+1], XTfull[p+1][nrow], XTXfull[p+1][p+1], y[nrow], XTyfull[p+1];
  double *XTXg, *XTyg;
  double meany,TSS,SSRgamma,Rsqgamma, logMHratio,logmarggammaold,logmarggammanew,dif;
  int modelmat[p], tempvec[p]; //counter[(int)(pow(2,p))], totalunique, modelunique[(int)(pow(2,p))], tempint;
  time_t start, end;
  struct timeval start_time, end_time;
  double total_usecs;
 
  char filename[50];
 
  
 
for (jseed=1; jseed<101; jseed++)
   {
 srand(jseed); 
 sprintf (filename,"sim-rs-thin.%d.dat",jseed);
 //printf ("Filename is %s \n",filename);
 fmodelmat =   fopen(filename,"w");
 
  /* First, call gettimeofday() to get start time */
   gettimeofday(&start_time, (struct timeval*)0);


fpt = fopen("simcen-x.txt","r");  // open file for reading only
    for (i=0; i<nrow; i++)
       {
	  for (j=0; j<p; j++)
	    {	      
               fscanf(fpt,"%lf",& Xfull[i][j+1]);
            }
         }
    fclose(fpt);                             // close the data file  

   fpt = fopen("simcen-y.txt","r");  // open file for reading only
    for (i=0; i<nrow; i++)
       {	      
          fscanf(fpt,"%lf",& y[i]);
       }
    fclose(fpt);                             // close the data file  

    for (k=0; k<nrow; k++)   {Xfull[k][0] = 1; }
  
   
     
      for (j=0; j<(p+1); j++)
        {
	  for(k=0; k<nrow; k++) {XTfull[j][k] = Xfull[k][j];}	     // 1. find XTfull
        }

     for (j=0; j<(p+1); j++)
        {
	for (k=0; k<(p+1); k++)
	  {
	     XTXfull[j][k] = 0.0;
	     for(l=0; l<nrow; l++)
	      {
                XTXfull[j][k] += XTfull[j][l]*Xfull[l][k];
		                                              // 2. find XTXfull
	      }
	  }
      }

       for (j=0; j<(p+1); j++)
      {
          XTyfull[j] = 0.0;
	  for (k=0; k<nrow; k++)
	   {
	     XTyfull[j] += XTfull[j][k]*y[k];            // 3. find XTyfull
	   }
      }
          double sumy = 0.0;
          for (j=0; j<nrow; j++) {sumy += y[j];}
          meany = sumy/(double)(nrow);
          double sumysq = 0.0;
          for (j=0; j<nrow; j++) {sumysq += y[j]*y[j];}
          TSS = sumysq - (double)(nrow)*(meany*meany); 
	  // printf("Total Sum of squares is %lf \n",TSS);
      

	  

 for (j=0; j<p; j++)
  {
    modelmat[j] = 0 ;// initializing modelmat
  }

  for (j=0; j<p; j++)
  {
    fprintf(fmodelmat,"%d \t",modelmat[j]) ;
  }
 
 logmarggammaold = 0.0; 
 fprintf(fmodelmat,"%lf \t",logmarggammaold);
    fprintf(fmodelmat,"\n");
   
 for (i=1; i<itno; i++)       // starting iterations for MH Algorithm
   {
      dimgam = 0;
      for (j=0; j<p; j++) {dimgam += modelmat[j];} // dimension of current model 
    
      for (j=0; j<p; j++)    // tempvec initially is the model from the previous iteration
       {
	   tempvec[j] = modelmat[j];
       }

 if (dimgam==0) {prop = 0;} else if (dimgam==p)  
        {prop = 0;} else
        {prop = (rand())%2;} // prop: picks randomly from the i)add/delete or ii)swap proposal
 // printf("prop is %d \n",prop);
       
      if (prop==0)      
        {
            int  index = (rand())%p; // index :corresponds to the indicator gamma_j chosen by the proposal q in MH
            tempvec[index] = 1 - tempvec[index]; // now tempvec is changed by one bit as in MC^3 for the proposed move
        } else if (prop==1)
      
        {
          varin = ivector(dimgam) ;//vector of included variables in current model (don't consider intercept)
     
      j = 0; k = 0;
      while (j<p && k<dimgam)
        {
             if (modelmat[j]==1) {varin[k] = j; k++;}
	       j++ ;
        }

    varout = ivector(p-dimgam) ;//vector of excluded variables in current model (don't consider intercept)
     
      j = 0; k = 0;
      while (j<p && k<(p-dimgam))
        {
             if (modelmat[j]==0) {varout[k] = j; k++;}
	       j++ ;
        }

          int swapin = (rand())%dimgam;    // swapin :corresponds to position of randomly chosen included variable
          int swapout = (rand())%(p-dimgam);// swapout :corresponds to position of randomly chosen excluded variable
          tempvec[varin[swapin]] = 0;
          tempvec[varout[swapout]] =1; 
	}

     
      ncolumn = 0;
      for (j=0; j<p; j++) {ncolumn += tempvec[j];}  
      // printf("Dimesion of model is %d \n",ncolumn);
      
     indices = ivector(ncolumn) ;//indices  indicates the position of the nonzero gamma_j's, it always has 0 as the first argument
     
      j = 0; k = 0;
      while (j<p && k<ncolumn)
        {
             if (tempvec[j]==1) {indices[k] = j+1; k++;}
	       j++ ;
        }
       // need to compute R^2_gamma and eventually the marg lik under g-prior

      // First trying to compute betahat_gamma using lapack
       // 5. Compute Rsqgamma as follows:
      //                           a. Transpose (Xgamma^T*y ) to get y^T* Xgamma
      //                           b. Multiply  y^T* Xgamma by betagammahat
      // 6. Calculate marggamma (marginal likelihood under model gamma)
       

       XTXg = vector(ncolumn*ncolumn); XTyg = vector(ncolumn);  // allocate memory for XTXg,XTyg
       for (j=0; j<(ncolumn); j++)
        {
	  XTyg[j] = XTyfull[indices[j]];  
	  //printf("XTyg[%d] is %lf \n",j,XTyg[j]);
	 for (k=0; k<(ncolumn); k++)
	  {
	     XTXg[j*ncolumn+k] = XTXfull[indices[k]][indices[j]];
	  }
      }
      	  
       int c2 = 1; 
       int *pivot;
       
     
       if (ncolumn == 0)  
             { 
	       logmarggammanew = 0.0;
             } else
	 {
       pivot = ivector(ncolumn);  //allocate memory
       //printf("%d \n",ncolumn);
       dgesv_(&ncolumn, &c2, XTXg, &ncolumn, pivot, XTyg, &ncolumn, &ok); // replaces XTyg by the soln i.e.(XTXg)^(-1)*(XTyg)
       // 5b. Multiply yTX by betagammahat to get SSRgamma(SS due to Regression)
	 
          SSRgamma = 0.0; for (j=0; j<(ncolumn); j++){SSRgamma += XTyg[j]*XTyfull[indices[j]];}
	   Rsqgamma = SSRgamma/ TSS;
	   
	  logmarggammanew = .5*((double)(log(1 + g)) * (double) (nrow - ncolumn - 1)
				- log(1.0 + (double)(g)*(1.0 - Rsqgamma)) * (double)(nrow-1));
              freevector(pivot); 
            }  
          // MH step
       if (dimgam==0 | dimgam==p) {logMHratio = (double)log(0.5) + logmarggammanew - logmarggammaold;} 
       else if (ncolumn==0 | ncolumn==p)  {logMHratio = (double)log(2.0) + logmarggammanew - logmarggammaold;} 
       else {logMHratio = logmarggammanew - logmarggammaold;} 
	 
	 
          double randnum = (double)(rand())/RAND_MAX;
            if (log(randnum) <= logMHratio)
             {
                for (j=0; j<p; j++)
                 {
		   modelmat[j] = tempvec[j];  // accepting the move with prob MHratio
                 }
               logmarggammaold = logmarggammanew;
             }

	    if(i%p == 0) // storing every pth
	      {
            for (j=0; j<p; j++)
             {
                fprintf(fmodelmat,"%d \t",modelmat[j]);
             }
            fprintf(fmodelmat,"%lf \t",logmarggammaold);
           
            fprintf(fmodelmat,"\n") ;
	    if (i%(p*10000)==0){printf("%d \n",i/p);}
	       }
	    
          
	    //printf("%d \n",bintoint(p,modelmat));
            
             freevector(varin);  
             freevector(varout);  
             freevector(indices);                        //   free memory
             freevector(XTXg);                           
             freevector(XTyg);                            
	  ////////////////////////////////////////////////////////////////////////////
  
   }    // end of the MCMC i-iterations loop for the MH Algorithm
  
 

   /* Now call gettimeofday() to get end time */
   gettimeofday(&end_time, (struct timeval*)0);  /* after time */
   
  
  /* Print the execution time */
   total_usecs = (end_time.tv_sec-start_time.tv_sec)  + (end_time.tv_usec-start_time.tv_usec)/1000000.0;

   //printf("Total time was %lf Sec.\n", total_usecs);
fclose(fmodelmat); // finished writing the file
printf("Finished replicate %d \n",jseed) ;   
} // end jseed
 
}
Пример #9
0
int transform(const char * source, const char * destination, const char * output){

 char src_pts_name[256];
 char dest_pts_name[256];
 char out_param_name[256];
 int n=3;
 int m=0;
 int m2=0;
 int k,l;
 double **src_mat=NULL;
 double **dest_mat=NULL;
 double **dest_mat_T=NULL;
 double **src_mat_T=NULL;
 double **E_mat=NULL;
 double **C_mat=NULL;
 double **C_mat_interm=NULL;
 double **D_mat_interm=NULL;
 double **P_mat=NULL;
 double *D_vec=NULL;
 double *T_vec=NULL;
 double *one_vec=NULL;
 double **D_mat=NULL;
 double **Q_mat=NULL;
 double **P_mat_T=NULL;
 double **R_mat=NULL;
 double trace1=0.0;
 double trace2=0.0;
 double scal=0.0;
 double ppm=0.0;
 FILE *outfile;


   printf("\n*******************************\n");
   printf(  "*      helmparms3d v%1.2f      *\n",VERS);
   printf(  "*   (c) U. Niethammer 2011    *\n");
   printf(  "*  http://helmparms3d.sf.net  *\n");
   printf(  "*******************************\n");
   memset(src_pts_name,0,sizeof(src_pts_name));
   memset(dest_pts_name,0,sizeof(dest_pts_name));
   memset(out_param_name,0,sizeof(out_param_name));
   strcpy(src_pts_name, source);
   strcpy(dest_pts_name, destination);
   strcpy(out_param_name, output);

   m=get_m_size(src_pts_name);
   m2=get_m_size(dest_pts_name);
   if(m2!=m){
      printf("Error, number of source and destination points is not equal!\n");
   }
   else
   {
   src_mat=matrix(m,m, src_mat);
   dest_mat=matrix(m,m, dest_mat);

   read_points(src_pts_name, src_mat);
   read_points(dest_pts_name, dest_mat);


   D_vec=vector(n, D_vec);

   E_mat=matrix(m, m, E_mat);
   P_mat=matrix(m, m, P_mat);
   D_mat=matrix(m, m, D_mat);
   Q_mat=matrix(m, m, Q_mat);
   P_mat_T=matrix(m, m, P_mat_T);
   R_mat=matrix(m, m, R_mat);
   dest_mat_T=matrix(m, m, dest_mat_T);
   C_mat=matrix(m, m, C_mat); 
   C_mat_interm=matrix(m, m, C_mat_interm); 
   src_mat_T=matrix(m, m, src_mat_T);
   D_mat_interm=matrix(m, m, D_mat_interm);


   transpose_matrix(m, m, dest_mat, dest_mat_T);
   if(debug)printf("%s_T:\n",dest_pts_name);
   if(debug)plot_matrix(stdout,  n, m, dest_mat_T);


   for(k=0;k<m;k++){
      for(l=0;l<m;l++){
         if(k!=l){
            E_mat[k][l]=-1.0/(double)m;
         }
         else{
            E_mat[k][l]=1.0-1.0/(double)m;
         }
      }
   }
   if(debug)printf("E:\n");
   if(debug)plot_matrix(stdout,  m, m, E_mat);



   if(debug)printf("dest_mat_T:\n");
   if(debug)plot_matrix(stdout,  n, m, dest_mat_T);

   matmult(dest_mat_T, m, m, E_mat, m, m,  C_mat_interm, m, n);
   if(debug)printf("C_interm:\n");
   if(debug)plot_matrix(stdout,  n, m, C_mat_interm);


   matmult(C_mat_interm, n, m, src_mat, m, n,  C_mat, n, n);
   if(debug)printf("C:\n");
   if(debug)plot_matrix(stdout,  n, n, C_mat);

   copy_matrix(n,n,C_mat,P_mat);
   if(debug)printf("P:\n");
   if(debug)plot_matrix(stdout,  n, n, P_mat);
   //Given matrix C[m][n], m>=n, using svd decomposition C = P D Q' to get P[m][n], diag D[n] and Q[n][n].
   svd(n, n, C_mat, P_mat, D_vec, Q_mat);
   transpose_matrix(n, n, P_mat, P_mat_T);

   if(debug)printf("P\n");
   if(debug)plot_matrix(stdout,  n, n, P_mat);
   if(debug)printf("P_T\n");
   if(debug)plot_matrix(stdout,  n, n, P_mat_T);


   if(debug)printf("D_vec\n");
   if(debug)plot_vector(stdout,  n, D_vec);
   for(k=0;k<n;k++){
      for(l=0;l<n;l++){
         D_mat[k][l]=0.0;
         D_mat[l][l]=D_vec[l];

      }
   }
   if(debug)printf("D\n");
   if(debug)plot_matrix(stdout,  n, n, D_mat);

   matmult(Q_mat, n, n, P_mat_T, n, n,  R_mat, n, n);
   if(debug)printf("R_trans:\n");
   if(debug)plot_matrix(stdout, n, n, R_mat);

   matmult(C_mat, m, n, R_mat, n, m,  C_mat_interm, m, n);
   if(debug)printf("C_interm:\n");
   if(debug)plot_matrix(stdout,  n, n, C_mat_interm);
   trace1=trace(n,n,C_mat_interm);
   if(debug)printf("\ntra=%lf\n\n",trace1);



   transpose_matrix(m, m, src_mat, src_mat_T);
   if(debug)printf("%s_T:\n",src_pts_name);
   if(debug)plot_matrix(stdout,  n, m, src_mat_T);


   init_matrix(m,m,C_mat);
   init_matrix(m,m,C_mat_interm);
   matmult(src_mat_T, m, m, E_mat, m, m,  C_mat_interm, n, n);
   if(debug)printf("C_interm:\n");
   if(debug)plot_matrix(stdout,  n, m, C_mat_interm);
   matmult(C_mat_interm, n, m, src_mat, m, n,  C_mat, n, n);
   if(debug)printf("C:\n");
   if(debug)plot_matrix(stdout,  n, n, C_mat);
   trace2=trace(n,n,C_mat);
   if(debug)printf("\ntra=%lf\n\n",trace2);

   scal=trace1/trace2;
   ppm=scal-1.0;
   if(debug)printf("\nscal = %10.10lf\nscal = %10.10lf ppm\n\n",scal, ppm);


   init_matrix(m,m,C_mat);
   init_matrix(m,m,C_mat_interm);

   matmult(src_mat, m, n, R_mat, n,m,  D_mat_interm, m, n);
   if(debug)printf("C_mat_interm:\n");
   if(debug)plot_matrix(stdout,  m, n, D_mat_interm);

   scal_matrix(m, n, scal, D_mat_interm, C_mat_interm);
   if(debug)printf("C_mat_interm:\n");
   if(debug)plot_matrix(stdout,  m, n, C_mat_interm);

   subtract_matrix(m, n, dest_mat, C_mat_interm, D_mat_interm);
   if(debug)plot_matrix(stdout,  m, n, D_mat_interm);
   scal_matrix(m, n, 1.0/m, D_mat_interm, C_mat_interm);
   if(debug)plot_matrix(stdout,  m, n, C_mat_interm);
   init_matrix(m,m,src_mat_T);
   transpose_matrix(m, m, C_mat_interm, src_mat_T);
   if(debug)plot_matrix(stdout,  n, m, src_mat_T);

   T_vec=vector(m, T_vec);
   one_vec=vector(m, one_vec);
   for(k=0;k<m;k++){
      one_vec[k]=1.0;
   }
   matrix_multiply(n, m, src_mat_T, one_vec, T_vec);
   if(debug)printf("T:\n");
   if(debug)plot_vector(stdout, 3, T_vec);

   outfile = fopen(out_param_name, "w");
   if(outfile == NULL){
      printf("Error writing %s\r\n",out_param_name);
      exit(-1);
   }
   init_matrix(m,m,src_mat_T);
   transpose_matrix(m, m, R_mat, src_mat_T);
   plot_matrix(outfile, n, n, src_mat_T);
   printf("R =\n");fflush(stdout);
   plot_matrix(stdout, n, n, src_mat_T);
   printf("\n");fflush(stdout);
   plot_vector(outfile, 3, T_vec);
   printf("T =\n");fflush(stdout);
   plot_vector(stdout, 3, T_vec);
   printf("\n");fflush(stdout);
   fprintf(outfile, "%10.10lf\n", scal);
   printf("s = %10.10lf (= %10.10lf ppm)\n\n",scal, ppm);fflush(stdout);
   fclose(outfile);

   freevector(D_vec);
   freevector(T_vec);
   freevector(one_vec);

   freematrix(m, src_mat);
   freematrix(m, dest_mat);
   freematrix(m, E_mat);
   freematrix(m, P_mat);
   freematrix(m, D_mat);
   freematrix(m, Q_mat);
   freematrix(m, P_mat_T);
   freematrix(m, R_mat);
   freematrix(m, dest_mat_T);
   freematrix(m, C_mat); 
   freematrix(m, C_mat_interm); 
   freematrix(m, src_mat_T);
   freematrix(m, D_mat_interm);
   printf("\n...done\n");
   }
}
Пример #10
0
double multivariateregression(uint nvariables, uint nsamples, dmatrix x, dvector w, dvector y, dvector Fy){
  
  int d=0;
  double xtwj;
  dmatrix Xt   = newdmatrix(nvariables,nsamples);
  dmatrix XtWX = newdmatrix(nvariables, nvariables);
  dvector XtWY = newdvector(nvariables);

  ivector indx = newivector(nvariables);

  //cout << "calculating Xt" << endl;
  for(uint i=0; i<nsamples; i++){
    for(uint j=0; j<nvariables; j++){
      Xt[j][i] = x[i][j];
    }
  }

  //cout << "calculating XtWX and XtWY" << endl;
  for(uint i=0; i<nsamples; i++){
    for(uint j=0; j<nvariables; j++){
      xtwj     = Xt[j][i] * w[i];
      XtWY[j] += xtwj    * y[i];
      for(uint jj=0; jj<=j; jj++){
        XtWX[j][jj] += xtwj * Xt[jj][i];
      }
    }
  }
  
  LUdecomposition(XtWX, nvariables, indx, &d);
  LUsolve(XtWX, nvariables, indx, XtWY);

  //cout << "Estimated parameters:" << endl;
  //for (uint i=0; i < nvariables; i++){
  //  cout << "Parameter " << i << " = " << XtWY[i] << endl;
  //}

  dvector fit = newdvector(nsamples);
  dvector residual = newdvector(nsamples);
  dvector indL = newdvector(nsamples);
  
  double variance= 0.0;
  double logL=0.0;

  for (uint i=0; i<nsamples; i++){
    fit[i]= 0.0;
    for (uint j=0; j<nvariables; j++){
      fit[i]       += Xt[j][i] * XtWY[j];
      residual[i]   = y[i]-fit[i];
      variance     += w[i]*pow(residual[i],2.0);
    }
    Fy[i]     = Lnormal(residual[i],variance);
    indL[i]  += w[i]*Fy[i];
    logL     += log(indL[i]);
  }
  
  //cout << "Estimated response:" << endl;
  //printdvector(fit,nsamples);

  //cout << "Residuals:" << endl;
  //printdvector(residual,nsamples);

  //cout << "Estimated Fy:" << endl;
  //printdvector(Fy,nsamples);

  //cout << "Variance: " << variance << endl;
  //cout << "Loglikelihood: " << logL << endl;
  freematrix((void**)Xt,nvariables);
  freematrix((void**)XtWX, nvariables);
  freevector((void*)XtWY);
  freevector((void*)fit);
  freevector((void*)residual);
  freevector((void*)indL);
  return logL;
}
Пример #11
0
void freematrix(void **m,uint rows) {
  for(size_t i = 0; i < rows; i++){ freevector(m[i]); }
  if(m != NULL) Free(m);
}
Пример #12
0
double nullmodel(uint nvariables, uint nsamples, dmatrix x, dvector w, dvector y,ivector nullmodellayout,int verbose){
  dvector Fy = newdvector(nsamples);
  double logL = multivariateregression(nvariables,nsamples,x,w,y,Fy,true,nullmodellayout,verbose);;
  freevector((void*)Fy);
  return logL;
}