Exemplo n.º 1
0
void sim_geno(int n_ind, int n_pos, int n_gen, int n_draws,
              int *geno, double *rf, double *rf2,
              double error_prob, int *draws,
              double initf(int, int *),
              double emitf(int, int, double, int *),
              double stepf(int, int, double, double, int *))
{
    int i, k, j, v, v2;
    double s, **beta, *probs;
    int **Geno, ***Draws, curstate;
    int cross_scheme[2];

    /* cross scheme hidden in draws argument; used by hmm_bcsft */
    cross_scheme[0] = draws[0];
    cross_scheme[1] = draws[1];
    draws[0] = 0;
    draws[1] = 0;

    /* allocate space for beta and
       reorganize geno and draws */
    /* Geno indexed as Geno[pos][ind] */
    /* Draws indexed as Draws[rep][pos][ind] */
    reorg_geno(n_ind, n_pos, geno, &Geno);
    reorg_draws(n_ind, n_pos, n_draws, draws, &Draws);
    allocate_alpha(n_pos, n_gen, &beta);
    allocate_double(n_gen, &probs);

    /* Read R's random seed */
    GetRNGstate();

    for(i=0; i<n_ind; i++) { /* i = individual */

        R_CheckUserInterrupt(); /* check for ^C */

        /* do backward equations */
        /* initialize beta */
        for(v=0; v<n_gen; v++) beta[v][n_pos-1] = 0.0;

        /* backward equations */
        for(j=n_pos-2; j>=0; j--) {

            for(v=0; v<n_gen; v++) {
                beta[v][j] = beta[0][j+1] + stepf(v+1,1,rf[j], rf2[j], cross_scheme) +
                    emitf(Geno[j+1][i],1,error_prob, cross_scheme);

                for(v2=1; v2<n_gen; v2++)
                    beta[v][j] = addlog(beta[v][j], beta[v2][j+1] +
                                        stepf(v+1,v2+1,rf[j],rf2[j], cross_scheme) +
                                        emitf(Geno[j+1][i],v2+1,error_prob, cross_scheme));
            }
        }

        for(k=0; k<n_draws; k++) { /* k = simulation replicate */

            /* first draw */
            /* calculate probs */
            s = (probs[0] = initf(1, cross_scheme)+emitf(Geno[0][i],1,error_prob, cross_scheme)+beta[0][0]);
            for(v=1; v<n_gen; v++) {
                probs[v] = initf(v+1, cross_scheme) + emitf(Geno[0][i], v+1, error_prob, cross_scheme) +
                    beta[v][0];
                s = addlog(s, probs[v]);
            }
            for(v=0; v<n_gen; v++) probs[v] = exp(probs[v] - s);

            /* make draw: returns a value from {1, 2, ..., n_gen} */
            curstate = Draws[k][0][i] = sample_int(n_gen, probs);

            /* move along chromosome */
            for(j=1; j<n_pos; j++) {
                /* calculate probs */
                for(v=0; v<n_gen; v++)
                    probs[v] = exp(stepf(curstate,v+1,rf[j-1],rf2[j-1], cross_scheme) +
                                   emitf(Geno[j][i],v+1,error_prob, cross_scheme) +
                                   beta[v][j] - beta[curstate-1][j-1]);
                /* make draw */
                curstate = Draws[k][j][i] = sample_int(n_gen, probs);
            }

        } /* loop over replicates */

    } /* loop over individuals */

    /* write R's random seed */
    PutRNGstate();

}
Exemplo n.º 2
0
/**********************************************************************
 * 
 * sim_ril
 * 
 * n_chr   Number of chromosomes
 * n_mar   Number of markers on each chromosome (vector of length n_chr)
 * n_ril   Number of RILs to simulate
 * 
 * map     Vector of marker locations, of length sum(n_mar)
 *         First marker on each chromosome should be at 0.
 *
 * n_str   Number of parental strains (either 2, 4, or 8)
 *
 * m       Interference parameter (0 is no interference)
 * p       For Stahl model, proportion of chiasmata from the NI model
 *
 * include_x   Whether the last chromosome is the X chromosome
 *
 * random_cross  Indicates whether the order of the strains in the cross
 *               should be randomized.
 *
 * selfing If 1, use selfing; if 0, use sib mating
 *
 * cross   On output, the cross used for each line 
 *         (vector of length n_ril x n_str)
 *
 * ril     On output, the simulated data 
 *         (vector of length sum(n_mar) x n_ril)
 *
 * origgeno       Like ril, but with no missing data
 *
 * error_prob     Genotyping error probability (used nly with n_str==2)
 *
 * missing_prob   Rate of missing genotypes
 *
 * errors         Error indicators (n_mar x n_ril)
 *
 **********************************************************************/
void sim_ril(int n_chr, int *n_mar, int n_ril, double *map, 
	     int n_str, int m, double p, int include_x, 
	     int random_cross, int selfing, int *cross, int *ril,
	     int *origgeno, 
	     double error_prob, double missing_prob, int *errors)
{
  int i, j, k, ngen, tot_mar, curseg;
  struct individual par1, par2, kid1, kid2;
  int **Ril, **Cross, **Errors, **OrigGeno; 
  int maxwork, isX, flag, max_xo, *firstmarker;
  double **Map, maxlen, chrlen, *work;

 /* count total number of markers */
  for(i=0, tot_mar=0; i<n_chr; i++) 
    tot_mar += n_mar[i];

  reorg_geno(tot_mar, n_ril, ril, &Ril);
  reorg_geno(n_str, n_ril, cross, &Cross);
  reorg_geno(tot_mar, n_ril, errors, &Errors);
  reorg_geno(tot_mar, n_ril, origgeno, &OrigGeno);

  /* allocate space */
  Map = (double **)R_alloc(n_chr, sizeof(double *));
  Map[0] = map;
  for(i=1; i<n_chr; i++)
    Map[i] = Map[i-1] + n_mar[i-1];

  /* location of first marker */
  firstmarker = (int *)R_alloc(n_chr, sizeof(int));
  firstmarker[0] = 0;
  for(i=1; i<n_chr; i++) 
    firstmarker[i] = firstmarker[i-1] + n_mar[i-1];

  /* maximum chromosome length (in cM) */
  maxlen = Map[0][n_mar[0]-1];
  for(i=1; i<n_chr; i++)
    if(maxlen < Map[i][n_mar[i]-1])
      maxlen =  Map[i][n_mar[i]-1];

  /* allocate space for individuals */
  max_xo = (int)qpois(1e-10, maxlen/100.0, 0, 0)*6;
  if(!selfing) max_xo *= 5;
  allocate_individual(&par1, max_xo);
  allocate_individual(&par2, max_xo);
  allocate_individual(&kid1, max_xo);
  allocate_individual(&kid2, max_xo);
  maxwork = (int)qpois(1e-10, (m+1)*maxlen/50.0, 0, 0)*3;
  work = (double *)R_alloc(maxwork, sizeof(double));

  for(i=0; i<n_ril; i++) {

    /* set up cross */
    for(j=0; j<n_str; j++) Cross[i][j] = j+1;
    if(random_cross) int_permute(Cross[i], n_str);

    for(j=0; j<n_chr; j++) {
      isX = include_x && j==n_chr-1;

      chrlen = Map[j][n_mar[j]-1];

      par1.n_xo[0] = par1.n_xo[1] = par2.n_xo[0] = par2.n_xo[1] = 0;

      /* initial generations */
      if(n_str==2) {
	par1.allele[0][0] = par2.allele[0][0] = 1;
	par1.allele[1][0] = par2.allele[1][0] = 2;
      }
      else if(n_str==4) {
	par1.allele[0][0] = 1;
	par1.allele[1][0] = 2;
	par2.allele[0][0] = 3;
	par2.allele[1][0] = 4;
      }
      else { /* 8 strain case */
	par1.allele[0][0] = 1;
	par1.allele[1][0] = 2;
	par2.allele[0][0] = 3;
	par2.allele[1][0] = 4;

	docross(par1, par2, &kid1, chrlen, m, p, 0, 
	      &maxwork, &work);

	par1.allele[0][0] = 5;
	par1.allele[1][0] = 6;
	par2.allele[0][0] = 7;
	par2.allele[1][0] = 8;

	docross(par1, par2, &kid2, chrlen, m, p, isX,
	      &maxwork, &work);

	copy_individual(&kid1, &par1);
	copy_individual(&kid2, &par2);
      }

      /* start inbreeding */
      ngen=1;
      while(1) {
	R_CheckUserInterrupt(); /* check for ^C */

	docross(par1, par2, &kid1, chrlen, m, p, 0,
		&maxwork, &work);
	if(!selfing) 
	  docross(par1, par2, &kid2, chrlen, m, p, isX,
		  &maxwork, &work);

	/* are we done? */
	flag = 0;
	if(selfing) {
	  if(kid1.n_xo[0] == kid1.n_xo[1]) {
	    for(k=0; k<kid1.n_xo[0]; k++) {
	      if(kid1.allele[0][k] != kid1.allele[1][k] ||
		 fabs(kid1.xoloc[0][k] - kid1.xoloc[1][k]) > 1e-6) {
		flag = 1;
		break;
	      }
	    }
	    if(kid1.allele[0][kid1.n_xo[0]] != kid1.allele[1][kid1.n_xo[0]])
	      flag = 1;
	  }
	  else flag = 1;
	}
	else {
	  if(kid1.n_xo[0] == kid1.n_xo[1] && 
	     kid1.n_xo[0] == kid2.n_xo[0] && 
	     kid1.n_xo[0] == kid2.n_xo[1]) {
	    for(k=0; k<kid1.n_xo[0]; k++) {
	      if(kid1.allele[0][k] != kid1.allele[1][k] ||
		 kid1.allele[0][k] != kid2.allele[0][k] ||
		 kid1.allele[0][k] != kid2.allele[1][k] ||
		 fabs(kid1.xoloc[0][k] - kid1.xoloc[1][k]) > 1e-6 ||
		 fabs(kid1.xoloc[0][k] - kid2.xoloc[0][k]) > 1e-6 ||
		 fabs(kid1.xoloc[0][k] - kid2.xoloc[1][k]) > 1e-6) {
		flag = 1;
		break;
	      }
	    }
	    if(kid1.allele[0][kid1.n_xo[0]] != kid1.allele[1][kid1.n_xo[0]] ||
	       kid1.allele[0][kid1.n_xo[0]] != kid2.allele[0][kid1.n_xo[0]] ||
	       kid1.allele[0][kid1.n_xo[0]] != kid2.allele[1][kid1.n_xo[0]]) 
	      flag = 1;
	  }
	  else flag = 1;
	}

	if(!flag) break; /* done inbreeding */

	/* go to next generation */
	copy_individual(&kid1, &par1);
	if(selfing) copy_individual(&kid1, &par2);
	else copy_individual(&kid2, &par2);

      } /* end with inbreeding of this chromosome */

      /* fill in alleles */
      curseg = 0;
      for(k=0; k<n_mar[j]; k++) { /* loop over markers */
	while(curseg < kid1.n_xo[0] && Map[j][k] > kid1.xoloc[0][curseg]) 
	  curseg++;
	  
	OrigGeno[i][k+firstmarker[j]] = 
	  Ril[i][k+firstmarker[j]] = Cross[i][kid1.allele[0][curseg]-1];

	/* simulate missing ? */
	if(unif_rand() < missing_prob) {
	  Ril[i][k+firstmarker[j]] = 0;
	}
	else if(n_str == 2 && unif_rand() < error_prob) {
	  /* simulate error */
	  Ril[i][k+firstmarker[j]] = 3 - Ril[i][k+firstmarker[j]];
	  Errors[i][k+firstmarker[j]] = 1;
	}
      }

    } /* loop over chromosomes */

  } /* loop over lines */
}
Exemplo n.º 3
0
void calc_genoprob_special(int n_ind, int n_pos, int n_gen, int *geno,
                           double *rf, double *rf2,
                           double error_prob, double *genoprob,
                           double initf(int, int *),
                           double emitf(int, int, double, int *),
                           double stepf(int, int, double, double, int *))
{
    int i, j, j2, v, v2, curpos;
    double s, **alpha, **beta;
    int **Geno;
    double ***Genoprob;
    int cross_scheme[2];

    /* cross scheme hidden in genoprob argument; used by hmm_bcsft */
    cross_scheme[0] = genoprob[0];
    cross_scheme[1] = genoprob[1];
    genoprob[0] = 0.0;
    genoprob[1] = 0.0;

    /* allocate space for alpha and beta and
       reorganize geno and genoprob */
    reorg_geno(n_ind, n_pos, geno, &Geno);
    reorg_genoprob(n_ind, n_pos, n_gen, genoprob, &Genoprob);
    allocate_alpha(n_pos, n_gen, &alpha);
    allocate_alpha(n_pos, n_gen, &beta);

    for(i=0; i<n_ind; i++) { /* i = individual */

        for(curpos=0; curpos < n_pos; curpos++) {

            if(!Geno[curpos][i]) continue;

            R_CheckUserInterrupt(); /* check for ^C */

            /* initialize alpha and beta */
            for(v=0; v<n_gen; v++) {
                if(curpos==0)
                    alpha[v][0] = initf(v+1, cross_scheme) + emitf(Geno[0][i], v+1, error_prob, cross_scheme);
                else
                    alpha[v][0] = initf(v+1, cross_scheme) + emitf(Geno[0][i], v+1, TOL, cross_scheme);
                beta[v][n_pos-1] = 0.0;
            }

            /* forward-backward equations */
            for(j=1,j2=n_pos-2; j<n_pos; j++, j2--) {

                for(v=0; v<n_gen; v++) {
                    alpha[v][j] = alpha[0][j-1] + stepf(1, v+1, rf[j-1], rf2[j-1], cross_scheme);

                    if(curpos==j2+1)
                        beta[v][j2] = beta[0][j2+1] + stepf(v+1,1,rf[j2], rf2[j2], cross_scheme) +
                            emitf(Geno[j2+1][i],1,error_prob, cross_scheme);
                    else
                        beta[v][j2] = beta[0][j2+1] + stepf(v+1,1,rf[j2], rf2[j2], cross_scheme) +
                            emitf(Geno[j2+1][i],1,TOL, cross_scheme);

                    for(v2=1; v2<n_gen; v2++) {
                        alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] +
                                             stepf(v2+1,v+1,rf[j-1],rf2[j-1], cross_scheme));
                        if(curpos==j2+1)
                            beta[v][j2] = addlog(beta[v][j2], beta[v2][j2+1] +
                                                 stepf(v+1,v2+1,rf[j2],rf2[j2], cross_scheme) +
                                                 emitf(Geno[j2+1][i],v2+1,error_prob, cross_scheme));
                        else
                            beta[v][j2] = addlog(beta[v][j2], beta[v2][j2+1] +
                                                 stepf(v+1,v2+1,rf[j2],rf2[j2], cross_scheme) +
                                                 emitf(Geno[j2+1][i],v2+1,TOL, cross_scheme));

                    }

                    if(curpos==j)
                        alpha[v][j] += emitf(Geno[j][i],v+1,error_prob, cross_scheme);
                    else
                        alpha[v][j] += emitf(Geno[j][i],v+1,TOL, cross_scheme);
                }
            }

            /* calculate genotype probabilities */
            s = Genoprob[0][curpos][i] = alpha[0][curpos] + beta[0][curpos];
            for(v=1; v<n_gen; v++) {
                Genoprob[v][curpos][i] = alpha[v][curpos] + beta[v][curpos];
                s = addlog(s, Genoprob[v][curpos][i]);
            }
            for(v=0; v<n_gen; v++)
                Genoprob[v][curpos][i] = exp(Genoprob[v][curpos][i] - s);

        } /* end loop over current position */

    } /* loop over individuals */
}
Exemplo n.º 4
0
void R_mqmaugment(int *geno, double *dist, double *pheno, int *auggeno, 
               double *augPheno, int *augIND, int *Nind, int *Naug, int *Nmark,
               int *Npheno, int *maxind, int *maxiaug, double *minprob, int
               *chromo, int *rqtlcrosstypep, int *augment_strategy, int *verbosep) {
  int **Geno;
  double **Pheno;
  double **Dist;
  int **NEW;                      //Holds the output for the augmentdata function
  int **Chromo;
  double **NEWPheno;              //New phenotype vector
  int **NEWIND;                   //New list of individuals 
  const int nind0 = *Nind;        //Individuals we start with
  const int verbose = *verbosep;
  const RqtlCrossType rqtlcrosstype = (RqtlCrossType) *rqtlcrosstypep;

  if(verbose) Rprintf("INFO: Starting C-part of the data augmentation routine\n");
  ivector new_ind;

  MQMMarkerMatrix markers = newMQMMarkerMatrix(*Nmark, nind0);
  vector mapdistance = newvector(*Nmark);
  ivector chr = newivector(*Nmark);

  //Reorganise the pointers into arrays, Singletons are just cast into the function
  reorg_geno(nind0, *Nmark, geno, &Geno);
  reorg_int(*Nmark, 1, chromo, &Chromo);
  reorg_pheno(nind0, *Npheno, pheno, &Pheno);
  reorg_pheno(*Nmark, 1, dist, &Dist);
  reorg_int(*maxind, *Nmark, auggeno, &NEW);
  reorg_int((*maxiaug)*nind0, 1, augIND, &NEWIND);
  reorg_pheno((*maxiaug)*nind0, 1, augPheno, &NEWPheno);

  MQMCrossType crosstype = determine_MQMCross(*Nmark, *Nind, (const int **)Geno, rqtlcrosstype);        // Determine cross
  change_coding(Nmark, Nind, Geno, markers, crosstype);                                                 // Change all the markers from R/qtl format to MQM internal

  if(verbose) Rprintf("INFO: Filling the chromosome matrix\n");
  for (int i=0; i<(*Nmark); i++) {
    //Set some general information structures per marker
    mapdistance[i] = POSITIONUNKNOWN;
    mapdistance[i] = Dist[0][i];
    chr[i] = Chromo[0][i];
  }

  if(mqmaugmentfull(&markers,Nind,Naug,&new_ind,*minprob, *maxind, *maxiaug,&Pheno,*Nmark,chr,mapdistance,*augment_strategy,crosstype,verbose)){
    //Data augmentation finished succesfully, encode it back into RQTL format
    for (int i = 0; i<(*Nmark); i++) {
      for (int j = 0; j<(*Naug); j++) {
        //Rprintf("INFO: Phenotype after return: %f",NEWPheno[0][j]);
        NEWPheno[0][j] = Pheno[0][j];
        NEWIND[0][j] = new_ind[j];
        NEW[i][j] = 9;
        if (markers[i][j] == MAA) {
          NEW[i][j] = 1;
        }
        if (markers[i][j] == MH) {
          NEW[i][j] = 2;
        }
        if (markers[i][j] == MBB) {  // [karl:] this might need to be changed for RIL
          crosstype==CRIL ? NEW[i][j]=2 : NEW[i][j] = 3;  //[Danny:] This should solve it 
        }
        if (markers[i][j] == MNOTAA) {
          NEW[i][j] = 5;
        }
        if (markers[i][j] == MNOTBB) {
          NEW[i][j] = 4;
        }
      }
    }
    if (verbose) {
      Rprintf("# Unique individuals before augmentation:%d\n", nind0);
      Rprintf("# Unique selected individuals:%d\n", *Nind);
      Rprintf("# Marker p individual:%d\n", *Nmark);
      Rprintf("# Individuals after augmentation:%d\n", *Naug);
      Rprintf("INFO: Data augmentation succesfull\n");
    }
  } else {
    //Unsuccessfull data augmentation exit
    Rprintf("INFO: This code should not be reached, data corruption could have occured. Please re-run this analysis.\n");
    *Naug = nind0;
    for (int i=0; i<(*Nmark); i++) {
      for (int j=0; j<(*Naug); j++) {
        NEWPheno[0][j] = Pheno[0][j];
        NEW[i][j] = 9;
        if (markers[i][j] == MAA) {
          NEW[i][j] = 1;
        }
        if (markers[i][j] == MH) {
          NEW[i][j] = 2;
        }
        if (markers[i][j] == MBB) {                       // [Karl:] this might need to be changed for RIL
          crosstype==CRIL ? NEW[i][j]=2 : NEW[i][j] = 3;  // [Danny:] This should solve it 
        }
        if (markers[i][j] == MNOTAA) {
          NEW[i][j] = 5;
        }
        if (markers[i][j] == MNOTBB) {
          NEW[i][j] = 4;
        }
      }
    }
    fatal("Data augmentation failed", "");
  }
  delMQMMarkerMatrix(markers,*Nmark); // [Danny:] This looked suspicious, we were leaking memory here because we didn't clean it
  Free(mapdistance);
  Free(chr);
  return;
}
Exemplo n.º 5
0
void est_map_bci(int n_ind, int n_mar, int *geno, double *d,
                 int m, double p, double error_prob,
                 double *loglik, int maxit, double tol, int verbose)
{
    int i, j, j2, v, v2, it, flag=0, **Geno, n_states;
    double s, **alpha, **beta, **gamma, *cur_d, *rf;
    double ***tm, *temp;
    double curloglik;
    double initprob;
    int ndigits;
    double maxdif, tempdif;
    char pattern[100], text[200];

    n_states = 2*(m+1);
    initprob = -log((double)n_states);

    /* allocate space for beta and reorganize geno */
    reorg_geno(n_ind, n_mar, geno, &Geno);
    allocate_alpha(n_mar, n_states, &alpha);
    allocate_alpha(n_mar, n_states, &beta);
    allocate_dmatrix(n_states, n_states, &gamma);
    allocate_double(n_mar-1, &cur_d);
    allocate_double(n_mar-1, &rf);

    /* allocate space for the transition matrices */
    /* size n_states x n_states x (n_mar-1) */
    /* tm[state1][state2][interval] */
    tm = (double ***)R_alloc(n_states, sizeof(double **));
    tm[0] = (double **)R_alloc(n_states * n_states, sizeof(double *));
    for(i=1; i<n_states; i++) tm[i] = tm[i-1] + n_states;
    tm[0][0] = (double *)R_alloc(n_states * n_states * (n_mar - 1),
                                 sizeof(double));
    temp = tm[0][0];
    for(i=0; i < n_states; i++) {
        for(j=0; j < n_states; j++) {
            tm[i][j] = temp;
            temp += n_mar-1;
        }
    }

    /* digits in verbose output */
    if(verbose) {
        ndigits = (int)ceil(-log10(tol));
        if(ndigits > 16) ndigits=16;
        sprintf(pattern, "%s%d.%df", "%", ndigits+3, ndigits+1);
    }

    for(j=0; j<n_mar-1; j++) d[j] /= 100.0; /* convert to Morgans */

    /* begin EM algorithm */
    for(it=0; it<maxit; it++) {

        for(j=0; j<n_mar-1; j++) {
            cur_d[j] = d[j];
            rf[j] = 0.0;
        }

        /* calculate the transition matrices */
        step_bci(n_mar, n_states, tm, cur_d, m, p, maxit, tol);

        for(i=0; i<n_ind; i++) { /* i = individual */

            R_CheckUserInterrupt(); /* check for ^C */

            /* initialize alpha and beta */
            for(v=0; v<n_states; v++) {
                alpha[v][0] = initprob + emit_bci(Geno[0][i], v, error_prob, m);
                beta[v][n_mar-1] = 0.0;
            }

            /* forward-backward equations */
            for(j=1,j2=n_mar-2; j<n_mar; j++, j2--) {

                for(v=0; v<n_states; v++) {
                    alpha[v][j] = alpha[0][j-1] + tm[0][v][j-1];

                    beta[v][j2] = beta[0][j2+1] + tm[v][0][j2] +
                        emit_bci(Geno[j2+1][i], 0, error_prob, m);

                    for(v2=1; v2<n_states; v2++) {
                        alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] +
                                             tm[v2][v][j-1]);
                        beta[v][j2] = addlog(beta[v][j2], beta[v2][j2+1] +
                                             tm[v][v2][j2] +
                                             emit_bci(Geno[j2+1][i], v2, error_prob, m));
                    }

                    alpha[v][j] += emit_bci(Geno[j][i], v, error_prob, m);

                }

            }

            for(j=0; j<n_mar-1; j++) {

                /* calculate gamma = log Pr(v1, v2, O) */
                for(v=0, s=0.0; v<n_states; v++) {
                    for(v2=0; v2<n_states; v2++) {
                        gamma[v][v2] = alpha[v][j] + beta[v2][j+1] +
                            emit_bci(Geno[j+1][i], v2, error_prob, m) +
                            tm[v][v2][j];

                        if(v==0 && v2==0) s = gamma[v][v2];
                        else s = addlog(s, gamma[v][v2]);
                    }
                }

                for(v=0; v<n_states; v++) {
                    for(v2=0; v2<n_states; v2++) {
                        rf[j] += nrec_bci(v, v2, m) * exp(gamma[v][v2] - s);
                    }
                }
            }

        } /* loop over individuals */

        /* rescale */
        for(j=0; j<n_mar-1; j++) {
            rf[j] /= (double)n_ind;

            if(rf[j] < tol/100.0) rf[j] = tol/100.0;
            else if(rf[j] > 0.5-tol/100.0) rf[j] = 0.5-tol/100.0;

        }

        /* use map function to convert back to distances */
        for(j=0; j<n_mar-1; j++)
            d[j] = imf_stahl(rf[j], m, p, 1e-10, 1000);

        if(verbose>1) {
            /* print estimates as we go along*/
            Rprintf("   %4d ", it+1);
            maxdif=0.0;
            for(j=0; j<n_mar-1; j++) {
                tempdif = fabs(d[j] - cur_d[j])/(cur_d[j]+tol*100.0);
                if(maxdif < tempdif) maxdif = tempdif;
            }
            sprintf(text, "%s%s\n", "  max rel've change = ", pattern);
            Rprintf(text, maxdif);
        }

        /* check convergence */
        for(j=0, flag=0; j<n_mar-1; j++) {
            if(fabs(d[j] - cur_d[j]) > tol*(cur_d[j]+tol*100.0)) {
                flag = 1;
                break;
            }
        }

        if(!flag) break;

    } /* end EM algorithm */

    if(flag) warning("Didn't converge!\n");

    /* re-calculate transition matrices */
    step_bci(n_mar, n_states, tm, d, m, p, maxit, tol);

    /* calculate log likelihood */
    *loglik = 0.0;
    for(i=0; i<n_ind; i++) { /* i = individual */

        R_CheckUserInterrupt(); /* check for ^C */

        /* initialize alpha */
        for(v=0; v<n_states; v++)
            alpha[v][0] = initprob + emit_bci(Geno[0][i], v, error_prob, m);

        /* forward equations */
        for(j=1; j<n_mar; j++) {
            for(v=0; v<n_states; v++) {
                alpha[v][j] = alpha[0][j-1] + tm[0][v][j-1];
                for(v2=1; v2<n_states; v2++)
                    alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] +
                                         tm[v2][v][j-1]);
                alpha[v][j] += emit_bci(Geno[j][i], v, error_prob, m);
            }
        }

        curloglik = alpha[0][n_mar-1];
        for(v=1; v<n_states; v++)
            curloglik = addlog(curloglik, alpha[v][n_mar-1]);
        *loglik += curloglik;
    }

    if(verbose) {
        if(verbose < 2) {
            /* print final estimates */
            Rprintf("  no. iterations = %d\n", it+1);
            maxdif=0.0;
            for(j=0; j<n_mar-1; j++) {
                tempdif = fabs(d[j] - cur_d[j])/(cur_d[j]+tol*100.0);
                if(maxdif < tempdif) maxdif = tempdif;
            }
            sprintf(text, "%s%s\n", "  max rel've change at last step = ", pattern);
            Rprintf(text, maxdif);
        }

        Rprintf("  loglik: %10.4lf\n\n", *loglik);
    }

    /* convert distances back to cM */
    for(j=0; j<n_mar-1; j++) d[j] *= 100.0;
}
Exemplo n.º 6
0
void calc_pairprob(int n_ind, int n_pos, int n_gen, int *geno, 
		   double *rf, double *rf2, 
		   double error_prob, double *genoprob, 
		   double *pairprob, 
		   double initf(int), 
		   double emitf(int, int, double),
		   double stepf(int, int, double, double)) 
{
  int i, j, j2, v, v2, v3;
  double s=0.0, **alpha, **beta;
  int **Geno;
  double ***Genoprob, *****Pairprob;
  
  /* n_pos must be at least 2, or there are no pairs! */
  if(n_pos < 2) error("n_pos must be > 1 in calc_pairprob");

  /* allocate space for alpha and beta and 
     reorganize geno, genoprob, and pairprob */
  reorg_geno(n_ind, n_pos, geno, &Geno);
  reorg_genoprob(n_ind, n_pos, n_gen, genoprob, &Genoprob);
  reorg_pairprob(n_ind, n_pos, n_gen, pairprob, &Pairprob);
  allocate_alpha(n_pos, n_gen, &alpha);
  allocate_alpha(n_pos, n_gen, &beta);

  for(i=0; i<n_ind; i++) { /* i = individual */

    R_CheckUserInterrupt(); /* check for ^C */

    /* initialize alpha and beta */
    for(v=0; v<n_gen; v++) {
      alpha[v][0] = initf(v+1) + emitf(Geno[0][i], v+1, error_prob);
      beta[v][n_pos-1] = 0.0;
    }

    /* forward-backward equations */
    for(j=1,j2=n_pos-2; j<n_pos; j++, j2--) {
      
      for(v=0; v<n_gen; v++) {
	alpha[v][j] = alpha[0][j-1] + stepf(1, v+1, rf[j-1], rf2[j-1]);
	
	beta[v][j2] = beta[0][j2+1] + stepf(v+1,1,rf[j2], rf2[j2]) + 
	  emitf(Geno[j2+1][i],1,error_prob);

	for(v2=1; v2<n_gen; v2++) {
	  alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] + 
			       stepf(v2+1,v+1,rf[j-1],rf2[j-1]));
	  beta[v][j2] = addlog(beta[v][j2], beta[v2][j2+1] + 
			       stepf(v+1,v2+1,rf[j2],rf2[j2]) +
			       emitf(Geno[j2+1][i],v2+1,error_prob));
	}

	alpha[v][j] += emitf(Geno[j][i],v+1,error_prob);
      }
    }

    /* calculate genotype probabilities */
    for(j=0; j<n_pos; j++) {
      s = Genoprob[0][j][i] = alpha[0][j] + beta[0][j];
      for(v=1; v<n_gen; v++) {
	Genoprob[v][j][i] = alpha[v][j] + beta[v][j];
	s = addlog(s, Genoprob[v][j][i]);
      }
      for(v=0; v<n_gen; v++) 
	Genoprob[v][j][i] = exp(Genoprob[v][j][i] - s);
    }

    /* calculate Pr(G[j], G[j+1] | marker data) for i = 1...n_pos-1 */
    for(j=0; j<n_pos-1; j++) {
      for(v=0; v<n_gen; v++) {
	for(v2=0; v2<n_gen; v2++) {
	  Pairprob[v][v2][j][j+1][i] = alpha[v][j] + beta[v2][j+1] +
	    stepf(v+1,v2+1,rf[j],rf2[j]) + 
	    emitf(Geno[j+1][i],v2+1,error_prob);
	  if(v==0 && v2==0) s=Pairprob[v][v2][j][j+1][i];
	  else s = addlog(s,Pairprob[v][v2][j][j+1][i]);
	}
      }
      /* scale to sum to 1 */
      for(v=0; v<n_gen; v++) 
	for(v2=0; v2<n_gen; v2++) 
	  Pairprob[v][v2][j][j+1][i] = 
	    exp(Pairprob[v][v2][j][j+1][i] - s);
    } 

    /* now calculate Pr(G[i], G[j] | marker data) for j > i+1 */
    for(j=0; j<n_pos-2; j++) {
      for(j2=j+2; j2<n_pos; j2++) {

	for(v=0; v<n_gen; v++) { /* genotype at pos'n j */
	  for(v2=0; v2<n_gen; v2++) { /* genotype at pos'n j2 */

	    Pairprob[v][v2][j][j2][i] = 0.0;

	    for(v3=0; v3<n_gen; v3++) { /* genotype at pos'n j2-1 */
	      s = Genoprob[v3][j2-1][i];
	      if(fabs(s) > TOL) /* avoid 0/0 */
		Pairprob[v][v2][j][j2][i] += Pairprob[v][v3][j][j2-1][i]*
		  Pairprob[v3][v2][j2-1][j2][i]/s;
	    }
		      
	  }
	} /* end loops over genotypes */

      }
    } /* end loops over pairs of positions */
	    
  } /* end loop over individuals */
}
Exemplo n.º 7
0
void est_rf(int n_ind, int n_mar, int *geno, double *rf, 
	    double erec(int, int, double), 
	    double logprec(int, int, double), 
	    int maxit, double tol, int meioses_per)
{
  int i, j1, j2, s, **Geno, n_mei=0, flag=0;
  double **Rf, next_rf=0.0, cur_rf=0.0;

  /* reorganize geno and rf */
  reorg_geno(n_ind, n_mar, geno, &Geno);
  reorg_errlod(n_mar, n_mar, rf, &Rf);

  for(j1=0; j1<n_mar; j1++) {

    /* count number of meioses */
    for(i=0, n_mei=0; i<n_ind; i++) 
      if(Geno[j1][i] != 0) n_mei += meioses_per;
    Rf[j1][j1] = (double) n_mei;
    
    R_CheckUserInterrupt(); /* check for ^C */

    for(j2=j1+1; j2<n_mar; j2++) {
      
      /* count meioses */
      n_mei = flag = 0;
      for(i=0; i<n_ind; i++) {
	if(Geno[j1][i] != 0 && Geno[j2][i] != 0) {
	  n_mei += meioses_per;
	  /* check if informatve */
	  if(fabs(logprec(Geno[j1][i], Geno[j2][i], 0.5) -
		  logprec(Geno[j1][i], Geno[j2][i], TOL)) > TOL) flag = 1;
	}
      }

      if(n_mei != 0 && flag == 1) {
	flag = 0;
	/* begin EM algorithm; start with cur_rf = 0.01 */
	for(s=0, cur_rf=0.01; s < maxit; s++) {
	  next_rf = 0.0; 
	  for(i=0; i<n_ind; i++) {
	    if(Geno[j1][i] != 0 && Geno[j2][i] != 0) 
	      next_rf += erec(Geno[j1][i], Geno[j2][i], cur_rf);
	  }

	  next_rf /= (double) n_mei;
	  
	  if(fabs(next_rf - cur_rf) < tol*(cur_rf+tol*100.0)) { 
	    flag = 1;
	    break;
	  }
	  cur_rf = next_rf;
	}
	if(!flag) warning("Markers (%d,%d) didn't converge\n", j1+1, j2+1);

	/* calculate LOD score */
	Rf[j1][j2] = next_rf;
	Rf[j2][j1] = 0.0;
	for(i=0; i<n_ind; i++) {
	  if(Geno[j1][i] != 0 && Geno[j2][i] != 0) {
	    Rf[j2][j1] += logprec(Geno[j1][i],Geno[j2][i], next_rf);
	    Rf[j2][j1] -= logprec(Geno[j1][i],Geno[j2][i], 0.5);
	  }
	}
	Rf[j2][j1] /= log(10.0);
	
      }
      else { /* no informative meioses */
	Rf[j1][j2] = NA_REAL;
	Rf[j2][j1] = 0.0;
      }
	  
    } /* end loops over markers */
  }
}
Exemplo n.º 8
0
void calc_genoprob(int n_ind, int n_pos, int n_gen, int *geno, 
		   double *rf, double *rf2, 
		   double error_prob, double *genoprob, 
		   double initf(int), 
		   double emitf(int, int, double),
		   double stepf(int, int, double, double)) 
{
  int i, j, j2, v, v2;
  double s, **alpha, **beta;
  int **Geno;
  double ***Genoprob;
  
  /* allocate space for alpha and beta and 
     reorganize geno and genoprob */
  reorg_geno(n_ind, n_pos, geno, &Geno);
  reorg_genoprob(n_ind, n_pos, n_gen, genoprob, &Genoprob);
  allocate_alpha(n_pos, n_gen, &alpha);
  allocate_alpha(n_pos, n_gen, &beta);

  for(i=0; i<n_ind; i++) { /* i = individual */

    R_CheckUserInterrupt(); /* check for ^C */

    /* initialize alpha and beta */
    for(v=0; v<n_gen; v++) {
      alpha[v][0] = initf(v+1) + emitf(Geno[0][i], v+1, error_prob);
      beta[v][n_pos-1] = 0.0;
    }

    /* forward-backward equations */
    for(j=1,j2=n_pos-2; j<n_pos; j++, j2--) {
      
      for(v=0; v<n_gen; v++) {
	alpha[v][j] = alpha[0][j-1] + stepf(1, v+1, rf[j-1], rf2[j-1]);
	
	beta[v][j2] = beta[0][j2+1] + stepf(v+1,1,rf[j2], rf2[j2]) + 
	  emitf(Geno[j2+1][i],1,error_prob);

	for(v2=1; v2<n_gen; v2++) {
	  alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] + 
			       stepf(v2+1,v+1,rf[j-1],rf2[j-1]));
	  beta[v][j2] = addlog(beta[v][j2], beta[v2][j2+1] + 
			       stepf(v+1,v2+1,rf[j2],rf2[j2]) +
			       emitf(Geno[j2+1][i],v2+1,error_prob));
	}

	alpha[v][j] += emitf(Geno[j][i],v+1,error_prob);
      }
    }

    /* calculate genotype probabilities */
    for(j=0; j<n_pos; j++) {
      s = Genoprob[0][j][i] = alpha[0][j] + beta[0][j];
      for(v=1; v<n_gen; v++) {
	Genoprob[v][j][i] = alpha[v][j] + beta[v][j];
	s = addlog(s, Genoprob[v][j][i]);
      }
      for(v=0; v<n_gen; v++) 
	Genoprob[v][j][i] = exp(Genoprob[v][j][i] - s);
    } 

    /* the following is the old version */
    /*    for(j=0; j<n_pos; j++) {
      s = 0.0;
      for(v=0; v<n_gen; v++) 
	s += (Genoprob[v][j][i] = exp(alpha[v][j] + beta[v][j]));

      for(v=0; v<n_gen; v++) 
	Genoprob[v][j][i] /= s;
	} */


  } /* loop over individuals */
  

}
Exemplo n.º 9
0
void argmax_geno(int n_ind, int n_pos, int n_gen, int *geno, 
		 double *rf, double *rf2, 
		 double error_prob, int *argmax, 
		 double initf(int), 
		 double emitf(int, int, double),
		 double stepf(int, int, double, double)) 
{
  int i, j, v, v2;
  double s, t, *gamma, *tempgamma, *tempgamma2;
  int **Geno, **Argmax, **traceback;
  
  /* Read R's random seed */
  /* in the case of multiple "most likely" genotype sequences, 
     we pick from them at random */
  GetRNGstate();

  /* allocate space and 
     reorganize geno and argmax */
  reorg_geno(n_ind, n_pos, geno, &Geno);
  reorg_geno(n_ind, n_pos, argmax, &Argmax);
  allocate_imatrix(n_pos, n_gen, &traceback);
  allocate_double(n_gen, &gamma);
  allocate_double(n_gen, &tempgamma);
  allocate_double(n_gen, &tempgamma2);

  for(i=0; i<n_ind; i++) { /* i = individual */

    R_CheckUserInterrupt(); /* check for ^C */

    /* begin viterbi algorithm */
    if(n_pos > 1) { /* multiple markers */
      for(v=0; v<n_gen; v++) 
	gamma[v] = initf(v+1) + emitf(Geno[0][i], v+1, error_prob);
    
      for(j=0; j<n_pos-1; j++) {
	for(v=0; v<n_gen; v++) {
	  tempgamma[v] = s = gamma[0] + stepf(1, v+1, rf[j], rf2[j]);
	  traceback[j][v] = 0;
	  
	  for(v2=1; v2<n_gen; v2++) {
	    t = gamma[v2] + stepf(v2+1, v+1, rf[j], rf2[j]);
	    if(t > s || (fabs(t-s) < TOL && unif_rand() < 0.5)) {
	      tempgamma[v] = s = t;
	      traceback[j][v] = v2;
	    }
	  }
	  tempgamma2[v] = tempgamma[v] + emitf(Geno[j+1][i], v+1, error_prob);
	}
	for(v=0; v<n_gen; v++) gamma[v] = tempgamma2[v];
      }
    
      /* finish off viterbi and then traceback to get most 
	 likely sequence of genotypes */
      Argmax[n_pos-1][i] = 0;
      s = gamma[0];
      for(v=1; v<n_gen; v++) {
	if(gamma[v] > s || (fabs(gamma[v]-s) < TOL && 
			    unif_rand() < 0.5)) {
	  s = gamma[v];
	  Argmax[n_pos-1][i] = v;
	}
      }
      for(j=n_pos-2; j >= 0; j--) 
	Argmax[j][i] = traceback[j][Argmax[j+1][i]];
    }
    else {  /* for exactly one marker */
      s = initf(1) + emitf(Geno[0][i], 1, error_prob);
      Argmax[0][i] = 0;
      for(v=1; v<n_gen; v++) {
	t = initf(v+1)+emitf(Geno[0][i], v+1, error_prob);
	if(t > s || (fabs(t-s) < TOL && unif_rand() < 0.5)) {
	  s = t;
	  Argmax[0][i] = v;
	}
      }
    }
    
    /* code genotypes as 1, 2, ... */
    for(j=0; j<n_pos; j++) Argmax[j][i]++;
    
  } /* loop over individuals */
  
  
  /* write R's random seed */
  PutRNGstate();
}
Exemplo n.º 10
0
void est_map(int n_ind, int n_mar, int n_gen, int *geno, double *rf, 
	     double *rf2, double error_prob, double initf(int), 
	     double emitf(int, int, double),
	     double stepf(int, int, double, double), 
	     double nrecf1(int, int), double nrecf2(int, int), 
	     double *loglik, int maxit, double tol, int sexsp, 
	     int verbose)
{
  int i, j, j2, v, v2, it, flag=0, **Geno, ndigits;
  double s, **alpha, **beta, **gamma, *cur_rf, *cur_rf2;
  double curloglik, maxdif, temp;
  char pattern[100], text[200];
  
  /* allocate space for beta and reorganize geno */
  reorg_geno(n_ind, n_mar, geno, &Geno);
  allocate_alpha(n_mar, n_gen, &alpha);
  allocate_alpha(n_mar, n_gen, &beta);
  allocate_dmatrix(n_gen, n_gen, &gamma);
  allocate_double(n_mar-1, &cur_rf);
  allocate_double(n_mar-1, &cur_rf2);

  /* digits in verbose output */
  if(verbose) {
    ndigits = (int)ceil(-log10(tol));
    if(ndigits > 16) ndigits=16;
    sprintf(pattern, "%s%d.%df", "%", ndigits+3, ndigits+1);
  }

  /* begin EM algorithm */
  for(it=0; it<maxit; it++) {

    for(j=0; j<n_mar-1; j++) {
      cur_rf[j] = cur_rf2[j] = rf[j];
      rf[j] = 0.0;
      if(sexsp) {
	cur_rf2[j] = rf2[j];
	rf2[j] = 0.0;
      }
    }

    for(i=0; i<n_ind; i++) { /* i = individual */

      R_CheckUserInterrupt(); /* check for ^C */

      /* initialize alpha and beta */
      for(v=0; v<n_gen; v++) {
	alpha[v][0] = initf(v+1) + emitf(Geno[0][i], v+1, error_prob);
	beta[v][n_mar-1] = 0.0;
      }

      /* forward-backward equations */
      for(j=1,j2=n_mar-2; j<n_mar; j++, j2--) {
	
	for(v=0; v<n_gen; v++) {
	  alpha[v][j] = alpha[0][j-1] + stepf(1, v+1, cur_rf[j-1], cur_rf2[j-1]);
	  
	  beta[v][j2] = beta[0][j2+1] + stepf(v+1,1,cur_rf[j2], cur_rf2[j2]) + 
	    emitf(Geno[j2+1][i],1,error_prob);
	  
	  for(v2=1; v2<n_gen; v2++) {
	    alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] + 
				 stepf(v2+1,v+1,cur_rf[j-1],cur_rf2[j-1]));
	    beta[v][j2] = addlog(beta[v][j2], beta[v2][j2+1] + 
				 stepf(v+1,v2+1,cur_rf[j2],cur_rf2[j2]) +
				 emitf(Geno[j2+1][i],v2+1,error_prob));
	  }
	  
	  alpha[v][j] += emitf(Geno[j][i],v+1,error_prob);
		 
	}

      }

      for(j=0; j<n_mar-1; j++) {

	/* calculate gamma = log Pr(v1, v2, O) */
	for(v=0, s=0.0; v<n_gen; v++) {
	  for(v2=0; v2<n_gen; v2++) {
	    gamma[v][v2] = alpha[v][j] + beta[v2][j+1] + 
	      emitf(Geno[j+1][i], v2+1, error_prob) +
	      stepf(v+1, v2+1, cur_rf[j], cur_rf2[j]);

	    if(v==0 && v2==0) s = gamma[v][v2];
	    else s = addlog(s, gamma[v][v2]);
	  }
	}

	for(v=0; v<n_gen; v++) {
	  for(v2=0; v2<n_gen; v2++) {
	    rf[j] += nrecf1(v+1,v2+1) * exp(gamma[v][v2] - s);
	    if(sexsp) rf2[j] += nrecf2(v+1,v2+1) * exp(gamma[v][v2] - s);
	  }
	}
      }

    } /* loop over individuals */

    /* rescale */
    for(j=0; j<n_mar-1; j++) {
      rf[j] /= (double)n_ind;
      if(rf[j] < tol/1000.0) rf[j] = tol/1000.0;
      else if(rf[j] > 0.5-tol/1000.0) rf[j] = 0.5-tol/1000.0;
      
      if(sexsp) {
	rf2[j] /= (double)n_ind;
	if(rf2[j] < tol/1000.0) rf2[j] = tol/1000.0;
	else if(rf2[j] > 0.5-tol/1000.0) rf2[j] = 0.5-tol/1000.0;
      }
      else rf2[j] = rf[j];
    }

    if(verbose>1) {
      /* print estimates as we go along*/
      Rprintf("   %4d ", it+1);
      maxdif=0.0;
      for(j=0; j<n_mar-1; j++) {
	temp = fabs(rf[j] - cur_rf[j])/(cur_rf[j]+tol*100.0);
	if(maxdif < temp) maxdif = temp;
	if(sexsp) {
	  temp = fabs(rf2[j] - cur_rf2[j])/(cur_rf2[j]+tol*100.0);
	  if(maxdif < temp) maxdif = temp;
	}
      }
      sprintf(text, "%s%s\n", "  max rel've change = ", pattern);
      Rprintf(text, maxdif);
    }

    /* check convergence */
    for(j=0, flag=0; j<n_mar-1; j++) {
      if(fabs(rf[j] - cur_rf[j]) > tol*(cur_rf[j]+tol*100.0) || 
	 (sexsp && fabs(rf2[j] - cur_rf2[j]) > tol*(cur_rf2[j]+tol*100.0))) {
	flag = 1; 
	break;
      }
    }

    if(!flag) break;

  } /* end EM algorithm */
  
  if(flag) warning("Didn't converge!\n");

  /* calculate log likelihood */
  *loglik = 0.0;
  for(i=0; i<n_ind; i++) { /* i = individual */
    /* initialize alpha */
    for(v=0; v<n_gen; v++) 
      alpha[v][0] = initf(v+1) + emitf(Geno[0][i], v+1, error_prob);
    /* forward equations */
    for(j=1; j<n_mar; j++) {
      for(v=0; v<n_gen; v++) {
	alpha[v][j] = alpha[0][j-1] + 
	  stepf(1, v+1, rf[j-1], rf2[j-1]);
	for(v2=1; v2<n_gen; v2++) 
	  alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] + 
			       stepf(v2+1,v+1,rf[j-1],rf2[j-1]));
	alpha[v][j] += emitf(Geno[j][i],v+1,error_prob);
      }
    }

    curloglik = alpha[0][n_mar-1];
    for(v=1; v<n_gen; v++) 
      curloglik = addlog(curloglik, alpha[v][n_mar-1]);
    *loglik += curloglik;
  }

  if(verbose) {
    if(verbose < 2) {
      /* print final estimates */
      Rprintf("  no. iterations = %d\n", it+1);
      maxdif=0.0;
      for(j=0; j<n_mar-1; j++) {
	temp = fabs(rf[j] - cur_rf[j])/(cur_rf[j]+tol*100.0);
	if(maxdif < temp) maxdif = temp;
	if(sexsp) {
	  temp = fabs(rf2[j] - cur_rf2[j])/(cur_rf2[j]+tol*100.0);
	  if(maxdif < temp) maxdif = temp;
	}
      }
      sprintf(text, "%s%s\n", "  max rel've change at last step = ", pattern);
      Rprintf(text, maxdif);
    }
    
    Rprintf("  loglik: %10.4lf\n\n", *loglik);
  }

}