Esempio n. 1
0
/**********************************************************************
 * 
 * sim_ril
 * 
 * n_chr   Number of chromosomes
 * n_mar   Number of markers on each chromosome (vector of length n_chr)
 * n_ril   Number of RILs to simulate
 * 
 * map     Vector of marker locations, of length sum(n_mar)
 *         First marker on each chromosome should be at 0.
 *
 * n_str   Number of parental strains (either 2, 4, or 8)
 *
 * m       Interference parameter (0 is no interference)
 * p       For Stahl model, proportion of chiasmata from the NI model
 *
 * include_x   Whether the last chromosome is the X chromosome
 *
 * random_cross  Indicates whether the order of the strains in the cross
 *               should be randomized.
 *
 * selfing If 1, use selfing; if 0, use sib mating
 *
 * cross   On output, the cross used for each line 
 *         (vector of length n_ril x n_str)
 *
 * ril     On output, the simulated data 
 *         (vector of length sum(n_mar) x n_ril)
 *
 * origgeno       Like ril, but with no missing data
 *
 * error_prob     Genotyping error probability (used nly with n_str==2)
 *
 * missing_prob   Rate of missing genotypes
 *
 * errors         Error indicators (n_mar x n_ril)
 *
 **********************************************************************/
void sim_ril(int n_chr, int *n_mar, int n_ril, double *map, 
	     int n_str, int m, double p, int include_x, 
	     int random_cross, int selfing, int *cross, int *ril,
	     int *origgeno, 
	     double error_prob, double missing_prob, int *errors)
{
  int i, j, k, ngen, tot_mar, curseg;
  struct individual par1, par2, kid1, kid2;
  int **Ril, **Cross, **Errors, **OrigGeno; 
  int maxwork, isX, flag, max_xo, *firstmarker;
  double **Map, maxlen, chrlen, *work;

 /* count total number of markers */
  for(i=0, tot_mar=0; i<n_chr; i++) 
    tot_mar += n_mar[i];

  reorg_geno(tot_mar, n_ril, ril, &Ril);
  reorg_geno(n_str, n_ril, cross, &Cross);
  reorg_geno(tot_mar, n_ril, errors, &Errors);
  reorg_geno(tot_mar, n_ril, origgeno, &OrigGeno);

  /* allocate space */
  Map = (double **)R_alloc(n_chr, sizeof(double *));
  Map[0] = map;
  for(i=1; i<n_chr; i++)
    Map[i] = Map[i-1] + n_mar[i-1];

  /* location of first marker */
  firstmarker = (int *)R_alloc(n_chr, sizeof(int));
  firstmarker[0] = 0;
  for(i=1; i<n_chr; i++) 
    firstmarker[i] = firstmarker[i-1] + n_mar[i-1];

  /* maximum chromosome length (in cM) */
  maxlen = Map[0][n_mar[0]-1];
  for(i=1; i<n_chr; i++)
    if(maxlen < Map[i][n_mar[i]-1])
      maxlen =  Map[i][n_mar[i]-1];

  /* allocate space for individuals */
  max_xo = (int)qpois(1e-10, maxlen/100.0, 0, 0)*6;
  if(!selfing) max_xo *= 5;
  allocate_individual(&par1, max_xo);
  allocate_individual(&par2, max_xo);
  allocate_individual(&kid1, max_xo);
  allocate_individual(&kid2, max_xo);
  maxwork = (int)qpois(1e-10, (m+1)*maxlen/50.0, 0, 0)*3;
  work = (double *)R_alloc(maxwork, sizeof(double));

  for(i=0; i<n_ril; i++) {

    /* set up cross */
    for(j=0; j<n_str; j++) Cross[i][j] = j+1;
    if(random_cross) int_permute(Cross[i], n_str);

    for(j=0; j<n_chr; j++) {
      isX = include_x && j==n_chr-1;

      chrlen = Map[j][n_mar[j]-1];

      par1.n_xo[0] = par1.n_xo[1] = par2.n_xo[0] = par2.n_xo[1] = 0;

      /* initial generations */
      if(n_str==2) {
	par1.allele[0][0] = par2.allele[0][0] = 1;
	par1.allele[1][0] = par2.allele[1][0] = 2;
      }
      else if(n_str==4) {
	par1.allele[0][0] = 1;
	par1.allele[1][0] = 2;
	par2.allele[0][0] = 3;
	par2.allele[1][0] = 4;
      }
      else { /* 8 strain case */
	par1.allele[0][0] = 1;
	par1.allele[1][0] = 2;
	par2.allele[0][0] = 3;
	par2.allele[1][0] = 4;

	docross(par1, par2, &kid1, chrlen, m, p, 0, 
	      &maxwork, &work);

	par1.allele[0][0] = 5;
	par1.allele[1][0] = 6;
	par2.allele[0][0] = 7;
	par2.allele[1][0] = 8;

	docross(par1, par2, &kid2, chrlen, m, p, isX,
	      &maxwork, &work);

	copy_individual(&kid1, &par1);
	copy_individual(&kid2, &par2);
      }

      /* start inbreeding */
      ngen=1;
      while(1) {
	R_CheckUserInterrupt(); /* check for ^C */

	docross(par1, par2, &kid1, chrlen, m, p, 0,
		&maxwork, &work);
	if(!selfing) 
	  docross(par1, par2, &kid2, chrlen, m, p, isX,
		  &maxwork, &work);

	/* are we done? */
	flag = 0;
	if(selfing) {
	  if(kid1.n_xo[0] == kid1.n_xo[1]) {
	    for(k=0; k<kid1.n_xo[0]; k++) {
	      if(kid1.allele[0][k] != kid1.allele[1][k] ||
		 fabs(kid1.xoloc[0][k] - kid1.xoloc[1][k]) > 1e-6) {
		flag = 1;
		break;
	      }
	    }
	    if(kid1.allele[0][kid1.n_xo[0]] != kid1.allele[1][kid1.n_xo[0]])
	      flag = 1;
	  }
	  else flag = 1;
	}
	else {
	  if(kid1.n_xo[0] == kid1.n_xo[1] && 
	     kid1.n_xo[0] == kid2.n_xo[0] && 
	     kid1.n_xo[0] == kid2.n_xo[1]) {
	    for(k=0; k<kid1.n_xo[0]; k++) {
	      if(kid1.allele[0][k] != kid1.allele[1][k] ||
		 kid1.allele[0][k] != kid2.allele[0][k] ||
		 kid1.allele[0][k] != kid2.allele[1][k] ||
		 fabs(kid1.xoloc[0][k] - kid1.xoloc[1][k]) > 1e-6 ||
		 fabs(kid1.xoloc[0][k] - kid2.xoloc[0][k]) > 1e-6 ||
		 fabs(kid1.xoloc[0][k] - kid2.xoloc[1][k]) > 1e-6) {
		flag = 1;
		break;
	      }
	    }
	    if(kid1.allele[0][kid1.n_xo[0]] != kid1.allele[1][kid1.n_xo[0]] ||
	       kid1.allele[0][kid1.n_xo[0]] != kid2.allele[0][kid1.n_xo[0]] ||
	       kid1.allele[0][kid1.n_xo[0]] != kid2.allele[1][kid1.n_xo[0]]) 
	      flag = 1;
	  }
	  else flag = 1;
	}

	if(!flag) break; /* done inbreeding */

	/* go to next generation */
	copy_individual(&kid1, &par1);
	if(selfing) copy_individual(&kid1, &par2);
	else copy_individual(&kid2, &par2);

      } /* end with inbreeding of this chromosome */

      /* fill in alleles */
      curseg = 0;
      for(k=0; k<n_mar[j]; k++) { /* loop over markers */
	while(curseg < kid1.n_xo[0] && Map[j][k] > kid1.xoloc[0][curseg]) 
	  curseg++;
	  
	OrigGeno[i][k+firstmarker[j]] = 
	  Ril[i][k+firstmarker[j]] = Cross[i][kid1.allele[0][curseg]-1];

	/* simulate missing ? */
	if(unif_rand() < missing_prob) {
	  Ril[i][k+firstmarker[j]] = 0;
	}
	else if(n_str == 2 && unif_rand() < error_prob) {
	  /* simulate error */
	  Ril[i][k+firstmarker[j]] = 3 - Ril[i][k+firstmarker[j]];
	  Errors[i][k+firstmarker[j]] = 1;
	}
      }

    } /* loop over chromosomes */

  } /* loop over lines */
}
Esempio n. 2
0
void mcmc_ms(int n_ind, int tot_mar,
             int *genotypes, double *phenotypes, double *xpx,
             int n_steps, int *n_qtl_id, int *qtl_id,
             double *neg_log_post, int *first_seen, int *index,
         int *indicate, double delta, int *n_qtl_id_list,
         double *post_list)
{
  int size, sizesqm1, err;
  int i, j, k, flag;
  double cur_neg_log_post, cur_rss, next_rss;
  double prob;
  double t1, t2, t3;

  size = tot_mar + 2;
  sizesqm1 = size * size - 1;

  /* calculate x'x matrix */
  calc_xpx(n_ind, size, genotypes, phenotypes, xpx);

  /* set up index for the initial sweep */
  j = 0;
  for(i=1; i <= tot_mar; i++) {
    if(indicate[i]) {
      index[j] = i;
      j++;
    }
  }

  /* sweep initial set of markers */
  sweep(xpx, size, index, j, &err);

  /* save rss for initial model */
  *neg_log_post = log(xpx[sizesqm1]) + (double)indicate[0] * delta /
    (double)(n_ind);
  /*  Rprintf("    %5d %2d %10.5lf %9.5lf %9.5lf\n", 0, indicate[0],
      xpx[sizesqm1], *neg_log_post, *neg_log_post);  */
  *n_qtl_id = indicate[0];
  *first_seen = 0;
  for(j=1, k=0; j<= tot_mar; j++) {
    if(indicate[j]) {
      qtl_id[k] = j;
      k++;
    }
  }
  post_list[0] = *neg_log_post;
  n_qtl_id_list[0] = indicate[0];

  /* index contains numbers 1, ..., tot_mar */
  for(j=0; j< tot_mar; j++) index[j] = j+1;

  /* now begin MCMC */
  for(i=1; i<= n_steps; i++) {

    /* permute list of markers */
    int_permute(index, tot_mar);

    for(j=0; j< tot_mar; j++) {
      /* calculate rss with and without this marker in the model */
      cur_rss = xpx[sizesqm1];
      sweep(xpx, size, index+j, 1, &err);
      next_rss = xpx[sizesqm1];

      /* calculate prob of keeping it or putting it into the model */
      t1 = -0.5 * (double)(n_ind) * log(next_rss);
      t2 = -0.5 * (double)(n_ind) * log(cur_rss);
      t3 = 0.5 * delta;
      if(indicate[index[j]])
    prob = 1.0/(1.0 + exp(t1 + t3 - t2));
      else
    prob = 1.0/(1.0 + exp(t2 + t3 - t1));

      /* take a bernoulli draw */
      if(unif_rand() < prob) { /* variable should be in model */
        /* var not yet in model */
        if(!indicate[index[j]]) {
          indicate[index[j]] = 1;
          indicate[0]++;
        }
        /* variable already in model */
        else {
          sweep(xpx, size, index+j, 1, &err);
        }
      }
      else { /* variable should not be in model */
        /* var currently in model */
        if(indicate[index[j]]) {
          indicate[index[j]] = 0;
          indicate[0]--;
        }
        /* variable not currently in model */
        else {
          sweep(xpx, size, index+j, 1, &err);
        }
      }
    }

    /* better than currest best model? */
    cur_neg_log_post = log(xpx[sizesqm1]) + (double)indicate[0] * delta /
      (double) n_ind;
    post_list[i] = cur_neg_log_post;
    n_qtl_id_list[i] = indicate[0];

    /*    Rprintf("    %5d %2d %10.5lf %9.5lf %9.5lf\n", i, indicate[0],
      xpx[sizesqm1], cur_neg_log_post, *neg_log_post);   */
    if(cur_neg_log_post < *neg_log_post) {  /* new model is an improvement */
      *neg_log_post = cur_neg_log_post;

      /* check that new model really is different */
      flag = 0;
      if(*n_qtl_id == indicate[0]) {
    for(j=1, k=0; j<= tot_mar; j++) {
      if(k < *n_qtl_id && qtl_id[k] == j) {
        if(!indicate[j]) {
          flag = 1;
          break;
        }
        k++;
      }
      else
        if(indicate[j]) {
          flag = 1;
          break;
        }
    }
      }
      else flag = 1;

      if(flag) {  /* new model really is different */
    *n_qtl_id = indicate[0];
    *first_seen = i;
    for(j=1, k=0; j<= tot_mar; j++) {
      if(indicate[j]) {
        qtl_id[k] = j;
        k++;
      }
    }
      }

    }
  }
  /*  Rprintf("\t%d\n", *n_qtl_id); */
}