/********************************************************************** * * sim_ril * * n_chr Number of chromosomes * n_mar Number of markers on each chromosome (vector of length n_chr) * n_ril Number of RILs to simulate * * map Vector of marker locations, of length sum(n_mar) * First marker on each chromosome should be at 0. * * n_str Number of parental strains (either 2, 4, or 8) * * m Interference parameter (0 is no interference) * p For Stahl model, proportion of chiasmata from the NI model * * include_x Whether the last chromosome is the X chromosome * * random_cross Indicates whether the order of the strains in the cross * should be randomized. * * selfing If 1, use selfing; if 0, use sib mating * * cross On output, the cross used for each line * (vector of length n_ril x n_str) * * ril On output, the simulated data * (vector of length sum(n_mar) x n_ril) * * origgeno Like ril, but with no missing data * * error_prob Genotyping error probability (used nly with n_str==2) * * missing_prob Rate of missing genotypes * * errors Error indicators (n_mar x n_ril) * **********************************************************************/ void sim_ril(int n_chr, int *n_mar, int n_ril, double *map, int n_str, int m, double p, int include_x, int random_cross, int selfing, int *cross, int *ril, int *origgeno, double error_prob, double missing_prob, int *errors) { int i, j, k, ngen, tot_mar, curseg; struct individual par1, par2, kid1, kid2; int **Ril, **Cross, **Errors, **OrigGeno; int maxwork, isX, flag, max_xo, *firstmarker; double **Map, maxlen, chrlen, *work; /* count total number of markers */ for(i=0, tot_mar=0; i<n_chr; i++) tot_mar += n_mar[i]; reorg_geno(tot_mar, n_ril, ril, &Ril); reorg_geno(n_str, n_ril, cross, &Cross); reorg_geno(tot_mar, n_ril, errors, &Errors); reorg_geno(tot_mar, n_ril, origgeno, &OrigGeno); /* allocate space */ Map = (double **)R_alloc(n_chr, sizeof(double *)); Map[0] = map; for(i=1; i<n_chr; i++) Map[i] = Map[i-1] + n_mar[i-1]; /* location of first marker */ firstmarker = (int *)R_alloc(n_chr, sizeof(int)); firstmarker[0] = 0; for(i=1; i<n_chr; i++) firstmarker[i] = firstmarker[i-1] + n_mar[i-1]; /* maximum chromosome length (in cM) */ maxlen = Map[0][n_mar[0]-1]; for(i=1; i<n_chr; i++) if(maxlen < Map[i][n_mar[i]-1]) maxlen = Map[i][n_mar[i]-1]; /* allocate space for individuals */ max_xo = (int)qpois(1e-10, maxlen/100.0, 0, 0)*6; if(!selfing) max_xo *= 5; allocate_individual(&par1, max_xo); allocate_individual(&par2, max_xo); allocate_individual(&kid1, max_xo); allocate_individual(&kid2, max_xo); maxwork = (int)qpois(1e-10, (m+1)*maxlen/50.0, 0, 0)*3; work = (double *)R_alloc(maxwork, sizeof(double)); for(i=0; i<n_ril; i++) { /* set up cross */ for(j=0; j<n_str; j++) Cross[i][j] = j+1; if(random_cross) int_permute(Cross[i], n_str); for(j=0; j<n_chr; j++) { isX = include_x && j==n_chr-1; chrlen = Map[j][n_mar[j]-1]; par1.n_xo[0] = par1.n_xo[1] = par2.n_xo[0] = par2.n_xo[1] = 0; /* initial generations */ if(n_str==2) { par1.allele[0][0] = par2.allele[0][0] = 1; par1.allele[1][0] = par2.allele[1][0] = 2; } else if(n_str==4) { par1.allele[0][0] = 1; par1.allele[1][0] = 2; par2.allele[0][0] = 3; par2.allele[1][0] = 4; } else { /* 8 strain case */ par1.allele[0][0] = 1; par1.allele[1][0] = 2; par2.allele[0][0] = 3; par2.allele[1][0] = 4; docross(par1, par2, &kid1, chrlen, m, p, 0, &maxwork, &work); par1.allele[0][0] = 5; par1.allele[1][0] = 6; par2.allele[0][0] = 7; par2.allele[1][0] = 8; docross(par1, par2, &kid2, chrlen, m, p, isX, &maxwork, &work); copy_individual(&kid1, &par1); copy_individual(&kid2, &par2); } /* start inbreeding */ ngen=1; while(1) { R_CheckUserInterrupt(); /* check for ^C */ docross(par1, par2, &kid1, chrlen, m, p, 0, &maxwork, &work); if(!selfing) docross(par1, par2, &kid2, chrlen, m, p, isX, &maxwork, &work); /* are we done? */ flag = 0; if(selfing) { if(kid1.n_xo[0] == kid1.n_xo[1]) { for(k=0; k<kid1.n_xo[0]; k++) { if(kid1.allele[0][k] != kid1.allele[1][k] || fabs(kid1.xoloc[0][k] - kid1.xoloc[1][k]) > 1e-6) { flag = 1; break; } } if(kid1.allele[0][kid1.n_xo[0]] != kid1.allele[1][kid1.n_xo[0]]) flag = 1; } else flag = 1; } else { if(kid1.n_xo[0] == kid1.n_xo[1] && kid1.n_xo[0] == kid2.n_xo[0] && kid1.n_xo[0] == kid2.n_xo[1]) { for(k=0; k<kid1.n_xo[0]; k++) { if(kid1.allele[0][k] != kid1.allele[1][k] || kid1.allele[0][k] != kid2.allele[0][k] || kid1.allele[0][k] != kid2.allele[1][k] || fabs(kid1.xoloc[0][k] - kid1.xoloc[1][k]) > 1e-6 || fabs(kid1.xoloc[0][k] - kid2.xoloc[0][k]) > 1e-6 || fabs(kid1.xoloc[0][k] - kid2.xoloc[1][k]) > 1e-6) { flag = 1; break; } } if(kid1.allele[0][kid1.n_xo[0]] != kid1.allele[1][kid1.n_xo[0]] || kid1.allele[0][kid1.n_xo[0]] != kid2.allele[0][kid1.n_xo[0]] || kid1.allele[0][kid1.n_xo[0]] != kid2.allele[1][kid1.n_xo[0]]) flag = 1; } else flag = 1; } if(!flag) break; /* done inbreeding */ /* go to next generation */ copy_individual(&kid1, &par1); if(selfing) copy_individual(&kid1, &par2); else copy_individual(&kid2, &par2); } /* end with inbreeding of this chromosome */ /* fill in alleles */ curseg = 0; for(k=0; k<n_mar[j]; k++) { /* loop over markers */ while(curseg < kid1.n_xo[0] && Map[j][k] > kid1.xoloc[0][curseg]) curseg++; OrigGeno[i][k+firstmarker[j]] = Ril[i][k+firstmarker[j]] = Cross[i][kid1.allele[0][curseg]-1]; /* simulate missing ? */ if(unif_rand() < missing_prob) { Ril[i][k+firstmarker[j]] = 0; } else if(n_str == 2 && unif_rand() < error_prob) { /* simulate error */ Ril[i][k+firstmarker[j]] = 3 - Ril[i][k+firstmarker[j]]; Errors[i][k+firstmarker[j]] = 1; } } } /* loop over chromosomes */ } /* loop over lines */ }
void mcmc_ms(int n_ind, int tot_mar, int *genotypes, double *phenotypes, double *xpx, int n_steps, int *n_qtl_id, int *qtl_id, double *neg_log_post, int *first_seen, int *index, int *indicate, double delta, int *n_qtl_id_list, double *post_list) { int size, sizesqm1, err; int i, j, k, flag; double cur_neg_log_post, cur_rss, next_rss; double prob; double t1, t2, t3; size = tot_mar + 2; sizesqm1 = size * size - 1; /* calculate x'x matrix */ calc_xpx(n_ind, size, genotypes, phenotypes, xpx); /* set up index for the initial sweep */ j = 0; for(i=1; i <= tot_mar; i++) { if(indicate[i]) { index[j] = i; j++; } } /* sweep initial set of markers */ sweep(xpx, size, index, j, &err); /* save rss for initial model */ *neg_log_post = log(xpx[sizesqm1]) + (double)indicate[0] * delta / (double)(n_ind); /* Rprintf(" %5d %2d %10.5lf %9.5lf %9.5lf\n", 0, indicate[0], xpx[sizesqm1], *neg_log_post, *neg_log_post); */ *n_qtl_id = indicate[0]; *first_seen = 0; for(j=1, k=0; j<= tot_mar; j++) { if(indicate[j]) { qtl_id[k] = j; k++; } } post_list[0] = *neg_log_post; n_qtl_id_list[0] = indicate[0]; /* index contains numbers 1, ..., tot_mar */ for(j=0; j< tot_mar; j++) index[j] = j+1; /* now begin MCMC */ for(i=1; i<= n_steps; i++) { /* permute list of markers */ int_permute(index, tot_mar); for(j=0; j< tot_mar; j++) { /* calculate rss with and without this marker in the model */ cur_rss = xpx[sizesqm1]; sweep(xpx, size, index+j, 1, &err); next_rss = xpx[sizesqm1]; /* calculate prob of keeping it or putting it into the model */ t1 = -0.5 * (double)(n_ind) * log(next_rss); t2 = -0.5 * (double)(n_ind) * log(cur_rss); t3 = 0.5 * delta; if(indicate[index[j]]) prob = 1.0/(1.0 + exp(t1 + t3 - t2)); else prob = 1.0/(1.0 + exp(t2 + t3 - t1)); /* take a bernoulli draw */ if(unif_rand() < prob) { /* variable should be in model */ /* var not yet in model */ if(!indicate[index[j]]) { indicate[index[j]] = 1; indicate[0]++; } /* variable already in model */ else { sweep(xpx, size, index+j, 1, &err); } } else { /* variable should not be in model */ /* var currently in model */ if(indicate[index[j]]) { indicate[index[j]] = 0; indicate[0]--; } /* variable not currently in model */ else { sweep(xpx, size, index+j, 1, &err); } } } /* better than currest best model? */ cur_neg_log_post = log(xpx[sizesqm1]) + (double)indicate[0] * delta / (double) n_ind; post_list[i] = cur_neg_log_post; n_qtl_id_list[i] = indicate[0]; /* Rprintf(" %5d %2d %10.5lf %9.5lf %9.5lf\n", i, indicate[0], xpx[sizesqm1], cur_neg_log_post, *neg_log_post); */ if(cur_neg_log_post < *neg_log_post) { /* new model is an improvement */ *neg_log_post = cur_neg_log_post; /* check that new model really is different */ flag = 0; if(*n_qtl_id == indicate[0]) { for(j=1, k=0; j<= tot_mar; j++) { if(k < *n_qtl_id && qtl_id[k] == j) { if(!indicate[j]) { flag = 1; break; } k++; } else if(indicate[j]) { flag = 1; break; } } } else flag = 1; if(flag) { /* new model really is different */ *n_qtl_id = indicate[0]; *first_seen = i; for(j=1, k=0; j<= tot_mar; j++) { if(indicate[j]) { qtl_id[k] = j; k++; } } } } } /* Rprintf("\t%d\n", *n_qtl_id); */ }