/********************************************************************** * step_bci * * Calculate transition probabilities (for all intervals) for * the Stahl model **********************************************************************/ void step_bci(int n_mar, int n_states, double ***tm, double *d, int m, double p, int maxit, double tol) { int i, v1, v2; double *the_distinct_tm; double *fms_bci_result; double lambda1, lambda2, rfp; allocate_double(2*m+1, &fms_bci_result); allocate_double(3*m+2, &the_distinct_tm); for(i=0; i<n_mar-1; i++) { R_CheckUserInterrupt(); /* check for ^C */ lambda1 = d[i]*(1-p)*(double)(m+1)*2.0; lambda2 = d[i]*p*2.0; rfp = 0.5*(1.0 - exp(-lambda2)); fms_bci(lambda1, fms_bci_result, m, tol, maxit); distinct_tm_bci(lambda1, the_distinct_tm, m, fms_bci_result); for(v1=0; v1<n_states; v1++) { for(v2=0; v2<n_states; v2++) { tm[v1][v2][i] = tm_bci(v1, v2, the_distinct_tm, m); if(p > 0) tm[v1][v2][i] = (1.0-rfp)*tm[v1][v2][i] + rfp*tm_bci(v1, (v2+m+1) % (2*m+2), the_distinct_tm, m); tm[v1][v2][i] = log(tm[v1][v2][i]); } } } }
void R_discan_mr(int *n_ind, int *n_pos, int *n_gen, int *geno, int *pheno, double *result) { int **Geno; double *means; reorg_geno(*n_ind, *n_pos, geno, &Geno); allocate_double(*n_gen, &means); discan_mr(*n_ind, *n_pos, *n_gen, Geno, pheno, result, means); }
void R_discan_im(int *n_ind, int *n_pos, int *n_gen, double *genoprob, int *pheno, double *result, int *maxit, double *tol) { double ***Genoprob, **work; double *means; reorg_genoprob(*n_ind, *n_pos, *n_gen, genoprob, &Genoprob); allocate_dmatrix(3, *n_gen, &work); allocate_double(*n_gen, &means); discan_im(*n_ind, *n_pos, *n_gen, Genoprob, pheno, result, *maxit, *tol, work, means); }
void calc_errorlod(int n_ind, int n_mar, int n_gen, int *geno, double error_prob, double *genoprob, double *errlod, double errorlod(int, double *, double)) { int i, j, k, **Geno; double *p, ***Genoprob, **Errlod; /* reorganize geno, genoprob and errlod */ reorg_geno(n_ind, n_mar, geno, &Geno); reorg_genoprob(n_ind, n_mar, n_gen, genoprob, &Genoprob); reorg_errlod(n_ind, n_mar, errlod, &Errlod); allocate_double(n_gen, &p); for(i=0; i<n_ind; i++) { R_CheckUserInterrupt(); /* check for ^C */ for(j=0; j<n_mar; j++) { for(k=0; k<n_gen; k++) p[k] = Genoprob[k][j][i]; Errlod[j][i] = errorlod(Geno[j][i], p, error_prob); } } }
void est_map_bci(int n_ind, int n_mar, int *geno, double *d, int m, double p, double error_prob, double *loglik, int maxit, double tol, int verbose) { int i, j, j2, v, v2, it, flag=0, **Geno, n_states; double s, **alpha, **beta, **gamma, *cur_d, *rf; double ***tm, *temp; double curloglik; double initprob; int ndigits; double maxdif, tempdif; char pattern[100], text[200]; n_states = 2*(m+1); initprob = -log((double)n_states); /* allocate space for beta and reorganize geno */ reorg_geno(n_ind, n_mar, geno, &Geno); allocate_alpha(n_mar, n_states, &alpha); allocate_alpha(n_mar, n_states, &beta); allocate_dmatrix(n_states, n_states, &gamma); allocate_double(n_mar-1, &cur_d); allocate_double(n_mar-1, &rf); /* allocate space for the transition matrices */ /* size n_states x n_states x (n_mar-1) */ /* tm[state1][state2][interval] */ tm = (double ***)R_alloc(n_states, sizeof(double **)); tm[0] = (double **)R_alloc(n_states * n_states, sizeof(double *)); for(i=1; i<n_states; i++) tm[i] = tm[i-1] + n_states; tm[0][0] = (double *)R_alloc(n_states * n_states * (n_mar - 1), sizeof(double)); temp = tm[0][0]; for(i=0; i < n_states; i++) { for(j=0; j < n_states; j++) { tm[i][j] = temp; temp += n_mar-1; } } /* digits in verbose output */ if(verbose) { ndigits = (int)ceil(-log10(tol)); if(ndigits > 16) ndigits=16; sprintf(pattern, "%s%d.%df", "%", ndigits+3, ndigits+1); } for(j=0; j<n_mar-1; j++) d[j] /= 100.0; /* convert to Morgans */ /* begin EM algorithm */ for(it=0; it<maxit; it++) { for(j=0; j<n_mar-1; j++) { cur_d[j] = d[j]; rf[j] = 0.0; } /* calculate the transition matrices */ step_bci(n_mar, n_states, tm, cur_d, m, p, maxit, tol); for(i=0; i<n_ind; i++) { /* i = individual */ R_CheckUserInterrupt(); /* check for ^C */ /* initialize alpha and beta */ for(v=0; v<n_states; v++) { alpha[v][0] = initprob + emit_bci(Geno[0][i], v, error_prob, m); beta[v][n_mar-1] = 0.0; } /* forward-backward equations */ for(j=1,j2=n_mar-2; j<n_mar; j++, j2--) { for(v=0; v<n_states; v++) { alpha[v][j] = alpha[0][j-1] + tm[0][v][j-1]; beta[v][j2] = beta[0][j2+1] + tm[v][0][j2] + emit_bci(Geno[j2+1][i], 0, error_prob, m); for(v2=1; v2<n_states; v2++) { alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] + tm[v2][v][j-1]); beta[v][j2] = addlog(beta[v][j2], beta[v2][j2+1] + tm[v][v2][j2] + emit_bci(Geno[j2+1][i], v2, error_prob, m)); } alpha[v][j] += emit_bci(Geno[j][i], v, error_prob, m); } } for(j=0; j<n_mar-1; j++) { /* calculate gamma = log Pr(v1, v2, O) */ for(v=0, s=0.0; v<n_states; v++) { for(v2=0; v2<n_states; v2++) { gamma[v][v2] = alpha[v][j] + beta[v2][j+1] + emit_bci(Geno[j+1][i], v2, error_prob, m) + tm[v][v2][j]; if(v==0 && v2==0) s = gamma[v][v2]; else s = addlog(s, gamma[v][v2]); } } for(v=0; v<n_states; v++) { for(v2=0; v2<n_states; v2++) { rf[j] += nrec_bci(v, v2, m) * exp(gamma[v][v2] - s); } } } } /* loop over individuals */ /* rescale */ for(j=0; j<n_mar-1; j++) { rf[j] /= (double)n_ind; /* if(rf[j] < tol/100.0) rf[j] = tol/100.0; else if(rf[j] > 0.5-tol/100.0) rf[j] = 0.5-tol/100.0; */ } /* use map function to convert back to distances */ for(j=0; j<n_mar-1; j++) d[j] = imf_stahl(rf[j], m, p, 1e-10, 1000); if(verbose>1) { /* print estimates as we go along*/ Rprintf(" %4d ", it+1); maxdif=0.0; for(j=0; j<n_mar-1; j++) { tempdif = fabs(d[j] - cur_d[j])/(cur_d[j]+tol*100.0); if(maxdif < tempdif) maxdif = tempdif; } sprintf(text, "%s%s\n", " max rel've change = ", pattern); Rprintf(text, maxdif); } /* check convergence */ for(j=0, flag=0; j<n_mar-1; j++) { if(fabs(d[j] - cur_d[j]) > tol*(cur_d[j]+tol*100.0)) { flag = 1; break; } } if(!flag) break; } /* end EM algorithm */ if(flag) warning("Didn't converge!\n"); /* re-calculate transition matrices */ step_bci(n_mar, n_states, tm, d, m, p, maxit, tol); /* calculate log likelihood */ *loglik = 0.0; for(i=0; i<n_ind; i++) { /* i = individual */ R_CheckUserInterrupt(); /* check for ^C */ /* initialize alpha */ for(v=0; v<n_states; v++) alpha[v][0] = initprob + emit_bci(Geno[0][i], v, error_prob, m); /* forward equations */ for(j=1; j<n_mar; j++) { for(v=0; v<n_states; v++) { alpha[v][j] = alpha[0][j-1] + tm[0][v][j-1]; for(v2=1; v2<n_states; v2++) alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] + tm[v2][v][j-1]); alpha[v][j] += emit_bci(Geno[j][i], v, error_prob, m); } } curloglik = alpha[0][n_mar-1]; for(v=1; v<n_states; v++) curloglik = addlog(curloglik, alpha[v][n_mar-1]); *loglik += curloglik; } if(verbose) { if(verbose < 2) { /* print final estimates */ Rprintf(" no. iterations = %d\n", it+1); maxdif=0.0; for(j=0; j<n_mar-1; j++) { tempdif = fabs(d[j] - cur_d[j])/(cur_d[j]+tol*100.0); if(maxdif < tempdif) maxdif = tempdif; } sprintf(text, "%s%s\n", " max rel've change at last step = ", pattern); Rprintf(text, maxdif); } Rprintf(" loglik: %10.4lf\n\n", *loglik); } /* convert distances back to cM */ for(j=0; j<n_mar-1; j++) d[j] *= 100.0; }
void est_map_f2i(int n_ind, int n_mar, int *geno, double *d, int m, double p, double error_prob, double *loglik, int maxit, double tol, int verbose) { int i, j, j2, v, v2, it, flag=0, **Geno, n_states, n_bcstates; double s, **alpha, **beta, **gamma, *cur_d, *rf; double ***tm, *temp; double curloglik; double initprob; n_bcstates = 2*(m+1); n_states = n_bcstates*n_bcstates; initprob = -log((double)n_states); /* allocate space for beta and reorganize geno */ reorg_geno(n_ind, n_mar, geno, &Geno); allocate_alpha(n_mar, n_states, &alpha); allocate_alpha(n_mar, n_states, &beta); allocate_dmatrix(n_states, n_states, &gamma); allocate_double(n_mar-1, &cur_d); allocate_double(n_mar-1, &rf); /* allocate space for the [backcross] transition matrices */ /* size n_states x n_states x (n_mar-1) */ /* tm[state1][state2][interval] */ tm = (double ***)R_alloc(n_bcstates, sizeof(double **)); tm[0] = (double **)R_alloc(n_bcstates * n_bcstates, sizeof(double *)); for(i=1; i<n_bcstates; i++) tm[i] = tm[i-1] + n_bcstates; tm[0][0] = (double *)R_alloc(n_bcstates * n_bcstates * (n_mar - 1), sizeof(double)); temp = tm[0][0]; for(i=0; i < n_bcstates; i++) { for(j=0; j < n_bcstates; j++) { tm[i][j] = temp; temp += n_mar-1; } } if(verbose) { /* print initial estimates */ Rprintf(" "); for(j=0; j<n_mar-1; j++) Rprintf("%.3lf ", d[j]); Rprintf("\n"); } for(j=0; j<n_mar-1; j++) d[j] /= 100.0; /* convert to Morgans */ /* begin EM algorithm */ for(it=0; it<maxit; it++) { for(j=0; j<n_mar-1; j++) { cur_d[j] = d[j]; rf[j] = 0.0; } /* calculate the transition matrices [for BC] */ step_bci(n_mar, n_bcstates, tm, cur_d, m, p, maxit, tol); for(i=0; i<n_ind; i++) { /* i = individual */ R_CheckUserInterrupt(); /* check for ^C */ /* initialize alpha and beta */ for(v=0; v<n_states; v++) { alpha[v][0] = initprob + emit_f2i(Geno[0][i], v, error_prob, m, n_bcstates); beta[v][n_mar-1] = 0.0; } /* forward-backward equations */ for(j=1,j2=n_mar-2; j<n_mar; j++, j2--) { for(v=0; v<n_states; v++) { alpha[v][j] = alpha[0][j-1] + step_f2i(0, v, j-1, tm, n_bcstates); beta[v][j2] = beta[0][j2+1] + step_f2i(v, 0, j2, tm, n_bcstates) + emit_f2i(Geno[j2+1][i], 0, error_prob, m, n_bcstates); for(v2=1; v2<n_states; v2++) { alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] + step_f2i(v2, v, j-1, tm, n_bcstates)); beta[v][j2] = addlog(beta[v][j2], beta[v2][j2+1] + step_f2i(v, v2, j2, tm, n_bcstates) + emit_f2i(Geno[j2+1][i], v2, error_prob, m, n_bcstates)); } alpha[v][j] += emit_f2i(Geno[j][i], v, error_prob, m, n_bcstates); } } for(j=0; j<n_mar-1; j++) { /* calculate gamma = log Pr(v1, v2, O) */ for(v=0, s=0.0; v<n_states; v++) { for(v2=0; v2<n_states; v2++) { gamma[v][v2] = alpha[v][j] + beta[v2][j+1] + emit_f2i(Geno[j+1][i], v2, error_prob, m, n_bcstates) + step_f2i(v, v2, j, tm, n_bcstates); if(v==0 && v2==0) s = gamma[v][v2]; else s = addlog(s, gamma[v][v2]); } } for(v=0; v<n_states; v++) { for(v2=0; v2<n_states; v2++) { rf[j] += nrec_f2i(v, v2, m, n_bcstates) * exp(gamma[v][v2] - s); } } } } /* loop over individuals */ /* rescale */ for(j=0; j<n_mar-1; j++) { rf[j] /= (double)n_ind; /* if(rf[j] < tol/100.0) rf[j] = tol/100.0; else if(rf[j] > 0.5-tol/100.0) rf[j] = 0.5-tol/100.0; */ } /* use map function to convert back to distances */ for(j=0; j<n_mar-1; j++) d[j] = imf_stahl(rf[j], m, p, 1e-10, 1000); if(verbose > 1) { /* print some debugging stuff */ if(verbose == 2) Rprintf("Iteration"); Rprintf(" %4d ", it+1); if(verbose > 2) for(j=0; j<n_mar-1; j++) Rprintf("%.3lf ", d[j]*100.0); Rprintf("\n"); } /* check convergence */ for(j=0, flag=0; j<n_mar-1; j++) { if(fabs(d[j] - cur_d[j]) > tol*(cur_d[j]+tol*100.0)) { flag = 1; break; } } if(!flag) break; } /* end EM algorithm */ if(flag) warning("Didn't converge!\n"); /* re-calculate transition matrices */ step_bci(n_mar, n_bcstates, tm, d, m, p, maxit, tol); /* calculate log likelihood */ *loglik = 0.0; for(i=0; i<n_ind; i++) { /* i = individual */ /* initialize alpha */ for(v=0; v<n_states; v++) alpha[v][0] = initprob + emit_f2i(Geno[0][i], v, error_prob, m, n_bcstates); /* forward equations */ for(j=1; j<n_mar; j++) { for(v=0; v<n_states; v++) { alpha[v][j] = alpha[0][j-1] + step_f2i(0, v, j-1, tm, n_bcstates); for(v2=1; v2<n_states; v2++) alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] + step_f2i(v2, v, j-1, tm, n_bcstates)); alpha[v][j] += emit_f2i(Geno[j][i], v, error_prob, m, n_bcstates); } } curloglik = alpha[0][n_mar-1]; for(v=1; v<n_states; v++) curloglik = addlog(curloglik, alpha[v][n_mar-1]); *loglik += curloglik; } /* convert distances back to cM */ for(j=0; j<n_mar-1; j++) d[j] *= 100.0; if(verbose) { /* print final estimates */ Rprintf(" %4d ", it+1); for(j=0; j<n_mar-1; j++) Rprintf("%.3lf ", d[j]); Rprintf("\n"); Rprintf("loglik: %10.4lf\n\n", *loglik); } }
void argmax_geno(int n_ind, int n_pos, int n_gen, int *geno, double *rf, double *rf2, double error_prob, int *argmax, double initf(int, int *), double emitf(int, int, double, int *), double stepf(int, int, double, double, int *)) { int i, j, v, v2; double s, t, *gamma, *tempgamma, *tempgamma2; int **Geno, **Argmax, **traceback; int cross_scheme[2]; /* cross scheme hidden in argmax argument; used by hmm_bcsft */ cross_scheme[0] = argmax[0]; cross_scheme[1] = argmax[1]; argmax[0] = geno[0]; argmax[1] = geno[1]; /* Read R's random seed */ /* in the case of multiple "most likely" genotype sequences, we pick from them at random */ GetRNGstate(); /* allocate space and reorganize geno and argmax */ reorg_geno(n_ind, n_pos, geno, &Geno); reorg_geno(n_ind, n_pos, argmax, &Argmax); allocate_imatrix(n_pos, n_gen, &traceback); allocate_double(n_gen, &gamma); allocate_double(n_gen, &tempgamma); allocate_double(n_gen, &tempgamma2); for(i=0; i<n_ind; i++) { /* i = individual */ R_CheckUserInterrupt(); /* check for ^C */ /* begin viterbi algorithm */ if(n_pos > 1) { /* multiple markers */ for(v=0; v<n_gen; v++) gamma[v] = initf(v+1, cross_scheme) + emitf(Geno[0][i], v+1, error_prob, cross_scheme); for(j=0; j<n_pos-1; j++) { for(v=0; v<n_gen; v++) { tempgamma[v] = s = gamma[0] + stepf(1, v+1, rf[j], rf2[j], cross_scheme); traceback[j][v] = 0; for(v2=1; v2<n_gen; v2++) { t = gamma[v2] + stepf(v2+1, v+1, rf[j], rf2[j], cross_scheme); if(t > s || (fabs(t-s) < TOL && unif_rand() < 0.5)) { tempgamma[v] = s = t; traceback[j][v] = v2; } } tempgamma2[v] = tempgamma[v] + emitf(Geno[j+1][i], v+1, error_prob, cross_scheme); } for(v=0; v<n_gen; v++) gamma[v] = tempgamma2[v]; } /* finish off viterbi and then traceback to get most likely sequence of genotypes */ Argmax[n_pos-1][i] = 0; s = gamma[0]; for(v=1; v<n_gen; v++) { if(gamma[v] > s || (fabs(gamma[v]-s) < TOL && unif_rand() < 0.5)) { s = gamma[v]; Argmax[n_pos-1][i] = v; } } for(j=n_pos-2; j >= 0; j--) Argmax[j][i] = traceback[j][Argmax[j+1][i]]; } else { /* for exactly one marker */ s = initf(1, cross_scheme) + emitf(Geno[0][i], 1, error_prob, cross_scheme); Argmax[0][i] = 0; for(v=1; v<n_gen; v++) { t = initf(v+1, cross_scheme) + emitf(Geno[0][i], v+1, error_prob, cross_scheme); if(t > s || (fabs(t-s) < TOL && unif_rand() < 0.5)) { s = t; Argmax[0][i] = v; } } } /* code genotypes as 1, 2, ... */ for(j=0; j<n_pos; j++) Argmax[j][i]++; } /* loop over individuals */ /* write R's random seed */ PutRNGstate(); }
void est_map(int n_ind, int n_mar, int n_gen, int *geno, double *rf, double *rf2, double error_prob, double initf(int, int *), double emitf(int, int, double, int *), double stepf(int, int, double, double, int *), double nrecf1(int, int, double, int*), double nrecf2(int, int, double, int*), double *loglik, int maxit, double tol, int sexsp, int verbose) { int i, j, j2, v, v2, it, flag=0, **Geno, ndigits; double s, **alpha, **beta, **gamma, *cur_rf, *cur_rf2; double curloglik, maxdif, temp; char pattern[100], text[200]; int cross_scheme[2]; /* cross scheme hidden in loglik argument; used by hmm_bcsft */ cross_scheme[0] = (int) ftrunc(*loglik / 1000.0); cross_scheme[1] = ((int) *loglik) - 1000 * cross_scheme[0]; *loglik = 0.0; /* allocate space for beta and reorganize geno */ reorg_geno(n_ind, n_mar, geno, &Geno); allocate_alpha(n_mar, n_gen, &alpha); allocate_alpha(n_mar, n_gen, &beta); allocate_dmatrix(n_gen, n_gen, &gamma); allocate_double(n_mar-1, &cur_rf); allocate_double(n_mar-1, &cur_rf2); /* digits in verbose output */ if(verbose) { ndigits = (int)ceil(-log10(tol)); if(ndigits > 16) ndigits=16; sprintf(pattern, "%s%d.%df", "%", ndigits+3, ndigits+1); } /* begin EM algorithm */ for(it=0; it<maxit; it++) { for(j=0; j<n_mar-1; j++) { cur_rf[j] = cur_rf2[j] = rf[j]; rf[j] = 0.0; if(sexsp) { cur_rf2[j] = rf2[j]; rf2[j] = 0.0; } } for(i=0; i<n_ind; i++) { /* i = individual */ R_CheckUserInterrupt(); /* check for ^C */ /* initialize alpha and beta */ for(v=0; v<n_gen; v++) { alpha[v][0] = initf(v+1, cross_scheme) + emitf(Geno[0][i], v+1, error_prob, cross_scheme); beta[v][n_mar-1] = 0.0; } /* forward-backward equations */ for(j=1,j2=n_mar-2; j<n_mar; j++, j2--) { for(v=0; v<n_gen; v++) { alpha[v][j] = alpha[0][j-1] + stepf(1, v+1, cur_rf[j-1], cur_rf2[j-1], cross_scheme); beta[v][j2] = beta[0][j2+1] + stepf(v+1,1,cur_rf[j2], cur_rf2[j2], cross_scheme) + emitf(Geno[j2+1][i],1,error_prob, cross_scheme); for(v2=1; v2<n_gen; v2++) { alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] + stepf(v2+1,v+1,cur_rf[j-1],cur_rf2[j-1], cross_scheme)); beta[v][j2] = addlog(beta[v][j2], beta[v2][j2+1] + stepf(v+1,v2+1,cur_rf[j2],cur_rf2[j2], cross_scheme) + emitf(Geno[j2+1][i],v2+1,error_prob, cross_scheme)); } alpha[v][j] += emitf(Geno[j][i],v+1,error_prob, cross_scheme); } } for(j=0; j<n_mar-1; j++) { /* calculate gamma = log Pr(v1, v2, O) */ for(v=0, s=0.0; v<n_gen; v++) { for(v2=0; v2<n_gen; v2++) { gamma[v][v2] = alpha[v][j] + beta[v2][j+1] + emitf(Geno[j+1][i], v2+1, error_prob, cross_scheme) + stepf(v+1, v2+1, cur_rf[j], cur_rf2[j], cross_scheme); if(v==0 && v2==0) s = gamma[v][v2]; else s = addlog(s, gamma[v][v2]); } } for(v=0; v<n_gen; v++) { for(v2=0; v2<n_gen; v2++) { rf[j] += nrecf1(v+1,v2+1, cur_rf[j], cross_scheme) * exp(gamma[v][v2] - s); if(sexsp) rf2[j] += nrecf2(v+1,v2+1, cur_rf[j], cross_scheme) * exp(gamma[v][v2] - s); } } } } /* loop over individuals */ /* rescale */ for(j=0; j<n_mar-1; j++) { rf[j] /= (double)n_ind; if(rf[j] < tol/1000.0) rf[j] = tol/1000.0; else if(rf[j] > 0.5-tol/1000.0) rf[j] = 0.5-tol/1000.0; if(sexsp) { rf2[j] /= (double)n_ind; if(rf2[j] < tol/1000.0) rf2[j] = tol/1000.0; else if(rf2[j] > 0.5-tol/1000.0) rf2[j] = 0.5-tol/1000.0; } else rf2[j] = rf[j]; } if(verbose>1) { /* print estimates as we go along*/ Rprintf(" %4d ", it+1); maxdif=0.0; for(j=0; j<n_mar-1; j++) { temp = fabs(rf[j] - cur_rf[j])/(cur_rf[j]+tol*100.0); if(maxdif < temp) maxdif = temp; if(sexsp) { temp = fabs(rf2[j] - cur_rf2[j])/(cur_rf2[j]+tol*100.0); if(maxdif < temp) maxdif = temp; } /* bsy add */ if(verbose > 2) Rprintf("%d %f %f\n", j+1, cur_rf[j], rf[j]); /* bsy add */ } sprintf(text, "%s%s\n", " max rel've change = ", pattern); Rprintf(text, maxdif); } /* check convergence */ for(j=0, flag=0; j<n_mar-1; j++) { if(fabs(rf[j] - cur_rf[j]) > tol*(cur_rf[j]+tol*100.0) || (sexsp && fabs(rf2[j] - cur_rf2[j]) > tol*(cur_rf2[j]+tol*100.0))) { flag = 1; break; } } if(!flag) break; } /* end EM algorithm */ if(flag) warning("Didn't converge!\n"); /* calculate log likelihood */ *loglik = 0.0; for(i=0; i<n_ind; i++) { /* i = individual */ /* initialize alpha */ for(v=0; v<n_gen; v++) { alpha[v][0] = initf(v+1, cross_scheme) + emitf(Geno[0][i], v+1, error_prob, cross_scheme); } /* forward equations */ for(j=1; j<n_mar; j++) { for(v=0; v<n_gen; v++) { alpha[v][j] = alpha[0][j-1] + stepf(1, v+1, rf[j-1], rf2[j-1], cross_scheme); for(v2=1; v2<n_gen; v2++) alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] + stepf(v2+1,v+1,rf[j-1],rf2[j-1], cross_scheme)); alpha[v][j] += emitf(Geno[j][i],v+1,error_prob, cross_scheme); } } curloglik = alpha[0][n_mar-1]; for(v=1; v<n_gen; v++) curloglik = addlog(curloglik, alpha[v][n_mar-1]); *loglik += curloglik; } if(verbose) { if(verbose < 2) { /* print final estimates */ Rprintf(" no. iterations = %d\n", it+1); maxdif=0.0; for(j=0; j<n_mar-1; j++) { temp = fabs(rf[j] - cur_rf[j])/(cur_rf[j]+tol*100.0); if(maxdif < temp) maxdif = temp; if(sexsp) { temp = fabs(rf2[j] - cur_rf2[j])/(cur_rf2[j]+tol*100.0); if(maxdif < temp) maxdif = temp; } } sprintf(text, "%s%s\n", " max rel've change at last step = ", pattern); Rprintf(text, maxdif); } Rprintf(" loglik: %10.4lf\n\n", *loglik); } }
void sim_geno(int n_ind, int n_pos, int n_gen, int n_draws, int *geno, double *rf, double *rf2, double error_prob, int *draws, double initf(int, int *), double emitf(int, int, double, int *), double stepf(int, int, double, double, int *)) { int i, k, j, v, v2; double s, **beta, *probs; int **Geno, ***Draws, curstate; int cross_scheme[2]; /* cross scheme hidden in draws argument; used by hmm_bcsft */ cross_scheme[0] = draws[0]; cross_scheme[1] = draws[1]; draws[0] = 0; draws[1] = 0; /* allocate space for beta and reorganize geno and draws */ /* Geno indexed as Geno[pos][ind] */ /* Draws indexed as Draws[rep][pos][ind] */ reorg_geno(n_ind, n_pos, geno, &Geno); reorg_draws(n_ind, n_pos, n_draws, draws, &Draws); allocate_alpha(n_pos, n_gen, &beta); allocate_double(n_gen, &probs); /* Read R's random seed */ GetRNGstate(); for(i=0; i<n_ind; i++) { /* i = individual */ R_CheckUserInterrupt(); /* check for ^C */ /* do backward equations */ /* initialize beta */ for(v=0; v<n_gen; v++) beta[v][n_pos-1] = 0.0; /* backward equations */ for(j=n_pos-2; j>=0; j--) { for(v=0; v<n_gen; v++) { beta[v][j] = beta[0][j+1] + stepf(v+1,1,rf[j], rf2[j], cross_scheme) + emitf(Geno[j+1][i],1,error_prob, cross_scheme); for(v2=1; v2<n_gen; v2++) beta[v][j] = addlog(beta[v][j], beta[v2][j+1] + stepf(v+1,v2+1,rf[j],rf2[j], cross_scheme) + emitf(Geno[j+1][i],v2+1,error_prob, cross_scheme)); } } for(k=0; k<n_draws; k++) { /* k = simulation replicate */ /* first draw */ /* calculate probs */ s = (probs[0] = initf(1, cross_scheme)+emitf(Geno[0][i],1,error_prob, cross_scheme)+beta[0][0]); for(v=1; v<n_gen; v++) { probs[v] = initf(v+1, cross_scheme) + emitf(Geno[0][i], v+1, error_prob, cross_scheme) + beta[v][0]; s = addlog(s, probs[v]); } for(v=0; v<n_gen; v++) probs[v] = exp(probs[v] - s); /* make draw: returns a value from {1, 2, ..., n_gen} */ curstate = Draws[k][0][i] = sample_int(n_gen, probs); /* move along chromosome */ for(j=1; j<n_pos; j++) { /* calculate probs */ for(v=0; v<n_gen; v++) probs[v] = exp(stepf(curstate,v+1,rf[j-1],rf2[j-1], cross_scheme) + emitf(Geno[j][i],v+1,error_prob, cross_scheme) + beta[v][j] - beta[curstate-1][j-1]); /* make draw */ curstate = Draws[k][j][i] = sample_int(n_gen, probs); } } /* loop over replicates */ } /* loop over individuals */ /* write R's random seed */ PutRNGstate(); }
void fitqtl_imp_binary(int n_ind, int n_qtl, int *n_gen, int n_draws, int ***Draws, double **Cov, int n_cov, int *model, int n_int, double *pheno, int get_ests, double *lod, int *df, double *ests, double *ests_covar, double *design_mat, double tol, int maxit, int *matrix_rank) { /* create local variables */ int i, j, ii, jj, n_qc, itmp; /* loop variants and temp variables */ double llik, llik0, *LOD_array; double *the_ests, *the_covar, **TheEsts, ***TheCovar; double *dwork, **Ests_covar, tot_wt=0.0, *wts; double **Covar_mean, **Mean_covar, *mean_ests; /* for ests and cov matrix */ int *iwork, sizefull, n_trim, *index; /* number to trim from each end of the imputations */ n_trim = (int) floor( 0.5*log(n_draws)/log(2.0) ); /* initialization */ sizefull = 1; /* calculate the dimension of the design matrix for full model */ n_qc = n_qtl+n_cov; /* total number of QTLs and covariates */ /* for additive QTLs and covariates*/ for(i=0; i<n_qc; i++) sizefull += n_gen[i]; /* for interactions, loop thru all interactions */ for(i=0; i<n_int; i++) { for(j=0, itmp=1; j<n_qc; j++) { if(model[i*n_qc+j]) itmp *= n_gen[j]; } sizefull += itmp; } /* reorganize Ests_covar for easy use later */ /* and make space for estimates and covariance matrix */ if(get_ests) { reorg_errlod(sizefull, sizefull, ests_covar, &Ests_covar); allocate_double(sizefull*n_draws, &the_ests); allocate_double(sizefull*sizefull*n_draws, &the_covar); /* I need to save all of the estimates and covariance matrices */ reorg_errlod(sizefull, n_draws, the_ests, &TheEsts); reorg_genoprob(sizefull, sizefull, n_draws, the_covar, &TheCovar); allocate_dmatrix(sizefull, sizefull, &Mean_covar); allocate_dmatrix(sizefull, sizefull, &Covar_mean); allocate_double(sizefull, &mean_ests); allocate_double(n_draws, &wts); } /* allocate memory for working arrays, total memory is sizefull*n_ind+6*n_ind+4*sizefull for double array, and sizefull for integer array. All memory will be allocated one time and split later */ dwork = (double *)R_alloc(sizefull*n_ind+6*n_ind+4*sizefull, sizeof(double)); iwork = (int *)R_alloc(sizefull, sizeof(int)); index = (int *)R_alloc(n_draws, sizeof(int)); LOD_array = (double *)R_alloc(n_draws, sizeof(double)); /* calculate null model log10 likelihood */ llik0 = nullLODbin(pheno, n_ind); *matrix_rank = n_ind; /* loop over imputations */ for(i=0; i<n_draws; i++) { R_CheckUserInterrupt(); /* check for ^C */ /* calculate alternative model RSS */ llik = galtLODimpbin(pheno, n_ind, n_gen, n_qtl, Draws[i], Cov, n_cov, model, n_int, dwork, iwork, sizefull, get_ests, ests, Ests_covar, design_mat, tol, maxit, matrix_rank); /* calculate the LOD score in this imputation */ LOD_array[i] = (llik - llik0); /* if getting estimates, calculate the weights */ if(get_ests) { wts[i] = LOD_array[i]*log(10.0); if(i==0) tot_wt = wts[i]; else tot_wt = addlog(tot_wt, wts[i]); for(ii=0; ii<sizefull; ii++) { TheEsts[i][ii] = ests[ii]; for(jj=ii; jj<sizefull; jj++) TheCovar[i][ii][jj] = Ests_covar[ii][jj]; } } } /* end loop over imputations */ /* sort the lod scores, and trim the weights */ if(get_ests) { for(i=0; i<n_draws; i++) { index[i] = i; wts[i] = exp(wts[i]-tot_wt); } rsort_with_index(LOD_array, index, n_draws); for(i=0; i<n_trim; i++) wts[index[i]] = wts[index[n_draws-i-1]] = 0.0; /* re-scale wts */ tot_wt = 0.0; for(i=0; i<n_draws; i++) tot_wt += wts[i]; for(i=0; i<n_draws; i++) wts[i] /= tot_wt; } /* calculate the result LOD score */ *lod = wtaverage(LOD_array, n_draws); /* degree of freedom equals to the number of columns of x minus 1 (mean) */ *df = sizefull - 1; /* get means and variances and covariances of estimates */ if(get_ests) { for(i=0; i<n_draws; i++) { if(i==0) { for(ii=0; ii<sizefull; ii++) { mean_ests[ii] = TheEsts[i][ii] * wts[i]; for(jj=ii; jj<sizefull; jj++) { Mean_covar[ii][jj] = TheCovar[i][ii][jj] * wts[i]; Covar_mean[ii][jj] = 0.0; } } } else { for(ii=0; ii<sizefull; ii++) { mean_ests[ii] += TheEsts[i][ii]*wts[i]; for(jj=ii; jj<sizefull; jj++) { Mean_covar[ii][jj] += TheCovar[i][ii][jj]*wts[i]; Covar_mean[ii][jj] += (TheEsts[i][ii]-TheEsts[0][ii])* (TheEsts[i][jj]-TheEsts[0][jj])*wts[i]; } } } } for(i=0; i<sizefull; i++) { ests[i] = mean_ests[i]; for(j=i; j<sizefull; j++) { Covar_mean[i][j] = (Covar_mean[i][j] - (mean_ests[i]-TheEsts[0][i])* (mean_ests[j]-TheEsts[0][j]))*(double)n_draws/(double)(n_draws-1); Ests_covar[i][j] = Ests_covar[j][i] = Mean_covar[i][j] + Covar_mean[i][j]; } } } /* done getting estimates */ }
double discan_covar_em(int n_ind, int pos, int n_gen, int n_par, double ***Genoprob, double **Addcov, int n_addcov, double **Intcov, int n_intcov, int *pheno, double *start, int maxit, double tol, int verbose, int *ind_noqtl) { int i, j, k, kk, s, offset; double *jac, **Jac, *grad; double *temp, **wts; double fit, **temp1, **temp2; double *temp1s, *temp2s; double *newpar, *curpar; double newllik=0.0, curllik, sum; int info; double rcond, *junk; /* allocate space */ allocate_double(n_par*n_par, &jac); reorg_errlod(n_par, n_par, jac, &Jac); allocate_double(n_par, &grad); allocate_double(n_ind*n_gen, &temp); reorg_errlod(n_gen, n_ind, temp, &wts); allocate_double(n_ind*n_gen, &temp); reorg_errlod(n_gen, n_ind, temp, &temp1); allocate_double(n_ind*n_gen, &temp); reorg_errlod(n_gen, n_ind, temp, &temp2); allocate_double(n_ind, &temp1s); allocate_double(n_ind, &temp2s); allocate_double(n_par, &newpar); allocate_double(n_par, &curpar); allocate_double(n_par, &junk); /* initial wts */ for(i=0; i<n_ind; i++) for(j=0; j<n_gen; j++) wts[i][j] = Genoprob[j][pos][i]; for(i=0; i<n_par; i++) curpar[i] = start[i]; curllik = discan_covar_loglik(n_ind, pos, n_gen, n_par, curpar, Genoprob, Addcov, n_addcov, Intcov, n_intcov, pheno, ind_noqtl); if(verbose) Rprintf(" %10.5f\n", curllik); for(s=0; s<maxit; s++) { R_CheckUserInterrupt(); /* check for ^C */ /****** M STEP ******/ /* 0's in gradient and Jacobian */ for(j=0; j<n_par; j++) { grad[j] = 0.0; for(k=0; k<n_par; k++) Jac[j][k] = 0.0; } /* calculate gradient and Jacobian */ for(i=0; i<n_ind; i++) { temp1s[i] = temp2s[i] = 0.0; for(j=0; j<n_gen; j++) { if(!ind_noqtl[i]) fit = curpar[j]; else fit = 0.0; if(n_addcov > 0) { for(k=0; k<n_addcov; k++) fit += Addcov[k][i] * curpar[n_gen+k]; } if(!ind_noqtl[i] && n_intcov > 0 && j<n_gen-1) { for(k=0; k<n_intcov; k++) fit += Intcov[k][i] * curpar[n_gen+n_addcov+n_intcov*j+k]; } fit = exp(fit)/(1.0+exp(fit)); temp1s[i] += (temp1[i][j] = wts[i][j]*((double)pheno[i] - fit)); temp2s[i] += (temp2[i][j] = wts[i][j]*fit*(1.0-fit)); } } for(j=0; j<n_gen; j++) { for(i=0; i<n_ind; i++) { if(!ind_noqtl[i]) { grad[j] += temp1[i][j]; Jac[j][j] += temp2[i][j]; } } } for(k=0; k<n_addcov; k++) { for(i=0; i<n_ind; i++) { grad[k + n_gen] += Addcov[k][i] * temp1s[i]; for(kk=k; kk<n_addcov; kk++) Jac[kk+n_gen][k+n_gen] += Addcov[k][i]*Addcov[kk][i] * temp2s[i]; if(!ind_noqtl[i]) { for(j=0; j<n_gen; j++) Jac[k+n_gen][j] += Addcov[k][i] * temp2[i][j]; } } } for(j=0; j<n_gen-1; j++) { offset = n_gen + n_addcov + n_intcov*j; for(k=0; k<n_intcov; k++) { for(i=0; i<n_ind; i++) { if(!ind_noqtl[i]) { grad[k + offset] += Intcov[k][i] * temp1[i][j]; for(kk=k; kk<n_intcov; kk++) Jac[kk+offset][k+offset] += Intcov[k][i]*Intcov[kk][i]*temp2[i][j]; for(kk=0; kk<n_addcov; kk++) Jac[k+offset][kk+n_gen] += Intcov[k][i]*Addcov[kk][i]*temp2[i][j]; Jac[k+offset][j] += Intcov[k][i]*temp2[i][j]; } } } } if(verbose > 1) { Rprintf("grad: "); for(j=0; j<n_par; j++) Rprintf("%f ", grad[j]); Rprintf("\n"); Rprintf("Jac:\n"); for(j=0; j<n_par; j++) { for(k=0; k<n_par; k++) Rprintf("%f ", Jac[j][k]); Rprintf("\n"); } Rprintf("\n"); } /* dpoco and dposl from Linpack to calculate Jac^-1 %*% grad */ F77_CALL(dpoco)(jac, &n_par, &n_par, &rcond, junk, &info); if(fabs(rcond) < TOL || info != 0) { warning("Jacobian matrix is singular\n"); return(NA_REAL); } F77_CALL(dposl)(jac, &n_par, &n_par, grad); if(verbose > 1) { Rprintf(" solution: "); for(j=0; j<n_par; j++) Rprintf(" %f", grad[j]); Rprintf("\n"); } /* revised estimates */ for(j=0; j<n_par; j++) newpar[j] = curpar[j] + grad[j]; if(verbose>1) { for(j=0; j<n_par; j++) Rprintf("%f ", newpar[j]); Rprintf("\n"); } newllik = discan_covar_loglik(n_ind, pos, n_gen, n_par, newpar, Genoprob, Addcov, n_addcov, Intcov, n_intcov, pheno, ind_noqtl); if(verbose) { Rprintf(" %3d %10.5f %10.5f", s+1, newllik, newllik - curllik); if(newllik < curllik) Rprintf(" ***"); Rprintf("\n"); } if(newllik - curllik < tol) return(newllik); for(j=0; j<n_par; j++) curpar[j] = newpar[j]; curllik = newllik; /* e-step */ for(i=0; i<n_ind; i++) { sum=0.0; for(j=0; j<n_gen; j++) { fit = curpar[j]; if(n_addcov > 0) { for(k=0; k<n_addcov; k++) fit += Addcov[k][i] * curpar[n_gen+k]; } if(n_intcov > 0 && j<n_gen-1) { for(k=0; k<n_intcov; k++) fit += Intcov[k][i] * curpar[n_gen+n_addcov+n_intcov*j+k]; } fit = exp(fit); if(pheno[i]) sum += (wts[i][j] = Genoprob[j][pos][i] * fit/(1.0+fit)); else sum += (wts[i][j] = Genoprob[j][pos][i] / (1.0+fit)); } for(j=0; j<n_gen; j++) wts[i][j] /= sum; } } /* end of em-step */ /* didn't converge */ return(newllik); }
/********************************************************************** * * summary_scantwo * * Function to pull out the major bits from scantwo output * * n_pos: Total number of positions * n_phe: Number of phenotype columns * Lod: Array of LOD scores indexed as [phe][pos2][pos1] * diagonal = scanone results; upper.tri = add've LOD; lower = full * n_chr Number of distinct chromosomes * chr Index of chromosomes; length n_pos, taking values in 1..n_chr * pos cM positions; length n_pos * xchr Index of xchr; length n_chr; 0=autosome, 1=X chromosome * ScanoneX special X scanone; matrix indexed as [phe][pos] * * n_chrpair Number of pairs of chromosomes * Chrpair Matrix giving chrpair index for a pair of chromosomes * Pos1_jnt, Pos2_jnt On output, positions of maximum joint LOD * Matrices indexed as [phe][chrpair] * Pos1_add, Pos2_add On output, positions of maximum add've LOD * Matrices indexed as [phe][chrpair] * Pos1_int, Pos2_int On output, positions of maximum int've LOD * Matrices indexed as [phe][chrpair] * JNT_lod_* On output, joint and add've LOD at pos'ns with * maximum joint LOD; matrices indexed as [phe][chrpair] * ADD_lod_* On output, joint and add've LOD at pos'ns with * maximum add've LOD; matrices indexed as [phe][chrpair] * INT_lod_* On output, joint and add've LOD at pos'ns with * maximum int've LOD; matrices indexed as [phe][chrpair] * LOD_1qtl On output, maximum 1-QTL LOD for each pair of chr * (selected from either scanone or scanoneX * **********************************************************************/ void summary_scantwo(int n_pos, int n_phe, double ***Lod, int n_chr, int *chr, double *pos, int *xchr, double **ScanoneX, int n_chrpair, int **Chrpair, double **Pos1_jnt, double **Pos2_jnt, double **Pos1_add, double **Pos2_add, double **Pos1_int, double **Pos2_int, double **JNT_lod_full, double **JNT_lod_add, double **ADD_lod_full, double **ADD_lod_add, double **INT_lod_full, double **INT_lod_add, double **LOD_1qtl) { int i, j, k, c1, c2, thepair; double *maxone, *maxoneX; allocate_double(n_chr, &maxone); allocate_double(n_chr, &maxoneX); for(i=0; i<n_phe; i++) { /* loop over phenotype columns */ /* get maximum single-QTL results */ for(j=0; j<n_chr; j++) maxone[j] = maxoneX[j] = 0.0; for(j=0; j<n_pos; j++) { if(Lod[i][j][j] > maxone[chr[j]-1]) maxone[chr[j]-1] = Lod[i][j][j]; if(ScanoneX[i][j] > maxoneX[chr[j]-1]) maxoneX[chr[j]-1] = ScanoneX[i][j]; } /* zero out the matrices for the maximum LOD scores */ for(j=0; j<n_chrpair; j++) { JNT_lod_full[i][j] = JNT_lod_add[i][j] = ADD_lod_full[i][j] = ADD_lod_add[i][j] = INT_lod_full[i][j] = INT_lod_add[i][j] = Pos1_add[i][j] = Pos2_add[i][j] = Pos1_int[i][j] = Pos2_int[i][j] = Pos1_jnt[i][j] = Pos2_jnt[i][j] = 0.0; } /* maximum joint, add've, and int've LOD scores */ for(j=0; j<n_pos; j++) { for(k=j; k<n_pos; k++) { R_CheckUserInterrupt(); /* check for ^C */ c1 = chr[j]-1; c2 = chr[k]-1; thepair = Chrpair[c1][c2]; if(Lod[i][j][k] > JNT_lod_full[i][thepair]) { JNT_lod_full[i][thepair] = Lod[i][j][k]; JNT_lod_add[i][thepair] = Lod[i][k][j]; Pos1_jnt[i][thepair] = pos[j]; Pos2_jnt[i][thepair] = pos[k]; } if(Lod[i][k][j] > ADD_lod_add[i][thepair]) { ADD_lod_add[i][thepair] = Lod[i][k][j]; ADD_lod_full[i][thepair] = Lod[i][j][k]; Pos1_add[i][thepair] = pos[j]; Pos2_add[i][thepair] = pos[k]; } if(Lod[i][j][k] - Lod[i][k][j] > INT_lod_full[i][thepair] - INT_lod_add[i][thepair]) { INT_lod_full[i][thepair] = Lod[i][j][k]; INT_lod_add[i][thepair] = Lod[i][k][j]; Pos1_int[i][thepair] = pos[j]; Pos2_int[i][thepair] = pos[k]; } } } /* pull out biggest single-QTL LOD scores */ for(j=0; j<n_chr; j++) { for(k=j; k<n_chr; k++) { R_CheckUserInterrupt(); /* check for ^C */ thepair = Chrpair[j][k]; /* calculate the two 2 v 1 conditional LOD scores */ if(xchr[j] || xchr[k]) { if(maxoneX[j] > maxoneX[k]) LOD_1qtl[i][thepair] = maxoneX[j]; else LOD_1qtl[i][thepair] = maxoneX[k]; } else { if(maxone[j] > maxone[k]) LOD_1qtl[i][thepair] = maxone[j]; else LOD_1qtl[i][thepair] = maxone[k]; } } } } /* end loop over phenotype columns */ }