void est_map_bci(int n_ind, int n_mar, int *geno, double *d, int m, double p, double error_prob, double *loglik, int maxit, double tol, int verbose) { int i, j, j2, v, v2, it, flag=0, **Geno, n_states; double s, **alpha, **beta, **gamma, *cur_d, *rf; double ***tm, *temp; double curloglik; double initprob; int ndigits; double maxdif, tempdif; char pattern[100], text[200]; n_states = 2*(m+1); initprob = -log((double)n_states); /* allocate space for beta and reorganize geno */ reorg_geno(n_ind, n_mar, geno, &Geno); allocate_alpha(n_mar, n_states, &alpha); allocate_alpha(n_mar, n_states, &beta); allocate_dmatrix(n_states, n_states, &gamma); allocate_double(n_mar-1, &cur_d); allocate_double(n_mar-1, &rf); /* allocate space for the transition matrices */ /* size n_states x n_states x (n_mar-1) */ /* tm[state1][state2][interval] */ tm = (double ***)R_alloc(n_states, sizeof(double **)); tm[0] = (double **)R_alloc(n_states * n_states, sizeof(double *)); for(i=1; i<n_states; i++) tm[i] = tm[i-1] + n_states; tm[0][0] = (double *)R_alloc(n_states * n_states * (n_mar - 1), sizeof(double)); temp = tm[0][0]; for(i=0; i < n_states; i++) { for(j=0; j < n_states; j++) { tm[i][j] = temp; temp += n_mar-1; } } /* digits in verbose output */ if(verbose) { ndigits = (int)ceil(-log10(tol)); if(ndigits > 16) ndigits=16; sprintf(pattern, "%s%d.%df", "%", ndigits+3, ndigits+1); } for(j=0; j<n_mar-1; j++) d[j] /= 100.0; /* convert to Morgans */ /* begin EM algorithm */ for(it=0; it<maxit; it++) { for(j=0; j<n_mar-1; j++) { cur_d[j] = d[j]; rf[j] = 0.0; } /* calculate the transition matrices */ step_bci(n_mar, n_states, tm, cur_d, m, p, maxit, tol); for(i=0; i<n_ind; i++) { /* i = individual */ R_CheckUserInterrupt(); /* check for ^C */ /* initialize alpha and beta */ for(v=0; v<n_states; v++) { alpha[v][0] = initprob + emit_bci(Geno[0][i], v, error_prob, m); beta[v][n_mar-1] = 0.0; } /* forward-backward equations */ for(j=1,j2=n_mar-2; j<n_mar; j++, j2--) { for(v=0; v<n_states; v++) { alpha[v][j] = alpha[0][j-1] + tm[0][v][j-1]; beta[v][j2] = beta[0][j2+1] + tm[v][0][j2] + emit_bci(Geno[j2+1][i], 0, error_prob, m); for(v2=1; v2<n_states; v2++) { alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] + tm[v2][v][j-1]); beta[v][j2] = addlog(beta[v][j2], beta[v2][j2+1] + tm[v][v2][j2] + emit_bci(Geno[j2+1][i], v2, error_prob, m)); } alpha[v][j] += emit_bci(Geno[j][i], v, error_prob, m); } } for(j=0; j<n_mar-1; j++) { /* calculate gamma = log Pr(v1, v2, O) */ for(v=0, s=0.0; v<n_states; v++) { for(v2=0; v2<n_states; v2++) { gamma[v][v2] = alpha[v][j] + beta[v2][j+1] + emit_bci(Geno[j+1][i], v2, error_prob, m) + tm[v][v2][j]; if(v==0 && v2==0) s = gamma[v][v2]; else s = addlog(s, gamma[v][v2]); } } for(v=0; v<n_states; v++) { for(v2=0; v2<n_states; v2++) { rf[j] += nrec_bci(v, v2, m) * exp(gamma[v][v2] - s); } } } } /* loop over individuals */ /* rescale */ for(j=0; j<n_mar-1; j++) { rf[j] /= (double)n_ind; /* if(rf[j] < tol/100.0) rf[j] = tol/100.0; else if(rf[j] > 0.5-tol/100.0) rf[j] = 0.5-tol/100.0; */ } /* use map function to convert back to distances */ for(j=0; j<n_mar-1; j++) d[j] = imf_stahl(rf[j], m, p, 1e-10, 1000); if(verbose>1) { /* print estimates as we go along*/ Rprintf(" %4d ", it+1); maxdif=0.0; for(j=0; j<n_mar-1; j++) { tempdif = fabs(d[j] - cur_d[j])/(cur_d[j]+tol*100.0); if(maxdif < tempdif) maxdif = tempdif; } sprintf(text, "%s%s\n", " max rel've change = ", pattern); Rprintf(text, maxdif); } /* check convergence */ for(j=0, flag=0; j<n_mar-1; j++) { if(fabs(d[j] - cur_d[j]) > tol*(cur_d[j]+tol*100.0)) { flag = 1; break; } } if(!flag) break; } /* end EM algorithm */ if(flag) warning("Didn't converge!\n"); /* re-calculate transition matrices */ step_bci(n_mar, n_states, tm, d, m, p, maxit, tol); /* calculate log likelihood */ *loglik = 0.0; for(i=0; i<n_ind; i++) { /* i = individual */ R_CheckUserInterrupt(); /* check for ^C */ /* initialize alpha */ for(v=0; v<n_states; v++) alpha[v][0] = initprob + emit_bci(Geno[0][i], v, error_prob, m); /* forward equations */ for(j=1; j<n_mar; j++) { for(v=0; v<n_states; v++) { alpha[v][j] = alpha[0][j-1] + tm[0][v][j-1]; for(v2=1; v2<n_states; v2++) alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] + tm[v2][v][j-1]); alpha[v][j] += emit_bci(Geno[j][i], v, error_prob, m); } } curloglik = alpha[0][n_mar-1]; for(v=1; v<n_states; v++) curloglik = addlog(curloglik, alpha[v][n_mar-1]); *loglik += curloglik; } if(verbose) { if(verbose < 2) { /* print final estimates */ Rprintf(" no. iterations = %d\n", it+1); maxdif=0.0; for(j=0; j<n_mar-1; j++) { tempdif = fabs(d[j] - cur_d[j])/(cur_d[j]+tol*100.0); if(maxdif < tempdif) maxdif = tempdif; } sprintf(text, "%s%s\n", " max rel've change at last step = ", pattern); Rprintf(text, maxdif); } Rprintf(" loglik: %10.4lf\n\n", *loglik); } /* convert distances back to cM */ for(j=0; j<n_mar-1; j++) d[j] *= 100.0; }
void est_map_f2i(int n_ind, int n_mar, int *geno, double *d, int m, double p, double error_prob, double *loglik, int maxit, double tol, int verbose) { int i, j, j2, v, v2, it, flag=0, **Geno, n_states, n_bcstates; double s, **alpha, **beta, **gamma, *cur_d, *rf; double ***tm, *temp; double curloglik; double initprob; n_bcstates = 2*(m+1); n_states = n_bcstates*n_bcstates; initprob = -log((double)n_states); /* allocate space for beta and reorganize geno */ reorg_geno(n_ind, n_mar, geno, &Geno); allocate_alpha(n_mar, n_states, &alpha); allocate_alpha(n_mar, n_states, &beta); allocate_dmatrix(n_states, n_states, &gamma); allocate_double(n_mar-1, &cur_d); allocate_double(n_mar-1, &rf); /* allocate space for the [backcross] transition matrices */ /* size n_states x n_states x (n_mar-1) */ /* tm[state1][state2][interval] */ tm = (double ***)R_alloc(n_bcstates, sizeof(double **)); tm[0] = (double **)R_alloc(n_bcstates * n_bcstates, sizeof(double *)); for(i=1; i<n_bcstates; i++) tm[i] = tm[i-1] + n_bcstates; tm[0][0] = (double *)R_alloc(n_bcstates * n_bcstates * (n_mar - 1), sizeof(double)); temp = tm[0][0]; for(i=0; i < n_bcstates; i++) { for(j=0; j < n_bcstates; j++) { tm[i][j] = temp; temp += n_mar-1; } } if(verbose) { /* print initial estimates */ Rprintf(" "); for(j=0; j<n_mar-1; j++) Rprintf("%.3lf ", d[j]); Rprintf("\n"); } for(j=0; j<n_mar-1; j++) d[j] /= 100.0; /* convert to Morgans */ /* begin EM algorithm */ for(it=0; it<maxit; it++) { for(j=0; j<n_mar-1; j++) { cur_d[j] = d[j]; rf[j] = 0.0; } /* calculate the transition matrices [for BC] */ step_bci(n_mar, n_bcstates, tm, cur_d, m, p, maxit, tol); for(i=0; i<n_ind; i++) { /* i = individual */ R_CheckUserInterrupt(); /* check for ^C */ /* initialize alpha and beta */ for(v=0; v<n_states; v++) { alpha[v][0] = initprob + emit_f2i(Geno[0][i], v, error_prob, m, n_bcstates); beta[v][n_mar-1] = 0.0; } /* forward-backward equations */ for(j=1,j2=n_mar-2; j<n_mar; j++, j2--) { for(v=0; v<n_states; v++) { alpha[v][j] = alpha[0][j-1] + step_f2i(0, v, j-1, tm, n_bcstates); beta[v][j2] = beta[0][j2+1] + step_f2i(v, 0, j2, tm, n_bcstates) + emit_f2i(Geno[j2+1][i], 0, error_prob, m, n_bcstates); for(v2=1; v2<n_states; v2++) { alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] + step_f2i(v2, v, j-1, tm, n_bcstates)); beta[v][j2] = addlog(beta[v][j2], beta[v2][j2+1] + step_f2i(v, v2, j2, tm, n_bcstates) + emit_f2i(Geno[j2+1][i], v2, error_prob, m, n_bcstates)); } alpha[v][j] += emit_f2i(Geno[j][i], v, error_prob, m, n_bcstates); } } for(j=0; j<n_mar-1; j++) { /* calculate gamma = log Pr(v1, v2, O) */ for(v=0, s=0.0; v<n_states; v++) { for(v2=0; v2<n_states; v2++) { gamma[v][v2] = alpha[v][j] + beta[v2][j+1] + emit_f2i(Geno[j+1][i], v2, error_prob, m, n_bcstates) + step_f2i(v, v2, j, tm, n_bcstates); if(v==0 && v2==0) s = gamma[v][v2]; else s = addlog(s, gamma[v][v2]); } } for(v=0; v<n_states; v++) { for(v2=0; v2<n_states; v2++) { rf[j] += nrec_f2i(v, v2, m, n_bcstates) * exp(gamma[v][v2] - s); } } } } /* loop over individuals */ /* rescale */ for(j=0; j<n_mar-1; j++) { rf[j] /= (double)n_ind; /* if(rf[j] < tol/100.0) rf[j] = tol/100.0; else if(rf[j] > 0.5-tol/100.0) rf[j] = 0.5-tol/100.0; */ } /* use map function to convert back to distances */ for(j=0; j<n_mar-1; j++) d[j] = imf_stahl(rf[j], m, p, 1e-10, 1000); if(verbose > 1) { /* print some debugging stuff */ if(verbose == 2) Rprintf("Iteration"); Rprintf(" %4d ", it+1); if(verbose > 2) for(j=0; j<n_mar-1; j++) Rprintf("%.3lf ", d[j]*100.0); Rprintf("\n"); } /* check convergence */ for(j=0, flag=0; j<n_mar-1; j++) { if(fabs(d[j] - cur_d[j]) > tol*(cur_d[j]+tol*100.0)) { flag = 1; break; } } if(!flag) break; } /* end EM algorithm */ if(flag) warning("Didn't converge!\n"); /* re-calculate transition matrices */ step_bci(n_mar, n_bcstates, tm, d, m, p, maxit, tol); /* calculate log likelihood */ *loglik = 0.0; for(i=0; i<n_ind; i++) { /* i = individual */ /* initialize alpha */ for(v=0; v<n_states; v++) alpha[v][0] = initprob + emit_f2i(Geno[0][i], v, error_prob, m, n_bcstates); /* forward equations */ for(j=1; j<n_mar; j++) { for(v=0; v<n_states; v++) { alpha[v][j] = alpha[0][j-1] + step_f2i(0, v, j-1, tm, n_bcstates); for(v2=1; v2<n_states; v2++) alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] + step_f2i(v2, v, j-1, tm, n_bcstates)); alpha[v][j] += emit_f2i(Geno[j][i], v, error_prob, m, n_bcstates); } } curloglik = alpha[0][n_mar-1]; for(v=1; v<n_states; v++) curloglik = addlog(curloglik, alpha[v][n_mar-1]); *loglik += curloglik; } /* convert distances back to cM */ for(j=0; j<n_mar-1; j++) d[j] *= 100.0; if(verbose) { /* print final estimates */ Rprintf(" %4d ", it+1); for(j=0; j<n_mar-1; j++) Rprintf("%.3lf ", d[j]); Rprintf("\n"); Rprintf("loglik: %10.4lf\n\n", *loglik); } }