void R_discan_im(int *n_ind, int *n_pos, int *n_gen, double *genoprob, int *pheno, double *result, int *maxit, double *tol) { double ***Genoprob, **work; double *means; reorg_genoprob(*n_ind, *n_pos, *n_gen, genoprob, &Genoprob); allocate_dmatrix(3, *n_gen, &work); allocate_double(*n_gen, &means); discan_im(*n_ind, *n_pos, *n_gen, Genoprob, pheno, result, *maxit, *tol, work, means); }
void est_map_bci(int n_ind, int n_mar, int *geno, double *d, int m, double p, double error_prob, double *loglik, int maxit, double tol, int verbose) { int i, j, j2, v, v2, it, flag=0, **Geno, n_states; double s, **alpha, **beta, **gamma, *cur_d, *rf; double ***tm, *temp; double curloglik; double initprob; int ndigits; double maxdif, tempdif; char pattern[100], text[200]; n_states = 2*(m+1); initprob = -log((double)n_states); /* allocate space for beta and reorganize geno */ reorg_geno(n_ind, n_mar, geno, &Geno); allocate_alpha(n_mar, n_states, &alpha); allocate_alpha(n_mar, n_states, &beta); allocate_dmatrix(n_states, n_states, &gamma); allocate_double(n_mar-1, &cur_d); allocate_double(n_mar-1, &rf); /* allocate space for the transition matrices */ /* size n_states x n_states x (n_mar-1) */ /* tm[state1][state2][interval] */ tm = (double ***)R_alloc(n_states, sizeof(double **)); tm[0] = (double **)R_alloc(n_states * n_states, sizeof(double *)); for(i=1; i<n_states; i++) tm[i] = tm[i-1] + n_states; tm[0][0] = (double *)R_alloc(n_states * n_states * (n_mar - 1), sizeof(double)); temp = tm[0][0]; for(i=0; i < n_states; i++) { for(j=0; j < n_states; j++) { tm[i][j] = temp; temp += n_mar-1; } } /* digits in verbose output */ if(verbose) { ndigits = (int)ceil(-log10(tol)); if(ndigits > 16) ndigits=16; sprintf(pattern, "%s%d.%df", "%", ndigits+3, ndigits+1); } for(j=0; j<n_mar-1; j++) d[j] /= 100.0; /* convert to Morgans */ /* begin EM algorithm */ for(it=0; it<maxit; it++) { for(j=0; j<n_mar-1; j++) { cur_d[j] = d[j]; rf[j] = 0.0; } /* calculate the transition matrices */ step_bci(n_mar, n_states, tm, cur_d, m, p, maxit, tol); for(i=0; i<n_ind; i++) { /* i = individual */ R_CheckUserInterrupt(); /* check for ^C */ /* initialize alpha and beta */ for(v=0; v<n_states; v++) { alpha[v][0] = initprob + emit_bci(Geno[0][i], v, error_prob, m); beta[v][n_mar-1] = 0.0; } /* forward-backward equations */ for(j=1,j2=n_mar-2; j<n_mar; j++, j2--) { for(v=0; v<n_states; v++) { alpha[v][j] = alpha[0][j-1] + tm[0][v][j-1]; beta[v][j2] = beta[0][j2+1] + tm[v][0][j2] + emit_bci(Geno[j2+1][i], 0, error_prob, m); for(v2=1; v2<n_states; v2++) { alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] + tm[v2][v][j-1]); beta[v][j2] = addlog(beta[v][j2], beta[v2][j2+1] + tm[v][v2][j2] + emit_bci(Geno[j2+1][i], v2, error_prob, m)); } alpha[v][j] += emit_bci(Geno[j][i], v, error_prob, m); } } for(j=0; j<n_mar-1; j++) { /* calculate gamma = log Pr(v1, v2, O) */ for(v=0, s=0.0; v<n_states; v++) { for(v2=0; v2<n_states; v2++) { gamma[v][v2] = alpha[v][j] + beta[v2][j+1] + emit_bci(Geno[j+1][i], v2, error_prob, m) + tm[v][v2][j]; if(v==0 && v2==0) s = gamma[v][v2]; else s = addlog(s, gamma[v][v2]); } } for(v=0; v<n_states; v++) { for(v2=0; v2<n_states; v2++) { rf[j] += nrec_bci(v, v2, m) * exp(gamma[v][v2] - s); } } } } /* loop over individuals */ /* rescale */ for(j=0; j<n_mar-1; j++) { rf[j] /= (double)n_ind; /* if(rf[j] < tol/100.0) rf[j] = tol/100.0; else if(rf[j] > 0.5-tol/100.0) rf[j] = 0.5-tol/100.0; */ } /* use map function to convert back to distances */ for(j=0; j<n_mar-1; j++) d[j] = imf_stahl(rf[j], m, p, 1e-10, 1000); if(verbose>1) { /* print estimates as we go along*/ Rprintf(" %4d ", it+1); maxdif=0.0; for(j=0; j<n_mar-1; j++) { tempdif = fabs(d[j] - cur_d[j])/(cur_d[j]+tol*100.0); if(maxdif < tempdif) maxdif = tempdif; } sprintf(text, "%s%s\n", " max rel've change = ", pattern); Rprintf(text, maxdif); } /* check convergence */ for(j=0, flag=0; j<n_mar-1; j++) { if(fabs(d[j] - cur_d[j]) > tol*(cur_d[j]+tol*100.0)) { flag = 1; break; } } if(!flag) break; } /* end EM algorithm */ if(flag) warning("Didn't converge!\n"); /* re-calculate transition matrices */ step_bci(n_mar, n_states, tm, d, m, p, maxit, tol); /* calculate log likelihood */ *loglik = 0.0; for(i=0; i<n_ind; i++) { /* i = individual */ R_CheckUserInterrupt(); /* check for ^C */ /* initialize alpha */ for(v=0; v<n_states; v++) alpha[v][0] = initprob + emit_bci(Geno[0][i], v, error_prob, m); /* forward equations */ for(j=1; j<n_mar; j++) { for(v=0; v<n_states; v++) { alpha[v][j] = alpha[0][j-1] + tm[0][v][j-1]; for(v2=1; v2<n_states; v2++) alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] + tm[v2][v][j-1]); alpha[v][j] += emit_bci(Geno[j][i], v, error_prob, m); } } curloglik = alpha[0][n_mar-1]; for(v=1; v<n_states; v++) curloglik = addlog(curloglik, alpha[v][n_mar-1]); *loglik += curloglik; } if(verbose) { if(verbose < 2) { /* print final estimates */ Rprintf(" no. iterations = %d\n", it+1); maxdif=0.0; for(j=0; j<n_mar-1; j++) { tempdif = fabs(d[j] - cur_d[j])/(cur_d[j]+tol*100.0); if(maxdif < tempdif) maxdif = tempdif; } sprintf(text, "%s%s\n", " max rel've change at last step = ", pattern); Rprintf(text, maxdif); } Rprintf(" loglik: %10.4lf\n\n", *loglik); } /* convert distances back to cM */ for(j=0; j<n_mar-1; j++) d[j] *= 100.0; }
void est_map_f2i(int n_ind, int n_mar, int *geno, double *d, int m, double p, double error_prob, double *loglik, int maxit, double tol, int verbose) { int i, j, j2, v, v2, it, flag=0, **Geno, n_states, n_bcstates; double s, **alpha, **beta, **gamma, *cur_d, *rf; double ***tm, *temp; double curloglik; double initprob; n_bcstates = 2*(m+1); n_states = n_bcstates*n_bcstates; initprob = -log((double)n_states); /* allocate space for beta and reorganize geno */ reorg_geno(n_ind, n_mar, geno, &Geno); allocate_alpha(n_mar, n_states, &alpha); allocate_alpha(n_mar, n_states, &beta); allocate_dmatrix(n_states, n_states, &gamma); allocate_double(n_mar-1, &cur_d); allocate_double(n_mar-1, &rf); /* allocate space for the [backcross] transition matrices */ /* size n_states x n_states x (n_mar-1) */ /* tm[state1][state2][interval] */ tm = (double ***)R_alloc(n_bcstates, sizeof(double **)); tm[0] = (double **)R_alloc(n_bcstates * n_bcstates, sizeof(double *)); for(i=1; i<n_bcstates; i++) tm[i] = tm[i-1] + n_bcstates; tm[0][0] = (double *)R_alloc(n_bcstates * n_bcstates * (n_mar - 1), sizeof(double)); temp = tm[0][0]; for(i=0; i < n_bcstates; i++) { for(j=0; j < n_bcstates; j++) { tm[i][j] = temp; temp += n_mar-1; } } if(verbose) { /* print initial estimates */ Rprintf(" "); for(j=0; j<n_mar-1; j++) Rprintf("%.3lf ", d[j]); Rprintf("\n"); } for(j=0; j<n_mar-1; j++) d[j] /= 100.0; /* convert to Morgans */ /* begin EM algorithm */ for(it=0; it<maxit; it++) { for(j=0; j<n_mar-1; j++) { cur_d[j] = d[j]; rf[j] = 0.0; } /* calculate the transition matrices [for BC] */ step_bci(n_mar, n_bcstates, tm, cur_d, m, p, maxit, tol); for(i=0; i<n_ind; i++) { /* i = individual */ R_CheckUserInterrupt(); /* check for ^C */ /* initialize alpha and beta */ for(v=0; v<n_states; v++) { alpha[v][0] = initprob + emit_f2i(Geno[0][i], v, error_prob, m, n_bcstates); beta[v][n_mar-1] = 0.0; } /* forward-backward equations */ for(j=1,j2=n_mar-2; j<n_mar; j++, j2--) { for(v=0; v<n_states; v++) { alpha[v][j] = alpha[0][j-1] + step_f2i(0, v, j-1, tm, n_bcstates); beta[v][j2] = beta[0][j2+1] + step_f2i(v, 0, j2, tm, n_bcstates) + emit_f2i(Geno[j2+1][i], 0, error_prob, m, n_bcstates); for(v2=1; v2<n_states; v2++) { alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] + step_f2i(v2, v, j-1, tm, n_bcstates)); beta[v][j2] = addlog(beta[v][j2], beta[v2][j2+1] + step_f2i(v, v2, j2, tm, n_bcstates) + emit_f2i(Geno[j2+1][i], v2, error_prob, m, n_bcstates)); } alpha[v][j] += emit_f2i(Geno[j][i], v, error_prob, m, n_bcstates); } } for(j=0; j<n_mar-1; j++) { /* calculate gamma = log Pr(v1, v2, O) */ for(v=0, s=0.0; v<n_states; v++) { for(v2=0; v2<n_states; v2++) { gamma[v][v2] = alpha[v][j] + beta[v2][j+1] + emit_f2i(Geno[j+1][i], v2, error_prob, m, n_bcstates) + step_f2i(v, v2, j, tm, n_bcstates); if(v==0 && v2==0) s = gamma[v][v2]; else s = addlog(s, gamma[v][v2]); } } for(v=0; v<n_states; v++) { for(v2=0; v2<n_states; v2++) { rf[j] += nrec_f2i(v, v2, m, n_bcstates) * exp(gamma[v][v2] - s); } } } } /* loop over individuals */ /* rescale */ for(j=0; j<n_mar-1; j++) { rf[j] /= (double)n_ind; /* if(rf[j] < tol/100.0) rf[j] = tol/100.0; else if(rf[j] > 0.5-tol/100.0) rf[j] = 0.5-tol/100.0; */ } /* use map function to convert back to distances */ for(j=0; j<n_mar-1; j++) d[j] = imf_stahl(rf[j], m, p, 1e-10, 1000); if(verbose > 1) { /* print some debugging stuff */ if(verbose == 2) Rprintf("Iteration"); Rprintf(" %4d ", it+1); if(verbose > 2) for(j=0; j<n_mar-1; j++) Rprintf("%.3lf ", d[j]*100.0); Rprintf("\n"); } /* check convergence */ for(j=0, flag=0; j<n_mar-1; j++) { if(fabs(d[j] - cur_d[j]) > tol*(cur_d[j]+tol*100.0)) { flag = 1; break; } } if(!flag) break; } /* end EM algorithm */ if(flag) warning("Didn't converge!\n"); /* re-calculate transition matrices */ step_bci(n_mar, n_bcstates, tm, d, m, p, maxit, tol); /* calculate log likelihood */ *loglik = 0.0; for(i=0; i<n_ind; i++) { /* i = individual */ /* initialize alpha */ for(v=0; v<n_states; v++) alpha[v][0] = initprob + emit_f2i(Geno[0][i], v, error_prob, m, n_bcstates); /* forward equations */ for(j=1; j<n_mar; j++) { for(v=0; v<n_states; v++) { alpha[v][j] = alpha[0][j-1] + step_f2i(0, v, j-1, tm, n_bcstates); for(v2=1; v2<n_states; v2++) alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] + step_f2i(v2, v, j-1, tm, n_bcstates)); alpha[v][j] += emit_f2i(Geno[j][i], v, error_prob, m, n_bcstates); } } curloglik = alpha[0][n_mar-1]; for(v=1; v<n_states; v++) curloglik = addlog(curloglik, alpha[v][n_mar-1]); *loglik += curloglik; } /* convert distances back to cM */ for(j=0; j<n_mar-1; j++) d[j] *= 100.0; if(verbose) { /* print final estimates */ Rprintf(" %4d ", it+1); for(j=0; j<n_mar-1; j++) Rprintf("%.3lf ", d[j]); Rprintf("\n"); Rprintf("loglik: %10.4lf\n\n", *loglik); } }
void est_map(int n_ind, int n_mar, int n_gen, int *geno, double *rf, double *rf2, double error_prob, double initf(int, int *), double emitf(int, int, double, int *), double stepf(int, int, double, double, int *), double nrecf1(int, int, double, int*), double nrecf2(int, int, double, int*), double *loglik, int maxit, double tol, int sexsp, int verbose) { int i, j, j2, v, v2, it, flag=0, **Geno, ndigits; double s, **alpha, **beta, **gamma, *cur_rf, *cur_rf2; double curloglik, maxdif, temp; char pattern[100], text[200]; int cross_scheme[2]; /* cross scheme hidden in loglik argument; used by hmm_bcsft */ cross_scheme[0] = (int) ftrunc(*loglik / 1000.0); cross_scheme[1] = ((int) *loglik) - 1000 * cross_scheme[0]; *loglik = 0.0; /* allocate space for beta and reorganize geno */ reorg_geno(n_ind, n_mar, geno, &Geno); allocate_alpha(n_mar, n_gen, &alpha); allocate_alpha(n_mar, n_gen, &beta); allocate_dmatrix(n_gen, n_gen, &gamma); allocate_double(n_mar-1, &cur_rf); allocate_double(n_mar-1, &cur_rf2); /* digits in verbose output */ if(verbose) { ndigits = (int)ceil(-log10(tol)); if(ndigits > 16) ndigits=16; sprintf(pattern, "%s%d.%df", "%", ndigits+3, ndigits+1); } /* begin EM algorithm */ for(it=0; it<maxit; it++) { for(j=0; j<n_mar-1; j++) { cur_rf[j] = cur_rf2[j] = rf[j]; rf[j] = 0.0; if(sexsp) { cur_rf2[j] = rf2[j]; rf2[j] = 0.0; } } for(i=0; i<n_ind; i++) { /* i = individual */ R_CheckUserInterrupt(); /* check for ^C */ /* initialize alpha and beta */ for(v=0; v<n_gen; v++) { alpha[v][0] = initf(v+1, cross_scheme) + emitf(Geno[0][i], v+1, error_prob, cross_scheme); beta[v][n_mar-1] = 0.0; } /* forward-backward equations */ for(j=1,j2=n_mar-2; j<n_mar; j++, j2--) { for(v=0; v<n_gen; v++) { alpha[v][j] = alpha[0][j-1] + stepf(1, v+1, cur_rf[j-1], cur_rf2[j-1], cross_scheme); beta[v][j2] = beta[0][j2+1] + stepf(v+1,1,cur_rf[j2], cur_rf2[j2], cross_scheme) + emitf(Geno[j2+1][i],1,error_prob, cross_scheme); for(v2=1; v2<n_gen; v2++) { alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] + stepf(v2+1,v+1,cur_rf[j-1],cur_rf2[j-1], cross_scheme)); beta[v][j2] = addlog(beta[v][j2], beta[v2][j2+1] + stepf(v+1,v2+1,cur_rf[j2],cur_rf2[j2], cross_scheme) + emitf(Geno[j2+1][i],v2+1,error_prob, cross_scheme)); } alpha[v][j] += emitf(Geno[j][i],v+1,error_prob, cross_scheme); } } for(j=0; j<n_mar-1; j++) { /* calculate gamma = log Pr(v1, v2, O) */ for(v=0, s=0.0; v<n_gen; v++) { for(v2=0; v2<n_gen; v2++) { gamma[v][v2] = alpha[v][j] + beta[v2][j+1] + emitf(Geno[j+1][i], v2+1, error_prob, cross_scheme) + stepf(v+1, v2+1, cur_rf[j], cur_rf2[j], cross_scheme); if(v==0 && v2==0) s = gamma[v][v2]; else s = addlog(s, gamma[v][v2]); } } for(v=0; v<n_gen; v++) { for(v2=0; v2<n_gen; v2++) { rf[j] += nrecf1(v+1,v2+1, cur_rf[j], cross_scheme) * exp(gamma[v][v2] - s); if(sexsp) rf2[j] += nrecf2(v+1,v2+1, cur_rf[j], cross_scheme) * exp(gamma[v][v2] - s); } } } } /* loop over individuals */ /* rescale */ for(j=0; j<n_mar-1; j++) { rf[j] /= (double)n_ind; if(rf[j] < tol/1000.0) rf[j] = tol/1000.0; else if(rf[j] > 0.5-tol/1000.0) rf[j] = 0.5-tol/1000.0; if(sexsp) { rf2[j] /= (double)n_ind; if(rf2[j] < tol/1000.0) rf2[j] = tol/1000.0; else if(rf2[j] > 0.5-tol/1000.0) rf2[j] = 0.5-tol/1000.0; } else rf2[j] = rf[j]; } if(verbose>1) { /* print estimates as we go along*/ Rprintf(" %4d ", it+1); maxdif=0.0; for(j=0; j<n_mar-1; j++) { temp = fabs(rf[j] - cur_rf[j])/(cur_rf[j]+tol*100.0); if(maxdif < temp) maxdif = temp; if(sexsp) { temp = fabs(rf2[j] - cur_rf2[j])/(cur_rf2[j]+tol*100.0); if(maxdif < temp) maxdif = temp; } /* bsy add */ if(verbose > 2) Rprintf("%d %f %f\n", j+1, cur_rf[j], rf[j]); /* bsy add */ } sprintf(text, "%s%s\n", " max rel've change = ", pattern); Rprintf(text, maxdif); } /* check convergence */ for(j=0, flag=0; j<n_mar-1; j++) { if(fabs(rf[j] - cur_rf[j]) > tol*(cur_rf[j]+tol*100.0) || (sexsp && fabs(rf2[j] - cur_rf2[j]) > tol*(cur_rf2[j]+tol*100.0))) { flag = 1; break; } } if(!flag) break; } /* end EM algorithm */ if(flag) warning("Didn't converge!\n"); /* calculate log likelihood */ *loglik = 0.0; for(i=0; i<n_ind; i++) { /* i = individual */ /* initialize alpha */ for(v=0; v<n_gen; v++) { alpha[v][0] = initf(v+1, cross_scheme) + emitf(Geno[0][i], v+1, error_prob, cross_scheme); } /* forward equations */ for(j=1; j<n_mar; j++) { for(v=0; v<n_gen; v++) { alpha[v][j] = alpha[0][j-1] + stepf(1, v+1, rf[j-1], rf2[j-1], cross_scheme); for(v2=1; v2<n_gen; v2++) alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] + stepf(v2+1,v+1,rf[j-1],rf2[j-1], cross_scheme)); alpha[v][j] += emitf(Geno[j][i],v+1,error_prob, cross_scheme); } } curloglik = alpha[0][n_mar-1]; for(v=1; v<n_gen; v++) curloglik = addlog(curloglik, alpha[v][n_mar-1]); *loglik += curloglik; } if(verbose) { if(verbose < 2) { /* print final estimates */ Rprintf(" no. iterations = %d\n", it+1); maxdif=0.0; for(j=0; j<n_mar-1; j++) { temp = fabs(rf[j] - cur_rf[j])/(cur_rf[j]+tol*100.0); if(maxdif < temp) maxdif = temp; if(sexsp) { temp = fabs(rf2[j] - cur_rf2[j])/(cur_rf2[j]+tol*100.0); if(maxdif < temp) maxdif = temp; } } sprintf(text, "%s%s\n", " max rel've change at last step = ", pattern); Rprintf(text, maxdif); } Rprintf(" loglik: %10.4lf\n\n", *loglik); } }
void fitqtl_imp_binary(int n_ind, int n_qtl, int *n_gen, int n_draws, int ***Draws, double **Cov, int n_cov, int *model, int n_int, double *pheno, int get_ests, double *lod, int *df, double *ests, double *ests_covar, double *design_mat, double tol, int maxit, int *matrix_rank) { /* create local variables */ int i, j, ii, jj, n_qc, itmp; /* loop variants and temp variables */ double llik, llik0, *LOD_array; double *the_ests, *the_covar, **TheEsts, ***TheCovar; double *dwork, **Ests_covar, tot_wt=0.0, *wts; double **Covar_mean, **Mean_covar, *mean_ests; /* for ests and cov matrix */ int *iwork, sizefull, n_trim, *index; /* number to trim from each end of the imputations */ n_trim = (int) floor( 0.5*log(n_draws)/log(2.0) ); /* initialization */ sizefull = 1; /* calculate the dimension of the design matrix for full model */ n_qc = n_qtl+n_cov; /* total number of QTLs and covariates */ /* for additive QTLs and covariates*/ for(i=0; i<n_qc; i++) sizefull += n_gen[i]; /* for interactions, loop thru all interactions */ for(i=0; i<n_int; i++) { for(j=0, itmp=1; j<n_qc; j++) { if(model[i*n_qc+j]) itmp *= n_gen[j]; } sizefull += itmp; } /* reorganize Ests_covar for easy use later */ /* and make space for estimates and covariance matrix */ if(get_ests) { reorg_errlod(sizefull, sizefull, ests_covar, &Ests_covar); allocate_double(sizefull*n_draws, &the_ests); allocate_double(sizefull*sizefull*n_draws, &the_covar); /* I need to save all of the estimates and covariance matrices */ reorg_errlod(sizefull, n_draws, the_ests, &TheEsts); reorg_genoprob(sizefull, sizefull, n_draws, the_covar, &TheCovar); allocate_dmatrix(sizefull, sizefull, &Mean_covar); allocate_dmatrix(sizefull, sizefull, &Covar_mean); allocate_double(sizefull, &mean_ests); allocate_double(n_draws, &wts); } /* allocate memory for working arrays, total memory is sizefull*n_ind+6*n_ind+4*sizefull for double array, and sizefull for integer array. All memory will be allocated one time and split later */ dwork = (double *)R_alloc(sizefull*n_ind+6*n_ind+4*sizefull, sizeof(double)); iwork = (int *)R_alloc(sizefull, sizeof(int)); index = (int *)R_alloc(n_draws, sizeof(int)); LOD_array = (double *)R_alloc(n_draws, sizeof(double)); /* calculate null model log10 likelihood */ llik0 = nullLODbin(pheno, n_ind); *matrix_rank = n_ind; /* loop over imputations */ for(i=0; i<n_draws; i++) { R_CheckUserInterrupt(); /* check for ^C */ /* calculate alternative model RSS */ llik = galtLODimpbin(pheno, n_ind, n_gen, n_qtl, Draws[i], Cov, n_cov, model, n_int, dwork, iwork, sizefull, get_ests, ests, Ests_covar, design_mat, tol, maxit, matrix_rank); /* calculate the LOD score in this imputation */ LOD_array[i] = (llik - llik0); /* if getting estimates, calculate the weights */ if(get_ests) { wts[i] = LOD_array[i]*log(10.0); if(i==0) tot_wt = wts[i]; else tot_wt = addlog(tot_wt, wts[i]); for(ii=0; ii<sizefull; ii++) { TheEsts[i][ii] = ests[ii]; for(jj=ii; jj<sizefull; jj++) TheCovar[i][ii][jj] = Ests_covar[ii][jj]; } } } /* end loop over imputations */ /* sort the lod scores, and trim the weights */ if(get_ests) { for(i=0; i<n_draws; i++) { index[i] = i; wts[i] = exp(wts[i]-tot_wt); } rsort_with_index(LOD_array, index, n_draws); for(i=0; i<n_trim; i++) wts[index[i]] = wts[index[n_draws-i-1]] = 0.0; /* re-scale wts */ tot_wt = 0.0; for(i=0; i<n_draws; i++) tot_wt += wts[i]; for(i=0; i<n_draws; i++) wts[i] /= tot_wt; } /* calculate the result LOD score */ *lod = wtaverage(LOD_array, n_draws); /* degree of freedom equals to the number of columns of x minus 1 (mean) */ *df = sizefull - 1; /* get means and variances and covariances of estimates */ if(get_ests) { for(i=0; i<n_draws; i++) { if(i==0) { for(ii=0; ii<sizefull; ii++) { mean_ests[ii] = TheEsts[i][ii] * wts[i]; for(jj=ii; jj<sizefull; jj++) { Mean_covar[ii][jj] = TheCovar[i][ii][jj] * wts[i]; Covar_mean[ii][jj] = 0.0; } } } else { for(ii=0; ii<sizefull; ii++) { mean_ests[ii] += TheEsts[i][ii]*wts[i]; for(jj=ii; jj<sizefull; jj++) { Mean_covar[ii][jj] += TheCovar[i][ii][jj]*wts[i]; Covar_mean[ii][jj] += (TheEsts[i][ii]-TheEsts[0][ii])* (TheEsts[i][jj]-TheEsts[0][jj])*wts[i]; } } } } for(i=0; i<sizefull; i++) { ests[i] = mean_ests[i]; for(j=i; j<sizefull; j++) { Covar_mean[i][j] = (Covar_mean[i][j] - (mean_ests[i]-TheEsts[0][i])* (mean_ests[j]-TheEsts[0][j]))*(double)n_draws/(double)(n_draws-1); Ests_covar[i][j] = Ests_covar[j][i] = Mean_covar[i][j] + Covar_mean[i][j]; } } } /* done getting estimates */ }
void scanone_em_covar(int n_ind, int n_pos, int n_gen, double ***Genoprob, double **Addcov, int n_addcov, double **Intcov, int n_intcov, double *pheno, double *weights, double *result, int maxit, double tol, int verbose, int *ind_noqtl) { int i, j, k, s, flag=0, n_par; double **wts, *param, *oldparam, regtol; double *x, *coef, *resid, *qty, *qraux, *work; int *jpvt, ny, ncol0, error_flag; double llik, oldllik=0.0, *work1, *work2, temp, sw; /* recenter phenotype to have mean 0, for possibly increased numerical stability */ for(i=0, temp=0.0; i<n_ind; i++) temp += pheno[i]; temp /= (double)n_ind; for(i=0; i<n_ind; i++) pheno[i] -= temp; n_par = 1 + n_gen + n_addcov + (n_gen-1)*n_intcov; /* Allocate space */ allocate_dmatrix(n_gen, n_ind, &wts); param = (double *)R_alloc(n_par, sizeof(double)); oldparam = (double *)R_alloc(n_par, sizeof(double)); work1 = (double *)R_alloc((n_par-1)*(n_par-1),sizeof(double)); work2 = (double *)R_alloc(n_par-1, sizeof(double)); ncol0 = 1+n_addcov; x = (double *)R_alloc(n_ind*ncol0, sizeof(double)); coef = (double *)R_alloc(ncol0, sizeof(double)); resid = (double *)R_alloc(n_ind, sizeof(double)); qty = (double *)R_alloc(n_ind, sizeof(double)); jpvt = (int *)R_alloc(ncol0, sizeof(int)); qraux = (double *)R_alloc(ncol0, sizeof(double)); work = (double *)R_alloc(2*ncol0, sizeof(double)); regtol = TOL; ny = 1; /* adjust phenotypes and covariates with weights */ /* Note: weights are actually sqrt(weights) */ sw = 0.0; for(i=0; i<n_ind; i++) { pheno[i] *= weights[i]; for(j=0; j<n_addcov; j++) Addcov[j][i] *= weights[i]; for(j=0; j<n_intcov; j++) Intcov[j][i] *= weights[i]; sw += log(weights[i]); /* sum of log weights */ } /* NULL model is now done in R ******************** (only do it once!) for(i=0; i<n_ind; i++) { x[i] = 1.0; for(j=0; j<n_addcov; j++) x[i+(j+1)*n_ind] = Addcov[j][i]; } j=0; F77_CALL(dqrls)(x, &n_ind, &ncol0, pheno, &ny, ®tol, coef, resid, qty, &k, jpvt, qraux, work); sigma0 = llik0 = 0.0; for(i=0; i<n_ind; i++) sigma0 += (resid[i] * resid[i]); sigma0 = sqrt(sigma0/(double)n_ind); for(i=0; i<n_ind; i++) llik0 += log10(dnorm(resid[i],0.0,sigma0,0)); Null model is now done in R ********************/ /* begin genome scan */ for(i=0; i<n_pos; i++) { /* loop over marker positions */ /* initial estimates */ for(j=0; j<n_ind; j++) for(k=0; k<n_gen; k++) wts[k][j] = Genoprob[k][i][j]; mstep_em_covar(n_ind, n_gen, Addcov, n_addcov, Intcov, n_intcov, pheno, weights, wts, oldparam, work1, work2, &error_flag, ind_noqtl); if(!error_flag) { /* only proceed if there's no error */ if(verbose) { estep_em_covar(n_ind, n_gen, i, Genoprob, Addcov, n_addcov, Intcov, n_intcov, pheno, weights, wts, oldparam, 0, ind_noqtl); oldllik = 0.0; for(k=0; k<n_ind; k++) { temp = 0.0; for(s=0; s < n_gen; s++) temp += wts[s][k]; oldllik += log(temp); } Rprintf(" %3d %12.6lf\n", i+1, oldllik); } /* begin EM iterations */ for(j=0; j<maxit; j++) { R_CheckUserInterrupt(); /* check for ^C */ estep_em_covar(n_ind, n_gen, i, Genoprob, Addcov, n_addcov, Intcov, n_intcov, pheno, weights, wts, oldparam, 1, ind_noqtl); mstep_em_covar(n_ind, n_gen, Addcov, n_addcov, Intcov, n_intcov, pheno, weights, wts, param, work1, work2, &error_flag, ind_noqtl); if(error_flag) { /* error: X'X singular; break out of EM */ flag=0; break; } if(verbose) { estep_em_covar(n_ind, n_gen, i, Genoprob, Addcov, n_addcov, Intcov, n_intcov, pheno, weights, wts, param, 0, ind_noqtl); llik = 0.0; for(k=0; k<n_ind; k++) { temp = 0.0; for(s=0; s < n_gen; s++) temp += wts[s][k]; llik += log(temp); } Rprintf(" %3d %4d %12.6lf\n", i+1, j+1, llik-oldllik); oldllik = llik; } /* check for convergence */ flag = 0; for(k=0; k<n_par; k++) { if(fabs(param[k]-oldparam[k]) > tol*(fabs(oldparam[k])+tol*100.0)) { flag = 1; break; } } if(!flag) break; for(k=0; k<n_par; k++) oldparam[k] = param[k]; } /* end of EM iterations */ if(flag) warning("Didn't converge!\n"); if(!error_flag) { /* skip if there was an error */ /* calculate log likelihood */ estep_em_covar(n_ind, n_gen, i, Genoprob, Addcov, n_addcov, Intcov, n_intcov, pheno, weights, wts, param, 0, ind_noqtl); llik = 0.0; for(k=0; k<n_ind; k++) { temp = 0.0; for(s=0; s < n_gen; s++) temp += wts[s][k]; llik += log(temp); } result[i] = -(llik+sw)/log(10.0); } else result[i] = NA_REAL; if(verbose) { if(error_flag) Rprintf(" %3d NA", i+1); else { Rprintf(" %3d %12.6lf\n", i+1, result[i]); Rprintf(" "); for(j=0; j<n_par; j++) Rprintf(" %7.4lf", param[j]); } Rprintf("\n\n"); } } /* no initial error */ } /* end loop over marker positions */ }