void R_scantwo_imp(int *n_ind, int *same_chr, int *n_pos1, int *n_pos2, int *n_gen1, int *n_gen2, int *n_draws, int *draws1, int *draws2, double *addcov, int *n_addcov, double *intcov, int *n_intcov, double *pheno, int *nphe, double *weights, double *result, int *n_col2drop, int *col2drop) { int ***Draws1, ***Draws2; double **Addcov, **Intcov; /* reorganize draws */ reorg_draws(*n_ind, *n_pos1, *n_draws, draws1, &Draws1); if(!(*same_chr)) reorg_draws(*n_ind, *n_pos2, *n_draws, draws2, &Draws2); /* reorganize addcov and intcov (if they are not empty) */ /* currently reorg_geno function is used to reorganized the data */ if(*n_addcov != 0) reorg_errlod(*n_ind, *n_addcov, addcov, &Addcov); if(*n_intcov != 0) reorg_errlod(*n_ind, *n_intcov, intcov, &Intcov); /* call the engine function scantwo_imp */ scantwo_imp(*n_ind, *same_chr, *n_pos1, *n_pos2, *n_gen1, *n_gen2, *n_draws, Draws1, Draws2, Addcov, *n_addcov, Intcov, *n_intcov, pheno, *nphe, weights, result, *n_col2drop, col2drop); }
void R_scantwo_2chr_em(int *n_ind, int *n_pos1, int *n_pos2, int *n_gen1, int *n_gen2, double *genoprob1, double *genoprob2, double *addcov, int *n_addcov, double *intcov, int *n_intcov, double *pheno, double *weights, double *result_full, double *result_add, int *maxit, double *tol, int *verbose) { double **Result_full, **Result_add, **Addcov=0, **Intcov=0; double ***Genoprob1, ***Genoprob2; reorg_genoprob(*n_ind, *n_pos1, *n_gen1, genoprob1, &Genoprob1); reorg_genoprob(*n_ind, *n_pos2, *n_gen2, genoprob2, &Genoprob2); reorg_errlod(*n_pos1, *n_pos2, result_full, &Result_full); reorg_errlod(*n_pos1, *n_pos2, result_add, &Result_add); /* reorganize addcov and intcov (if they are not empty) */ if(*n_addcov > 0) reorg_errlod(*n_ind, *n_addcov, addcov, &Addcov); if(*n_intcov > 0) reorg_errlod(*n_ind, *n_intcov, intcov, &Intcov); scantwo_2chr_em(*n_ind, *n_pos1, *n_pos2, *n_gen1, *n_gen2, Genoprob1, Genoprob2, Addcov, *n_addcov, Intcov, *n_intcov, pheno, weights, Result_full, Result_add, *maxit, *tol, *verbose); }
/* wrapper for calcPermPval */ void R_calcPermPval(double *peaks, int *nc_peaks, int *nr_peaks, double *perms, int *n_perms, double *pval) { double **Peaks, **Perms, **Pval; reorg_errlod(*nr_peaks, *nc_peaks, peaks, &Peaks); reorg_errlod(*n_perms, *nc_peaks, perms, &Perms); reorg_errlod(*nr_peaks, *nc_peaks, pval, &Pval); calcPermPval(Peaks, *nc_peaks, *nr_peaks, Perms, *n_perms, Pval); }
void R_discan_covar(int *n_ind, int *n_pos, int *n_gen, double *genoprob, double *addcov, int *n_addcov, double *intcov, int *n_intcov, int *pheno, double *start, double *result, int *maxit, double *tol, int *verbose, int *ind_noqtl) { double ***Genoprob, **Addcov, **Intcov; reorg_genoprob(*n_ind, *n_pos, *n_gen, genoprob, &Genoprob); if(*n_addcov > 0) reorg_errlod(*n_ind, *n_addcov, addcov, &Addcov); if(*n_intcov > 0) reorg_errlod(*n_ind, *n_intcov, intcov, &Intcov); discan_covar(*n_ind, *n_pos, *n_gen, Genoprob, Addcov, *n_addcov, Intcov, *n_intcov, pheno, start, result, *maxit, *tol, *verbose, ind_noqtl); }
void R_fitqtl_hk_binary(int *n_ind, int *n_qtl, int *n_gen, double *genoprob, int *n_cov, double *cov, int *model, int *n_int, double *pheno, int *get_ests, /* return variables */ double *lod, int *df, double *ests, double *ests_covar, double *design_mat, /* convergence */ double *tol, int *maxit) { double ***Genoprob=0, **Cov; int tot_gen, i, j, curpos; /* reorganize genotype probabilities */ if(*n_qtl > 0) { Genoprob = (double ***)R_alloc(*n_qtl, sizeof(double **)); tot_gen = 0; for(i=0; i < *n_qtl; i++) tot_gen += (n_gen[i]+1); Genoprob[0] = (double **)R_alloc(tot_gen, sizeof(double *)); for(i=1; i < *n_qtl; i++) Genoprob[i] = Genoprob[i-1] + (n_gen[i-1]+1); for(i=0, curpos=0; i < *n_qtl; i++) for(j=0; j<n_gen[i]+1; j++, curpos += *n_ind) Genoprob[i][j] = genoprob + curpos; } /* reorganize cov (if they are not empty) */ /* currently reorg_errlod function is used to reorganize the data */ if(*n_cov != 0) reorg_errlod(*n_ind, *n_cov, cov, &Cov); fitqtl_hk_binary(*n_ind, *n_qtl, n_gen, Genoprob, Cov, *n_cov, model, *n_int, pheno, *get_ests, lod, df, ests, ests_covar, design_mat, *tol, *maxit); }
void fitqtl_hk_binary(int n_ind, int n_qtl, int *n_gen, double ***Genoprob, double **Cov, int n_cov, int *model, int n_int, double *pheno, int get_ests, double *lod, int *df, double *ests, double *ests_covar, double *design_mat, double tol, int maxit) { /* create local variables */ int i, j, n_qc, itmp; /* loop variants and temp variables */ double llik, llik0; double *dwork, **Ests_covar; int *iwork, sizefull; /* initialization */ sizefull = 1; /* calculate the dimension of the design matrix for full model */ n_qc = n_qtl+n_cov; /* total number of QTLs and covariates */ /* for additive QTLs and covariates*/ for(i=0; i<n_qc; i++) sizefull += n_gen[i]; /* for interactions, loop thru all interactions */ for(i=0; i<n_int; i++) { for(j=0, itmp=1; j<n_qc; j++) { if(model[i*n_qc+j]) itmp *= n_gen[j]; } sizefull += itmp; } /* reorganize Ests_covar for easy use later */ /* and make space for estimates and covariance matrix */ if(get_ests) reorg_errlod(sizefull, sizefull, ests_covar, &Ests_covar); /* allocate memory for working arrays, total memory is sizefull*n_ind+6*n_ind+4*sizefull for double array, and sizefull for integer array. All memory will be allocated one time and split later */ dwork = (double *)R_alloc(sizefull*n_ind+6*n_ind+4*sizefull, sizeof(double)); iwork = (int *)R_alloc(sizefull, sizeof(int)); /* calculate null model log10 likelihood */ llik0 = nullLODbin(pheno, n_ind); R_CheckUserInterrupt(); /* check for ^C */ /* fit the model */ llik = galtLODHKbin(pheno, n_ind, n_gen, n_qtl, Genoprob, Cov, n_cov, model, n_int, dwork, iwork, sizefull, get_ests, ests, Ests_covar, design_mat, tol, maxit); *lod = llik - llik0; /* degree of freedom equals to the number of columns of x minus 1 (mean) */ *df = sizefull - 1; }
void R_scanone_mr(int *n_ind, int *n_pos, int *n_gen, int *geno, double *addcov, int *n_addcov, double *intcov, int *n_intcov, double *pheno, double *weights, double *result) { int **Geno; double **Addcov=0, **Intcov=0; reorg_geno(*n_ind, *n_pos, geno, &Geno); /* reorganize addcov and intcov (if they are not empty) */ if(*n_addcov > 0) reorg_errlod(*n_ind, *n_addcov, addcov, &Addcov); if(*n_intcov > 0) reorg_errlod(*n_ind, *n_intcov, intcov, &Intcov); scanone_mr(*n_ind, *n_pos, *n_gen, Geno, Addcov, *n_addcov, Intcov, *n_intcov, pheno, weights, result); }
void R_scantwo_1chr_em(int *n_ind, int *n_pos, int *n_gen, double *pairprob, double *addcov, int *n_addcov, double *intcov, int *n_intcov, double *pheno, double *weights, double *result, int *maxit, double *tol, int *verbose, int *n_col2drop, int *col2drop) { double **Result, **Addcov=0, **Intcov=0, *****Pairprob; reorg_pairprob(*n_ind, *n_pos, *n_gen, pairprob, &Pairprob); reorg_errlod(*n_pos, *n_pos, result, &Result); /* reorganize addcov and intcov (if they are not empty) */ if(*n_addcov > 0) reorg_errlod(*n_ind, *n_addcov, addcov, &Addcov); if(*n_intcov > 0) reorg_errlod(*n_ind, *n_intcov, intcov, &Intcov); scantwo_1chr_em(*n_ind, *n_pos, *n_gen, Pairprob, Addcov, *n_addcov, Intcov, *n_intcov, pheno, weights, Result, *maxit, *tol, *verbose, *n_col2drop, col2drop); }
void R_locate_xo(int *n_ind, int *n_mar, int *type, int *geno, double *map, double *location, int *nseen, int *ileft, int *iright, double *left, double *right, int *ntyped, int *full_info) { int **Geno, **iLeft, **iRight, **nTyped; double **Location, **Left, **Right; reorg_geno(*n_ind, *n_mar, geno, &Geno); reorg_errlod(*n_ind, (*type+1)*(*n_mar-1), location, &Location); if(*full_info) { reorg_errlod(*n_ind, (*type+1)*(*n_mar-1), left, &Left); reorg_errlod(*n_ind, (*type+1)*(*n_mar-1), right, &Right); reorg_geno(*n_ind, (*type+1)*(*n_mar-1), ileft, &iLeft); reorg_geno(*n_ind, (*type+1)*(*n_mar-1), iright, &iRight); reorg_geno(*n_ind, (*type+1)*(*n_mar-1), ntyped, &nTyped); } locate_xo(*n_ind, *n_mar, *type, Geno, map, Location, nseen, iLeft, iRight, Left, Right, nTyped, *full_info); }
void R_scanone_imp(int *n_ind, int *n_pos, int *n_gen, int *n_draws, int *draws, double *addcov, int *n_addcov, double *intcov, int *n_intcov, double *pheno, int *nphe, double *weights, double *result, int *ind_noqtl) { /* reorganize draws */ int ***Draws; double **Addcov, **Intcov, **Result; reorg_draws(*n_ind, *n_pos, *n_draws, draws, &Draws); reorg_errlod(*n_pos, *nphe, result, &Result); /* reorganize addcov and intcov (if they are not empty) */ /* currently reorg_errlod function is used to reorganize the data */ if(*n_addcov != 0) reorg_errlod(*n_ind, *n_addcov, addcov, &Addcov); if(*n_intcov != 0) reorg_errlod(*n_ind, *n_intcov, intcov, &Intcov); scanone_imp(*n_ind, *n_pos, *n_gen, *n_draws, Draws, Addcov, *n_addcov, Intcov, *n_intcov, pheno, *nphe, weights, Result, ind_noqtl); }
void R_scantwo_1chr_hk(int *n_ind, int *n_pos, int *n_gen, double *genoprob, double *pairprob, double *addcov, int *n_addcov, double *intcov, int *n_intcov, double *pheno, int* nphe, double *weights, double *result, int *n_col2drop, int *col2drop) { double ***Genoprob, ***Result, **Addcov=0, **Intcov=0, *****Pairprob; reorg_genoprob(*n_ind, *n_pos, *n_gen, genoprob, &Genoprob); reorg_pairprob(*n_ind, *n_pos, *n_gen, pairprob, &Pairprob); reorg_genoprob(*n_pos, *n_pos, *nphe, result, &Result); /* reorganize addcov and intcov (if they are not empty) */ if(*n_addcov > 0) reorg_errlod(*n_ind, *n_addcov, addcov, &Addcov); if(*n_intcov > 0) reorg_errlod(*n_ind, *n_intcov, intcov, &Intcov); scantwo_1chr_hk(*n_ind, *n_pos, *n_gen, Genoprob, Pairprob, Addcov, *n_addcov, Intcov, *n_intcov, pheno, *nphe, weights, Result, *n_col2drop, col2drop); }
void est_rf_bc(int *n_ind, int *n_mar, int *geno, double *rf) { int i, j1, j2, **Geno, a, b; double **Rf; /* reorganize geno and rf */ reorg_geno(*n_ind, *n_mar, geno, &Geno); reorg_errlod(*n_mar, *n_mar, rf, &Rf); for(j1=0; j1< *n_mar; j1++) { /* count meioses */ a = 0; for(i=0; i < *n_ind; i++) { if(Geno[j1][i] != 0) a++; } Rf[j1][j1] = (double) a; for(j2=j1+1; j2< *n_mar; j2++) { a=b=0; for(i=0; i< *n_ind; i++) { if(Geno[j1][i] != 0 && Geno[j2][i] != 0) { a++; if(Geno[j1][i] != Geno[j2][i]) b++; } } if(a != 0) { /* at least one informative meiosis */ /* if(b > a/2) b = a/2; */ Rf[j1][j2] = (double)b/(double)a; if(b==0) /* no recombinations */ Rf[j2][j1] = (double)a*log10(1.0-Rf[j1][j2]); else Rf[j2][j1] = (double)b*log10(Rf[j1][j2]) + (double)(a-b)*log10(1.0-Rf[j1][j2]); Rf[j2][j1] += (double)a*log10(2.0); } else { Rf[j1][j2] = NA_REAL; Rf[j2][j1] = 0.0; } } /* end loops over markers */ } }
void R_fitqtl_imp(int *n_ind, int *n_qtl, int *n_gen, int *n_draws, int *draws, int *n_cov, double *cov, int *model, int *n_int, double *pheno, int *get_ests, /* return variables */ double *lod, int *df, double *ests, double *ests_covar, double *design_mat, int *matrix_rank, double *residuals) { int ***Draws; double **Cov=0; /* reorganize draws */ reorg_draws(*n_ind, *n_qtl, *n_draws, draws, &Draws); /* reorganize cov (if they are not empty) */ /* currently reorg_errlod function is used to reorganize the data */ if(*n_cov != 0) reorg_errlod(*n_ind, *n_cov, cov, &Cov); fitqtl_imp(*n_ind, *n_qtl, n_gen, *n_draws, Draws, Cov, *n_cov, model, *n_int, pheno, *get_ests, lod, df, ests, ests_covar, design_mat, matrix_rank, residuals); }
void calc_errorlod(int n_ind, int n_mar, int n_gen, int *geno, double error_prob, double *genoprob, double *errlod, double errorlod(int, double *, double)) { int i, j, k, **Geno; double *p, ***Genoprob, **Errlod; /* reorganize geno, genoprob and errlod */ reorg_geno(n_ind, n_mar, geno, &Geno); reorg_genoprob(n_ind, n_mar, n_gen, genoprob, &Genoprob); reorg_errlod(n_ind, n_mar, errlod, &Errlod); allocate_double(n_gen, &p); for(i=0; i<n_ind; i++) { R_CheckUserInterrupt(); /* check for ^C */ for(j=0; j<n_mar; j++) { for(k=0; k<n_gen; k++) p[k] = Genoprob[k][j][i]; Errlod[j][i] = errorlod(Geno[j][i], p, error_prob); } } }
void R_summary_scantwo(int *n_pos, int *n_phe, double *lod, int *n_chr, int *chr, double *pos, int *xchr, double *scanoneX, int *n_chrpair, int *chr1, int *chr2, int *chrpair, double *pos1_jnt, double *pos2_jnt, double *pos1_add, double *pos2_add, double *pos1_int, double *pos2_int, double *jnt_lod_full, double *jnt_lod_add, double *add_lod_full, double *add_lod_add, double *int_lod_full, double *int_lod_add, double *lod_1qtl) { double ***Lod, **ScanoneX; double **Pos1_jnt, **Pos2_jnt; double **Pos1_add, **Pos2_add; double **Pos1_int, **Pos2_int; double **JNT_lod_full, **JNT_lod_add; double **ADD_lod_full, **ADD_lod_add; double **INT_lod_full, **INT_lod_add; double **LOD_1qtl; int i, j, k, **Chrpair; *n_chrpair = *n_chr*(*n_chr+1)/2; /* re-organize matrices */ reorg_genoprob(*n_pos, *n_pos, *n_phe, lod, &Lod); reorg_errlod(*n_chrpair, *n_phe, pos1_jnt, &Pos1_jnt); reorg_errlod(*n_chrpair, *n_phe, pos2_jnt, &Pos2_jnt); reorg_errlod(*n_chrpair, *n_phe, pos1_add, &Pos1_add); reorg_errlod(*n_chrpair, *n_phe, pos2_add, &Pos2_add); reorg_errlod(*n_chrpair, *n_phe, pos1_int, &Pos1_int); reorg_errlod(*n_chrpair, *n_phe, pos2_int, &Pos2_int); reorg_errlod(*n_chrpair, *n_phe, jnt_lod_full, &JNT_lod_full); reorg_errlod(*n_chrpair, *n_phe, jnt_lod_add, &JNT_lod_add); reorg_errlod(*n_chrpair, *n_phe, add_lod_full, &ADD_lod_full); reorg_errlod(*n_chrpair, *n_phe, add_lod_add, &ADD_lod_add); reorg_errlod(*n_chrpair, *n_phe, int_lod_full, &INT_lod_full); reorg_errlod(*n_chrpair, *n_phe, int_lod_add, &INT_lod_add); reorg_errlod(*n_chrpair, *n_phe, lod_1qtl, &LOD_1qtl); reorg_errlod(*n_pos, *n_phe, scanoneX, &ScanoneX); reorg_geno(*n_chr, *n_chr, chrpair, &Chrpair); for(i=0, k=0; i<*n_chr; i++) { for(j=i; j<*n_chr; j++, k++) { chr1[k] = i; chr2[k] = j; Chrpair[j][i] = Chrpair[i][j] = k; } } summary_scantwo(*n_pos, *n_phe, Lod, *n_chr, chr, pos, xchr, ScanoneX, *n_chrpair, Chrpair, Pos1_jnt, Pos2_jnt, Pos1_add, Pos2_add, Pos1_int, Pos2_int, JNT_lod_full, JNT_lod_add, ADD_lod_full, ADD_lod_add, INT_lod_full, INT_lod_add, LOD_1qtl); }
void scantwo_2chr_em(int n_ind, int n_pos1, int n_pos2, int n_gen1, int n_gen2, double ***Genoprob1, double ***Genoprob2, double **Addcov, int n_addcov, double **Intcov, int n_intcov, double *pheno, double *weights, double **Result_full, double **Result_add, int maxit, double tol, int verbose) { int error_flag, i, i1, i2, k1, k2, j, m, n_col[2], nit[2], r, flag=0; double *param, *oldparam, ***Wts12, **Wts1, **Wts2; double *wts, *work1, *work2, temp, ***Probs, oldllik=0.0, llik[2], sw; int n_col2drop=0, *allcol2drop=0; n_col[0] = (n_gen1+n_gen2-1) + n_addcov + (n_gen1+n_gen2-2)*n_intcov; n_col[1] = n_gen1*n_gen2 + n_addcov + (n_gen1*n_gen2-1)*n_intcov; /* allocate workspaces */ wts = (double *)R_alloc((2*n_gen1*n_gen2+n_gen1+n_gen2)*n_ind, sizeof(double)); reorg_errlod(n_ind, n_gen1, wts, &Wts1); reorg_errlod(n_ind, n_gen2, wts+n_gen1*n_ind, &Wts2); reorg_genoprob(n_ind, n_gen2, n_gen1, wts+(n_gen1+n_gen2)*n_ind, &Wts12); reorg_genoprob(n_ind, n_gen2, n_gen1, wts+(n_gen1*n_gen2+n_gen1+n_gen2)*n_ind, &Probs); work1 = (double *)R_alloc(n_col[1]*n_col[1], sizeof(double)); work2 = (double *)R_alloc(n_col[1], sizeof(double)); param = (double *)R_alloc(n_col[1]+1, sizeof(double)); oldparam = (double *)R_alloc(n_col[1]+1, sizeof(double)); /* recenter phenotype to have mean 0, for possibly increased numerical stability */ for(j=0, temp=0.0; j<n_ind; j++) temp += pheno[j]; temp /= (double)n_ind; for(j=0; j<n_ind; j++) pheno[j] -= temp; /* adjust phenotypes and covariates with weights */ /* Note: weights are actually sqrt(weights) */ sw = 0.0; for(i=0; i<n_ind; i++) { pheno[i] *= weights[i]; for(j=0; j<n_addcov; j++) Addcov[j][i] *= weights[i]; for(j=0; j<n_intcov; j++) Intcov[j][i] *= weights[i]; sw += log(weights[i]); /* sum of log weights */ } sw /= log(10.0); /* make log 10 */ /* begin loop over pairs of positions */ for(i1=0; i1<n_pos1; i1++) { for(i2=0; i2<n_pos2; i2++) { /* loop over positions */ nit[0] = nit[1] = 0; llik[0] = llik[1] = NA_REAL; /* copy the parts from Pairprob into Probs */ for(j=0; j<n_ind; j++) for(k1=0; k1<n_gen1; k1++) for(k2=0; k2<n_gen2; k2++) Probs[k1][k2][j] = Genoprob1[k1][i1][j]*Genoprob2[k2][i2][j]; for(m=0; m<2; m++) { /* loop over add've model and full model */ /* initial estimates */ /* marginal probabilities */ for(j=0; j<n_ind; j++) { for(k1=0; k1<n_gen1; k1++) { Wts1[k1][j] = 0.0; for(k2=0; k2<n_gen2; k2++) Wts1[k1][j] += Probs[k1][k2][j]; } for(k2=0; k2<n_gen2; k2++) { Wts2[k2][j] = 0.0; for(k1=0; k1<n_gen1; k1++) Wts2[k2][j] += Probs[k1][k2][j]; } } scantwo_em_mstep(n_ind, n_gen1, n_gen2, Addcov, n_addcov, Intcov, n_intcov, pheno, weights, Probs, Wts1, Wts2, oldparam, m, work1, work2, &error_flag, n_col2drop, allcol2drop, verbose); if(error_flag) { if(verbose>1) Rprintf(" [%3d %3d] %1d: Initial model had error.\n", i1+1, i2+1, m+1); } else { /* only proceed if there's no error */ oldllik = scantwo_em_loglik(n_ind, n_gen1, n_gen2, Probs, Wts12, Wts1, Wts2, Addcov, n_addcov, Intcov, n_intcov, pheno, weights, oldparam, m, n_col2drop, allcol2drop); if(verbose>2) Rprintf(" [%3d %3d] %1d %9.3lf\n", i1+1, i2+1, m+1, oldllik); for(r=0; r<maxit; r++) { /* loop over iterations */ R_CheckUserInterrupt(); /* check for ^C */ scantwo_em_estep(n_ind, n_gen1, n_gen2, Probs, Wts12, Wts1, Wts2, Addcov, n_addcov, Intcov, n_intcov, pheno, weights, oldparam, m, 1, n_col2drop, allcol2drop); scantwo_em_mstep(n_ind, n_gen1, n_gen2, Addcov, n_addcov, Intcov, n_intcov, pheno, weights, Wts12, Wts1, Wts2, param, m, work1, work2, &error_flag, n_col2drop, allcol2drop, verbose); if(error_flag) { flag=0; if(verbose>1) Rprintf(" [%3d %3d] %1d %4d: Error in mstep\n", i1+1, i2+1, m+1, r+1); break; } llik[m] = scantwo_em_loglik(n_ind, n_gen1, n_gen2, Probs, Wts12, Wts1, Wts2, Addcov, n_addcov, Intcov, n_intcov, pheno, weights, param, m, n_col2drop, allcol2drop); if(verbose>1) { /* print log likelihood */ if(verbose>2) Rprintf(" [%3d %3d] %1d %4d %9.6lf\n", i1+1, i2+1, m+1, r+1, (llik[m]-oldllik)); if(llik[m] < oldllik-tol) Rprintf("** [%3d %3d] %1d %4d %9.6lf **\n", i1+1, i2+1, m+1, r+1, (llik[m]-oldllik)); if(verbose>3) { /* print parameters */ for(j=0; j<n_col[m]; j++) Rprintf(" %7.3lf", param[j]); Rprintf("\n"); } } flag = 1; /* use log likelihood only to check for convergence */ if(llik[m]-oldllik < tol) { flag = 0; break; } for(j=0; j<n_col[m]+1; j++) oldparam[j] = param[j]; oldllik = llik[m]; } /* loop over EM iterations */ nit[m] = r+1; if(flag) { if(verbose>1) Rprintf("** [%3d %3d] %1d Didn't converge! **\n", i1+1, i2+1, m+1); warning("Didn't converge!\n"); } } /* no error in getting initial estimates */ } /* loop over model */ if(verbose>1) { /* print likelihoods */ Rprintf(" [%3d %3d] %4d %4d %9.6lf", i1+1, i2+1, nit[0], nit[1], llik[1]-llik[0]); if(llik[1] < llik[0]) Rprintf(" ****"); Rprintf("\n"); } Result_add[i2][i1] = -(llik[0]+sw); Result_full[i2][i1] = -(llik[1]+sw); /* sw = sum[log10(weights)] */ } /* position 2 */ } /* position 1 */ }
double discan_covar_em(int n_ind, int pos, int n_gen, int n_par, double ***Genoprob, double **Addcov, int n_addcov, double **Intcov, int n_intcov, int *pheno, double *start, int maxit, double tol, int verbose, int *ind_noqtl) { int i, j, k, kk, s, offset; double *jac, **Jac, *grad; double *temp, **wts; double fit, **temp1, **temp2; double *temp1s, *temp2s; double *newpar, *curpar; double newllik=0.0, curllik, sum; int info; double rcond, *junk; /* allocate space */ allocate_double(n_par*n_par, &jac); reorg_errlod(n_par, n_par, jac, &Jac); allocate_double(n_par, &grad); allocate_double(n_ind*n_gen, &temp); reorg_errlod(n_gen, n_ind, temp, &wts); allocate_double(n_ind*n_gen, &temp); reorg_errlod(n_gen, n_ind, temp, &temp1); allocate_double(n_ind*n_gen, &temp); reorg_errlod(n_gen, n_ind, temp, &temp2); allocate_double(n_ind, &temp1s); allocate_double(n_ind, &temp2s); allocate_double(n_par, &newpar); allocate_double(n_par, &curpar); allocate_double(n_par, &junk); /* initial wts */ for(i=0; i<n_ind; i++) for(j=0; j<n_gen; j++) wts[i][j] = Genoprob[j][pos][i]; for(i=0; i<n_par; i++) curpar[i] = start[i]; curllik = discan_covar_loglik(n_ind, pos, n_gen, n_par, curpar, Genoprob, Addcov, n_addcov, Intcov, n_intcov, pheno, ind_noqtl); if(verbose) Rprintf(" %10.5f\n", curllik); for(s=0; s<maxit; s++) { R_CheckUserInterrupt(); /* check for ^C */ /****** M STEP ******/ /* 0's in gradient and Jacobian */ for(j=0; j<n_par; j++) { grad[j] = 0.0; for(k=0; k<n_par; k++) Jac[j][k] = 0.0; } /* calculate gradient and Jacobian */ for(i=0; i<n_ind; i++) { temp1s[i] = temp2s[i] = 0.0; for(j=0; j<n_gen; j++) { if(!ind_noqtl[i]) fit = curpar[j]; else fit = 0.0; if(n_addcov > 0) { for(k=0; k<n_addcov; k++) fit += Addcov[k][i] * curpar[n_gen+k]; } if(!ind_noqtl[i] && n_intcov > 0 && j<n_gen-1) { for(k=0; k<n_intcov; k++) fit += Intcov[k][i] * curpar[n_gen+n_addcov+n_intcov*j+k]; } fit = exp(fit)/(1.0+exp(fit)); temp1s[i] += (temp1[i][j] = wts[i][j]*((double)pheno[i] - fit)); temp2s[i] += (temp2[i][j] = wts[i][j]*fit*(1.0-fit)); } } for(j=0; j<n_gen; j++) { for(i=0; i<n_ind; i++) { if(!ind_noqtl[i]) { grad[j] += temp1[i][j]; Jac[j][j] += temp2[i][j]; } } } for(k=0; k<n_addcov; k++) { for(i=0; i<n_ind; i++) { grad[k + n_gen] += Addcov[k][i] * temp1s[i]; for(kk=k; kk<n_addcov; kk++) Jac[kk+n_gen][k+n_gen] += Addcov[k][i]*Addcov[kk][i] * temp2s[i]; if(!ind_noqtl[i]) { for(j=0; j<n_gen; j++) Jac[k+n_gen][j] += Addcov[k][i] * temp2[i][j]; } } } for(j=0; j<n_gen-1; j++) { offset = n_gen + n_addcov + n_intcov*j; for(k=0; k<n_intcov; k++) { for(i=0; i<n_ind; i++) { if(!ind_noqtl[i]) { grad[k + offset] += Intcov[k][i] * temp1[i][j]; for(kk=k; kk<n_intcov; kk++) Jac[kk+offset][k+offset] += Intcov[k][i]*Intcov[kk][i]*temp2[i][j]; for(kk=0; kk<n_addcov; kk++) Jac[k+offset][kk+n_gen] += Intcov[k][i]*Addcov[kk][i]*temp2[i][j]; Jac[k+offset][j] += Intcov[k][i]*temp2[i][j]; } } } } if(verbose > 1) { Rprintf("grad: "); for(j=0; j<n_par; j++) Rprintf("%f ", grad[j]); Rprintf("\n"); Rprintf("Jac:\n"); for(j=0; j<n_par; j++) { for(k=0; k<n_par; k++) Rprintf("%f ", Jac[j][k]); Rprintf("\n"); } Rprintf("\n"); } /* dpoco and dposl from Linpack to calculate Jac^-1 %*% grad */ F77_CALL(dpoco)(jac, &n_par, &n_par, &rcond, junk, &info); if(fabs(rcond) < TOL || info != 0) { warning("Jacobian matrix is singular\n"); return(NA_REAL); } F77_CALL(dposl)(jac, &n_par, &n_par, grad); if(verbose > 1) { Rprintf(" solution: "); for(j=0; j<n_par; j++) Rprintf(" %f", grad[j]); Rprintf("\n"); } /* revised estimates */ for(j=0; j<n_par; j++) newpar[j] = curpar[j] + grad[j]; if(verbose>1) { for(j=0; j<n_par; j++) Rprintf("%f ", newpar[j]); Rprintf("\n"); } newllik = discan_covar_loglik(n_ind, pos, n_gen, n_par, newpar, Genoprob, Addcov, n_addcov, Intcov, n_intcov, pheno, ind_noqtl); if(verbose) { Rprintf(" %3d %10.5f %10.5f", s+1, newllik, newllik - curllik); if(newllik < curllik) Rprintf(" ***"); Rprintf("\n"); } if(newllik - curllik < tol) return(newllik); for(j=0; j<n_par; j++) curpar[j] = newpar[j]; curllik = newllik; /* e-step */ for(i=0; i<n_ind; i++) { sum=0.0; for(j=0; j<n_gen; j++) { fit = curpar[j]; if(n_addcov > 0) { for(k=0; k<n_addcov; k++) fit += Addcov[k][i] * curpar[n_gen+k]; } if(n_intcov > 0 && j<n_gen-1) { for(k=0; k<n_intcov; k++) fit += Intcov[k][i] * curpar[n_gen+n_addcov+n_intcov*j+k]; } fit = exp(fit); if(pheno[i]) sum += (wts[i][j] = Genoprob[j][pos][i] * fit/(1.0+fit)); else sum += (wts[i][j] = Genoprob[j][pos][i] / (1.0+fit)); } for(j=0; j<n_gen; j++) wts[i][j] /= sum; } } /* end of em-step */ /* didn't converge */ return(newllik); }
void fitqtl_imp_binary(int n_ind, int n_qtl, int *n_gen, int n_draws, int ***Draws, double **Cov, int n_cov, int *model, int n_int, double *pheno, int get_ests, double *lod, int *df, double *ests, double *ests_covar, double *design_mat, double tol, int maxit, int *matrix_rank) { /* create local variables */ int i, j, ii, jj, n_qc, itmp; /* loop variants and temp variables */ double llik, llik0, *LOD_array; double *the_ests, *the_covar, **TheEsts, ***TheCovar; double *dwork, **Ests_covar, tot_wt=0.0, *wts; double **Covar_mean, **Mean_covar, *mean_ests; /* for ests and cov matrix */ int *iwork, sizefull, n_trim, *index; /* number to trim from each end of the imputations */ n_trim = (int) floor( 0.5*log(n_draws)/log(2.0) ); /* initialization */ sizefull = 1; /* calculate the dimension of the design matrix for full model */ n_qc = n_qtl+n_cov; /* total number of QTLs and covariates */ /* for additive QTLs and covariates*/ for(i=0; i<n_qc; i++) sizefull += n_gen[i]; /* for interactions, loop thru all interactions */ for(i=0; i<n_int; i++) { for(j=0, itmp=1; j<n_qc; j++) { if(model[i*n_qc+j]) itmp *= n_gen[j]; } sizefull += itmp; } /* reorganize Ests_covar for easy use later */ /* and make space for estimates and covariance matrix */ if(get_ests) { reorg_errlod(sizefull, sizefull, ests_covar, &Ests_covar); allocate_double(sizefull*n_draws, &the_ests); allocate_double(sizefull*sizefull*n_draws, &the_covar); /* I need to save all of the estimates and covariance matrices */ reorg_errlod(sizefull, n_draws, the_ests, &TheEsts); reorg_genoprob(sizefull, sizefull, n_draws, the_covar, &TheCovar); allocate_dmatrix(sizefull, sizefull, &Mean_covar); allocate_dmatrix(sizefull, sizefull, &Covar_mean); allocate_double(sizefull, &mean_ests); allocate_double(n_draws, &wts); } /* allocate memory for working arrays, total memory is sizefull*n_ind+6*n_ind+4*sizefull for double array, and sizefull for integer array. All memory will be allocated one time and split later */ dwork = (double *)R_alloc(sizefull*n_ind+6*n_ind+4*sizefull, sizeof(double)); iwork = (int *)R_alloc(sizefull, sizeof(int)); index = (int *)R_alloc(n_draws, sizeof(int)); LOD_array = (double *)R_alloc(n_draws, sizeof(double)); /* calculate null model log10 likelihood */ llik0 = nullLODbin(pheno, n_ind); *matrix_rank = n_ind; /* loop over imputations */ for(i=0; i<n_draws; i++) { R_CheckUserInterrupt(); /* check for ^C */ /* calculate alternative model RSS */ llik = galtLODimpbin(pheno, n_ind, n_gen, n_qtl, Draws[i], Cov, n_cov, model, n_int, dwork, iwork, sizefull, get_ests, ests, Ests_covar, design_mat, tol, maxit, matrix_rank); /* calculate the LOD score in this imputation */ LOD_array[i] = (llik - llik0); /* if getting estimates, calculate the weights */ if(get_ests) { wts[i] = LOD_array[i]*log(10.0); if(i==0) tot_wt = wts[i]; else tot_wt = addlog(tot_wt, wts[i]); for(ii=0; ii<sizefull; ii++) { TheEsts[i][ii] = ests[ii]; for(jj=ii; jj<sizefull; jj++) TheCovar[i][ii][jj] = Ests_covar[ii][jj]; } } } /* end loop over imputations */ /* sort the lod scores, and trim the weights */ if(get_ests) { for(i=0; i<n_draws; i++) { index[i] = i; wts[i] = exp(wts[i]-tot_wt); } rsort_with_index(LOD_array, index, n_draws); for(i=0; i<n_trim; i++) wts[index[i]] = wts[index[n_draws-i-1]] = 0.0; /* re-scale wts */ tot_wt = 0.0; for(i=0; i<n_draws; i++) tot_wt += wts[i]; for(i=0; i<n_draws; i++) wts[i] /= tot_wt; } /* calculate the result LOD score */ *lod = wtaverage(LOD_array, n_draws); /* degree of freedom equals to the number of columns of x minus 1 (mean) */ *df = sizefull - 1; /* get means and variances and covariances of estimates */ if(get_ests) { for(i=0; i<n_draws; i++) { if(i==0) { for(ii=0; ii<sizefull; ii++) { mean_ests[ii] = TheEsts[i][ii] * wts[i]; for(jj=ii; jj<sizefull; jj++) { Mean_covar[ii][jj] = TheCovar[i][ii][jj] * wts[i]; Covar_mean[ii][jj] = 0.0; } } } else { for(ii=0; ii<sizefull; ii++) { mean_ests[ii] += TheEsts[i][ii]*wts[i]; for(jj=ii; jj<sizefull; jj++) { Mean_covar[ii][jj] += TheCovar[i][ii][jj]*wts[i]; Covar_mean[ii][jj] += (TheEsts[i][ii]-TheEsts[0][ii])* (TheEsts[i][jj]-TheEsts[0][jj])*wts[i]; } } } } for(i=0; i<sizefull; i++) { ests[i] = mean_ests[i]; for(j=i; j<sizefull; j++) { Covar_mean[i][j] = (Covar_mean[i][j] - (mean_ests[i]-TheEsts[0][i])* (mean_ests[j]-TheEsts[0][j]))*(double)n_draws/(double)(n_draws-1); Ests_covar[i][j] = Ests_covar[j][i] = Mean_covar[i][j] + Covar_mean[i][j]; } } } /* done getting estimates */ }
void est_rf(int n_ind, int n_mar, int *geno, double *rf, double erec(int, int, double, int *), double logprec(int, int, double, int *), int maxit, double tol, int meioses_per) { int i, j1, j2, s, **Geno, n_mei=0, flag=0; double **Rf, next_rf=0.0, cur_rf=0.0; int cross_scheme[2]; /* cross scheme hidden in rf argument; used by hmm_bcsft */ cross_scheme[0] = rf[0]; cross_scheme[1] = rf[1]; rf[0] = 0.0; rf[1] = 0.0; /* reorganize geno and rf */ reorg_geno(n_ind, n_mar, geno, &Geno); reorg_errlod(n_mar, n_mar, rf, &Rf); for(j1=0; j1<n_mar; j1++) { /* count number of meioses */ for(i=0, n_mei=0; i<n_ind; i++) if(Geno[j1][i] != 0) n_mei += meioses_per; Rf[j1][j1] = (double) n_mei; R_CheckUserInterrupt(); /* check for ^C */ for(j2=j1+1; j2<n_mar; j2++) { /* count meioses */ n_mei = flag = 0; for(i=0; i<n_ind; i++) { if(Geno[j1][i] != 0 && Geno[j2][i] != 0) { n_mei += meioses_per; /* check if informatve */ if(fabs(logprec(Geno[j1][i], Geno[j2][i], 0.5, cross_scheme) - logprec(Geno[j1][i], Geno[j2][i], TOL, cross_scheme)) > TOL) flag = 1; } } if(n_mei != 0 && flag == 1) { flag = 0; /* begin EM algorithm; start with cur_rf = 0.01 */ for(s=0, cur_rf=0.01; s < maxit; s++) { next_rf = 0.0; for(i=0; i<n_ind; i++) { if(Geno[j1][i] != 0 && Geno[j2][i] != 0) next_rf += erec(Geno[j1][i], Geno[j2][i], cur_rf, cross_scheme); } next_rf /= (double) n_mei; if(fabs(next_rf - cur_rf) < tol*(cur_rf+tol*100.0)) { flag = 1; break; } cur_rf = next_rf; } if(!flag) warning("Markers (%d,%d) didn't converge\n", j1+1, j2+1); /* calculate LOD score */ Rf[j1][j2] = next_rf; Rf[j2][j1] = 0.0; for(i=0; i<n_ind; i++) { if(Geno[j1][i] != 0 && Geno[j2][i] != 0) { Rf[j2][j1] += logprec(Geno[j1][i],Geno[j2][i], next_rf, cross_scheme); Rf[j2][j1] -= logprec(Geno[j1][i],Geno[j2][i], 0.5, cross_scheme); } } Rf[j2][j1] /= log(10.0); } else { /* no informative meioses */ Rf[j1][j2] = NA_REAL; Rf[j2][j1] = 0.0; } } /* end loops over markers */ } }