Пример #1
0
void R_discan_im(int *n_ind, int *n_pos, int *n_gen,
                 double *genoprob, int *pheno, double *result,
                 int *maxit, double *tol)
{
    double ***Genoprob, **work;
    double *means;

    reorg_genoprob(*n_ind, *n_pos, *n_gen, genoprob, &Genoprob);
    allocate_dmatrix(3, *n_gen, &work);
    allocate_double(*n_gen, &means);

    discan_im(*n_ind, *n_pos, *n_gen, Genoprob, pheno, result,
              *maxit, *tol, work, means);

}
Пример #2
0
Файл: hmm_bci.c Проект: smoe/qtl
void est_map_bci(int n_ind, int n_mar, int *geno, double *d, 
		 int m, double p, double error_prob, 
		 double *loglik, int maxit, double tol, int verbose)
{
  int i, j, j2, v, v2, it, flag=0, **Geno, n_states;
  double s, **alpha, **beta, **gamma, *cur_d, *rf;
  double ***tm, *temp;
  double curloglik;
  double initprob;
  int ndigits;
  double maxdif, tempdif;
  char pattern[100], text[200];
  
  n_states = 2*(m+1);  
  initprob = -log((double)n_states);

  /* allocate space for beta and reorganize geno */
  reorg_geno(n_ind, n_mar, geno, &Geno);
  allocate_alpha(n_mar, n_states, &alpha);
  allocate_alpha(n_mar, n_states, &beta);
  allocate_dmatrix(n_states, n_states, &gamma);
  allocate_double(n_mar-1, &cur_d);
  allocate_double(n_mar-1, &rf);

  /* allocate space for the transition matrices */
  /* size n_states x n_states x (n_mar-1) */
  /* tm[state1][state2][interval] */
  tm = (double ***)R_alloc(n_states, sizeof(double **));
  tm[0] = (double **)R_alloc(n_states * n_states, sizeof(double *));
  for(i=1; i<n_states; i++) tm[i] = tm[i-1] + n_states;
  tm[0][0] = (double *)R_alloc(n_states * n_states * (n_mar - 1), 
			       sizeof(double));
  temp = tm[0][0];
  for(i=0; i < n_states; i++) {
    for(j=0; j < n_states; j++) {
      tm[i][j] = temp;
      temp += n_mar-1;
    }
  }

  /* digits in verbose output */
  if(verbose) {
    ndigits = (int)ceil(-log10(tol));
    if(ndigits > 16) ndigits=16;
    sprintf(pattern, "%s%d.%df", "%", ndigits+3, ndigits+1);
  }

  for(j=0; j<n_mar-1; j++) d[j] /= 100.0; /* convert to Morgans */

  /* begin EM algorithm */
  for(it=0; it<maxit; it++) {

    for(j=0; j<n_mar-1; j++) {
      cur_d[j] = d[j];
      rf[j] = 0.0;
    }

    /* calculate the transition matrices */
    step_bci(n_mar, n_states, tm, cur_d, m, p, maxit, tol);

    for(i=0; i<n_ind; i++) { /* i = individual */

      R_CheckUserInterrupt(); /* check for ^C */

      /* initialize alpha and beta */
      for(v=0; v<n_states; v++) {
	alpha[v][0] = initprob + emit_bci(Geno[0][i], v, error_prob, m);
	beta[v][n_mar-1] = 0.0;
      }

      /* forward-backward equations */
      for(j=1,j2=n_mar-2; j<n_mar; j++, j2--) {
	
	for(v=0; v<n_states; v++) {
	  alpha[v][j] = alpha[0][j-1] + tm[0][v][j-1];
	  
	  beta[v][j2] = beta[0][j2+1] + tm[v][0][j2] +
	    emit_bci(Geno[j2+1][i], 0, error_prob, m);
	  
	  for(v2=1; v2<n_states; v2++) {
	    alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] + 
				 tm[v2][v][j-1]);
	    beta[v][j2] = addlog(beta[v][j2], beta[v2][j2+1] + 
				 tm[v][v2][j2] +
				 emit_bci(Geno[j2+1][i], v2, error_prob, m));
	  }
	  
	  alpha[v][j] += emit_bci(Geno[j][i], v, error_prob, m);
		 
	}

      }

      for(j=0; j<n_mar-1; j++) {

	/* calculate gamma = log Pr(v1, v2, O) */
	for(v=0, s=0.0; v<n_states; v++) {
	  for(v2=0; v2<n_states; v2++) {
	    gamma[v][v2] = alpha[v][j] + beta[v2][j+1] + 
	      emit_bci(Geno[j+1][i], v2, error_prob, m) +
	      tm[v][v2][j];

	    if(v==0 && v2==0) s = gamma[v][v2];
	    else s = addlog(s, gamma[v][v2]);
	  }
	}

	for(v=0; v<n_states; v++) {
	  for(v2=0; v2<n_states; v2++) {
	    rf[j] += nrec_bci(v, v2, m) * exp(gamma[v][v2] - s);
	  }
	}
      }

    } /* loop over individuals */

    /* rescale */
    for(j=0; j<n_mar-1; j++) {
      rf[j] /= (double)n_ind;
      /*
      if(rf[j] < tol/100.0) rf[j] = tol/100.0;
      else if(rf[j] > 0.5-tol/100.0) rf[j] = 0.5-tol/100.0;
      */
    }

    /* use map function to convert back to distances */
    for(j=0; j<n_mar-1; j++)
      d[j] = imf_stahl(rf[j], m, p, 1e-10, 1000);

    if(verbose>1) {
      /* print estimates as we go along*/
      Rprintf("   %4d ", it+1);
      maxdif=0.0;
      for(j=0; j<n_mar-1; j++) {
	tempdif = fabs(d[j] - cur_d[j])/(cur_d[j]+tol*100.0);
	if(maxdif < tempdif) maxdif = tempdif;
      }
      sprintf(text, "%s%s\n", "  max rel've change = ", pattern);
      Rprintf(text, maxdif);
    }

    /* check convergence */
    for(j=0, flag=0; j<n_mar-1; j++) {
      if(fabs(d[j] - cur_d[j]) > tol*(cur_d[j]+tol*100.0)) {
	flag = 1; 
	break;
      }
    }

    if(!flag) break;

  } /* end EM algorithm */
  
  if(flag) warning("Didn't converge!\n");

  /* re-calculate transition matrices */
  step_bci(n_mar, n_states, tm, d, m, p, maxit, tol);

  /* calculate log likelihood */
  *loglik = 0.0;
  for(i=0; i<n_ind; i++) { /* i = individual */

    R_CheckUserInterrupt(); /* check for ^C */

    /* initialize alpha */
    for(v=0; v<n_states; v++) 
      alpha[v][0] = initprob + emit_bci(Geno[0][i], v, error_prob, m);

    /* forward equations */
    for(j=1; j<n_mar; j++) {
      for(v=0; v<n_states; v++) {
	alpha[v][j] = alpha[0][j-1] + tm[0][v][j-1];
	for(v2=1; v2<n_states; v2++) 
	  alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] + 
			       tm[v2][v][j-1]);
	alpha[v][j] += emit_bci(Geno[j][i], v, error_prob, m);
      }
    }

    curloglik = alpha[0][n_mar-1];
    for(v=1; v<n_states; v++) 
      curloglik = addlog(curloglik, alpha[v][n_mar-1]);
    *loglik += curloglik;
  }

  if(verbose) {
    if(verbose < 2) {
      /* print final estimates */
      Rprintf("  no. iterations = %d\n", it+1);
      maxdif=0.0;
      for(j=0; j<n_mar-1; j++) {
	tempdif = fabs(d[j] - cur_d[j])/(cur_d[j]+tol*100.0);
	if(maxdif < tempdif) maxdif = tempdif;
      }
      sprintf(text, "%s%s\n", "  max rel've change at last step = ", pattern);
      Rprintf(text, maxdif);
    }
    
    Rprintf("  loglik: %10.4lf\n\n", *loglik);
  }

  /* convert distances back to cM */
  for(j=0; j<n_mar-1; j++) d[j] *= 100.0;
}
Пример #3
0
void est_map_f2i(int n_ind, int n_mar, int *geno, double *d, 
		  int m, double p, double error_prob, 
		  double *loglik, int maxit, double tol, int verbose)
{
  int i, j, j2, v, v2, it, flag=0, **Geno, n_states, n_bcstates;
  double s, **alpha, **beta, **gamma, *cur_d, *rf;
  double ***tm, *temp;
  double curloglik;
  double initprob;
  
  n_bcstates = 2*(m+1);
  n_states = n_bcstates*n_bcstates;  
  initprob = -log((double)n_states);

  /* allocate space for beta and reorganize geno */
  reorg_geno(n_ind, n_mar, geno, &Geno);
  allocate_alpha(n_mar, n_states, &alpha);
  allocate_alpha(n_mar, n_states, &beta);
  allocate_dmatrix(n_states, n_states, &gamma);
  allocate_double(n_mar-1, &cur_d);
  allocate_double(n_mar-1, &rf);

  /* allocate space for the [backcross] transition matrices */
  /* size n_states x n_states x (n_mar-1) */
  /* tm[state1][state2][interval] */
  tm = (double ***)R_alloc(n_bcstates, sizeof(double **));
  tm[0] = (double **)R_alloc(n_bcstates * n_bcstates, sizeof(double *));
  for(i=1; i<n_bcstates; i++) tm[i] = tm[i-1] + n_bcstates;
  tm[0][0] = (double *)R_alloc(n_bcstates * n_bcstates * (n_mar - 1), 
			       sizeof(double));
  temp = tm[0][0];
  for(i=0; i < n_bcstates; i++) {
    for(j=0; j < n_bcstates; j++) {
      tm[i][j] = temp;
      temp += n_mar-1;
    }
  }

  if(verbose) {
    /* print initial estimates */
    Rprintf("      "); 
    for(j=0; j<n_mar-1; j++) Rprintf("%.3lf ", d[j]);
    Rprintf("\n"); 
  }

  for(j=0; j<n_mar-1; j++) d[j] /= 100.0; /* convert to Morgans */

  /* begin EM algorithm */
  for(it=0; it<maxit; it++) {

    for(j=0; j<n_mar-1; j++) {
      cur_d[j] = d[j];
      rf[j] = 0.0;
    }

    /* calculate the transition matrices [for BC] */
    step_bci(n_mar, n_bcstates, tm, cur_d, m, p, maxit, tol);

    for(i=0; i<n_ind; i++) { /* i = individual */

      R_CheckUserInterrupt(); /* check for ^C */

      /* initialize alpha and beta */
      for(v=0; v<n_states; v++) {
	alpha[v][0] = initprob + emit_f2i(Geno[0][i], v, error_prob, m, n_bcstates);
	beta[v][n_mar-1] = 0.0;
      }

      /* forward-backward equations */
      for(j=1,j2=n_mar-2; j<n_mar; j++, j2--) {
	
	for(v=0; v<n_states; v++) {
	  alpha[v][j] = alpha[0][j-1] + step_f2i(0, v, j-1, tm, n_bcstates);
	  
	  beta[v][j2] = beta[0][j2+1] + step_f2i(v, 0, j2, tm, n_bcstates) +
	    emit_f2i(Geno[j2+1][i], 0, error_prob, m, n_bcstates);
	  
	  for(v2=1; v2<n_states; v2++) {
	    alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] + 
				 step_f2i(v2, v, j-1, tm, n_bcstates));
	    beta[v][j2] = addlog(beta[v][j2], beta[v2][j2+1] + 
				 step_f2i(v, v2, j2, tm, n_bcstates) +
				 emit_f2i(Geno[j2+1][i], v2, error_prob, m, n_bcstates));
	  }
	  
	  alpha[v][j] += emit_f2i(Geno[j][i], v, error_prob, m, n_bcstates);
		 
	}

      }

      for(j=0; j<n_mar-1; j++) {

	/* calculate gamma = log Pr(v1, v2, O) */
	for(v=0, s=0.0; v<n_states; v++) {
	  for(v2=0; v2<n_states; v2++) {
	    gamma[v][v2] = alpha[v][j] + beta[v2][j+1] + 
	      emit_f2i(Geno[j+1][i], v2, error_prob, m, n_bcstates) +
	      step_f2i(v, v2, j, tm, n_bcstates);

	    if(v==0 && v2==0) s = gamma[v][v2];
	    else s = addlog(s, gamma[v][v2]);
	  }
	}

	for(v=0; v<n_states; v++) {
	  for(v2=0; v2<n_states; v2++) {
	    rf[j] += nrec_f2i(v, v2, m, n_bcstates) * exp(gamma[v][v2] - s);
	  }
	}
      }

    } /* loop over individuals */

    /* rescale */
    for(j=0; j<n_mar-1; j++) {
      rf[j] /= (double)n_ind;
      /*      
      if(rf[j] < tol/100.0) rf[j] = tol/100.0;
      else if(rf[j] > 0.5-tol/100.0) rf[j] = 0.5-tol/100.0;
      */
    }

    /* use map function to convert back to distances */
    for(j=0; j<n_mar-1; j++)
      d[j] = imf_stahl(rf[j], m, p, 1e-10, 1000);

    if(verbose > 1) { /* print some debugging stuff */
      if(verbose == 2) Rprintf("Iteration");
      Rprintf(" %4d ", it+1);
      if(verbose > 2) 
	for(j=0; j<n_mar-1; j++) Rprintf("%.3lf ", d[j]*100.0);
      Rprintf("\n"); 
    }

    /* check convergence */
    for(j=0, flag=0; j<n_mar-1; j++) {
      if(fabs(d[j] - cur_d[j]) > tol*(cur_d[j]+tol*100.0)) {
	flag = 1; 
	break;
      }
    }

    if(!flag) break;

  } /* end EM algorithm */
  
  if(flag) warning("Didn't converge!\n");

  /* re-calculate transition matrices */
  step_bci(n_mar, n_bcstates, tm, d, m, p, maxit, tol);

  /* calculate log likelihood */
  *loglik = 0.0;
  for(i=0; i<n_ind; i++) { /* i = individual */
    /* initialize alpha */
    for(v=0; v<n_states; v++) 
      alpha[v][0] = initprob + emit_f2i(Geno[0][i], v, error_prob, m, n_bcstates);

    /* forward equations */
    for(j=1; j<n_mar; j++) {
      for(v=0; v<n_states; v++) {
	alpha[v][j] = alpha[0][j-1] + step_f2i(0, v, j-1, tm, n_bcstates);
	for(v2=1; v2<n_states; v2++) 
	  alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] + 
			       step_f2i(v2, v, j-1, tm, n_bcstates));
	alpha[v][j] += emit_f2i(Geno[j][i], v, error_prob, m, n_bcstates);
      }
    }

    curloglik = alpha[0][n_mar-1];
    for(v=1; v<n_states; v++) 
      curloglik = addlog(curloglik, alpha[v][n_mar-1]);
    *loglik += curloglik;
  }

  /* convert distances back to cM */
  for(j=0; j<n_mar-1; j++) d[j] *= 100.0;

  if(verbose) {
    /* print final estimates */
    Rprintf(" %4d ", it+1);
    for(j=0; j<n_mar-1; j++) Rprintf("%.3lf ", d[j]);
    Rprintf("\n");
    
    Rprintf("loglik: %10.4lf\n\n", *loglik);
  }

}
Пример #4
0
void est_map(int n_ind, int n_mar, int n_gen, int *geno, double *rf,
             double *rf2, double error_prob, double initf(int, int *),
             double emitf(int, int, double, int *),
             double stepf(int, int, double, double, int *),
             double nrecf1(int, int, double, int*), double nrecf2(int, int, double, int*),
             double *loglik, int maxit, double tol, int sexsp,
             int verbose)
{
    int i, j, j2, v, v2, it, flag=0, **Geno, ndigits;
    double s, **alpha, **beta, **gamma, *cur_rf, *cur_rf2;
    double curloglik, maxdif, temp;
    char pattern[100], text[200];
    int cross_scheme[2];

    /* cross scheme hidden in loglik argument; used by hmm_bcsft */
    cross_scheme[0] = (int) ftrunc(*loglik / 1000.0);
    cross_scheme[1] = ((int) *loglik) - 1000 * cross_scheme[0];
    *loglik = 0.0;

    /* allocate space for beta and reorganize geno */
    reorg_geno(n_ind, n_mar, geno, &Geno);
    allocate_alpha(n_mar, n_gen, &alpha);
    allocate_alpha(n_mar, n_gen, &beta);
    allocate_dmatrix(n_gen, n_gen, &gamma);
    allocate_double(n_mar-1, &cur_rf);
    allocate_double(n_mar-1, &cur_rf2);

    /* digits in verbose output */
    if(verbose) {
        ndigits = (int)ceil(-log10(tol));
        if(ndigits > 16) ndigits=16;
        sprintf(pattern, "%s%d.%df", "%", ndigits+3, ndigits+1);
    }

    /* begin EM algorithm */
    for(it=0; it<maxit; it++) {

        for(j=0; j<n_mar-1; j++) {
            cur_rf[j] = cur_rf2[j] = rf[j];
            rf[j] = 0.0;
            if(sexsp) {
                cur_rf2[j] = rf2[j];
                rf2[j] = 0.0;
            }
        }

        for(i=0; i<n_ind; i++) { /* i = individual */

            R_CheckUserInterrupt(); /* check for ^C */

            /* initialize alpha and beta */
            for(v=0; v<n_gen; v++) {
                alpha[v][0] = initf(v+1, cross_scheme) + emitf(Geno[0][i], v+1, error_prob, cross_scheme);
                beta[v][n_mar-1] = 0.0;
            }

            /* forward-backward equations */
            for(j=1,j2=n_mar-2; j<n_mar; j++, j2--) {

                for(v=0; v<n_gen; v++) {
                    alpha[v][j] = alpha[0][j-1] + stepf(1, v+1, cur_rf[j-1], cur_rf2[j-1], cross_scheme);
                    beta[v][j2] = beta[0][j2+1] + stepf(v+1,1,cur_rf[j2], cur_rf2[j2], cross_scheme) +
                        emitf(Geno[j2+1][i],1,error_prob, cross_scheme);

                    for(v2=1; v2<n_gen; v2++) {
                        alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] +
                                             stepf(v2+1,v+1,cur_rf[j-1],cur_rf2[j-1], cross_scheme));
                        beta[v][j2] = addlog(beta[v][j2], beta[v2][j2+1] +
                                             stepf(v+1,v2+1,cur_rf[j2],cur_rf2[j2], cross_scheme) +
                                             emitf(Geno[j2+1][i],v2+1,error_prob, cross_scheme));
                    }

                    alpha[v][j] += emitf(Geno[j][i],v+1,error_prob, cross_scheme);
                }

            }

            for(j=0; j<n_mar-1; j++) {

                /* calculate gamma = log Pr(v1, v2, O) */
                for(v=0, s=0.0; v<n_gen; v++) {
                    for(v2=0; v2<n_gen; v2++) {
                        gamma[v][v2] = alpha[v][j] + beta[v2][j+1] +
                            emitf(Geno[j+1][i], v2+1, error_prob, cross_scheme) +
                            stepf(v+1, v2+1, cur_rf[j], cur_rf2[j], cross_scheme);

                        if(v==0 && v2==0) s = gamma[v][v2];
                        else s = addlog(s, gamma[v][v2]);
                    }
                }

                for(v=0; v<n_gen; v++) {
                    for(v2=0; v2<n_gen; v2++) {
                        rf[j] += nrecf1(v+1,v2+1, cur_rf[j], cross_scheme) * exp(gamma[v][v2] - s);
                        if(sexsp) rf2[j] += nrecf2(v+1,v2+1, cur_rf[j], cross_scheme) * exp(gamma[v][v2] - s);
                    }
                }
            }

        } /* loop over individuals */

        /* rescale */
        for(j=0; j<n_mar-1; j++) {
            rf[j] /= (double)n_ind;
            if(rf[j] < tol/1000.0) rf[j] = tol/1000.0;
            else if(rf[j] > 0.5-tol/1000.0) rf[j] = 0.5-tol/1000.0;

            if(sexsp) {
                rf2[j] /= (double)n_ind;
                if(rf2[j] < tol/1000.0) rf2[j] = tol/1000.0;
                else if(rf2[j] > 0.5-tol/1000.0) rf2[j] = 0.5-tol/1000.0;
            }
            else rf2[j] = rf[j];
        }

        if(verbose>1) {
            /* print estimates as we go along*/
            Rprintf("   %4d ", it+1);
            maxdif=0.0;
            for(j=0; j<n_mar-1; j++) {
                temp = fabs(rf[j] - cur_rf[j])/(cur_rf[j]+tol*100.0);
                if(maxdif < temp) maxdif = temp;
                if(sexsp) {
                    temp = fabs(rf2[j] - cur_rf2[j])/(cur_rf2[j]+tol*100.0);
                    if(maxdif < temp) maxdif = temp;
                }
                /* bsy add */
                if(verbose > 2)
                    Rprintf("%d %f %f\n", j+1, cur_rf[j], rf[j]);
                /* bsy add */
            }
            sprintf(text, "%s%s\n", "  max rel've change = ", pattern);
            Rprintf(text, maxdif);
        }

        /* check convergence */
        for(j=0, flag=0; j<n_mar-1; j++) {
            if(fabs(rf[j] - cur_rf[j]) > tol*(cur_rf[j]+tol*100.0) ||
               (sexsp && fabs(rf2[j] - cur_rf2[j]) > tol*(cur_rf2[j]+tol*100.0))) {
                flag = 1;
                break;
            }
        }

        if(!flag) break;

    } /* end EM algorithm */

    if(flag) warning("Didn't converge!\n");

    /* calculate log likelihood */
    *loglik = 0.0;
    for(i=0; i<n_ind; i++) { /* i = individual */
        /* initialize alpha */
        for(v=0; v<n_gen; v++) {
            alpha[v][0] = initf(v+1, cross_scheme) + emitf(Geno[0][i], v+1, error_prob, cross_scheme);
        }
        /* forward equations */
        for(j=1; j<n_mar; j++) {
            for(v=0; v<n_gen; v++) {
                alpha[v][j] = alpha[0][j-1] +
                    stepf(1, v+1, rf[j-1], rf2[j-1], cross_scheme);

                for(v2=1; v2<n_gen; v2++)
                    alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] +
                                         stepf(v2+1,v+1,rf[j-1],rf2[j-1], cross_scheme));

                alpha[v][j] += emitf(Geno[j][i],v+1,error_prob, cross_scheme);
            }
        }

        curloglik = alpha[0][n_mar-1];
        for(v=1; v<n_gen; v++)
            curloglik = addlog(curloglik, alpha[v][n_mar-1]);

        *loglik += curloglik;
    }

    if(verbose) {
        if(verbose < 2) {
            /* print final estimates */
            Rprintf("  no. iterations = %d\n", it+1);
            maxdif=0.0;
            for(j=0; j<n_mar-1; j++) {
                temp = fabs(rf[j] - cur_rf[j])/(cur_rf[j]+tol*100.0);
                if(maxdif < temp) maxdif = temp;
                if(sexsp) {
                    temp = fabs(rf2[j] - cur_rf2[j])/(cur_rf2[j]+tol*100.0);
                    if(maxdif < temp) maxdif = temp;
                }
            }
            sprintf(text, "%s%s\n", "  max rel've change at last step = ", pattern);
            Rprintf(text, maxdif);
        }

        Rprintf("  loglik: %10.4lf\n\n", *loglik);
    }

}
Пример #5
0
void fitqtl_imp_binary(int n_ind, int n_qtl, int *n_gen, int n_draws,
                       int ***Draws, double **Cov, int n_cov,
                       int *model, int n_int, double *pheno, int get_ests,
                       double *lod, int *df, double *ests, double *ests_covar,
                       double *design_mat, double tol, int maxit, int *matrix_rank)
{

    /* create local variables */
    int i, j, ii, jj, n_qc, itmp; /* loop variants and temp variables */
    double llik, llik0, *LOD_array;
    double *the_ests, *the_covar, **TheEsts, ***TheCovar;
    double *dwork, **Ests_covar, tot_wt=0.0, *wts;
    double **Covar_mean, **Mean_covar, *mean_ests; /* for ests and cov matrix */
    int *iwork, sizefull, n_trim, *index;

    /* number to trim from each end of the imputations */
    n_trim = (int) floor( 0.5*log(n_draws)/log(2.0) );

    /* initialization */
    sizefull = 1;

    /* calculate the dimension of the design matrix for full model */
    n_qc = n_qtl+n_cov; /* total number of QTLs and covariates */
    /* for additive QTLs and covariates*/
    for(i=0; i<n_qc; i++)
        sizefull += n_gen[i];
    /* for interactions, loop thru all interactions */
    for(i=0; i<n_int; i++) {
        for(j=0, itmp=1; j<n_qc; j++) {
            if(model[i*n_qc+j])
                itmp *= n_gen[j];
        }
        sizefull += itmp;
    }

    /* reorganize Ests_covar for easy use later */
    /* and make space for estimates and covariance matrix */
    if(get_ests) {
        reorg_errlod(sizefull, sizefull, ests_covar, &Ests_covar);

        allocate_double(sizefull*n_draws, &the_ests);
        allocate_double(sizefull*sizefull*n_draws, &the_covar);

        /* I need to save all of the estimates and covariance matrices */
        reorg_errlod(sizefull, n_draws, the_ests, &TheEsts);
        reorg_genoprob(sizefull, sizefull, n_draws, the_covar, &TheCovar);

        allocate_dmatrix(sizefull, sizefull, &Mean_covar);
        allocate_dmatrix(sizefull, sizefull, &Covar_mean);
        allocate_double(sizefull, &mean_ests);
        allocate_double(n_draws, &wts);
    }

    /* allocate memory for working arrays, total memory is
       sizefull*n_ind+6*n_ind+4*sizefull for double array,
       and sizefull for integer array.
       All memory will be allocated one time and split later */
    dwork = (double *)R_alloc(sizefull*n_ind+6*n_ind+4*sizefull,
                              sizeof(double));
    iwork = (int *)R_alloc(sizefull, sizeof(int));
    index = (int *)R_alloc(n_draws, sizeof(int));
    LOD_array = (double *)R_alloc(n_draws, sizeof(double));


    /* calculate null model log10 likelihood */
    llik0 = nullLODbin(pheno, n_ind);

    *matrix_rank = n_ind;

    /* loop over imputations */
    for(i=0; i<n_draws; i++) {

        R_CheckUserInterrupt(); /* check for ^C */

        /* calculate alternative model RSS */
        llik = galtLODimpbin(pheno, n_ind, n_gen, n_qtl, Draws[i],
                             Cov, n_cov, model, n_int, dwork, iwork, sizefull,
                             get_ests, ests, Ests_covar, design_mat,
                             tol, maxit, matrix_rank);

        /* calculate the LOD score in this imputation */
        LOD_array[i] = (llik - llik0);

        /* if getting estimates, calculate the weights */
        if(get_ests) {
            wts[i] = LOD_array[i]*log(10.0);
            if(i==0) tot_wt = wts[i];
            else tot_wt = addlog(tot_wt, wts[i]);

            for(ii=0; ii<sizefull; ii++) {
                TheEsts[i][ii] = ests[ii];
                for(jj=ii; jj<sizefull; jj++)
                    TheCovar[i][ii][jj] = Ests_covar[ii][jj];
            }
        }

    } /* end loop over imputations */

    /* sort the lod scores, and trim the weights */
    if(get_ests) {
        for(i=0; i<n_draws; i++) {
            index[i] = i;
            wts[i] = exp(wts[i]-tot_wt);
        }

        rsort_with_index(LOD_array, index, n_draws);

        for(i=0; i<n_trim; i++)
            wts[index[i]] = wts[index[n_draws-i-1]] = 0.0;

        /* re-scale wts */
        tot_wt = 0.0;
        for(i=0; i<n_draws; i++) tot_wt += wts[i];
        for(i=0; i<n_draws; i++) wts[i] /= tot_wt;
    }

    /* calculate the result LOD score */
    *lod = wtaverage(LOD_array, n_draws);

    /* degree of freedom equals to the number of columns of x minus 1 (mean) */
    *df = sizefull - 1;

    /* get means and variances and covariances of estimates */
    if(get_ests) {
        for(i=0; i<n_draws; i++) {
            if(i==0) {
                for(ii=0; ii<sizefull; ii++) {
                    mean_ests[ii] = TheEsts[i][ii] * wts[i];
                    for(jj=ii; jj<sizefull; jj++) {
                        Mean_covar[ii][jj] = TheCovar[i][ii][jj] * wts[i];
                        Covar_mean[ii][jj] = 0.0;
                    }
                }
            }
            else {
                for(ii=0; ii<sizefull; ii++) {
                    mean_ests[ii] += TheEsts[i][ii]*wts[i];
                    for(jj=ii; jj<sizefull; jj++) {
                        Mean_covar[ii][jj] += TheCovar[i][ii][jj]*wts[i];
                        Covar_mean[ii][jj] += (TheEsts[i][ii]-TheEsts[0][ii])*
                            (TheEsts[i][jj]-TheEsts[0][jj])*wts[i];
                    }
                }
            }
        }

        for(i=0; i<sizefull; i++) {
            ests[i] = mean_ests[i];
            for(j=i; j<sizefull; j++) {
                Covar_mean[i][j] = (Covar_mean[i][j] - (mean_ests[i]-TheEsts[0][i])*
                                    (mean_ests[j]-TheEsts[0][j]))*(double)n_draws/(double)(n_draws-1);
                Ests_covar[i][j] = Ests_covar[j][i] = Mean_covar[i][j] + Covar_mean[i][j];
            }
        }
    } /* done getting estimates */

}
Пример #6
0
void scanone_em_covar(int n_ind, int n_pos, int n_gen, 
		      double ***Genoprob, double **Addcov, int n_addcov,
		      double **Intcov, int n_intcov, double *pheno, 
		      double *weights,
		      double *result, int maxit, double tol, int verbose,
		      int *ind_noqtl)
{
  int i, j, k, s, flag=0, n_par;
  double **wts, *param, *oldparam, regtol; 
  double *x, *coef, *resid, *qty, *qraux, *work;
  int *jpvt, ny, ncol0, error_flag;
  double llik, oldllik=0.0, *work1, *work2, temp, sw;

  /* recenter phenotype to have mean 0, for
     possibly increased numerical stability */
  for(i=0, temp=0.0; i<n_ind; i++) temp += pheno[i];
  temp /= (double)n_ind;
  for(i=0; i<n_ind; i++) pheno[i] -= temp;

  n_par = 1 + n_gen + n_addcov + (n_gen-1)*n_intcov;

  /* Allocate space */
  allocate_dmatrix(n_gen, n_ind, &wts);
  param = (double *)R_alloc(n_par, sizeof(double));
  oldparam = (double *)R_alloc(n_par, sizeof(double));
  work1 = (double *)R_alloc((n_par-1)*(n_par-1),sizeof(double));
  work2 = (double *)R_alloc(n_par-1, sizeof(double));

  ncol0 = 1+n_addcov;
  x = (double *)R_alloc(n_ind*ncol0, sizeof(double));
  coef = (double *)R_alloc(ncol0, sizeof(double));
  resid = (double *)R_alloc(n_ind, sizeof(double));
  qty = (double *)R_alloc(n_ind, sizeof(double));
  jpvt = (int *)R_alloc(ncol0, sizeof(int));
  qraux = (double *)R_alloc(ncol0, sizeof(double));
  work = (double *)R_alloc(2*ncol0, sizeof(double));
  regtol = TOL;
  ny = 1;

  /* adjust phenotypes and covariates with weights */
  /* Note: weights are actually sqrt(weights) */
  sw = 0.0;
  for(i=0; i<n_ind; i++) {
    pheno[i] *= weights[i];
    for(j=0; j<n_addcov; j++)
      Addcov[j][i] *= weights[i];
    for(j=0; j<n_intcov; j++)
      Intcov[j][i] *= weights[i];
    sw += log(weights[i]);   /* sum of log weights */
  }

  /* NULL model is now done in R ********************
     (only do it once!)
  for(i=0; i<n_ind; i++) {
    x[i] = 1.0;
    for(j=0; j<n_addcov; j++)
      x[i+(j+1)*n_ind] = Addcov[j][i];
  }
  j=0;
  F77_CALL(dqrls)(x, &n_ind, &ncol0, pheno, &ny, &regtol, coef,
	          resid, qty, &k, jpvt, qraux, work);
  sigma0 = llik0 = 0.0;
  for(i=0; i<n_ind; i++) sigma0 += (resid[i] * resid[i]);
  sigma0 = sqrt(sigma0/(double)n_ind);
  for(i=0; i<n_ind; i++) 
    llik0 += log10(dnorm(resid[i],0.0,sigma0,0));
  Null model is now done in R ********************/
  
  /* begin genome scan */
  for(i=0; i<n_pos; i++) { /* loop over marker positions */
    /* initial estimates */
    for(j=0; j<n_ind; j++)
      for(k=0; k<n_gen; k++) 
	wts[k][j] = Genoprob[k][i][j];
    mstep_em_covar(n_ind, n_gen, Addcov, n_addcov, Intcov, n_intcov,
		   pheno, weights, wts, oldparam, work1, work2, &error_flag,
		   ind_noqtl);
    
    if(!error_flag) { /* only proceed if there's no error */

      if(verbose) {
	estep_em_covar(n_ind, n_gen, i, Genoprob, Addcov, n_addcov,
		       Intcov, n_intcov, pheno, weights, wts, oldparam, 0,
		       ind_noqtl);
	oldllik = 0.0;
	for(k=0; k<n_ind; k++) {
	  temp = 0.0;
	  for(s=0; s < n_gen; s++) temp += wts[s][k];
	  oldllik += log(temp);
	}
	Rprintf("    %3d %12.6lf\n", i+1, oldllik);
      }
      
      /* begin EM iterations */
      for(j=0; j<maxit; j++) {

	R_CheckUserInterrupt(); /* check for ^C */

	estep_em_covar(n_ind, n_gen, i, Genoprob, Addcov, n_addcov,
		       Intcov, n_intcov, pheno, weights, wts, oldparam, 1,
		       ind_noqtl);
	mstep_em_covar(n_ind, n_gen, Addcov, n_addcov, Intcov, n_intcov,
		       pheno, weights, wts, param, work1, work2, &error_flag,
		       ind_noqtl);
	
	if(error_flag) { /* error: X'X singular; break out of EM */
	  flag=0; 
	  break; 
	}

	if(verbose) {
	  estep_em_covar(n_ind, n_gen, i, Genoprob, Addcov, n_addcov,
			 Intcov, n_intcov, pheno, weights, wts, param, 0,
			 ind_noqtl);
	  llik = 0.0;
	  for(k=0; k<n_ind; k++) {
	    temp = 0.0;
	    for(s=0; s < n_gen; s++) temp += wts[s][k];
	    llik += log(temp);
	  }
	  Rprintf("    %3d %4d %12.6lf\n", i+1, j+1, llik-oldllik);
	  oldllik = llik;
	}

	/* check for convergence */
	flag = 0;
	for(k=0; k<n_par; k++) {
	  if(fabs(param[k]-oldparam[k]) > tol*(fabs(oldparam[k])+tol*100.0)) {
	    flag = 1;
	    break; 
	  }
	}
	if(!flag) break;
	for(k=0; k<n_par; k++) oldparam[k] = param[k];
	
      } /* end of EM iterations */

      if(flag) warning("Didn't converge!\n");

      if(!error_flag) { /* skip if there was an error */
	/* calculate log likelihood */
	estep_em_covar(n_ind, n_gen, i, Genoprob, Addcov, n_addcov,
		       Intcov, n_intcov, pheno, weights, wts, param, 0,
		       ind_noqtl);
	llik = 0.0;
	for(k=0; k<n_ind; k++) {
	  temp = 0.0;
	  for(s=0; s < n_gen; s++) temp += wts[s][k];
	  llik += log(temp);
	}
	result[i] = -(llik+sw)/log(10.0);
      }
      else result[i] = NA_REAL;
      
      if(verbose) {
	if(error_flag) Rprintf("    %3d NA", i+1);
	else {
	  Rprintf("    %3d %12.6lf\n", i+1, result[i]);
	  Rprintf("        ");
	  for(j=0; j<n_par; j++) Rprintf(" %7.4lf", param[j]);
	}
	Rprintf("\n\n");
      }

    } /* no initial error */
  } /* end loop over marker positions */
}