示例#1
0
文件: hmm_mark.c 项目: amanicha/qtl
void calc_markprob(int n_ind, int n_pos, int n_gen, int *geno, 
		   double *rf, double *rf2, 
		   double error_prob, double *markprob, 
		   double initf(int),
		   double emitf(int, int, double),
		   double stepf(int, int, double, double)) 
{
  int i, j, j2, v, v2;
  double **betal, **betar; /* betas for left and right sides of the chromosome */
  int **Geno;
  double ***Markprob;
  
  /* allocate space for betal and betar and 
     reorganize geno and markprob */
  reorg_geno(n_ind, n_pos, geno, &Geno);
  reorg_genoprob(n_ind, n_pos, n_gen, markprob, &Markprob);
  allocate_alpha(n_pos, n_gen, &betal);
  allocate_alpha(n_pos, n_gen, &betar);

  for(i=0; i<n_ind; i++) { /* i = individual */

    /* initialize betal and betar */
    for(v=0; v<n_gen; v++) {
      betal[v][0] = 0.0;
      betar[v][n_pos-1] = 0.0;
    }

    /* backward equations */
    for(j=1,j2=n_pos-2; j<n_pos; j++, j2--) {
      
      for(v=0; v<n_gen; v++) {
	betal[v][j] = betal[0][j-1] + stepf(v+1, 1, rf[j-1], rf2[j-1]) +
	  emitf(Geno[j-1][i],1,error_prob);
	
	betar[v][j2] = betar[0][j2+1] + stepf(v+1,1,rf[j2], rf2[j2]) + 
	  emitf(Geno[j2+1][i],1,error_prob);

	for(v2=1; v2<n_gen; v2++) {
	  betal[v][j] = addlog(betal[v][j], betal[v2][j-1] + 
			       stepf(v+1,v2+1,rf[j-1],rf2[j-1]) +
			       emitf(Geno[j-1][i],v2+1,error_prob));
	  betar[v][j2] = addlog(betar[v][j2], betar[v2][j2+1] + 
			       stepf(v+1,v2+1,rf[j2],rf2[j2]) +
			       emitf(Geno[j2+1][i],v2+1,error_prob));
	}

      }
    }

    /* calculate genotype probabilities */
    for(j=0; j<n_pos; j++) 
      for(v=0; v<n_gen; v++) 
	Markprob[v][j][i] = exp(betal[v][j] + betar[v][j] + emitf(Geno[j][i], v+1, error_prob)); 
      
  } /* loop over individuals */
  
}
示例#2
0
文件: hmm_bci.c 项目: smoe/qtl
void est_map_bci(int n_ind, int n_mar, int *geno, double *d, 
		 int m, double p, double error_prob, 
		 double *loglik, int maxit, double tol, int verbose)
{
  int i, j, j2, v, v2, it, flag=0, **Geno, n_states;
  double s, **alpha, **beta, **gamma, *cur_d, *rf;
  double ***tm, *temp;
  double curloglik;
  double initprob;
  int ndigits;
  double maxdif, tempdif;
  char pattern[100], text[200];
  
  n_states = 2*(m+1);  
  initprob = -log((double)n_states);

  /* allocate space for beta and reorganize geno */
  reorg_geno(n_ind, n_mar, geno, &Geno);
  allocate_alpha(n_mar, n_states, &alpha);
  allocate_alpha(n_mar, n_states, &beta);
  allocate_dmatrix(n_states, n_states, &gamma);
  allocate_double(n_mar-1, &cur_d);
  allocate_double(n_mar-1, &rf);

  /* allocate space for the transition matrices */
  /* size n_states x n_states x (n_mar-1) */
  /* tm[state1][state2][interval] */
  tm = (double ***)R_alloc(n_states, sizeof(double **));
  tm[0] = (double **)R_alloc(n_states * n_states, sizeof(double *));
  for(i=1; i<n_states; i++) tm[i] = tm[i-1] + n_states;
  tm[0][0] = (double *)R_alloc(n_states * n_states * (n_mar - 1), 
			       sizeof(double));
  temp = tm[0][0];
  for(i=0; i < n_states; i++) {
    for(j=0; j < n_states; j++) {
      tm[i][j] = temp;
      temp += n_mar-1;
    }
  }

  /* digits in verbose output */
  if(verbose) {
    ndigits = (int)ceil(-log10(tol));
    if(ndigits > 16) ndigits=16;
    sprintf(pattern, "%s%d.%df", "%", ndigits+3, ndigits+1);
  }

  for(j=0; j<n_mar-1; j++) d[j] /= 100.0; /* convert to Morgans */

  /* begin EM algorithm */
  for(it=0; it<maxit; it++) {

    for(j=0; j<n_mar-1; j++) {
      cur_d[j] = d[j];
      rf[j] = 0.0;
    }

    /* calculate the transition matrices */
    step_bci(n_mar, n_states, tm, cur_d, m, p, maxit, tol);

    for(i=0; i<n_ind; i++) { /* i = individual */

      R_CheckUserInterrupt(); /* check for ^C */

      /* initialize alpha and beta */
      for(v=0; v<n_states; v++) {
	alpha[v][0] = initprob + emit_bci(Geno[0][i], v, error_prob, m);
	beta[v][n_mar-1] = 0.0;
      }

      /* forward-backward equations */
      for(j=1,j2=n_mar-2; j<n_mar; j++, j2--) {
	
	for(v=0; v<n_states; v++) {
	  alpha[v][j] = alpha[0][j-1] + tm[0][v][j-1];
	  
	  beta[v][j2] = beta[0][j2+1] + tm[v][0][j2] +
	    emit_bci(Geno[j2+1][i], 0, error_prob, m);
	  
	  for(v2=1; v2<n_states; v2++) {
	    alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] + 
				 tm[v2][v][j-1]);
	    beta[v][j2] = addlog(beta[v][j2], beta[v2][j2+1] + 
				 tm[v][v2][j2] +
				 emit_bci(Geno[j2+1][i], v2, error_prob, m));
	  }
	  
	  alpha[v][j] += emit_bci(Geno[j][i], v, error_prob, m);
		 
	}

      }

      for(j=0; j<n_mar-1; j++) {

	/* calculate gamma = log Pr(v1, v2, O) */
	for(v=0, s=0.0; v<n_states; v++) {
	  for(v2=0; v2<n_states; v2++) {
	    gamma[v][v2] = alpha[v][j] + beta[v2][j+1] + 
	      emit_bci(Geno[j+1][i], v2, error_prob, m) +
	      tm[v][v2][j];

	    if(v==0 && v2==0) s = gamma[v][v2];
	    else s = addlog(s, gamma[v][v2]);
	  }
	}

	for(v=0; v<n_states; v++) {
	  for(v2=0; v2<n_states; v2++) {
	    rf[j] += nrec_bci(v, v2, m) * exp(gamma[v][v2] - s);
	  }
	}
      }

    } /* loop over individuals */

    /* rescale */
    for(j=0; j<n_mar-1; j++) {
      rf[j] /= (double)n_ind;
      /*
      if(rf[j] < tol/100.0) rf[j] = tol/100.0;
      else if(rf[j] > 0.5-tol/100.0) rf[j] = 0.5-tol/100.0;
      */
    }

    /* use map function to convert back to distances */
    for(j=0; j<n_mar-1; j++)
      d[j] = imf_stahl(rf[j], m, p, 1e-10, 1000);

    if(verbose>1) {
      /* print estimates as we go along*/
      Rprintf("   %4d ", it+1);
      maxdif=0.0;
      for(j=0; j<n_mar-1; j++) {
	tempdif = fabs(d[j] - cur_d[j])/(cur_d[j]+tol*100.0);
	if(maxdif < tempdif) maxdif = tempdif;
      }
      sprintf(text, "%s%s\n", "  max rel've change = ", pattern);
      Rprintf(text, maxdif);
    }

    /* check convergence */
    for(j=0, flag=0; j<n_mar-1; j++) {
      if(fabs(d[j] - cur_d[j]) > tol*(cur_d[j]+tol*100.0)) {
	flag = 1; 
	break;
      }
    }

    if(!flag) break;

  } /* end EM algorithm */
  
  if(flag) warning("Didn't converge!\n");

  /* re-calculate transition matrices */
  step_bci(n_mar, n_states, tm, d, m, p, maxit, tol);

  /* calculate log likelihood */
  *loglik = 0.0;
  for(i=0; i<n_ind; i++) { /* i = individual */

    R_CheckUserInterrupt(); /* check for ^C */

    /* initialize alpha */
    for(v=0; v<n_states; v++) 
      alpha[v][0] = initprob + emit_bci(Geno[0][i], v, error_prob, m);

    /* forward equations */
    for(j=1; j<n_mar; j++) {
      for(v=0; v<n_states; v++) {
	alpha[v][j] = alpha[0][j-1] + tm[0][v][j-1];
	for(v2=1; v2<n_states; v2++) 
	  alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] + 
			       tm[v2][v][j-1]);
	alpha[v][j] += emit_bci(Geno[j][i], v, error_prob, m);
      }
    }

    curloglik = alpha[0][n_mar-1];
    for(v=1; v<n_states; v++) 
      curloglik = addlog(curloglik, alpha[v][n_mar-1]);
    *loglik += curloglik;
  }

  if(verbose) {
    if(verbose < 2) {
      /* print final estimates */
      Rprintf("  no. iterations = %d\n", it+1);
      maxdif=0.0;
      for(j=0; j<n_mar-1; j++) {
	tempdif = fabs(d[j] - cur_d[j])/(cur_d[j]+tol*100.0);
	if(maxdif < tempdif) maxdif = tempdif;
      }
      sprintf(text, "%s%s\n", "  max rel've change at last step = ", pattern);
      Rprintf(text, maxdif);
    }
    
    Rprintf("  loglik: %10.4lf\n\n", *loglik);
  }

  /* convert distances back to cM */
  for(j=0; j<n_mar-1; j++) d[j] *= 100.0;
}
示例#3
0
文件: hmm_f2i.c 项目: amanicha/qtl
void est_map_f2i(int n_ind, int n_mar, int *geno, double *d, 
		  int m, double p, double error_prob, 
		  double *loglik, int maxit, double tol, int verbose)
{
  int i, j, j2, v, v2, it, flag=0, **Geno, n_states, n_bcstates;
  double s, **alpha, **beta, **gamma, *cur_d, *rf;
  double ***tm, *temp;
  double curloglik;
  double initprob;
  
  n_bcstates = 2*(m+1);
  n_states = n_bcstates*n_bcstates;  
  initprob = -log((double)n_states);

  /* allocate space for beta and reorganize geno */
  reorg_geno(n_ind, n_mar, geno, &Geno);
  allocate_alpha(n_mar, n_states, &alpha);
  allocate_alpha(n_mar, n_states, &beta);
  allocate_dmatrix(n_states, n_states, &gamma);
  allocate_double(n_mar-1, &cur_d);
  allocate_double(n_mar-1, &rf);

  /* allocate space for the [backcross] transition matrices */
  /* size n_states x n_states x (n_mar-1) */
  /* tm[state1][state2][interval] */
  tm = (double ***)R_alloc(n_bcstates, sizeof(double **));
  tm[0] = (double **)R_alloc(n_bcstates * n_bcstates, sizeof(double *));
  for(i=1; i<n_bcstates; i++) tm[i] = tm[i-1] + n_bcstates;
  tm[0][0] = (double *)R_alloc(n_bcstates * n_bcstates * (n_mar - 1), 
			       sizeof(double));
  temp = tm[0][0];
  for(i=0; i < n_bcstates; i++) {
    for(j=0; j < n_bcstates; j++) {
      tm[i][j] = temp;
      temp += n_mar-1;
    }
  }

  if(verbose) {
    /* print initial estimates */
    Rprintf("      "); 
    for(j=0; j<n_mar-1; j++) Rprintf("%.3lf ", d[j]);
    Rprintf("\n"); 
  }

  for(j=0; j<n_mar-1; j++) d[j] /= 100.0; /* convert to Morgans */

  /* begin EM algorithm */
  for(it=0; it<maxit; it++) {

    for(j=0; j<n_mar-1; j++) {
      cur_d[j] = d[j];
      rf[j] = 0.0;
    }

    /* calculate the transition matrices [for BC] */
    step_bci(n_mar, n_bcstates, tm, cur_d, m, p, maxit, tol);

    for(i=0; i<n_ind; i++) { /* i = individual */

      R_CheckUserInterrupt(); /* check for ^C */

      /* initialize alpha and beta */
      for(v=0; v<n_states; v++) {
	alpha[v][0] = initprob + emit_f2i(Geno[0][i], v, error_prob, m, n_bcstates);
	beta[v][n_mar-1] = 0.0;
      }

      /* forward-backward equations */
      for(j=1,j2=n_mar-2; j<n_mar; j++, j2--) {
	
	for(v=0; v<n_states; v++) {
	  alpha[v][j] = alpha[0][j-1] + step_f2i(0, v, j-1, tm, n_bcstates);
	  
	  beta[v][j2] = beta[0][j2+1] + step_f2i(v, 0, j2, tm, n_bcstates) +
	    emit_f2i(Geno[j2+1][i], 0, error_prob, m, n_bcstates);
	  
	  for(v2=1; v2<n_states; v2++) {
	    alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] + 
				 step_f2i(v2, v, j-1, tm, n_bcstates));
	    beta[v][j2] = addlog(beta[v][j2], beta[v2][j2+1] + 
				 step_f2i(v, v2, j2, tm, n_bcstates) +
				 emit_f2i(Geno[j2+1][i], v2, error_prob, m, n_bcstates));
	  }
	  
	  alpha[v][j] += emit_f2i(Geno[j][i], v, error_prob, m, n_bcstates);
		 
	}

      }

      for(j=0; j<n_mar-1; j++) {

	/* calculate gamma = log Pr(v1, v2, O) */
	for(v=0, s=0.0; v<n_states; v++) {
	  for(v2=0; v2<n_states; v2++) {
	    gamma[v][v2] = alpha[v][j] + beta[v2][j+1] + 
	      emit_f2i(Geno[j+1][i], v2, error_prob, m, n_bcstates) +
	      step_f2i(v, v2, j, tm, n_bcstates);

	    if(v==0 && v2==0) s = gamma[v][v2];
	    else s = addlog(s, gamma[v][v2]);
	  }
	}

	for(v=0; v<n_states; v++) {
	  for(v2=0; v2<n_states; v2++) {
	    rf[j] += nrec_f2i(v, v2, m, n_bcstates) * exp(gamma[v][v2] - s);
	  }
	}
      }

    } /* loop over individuals */

    /* rescale */
    for(j=0; j<n_mar-1; j++) {
      rf[j] /= (double)n_ind;
      /*      
      if(rf[j] < tol/100.0) rf[j] = tol/100.0;
      else if(rf[j] > 0.5-tol/100.0) rf[j] = 0.5-tol/100.0;
      */
    }

    /* use map function to convert back to distances */
    for(j=0; j<n_mar-1; j++)
      d[j] = imf_stahl(rf[j], m, p, 1e-10, 1000);

    if(verbose > 1) { /* print some debugging stuff */
      if(verbose == 2) Rprintf("Iteration");
      Rprintf(" %4d ", it+1);
      if(verbose > 2) 
	for(j=0; j<n_mar-1; j++) Rprintf("%.3lf ", d[j]*100.0);
      Rprintf("\n"); 
    }

    /* check convergence */
    for(j=0, flag=0; j<n_mar-1; j++) {
      if(fabs(d[j] - cur_d[j]) > tol*(cur_d[j]+tol*100.0)) {
	flag = 1; 
	break;
      }
    }

    if(!flag) break;

  } /* end EM algorithm */
  
  if(flag) warning("Didn't converge!\n");

  /* re-calculate transition matrices */
  step_bci(n_mar, n_bcstates, tm, d, m, p, maxit, tol);

  /* calculate log likelihood */
  *loglik = 0.0;
  for(i=0; i<n_ind; i++) { /* i = individual */
    /* initialize alpha */
    for(v=0; v<n_states; v++) 
      alpha[v][0] = initprob + emit_f2i(Geno[0][i], v, error_prob, m, n_bcstates);

    /* forward equations */
    for(j=1; j<n_mar; j++) {
      for(v=0; v<n_states; v++) {
	alpha[v][j] = alpha[0][j-1] + step_f2i(0, v, j-1, tm, n_bcstates);
	for(v2=1; v2<n_states; v2++) 
	  alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] + 
			       step_f2i(v2, v, j-1, tm, n_bcstates));
	alpha[v][j] += emit_f2i(Geno[j][i], v, error_prob, m, n_bcstates);
      }
    }

    curloglik = alpha[0][n_mar-1];
    for(v=1; v<n_states; v++) 
      curloglik = addlog(curloglik, alpha[v][n_mar-1]);
    *loglik += curloglik;
  }

  /* convert distances back to cM */
  for(j=0; j<n_mar-1; j++) d[j] *= 100.0;

  if(verbose) {
    /* print final estimates */
    Rprintf(" %4d ", it+1);
    for(j=0; j<n_mar-1; j++) Rprintf("%.3lf ", d[j]);
    Rprintf("\n");
    
    Rprintf("loglik: %10.4lf\n\n", *loglik);
  }

}
示例#4
0
void calc_pairprob(int n_ind, int n_pos, int n_gen, int *geno,
                   double *rf, double *rf2,
                   double error_prob, double *genoprob,
                   double *pairprob,
                   double initf(int, int *),
                   double emitf(int, int, double, int *),
                   double stepf(int, int, double, double, int *))
{
    int i, j, j2, v, v2, v3;
    double s=0.0, **alpha, **beta;
    int **Geno;
    double ***Genoprob, *****Pairprob;
    int cross_scheme[2];

    /* cross scheme hidden in genoprob argument; used by hmm_bcsft */
    cross_scheme[0] = genoprob[0];
    cross_scheme[1] = genoprob[1];
    genoprob[0] = 0.0;
    genoprob[1] = 0.0;

    /* n_pos must be at least 2, or there are no pairs! */
    if(n_pos < 2) error("n_pos must be > 1 in calc_pairprob");

    /* allocate space for alpha and beta and
       reorganize geno, genoprob, and pairprob */
    reorg_geno(n_ind, n_pos, geno, &Geno);
    reorg_genoprob(n_ind, n_pos, n_gen, genoprob, &Genoprob);
    reorg_pairprob(n_ind, n_pos, n_gen, pairprob, &Pairprob);
    allocate_alpha(n_pos, n_gen, &alpha);
    allocate_alpha(n_pos, n_gen, &beta);

    for(i=0; i<n_ind; i++) { /* i = individual */

        R_CheckUserInterrupt(); /* check for ^C */

        /* initialize alpha and beta */
        for(v=0; v<n_gen; v++) {
            alpha[v][0] = initf(v+1, cross_scheme) + emitf(Geno[0][i], v+1, error_prob, cross_scheme);
            beta[v][n_pos-1] = 0.0;
        }

        /* forward-backward equations */
        for(j=1,j2=n_pos-2; j<n_pos; j++, j2--) {

            for(v=0; v<n_gen; v++) {
                alpha[v][j] = alpha[0][j-1] + stepf(1, v+1, rf[j-1], rf2[j-1], cross_scheme);

                beta[v][j2] = beta[0][j2+1] + stepf(v+1,1,rf[j2], rf2[j2], cross_scheme) +
                    emitf(Geno[j2+1][i],1,error_prob, cross_scheme);

                for(v2=1; v2<n_gen; v2++) {
                    alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] +
                                         stepf(v2+1,v+1,rf[j-1],rf2[j-1], cross_scheme));
                    beta[v][j2] = addlog(beta[v][j2], beta[v2][j2+1] +
                                         stepf(v+1,v2+1,rf[j2],rf2[j2], cross_scheme) +
                                         emitf(Geno[j2+1][i],v2+1,error_prob, cross_scheme));
                }

                alpha[v][j] += emitf(Geno[j][i],v+1,error_prob, cross_scheme);
            }
        }

        /* calculate genotype probabilities */
        for(j=0; j<n_pos; j++) {
            s = Genoprob[0][j][i] = alpha[0][j] + beta[0][j];
            for(v=1; v<n_gen; v++) {
                Genoprob[v][j][i] = alpha[v][j] + beta[v][j];
                s = addlog(s, Genoprob[v][j][i]);
            }
            for(v=0; v<n_gen; v++)
                Genoprob[v][j][i] = exp(Genoprob[v][j][i] - s);
        }

        /* calculate Pr(G[j], G[j+1] | marker data) for i = 1...n_pos-1 */
        for(j=0; j<n_pos-1; j++) {
            for(v=0; v<n_gen; v++) {
                for(v2=0; v2<n_gen; v2++) {
                    Pairprob[v][v2][j][j+1][i] = alpha[v][j] + beta[v2][j+1] +
                        stepf(v+1,v2+1,rf[j],rf2[j], cross_scheme) +
                        emitf(Geno[j+1][i],v2+1,error_prob, cross_scheme);
                    if(v==0 && v2==0) s=Pairprob[v][v2][j][j+1][i];
                    else s = addlog(s,Pairprob[v][v2][j][j+1][i]);
                }
            }
            /* scale to sum to 1 */
            for(v=0; v<n_gen; v++)
                for(v2=0; v2<n_gen; v2++)
                    Pairprob[v][v2][j][j+1][i] =
                        exp(Pairprob[v][v2][j][j+1][i] - s);
        }

        /* now calculate Pr(G[i], G[j] | marker data) for j > i+1 */
        for(j=0; j<n_pos-2; j++) {
            for(j2=j+2; j2<n_pos; j2++) {

                for(v=0; v<n_gen; v++) { /* genotype at pos'n j */
                    for(v2=0; v2<n_gen; v2++) { /* genotype at pos'n j2 */

                        Pairprob[v][v2][j][j2][i] = 0.0;

                        for(v3=0; v3<n_gen; v3++) { /* genotype at pos'n j2-1 */
                            s = Genoprob[v3][j2-1][i];
                            if(fabs(s) > TOL) /* avoid 0/0 */
                                Pairprob[v][v2][j][j2][i] += Pairprob[v][v3][j][j2-1][i]*
                                    Pairprob[v3][v2][j2-1][j2][i]/s;
                        }

                    }
                } /* end loops over genotypes */

            }
        } /* end loops over pairs of positions */

    } /* end loop over individuals */
}
示例#5
0
void calc_genoprob(int n_ind, int n_pos, int n_gen, int *geno,
                   double *rf, double *rf2,
                   double error_prob, double *genoprob,
                   double initf(int, int *),
                   double emitf(int, int, double, int *),
                   double stepf(int, int, double, double, int *))
{
    int i, j, j2, v, v2;
    double s, **alpha, **beta;
    int **Geno;
    double ***Genoprob;
    int cross_scheme[2];

    /* cross scheme hidden in genoprob argument; used by hmm_bcsft */
    cross_scheme[0] = genoprob[0];
    cross_scheme[1] = genoprob[1];
    genoprob[0] = 0.0;
    genoprob[1] = 0.0;

    /* allocate space for alpha and beta and
       reorganize geno and genoprob */
    reorg_geno(n_ind, n_pos, geno, &Geno);
    reorg_genoprob(n_ind, n_pos, n_gen, genoprob, &Genoprob);
    allocate_alpha(n_pos, n_gen, &alpha);
    allocate_alpha(n_pos, n_gen, &beta);

    for(i=0; i<n_ind; i++) { /* i = individual */

        R_CheckUserInterrupt(); /* check for ^C */

        /* initialize alpha and beta */
        for(v=0; v<n_gen; v++) {
            alpha[v][0] = initf(v+1, cross_scheme) + emitf(Geno[0][i], v+1, error_prob, cross_scheme);
            beta[v][n_pos-1] = 0.0;
        }

        /* forward-backward equations */
        for(j=1,j2=n_pos-2; j<n_pos; j++, j2--) {

            for(v=0; v<n_gen; v++) {
                alpha[v][j] = alpha[0][j-1] + stepf(1, v+1, rf[j-1], rf2[j-1], cross_scheme);
                beta[v][j2] = beta[0][j2+1] + stepf(v+1,1,rf[j2], rf2[j2], cross_scheme) +
                    emitf(Geno[j2+1][i],1,error_prob, cross_scheme);

                for(v2=1; v2<n_gen; v2++) {
                    alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] +
                                         stepf(v2+1,v+1,rf[j-1],rf2[j-1], cross_scheme));
                    beta[v][j2] = addlog(beta[v][j2], beta[v2][j2+1] +
                                         stepf(v+1,v2+1,rf[j2],rf2[j2], cross_scheme) +
                                         emitf(Geno[j2+1][i],v2+1,error_prob, cross_scheme));
                }

                alpha[v][j] += emitf(Geno[j][i],v+1,error_prob, cross_scheme);
            }
        }

        /* calculate genotype probabilities */
        for(j=0; j<n_pos; j++) {
            s = Genoprob[0][j][i] = alpha[0][j] + beta[0][j];
            for(v=1; v<n_gen; v++) {
                Genoprob[v][j][i] = alpha[v][j] + beta[v][j];
                s = addlog(s, Genoprob[v][j][i]);
            }
            for(v=0; v<n_gen; v++)
                Genoprob[v][j][i] = exp(Genoprob[v][j][i] - s);
        }

        /* the following is the old version */
        /*    for(j=0; j<n_pos; j++) {
              s = 0.0;
              for(v=0; v<n_gen; v++)
              s += (Genoprob[v][j][i] = exp(alpha[v][j] + beta[v][j]));

              for(v=0; v<n_gen; v++)
              Genoprob[v][j][i] /= s;
              } */


    } /* loop over individuals */


}
示例#6
0
void est_map(int n_ind, int n_mar, int n_gen, int *geno, double *rf,
             double *rf2, double error_prob, double initf(int, int *),
             double emitf(int, int, double, int *),
             double stepf(int, int, double, double, int *),
             double nrecf1(int, int, double, int*), double nrecf2(int, int, double, int*),
             double *loglik, int maxit, double tol, int sexsp,
             int verbose)
{
    int i, j, j2, v, v2, it, flag=0, **Geno, ndigits;
    double s, **alpha, **beta, **gamma, *cur_rf, *cur_rf2;
    double curloglik, maxdif, temp;
    char pattern[100], text[200];
    int cross_scheme[2];

    /* cross scheme hidden in loglik argument; used by hmm_bcsft */
    cross_scheme[0] = (int) ftrunc(*loglik / 1000.0);
    cross_scheme[1] = ((int) *loglik) - 1000 * cross_scheme[0];
    *loglik = 0.0;

    /* allocate space for beta and reorganize geno */
    reorg_geno(n_ind, n_mar, geno, &Geno);
    allocate_alpha(n_mar, n_gen, &alpha);
    allocate_alpha(n_mar, n_gen, &beta);
    allocate_dmatrix(n_gen, n_gen, &gamma);
    allocate_double(n_mar-1, &cur_rf);
    allocate_double(n_mar-1, &cur_rf2);

    /* digits in verbose output */
    if(verbose) {
        ndigits = (int)ceil(-log10(tol));
        if(ndigits > 16) ndigits=16;
        sprintf(pattern, "%s%d.%df", "%", ndigits+3, ndigits+1);
    }

    /* begin EM algorithm */
    for(it=0; it<maxit; it++) {

        for(j=0; j<n_mar-1; j++) {
            cur_rf[j] = cur_rf2[j] = rf[j];
            rf[j] = 0.0;
            if(sexsp) {
                cur_rf2[j] = rf2[j];
                rf2[j] = 0.0;
            }
        }

        for(i=0; i<n_ind; i++) { /* i = individual */

            R_CheckUserInterrupt(); /* check for ^C */

            /* initialize alpha and beta */
            for(v=0; v<n_gen; v++) {
                alpha[v][0] = initf(v+1, cross_scheme) + emitf(Geno[0][i], v+1, error_prob, cross_scheme);
                beta[v][n_mar-1] = 0.0;
            }

            /* forward-backward equations */
            for(j=1,j2=n_mar-2; j<n_mar; j++, j2--) {

                for(v=0; v<n_gen; v++) {
                    alpha[v][j] = alpha[0][j-1] + stepf(1, v+1, cur_rf[j-1], cur_rf2[j-1], cross_scheme);
                    beta[v][j2] = beta[0][j2+1] + stepf(v+1,1,cur_rf[j2], cur_rf2[j2], cross_scheme) +
                        emitf(Geno[j2+1][i],1,error_prob, cross_scheme);

                    for(v2=1; v2<n_gen; v2++) {
                        alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] +
                                             stepf(v2+1,v+1,cur_rf[j-1],cur_rf2[j-1], cross_scheme));
                        beta[v][j2] = addlog(beta[v][j2], beta[v2][j2+1] +
                                             stepf(v+1,v2+1,cur_rf[j2],cur_rf2[j2], cross_scheme) +
                                             emitf(Geno[j2+1][i],v2+1,error_prob, cross_scheme));
                    }

                    alpha[v][j] += emitf(Geno[j][i],v+1,error_prob, cross_scheme);
                }

            }

            for(j=0; j<n_mar-1; j++) {

                /* calculate gamma = log Pr(v1, v2, O) */
                for(v=0, s=0.0; v<n_gen; v++) {
                    for(v2=0; v2<n_gen; v2++) {
                        gamma[v][v2] = alpha[v][j] + beta[v2][j+1] +
                            emitf(Geno[j+1][i], v2+1, error_prob, cross_scheme) +
                            stepf(v+1, v2+1, cur_rf[j], cur_rf2[j], cross_scheme);

                        if(v==0 && v2==0) s = gamma[v][v2];
                        else s = addlog(s, gamma[v][v2]);
                    }
                }

                for(v=0; v<n_gen; v++) {
                    for(v2=0; v2<n_gen; v2++) {
                        rf[j] += nrecf1(v+1,v2+1, cur_rf[j], cross_scheme) * exp(gamma[v][v2] - s);
                        if(sexsp) rf2[j] += nrecf2(v+1,v2+1, cur_rf[j], cross_scheme) * exp(gamma[v][v2] - s);
                    }
                }
            }

        } /* loop over individuals */

        /* rescale */
        for(j=0; j<n_mar-1; j++) {
            rf[j] /= (double)n_ind;
            if(rf[j] < tol/1000.0) rf[j] = tol/1000.0;
            else if(rf[j] > 0.5-tol/1000.0) rf[j] = 0.5-tol/1000.0;

            if(sexsp) {
                rf2[j] /= (double)n_ind;
                if(rf2[j] < tol/1000.0) rf2[j] = tol/1000.0;
                else if(rf2[j] > 0.5-tol/1000.0) rf2[j] = 0.5-tol/1000.0;
            }
            else rf2[j] = rf[j];
        }

        if(verbose>1) {
            /* print estimates as we go along*/
            Rprintf("   %4d ", it+1);
            maxdif=0.0;
            for(j=0; j<n_mar-1; j++) {
                temp = fabs(rf[j] - cur_rf[j])/(cur_rf[j]+tol*100.0);
                if(maxdif < temp) maxdif = temp;
                if(sexsp) {
                    temp = fabs(rf2[j] - cur_rf2[j])/(cur_rf2[j]+tol*100.0);
                    if(maxdif < temp) maxdif = temp;
                }
                /* bsy add */
                if(verbose > 2)
                    Rprintf("%d %f %f\n", j+1, cur_rf[j], rf[j]);
                /* bsy add */
            }
            sprintf(text, "%s%s\n", "  max rel've change = ", pattern);
            Rprintf(text, maxdif);
        }

        /* check convergence */
        for(j=0, flag=0; j<n_mar-1; j++) {
            if(fabs(rf[j] - cur_rf[j]) > tol*(cur_rf[j]+tol*100.0) ||
               (sexsp && fabs(rf2[j] - cur_rf2[j]) > tol*(cur_rf2[j]+tol*100.0))) {
                flag = 1;
                break;
            }
        }

        if(!flag) break;

    } /* end EM algorithm */

    if(flag) warning("Didn't converge!\n");

    /* calculate log likelihood */
    *loglik = 0.0;
    for(i=0; i<n_ind; i++) { /* i = individual */
        /* initialize alpha */
        for(v=0; v<n_gen; v++) {
            alpha[v][0] = initf(v+1, cross_scheme) + emitf(Geno[0][i], v+1, error_prob, cross_scheme);
        }
        /* forward equations */
        for(j=1; j<n_mar; j++) {
            for(v=0; v<n_gen; v++) {
                alpha[v][j] = alpha[0][j-1] +
                    stepf(1, v+1, rf[j-1], rf2[j-1], cross_scheme);

                for(v2=1; v2<n_gen; v2++)
                    alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] +
                                         stepf(v2+1,v+1,rf[j-1],rf2[j-1], cross_scheme));

                alpha[v][j] += emitf(Geno[j][i],v+1,error_prob, cross_scheme);
            }
        }

        curloglik = alpha[0][n_mar-1];
        for(v=1; v<n_gen; v++)
            curloglik = addlog(curloglik, alpha[v][n_mar-1]);

        *loglik += curloglik;
    }

    if(verbose) {
        if(verbose < 2) {
            /* print final estimates */
            Rprintf("  no. iterations = %d\n", it+1);
            maxdif=0.0;
            for(j=0; j<n_mar-1; j++) {
                temp = fabs(rf[j] - cur_rf[j])/(cur_rf[j]+tol*100.0);
                if(maxdif < temp) maxdif = temp;
                if(sexsp) {
                    temp = fabs(rf2[j] - cur_rf2[j])/(cur_rf2[j]+tol*100.0);
                    if(maxdif < temp) maxdif = temp;
                }
            }
            sprintf(text, "%s%s\n", "  max rel've change at last step = ", pattern);
            Rprintf(text, maxdif);
        }

        Rprintf("  loglik: %10.4lf\n\n", *loglik);
    }

}
示例#7
0
void sim_geno(int n_ind, int n_pos, int n_gen, int n_draws,
              int *geno, double *rf, double *rf2,
              double error_prob, int *draws,
              double initf(int, int *),
              double emitf(int, int, double, int *),
              double stepf(int, int, double, double, int *))
{
    int i, k, j, v, v2;
    double s, **beta, *probs;
    int **Geno, ***Draws, curstate;
    int cross_scheme[2];

    /* cross scheme hidden in draws argument; used by hmm_bcsft */
    cross_scheme[0] = draws[0];
    cross_scheme[1] = draws[1];
    draws[0] = 0;
    draws[1] = 0;

    /* allocate space for beta and
       reorganize geno and draws */
    /* Geno indexed as Geno[pos][ind] */
    /* Draws indexed as Draws[rep][pos][ind] */
    reorg_geno(n_ind, n_pos, geno, &Geno);
    reorg_draws(n_ind, n_pos, n_draws, draws, &Draws);
    allocate_alpha(n_pos, n_gen, &beta);
    allocate_double(n_gen, &probs);

    /* Read R's random seed */
    GetRNGstate();

    for(i=0; i<n_ind; i++) { /* i = individual */

        R_CheckUserInterrupt(); /* check for ^C */

        /* do backward equations */
        /* initialize beta */
        for(v=0; v<n_gen; v++) beta[v][n_pos-1] = 0.0;

        /* backward equations */
        for(j=n_pos-2; j>=0; j--) {

            for(v=0; v<n_gen; v++) {
                beta[v][j] = beta[0][j+1] + stepf(v+1,1,rf[j], rf2[j], cross_scheme) +
                    emitf(Geno[j+1][i],1,error_prob, cross_scheme);

                for(v2=1; v2<n_gen; v2++)
                    beta[v][j] = addlog(beta[v][j], beta[v2][j+1] +
                                        stepf(v+1,v2+1,rf[j],rf2[j], cross_scheme) +
                                        emitf(Geno[j+1][i],v2+1,error_prob, cross_scheme));
            }
        }

        for(k=0; k<n_draws; k++) { /* k = simulation replicate */

            /* first draw */
            /* calculate probs */
            s = (probs[0] = initf(1, cross_scheme)+emitf(Geno[0][i],1,error_prob, cross_scheme)+beta[0][0]);
            for(v=1; v<n_gen; v++) {
                probs[v] = initf(v+1, cross_scheme) + emitf(Geno[0][i], v+1, error_prob, cross_scheme) +
                    beta[v][0];
                s = addlog(s, probs[v]);
            }
            for(v=0; v<n_gen; v++) probs[v] = exp(probs[v] - s);

            /* make draw: returns a value from {1, 2, ..., n_gen} */
            curstate = Draws[k][0][i] = sample_int(n_gen, probs);

            /* move along chromosome */
            for(j=1; j<n_pos; j++) {
                /* calculate probs */
                for(v=0; v<n_gen; v++)
                    probs[v] = exp(stepf(curstate,v+1,rf[j-1],rf2[j-1], cross_scheme) +
                                   emitf(Geno[j][i],v+1,error_prob, cross_scheme) +
                                   beta[v][j] - beta[curstate-1][j-1]);
                /* make draw */
                curstate = Draws[k][j][i] = sample_int(n_gen, probs);
            }

        } /* loop over replicates */

    } /* loop over individuals */

    /* write R's random seed */
    PutRNGstate();

}
示例#8
0
void calc_genoprob_special(int n_ind, int n_pos, int n_gen, int *geno, 
			   double *rf, double *rf2, 
			   double error_prob, double *genoprob, 
			   double initf(int), 
			   double emitf(int, int, double),
			   double stepf(int, int, double, double)) 
{
  int i, j, j2, v, v2, curpos;
  double s, **alpha, **beta;
  int **Geno;
  double ***Genoprob;
  
  /* allocate space for alpha and beta and 
     reorganize geno and genoprob */
  reorg_geno(n_ind, n_pos, geno, &Geno);
  reorg_genoprob(n_ind, n_pos, n_gen, genoprob, &Genoprob);
  allocate_alpha(n_pos, n_gen, &alpha);
  allocate_alpha(n_pos, n_gen, &beta);

  for(i=0; i<n_ind; i++) { /* i = individual */

    for(curpos=0; curpos < n_pos; curpos++) {

      if(!Geno[curpos][i]) continue;

      R_CheckUserInterrupt(); /* check for ^C */

      /* initialize alpha and beta */
      for(v=0; v<n_gen; v++) {
	if(curpos==0) 
	  alpha[v][0] = initf(v+1) + emitf(Geno[0][i], v+1, error_prob);
	else
	  alpha[v][0] = initf(v+1) + emitf(Geno[0][i], v+1, TOL);
	beta[v][n_pos-1] = 0.0;
      }

      /* forward-backward equations */
      for(j=1,j2=n_pos-2; j<n_pos; j++, j2--) {
      
	for(v=0; v<n_gen; v++) {
	  alpha[v][j] = alpha[0][j-1] + stepf(1, v+1, rf[j-1], rf2[j-1]);
	
	  if(curpos==j2+1)
	    beta[v][j2] = beta[0][j2+1] + stepf(v+1,1,rf[j2], rf2[j2]) + 
	      emitf(Geno[j2+1][i],1,error_prob);
	  else 
	    beta[v][j2] = beta[0][j2+1] + stepf(v+1,1,rf[j2], rf2[j2]) + 
	      emitf(Geno[j2+1][i],1,TOL);

	  for(v2=1; v2<n_gen; v2++) {
	    alpha[v][j] = addlog(alpha[v][j], alpha[v2][j-1] + 
				 stepf(v2+1,v+1,rf[j-1],rf2[j-1]));
	    if(curpos==j2+1)
	      beta[v][j2] = addlog(beta[v][j2], beta[v2][j2+1] + 
				   stepf(v+1,v2+1,rf[j2],rf2[j2]) +
				   emitf(Geno[j2+1][i],v2+1,error_prob));
	    else
	      beta[v][j2] = addlog(beta[v][j2], beta[v2][j2+1] + 
				   stepf(v+1,v2+1,rf[j2],rf2[j2]) +
				   emitf(Geno[j2+1][i],v2+1,TOL));

	  }

	  if(curpos==j)
	    alpha[v][j] += emitf(Geno[j][i],v+1,error_prob);
	  else
	    alpha[v][j] += emitf(Geno[j][i],v+1,TOL);
	}
      }

      /* calculate genotype probabilities */
      s = Genoprob[0][curpos][i] = alpha[0][curpos] + beta[0][curpos];
      for(v=1; v<n_gen; v++) {
	Genoprob[v][curpos][i] = alpha[v][curpos] + beta[v][curpos];
	s = addlog(s, Genoprob[v][curpos][i]);
      }
      for(v=0; v<n_gen; v++) 
	Genoprob[v][curpos][i] = exp(Genoprob[v][curpos][i] - s);

    } /* end loop over current position */

  } /* loop over individuals */
}