コード例 #1
0
ファイル: background.c プロジェクト: CPFL/gmeme
/*
 * Test driver for the Markov background routines.
 *
 * Usage: <program> <order>
 *
 * Builds conditional probabilities of the given order from a fixed
 * test sequence via get_markov_from_sequence(), prints them with
 * print_prob(), then fills logcumback[] with log_cum_back() and prints,
 * for every sequence position, exp(logcumback[i]) and
 * exp(Log_back(logcumback, i, 6)).
 *
 * Returns 0 on success, 1 when the order argument is missing or is not
 * a non-negative decimal integer.
 */
int main(
  int argc,
  char **argv
) 
{
  int i;
  int order;
  long parsed_order;
  char *end = NULL;
  /*
   * Zero-filled on purpose: the print loop below requests a width-6
   * window at every position of a 9-character sequence, which
   * presumably reads entries beyond those log_cum_back() fills
   * (TODO confirm against the definition of Log_back).  Zeroing keeps
   * such reads away from indeterminate values.
   */
  double logcumback[1000] = {0};
  const char *string = "ATTTTTTTT";
  int len = strlen(string);
  char *seq = NULL;

  /* argv[1] is only valid when an argument was actually supplied. */
  if (argc < 2) {
    fprintf(stderr, "usage: %s <order>\n", argc > 0 ? argv[0] : "background");
    return(1);
  }

  /* strtol (unlike atoi) lets us reject empty, trailing-garbage and
     negative input instead of silently using a bogus order. */
  parsed_order = strtol(argv[1], &end, 10);
  if (end == argv[1] || *end != '\0' || parsed_order < 0) {
    fprintf(stderr, "invalid order: %s\n", argv[1]);
    return(1);
  }
  order = (int) parsed_order;

  /* Work on a private, writable copy of the test sequence. */
  Resize(seq, len+1, char);
  strcpy(seq, string);
  printf("order = %d len = %d string = %s\n", order, len, string);
  setup_hash_alph(DNAB);
  char *alpha = "ACGTX";
  BOOLEAN rc = TRUE;
  double *a_cp = get_markov_from_sequence(seq, alpha, rc, order);

  printf("# conditional probabilities\n");
  print_prob(a_cp, order+1, "", 0, alpha);

  log_cum_back(seq, a_cp, order, logcumback);
  for (i=0; i<len; i++) printf("%c %f %f\n", seq[i], exp(logcumback[i]), 
    exp(Log_back(logcumback,i,6)) ); 
  printf("All done\n");

  return(0);
} /* main */
コード例 #2
0
ファイル: tcm.c プロジェクト: BackofenLab/MEMERIS
/*
 * tcm_e_step
 *
 * E step for the tcm model.
 *
 * After calling convert_theta_to_log(model, dataset), visits every
 * sample in the dataset whose length is at least the motif width w and:
 *   - if dataset->secondaryStructureFilename is non-NULL, recomputes the
 *     per-site values s->sigma[j] from s->ss_value[j] and a pseudocount
 *     (raising the pseudocount when the largest resulting prior would
 *     exceed 1), and uses sigma[j]*lambda*m in place of lambda;
 *   - stores, per strand k and start j, a log-space site score in
 *     s->sz[k][j] and the combined log score over strands in s->z[j];
 *   - adds the s->z[j] values to the running total logpX;
 *   - then overwrites s->sz[k][j] with exp(sz - z) * not_o[j] (capped at
 *     1.0) and s->z[j] with the sum over strands (capped at 1.0), and
 *     zeroes s->z[j] for j in [m, lseq).
 * Samples shorter than w are skipped entirely; their arrays are not
 * touched by this function.
 *
 * Finally calls smooth(w, model, dataset), discarding its result.
 *
 * Side effects visible here: writes s->sigma, s->sz, s->z of each
 * processed sample and may increase MAXADJUST (declared outside this
 * function).
 *
 * Returns logpX / log(2.0), i.e. the accumulated log score rescaled to
 * base 2 (assuming LOG/LOGL_SUM work in natural logs -- TODO confirm).
 */
double tcm_e_step(
  MODEL *model,			/* the model */
  DATASET *dataset  		/* the dataset */
)
{
  int i, j, k, ii;
  /* NOTE(review): logtheta1 is read from the model before
     convert_theta_to_log() is called below; presumably THETA is a
     pointer type so the converted values are seen through it -- confirm.
     The name is also used with call syntax, logtheta1(ii, x), further
     down, so there is presumably a same-named macro wrapping the
     indexing; do not rename this local without checking. */
  THETA logtheta1 = model->logtheta;	/* motif log(theta) */
  int w = model->w;			/* motif width */
  int n_samples = dataset->n_samples;	/* number of sequences */
  BOOLEAN invcomp = model->invcomp;     /* use reverse complement strand, too */
  int ndir = invcomp ? 2 : 1;           /* number of strands */
  double log_sigma = log(1.0/ndir);	/* log \sigma */
  double lambda = model->lambda;	/* \lambda of tcm model */
  /* log_lambda and log_1mlambda are function-scope: in secondary
     structure mode they are overwritten for every site below. */
  double log_lambda = LOG(lambda);	/* log \lambda */
  double log_1mlambda = LOG(1-lambda);	/* log (1 - \lambda) */
  double logpX;				/* log likelihood; no erase or smooth */

  /* E step */

  convert_theta_to_log(model, dataset);

  /* calculate all the posterior offset probabilities */
  logpX = 0;

  for (i=0; i < n_samples; i++) {	/* sequence */
    SAMPLE *s = dataset->samples[i];
    int lseq = s->length;
    /* m is <= 0 when lseq < w; such samples are skipped below before m
       is used. */
    int m = lseq - w + 1;		/* number of possible sites */
    double *zi = s->z;			/* Pr(z_ij=1 | X_i, \theta) */
    double *not_o = s->not_o;		/* Pr(v_ij = 1) */
    double *lcb = s->logcumback;	/* cumulative background probability */

    if (lseq < w) continue;		/* sequence too short for motif */

	 /* added by M.H. */
	 /* use log sigma_ij * lambda * m instead of lambda if secondary structure information is given */
	 /* NOTE: log_sigma is the prior for + or - strand --> here only + strand --> log_sigma = 0 */
	 /* NOTE(review): nothing in this function forces log_sigma to 0 in
	    this mode; it stays log(1.0/ndir), which is 0 only when invcomp
	    is false.  Presumably callers never combine invcomp with a
	    secondary structure file -- confirm. */
	 if (dataset->secondaryStructureFilename != NULL) {
	 
	 	/* first check if the maximum sigma * (lambda*m) > 1   --> if so P(Zij=1 | \phi) can be > 1 */
	   double Pcount = dataset->secondaryStructurePseudocount;
		double maxPrior = ((s->max_ss_value + Pcount) / (s->sum_ss_value + (m * Pcount))) * (lambda*m);

		if (maxPrior > 1.0 ) {
			/* compute new pseudocount that gives sigma_i_max = 1 = (max_ss_value + pseudocount) / (\sum (ss_value[i] + pseudocount)) * lambda * m */
			/* NOTE(review): the denominator is m * (lambda - 1), so
			   lambda == 1 would divide by zero here; presumably
			   lambda < 1 is guaranteed by the caller -- confirm. */
			Pcount = (-1 * s->max_ss_value * lambda * m + s->sum_ss_value) / (m * (lambda - 1));
	
			/* for statistics keep maximum adjustment */
			/* MAXADJUST is not declared in this function; it is
			   updated here to the largest pseudocount increase seen. */
			if (Pcount - dataset->secondaryStructurePseudocount > MAXADJUST) {
				MAXADJUST = Pcount - dataset->secondaryStructurePseudocount;
			}
		}
	 	
		/* compute new sigmas with this pseudocount */
		double sum = s->sum_ss_value + (m * Pcount);  		 /* \sum ss_value[i] + m*pseudocount */
    	for (j=0; j < m; j++) {
	   	s->sigma[j] = (s->ss_value[j] + Pcount) / sum;
		}
	 }


    /* First pass: log-space scores per strand and site. */
    for (k=0; k<ndir; k++) {		/* strand */
      BOOLEAN ic = (k==1);		/* doing - strand */
      double *szik = s->sz[k];		/* Pr(X_i | z_ij=1, s_ijk=1, \theta) */

      for (j=0; j<m; j++) {		/* site start */
		
	 	   /* added by M.H. */
		   /* use the prior instead of lambda */
		   /* sigma[j] is indexed by j for both strands. */
	 	   if (dataset->secondaryStructureFilename != NULL) {
			  double p = MIN(1.0, ( s->sigma[j] * lambda * m ) ) ;		/* rounding */
			  log_lambda = LOG(p);	
  			  log_1mlambda = LOG(1-p);	
		   }
		
       	 /* log Pr(X_ij | s_ijk=1, \theta0) \sigma (1-\lambda) */
			 double log_pXijtheta0 = log_sigma + log_1mlambda;	
         /* log Pr(X_ij | s_ijk=1, \theta1) \sigma \lambda */
			double log_pXijtheta1 = log_sigma + log_lambda;
         /* On the - strand the offset is measured from the right end
            and the residues are read from s->resic instead of s->res. */
         int off = ic ? lseq-w-j : j;	/* - strand offset from rgt. */
         char *res = ic ? s->resic+off : s->res+off;	/* integer sequence */

        /* calculate the probability of positions in the site under the
  	  background and foreground models
        */
        /* The background term is looked up at j (not off) on both
           strands; the foreground term sums the w per-column entries. */
        log_pXijtheta0 += Log_back(lcb, j, w);
        for (ii=0; ii<w; ii++) log_pXijtheta1 += logtheta1(ii, (int)res[ii]);
 
        /* set log szik to:
          Pr(X_i | z_ij=1, s_ijk=1, \theta) \sigma \lambda
        */
        szik[j] = log_pXijtheta1;
 
        /* set z_ij to log Pr(X_ij | \phi): (6-21-99 tlb)
          log(
	    \sigma * sum_{k=0}^{ndir-1} ( 
	      Pr(X_i|z_ij=1, s_ijk=1, \theta) \lambda +
	      Pr(X_i|z_ij=0, s_ijk=1, \theta) (1-\lambda) 
	    )
          )
        */
        /* The first strand (k==0) initialises zi[j]; a second strand
           folds its contribution into the existing value. */
        zi[j] = k==0 ? LOGL_SUM(log_pXijtheta0, log_pXijtheta1) : 
          LOGL_SUM(zi[j], LOGL_SUM(log_pXijtheta0, log_pXijtheta1));
      } /* site start */
    } /* strand */

    /* compute log Pr(X | \phi) = sum_i,j log(Pr(X_ij)) */
    /* Must run before zi[] is overwritten with probabilities below. */
    for (j=0; j<m; j++) {			/* site start */
      logpX += zi[j];				/* z_ij = log Pr(X_ij | \phi) */
    }

    /* sz_ijk : normalize, delog and account for erasing
      Pr(z_ij=1, s_ijk=1 | X_i, \phi) \approx
           P(z_ij=1, s_ijk=1 | X_i, \phi) P(v_ij = 1)
    */
    for (k=0; k<ndir; k++) {		/* strand */
      double *szik = s->sz[k];		/* Pr(X_i | z_ij=1, s_ijk=1, \phi) */
      for (j=0; j<m; j++) {		/* site start */
        /* note zi[j] holds Pr(X_ij|\phi) */
        szik[j] = MIN(1.0, exp(szik[j] - zi[j]) * not_o[j]);	/* roundoff */
      } /* site */
    } /* strand */

    /* z_ij : sum of sz_ijk */
    for (j=0; j<m; j++) {		/* site start */
      /* "k=zi[j]=0" resets both the accumulator zi[j] and the strand
         counter k in one expression. */
      for (k=zi[j]=0; k<ndir; k++) {	/* strand */
        zi[j] += s->sz[k][j];
      } /* strand */
      zi[j] = MIN(1.0, zi[j]);		/* avoid roundoff errors */
    } /* site */
    /* Positions where a width-w site cannot start get z = 0. */
    for (j=m; j<lseq; j++) {		/* tail of sequence */
      zi[j] = 0;
    }

  } /* sequence */

  /* smooth so no window of size w has z_i which sum to greater than 1.0 */
  (void) smooth(w, model, dataset);

  /* Dividing by log(2.0) rescales the accumulated log value to base 2. */
  return (logpX/log(2.0));
} /* tcm_e_step */