Ejemplo n.º 1
0
/*
 * copy_model
 *
 * Copy the contents of model m1 into model m2, field by field.
 *
 * Scalar fields are assigned directly.  The theta and obs matrices are
 * copied with copy_theta() using the source width m1->w, the first m1->w
 * entries of rentropy are copied, and the cons/cons0 strings are copied
 * with strcpy(), so m2 must already provide storage for all of these.
 *
 * The maxima array is only copied when m1 has one.  m2->maxima is passed
 * to Resize() (project macro; presumably a realloc-style resize) for
 * m1->nsites_dis+1 entries.
 */
extern void copy_model(
  MODEL *m1, 				/* source */
  MODEL *m2,				/* destination */
  int alength				/* length of alphabet */
)
{
  int i;

  m2->mtype = m1->mtype;
  m2->min_w = m1->min_w;
  m2->max_w = m1->max_w;
  m2->pw = m1->pw;
  m2->min_nsites = m1->min_nsites;
  m2->max_nsites = m1->max_nsites;
  m2->psites = m1->psites;
  if (m1->maxima) {				/* copy maxima if they exist */
    Resize(m2->maxima, m1->nsites_dis+1, p_prob);
    /*
     * memmove replaces the legacy BSD bcopy (note the swapped argument
     * order: destination first); like bcopy it tolerates overlap.
     * NOTE(review): room is made for nsites_dis+1 entries but only
     * nsites_dis are copied -- confirm the last slot is meant to stay
     * uncopied.
     */
    memmove(m2->maxima, m1->maxima, m1->nsites_dis*sizeof(p_prob));
  }
  m2->pal = m1->pal;
  m2->invcomp = m1->invcomp;
  m2->imotif = m1->imotif; 
  m2->w = m1->w;
  copy_theta(m1->theta, m2->theta, m1->w, alength);
  copy_theta(m1->obs, m2->obs, m1->w, alength);
  m2->lambda = m1->lambda;
  m2->lambda_obs = m1->lambda_obs;
  m2->nsites = m1->nsites;
  m2->nsites_obs = m1->nsites_obs;
  m2->nsites_dis = m1->nsites_dis;
  strcpy(m2->cons, m1->cons); 
  strcpy(m2->cons0, m1->cons0); 
  for (i=0; i<m1->w; i++) {			/* one entry per motif column */
    m2->rentropy[i] = m1->rentropy[i];
  }
  m2->rel = m1->rel;
  m2->ic = m1->ic;
  m2->ll = m1->ll;
  m2->mll_0 = m1->mll_0;
  m2->mll_1 = m1->mll_1;
  m2->logpv = m1->logpv;
  m2->logev = m1->logev; 
  m2->ID = m1->ID; 
  m2->iter = m1->iter; 
  m2->iseq = m1->iseq;
  m2->ioff = m1->ioff;
} /* copy_model */
Ejemplo n.º 2
0
/*
 * copy_model
 *
 * Duplicate model m1 into model m2.  Per-component data (width, theta,
 * obs, lambda, rel) is copied for each of the m1->c components; the
 * theta/obs matrices go through copy_theta() and the consensus string
 * through strcpy(), so m2 must already own storage for them.
 */
extern void copy_model(
  MODEL *m1, 				/* source */
  MODEL *m2,				/* destination */
  int alength				/* length of alphabet */
)
{
  int comp;				/* component index */
  int k;				/* index into 4-element arrays */

  /* model-wide settings */
  m2->mtype = m1->mtype;
  m2->c = m1->c;
  m2->pal = m1->pal;
  m2->nstrands = m1->nstrands;
  m2->imotif = m1->imotif;
  m2->iter = m1->iter;

  /* per-component parameters */
  for (comp = 0; comp < m1->c; ++comp) {
    m2->w[comp] = m1->w[comp];
    copy_theta(m1->theta[comp], m2->theta[comp], m1->w[comp], alength);
    copy_theta(m1->obs[comp], m2->obs[comp], m1->w[comp], alength);
    m2->lambda[comp] = m1->lambda[comp];
    m2->rel[comp] = m1->rel[comp];
  }

  /* the fixed four-entry sigma and d arrays */
  for (k = 0; k < 4; ++k) {
    m2->sigma[k] = m1->sigma[k];
  }
  for (k = 0; k < 4; ++k) {
    m2->d[k] = m1->d[k];
  }

  /* likelihood and significance values */
  m2->e_ll_0 = m1->e_ll_0;
  m2->e_ll = m1->e_ll;
  m2->ll = m1->ll;
  m2->sig = m1->sig;
  m2->lrt = m1->lrt;
  m2->bon = m1->bon;
  m2->root = m1->root;
  m2->pvalue = m1->pvalue;

  /* consensus string */
  strcpy(m2->cons, m1->cons);

#ifdef PARALLEL
  /* fields copied only in PARALLEL builds */
  strcpy(m2->cons0, m1->cons0);
  m2->ID = m1->ID;
#endif
}
Ejemplo n.º 3
0
/*
 * em
 *
 * Run expectation-maximization on a model for at most maxiter
 * iterations, stopping early when check_convergence() reports that
 * theta of the last component has stopped changing.
 *
 * The E-step function is selected from model->mtype (Oops, Zoops or
 * Tcm); an unknown type prints an error and exits.  When the dataset
 * carries known motifs (dataset->nmotifs > 0), like_e_step is used on
 * iteration 0 and the type-specific E-step afterwards.
 *
 * On return:
 *   - model->ll holds the value returned by the last E-step;
 *   - model->iter has been increased by the number of iterations run;
 *   - model->cons holds the consensus string computed from component 1;
 *   - calc_like() has been called on the final model.
 *
 * NOTE(review): several places index component 1 directly while the
 * main loop works on component nc-1 -- presumably nc is always 2 here;
 * confirm.
 */
extern void em(
  MODEL *model,			/* the model */
  DATASET *dataset,		/* the dataset */
  PRIORS *priors,		/* the priors */
  int maxiter,			/* maximum number of iterations */
  double distance		/* stopping criterion */
)
{
  int alength = dataset->alength;
  THETA theta_save;
  int iter;			/* iteration number */
  double (*E_STEP)(MODEL *, DATASET *); /* expectation step function */
  double (*E_STEP0)(MODEL *, DATASET *); /* E-step to switch to after iter 0 */
  /* maximization step function */
  void (*M_STEP)(MODEL *, DATASET *, PRIORS *, int);	
  int nc = model->c;		/* number of components of model */
  int max_w = model->w[nc-1];	/* width of last component */
  BOOLEAN converged = FALSE;	/* EM has converged */

  /* create a place to save old value of theta */
  /* NOTE(review): sized from the width at entry; assumes the width of
     the last component does not grow during EM -- confirm */
  create_2array(theta_save, double, max_w, alength);

  /* set up the correct type of EM to run */
  M_STEP = m_step;
  E_STEP = e_step;
  E_STEP0 = e_step;
  switch (model->mtype) {
    case Oops:
      E_STEP = e_step;
      break;
    case Zoops:
      E_STEP = zoops_e_step;
      break;
    case Tcm:
      E_STEP = tcm_e_step;
      break;
    default:
      fprintf(stderr,"Unknown model type in em()! \n");
      exit(1);
      break;
  }
  /* use like_e_step to set z matrix on iteration 0 if motifs were given */
  if (dataset->nmotifs > 0) {E_STEP0 = E_STEP; E_STEP = like_e_step;}

  /* get the probability that a site starting at position x_ij would
     NOT overlap a previously found motif; used in E_STEP.
  */
  get_not_o(dataset, model->w[1], FALSE);

  /* Perform EM for number of iterations or until no improvement */
  for (iter=0; iter < maxiter; iter++) {
    int w = model->w[nc-1];	/* width of model */
    THETA theta = model->theta[nc-1];	/* final theta of last component */

    /* after iteration 0, fall back to the model-type E-step */
    if (iter > 0 && dataset->nmotifs > 0) E_STEP = E_STEP0;

    if (PRINTALL) ajFmtPrintF(outf,"\niter %d\n", iter);
#ifdef PARALLEL
    /* If we're running in parallel, only print from one node. */
    if (mpMyID() == 0)
#endif
    if ((!NO_STATUS) && ((iter % 10) == 0))
      fprintf(stderr, "\rem: w=%4d, iter=%4d                       ", w, iter);

    /* fix this later: save current contents of theta */
    copy_theta(theta, theta_save, w, alength);

    /* expectation step */
    model->ll = E_STEP(model, dataset);

    /* maximization step */
    M_STEP(model, dataset, priors, iter);

    /* print status if requested */
    if (PRINT_LL) {
      double m1, e1;
      double nsites = model->lambda[1] * ps(dataset, model->w[1]);
      calc_like(model, dataset);
      exp10_logx(model->sig, m1, e1);
      ajFmtPrintF(outf,"iter=%d w=%d ll=%8.2f e_ll=%8.2f nsites=%6.1f sig=%5.3fe%+04.0f",
      iter, model->w[1], model->ll, model->e_ll, nsites, m1, e1);
    }
    if (PRINTALL) {
      int c;
      for (c=0; c<nc; c++) {
        ajFmtPrintF(outf,"component %2d: lambda= %8.6f\n", c,
		    model->lambda[c]);
        print_theta(2, model->theta[c], model->w[c], "", dataset, NULL); 
        print_theta(2, model->obs[c], model->w[c], "", dataset, NULL); 
      }
    }
    if (PRINT_Z) print_zij(dataset, model);

    /* see if EM has converged: compare saved theta with the updated one */
    converged = check_convergence(theta_save, theta, w, distance, alength,
      iter, maxiter);

    /* count the converging iteration before leaving the loop */
    if (converged) {iter++; break;}		/* done */
  }

  /* save the number of iterations (counting from zero)*/
  model->iter += iter;

  /* get the consensus of the model (component 1) and store it in the
     model; previously the result was assigned to a local pointer and
     lost, so model->cons was never updated */
  {
    THETA theta = model->theta[1];
    int w = model->w[1];
    char *cons = get_consensus(theta, w, dataset, 1, MINCONS); 

    if (cons != NULL) {
      /* NOTE(review): assumes model->cons has room for the consensus
         string -- confirm against the MODEL declaration */
      strcpy(model->cons, cons);
    }
    /* NOTE(review): if get_consensus() hands ownership of a heap buffer
       to the caller, it should be released here -- confirm */
  }

  /* calculate the expected likelihood of the model */
  calc_like(model, dataset);

  free_2array(theta_save, max_w);
}