/**********************************************************************/
/*
	copy_model

	Copy the contents of model m1 into model m2, field by field.

	The maxima array is copied only when m1->maxima is non-NULL;
	otherwise m2->maxima is left untouched.  theta and obs are copied
	through copy_theta() using m1's width and the alphabet length, and
	the first m1->w entries of rentropy are copied.

	Nothing in this function allocates m2->theta, m2->obs, m2->cons,
	m2->cons0 or m2->rentropy; they are presumably set up by the
	caller and large enough for m1's contents -- TODO confirm.
*/
/**********************************************************************/
extern void copy_model(
  MODEL *m1,			/* source */
  MODEL *m2,			/* destination */
  int alength			/* length of alphabet */
)
{
  int i;

  /* model type and the width / site-count search ranges */
  m2->mtype = m1->mtype;
  m2->min_w = m1->min_w;
  m2->max_w = m1->max_w;
  m2->pw = m1->pw;
  m2->min_nsites = m1->min_nsites;
  m2->max_nsites = m1->max_nsites;
  m2->psites = m1->psites;

  if (m1->maxima) {		/* copy maxima if they exist */
    /* Resize is a project macro; it is given nsites_dis+1 elements while
       only nsites_dis elements are copied below.
       NOTE(review): confirm the extra element is intentional. */
    Resize(m2->maxima, m1->nsites_dis+1, p_prob);
    /* memmove replaces the legacy bcopy (note the swapped argument
       order); like bcopy it tolerates overlapping regions. */
    memmove(m2->maxima, m1->maxima, m1->nsites_dis*sizeof(p_prob));
  }

  m2->pal = m1->pal;
  m2->invcomp = m1->invcomp;
  m2->imotif = m1->imotif;

  /* width, then the two width-by-alength matrices */
  m2->w = m1->w;
  copy_theta(m1->theta, m2->theta, m1->w, alength);
  copy_theta(m1->obs, m2->obs, m1->w, alength);

  m2->lambda = m1->lambda;
  m2->lambda_obs = m1->lambda_obs;
  m2->nsites = m1->nsites;
  m2->nsites_obs = m1->nsites_obs;
  m2->nsites_dis = m1->nsites_dis;

  /* consensus strings; the destination buffers must already exist */
  strcpy(m2->cons, m1->cons);
  strcpy(m2->cons0, m1->cons0);

  /* one rentropy entry per column of the motif */
  for (i=0; i<m1->w; i++) {
    m2->rentropy[i] = m1->rentropy[i];
  }

  /* scores and statistics */
  m2->rel = m1->rel;
  m2->ic = m1->ic;
  m2->ll = m1->ll;
  m2->mll_0 = m1->mll_0;
  m2->mll_1 = m1->mll_1;
  m2->logpv = m1->logpv;
  m2->logev = m1->logev;

  /* bookkeeping */
  m2->ID = m1->ID;
  m2->iter = m1->iter;
  m2->iseq = m1->iseq;
  m2->ioff = m1->ioff;
} /* copy_model */
/**********************************************************************/
/*
	copy_model

	Duplicate model m1 into model m2.

	Each of the m1->c components has its width, theta, obs, lambda
	and rel copied; the remaining scalar fields are then copied one
	by one.  cons0 and ID are copied only when PARALLEL is defined.
*/
/**********************************************************************/
extern void copy_model(
  MODEL *m1,			/* source */
  MODEL *m2,			/* destination */
  int alength			/* length of alphabet */
)
{
  int k;			/* index over components, then over 0..3 */

  m2->mtype = m1->mtype;
  m2->c = m1->c;

  /* per-component data */
  for (k = 0; k < m1->c; ++k) {
    m2->w[k] = m1->w[k];
    copy_theta(m1->theta[k], m2->theta[k], m1->w[k], alength);
    copy_theta(m1->obs[k], m2->obs[k], m1->w[k], alength);
    m2->lambda[k] = m1->lambda[k];
    m2->rel[k] = m1->rel[k];
  }

  m2->pal = m1->pal;
  strcpy(m2->cons, m1->cons);
  m2->nstrands = m1->nstrands;

  /* sigma and d each hold four entries */
  for (k = 0; k < 4; ++k) {
    m2->sigma[k] = m1->sigma[k];
    m2->d[k] = m1->d[k];
  }

  /* likelihoods and significance measures */
  m2->e_ll_0 = m1->e_ll_0;
  m2->e_ll = m1->e_ll;
  m2->ll = m1->ll;
  m2->sig = m1->sig;
  m2->lrt = m1->lrt;
  m2->bon = m1->bon;
  m2->root = m1->root;
  m2->pvalue = m1->pvalue;
  m2->imotif = m1->imotif;

#ifdef PARALLEL
  /* fields copied only in PARALLEL builds */
  strcpy(m2->cons0, m1->cons0);
  m2->ID = m1->ID;
#endif

  m2->iter = m1->iter;
}
/**********************************************************************/
/*
	em

	Run expectation-maximization on a model.

	The E-step function is chosen from model->mtype (Oops, Zoops or
	Tcm); any other type prints an error to stderr and exits with
	status 1.  When dataset->nmotifs > 0, iteration 0 uses
	like_e_step instead and later iterations use the selected E-step.

	Each iteration saves the current theta of the last component,
	runs the E-step (its return value is stored in model->ll), runs
	the M-step, optionally prints status, and stops early once
	check_convergence() reports convergence.

	On exit the number of iterations performed is added to
	model->iter, the consensus is stored in model->cons and
	calc_like() is called on the model.

	NOTE(review): the motif component is indexed both as [nc-1] and
	as [1] below; these agree only when the model has exactly two
	components -- confirm that is always the case.
*/
/**********************************************************************/
extern void em(
  MODEL *model,			/* the model */
  DATASET *dataset,		/* the dataset */
  PRIORS *priors,		/* the priors */
  int maxiter,			/* maximum number of iterations */
  double distance		/* stopping criterion */
)
{
  int alength = dataset->alength;
  THETA theta_save;
  int iter;					/* iteration number */
  double (*E_STEP)(MODEL *, DATASET *);		/* expectation step function */
  double (*E_STEP0)(MODEL *, DATASET *);	/* expectation step function */
  /* maximization step function */
  void (*M_STEP)(MODEL *, DATASET *, PRIORS *, int);
  int nc = model->c;				/* number of components of model */
  int max_w = model->w[nc-1];			/* width of last component */
  BOOLEAN converged = FALSE;			/* EM has converged */

  /* create a place to save old value of theta */
  create_2array(theta_save, double, max_w, alength);

  /* set up the correct type of EM to run */
  M_STEP = m_step;
  E_STEP = e_step;
  E_STEP0 = e_step;
  switch (model->mtype) {
    case Oops:
      E_STEP = e_step;
      break;
    case Zoops:
      E_STEP = zoops_e_step;
      break;
    case Tcm:
      E_STEP = tcm_e_step;
      break;
    default:
      fprintf(stderr,"Unknown model type in em()! \n");
      exit(1);
      break;
  }
  /* use like_e_step to set z matrix on iteration 0 if motifs were given */
  if (dataset->nmotifs > 0) {E_STEP0 = E_STEP; E_STEP = like_e_step;}

  /* get the probability that a site starting at position x_ij would
     NOT overlap a previously found motif; used in E_STEP.
  */
  get_not_o(dataset, model->w[1], FALSE);

  /* Perform EM for number of iterations or until no improvement */
  for (iter=0; iter < maxiter; iter++) {
    int w = model->w[nc-1];			/* width of model */
    THETA theta = model->theta[nc-1];		/* final theta of last component */

    /* after iteration 0, switch back to the E-step chosen above */
    if (iter > 0 && dataset->nmotifs > 0) E_STEP = E_STEP0;

    if (PRINTALL) ajFmtPrintF(outf,"\niter %d\n", iter);
#ifdef PARALLEL
    /* If we're running in parallel, only print from one node.
    */
    if (mpMyID() == 0)
#endif
    if ((!NO_STATUS) && ((iter % 10) == 0))
      fprintf(stderr, "\rem: w=%4d, iter=%4d ", w, iter);

    /* fix this later: save current contents of theta */
    copy_theta(theta, theta_save, w, alength);

    /* expectation step */
    model->ll = E_STEP(model, dataset);

    /* maximization step */
    M_STEP(model, dataset, priors, iter);

    /* print status if requested */
    if (PRINT_LL) {
      double m1, e1;
      double nsites = model->lambda[1] * ps(dataset, model->w[1]);
      calc_like(model, dataset);
      exp10_logx(model->sig, m1, e1);
      ajFmtPrintF(outf,"iter=%d w=%d ll=%8.2f e_ll=%8.2f nsites=%6.1f sig=%5.3fe%+04.0f",
        iter, model->w[1], model->ll, model->e_ll, nsites, m1, e1);
    }
    if (PRINTALL) {
      int c;
      for (c=0; c<nc; c++) {
        ajFmtPrintF(outf,"component %2d: lambda= %8.6f\n", c, model->lambda[c]);
        print_theta(2, model->theta[c], model->w[c], "", dataset, NULL);
        print_theta(2, model->obs[c], model->w[c], "", dataset, NULL);
      }
    }
    if (PRINT_Z) print_zij(dataset, model);

    /* see if EM has converged */
    converged = check_convergence(theta_save, theta, w, distance, alength,
      iter, maxiter);

    /* count the iteration just completed before leaving the loop */
    if (converged) {iter++; break;}		/* done */
  }

  /* save the number of iterations (counting from zero)*/
  model->iter += iter;

  /* get the consensus of each component of the model */
  {
    THETA theta = model->theta[1];
    int w = model->w[1];
    char *cons = get_consensus(theta, w, dataset, 1, MINCONS);

    /* Store the consensus in the model.  Previously the result was
       assigned only to a local pointer and so was discarded, leaving
       model->cons unchanged.
       NOTE(review): assumes model->cons is a writable buffer large
       enough for the returned string -- confirm against MODEL.
       NOTE(review): ownership of the returned string is not visible
       here; if get_consensus() allocates it, release it after the copy. */
    if (cons != NULL) strcpy(model->cons, cons);
  }

  /* calculate the expected likelihood of the model */
  calc_like(model, dataset);

  free_2array(theta_save, max_w);
}