Esempio n. 1
0
/* Function: matassign2hmm()
 * 
 * Purpose:  Given an assignment of alignment columns to match vs.
 *           insert, finish the final part of the model construction 
 *           calculation that is constant between model construction
 *           algorithms.
 *           
 * Args:     msa       - multiple sequence alignment
 *           matassign - 1..alen bit flags for column assignments
 *           ret_hmm   - RETURN: counts-form HMM
 *           opt_tr    - optRETURN: array of tracebacks for aseq's
 *                         
 * Return:   <eslOK> on success.
 *           <eslENORESULT> if no consensus columns are identified.
 *
 *           ret_hmm and opt_tr alloc'ed here.
 */
static int
matassign2hmm(ESL_MSA *msa, int *matassign, P7_HMM **ret_hmm, P7_TRACE ***opt_tr)
{
  int        status;		/* return status                       */
  P7_HMM    *hmm = NULL;        /* RETURN: new hmm                     */
  P7_TRACE **tr  = NULL;        /* RETURN: 0..nseq-1 fake traces       */
  int      M;                   /* length of new model in match states */
  int      idx;                 /* counter over sequences              */
  int      apos;                /* counter for aligned columns         */
#ifdef p7_DEBUGGING
  char     errbuf[eslERRBUFSIZE];
#endif

  /* apply the model mask in the 'GC MM' row */
  do_modelmask(msa);

  /* How many match states in the HMM? */
  for (M = 0, apos = 1; apos <= msa->alen; apos++) 
    if (matassign[apos]) M++;
  if (M == 0) { status = eslENORESULT; goto ERROR; }

  /* Make fake tracebacks for each seq */
  ESL_ALLOC(tr, sizeof(P7_TRACE *) * msa->nseq);
  if ((status = p7_trace_FauxFromMSA(msa, matassign, p7_MSA_COORDS, tr))        != eslOK) goto ERROR;
  for (idx = 0; idx < msa->nseq; idx++)
    {
      if ((status = p7_trace_Doctor(tr[idx], NULL, NULL))                       != eslOK) goto ERROR;
#ifdef p7_DEBUGGING
      if ((status = p7_trace_Validate(tr[idx], msa->abc, msa->ax[idx], errbuf)) != eslOK) 
	ESL_XEXCEPTION(eslFAIL, "validation failed: %s", errbuf);
#endif
    }

  /* Build count model from tracebacks */
  if ((hmm    = p7_hmm_Create(M, msa->abc)) == NULL)  { status = eslEMEM; goto ERROR; }
  if ((status = p7_hmm_Zero(hmm))           != eslOK) goto ERROR;
  for (idx = 0; idx < msa->nseq; idx++) {
    if (tr[idx] == NULL) continue; /* skip rare examples of empty sequences */
    if ((status = p7_trace_Count(hmm, msa->ax[idx], msa->wgt[idx], tr[idx])) != eslOK) goto ERROR;
  }

  hmm->nseq     = msa->nseq;
  hmm->eff_nseq = msa->nseq;

  /* Transfer annotation from the MSA to the new model
   */
  if ((status = annotate_model(hmm, matassign, msa)) != eslOK) goto ERROR;

  /* Reset #=RF line of alignment to reflect our assignment
   * of match, delete. matassign is valid from 1..alen and is off
   * by one from msa->rf.
   */
  if (msa->rf == NULL)  ESL_ALLOC(msa->rf, sizeof(char) * (msa->alen + 1));
  for (apos = 1; apos <= msa->alen; apos++)
    msa->rf[apos-1] = matassign[apos] ? 'x' : '.';
  msa->rf[msa->alen] = '\0';

  if (opt_tr  != NULL) *opt_tr  = tr; 
  else                  p7_trace_DestroyArray(tr, msa->nseq);
  *ret_hmm = hmm;
  return eslOK;

 ERROR:
  if (tr     != NULL) p7_trace_DestroyArray(tr, msa->nseq);
  if (hmm    != NULL) p7_hmm_Destroy(hmm);
  if (opt_tr != NULL) *opt_tr = NULL;
  *ret_hmm = NULL;
  return status;
}
Esempio n. 2
0
static int
map_alignment(const char *msafile, const P7_HMM *hmm, ESL_SQ ***ret_sq, P7_TRACE ***ret_tr, int *ret_ntot)
{
  ESL_SQ       **sq        = NULL;
  P7_TRACE     **tr        = NULL;
  ESLX_MSAFILE  *afp       = NULL;
  ESL_MSA       *msa       = NULL;
  ESL_ALPHABET  *abc       = (ESL_ALPHABET *) hmm->abc; /* removing const'ness to make compiler happy. Safe. */
  int           *matassign = NULL;
  uint32_t       chksum    = 0;
  int            i,k;
  int            status;

  status = eslx_msafile_Open(&abc, msafile, NULL, eslMSAFILE_UNKNOWN, NULL, &afp);
  if (status != eslOK) eslx_msafile_OpenFailure(afp, status);

  status = eslx_msafile_Read(afp, &msa);
  if (status != eslOK) eslx_msafile_ReadFailure(afp, status);

  if (! (hmm->flags & p7H_CHKSUM)  )  esl_fatal("HMM has no checksum. --mapali unreliable without it.");
  if (! (hmm->flags & p7H_MAP)  )     esl_fatal("HMM has no map. --mapali can't work without it.");
  esl_msa_Checksum(msa, &chksum);
  if (hmm->checksum != chksum)        esl_fatal("--mapali MSA %s isn't same as the one HMM came from (checksum mismatch)", msafile);

  ESL_ALLOC(sq, sizeof(ESL_SQ *)   * msa->nseq);
  ESL_ALLOC(tr, sizeof(P7_TRACE *) * msa->nseq);
  ESL_ALLOC(matassign, sizeof(int) * (msa->alen + 1));

  esl_vec_ISet(matassign, msa->alen+1, 0);
  for (k = 1; k <= hmm->M; k++) matassign[hmm->map[k]] = 1;

  p7_trace_FauxFromMSA(msa, matassign, p7_DEFAULT, tr);

  /* The 'faux' core traces constructed by FauxFromMSA() may contain
   * D->I and I->D transitions.  They may *only* now be passed to
   * p7_tracealign_Seqs(), which can deal with these 'illegal'
   * transitions, in order to exactly reproduce the input --mapali
   * alignment.
   */

  for (i = 0; i < msa->nseq; i++)
    esl_sq_FetchFromMSA(msa, i, &(sq[i]));
      
  *ret_ntot = msa->nseq;
  *ret_tr   = tr;
  *ret_sq   = sq;

  eslx_msafile_Close(afp);
  esl_msa_Destroy(msa);
  free(matassign);
  return eslOK;

 ERROR:
  *ret_ntot = 0;
  *ret_tr   = NULL;
  *ret_sq   = NULL;
  if (afp       != NULL) eslx_msafile_Close(afp);
  if (msa       != NULL) esl_msa_Destroy(msa);
  if (matassign != NULL) free(matassign);
  return status;
}