/* Function:  hmmpgmd2msa()
 * Synopsis:  Convert an HMMPGMD-derived data stream to an MSA, based
 *            on the corresponding hmm
 *
 * Purpose:   Given a data stream from HMMPGMD of the form shown
 *            here, produce an MSA:
 *                 HMMD_SEARCH_STATS
 *                 P7_HIT array of size <stats->nhits>
 *                 then, for each hit with d domains:
 *                   d P7_DOMAINs
 *                   then
 *                   d P7_ALIDISPLAYs, each immediately followed by
 *                   its <memsize> bytes of serialized display data
 *            ... optionally adding a sequence with length matching
 *            that of the hmm, which will be included in the alignment.
 *
 *            A further extension is the ability to include or exclude
 *            sequences from the list of hits.
 *
 *            This function's expected use is as a helper function for
 *            the hmmer website, which gets the above data stream from
 *            hmmpgmd.
 *
 * Args :     data: a pointer to binary data in the format given above
 *            hmm:  the HMM against which the alidisplay traces and
 *                  additional sequences/traces are threaded to reach
 *                  the returned msa.
 *            qsq : optional sequence to be included in the output msa;
 *                  must have the same number of residues as the hmm
 *                  has states, as each residue i will be aligned to
 *                  state i.
 *            incl: optional array of sequence names (in the case of
 *                  hmmpgmd, a list of ints). A hit that is not flagged
 *                  p7_IS_INCLUDED but whose name appears here is
 *                  flagged p7_IS_INCLUDED, and all of its domains are
 *                  marked as included.
 *       incl_size: number of entries in incl; zero if incl is NULL.
 *            excl: optional array of sequence names (in the case of
 *                  hmmpgmd, a list of ints). A hit that is flagged
 *                  p7_IS_INCLUDED but whose name appears here is
 *                  flagged p7_IS_DROPPED instead.
 *       excl_size: number of entries in excl; zero if excl is NULL.
 *         ret_msa: on success, receives the completed MSA. It is not
 *                  written to on failure.
 *
 * Returns:   <eslOK> on success, with the new MSA in <*ret_msa>.
 *            <eslFAIL> if <qsq> does not have exactly <hmm->M>
 *            residues, or if the stats/hit records fail the sanity
 *            checks. Otherwise, the non-OK status of whichever trace,
 *            allocation, or alignment step failed.
 */
int
hmmpgmd2msa(void *data, P7_HMM *hmm, ESL_SQ *qsq, int *incl, int incl_size, int *excl, int excl_size, ESL_MSA **ret_msa) {
  int i, j;
  int c;
  int status;
  int set_included;

  /* trace of the query sequence with N residues onto model with N match states */
  P7_TRACE          *qtr         = NULL;
  int                extra_sqcnt = 0;

  /* vars used to read from the binary data */
  HMMD_SEARCH_STATS *stats   = NULL;              /* pointer to a single stats object, at the beginning of data */
  P7_HIT            *hits    = NULL;              /* an array of hits, at the appropriate offset in data */

  /* vars used in msa construction */
  P7_TOPHITS         th;
  P7_ALIDISPLAY     *ad;                          /* alidisplay header as it sits in the data stream         */
  P7_ALIDISPLAY     *ad2        = NULL;           /* our private copy of that header                         */
  P7_ALIDISPLAY     *ad_pending = NULL;           /* copy under construction, not yet owned by any domain    */
  ESL_MSA           *msa   = NULL;
  P7_DOMAIN         *dom   = NULL;

  char              *p     = (char*)data;        /* pointer used to walk along data, must be char* to allow pointer arithmetic */

  th.N = 0;
  th.unsrt = NULL;
  th.hit   = NULL;

  /* optionally build a faux trace for the query sequence: relative to core model (B->M_1..M_L->E) */
  if (qsq != NULL) {
    if (qsq->n != hmm->M) {
      status = eslFAIL;
      goto ERROR;
    }

    if ((qtr = p7_trace_Create())                      == NULL)  {status = eslFAIL;  goto ERROR; }
    if ((status = p7_trace_Append(qtr, p7T_B, 0, 0))   != eslOK) goto ERROR;
    for (i = 1; i <= qsq->n; i++)
      if ((status = p7_trace_Append(qtr, p7T_M, i, i)) != eslOK) goto ERROR;
    if ((status = p7_trace_Append(qtr, p7T_E, 0, 0))   != eslOK) goto ERROR;
    qtr->M = qsq->n;
    qtr->L = qsq->n;
    extra_sqcnt = 1;
  }

  /* get search stats + hit info */
  stats = (HMMD_SEARCH_STATS*)p;

  /* sanity check */
  if (   ( stats->Z_setby != p7_ZSETBY_NTARGETS    && stats->Z_setby != p7_ZSETBY_OPTION    && stats->Z_setby != p7_ZSETBY_FILEINFO )
      || ( stats->domZ_setby != p7_ZSETBY_NTARGETS && stats->domZ_setby != p7_ZSETBY_OPTION && stats->domZ_setby != p7_ZSETBY_FILEINFO )
      ||   stats->nhits > 10000000
      ||   stats->elapsed > 1000000
  ) {
    status = eslFAIL;
    goto ERROR;
  }

  /* ok, it looks legitimate */
  p    += sizeof(HMMD_SEARCH_STATS);
  hits  = (P7_HIT*)p;
  p    += sizeof(P7_HIT) * stats->nhits;

  /* create a tophits object, to be passed to p7_tophits_Alignment().
   * th.N stays 0 until every hit has been validated, so a failure in
   * this stretch never makes the cleanup code walk the hit list.
   */
  ESL_ALLOC( th.unsrt, sizeof(P7_HIT) * stats->nhits);
  memcpy( th.unsrt, hits, sizeof(P7_HIT) * stats->nhits);
  ESL_ALLOC( th.hit, sizeof(P7_HIT*) * stats->nhits);
  for (i=0; i<stats->nhits; i++) {
    th.hit[i] = &(th.unsrt[i]);
    if (   th.hit[i]->ndom > 10000
        || th.hit[i]->flags >  p7_IS_INCLUDED + p7_IS_REPORTED + p7_IS_NEW + p7_IS_DROPPED + p7_IS_DUPLICATE
    ) {
      status = eslFAIL;
      goto ERROR;
    }
    /* The dcl pointer was memcpy'd in with the rest of the hit and is
     * not ours. Clear it so cleanup can tell "not allocated yet" apart
     * from a real allocation.
     */
    th.hit[i]->dcl = NULL;
  }

  th.N         = stats->nhits;
  th.nreported = 0;
  th.nincluded = 0;
  th.is_sorted_by_sortkey = 0;
  th.is_sorted_by_seqidx  = 0;

  for (i = 0; i < th.N; i++) {
    ESL_ALLOC( th.hit[i]->dcl, sizeof(P7_DOMAIN) *  th.hit[i]->ndom);

    /* Go through the hits and set to be excluded or included as necessary */
    set_included = 0;
    if (th.hit[i]->flags & p7_IS_INCLUDED) {
      for (c = 0; c < excl_size; c++) {
        if (excl[c] == (long)(th.hit[i]->name)) {
          th.hit[i]->flags     = p7_IS_DROPPED;
          th.hit[i]->nincluded = 0;
          break;
        }
      }
    } else {
      for (c = 0; c < incl_size; c++) {
        if (incl[c] == (long)(th.hit[i]->name)) {
          th.hit[i]->flags = p7_IS_INCLUDED;
          set_included = 1;
          break;                /* further matches could not change anything */
        }
      }
    }

    /* first grab all the P7_DOMAINs for the hit */
    for (j=0; j < th.hit[i]->ndom; j++) {
      dom = th.hit[i]->dcl + j;
      memcpy(dom , (P7_DOMAIN*)p, sizeof(P7_DOMAIN));
      /* the copied ad pointer is not ours either; it is filled in below */
      dom->ad = NULL;
      /* Possibly set domains to be included if being
       * externally set via incl list */
      if (set_included) dom->is_included = 1;
      p += sizeof(P7_DOMAIN);
    }

    /* then grab the P7_ALIDISPLAYs for the hit */
    for (j=0; j < th.hit[i]->ndom; j++) {
      ad = (P7_ALIDISPLAY*)p;

      /* Build the copy off to the side; it is attached to the domain
       * only once its <mem> buffer exists, so the cleanup code never
       * hands a half-built display to p7_alidisplay_Destroy().
       */
      ESL_ALLOC(ad_pending, sizeof(P7_ALIDISPLAY));
      ad2      = ad_pending;
      ad2->mem = NULL;

      ad2->memsize = ad->memsize;
      ad2->rfline = ad->rfline;
      ad2->mmline = ad->mmline;
      ad2->csline = ad->csline ;
      ad2->model  = ad->model ;
      ad2->mline  = ad->mline ;
      ad2->aseq   = ad->aseq ;
      ad2->ppline = ad->ppline;
      ad2->N      = ad->N;

      ad2->hmmname = ad->hmmname;
      ad2->hmmacc  = ad->hmmacc ;
      ad2->hmmdesc = ad->hmmdesc;
      ad2->hmmfrom = ad->hmmfrom;
      ad2->hmmto   = ad->hmmto;
      ad2->M       = ad->M;

      ad2->sqname  = ad->sqname;
      ad2->sqacc   = ad->sqacc ;
      ad2->sqdesc  = ad->sqdesc;
      ad2->sqfrom  = ad->sqfrom;
      ad2->sqto    = ad->sqto;
      ad2->L       = ad->L;

      p += sizeof(P7_ALIDISPLAY);

      ESL_ALLOC(ad2->mem, ad2->memsize);
      memcpy(ad2->mem, p, ad2->memsize);
      p += ad2->memsize;

      th.hit[i]->dcl[j].ad = ad2;   /* ownership passes to the domain */
      ad_pending           = NULL;

      /* NOTE(review): the result of p7_alidisplay_Deserialize() is not
       * checked here, as before; confirm whether it can fail. */
      p7_alidisplay_Deserialize(ad2);
    }
  }


  /* use the tophits and trace info above to produce an alignment */
  if ( (status = p7_tophits_Alignment(&th, hmm->abc, &qsq, &qtr, extra_sqcnt, p7_ALL_CONSENSUS_COLS, &msa)) != eslOK) goto ERROR;

  *ret_msa = msa;
  status   = eslOK;
  /* success deliberately falls through: both outcomes share one cleanup path */

ERROR:
  /* free memory */
  if (ad_pending != NULL) {
    if (ad_pending->mem != NULL) free(ad_pending->mem);
    free(ad_pending);
  }

  /* the trace came from p7_trace_Create(), so it must be released by its destructor */
  if (qtr != NULL) p7_trace_Destroy(qtr);

  for (i = 0; i < th.N; i++) {
    if (th.hit[i]->dcl == NULL) continue;       /* never allocated for this hit */

    for (j=0; j < th.hit[i]->ndom; j++)
      if (th.hit[i]->dcl[j].ad != NULL) p7_alidisplay_Destroy(th.hit[i]->dcl[j].ad);

    free (th.hit[i]->dcl);
  }
  if (th.unsrt != NULL) free (th.unsrt);
  if (th.hit != NULL) free (th.hit);

  return status;
}
/* Benchmark driver for alignment display creation.
 *
 * Reads one HMM from the file named by the first command line argument,
 * configures a profile from it in p7_UNIGLOCAL mode with length 0, and
 * then emits <-N> sequences from that profile. Unless -b is set, each
 * emitted trace is turned into a P7_ALIDISPLAY, printed to stdout, and
 * destroyed again. With -p the trace carries posterior probabilities,
 * which are filled with random values before the display is built.
 * The elapsed time of the emission loop is reported on stdout.
 */
int
main(int argc, char **argv)
{
  ESL_GETOPTS    *go      = esl_getopts_CreateDefaultApp(options, 1, argc, argv, banner, usage);
  char           *hmmpath = esl_opt_GetArg(go, 1);
  int             nseq    = esl_opt_GetInteger(go, "-N");
  ESL_STOPWATCH  *watch   = esl_stopwatch_Create();
  ESL_RANDOMNESS *rng     = esl_randomness_CreateFast(esl_opt_GetInteger(go, "-s"));
  ESL_ALPHABET   *abc     = NULL;
  P7_HMMFILE     *hfp     = NULL;
  P7_HMM         *hmm     = NULL;
  P7_BG          *bg      = NULL;
  P7_PROFILE     *gm      = NULL;
  P7_OPROFILE    *om      = NULL;
  P7_TRACE       *tr      = NULL;
  ESL_SQ         *sq      = NULL;
  P7_ALIDISPLAY  *ad      = NULL;
  int             with_pp;
  int             done;
  int             step;

  /* Load the single query model. */
  if (p7_hmmfile_Open(hmmpath, NULL, &hfp) != eslOK) p7_Fail("Failed to open HMM file %s", hmmpath);
  if (p7_hmmfile_Read(hfp, &abc, &hmm)     != eslOK) p7_Fail("Failed to read HMM");
  p7_hmmfile_Close(hfp);

  /* Background, generic profile, and optimized profile. */
  bg = p7_bg_Create(abc);
  p7_bg_SetLength(bg, 0);
  gm = p7_profile_Create(hmm->M, abc);
  p7_ProfileConfig(hmm, bg, gm, 0, p7_UNIGLOCAL); /* that sets N,C,J to generate nothing */
  om = p7_oprofile_Create(gm->M, abc);
  p7_oprofile_Convert(gm, om);

  /* The trace only carries a posterior probability array when -p is set. */
  with_pp = esl_opt_GetBoolean(go, "-p");
  tr      = (with_pp ? p7_trace_CreateWithPP() : p7_trace_Create());

  sq = esl_sq_CreateDigital(abc);

  esl_stopwatch_Start(watch);
  for (done = 0; done < nseq; done++)
    {
      p7_ProfileEmit(rng, hmm, gm, bg, sq, tr);
      esl_sq_SetName(sq, "random");

      if (! esl_opt_GetBoolean(go, "-b"))
        {
          if (with_pp)
            {
              /* random posterior probabilities for every position that has a residue index */
              for (step = 0; step < tr->N; step++)
                {
                  if (tr->i[step] <= 0) continue;
                  tr->pp[step] = esl_random(rng);
                }
            }

          ad = p7_alidisplay_Create(tr, 0, om, sq);
          p7_alidisplay_Print(stdout, ad, 40, 80, FALSE);
          p7_alidisplay_Destroy(ad);
        }

      p7_trace_Reuse(tr);
      esl_sq_Reuse(sq);
    }
  esl_stopwatch_Stop(watch);
  esl_stopwatch_Display(stdout, watch, "# CPU time: ");

  /* Tear everything down again. */
  esl_sq_Destroy(sq);
  p7_trace_Destroy(tr);
  p7_oprofile_Destroy(om);
  p7_profile_Destroy(gm);
  p7_bg_Destroy(bg);
  p7_hmm_Destroy(hmm);
  esl_alphabet_Destroy(abc);
  esl_randomness_Destroy(rng);
  esl_stopwatch_Destroy(watch);
  esl_getopts_Destroy(go);
  return 0;
}
/* Function:  hmmpgmd2stats()
 * Synopsis:  Use a HMMPGMD-derived data stream to extract some simple
 *            statistics regarding its alignment.
 *
 * Purpose:   Given a data stream from HMMPGMD in the form
 *                 HMMD_SEARCH_STATS
 *                 P7_HIT array of size <stats->nhits>
 *                 then, for each hit with d domains:
 *                   d P7_DOMAINs
 *                   then
 *                   d P7_ALIDISPLAYs, each immediately followed by
 *                   its <memsize> bytes of serialized display data
 *            produce a vector of <hmm->M>*3 floats. Only domains that
 *            are marked included, on hits flagged p7_IS_INCLUDED, are
 *            counted. For model position k (0-based):
 *                 [k]
 *                   fraction of included hits with an alignment that
 *                   covers the model at that position
 *                 [hmm->M + k]
 *                   of the hits covering that position, the fraction
 *                   whose match line holds a letter there (identical
 *                   to consensus)
 *                 [hmm->M*2 + k]
 *                   of the hits covering that position, the fraction
 *                   whose match line holds a letter or '+' there
 *                   (similar to consensus)
 *            If no hit is included, the whole vector is zero.
 *
 * Args :     data:     a pointer to binary data in the format given above
 *            hmm:      the HMM the hits were aligned to; only its
 *                      length <hmm->M> is used here.
 *            statsOut: receives the newly allocated output vector.
 *                      Caller is responsible for freeing it later.
 *
 * Returns:   <eslOK> on success, with the vector in <*statsOut>.
 *            <eslFAIL> if the stats/hit records fail the sanity
 *            checks, or if an alignment would index outside the model.
 *            Otherwise the status set by the failed allocation. On any
 *            failure the vector is freed and <*statsOut> is NULL.
 */
int hmmpgmd2stats(void *data, P7_HMM *hmm, float** statsOut) 
{
  int i, j, k;
  int status;

  /* vars used to read from the binary data */
  HMMD_SEARCH_STATS *stats   = NULL;              /* pointer to a single stats object, at the beginning of data */
  P7_HIT            *hits    = NULL;              /* an array of hits, at the appropriate offset in data */

  P7_TOPHITS         th;
  P7_DOMAIN         *dom;
  P7_ALIDISPLAY     *ad;                          /* alidisplay header as it sits in the data stream         */
  P7_ALIDISPLAY     *ad2        = NULL;           /* our private copy of that header                         */
  P7_ALIDISPLAY     *ad_pending = NULL;           /* copy under construction, not yet owned by any domain    */

  /* per-hit tallies, indexed by model position; reset after every hit */
  int *cover   = NULL;
  int *id      = NULL;
  int *similar = NULL;
  int readPos, writePos;     /* for converting alignment contents into model indexing */

  char              *p     = (char*)data;        /* pointer used to walk along data, must be char* to allow pointer arithmetic */

  th.N = 0;
  th.unsrt = NULL;
  th.hit   = NULL;

  /* storage for output */
  ESL_ALLOC( *statsOut,   sizeof(float) * hmm->M * 3);

  /* storage for accumulation per hit */
  ESL_ALLOC( cover,   sizeof(int) * hmm->M);
  ESL_ALLOC( id,      sizeof(int) * hmm->M);
  ESL_ALLOC( similar, sizeof(int) * hmm->M);
  for(k = 0; k < hmm->M; k++)
  {
    cover[k] = 0;
    id[k] = 0;
    similar[k] = 0;
    
    (*statsOut)[k         ] = 0;
    (*statsOut)[k+hmm->M  ] = 0;
    (*statsOut)[k+hmm->M*2] = 0;
  }

  /* get search stats + hit info */
  stats = (HMMD_SEARCH_STATS*)p;

  /* sanity check */
  if (   ( stats->Z_setby != p7_ZSETBY_NTARGETS    && stats->Z_setby != p7_ZSETBY_OPTION    && stats->Z_setby != p7_ZSETBY_FILEINFO )
      || ( stats->domZ_setby != p7_ZSETBY_NTARGETS && stats->domZ_setby != p7_ZSETBY_OPTION && stats->domZ_setby != p7_ZSETBY_FILEINFO )
      ||   stats->nhits > 10000000
      ||   stats->elapsed > 1000000
  ) {
    status = eslFAIL;
    goto ERROR;
  }

  /* ok, it looks legitimate */
  p    += sizeof(HMMD_SEARCH_STATS);
  hits  = (P7_HIT*)p;
  p    += sizeof(P7_HIT) * stats->nhits;

  /* create a tophits object, use it to step through the alignments.
   * th.N stays 0 until every hit has been validated, so a failure in
   * this stretch never makes the cleanup code walk the hit list.
   */
  ESL_ALLOC( th.unsrt, sizeof(P7_HIT) * stats->nhits);
  memcpy( th.unsrt, hits, sizeof(P7_HIT) * stats->nhits);
  ESL_ALLOC( th.hit, sizeof(P7_HIT*) * stats->nhits);
  for (i=0; i<stats->nhits; i++) {
    th.hit[i] = &(th.unsrt[i]);
    if (   th.hit[i]->ndom > 10000
        || th.hit[i]->flags >  p7_IS_INCLUDED + p7_IS_REPORTED + p7_IS_NEW + p7_IS_DROPPED + p7_IS_DUPLICATE
    ) {
      status = eslFAIL;
      goto ERROR;
    }
    /* The dcl pointer was memcpy'd in with the rest of the hit and is
     * not ours. Clear it so cleanup can tell "not allocated yet" apart
     * from a real allocation.
     */
    th.hit[i]->dcl = NULL;
  }

  th.N         = stats->nhits;
  th.nreported = 0;
  th.nincluded = 0;
  th.is_sorted_by_sortkey = 0;
  th.is_sorted_by_seqidx  = 0;

  for (i = 0; i < th.N; i++) 
  {
    ESL_ALLOC( th.hit[i]->dcl, sizeof(P7_DOMAIN) *  th.hit[i]->ndom);
   
    if(th.hit[i]->flags & p7_IS_INCLUDED) th.nincluded++;

    /* first step over all the P7_DOMAINs for the hit; only the
     * inclusion flag is kept */
    for (j=0; j < th.hit[i]->ndom; j++) 
    {
      dom = (P7_DOMAIN*)p;
      th.hit[i]->dcl[j].is_included = dom->is_included;
      th.hit[i]->dcl[j].ad          = NULL;   /* freshly allocated memory: make cleanup safe */
      p += sizeof(P7_DOMAIN);
    }
    
    /* then grab the P7_ALIDISPLAYs for the hit */
    for (j=0; j < th.hit[i]->ndom; j++) 
    {
      ad = (P7_ALIDISPLAY*)p;

      /* Build the copy off to the side; it is attached to the domain
       * only once its <mem> buffer exists, so the cleanup code never
       * hands a half-built display to p7_alidisplay_Destroy().
       */
      ESL_ALLOC(ad_pending, sizeof(P7_ALIDISPLAY));
      ad2      = ad_pending;
      ad2->mem = NULL;

      ad2->memsize = ad->memsize;
      ad2->rfline = ad->rfline;
      ad2->mmline = ad->mmline;
      
      ad2->csline = ad->csline ;
      ad2->model  = ad->model ;
      ad2->mline  = ad->mline ;
      ad2->aseq   = ad->aseq ;
      ad2->ppline = ad->ppline;
      ad2->N      = ad->N;

      ad2->hmmname = ad->hmmname;
      ad2->hmmacc  = ad->hmmacc ;
      ad2->hmmdesc = ad->hmmdesc;
      ad2->hmmfrom = ad->hmmfrom;
      ad2->hmmto   = ad->hmmto;
      ad2->M       = ad->M;

      ad2->sqname  = ad->sqname;
      ad2->sqacc   = ad->sqacc ;
      ad2->sqdesc  = ad->sqdesc;
      ad2->sqfrom  = ad->sqfrom;
      ad2->sqto    = ad->sqto;
      ad2->L       = ad->L;    
      
      p += sizeof(P7_ALIDISPLAY);

      ESL_ALLOC(ad2->mem, ad2->memsize);
      memcpy(ad2->mem, p, ad2->memsize);
      p += ad2->memsize;

      th.hit[i]->dcl[j].ad = ad2;   /* ownership passes to the domain */
      ad_pending           = NULL;

      /* NOTE(review): the result of p7_alidisplay_Deserialize() is not
       * checked here, as before; confirm whether it can fail. */
      p7_alidisplay_Deserialize(ad2);
      
      if((th.hit[i]->flags & p7_IS_INCLUDED) && th.hit[i]->dcl[j].is_included)
      {
        writePos = ad2->hmmfrom-1;  
        for(readPos = 0; readPos < ad2->N; readPos++)
        {
          /* An uppercase residue or a '-' in the aligned sequence
           * occupies one model position; anything else does not
           * advance along the model. The casts keep the <ctype.h>
           * calls defined for any char value.
           */
          if(isupper((unsigned char) ad2->aseq[readPos]) || ad2->aseq[readPos] == '-')
          {
            /* hmmfrom and the display text come from the data stream:
             * refuse to index outside the per-model tallies */
            if(writePos < 0 || writePos >= hmm->M)
            {
              status = eslFAIL;
              goto ERROR;
            }

            cover[writePos]++;
          
            /* a letter on the match line counts as identical (and similar) */
            if(isalpha((unsigned char) ad2->mline[readPos]))
            {
              id[writePos]++;
              similar[writePos]++;
            }
            /* a '+' on the match line counts as similar only */
            else if(ad2->mline[readPos] == '+')
            {
              similar[writePos]++;
            }
            writePos++;
          }       
        }
      }
    }
      
    /* Fold this hit's tallies into the output, then reset them. Each
     * hit adds at most 1 per position however many of its domains
     * overlap there.
     * NOTE(review): id[k]/cover[k] and similar[k]/cover[k] are integer
     * divisions, so a position contributes 1 only when every covering
     * domain of the hit agrees, and 0 otherwise. A fractional
     * contribution may have been intended; left unchanged so results
     * stay the same for existing callers.
     */
    for(k = 0; k < hmm->M; k++)
    {
      if(cover[k]) (*statsOut)[k]+=1.0;
      
      if(id[k]) (*statsOut)[k+hmm->M]+=(id[k]/cover[k]);
      id[k] = 0;
      
      if(similar[k]) (*statsOut)[k+hmm->M*2]+=(similar[k]/cover[k]);
      similar[k] = 0;
      
      cover[k] = 0;
    }
    
  }

  /* With no included hits every sum is still zero, and dividing by the
   * hit count would only turn those zeros into NaNs. */
  if(th.nincluded > 0)
  {
    /* turn the sums into fractions of the included hits */
    for(i = 0; i < hmm->M*3; i++)
    {
      (*statsOut)[i] = (*statsOut)[i]/(th.nincluded);
    }

    /* re-express identity and similarity relative to coverage at that position */
    for(i = hmm->M; i < hmm->M*3; i++)
    {
      if((*statsOut)[i%hmm->M])
      {  
        (*statsOut)[i] = (*statsOut)[i]/(*statsOut)[i%hmm->M];
      }
      else
      {
        (*statsOut)[i] = 0.0;
      }
    }
  }

  status = eslOK;
  /* success deliberately falls through: both outcomes share one cleanup path */

ERROR:
  /* free memory */
  if (ad_pending != NULL) {
    if (ad_pending->mem != NULL) free(ad_pending->mem);
    free(ad_pending);
  }

  for (i = 0; i < th.N; i++) {
    if (th.hit[i]->dcl == NULL) continue;       /* never allocated for this hit */

    for (j=0; j < th.hit[i]->ndom; j++)
      if(th.hit[i]->dcl[j].ad)
      {
        p7_alidisplay_Destroy(th.hit[i]->dcl[j].ad);
        th.hit[i]->dcl[j].ad = NULL;
      }
      
    free (th.hit[i]->dcl);
    th.hit[i]->dcl = NULL;
  }
  if (th.unsrt != NULL) free (th.unsrt);
  th.unsrt = NULL;
  if (th.hit != NULL) free (th.hit);
  th.hit = NULL;

  /* the per-hit tallies are scratch space and never leave this function */
  if (cover   != NULL) free(cover);
  if (id      != NULL) free(id);
  if (similar != NULL) free(similar);

  /* do not hand a half-computed vector back to the caller */
  if (status != eslOK) {
    if (*statsOut != NULL) free(*statsOut);
    *statsOut = NULL;
  }

  return status;
}
/* Function:  p7_alidisplay_Create()
 * Synopsis:  Create an alignment display, from trace and oprofile.
 * Incept:    SRE, Sun Dec 30 09:13:31 2007 [Janelia]
 *
 * Purpose:   Creates and returns an alignment display for domain number
 *            <which> in traceback <tr>, where the traceback
 *            corresponds to an alignment of optimized profile <om> to
 *            digital sequence <sq>. The name, accession and description
 *            of both <om> and <sq> are copied into the display. The
 *            <which> index starts at 0.
 *            
 *            It will be a little faster if the trace is indexed with
 *            <p7_trace_Index()> first. The number of domains is then
 *            in <tr->ndom>. If the caller wants to create alidisplays
 *            for all of these, it would loop <which> from
 *            <0..tr->ndom-1>.
 *           
 *            However, even without an index, the routine will work fine.
 *
 *            The display covers the trace from the first to the last
 *            match state of the domain. All of its strings live in one
 *            block of memory, <ad->mem>.
 *
 * Args:      tr     - traceback
 *            which  - domain number, 0..tr->ndom-1
 *            om     - optimized profile (query)
 *            sq     - digital sequence (target)
 *
 * Returns:   a pointer to the new alignment display. The caller frees
 *            it with <p7_alidisplay_Destroy()>.
 *
 * Throws:    <NULL> on allocation failure, or if something's internally corrupt 
 *            in the data: no such domain, no match state in the domain,
 *            no end state after the domain (unindexed trace), or a
 *            state other than M, D, I inside the displayed region.
 */
P7_ALIDISPLAY *
p7_alidisplay_Create(const P7_TRACE *tr, int which, const P7_OPROFILE *om, const ESL_SQ *sq)
{
  P7_ALIDISPLAY *ad       = NULL;
  char          *Alphabet = om->abc->sym;
  int            n, pos, z;
  int            z1,z2;
  int            ncols;              /* number of display columns: z2-z1+1 */
  int            k,x,i,s;
  int            hmm_namelen, hmm_acclen, hmm_desclen;
  int            sq_namelen,  sq_acclen,  sq_desclen;
  int            status;
  
  /* First figure out which piece of the trace (from first match to last match) 
   * we're going to represent, and how big it is.
   */
  if (tr->ndom > 0) {           /* if we have an index, this is a little faster: */
    for (z1 = tr->tfrom[which]; z1 < tr->N; z1++) if (tr->st[z1] == p7T_M) break;  /* find next M state      */
    if (z1 == tr->N) return NULL;                                                  /* no M? corrupt trace    */
    for (z2 = tr->tto[which];   z2 >= 0 ;   z2--) if (tr->st[z2] == p7T_M) break;  /* find prev M state      */
    if (z2 == -1) return NULL;                                                     /* no M? corrupt trace    */
  } else {                      /* without an index, we can still do it fine:    */
    for (z1 = 0; which >= 0 && z1 < tr->N; z1++) if (tr->st[z1] == p7T_B) which--; /* find the right B state */
    if (z1 == tr->N) return NULL;                                                  /* no such domain <which> */
    for (; z1 < tr->N; z1++) if (tr->st[z1] == p7T_M) break;                       /* find next M state      */
    if (z1 == tr->N) return NULL;                                                  /* no M? corrupt trace    */
    for (z2 = z1; z2 < tr->N; z2++) if (tr->st[z2] == p7T_E) break;                /* find the next E state  */
    if (z2 == tr->N) return NULL;                                                  /* no E? corrupt trace; and
                                                                                    * tr->st[tr->N] must not be read */
    for (; z2 >= 0;    z2--) if (tr->st[z2] == p7T_M) break;                       /* find prev M state      */
    if (z2 == -1) return NULL;                                                     /* no M? corrupt trace    */
  }

  /* Now we know that z1..z2 in the trace will be represented in the
   * alidisplay; that's z2-z1+1 positions. We need a \0 trailer on all
   * our display lines, so allocate z2-z1+2 for each. We know each
   * position is M, D, or I, so there's a 1:1 correspondence of trace
   * positions with alignment display positions.  We also know the
   * display starts and ends with M states.
   * 
   * So now let's allocate. The alidisplay is packed into a single
   * memory space, so this appears to be intricate, but it's just
   * bookkeeping.  
   */
  ncols = z2 - z1 + 1;

  n = (ncols+1) * 3;                     /* model, mline, aseq mandatory         */
  if (om->rf[0]  != 0)    n += ncols+1;  /* optional reference line              */
  if (om->cs[0]  != 0)    n += ncols+1;  /* optional structure line              */
  if (tr->pp     != NULL) n += ncols+1;  /* optional posterior prob line         */
  hmm_namelen = strlen(om->name);                           n += hmm_namelen + 1;
  hmm_acclen  = (om->acc  != NULL ? strlen(om->acc)  : 0);  n += hmm_acclen  + 1;
  hmm_desclen = (om->desc != NULL ? strlen(om->desc) : 0);  n += hmm_desclen + 1;
  sq_namelen  = strlen(sq->name);                           n += sq_namelen  + 1;
  sq_acclen   = strlen(sq->acc);                            n += sq_acclen   + 1; /* sq->acc is "\0" when unset */
  sq_desclen  = strlen(sq->desc);                           n += sq_desclen  + 1; /* same for desc              */
  
  ESL_ALLOC(ad, sizeof(P7_ALIDISPLAY));
  ad->mem = NULL;

  /* Carve the single block up. The order here must stay in step with
   * the sizes summed into <n> above. */
  pos = 0; 
  ad->memsize = sizeof(char) * n;
  ESL_ALLOC(ad->mem, ad->memsize);
  if (om->rf[0]  != 0) { ad->rfline = ad->mem + pos; pos += ncols+1; } else { ad->rfline = NULL; }
  if (om->cs[0]  != 0) { ad->csline = ad->mem + pos; pos += ncols+1; } else { ad->csline = NULL; }
  ad->model   = ad->mem + pos;  pos += ncols+1;
  ad->mline   = ad->mem + pos;  pos += ncols+1;
  ad->aseq    = ad->mem + pos;  pos += ncols+1;
  if (tr->pp != NULL)  { ad->ppline = ad->mem + pos;  pos += ncols+1;} else { ad->ppline = NULL; }
  ad->hmmname = ad->mem + pos;  pos += hmm_namelen +1;
  ad->hmmacc  = ad->mem + pos;  pos += hmm_acclen +1;
  ad->hmmdesc = ad->mem + pos;  pos += hmm_desclen +1;
  ad->sqname  = ad->mem + pos;  pos += sq_namelen +1;
  ad->sqacc   = ad->mem + pos;  pos += sq_acclen +1;
  ad->sqdesc  = ad->mem + pos;  pos += sq_desclen +1;

  strcpy(ad->hmmname, om->name);
  if (om->acc  != NULL) strcpy(ad->hmmacc,  om->acc);  else ad->hmmacc[0]  = 0;
  if (om->desc != NULL) strcpy(ad->hmmdesc, om->desc); else ad->hmmdesc[0] = 0;
  strcpy(ad->sqname,  sq->name);
  strcpy(ad->sqacc,   sq->acc);
  strcpy(ad->sqdesc,  sq->desc);

  /* Determine hit coords */
  ad->hmmfrom = tr->k[z1];
  ad->hmmto   = tr->k[z2];
  ad->M       = om->M;
  ad->sqfrom  = tr->i[z1];
  ad->sqto    = tr->i[z2];
  ad->L       = sq->n;

  /* optional rf line: insert columns have no model position, so they show '.' */
  if (ad->rfline != NULL) {
    for (z = z1; z <= z2; z++) ad->rfline[z-z1] = ((tr->st[z] == p7T_I) ? '.' : om->rf[tr->k[z]]);
    ad->rfline[z-z1] = '\0';
  }

  /* optional cs line: same treatment of insert columns */
  if (ad->csline != NULL) {
    for (z = z1; z <= z2; z++) ad->csline[z-z1] = ((tr->st[z] == p7T_I) ? '.' : om->cs[tr->k[z]]);
    ad->csline[z-z1] = '\0';
  }

  /* optional pp line: delete columns have no residue, so they show '.' */
  if (ad->ppline != NULL) {
    for (z = z1; z <= z2; z++) ad->ppline[z-z1] = ( (tr->st[z] == p7T_D) ? '.' : p7_alidisplay_EncodePostProb(tr->pp[z]));
    ad->ppline[z-z1] = '\0';
  }

  /* mandatory three alignment display lines: model, mline, aseq */
  for (z = z1; z <= z2; z++) 
    {
      k = tr->k[z];
      i = tr->i[z];
      x = sq->dsq[i];
      s = tr->st[z];

      switch (s) {
      case p7T_M:
        /* match: consensus residue on the model line, uppercase residue in the sequence */
        ad->model[z-z1] = om->consensus[k]; 
        if      (x == esl_abc_DigitizeSymbol(om->abc, om->consensus[k])) ad->mline[z-z1] = ad->model[z-z1];
        else if (p7_oprofile_FGetEmission(om, k, x) > 1.0)               ad->mline[z-z1] = '+'; /* >1 not >0; om has odds ratios, not scores */
        else                                                             ad->mline[z-z1] = ' ';
        ad->aseq  [z-z1] = toupper(Alphabet[x]);
        break;
        
      case p7T_I:
        /* insert: '.' on the model line, lowercase residue in the sequence */
        ad->model [z-z1] = '.';
        ad->mline [z-z1] = ' ';
        ad->aseq  [z-z1] = tolower(Alphabet[x]);
        break;
        
      case p7T_D:
        /* delete: consensus residue on the model line, '-' in the sequence */
        ad->model [z-z1] = om->consensus[k]; 
        ad->mline [z-z1] = ' ';
        ad->aseq  [z-z1] = '-';
        break;

      default: ESL_XEXCEPTION(eslEINVAL, "invalid state in trace: not M,D,I");
      }
    }
  ad->model [ncols] = '\0';
  ad->mline [ncols] = '\0';
  ad->aseq  [ncols] = '\0';
  ad->N = ncols;
  return ad;

 ERROR:
  p7_alidisplay_Destroy(ad);
  return NULL;
}