示例#1
0
/* Function:  p7_profile_Create()
 * Synopsis:  Allocates a profile.
 *
 * Purpose:   Allocates for a profile of up to <M> nodes, for digital
 *            alphabet <abc>.
 *            
 *            Because this function might be in the critical path (in
 *            hmmscan, for example), we leave much of the model
 *            uninitialized, including scores and length model
 *            probabilities. The <p7_profile_Config()> call is what
 *            sets these.
 *            
 *            The reference pointer <gm->abc> is set to <abc>.
 *
 * Returns:   a pointer to the newly allocated profile.
 *
 * Throws:    <NULL> on allocation error.
 */
P7_PROFILE *
p7_profile_Create(int allocM, const ESL_ALPHABET *abc)
{
  P7_PROFILE *gm = NULL;
  int         x;
  int         status;

  /* level 0 */
  ESL_ALLOC(gm, sizeof(P7_PROFILE));
  gm->tsc       = NULL;
  gm->rsc       = NULL;
  gm->name      = NULL;
  gm->acc       = NULL;
  gm->desc      = NULL;
  gm->rf        = NULL;
  gm->mm        = NULL;
  gm->cs        = NULL;
  gm->consensus = NULL;

  /* level 1 */
  ESL_ALLOC(gm->tsc,       sizeof(float)   * (allocM+1) * p7P_NTRANS); /* 0..M */
  ESL_ALLOC(gm->rsc,       sizeof(float *) * abc->Kp);
  ESL_ALLOC(gm->rf,        sizeof(char)    * (allocM+2)); /* yes, +2: each is (0)1..M, +trailing \0  */
  ESL_ALLOC(gm->mm,        sizeof(char)    * (allocM+2));
  ESL_ALLOC(gm->cs,        sizeof(char)    * (allocM+2));
  ESL_ALLOC(gm->consensus, sizeof(char)    * (allocM+2));
  gm->rsc[0] = NULL;
  
  /* level 2 */
  ESL_ALLOC(gm->rsc[0], sizeof(float) * abc->Kp * (allocM+1) * p7P_NR);
  for (x = 1; x < abc->Kp; x++) 
    gm->rsc[x] = gm->rsc[0] + x * (allocM+1) * p7P_NR;

  /* Initialization of tsc[0], including removal of I0.  tsc[k-1,LM],tsc[k-1,GM] will be configured + overwritten later */
  esl_vec_FSet(gm->tsc, p7P_NTRANS, -eslINFINITY);  
  /* tsc[M] initialized and Im removed when we know actual M : see modelconfig.c */

  for (x = 0; x < abc->Kp; x++) {        
    P7P_MSC(gm, 0, x) = -eslINFINITY;                  /* no emissions from nonexistent M_0... */
    P7P_ISC(gm, 0, x) = -eslINFINITY;                  /* nor I_0...                           */
    /* I_M is initialized in profile config, when we know actual M, not just allocated max M   */
  }
  x = esl_abc_XGetGap(abc);	                       /* no emission can emit/score gap characters */
  esl_vec_FSet(gm->rsc[x], (allocM+1)*p7P_NR, -eslINFINITY);
  x = esl_abc_XGetMissing(abc);	                       /* no emission can emit/score missing data characters */
  esl_vec_FSet(gm->rsc[x], (allocM+1)*p7P_NR, -eslINFINITY);


  /* Set remaining info  */
  gm->M                = 0;
  gm->allocM           = allocM;
  gm->L                = -1;  	   /* "unset" flag */
  gm->nj               = -1.0f;    /* "unset" flag */
  gm->pglocal          = -1.0f;    /* "unset" flag */

  gm->roff             = -1;
  gm->eoff             = -1;
  gm->offs[p7_MOFFSET] = -1;
  gm->offs[p7_FOFFSET] = -1;
  gm->offs[p7_POFFSET] = -1;

  gm->name             = NULL;
  gm->acc              = NULL;
  gm->desc             = NULL;
  gm->rf[0]            = 0;     /* RF line is optional annotation; this flags that it's not set yet */
  gm->mm[0]            = 0;     /* likewise for MM annotation line */
  gm->cs[0]            = 0;     /* likewise for CS annotation line */
  gm->consensus[0]     = 0;
  
  for (x = 0; x < p7_NEVPARAM; x++) gm->evparam[x] = p7_EVPARAM_UNSET;
  for (x = 0; x < p7_NCUTOFFS; x++) gm->cutoff[x]  = p7_CUTOFF_UNSET;
  for (x = 0; x < p7_MAXABET;  x++) gm->compo[x]   = p7_COMPO_UNSET;

  gm->max_length  = -1;		/* "unset" */
  gm->abc         = abc;

  return gm;

 ERROR:
  p7_profile_Destroy(gm);
  return NULL;
}
示例#2
0
static int
make_digital_msa(ESL_SQ **sq, const ESL_MSA *premsa, P7_TRACE **tr, int nseq, const int *matuse, const int *matmap, int M, int alen, int optflags, ESL_MSA **ret_msa)
{
  const ESL_ALPHABET *abc = (sq == NULL) ? premsa->abc : sq[0]->abc;
  ESL_MSA      *msa = NULL;
  int           idx;
  int           apos;
  int           z;
  int           status;

  if ((msa = esl_msa_CreateDigital(abc, nseq, alen)) == NULL) { status = eslEMEM; goto ERROR;  }
  
  for (idx = 0; idx < nseq; idx++)
    {
      msa->ax[idx][0]      = eslDSQ_SENTINEL;
      for (apos = 1; apos <= alen; apos++) msa->ax[idx][apos] = esl_abc_XGetGap(abc);
      msa->ax[idx][alen+1] = eslDSQ_SENTINEL;

      apos = 1;
      for (z = 0; z < tr[idx]->N; z++)
	{
	  switch (tr[idx]->st[z]) {
	  case p7T_M:
	    msa->ax[idx][matmap[tr[idx]->k[z]]] = get_dsq_z(sq, premsa, tr, idx, z);
	    apos = matmap[tr[idx]->k[z]] + 1;
	    break;

	  case p7T_D:
	    if (matuse[tr[idx]->k[z]]) /* bug h77: if all col is deletes, do nothing; do NOT overwrite a column */
	      msa->ax[idx][matmap[tr[idx]->k[z]]] = esl_abc_XGetGap(abc); /* overwrites ~ in Dk column on X->Dk */
	    apos = matmap[tr[idx]->k[z]] + 1;
	    break;

	  case p7T_I:
	    if ( !(optflags & p7_TRIM) || (tr[idx]->k[z] != 0 && tr[idx]->k[z] != M)) {
	      msa->ax[idx][apos] = get_dsq_z(sq, premsa, tr, idx, z);
	      apos++;
	    }
	    break;
	    
	  case p7T_N:
	  case p7T_C:
	    if (! (optflags & p7_TRIM) && tr[idx]->i[z] > 0) {
	      msa->ax[idx][apos] = get_dsq_z(sq, premsa, tr, idx, z);
	      apos++;
	    }
	    break;
	    
	  case p7T_E:
	    apos = matmap[M]+1;	/* set position for C-terminal tail */
	    break;
	    
	  case p7T_X: 
	    /* Mark fragments (B->X and X->E containing core traces): 
	     * convert flanks from gaps to ~ 
	     */
	    if (tr[idx]->st[z-1] == p7T_B)
	      { /* B->X leader. This is a core trace and a fragment. Convert leading gaps to ~ */
		/* to set apos for an initial Ik: peek at next state for B->X->Ik; superfluous for ->{DM}k: */
		for (apos = 1; apos <= matmap[tr[idx]->k[z+1]]; apos++)
		  msa->ax[idx][apos] = esl_abc_XGetMissing(abc);
		/* tricky! apos is now exactly where it needs to be for X->Ik. all other cases except B->X->Ik set their own apos */
	      }
	    else if (tr[idx]->st[z+1] == p7T_E) 
	      { /* X->E trailer. This is a core trace and a fragment. Convert trailing gaps to ~ */
		/* don't need to set apos for trailer. There can't be any more residues in a core trace once we hit X->E */
		for (; apos <= alen; apos++)
		  msa->ax[idx][apos] = esl_abc_XGetMissing(abc);
	      }
	    else ESL_XEXCEPTION(eslECORRUPT, "make_digital_msa(): X state in unexpected position in trace"); 
	      
	    break;

	  default:
	    break;
	  }
	}
    }

  msa->nseq = nseq;
  msa->alen = alen;
  *ret_msa = msa;
  return eslOK;

 ERROR:
  if (msa) esl_msa_Destroy(msa);
  *ret_msa = NULL;
  return status;
}