Пример #1
0
/* Function:  cp9_Copy()
 * Synopsis:  Duplicate the contents of one CM plan 9 HMM into another.
 *
 * Purpose:   Copy the parameters of the CP9 HMM <src> into the CP9 HMM
 *            <dst>. <dst> must already be allocated for a model with
 *            the same number of nodes as <src> (<dst->M == src->M>).
 *
 *            No check is made that <src> is normalized. The integer
 *            score arrays are copied only when <src> carries the
 *            <CPLAN9_HASBITS> flag; otherwise the score arrays of
 *            <dst> are left untouched.
 *
 *            The alphabet pointer <abc> is assigned, not deep-copied.
 *            The per-node <el_from_idx> and <el_from_cmnd> lists of
 *            <dst> are allocated here for every node whose
 *            <el_from_ct> entry is positive.
 *
 * Returns:   <eslOK> on success; <eslEINVAL> if <dst->M> differs
 *            from <src->M>.
 *
 * Throws:    <eslEMEM> on allocation error.
 */
int
cp9_Copy(const CP9_t *src, CP9_t *dst)
{
    int status;
    int node;
    int nfrom;
    int has_bits = (src->flags & CPLAN9_HASBITS) ? TRUE : FALSE;

    /* the two models must have the same number of nodes */
    if (dst->M != src->M) return eslEINVAL;

    dst->abc = src->abc;

    /* probability parameters: transitions, match and insert emissions, per node */
    for (node = 0; node <= src->M; node++)
    {
        esl_vec_FCopy(src->t[node],   cp9_NTRANS,  dst->t[node]);
        esl_vec_FCopy(src->mat[node], src->abc->K, dst->mat[node]);
        esl_vec_FCopy(src->ins[node], src->abc->K, dst->ins[node]);
    }
    esl_vec_FCopy(src->begin, src->M+1, dst->begin);
    esl_vec_FCopy(src->end,   src->M+1, dst->end);

    /* integer scores, only when <src> has them */
    if (has_bits)
    {
        esl_vec_ICopy(src->bsc_mem, src->M+1, dst->bsc_mem);
        esl_vec_ICopy(src->esc_mem, src->M+1, dst->esc_mem);

        /* the 2D score arrays are copied as single linear vectors */
        esl_vec_ICopy(src->tsc_mem, cp9_NTRANS   * (src->M+1), dst->tsc_mem);
        esl_vec_ICopy(src->msc_mem, src->abc->Kp * (src->M+1), dst->msc_mem);
        esl_vec_ICopy(src->isc_mem, src->abc->Kp * (src->M+1), dst->isc_mem);
        esl_vec_ICopy(src->otsc,    cp9O_NTRANS  * (src->M+1), dst->otsc);
    }

    /* EL state information */
    dst->el_self   = src->el_self;
    dst->el_selfsc = src->el_selfsc;
    esl_vec_ICopy(src->has_el,     src->M+1, dst->has_el);
    esl_vec_ICopy(src->el_from_ct, src->M+2, dst->el_from_ct);
    for (node = 0; node <= src->M+1; node++)
    {
        nfrom = src->el_from_ct[node];
        if (nfrom <= 0) continue;   /* nothing to allocate or copy for this node */

        ESL_ALLOC(dst->el_from_idx[node],  sizeof(int) * nfrom);
        ESL_ALLOC(dst->el_from_cmnd[node], sizeof(int) * nfrom);
        esl_vec_ICopy(src->el_from_idx[node],  nfrom, dst->el_from_idx[node]);
        esl_vec_ICopy(src->el_from_cmnd[node], nfrom, dst->el_from_cmnd[node]);
    }

    /* null model parameters */
    dst->null2_omega = src->null2_omega;
    dst->null3_omega = src->null3_omega;
    esl_vec_FCopy(src->null, src->abc->K, dst->null);

    dst->p1    = src->p1;
    dst->flags = src->flags;

    return eslOK;

ERROR:
    return status;
}
Пример #2
0
/* Function:  esl_ct2simplewuss()
 * Incept:    ER, Wed Aug 22 13:31:54 EDT 2012 [Janelia]
 *
 * Purpose:   Convert a CT array <ct> for <n> residues (1..n) to a simple WUSS
 *            format string <ss>. <ss> must be allocated for at least
 *            n+1 chars (+1 for the terminal NUL). 
 *
 *            This function can be used with the <ct> of a secondary
 *            structure including arbitrary pseudoknots, or for the 
 *            <ct> of a tertiary structure (say cWH, tWH, cSS,... H bonds). 
 *
 *            The string <ss> has basepairs annotated as <>, Aa, Bb, ..., Zz;
 *            unpaired bases are annotated as '.'.
 *
 *            Attempting to convert a <ct> that requires more letters
 *            than [A-Z] will return an <eslEINVAL> error.
 *
 *            Attempting to convert a <ct> that involves triplet interactions
 *            will return an <eslEINVAL> error.
 *
 * Returns:   <eslOK> on success.
 * 
 * Throws:    <eslEMEM> on allocation failure.
 *            <eslEINVAL> if the left partner of a base cannot be found
 *            (likely a triplet), or if more than 26 pseudoknot letters
 *            would be needed.
 *            <eslFAIL> if the number of annotated basepairs differs from
 *            the number of basepairs in <ct>.
 *            The working stacks and the <ct> copy are released on all of
 *            these paths; <ss> may be partially filled.
 */
int
esl_ct2simplewuss(int *ct, int n, char *ss)
{
  int        rb[26];                /* array that delimits the right bound of a pseudoknot character */
  ESL_STACK *pda    = NULL;         /* stack for "main" secondary structure */
  ESL_STACK *auxpk  = NULL;         /* aux stack for pseudoknot */
  int       *cct    = NULL;         /* copy of ct vector */
  int        leftbound, rightbound; /* left and right bound to find basepairs belonging to a given pseudoknot */
  int        xpk = 0;               /* index of the pseudoknot character in use (0 = 'A'/'a') */
  int        npairs = 0;            /* total number of basepairs */
  int        npairs_reached = 0;    /* number of basepairs found so far */
  int        found_partner;         /* true if we've found left partner of a given base in stack pda */
  int        i,j,k;                 /* sequence indices */
  int        x;                     /* index for pseudoknot characters */
  int        status = eslEMEM;      /* exit status 'til proven otherwise */

  /* total number of basepairs */
  for (j = 1; j <= n; j ++) { if (ct[j] > 0 && j < ct[j]) npairs ++; }
  
  /* Copy of ct; if a pseudoknotted structure, cct will be modified later.
   */
  ESL_ALLOC(cct, sizeof(int)*(n+1));
  esl_vec_ICopy(ct, (n+1), cct);
  
  /* Initialize rightbounds for all 26 pseudoknot indices */
  for (x = 0; x < 26; x ++) rb[x] = -1;

  /* init ss[] to single stranded */
  for (j = 0; j < n; j ++) { ss[j] = '.'; }  
  ss[n] = '\0'; 
 
  /* Initialization */
  if ((pda   = esl_stack_ICreate()) == NULL) goto FINISH;
  if ((auxpk = esl_stack_ICreate()) == NULL) goto FINISH;
  
  for (j = 1; j <= n; j++)
    {
      if (cct[j] == 0)  /* unpaired: push j. */
        {
          if (esl_stack_IPush(pda, j) != eslOK) goto FINISH;
        }
      else if (cct[j] > j) /* left side of a bp: push j. */
        {
          if (esl_stack_IPush(pda, j) != eslOK) goto FINISH;
        }
      else   /* right side of a bp; main routine: find the left partner */
        {
          found_partner = FALSE;

          /* Pop back until we find the left partner of j;
           * In case this is not a nested structure, finding
           * the left partner of j will require to put bases 
           * aside into stack auxpk.
           */
          while (esl_stack_ObjectCount(pda)) 
            {
              if (esl_stack_IPop(pda, &i) != eslOK) goto FINISH;
              
              if (cct[i] == j)  /* we found the i,j pair. */
                {
                  found_partner = TRUE;
                  npairs_reached ++;

                  ss[i-1] = '<';
                  ss[j-1] = '>';
                  break;
                }
              
              else if (cct[i] == 0) 
                {
                  /* only mark as unpaired if it was unpaired in the original ct,
                   * not if it is a pseudoknotted base already removed from cct */
                  if (ct[i] == 0) ss[i-1] = '.';
                }

              else /* cct[i]>0, != j: i is paired, but not to j: pseudoknot! */
                {
                  /* i is in the way to find j's left partner. 
                   * Move i to stack auxpk; resolve pseudoknot(s) after we've found partner for j.
                   */ 
                  if (esl_stack_IPush(auxpk, i) != eslOK) goto FINISH;
                }
            } 
          
          if (!found_partner) {
            /* ESL_EXCEPTION returns immediately: release working memory first */
            esl_stack_Destroy(pda); esl_stack_Destroy(auxpk); free(cct); 
            ESL_EXCEPTION(eslEINVAL, "Cannot find left partner (%d) of base %d. Likely a triplet", ct[j], j);
          }
        } /* finished finding the left partner of j */
      
      /* After we've found the left partner of j, resolve pks found along the way.
       * Then, remove the pseudoknotted bases from cct so we can find the rest of the structure.
       */
      if (esl_stack_ObjectCount(auxpk)) {

        /* init for first pseudoknot */
        leftbound  = cct[j];
        rightbound = leftbound + 1;
        xpk        = -1;            /* start with 'A' if possible again */

        while (esl_stack_IPop(auxpk, &i) == eslOK) {

          for (k = rightbound-1; k > leftbound; k --) 
            {
              if      (cct[k] == 0)          { continue; } 
              else if (cct[k] >  rightbound) { continue; } 
              else if (cct[k] == i)          { break; }                  /* i continues the given pseudoknot */
              else                           { k = leftbound; break; }   /* a new pseudoknot */
            }
          
          if (k == leftbound) /* a new pseudoknot */
            {
              xpk ++;
              /* figure out if we can use this alphabet index, or bump it up if necessary;
               * stop at 26 so that rb[] is never read out of bounds */
              while (xpk < 26 && i < rb[xpk]) { xpk ++; }
              
              leftbound  = (rightbound < cct[i])? rightbound : cct[j];
              rightbound = cct[i];
            }
              
          npairs_reached ++;
          if (xpk >= 26) {
            /* ran out of letters; ESL_EXCEPTION returns immediately, so clean up first */
            esl_stack_Destroy(pda); esl_stack_Destroy(auxpk); free(cct); 
            ESL_EXCEPTION(eslEINVAL, "Don't have enough letters to describe all different pseudoknots.");
          }

          /* update the rightbound of this pk index if necessary */
          if (cct[i] > rb[xpk]) rb[xpk] = cct[i];
          
          /* Add pk indices for this basepair */
          ss[i-1]      = (char)(xpk+(int)('A'));
          ss[cct[i]-1] = (char)(xpk+(int)('a'));
          
          /* remove pseudoknotted pair from cct */
          cct[i]     = 0;
          cct[ct[i]] = 0;
        }
      } /* if there is something in auxpk stack */

    } /* finished loop over j: end position on seq, 1..n*/ 
  
  /* Sanity check, only meaningful when the whole sequence was processed:
   * every basepair counted up front must have been annotated.
   */
  if (npairs != npairs_reached) {
    esl_stack_Destroy(pda); esl_stack_Destroy(auxpk); free(cct); 
    ESL_EXCEPTION(eslFAIL, "found %d out of %d pairs.", npairs_reached, npairs);
  }

  status = eslOK;

 ERROR:
 FINISH:
  if (pda   != NULL) esl_stack_Destroy(pda);
  if (auxpk != NULL) esl_stack_Destroy(auxpk);
  if (cct   != NULL) free(cct);
  return status;
}
Пример #3
0
/* Function:  esl_ct2wuss()
 * Incept:    SRE, Wed Feb 16 11:22:53 2005 [St. Louis]
 *
 * Purpose:   Convert a CT array <ct> for <n> residues (1..n) to a WUSS
 *            format string <ss>. <ss> must be allocated for at least
 *            n+1 chars (+1 for the terminal NUL). 
 *
 *            ER, Sat Aug 18 13:22:03 EDT 2012 
 *            esl\_ct2wuss() extended to deal with pseudoknotted structures.
 *            Pseudoknots are annotated as AA...aa, BB...bb,..., ZZ..zz.
 *            Attempting to convert a <ct> that requires more letters
 *            than [A-Z] will return an <eslEINVAL> error.
 *
 *            Attempting to convert a <ct> that involves triplet interactions
 *            will return an <eslEINVAL> error.
 *
 * Returns:   <eslOK> on success.
 * 
 * Throws:    <eslEMEM> on allocation failure.
 *            <eslEINVAL> if the left partner of a base cannot be found
 *            (likely a triplet), or if more than 26 pseudoknot letters
 *            would be needed.
 *            <eslEINCONCEIVABLE> on internal failure (unknown face code).
 *            <eslFAIL> if the number of annotated basepairs differs from
 *            the number of basepairs in <ct>.
 *            The working stacks and the <ct> copy are released on all of
 *            these paths; <ss> may be partially filled.
 */
int
esl_ct2wuss(int *ct, int n, char *ss)
{
  int        rb[26];                /* array that delimits the right bound of a pseudoknot character */
  ESL_STACK *pda    = NULL;         /* stack for "main" secondary structure */
  ESL_STACK *auxpk  = NULL;         /* aux stack for pseudoknot */
  ESL_STACK *auxss  = NULL;         /* aux stack for single stranded */
  int       *cct    = NULL;         /* copy of ct vector */
  int        nfaces;                /* number of faces in a cWW structure */
  int        minface;               /* max depth of faces in a cWW structure */
  int        leftbound, rightbound; /* left and right bound to find basepairs belonging to a given pseudoknot */
  int        xpk = 0;               /* index of the pseudoknot character in use (0 = 'A'/'a') */
  int        npairs = 0;            /* total number of basepairs */
  int        npairs_reached = 0;    /* number of basepairs found so far */
  int        found_partner;         /* true if we've found left partner of a given base in stack pda */
  int        i,j,k;                 /* sequence indices */
  int        x;                     /* index for pseudoknot characters */
  int        status = eslEMEM;      /* exit status 'til proven otherwise */

  /* total number of basepairs */
  for (j = 1; j <= n; j ++) { if (ct[j] > 0 && j < ct[j]) npairs ++; }
  
  /* Copy of ct; if a pseudoknotted structure, cct will be modified later.
   */
  ESL_ALLOC(cct, sizeof(int)*(n+1));
  esl_vec_ICopy(ct, (n+1), cct);
  
  /* Initialize rightbounds for all 26 pseudoknot indices */
  for (x = 0; x < 26; x ++) rb[x] = -1;

  /* init ss[] to single stranded */
  for (j = 0; j < n; j ++) { ss[j] = ':'; }  
  ss[n] = '\0'; 
 
  /* Initialization */
  if ((pda   = esl_stack_ICreate()) == NULL) goto FINISH;
  if ((auxpk = esl_stack_ICreate()) == NULL) goto FINISH;
  if ((auxss = esl_stack_ICreate()) == NULL) goto FINISH;
  
  for (j = 1; j <= n; j++)
    {
      if (cct[j] == 0)  /* unpaired: push j. */
        {
          if (esl_stack_IPush(pda, j) != eslOK) goto FINISH;
        }
      else if (cct[j] > j) /* left side of a bp: push j. */
        {
          if (esl_stack_IPush(pda, j) != eslOK) goto FINISH;
        }
      else   /* right side of a bp; main routine: find the left partner */
        {
          found_partner = FALSE;
          /* Pop back until we find the left partner of j;
           * In case this is not a nested structure, finding
           * the left partner of j will require to put bases 
           * aside into stack auxpk.
           *
           * After we find the left partner of j,
           * store single stranded residues in auxss;
           * keep track of #faces and the maximum face depth.
           */
          nfaces  = 0;
          minface = -1;
         
          while (esl_stack_ObjectCount(pda)) 
            {
              if (esl_stack_IPop(pda, &i) != eslOK) goto FINISH;
              
              if (i < 0)                /* a face counter */
                {
                  nfaces++;
                  if (i < minface) minface = i;
                }

              else if (cct[i] == j)  /* we found the i,j pair. */
                {
                  found_partner = TRUE;
                  npairs_reached ++;
                  /* Now we know i,j pair; and we know how many faces are
                   * above them; and we know the max depth of those faces.
                   * That's enough to label the pair in WUSS notation.
                   * if nfaces == 0, minface is -1; <> a closing bp of a hairpin.
                   * if nfaces == 1, inherit minface, we're continuing a stem.
                   * if nfaces > 1, bump minface in depth; we're closing a bifurc.
                   */
                  if (nfaces > 1 && minface > -4) minface--;
                  switch (minface) {
                  case -1: ss[i-1] = '<'; ss[j-1] = '>'; break;
                  case -2: ss[i-1] = '('; ss[j-1] = ')'; break;
                  case -3: ss[i-1] = '['; ss[j-1] = ']'; break;
                  case -4: ss[i-1] = '{'; ss[j-1] = '}'; break;
                  default:
                    /* ESL_EXCEPTION returns immediately: release working memory first */
                    esl_stack_Destroy(pda); esl_stack_Destroy(auxpk); esl_stack_Destroy(auxss); free(cct); 
                    ESL_EXCEPTION(eslEINCONCEIVABLE, "no such face code");
                  }
                  /* the face code of this pair goes back on pda as a negative marker */
                  if (esl_stack_IPush(pda, minface) != eslOK) goto FINISH;
                  
                  /* Now, auxss contains all the unpaired residues we need to label,
                   * according to the # of faces "above" them:
                   *  nfaces = 0: hairpin loop
                   *  nfaces = 1: bulge or interior loop
                   *  nfaces > 1: multifurc
                   */
                  while (esl_stack_IPop(auxss, &i) == eslOK)
                    {
                      switch (nfaces) {
                      case 0:  ss[i-1] = '_'; break;
                      case 1:  ss[i-1] = '-'; break;
                      default: ss[i-1] = ','; break; /* nfaces > 1 */
                      }
                    }
                  break;
                }
              
              else if (cct[i] == 0) 
                {
                  /* add to auxss only if originally single stranded */
                  if (ct[i] == 0) { if (esl_stack_IPush(auxss, i) != eslOK) goto FINISH; }
                }

              else /* cct[i]>0, != j: i is paired, but not to j: pseudoknot! */
                {
                  /* i is in the way to find j's left partner. 
                   * Move i to stack auxpk; resolve pseudoknot(s) after we've found partner for j.
                   */ 
                  if (esl_stack_IPush(auxpk, i) != eslOK) goto FINISH;
                }
            } 
          
          if (!found_partner) {
            /* ESL_EXCEPTION returns immediately: release working memory first */
            esl_stack_Destroy(pda); esl_stack_Destroy(auxpk); esl_stack_Destroy(auxss); free(cct); 
            ESL_EXCEPTION(eslEINVAL, "Cannot find left partner (%d) of base %d. Likely a triplet", ct[j], j);
          }
        } /* finished finding the left partner of j */
      
      /* After we've found the left partner of j, resolve pks found along the way.
       * Then, remove the pseudoknotted bases from cct so we can find the rest of the structure.
       */
      if (esl_stack_ObjectCount(auxpk)) {

        /* init for first pseudoknot */
        leftbound  = cct[j];
        rightbound = leftbound + 1;
        xpk        = -1;            /* start with 'A' if possible again */

        while (esl_stack_IPop(auxpk, &i) == eslOK) {

          for (k = rightbound-1; k > leftbound; k --) 
            {
              if      (cct[k] == 0)          { continue; } 
              else if (cct[k] >  rightbound) { continue; } 
              else if (cct[k] == i)          { break; }                  /* i continues the given pseudoknot */
              else                           { k = leftbound; break; }   /* a new pseudoknot */
            }
          
          if (k == leftbound) /* a new pseudoknot */
            {
              xpk ++;
              /* figure out if we can use this alphabet index, or bump it up if necessary;
               * stop at 26 so that rb[] is never read out of bounds */
              while (xpk < 26 && i < rb[xpk]) { xpk ++; }
              
              leftbound  = (rightbound < cct[i])? rightbound : cct[j];
              rightbound = cct[i];
            }
              
          npairs_reached ++;
          if (xpk >= 26) {
            /* ran out of letters; ESL_EXCEPTION returns immediately, so clean up first */
            esl_stack_Destroy(pda); esl_stack_Destroy(auxpk); esl_stack_Destroy(auxss); free(cct); 
            ESL_EXCEPTION(eslEINVAL, "Don't have enough letters to describe all different pseudoknots.");
          }

          /* update the rightbound of this pk index if necessary */
          if (cct[i] > rb[xpk]) rb[xpk] = cct[i];
          
          /* Add pk indices for this basepair */
          ss[i-1]      = (char)(xpk+(int)('A'));
          ss[cct[i]-1] = (char)(xpk+(int)('a'));
          
          /* remove pseudoknotted pair from cct */
          cct[i]     = 0;
          cct[ct[i]] = 0;
        }
      } /* if there is something in auxpk stack */

    } /* finished loop over j: end position on seq, 1..n*/ 
  
  /* Sanity check, only meaningful when the whole sequence was processed:
   * every basepair counted up front must have been annotated.
   */
  if (npairs != npairs_reached) {
    esl_stack_Destroy(pda); esl_stack_Destroy(auxpk); esl_stack_Destroy(auxss); free(cct); 
    ESL_EXCEPTION(eslFAIL, "found %d out of %d pairs.", npairs_reached, npairs);
  }

  status = eslOK;

 ERROR:
 FINISH:
  if (pda   != NULL) esl_stack_Destroy(pda);
  if (auxpk != NULL) esl_stack_Destroy(auxpk);
  if (auxss != NULL) esl_stack_Destroy(auxss);
  if (cct   != NULL) free(cct);
  return status;
}