/* output_clusters()
 *
 * Walk tree <T> downward from node 0 and print a clustering to stdout.
 *
 * The cutoff is the real-valued "-x" option read from <go>. For each
 * internal node visited:
 *   - if T->ld[node] is below the cutoff, the node defines one cluster:
 *     a "Cluster" header is printed, followed by one line per leaf found
 *     beneath the node (leaf name, cluster number, T->ld[node]);
 *   - otherwise each child is examined: a child index > 0 is an internal
 *     node and is queued for a later visit; a child index <= 0 is a leaf
 *     and is printed as a "Singleton".
 *
 * A leaf with child index <c> (c <= 0) is named by esl_keyhash_Get(kh, -c).
 * Nothing is returned.
 */
void
output_clusters(ESL_GETOPTS *go, ESL_TREE *T, ESL_KEYHASH *kh)
{
  ESL_STACK *pending = esl_stack_ICreate();         /* internal nodes waiting to be examined */
  ESL_STACK *members = esl_stack_ICreate();         /* nodes beneath the cluster being printed */
  double     cutoff  = esl_opt_GetReal(go, "-x");
  int        nclust  = 0;                           /* number of clusters printed so far */
  int        node;                                  /* internal node popped from <pending> */
  int        sub;                                   /* node popped from <members> */
  int        kids[2];                               /* right child, then left child, of <node> */
  int        c;

  esl_stack_IPush(pending, 0);
  while (esl_stack_IPop(pending, &node) == eslOK)
    {
      /* Only node 0 and child indices > 0 are ever pushed, so <node> is internal. */
      kids[0] = T->right[node];
      kids[1] = T->left[node];

      if (T->ld[node] < cutoff)
        {
          /* Everything beneath <node> is one cluster: list all of its leaves. */
          nclust++;
          printf("Cluster %d: %g\n", nclust, T->ld[node]);

          esl_stack_IPush(members, kids[0]);
          esl_stack_IPush(members, kids[1]);
          while (esl_stack_IPop(members, &sub) == eslOK)
            {
              if (sub > 0)
                {
                  esl_stack_IPush(members, T->right[sub]);
                  esl_stack_IPush(members, T->left[sub]);
                }
              else
                printf("= %s \t%d\t%g\n", esl_keyhash_Get(kh, -sub), nclust, T->ld[node]);
            }
          printf("\n\n");
        }
      else
        {
          /* Not a cluster: descend into internal children, report leaf children. */
          for (c = 0; c < 2; c++)
            {
              if (kids[c] > 0) esl_stack_IPush(pending, kids[c]);
              else             printf("Singleton:\n= %s\t%c\t%c\n\n", esl_keyhash_Get(kh, -kids[c]), '-', '-');
            }
        }
    }

  esl_stack_Destroy(pending);
  esl_stack_Destroy(members);
}
/* Function: esl_wuss2ct() * Incept: SRE, Tue Feb 15 08:44:54 2005 [St. Louis] * * Purpose: Given a secondary structure string <ss>, <0..len-1>, * in WUSS notation, convert it to a CT array, <1..len>, * in <ct>. Caller provides a <ct> allocated for at least * <len+1> ints. <ct[i]> is the position that residue i * base pairs to, or 0 if i is unpaired. <ct[0]> is undefined * (but if you care: it is set to 0). * * WUSS notation is interpreted loosely here, as input * WUSS. Any matching bracket pair or upper/lower case * alphabetic pair is interpreted as a base pair; any other * WUSS annotation is interpreted as unpaired. * * Returns: <eslOK> on success. Returns <eslESYNTAX> if the WUSS * string isn't valid. * * Throws: <eslEMEM> on allocation failure. */ int esl_wuss2ct(char *ss, int len, int *ct) { ESL_STACK *pda[27]; /* 1 secondary structure + up to 26 levels of pk's */ int i; int pos, pair; int status; /* success or failure return status */ /* Initialization: always initialize the main pda (0); * we'll init the pk pda's on demand. 
*/ if ((pda[0] = esl_stack_ICreate()) == NULL) goto FINISH; for (i = 1; i <= 26; i++) pda[i] = NULL; for (pos = 0; pos <= len; pos++) ct[pos] = 0; for (pos = 1; pos <= len; pos++) { if (!isprint((int) ss[pos-1])) /* armor against garbage */ { status = eslESYNTAX; goto FINISH; } /* left side of a pair: push position onto stack 0 (pos = 1..L) */ else if (ss[pos-1] == '<' || ss[pos-1] == '(' || ss[pos-1] == '[' || ss[pos-1] == '{') { if ((status = esl_stack_IPush(pda[0], pos)) != eslOK) goto FINISH; } /* right side of a pair; resolve pair; check for agreement */ else if (ss[pos-1] == '>' || ss[pos-1] == ')' || ss[pos-1] == ']' || ss[pos-1] == '}') { if (esl_stack_IPop(pda[0], &pair) == eslEOD) { status = eslESYNTAX; goto FINISH;} /* no closing bracket */ else if ((ss[pair-1] == '<' && ss[pos-1] != '>') || (ss[pair-1] == '(' && ss[pos-1] != ')') || (ss[pair-1] == '[' && ss[pos-1] != ']') || (ss[pair-1] == '{' && ss[pos-1] != '}')) { status = eslESYNTAX; goto FINISH; } /* brackets don't match */ else { ct[pos] = pair; ct[pair] = pos; } } /* same stuff for pseudoknots */ else if (isupper((int) ss[pos-1])) { /* Create the PK stacks on demand. */ i = ss[pos-1] - 'A' + 1; if (pda[i] == NULL) if ((pda[i] = esl_stack_ICreate()) == NULL) { status = eslEMEM; goto FINISH; } if ((status = esl_stack_IPush(pda[i], pos)) != eslOK) goto FINISH; } else if (islower((int) ss[pos-1])) { i = ss[pos-1] - 'a' + 1; if (pda[i] == NULL || esl_stack_IPop(pda[i], &pair) == eslEOD) { status = eslESYNTAX; goto FINISH;} else { ct[pos] = pair; ct[pair] = pos; } } else if (strchr(":,_-.~", ss[pos-1]) == NULL) { status = eslESYNTAX; goto FINISH; } /* bogus character */ } status = eslOK; FINISH: for (i = 0; i <= 26; i++) if (pda[i] != NULL) { /* nothing should be left on stacks */ if (esl_stack_ObjectCount(pda[i]) != 0) status = eslESYNTAX; esl_stack_Destroy(pda[i]); } return status; }
/* Function: esl_ct2wuss() * Incept: SRE, Wed Feb 16 11:22:53 2005 [St. Louis] * * Purpose: Convert a CT array <ct> for <n> residues (1..n) to a WUSS * format string <ss>. <ss> must be allocated for at least * n+1 chars (+1 for the terminal NUL). * * Currently limited to nonpseudoknotted structures. Attempting * to convert a pseudoknot-containing <ct> will return an * <eslEINVAL> error. * * Returns: <eslOK> on success. * <eslEINVAL> if <ct> contains a pseudoknot. * * Throws: <eslEMEM> on allocation failure. * <eslEINCONCEIVABLE> on internal failure. */ int esl_ct2wuss(int *ct, int n, char *ss) { ESL_STACK *pda = NULL; /* main stack */ ESL_STACK *aux = NULL; /* aux storage */ int status = eslEMEM; /* exit status 'til proven otherwise */ int i,j; int nfaces; int minface; ss[0] = '\0'; /* in case we abort, and caller does something dumb w/ ss */ if ((pda = esl_stack_ICreate()) == NULL) goto FINISH; if ((aux = esl_stack_ICreate()) == NULL) goto FINISH; for (j = 1; j <= n; j++) { if (ct[j] == 0) /* unpaired: push j. */ { if (esl_stack_IPush(pda, j) != eslOK) goto FINISH; } else if (ct[j] > j) /* left side of a bp: push j. */ { if (esl_stack_IPush(pda, j) != eslOK) goto FINISH; } else /* right side of a bp; main routine: resolve a subseq */ { /* Pop back until we find the left partner of i; * store SS residues in aux; * keep track of #faces and the maximum face depth. */ nfaces = 0; minface = -1; while (1) { if (esl_stack_IPop(pda, &i) != eslOK) goto FINISH; if (i < 0) /* a face counter */ { nfaces++; if (i < minface) minface = i; } else if (ct[i] == j) break; /* we found the i,j pair. */ else if (ct[i] == 0) { if (esl_stack_IPush(aux, i) != eslOK) goto FINISH; } else /* ct[i]>0, != j: i is paired, but not to j: pseudoknot! */ { esl_stack_Destroy(pda); esl_stack_Destroy(aux); ESL_EXCEPTION(eslEINVAL, "pseudoknots not permitted yet"); } } /* Now we know i,j pair; and we know how many faces are * above them; and we know the max depth of those faces. 
* That's enough to label the pair in WUSS notation. * if nfaces == 0, minface is -1; <> a closing bp of a hairpin. * if nfaces == 1, inherit minface, we're continuing a stem. * if nfaces > 1, bump minface in depth; we're closing a bifurc. */ if (nfaces > 1 && minface > -4) minface--; switch (minface) { case -1: ss[i-1] = '<'; ss[j-1] = '>'; break; case -2: ss[i-1] = '('; ss[j-1] = ')'; break; case -3: ss[i-1] = '['; ss[j-1] = ']'; break; case -4: ss[i-1] = '{'; ss[j-1] = '}'; break; default: esl_stack_Destroy(pda); esl_stack_Destroy(aux); ESL_EXCEPTION(eslEINCONCEIVABLE, "no such face code"); } if (esl_stack_IPush(pda, minface) != eslOK) goto FINISH; /* Now, aux contains all the unpaired residues we need to label, * according to the # of faces "above" them: * nfaces = 0: hairpin loop * nfaces = 1: bulge or interior loop * nfaces > 1: multifurc */ while (esl_stack_IPop(aux, &i) == eslOK) { switch (nfaces) { case 0: ss[i-1] = '_'; break; case 1: ss[i-1] = '-'; break; default: ss[i-1] = ','; break; /* nfaces > 1 */ } } } /* finished processing a subseq enclosed by a bp */ } /* finished loop over j: end position on seq, 1..n*/ /* Anything that's left in the pda is either a face counter * or external single-strand. Face counters are negative; * position indices are positive. */ while (esl_stack_IPop(pda, &i) == eslOK) if (i > 0) ss[i-1] = ':'; ss[n] = '\0'; status = eslOK; FINISH: if (pda != NULL) esl_stack_Destroy(pda); if (aux != NULL) esl_stack_Destroy(aux); return status; }
/* Function: esl_ct2simplewuss() * Incept: ER, Wed Aug 22 13:31:54 EDT 2012 [Janelia] * * Purpose: Convert a CT array <ct> for <n> residues (1..n) to a simple WUSS * format string <ss>. <ss> must be allocated for at least * n+1 chars (+1 for the terminal NUL). * * This function can be used with the <ct> of a secondary * structure including arbitrary pseudoknots, or for the * <ct> or a tertiary structure (say cWH, tWH, cSS,... H bonds). * * The string <ss> has basepairs annotated as <>, Aa, Bb, ..., Zz; * unpaired bases are annotated as '.'. * * Attemting to convert a <ct> that requires more letters * than [A-Z] will return an <eslEINVAL> error. * * Attempting to convert a <ct> that involves triplet interactions * will return an <eslEINVAL> error. * * Returns: <eslOK> on success. * * Throws: <eslEMEM> on allocation failure. * <eslEINCONCEIVABLE> on internal failure. */ int esl_ct2simplewuss(int *ct, int n, char *ss) { int rb[26]; /* array that delimits the right bound of a pseudoknot character */ ESL_STACK *pda = NULL; /* stack for "main" secondary structure */ ESL_STACK *auxpk = NULL; /* aux stack for pseudoknot */ int *cct = NULL; /* copy of ct vector */ int leftbound, rightbound; /* left and right bound to find basepairs belonging to a given pseudoknot */ int xpk = 0; /* number of pseudoknot chararactes used */ int npk = 0; /* number of pseudoknots */ int npairs = 0; /* total number of basepairs */ int npairs_reached = 0; /* number of basepairs found so far */ int found_partner; /* true if we've found left partner of a given base in stack pda */ int i,j,k; /* sequence indices */ int x; /* index for pseudoknot characters */ int status = eslEMEM; /* exit status 'til proven otherwise */ /* total number of basepairs */ for (j = 1; j <= n; j ++) { if (ct[j] > 0 && j < ct[j]) npairs ++; } /* Copy of ct; if a pseudoknotted structure, cct will be modified later. 
*/ ESL_ALLOC(cct, sizeof(int)*(n+1)); esl_vec_ICopy(ct, (n+1), cct); /* Initialize rightbounds for all 26 pseudoknot indices */ for (x = 0; x < 26; x ++) rb[x] = -1; /* init ss[] to single stranded */ for (j = 0; j < n; j ++) { ss[j] = '.'; } ss[n] = '\0'; /* Initialization*/ if ((pda = esl_stack_ICreate()) == NULL) goto FINISH; if ((auxpk = esl_stack_ICreate()) == NULL) goto FINISH; for (j = 1; j <= n; j++) { if (cct[j] == 0) /* unpaired: push j. */ { if (esl_stack_IPush(pda, j) != eslOK) goto FINISH; } else if (cct[j] > j) /* left side of a bp: push j. */ { if (esl_stack_IPush(pda, j) != eslOK) goto FINISH; } else /* right side of a bp; main routine: fingh the left partner */ { found_partner = FALSE; /* Pop back until we find the left partner of j; * In case this is not a nested structure, finding * the left partner of j will require to put bases * aside into stack auxpk. */ while (esl_stack_ObjectCount(pda)) { if (esl_stack_IPop(pda, &i) != eslOK) goto FINISH; if (cct[i] == j) /* we found the i,j pair. */ { found_partner = TRUE; npairs_reached ++; ss[i-1] = '<'; ss[j-1] = '>'; break; } else if (cct[i] == 0) { if (ct[i] == 0) ss[i-1] = '.'; } else /* cct[i]>0, != j: i is paired, but not to j: pseudoknot! */ { /* i is in the way to find j's left partner. * Move i to stack auxpk; resolve pseudoknot(s) after we've found partern for j. */ if (esl_stack_IPush(auxpk, i) != eslOK) goto FINISH; } } if (!found_partner) { esl_stack_Destroy(pda); esl_stack_Destroy(auxpk); free(cct); ESL_EXCEPTION(eslEINVAL, "Cannot find left partner (%d) of base %d. Likely a triplet", ct[j], j); } } /* finished finding the left partner of j */ /* After we've found the left partner of j, resolve pks found along the way. * Then, remove the pseudoknotted based from cct so we can find the rest of the structure. 
*/ if (esl_stack_ObjectCount(auxpk)) { /* init for first pseudoknot */ leftbound = cct[j]; rightbound = leftbound + 1; xpk = -1; /* start with 'A' if possible again */ while (esl_stack_IPop(auxpk, &i) == eslOK) { for (k = rightbound-1; k > leftbound; k --) { if (cct[k] == 0) { continue; } else if (cct[k] > rightbound) { continue; } else if (cct[k] == i) { break; } /* i continues the given pseudoknot */ else { k = leftbound; break; } /* a new pseudoknot */ } if (k == leftbound) /* a new pseudoknot */ { npk ++; xpk ++; /* figure out if we can use this alphabet index, or bump it up if necessary */ while (i < rb[xpk]) { xpk ++; } leftbound = (rightbound < cct[i])? rightbound : cct[j]; rightbound = cct[i]; } npairs_reached ++; if (xpk+(int)('a') <= (int)('z')) { /* update the rightbound of this pk index if necessary */ if (cct[i] > rb[xpk]) rb[xpk] = cct[i]; /* Add pk indices for this basepair */ ss[i-1] = (char)(xpk+(int)('A')); ss[cct[i]-1] = (char)(xpk+(int)('a')); /* remove pseudoknotted pair from cct */ cct[i] = 0; cct[ct[i]] = 0; } else ESL_EXCEPTION(eslEINVAL, "Don't have enough letters to describe all different pseudoknots."); } } /* while there is something in auxpk stack */ } /* finished loop over j: end position on seq, 1..n*/ status = eslOK; ERROR: FINISH: if (npairs != npairs_reached) ESL_EXCEPTION(eslFAIL, "found %d out of %d pairs.", npairs_reached, npairs); if (pda != NULL) esl_stack_Destroy(pda); if (auxpk != NULL) esl_stack_Destroy(auxpk); if (cct != NULL) free(cct); return status; }
/* Function: esl_ct2wuss() * Incept: SRE, Wed Feb 16 11:22:53 2005 [St. Louis] * * Purpose: Convert a CT array <ct> for <n> residues (1..n) to a WUSS * format string <ss>. <ss> must be allocated for at least * n+1 chars (+1 for the terminal NUL). * * ER, Sat Aug 18 13:22:03 EDT 2012 * esl\_ct2wuss() extended to deal with pseudoknots structures. * Pseudoknots are annotated as AA...aa, BB...bb,..., ZZ..zz. * Attemting to convert a <ct> that requires more letters * than [A-Z] will return an <eslEINVAL> error. * * Attempting to convert a <ct> that involves triplet interactions * will return an <eslEINVAL> error. * * Returns: <eslOK> on success. * * Throws: <eslEMEM> on allocation failure. * <eslEINCONCEIVABLE> on internal failure. */ int esl_ct2wuss(int *ct, int n, char *ss) { int rb[26]; /* array that delimits the right bound of a pseudoknot character */ ESL_STACK *pda = NULL; /* stack for "main" secondary structure */ ESL_STACK *auxpk = NULL; /* aux stack for pseudoknot */ ESL_STACK *auxss = NULL; /* aux stack for single stranded */ int *cct = NULL; /* copy of ct vector */ int nfaces; /* number of faces in a cWW structure */ int minface; /* max depth of faces in a cWW structure */ int leftbound, rightbound; /* left and right bound to find basepairs belonging to a given pseudoknot */ int xpk = 0; /* number of pseudoknot chararactes used */ int npk = 0; /* number of pseudoknots */ int npairs = 0; /* total number of basepairs */ int npairs_reached = 0; /* number of basepairs found so far */ int found_partner; /* true if we've found left partner of a given base in stack pda */ int i,j,k; /* sequence indices */ int x; /* index for pseudoknot characters */ int status = eslEMEM; /* exit status 'til proven otherwise */ /* total number of basepairs */ for (j = 1; j <= n; j ++) { if (ct[j] > 0 && j < ct[j]) npairs ++; } /* Copy of ct; if a pseudoknotted structure, cct will be modified later. 
*/ ESL_ALLOC(cct, sizeof(int)*(n+1)); esl_vec_ICopy(ct, (n+1), cct); /* Initialize rightbounds for all 26 pseudoknot indices */ for (x = 0; x < 26; x ++) rb[x] = -1; /* init ss[] to single stranded */ for (j = 0; j < n; j ++) { ss[j] = ':'; } ss[n] = '\0'; /* Initialization*/ if ((pda = esl_stack_ICreate()) == NULL) goto FINISH; if ((auxpk = esl_stack_ICreate()) == NULL) goto FINISH; if ((auxss = esl_stack_ICreate()) == NULL) goto FINISH; for (j = 1; j <= n; j++) { if (cct[j] == 0) /* unpaired: push j. */ { if (esl_stack_IPush(pda, j) != eslOK) goto FINISH; } else if (cct[j] > j) /* left side of a bp: push j. */ { if (esl_stack_IPush(pda, j) != eslOK) goto FINISH; } else /* right side of a bp; main routine: fingh the left partner */ { found_partner = FALSE; /* Pop back until we find the left partner of j; * In case this is not a nested structure, finding * the left partner of j will require to put bases * aside into stack auxpk. * * After we find the left partner of j, * store single stranded residues in auxss; * keep track of #faces and the maximum face depth. */ nfaces = 0; minface = -1; while (esl_stack_ObjectCount(pda)) { if (esl_stack_IPop(pda, &i) != eslOK) goto FINISH; if (i < 0) /* a face counter */ { nfaces++; if (i < minface) minface = i; } else if (cct[i] == j) /* we found the i,j pair. */ { found_partner = TRUE; npairs_reached ++; /* Now we know i,j pair; and we know how many faces are * above them; and we know the max depth of those faces. * That's enough to label the pair in WUSS notation. * if nfaces == 0, minface is -1; <> a closing bp of a hairpin. * if nfaces == 1, inherit minface, we're continuing a stem. * if nfaces > 1, bump minface in depth; we're closing a bifurc. 
*/ if (nfaces > 1 && minface > -4) minface--; switch (minface) { case -1: ss[i-1] = '<'; ss[j-1] = '>'; break; case -2: ss[i-1] = '('; ss[j-1] = ')'; break; case -3: ss[i-1] = '['; ss[j-1] = ']'; break; case -4: ss[i-1] = '{'; ss[j-1] = '}'; break; default: esl_stack_Destroy(pda); esl_stack_Destroy(auxpk); esl_stack_Destroy(auxss); free(cct); ESL_EXCEPTION(eslEINCONCEIVABLE, "no such face code"); } if (esl_stack_IPush(pda, minface) != eslOK) goto FINISH; /* Now, aux contains all the unpaired residues we need to label, * according to the # of faces "above" them: * nfaces = 0: hairpin loop * nfaces = 1: bulge or interior loop * nfaces > 1: multifurc */ while (esl_stack_IPop(auxss, &i) == eslOK) { switch (nfaces) { case 0: ss[i-1] = '_'; break; case 1: ss[i-1] = '-'; break; default: ss[i-1] = ','; break; /* nfaces > 1 */ } } break; } else if (cct[i] == 0) { /* add to auxss only if originally sigle stranded */ if (ct[i] == 0) { if (esl_stack_IPush(auxss, i) != eslOK) goto FINISH; } } else /* cct[i]>0, != j: i is paired, but not to j: pseudoknot! */ { /* i is in the way to find j's left partner. * Move i to stack auxpk; resolve pseudoknot(s) after we've found partern for j. */ if (esl_stack_IPush(auxpk, i) != eslOK) goto FINISH; } } if (!found_partner) { esl_stack_Destroy(pda); esl_stack_Destroy(auxpk); esl_stack_Destroy(auxss); free(cct); ESL_EXCEPTION(eslEINVAL, "Cannot find left partner (%d) of base %d. Likely a triplet", ct[j], j); } } /* finished finding the left partner of j */ /* After we've found the left partner of j, resolve pks found along the way. * Then, remove the pseudoknotted based from cct so we can find the rest of the structure. 
*/ if (esl_stack_ObjectCount(auxpk)) { /* init for first pseudoknot */ leftbound = cct[j]; rightbound = leftbound + 1; xpk = -1; /* start with 'A' if possible again */ while (esl_stack_IPop(auxpk, &i) == eslOK) { for (k = rightbound-1; k > leftbound; k --) { if (cct[k] == 0) { continue; } else if (cct[k] > rightbound) { continue; } else if (cct[k] == i) { break; } /* i continues the given pseudoknot */ else { k = leftbound; break; } /* a new pseudoknot */ } if (k == leftbound) /* a new pseudoknot */ { npk ++; xpk ++; /* figure out if we can use this alphabet index, or bump it up if necessary */ while (i < rb[xpk]) { xpk ++; } leftbound = (rightbound < cct[i])? rightbound : cct[j]; rightbound = cct[i]; } npairs_reached ++; if (xpk+(int)('a') <= (int)('z')) { /* update the rightbound of this pk index if necessary */ if (cct[i] > rb[xpk]) rb[xpk] = cct[i]; /* Add pk indices for this basepair */ ss[i-1] = (char)(xpk+(int)('A')); ss[cct[i]-1] = (char)(xpk+(int)('a')); /* remove pseudoknotted pair from cct */ cct[i] = 0; cct[ct[i]] = 0; } else ESL_EXCEPTION(eslEINVAL, "Don't have enough letters to describe all different pseudoknots."); } } /* while there is something in auxpk stack */ } /* finished loop over j: end position on seq, 1..n*/ status = eslOK; ERROR: FINISH: if (npairs != npairs_reached) ESL_EXCEPTION(eslFAIL, "found %d out of %d pairs.", npairs_reached, npairs); if (pda != NULL) esl_stack_Destroy(pda); if (auxpk != NULL) esl_stack_Destroy(auxpk); if (auxss != NULL) esl_stack_Destroy(auxss); if (cct != NULL) free(cct); return status; }