/* Function:  cp9_Copy()
 * Synopsis:  Duplicate the parameters of one CM plan 9 HMM into another.
 *
 * Purpose:   Fill the already-allocated cp9 HMM <dst> with the contents
 *            of cp9 HMM <src>. Both models must have the same number
 *            of nodes (<M>).
 *
 *            No normalization check is performed on <src>; the caller
 *            is responsible for that. Integer bit scores are copied
 *            only when <src> carries the <CPLAN9_HASBITS> flag;
 *            otherwise the score arrays of <dst> are left untouched.
 *
 *            The alphabet is shared, not duplicated: <dst->abc> is set
 *            to the same pointer as <src->abc>. For every position
 *            with a positive <el_from_ct>, new <el_from_idx> and
 *            <el_from_cmnd> arrays are allocated in <dst>.
 *
 * Returns:   <eslOK> on success; <eslEINVAL> if <src->M> differs from
 *            <dst->M>.
 *
 * Throws:    <eslEMEM> on allocation error.
 */
int
cp9_Copy(const CP9_t *src, CP9_t *dst)
{
  int       status;                 /* set by ESL_ALLOC() on failure */
  int       pos;                    /* node / position index         */
  int       n_el;                   /* # of EL sources for one node  */
  const int has_bits = ((src->flags & CPLAN9_HASBITS) != 0);
  int       M;
  int       K;

  if (src->M != dst->M) return eslEINVAL;

  dst->abc = src->abc;              /* shared pointer, not a deep copy */
  M        = src->M;
  K        = src->abc->K;

  /* Per-node probability parameters. */
  for (pos = 0; pos <= M; pos++)
    {
      esl_vec_FCopy(src->t[pos],   cp9_NTRANS, dst->t[pos]);
      esl_vec_FCopy(src->mat[pos], K,          dst->mat[pos]);
      esl_vec_FCopy(src->ins[pos], K,          dst->ins[pos]);
    }
  esl_vec_FCopy(src->begin, M + 1, dst->begin);
  esl_vec_FCopy(src->end,   M + 1, dst->end);

  /* Integer scores exist only in a logoddsified model. The 2D score
   * arrays are copied through their backing linear memory.
   */
  if (has_bits)
    {
      esl_vec_ICopy(src->bsc_mem, M + 1,                      dst->bsc_mem);
      esl_vec_ICopy(src->esc_mem, M + 1,                      dst->esc_mem);
      esl_vec_ICopy(src->tsc_mem, cp9_NTRANS * (M + 1),       dst->tsc_mem);
      esl_vec_ICopy(src->msc_mem, src->abc->Kp * (M + 1),     dst->msc_mem);
      esl_vec_ICopy(src->isc_mem, src->abc->Kp * (M + 1),     dst->isc_mem);
      esl_vec_ICopy(src->otsc,    cp9O_NTRANS * (M + 1),      dst->otsc);
    }

  /* EL state information. */
  dst->el_self   = src->el_self;
  dst->el_selfsc = src->el_selfsc;
  esl_vec_ICopy(src->has_el,     M + 1, dst->has_el);
  esl_vec_ICopy(src->el_from_ct, M + 2, dst->el_from_ct);

  for (pos = 0; pos <= M + 1; pos++)
    {
      n_el = src->el_from_ct[pos];
      if (n_el <= 0) continue;      /* nothing to allocate or copy here */

      ESL_ALLOC(dst->el_from_idx[pos],  sizeof(int) * n_el);
      ESL_ALLOC(dst->el_from_cmnd[pos], sizeof(int) * n_el);
      esl_vec_ICopy(src->el_from_idx[pos],  n_el, dst->el_from_idx[pos]);
      esl_vec_ICopy(src->el_from_cmnd[pos], n_el, dst->el_from_cmnd[pos]);
    }

  /* Null model parameters and remaining scalars. */
  dst->null2_omega = src->null2_omega;
  dst->null3_omega = src->null3_omega;
  esl_vec_FCopy(src->null, K, dst->null);
  dst->p1    = src->p1;
  dst->flags = src->flags;

  return eslOK;

 ERROR:
  return status;
}
/* Function: esl_ct2simplewuss() * Incept: ER, Wed Aug 22 13:31:54 EDT 2012 [Janelia] * * Purpose: Convert a CT array <ct> for <n> residues (1..n) to a simple WUSS * format string <ss>. <ss> must be allocated for at least * n+1 chars (+1 for the terminal NUL). * * This function can be used with the <ct> of a secondary * structure including arbitrary pseudoknots, or for the * <ct> or a tertiary structure (say cWH, tWH, cSS,... H bonds). * * The string <ss> has basepairs annotated as <>, Aa, Bb, ..., Zz; * unpaired bases are annotated as '.'. * * Attemting to convert a <ct> that requires more letters * than [A-Z] will return an <eslEINVAL> error. * * Attempting to convert a <ct> that involves triplet interactions * will return an <eslEINVAL> error. * * Returns: <eslOK> on success. * * Throws: <eslEMEM> on allocation failure. * <eslEINCONCEIVABLE> on internal failure. */ int esl_ct2simplewuss(int *ct, int n, char *ss) { int rb[26]; /* array that delimits the right bound of a pseudoknot character */ ESL_STACK *pda = NULL; /* stack for "main" secondary structure */ ESL_STACK *auxpk = NULL; /* aux stack for pseudoknot */ int *cct = NULL; /* copy of ct vector */ int leftbound, rightbound; /* left and right bound to find basepairs belonging to a given pseudoknot */ int xpk = 0; /* number of pseudoknot chararactes used */ int npk = 0; /* number of pseudoknots */ int npairs = 0; /* total number of basepairs */ int npairs_reached = 0; /* number of basepairs found so far */ int found_partner; /* true if we've found left partner of a given base in stack pda */ int i,j,k; /* sequence indices */ int x; /* index for pseudoknot characters */ int status = eslEMEM; /* exit status 'til proven otherwise */ /* total number of basepairs */ for (j = 1; j <= n; j ++) { if (ct[j] > 0 && j < ct[j]) npairs ++; } /* Copy of ct; if a pseudoknotted structure, cct will be modified later. 
*/ ESL_ALLOC(cct, sizeof(int)*(n+1)); esl_vec_ICopy(ct, (n+1), cct); /* Initialize rightbounds for all 26 pseudoknot indices */ for (x = 0; x < 26; x ++) rb[x] = -1; /* init ss[] to single stranded */ for (j = 0; j < n; j ++) { ss[j] = '.'; } ss[n] = '\0'; /* Initialization*/ if ((pda = esl_stack_ICreate()) == NULL) goto FINISH; if ((auxpk = esl_stack_ICreate()) == NULL) goto FINISH; for (j = 1; j <= n; j++) { if (cct[j] == 0) /* unpaired: push j. */ { if (esl_stack_IPush(pda, j) != eslOK) goto FINISH; } else if (cct[j] > j) /* left side of a bp: push j. */ { if (esl_stack_IPush(pda, j) != eslOK) goto FINISH; } else /* right side of a bp; main routine: fingh the left partner */ { found_partner = FALSE; /* Pop back until we find the left partner of j; * In case this is not a nested structure, finding * the left partner of j will require to put bases * aside into stack auxpk. */ while (esl_stack_ObjectCount(pda)) { if (esl_stack_IPop(pda, &i) != eslOK) goto FINISH; if (cct[i] == j) /* we found the i,j pair. */ { found_partner = TRUE; npairs_reached ++; ss[i-1] = '<'; ss[j-1] = '>'; break; } else if (cct[i] == 0) { if (ct[i] == 0) ss[i-1] = '.'; } else /* cct[i]>0, != j: i is paired, but not to j: pseudoknot! */ { /* i is in the way to find j's left partner. * Move i to stack auxpk; resolve pseudoknot(s) after we've found partern for j. */ if (esl_stack_IPush(auxpk, i) != eslOK) goto FINISH; } } if (!found_partner) { esl_stack_Destroy(pda); esl_stack_Destroy(auxpk); free(cct); ESL_EXCEPTION(eslEINVAL, "Cannot find left partner (%d) of base %d. Likely a triplet", ct[j], j); } } /* finished finding the left partner of j */ /* After we've found the left partner of j, resolve pks found along the way. * Then, remove the pseudoknotted based from cct so we can find the rest of the structure. 
*/ if (esl_stack_ObjectCount(auxpk)) { /* init for first pseudoknot */ leftbound = cct[j]; rightbound = leftbound + 1; xpk = -1; /* start with 'A' if possible again */ while (esl_stack_IPop(auxpk, &i) == eslOK) { for (k = rightbound-1; k > leftbound; k --) { if (cct[k] == 0) { continue; } else if (cct[k] > rightbound) { continue; } else if (cct[k] == i) { break; } /* i continues the given pseudoknot */ else { k = leftbound; break; } /* a new pseudoknot */ } if (k == leftbound) /* a new pseudoknot */ { npk ++; xpk ++; /* figure out if we can use this alphabet index, or bump it up if necessary */ while (i < rb[xpk]) { xpk ++; } leftbound = (rightbound < cct[i])? rightbound : cct[j]; rightbound = cct[i]; } npairs_reached ++; if (xpk+(int)('a') <= (int)('z')) { /* update the rightbound of this pk index if necessary */ if (cct[i] > rb[xpk]) rb[xpk] = cct[i]; /* Add pk indices for this basepair */ ss[i-1] = (char)(xpk+(int)('A')); ss[cct[i]-1] = (char)(xpk+(int)('a')); /* remove pseudoknotted pair from cct */ cct[i] = 0; cct[ct[i]] = 0; } else ESL_EXCEPTION(eslEINVAL, "Don't have enough letters to describe all different pseudoknots."); } } /* while there is something in auxpk stack */ } /* finished loop over j: end position on seq, 1..n*/ status = eslOK; ERROR: FINISH: if (npairs != npairs_reached) ESL_EXCEPTION(eslFAIL, "found %d out of %d pairs.", npairs_reached, npairs); if (pda != NULL) esl_stack_Destroy(pda); if (auxpk != NULL) esl_stack_Destroy(auxpk); if (cct != NULL) free(cct); return status; }
/* Function: esl_ct2wuss() * Incept: SRE, Wed Feb 16 11:22:53 2005 [St. Louis] * * Purpose: Convert a CT array <ct> for <n> residues (1..n) to a WUSS * format string <ss>. <ss> must be allocated for at least * n+1 chars (+1 for the terminal NUL). * * ER, Sat Aug 18 13:22:03 EDT 2012 * esl\_ct2wuss() extended to deal with pseudoknots structures. * Pseudoknots are annotated as AA...aa, BB...bb,..., ZZ..zz. * Attemting to convert a <ct> that requires more letters * than [A-Z] will return an <eslEINVAL> error. * * Attempting to convert a <ct> that involves triplet interactions * will return an <eslEINVAL> error. * * Returns: <eslOK> on success. * * Throws: <eslEMEM> on allocation failure. * <eslEINCONCEIVABLE> on internal failure. */ int esl_ct2wuss(int *ct, int n, char *ss) { int rb[26]; /* array that delimits the right bound of a pseudoknot character */ ESL_STACK *pda = NULL; /* stack for "main" secondary structure */ ESL_STACK *auxpk = NULL; /* aux stack for pseudoknot */ ESL_STACK *auxss = NULL; /* aux stack for single stranded */ int *cct = NULL; /* copy of ct vector */ int nfaces; /* number of faces in a cWW structure */ int minface; /* max depth of faces in a cWW structure */ int leftbound, rightbound; /* left and right bound to find basepairs belonging to a given pseudoknot */ int xpk = 0; /* number of pseudoknot chararactes used */ int npk = 0; /* number of pseudoknots */ int npairs = 0; /* total number of basepairs */ int npairs_reached = 0; /* number of basepairs found so far */ int found_partner; /* true if we've found left partner of a given base in stack pda */ int i,j,k; /* sequence indices */ int x; /* index for pseudoknot characters */ int status = eslEMEM; /* exit status 'til proven otherwise */ /* total number of basepairs */ for (j = 1; j <= n; j ++) { if (ct[j] > 0 && j < ct[j]) npairs ++; } /* Copy of ct; if a pseudoknotted structure, cct will be modified later. 
*/ ESL_ALLOC(cct, sizeof(int)*(n+1)); esl_vec_ICopy(ct, (n+1), cct); /* Initialize rightbounds for all 26 pseudoknot indices */ for (x = 0; x < 26; x ++) rb[x] = -1; /* init ss[] to single stranded */ for (j = 0; j < n; j ++) { ss[j] = ':'; } ss[n] = '\0'; /* Initialization*/ if ((pda = esl_stack_ICreate()) == NULL) goto FINISH; if ((auxpk = esl_stack_ICreate()) == NULL) goto FINISH; if ((auxss = esl_stack_ICreate()) == NULL) goto FINISH; for (j = 1; j <= n; j++) { if (cct[j] == 0) /* unpaired: push j. */ { if (esl_stack_IPush(pda, j) != eslOK) goto FINISH; } else if (cct[j] > j) /* left side of a bp: push j. */ { if (esl_stack_IPush(pda, j) != eslOK) goto FINISH; } else /* right side of a bp; main routine: fingh the left partner */ { found_partner = FALSE; /* Pop back until we find the left partner of j; * In case this is not a nested structure, finding * the left partner of j will require to put bases * aside into stack auxpk. * * After we find the left partner of j, * store single stranded residues in auxss; * keep track of #faces and the maximum face depth. */ nfaces = 0; minface = -1; while (esl_stack_ObjectCount(pda)) { if (esl_stack_IPop(pda, &i) != eslOK) goto FINISH; if (i < 0) /* a face counter */ { nfaces++; if (i < minface) minface = i; } else if (cct[i] == j) /* we found the i,j pair. */ { found_partner = TRUE; npairs_reached ++; /* Now we know i,j pair; and we know how many faces are * above them; and we know the max depth of those faces. * That's enough to label the pair in WUSS notation. * if nfaces == 0, minface is -1; <> a closing bp of a hairpin. * if nfaces == 1, inherit minface, we're continuing a stem. * if nfaces > 1, bump minface in depth; we're closing a bifurc. 
*/ if (nfaces > 1 && minface > -4) minface--; switch (minface) { case -1: ss[i-1] = '<'; ss[j-1] = '>'; break; case -2: ss[i-1] = '('; ss[j-1] = ')'; break; case -3: ss[i-1] = '['; ss[j-1] = ']'; break; case -4: ss[i-1] = '{'; ss[j-1] = '}'; break; default: esl_stack_Destroy(pda); esl_stack_Destroy(auxpk); esl_stack_Destroy(auxss); free(cct); ESL_EXCEPTION(eslEINCONCEIVABLE, "no such face code"); } if (esl_stack_IPush(pda, minface) != eslOK) goto FINISH; /* Now, aux contains all the unpaired residues we need to label, * according to the # of faces "above" them: * nfaces = 0: hairpin loop * nfaces = 1: bulge or interior loop * nfaces > 1: multifurc */ while (esl_stack_IPop(auxss, &i) == eslOK) { switch (nfaces) { case 0: ss[i-1] = '_'; break; case 1: ss[i-1] = '-'; break; default: ss[i-1] = ','; break; /* nfaces > 1 */ } } break; } else if (cct[i] == 0) { /* add to auxss only if originally sigle stranded */ if (ct[i] == 0) { if (esl_stack_IPush(auxss, i) != eslOK) goto FINISH; } } else /* cct[i]>0, != j: i is paired, but not to j: pseudoknot! */ { /* i is in the way to find j's left partner. * Move i to stack auxpk; resolve pseudoknot(s) after we've found partern for j. */ if (esl_stack_IPush(auxpk, i) != eslOK) goto FINISH; } } if (!found_partner) { esl_stack_Destroy(pda); esl_stack_Destroy(auxpk); esl_stack_Destroy(auxss); free(cct); ESL_EXCEPTION(eslEINVAL, "Cannot find left partner (%d) of base %d. Likely a triplet", ct[j], j); } } /* finished finding the left partner of j */ /* After we've found the left partner of j, resolve pks found along the way. * Then, remove the pseudoknotted based from cct so we can find the rest of the structure. 
*/ if (esl_stack_ObjectCount(auxpk)) { /* init for first pseudoknot */ leftbound = cct[j]; rightbound = leftbound + 1; xpk = -1; /* start with 'A' if possible again */ while (esl_stack_IPop(auxpk, &i) == eslOK) { for (k = rightbound-1; k > leftbound; k --) { if (cct[k] == 0) { continue; } else if (cct[k] > rightbound) { continue; } else if (cct[k] == i) { break; } /* i continues the given pseudoknot */ else { k = leftbound; break; } /* a new pseudoknot */ } if (k == leftbound) /* a new pseudoknot */ { npk ++; xpk ++; /* figure out if we can use this alphabet index, or bump it up if necessary */ while (i < rb[xpk]) { xpk ++; } leftbound = (rightbound < cct[i])? rightbound : cct[j]; rightbound = cct[i]; } npairs_reached ++; if (xpk+(int)('a') <= (int)('z')) { /* update the rightbound of this pk index if necessary */ if (cct[i] > rb[xpk]) rb[xpk] = cct[i]; /* Add pk indices for this basepair */ ss[i-1] = (char)(xpk+(int)('A')); ss[cct[i]-1] = (char)(xpk+(int)('a')); /* remove pseudoknotted pair from cct */ cct[i] = 0; cct[ct[i]] = 0; } else ESL_EXCEPTION(eslEINVAL, "Don't have enough letters to describe all different pseudoknots."); } } /* while there is something in auxpk stack */ } /* finished loop over j: end position on seq, 1..n*/ status = eslOK; ERROR: FINISH: if (npairs != npairs_reached) ESL_EXCEPTION(eslFAIL, "found %d out of %d pairs.", npairs_reached, npairs); if (pda != NULL) esl_stack_Destroy(pda); if (auxpk != NULL) esl_stack_Destroy(auxpk); if (auxss != NULL) esl_stack_Destroy(auxss); if (cct != NULL) free(cct); return status; }