/* a2m_padding_digital()
 *
 * Purpose:  Rebuild every digital sequence of <msa> so that all of them
 *           share one set of alignment columns, and write a matching
 *           <msa->rf> annotation line.
 *
 *           The final alignment length is <ncons> consensus columns
 *           plus the sum of <nins[0..ncons]>, where <nins[c]> is the
 *           number of insert columns reserved before consensus column
 *           <c> (and <nins[ncons]> the number after the last one).
 *           In <msa->rf>, insert columns are marked '.' and consensus
 *           columns 'x'.
 *
 *           For each sequence, symbols whose <csflag[idx][spos]> is
 *           FALSE are copied into the current insert region, the
 *           remainder of that region is filled with gap symbols, and
 *           then one symbol is copied into the consensus column.
 *
 * Args:     msa    - digital alignment; <ax[idx]>, <rf>, <alen> are rewritten
 *           csflag - per sequence, per input position (0-based): FALSE
 *                    for an insert symbol, non-FALSE otherwise. The scan
 *                    for insert symbols has no other stopping rule, so
 *                    each row must hold a non-FALSE entry after its
 *                    last symbol.
 *           nins   - insert column counts, [0..ncons]
 *           ncons  - number of consensus columns
 *
 * Returns:  <eslOK> on success; <msa->alen> is set to the new length.
 *
 * Throws:   the <status> set by <ESL_ALLOC> on allocation failure.
 *           Sequences processed before the failure have already been
 *           replaced in <msa>.
 */
static int
a2m_padding_digital(ESL_MSA *msa, char **csflag, int *nins, int ncons)
{
  ESL_DSQ    *newax = NULL;	/* padded sequence under construction; ownership moves into msa->ax[] */
  ESL_DSQ    *oldax;		/* the unpadded sequence being replaced                               */
  ESL_DSQ     gap   = esl_abc_XGetGap(msa->abc);
  const char *iscons;		/* csflag row for the current sequence                                */
  int         apos, cpos, spos;	/* counters: aligned column, consensus column, input position         */
  int         alen;
  int         nused;		/* columns of the current insert region filled so far                 */
  int         idx;
  int         status;

  /* Total width: consensus columns plus every reserved insert column. */
  alen = ncons;
  for (cpos = 0; cpos <= ncons; cpos++)
    alen += nins[cpos];

  /* Reference annotation: '.' over insert columns, 'x' over consensus columns. */
  ESL_ALLOC(msa->rf, sizeof(char) * (alen+1));
  apos = 0;
  for (cpos = 0; cpos <= ncons; cpos++)
    {
      for (nused = nins[cpos]; nused > 0; nused--)
	msa->rf[apos++] = '.';
      if (cpos < ncons)
	msa->rf[apos++] = 'x';
    }
  msa->rf[apos] = '\0';

  for (idx = 0; idx < msa->nseq; idx++)
    {
      ESL_ALLOC(newax, sizeof(ESL_DSQ) * (alen + 2));
      newax[0] = eslDSQ_SENTINEL;

      oldax  = msa->ax[idx];
      iscons = csflag[idx];
      apos   = 0;
      spos   = 0;
      for (cpos = 0; cpos <= ncons; cpos++)
	{
	  /* this sequence's own insert symbols come first... */
	  for (nused = 0; iscons[spos] == FALSE; nused++)
	    newax[++apos] = oldax[++spos];

	  /* ...then gaps, up to the reserved width of the region */
	  for (; nused < nins[cpos]; nused++)
	    newax[++apos] = gap;

	  /* one consensus column follows every region except the last */
	  if (cpos < ncons)
	    newax[++apos] = oldax[++spos];
	}
      ESL_DASSERT1( (msa->ax[idx][spos+1] == eslDSQ_SENTINEL) );
      ESL_DASSERT1( (apos == alen) );
      newax[alen+1] = eslDSQ_SENTINEL;

      /* swap the padded sequence in */
      free(msa->ax[idx]);
      msa->ax[idx] = newax;
      newax        = NULL;
    }

  msa->alen = alen;
  return eslOK;

 ERROR:
  free(newax);
  return status;
}
/* Function:  rejustify_insertions_digital()
 * Synopsis:  Shift insert residues toward the match column on their right.
 * Incept:    SRE, Thu Oct 23 13:06:12 2008 [Janelia]
 *
 * Purpose:   For each sequence of digital alignment <msa>, and for each
 *            node k = 0..M-1 whose insert region is more than one
 *            column wide (<inserts[k] > 1>), repack the residues of
 *            that region:
 *              - for k == 0, every residue is pushed to the right edge;
 *              - for k >= 1, with n residues in the region, the first
 *                floor(n/2) columns of the region are left alone and
 *                the residues found in the remaining columns are pushed
 *                to the right edge.
 *            Columns vacated by the shift are set to the gap symbol.
 *            When <msa->pp[idx]> exists, its characters move with
 *            their residues and vacated positions become '.'.
 *
 *            The left share is never moved; the code only skips over
 *            the first floor(n/2) columns, so it presumes the residues
 *            arrive left-justified within the region.
 *
 *            The insert region after node M is not visited.
 *
 * Args:      msa     - alignment to rejustify
 *                      digital mode: ax[0..nseq-1][1..alen] and abc is valid
 *                      text mode:    aseq[0..nseq-1][0..alen-1]
 *            inserts - # of inserted columns following node k, for k=0,1..M
 *                      inserts[0] is for N state; inserts[M] is for C state
 *            matmap  - index of column associated with node k [k=0,1..M; matmap[0] = 0]
 *                      this is an alignment column index 1..alen, same offset as <ax>
 *                      if applied to text mode aseq or annotation, remember to -1
 *                      if no residues use match state k, matmap[k] is the
 *                      index of the last column used before node k's columns
 *                      start: thus matmap[k]+1 is always the start of
 *                      node k's insertion (if any).
 *            matuse  - TRUE if an alignment column is associated with node k: [k=0,1..M; matuse[0] = 0].
 *                      if matuse[k] == 0, every sequence deleted at node k,
 *                      and we're collapsing the column rather than showing all
 *                      gaps.
 *            M       - number of nodes
 *
 * Note:      The insertion for node k is of length <inserts[k]> columns,
 *            and in 1..alen coords it runs from
 *            matmap[k]+1 .. matmap[k+1]-matuse[k+1].
 *
 * Returns:   <eslOK>.
 *
 * Throws:    (no abnormal error conditions)
 */
static int
rejustify_insertions_digital(ESL_MSA *msa, const int *inserts, const int *matmap, const int *matuse, int M)
{
  int idx;
  int k;
  int first, last;	/* insert region of node k is columns first..last (1..alen coords)   */
  int stop;		/* leftmost column that takes part in the right-justification         */
  int nleft;		/* residue count, then the number of leading columns left untouched   */
  int src, dst;		/* read and write cursors, both walking right to left                 */
  int has_pp;		/* TRUE if this sequence carries posterior probability annotation     */

  for (idx = 0; idx < msa->nseq; idx++)
    {
      for (k = 0; k < M; k++)
	{
	  if (inserts[k] <= 1) continue;   /* 0 or 1 column: nothing to rearrange */

	  first  = matmap[k] + 1;
	  last   = matmap[k+1] - matuse[k+1];
	  has_pp = (msa->pp != NULL && msa->pp[idx] != NULL);

	  /* count the residues in the region */
	  nleft = 0;
	  for (src = first; src <= last; src++)
	    if (esl_abc_XIsResidue(msa->abc, msa->ax[idx][src])) nleft++;

	  if (k == 0) nleft = 0;	/* N-terminus is right justified                        */
	  else        nleft /= 2;	/* half (rounded down) stays on the left                */
	  stop = first + nleft;

	  /* pack every non-gap symbol in stop..last against the right edge */
	  dst = last;
	  for (src = last; src >= stop; src--)
	    {
	      if (esl_abc_XIsGap(msa->abc, msa->ax[idx][src])) continue;

	      msa->ax[idx][dst] = msa->ax[idx][src];
	      if (has_pp) msa->pp[idx][dst-1] = msa->pp[idx][src-1];   /* pp is 0-based: hence the -1 */
	      dst--;
	    }

	  /* whatever lies between stop and the packed residues is now gap */
	  for (; dst >= stop; dst--)
	    {
	      msa->ax[idx][dst] = esl_abc_XGetGap(msa->abc);
	      if (has_pp) msa->pp[idx][dst-1] = '.';
	    }
	}
    }
  return eslOK;
}
/* Function: p7_profile_Create() * Synopsis: Allocates a profile. * * Purpose: Allocates for a profile of up to <M> nodes, for digital * alphabet <abc>. * * Because this function might be in the critical path (in * hmmscan, for example), we leave much of the model * uninitialized, including scores and length model * probabilities. The <p7_profile_Config()> call is what * sets these. * * The reference pointer <gm->abc> is set to <abc>. * * Returns: a pointer to the newly allocated profile. * * Throws: <NULL> on allocation error. */ P7_PROFILE * p7_profile_Create(int allocM, const ESL_ALPHABET *abc) { P7_PROFILE *gm = NULL; int x; int status; /* level 0 */ ESL_ALLOC(gm, sizeof(P7_PROFILE)); gm->tsc = NULL; gm->rsc = NULL; gm->name = NULL; gm->acc = NULL; gm->desc = NULL; gm->rf = NULL; gm->mm = NULL; gm->cs = NULL; gm->consensus = NULL; /* level 1 */ ESL_ALLOC(gm->tsc, sizeof(float) * (allocM+1) * p7P_NTRANS); /* 0..M */ ESL_ALLOC(gm->rsc, sizeof(float *) * abc->Kp); ESL_ALLOC(gm->rf, sizeof(char) * (allocM+2)); /* yes, +2: each is (0)1..M, +trailing \0 */ ESL_ALLOC(gm->mm, sizeof(char) * (allocM+2)); ESL_ALLOC(gm->cs, sizeof(char) * (allocM+2)); ESL_ALLOC(gm->consensus, sizeof(char) * (allocM+2)); gm->rsc[0] = NULL; /* level 2 */ ESL_ALLOC(gm->rsc[0], sizeof(float) * abc->Kp * (allocM+1) * p7P_NR); for (x = 1; x < abc->Kp; x++) gm->rsc[x] = gm->rsc[0] + x * (allocM+1) * p7P_NR; /* Initialization of tsc[0], including removal of I0. tsc[k-1,LM],tsc[k-1,GM] will be configured + overwritten later */ esl_vec_FSet(gm->tsc, p7P_NTRANS, -eslINFINITY); /* tsc[M] initialized and Im removed when we know actual M : see modelconfig.c */ for (x = 0; x < abc->Kp; x++) { P7P_MSC(gm, 0, x) = -eslINFINITY; /* no emissions from nonexistent M_0... */ P7P_ISC(gm, 0, x) = -eslINFINITY; /* nor I_0... 
*/ /* I_M is initialized in profile config, when we know actual M, not just allocated max M */ } x = esl_abc_XGetGap(abc); /* no emission can emit/score gap characters */ esl_vec_FSet(gm->rsc[x], (allocM+1)*p7P_NR, -eslINFINITY); x = esl_abc_XGetMissing(abc); /* no emission can emit/score missing data characters */ esl_vec_FSet(gm->rsc[x], (allocM+1)*p7P_NR, -eslINFINITY); /* Set remaining info */ gm->M = 0; gm->allocM = allocM; gm->L = -1; /* "unset" flag */ gm->nj = -1.0f; /* "unset" flag */ gm->pglocal = -1.0f; /* "unset" flag */ gm->roff = -1; gm->eoff = -1; gm->offs[p7_MOFFSET] = -1; gm->offs[p7_FOFFSET] = -1; gm->offs[p7_POFFSET] = -1; gm->name = NULL; gm->acc = NULL; gm->desc = NULL; gm->rf[0] = 0; /* RF line is optional annotation; this flags that it's not set yet */ gm->mm[0] = 0; /* likewise for MM annotation line */ gm->cs[0] = 0; /* likewise for CS annotation line */ gm->consensus[0] = 0; for (x = 0; x < p7_NEVPARAM; x++) gm->evparam[x] = p7_EVPARAM_UNSET; for (x = 0; x < p7_NCUTOFFS; x++) gm->cutoff[x] = p7_CUTOFF_UNSET; for (x = 0; x < p7_MAXABET; x++) gm->compo[x] = p7_COMPO_UNSET; gm->max_length = -1; /* "unset" */ gm->abc = abc; return gm; ERROR: p7_profile_Destroy(gm); return NULL; }
/* make_digital_msa()
 *
 * Purpose:  Build a new digital alignment of <nseq> sequences and
 *           <alen> columns from tracebacks <tr[0..nseq-1]>. Residues
 *           are fetched with <get_dsq_z()> from <sq> or <premsa>; the
 *           alphabet is taken from <premsa> when <sq> is NULL, else
 *           from <sq[0]>.
 *
 *           Every row starts out as all gaps. Walking a trace, M and D
 *           states address their column through <matmap[k]>; I, N and
 *           C states write at a running column <apos> that M, D and E
 *           states reposition. With <p7_TRIM> set in <optflags>, N/C
 *           residues and the inserts of nodes 0 and M are left out.
 *           Around X states (fragment ends) the flanking columns are
 *           converted to the missing-data symbol.
 *
 * Args:     sq, premsa - sequence source, passed through to get_dsq_z()
 *           tr         - tracebacks, one per sequence
 *           nseq       - number of sequences
 *           matuse     - [0..M] nonzero if node k has an alignment column
 *           matmap     - [0..M] alignment column (1..alen) for node k
 *           M          - number of nodes
 *           alen       - number of alignment columns
 *           optflags   - option flags; only <p7_TRIM> is examined here
 *           ret_msa    - RETURN: the new alignment
 *
 * Returns:  <eslOK> on success, with the alignment in <*ret_msa>.
 *
 * Throws:   <eslEMEM> if the alignment cannot be created;
 *           <eslECORRUPT> if an X state is neither preceded by B nor
 *           followed by E. <*ret_msa> is NULL in both cases.
 */
static int
make_digital_msa(ESL_SQ **sq, const ESL_MSA *premsa, P7_TRACE **tr, int nseq, const int *matuse, const int *matmap, int M, int alen, int optflags, ESL_MSA **ret_msa)
{
  const ESL_ALPHABET *abc     = (sq == NULL) ? premsa->abc : sq[0]->abc;
  const P7_TRACE     *t;				/* trace of the current sequence            */
  ESL_MSA            *msa     = NULL;
  int                 gap     = esl_abc_XGetGap(abc);
  int                 missing = esl_abc_XGetMissing(abc);
  int                 trim    = ((optflags & p7_TRIM) != 0);
  int                 idx;
  int                 apos;				/* next column for an I/N/C residue         */
  int                 col;				/* a column looked up in matmap             */
  int                 k;
  int                 z;
  int                 status;

  msa = esl_msa_CreateDigital(abc, nseq, alen);
  if (msa == NULL) { status = eslEMEM; goto ERROR; }

  for (idx = 0; idx < nseq; idx++)
    {
      t = tr[idx];

      /* blank row: sentinels at both ends, gaps in between */
      for (apos = 1; apos <= alen; apos++)
	msa->ax[idx][apos] = gap;
      msa->ax[idx][0]      = eslDSQ_SENTINEL;
      msa->ax[idx][alen+1] = eslDSQ_SENTINEL;

      apos = 1;
      for (z = 0; z < t->N; z++)
	{
	  switch (t->st[z]) {
	  case p7T_M:
	    col = matmap[t->k[z]];
	    msa->ax[idx][col] = get_dsq_z(sq, premsa, tr, idx, z);
	    apos = col + 1;
	    break;

	  case p7T_D:
	    k   = t->k[z];
	    col = matmap[k];
	    if (matuse[k])		/* bug h77: if all col is deletes, do nothing; do NOT overwrite a column */
	      msa->ax[idx][col] = gap;	/* overwrites ~ in Dk column on X->Dk */
	    apos = col + 1;
	    break;

	  case p7T_I:
	    k = t->k[z];
	    if (!trim || (k != 0 && k != M))
	      {
		msa->ax[idx][apos] = get_dsq_z(sq, premsa, tr, idx, z);
		apos++;
	      }
	    break;

	  case p7T_N:
	  case p7T_C:
	    if (!trim && t->i[z] > 0)
	      {
		msa->ax[idx][apos] = get_dsq_z(sq, premsa, tr, idx, z);
		apos++;
	      }
	    break;

	  case p7T_E:
	    apos = matmap[M] + 1;	/* set position for C-terminal tail */
	    break;

	  case p7T_X:
	    /* Mark fragments (B->X and X->E containing core traces):
	     * convert flanks from gaps to ~
	     */
	    if (t->st[z-1] == p7T_B)
	      {
		/* B->X leader. This is a core trace and a fragment.
		 * Convert leading gaps to ~, up to and including the
		 * column of the next state's node (peek at z+1).
		 */
		col = matmap[t->k[z+1]];
		for (apos = 1; apos <= col; apos++)
		  msa->ax[idx][apos] = missing;
		/* tricky! apos is now exactly where it needs to be for X->Ik.
		 * All other cases except B->X->Ik set their own apos.
		 */
	      }
	    else if (t->st[z+1] == p7T_E)
	      {
		/* X->E trailer. This is a core trace and a fragment.
		 * Convert trailing gaps to ~. apos needs no resetting:
		 * there can't be any more residues in a core trace once
		 * we hit X->E.
		 */
		while (apos <= alen)
		  {
		    msa->ax[idx][apos] = missing;
		    apos++;
		  }
	      }
	    else
	      {
		ESL_XEXCEPTION(eslECORRUPT, "make_digital_msa(): X state in unexpected position in trace");
	      }
	    break;

	  default:
	    break;
	  }
	}
    }

  msa->nseq = nseq;
  msa->alen = alen;
  *ret_msa  = msa;
  return eslOK;

 ERROR:
  if (msa) esl_msa_Destroy(msa);
  *ret_msa = NULL;
  return status;
}