/* Function: matassign2hmm()
 *
 * Purpose:  Given an assignment of alignment columns to match vs.
 *           insert, finish the final part of the model construction
 *           calculation that is constant between model construction
 *           algorithms: apply the model mask, build one fake trace
 *           per sequence, collect counts into a new HMM, transfer
 *           annotation, and rewrite the alignment's #=RF line.
 *
 * Args:     msa       - multiple sequence alignment; its <rf> line is
 *                       (re)written here to reflect <matassign>
 *           matassign - 1..alen bit flags for column assignments
 *           ret_hmm   - RETURN: counts-form HMM
 *           opt_tr    - optRETURN: array of tracebacks for aseq's;
 *                       pass NULL if the traces are not wanted
 *
 * Return:   <eslOK> on success.
 *           <eslENORESULT> if no consensus columns are identified.
 *           Any other non-OK status from a failed allocation or callee
 *           is passed through. On every failure <*ret_hmm> is NULL, and
 *           <*opt_tr> (if requested) is NULL.
 *
 *           ret_hmm and opt_tr alloc'ed here.
 */
static int
matassign2hmm(ESL_MSA *msa, int *matassign, P7_HMM **ret_hmm, P7_TRACE ***opt_tr)
{
  int        status;            /* return status                       */
  P7_HMM    *hmm = NULL;        /* RETURN: new hmm                     */
  P7_TRACE **tr  = NULL;        /* RETURN: 0..nseq-1 fake traces       */
  int        M;                 /* length of new model in match states */
  int        idx;               /* counter over sequences              */
  int        apos;              /* counter for aligned columns         */
#ifdef p7_DEBUGGING
  char       errbuf[eslERRBUFSIZE];
#endif

  /* apply the model mask in the 'GC MM' row */
  do_modelmask(msa);

  /* How many match states in the HMM? */
  for (M = 0, apos = 1; apos <= msa->alen; apos++)
    if (matassign[apos]) M++;
  if (M == 0) { status = eslENORESULT; goto ERROR; }

  /* Make fake tracebacks for each seq.
   * Every slot is set to NULL first, so that the ERROR path never hands
   * an uninitialized pointer to p7_trace_DestroyArray() if trace
   * construction fails part way through.
   * NOTE(review): relies on p7_trace_DestroyArray() skipping NULL entries;
   * the success path already depends on that whenever a trace is NULL.
   */
  ESL_ALLOC(tr, sizeof(P7_TRACE *) * msa->nseq);
  for (idx = 0; idx < msa->nseq; idx++) tr[idx] = NULL;

  if ((status = p7_trace_FauxFromMSA(msa, matassign, p7_MSA_COORDS, tr)) != eslOK) goto ERROR;
  for (idx = 0; idx < msa->nseq; idx++)
    {
      /* Same guard as the counting loop below: a NULL trace (empty
       * sequence) has nothing to doctor or validate.
       */
      if (tr[idx] == NULL) continue;

      if ((status = p7_trace_Doctor(tr[idx], NULL, NULL)) != eslOK) goto ERROR;
#ifdef p7_DEBUGGING
      if ((status = p7_trace_Validate(tr[idx], msa->abc, msa->ax[idx], errbuf)) != eslOK)
        ESL_XEXCEPTION(eslFAIL, "validation failed: %s", errbuf);
#endif
    }

  /* Build count model from tracebacks */
  if ((hmm    = p7_hmm_Create(M, msa->abc)) == NULL)  { status = eslEMEM; goto ERROR; }
  if ((status = p7_hmm_Zero(hmm))           != eslOK) goto ERROR;
  for (idx = 0; idx < msa->nseq; idx++)
    {
      if (tr[idx] == NULL) continue; /* skip rare examples of empty sequences */
      if ((status = p7_trace_Count(hmm, msa->ax[idx], msa->wgt[idx], tr[idx])) != eslOK) goto ERROR;
    }

  hmm->nseq     = msa->nseq;
  hmm->eff_nseq = msa->nseq;

  /* Transfer annotation from the MSA to the new model */
  if ((status = annotate_model(hmm, matassign, msa)) != eslOK) goto ERROR;

  /* Reset #=RF line of alignment to reflect our assignment
   * of match, delete. matassign is valid from 1..alen and is off
   * by one from msa->rf.
   */
  if (msa->rf == NULL) ESL_ALLOC(msa->rf, sizeof(char) * (msa->alen + 1));
  for (apos = 1; apos <= msa->alen; apos++)
    msa->rf[apos-1] = matassign[apos] ? 'x' : '.';
  msa->rf[msa->alen] = '\0';

  /* Hand the traces to the caller if wanted; otherwise release them. */
  if (opt_tr != NULL) *opt_tr = tr;
  else                p7_trace_DestroyArray(tr, msa->nseq);
  *ret_hmm = hmm;
  return eslOK;

 ERROR:
  if (tr     != NULL) p7_trace_DestroyArray(tr, msa->nseq);
  if (hmm    != NULL) p7_hmm_Destroy(hmm);
  if (opt_tr != NULL) *opt_tr = NULL;
  *ret_hmm = NULL;
  return status;
}
/* Function: map_alignment()
 *
 * Purpose:  Read the first alignment from <msafile> and, using the
 *           match-column map stored in <hmm>, produce one digital
 *           sequence and one fake trace per aligned sequence.
 *
 *           Exits via esl_fatal() (or the eslx_msafile_*Failure()
 *           handlers) if the file cannot be opened or read, if <hmm>
 *           lacks a checksum or a map, if the alignment's checksum
 *           differs from the one stored in <hmm>, or if a map entry
 *           lies outside the alignment's columns.
 *
 * Args:     msafile  - name of the alignment file to read
 *           hmm      - model carrying <map>, <checksum> and <abc>
 *           ret_sq   - RETURN: array of <*ret_ntot> sequences
 *           ret_tr   - RETURN: array of <*ret_ntot> fake traces
 *           ret_ntot - RETURN: number of sequences in the alignment
 *
 * Return:   <eslOK> on success; caller becomes owner of <*ret_sq> and
 *           <*ret_tr>.
 *           On an allocation failure, or a failure building the traces
 *           or fetching a sequence, returns that status with
 *           <*ret_sq> = <*ret_tr> = NULL and <*ret_ntot> = 0; everything
 *           allocated here has been released.
 */
static int
map_alignment(const char *msafile, const P7_HMM *hmm, ESL_SQ ***ret_sq, P7_TRACE ***ret_tr, int *ret_ntot)
{
  ESL_SQ       **sq        = NULL;
  P7_TRACE     **tr        = NULL;
  ESLX_MSAFILE  *afp       = NULL;
  ESL_MSA       *msa       = NULL;
  ESL_ALPHABET  *abc       = (ESL_ALPHABET *) hmm->abc; /* removing const'ness to make compiler happy. Safe. */
  int           *matassign = NULL;
  uint32_t       chksum    = 0;
  int            nseq      = 0;   /* # of slots in sq[] and tr[]; valid on the ERROR path too */
  int            i,k;
  int            status;

  status = eslx_msafile_Open(&abc, msafile, NULL, eslMSAFILE_UNKNOWN, NULL, &afp);
  if (status != eslOK) eslx_msafile_OpenFailure(afp, status);

  status = eslx_msafile_Read(afp, &msa);
  if (status != eslOK) eslx_msafile_ReadFailure(afp, status);

  if (! (hmm->flags & p7H_CHKSUM)  )  esl_fatal("HMM has no checksum. --mapali unreliable without it.");
  if (! (hmm->flags & p7H_MAP)  )     esl_fatal("HMM has no map. --mapali can't work without it.");
  esl_msa_Checksum(msa, &chksum);
  if (hmm->checksum != chksum)        esl_fatal("--mapali MSA %s isn't same as the one HMM came from (checksum mismatch)", msafile);

  nseq = msa->nseq;

  /* Each pointer array is NULL-filled right after allocation so the
   * ERROR path can release whatever subset has been populated.
   */
  ESL_ALLOC(sq, sizeof(ESL_SQ *)   * nseq);
  for (i = 0; i < nseq; i++) sq[i] = NULL;
  ESL_ALLOC(tr, sizeof(P7_TRACE *) * nseq);
  for (i = 0; i < nseq; i++) tr[i] = NULL;
  ESL_ALLOC(matassign, sizeof(int) * (msa->alen + 1));

  /* matassign[] is indexed 1..alen. A 32-bit checksum match does not
   * strictly guarantee the map fits this alignment, so refuse any
   * entry that would write outside the array.
   */
  esl_vec_ISet(matassign, msa->alen+1, 0);
  for (k = 1; k <= hmm->M; k++)
    {
      if (hmm->map[k] < 1 || hmm->map[k] > msa->alen)
        esl_fatal("--mapali MSA %s: HMM map is out of range for the alignment", msafile);
      matassign[hmm->map[k]] = 1;
    }

  if ((status = p7_trace_FauxFromMSA(msa, matassign, p7_DEFAULT, tr)) != eslOK) goto ERROR;

  /* The 'faux' core traces constructed by FauxFromMSA() may contain
   * D->I and I->D transitions. They may *only* now be passed to
   * p7_tracealign_Seqs(), which can deal with these 'illegal'
   * transitions, in order to exactly reproduce the input --mapali
   * alignment.
   */

  for (i = 0; i < nseq; i++)
    if ((status = esl_sq_FetchFromMSA(msa, i, &(sq[i]))) != eslOK) goto ERROR;

  *ret_ntot = nseq;
  *ret_tr   = tr;
  *ret_sq   = sq;

  eslx_msafile_Close(afp);
  esl_msa_Destroy(msa);
  free(matassign);
  return eslOK;

 ERROR:
  *ret_ntot = 0;
  *ret_tr   = NULL;
  *ret_sq   = NULL;
  if (sq != NULL)
    {
      for (i = 0; i < nseq; i++)
        if (sq[i] != NULL) esl_sq_Destroy(sq[i]);
      free(sq);
    }
  /* NOTE(review): relies on p7_trace_DestroyArray() skipping NULL entries and freeing the array itself. */
  if (tr        != NULL) p7_trace_DestroyArray(tr, nseq);
  if (afp       != NULL) eslx_msafile_Close(afp);
  if (msa       != NULL) esl_msa_Destroy(msa);
  if (matassign != NULL) free(matassign);
  return status;
}