/* Function: P7PriorifyHMM() * * Purpose: Add pseudocounts to an HMM using Dirichlet priors, * and renormalize the HMM. * * Args: hmm -- the HMM to add counts to (counts form) * pri -- the Dirichlet prior to use * * Return: (void) * HMM returns in probability form. */ void P7PriorifyHMM(struct plan7_s *hmm, struct p7prior_s *pri) { int k; /* counter for model position */ float d; /* a denominator */ /* Model-dependent transitions are handled simply; Laplace. */ FSet(hmm->begin+2, hmm->M-1, 0.); /* wipe internal BM entries */ FSet(hmm->end+1, hmm->M-1, 0.); /* wipe internal ME exits */ d = hmm->tbd1 + hmm->begin[1] + 2.; hmm->tbd1 = (hmm->tbd1 + 1.)/ d; hmm->begin[1] = (hmm->begin[1] + 1.)/ d; hmm->end[hmm->M] = 1.0; /* Main model transitions and emissions */ for (k = 1; k < hmm->M; k++) { P7PriorifyTransitionVector(hmm->t[k], pri); P7PriorifyEmissionVector(hmm->mat[k], pri, pri->mnum, pri->mq, pri->m, NULL); P7PriorifyEmissionVector(hmm->ins[k], pri, pri->inum, pri->iq, pri->i, NULL); } P7PriorifyEmissionVector(hmm->mat[hmm->M], pri, pri->mnum, pri->mq, pri->m, NULL); Plan7Renormalize(hmm); }
/* Function: default_nucleic_prior() * * Purpose: Set the default DNA prior. (for now, almost a Laplace) */ static struct p7prior_s * default_nucleic_prior(void) { struct p7prior_s *pri; pri = P7AllocPrior(); pri->strategy = PRI_DCHLET; /* The use of the Pfam-trained amino acid transition priors * here is TOTALLY bogus. But it works better than a straight * Laplace, esp. for Maxmodelmaker(). For example, a Laplace * prior builds M=1 models for a single sequence GAATTC (at * one time an open "bug"). */ pri->tnum = 1; pri->tq[0] = 1.; pri->t[0][TMM] = 0.7939; pri->t[0][TMI] = 0.0278; pri->t[0][TMD] = 0.0135; pri->t[0][TIM] = 0.1551; pri->t[0][TII] = 0.1331; pri->t[0][TDM] = 0.9002; pri->t[0][TDD] = 0.5630; pri->mnum = 1; pri->mq[0] = 1.; FSet(pri->m[0], Alphabet_size, 1.); pri->inum = 1; pri->iq[0] = 1.; FSet(pri->i[0], Alphabet_size, 1.); return pri; }
/* Function: StrMarkov0()
 * Date:     SRE, Fri Oct 29 11:08:31 1999 [St. Louis]
 *
 * Purpose:  Returns a random string s1 with the same
 *           length and zero-th order Markov properties
 *           as s2.
 *
 *           s1 and s2 may be identical, to randomize s2
 *           in place. (All of s2 is read before any of s1
 *           is written.)
 *
 * Args:     s1 - allocated space for random string
 *                (at least strlen(s2)+1 bytes)
 *           s2 - string to base s1's properties on.
 *
 * Returns:  1 on success; 0 if s2 doesn't look alphabetical,
 *           i.e. contains a character that does not map onto
 *           'A'..'Z' after upper-casing. s1 is untouched on failure.
 */
int
StrMarkov0(char *s1, char *s2)
{
  int   len;
  int   pos;
  int   sym;                    /* 0..25 index of a residue       */
  float p[26];                  /* symbol probabilities           */

  len = strlen(s2);

  /* Verify that the string is entirely alphabetic, and collect
   * zeroth order counts in the same pass.
   *
   * The <ctype.h> functions require an argument representable as
   * unsigned char; a plain (possibly negative) char is undefined
   * behavior. The explicit range check additionally rejects locale
   * letters outside A-Z, which would otherwise index outside p[].
   */
  FSet(p, 26, 0.);
  for (pos = 0; pos < len; pos++)
    {
      unsigned char c = (unsigned char) s2[pos];

      if (! isalpha(c)) return 0;
      sym = toupper(c) - 'A';
      if (sym < 0 || sym >= 26) return 0;
      p[sym] += 1.0;
    }

  /* Convert counts to frequencies. */
  FNorm(p, 26);

  /* Generate a random string using those p's. */
  for (pos = 0; pos < len; pos++)
    s1[pos] = FChoose(p, 26) + 'A';
  s1[pos] = '\0';

  return 1;
}
/* Function: Plan7FSConfig() * Date: SRE, Fri Jan 2 15:34:40 1998 [StL] * * Purpose: Set the alignment independent parameters of * a Plan7 model to hmmfs (multihit Smith/Waterman) configuration. * * See comments on Plan7SWConfig() for explanation of * how pentry and pexit are used. * * Args: hmm - the Plan7 model w/ data-dep prob's valid * pentry - probability of an internal entry somewhere; * will be evenly distributed over M-1 match states * pexit - probability of an internal exit somewhere; * will be distributed over M-1 match states. * * Return: (void) * HMM probabilities are modified. */ void Plan7FSConfig(struct plan7_s *hmm, float pentry, float pexit) { float basep; /* p1 for exits: the base p */ int k; /* counter over states */ /* Configure special states. */ hmm->xt[XTN][MOVE] = 1-hmm->p1; /* allow N-terminal tail */ hmm->xt[XTN][LOOP] = hmm->p1; hmm->xt[XTE][MOVE] = 0.5; /* allow loops / multihits */ hmm->xt[XTE][LOOP] = 0.5; hmm->xt[XTC][MOVE] = 1-hmm->p1; /* allow C-terminal tail */ hmm->xt[XTC][LOOP] = hmm->p1; hmm->xt[XTJ][MOVE] = 1.-hmm->p1; /* allow J junction between domains */ hmm->xt[XTJ][LOOP] = hmm->p1; /* Configure entry. */ hmm->begin[1] = (1. - pentry) * (1. - hmm->tbd1); FSet(hmm->begin+2, hmm->M-1, (pentry * (1.-hmm->tbd1)) / (float)(hmm->M-1)); /* Configure exit. */ hmm->end[hmm->M] = 1.0; basep = pexit / (float) (hmm->M-1); for (k = 1; k < hmm->M; k++) hmm->end[k] = basep / (1. - basep * (float) (k-1)); Plan7RenormalizeExits(hmm); hmm->flags &= ~PLAN7_HASBITS; /* reconfig invalidates log-odds scores */ }
/* Function: Plan7LSConfig() * * Purpose: Set the alignment independent parameters of a Plan7 model * to hmmls (global in HMM, local in sequence) configuration. * * Args: hmm - the plan7 model * * Return: (void); * the HMM probabilities are modified. */ void Plan7LSConfig(struct plan7_s *hmm) { hmm->xt[XTN][MOVE] = 1.-hmm->p1; /* allow N-terminal tail */ hmm->xt[XTN][LOOP] = hmm->p1; hmm->xt[XTE][MOVE] = 0.5; /* expectation 2 domains/seq */ hmm->xt[XTE][LOOP] = 0.5; hmm->xt[XTC][MOVE] = 1.-hmm->p1; /* allow C-terminal tail */ hmm->xt[XTC][LOOP] = hmm->p1; hmm->xt[XTJ][MOVE] = 1.-hmm->p1; /* allow J junction state */ hmm->xt[XTJ][LOOP] = hmm->p1; FSet(hmm->begin+2, hmm->M-1, 0.); /* start at M1/D1 */ hmm->begin[1] = 1. - hmm->tbd1; FSet(hmm->end+1, hmm->M-1, 0.); /* end at M_m/D_m */ hmm->end[hmm->M] = 1.; Plan7RenormalizeExits(hmm); hmm->flags &= ~PLAN7_HASBITS; /* reconfig invalidates log-odds scores */ }
/* Function: Plan7GlobalConfig() * * Purpose: Set the alignment-independent, algorithm-dependent parameters * of a Plan7 model to global (Needleman/Wunsch) configuration. * * Like a non-looping hmmls, since we actually allow flanking * N and C terminal sequence. * * Args: hmm - the plan7 model * * Return: (void) * The HMM is modified; algorithm dependent parameters are set. * Previous scores are invalidated if they existed. */ void Plan7GlobalConfig(struct plan7_s *hmm) { hmm->xt[XTN][MOVE] = 1. - hmm->p1; /* allow N-terminal tail */ hmm->xt[XTN][LOOP] = hmm->p1; hmm->xt[XTE][MOVE] = 1.; /* only 1 domain/sequence ("global" alignment) */ hmm->xt[XTE][LOOP] = 0.; hmm->xt[XTC][MOVE] = 1. - hmm->p1; /* allow C-terminal tail */ hmm->xt[XTC][LOOP] = hmm->p1; hmm->xt[XTJ][MOVE] = 0.; /* J state unused */ hmm->xt[XTJ][LOOP] = 1.; FSet(hmm->begin+2, hmm->M-1, 0.); /* disallow internal entries. */ hmm->begin[1] = 1. - hmm->tbd1; FSet(hmm->end+1, hmm->M-1, 0.); /* disallow internal exits. */ hmm->end[hmm->M] = 1.; Plan7RenormalizeExits(hmm); hmm->flags &= ~PLAN7_HASBITS; /* reconfig invalidates log-odds scores */ }
/* Function: Plan7ESTConfig()
 *
 * Purpose:  Configure a Plan7 model for EST Smith/Waterman
 *           analysis.
 *
 *           OUTDATED; DO NOT USE WITHOUT RECHECKING
 *
 *           Sets the special-state transitions and entry/exit
 *           distributions, then fills the per-codon score tables
 *           hmm->dnam / hmm->dnai and the frameshift scores
 *           hmm->dna2 / hmm->dna4 via Prob2Score().
 *
 * Args:     hmm      - hmm to configure.
 *           aacode   - 0..63 vector mapping genetic code to amino acids
 *           estmodel - 20x64 translation matrix, w/ codon bias and substitution error
 *           dna2     - probability of a -1 frameshift in a triplet
 *           dna4     - probability of a +1 frameshift in a triplet
 *
 * Return:   (void)
 *
 * NOTE(review): this function is not safe to call as written:
 *   - `tripnull` is never assigned before tripnull[x] is read below
 *     (undefined behavior). The intended triplet null model is not
 *     visible in this function.
 *   - the exit FSet() starts at hmm->end (index 0) rather than
 *     hmm->end+1 as the other Plan7*Config() routines do; looks like
 *     an off-by-one -- confirm before reviving this code.
 *   - hmm->dnam[64][k] is set for ambiguous codons but hmm->dnai[64][k]
 *     is not; confirm whether that is intended.
 */
void
Plan7ESTConfig(struct plan7_s *hmm, int *aacode, float **estmodel,
               float dna2, float dna4)
{
  int    k;                     /* counter over model nodes 1..M       */
  int    x;                     /* counter over the 64 codons          */
  float  p;                     /* codon emission probability          */
  float *tripnull;              /* UNFINISHED!!! -- never initialized  */

  /* configure specials */
  hmm->xt[XTN][MOVE] = 1./351.;
  hmm->xt[XTN][LOOP] = 350./351.;
  hmm->xt[XTE][MOVE] = 1.;
  hmm->xt[XTE][LOOP] = 0.;
  hmm->xt[XTC][MOVE] = 1./351.;
  hmm->xt[XTC][LOOP] = 350./351.;
  hmm->xt[XTJ][MOVE] = 1.;
  hmm->xt[XTJ][LOOP] = 0.;

  /* configure entry/exit:
   * half the entry mass on M1, the other half spread evenly over the
   * M-1 internal match states; exits are set the same way.
   */
  hmm->begin[1] = 0.5;
  FSet(hmm->begin+2, hmm->M-1, 0.5 / ((float)hmm->M - 1.));
  hmm->end[hmm->M] = 1.;
  /* NOTE(review): writes end[0..M-2], not end[1..M-1] -- see header. */
  FSet(hmm->end, hmm->M-1, 0.5 / ((float)hmm->M - 1.));

  /* configure dna triplet/frameshift emissions */
  for (k = 1; k <= hmm->M; k++)
    {
      /* translate aa to triplet probabilities:
       * P(codon x) = P(aa coded by x) * estmodel[aa][x] * P(no frameshift)
       */
      for (x = 0; x < 64; x++) {
        p = hmm->mat[k][aacode[x]] * estmodel[aacode[x]][x] * (1.-dna2-dna4);
        hmm->dnam[x][k] = Prob2Score(p, tripnull[x]);

        p = hmm->ins[k][aacode[x]] * estmodel[aacode[x]][x] * (1.-dna2-dna4);
        hmm->dnai[x][k] = Prob2Score(p, tripnull[x]);
      }
      hmm->dnam[64][k] = 0;     /* ambiguous codons score 0 (danger?) */

      /* These two do not depend on k; they are simply re-assigned on
       * every iteration.
       */
      hmm->dna2 = Prob2Score(dna2, 1.);
      hmm->dna4 = Prob2Score(dna4, 1.);
    }
}
/* Function: P7LaplacePrior() * * Purpose: Create a Laplace plus-one prior. (single component Dirichlets). * Global alphabet info is assumed to have been set already. * * Args: (void) * * Return: prior. Allocated here; call FreePrior() to free it. */ struct p7prior_s * P7LaplacePrior(void) { struct p7prior_s *pri; pri = P7AllocPrior(); pri->strategy = PRI_DCHLET; pri->tnum = 1; pri->tq[0] = 1.; FSet(pri->t[0], 8, 1.); pri->mnum = 1; pri->mq[0] = 1.; FSet(pri->m[0], Alphabet_size, 1.); pri->inum = 1; pri->iq[0] = 1.; FSet(pri->i[0], Alphabet_size, 1.); return pri; }
/* Function: StrMarkov1()
 * Date:     SRE, Fri Oct 29 11:22:20 1999 [St. Louis]
 *
 * Purpose:  Returns a random string s1 with the same
 *           length and first order Markov properties
 *           as s2. The first symbol of s1 is the first
 *           symbol of s2 (upper-cased).
 *
 *           s1 and s2 may be identical, to randomize s2
 *           in place. (All of s2 is read before any of s1
 *           is written.)
 *
 * Args:     s1 - allocated space for random string
 *                (at least strlen(s2)+1 bytes)
 *           s2 - string to base s1's properties on.
 *
 * Returns:  1 on success; 0 if s2 doesn't look alphabetical,
 *           i.e. contains a character that does not map onto
 *           'A'..'Z' after upper-casing. s1 is untouched on failure.
 *           An empty s2 succeeds and yields an empty s1.
 */
int
StrMarkov1(char *s1, char *s2)
{
  int   len;
  int   pos;
  int   x,y;
  int   i;                      /* initial symbol */
  float p[26][26];              /* symbol probabilities: p[prev][next] */

  /* First, verify that the string is entirely alphabetic.
   *
   * The <ctype.h> functions require an argument representable as
   * unsigned char; a plain (possibly negative) char is undefined
   * behavior. The explicit range check additionally rejects locale
   * letters outside A-Z, which would otherwise index outside p[][].
   */
  len = strlen(s2);
  for (pos = 0; pos < len; pos++)
    {
      if (! isalpha((unsigned char) s2[pos])) return 0;
      x = toupper((unsigned char) s2[pos]) - 'A';
      if (x < 0 || x >= 26) return 0;
    }

  /* Nothing to model for an empty string. Without this guard the code
   * below would derive a symbol from the terminating NUL and write
   * s1[1], one byte beyond the strlen(s2)+1 bytes the caller owes us.
   */
  if (len == 0)
    {
      s1[0] = '\0';
      return 1;
    }

  /* Collect first order counts and convert to frequencies. */
  for (x = 0; x < 26; x++)
    FSet(p[x], 26, 0.);

  i = x = toupper((unsigned char) s2[0]) - 'A';
  for (pos = 1; pos < len; pos++)
    {
      y = toupper((unsigned char) s2[pos]) - 'A';
      p[x][y] += 1.0;
      x = y;
    }
  for (x = 0; x < 26; x++)
    FNorm(p[x], 26);

  /* Generate a random string using those p's. */
  x = i;
  s1[0] = x + 'A';
  for (pos = 1; pos < len; pos++)
    {
      y = FChoose(p[x], 26);
      s1[pos] = y + 'A';
      x = y;
    }
  s1[pos] = '\0';

  return 1;
}
/* Function: ZeroPlan7() * * Purpose: Zeros the counts/probabilities fields in a model. * Leaves null model untouched. */ void ZeroPlan7(struct plan7_s *hmm) { int k; for (k = 1; k < hmm->M; k++) { FSet(hmm->t[k], 7, 0.); FSet(hmm->mat[k], Alphabet_size, 0.); FSet(hmm->ins[k], Alphabet_size, 0.); } FSet(hmm->mat[hmm->M], Alphabet_size, 0.); hmm->tbd1 = 0.; FSet(hmm->begin+1, hmm->M, 0.); FSet(hmm->end+1, hmm->M, 0.); for (k = 0; k < 4; k++) FSet(hmm->xt[k], 2, 0.); hmm->flags &= ~PLAN7_HASBITS; /* invalidates scores */ hmm->flags &= ~PLAN7_HASPROB; /* invalidates probabilities */ }
/* Function: P7PriorifyHMM() * * Purpose: Add pseudocounts to an HMM using Dirichlet priors, * and renormalize the HMM. * * Args: hmm -- the HMM to add counts to (counts form) * pri -- the Dirichlet prior to use * * Return: (void) * HMM returns in probability form. */ void P7PriorifyHMM(struct plan7_s *hmm, struct p7prior_s *pri) { int k; /* counter for model position */ float d; /* a denominator */ float tq[MAXDCHLET]; /* prior distribution over mixtures */ float mq[MAXDCHLET]; /* prior distribution over mixtures */ float iq[MAXDCHLET]; /* prior distribution over mixtures */ /* Model-dependent transitions are handled simply; Laplace. */ FSet(hmm->begin+2, hmm->M-1, 0.); /* wipe internal BM entries */ FSet(hmm->end+1, hmm->M-1, 0.); /* wipe internal ME exits */ d = hmm->tbd1 + hmm->begin[1] + 2.; hmm->tbd1 = (hmm->tbd1 + 1.)/ d; hmm->begin[1] = (hmm->begin[1] + 1.)/ d; hmm->end[hmm->M] = 1.0; /* Main model transitions and emissions */ for (k = 1; k < hmm->M; k++) { /* The following code chunk is experimental. * Collaboration with Michael Asman, Erik Sonnhammer, CGR Stockholm. * Only activated if X-PR* annotation has been used, in which * priors are overridden and a single Dirichlet component is * specified for each column (using structural annotation). * If X-PR* annotation is not used, which is usually the case, * the following code has no effect (observe how the real prior * distributions are copied into tq, mq, iq). 
*/ if (hmm->tpri != NULL && hmm->tpri[k] >= 0) { if (hmm->tpri[k] >= pri->tnum) Die("X-PRT annotation out of range"); FSet(tq, pri->tnum, 0.0); tq[hmm->tpri[k]] = 1.0; } else FCopy(tq, pri->tq, pri->tnum); if (hmm->mpri != NULL && hmm->mpri[k] >= 0) { if (hmm->mpri[k] >= pri->mnum) Die("X-PRM annotation out of range"); FSet(mq, pri->mnum, 0.0); mq[hmm->mpri[k]] = 1.0; } else FCopy(mq, pri->mq, pri->mnum); if (hmm->ipri != NULL && hmm->ipri[k] >= 0) { if (hmm->ipri[k] >= pri->inum) Die("X-PRI annotation out of range"); FSet(iq, pri->inum, 0.0); iq[hmm->ipri[k]] = 1.0; } else FCopy(iq, pri->iq, pri->inum); /* This is the main line of the code: */ P7PriorifyTransitionVector(hmm->t[k], pri, tq); P7PriorifyEmissionVector(hmm->mat[k], pri, pri->mnum, mq, pri->m, NULL); P7PriorifyEmissionVector(hmm->ins[k], pri, pri->inum, iq, pri->i, NULL); } /* We repeat the above steps just for the final match state, M. */ if (hmm->mpri != NULL && hmm->mpri[hmm->M] >= 0) { if (hmm->mpri[hmm->M] >= pri->mnum) Die("X-PRM annotation out of range"); FSet(mq, pri->mnum, 0.0); mq[hmm->mpri[hmm->M]] = 1.0; } else FCopy(mq, pri->mq, pri->mnum); P7PriorifyEmissionVector(hmm->mat[hmm->M], pri, pri->mnum, mq, pri->m, NULL); /* Now we're done. Convert the counts-based HMM to probabilities. */ Plan7Renormalize(hmm); }
/* Function: main()   [ohmmemit]
 *
 * Purpose:  EMBOSS-wrapped driver that reads one HMM and emits
 *           sequences from it. Depending on the ACD options it writes
 *           either a single consensus sequence, a SELEX alignment of
 *           <number> sampled sequences, or <number> unaligned sampled
 *           sequences, to the ACD "outfile".
 *
 * Args:     argc, argv - command line, handed to embInitPV()
 *
 * Return:   0 (after embExit()); fatal errors go through ajFatal().
 */
int main(int argc, char **argv)
{
  const char      *hmmfile;	/* file to read HMMs from                  */
  FILE            *fp;          /* output file handle                      */
  HMMFILE         *hmmfp;       /* opened hmmfile for reading              */
  struct plan7_s  *hmm;         /* HMM to generate from                    */
  int              L;		/* length of a sequence                    */
  int              i;		/* counter over sequences                  */

  char            *ofile;       /* output sequence file                    */
  int              nseq;	/* number of seqs to sample                */
  int              seed;	/* random number generator seed            */
  int              be_quiet;	/* TRUE to silence header/footer           */
  int              do_alignment;/* TRUE to output in aligned format        */
  int              do_consensus;/* TRUE to do a single consensus seq       */

  AjBool ajselex;               /* ACD "selex" option                      */
  AjBool ajcons;                /* ACD "consensus" option                  */
  AjPFile inf=NULL;             /* ACD "infile": the HMM file              */
  AjPFile outf=NULL;            /* ACD "outfile": where sequences go       */
  AjPStr  instr=NULL;           /* name of inf, as an AjPStr               */
  AjPStr  outstr=NULL;          /* name of outf; only created and deleted  */

#ifdef MEMDEBUG
  unsigned long histid1, histid2, orig_size, current_size;
  orig_size = malloc_inuse(&histid1);
  fprintf(stderr, "[... memory debugging is ON ...]\n");
#endif

  /***********************************************
   * Parse command line
   ***********************************************/

  nseq         = 10;
  be_quiet     = FALSE;
  do_alignment = FALSE;
  do_consensus = FALSE;
  ofile        = NULL;          /* never reassigned below */

  embInitPV("ohmmemit",argc,argv,"HMMER",VERSION);

  ajselex = ajAcdGetBoolean("selex");
  ajcons  = ajAcdGetBoolean("consensus");
  nseq    = ajAcdGetInt("number");
  seed    = ajAcdGetInt("seed");
  inf     = ajAcdGetInfile("infile");
  outf    = ajAcdGetOutfile("outfile");

  /* A seed of 0 means "pick one": use the current time. */
  if(!seed)
      seed = time ((time_t *) NULL);

  if(ajselex)
      do_alignment=TRUE;
  else
      do_alignment=FALSE;

  if(ajcons)
      do_consensus=TRUE;
  else
      do_consensus=FALSE;

  /* The HMMER file routines want a plain file name, so only the name
   * of the ACD input file is used from here on.
   */
  instr  = ajStrNewC((char *)ajFileGetNameC(inf));
  outstr = ajStrNewC((char *)ajFileGetNameC(outf));

  hmmfile = ajStrGetPtr(instr);

  sre_srandom(seed);

  if (do_alignment && do_consensus)
    ajFatal("Sorry, -selex and -consensus are incompatible.\n");
  if (nseq != 10 && do_consensus)
    ajWarn("-consensus overrides -number (# of sampled seqs)");

  /***********************************************
   * Open HMM file (might be in HMMERDB or current directory).
   * Read a single HMM from it.
   ***********************************************/

  if ((hmmfp = HMMFileOpen(hmmfile, "HMMERDB")) == NULL)
    ajFatal("Failed to open HMM file %s\n", hmmfile);
  if (!HMMFileRead(hmmfp, &hmm))
    ajFatal("Failed to read any HMMs from %s\n", hmmfile);
  HMMFileClose(hmmfp);
  if (hmm == NULL)
    ajFatal("HMM file %s corrupt or in incorrect format? Parse failed", hmmfile);

  /* Configure the HMM to shut off N,J,C emission: so we
   * do a simple single pass through the model.
   */
  Plan7NakedConfig(hmm);
  Plan7Renormalize(hmm);

  /***********************************************
   * Open the output file, or stdout
   ***********************************************/

  /* The stdio handle is borrowed from the AJAX output file object. */
  fp = ajFileGetFileptr(outf);

  /***********************************************
   * Show the options banner
   ***********************************************/
  /* be_quiet is forced on here, so the banner below is never printed. */
  be_quiet=TRUE;
  if (! be_quiet)
    {
      printf("HMM file: %s\n", hmmfile);
      if (! do_consensus) {
	printf("Number of seqs: %d\n", nseq);
	printf("Random seed: %d\n", seed);
      }
      printf("- - - - - - - - - - - - - - - - - - - - - - - - - " "- - - - - - -\n\n");
    }

  /***********************************************
   * Do the work.
   * If we're generating an alignment, we have to collect
   * all our traces, then output. If we're generating unaligned
   * sequences, we can emit one at a time.
   ***********************************************/

  if (do_consensus)
    {
      char    *seq;
      SQINFO   sqinfo;          /* info about sequence (name/desc)        */

      EmitConsensusSequence(hmm, &seq, NULL, &L, NULL);
      strcpy(sqinfo.name, "consensus");
      sqinfo.len = L;
      sqinfo.flags = SQINFO_NAME | SQINFO_LEN;

      WriteSeq(fp, kPearson, seq, &sqinfo);
      free(seq);
    }
  else if (do_alignment)
    {
      struct p7trace_s **tr;    /* one trace per sampled sequence         */
      char           **dsq;     /* sampled sequences as emitted           */
      SQINFO          *sqinfo;  /* name/length per sampled sequence       */
      char           **aseq;    /* RETURN from P7Traces2Alignment()       */
      AINFO            ainfo;   /* RETURN from P7Traces2Alignment()       */
      float           *wgt;     /* per-sequence weights, all 1.0          */

      dsq    = MallocOrDie(sizeof(char *)             * nseq);
      tr     = MallocOrDie(sizeof(struct p7trace_s *) * nseq);
      sqinfo = MallocOrDie(sizeof(SQINFO)             * nseq);
      wgt    = MallocOrDie(sizeof(float)              * nseq);
      FSet(wgt, nseq, 1.0);

      /* Sample every sequence first; the alignment needs all traces. */
      for (i = 0; i < nseq; i++)
	{
	  EmitSequence(hmm, &(dsq[i]), &L, &(tr[i]));
	  sprintf(sqinfo[i].name, "seq%d", i+1);
	  sqinfo[i].len   = L;
	  sqinfo[i].flags = SQINFO_NAME | SQINFO_LEN;
	}

      P7Traces2Alignment(dsq, sqinfo, wgt, nseq, hmm->M, tr, FALSE,
			 &aseq, &ainfo);

				/* Output the alignment */
      WriteSELEX(fp, aseq, &ainfo, 50);
      /* ofile is always NULL in this wrapper, so this never prints. */
      if (ofile != NULL && !be_quiet) printf("Alignment saved in file %s\n", ofile);

      /* Free memory
       */
      for (i = 0; i < nseq; i++)
	{
	  P7FreeTrace(tr[i]);
	  free(dsq[i]);
	}
      FreeAlignment(aseq, &ainfo);
      free(sqinfo);
      free(dsq);
      free(wgt);
      free(tr);
    }
  else				/* unaligned sequence output */
    {
      struct p7trace_s *tr;     /* trace of the current sample            */
      char             *dsq;    /* current sample as emitted              */
      char             *seq;    /* current sample after DedigitizeSequence() */
      SQINFO            sqinfo; /* name/length of the current sample      */

      /* Emit, write and free one sequence at a time. */
      for (i = 0; i < nseq; i++)
	{
	  EmitSequence(hmm, &dsq, &L, &tr);
	  sprintf(sqinfo.name, "seq%d", i+1);
	  sqinfo.len   = L;
	  sqinfo.flags = SQINFO_NAME | SQINFO_LEN;

	  seq = DedigitizeSequence(dsq, L);

	  WriteSeq(fp, kPearson, seq, &sqinfo);

	  P7FreeTrace(tr);
	  free(dsq);
	  free(seq);
	}
    }

  /* fp belongs to outf, so closing outf is what closes the output. */
  ajFileClose(&outf);

  FreePlan7(hmm);
  SqdClean();

#ifdef MEMDEBUG
  current_size = malloc_inuse(&histid2);
  if (current_size != orig_size) malloc_list(2, histid1, histid2);
  else fprintf(stderr, "[No memory leaks.]\n");
#endif

  ajStrDel(&instr);
  ajStrDel(&outstr);
  ajFileClose(&inf);
  /* NOTE(review): outf was already closed above; this second call
   * presumably sees a cleared handle and does nothing -- confirm
   * against the AJAX ajFileClose() contract.
   */
  ajFileClose(&outf);

  embExit();
  return 0;
}
/* Function: MSAVerifyParse()
 * Date:     SRE, Sat Jun  5 14:24:24 1999 [Madison, 1999 worm mtg]
 *
 * Purpose:  Last function called after a multiple alignment is
 *           parsed. Checks that parse was successful; makes sure
 *           required information is present; makes sure required
 *           information is consistent. Some fields that are
 *           only use during parsing are freed (sqlen, sslen, salen).
 *
 *           Some fields in msa may be modified (msa->alen is set
 *           from the first sequence's length, for example).
 *
 * Args:     msa - the multiple alignment
 *                 sqname, aseq, sqlen must be set
 *                 nseq must be correct
 *                 alen need not be set; will be set here.
 *                 wgt will be set to 1.0 here if MSA_SET_WGT is
 *                 not flagged
 *
 * Returns:  (void)
 *           Will Die() here with diagnostics on error.
 */
void
MSAVerifyParse(MSA *msa)
{
  int   idx;
  char *alnname;                /* alignment name for diagnostics; "" if unnamed */

  alnname = (msa->name != NULL) ? msa->name : "";

  if (msa->nseq == 0)
    Die("Parse error: no sequences were found for alignment %s", alnname);

  msa->alen = msa->sqlen[0];

  /* We can rely on msa->sqname[] being valid for any index,
   * because of the way the line parsers always store any name
   * they add to the index.
   */
  for (idx = 0; idx < msa->nseq; idx++)
    {
				/* aseq is required. */
      if (msa->aseq[idx] == NULL)
	Die("Parse error: No sequence for %s in alignment %s",
	    msa->sqname[idx], alnname);

				/* either all weights must be set, or none of them */
      if ((msa->flags & MSA_SET_WGT) && msa->wgt[idx] == -1.0)
	Die("Parse error: some weights are set, but %s doesn't have one in alignment %s",
	    msa->sqname[idx], alnname);

				/* all aseq must be same length. */
      if (msa->sqlen[idx] != msa->alen)
	Die("Parse error: sequence %s: length %d, expected %d in alignment %s",
	    msa->sqname[idx], msa->sqlen[idx], msa->alen, alnname);

				/* if SS is present, must have length right */
      if (msa->ss != NULL && msa->ss[idx] != NULL && msa->sslen[idx] != msa->alen)
	Die("Parse error: #=GR SS annotation for %s: length %d, expected %d in alignment %s",
	    msa->sqname[idx], msa->sslen[idx], msa->alen, alnname);

				/* if SA is present, must have length right */
      if (msa->sa != NULL && msa->sa[idx] != NULL && msa->salen[idx] != msa->alen)
	Die("Parse error: #=GR SA annotation for %s: length %d, expected %d in alignment %s",
	    msa->sqname[idx], msa->salen[idx], msa->alen, alnname);
    }

  /* Per-column annotation lines.
   *
   * strlen() returns size_t. It is converted to int explicitly, both
   * for the comparison against alen and for the %d conversion:
   * passing a size_t where %d expects an int is undefined behavior,
   * and where size_t is wider than int it also misaligns the
   * arguments that follow.
   */
				/* if cons SS is present, must have length right */
  if (msa->ss_cons != NULL && (int) strlen(msa->ss_cons) != msa->alen)
    Die("Parse error: #=GC SS_cons annotation: length %d, expected %d in alignment %s",
	(int) strlen(msa->ss_cons), msa->alen, alnname);

				/* if cons SA is present, must have length right */
  if (msa->sa_cons != NULL && (int) strlen(msa->sa_cons) != msa->alen)
    Die("Parse error: #=GC SA_cons annotation: length %d, expected %d in alignment %s",
	(int) strlen(msa->sa_cons), msa->alen, alnname);

				/* if RF is present, must have length right */
  if (msa->rf != NULL && (int) strlen(msa->rf) != msa->alen)
    Die("Parse error: #=GC RF annotation: length %d, expected %d in alignment %s",
	(int) strlen(msa->rf), msa->alen, alnname);

				/* Check that all or no weights are set */
  if (!(msa->flags & MSA_SET_WGT))
    FSet(msa->wgt, msa->nseq, 1.0); /* default weights */

				/* Clean up a little from the parser;
				 * free(NULL) is a no-op, so no guards needed */
  free(msa->sqlen); msa->sqlen = NULL;
  free(msa->sslen); msa->sslen = NULL;
  free(msa->salen); msa->salen = NULL;

  return;
}
/* Function: ReadSELEX()
 * Date:     SRE, Sun Jun  6 18:24:09 1999 [St. Louis]
 *
 * Purpose:  Parse an alignment read from an open SELEX format
 *           alignment file. (SELEX is a single alignment format).
 *           Return the alignment, or NULL if we've already read the
 *           alignment or there's no alignment data in the file.
 *
 *           The file is read twice. The first pass counts blocks and
 *           sequences, records sequence names, finds the leftmost and
 *           rightmost aligned column of every block, and notes which
 *           annotation lines (#=CS, #=RF, #=SS, #=SA) occur. After a
 *           rewind(), the second pass parses the header lines and
 *           copies each block's columns into the assembled strings.
 *
 * Limitations: SELEX is the only remaining multipass parser for
 *           alignment files. It cannot read from gzip or from stdin.
 *           It Die()'s here if you try. The reason for this
 *           that SELEX allows space characters as gaps, so we don't
 *           know the borders of an alignment block until we've seen
 *           the whole block. I could rewrite to allow single-pass
 *           parsing (by storing the whole block in memory) but
 *           since SELEX is now legacy, why bother.
 *
 *           Note that the interface is totally kludged: fastest
 *           possible adaptation of old ReadSELEX() to the new
 *           MSA interface.
 *
 * Args:     afp  - open alignment file
 *
 * Returns:  MSA *  - an alignment object
 *                    caller responsible for an MSAFree()
 *           NULL if no alignment data (squid_errno is set to
 *           SQERR_NODATA when the very first read fails).
 *
 * NOTE(review): several indices below are trusted rather than checked;
 *           each spot carries its own note. They look reachable only
 *           with malformed input -- confirm before relying on this
 *           parser for untrusted files.
 */
MSA *
ReadSELEX(MSAFILE *afp)
{
  MSA     *msa;                 /* RETURN: mult seq alignment   */
  FILE    *fp;                  /* ptr to opened seqfile        */
  char   **aseqs;               /* aligned seqs                 */
  int      num = 0;		/* number of seqs read          */
  char     buffer[LINEBUFLEN];	/* input buffer for lines       */
  char     bufcpy[LINEBUFLEN];	/* strtok'able copy of buffer   */
  struct block_struc {          /** alignment data for a block: */
    int lcol;			/* furthest left aligned sym    */
    int rcol;			/* furthest right aligned sym   */
  } *blocks = NULL;
  int      blocknum;		/* number of blocks in file     */
  char    *nptr;                /* ptr to start of name on line */
  char    *sptr;                /* ptr into sequence on line    */
  int      currnum;		/* num. seqs in given block     */
  int      currblock;		/* index for blocks             */
  int      i;			/* loop counter                 */
  int      seqidx;		/* counter for seqs             */
  int      alen;                /* length of alignment          */
  int      warn_names;          /* becomes TRUE if names don't match between blocks */
  int      headnum;		/* seqidx in per-sequence header info */
  int      currlen;             /* columns already filled by earlier blocks (2nd pass) */
  int      count;               /* ungapped residues in one sequence  */
  int      have_cs = 0;         /* set when a #=CS line is seen in pass 1 */
  int      have_rf = 0;         /* set when a #=RF line is seen in pass 1 */
  AINFO    base_ainfo, *ainfo;	/* hack: used to be passed ptr to AINFO */

  /* Convert from MSA interface to what old ReadSELEX() did:
   *     - copy our open fp, rather than opening file
   *     - verify that we're not reading a gzip or stdin
   */
  if (feof(afp->f)) return NULL;
  if (afp->do_gzip || afp->do_stdin)
    Die("Can't read a SELEX format alignment from a pipe, stdin, or gzip'ed file");
  fp    = afp->f;
  ainfo = &base_ainfo;

  /***************************************************
   * First pass across file.
   * Count seqs, get names, determine column info
   * Determine what sorts of info are active in this file.
   ***************************************************/

  InitAinfo(ainfo);
				/* get first line of the block
				 * (non-comment, non-blank) */
  do
    {
      if (fgets(buffer, LINEBUFLEN, fp) == NULL)
	{ squid_errno = SQERR_NODATA; return 0; }
      strcpy(bufcpy, buffer);
      if (*buffer == '#')
	{
	  if      (strncmp(buffer, "#=CS", 4) == 0) have_cs = 1;
	  else if (strncmp(buffer, "#=RF", 4) == 0) have_rf = 1;
	}
    }
  while ((nptr = strtok(bufcpy, WHITESPACE)) == NULL ||
	 (strchr(commentsyms, *nptr) != NULL));

  blocknum   = 0;
  warn_names = FALSE;
  while (!feof(fp))
    {
				/* allocate for info about this block. */
      if (blocknum == 0)
	blocks = (struct block_struc *) MallocOrDie (sizeof(struct block_struc));
      else
	blocks = (struct block_struc *) ReallocOrDie (blocks, (blocknum+1) * sizeof(struct block_struc));
      /* Start with an impossible column range so the first data line
       * of the block always narrows it.
       */
      blocks[blocknum].lcol = LINEBUFLEN+1;
      blocks[blocknum].rcol = -1;

      currnum = 0;
      while (nptr != NULL)	/* becomes NULL when this block ends. */
      {
				/* First block only: save names */
	if (blocknum == 0)
	  {
	    if (currnum == 0)
	      ainfo->sqinfo = (SQINFO *) MallocOrDie (sizeof(SQINFO));
	    else
	      ainfo->sqinfo = (SQINFO *) ReallocOrDie (ainfo->sqinfo, (currnum + 1) * sizeof(SQINFO));

	    ainfo->sqinfo[currnum].flags = 0;
	    SetSeqinfoString(&(ainfo->sqinfo[currnum]), nptr, SQINFO_NAME);
	  }
	else			/* in each additional block: check names */
	  {
	    /* NOTE(review): currnum is not checked against num here; a
	     * later block with more lines than the first would index
	     * past sqinfo before the count check below fires -- confirm.
	     */
	    if (strcmp(ainfo->sqinfo[currnum].name, nptr) != 0)
	      warn_names = TRUE;
	  }
	currnum++;

				/* check rcol, lcol */
	if ((sptr = strtok(NULL, WHITESPACE)) != NULL)
	  {
				/* is this the furthest left we've
				   seen word 2 in this block? */
	    if (sptr - bufcpy < blocks[blocknum].lcol)
	      blocks[blocknum].lcol = sptr - bufcpy;
				/* look for right side in buffer */
	    for (sptr = buffer + strlen(buffer) - 1;
		 strchr(WHITESPACE, *sptr) != NULL;
		 sptr --)
	      /* do nothing */ ;
	    if (sptr - buffer > blocks[blocknum].rcol)
	      blocks[blocknum].rcol = sptr - buffer;
	  }

				/* get the next line; blank line means end of block */
	do
	  {
	    if (fgets(buffer, LINEBUFLEN, fp) == NULL)
	      { nptr = NULL; break; }
	    strcpy(bufcpy, buffer);

	    /* Per-sequence annotation is attributed to the sequence
	     * line most recently read (currnum-1).
	     */
	    if      (strncmp(buffer, "#=SS", 4) == 0) ainfo->sqinfo[currnum-1].flags |= SQINFO_SS;
	    else if (strncmp(buffer, "#=SA", 4) == 0) ainfo->sqinfo[currnum-1].flags |= SQINFO_SA;
	    else if (strncmp(buffer, "#=CS", 4) == 0) have_cs = 1;
	    else if (strncmp(buffer, "#=RF", 4) == 0) have_rf = 1;

	    if ((nptr = strtok(bufcpy, WHITESPACE)) == NULL)
	      break;
	  } while (strchr(commentsyms, *nptr) != NULL);
      }

				/* check that number of sequences matches expected */
      if (blocknum == 0)
	num = currnum;
      else if (currnum != num)
	Die("Parse error in ReadSELEX()");
      blocknum++;

				/* get first line of next block
				 * (non-comment, non-blank) */
      do
	{
	  if (fgets(buffer, LINEBUFLEN, fp) == NULL) { nptr = NULL; break; }
	  strcpy(bufcpy, buffer);
	}
      while ((nptr = strtok(bufcpy, WHITESPACE)) == NULL ||
	     (strchr(commentsyms, *nptr) != NULL));
    }


  /***************************************************
   * Get ready for second pass:
   *   figure out the length of the alignment
   *   malloc space
   *   rewind the file
   ***************************************************/

  /* Total alignment length is the sum of the block widths. */
  alen = 0;
  for (currblock = 0; currblock < blocknum; currblock++)
    alen += blocks[currblock].rcol - blocks[currblock].lcol + 1;

  rewind(fp);

  /* allocations. we can't use AllocateAlignment because of
   * the way we already used ainfo->sqinfo.
   */
  aseqs     = (char **) MallocOrDie (num * sizeof(char *));
  if (have_cs)
    ainfo->cs = (char *) MallocOrDie ((alen+1) * sizeof(char));
  if (have_rf)
    ainfo->rf = (char *) MallocOrDie ((alen+1) * sizeof(char));



  for (i = 0; i < num; i++)
    {
      aseqs[i]     = (char *) MallocOrDie ((alen+1) * sizeof(char));
      if (ainfo->sqinfo[i].flags & SQINFO_SS)
	ainfo->sqinfo[i].ss = (char *) MallocOrDie ((alen+1) * sizeof(char));
      if (ainfo->sqinfo[i].flags & SQINFO_SA)
	ainfo->sqinfo[i].sa = (char *) MallocOrDie ((alen+1) * sizeof(char));
    }

  ainfo->alen = alen;
  ainfo->nseq = num;
  ainfo->wgt  = (float *) MallocOrDie (sizeof(float) * num);
  FSet(ainfo->wgt, num, 1.0);

  /***************************************************
   * Second pass across file. Parse header; assemble sequences
   ***************************************************/
  /* We've now made a complete first pass over the file. We know how
   * many blocks it contains, we know the number of seqs in the first
   * block, and we know every block has the same number of blocks;
   * so we can be a bit more cavalier about error-checking as we
   * make the second pass.
   */

  /* Look for header
   */
  headnum = 0;
  for (;;)
    {
      if (fgets(buffer, LINEBUFLEN, fp) == NULL)
	Die("Parse error in ReadSELEX()");
      strcpy(bufcpy, buffer);
      if ((nptr = strtok(bufcpy, WHITESPACE)) == NULL) continue; /* skip blank lines */

      if      (strcmp(nptr, "#=AU") == 0  && (sptr = strtok(NULL, "\n")) != NULL)
	ainfo->au = Strdup(sptr);
      else if (strcmp(nptr, "#=ID") == 0 && (sptr = strtok(NULL, "\n")) != NULL)
	ainfo->name = Strdup(sptr);
      else if (strcmp(nptr, "#=AC") == 0 && (sptr = strtok(NULL, "\n")) != NULL)
	ainfo->acc  = Strdup(sptr);
      else if (strcmp(nptr, "#=DE") == 0 && (sptr = strtok(NULL, "\n")) != NULL)
	ainfo->desc = Strdup(sptr);
      else if (strcmp(nptr, "#=GA") == 0)
	{
	  /* two numeric fields, both required */
	  if ((sptr = strtok(NULL, WHITESPACE)) == NULL)
	    Die("Parse error in #=GA line in ReadSELEX()");
	  ainfo->ga1 = atof(sptr);

	  if ((sptr = strtok(NULL, WHITESPACE)) == NULL)
	    Die("Parse error in #=GA line in ReadSELEX()");
	  ainfo->ga2 = atof(sptr);

	  ainfo->flags |= AINFO_GA;
	}
      else if (strcmp(nptr, "#=TC") == 0)
	{
	  /* two numeric fields, both required */
	  if ((sptr = strtok(NULL, WHITESPACE)) == NULL)
	    Die("Parse error in #=TC line in ReadSELEX()");
	  ainfo->tc1 = atof(sptr);

	  if ((sptr = strtok(NULL, WHITESPACE)) == NULL)
	    Die("Parse error in #=TC line in ReadSELEX()");
	  ainfo->tc2 = atof(sptr);

	  ainfo->flags |= AINFO_TC;
	}
      else if (strcmp(nptr, "#=NC") == 0)
	{
	  /* two numeric fields, both required */
	  if ((sptr = strtok(NULL, WHITESPACE)) == NULL)
	    Die("Parse error in #=NC line in ReadSELEX()");
	  ainfo->nc1 = atof(sptr);

	  if ((sptr = strtok(NULL, WHITESPACE)) == NULL)
	    Die("Parse error in #=NC line in ReadSELEX()");
	  ainfo->nc2 = atof(sptr);

	  ainfo->flags |= AINFO_NC;
	}
      else if (strcmp(nptr, "#=SQ") == 0)      /* per-sequence header info */
	{
	  /* NOTE(review): headnum is not checked against num; more
	   * #=SQ lines than sequences would index past sqinfo/wgt --
	   * confirm.
	   */
				/* first field is the name */
	  if ((sptr = strtok(NULL, WHITESPACE)) == NULL)
	    Die("Parse error in #=SQ line in ReadSELEX()");
	  if (strcmp(sptr, ainfo->sqinfo[headnum].name) != 0) warn_names = TRUE;

				/* second field is the weight */
	  if ((sptr = strtok(NULL, WHITESPACE)) == NULL)
	    Die("Parse error in #=SQ line in ReadSELEX()");
	  if (!IsReal(sptr))
	    Die("Parse error in #=SQ line in ReadSELEX(): weight is not a number");
	  ainfo->wgt[headnum] = atof(sptr);

				/* third field is database source id */
	  if ((sptr = strtok(NULL, WHITESPACE)) == NULL)
	    Die("Parse error in #=SQ line in ReadSELEX(): incomplete line");
	  SetSeqinfoString(&(ainfo->sqinfo[headnum]), sptr, SQINFO_ID);

				/* fourth field is database accession number */
	  if ((sptr = strtok(NULL, WHITESPACE)) == NULL)
	    Die("Parse error in #=SQ line in ReadSELEX(): incomplete line");
	  SetSeqinfoString(&(ainfo->sqinfo[headnum]), sptr, SQINFO_ACC);

				/* fifth field is start..stop::olen */
	  if ((sptr = strtok(NULL, ".:")) == NULL)
	    Die("Parse error in #=SQ line in ReadSELEX(): incomplete line");
	  SetSeqinfoString(&(ainfo->sqinfo[headnum]), sptr, SQINFO_START);

	  if ((sptr = strtok(NULL, ".:")) == NULL)
	    Die("Parse error in #=SQ line in ReadSELEX(): incomplete line");
	  SetSeqinfoString(&(ainfo->sqinfo[headnum]), sptr, SQINFO_STOP);

	  if ((sptr = strtok(NULL, ":\t ")) == NULL)
	    Die("Parse error in #=SQ line in ReadSELEX(): incomplete line");
	  SetSeqinfoString(&(ainfo->sqinfo[headnum]), sptr, SQINFO_OLEN);

				/* rest of line is optional description */
	  if ((sptr = strtok(NULL, "\n")) != NULL)
	    SetSeqinfoString(&(ainfo->sqinfo[headnum]), sptr, SQINFO_DESC);

	  headnum++;
	}
      /* The header ends at the first per-column annotation line or the
       * first line that is neither a comment nor a recognized header.
       * buffer/nptr still hold that line when the loop exits.
       */
      else if (strcmp(nptr, "#=CS") == 0) break;
      else if (strcmp(nptr, "#=RF") == 0) break;
      else if (strchr(commentsyms, *nptr) == NULL) break; /* non-comment, non-header */
    }


  currlen = 0;
  for (currblock = 0 ; currblock < blocknum; currblock++)
    {
				/* parse the block */
      seqidx = 0;
      while (nptr != NULL)
	{
				/* Consensus structure */
	  if (strcmp(nptr, "#=CS") == 0)
	    {
	      if (! copy_alignment_line(ainfo->cs, currlen, strlen(nptr)-1,
					buffer, blocks[currblock].lcol, blocks[currblock].rcol, (char) '.'))
		Die("Parse error in #=CS line in ReadSELEX()");
	    }

				/* Reference coordinates */
	  else if (strcmp(nptr, "#=RF") == 0)
	    {
	      if (! copy_alignment_line(ainfo->rf, currlen, strlen(nptr)-1,
					buffer, blocks[currblock].lcol, blocks[currblock].rcol, (char) '.'))
		Die("Parse error in #=RF line in ReadSELEX()");
	    }
				/* Individual secondary structure */
	  /* NOTE(review): #=SS / #=SA use seqidx-1, i.e. the sequence
	   * line just copied; an annotation line appearing before the
	   * first sequence of a block would index -1 -- confirm.
	   */
	  else if (strcmp(nptr, "#=SS") == 0)
	    {
	      if (! copy_alignment_line(ainfo->sqinfo[seqidx-1].ss, currlen, strlen(nptr)-1,
					buffer, blocks[currblock].lcol, blocks[currblock].rcol, (char) '.'))
		Die("Parse error in #=SS line in ReadSELEX()");
	    }

				/* Side chain % surface accessibility code */
	  else if (strcmp(nptr, "#=SA") == 0)
	    {
	      if (! copy_alignment_line(ainfo->sqinfo[seqidx-1].sa, currlen, strlen(nptr)-1,
					buffer, blocks[currblock].lcol, blocks[currblock].rcol, (char) '.'))
		Die("Parse error in #=SA line in ReadSELEX()");
	    }
				/* Aligned sequence; avoid unparsed machine comments */
	  else if (strncmp(nptr, "#=", 2) != 0)
	    {
	      if (! copy_alignment_line(aseqs[seqidx], currlen, strlen(nptr)-1,
					buffer, blocks[currblock].lcol, blocks[currblock].rcol, (char) '.'))
		Die("Parse error in alignment line in ReadSELEX()");
	      seqidx++;
	    }

				/* get next line */
	  for (;;)
	    {
	      nptr = NULL;
	      if (fgets(buffer, LINEBUFLEN, fp) == NULL) break;	/* EOF */
	      strcpy(bufcpy, buffer);
	      if ((nptr = strtok(bufcpy, WHITESPACE)) == NULL) break; /* blank */
	      if (strncmp(buffer, "#=", 2) == 0) break;      /* machine comment */
	      if (strchr(commentsyms, *nptr) == NULL) break; /* data */
	    }
	} /* end of a block */

      /* The next block's columns are appended after this block's. */
      currlen += blocks[currblock].rcol - blocks[currblock].lcol + 1;

				/* get line 1 of next block */
      for (;;)
	{
	  if (fgets(buffer, LINEBUFLEN, fp) == NULL) break; /* no data */
	  strcpy(bufcpy, buffer);
	  if ((nptr = strtok(bufcpy, WHITESPACE)) == NULL) continue; /* blank */
	  if (strncmp(buffer, "#=", 2) == 0) break;      /* machine comment */
	  if (strchr(commentsyms, *nptr) == NULL) break; /* non-comment */
	}
    } /* end of the file */

  /* Lengths in sqinfo are for raw sequence (ungapped),
   * and SS, SA are 0..rlen-1 not 0..alen-1.
   * Only the seqs with structures come out of here with lengths set.
   */
  for (seqidx = 0; seqidx < num; seqidx++)
    {
      int apos, rpos;           /* aligned position; raw (ungapped) position */
				/* secondary structures: squeeze out, in place,
				 * the columns where the sequence has a gap */
      if (ainfo->sqinfo[seqidx].flags & SQINFO_SS)
	{
	  for (apos = rpos = 0; apos < alen; apos++)
	    if (! isgap(aseqs[seqidx][apos]))
	      {
		ainfo->sqinfo[seqidx].ss[rpos] = ainfo->sqinfo[seqidx].ss[apos];
		rpos++;
	      }
	  ainfo->sqinfo[seqidx].ss[rpos] = '\0';
	}
				/* Surface accessibility: same squeeze */
      if (ainfo->sqinfo[seqidx].flags & SQINFO_SA)
	{
	  for (apos = rpos = 0; apos < alen; apos++)
	    if (! isgap(aseqs[seqidx][apos]))
	      {
		ainfo->sqinfo[seqidx].sa[rpos] = ainfo->sqinfo[seqidx].sa[apos];
		rpos++;
	      }
	  ainfo->sqinfo[seqidx].sa[rpos] = '\0';
	}
    }

				/* NULL-terminate all the strings */
  if (ainfo->rf != NULL) ainfo->rf[alen] = '\0';
  if (ainfo->cs != NULL) ainfo->cs[alen] = '\0';
  for (seqidx = 0; seqidx < num; seqidx++)
    aseqs[seqidx][alen] = '\0';

				/* find raw sequence lengths for sqinfo */
  for (seqidx = 0; seqidx < num; seqidx++)
    {
      count = 0;
      for (sptr = aseqs[seqidx]; *sptr != '\0'; sptr++)
	if (!isgap(*sptr)) count++;
      ainfo->sqinfo[seqidx].len    = count;
      ainfo->sqinfo[seqidx].flags |= SQINFO_LEN;
    }


  /***************************************************
   * Garbage collection and return
   ***************************************************/
  free(blocks);
  if (warn_names)
    Warn("sequences may be in different orders in blocks of %s?", afp->fname);

  /* Convert back to MSA structure. (Wasteful kludge.)
   * The intermediate aseqs/ainfo are released once the MSA is built.
   */
  msa = MSAFromAINFO(aseqs, ainfo);
  MSAVerifyParse(msa);
  FreeAlignment(aseqs, ainfo);
  return msa;
}
int main(int argc, char **argv) { const char *hmmfile; /* file to read HMMs from */ HMMFILE *hmmfp; /* opened hmmfile for reading */ const char *seqfile; /* file to read target sequence from */ char **rseq; /* raw, unaligned sequences */ SQINFO *sqinfo; /* info associated with sequences */ char **dsq; /* digitized raw sequences */ int nseq; /* number of sequences */ char **aseq; /* aligned sequences */ AINFO ainfo; /* alignment information */ float *wgt; /* per-sequence weights */ int i; struct plan7_s *hmm; /* HMM to align to */ struct p7trace_s **tr; /* traces for aligned sequences */ int be_quiet; /* TRUE to suppress verbose banner */ int matchonly; /* TRUE to show only match state syms */ const char *outfile; /* optional alignment output file */ FILE *ofp; /* handle on alignment output file */ AjPFile ajwithali; /* name of additional alignment file to align */ AjPFile ajmapali; /* name of additional alignment file to map */ AjBool ajmatch=ajFalse; AjPFile outf=NULL; AjPStr outfname=NULL; AjPFile inf=NULL; AjPStr infname=NULL; AjPSeqset seqset=NULL; AjPStr ajseqfile=NULL; char* mapali=NULL; char* withali=NULL; #ifdef MEMDEBUG unsigned long histid1, histid2, orig_size, current_size; orig_size = malloc_inuse(&histid1); fprintf(stderr, "[... 
memory debugging is ON ...]\n"); #endif /*********************************************** * Parse command line ***********************************************/ matchonly = FALSE; outfile = NULL; be_quiet = FALSE; withali = NULL; mapali = NULL; embInitPV("ohmmalign",argc,argv,"HMMER",VERSION); ajmatch = ajAcdGetBoolean("matchonly"); if(ajmatch) matchonly=TRUE; else matchonly=FALSE; ajmapali = ajAcdGetInfile("mapalifile"); if (ajmapali) mapali = ajCharNewS(ajFileGetNameS(ajmapali)); ajFileClose(&ajmapali); ajwithali = ajAcdGetInfile("withalifile"); if (ajwithali) withali = ajCharNewS(ajFileGetNameS(ajwithali)); ajFileClose(&ajwithali); be_quiet=TRUE; outf = ajAcdGetOutfile("outfile"); outfname = ajStrNewC((char *)ajFileGetNameC(outf)); if(*ajStrGetPtr(outfname)>31) ajFileClose(&outf); outfile = ajStrGetPtr(outfname); inf = ajAcdGetInfile("hmmfile"); infname = ajStrNewC((char *)ajFileGetNameC(inf)); ajFileClose(&inf); hmmfile = ajStrGetPtr(infname); seqset = ajAcdGetSeqset("sequences"); ajseqfile = ajStrNewC(ajStrGetPtr(seqset->Filename)); seqfile = ajStrGetPtr(ajseqfile); /*********************************************** * Open HMM file (might be in HMMERDB or current directory). * Read a single HMM from it. * * Currently hmmalign disallows the J state and * only allows one domain per sequence. To preserve * the S/W entry information, the J state is explicitly * disallowed, rather than calling a Plan7*Config() function. * this is a workaround in 2.1 for the 2.0.x "yo!" bug. ***********************************************/ if ((hmmfp = HMMFileOpen(hmmfile, "HMMERDB")) == NULL) ajFatal("Failed to open HMM file %s\n", hmmfile); if (!HMMFileRead(hmmfp, &hmm)) ajFatal("Failed to read any HMMs from %s\n", hmmfile); HMMFileClose(hmmfp); if (hmm == NULL) ajFatal("HMM file %s corrupt or in incorrect format? 
Parse failed", hmmfile); hmm->xt[XTE][MOVE] = 1.; /* only 1 domain/sequence ("global" alignment) */ hmm->xt[XTE][LOOP] = 0.; P7Logoddsify(hmm, TRUE); /* do we have the map we might need? */ if (mapali != NULL && ! (hmm->flags & PLAN7_MAP)) ajFatal("HMMER: HMM file %s has no map; you can't use --mapali.", hmmfile); /*********************************************** * Open sequence file in current directory. * Read all seqs from it. ***********************************************/ /* if (! SeqfileFormat(seqfile, &format, NULL)) switch (squid_errno) { case SQERR_NOFILE: ajFatal("Sequence file %s could not be opened for reading", seqfile); case SQERR_FORMAT: default: ajFatal("Failed to determine format of sequence file %s", seqfile); } if (! ReadMultipleRseqs(seqfile, format, &rseq, &sqinfo, &nseq)) ajFatal("Failed to read any sequences from file %s", seqfile); */ emboss_rseqs(seqset,&rseq,&sqinfo,&nseq); /*********************************************** * Show the banner ***********************************************/ be_quiet=TRUE; if (! be_quiet) { /* Banner(stdout, banner); */ printf( "HMM file: %s\n", hmmfile); printf( "Sequence file: %s\n", seqfile); printf("- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n\n"); } /*********************************************** * Do the work ***********************************************/ /* Allocations and initializations. */ dsq = MallocOrDie(sizeof(char *) * nseq); tr = MallocOrDie(sizeof(struct p7trace_s *) * nseq); /* Align each sequence to the model, collect traces */ for (i = 0; i < nseq; i++) { dsq[i] = DigitizeSequence(rseq[i], sqinfo[i].len); if (P7ViterbiSize(sqinfo[i].len, hmm->M) <= RAMLIMIT) (void) P7Viterbi(dsq[i], sqinfo[i].len, hmm, &(tr[i])); else (void) P7SmallViterbi(dsq[i], sqinfo[i].len, hmm, &(tr[i])); } /* Include an aligned alignment, if desired. 
*/ if (mapali != NULL) include_alignment(mapali, hmm, TRUE, &rseq, &dsq, &sqinfo, &tr, &nseq); if (withali != NULL) include_alignment(withali, hmm, FALSE, &rseq, &dsq, &sqinfo, &tr, &nseq); /* Turn traces into a multiple alignment */ wgt = MallocOrDie(sizeof(float) * nseq); FSet(wgt, nseq, 1.0); P7Traces2Alignment(dsq, sqinfo, wgt, nseq, hmm->M, tr, matchonly, &aseq, &ainfo); /*********************************************** * Output the alignment ***********************************************/ if (outfile != NULL && (ofp = fopen(outfile, "w")) != NULL) { WriteSELEX(ofp, aseq, &ainfo, 50); printf("Alignment saved in file %s\n", outfile); fclose(ofp); } else WriteSELEX(stdout, aseq, &ainfo, 50); /*********************************************** * Cleanup and exit ***********************************************/ for (i = 0; i < nseq; i++) { P7FreeTrace(tr[i]); FreeSequence(rseq[i], &(sqinfo[i])); free(dsq[i]); } FreeAlignment(aseq, &ainfo); FreePlan7(hmm); free(sqinfo); free(rseq); free(dsq); free(wgt); free(tr); SqdClean(); ajStrDel(&outfname); ajStrDel(&infname); ajStrDel(&ajseqfile); #ifdef MEMDEBUG current_size = malloc_inuse(&histid2); if (current_size != orig_size) malloc_list(2, histid1, histid2); else fprintf(stderr, "[No memory leaks.]\n"); #endif ajSeqsetDel(&seqset); ajFileClose(&ajwithali); ajFileClose(&ajmapali); embExit(); return 0; }