static void main_loop_serial(struct plan7_s *hmm, const char* seq, int seqLen, struct threshold_s *thresh, int do_forward, int do_null2, int do_xnu, struct histogram_s *histogram, struct tophit_s *ghit, struct tophit_s *dhit, int *ret_nseq, TaskStateInfo& ti) { //get HMMERTaskLocalData HMMERTaskLocalData *tld = getHMMERTaskLocalData(); alphabet_s *al = &tld->al; struct dpmatrix_s *mx; // DP matrix, growable struct p7trace_s *tr; // traceback unsigned char *dsq; // digitized target sequence float sc; // score of an HMM search double pvalue; // pvalue of an HMM score double evalue; // evalue of an HMM score // Create a DP matrix; initially only two rows big, but growable; // we overalloc by 25 rows (L dimension) when we grow; not growable // in model dimension, since we know the hmm size mx = CreatePlan7Matrix(1, hmm->M, 25, 0); assert(seqLen > 0); dsq = DigitizeSequence(seq, seqLen); if (do_xnu && al->Alphabet_type == hmmAMINO) { XNU(dsq, seqLen); } // 1. Recover a trace by Viterbi. // In extreme cases, the alignment may be literally impossible; // in which case, the score comes out ridiculously small (but not // necessarily <= -INFTY, because we're not terribly careful // about underflow issues), and tr will be returned as NULL. if (P7ViterbiSpaceOK(seqLen, hmm->M, mx)) { sc = P7Viterbi(dsq, seqLen, hmm, mx, &tr); } else { sc = P7SmallViterbi(dsq, seqLen, hmm, mx, &tr, ti.progress); } // 2. If we're using Forward scores, calculate the // whole sequence score; this overrides anything // PostprocessSignificantHit() is going to do to the per-seq score. if (do_forward) { sc = P7Forward(dsq, seqLen, hmm, NULL); if (do_null2) sc -= TraceScoreCorrection(hmm, tr, dsq); } // 2. Store score/pvalue for global alignment; will sort on score, // which in hmmsearch is monotonic with E-value. // Keep all domains in a significant sequence hit. 
// We can only make a lower bound estimate of E-value since // we don't know the final value of nseq yet, so the list // of hits we keep in memory is >= the list we actually // output. // pvalue = PValue(hmm, sc); evalue = thresh->Z ? (double) thresh->Z * pvalue : (double) pvalue; if (sc >= thresh->globT && evalue <= thresh->globE) { sc = PostprocessSignificantHit(ghit, dhit, tr, hmm, dsq, seqLen, (char *)"sequence", //todo: sqinfo.name, NULL, NULL, do_forward, sc, do_null2, thresh, FALSE); // FALSE-> not hmmpfam mode, hmmsearch mode } AddToHistogram(histogram, sc); P7FreeTrace(tr); free(dsq); FreePlan7Matrix(mx); return; }
/* Function: EmitSequence()
 * Date:     SRE, Sun Mar  8 12:28:03 1998 [St. Louis]
 *
 * Purpose:  Given a model, sample a sequence and/or traceback by walking
 *           the model's states from N until the terminal state T is
 *           reached, drawing each transition and each emitted symbol
 *           with FChoose().
 *
 * Args:     hmm     - the model
 *           ret_dsq - RETURN: generated digitized sequence (pass NULL if unwanted)
 *           ret_L   - RETURN: length of generated sequence (skipped if NULL)
 *           ret_tr  - RETURN: generated trace (pass NULL if unwanted)
 *
 * Returns:  void. When ret_dsq / ret_tr are non-NULL the caller receives
 *           the allocated buffer / trace; otherwise they are released here.
 */
void
EmitSequence(struct plan7_s *hmm, char **ret_dsq, int *ret_L, struct p7trace_s **ret_tr)
{
  struct p7trace_s *tr;
  enum p7stype type;            /* current state type */
  int   k;                      /* current node index */
  char *dsq;                    /* generated sequence, digitized */
  int   L;                      /* next free position in dsq (1-based; dsq[0] is a sentinel) */
  int   alloc_tlen;             /* allocated space for traceback */
  int   alloc_L;                /* allocated space for sequence */
  int   tpos;                   /* position in traceback */
  int   sym;                    /* a generated symbol index, or -1 if the state emits nothing */
  float t[4];                   /* little array for choosing M transition from */

  /* Initialize; allocations. Both buffers start at 64 entries and grow in
   * steps of 64.
   */
  P7AllocTrace(64, &tr);
  alloc_tlen = 64;
  dsq = MallocOrDie(sizeof(char) * 64);
  alloc_L = 64;

  /* The trace always begins S -> N; dsq[0] holds the leading sentinel byte. */
  TraceSet(tr, 0, STS, 0, 0);
  TraceSet(tr, 1, STN, 0, 0);
  dsq[0] = (char) Alphabet_iupac;
  L      = 1;
  k      = 0;
  type   = STN;
  tpos   = 2;

  while (type != STT)
    {
      /* Deal with state transition: pick the next state type and node index
       * from the current state's transition distribution.
       */
      switch (type) {
      case STB: type = STM; k = FChoose(hmm->begin+1, hmm->M) + 1; break;
      case STI: type = (FChoose(hmm->t[k]+TIM, 2) == 0)    ? STM : STI; if (type == STM) k++; break;
      case STN: type = (FChoose(hmm->xt[XTN], 2) == LOOP)  ? STN : STB; k = 0; break;
      case STE: type = (FChoose(hmm->xt[XTE], 2) == LOOP)  ? STJ : STC; k = 0; break;
      case STC: type = (FChoose(hmm->xt[XTC], 2) == LOOP)  ? STC : STT; k = 0; break;
      case STJ: type = (FChoose(hmm->xt[XTJ], 2) == LOOP)  ? STJ : STB; k = 0; break;

      case STD:
        if (k < hmm->M) {
          type = (FChoose(hmm->t[k]+TDM, 2) == 0) ? STM : STD;
          k++;
        } else {
          /* the last node can only exit to E */
          type = STE;
          k = 0;
        }
        break;

      case STM:
        if (k < hmm->M) {
          /* Four choices out of a match state: the three transitions stored
           * in hmm->t[k] plus the end probability hmm->end[k].
           */
          FCopy(t, hmm->t[k], 3);
          t[3] = hmm->end[k];
          switch (FChoose(t,4)) {
          case 0: k++;  type = STM; break;
          case 1:       type = STI; break;
          case 2: k++;  type = STD; break;
          case 3: k=0;  type = STE; break;
          default: Die("never happens");
          }
        } else {
          /* the last node can only exit to E */
          k    = 0;
          type = STE;
        }
        break;

      case STT:
      case STBOGUS:
      default:
        Die("can't happen.");
      }

      /* Choose a symbol emission, if necessary. M and I draw from their own
       * distributions; N, C and J draw from the null distribution, but only
       * when the previous trace entry is the same state (i.e. on a self-loop).
       */
      sym = -1;
      if      (type == STM) sym = FChoose(hmm->mat[k], Alphabet_size);
      else if (type == STI) sym = FChoose(hmm->ins[k], Alphabet_size);
      else if ((type == STN && tr->statetype[tpos-1] == STN) ||
               (type == STC && tr->statetype[tpos-1] == STC) ||
               (type == STJ && tr->statetype[tpos-1] == STJ))
        sym = FChoose(hmm->null, Alphabet_size);

      /* Add to the traceback; deal with realloc if necessary.
       * Non-emitting entries record sequence position 0.
       */
      TraceSet(tr, tpos, type, k, (sym != -1) ? L : 0);
      tpos++;
      if (tpos == alloc_tlen) {
        alloc_tlen += 64;
        P7ReallocTrace(tr, alloc_tlen);
      }

      /* Add to the digitized seq; deal with realloc, if necessary */
      if (sym != -1) {
        dsq[L] = (char) sym;
        L++;
        if (L+1 == alloc_L) {   /* L+1 leaves room for sentinel byte + \0 */
          alloc_L += 64;
          dsq = ReallocOrDie(dsq, sizeof(char) * alloc_L);
        }
      }
    }

  /* Finish off the trace */
  tr->tlen = tpos;

  /* Finish off the dsq with sentinel byte and null terminator.
   * Emitted Sequence length is L-1.
   */
  dsq[L]   = (char) Alphabet_iupac;
  dsq[L+1] = '\0';
  L--;

  /* Return: hand results to the caller, or release what was not asked for */
  if (ret_dsq != NULL) *ret_dsq = dsq; else free(dsq);
  if (ret_L   != NULL) *ret_L   = L;
  if (ret_tr  != NULL) *ret_tr  = tr;  else P7FreeTrace(tr);
  return;
}
int main(void) { struct p7trace_s *tr; /* traceback of an alignment */ int master_tid; /* PVM TID of our master */ char *hmmfile; /* file to read HMM(s) from */ HMMFILE *hmmfp; /* opened hmmfile for reading */ struct plan7_s *hmm; char *seq; char *dsq; int len; int nhmm; /* number of HMM to work on */ float sc; int my_idx = -1; /* my index, 0..nslaves-1 */ float globT; /* T parameter: keep only hits > globT bits */ double globE; /* E parameter: keep hits < globE E-value */ double pvalue; /* Z*pvalue = Evalue */ int Z; /* nseq to base E value calculation on */ int send_trace; /* TRUE if score is significant */ int do_xnu; /* TRUE to do XNU filter on seq */ int do_forward; /* TRUE to use Forward() scores not Viterbi */ int do_null2; /* TRUE to correct scores w/ ad hoc null2 */ int alphatype; /* alphabet type, hmmAMINO or hmmNUCLEIC */ int code; /* return code after initialization */ /* Register leave_pvm() cleanup function so any exit() call * first calls pvm_exit(). */ if (atexit(leave_pvm) != 0) { pvm_exit(); Die("slave couldn't register leave_pvm()"); } /***************************************************************** * initialization. * Master broadcasts to us: * 1) len of HMM file name (int) * 2) name of HMM file (string) * 3) length of sequence string (int) * 4) sequence (string) * 5) globT threshold * 6) globE threshold * 7) Z * 8) do_xnu flag * 9) do_forward flag * 10) do_null2 flag * 11) alphabet type * We receive the broadcast and open the files. ******************************************************************/ master_tid = pvm_parent(); /* who's our master? 
*/ pvm_recv(master_tid, HMMPVM_INIT); pvm_upkint(&len, 1, 1); hmmfile = MallocOrDie(sizeof(char *) * (len+1)); pvm_upkstr(hmmfile); pvm_upkint(&len, 1, 1); seq = MallocOrDie(sizeof(char *) * (len+1)); pvm_upkstr(seq); pvm_upkfloat(&globT, 1, 1); pvm_upkdouble(&globE, 1, 1); pvm_upkint(&Z, 1, 1); pvm_upkint(&do_xnu, 1, 1); pvm_upkint(&do_forward, 1, 1); pvm_upkint(&do_null2, 1, 1); pvm_upkint(&alphatype, 1, 1); SetAlphabet(alphatype); /* Open HMM file (maybe in HMMERDB) */ code = HMMPVM_OK; if ((hmmfp = HMMFileOpen(hmmfile, "HMMERDB")) == NULL) code = HMMPVM_NO_HMMFILE; else if (hmmfp->gsi == NULL) code = HMMPVM_NO_INDEX; /* report our status. */ pvm_initsend(PvmDataDefault); pvm_pkint(&code, 1, 1); pvm_send(master_tid, HMMPVM_RESULTS); dsq = DigitizeSequence(seq, len); if (do_xnu) XNU(dsq, len); /***************************************************************** * Main loop. * Receive an integer 0..nhmm-1 for which HMM to search against. * If we receive a -1, we shut down. *****************************************************************/ for (;;) { pvm_recv(master_tid, HMMPVM_WORK); pvm_upkint(&nhmm, 1, 1); if (my_idx < 0) my_idx = nhmm; /* first time thru, remember what index we are. */ if (nhmm == -1) break; /* shutdown signal */ /* move to our assigned HMM in the HMM file, and read it */ HMMFilePositionByIndex(hmmfp, nhmm); if (! 
HMMFileRead(hmmfp, &hmm)) Die("unexpected end of HMM file"); if (hmm == NULL) Die("unexpected failure to parse HMM file"); P7Logoddsify(hmm, TRUE); /* Score sequence, do alignment (Viterbi), recover trace */ if (P7ViterbiSize(len, hmm->M) <= RAMLIMIT) { SQD_DPRINTF1(("P7Viterbi(): Estimated size %d Mb\n", P7ViterbiSize(len, hmm->M))); sc = P7Viterbi(dsq, len, hmm, &tr); } else { SQD_DPRINTF1(("P7SmallViterbi() called; %d Mb > %d\n", P7ViterbiSize(len, hmm->M), RAMLIMIT)); sc = P7SmallViterbi(dsq, len, hmm, &tr); } if (do_forward) sc = P7Forward(dsq, len, hmm, NULL); if (do_null2) sc -= TraceScoreCorrection(hmm, tr, dsq); pvalue = PValue(hmm, sc); send_trace = (sc > globT && pvalue * (float) Z < globE) ? 1 : 0; /* return output */ pvm_initsend(PvmDataDefault); pvm_pkint(&my_idx, 1, 1); /* tell master who we are */ pvm_pkstr(hmm->name); /* double check that we did the right thing */ pvm_pkfloat(&sc, 1, 1); pvm_pkdouble(&pvalue, 1, 1); pvm_pkint(&send_trace, 1, 1); /* flag for whether a trace structure is coming */ if (send_trace) PVMPackTrace(tr); pvm_send(master_tid, HMMPVM_RESULTS); /* cleanup */ FreePlan7(hmm); P7FreeTrace(tr); } /*********************************************** * Cleanup, return. ***********************************************/ HMMFileClose(hmmfp); free(seq); free(dsq); free(hmmfile); return 0; }
int main(int argc, char **argv) { const char *hmmfile; /* file to read HMMs from */ FILE *fp; /* output file handle */ HMMFILE *hmmfp; /* opened hmmfile for reading */ struct plan7_s *hmm; /* HMM to generate from */ int L; /* length of a sequence */ int i; /* counter over sequences */ char *ofile; /* output sequence file */ int nseq; /* number of seqs to sample */ int seed; /* random number generator seed */ int be_quiet; /* TRUE to silence header/footer */ int do_alignment; /* TRUE to output in aligned format */ int do_consensus; /* TRUE to do a single consensus seq */ AjBool ajselex; AjBool ajcons; AjPFile inf=NULL; AjPFile outf=NULL; AjPStr instr=NULL; AjPStr outstr=NULL; #ifdef MEMDEBUG unsigned long histid1, histid2, orig_size, current_size; orig_size = malloc_inuse(&histid1); fprintf(stderr, "[... memory debugging is ON ...]\n"); #endif /*********************************************** * Parse command line ***********************************************/ nseq = 10; be_quiet = FALSE; do_alignment = FALSE; do_consensus = FALSE; ofile = NULL; embInitPV("ohmmemit",argc,argv,"HMMER",VERSION); ajselex = ajAcdGetBoolean("selex"); ajcons = ajAcdGetBoolean("consensus"); nseq = ajAcdGetInt("number"); seed = ajAcdGetInt("seed"); inf = ajAcdGetInfile("infile"); outf = ajAcdGetOutfile("outfile"); if(!seed) seed = time ((time_t *) NULL); if(ajselex) do_alignment=TRUE; else do_alignment=FALSE; if(ajcons) do_consensus=TRUE; else do_consensus=FALSE; instr = ajStrNewC((char *)ajFileGetNameC(inf)); outstr = ajStrNewC((char *)ajFileGetNameC(outf)); hmmfile = ajStrGetPtr(instr); sre_srandom(seed); if (do_alignment && do_consensus) ajFatal("Sorry, -selex and -consensus are incompatible.\n"); if (nseq != 10 && do_consensus) ajWarn("-consensus overrides -number (# of sampled seqs)"); /*********************************************** * Open HMM file (might be in HMMERDB or current directory). * Read a single HMM from it. 
***********************************************/ if ((hmmfp = HMMFileOpen(hmmfile, "HMMERDB")) == NULL) ajFatal("Failed to open HMM file %s\n", hmmfile); if (!HMMFileRead(hmmfp, &hmm)) ajFatal("Failed to read any HMMs from %s\n", hmmfile); HMMFileClose(hmmfp); if (hmm == NULL) ajFatal("HMM file %s corrupt or in incorrect format? Parse failed", hmmfile); /* Configure the HMM to shut off N,J,C emission: so we * do a simple single pass through the model. */ Plan7NakedConfig(hmm); Plan7Renormalize(hmm); /*********************************************** * Open the output file, or stdout ***********************************************/ fp = ajFileGetFileptr(outf); /*********************************************** * Show the options banner ***********************************************/ be_quiet=TRUE; if (! be_quiet) { printf("HMM file: %s\n", hmmfile); if (! do_consensus) { printf("Number of seqs: %d\n", nseq); printf("Random seed: %d\n", seed); } printf("- - - - - - - - - - - - - - - - - - - - - - - - - " "- - - - - - -\n\n"); } /*********************************************** * Do the work. * If we're generating an alignment, we have to collect * all our traces, then output. If we're generating unaligned * sequences, we can emit one at a time. 
***********************************************/ if (do_consensus) { char *seq; SQINFO sqinfo; /* info about sequence (name/desc) */ EmitConsensusSequence(hmm, &seq, NULL, &L, NULL); strcpy(sqinfo.name, "consensus"); sqinfo.len = L; sqinfo.flags = SQINFO_NAME | SQINFO_LEN; WriteSeq(fp, kPearson, seq, &sqinfo); free(seq); } else if (do_alignment) { struct p7trace_s **tr; char **dsq; SQINFO *sqinfo; char **aseq; AINFO ainfo; float *wgt; dsq = MallocOrDie(sizeof(char *) * nseq); tr = MallocOrDie(sizeof(struct p7trace_s *) * nseq); sqinfo = MallocOrDie(sizeof(SQINFO) * nseq); wgt = MallocOrDie(sizeof(float) * nseq); FSet(wgt, nseq, 1.0); for (i = 0; i < nseq; i++) { EmitSequence(hmm, &(dsq[i]), &L, &(tr[i])); sprintf(sqinfo[i].name, "seq%d", i+1); sqinfo[i].len = L; sqinfo[i].flags = SQINFO_NAME | SQINFO_LEN; } P7Traces2Alignment(dsq, sqinfo, wgt, nseq, hmm->M, tr, FALSE, &aseq, &ainfo); /* Output the alignment */ WriteSELEX(fp, aseq, &ainfo, 50); if (ofile != NULL && !be_quiet) printf("Alignment saved in file %s\n", ofile); /* Free memory */ for (i = 0; i < nseq; i++) { P7FreeTrace(tr[i]); free(dsq[i]); } FreeAlignment(aseq, &ainfo); free(sqinfo); free(dsq); free(wgt); free(tr); } else /* unaligned sequence output */ { struct p7trace_s *tr; char *dsq; char *seq; SQINFO sqinfo; for (i = 0; i < nseq; i++) { EmitSequence(hmm, &dsq, &L, &tr); sprintf(sqinfo.name, "seq%d", i+1); sqinfo.len = L; sqinfo.flags = SQINFO_NAME | SQINFO_LEN; seq = DedigitizeSequence(dsq, L); WriteSeq(fp, kPearson, seq, &sqinfo); P7FreeTrace(tr); free(dsq); free(seq); } } ajFileClose(&outf); FreePlan7(hmm); SqdClean(); #ifdef MEMDEBUG current_size = malloc_inuse(&histid2); if (current_size != orig_size) malloc_list(2, histid1, histid2); else fprintf(stderr, "[No memory leaks.]\n"); #endif ajStrDel(&instr); ajStrDel(&outstr); ajFileClose(&inf); ajFileClose(&outf); embExit(); return 0; }
/* Function: EmitConsensusSequence() * Date: SRE, Wed Nov 11 11:08:59 1998 [St. Louis] * * Purpose: Generate a "consensus sequence". For the purposes * of a profile HMM, this is defined as: * - for each node: * - if StateOccupancy() says that M is used * with probability >= 0.5, this M is "consensus". * Then, choose maximally likely residue. * if P>0.5 (protein) or P>0.9 (DNA), make * it upper case; else make it lower case. * - if StateOccupancy() says that I * is used with P >= 0.5, this I is "consensus"; * use it 1/(1-TII) times (its expectation value). * Generate an "x" from each I. * * The function expects that the model is config'ed * by Plan7NakedConfig(): that is, for a single global pass * with no N,C,J involvement. * * * Args: hmm - the model * ret_seq - RETURN: consensus sequence (pass NULL if unwanted) * ret_dsq - RETURN: digitized consensus sequence (pass NULL if unwanted) * ret_L - RETURN: length of generated sequence * ret_tr - RETURN: generated trace (pass NULL if unwanted) * * Returns: void */ void EmitConsensusSequence(struct plan7_s *hmm, char **ret_seq, char **ret_dsq, int *ret_L, struct p7trace_s **ret_tr) { struct p7trace_s *tr; /* RETURN: traceback */ char *dsq, *seq; /* sequence in digitized and undigitized form */ float *mp, *ip, *dp; /* state occupancies from StateOccupancy() */ int nmat, ndel, nins; /* number of matches, deletes, inserts used */ int k; /* counter for nodes */ int tpos; /* position in trace */ int i; /* position in seq (equiv pos in dsq is i+1 */ int x; /* symbol choice (M) or # symbols (I) */ float mthresh; /* >= this, show symbol as upper case */ if (Alphabet_type == hmmAMINO) mthresh = 0.5; else mthresh = 0.9; StateOccupancy(hmm, &mp, &ip, &dp); /* First pass: how many states do we need in the trace? * how long will the sequence be? 
*/ nmat = ndel = nins = 0; for (k = 1; k <= hmm->M; k++) { if (mp[k] >= 0.5) nmat++; else ndel++; if (k < hmm->M && ip[k] >= 0.5) nins += (int) (1.f / (1.f - hmm->t[k][TII])); } /* Allocations */ P7AllocTrace(6 + nmat + ndel + nins, &tr); dsq = MallocOrDie(sizeof(char) * (nmat+nins+3)); seq = MallocOrDie(sizeof(char) * (nmat+nins+1)); /* Main pass. * Construct consensus trace, seq, and dsq. */ TraceSet(tr, 0, STS, 0, 0); TraceSet(tr, 1, STN, 0, 0); TraceSet(tr, 2, STB, 0, 0); dsq[0] = Alphabet_iupac; /* guard byte */ tpos = 3; i = 0; for (k = 1; k <= hmm->M; k++) { if (mp[k] >= 0.5) { x = FMax(hmm->mat[k], Alphabet_size); TraceSet(tr, tpos, STM, k, i+1); seq[i] = Alphabet[x]; dsq[i+1] = x; if (hmm->mat[k][x] < mthresh) seq[i] = tolower((int) seq[i]); i++; tpos++; } else { TraceSet(tr, tpos, STD, k, 0); tpos++; } if (k < hmm->M && ip[k] >= 0.5) { x = (int) (1.f / (1.f - hmm->t[k][TII])); while (x--) { TraceSet(tr, tpos, STI, k, i+1); seq[i] = 'x'; dsq[i+1] = Alphabet_iupac - 1; i++; tpos++; } } } TraceSet(tr, tpos, STE, 0, 0); tpos++; TraceSet(tr, tpos, STC, 0, 0); tpos++; TraceSet(tr, tpos, STT, 0, 0); tpos++; dsq[i+1] = Alphabet_iupac; free(mp); free(ip); free(dp); if (ret_seq != NULL) *ret_seq = seq; else free(seq); if (ret_dsq != NULL) *ret_dsq = dsq; else free(dsq); if (ret_L != NULL) *ret_L = i; if (ret_tr != NULL) *ret_tr = tr; else P7FreeTrace(tr); }
int main(int argc, char **argv) { const char *hmmfile; /* file to read HMMs from */ HMMFILE *hmmfp; /* opened hmmfile for reading */ const char *seqfile; /* file to read target sequence from */ char **rseq; /* raw, unaligned sequences */ SQINFO *sqinfo; /* info associated with sequences */ char **dsq; /* digitized raw sequences */ int nseq; /* number of sequences */ char **aseq; /* aligned sequences */ AINFO ainfo; /* alignment information */ float *wgt; /* per-sequence weights */ int i; struct plan7_s *hmm; /* HMM to align to */ struct p7trace_s **tr; /* traces for aligned sequences */ int be_quiet; /* TRUE to suppress verbose banner */ int matchonly; /* TRUE to show only match state syms */ const char *outfile; /* optional alignment output file */ FILE *ofp; /* handle on alignment output file */ AjPFile ajwithali; /* name of additional alignment file to align */ AjPFile ajmapali; /* name of additional alignment file to map */ AjBool ajmatch=ajFalse; AjPFile outf=NULL; AjPStr outfname=NULL; AjPFile inf=NULL; AjPStr infname=NULL; AjPSeqset seqset=NULL; AjPStr ajseqfile=NULL; char* mapali=NULL; char* withali=NULL; #ifdef MEMDEBUG unsigned long histid1, histid2, orig_size, current_size; orig_size = malloc_inuse(&histid1); fprintf(stderr, "[... 
memory debugging is ON ...]\n"); #endif /*********************************************** * Parse command line ***********************************************/ matchonly = FALSE; outfile = NULL; be_quiet = FALSE; withali = NULL; mapali = NULL; embInitPV("ohmmalign",argc,argv,"HMMER",VERSION); ajmatch = ajAcdGetBoolean("matchonly"); if(ajmatch) matchonly=TRUE; else matchonly=FALSE; ajmapali = ajAcdGetInfile("mapalifile"); if (ajmapali) mapali = ajCharNewS(ajFileGetNameS(ajmapali)); ajFileClose(&ajmapali); ajwithali = ajAcdGetInfile("withalifile"); if (ajwithali) withali = ajCharNewS(ajFileGetNameS(ajwithali)); ajFileClose(&ajwithali); be_quiet=TRUE; outf = ajAcdGetOutfile("outfile"); outfname = ajStrNewC((char *)ajFileGetNameC(outf)); if(*ajStrGetPtr(outfname)>31) ajFileClose(&outf); outfile = ajStrGetPtr(outfname); inf = ajAcdGetInfile("hmmfile"); infname = ajStrNewC((char *)ajFileGetNameC(inf)); ajFileClose(&inf); hmmfile = ajStrGetPtr(infname); seqset = ajAcdGetSeqset("sequences"); ajseqfile = ajStrNewC(ajStrGetPtr(seqset->Filename)); seqfile = ajStrGetPtr(ajseqfile); /*********************************************** * Open HMM file (might be in HMMERDB or current directory). * Read a single HMM from it. * * Currently hmmalign disallows the J state and * only allows one domain per sequence. To preserve * the S/W entry information, the J state is explicitly * disallowed, rather than calling a Plan7*Config() function. * this is a workaround in 2.1 for the 2.0.x "yo!" bug. ***********************************************/ if ((hmmfp = HMMFileOpen(hmmfile, "HMMERDB")) == NULL) ajFatal("Failed to open HMM file %s\n", hmmfile); if (!HMMFileRead(hmmfp, &hmm)) ajFatal("Failed to read any HMMs from %s\n", hmmfile); HMMFileClose(hmmfp); if (hmm == NULL) ajFatal("HMM file %s corrupt or in incorrect format? 
Parse failed", hmmfile); hmm->xt[XTE][MOVE] = 1.; /* only 1 domain/sequence ("global" alignment) */ hmm->xt[XTE][LOOP] = 0.; P7Logoddsify(hmm, TRUE); /* do we have the map we might need? */ if (mapali != NULL && ! (hmm->flags & PLAN7_MAP)) ajFatal("HMMER: HMM file %s has no map; you can't use --mapali.", hmmfile); /*********************************************** * Open sequence file in current directory. * Read all seqs from it. ***********************************************/ /* if (! SeqfileFormat(seqfile, &format, NULL)) switch (squid_errno) { case SQERR_NOFILE: ajFatal("Sequence file %s could not be opened for reading", seqfile); case SQERR_FORMAT: default: ajFatal("Failed to determine format of sequence file %s", seqfile); } if (! ReadMultipleRseqs(seqfile, format, &rseq, &sqinfo, &nseq)) ajFatal("Failed to read any sequences from file %s", seqfile); */ emboss_rseqs(seqset,&rseq,&sqinfo,&nseq); /*********************************************** * Show the banner ***********************************************/ be_quiet=TRUE; if (! be_quiet) { /* Banner(stdout, banner); */ printf( "HMM file: %s\n", hmmfile); printf( "Sequence file: %s\n", seqfile); printf("- - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -\n\n"); } /*********************************************** * Do the work ***********************************************/ /* Allocations and initializations. */ dsq = MallocOrDie(sizeof(char *) * nseq); tr = MallocOrDie(sizeof(struct p7trace_s *) * nseq); /* Align each sequence to the model, collect traces */ for (i = 0; i < nseq; i++) { dsq[i] = DigitizeSequence(rseq[i], sqinfo[i].len); if (P7ViterbiSize(sqinfo[i].len, hmm->M) <= RAMLIMIT) (void) P7Viterbi(dsq[i], sqinfo[i].len, hmm, &(tr[i])); else (void) P7SmallViterbi(dsq[i], sqinfo[i].len, hmm, &(tr[i])); } /* Include an aligned alignment, if desired. 
*/ if (mapali != NULL) include_alignment(mapali, hmm, TRUE, &rseq, &dsq, &sqinfo, &tr, &nseq); if (withali != NULL) include_alignment(withali, hmm, FALSE, &rseq, &dsq, &sqinfo, &tr, &nseq); /* Turn traces into a multiple alignment */ wgt = MallocOrDie(sizeof(float) * nseq); FSet(wgt, nseq, 1.0); P7Traces2Alignment(dsq, sqinfo, wgt, nseq, hmm->M, tr, matchonly, &aseq, &ainfo); /*********************************************** * Output the alignment ***********************************************/ if (outfile != NULL && (ofp = fopen(outfile, "w")) != NULL) { WriteSELEX(ofp, aseq, &ainfo, 50); printf("Alignment saved in file %s\n", outfile); fclose(ofp); } else WriteSELEX(stdout, aseq, &ainfo, 50); /*********************************************** * Cleanup and exit ***********************************************/ for (i = 0; i < nseq; i++) { P7FreeTrace(tr[i]); FreeSequence(rseq[i], &(sqinfo[i])); free(dsq[i]); } FreeAlignment(aseq, &ainfo); FreePlan7(hmm); free(sqinfo); free(rseq); free(dsq); free(wgt); free(tr); SqdClean(); ajStrDel(&outfname); ajStrDel(&infname); ajStrDel(&ajseqfile); #ifdef MEMDEBUG current_size = malloc_inuse(&histid2); if (current_size != orig_size) malloc_list(2, histid1, histid2); else fprintf(stderr, "[No memory leaks.]\n"); #endif ajSeqsetDel(&seqset); ajFileClose(&ajwithali); ajFileClose(&ajmapali); embExit(); return 0; }
/* Function: include_alignment()
 * Date:     SRE, Sun Jul  5 15:25:13 1998 [St. Louis]
 *
 * Purpose:  Read a multiple alignment from a file, align it to the HMM
 *           as a unit, and append its sequences and traces to the
 *           caller's arrays. With do_mapped TRUE the master trace comes
 *           from the HMM's alignment map (after a checksum comparison);
 *           otherwise it comes from P7ViterbiAlignAlignment().
 *
 * Args:     seqfile  - name of alignment file
 *           hmm      - model to align to
 *           do_mapped- TRUE if we're to use the HMM's alignment map
 *           rsq      - RETURN: array of rseqs to add to
 *           dsq      - RETURN: array of dsq to add to
 *           sqinfo   - RETURN: array of SQINFO to add to
 *           tr       - RETURN: array of traces to add to
 *           nseq     - RETURN: number of seqs
 *
 * Returns:  new, realloc'ed arrays for rsq, dsq, sqinfo, tr; nseq is
 *           increased to nseq+ainfo.nseq.
 */
void include_alignment(char *seqfile, struct plan7_s *hmm, int do_mapped,
                       char ***rsq, char ***dsq, SQINFO **sqinfo,
                       struct p7trace_s ***tr, int *nseq)
{
  int    fmt;                        /* format of alignment file          */
  char **aligned;                    /* aligned seqs                      */
  AINFO  ainfo;                      /* info that goes with aligned       */
  char **extra_dsq;                  /* digitized versions of new seqs    */
  char **extra_rseq;                 /* dealigned versions of new seqs    */
  struct p7trace_s  *master_tr;      /* master trace                      */
  struct p7trace_s **extra_tr;       /* individual traces for new seqs    */
  int    base;                       /* sequence count before merging     */
  int    added;                      /* sequences contributed by the file */
  int    j;                          /* counter over new sequences        */

  /* Work out the file format. If the first ajFatal() were ever to return,
   * control would continue into the second one, as in a switch fall-through.
   */
  if (! SeqfileFormat(seqfile, &fmt, NULL))
    {
      if (squid_errno == SQERR_NOFILE)
        ajFatal("Alignment file %s could not be opened for reading", seqfile);
      ajFatal("Failed to determine format of alignment file %s", seqfile);
    }

  /* Read the alignment and upper-case every row */
  if (! ReadAlignment(seqfile, fmt, &aligned, &ainfo))
    ajFatal("Failed to read aligned sequence file %s", seqfile);
  for (j = 0; j < ainfo.nseq; j++)
    s2upper(aligned[j]);

  /* A mapped alignment must be the one the HMM's map was built from */
  if (do_mapped && GCGMultchecksum(aligned, ainfo.nseq) != hmm->checksum)
    ajFatal("The checksums for alignment file %s and the HMM alignment map don't match.",
            seqfile);

  /* Master trace for the alignment as a whole */
  if (do_mapped)
    master_tr = MasterTraceFromMap(hmm->map, hmm->M, ainfo.alen);
  else
    master_tr = P7ViterbiAlignAlignment(aligned, &ainfo, hmm);

  /* One trace per aligned sequence, appended to the caller's traces */
  ImposeMasterTrace(aligned, ainfo.nseq, master_tr, &extra_tr);
  *tr = MergeTraceArrays(*tr, *nseq, extra_tr, ainfo.nseq);

  base  = *nseq;
  added = ainfo.nseq;

  /* Raw sequences: grow the array, then move the dealigned strings in */
  *rsq = ReallocOrDie((*rsq), sizeof(char *) * (base + added));
  DealignAseqs(aligned, added, &extra_rseq);
  for (j = 0; j < added; j++)
    (*rsq)[base + j] = extra_rseq[j];
  free(extra_rseq);

  /* Digitized sequences: same pattern */
  *dsq = ReallocOrDie((*dsq), sizeof(char *) * (base + added));
  DigitizeAlignment(aligned, &ainfo, &extra_dsq);
  for (j = 0; j < added; j++)
    (*dsq)[base + j] = extra_dsq[j];
  free(extra_dsq);

  /* Per-sequence info records are copied rather than moved */
  *sqinfo = ReallocOrDie((*sqinfo), sizeof(SQINFO) * (base + added));
  for (j = 0; j < added; j++)
    SeqinfoCopy(&((*sqinfo)[base + j]), &(ainfo.sqinfo[j]));

  *nseq = base + added;

  /* Cleanup */
  P7FreeTrace(master_tr);
  FreeAlignment(aligned, &ainfo);

  return;
}