int main(int argc, char **argv) { AjPAlign align; AjPSeq a; AjPSeq b; AjPSeqout seqout; AjPStr m; AjPStr n; AjPStr merged = NULL; ajuint lena; ajuint lenb; const char *p; const char *q; ajint start1 = 0; ajint start2 = 0; float *path; ajint *compass; AjPMatrixf matrix; AjPSeqCvt cvt = 0; float **sub; float gapopen; float gapextend; ajulong maxarr = 1000; ajulong len; /* arbitrary. realloc'd if needed */ size_t stlen; float score; ajint begina; ajint beginb; embInit("merger", argc, argv); a = ajAcdGetSeq("asequence"); b = ajAcdGetSeq("bsequence"); seqout = ajAcdGetSeqout("outseq"); matrix = ajAcdGetMatrixf("datafile"); gapopen = ajAcdGetFloat("gapopen"); gapextend = ajAcdGetFloat("gapextend"); align = ajAcdGetAlign("outfile"); gapopen = ajRoundFloat(gapopen, 8); gapextend = ajRoundFloat(gapextend, 8); AJCNEW(path, maxarr); AJCNEW(compass, maxarr); /* ** make the two sequences lowercase so we can show which one we are ** using in the merge by uppercasing it */ ajSeqFmtLower(a); ajSeqFmtLower(b); m = ajStrNew(); n = ajStrNew(); sub = ajMatrixfGetMatrix(matrix); cvt = ajMatrixfGetCvt(matrix); begina = ajSeqGetBegin(a); beginb = ajSeqGetBegin(b); lena = ajSeqGetLen(a); lenb = ajSeqGetLen(b); if(lenb > (ULONG_MAX/(ajulong)(lena+1))) ajFatal("Sequences too big. Try 'supermatcher'"); len = lena*lenb; if(len>maxarr) { ajDebug("merger: resize path, len to %d (%d * $d)\n", len, lena, lenb); stlen = (size_t) len; AJCRESIZE(path,stlen); AJCRESIZE(compass,stlen); maxarr=len; } p = ajSeqGetSeqC(a); q = ajSeqGetSeqC(b); ajStrAssignC(&m,""); ajStrAssignC(&n,""); score = embAlignPathCalc(p,q,lena,lenb,gapopen,gapextend,path,sub,cvt, compass, ajFalse); /*score = embAlignScoreNWMatrix(path,compass,gapopen,gapextend, a,b,lena,lenb,sub,cvt, &start1,&start2);*/ embAlignWalkNWMatrix(path,a,b,&m,&n,lena,lenb, &start1,&start2,gapopen, gapextend,compass); /* ** now construct the merged sequence, uppercase the bits of the two ** input sequences which are used in the merger */ merger_Merge(align, &merged,p,q,m,n,start1,start2, ajSeqGetNameC(a),ajSeqGetNameC(b)); embAlignReportGlobal(align, a, b, m, n, start1, start2, gapopen, gapextend, score, matrix, begina, beginb); ajAlignWrite(align); ajAlignReset(align); /* write the merged sequence */ ajSeqAssignSeqS(a, merged); ajSeqoutWriteSeq(seqout, a); ajSeqoutClose(seqout); ajSeqoutDel(&seqout); ajSeqDel(&a); ajSeqDel(&b); ajAlignClose(align); ajAlignDel(&align); ajStrDel(&merged); AJFREE(compass); AJFREE(path); ajStrDel(&n); ajStrDel(&m); embExit(); return 0; }
int main(int argc, char **argv) { AjPAlign align; AjPSeqall seqall; AjPSeq a; AjPSeq b; AjPStr alga; AjPStr algb; AjPStr ss; ajuint lena; ajuint lenb; const char *p; const char *q; ajint start1 = 0; ajint start2 = 0; ajint *compass; float* ix; float* iy; float* m; AjPMatrixf matrix; AjPSeqCvt cvt = 0; float **sub; float gapopen; float gapextend; float endgapopen; float endgapextend; size_t maxarr = 1000; /* arbitrary. realloc'd if needed */ size_t len; float score; AjBool dobrief = ajTrue; AjBool endweight = ajFalse; /* should end gap penalties be applied */ float id = 0.; float sim = 0.; float idx = 0.; float simx = 0.; AjPStr tmpstr = NULL; embInit("needle", argc, argv); matrix = ajAcdGetMatrixf("datafile"); a = ajAcdGetSeq("asequence"); ajSeqTrim(a); seqall = ajAcdGetSeqall("bsequence"); gapopen = ajAcdGetFloat("gapopen"); gapextend = ajAcdGetFloat("gapextend"); endgapopen = ajAcdGetFloat("endopen"); endgapextend = ajAcdGetFloat("endextend"); dobrief = ajAcdGetBoolean("brief"); endweight = ajAcdGetBoolean("endweight"); align = ajAcdGetAlign("outfile"); gapopen = ajRoundFloat(gapopen, 8); gapextend = ajRoundFloat(gapextend, 8); AJCNEW0(compass, maxarr); AJCNEW0(m, maxarr); AJCNEW0(ix, maxarr); AJCNEW0(iy, maxarr); alga = ajStrNew(); algb = ajStrNew(); ss = ajStrNew(); sub = ajMatrixfGetMatrix(matrix); cvt = ajMatrixfGetCvt(matrix); lena = ajSeqGetLen(a); while(ajSeqallNext(seqall,&b)) { ajSeqTrim(b); lenb = ajSeqGetLen(b); if(lenb > (LONG_MAX/(size_t)(lena+1))) ajDie("Sequences too big. Try 'stretcher'"); len = (size_t)lena*(size_t)lenb; if(len>maxarr) { AJCRESIZETRY0(compass,(size_t)maxarr,len); if(!compass) ajDie("Sequences too big. Try 'stretcher'"); AJCRESIZETRY0(m,(size_t)maxarr,len); if(!m) ajDie("Sequences too big. Try 'stretcher'"); AJCRESIZETRY0(ix,(size_t)maxarr,len); if(!ix) ajDie("Sequences too big. Try 'stretcher'"); AJCRESIZETRY0(iy,(size_t)maxarr,len); if(!iy) ajDie("Sequences too big. Try 'stretcher'"); maxarr=len; } p = ajSeqGetSeqC(a); q = ajSeqGetSeqC(b); ajStrAssignC(&alga,""); ajStrAssignC(&algb,""); score = embAlignPathCalcWithEndGapPenalties(p, q, lena, lenb, gapopen, gapextend, endgapopen, endgapextend, &start1, &start2, sub, cvt, m, ix, iy, compass, ajFalse, endweight); embAlignWalkNWMatrixUsingCompass(p, q, &alga, &algb, lena, lenb, &start1, &start2, compass); embAlignReportGlobal(align, a, b, alga, algb, start1, start2, gapopen, gapextend, score, matrix, ajSeqGetOffset(a), ajSeqGetOffset(b)); if(!dobrief) { embAlignCalcSimilarity(alga,algb,sub,cvt,lena,lenb,&id,&sim,&idx, &simx); ajFmtPrintS(&tmpstr,"Longest_Identity = %5.2f%%\n", id); ajFmtPrintAppS(&tmpstr,"Longest_Similarity = %5.2f%%\n", sim); ajFmtPrintAppS(&tmpstr,"Shortest_Identity = %5.2f%%\n", idx); ajFmtPrintAppS(&tmpstr,"Shortest_Similarity = %5.2f%%", simx); ajAlignSetSubHeaderApp(align, tmpstr); } ajAlignWrite(align); ajAlignReset(align); } ajAlignClose(align); ajAlignDel(&align); ajSeqallDel(&seqall); ajSeqDel(&a); ajSeqDel(&b); AJFREE(compass); AJFREE(ix); AJFREE(iy); AJFREE(m); ajStrDel(&alga); ajStrDel(&algb); ajStrDel(&ss); ajStrDel(&tmpstr); embExit(); return 0; }
int main(int argc, char **argv) { /* ACD data item variables */ AjPFile hmmfile = NULL; AjPSeqset seqfile = NULL; AjPFile mapali = NULL; AjPFile withali = NULL; AjPAlign o = NULL; AjBool m = ajFalse; AjBool q = ajFalse; /* Housekeeping variables */ AjPStr cmd = NULL; AjPStr fmt = NULL; AjBool fmtok = ajFalse; AjPStr rnd = NULL; AjPSeqout rndo = NULL; /* ACD file processing */ embInitPV("ehmmalign",argc,argv,"HMMERNEW",VERSION); hmmfile = ajAcdGetInfile("hmmfile"); seqfile = ajAcdGetSeqset("seqfile"); mapali = ajAcdGetInfile("mapali"); withali = ajAcdGetInfile("withali"); o = ajAcdGetAlign("o"); m = ajAcdGetBoolean("m"); q = ajAcdGetBoolean("q"); /* MAIN APPLICATION CODE */ /* 1. Housekeeping */ cmd = ajStrNew(); fmt = ajStrNew(); rnd = ajStrNew(); /* 2. Re-write seqfile to a temporary file in a format (fasta) HMMER can understand. We cannot just pass the name of seqfile to HMMER as the name provided might be a USA which HMMER would not understand. */ ajFilenameSetTempname(&rnd); rndo = ajSeqoutNew(); if(!ajSeqoutOpenFilename(rndo, rnd)) ajFatal("Terminal ajSeqFileNewOut failure. Email EMBOSS helpdesk!\n"); ajSeqoutSetFormatC(rndo, "fasta"); ajSeqoutWriteSet(rndo, seqfile); ajSeqoutClose(rndo); ajSeqoutDel(&rndo); /* 3. Build hmmalign command line */ /* Command line is built in this order: i. Application name. ii. HMMER 'options' (in order they appear in ACD file) iii.HMMER 'options' (that don't appear in ACD file) iv. HMMER & new parameters. */ ajFmtPrintS(&cmd, "%S ", ajAcdGetpathC("hmmalign")); if(mapali) ajFmtPrintAppS(&cmd, " --mapali %s ", ajFileGetNameC(mapali)); if(withali) ajFmtPrintAppS(&cmd, " --withali %s ", ajFileGetNameC(withali)); if(m) ajStrAppendC(&cmd, " -m "); if(q) ajStrAppendC(&cmd, " -q "); /* Ensure output alignment is in user-specified format. */ fmtok=ajTrue; ajStrAssignS(&fmt, ajAlignGetFormat(o)); /* fasta and a2m are identical formats. */ if(ajStrMatchC(fmt, "fasta")) ajStrAssignC(&fmt, "A2M"); else if(ajStrMatchC(fmt, "a2m")) ajStrAssignC(&fmt, "A2M"); else if(ajStrMatchC(fmt, "msf")) ajStrAssignC(&fmt, "MSF"); else if(ajStrMatchC(fmt, "phylip")) ajStrAssignC(&fmt, "PHYLIP"); /* hmmer also supports stockholm, SELEX & Clustal output, EMBOSS does not. EMBOSS supports unknown/multiple/simple and srs output, hmmer does not. */ else fmtok = ajFalse; if(!fmtok) { /* This could be replaced with code to reformat the file. */ ajWarn("Specified output alignment format ('o' ACD option) is " "not understood by HMMER. Using stockholm format instead."); ajStrAssignC(&fmt, "Stockholm"); } /* rnd is the name of the rewritten seqfile. MUST specify FASTA format explicitly. */ ajFmtPrintAppS(&cmd, " --informat FASTA --outformat %S -o %s %s %S", fmt, ajAlignGetFilename(o), ajFileGetNameC(hmmfile), rnd); /* 4. Close ACD files */ ajFileClose(&hmmfile); ajSeqsetDel(&seqfile); ajFileClose(&mapali); ajFileClose(&withali); ajAlignClose(o); ajAlignDel(&o); /* 5. Call hmmalign */ ajFmtPrint("\n%S\n\n", cmd); ajSysExecS(cmd); /* 6. Exit cleanly */ ajSysFileUnlinkS(rnd); ajStrDel(&cmd); ajStrDel(&fmt); ajStrDel(&rnd); embExit(); return 0; }
int main(int argc, char **argv) { AjPSeqall queryseqs; AjPSeqset targetseqs; AjPSeq queryseq; const AjPSeq targetseq; AjPStr queryaln = 0; AjPStr targetaln = 0; AjPFile errorf; AjBool show = ajFalse; const char *queryseqc; const char *targetseqc; AjPMatrixf matrix; AjPSeqCvt cvt = 0; float **sub; ajint *compass = NULL; float *path = NULL; float gapopen; float gapextend; float score; float minscore; ajuint j, k; ajint querystart = 0; ajint targetstart = 0; ajint queryend = 0; ajint targetend = 0; ajint width = 0; AjPTable kmers = 0; ajint wordlen = 6; ajint oldmax = 0; ajint newmax = 0; ajuint ntargetseqs; ajuint nkmers; AjPAlign align = NULL; EmbPWordMatch maxmatch; /* match with maximum score */ /* Cursors for the current sequence being scanned, ** i.e., until which location it was scanned. ** Separate cursor/location entries for each sequence in the seqset. */ ajuint* lastlocation; EmbPWordRK* wordsw = NULL; AjPList* matchlist = NULL; embInit("supermatcher", argc, argv); matrix = ajAcdGetMatrixf("datafile"); queryseqs = ajAcdGetSeqall("asequence"); targetseqs= ajAcdGetSeqset("bsequence"); gapopen = ajAcdGetFloat("gapopen"); gapextend = ajAcdGetFloat("gapextend"); wordlen = ajAcdGetInt("wordlen"); align = ajAcdGetAlign("outfile"); errorf = ajAcdGetOutfile("errorfile"); width = ajAcdGetInt("width"); /* width for banded Smith-Waterman */ minscore = ajAcdGetFloat("minscore"); gapopen = ajRoundFloat(gapopen, 8); gapextend = ajRoundFloat(gapextend, 8); sub = ajMatrixfGetMatrix(matrix); cvt = ajMatrixfGetCvt(matrix); embWordLength(wordlen); /* seqset sequence is the reference sequence for SAM format */ ajAlignSetRefSeqIndx(align, 1); ajSeqsetTrim(targetseqs); ntargetseqs = ajSeqsetGetSize(targetseqs); AJCNEW0(matchlist, ntargetseqs); /* get tables of words */ for(k=0;k<ntargetseqs;k++) { targetseq = ajSeqsetGetseqSeq(targetseqs, k); embWordGetTable(&kmers, targetseq); ajDebug("Number of distinct kmers found so far: %d\n", ajTableGetLength(kmers)); } AJCNEW0(lastlocation, ntargetseqs); if(ajTableGetLength(kmers)<1) ajErr("no kmers found"); nkmers = embWordRabinKarpInit(kmers, &wordsw, wordlen, targetseqs); while(ajSeqallNext(queryseqs,&queryseq)) { ajSeqTrim(queryseq); queryaln = ajStrNewRes(1+ajSeqGetLen(queryseq)); ajDebug("Read '%S'\n", ajSeqGetNameS(queryseq)); for(k=0;k<ntargetseqs;k++) { lastlocation[k]=0; matchlist[k] = ajListstrNew(); } embWordRabinKarpSearch(ajSeqGetSeqS(queryseq), targetseqs, (const EmbPWordRK*)wordsw, wordlen, nkmers, matchlist, lastlocation, ajFalse); for(k=0;k<ajSeqsetGetSize(targetseqs);k++) { targetseq = ajSeqsetGetseqSeq(targetseqs, k); ajDebug("Processing '%S'\n", ajSeqGetNameS(targetseq)); if(ajListGetLength(matchlist[k])==0) { ajFmtPrintF(errorf, "No wordmatch start points for " "%s vs %s. No alignment\n", ajSeqGetNameC(queryseq),ajSeqGetNameC(targetseq)); embWordMatchListDelete(&matchlist[k]); continue; } /* only the maximum match is used as seed * (if there is more than one location with the maximum match * only the first one is used) * TODO: we should add a new option to make above limit optional */ maxmatch = embWordMatchFirstMax(matchlist[k]); supermatcher_findendpoints(maxmatch,targetseq, queryseq, &targetstart, &querystart, &targetend, &queryend); targetaln=ajStrNewRes(1+ajSeqGetLen(targetseq)); queryseqc = ajSeqGetSeqC(queryseq); targetseqc = ajSeqGetSeqC(targetseq); ajStrAssignC(&queryaln,""); ajStrAssignC(&targetaln,""); ajDebug("++ %S v %S start:%d %d end:%d %d\n", ajSeqGetNameS(targetseq), ajSeqGetNameS(queryseq), targetstart, querystart, targetend, queryend); newmax = (targetend-targetstart+2)*width; if(newmax > oldmax) { AJCRESIZE0(path,oldmax,newmax); AJCRESIZE0(compass,oldmax,newmax); oldmax=newmax; ajDebug("++ memory re/allocation for path/compass arrays" " to size: %d\n", newmax); } else { AJCSET0(path,newmax); AJCSET0(compass,newmax); } ajDebug("Calling embAlignPathCalcSWFast " "%d..%d [%d/%d] %d..%d [%d/%d] width:%d\n", querystart, queryend, (queryend - querystart + 1), ajSeqGetLen(queryseq), targetstart, targetend, (targetend - targetstart + 1), ajSeqGetLen(targetseq), width); score = embAlignPathCalcSWFast(&targetseqc[targetstart], &queryseqc[querystart], targetend-targetstart+1, queryend-querystart+1, 0,width, gapopen,gapextend, path,sub,cvt, compass,show); if(score>minscore) { embAlignWalkSWMatrixFast(path,compass,gapopen,gapextend, targetseq,queryseq, &targetaln,&queryaln, targetend-targetstart+1, queryend-querystart+1, 0,width, &targetstart,&querystart); if(!ajAlignFormatShowsSequences(align)) { ajAlignDefineCC(align, ajStrGetPtr(targetaln), ajStrGetPtr(queryaln), ajSeqGetNameC(targetseq), ajSeqGetNameC(queryseq)); ajAlignSetScoreR(align, score); } else { ajDebug(" queryaln:%S \ntargetaln:%S\n", queryaln,targetaln); embAlignReportLocal(align, queryseq, targetseq, queryaln, targetaln, querystart, targetstart, gapopen, gapextend, score, matrix, 1 + ajSeqGetOffset(queryseq), 1 + ajSeqGetOffset(targetseq) ); } ajAlignWrite(align); ajAlignReset(align); } ajStrDel(&targetaln); embWordMatchListDelete(&matchlist[k]); } ajStrDel(&queryaln); } for(k=0;k<nkmers;k++) { AJFREE(wordsw[k]->seqindxs); AJFREE(wordsw[k]->nSeqMatches); for(j=0;j<wordsw[k]->nseqs;j++) AJFREE(wordsw[k]->locs[j]); AJFREE(wordsw[k]->nnseqlocs); AJFREE(wordsw[k]->locs); AJFREE(wordsw[k]); } embWordFreeTable(&kmers); if(!ajAlignFormatShowsSequences(align)) ajMatrixfDel(&matrix); AJFREE(path); AJFREE(compass); AJFREE(kmers); AJFREE(wordsw); AJFREE(matchlist); AJFREE(lastlocation); ajAlignClose(align); ajAlignDel(&align); ajSeqallDel(&queryseqs); ajSeqDel(&queryseq); ajSeqsetDel(&targetseqs); ajFileClose(&errorf); embExit(); return 0; }
int main(int argc, char **argv) { AjPSeqset seqset; AjPSeqall seqall; AjPSeq queryseq; const AjPSeq targetseq; ajint wordlen; AjPTable wordsTable = NULL; AjPList* matchlist = NULL; AjPFile logfile; AjPFeattable* seqsetftables = NULL; AjPFeattable seqallseqftable = NULL; AjPFeattabOut ftoutforseqsetseq = NULL; AjPFeattabOut ftoutforseqallseq = NULL; AjPAlign align = NULL; AjIList iter = NULL; ajint targetstart; ajint querystart; ajint len; ajuint i, j; ajulong nAllMatches = 0; ajulong sumAllScore = 0; AjBool dumpAlign = ajTrue; AjBool dumpFeature = ajTrue; AjBool checkmode = ajFalse; EmbPWordRK* wordsw = NULL; ajuint npatterns = 0; ajuint seqsetsize; ajuint nmatches; ajuint* nmatchesseqset; ajuint* lastlocation; /* Cursors for Rabin-Karp search. */ /* Shows until what point the query sequence was * scanned for a pattern sequences in the seqset. */ char* paddedheader = NULL; const char* header; AjPStr padding; header = "Pattern %S #pat-sequences #all-matches avg-match-length\n"; padding = ajStrNew(); embInit("wordmatch", argc, argv); wordlen = ajAcdGetInt("wordsize"); seqset = ajAcdGetSeqset("asequence"); seqall = ajAcdGetSeqall("bsequence"); logfile = ajAcdGetOutfile("logfile"); dumpAlign = ajAcdGetToggle("dumpalign"); dumpFeature = ajAcdGetToggle("dumpfeat"); if(dumpAlign) { align = ajAcdGetAlign("outfile"); ajAlignSetExternal(align, ajTrue); } seqsetsize = ajSeqsetGetSize(seqset); ajSeqsetTrim(seqset); AJCNEW0(matchlist, seqsetsize); AJCNEW0(seqsetftables, seqsetsize); AJCNEW0(nmatchesseqset, seqsetsize); if (dumpFeature) { ftoutforseqsetseq = ajAcdGetFeatout("aoutfeat"); ftoutforseqallseq = ajAcdGetFeatout("boutfeat"); } checkmode = !dumpFeature && !dumpAlign; embWordLength(wordlen); ajFmtPrintF(logfile, "Small sequence/file for constructing" " target patterns: %S\n", ajSeqsetGetUsa(seqset)); ajFmtPrintF(logfile, "Large sequence/file to be scanned" " for patterns: %S\n", ajSeqallGetUsa(seqall)); ajFmtPrintF(logfile, "Number of sequences in the patterns file: %u\n", seqsetsize); ajFmtPrintF(logfile, "Pattern/word length: %u\n", wordlen); for(i=0;i<seqsetsize;i++) { targetseq = ajSeqsetGetseqSeq(seqset, i); embWordGetTable(&wordsTable, targetseq); } AJCNEW0(lastlocation, seqsetsize); if(ajTableGetLength(wordsTable)>0) { npatterns = embWordRabinKarpInit(wordsTable, &wordsw, wordlen, seqset); ajFmtPrintF(logfile, "Number of patterns/words found: %u\n", npatterns); while(ajSeqallNext(seqall,&queryseq)) { for(i=0;i<seqsetsize;i++) { lastlocation[i]=0; if (!checkmode) matchlist[i] = ajListstrNew(); } nmatches = embWordRabinKarpSearch( ajSeqGetSeqS(queryseq), seqset, (EmbPWordRK const *)wordsw, wordlen, npatterns, matchlist, lastlocation, checkmode); nAllMatches += nmatches; if (checkmode) continue; for(i=0;i<seqsetsize;i++) { if(ajListGetLength(matchlist[i])>0) { iter = ajListIterNewread(matchlist[i]) ; while(embWordMatchIter(iter, &targetstart, &querystart, &len, &targetseq)) { if(dumpAlign) { ajAlignDefineSS(align, targetseq, queryseq); ajAlignSetScoreI(align, len); /* ungapped alignment means same length * for both sequences */ ajAlignSetSubRange(align, targetstart, 1, len, ajSeqIsReversed(targetseq), ajSeqGetLen(targetseq), querystart, 1, len, ajSeqIsReversed(queryseq), ajSeqGetLen(queryseq)); } } if(dumpAlign) { ajAlignWrite(align); ajAlignReset(align); } if(ajListGetLength(matchlist[i])>0 && dumpFeature) { embWordMatchListConvToFeat(matchlist[i], &seqsetftables[i], &seqallseqftable, targetseq, queryseq); ajFeattableWrite(ftoutforseqallseq, seqallseqftable); ajFeattableDel(&seqallseqftable); } ajListIterDel(&iter); } embWordMatchListDelete(&matchlist[i]); } } /* search completed, now report statistics */ for(i=0;i<npatterns;i++) { sumAllScore += wordsw[i]->lenMatches; for(j=0;j<wordsw[i]->nseqs;j++) nmatchesseqset[wordsw[i]->seqindxs[j]] += wordsw[i]->nSeqMatches[j]; } ajFmtPrintF(logfile, "Number of sequences in the file scanned " "for patterns: %u\n", ajSeqallGetCount(seqall)); ajFmtPrintF(logfile, "Number of all matches: %Lu" " (wordmatch finds exact matches only)\n", nAllMatches); if(nAllMatches>0) { ajFmtPrintF(logfile, "Sum of match lengths: %Lu\n", sumAllScore); ajFmtPrintF(logfile, "Average match length: %.2f\n", sumAllScore*1.0/nAllMatches); ajFmtPrintF(logfile, "\nDistribution of the matches among pattern" " sequences:\n"); ajFmtPrintF(logfile, "-----------------------------------------" "-----------\n"); for(i=0;i<ajSeqsetGetSize(seqset);i++) { if (nmatchesseqset[i]>0) ajFmtPrintF(logfile, "%-42s: %8u\n", ajSeqGetNameC(ajSeqsetGetseqSeq(seqset, i)), nmatchesseqset[i]); ajFeattableWrite(ftoutforseqsetseq, seqsetftables[i]); ajFeattableDel(&seqsetftables[i]); } ajFmtPrintF(logfile, "\nPattern statistics:\n"); ajFmtPrintF(logfile, "-------------------\n"); if(wordlen>7) ajStrAppendCountK(&padding, ' ', wordlen-7); paddedheader = ajFmtString(header,padding); ajFmtPrintF(logfile, paddedheader); for(i=0;i<npatterns;i++) if (wordsw[i]->nMatches>0) ajFmtPrintF(logfile, "%-7s: %12u %12u %17.2f\n", wordsw[i]->word->fword, wordsw[i]->nseqs, wordsw[i]->nMatches, wordsw[i]->lenMatches*1.0/wordsw[i]->nMatches); } } for(i=0;i<npatterns;i++) { for(j=0;j<wordsw[i]->nseqs;j++) AJFREE(wordsw[i]->locs[j]); AJFREE(wordsw[i]->locs); AJFREE(wordsw[i]->seqindxs); AJFREE(wordsw[i]->nnseqlocs); AJFREE(wordsw[i]->nSeqMatches); AJFREE(wordsw[i]); } embWordFreeTable(&wordsTable); AJFREE(wordsw); AJFREE(matchlist); AJFREE(lastlocation); AJFREE(nmatchesseqset); AJFREE(seqsetftables); if(dumpAlign) { ajAlignClose(align); ajAlignDel(&align); } if(dumpFeature) { ajFeattabOutDel(&ftoutforseqsetseq); ajFeattabOutDel(&ftoutforseqallseq); } ajFileClose(&logfile); ajSeqallDel(&seqall); ajSeqsetDel(&seqset); ajSeqDel(&queryseq); ajStrDel(&padding); AJFREE(paddedheader); embExit(); return 0; }
int main(int argc, char **argv) { AjPSeqall seqall; AjPSeq seq = NULL; AjPFile inf = NULL; AjPStr strand = NULL; AjPStr substr = NULL; AjPStr name = NULL; AjPStr mname = NULL; AjPStr tname = NULL; AjPStr pname = NULL; AjPStr line = NULL; AjPStr cons = NULL; AjPStr m = NULL; AjPStr n = NULL; AjPAlign align= NULL; /* JISON, replaces AjPOutfile outf */ ajint type; ajint begin; ajint end; ajulong len; ajint i; ajint j; float **fmatrix=NULL; ajint mlen; float maxfs; ajint thresh; float gapopen; float gapextend; float opencoeff; float extendcoeff; const char *p; ajulong maxarr = 1000; ajulong alen; float *path; ajint *compass; size_t stlen; embInit("prophet", argc, argv); seqall = ajAcdGetSeqall("sequence"); inf = ajAcdGetInfile("infile"); opencoeff = ajAcdGetFloat("gapopen"); extendcoeff = ajAcdGetFloat("gapextend"); align = ajAcdGetAlign("outfile"); /*JISON replacing outfile */ opencoeff = ajRoundFloat(opencoeff, 8); extendcoeff = ajRoundFloat(extendcoeff, 8); substr = ajStrNew(); name = ajStrNew(); mname = ajStrNew(); tname = ajStrNew(); line = ajStrNew(); m = ajStrNewC(""); n = ajStrNewC(""); type = prophet_getType(inf,&tname); if(!type) ajFatal("Unrecognised profile/matrix file format"); prophet_read_profile(inf,&pname,&mname,&mlen,&gapopen,&gapextend,&thresh, &maxfs, &cons); ajAlignSetMatrixName(align, mname); AJCNEW(fmatrix, mlen); for(i=0;i<mlen;++i) { AJCNEW(fmatrix[i], AZ); if(!ajReadlineTrim(inf,&line)) ajFatal("Missing matrix line"); p = ajStrGetPtr(line); p = ajSysFuncStrtok(p," \t"); for(j=0;j<AZ;++j) { sscanf(p,"%f",&fmatrix[i][j]); p = ajSysFuncStrtok(NULL," \t"); } } AJCNEW(path, maxarr); AJCNEW(compass, maxarr); while(ajSeqallNext(seqall, &seq)) { begin = ajSeqallGetseqBegin(seqall); end = ajSeqallGetseqEnd(seqall); ajStrAssignC(&name,ajSeqGetNameC(seq)); strand = ajSeqGetSeqCopyS(seq); ajStrAssignSubC(&substr,ajStrGetPtr(strand),begin-1,end-1); len = ajStrGetLen(substr); if(len > (ULONG_MAX/(ajulong)(mlen+1))) ajFatal("Sequences too big. Try 'supermatcher'"); alen = len*mlen; if(alen>maxarr) { stlen = (size_t) alen; AJCRESIZE(path,stlen); AJCRESIZE(compass,stlen); maxarr=alen; } ajStrAssignC(&m,""); ajStrAssignC(&n,""); /* JISON used to be prophet_scan_profile(substr,pname,name,mlen,fmatrix, outf,cons,opencoeff, extendcoeff,path,compass,&m,&n,len); */ /* JISON new call and reset align */ prophet_scan_profile(substr,name,pname,mlen,fmatrix, align,cons,opencoeff, extendcoeff,path,compass,&m,&n,(ajint)len); ajAlignReset(align); ajStrDel(&strand); } for(i=0;i<mlen;++i) AJFREE (fmatrix[i]); AJFREE (fmatrix); AJFREE(path); AJFREE(compass); ajStrDel(&line); ajStrDel(&cons); ajStrDel(&name); ajStrDel(&pname); ajStrDel(&mname); ajStrDel(&tname); ajStrDel(&substr); ajStrDel(&m); ajStrDel(&n); ajSeqDel(&seq); ajFileClose(&inf); ajAlignClose(align); ajAlignDel(&align); ajSeqallDel(&seqall); embExit(); return 0; }