int main(int argc, char **argv) { AjPSeqset seqset; const AjPSeq seq1; const AjPSeq seq2; ajint wordlen; AjPTable seq1MatchTable = NULL; AjPList matchlist ; AjPGraph graph = 0; ajuint i; ajuint j; float total=0; ajuint acceptableticks[]= { 1,10,50,100,200,500,1000,1500,10000,50000, 100000,500000,1000000,5000000 }; ajint numbofticks = 10; ajint gap,tickgap; AjBool boxit = AJTRUE; AjBool dumpfeat = AJFALSE; float xmargin; float ymargin; float k; char ptr[10]; float ticklen; float onefifth; AjPFeattable *tabptr = NULL; AjPFeattabOut seq1out = NULL; AjPStr sajb = NULL; float flen1; float flen2; ajuint tui; embInit("polydot", argc, argv); wordlen = ajAcdGetInt("wordsize"); seqset = ajAcdGetSeqset("sequences"); graph = ajAcdGetGraph("graph"); gap = ajAcdGetInt("gap"); boxit = ajAcdGetBoolean("boxit"); seq1out = ajAcdGetFeatout("outfeat"); dumpfeat = ajAcdGetToggle("dumpfeat"); sajb = ajStrNew(); embWordLength(wordlen); AJCNEW(lines,ajSeqsetGetSize(seqset)); AJCNEW(pts,ajSeqsetGetSize(seqset)); AJCNEW(tabptr,ajSeqsetGetSize(seqset)); for(i=0;i<ajSeqsetGetSize(seqset);i++) { seq1 = ajSeqsetGetseqSeq(seqset, i); total += ajSeqGetLen(seq1); } total +=(float)(gap*(ajSeqsetGetSize(seqset)-1)); xmargin = total*(float)0.15; ymargin = total*(float)0.15; ticklen = xmargin*(float)0.1; onefifth = xmargin*(float)0.2; i = 0; while(acceptableticks[i]*numbofticks < ajSeqsetGetLen(seqset)) i++; if(i<=13) tickgap = acceptableticks[i]; else tickgap = acceptableticks[13]; ajGraphAppendTitleS(graph, ajSeqsetGetUsa(seqset)); ajGraphOpenWin(graph, (float)0.0-xmargin,(total+xmargin)*(float)1.35, (float)0.0-ymargin, total+ymargin); ajGraphicsSetCharscale((float)0.3); for(i=0;i<ajSeqsetGetSize(seqset);i++) { which = i; seq1 = ajSeqsetGetseqSeq(seqset, i); tui = ajSeqGetLen(seq1); flen1 = (float) tui; if(embWordGetTable(&seq1MatchTable, seq1)){ /* get table of words */ for(j=0;j<ajSeqsetGetSize(seqset);j++) { seq2 = ajSeqsetGetseqSeq(seqset, j); tui = ajSeqGetLen(seq2); flen2 = (float) tui; if(boxit) ajGraphicsDrawposRect(xstart,ystart, xstart+flen1, ystart+flen2); matchlist = embWordBuildMatchTable(seq1MatchTable, seq2, ajTrue); if(matchlist) polydot_plotMatches(matchlist); if(i<j && dumpfeat) embWordMatchListConvToFeat(matchlist,&tabptr[i], &tabptr[j],seq1, seq2); if(matchlist) /* free the match structures */ embWordMatchListDelete(&matchlist); if(j==0) { for(k=0.0;k<ajSeqGetLen(seq1);k+=tickgap) { ajGraphicsDrawposLine(xstart+k,ystart,xstart+k, ystart-ticklen); sprintf(ptr,"%d",(ajint)k); ajGraphicsDrawposTextAtmid(xstart+k, ystart-(onefifth), ptr); } ajGraphicsDrawposTextAtmid( xstart+(flen1/(float)2.0), ystart-(3*onefifth), ajStrGetPtr(ajSeqsetGetseqNameS(seqset, i))); } if(i==0) { for(k=0.0;k<ajSeqGetLen(seq2);k+=tickgap) { ajGraphicsDrawposLine(xstart,ystart+k,xstart-ticklen, ystart+k); sprintf(ptr,"%d",(ajint)k); ajGraphicsDrawposTextAtend(xstart-(onefifth), ystart+k, ptr); } ajGraphicsDrawposTextAtlineJustify( xstart-(3*onefifth), ystart+(flen2/(float)2.0), xstart-(3*onefifth),ystart+flen2, ajStrGetPtr(ajSeqsetGetseqNameS(seqset, j)),0.5); } ystart += flen2+(float)gap; } } embWordFreeTable(&seq1MatchTable); seq1MatchTable = NULL; xstart += flen1+(float)gap; ystart = 0.0; } ajGraphicsDrawposTextAtstart(total+onefifth,total-(onefifth), "No. Length Lines Points Sequence"); for(i=0;i<ajSeqsetGetSize(seqset);i++) { seq1 = ajSeqsetGetseqSeq(seqset, i); ajFmtPrintS(&sajb,"%3u %6d %5d %6d %s",i+1, ajSeqGetLen(seq1),lines[i], pts[i],ajSeqGetNameC(seq1)); ajGraphicsDrawposTextAtstart(total+onefifth,total-(onefifth*(i+2)), ajStrGetPtr(sajb)); } if(dumpfeat && seq1out) { for(i=0;i<ajSeqsetGetSize(seqset);i++) { ajFeattableWrite(seq1out, tabptr[i]); ajFeattableDel(&tabptr[i]); } } ajGraphicsClose(); ajGraphxyDel(&graph); ajStrDel(&sajb); AJFREE(lines); AJFREE(pts); AJFREE(tabptr); ajSeqsetDel(&seqset); ajFeattabOutDel(&seq1out);; embExit(); return 0; }
int main(int argc, char **argv) { AjPSeqset seqset; AjPSeqall seqall; AjPSeq queryseq; const AjPSeq targetseq; ajint wordlen; AjPTable wordsTable = NULL; AjPList* matchlist = NULL; AjPFile logfile; AjPFeattable* seqsetftables = NULL; AjPFeattable seqallseqftable = NULL; AjPFeattabOut ftoutforseqsetseq = NULL; AjPFeattabOut ftoutforseqallseq = NULL; AjPAlign align = NULL; AjIList iter = NULL; ajint targetstart; ajint querystart; ajint len; ajuint i, j; ajulong nAllMatches = 0; ajulong sumAllScore = 0; AjBool dumpAlign = ajTrue; AjBool dumpFeature = ajTrue; AjBool checkmode = ajFalse; EmbPWordRK* wordsw = NULL; ajuint npatterns = 0; ajuint seqsetsize; ajuint nmatches; ajuint* nmatchesseqset; ajuint* lastlocation; /* Cursors for Rabin-Karp search. */ /* Shows until what point the query sequence was * scanned for a pattern sequences in the seqset. */ char* paddedheader = NULL; const char* header; AjPStr padding; header = "Pattern %S #pat-sequences #all-matches avg-match-length\n"; padding = ajStrNew(); embInit("wordmatch", argc, argv); wordlen = ajAcdGetInt("wordsize"); seqset = ajAcdGetSeqset("asequence"); seqall = ajAcdGetSeqall("bsequence"); logfile = ajAcdGetOutfile("logfile"); dumpAlign = ajAcdGetToggle("dumpalign"); dumpFeature = ajAcdGetToggle("dumpfeat"); if(dumpAlign) { align = ajAcdGetAlign("outfile"); ajAlignSetExternal(align, ajTrue); } seqsetsize = ajSeqsetGetSize(seqset); ajSeqsetTrim(seqset); AJCNEW0(matchlist, seqsetsize); AJCNEW0(seqsetftables, seqsetsize); AJCNEW0(nmatchesseqset, seqsetsize); if (dumpFeature) { ftoutforseqsetseq = ajAcdGetFeatout("aoutfeat"); ftoutforseqallseq = ajAcdGetFeatout("boutfeat"); } checkmode = !dumpFeature && !dumpAlign; embWordLength(wordlen); ajFmtPrintF(logfile, "Small sequence/file for constructing" " target patterns: %S\n", ajSeqsetGetUsa(seqset)); ajFmtPrintF(logfile, "Large sequence/file to be scanned" " for patterns: %S\n", ajSeqallGetUsa(seqall)); ajFmtPrintF(logfile, "Number of sequences in the patterns file: %u\n", seqsetsize); ajFmtPrintF(logfile, "Pattern/word length: %u\n", wordlen); for(i=0;i<seqsetsize;i++) { targetseq = ajSeqsetGetseqSeq(seqset, i); embWordGetTable(&wordsTable, targetseq); } AJCNEW0(lastlocation, seqsetsize); if(ajTableGetLength(wordsTable)>0) { npatterns = embWordRabinKarpInit(wordsTable, &wordsw, wordlen, seqset); ajFmtPrintF(logfile, "Number of patterns/words found: %u\n", npatterns); while(ajSeqallNext(seqall,&queryseq)) { for(i=0;i<seqsetsize;i++) { lastlocation[i]=0; if (!checkmode) matchlist[i] = ajListstrNew(); } nmatches = embWordRabinKarpSearch( ajSeqGetSeqS(queryseq), seqset, (EmbPWordRK const *)wordsw, wordlen, npatterns, matchlist, lastlocation, checkmode); nAllMatches += nmatches; if (checkmode) continue; for(i=0;i<seqsetsize;i++) { if(ajListGetLength(matchlist[i])>0) { iter = ajListIterNewread(matchlist[i]) ; while(embWordMatchIter(iter, &targetstart, &querystart, &len, &targetseq)) { if(dumpAlign) { ajAlignDefineSS(align, targetseq, queryseq); ajAlignSetScoreI(align, len); /* ungapped alignment means same length * for both sequences */ ajAlignSetSubRange(align, targetstart, 1, len, ajSeqIsReversed(targetseq), ajSeqGetLen(targetseq), querystart, 1, len, ajSeqIsReversed(queryseq), ajSeqGetLen(queryseq)); } } if(dumpAlign) { ajAlignWrite(align); ajAlignReset(align); } if(ajListGetLength(matchlist[i])>0 && dumpFeature) { embWordMatchListConvToFeat(matchlist[i], &seqsetftables[i], &seqallseqftable, targetseq, queryseq); ajFeattableWrite(ftoutforseqallseq, seqallseqftable); ajFeattableDel(&seqallseqftable); } ajListIterDel(&iter); } embWordMatchListDelete(&matchlist[i]); } } /* search completed, now report statistics */ for(i=0;i<npatterns;i++) { sumAllScore += wordsw[i]->lenMatches; for(j=0;j<wordsw[i]->nseqs;j++) nmatchesseqset[wordsw[i]->seqindxs[j]] += wordsw[i]->nSeqMatches[j]; } ajFmtPrintF(logfile, "Number of sequences in the file scanned " "for patterns: %u\n", ajSeqallGetCount(seqall)); ajFmtPrintF(logfile, "Number of all matches: %Lu" " (wordmatch finds exact matches only)\n", nAllMatches); if(nAllMatches>0) { ajFmtPrintF(logfile, "Sum of match lengths: %Lu\n", sumAllScore); ajFmtPrintF(logfile, "Average match length: %.2f\n", sumAllScore*1.0/nAllMatches); ajFmtPrintF(logfile, "\nDistribution of the matches among pattern" " sequences:\n"); ajFmtPrintF(logfile, "-----------------------------------------" "-----------\n"); for(i=0;i<ajSeqsetGetSize(seqset);i++) { if (nmatchesseqset[i]>0) ajFmtPrintF(logfile, "%-42s: %8u\n", ajSeqGetNameC(ajSeqsetGetseqSeq(seqset, i)), nmatchesseqset[i]); ajFeattableWrite(ftoutforseqsetseq, seqsetftables[i]); ajFeattableDel(&seqsetftables[i]); } ajFmtPrintF(logfile, "\nPattern statistics:\n"); ajFmtPrintF(logfile, "-------------------\n"); if(wordlen>7) ajStrAppendCountK(&padding, ' ', wordlen-7); paddedheader = ajFmtString(header,padding); ajFmtPrintF(logfile, paddedheader); for(i=0;i<npatterns;i++) if (wordsw[i]->nMatches>0) ajFmtPrintF(logfile, "%-7s: %12u %12u %17.2f\n", wordsw[i]->word->fword, wordsw[i]->nseqs, wordsw[i]->nMatches, wordsw[i]->lenMatches*1.0/wordsw[i]->nMatches); } } for(i=0;i<npatterns;i++) { for(j=0;j<wordsw[i]->nseqs;j++) AJFREE(wordsw[i]->locs[j]); AJFREE(wordsw[i]->locs); AJFREE(wordsw[i]->seqindxs); AJFREE(wordsw[i]->nnseqlocs); AJFREE(wordsw[i]->nSeqMatches); AJFREE(wordsw[i]); } embWordFreeTable(&wordsTable); AJFREE(wordsw); AJFREE(matchlist); AJFREE(lastlocation); AJFREE(nmatchesseqset); AJFREE(seqsetftables); if(dumpAlign) { ajAlignClose(align); ajAlignDel(&align); } if(dumpFeature) { ajFeattabOutDel(&ftoutforseqsetseq); ajFeattabOutDel(&ftoutforseqallseq); } ajFileClose(&logfile); ajSeqallDel(&seqall); ajSeqsetDel(&seqset); ajSeqDel(&queryseq); ajStrDel(&padding); AJFREE(paddedheader); embExit(); return 0; }