/*
** remap_DelTable
**
** Releases a hit table together with everything it stores.
**
** For every entry the value record's 'iso' string is deleted, the value
** record itself is freed and the key string is deleted.  The two temporary
** arrays obtained from the table are then freed and, finally, the table
** itself is freed through ajTableFree().
**
** table: address of the table to destroy; passed on to ajTableFree().
*/
static void remap_DelTable(AjPTable * table)
{
    void **keys = NULL;      /* keys extracted from the table */
    void **vals = NULL;      /* values extracted from the table */
    ajint idx = 0;
    PValue entry;

    if(ajTableGetLength(*table))
    {
        ajTableToarrayKeysValues(*table, &keys, &vals);

        /* the loop stops at the first NULL slot in the key array */
        while(keys[idx])
        {
            entry = (PValue) vals[idx];

            ajStrDel(&(entry->iso));            /* string held by the record */
            AJFREE(vals[idx]);                  /* the value record itself */
            ajStrDel((AjPStr*)&keys[idx]);      /* the key string */

            idx++;
        }

        AJFREE(keys);
        AJFREE(vals);
    }

    ajTableFree(table);
}
/*
** ensCacheTrace
**
** Writes the state of a cache to the debug output via ajDebug().
**
** cache: cache to report on.
** level: indentation level; each level adds two spaces of indent.
**
** Returns ajFalse when 'cache' is NULL (nothing is written), otherwise
** ajTrue.
*/
AjBool ensCacheTrace(const EnsPCache cache, ajuint level)
{
    AjPStr indent = NULL;
    double hits   = 0.0;
    double misses = 0.0;
    double ratio  = 0.0;

    if(cache == NULL)
        return ajFalse;

    indent = ajStrNew();

    ajStrAppendCountK(&indent, ' ', level * 2);

    /* The ratio stays 0.0 while no lookup has been counted at all. */
    if(cache->Hit || cache->Miss)
    {
        hits   = (double) cache->Hit;
        misses = (double) cache->Miss;
        ratio  = hits / (hits + misses);
    }

    ajDebug("%SensCache trace %p\n"
            "%S Label '%S'\n"
            "%S List %p length: %u\n"
            "%S Table %p length: %u\n"
            "%S Type %d\n"
            "%S Synchron %B\n"
            "%S MaxBytes %u\n"
            "%S MaxCount %u\n"
            "%S MaxSize %u\n"
            "%S Bytes %u\n"
            "%S Count %u\n"
            "%S Dropped %u\n"
            "%S Removed %u\n"
            "%S Stored %u\n"
            "%S Hit %u\n"
            "%S Miss %u\n"
            "%S Hit/(Hit + Miss) %f\n",
            indent, cache,
            indent, cache->Label,
            indent, cache->List, ajListGetLength(cache->List),
            indent, cache->Table, ajTableGetLength(cache->Table),
            indent, cache->Type,
            indent, cache->Synchron,
            indent, cache->MaxBytes,
            indent, cache->MaxCount,
            indent, cache->MaxSize,
            indent, cache->Bytes,
            indent, cache->Count,
            indent, cache->Dropped,
            indent, cache->Removed,
            indent, cache->Stored,
            indent, cache->Hit,
            indent, cache->Miss,
            indent, ratio);

    ajStrDel(&indent);

    return ajTrue;
}
static void remap_CutList(AjPFile outfile, const AjPTable hittable, AjBool isos, AjBool html, ajint mincuts, ajint maxcuts) { const PValue value; void **keyarray = NULL; /* array for table */ ajint i; /* print title */ if(html) ajFmtPrintF(outfile, "<H2>"); ajFmtPrintF(outfile, "\n\n# Enzymes that cut Frequency"); if(isos) ajFmtPrintF(outfile, "\tIsoschizomers\n"); else ajFmtPrintF(outfile, "\n"); if(html) ajFmtPrintF(outfile, "</H2>\n"); if(ajTableGetLength(hittable)) { ajTableToarrayKeys(hittable, &keyarray); qsort(keyarray, ajTableGetLength(hittable), sizeof (*keyarray), ajStrVcmp); /* enzymes that cut the required number of times */ if(html) ajFmtPrintF(outfile, "<PRE>"); for(i = 0; keyarray[i]; i++) { value = ajTableFetchS(hittable,keyarray[i]); if(value->count >= mincuts && value->count <= maxcuts) ajFmtPrintF(outfile, "%10S\t %d\t%S\n", (AjPStr) keyarray[i], value->count, value->iso); } ajFmtPrintF(outfile, "\n"); if(html) ajFmtPrintF(outfile, "</PRE>\n"); } /* enzymes that cut <mincuts */ /* print title */ if(html) ajFmtPrintF(outfile, "<H2>"); ajFmtPrintF(outfile, "\n\n# Enzymes which cut less frequently than the "); ajFmtPrintF(outfile, "MINCUTS criterion\n# Enzymes < MINCUTS Frequency"); if(isos) ajFmtPrintF(outfile, "\tIsoschizomers\n"); else ajFmtPrintF(outfile, "\n"); if(html) ajFmtPrintF(outfile, "</H2>\n"); if(ajTableGetLength(hittable)) { /* print out results */ if(html) ajFmtPrintF(outfile, "<PRE>"); for(i = 0; keyarray[i]; i++) { value = ajTableFetchS(hittable,keyarray[i]); if(value->count < mincuts) ajFmtPrintF(outfile, "%10S\t %d\t%S\n", (AjPStr) keyarray[i], value->count, value->iso); } ajFmtPrintF(outfile, "\n"); if(html) ajFmtPrintF(outfile, "</PRE>\n"); } /* enzymes that cut >maxcuts */ /* print title */ if(html) ajFmtPrintF(outfile, "<H2>"); ajFmtPrintF(outfile, "\n\n# Enzymes which cut more frequently than the "); ajFmtPrintF(outfile, "MAXCUTS criterion\n# Enzymes > MAXCUTS Frequency"); if(isos) ajFmtPrintF(outfile, "\tIsoschizomers\n"); 
else ajFmtPrintF(outfile, "\n"); if(html) ajFmtPrintF(outfile, "</H2>\n"); if(ajTableGetLength(hittable)) { /* print out results */ if(html) ajFmtPrintF(outfile, "<PRE>"); for(i = 0; keyarray[i]; i++) { value = ajTableFetchS(hittable,keyarray[i]); if(value->count > maxcuts) ajFmtPrintF(outfile, "%10S\t %d\t%S\n", (AjPStr) keyarray[i], value->count, value->iso); } ajFmtPrintF(outfile, "\n"); if(html) ajFmtPrintF(outfile, "</PRE>\n"); AJFREE(keyarray); } return; }
int main(int argc, char **argv) { /* Variable declarations */ AjPOutfile outfile = NULL; AjBool obsolete = ajTrue; AjPObo obo = NULL; AjPObo oboanc = NULL; AjPObo oboparent = NULL; AjPOboall oboall = NULL; AjPOboin oboin = NULL; AjPStr oboqryup = NULL; AjPStr oboqryanc = NULL; AjPTable alltable = NULL; AjPTable newtable = NULL; AjPStr up = NULL; AjPList uplist = NULL; ajuint iterms = 0; AjPStr topid = NULL; AjPStr obodb = NULL; AjBool saved = ajFalse; /* ACD processing */ embInit("ontogetcommon", argc, argv); oboall = ajAcdGetOboall("oboterms"); outfile = ajAcdGetOutobo("outfile"); obsolete = ajAcdGetBoolean("obsolete"); oboin = ajOboinNew(); oboparent = ajOboNew(); oboanc = ajOboNew(); uplist = ajListNew(); alltable = ajTablestrNew(600); newtable = ajTablestrNew(600); while(ajOboallNext(oboall, &obo)) { saved = ajFalse; if(!obsolete && ajOboIsObsolete(obo)) continue; if(!iterms) { ajDebug("store id '%S'\n", ajOboGetId(obo)); ajStrAssignS(&obodb, ajOboGetDb(obo)); ajTablePut(alltable, ajStrNewS(ajOboGetId(obo)), NULL); saved = ajTrue; } else { ajDebug("test id '%S'\n", ajOboGetId(obo)); if(ajTableMatchS(alltable, ajOboGetId(obo))) { ajDebug("keep id '%S'\n", ajOboGetId(obo)); ajTablePut(newtable, ajStrNewS(ajOboGetId(obo)), NULL); saved = ajTrue; } } if(saved) ajStrAssignS(&topid, ajOboGetId(obo)); if(ajOboGetParents(obo, uplist)) /* that was the root */ { while(ajListstrPop(uplist, &up)) { ajDebug("up: '%S'\n", up); ajFmtPrintS(&oboqryup, "%S-id:%S", ajOboGetDb(obo), up); ajOboinQryS(oboin, oboqryup); while(ajOboinRead(oboin, oboparent)) { if(!obsolete && ajOboIsObsolete(oboparent)) continue; if(!iterms) { ajDebug("store parent '%S'\n", ajOboGetId(oboparent)); ajTablePut(alltable, ajStrNewS(ajOboGetId(oboparent)), NULL); } else { ajDebug("test parent '%S'\n", ajOboGetId(oboparent)); if(ajTableMatchS(alltable, ajOboGetId(oboparent))) { ajDebug("keep parent '%S'\n", ajOboGetId(oboparent)); ajTablePut(newtable, ajStrNewS(ajOboGetId(oboparent)), NULL); if(!saved) { 
ajStrAssignS(&topid, ajOboGetId(oboparent)); saved = ajTrue; } } } if(!ajOboGetParents(oboparent, uplist)) continue; } ajStrDel(&up); } } if(iterms) ajTableMergeAnd(alltable, newtable); ajDebug("id: '%S' saved %u\n", ajOboGetId(obo), ajTableGetLength(alltable)); if(!ajTableGetLength(alltable)) ajDie("Query '%S' no matching ancestor found for obo term '%S:%S'", ajOboallGetQryS(oboall), ajOboGetDb(obo), ajOboGetId(obo)); iterms++; } ajFmtPrintS(&oboqryanc, "%S-id:%S", obodb, topid); ajOboinQryS(oboin, oboqryanc); while(ajOboinRead(oboin, oboanc)) { ajObooutWrite(outfile, oboanc); } /* Memory clean-up and exit */ ajOboallDel(&oboall); ajOboinDel(&oboin); ajOboDel(&oboanc); ajOboDel(&oboparent); ajListFree(&uplist); ajTablestrFreeKey(&alltable); ajTablestrFreeKey(&newtable); ajStrDel(&oboqryup); ajStrDel(&oboqryanc); ajStrDel(&obodb); ajStrDel(&topid); ajOutfileClose(&outfile); embExit(); return 0; }
/*
** supermatcher: program entry point.
**
** Outline, as visible in this function:
**   1. Read the options (matrix, query stream, target set, gap penalties,
**      word length, output alignment, error file, band width, minimum
**      score) through ACD.
**   2. Build one word table over all target sequences and initialise the
**      Rabin-Karp search structures from it.
**   3. For every query sequence, search for word matches against all
**      target sequences.  For each target with at least one match, take
**      the match returned by embWordMatchFirstMax() as seed, derive the
**      region end points, run embAlignPathCalcSWFast() over that region
**      and, when the score exceeds 'minscore', walk the matrix and write
**      the alignment.  Targets without matches are reported to the error
**      file.
**   4. Free everything and exit.
**
** Returns 0.
*/
int main(int argc, char **argv)
{
    AjPSeqall queryseqs;
    AjPSeqset targetseqs;
    AjPSeq queryseq;
    const AjPSeq targetseq;
    AjPStr queryaln = 0;            /* aligned query text, per query */
    AjPStr targetaln = 0;           /* aligned target text, per target */

    AjPFile errorf;
    AjBool show = ajFalse;          /* passed to embAlignPathCalcSWFast */

    const char *queryseqc;
    const char *targetseqc;

    AjPMatrixf matrix;
    AjPSeqCvt cvt = 0;
    float **sub;
    ajint *compass = NULL;          /* grown together with 'path' */
    float *path = NULL;

    float gapopen;
    float gapextend;
    float score;
    float minscore;

    ajuint j, k;
    ajint querystart = 0;
    ajint targetstart = 0;
    ajint queryend = 0;
    ajint targetend = 0;
    ajint width = 0;
    AjPTable kmers = 0;
    ajint wordlen = 6;
    ajint oldmax = 0;               /* largest path/compass size so far */
    ajint newmax = 0;               /* size needed for the current pair */

    ajuint ntargetseqs;
    ajuint nkmers;

    AjPAlign align = NULL;
    EmbPWordMatch maxmatch; /* match with maximum score */

    /* Cursors for the current sequence being scanned,
    ** i.e., until which location it was scanned.
    ** Separate cursor/location entries for each sequence in the seqset.
    */
    ajuint* lastlocation;

    EmbPWordRK* wordsw = NULL;
    AjPList* matchlist = NULL;      /* one match list per target sequence */

    embInit("supermatcher", argc, argv);

    matrix = ajAcdGetMatrixf("datafile");
    queryseqs = ajAcdGetSeqall("asequence");
    targetseqs= ajAcdGetSeqset("bsequence");
    gapopen = ajAcdGetFloat("gapopen");
    gapextend = ajAcdGetFloat("gapextend");
    wordlen = ajAcdGetInt("wordlen");
    align = ajAcdGetAlign("outfile");
    errorf = ajAcdGetOutfile("errorfile");
    width = ajAcdGetInt("width"); /* width for banded Smith-Waterman */
    minscore = ajAcdGetFloat("minscore");

    gapopen = ajRoundFloat(gapopen, 8);
    gapextend = ajRoundFloat(gapextend, 8);

    sub = ajMatrixfGetMatrix(matrix);
    cvt = ajMatrixfGetCvt(matrix);

    embWordLength(wordlen);

    /* seqset sequence is the reference sequence for SAM format */
    ajAlignSetRefSeqIndx(align, 1);

    ajSeqsetTrim(targetseqs);

    ntargetseqs = ajSeqsetGetSize(targetseqs);

    AJCNEW0(matchlist, ntargetseqs);

    /* get tables of words */
    /* every target sequence is added to the same 'kmers' table */
    for(k=0;k<ntargetseqs;k++)
    {
        targetseq = ajSeqsetGetseqSeq(targetseqs, k);
        embWordGetTable(&kmers, targetseq);
        ajDebug("Number of distinct kmers found so far: %d\n",
                ajTableGetLength(kmers));
    }

    AJCNEW0(lastlocation, ntargetseqs);

    /* NOTE(review): execution continues past this ajErr() call, so it
    ** appears to report only - confirm the intended behaviour for an
    ** empty table */
    if(ajTableGetLength(kmers)<1)
        ajErr("no kmers found");

    nkmers = embWordRabinKarpInit(kmers, &wordsw, wordlen, targetseqs);

    while(ajSeqallNext(queryseqs,&queryseq))
    {
        ajSeqTrim(queryseq);

        queryaln = ajStrNewRes(1+ajSeqGetLen(queryseq));

        ajDebug("Read '%S'\n", ajSeqGetNameS(queryseq));

        /* fresh cursors and match lists for this query */
        for(k=0;k<ntargetseqs;k++)
        {
            lastlocation[k]=0;
            matchlist[k] = ajListstrNew();
        }

        embWordRabinKarpSearch(ajSeqGetSeqS(queryseq), targetseqs,
                               (const EmbPWordRK*)wordsw, wordlen, nkmers,
                               matchlist, lastlocation, ajFalse);

        for(k=0;k<ajSeqsetGetSize(targetseqs);k++)
        {
            targetseq = ajSeqsetGetseqSeq(targetseqs, k);
            ajDebug("Processing '%S'\n", ajSeqGetNameS(targetseq));

            /* no seed for this pair: log it and move to the next target */
            if(ajListGetLength(matchlist[k])==0)
            {
                ajFmtPrintF(errorf,
                            "No wordmatch start points for "
                            "%s vs %s. No alignment\n",
                            ajSeqGetNameC(queryseq),ajSeqGetNameC(targetseq));
                embWordMatchListDelete(&matchlist[k]);
                continue;
            }

            /* only the maximum match is used as seed
             * (if there is more than one location with the maximum match
             * only the first one is used)
             * TODO: we should add a new option to make above limit optional
             */
            maxmatch = embWordMatchFirstMax(matchlist[k]);

            supermatcher_findendpoints(maxmatch,targetseq, queryseq,
                                       &targetstart, &querystart,
                                       &targetend, &queryend);

            targetaln=ajStrNewRes(1+ajSeqGetLen(targetseq));
            queryseqc = ajSeqGetSeqC(queryseq);
            targetseqc = ajSeqGetSeqC(targetseq);

            /* both alignment strings start empty for every pair */
            ajStrAssignC(&queryaln,"");
            ajStrAssignC(&targetaln,"");

            ajDebug("++ %S v %S start:%d %d end:%d %d\n",
                    ajSeqGetNameS(targetseq), ajSeqGetNameS(queryseq),
                    targetstart, querystart, targetend, queryend);

            /* The work arrays are only ever enlarged: when the size needed
            ** for this pair exceeds the largest seen so far they are
            ** resized, otherwise the first 'newmax' elements are passed to
            ** AJCSET0 (presumably zeroing them) and the arrays reused. */
            newmax = (targetend-targetstart+2)*width;

            if(newmax > oldmax)
            {
                AJCRESIZE0(path,oldmax,newmax);
                AJCRESIZE0(compass,oldmax,newmax);
                oldmax=newmax;
                ajDebug("++ memory re/allocation for path/compass arrays"
                        " to size: %d\n", newmax);
            }
            else
            {
                AJCSET0(path,newmax);
                AJCSET0(compass,newmax);
            }

            ajDebug("Calling embAlignPathCalcSWFast "
                    "%d..%d [%d/%d] %d..%d [%d/%d] width:%d\n",
                    querystart, queryend, (queryend - querystart + 1),
                    ajSeqGetLen(queryseq),
                    targetstart, targetend, (targetend - targetstart + 1),
                    ajSeqGetLen(targetseq),
                    width);

            /* the target region is passed first, the query region second */
            score = embAlignPathCalcSWFast(&targetseqc[targetstart],
                                           &queryseqc[querystart],
                                           targetend-targetstart+1,
                                           queryend-querystart+1,
                                           0,width,
                                           gapopen,gapextend,
                                           path,sub,cvt,
                                           compass,show);

            /* alignments scoring at or below 'minscore' are not written */
            if(score>minscore)
            {
                embAlignWalkSWMatrixFast(path,compass,gapopen,gapextend,
                                         targetseq,queryseq,
                                         &targetaln,&queryaln,
                                         targetend-targetstart+1,
                                         queryend-querystart+1,
                                         0,width,
                                         &targetstart,&querystart);

                /* choose the reporting call by whether the output format
                ** shows sequences */
                if(!ajAlignFormatShowsSequences(align))
                {
                    ajAlignDefineCC(align, ajStrGetPtr(targetaln),
                                    ajStrGetPtr(queryaln),
                                    ajSeqGetNameC(targetseq),
                                    ajSeqGetNameC(queryseq));
                    ajAlignSetScoreR(align, score);
                }
                else
                {
                    ajDebug(" queryaln:%S \ntargetaln:%S\n",
                            queryaln,targetaln);
                    embAlignReportLocal(align,
                                        queryseq, targetseq,
                                        queryaln, targetaln,
                                        querystart, targetstart,
                                        gapopen, gapextend,
                                        score, matrix,
                                        1 + ajSeqGetOffset(queryseq),
                                        1 + ajSeqGetOffset(targetseq)
                                        );
                }

                ajAlignWrite(align);
                ajAlignReset(align);
            }

            ajStrDel(&targetaln);

            embWordMatchListDelete(&matchlist[k]);
        }

        ajStrDel(&queryaln);
    }

    /* release the per-word search records */
    for(k=0;k<nkmers;k++)
    {
        AJFREE(wordsw[k]->seqindxs);
        AJFREE(wordsw[k]->nSeqMatches);

        for(j=0;j<wordsw[k]->nseqs;j++)
            AJFREE(wordsw[k]->locs[j]);

        AJFREE(wordsw[k]->nnseqlocs);
        AJFREE(wordsw[k]->locs);
        AJFREE(wordsw[k]);
    }

    embWordFreeTable(&kmers);

    /* NOTE(review): the matrix is deleted here only when the format does
    ** not show sequences; presumably it is otherwise released together
    ** with 'align' after embAlignReportLocal() - confirm */
    if(!ajAlignFormatShowsSequences(align))
        ajMatrixfDel(&matrix);

    AJFREE(path);
    AJFREE(compass);
    /* NOTE(review): 'kmers' was already passed to embWordFreeTable()
    ** above - confirm this second release is harmless */
    AJFREE(kmers);
    AJFREE(wordsw);

    AJFREE(matchlist);
    AJFREE(lastlocation);

    ajAlignClose(align);
    ajAlignDel(&align);
    ajSeqallDel(&queryseqs);
    ajSeqDel(&queryseq);
    ajSeqsetDel(&targetseqs);
    ajFileClose(&errorf);

    embExit();

    return 0;
}
int main(int argc, char **argv) { AjPSeqset seqset; AjPSeqall seqall; AjPSeq queryseq; const AjPSeq targetseq; ajint wordlen; AjPTable wordsTable = NULL; AjPList* matchlist = NULL; AjPFile logfile; AjPFeattable* seqsetftables = NULL; AjPFeattable seqallseqftable = NULL; AjPFeattabOut ftoutforseqsetseq = NULL; AjPFeattabOut ftoutforseqallseq = NULL; AjPAlign align = NULL; AjIList iter = NULL; ajint targetstart; ajint querystart; ajint len; ajuint i, j; ajulong nAllMatches = 0; ajulong sumAllScore = 0; AjBool dumpAlign = ajTrue; AjBool dumpFeature = ajTrue; AjBool checkmode = ajFalse; EmbPWordRK* wordsw = NULL; ajuint npatterns = 0; ajuint seqsetsize; ajuint nmatches; ajuint* nmatchesseqset; ajuint* lastlocation; /* Cursors for Rabin-Karp search. */ /* Shows until what point the query sequence was * scanned for a pattern sequences in the seqset. */ char* paddedheader = NULL; const char* header; AjPStr padding; header = "Pattern %S #pat-sequences #all-matches avg-match-length\n"; padding = ajStrNew(); embInit("wordmatch", argc, argv); wordlen = ajAcdGetInt("wordsize"); seqset = ajAcdGetSeqset("asequence"); seqall = ajAcdGetSeqall("bsequence"); logfile = ajAcdGetOutfile("logfile"); dumpAlign = ajAcdGetToggle("dumpalign"); dumpFeature = ajAcdGetToggle("dumpfeat"); if(dumpAlign) { align = ajAcdGetAlign("outfile"); ajAlignSetExternal(align, ajTrue); } seqsetsize = ajSeqsetGetSize(seqset); ajSeqsetTrim(seqset); AJCNEW0(matchlist, seqsetsize); AJCNEW0(seqsetftables, seqsetsize); AJCNEW0(nmatchesseqset, seqsetsize); if (dumpFeature) { ftoutforseqsetseq = ajAcdGetFeatout("aoutfeat"); ftoutforseqallseq = ajAcdGetFeatout("boutfeat"); } checkmode = !dumpFeature && !dumpAlign; embWordLength(wordlen); ajFmtPrintF(logfile, "Small sequence/file for constructing" " target patterns: %S\n", ajSeqsetGetUsa(seqset)); ajFmtPrintF(logfile, "Large sequence/file to be scanned" " for patterns: %S\n", ajSeqallGetUsa(seqall)); ajFmtPrintF(logfile, "Number of sequences in the patterns file: 
%u\n", seqsetsize); ajFmtPrintF(logfile, "Pattern/word length: %u\n", wordlen); for(i=0;i<seqsetsize;i++) { targetseq = ajSeqsetGetseqSeq(seqset, i); embWordGetTable(&wordsTable, targetseq); } AJCNEW0(lastlocation, seqsetsize); if(ajTableGetLength(wordsTable)>0) { npatterns = embWordRabinKarpInit(wordsTable, &wordsw, wordlen, seqset); ajFmtPrintF(logfile, "Number of patterns/words found: %u\n", npatterns); while(ajSeqallNext(seqall,&queryseq)) { for(i=0;i<seqsetsize;i++) { lastlocation[i]=0; if (!checkmode) matchlist[i] = ajListstrNew(); } nmatches = embWordRabinKarpSearch( ajSeqGetSeqS(queryseq), seqset, (EmbPWordRK const *)wordsw, wordlen, npatterns, matchlist, lastlocation, checkmode); nAllMatches += nmatches; if (checkmode) continue; for(i=0;i<seqsetsize;i++) { if(ajListGetLength(matchlist[i])>0) { iter = ajListIterNewread(matchlist[i]) ; while(embWordMatchIter(iter, &targetstart, &querystart, &len, &targetseq)) { if(dumpAlign) { ajAlignDefineSS(align, targetseq, queryseq); ajAlignSetScoreI(align, len); /* ungapped alignment means same length * for both sequences */ ajAlignSetSubRange(align, targetstart, 1, len, ajSeqIsReversed(targetseq), ajSeqGetLen(targetseq), querystart, 1, len, ajSeqIsReversed(queryseq), ajSeqGetLen(queryseq)); } } if(dumpAlign) { ajAlignWrite(align); ajAlignReset(align); } if(ajListGetLength(matchlist[i])>0 && dumpFeature) { embWordMatchListConvToFeat(matchlist[i], &seqsetftables[i], &seqallseqftable, targetseq, queryseq); ajFeattableWrite(ftoutforseqallseq, seqallseqftable); ajFeattableDel(&seqallseqftable); } ajListIterDel(&iter); } embWordMatchListDelete(&matchlist[i]); } } /* search completed, now report statistics */ for(i=0;i<npatterns;i++) { sumAllScore += wordsw[i]->lenMatches; for(j=0;j<wordsw[i]->nseqs;j++) nmatchesseqset[wordsw[i]->seqindxs[j]] += wordsw[i]->nSeqMatches[j]; } ajFmtPrintF(logfile, "Number of sequences in the file scanned " "for patterns: %u\n", ajSeqallGetCount(seqall)); ajFmtPrintF(logfile, "Number of all 
matches: %Lu" " (wordmatch finds exact matches only)\n", nAllMatches); if(nAllMatches>0) { ajFmtPrintF(logfile, "Sum of match lengths: %Lu\n", sumAllScore); ajFmtPrintF(logfile, "Average match length: %.2f\n", sumAllScore*1.0/nAllMatches); ajFmtPrintF(logfile, "\nDistribution of the matches among pattern" " sequences:\n"); ajFmtPrintF(logfile, "-----------------------------------------" "-----------\n"); for(i=0;i<ajSeqsetGetSize(seqset);i++) { if (nmatchesseqset[i]>0) ajFmtPrintF(logfile, "%-42s: %8u\n", ajSeqGetNameC(ajSeqsetGetseqSeq(seqset, i)), nmatchesseqset[i]); ajFeattableWrite(ftoutforseqsetseq, seqsetftables[i]); ajFeattableDel(&seqsetftables[i]); } ajFmtPrintF(logfile, "\nPattern statistics:\n"); ajFmtPrintF(logfile, "-------------------\n"); if(wordlen>7) ajStrAppendCountK(&padding, ' ', wordlen-7); paddedheader = ajFmtString(header,padding); ajFmtPrintF(logfile, paddedheader); for(i=0;i<npatterns;i++) if (wordsw[i]->nMatches>0) ajFmtPrintF(logfile, "%-7s: %12u %12u %17.2f\n", wordsw[i]->word->fword, wordsw[i]->nseqs, wordsw[i]->nMatches, wordsw[i]->lenMatches*1.0/wordsw[i]->nMatches); } } for(i=0;i<npatterns;i++) { for(j=0;j<wordsw[i]->nseqs;j++) AJFREE(wordsw[i]->locs[j]); AJFREE(wordsw[i]->locs); AJFREE(wordsw[i]->seqindxs); AJFREE(wordsw[i]->nnseqlocs); AJFREE(wordsw[i]->nSeqMatches); AJFREE(wordsw[i]); } embWordFreeTable(&wordsTable); AJFREE(wordsw); AJFREE(matchlist); AJFREE(lastlocation); AJFREE(nmatchesseqset); AJFREE(seqsetftables); if(dumpAlign) { ajAlignClose(align); ajAlignDel(&align); } if(dumpFeature) { ajFeattabOutDel(&ftoutforseqsetseq); ajFeattabOutDel(&ftoutforseqallseq); } ajFileClose(&logfile); ajSeqallDel(&seqall); ajSeqsetDel(&seqset); ajSeqDel(&queryseq); ajStrDel(&padding); AJFREE(paddedheader); embExit(); return 0; }