int main(int argc, char **argv) { AjPSeqall seq1; AjPSeqset seq2; AjPSeq a; const AjPSeq b; AjPStr m = 0; AjPStr n = 0; AjPFile errorf; AjBool show = ajFalse; ajint lena = 0; ajint lenb = 0; const char *p; const char *q; AjPMatrixf matrix; AjPSeqCvt cvt = 0; float **sub; ajint *compass = NULL; float *path = NULL; float gapopen; float gapextend; float score; ajint begina; ajint i; ajuint k; ajint beginb; ajint start1 = 0; ajint start2 = 0; ajint end1 = 0; ajint end2 = 0; ajint width = 0; AjPTable seq1MatchTable = 0; ajint wordlen = 6; ajint oldmax = 0; AjPAlign align = NULL; embInit("supermatcher", argc, argv); matrix = ajAcdGetMatrixf("datafile"); seq1 = ajAcdGetSeqall("asequence"); seq2 = ajAcdGetSeqset("bsequence"); gapopen = ajAcdGetFloat("gapopen"); gapextend = ajAcdGetFloat("gapextend"); wordlen = ajAcdGetInt("wordlen"); align = ajAcdGetAlign("outfile"); errorf = ajAcdGetOutfile("errorfile"); width = ajAcdGetInt("width"); /* not the same as awidth */ gapopen = ajRoundFloat(gapopen, 8); gapextend = ajRoundFloat(gapextend, 8); sub = ajMatrixfGetMatrix(matrix); cvt = ajMatrixfGetCvt(matrix); embWordLength(wordlen); ajSeqsetTrim(seq2); while(ajSeqallNext(seq1,&a)) { ajSeqTrim(a); begina = 1 + ajSeqGetOffset(a); m = ajStrNewRes(1+ajSeqGetLen(a)); lena = ajSeqGetLen(a); ajDebug("Read '%S'\n", ajSeqGetNameS(a)); if(!embWordGetTable(&seq1MatchTable, a)) /* get table of words */ ajErr("Could not generate table for %s\n", ajSeqGetNameC(a)); for(k=0;k<ajSeqsetGetSize(seq2);k++) { b = ajSeqsetGetseqSeq(seq2, k); lenb = ajSeqGetLen(b); beginb = 1 + ajSeqGetOffset(b); ajDebug("Processing '%S'\n", ajSeqGetNameS(b)); p = ajSeqGetSeqC(a); q = ajSeqGetSeqC(b); if(!supermatcher_findstartpoints(seq1MatchTable,b,a, &start1, &start2, &end1, &end2)) { ajFmtPrintF(errorf, "No wordmatch start points for " "%s vs %s. 
No alignment\n", ajSeqGetNameC(a),ajSeqGetNameC(b)); continue; } n=ajStrNewRes(1+ajSeqGetLen(b)); ajStrAssignC(&m,""); ajStrAssignC(&n,""); ajDebug("++ %S v %S start:%d %d end:%d %d\n", ajSeqGetNameS(a), ajSeqGetNameS(b), start1, start2, end1, end2); if(end1-start1+1 > oldmax) { oldmax = ((end1-start1)+1); AJRESIZE(path,oldmax*width*sizeof(float)); AJRESIZE(compass,oldmax*width*sizeof(ajint)); ajDebug("++ resize to oldmax: %d\n", oldmax); } for(i=0;i<((end1-start1)+1)*width;i++) path[i] = 0.0; ajDebug("Calling embAlignPathCalcFast " "%d..%d [%d/%d] %d..%d [%d/%d]\n", start1, end1, (end1 - start1 + 1), lena, start2, end2, (end2 - start2 + 1), lenb); score = embAlignPathCalcSWFast(&p[start1],&q[start2], end1-start1+1,end2-start2+1, 0,width, gapopen,gapextend, path,sub,cvt, compass,show); embAlignWalkSWMatrixFast(path,compass,gapopen,gapextend,a,b, &m,&n,end1-start1+1,end2-start2+1, 0,width, &start1,&start2); if(!ajAlignFormatShowsSequences(align)) { ajAlignDefineCC(align, ajStrGetPtr(m), ajStrGetPtr(n), ajSeqGetNameC(a), ajSeqGetNameC(b)); ajAlignSetScoreR(align, score); } else { embAlignReportLocal(align, a, b, m,n,start1,start2, gapopen, gapextend, score,matrix, begina, beginb); } ajAlignWrite(align); ajAlignReset(align); ajStrDel(&n); } embWordFreeTable(&seq1MatchTable); /* free table of words */ seq1MatchTable=0; ajStrDel(&m); } if(!ajAlignFormatShowsSequences(align)) { ajMatrixfDel(&matrix); } AJFREE(path); AJFREE(compass); ajAlignClose(align); ajAlignDel(&align); ajSeqallDel(&seq1); ajSeqDel(&a); ajSeqsetDel(&seq2); ajFileClose(&errorf); embExit(); return 0; }
/*
** Pick the alignment window for a pair of sequences from their word matches.
**
** seq1MatchTable  word table that sequence b is matched against
** b, a            the two sequences (b is the one scanned for words)
** start1, start2  receive the 0-based window starts in a and b
** end1, end2      receive the 0-based window ends in a and b
**
** Returns 0 when embWordBuildMatchTable() yields no list or an empty list
** (outputs are then left untouched), otherwise 1.
**
** The window starts where the chosen offset meets the edge of one sequence
** and is extended in step along both sequences until either one reaches its
** last position.
*/
static ajint supermatcher_findstartpoints(AjPTable seq1MatchTable,
                                          const AjPSeq b,
                                          const AjPSeq a,
                                          ajint *start1, ajint *start2,
                                          ajint *end1, ajint *end2)
{
    AjPList hits = NULL;
    AjPList chains = NULL;
    ajint best = -10;
    ajint diag = 0;
    ajint lasta;
    ajint lastb;
    ajint offa;
    ajint offb;
    ajint room;
    ajint roomb;

    lasta = ajSeqGetLen(a)-1;
    lastb = ajSeqGetLen(b)-1;
    offa  = ajSeqGetOffset(a);
    offb  = ajSeqGetOffset(b);

    ajDebug("supermatcher_findstartpoints len %d %d off %d %d\n",
            lasta, lastb, offa, offb);

    hits = embWordBuildMatchTable(seq1MatchTable, b, ajTrue);

    if(!hits)
        return 0;

    if(!hits->Count)
    {
        embWordMatchListDelete(&hits);
        return 0;
    }

    /* order and add if the gap is gapmax or less */
    chains = ajListNew();
    supermatcher_orderandconcat(hits, chains);

    /* this sets global structure conmax to point to a matchlist element */
    ajListMap(chains, supermatcher_findmax, &best);

    /* NOTE(review): three conversions but four arguments - the list */
    /* length is passed and never printed                            */
    ajDebug("findstart conmax off:%d count:%d total:%d\n",
            conmax->offset, conmax->count, conmax->total,
            ajListGetLength(conmax->list));

    diag = conmax->offset;

    /* the offset is all we needed! we can delete everything */
    ajListMap(chains, supermatcher_removelists, NULL);
    ajListFree(&chains);
    embWordMatchListDelete(&hits);      /* free the match structures */

    /* a positive offset starts inside a, otherwise inside b */
    *start1 = (diag > 0) ? diag : 0;
    *start2 = (diag > 0) ? 0 : 0-diag;

    *end1 = *start1;
    *end2 = *start2;

    ajDebug("++ end1 %d -> %d end2 %d -> %d\n",
            *end1, lasta, *end2, lastb);

    /* advance both ends by the same amount: the smaller remaining */
    /* distance to a last position, and nothing if that is <= 0     */
    room  = lasta - *end1;
    roomb = lastb - *end2;

    if(roomb < room)
        room = roomb;

    if(room > 0)
    {
        *end1 += room;
        *end2 += room;
    }

    ajDebug("++ end1 %d end2 %d\n", *end1, *end2);

    ajDebug("supermatcher_findstartpoints has %d..%d [%d] %d..%d [%d]\n",
            *start1, *end1, ajSeqGetLen(a), *start2, *end2, ajSeqGetLen(b));

    return 1;
}
int main(int argc, char **argv) { AjPAlign align; AjPSeqall seqall; AjPSeq a; AjPSeq b; AjPStr alga; AjPStr algb; AjPStr ss; ajuint lena; ajuint lenb; const char *p; const char *q; ajint start1 = 0; ajint start2 = 0; float *path; ajint *compass; float* ix; float* iy; float* m; AjPMatrixf matrix; AjPSeqCvt cvt = 0; float **sub; float gapopen; float gapextend; float endgapopen; float endgapextend; ajulong maxarr = 1000; /* arbitrary. realloc'd if needed */ ajulong len; float score; AjBool dobrief = ajTrue; AjBool endweight = ajFalse; /* whether end gap penalties should be applied */ float id = 0.; float sim = 0.; float idx = 0.; float simx = 0.; AjPStr tmpstr = NULL; size_t stlen; embInit("needle", argc, argv); matrix = ajAcdGetMatrixf("datafile"); a = ajAcdGetSeq("asequence"); ajSeqTrim(a); seqall = ajAcdGetSeqall("bsequence"); gapopen = ajAcdGetFloat("gapopen"); gapextend = ajAcdGetFloat("gapextend"); endgapopen = ajAcdGetFloat("endopen"); endgapextend = ajAcdGetFloat("endextend"); dobrief = ajAcdGetBoolean("brief"); endweight = ajAcdGetBoolean("endweight"); align = ajAcdGetAlign("outfile"); gapopen = ajRoundFloat(gapopen, 8); gapextend = ajRoundFloat(gapextend, 8); AJCNEW(path, maxarr); AJCNEW(compass, maxarr); AJCNEW(m, maxarr); AJCNEW(ix, maxarr); AJCNEW(iy, maxarr); alga = ajStrNew(); algb = ajStrNew(); ss = ajStrNew(); sub = ajMatrixfGetMatrix(matrix); cvt = ajMatrixfGetCvt(matrix); lena = ajSeqGetLen(a); while(ajSeqallNext(seqall,&b)) { ajSeqTrim(b); lenb = ajSeqGetLen(b); if(lenb > (ULONG_MAX/(ajulong)(lena+1))) ajFatal("Sequences too big. Try 'stretcher' or 'supermatcher'"); len = lena*lenb; if(len>maxarr) { stlen = (size_t) len; AJCRESIZETRY(path,stlen); if(!path) ajDie("Sequences too big. Try 'stretcher'"); AJCRESIZETRY(compass,stlen); if(!compass) ajDie("Sequences too big. Try 'stretcher'"); AJCRESIZETRY(m,stlen); if(!m) ajDie("Sequences too big. Try 'stretcher'"); AJCRESIZETRY(ix,stlen); if(!ix) ajDie("Sequences too big. 
Try 'stretcher'"); AJCRESIZETRY(iy,stlen); if(!iy) ajDie("Sequences too big. Try 'stretcher'"); maxarr=len; } p = ajSeqGetSeqC(a); q = ajSeqGetSeqC(b); ajStrAssignC(&alga,""); ajStrAssignC(&algb,""); score = embAlignPathCalcWithEndGapPenalties(p, q, lena, lenb, gapopen, gapextend, endgapopen, endgapextend, &start1, &start2, path, sub, cvt, m, ix, iy, compass, ajTrue, endweight); embAlignWalkNWMatrixUsingCompass(p, q, &alga, &algb, lena, lenb, &start1, &start2, compass); embAlignReportGlobal(align, a, b, alga, algb, start1, start2, gapopen, gapextend, score, matrix, ajSeqGetOffset(a), ajSeqGetOffset(b)); if(!dobrief) { embAlignCalcSimilarity(alga,algb,sub,cvt,lena,lenb,&id,&sim,&idx, &simx); ajFmtPrintS(&tmpstr,"Longest_Identity = %5.2f%%\n", id); ajFmtPrintAppS(&tmpstr,"Longest_Similarity = %5.2f%%\n", sim); ajFmtPrintAppS(&tmpstr,"Shortest_Identity = %5.2f%%\n", idx); ajFmtPrintAppS(&tmpstr,"Shortest_Similarity = %5.2f%%", simx); ajAlignSetSubHeaderApp(align, tmpstr); } ajAlignWrite(align); ajAlignReset(align); } ajAlignClose(align); ajAlignDel(&align); ajSeqallDel(&seqall); ajSeqDel(&a); ajSeqDel(&b); AJFREE(compass); AJFREE(path); AJFREE(ix); AJFREE(iy); AJFREE(m); ajStrDel(&alga); ajStrDel(&algb); ajStrDel(&ss); ajStrDel(&tmpstr); embExit(); return 0; }
int main(int argc, char **argv) { AjPSeq seq1; AjPSeq seq2; ajint wordlen; AjPTable seq1MatchTable = 0; AjPList matchlist = NULL; AjPGraph graph = NULL; AjPGraph xygraph = NULL; AjBool boxit; /* ** Different ticks as they need to be different for x and y due to ** length of string being important on x */ ajuint acceptableticksx[]= { 1,10,50,100,500,1000,1500,10000, 500000,1000000,5000000 }; ajuint acceptableticks[]= { 1,10,50,100,200,500,1000,2000,5000,10000,15000, 500000,1000000,5000000 }; ajint numbofticks = 10; float xmargin; float ymargin; float ticklen; float tickgap; float onefifth = 0.0; ajint i; float k; float max; char ptr[10]; ajint begin1; ajint begin2; ajint end1; ajint end2; ajuint len1; ajuint len2; float fbegin1; float fbegin2; float fend1; float fend2; float flen1; float flen2; AjBool stretch; embInit("dottup", argc, argv); wordlen = ajAcdGetInt("wordsize"); seq1 = ajAcdGetSeq("asequence"); seq2 = ajAcdGetSeq("bsequence"); graph = ajAcdGetGraph("graph"); boxit = ajAcdGetBoolean("boxit"); stretch = ajAcdGetToggle("stretch"); xygraph = ajAcdGetGraphxy("xygraph"); begin1 = ajSeqGetBegin(seq1); begin2 = ajSeqGetBegin(seq2); end1 = ajSeqGetEnd(seq1); end2 = ajSeqGetEnd(seq2); len1 = end1 - begin1 + 1; len2 = end2 - begin2 + 1; flen1 = (float) len1; flen2 = (float) len2; fbegin1 = (float) begin1; fbegin2 = (float) begin2; fend1 = (float) end1; fend2 = (float) end2; offset1 = fbegin1; offset2 = fbegin2; ajSeqTrim(seq1); ajSeqTrim(seq2); embWordLength(wordlen); if(embWordGetTable(&seq1MatchTable, seq1)) matchlist = embWordBuildMatchTable(seq1MatchTable, seq2, ajTrue); if(stretch) { dottup_stretchplot(xygraph,matchlist,seq1,seq2,begin1,begin2,end1, end2); if(matchlist) embWordMatchListDelete(&matchlist); /* free the match structures */ } else { /* only here if stretch is false */ max= flen1; if(flen2 > max) max = flen2; xmargin = ymargin = max * (float)0.15; ajGraphOpenWin(graph, fbegin1-ymargin,fend1+ymargin, fbegin2-xmargin,(float)fend2+xmargin); 
ajGraphicsSetCharscale(0.5); if(matchlist) dottup_plotMatches(matchlist); if(boxit) { ajGraphicsDrawposRect(fbegin1, fbegin2, fend1, fend2); i = 0; while(acceptableticksx[i]*numbofticks < len1) i++; if(i<=13) tickgap = (float) acceptableticksx[i]; else tickgap = (float) acceptableticksx[10]; ticklen = xmargin * (float) 0.1; onefifth = xmargin * (float)0.2; ajGraphicsDrawposTextAtmid(fbegin1+flen1*(float)0.5, fbegin1-(onefifth*(float)3.0), ajGraphGetYlabelC(graph)); if(len2/len1 > 10 ) { /* a lot smaller then just label start and end */ ajGraphicsDrawposLine(fbegin1,fbegin2,fbegin1, fbegin2-ticklen); sprintf(ptr,"%u",ajSeqGetOffset(seq1)); ajGraphicsDrawposTextAtmid(fbegin1,fbegin2-(onefifth),ptr); ajGraphicsDrawposLine(fend1,fbegin2, fend1,fbegin2-ticklen); sprintf(ptr,"%d",end1); ajGraphicsDrawposTextAtmid(fend1,fbegin2-(onefifth),ptr); } else for(k=fbegin1;k<fend1;k+=tickgap) { ajGraphicsDrawposLine(k,fbegin2,k,fbegin2-ticklen); sprintf(ptr,"%d",(ajint)k); ajGraphicsDrawposTextAtmid( k,fbegin2-(onefifth),ptr); } i = 0; while(acceptableticks[i]*numbofticks < len2) i++; tickgap = (float) acceptableticks[i]; ticklen = ymargin*(float)0.1; onefifth = ymargin*(float)0.2; ajGraphicsDrawposTextAtlineJustify(fbegin1-(onefifth*(float)4.), fbegin2+flen2*(float)0.5, fbegin2-(onefifth*(float)4.), fbegin2+flen2, ajGraphGetXlabelC(graph), 0.5); if(len1/len2 > 10 ) { /* a lot smaller then just label start and end */ ajGraphicsDrawposLine(fbegin1,fbegin2,fbegin1-ticklen, fbegin2); sprintf(ptr,"%u",ajSeqGetOffset(seq2)); ajGraphicsDrawposTextAtend(fbegin1-(onefifth),fbegin2,ptr); ajGraphicsDrawposLine(fbegin1,fend2,fbegin1-ticklen, fend2); sprintf(ptr,"%d",end2); ajGraphicsDrawposTextAtend(fbegin2-(onefifth),fend2,ptr); } else for(k=fbegin2;k<fend2;k+=tickgap) { ajGraphicsDrawposLine(fbegin1,k,fbegin1-ticklen,k); sprintf(ptr,"%d",(ajint)k); ajGraphicsDrawposTextAtend(fbegin1-(onefifth),k,ptr); } } } ajGraphicsClose(); ajSeqDel(&seq1); ajSeqDel(&seq2); ajGraphxyDel(&graph); 
ajGraphxyDel(&xygraph); embWordFreeTable(&seq1MatchTable); if(matchlist) embWordMatchListDelete(&matchlist); /* free the match structures */ embExit(); return 0; }
/*
** Search a sequence with a compiled sequence pattern and record every hit
** as a feature.
**
** ftable   feature table that receives one feature per hit
** seq      sequence to search; only the true begin..end range is searched
** pat      pattern definition (compiled pattern, mismatch count, name)
** reverse  when true the search string is passed through
**          ajSeqstrReverse() and hits are stored on the '-' strand with
**          coordinates mapped back from the end of the range
**
** Returns nothing. An empty sequence is ignored; all working objects are
** created only after that check so the early return owns nothing.
**
** Each feature is scored as match length minus mismatches and tagged
** with "*pat name: pattern", plus "*mismatch n" when n is non-zero.
*/
void embPatternSeqSearch (AjPFeattable ftable, const AjPSeq seq,
                          const AjPPatternSeq pat, AjBool reverse)
{
    const void *tidy = NULL;
    ajuint hits = 0;
    ajuint i;
    AjPPatComp pattern;
    EmbPMatMatch m = NULL;
    AjPFeature sf = NULL;
    AjPSeq revseq = NULL;
    AjPList list = NULL;
    AjPStr seqstr = NULL;
    AjPStr seqname = NULL;
    AjPStr tmp = NULL;
    ajint adj;
    ajint begin;
    AjBool isreversed;
    ajint seqlen;

    seqlen = ajSeqGetLen(seq);
    if(!seqlen)
        return;

    /* allocate only once we know there is something to search */
    list    = ajListNew();
    seqstr  = ajStrNew();
    seqname = ajStrNew();
    tmp     = ajStrNew();

    isreversed = ajSeqIsReversedTrue(seq);

    if(isreversed)
        seqlen += ajSeqGetOffset(seq);

    begin = ajSeqGetBeginTrue(seq);
    adj   = ajSeqGetEndTrue(seq);

    /* feature type names are filled in on first use */
    if(!ajStrGetLen(featMotifProt))
        ajStrAssignC(&featMotifProt, "SO:0001067");

    if(!ajStrGetLen(featMotifNuc))
        ajStrAssignC(&featMotifNuc, "SO:0000714");

    ajStrAssignS(&seqname,ajSeqGetNameS(seq));
    pattern = ajPatternSeqGetCompiled(pat);

    if (reverse)
    {
        revseq = ajSeqNewSeq(seq);
        ajStrAssignSubS(&seqstr, ajSeqGetSeqS(revseq), begin-1,adj-1);
        ajSeqstrReverse(&seqstr);
    }
    else
        ajStrAssignSubS(&seqstr, ajSeqGetSeqS(seq), begin-1,adj-1);

    ajStrFmtUpper(&seqstr);

    ajDebug("embPatternSeqSearch '%S' protein: %B reverse: %B\n",
            pattern->pattern, pat->Protein, reverse);
    embPatFuzzSearchII(pattern,begin,seqname,seqstr,list,
                       ajPatternSeqGetMismatch(pat),&hits,&tidy);

    ajDebug ("embPatternSeqSearch: found %d hits\n",hits);

    if(!reverse)
        ajListReverse(list);

    for(i=0;i<hits;++i)
    {
        ajListPop(list,(void **)&m);

        if (reverse)
            sf = ajFeatNew(ftable, NULL, featMotifNuc,
                           adj - m->start - m->len + begin + 1,
                           adj - m->start + begin,
                           0.0, '-', 0);
        else
        {
            if(ajSeqIsProt(seq) || ajFeattableIsProt(ftable))
                sf = ajFeatNewProt(ftable, NULL, featMotifProt,
                                   m->start,
                                   m->start + m->len - 1,
                                   0.0);
            else
                sf = ajFeatNew(ftable, NULL, featMotifNuc,
                               m->start,
                               m->start + m->len - 1,
                               0.0, '.', 0);
        }

        if(isreversed)
            ajFeatReverse(sf, seqlen);

        ajFeatSetScore(sf, (float) (m->len - m->mm));

        ajFmtPrintS(&tmp, "*pat %S: %S",
                    ajPatternSeqGetName(pat),
                    ajPatternSeqGetPattern(pat));
        ajFeatTagAdd(sf,NULL,tmp);

        if(m->mm)
        {
            ajFmtPrintS(&tmp, "*mismatch %d", m->mm);
            ajFeatTagAdd(sf, NULL, tmp);
        }

        embMatMatchDel(&m);
    }

    ajStrDel(&seqname);
    ajStrDel(&seqstr);
    ajStrDel(&tmp);
    ajListFree(&list);

    if (reverse)
        ajSeqDel(&revseq);

    return;
}
/*
** Search a sequence with a compiled regular expression and record every
** match as a feature.
**
** ftable   feature table that receives one feature per match
** seq      sequence to search; only the true begin..end range is searched
** pat      regular-expression pattern (compiled expression, name, text)
** reverse  when true the search string is passed through
**          ajSeqstrReverse() and matches are stored on the '-' strand
**          with coordinates mapped back from the end of the range
**
** Returns nothing. An empty sequence is ignored.
**
** The forward string is assigned only when reverse is false; assigning
** it unconditionally would overwrite the reversed string while matches
** were still reported with reverse-strand coordinates.
**
** After each match the search string is cut to start one character past
** the match start, so overlapping matches are found.
*/
void embPatternRegexSearch (AjPFeattable ftable, const AjPSeq seq,
                            const AjPPatternRegex pat, AjBool reverse)
{
    ajint pos=0;
    ajint off;
    ajint len;
    AjPFeature sf = NULL;
    AjPStr substr = NULL;
    AjPStr seqstr = NULL;
    AjPStr tmpstr = NULL;
    AjPStr tmp = NULL;
    AjPRegexp patexp = ajPatternRegexGetCompiled(pat);
    ajint adj;
    AjBool isreversed;
    AjPSeq revseq = NULL;
    ajint seqlen;

    seqlen = ajSeqGetLen(seq);
    if(!seqlen)
        return;

    /* allocate only once we know there is something to search */
    tmp = ajStrNew();

    isreversed = ajSeqIsReversedTrue(seq);

    if(isreversed)
        seqlen += ajSeqGetOffset(seq);

    pos = ajSeqGetBeginTrue(seq);
    adj = ajSeqGetEndTrue(seq);

    /* feature type names are filled in on first use */
    if(!ajStrGetLen(featMotifProt))
        ajStrAssignC(&featMotifProt, "SO:0001067");

    if(!ajStrGetLen(featMotifNuc))
        ajStrAssignC(&featMotifNuc, "SO:0000714");

    if (reverse)
    {
        revseq = ajSeqNewSeq(seq);
        ajStrAssignSubS(&seqstr, ajSeqGetSeqS(revseq), pos-1, adj-1);
        ajSeqstrReverse(&seqstr);
    }
    else
        ajStrAssignSubS(&seqstr, ajSeqGetSeqS(seq), pos-1, adj-1);

    ajStrFmtUpper(&seqstr);

    while(ajStrGetLen(seqstr) && ajRegExec(patexp, seqstr))
    {
        off = ajRegOffset(patexp);
        len = ajRegLenI(patexp, 0);

        if(off || len)
        {
            /* continue from the start of this match: match + remainder */
            ajRegSubI(patexp, 0, &substr);
            ajRegPost(patexp, &tmp);
            ajStrAssignS(&seqstr, substr);
            ajStrAppendS(&seqstr, tmp);
            pos += off;

            if (reverse)
                sf = ajFeatNew(ftable, NULL, featMotifNuc,
                               adj - pos - len + 2,
                               adj - pos + 1,
                               0.0, '-', 0);
            else
            {
                if(ajSeqIsProt(seq) || ajFeattableIsProt(ftable))
                    sf = ajFeatNewProt(ftable, NULL, featMotifProt,
                                       pos, pos + len - 1,
                                       0.0);
                else
                    sf = ajFeatNew(ftable, NULL, featMotifNuc,
                                   pos, pos + len - 1,
                                   0.0, '.', 0);
            }

            if(isreversed)
                ajFeatReverse(sf, seqlen);

            ajFmtPrintS (&tmpstr,"*pat %S: %S",
                         ajPatternRegexGetName(pat),
                         ajPatternRegexGetPattern(pat));
            ajFeatTagAdd (sf,NULL,tmpstr);
        }

        /* step one character on, whether or not a feature was made */
        pos += 1;
        ajStrCutStart(&seqstr, 1);
    }

    ajStrDel(&tmpstr);
    ajStrDel(&tmp);
    ajStrDel(&substr);
    ajStrDel(&seqstr);

    if(reverse)
        ajSeqDel(&revseq);

    return;
}
int main(int argc, char **argv) { AjPSeqall queryseqs; AjPSeqset targetseqs; AjPSeq queryseq; const AjPSeq targetseq; AjPStr queryaln = 0; AjPStr targetaln = 0; AjPFile errorf; AjBool show = ajFalse; const char *queryseqc; const char *targetseqc; AjPMatrixf matrix; AjPSeqCvt cvt = 0; float **sub; ajint *compass = NULL; float *path = NULL; float gapopen; float gapextend; float score; float minscore; ajuint j, k; ajint querystart = 0; ajint targetstart = 0; ajint queryend = 0; ajint targetend = 0; ajint width = 0; AjPTable kmers = 0; ajint wordlen = 6; ajint oldmax = 0; ajint newmax = 0; ajuint ntargetseqs; ajuint nkmers; AjPAlign align = NULL; EmbPWordMatch maxmatch; /* match with maximum score */ /* Cursors for the current sequence being scanned, ** i.e., until which location it was scanned. ** Separate cursor/location entries for each sequence in the seqset. */ ajuint* lastlocation; EmbPWordRK* wordsw = NULL; AjPList* matchlist = NULL; embInit("supermatcher", argc, argv); matrix = ajAcdGetMatrixf("datafile"); queryseqs = ajAcdGetSeqall("asequence"); targetseqs= ajAcdGetSeqset("bsequence"); gapopen = ajAcdGetFloat("gapopen"); gapextend = ajAcdGetFloat("gapextend"); wordlen = ajAcdGetInt("wordlen"); align = ajAcdGetAlign("outfile"); errorf = ajAcdGetOutfile("errorfile"); width = ajAcdGetInt("width"); /* width for banded Smith-Waterman */ minscore = ajAcdGetFloat("minscore"); gapopen = ajRoundFloat(gapopen, 8); gapextend = ajRoundFloat(gapextend, 8); sub = ajMatrixfGetMatrix(matrix); cvt = ajMatrixfGetCvt(matrix); embWordLength(wordlen); /* seqset sequence is the reference sequence for SAM format */ ajAlignSetRefSeqIndx(align, 1); ajSeqsetTrim(targetseqs); ntargetseqs = ajSeqsetGetSize(targetseqs); AJCNEW0(matchlist, ntargetseqs); /* get tables of words */ for(k=0;k<ntargetseqs;k++) { targetseq = ajSeqsetGetseqSeq(targetseqs, k); embWordGetTable(&kmers, targetseq); ajDebug("Number of distinct kmers found so far: %d\n", ajTableGetLength(kmers)); } 
AJCNEW0(lastlocation, ntargetseqs); if(ajTableGetLength(kmers)<1) ajErr("no kmers found"); nkmers = embWordRabinKarpInit(kmers, &wordsw, wordlen, targetseqs); while(ajSeqallNext(queryseqs,&queryseq)) { ajSeqTrim(queryseq); queryaln = ajStrNewRes(1+ajSeqGetLen(queryseq)); ajDebug("Read '%S'\n", ajSeqGetNameS(queryseq)); for(k=0;k<ntargetseqs;k++) { lastlocation[k]=0; matchlist[k] = ajListstrNew(); } embWordRabinKarpSearch(ajSeqGetSeqS(queryseq), targetseqs, (const EmbPWordRK*)wordsw, wordlen, nkmers, matchlist, lastlocation, ajFalse); for(k=0;k<ajSeqsetGetSize(targetseqs);k++) { targetseq = ajSeqsetGetseqSeq(targetseqs, k); ajDebug("Processing '%S'\n", ajSeqGetNameS(targetseq)); if(ajListGetLength(matchlist[k])==0) { ajFmtPrintF(errorf, "No wordmatch start points for " "%s vs %s. No alignment\n", ajSeqGetNameC(queryseq),ajSeqGetNameC(targetseq)); embWordMatchListDelete(&matchlist[k]); continue; } /* only the maximum match is used as seed * (if there is more than one location with the maximum match * only the first one is used) * TODO: we should add a new option to make above limit optional */ maxmatch = embWordMatchFirstMax(matchlist[k]); supermatcher_findendpoints(maxmatch,targetseq, queryseq, &targetstart, &querystart, &targetend, &queryend); targetaln=ajStrNewRes(1+ajSeqGetLen(targetseq)); queryseqc = ajSeqGetSeqC(queryseq); targetseqc = ajSeqGetSeqC(targetseq); ajStrAssignC(&queryaln,""); ajStrAssignC(&targetaln,""); ajDebug("++ %S v %S start:%d %d end:%d %d\n", ajSeqGetNameS(targetseq), ajSeqGetNameS(queryseq), targetstart, querystart, targetend, queryend); newmax = (targetend-targetstart+2)*width; if(newmax > oldmax) { AJCRESIZE0(path,oldmax,newmax); AJCRESIZE0(compass,oldmax,newmax); oldmax=newmax; ajDebug("++ memory re/allocation for path/compass arrays" " to size: %d\n", newmax); } else { AJCSET0(path,newmax); AJCSET0(compass,newmax); } ajDebug("Calling embAlignPathCalcSWFast " "%d..%d [%d/%d] %d..%d [%d/%d] width:%d\n", querystart, queryend, (queryend - 
querystart + 1), ajSeqGetLen(queryseq), targetstart, targetend, (targetend - targetstart + 1), ajSeqGetLen(targetseq), width); score = embAlignPathCalcSWFast(&targetseqc[targetstart], &queryseqc[querystart], targetend-targetstart+1, queryend-querystart+1, 0,width, gapopen,gapextend, path,sub,cvt, compass,show); if(score>minscore) { embAlignWalkSWMatrixFast(path,compass,gapopen,gapextend, targetseq,queryseq, &targetaln,&queryaln, targetend-targetstart+1, queryend-querystart+1, 0,width, &targetstart,&querystart); if(!ajAlignFormatShowsSequences(align)) { ajAlignDefineCC(align, ajStrGetPtr(targetaln), ajStrGetPtr(queryaln), ajSeqGetNameC(targetseq), ajSeqGetNameC(queryseq)); ajAlignSetScoreR(align, score); } else { ajDebug(" queryaln:%S \ntargetaln:%S\n", queryaln,targetaln); embAlignReportLocal(align, queryseq, targetseq, queryaln, targetaln, querystart, targetstart, gapopen, gapextend, score, matrix, 1 + ajSeqGetOffset(queryseq), 1 + ajSeqGetOffset(targetseq) ); } ajAlignWrite(align); ajAlignReset(align); } ajStrDel(&targetaln); embWordMatchListDelete(&matchlist[k]); } ajStrDel(&queryaln); } for(k=0;k<nkmers;k++) { AJFREE(wordsw[k]->seqindxs); AJFREE(wordsw[k]->nSeqMatches); for(j=0;j<wordsw[k]->nseqs;j++) AJFREE(wordsw[k]->locs[j]); AJFREE(wordsw[k]->nnseqlocs); AJFREE(wordsw[k]->locs); AJFREE(wordsw[k]); } embWordFreeTable(&kmers); if(!ajAlignFormatShowsSequences(align)) ajMatrixfDel(&matrix); AJFREE(path); AJFREE(compass); AJFREE(kmers); AJFREE(wordsw); AJFREE(matchlist); AJFREE(lastlocation); ajAlignClose(align); ajAlignDel(&align); ajSeqallDel(&queryseqs); ajSeqDel(&queryseq); ajSeqsetDel(&targetseqs); ajFileClose(&errorf); embExit(); return 0; }