Example #1
0
int main(int argc, char **argv)
{
    AjPSeqall seqall;
    AjPSeq seq;
    AjPTable table = 0;
    AjPFile outf;
    ajint wordsize;
    ajint mincount;

    embInit("wordcount", argc, argv);

    seqall = ajAcdGetSeqall("sequence1");

    wordsize = ajAcdGetInt("wordsize");
    outf     = ajAcdGetOutfile("outfile");
    mincount = ajAcdGetInt("mincount");

    embWordLength(wordsize);

    while (ajSeqallNext(seqall, &seq))
    {
        embWordGetTable(&table, seq);		/* get table of words   */
    }

    embWordPrintTableFI(table, mincount, outf); /* print table of words */
    /*
     **  test if table can be added to
     **  if(getWordTable(&table, seq, wordcount)) ?? get table of words ??
     **  {
     **       printWordTable(table);              ?? print table of words ??
     **  }
     */
    embWordFreeTable(&table);	/* free table of words */

    ajSeqallDel(&seqall);
    ajSeqDel(&seq);
    ajFileClose(&outf);

    embExit();

    return 0;
}
Example #2
0
int main(int argc, char **argv)
{
    AjPSeqall seq1;
    AjPSeqset seq2;
    AjPSeq a;
    const AjPSeq b;
    AjPStr m = 0;
    AjPStr n = 0;

    AjPFile errorf;
    AjBool show = ajFalse;

    ajint    lena = 0;
    ajint    lenb = 0;

    const char   *p;
    const char   *q;

    AjPMatrixf matrix;
    AjPSeqCvt cvt = 0;
    float **sub;
    ajint *compass = NULL;
    float *path = NULL;

    float gapopen;
    float gapextend;
    float score;


    ajint begina;
    ajint i;
    ajuint k;
    ajint beginb;
    ajint start1 = 0;
    ajint start2 = 0;
    ajint end1   = 0;
    ajint end2   = 0;
    ajint width  = 0;
    AjPTable seq1MatchTable = 0;
    ajint wordlen = 6;
    ajint oldmax = 0;

    AjPAlign align = NULL;

    embInit("supermatcher", argc, argv);

    matrix    = ajAcdGetMatrixf("datafile");
    seq1      = ajAcdGetSeqall("asequence");
    seq2      = ajAcdGetSeqset("bsequence");
    gapopen   = ajAcdGetFloat("gapopen");
    gapextend = ajAcdGetFloat("gapextend");
    wordlen   = ajAcdGetInt("wordlen");
    align     = ajAcdGetAlign("outfile");
    errorf    = ajAcdGetOutfile("errorfile");
    width     = ajAcdGetInt("width");	/* not the same as awidth */

    gapopen   = ajRoundFloat(gapopen, 8);
    gapextend = ajRoundFloat(gapextend, 8);

    sub = ajMatrixfGetMatrix(matrix);
    cvt = ajMatrixfGetCvt(matrix);

    embWordLength(wordlen);

    ajSeqsetTrim(seq2);

    while(ajSeqallNext(seq1,&a))
    {
        ajSeqTrim(a);
	begina = 1 + ajSeqGetOffset(a);

	m = ajStrNewRes(1+ajSeqGetLen(a));

	lena = ajSeqGetLen(a);

	ajDebug("Read '%S'\n", ajSeqGetNameS(a));

	if(!embWordGetTable(&seq1MatchTable, a)) /* get table of words */
	    ajErr("Could not generate table for %s\n",
		  ajSeqGetNameC(a));

	for(k=0;k<ajSeqsetGetSize(seq2);k++)
	{
	    b      = ajSeqsetGetseqSeq(seq2, k);
	    lenb   = ajSeqGetLen(b);
	    beginb = 1 + ajSeqGetOffset(b);

	    ajDebug("Processing '%S'\n", ajSeqGetNameS(b));
	    p = ajSeqGetSeqC(a);
	    q = ajSeqGetSeqC(b);

	    if(!supermatcher_findstartpoints(seq1MatchTable,b,a,
					     &start1, &start2,
					     &end1, &end2))
	    {
		ajFmtPrintF(errorf,
			    "No wordmatch start points for "
			    "%s vs %s. No alignment\n",
			    ajSeqGetNameC(a),ajSeqGetNameC(b));
		continue;
	    }
	    
        n=ajStrNewRes(1+ajSeqGetLen(b));
        ajStrAssignC(&m,"");
        ajStrAssignC(&n,"");

	    ajDebug("++ %S v %S start:%d %d end:%d %d\n",
		    ajSeqGetNameS(a), ajSeqGetNameS(b),
		    start1, start2, end1, end2);

	    if(end1-start1+1 > oldmax)
	    {
		oldmax = ((end1-start1)+1);
		AJRESIZE(path,oldmax*width*sizeof(float));
		AJRESIZE(compass,oldmax*width*sizeof(ajint));
		ajDebug("++ resize to oldmax: %d\n", oldmax);
	    }

	    for(i=0;i<((end1-start1)+1)*width;i++)
		path[i] = 0.0;

	    ajDebug("Calling embAlignPathCalcFast "
		     "%d..%d [%d/%d] %d..%d [%d/%d]\n",
		     start1, end1, (end1 - start1 + 1), lena,
		     start2, end2, (end2 - start2 + 1), lenb);

	    score = embAlignPathCalcSWFast(&p[start1],&q[start2],
                                           end1-start1+1,end2-start2+1,
                                           0,width,
                                           gapopen,gapextend,
                                           path,sub,cvt,
                                           compass,show);

	    embAlignWalkSWMatrixFast(path,compass,gapopen,gapextend,a,b,
					 &m,&n,end1-start1+1,end2-start2+1,
					 0,width,
                                         &start1,&start2);

		if(!ajAlignFormatShowsSequences(align))
		{
		    ajAlignDefineCC(align, ajStrGetPtr(m),
		            ajStrGetPtr(n), ajSeqGetNameC(a),
		            ajSeqGetNameC(b));
		    ajAlignSetScoreR(align, score);
		}
		else
		{
		    embAlignReportLocal(align, a, b,
		            m,n,start1,start2,
		            gapopen, gapextend,
		            score,matrix, begina, beginb);
		}
		ajAlignWrite(align);
		ajAlignReset(align);
	    ajStrDel(&n);
	}

	embWordFreeTable(&seq1MatchTable); /* free table of words */
	seq1MatchTable=0;

	ajStrDel(&m);

    }

    if(!ajAlignFormatShowsSequences(align))
    {
        ajMatrixfDel(&matrix);        
    }
    
    AJFREE(path);
    AJFREE(compass);

    ajAlignClose(align);
    ajAlignDel(&align);
    ajSeqallDel(&seq1);
    ajSeqDel(&a);
    ajSeqsetDel(&seq2);
    ajFileClose(&errorf);

    embExit();

    return 0;
}
Example #3
0
int main(int argc, char **argv)
{
    AjPSeq seq1;
    AjPSeq seq2;
    ajint wordlen;
    AjPTable seq1MatchTable = 0;
    AjPList matchlist = NULL;
    AjPGraph graph    = NULL;
    AjPGraph xygraph  = NULL;
    AjBool boxit;
    /*
    ** Different ticks as they need to be different for x and y due to
    ** length of string being important on x
    */
    ajuint acceptableticksx[]=
    {
	1,10,50,100,500,1000,1500,10000,
	500000,1000000,5000000
    };
    ajuint acceptableticks[]=
    {
	1,10,50,100,200,500,1000,2000,5000,10000,15000,
	500000,1000000,5000000
    };
    ajint numbofticks = 10;
    float xmargin;
    float ymargin;
    float ticklen;
    float tickgap;
    float onefifth = 0.0;
    ajint i;
    float k;
    float max;
    char ptr[10];
    ajint begin1;
    ajint begin2;
    ajint end1;
    ajint end2;
    ajuint len1;
    ajuint len2;
    float fbegin1;
    float fbegin2;
    float fend1;
    float fend2;
    float flen1;
    float flen2;
    AjBool stretch;
    
    embInit("dottup", argc, argv);

    wordlen = ajAcdGetInt("wordsize");
    seq1    = ajAcdGetSeq("asequence");
    seq2    = ajAcdGetSeq("bsequence");
    graph   = ajAcdGetGraph("graph");
    boxit   = ajAcdGetBoolean("boxit");
    stretch = ajAcdGetToggle("stretch");
    xygraph = ajAcdGetGraphxy("xygraph");

    begin1 = ajSeqGetBegin(seq1);
    begin2 = ajSeqGetBegin(seq2);
    end1   = ajSeqGetEnd(seq1);
    end2   = ajSeqGetEnd(seq2);
    len1   = end1 - begin1 + 1;
    len2   = end2 - begin2 + 1;

    flen1  = (float) len1;
    flen2  = (float) len2;
    fbegin1 = (float) begin1;
    fbegin2 = (float) begin2;
    fend1   = (float) end1;
    fend2   = (float) end2;

    offset1 = fbegin1;
    offset2 = fbegin2;

    ajSeqTrim(seq1);
    ajSeqTrim(seq2);

    embWordLength(wordlen);
    if(embWordGetTable(&seq1MatchTable, seq1))
	matchlist = embWordBuildMatchTable(seq1MatchTable, seq2, ajTrue);


    if(stretch)
    {
	dottup_stretchplot(xygraph,matchlist,seq1,seq2,begin1,begin2,end1,
			   end2);
	if(matchlist)
	    embWordMatchListDelete(&matchlist); /* free the match structures */
    }

    else
    {
	/* only here if stretch is false */

	max= flen1;
	if(flen2 > max)
	    max = flen2;

	xmargin = ymargin = max * (float)0.15;

	ajGraphOpenWin(graph, fbegin1-ymargin,fend1+ymargin,
		       fbegin2-xmargin,(float)fend2+xmargin);

	ajGraphicsSetCharscale(0.5);

	if(matchlist)
	    dottup_plotMatches(matchlist);

	if(boxit)
	{
	    ajGraphicsDrawposRect(fbegin1, fbegin2, fend1, fend2);
	    i = 0;
	    while(acceptableticksx[i]*numbofticks < len1)
		i++;

	    if(i<=13)
		tickgap = (float) acceptableticksx[i];
	    else
		tickgap = (float) acceptableticksx[10];

	    ticklen = xmargin * (float) 0.1;
	    onefifth  = xmargin * (float)0.2;
	    ajGraphicsDrawposTextAtmid(fbegin1+flen1*(float)0.5,
                                       fbegin1-(onefifth*(float)3.0),
                                       ajGraphGetYlabelC(graph));

	    if(len2/len1 > 10 )
	    {
		/* a lot smaller then just label start and end */
		ajGraphicsDrawposLine(fbegin1,fbegin2,fbegin1,
			    fbegin2-ticklen);
		sprintf(ptr,"%u",ajSeqGetOffset(seq1));
		ajGraphicsDrawposTextAtmid(fbegin1,fbegin2-(onefifth),ptr);
		
		ajGraphicsDrawposLine(fend1,fbegin2,
			    fend1,fbegin2-ticklen);
		sprintf(ptr,"%d",end1);
		ajGraphicsDrawposTextAtmid(fend1,fbegin2-(onefifth),ptr);
	    }
	    else
		for(k=fbegin1;k<fend1;k+=tickgap)
		{
		    ajGraphicsDrawposLine(k,fbegin2,k,fbegin2-ticklen);
		    sprintf(ptr,"%d",(ajint)k);
		    ajGraphicsDrawposTextAtmid( k,fbegin2-(onefifth),ptr);
		}

	    i = 0;
	    while(acceptableticks[i]*numbofticks < len2)
		i++;

	    tickgap   = (float) acceptableticks[i];
	    ticklen   = ymargin*(float)0.1;
	    onefifth  = ymargin*(float)0.2;
	    ajGraphicsDrawposTextAtlineJustify(fbegin1-(onefifth*(float)4.),
                                               fbegin2+flen2*(float)0.5,
                                               fbegin2-(onefifth*(float)4.),
                                               fbegin2+flen2,
                                               ajGraphGetXlabelC(graph),
                                               0.5);

	    if(len1/len2 > 10 )
	    {
		/* a lot smaller then just label start and end */
		ajGraphicsDrawposLine(fbegin1,fbegin2,fbegin1-ticklen,
			    fbegin2);
		sprintf(ptr,"%u",ajSeqGetOffset(seq2));
		ajGraphicsDrawposTextAtend(fbegin1-(onefifth),fbegin2,ptr);

		ajGraphicsDrawposLine(fbegin1,fend2,fbegin1-ticklen,
			    fend2);
		sprintf(ptr,"%d",end2);
		ajGraphicsDrawposTextAtend(fbegin2-(onefifth),fend2,ptr);
	    }
	    else
		for(k=fbegin2;k<fend2;k+=tickgap)
		{
		    ajGraphicsDrawposLine(fbegin1,k,fbegin1-ticklen,k);
		    sprintf(ptr,"%d",(ajint)k);
		    ajGraphicsDrawposTextAtend(fbegin1-(onefifth),k,ptr);
		}
	}
    }

    ajGraphicsClose();
    ajSeqDel(&seq1);
    ajSeqDel(&seq2);
    ajGraphxyDel(&graph);
    ajGraphxyDel(&xygraph);

    embWordFreeTable(&seq1MatchTable);

    if(matchlist)
	embWordMatchListDelete(&matchlist); /* free the match structures */

    embExit();

    return 0;
}
Example #4
0
int main(int argc, char **argv)
{

    AjPSeqset seqset;
    const AjPSeq seq1;
    const AjPSeq seq2;
    ajint wordlen;
    AjPTable seq1MatchTable = NULL;
    AjPList matchlist ;
    AjPGraph graph = 0;
    ajuint i;
    ajuint j;
    float total=0;
    ajuint acceptableticks[]=
    {
	1,10,50,100,200,500,1000,1500,10000,50000,
	100000,500000,1000000,5000000
    };
    ajint numbofticks = 10;
    ajint gap,tickgap;
    AjBool boxit    = AJTRUE;
    AjBool dumpfeat = AJFALSE;
    float xmargin;
    float ymargin;
    float k;
    char ptr[10];
    float ticklen;
    float onefifth;
    AjPFeattable *tabptr = NULL;
    AjPFeattabOut seq1out = NULL;
    AjPStr sajb = NULL;
    float flen1;
    float flen2;
    ajuint tui;
    
    embInit("polydot", argc, argv);

    wordlen  = ajAcdGetInt("wordsize");
    seqset   = ajAcdGetSeqset("sequences");
    graph    = ajAcdGetGraph("graph");
    gap      = ajAcdGetInt("gap");
    boxit    = ajAcdGetBoolean("boxit");
    seq1out  = ajAcdGetFeatout("outfeat");
    dumpfeat = ajAcdGetToggle("dumpfeat");

    sajb = ajStrNew();
    embWordLength(wordlen);

    AJCNEW(lines,ajSeqsetGetSize(seqset));
    AJCNEW(pts,ajSeqsetGetSize(seqset));
    AJCNEW(tabptr,ajSeqsetGetSize(seqset));

    for(i=0;i<ajSeqsetGetSize(seqset);i++)
    {
	seq1 = ajSeqsetGetseqSeq(seqset, i);
	total += ajSeqGetLen(seq1);

    }
    
    total +=(float)(gap*(ajSeqsetGetSize(seqset)-1));
    
    xmargin = total*(float)0.15;
    ymargin = total*(float)0.15;
    
    ticklen = xmargin*(float)0.1;
    onefifth  = xmargin*(float)0.2;
    
    i = 0;
    while(acceptableticks[i]*numbofticks < ajSeqsetGetLen(seqset))
	i++;
    
    if(i<=13)
	tickgap = acceptableticks[i];
    else
	tickgap = acceptableticks[13];
    
    ajGraphAppendTitleS(graph, ajSeqsetGetUsa(seqset));

    ajGraphOpenWin(graph, (float)0.0-xmargin,(total+xmargin)*(float)1.35,
		   (float)0.0-ymargin,
		   total+ymargin);
    ajGraphicsSetCharscale((float)0.3);
    
    
    for(i=0;i<ajSeqsetGetSize(seqset);i++)
    {
	which = i;
	seq1 = ajSeqsetGetseqSeq(seqset, i);
	tui = ajSeqGetLen(seq1);
	flen1 = (float) tui;

	if(embWordGetTable(&seq1MatchTable, seq1)){ /* get table of words */
	    for(j=0;j<ajSeqsetGetSize(seqset);j++)
	    {
		seq2 = ajSeqsetGetseqSeq(seqset, j);
		tui  = ajSeqGetLen(seq2);
		flen2 = (float) tui;

		if(boxit)
		    ajGraphicsDrawposRect(xstart,ystart,
                                          xstart+flen1,
                                          ystart+flen2);

		matchlist = embWordBuildMatchTable(seq1MatchTable, seq2,
						   ajTrue);
		if(matchlist)
		    polydot_plotMatches(matchlist);

		if(i<j && dumpfeat)
		    embWordMatchListConvToFeat(matchlist,&tabptr[i],
					       &tabptr[j],seq1,
					       seq2);

		if(matchlist)	       /* free the match structures */
		    embWordMatchListDelete(&matchlist);

		if(j==0)
		{
		    for(k=0.0;k<ajSeqGetLen(seq1);k+=tickgap)
		    {
			ajGraphicsDrawposLine(xstart+k,ystart,xstart+k,
				    ystart-ticklen);

			sprintf(ptr,"%d",(ajint)k);
			ajGraphicsDrawposTextAtmid(xstart+k,
                                                   ystart-(onefifth),
                                                   ptr);
		    }
		    ajGraphicsDrawposTextAtmid(
                        xstart+(flen1/(float)2.0),
                        ystart-(3*onefifth),
                        ajStrGetPtr(ajSeqsetGetseqNameS(seqset, i)));
		}

		if(i==0)
		{
		    for(k=0.0;k<ajSeqGetLen(seq2);k+=tickgap)
		    {
			ajGraphicsDrawposLine(xstart,ystart+k,xstart-ticklen,
				    ystart+k);

			sprintf(ptr,"%d",(ajint)k);
			ajGraphicsDrawposTextAtend(xstart-(onefifth),
                                                   ystart+k,
                                                   ptr);
		    }
		    ajGraphicsDrawposTextAtlineJustify(
                        xstart-(3*onefifth),
                        ystart+(flen2/(float)2.0),
                        xstart-(3*onefifth),ystart+flen2,
                        ajStrGetPtr(ajSeqsetGetseqNameS(seqset, j)),0.5);
		}
		ystart += flen2+(float)gap;
	    }
	}
	embWordFreeTable(&seq1MatchTable);
	seq1MatchTable = NULL;
	xstart += flen1+(float)gap;
	ystart = 0.0;
    }
    
    ajGraphicsDrawposTextAtstart(total+onefifth,total-(onefifth),
		     "No. Length  Lines  Points Sequence");
    
    for(i=0;i<ajSeqsetGetSize(seqset);i++)
    {
	seq1 = ajSeqsetGetseqSeq(seqset, i);
	ajFmtPrintS(&sajb,"%3u %6d %5d %6d %s",i+1,
		    ajSeqGetLen(seq1),lines[i],
		    pts[i],ajSeqGetNameC(seq1));

	ajGraphicsDrawposTextAtstart(total+onefifth,total-(onefifth*(i+2)),
                                     ajStrGetPtr(sajb));
    }
    
    if(dumpfeat && seq1out)
    {
	for(i=0;i<ajSeqsetGetSize(seqset);i++)
	{
	    ajFeattableWrite(seq1out, tabptr[i]);
	    ajFeattableDel(&tabptr[i]);
	}
    }
    
    ajGraphicsClose();
    ajGraphxyDel(&graph);

    ajStrDel(&sajb);
    AJFREE(lines);
    AJFREE(pts);
    AJFREE(tabptr);

    ajSeqsetDel(&seqset);
    ajFeattabOutDel(&seq1out);;

    embExit();

    return 0;
}
Example #5
0
int main(int argc, char **argv)
{
    AjPSeqall queryseqs;
    AjPSeqset targetseqs;
    AjPSeq queryseq;
    const AjPSeq targetseq;
    AjPStr queryaln = 0;
    AjPStr targetaln = 0;

    AjPFile errorf;
    AjBool show = ajFalse;

    const char   *queryseqc;
    const char   *targetseqc;

    AjPMatrixf matrix;
    AjPSeqCvt cvt = 0;
    float **sub;
    ajint *compass = NULL;
    float *path = NULL;

    float gapopen;
    float gapextend;
    float score;
    float minscore;

    ajuint j, k;
    ajint querystart = 0;
    ajint targetstart = 0;
    ajint queryend   = 0;
    ajint targetend   = 0;
    ajint width  = 0;
    AjPTable kmers = 0;
    ajint wordlen = 6;
    ajint oldmax = 0;
    ajint newmax = 0;

    ajuint ntargetseqs;
    ajuint nkmers;

    AjPAlign align = NULL;
    EmbPWordMatch maxmatch; /* match with maximum score */

    /* Cursors for the current sequence being scanned,
    ** i.e., until which location it was scanned.
    ** Separate cursor/location entries for each sequence in the seqset.
    */
    ajuint* lastlocation;

    EmbPWordRK* wordsw = NULL;
    AjPList* matchlist = NULL;

    embInit("supermatcher", argc, argv);

    matrix    = ajAcdGetMatrixf("datafile");
    queryseqs = ajAcdGetSeqall("asequence");
    targetseqs= ajAcdGetSeqset("bsequence");
    gapopen   = ajAcdGetFloat("gapopen");
    gapextend = ajAcdGetFloat("gapextend");
    wordlen   = ajAcdGetInt("wordlen");
    align     = ajAcdGetAlign("outfile");
    errorf    = ajAcdGetOutfile("errorfile");
    width     = ajAcdGetInt("width");	/* width for banded Smith-Waterman */
    minscore  = ajAcdGetFloat("minscore");

    gapopen   = ajRoundFloat(gapopen, 8);
    gapextend = ajRoundFloat(gapextend, 8);

    sub = ajMatrixfGetMatrix(matrix);
    cvt = ajMatrixfGetCvt(matrix);

    embWordLength(wordlen);

    /* seqset sequence is the reference sequence for SAM format */
    ajAlignSetRefSeqIndx(align, 1);

    ajSeqsetTrim(targetseqs);

    ntargetseqs = ajSeqsetGetSize(targetseqs);

    AJCNEW0(matchlist, ntargetseqs);

    /* get tables of words */
    for(k=0;k<ntargetseqs;k++)
    {
	targetseq = ajSeqsetGetseqSeq(targetseqs, k);
	embWordGetTable(&kmers, targetseq);
	ajDebug("Number of distinct kmers found so far: %d\n",
		ajTableGetLength(kmers));
    }
    AJCNEW0(lastlocation, ntargetseqs);

    if(ajTableGetLength(kmers)<1)
	ajErr("no kmers found");

    nkmers = embWordRabinKarpInit(kmers, &wordsw, wordlen, targetseqs);

    while(ajSeqallNext(queryseqs,&queryseq))
    {
	ajSeqTrim(queryseq);

	queryaln = ajStrNewRes(1+ajSeqGetLen(queryseq));

	ajDebug("Read '%S'\n", ajSeqGetNameS(queryseq));

	for(k=0;k<ntargetseqs;k++)
	{
	    lastlocation[k]=0;
	    matchlist[k] = ajListstrNew();
	}

	embWordRabinKarpSearch(ajSeqGetSeqS(queryseq), targetseqs,
		(const EmbPWordRK*)wordsw, wordlen, nkmers,
		matchlist, lastlocation, ajFalse);


	for(k=0;k<ajSeqsetGetSize(targetseqs);k++)
	{
	    targetseq      = ajSeqsetGetseqSeq(targetseqs, k);

	    ajDebug("Processing '%S'\n", ajSeqGetNameS(targetseq));

	    if(ajListGetLength(matchlist[k])==0)
	    {
		ajFmtPrintF(errorf,
		            "No wordmatch start points for "
		            "%s vs %s. No alignment\n",
		            ajSeqGetNameC(queryseq),ajSeqGetNameC(targetseq));
		embWordMatchListDelete(&matchlist[k]);
		continue;
	    }


	    /* only the maximum match is used as seed
	     * (if there is more than one location with the maximum match
	     * only the first one is used)
	     * TODO: we should add a new option to make above limit optional
	     */
	    maxmatch = embWordMatchFirstMax(matchlist[k]);

	    supermatcher_findendpoints(maxmatch,targetseq, queryseq,
		    &targetstart, &querystart,
		    &targetend, &queryend);

	    targetaln=ajStrNewRes(1+ajSeqGetLen(targetseq));
	    queryseqc = ajSeqGetSeqC(queryseq);
	    targetseqc = ajSeqGetSeqC(targetseq);

	    ajStrAssignC(&queryaln,"");
	    ajStrAssignC(&targetaln,"");

	    ajDebug("++ %S v %S start:%d %d end:%d %d\n",
		    ajSeqGetNameS(targetseq), ajSeqGetNameS(queryseq),
		    targetstart, querystart, targetend, queryend);

	    newmax = (targetend-targetstart+2)*width;

	    if(newmax > oldmax)
	    {
		AJCRESIZE0(path,oldmax,newmax);
		AJCRESIZE0(compass,oldmax,newmax);
		oldmax=newmax;
		ajDebug("++ memory re/allocation for path/compass arrays"
			" to size: %d\n", newmax);
	    }
	    else
	    {
		AJCSET0(path,newmax);
		AJCSET0(compass,newmax);
	    }

	    ajDebug("Calling embAlignPathCalcSWFast "
		    "%d..%d [%d/%d] %d..%d [%d/%d] width:%d\n",
		    querystart, queryend, (queryend - querystart + 1),
		    ajSeqGetLen(queryseq),
		    targetstart, targetend, (targetend - targetstart + 1),
		    ajSeqGetLen(targetseq),
		    width);

	    score = embAlignPathCalcSWFast(&targetseqc[targetstart],
	                                   &queryseqc[querystart],
	                                   targetend-targetstart+1,
	                                   queryend-querystart+1,
	                                   0,width,
	                                   gapopen,gapextend,
	                                   path,sub,cvt,
	                                   compass,show);
	    if(score>minscore)
	    {
		embAlignWalkSWMatrixFast(path,compass,gapopen,gapextend,
		                         targetseq,queryseq,
		                         &targetaln,&queryaln,
		                         targetend-targetstart+1,
		                         queryend-querystart+1,
		                         0,width,
		                         &targetstart,&querystart);

		if(!ajAlignFormatShowsSequences(align))
		{
		    ajAlignDefineCC(align, ajStrGetPtr(targetaln),
		                    ajStrGetPtr(queryaln),
		                    ajSeqGetNameC(targetseq),
		                    ajSeqGetNameC(queryseq));
		    ajAlignSetScoreR(align, score);
		}
		else
		{
		    ajDebug(" queryaln:%S \ntargetaln:%S\n",
		            queryaln,targetaln);
		    embAlignReportLocal(align,
			    queryseq, targetseq,
			    queryaln, targetaln,
			    querystart, targetstart,
			    gapopen, gapextend,
			    score, matrix,
			    1 + ajSeqGetOffset(queryseq),
			    1 + ajSeqGetOffset(targetseq)
		    );
		}
		ajAlignWrite(align);
		ajAlignReset(align);
	    }
	    ajStrDel(&targetaln);

	    embWordMatchListDelete(&matchlist[k]);
	}

	ajStrDel(&queryaln);
    }


    for(k=0;k<nkmers;k++)
    {
	AJFREE(wordsw[k]->seqindxs);
	AJFREE(wordsw[k]->nSeqMatches);

	for(j=0;j<wordsw[k]->nseqs;j++)
	    AJFREE(wordsw[k]->locs[j]);

	AJFREE(wordsw[k]->nnseqlocs);
	AJFREE(wordsw[k]->locs);
	AJFREE(wordsw[k]);
    }

    embWordFreeTable(&kmers);

    if(!ajAlignFormatShowsSequences(align))
	ajMatrixfDel(&matrix);
    
    AJFREE(path);
    AJFREE(compass);
    AJFREE(kmers);
    AJFREE(wordsw);

    AJFREE(matchlist);
    AJFREE(lastlocation);

    ajAlignClose(align);
    ajAlignDel(&align);
    ajSeqallDel(&queryseqs);
    ajSeqDel(&queryseq);
    ajSeqsetDel(&targetseqs);
    ajFileClose(&errorf);

    embExit();

    return 0;
}
int main(int argc, char **argv)
{
    AjPSeqset seqset;
    AjPSeqall seqall;
    AjPSeq queryseq;
    const AjPSeq targetseq;
    ajint wordlen;
    AjPTable wordsTable = NULL;
    AjPList* matchlist = NULL;
    AjPFile logfile;
    AjPFeattable* seqsetftables = NULL;
    AjPFeattable seqallseqftable = NULL;
    AjPFeattabOut ftoutforseqsetseq = NULL;
    AjPFeattabOut ftoutforseqallseq = NULL;
    AjPAlign align = NULL;
    AjIList iter = NULL;
    ajint targetstart;
    ajint querystart;
    ajint len;
    ajuint i, j;
    ajulong nAllMatches = 0;
    ajulong sumAllScore = 0;
    AjBool dumpAlign = ajTrue;
    AjBool dumpFeature = ajTrue;
    AjBool checkmode = ajFalse;
    EmbPWordRK* wordsw = NULL;
    ajuint npatterns = 0;
    ajuint seqsetsize;
    ajuint nmatches;
    ajuint* nmatchesseqset;
    ajuint* lastlocation; /* Cursors for Rabin-Karp search. */
                          /* Shows until what point the query sequence was
                           *  scanned for a pattern sequences in the seqset.
                          */
    char* paddedheader = NULL;
    const char* header;
    AjPStr padding;

    header = "Pattern %S  #pat-sequences  #all-matches  avg-match-length\n";
    padding = ajStrNew();

    embInit("wordmatch", argc, argv);

    wordlen = ajAcdGetInt("wordsize");
    seqset  = ajAcdGetSeqset("asequence");
    seqall  = ajAcdGetSeqall("bsequence");
    logfile = ajAcdGetOutfile("logfile");
    dumpAlign = ajAcdGetToggle("dumpalign");
    dumpFeature = ajAcdGetToggle("dumpfeat");

    if(dumpAlign)
    {
        align = ajAcdGetAlign("outfile");
        ajAlignSetExternal(align, ajTrue);
    }

    seqsetsize = ajSeqsetGetSize(seqset);
    ajSeqsetTrim(seqset);
    AJCNEW0(matchlist, seqsetsize);
    AJCNEW0(seqsetftables, seqsetsize);
    AJCNEW0(nmatchesseqset, seqsetsize);

    if (dumpFeature)
    {
        ftoutforseqsetseq =  ajAcdGetFeatout("aoutfeat");
        ftoutforseqallseq =  ajAcdGetFeatout("boutfeat");
    }

    checkmode = !dumpFeature && !dumpAlign;
    embWordLength(wordlen);

    ajFmtPrintF(logfile, "Small sequence/file for constructing"
	    " target patterns: %S\n", ajSeqsetGetUsa(seqset));
    ajFmtPrintF(logfile, "Large sequence/file to be scanned"
	    " for patterns: %S\n", ajSeqallGetUsa(seqall));
    ajFmtPrintF(logfile, "Number of sequences in the patterns file: %u\n",
            seqsetsize);
    ajFmtPrintF(logfile, "Pattern/word length: %u\n", wordlen);

    for(i=0;i<seqsetsize;i++)
    {
        targetseq = ajSeqsetGetseqSeq(seqset, i);
        embWordGetTable(&wordsTable, targetseq);
    }

    AJCNEW0(lastlocation, seqsetsize);

    if(ajTableGetLength(wordsTable)>0)
    {
        npatterns = embWordRabinKarpInit(wordsTable,
                                       &wordsw, wordlen, seqset);
        ajFmtPrintF(logfile, "Number of patterns/words found: %u\n", npatterns);

        while(ajSeqallNext(seqall,&queryseq))
        {
            for(i=0;i<seqsetsize;i++)
            {
                lastlocation[i]=0;

                if (!checkmode)
                    matchlist[i] = ajListstrNew();
            }

            nmatches = embWordRabinKarpSearch(
                    ajSeqGetSeqS(queryseq), seqset,
                    (EmbPWordRK const *)wordsw, wordlen, npatterns,
                    matchlist, lastlocation, checkmode);
            nAllMatches += nmatches;

            if (checkmode)
        	continue;

            for(i=0;i<seqsetsize;i++)
            {
                if(ajListGetLength(matchlist[i])>0)
                {
                    iter = ajListIterNewread(matchlist[i]) ;

                    while(embWordMatchIter(iter, &targetstart, &querystart, &len,
                            &targetseq))
                    {
                        if(dumpAlign)
                        {
                            ajAlignDefineSS(align, targetseq, queryseq);
                            ajAlignSetScoreI(align, len);
                            /* ungapped alignment means same length
                             *  for both sequences
                            */
                            ajAlignSetSubRange(align, targetstart, 1, len,
                                    ajSeqIsReversed(targetseq),
                                    ajSeqGetLen(targetseq),
                                    querystart, 1, len,
                                    ajSeqIsReversed(queryseq),
                                    ajSeqGetLen(queryseq));
                        }
                    }

                    if(dumpAlign)
                    {
                	ajAlignWrite(align);
                	ajAlignReset(align);
                    }

                    if(ajListGetLength(matchlist[i])>0 && dumpFeature)
                    {
                        embWordMatchListConvToFeat(matchlist[i],
                                                   &seqsetftables[i],
                                                   &seqallseqftable,
                                                   targetseq, queryseq);
                        ajFeattableWrite(ftoutforseqallseq, seqallseqftable);
                        ajFeattableDel(&seqallseqftable);
                    }

                    ajListIterDel(&iter);
                }

                embWordMatchListDelete(&matchlist[i]);
            }
        }

        /* search completed, now report statistics */
        for(i=0;i<npatterns;i++)
        {
            sumAllScore += wordsw[i]->lenMatches;

            for(j=0;j<wordsw[i]->nseqs;j++)
        	nmatchesseqset[wordsw[i]->seqindxs[j]] +=
        		wordsw[i]->nSeqMatches[j];
        }

        ajFmtPrintF(logfile, "Number of sequences in the file scanned "
                "for patterns: %u\n", ajSeqallGetCount(seqall));
        ajFmtPrintF(logfile, "Number of all matches: %Lu"
                " (wordmatch finds exact matches only)\n", nAllMatches);

        if(nAllMatches>0)
        {
            ajFmtPrintF(logfile, "Sum of match lengths: %Lu\n", sumAllScore);
            ajFmtPrintF(logfile, "Average match length: %.2f\n",
        	    sumAllScore*1.0/nAllMatches);

            ajFmtPrintF(logfile, "\nDistribution of the matches among pattern"
        	    " sequences:\n");
            ajFmtPrintF(logfile, "-----------------------------------------"
        	    "-----------\n");

            for(i=0;i<ajSeqsetGetSize(seqset);i++)
            {
        	if (nmatchesseqset[i]>0)
        	    ajFmtPrintF(logfile, "%-42s: %8u\n",
        	                ajSeqGetNameC(ajSeqsetGetseqSeq(seqset, i)),
        	                nmatchesseqset[i]);

        	ajFeattableWrite(ftoutforseqsetseq, seqsetftables[i]);
        	ajFeattableDel(&seqsetftables[i]);
            }

            ajFmtPrintF(logfile, "\nPattern statistics:\n");
            ajFmtPrintF(logfile, "-------------------\n");
            if(wordlen>7)
        	ajStrAppendCountK(&padding, ' ', wordlen-7);
            paddedheader = ajFmtString(header,padding);
            ajFmtPrintF(logfile, paddedheader);

            for(i=0;i<npatterns;i++)
        	if (wordsw[i]->nMatches>0)
        	    ajFmtPrintF(logfile, "%-7s: %12u  %12u %17.2f\n",
        	                wordsw[i]->word->fword, wordsw[i]->nseqs,
        	                wordsw[i]->nMatches,
        	                wordsw[i]->lenMatches*1.0/wordsw[i]->nMatches);
        }

    }

    for(i=0;i<npatterns;i++)
    {
        for(j=0;j<wordsw[i]->nseqs;j++)
            AJFREE(wordsw[i]->locs[j]);

        AJFREE(wordsw[i]->locs);
        AJFREE(wordsw[i]->seqindxs);
        AJFREE(wordsw[i]->nnseqlocs);
        AJFREE(wordsw[i]->nSeqMatches);
        AJFREE(wordsw[i]);
    }

    embWordFreeTable(&wordsTable);

    AJFREE(wordsw);
    AJFREE(matchlist);
    AJFREE(lastlocation);
    AJFREE(nmatchesseqset);
    AJFREE(seqsetftables);

    if(dumpAlign)
    {
        ajAlignClose(align);
        ajAlignDel(&align);
    }

    if(dumpFeature)
    {
        ajFeattabOutDel(&ftoutforseqsetseq);
        ajFeattabOutDel(&ftoutforseqallseq);
    }

    ajFileClose(&logfile);

    ajSeqallDel(&seqall);
    ajSeqsetDel(&seqset);
    ajSeqDel(&queryseq);
    ajStrDel(&padding);
    AJFREE(paddedheader);

    embExit();

    return 0;
}