コード例 #1
0
int main(int argc, char **argv)
{
    AjPSeqall seq1;
    AjPSeqset seq2;
    AjPSeq a;
    const AjPSeq b;
    AjPStr m = 0;
    AjPStr n = 0;

    AjPFile errorf;
    AjBool show = ajFalse;

    ajint    lena = 0;
    ajint    lenb = 0;

    const char   *p;
    const char   *q;

    AjPMatrixf matrix;
    AjPSeqCvt cvt = 0;
    float **sub;
    ajint *compass = NULL;
    float *path = NULL;

    float gapopen;
    float gapextend;
    float score;


    ajint begina;
    ajint i;
    ajuint k;
    ajint beginb;
    ajint start1 = 0;
    ajint start2 = 0;
    ajint end1   = 0;
    ajint end2   = 0;
    ajint width  = 0;
    AjPTable seq1MatchTable = 0;
    ajint wordlen = 6;
    ajint oldmax = 0;

    AjPAlign align = NULL;

    embInit("supermatcher", argc, argv);

    matrix    = ajAcdGetMatrixf("datafile");
    seq1      = ajAcdGetSeqall("asequence");
    seq2      = ajAcdGetSeqset("bsequence");
    gapopen   = ajAcdGetFloat("gapopen");
    gapextend = ajAcdGetFloat("gapextend");
    wordlen   = ajAcdGetInt("wordlen");
    align     = ajAcdGetAlign("outfile");
    errorf    = ajAcdGetOutfile("errorfile");
    width     = ajAcdGetInt("width");	/* not the same as awidth */

    gapopen   = ajRoundFloat(gapopen, 8);
    gapextend = ajRoundFloat(gapextend, 8);

    sub = ajMatrixfGetMatrix(matrix);
    cvt = ajMatrixfGetCvt(matrix);

    embWordLength(wordlen);

    ajSeqsetTrim(seq2);

    while(ajSeqallNext(seq1,&a))
    {
        ajSeqTrim(a);
	begina = 1 + ajSeqGetOffset(a);

	m = ajStrNewRes(1+ajSeqGetLen(a));

	lena = ajSeqGetLen(a);

	ajDebug("Read '%S'\n", ajSeqGetNameS(a));

	if(!embWordGetTable(&seq1MatchTable, a)) /* get table of words */
	    ajErr("Could not generate table for %s\n",
		  ajSeqGetNameC(a));

	for(k=0;k<ajSeqsetGetSize(seq2);k++)
	{
	    b      = ajSeqsetGetseqSeq(seq2, k);
	    lenb   = ajSeqGetLen(b);
	    beginb = 1 + ajSeqGetOffset(b);

	    ajDebug("Processing '%S'\n", ajSeqGetNameS(b));
	    p = ajSeqGetSeqC(a);
	    q = ajSeqGetSeqC(b);

	    if(!supermatcher_findstartpoints(seq1MatchTable,b,a,
					     &start1, &start2,
					     &end1, &end2))
	    {
		ajFmtPrintF(errorf,
			    "No wordmatch start points for "
			    "%s vs %s. No alignment\n",
			    ajSeqGetNameC(a),ajSeqGetNameC(b));
		continue;
	    }
	    
        n=ajStrNewRes(1+ajSeqGetLen(b));
        ajStrAssignC(&m,"");
        ajStrAssignC(&n,"");

	    ajDebug("++ %S v %S start:%d %d end:%d %d\n",
		    ajSeqGetNameS(a), ajSeqGetNameS(b),
		    start1, start2, end1, end2);

	    if(end1-start1+1 > oldmax)
	    {
		oldmax = ((end1-start1)+1);
		AJRESIZE(path,oldmax*width*sizeof(float));
		AJRESIZE(compass,oldmax*width*sizeof(ajint));
		ajDebug("++ resize to oldmax: %d\n", oldmax);
	    }

	    for(i=0;i<((end1-start1)+1)*width;i++)
		path[i] = 0.0;

	    ajDebug("Calling embAlignPathCalcFast "
		     "%d..%d [%d/%d] %d..%d [%d/%d]\n",
		     start1, end1, (end1 - start1 + 1), lena,
		     start2, end2, (end2 - start2 + 1), lenb);

	    score = embAlignPathCalcSWFast(&p[start1],&q[start2],
                                           end1-start1+1,end2-start2+1,
                                           0,width,
                                           gapopen,gapextend,
                                           path,sub,cvt,
                                           compass,show);

	    embAlignWalkSWMatrixFast(path,compass,gapopen,gapextend,a,b,
					 &m,&n,end1-start1+1,end2-start2+1,
					 0,width,
                                         &start1,&start2);

		if(!ajAlignFormatShowsSequences(align))
		{
		    ajAlignDefineCC(align, ajStrGetPtr(m),
		            ajStrGetPtr(n), ajSeqGetNameC(a),
		            ajSeqGetNameC(b));
		    ajAlignSetScoreR(align, score);
		}
		else
		{
		    embAlignReportLocal(align, a, b,
		            m,n,start1,start2,
		            gapopen, gapextend,
		            score,matrix, begina, beginb);
		}
		ajAlignWrite(align);
		ajAlignReset(align);
	    ajStrDel(&n);
	}

	embWordFreeTable(&seq1MatchTable); /* free table of words */
	seq1MatchTable=0;

	ajStrDel(&m);

    }

    if(!ajAlignFormatShowsSequences(align))
    {
        ajMatrixfDel(&matrix);        
    }
    
    AJFREE(path);
    AJFREE(compass);

    ajAlignClose(align);
    ajAlignDel(&align);
    ajSeqallDel(&seq1);
    ajSeqDel(&a);
    ajSeqsetDel(&seq2);
    ajFileClose(&errorf);

    embExit();

    return 0;
}
コード例 #2
0
static ajint supermatcher_findstartpoints(AjPTable seq1MatchTable,
					  const AjPSeq b,
					  const AjPSeq a, ajint *start1,
					  ajint *start2, ajint *end1,
					  ajint *end2)
{
    ajint max = -10;
    ajint offset = 0;
    AjPList matchlist = NULL;
    AjPList ordered = NULL;
    ajint amax;
    ajint bmax;
    ajint bega;
    ajint begb;

    amax = ajSeqGetLen(a)-1;
    bmax = ajSeqGetLen(b)-1;
    bega = ajSeqGetOffset(a);
    begb = ajSeqGetOffset(b);


    ajDebug("supermatcher_findstartpoints len %d %d off %d %d\n",
	     amax, bmax, bega, begb);
    matchlist = embWordBuildMatchTable(seq1MatchTable, b, ajTrue);

    if(!matchlist)
	return 0;
    else if(!matchlist->Count)
    {
        embWordMatchListDelete(&matchlist);
	return 0;
    }


    /* order and add if the gap is gapmax or less */

    /* create list header bit*/
    ordered = ajListNew();

    supermatcher_orderandconcat(matchlist, ordered);

    /* this sets global structure conmax to point to a matchlist element */
    ajListMap(ordered,supermatcher_findmax, &max);

    ajDebug("findstart conmax off:%d count:%d total:%d\n",
	    conmax->offset, conmax->count, conmax->total,
	    ajListGetLength(conmax->list));
    offset = conmax->offset;

    /* the offset is all we needed! we can delete everything */

    ajListMap(ordered,supermatcher_removelists, NULL);
    ajListFree(&ordered);
    embWordMatchListDelete(&matchlist);	/* free the match structures */


    if(offset > 0)
    {
	*start1 = offset;
	*start2 = 0;
    }
    else
    {
	*start2 = 0-offset;
	*start1 = 0;
    }
    *end1 = *start1;
    *end2 = *start2;

    ajDebug("++ end1 %d -> %d end2 %d -> %d\n", *end1, amax, *end2, bmax);
    while(*end1<amax && *end2<bmax)
    {
	(*end1)++;
	(*end2)++;
    }

    ajDebug("++ end1 %d end2 %d\n", *end1, *end2);
    
    
    ajDebug("supermatcher_findstartpoints has %d..%d [%d] %d..%d [%d]\n",
	    *start1, *end1, ajSeqGetLen(a), *start2, *end2, ajSeqGetLen(b));

    return 1;
}
コード例 #3
0
int main(int argc, char **argv)
{
    AjPAlign align;
    AjPSeqall seqall;
    AjPSeq a;
    AjPSeq b;
    AjPStr alga;
    AjPStr algb;
    AjPStr ss;

    ajuint    lena;
    ajuint    lenb;

    const char   *p;
    const char   *q;

    ajint start1 = 0;
    ajint start2 = 0;

    float *path;
    ajint *compass;
    float* ix;
    float* iy;
    float* m;

    AjPMatrixf matrix;
    AjPSeqCvt cvt = 0;
    float **sub;

    float gapopen;
    float gapextend;
    float endgapopen;
    float endgapextend;
    ajulong maxarr = 1000; 	/* arbitrary. realloc'd if needed */
    ajulong len;			

    float score;

    AjBool dobrief = ajTrue;
    AjBool endweight = ajFalse; /* whether end gap penalties should be applied */

    float id   = 0.;
    float sim  = 0.;
    float idx  = 0.;
    float simx = 0.;

    AjPStr tmpstr = NULL;

    size_t stlen;

    embInit("needle", argc, argv);

    matrix    = ajAcdGetMatrixf("datafile");
    a         = ajAcdGetSeq("asequence");
    ajSeqTrim(a);
    seqall    = ajAcdGetSeqall("bsequence");
    gapopen   = ajAcdGetFloat("gapopen");
    gapextend = ajAcdGetFloat("gapextend");
    endgapopen   = ajAcdGetFloat("endopen");
    endgapextend = ajAcdGetFloat("endextend");
    dobrief   = ajAcdGetBoolean("brief");
    endweight   = ajAcdGetBoolean("endweight");

    align     = ajAcdGetAlign("outfile");

    gapopen = ajRoundFloat(gapopen, 8);
    gapextend = ajRoundFloat(gapextend, 8);

    AJCNEW(path, maxarr);
    AJCNEW(compass, maxarr);
    AJCNEW(m, maxarr);
    AJCNEW(ix, maxarr);
    AJCNEW(iy, maxarr);

    alga  = ajStrNew();
    algb  = ajStrNew();
    ss = ajStrNew();

    sub = ajMatrixfGetMatrix(matrix);
    cvt = ajMatrixfGetCvt(matrix);

    lena = ajSeqGetLen(a);

    while(ajSeqallNext(seqall,&b))
    {
	ajSeqTrim(b);
	lenb = ajSeqGetLen(b);

	if(lenb > (ULONG_MAX/(ajulong)(lena+1)))
	   ajFatal("Sequences too big. Try 'stretcher' or 'supermatcher'");

	len = lena*lenb;

	if(len>maxarr)
	{
	    stlen = (size_t) len;
	    AJCRESIZETRY(path,stlen);
	    if(!path)
		ajDie("Sequences too big. Try 'stretcher'");
	    AJCRESIZETRY(compass,stlen);
	    if(!compass)
		ajDie("Sequences too big. Try 'stretcher'");
        AJCRESIZETRY(m,stlen);
        if(!m)
        ajDie("Sequences too big. Try 'stretcher'");
        AJCRESIZETRY(ix,stlen);
        if(!ix)
        ajDie("Sequences too big. Try 'stretcher'");
        AJCRESIZETRY(iy,stlen);
        if(!iy)
        ajDie("Sequences too big. Try 'stretcher'");
	    maxarr=len;
	}


	p = ajSeqGetSeqC(a);
	q = ajSeqGetSeqC(b);

	ajStrAssignC(&alga,"");
	ajStrAssignC(&algb,"");

	score = embAlignPathCalcWithEndGapPenalties(p, q, lena, lenb,
	        gapopen, gapextend, endgapopen, endgapextend,
	        &start1, &start2, path, sub, cvt,
	        m, ix, iy, compass, ajTrue, endweight);



	embAlignWalkNWMatrixUsingCompass(p, q, &alga, &algb,
	        lena, lenb, &start1, &start2,
	        compass);
		
	embAlignReportGlobal(align, a, b, alga, algb,
			     start1, start2,
			     gapopen, gapextend,
			     score, matrix,
			     ajSeqGetOffset(a), ajSeqGetOffset(b));

	if(!dobrief)
	{
	  embAlignCalcSimilarity(alga,algb,sub,cvt,lena,lenb,&id,&sim,&idx,
				 &simx);
	  ajFmtPrintS(&tmpstr,"Longest_Identity = %5.2f%%\n",
			 id);
	  ajFmtPrintAppS(&tmpstr,"Longest_Similarity = %5.2f%%\n",
			 sim);
	  ajFmtPrintAppS(&tmpstr,"Shortest_Identity = %5.2f%%\n",
			 idx);
	  ajFmtPrintAppS(&tmpstr,"Shortest_Similarity = %5.2f%%",
			 simx);
	  ajAlignSetSubHeaderApp(align, tmpstr);
	}
	ajAlignWrite(align);
	ajAlignReset(align);

    }

    ajAlignClose(align);
    ajAlignDel(&align);

    ajSeqallDel(&seqall);
    ajSeqDel(&a);
    ajSeqDel(&b);

    AJFREE(compass);
    AJFREE(path);
    AJFREE(ix);
    AJFREE(iy);
    AJFREE(m);

    ajStrDel(&alga);
    ajStrDel(&algb);
    ajStrDel(&ss);
    ajStrDel(&tmpstr);

    embExit();

    return 0;
}
コード例 #4
0
int main(int argc, char **argv)
{
    AjPSeq seq1;
    AjPSeq seq2;
    ajint wordlen;
    AjPTable seq1MatchTable = 0;
    AjPList matchlist = NULL;
    AjPGraph graph    = NULL;
    AjPGraph xygraph  = NULL;
    AjBool boxit;
    /*
    ** Different ticks as they need to be different for x and y due to
    ** length of string being important on x
    */
    ajuint acceptableticksx[]=
    {
	1,10,50,100,500,1000,1500,10000,
	500000,1000000,5000000
    };
    ajuint acceptableticks[]=
    {
	1,10,50,100,200,500,1000,2000,5000,10000,15000,
	500000,1000000,5000000
    };
    ajint numbofticks = 10;
    float xmargin;
    float ymargin;
    float ticklen;
    float tickgap;
    float onefifth = 0.0;
    ajint i;
    float k;
    float max;
    char ptr[10];
    ajint begin1;
    ajint begin2;
    ajint end1;
    ajint end2;
    ajuint len1;
    ajuint len2;
    float fbegin1;
    float fbegin2;
    float fend1;
    float fend2;
    float flen1;
    float flen2;
    AjBool stretch;
    
    embInit("dottup", argc, argv);

    wordlen = ajAcdGetInt("wordsize");
    seq1    = ajAcdGetSeq("asequence");
    seq2    = ajAcdGetSeq("bsequence");
    graph   = ajAcdGetGraph("graph");
    boxit   = ajAcdGetBoolean("boxit");
    stretch = ajAcdGetToggle("stretch");
    xygraph = ajAcdGetGraphxy("xygraph");

    begin1 = ajSeqGetBegin(seq1);
    begin2 = ajSeqGetBegin(seq2);
    end1   = ajSeqGetEnd(seq1);
    end2   = ajSeqGetEnd(seq2);
    len1   = end1 - begin1 + 1;
    len2   = end2 - begin2 + 1;

    flen1  = (float) len1;
    flen2  = (float) len2;
    fbegin1 = (float) begin1;
    fbegin2 = (float) begin2;
    fend1   = (float) end1;
    fend2   = (float) end2;

    offset1 = fbegin1;
    offset2 = fbegin2;

    ajSeqTrim(seq1);
    ajSeqTrim(seq2);

    embWordLength(wordlen);
    if(embWordGetTable(&seq1MatchTable, seq1))
	matchlist = embWordBuildMatchTable(seq1MatchTable, seq2, ajTrue);


    if(stretch)
    {
	dottup_stretchplot(xygraph,matchlist,seq1,seq2,begin1,begin2,end1,
			   end2);
	if(matchlist)
	    embWordMatchListDelete(&matchlist); /* free the match structures */
    }

    else
    {
	/* only here if stretch is false */

	max= flen1;
	if(flen2 > max)
	    max = flen2;

	xmargin = ymargin = max * (float)0.15;

	ajGraphOpenWin(graph, fbegin1-ymargin,fend1+ymargin,
		       fbegin2-xmargin,(float)fend2+xmargin);

	ajGraphicsSetCharscale(0.5);

	if(matchlist)
	    dottup_plotMatches(matchlist);

	if(boxit)
	{
	    ajGraphicsDrawposRect(fbegin1, fbegin2, fend1, fend2);
	    i = 0;
	    while(acceptableticksx[i]*numbofticks < len1)
		i++;

	    if(i<=13)
		tickgap = (float) acceptableticksx[i];
	    else
		tickgap = (float) acceptableticksx[10];

	    ticklen = xmargin * (float) 0.1;
	    onefifth  = xmargin * (float)0.2;
	    ajGraphicsDrawposTextAtmid(fbegin1+flen1*(float)0.5,
                                       fbegin1-(onefifth*(float)3.0),
                                       ajGraphGetYlabelC(graph));

	    if(len2/len1 > 10 )
	    {
		/* a lot smaller then just label start and end */
		ajGraphicsDrawposLine(fbegin1,fbegin2,fbegin1,
			    fbegin2-ticklen);
		sprintf(ptr,"%u",ajSeqGetOffset(seq1));
		ajGraphicsDrawposTextAtmid(fbegin1,fbegin2-(onefifth),ptr);
		
		ajGraphicsDrawposLine(fend1,fbegin2,
			    fend1,fbegin2-ticklen);
		sprintf(ptr,"%d",end1);
		ajGraphicsDrawposTextAtmid(fend1,fbegin2-(onefifth),ptr);
	    }
	    else
		for(k=fbegin1;k<fend1;k+=tickgap)
		{
		    ajGraphicsDrawposLine(k,fbegin2,k,fbegin2-ticklen);
		    sprintf(ptr,"%d",(ajint)k);
		    ajGraphicsDrawposTextAtmid( k,fbegin2-(onefifth),ptr);
		}

	    i = 0;
	    while(acceptableticks[i]*numbofticks < len2)
		i++;

	    tickgap   = (float) acceptableticks[i];
	    ticklen   = ymargin*(float)0.1;
	    onefifth  = ymargin*(float)0.2;
	    ajGraphicsDrawposTextAtlineJustify(fbegin1-(onefifth*(float)4.),
                                               fbegin2+flen2*(float)0.5,
                                               fbegin2-(onefifth*(float)4.),
                                               fbegin2+flen2,
                                               ajGraphGetXlabelC(graph),
                                               0.5);

	    if(len1/len2 > 10 )
	    {
		/* a lot smaller then just label start and end */
		ajGraphicsDrawposLine(fbegin1,fbegin2,fbegin1-ticklen,
			    fbegin2);
		sprintf(ptr,"%u",ajSeqGetOffset(seq2));
		ajGraphicsDrawposTextAtend(fbegin1-(onefifth),fbegin2,ptr);

		ajGraphicsDrawposLine(fbegin1,fend2,fbegin1-ticklen,
			    fend2);
		sprintf(ptr,"%d",end2);
		ajGraphicsDrawposTextAtend(fbegin2-(onefifth),fend2,ptr);
	    }
	    else
		for(k=fbegin2;k<fend2;k+=tickgap)
		{
		    ajGraphicsDrawposLine(fbegin1,k,fbegin1-ticklen,k);
		    sprintf(ptr,"%d",(ajint)k);
		    ajGraphicsDrawposTextAtend(fbegin1-(onefifth),k,ptr);
		}
	}
    }

    ajGraphicsClose();
    ajSeqDel(&seq1);
    ajSeqDel(&seq2);
    ajGraphxyDel(&graph);
    ajGraphxyDel(&xygraph);

    embWordFreeTable(&seq1MatchTable);

    if(matchlist)
	embWordMatchListDelete(&matchlist); /* free the match structures */

    embExit();

    return 0;
}
コード例 #5
0
void embPatternSeqSearch (AjPFeattable ftable, const AjPSeq seq,
			  const AjPPatternSeq pat, AjBool reverse)
{
    const void *tidy;
    ajuint hits;
    ajuint i;
    AjPPatComp pattern;
    EmbPMatMatch m = NULL;
    AjPFeature sf  = NULL;
    AjPSeq revseq  = NULL;
    AjPList list   = ajListNew();
    AjPStr seqstr  = ajStrNew();
    AjPStr seqname = ajStrNew();
    AjPStr tmp     = ajStrNew();
    ajint adj;
    ajint begin;
    AjBool isreversed;
    ajint seqlen;

    seqlen = ajSeqGetLen(seq);
    if(!seqlen)
        return;

    isreversed = ajSeqIsReversedTrue(seq);

    if(isreversed)
	seqlen += ajSeqGetOffset(seq);

    begin = ajSeqGetBeginTrue(seq);
    adj = ajSeqGetEndTrue(seq);

    if(!ajStrGetLen(featMotifProt))
        ajStrAssignC(&featMotifProt, "SO:0001067");

    if(!ajStrGetLen(featMotifNuc))
        ajStrAssignC(&featMotifNuc, "SO:0000714");

    ajStrAssignS(&seqname,ajSeqGetNameS(seq));
    pattern = ajPatternSeqGetCompiled(pat);

    if (reverse)
    {
        revseq = ajSeqNewSeq(seq);
        ajStrAssignSubS(&seqstr, ajSeqGetSeqS(revseq),
			begin-1,adj-1);
        ajSeqstrReverse(&seqstr);
    }
    else
        ajStrAssignSubS(&seqstr, ajSeqGetSeqS(seq),
			begin-1,adj-1);

    ajStrFmtUpper(&seqstr);
    /*ajDebug("seqlen:%d len: %d offset: %d offend: %d begin: %d end: %d\n"
	   "'%S'\n",
	   seqlen , ajSeqGetLen(seq), ajSeqGetOffset(seq),
	   ajSeqGetOffend(seq), ajSeqGetBegin(seq), ajSeqGetEnd(seq),
	   seqstr);*/

    ajDebug("embPatternSeqSearch '%S' protein: %B reverse: %B\n",
	    pattern->pattern, pat->Protein, reverse);
    embPatFuzzSearchII(pattern,begin,seqname,seqstr,list,
                       ajPatternSeqGetMismatch(pat),&hits,&tidy);

    ajDebug ("embPatternSeqSearch: found %d hits\n",hits);

    if(!reverse)
	ajListReverse(list);

    for(i=0;i<hits;++i)
    {
        ajListPop(list,(void **)&m);

 	if (reverse)
	    sf = ajFeatNew(ftable, NULL, featMotifNuc,
                           adj - m->start - m->len + begin + 1,
                           adj - m->start + begin,
                           0.0, '-', 0);
	else
        {
	    if(ajSeqIsProt(seq) || ajFeattableIsProt(ftable))
                sf = ajFeatNewProt(ftable, NULL, featMotifProt,
                                   m->start,
                                   m->start + m->len - 1,
                                   0.0);
            else
                sf = ajFeatNew(ftable, NULL, featMotifNuc,
                               m->start,
                               m->start + m->len - 1,
                               0.0, '.', 0);
        }
        
	if(isreversed)
	    ajFeatReverse(sf, seqlen);

	/*
	ajUser("isrev: %B reverse: %B begin: %d adj: %d "
	       "start: %d len: %d seqlen: %d %d..%d '%c'\n",
	       isreversed, reverse, begin, adj, m->start, m->len, seqlen,
	       sf->Start, sf->End, sf->Strand);
	*/

	ajFeatSetScore(sf, (float) (m->len - m->mm));

        ajFmtPrintS(&tmp, "*pat %S: %S",
                    ajPatternSeqGetName(pat),
                    ajPatternSeqGetPattern(pat));
        ajFeatTagAdd(sf,NULL,tmp);

        if(m->mm)
        {
            ajFmtPrintS(&tmp, "*mismatch %d", m->mm);
            ajFeatTagAdd(sf, NULL, tmp);
        }

        embMatMatchDel(&m);
    }

    ajStrDel(&seqname);
    ajStrDel(&seqstr);
    ajStrDel(&tmp);
    ajListFree(&list);

    if (reverse)
        ajSeqDel(&revseq);

    return;
}
コード例 #6
0
void embPatternRegexSearch (AjPFeattable ftable, const AjPSeq seq,
			    const AjPPatternRegex pat, AjBool reverse)
{
    ajint pos=0;
    ajint off;
    ajint len;
    AjPFeature sf    = NULL;
    AjPStr substr    = NULL;
    AjPStr seqstr    = NULL;
    AjPStr tmpstr = NULL;
    AjPStr tmp       = ajStrNew();
    AjPRegexp patexp = ajPatternRegexGetCompiled(pat);
    ajint adj;
    AjBool isreversed;
    AjPSeq revseq;
    ajint seqlen;

    seqlen = ajSeqGetLen(seq);
    if(!seqlen)
        return;

    isreversed = ajSeqIsReversedTrue(seq);

    if(isreversed)
	seqlen += ajSeqGetOffset(seq);

    pos = ajSeqGetBeginTrue(seq);
    adj = ajSeqGetEndTrue(seq);

    if(!ajStrGetLen(featMotifProt))
        ajStrAssignC(&featMotifProt, "SO:0001067");

    if(!ajStrGetLen(featMotifNuc))
        ajStrAssignC(&featMotifNuc, "SO:0000714");

    /*ajDebug("embPatternRegexSearch pos: %d adj: %d reverse: %B\n",
	   pos, adj, reverse, isreversed);*/
    /*ajDebug("seqlen:%d len: %d offset: %d offend: %d begin: %d end: %d\n",
	   seqlen , ajSeqGetLen(seq), ajSeqGetOffset(seq),
	   ajSeqGetOffend(seq), ajSeqGetBegin(seq), ajSeqGetEnd(seq));*/

    if (reverse)
    {
        revseq = ajSeqNewSeq(seq);
        ajStrAssignSubS(&seqstr, ajSeqGetSeqS(revseq), pos-1, adj-1);
        ajSeqstrReverse(&seqstr);
    }

    ajStrAssignSubS(&seqstr, ajSeqGetSeqS(seq), pos-1, adj-1);

    ajStrFmtUpper(&seqstr);

    while(ajStrGetLen(seqstr) && ajRegExec(patexp, seqstr))
    {
	off = ajRegOffset(patexp);
	len = ajRegLenI(patexp, 0);

	if(off || len)
	{
	    ajRegSubI(patexp, 0, &substr);
	    ajRegPost(patexp, &tmp);
	    ajStrAssignS(&seqstr, substr);
            ajStrAppendS(&seqstr, tmp);
	    pos += off;

	    /*ajDebug("match pos: %d adj: %d len: %d off:%d\n",
                    pos, adj, len, off);*/
            if (reverse)
                sf = ajFeatNew(ftable, NULL, featMotifNuc,
                                   adj - pos - len + 2,
                                   adj - pos + 1,
                                   0.0, '-', 0);
	    else
            {
                if(ajSeqIsProt(seq) || ajFeattableIsProt(ftable))
                    sf = ajFeatNewProt(ftable, NULL, featMotifProt,
                                       pos, pos + len - 1,
                                       0.0);
                else
                    sf = ajFeatNew(ftable, NULL, featMotifNuc,
                                   pos, pos + len - 1,
                                   0.0, '.', 0);
            }
            
	    if(isreversed)
		ajFeatReverse(sf, seqlen);

	    ajFmtPrintS (&tmpstr,"*pat %S: %S",
			 ajPatternRegexGetName(pat),
                         ajPatternRegexGetPattern(pat));
	    ajFeatTagAdd (sf,NULL,tmpstr);
	    pos += 1;
	    ajStrCutStart(&seqstr, 1);
	}
	else
	{
	    pos++;
	    ajStrCutStart(&seqstr, 1);
	}
    }

    ajStrDel(&tmpstr);
    ajStrDel(&tmp);
    ajStrDel(&substr);
    ajStrDel(&seqstr);

    if(reverse)
	ajSeqDel(&revseq);

    return;
}
コード例 #7
0
int main(int argc, char **argv)
{
    AjPSeqall queryseqs;
    AjPSeqset targetseqs;
    AjPSeq queryseq;
    const AjPSeq targetseq;
    AjPStr queryaln = 0;
    AjPStr targetaln = 0;

    AjPFile errorf;
    AjBool show = ajFalse;

    const char   *queryseqc;
    const char   *targetseqc;

    AjPMatrixf matrix;
    AjPSeqCvt cvt = 0;
    float **sub;
    ajint *compass = NULL;
    float *path = NULL;

    float gapopen;
    float gapextend;
    float score;
    float minscore;

    ajuint j, k;
    ajint querystart = 0;
    ajint targetstart = 0;
    ajint queryend   = 0;
    ajint targetend   = 0;
    ajint width  = 0;
    AjPTable kmers = 0;
    ajint wordlen = 6;
    ajint oldmax = 0;
    ajint newmax = 0;

    ajuint ntargetseqs;
    ajuint nkmers;

    AjPAlign align = NULL;
    EmbPWordMatch maxmatch; /* match with maximum score */

    /* Cursors for the current sequence being scanned,
    ** i.e., until which location it was scanned.
    ** Separate cursor/location entries for each sequence in the seqset.
    */
    ajuint* lastlocation;

    EmbPWordRK* wordsw = NULL;
    AjPList* matchlist = NULL;

    embInit("supermatcher", argc, argv);

    matrix    = ajAcdGetMatrixf("datafile");
    queryseqs = ajAcdGetSeqall("asequence");
    targetseqs= ajAcdGetSeqset("bsequence");
    gapopen   = ajAcdGetFloat("gapopen");
    gapextend = ajAcdGetFloat("gapextend");
    wordlen   = ajAcdGetInt("wordlen");
    align     = ajAcdGetAlign("outfile");
    errorf    = ajAcdGetOutfile("errorfile");
    width     = ajAcdGetInt("width");	/* width for banded Smith-Waterman */
    minscore  = ajAcdGetFloat("minscore");

    gapopen   = ajRoundFloat(gapopen, 8);
    gapextend = ajRoundFloat(gapextend, 8);

    sub = ajMatrixfGetMatrix(matrix);
    cvt = ajMatrixfGetCvt(matrix);

    embWordLength(wordlen);

    /* seqset sequence is the reference sequence for SAM format */
    ajAlignSetRefSeqIndx(align, 1);

    ajSeqsetTrim(targetseqs);

    ntargetseqs = ajSeqsetGetSize(targetseqs);

    AJCNEW0(matchlist, ntargetseqs);

    /* get tables of words */
    for(k=0;k<ntargetseqs;k++)
    {
	targetseq = ajSeqsetGetseqSeq(targetseqs, k);
	embWordGetTable(&kmers, targetseq);
	ajDebug("Number of distinct kmers found so far: %d\n",
		ajTableGetLength(kmers));
    }
    AJCNEW0(lastlocation, ntargetseqs);

    if(ajTableGetLength(kmers)<1)
	ajErr("no kmers found");

    nkmers = embWordRabinKarpInit(kmers, &wordsw, wordlen, targetseqs);

    while(ajSeqallNext(queryseqs,&queryseq))
    {
	ajSeqTrim(queryseq);

	queryaln = ajStrNewRes(1+ajSeqGetLen(queryseq));

	ajDebug("Read '%S'\n", ajSeqGetNameS(queryseq));

	for(k=0;k<ntargetseqs;k++)
	{
	    lastlocation[k]=0;
	    matchlist[k] = ajListstrNew();
	}

	embWordRabinKarpSearch(ajSeqGetSeqS(queryseq), targetseqs,
		(const EmbPWordRK*)wordsw, wordlen, nkmers,
		matchlist, lastlocation, ajFalse);


	for(k=0;k<ajSeqsetGetSize(targetseqs);k++)
	{
	    targetseq      = ajSeqsetGetseqSeq(targetseqs, k);

	    ajDebug("Processing '%S'\n", ajSeqGetNameS(targetseq));

	    if(ajListGetLength(matchlist[k])==0)
	    {
		ajFmtPrintF(errorf,
		            "No wordmatch start points for "
		            "%s vs %s. No alignment\n",
		            ajSeqGetNameC(queryseq),ajSeqGetNameC(targetseq));
		embWordMatchListDelete(&matchlist[k]);
		continue;
	    }


	    /* only the maximum match is used as seed
	     * (if there is more than one location with the maximum match
	     * only the first one is used)
	     * TODO: we should add a new option to make above limit optional
	     */
	    maxmatch = embWordMatchFirstMax(matchlist[k]);

	    supermatcher_findendpoints(maxmatch,targetseq, queryseq,
		    &targetstart, &querystart,
		    &targetend, &queryend);

	    targetaln=ajStrNewRes(1+ajSeqGetLen(targetseq));
	    queryseqc = ajSeqGetSeqC(queryseq);
	    targetseqc = ajSeqGetSeqC(targetseq);

	    ajStrAssignC(&queryaln,"");
	    ajStrAssignC(&targetaln,"");

	    ajDebug("++ %S v %S start:%d %d end:%d %d\n",
		    ajSeqGetNameS(targetseq), ajSeqGetNameS(queryseq),
		    targetstart, querystart, targetend, queryend);

	    newmax = (targetend-targetstart+2)*width;

	    if(newmax > oldmax)
	    {
		AJCRESIZE0(path,oldmax,newmax);
		AJCRESIZE0(compass,oldmax,newmax);
		oldmax=newmax;
		ajDebug("++ memory re/allocation for path/compass arrays"
			" to size: %d\n", newmax);
	    }
	    else
	    {
		AJCSET0(path,newmax);
		AJCSET0(compass,newmax);
	    }

	    ajDebug("Calling embAlignPathCalcSWFast "
		    "%d..%d [%d/%d] %d..%d [%d/%d] width:%d\n",
		    querystart, queryend, (queryend - querystart + 1),
		    ajSeqGetLen(queryseq),
		    targetstart, targetend, (targetend - targetstart + 1),
		    ajSeqGetLen(targetseq),
		    width);

	    score = embAlignPathCalcSWFast(&targetseqc[targetstart],
	                                   &queryseqc[querystart],
	                                   targetend-targetstart+1,
	                                   queryend-querystart+1,
	                                   0,width,
	                                   gapopen,gapextend,
	                                   path,sub,cvt,
	                                   compass,show);
	    if(score>minscore)
	    {
		embAlignWalkSWMatrixFast(path,compass,gapopen,gapextend,
		                         targetseq,queryseq,
		                         &targetaln,&queryaln,
		                         targetend-targetstart+1,
		                         queryend-querystart+1,
		                         0,width,
		                         &targetstart,&querystart);

		if(!ajAlignFormatShowsSequences(align))
		{
		    ajAlignDefineCC(align, ajStrGetPtr(targetaln),
		                    ajStrGetPtr(queryaln),
		                    ajSeqGetNameC(targetseq),
		                    ajSeqGetNameC(queryseq));
		    ajAlignSetScoreR(align, score);
		}
		else
		{
		    ajDebug(" queryaln:%S \ntargetaln:%S\n",
		            queryaln,targetaln);
		    embAlignReportLocal(align,
			    queryseq, targetseq,
			    queryaln, targetaln,
			    querystart, targetstart,
			    gapopen, gapextend,
			    score, matrix,
			    1 + ajSeqGetOffset(queryseq),
			    1 + ajSeqGetOffset(targetseq)
		    );
		}
		ajAlignWrite(align);
		ajAlignReset(align);
	    }
	    ajStrDel(&targetaln);

	    embWordMatchListDelete(&matchlist[k]);
	}

	ajStrDel(&queryaln);
    }


    for(k=0;k<nkmers;k++)
    {
	AJFREE(wordsw[k]->seqindxs);
	AJFREE(wordsw[k]->nSeqMatches);

	for(j=0;j<wordsw[k]->nseqs;j++)
	    AJFREE(wordsw[k]->locs[j]);

	AJFREE(wordsw[k]->nnseqlocs);
	AJFREE(wordsw[k]->locs);
	AJFREE(wordsw[k]);
    }

    embWordFreeTable(&kmers);

    if(!ajAlignFormatShowsSequences(align))
	ajMatrixfDel(&matrix);
    
    AJFREE(path);
    AJFREE(compass);
    AJFREE(kmers);
    AJFREE(wordsw);

    AJFREE(matchlist);
    AJFREE(lastlocation);

    ajAlignClose(align);
    ajAlignDel(&align);
    ajSeqallDel(&queryseqs);
    ajSeqDel(&queryseq);
    ajSeqsetDel(&targetseqs);
    ajFileClose(&errorf);

    embExit();

    return 0;
}