Пример #1
0
static void splitter_ProcessChunk (AjPSeqout seqout, const AjPSeq seq,
                                   ajuint start, ajuint end, const AjPStr name,
                                   AjBool feature)
{
    AjPStr str;

    AjPFeattable new_feattable = NULL;
    AjPSeq subseq;

    ajDebug("splitter_ProcessChunk %d..%d '%S' %B\n",
            start, end, name, feature);

    str    = ajStrNew();
    subseq = ajSeqNew();
  
  
    new_feattable = ajFeattableNew(name);
    subseq->Fttable = new_feattable;
    ajFeattableSetNuc(new_feattable);

    ajStrAssignSubC(&str,ajSeqGetSeqC(seq),start,end);
    ajSeqAssignSeqS(subseq,str);

    if(feature)
        splitter_AddSubSeqFeat(subseq->Fttable,start,end,seq);

    ajSeqAssignNameS(subseq, name);
    splitter_write(seqout,subseq,seq);

    ajStrDel(&str);
    ajSeqDel(&subseq);

    return;
}
Пример #2
0
static void tranalign_AddGaps(AjPSeq newseq,
			      const AjPSeq nseq, const AjPSeq pseq,
			      ajlong npos)
{

    AjPStr newstr = NULL;
    ajuint ppos = 0;

    newstr = ajStrNew();

    for(; ppos<ajSeqGetLen(pseq); ppos++)
    	if(ajSeqGetSeqC(pseq)[ppos] == '-')
    	    ajStrAppendC(&newstr, "---");
	else
	{
    	    ajStrAppendSubS(&newstr, ajSeqGetSeqS(nseq), npos, npos+2);
    	    npos+=3;
    	}

    ajDebug("aligned seq=%S\n", newstr);
    ajSeqAssignSeqS(newseq, newstr);

    ajStrDel(&newstr);

    return;
}
Пример #3
0
int main(int argc, char **argv)
{
    AjPSeqall  seqall;
    AjPSeq     a;
    AjPSeqout  outf;
    AjPStr     substr;
    AjPStr     back;
    AjPStr     gctable;
    AjPCod     codon = NULL;
 
    ajint      gctablenum;

    ajint beg;
    ajint end;

    embInit("backtranambig", argc, argv);

    seqall    = ajAcdGetSeqall("sequence");
    outf      = ajAcdGetSeqoutall("outfile");
    gctable   = ajAcdGetListSingle("table");
    ajStrToInt(gctable, &gctablenum);

    codon = ajCodNewCodenum(gctablenum);
    while(ajSeqallNext(seqall, &a))
    {
        substr = ajStrNew();
        beg    = ajSeqGetBegin(a);
        end    = ajSeqGetEnd(a);
        ajStrAssignSubC(&substr,ajSeqGetSeqC(a),beg-1,end-1);

        back = ajStrNew();
        ajCodBacktranslateAmbig(&back,substr,codon);

        ajSeqAssignSeqS (a, back);
        ajSeqSetNuc(a);

        ajSeqoutWriteSeq(outf,a);
    }

    ajSeqoutClose(outf);

    ajStrDel(&back);
    ajStrDel(&substr);
    ajSeqoutDel(&outf);
    ajCodDel(&codon);
    ajStrDel(&gctable);
    ajSeqallDel(&seqall);
    ajSeqDel(&a);

    embExit();

    return 0;
}
Пример #4
0
int main(int argc, char **argv)
{
    AjPList list = NULL;
    AjPSeq seq;
    AjPSeq seq2;
    AjPStr aa0str = 0;
    AjPStr aa1str = 0;
    const char *s1;
    const char *s2;
    char *strret = NULL;
    ajuint i;
    ajuint j;
    ajuint k;
    ajint l;
    ajint abovethresh;
    ajint total;
    ajint starti = 0;
    ajint startj = 0;
    ajint windowsize;
    float thresh;
    AjPGraph graph   = NULL;
    AjPGraph xygraph = NULL;
    float flen1;
    float flen2;
    ajuint len1;
    ajuint len2;

    AjPTime ajtime = NULL;
    time_t tim;
    AjBool boxit=AJTRUE;
    /* Different ticks as they need to be different for x and y due to
       length of string being important on x */
    ajuint acceptableticksx[]=
    {
	1,10,50,100,500,1000,1500,10000,
	500000,1000000,5000000
    };
    ajuint acceptableticks[]=
    {
	1,10,50,100,200,500,1000,2000,5000,10000,15000,
	500000,1000000,5000000
    };
    ajint numbofticks = 10;
    float xmargin;
    float ymargin;
    float ticklen;
    float tickgap;
    float onefifth;
    float k2;
    float max;
    char ptr[10];
    AjPMatrix matrix = NULL;
    ajint** sub;
    AjPSeqCvt cvt;
    AjPStr  subt = NULL;

    ajint b1;
    ajint b2;
    ajint e1;
    ajint e2;
    AjPStr se1;
    AjPStr se2;
    ajint ithresh;
    AjBool stretch;
    PPoint ppt = NULL;
    float xa[1];
    float ya[1];
    AjPGraphdata gdata=NULL;
    AjPStr tit   = NULL;
    AjIList iter = NULL;
    float x1 = 0.;
    float x2 = 0.;
    float y1 = 0.;
    float y2 = 0.;
    ajuint tui;
    
    se1 = ajStrNew();
    se2 = ajStrNew();

    embInit("dotmatcher", argc, argv);
    
    seq        = ajAcdGetSeq("asequence");
    seq2       = ajAcdGetSeq("bsequence");
    stretch    = ajAcdGetToggle("stretch");
    graph      = ajAcdGetGraph("graph");
    xygraph    = ajAcdGetGraphxy("xygraph");
    windowsize = ajAcdGetInt("windowsize");
    ithresh    = ajAcdGetInt("threshold");
    matrix     = ajAcdGetMatrix("matrixfile");
    
    sub = ajMatrixGetMatrix(matrix);
    cvt = ajMatrixGetCvt(matrix);
    
    thresh = (float)ithresh;

    ajtime = ajTimeNew();

    tim = time(0);
    ajTimeSetLocal(ajtime, tim);
    
    b1 = ajSeqGetBegin(seq);
    b2 = ajSeqGetBegin(seq2);
    e1 = ajSeqGetEnd(seq);
    e2 = ajSeqGetEnd(seq2);
    len1 = ajSeqGetLen(seq);
    len2 = ajSeqGetLen(seq2);
    tui   = ajSeqGetLen(seq);
    flen1 = (float) tui;
    tui   = ajSeqGetLen(seq2);
    flen2 = (float) tui;
    
    ajStrAssignSubC(&se1,ajSeqGetSeqC(seq),b1-1,e1-1);
    ajStrAssignSubC(&se2,ajSeqGetSeqC(seq2),b2-1,e2-1);
    ajSeqAssignSeqS(seq,se1);
    ajSeqAssignSeqS(seq2,se2);
    
    
    s1 = ajStrGetPtr(ajSeqGetSeqS(seq));
    s2 = ajStrGetPtr(ajSeqGetSeqS(seq2));
    
    
    aa0str = ajStrNewRes(1+len1); /* length plus trailing blank */
    aa1str = ajStrNewRes(1+len2);
    
    list = ajListNew();
    
    
    for(i=0;i<len1;i++)
	ajStrAppendK(&aa0str,(char)ajSeqcvtGetCodeK(cvt, *s1++));
    
    for(i=0;i<len2;i++)
	ajStrAppendK(&aa1str,(char)ajSeqcvtGetCodeK(cvt, *s2++));
    
    max = (float)len1;
    if(len2 > max)
	max = (float) len2;
    
    xmargin = ymargin = max *(float)0.15;
    ticklen = xmargin*(float)0.1;
    onefifth  = xmargin*(float)0.2;
    
    subt = ajStrNewC((strret=
		      ajFmtString("(windowsize = %d, threshold = %3.2f  %D)",
				  windowsize,thresh,ajtime)));
    
    

    if(!stretch)
    {
	if( ajStrGetLen(ajGraphGetSubtitleS(graph)) <=1)
	    ajGraphSetSubtitleS(graph,subt);

	ajGraphOpenWin(graph, (float)0.0-ymargin,(max*(float)1.35)+ymargin,
		       (float)0.0-xmargin,(float)max+xmargin);

	ajGraphicsDrawposTextAtmid(flen1*(float)0.5,
                                   (float)0.0-(xmargin/(float)2.0),
		       ajGraphGetXlabelC(graph));
	ajGraphicsDrawposTextAtlineJustify((float)0.0-(xmargin*(float)0.75),
                                           flen2*(float)0.5,
			(float)0.0-(xmargin*(float)0.75),flen1,
			ajGraphGetYlabelC(graph),0.5);

	ajGraphicsSetCharscale(0.5);
    }
    
    
    
    s1= ajStrGetPtr(aa0str);
    s2 = ajStrGetPtr(aa1str);
    
    for(j=0; (ajint)j < (ajint)len2-windowsize;j++)
    {
	i =0;
	total = 0;
	abovethresh =0;

	k = j;
	for(l=0;l<windowsize;l++)
	    total = total + sub[(ajint)s1[i++]][(ajint)s2[k++]];

	if(total >= thresh)
	{
	    abovethresh=1;
	    starti = i-windowsize;
	    startj = k-windowsize;
	}

	while(i < len1 && k < len2)
	{
	    total = total - sub[(ajint)s1[i-windowsize]]
		[(ajint)s2[k-windowsize]];
	    total = total + sub[(ajint)s1[i]][(ajint)s2[k]];

	    if(abovethresh)
	    {
		if(total < thresh)
		{
		    abovethresh = 0;
		    /* draw the line */
		    dotmatcher_pushpoint(&list,(float)starti,(float)startj,
					 (float)i-1,(float)k-1,stretch);
		}
	    }
	    else if(total >= thresh)
	    {
		starti = i-windowsize;
		startj = k-windowsize;
		abovethresh= 1;
	    }
	    i++;
	    k++;
	}

	if(abovethresh)
	    /* draw the line */
	    dotmatcher_pushpoint(&list,(float)starti,(float)startj,
				 (float)i-1,(float)k-1,
				 stretch);
    }
    
    for(i=0; (ajint)i < (ajint)len1-windowsize;i++)
    {
	j = 0;
	total = 0;
	abovethresh =0;

	k = i;
	for(l=0;l<windowsize;l++)
	    total = total + sub[(ajint)s1[k++]][(ajint)s2[j++]];

	if(total >= thresh)
	{
	    abovethresh=1;
	    starti = k-windowsize;
	    startj = j-windowsize;
	}

	while(k < len1 && j < len2)
	{
	    total = total - sub[(ajint)s1[k-windowsize]]
		[(ajint)s2[j-windowsize]];
	    total = total + sub[(ajint)s1[k]][(ajint)s2[j]];

	    if(abovethresh)
	    {
		if(total < thresh)
		{
		    abovethresh = 0;
		    /* draw the line */
		    dotmatcher_pushpoint(&list,(float)starti,(float)startj,
					 (float)k-1,(float)j-1,stretch);
		}
	    }
	    else if(total >= thresh)
	    {
		starti = k-windowsize;
		startj = j-windowsize;
		abovethresh= 1;
	    }
	    j++;
	    k++;
	}

	if(abovethresh)
	    /* draw the line */
	    dotmatcher_pushpoint(&list,(float)starti,(float)startj,
				 (float)k-1,(float)j-1,
				 stretch);
    }
    
    if(boxit && !stretch)
    {
	ajGraphicsDrawposRect(0.0,0.0,flen1, flen2);

	i=0;
	while(acceptableticksx[i]*numbofticks < len1)
	    i++;

	if(i<=13)
	    tickgap = (float)acceptableticksx[i];
	else
	    tickgap = (float)acceptableticksx[10];
	ticklen   = xmargin*(float)0.1;
	onefifth  = xmargin*(float)0.2;

	if(len2/len1 > 10 )
	{
	    /* if a lot smaller then just label start and end */
	    ajGraphicsDrawposLine((float)0.0,(float)0.0,(float)0.0,(float)0.0-ticklen);
	    sprintf(ptr,"%d",b1-1);
	    ajGraphicsDrawposTextAtmid((float)0.0,(float)0.0-(onefifth),ptr);

	    ajGraphicsDrawposLine(flen1,(float)0.0,
			flen1,(float)0.0-ticklen);
	    sprintf(ptr,"%d",len1+b1-1);
	    ajGraphicsDrawposTextAtmid(flen1,(float)0.0-(onefifth),ptr);

	}
	else
	    for(k2=0.0;k2<len1;k2+=tickgap)
	    {
		ajGraphicsDrawposLine(k2,(float)0.0,k2,(float)0.0-ticklen);
		sprintf(ptr,"%d",(ajint)k2+b1-1);
		ajGraphicsDrawposTextAtmid(k2,(float)0.0-(onefifth),ptr);
	    }

	i = 0;
	while(acceptableticks[i]*numbofticks < len2)
	    i++;

	tickgap   = (float)acceptableticks[i];
	ticklen   = ymargin*(float)0.01;
	onefifth  = ymargin*(float)0.02;

	if(len1/len2 > 10 )
	{
	    /* if a lot smaller then just label start and end */
	    ajGraphicsDrawposLine((float)0.0,(float)0.0,(float)0.0-ticklen,(float)0.0);
	    sprintf(ptr,"%d",b2-1);
	    ajGraphicsDrawposTextAtend((float)0.0-(onefifth),(float)0.0,ptr);

	    ajGraphicsDrawposLine((float)0.0,flen2,(float)0.0-ticklen,
			flen2);
	    sprintf(ptr,"%d",len2+b2-1);
	    ajGraphicsDrawposTextAtend((float)0.0-(onefifth),flen2,ptr);
	}
	else
	    for(k2=0.0;k2<len2;k2+=tickgap)
	    {
		ajGraphicsDrawposLine((float)0.0,k2,(float)0.0-ticklen,k2);
		sprintf(ptr,"%d",(ajint)k2+b2-1);
		ajGraphicsDrawposTextAtend((float)0.0-(onefifth),k2,ptr);
	    }
    }
    
    
    if(!stretch)
	ajGraphicsClose();
    else			/* the xy graph for -stretch */
    {
	tit = ajStrNew();
	ajFmtPrintS(&tit,"%S",ajGraphGetTitleS(xygraph));


	gdata = ajGraphdataNewI(1);
	xa[0] = (float)b1;
	ya[0] = (float)b2;

	ajGraphSetTitleC(xygraph,ajStrGetPtr(tit));

	ajGraphSetXlabelC(xygraph,ajSeqGetNameC(seq));
	ajGraphSetYlabelC(xygraph,ajSeqGetNameC(seq2));

	ajGraphdataSetTypeC(gdata,"2D Plot Float");
	ajGraphdataSetTitleS(gdata,subt);
	ajGraphdataSetMinmax(gdata,(float)b1,(float)e1,(float)b2,
			       (float)e2);
	ajGraphdataSetTruescale(gdata,(float)b1,(float)e1,(float)b2,
			       (float)e2);
	ajGraphxySetXstartF(xygraph,(float)b1);
	ajGraphxySetXendF(xygraph,(float)e1);
	ajGraphxySetYstartF(xygraph,(float)b2);
	ajGraphxySetYendF(xygraph,(float)e2);

	ajGraphxySetXrangeII(xygraph,b1,e1);
	ajGraphxySetYrangeII(xygraph,b2,e2);


	if(list)
	{
	    iter = ajListIterNewread(list);
	    while((ppt = ajListIterGet(iter)))
	    {
		x1 = ppt->x1+b1-1;
		y1 = ppt->y1+b2-1;
		x2 = ppt->x2+b1-1;
		y2 = ppt->y2+b2-1;
		ajGraphAddLine(xygraph,x1,y1,x2,y2,0);
		AJFREE(ppt);
	    }
	    ajListIterDel(&iter);
	}

	ajGraphdataAddXY(gdata,xa,ya);
	ajGraphDataReplace(xygraph,gdata);


	ajGraphxyDisplay(xygraph,ajFalse);
	ajGraphicsClose();

	ajStrDel(&tit);
    }
    
    
    
    ajListFree(&list);

    ajSeqDel(&seq);
    ajSeqDel(&seq2);
    ajGraphxyDel(&graph);
    ajGraphxyDel(&xygraph);
    ajMatrixDel(&matrix);
    ajTimeDel(&ajtime);
    
    /* deallocate memory */
    ajStrDel(&aa0str);
    ajStrDel(&aa1str);
    ajStrDel(&se1);
    ajStrDel(&se2);
    ajStrDel(&subt);

    AJFREE(strret);			/* created withing ajFmtString */
    
    embExit();

    return 0;
}
Пример #5
0
static void extractfeat_WriteOut(AjPSeqout seqout, AjPStr *featstr,
				 AjBool compall, AjBool sense, ajint firstpos,
				 ajint lastpos, ajint before, ajint after,
				 const AjPSeq seq, AjBool remote,
				 const AjPStr type,
				 AjBool featinname, const AjPStr describestr)
{
    AjPSeq newseq = NULL;
    AjPStr name   = NULL;	/* new name of the sequence */
    AjPStr value  = NULL;	/* string value of start or end position */
    AjPStr desc   = NULL;	/* sequence description */
    AjBool forward = sense;

    if(compall)
        forward = ajFalse;

    ajDebug("WriteOut %S_%d_%d [%S] %d all:%B fwd:%B remote:%B\n",
           ajSeqGetNameS(seq), firstpos+1, lastpos+1, type,
           ajStrGetLen(*featstr), compall, sense, remote);
 
    /* see if there is a sequence to be written out */
    if(!ajStrGetLen(*featstr))
    {
        ajWarn("feature %S_%d_%d [%S] "
               "not written out because it has zero length\n",
               ajSeqGetNameS(seq), firstpos+1, lastpos+1, type);
        ajDebug("feature not written out because it has length=0 "
		"(probably first time round)\n");
    	return;
    }

    /* see if must abort because there were Remote IDs in the features */
    if(remote)
    {
        ajWarn("feature not written out because it has Remote IDs\n");
        ajDebug("feature not written out because it has Remote IDs\n");
        return;
    }

    ajDebug("feature = %d bases\n", ajStrGetLen(*featstr));

    /* featstr may be edited, so it is a AjPStr* */
    extractfeat_BeforeAfter (seq, featstr, firstpos, lastpos, before,
			     after, forward);

    ajDebug("feature+before/after = %d bases\n", ajStrGetLen(*featstr));

    /* set the extracted sequence */
    newseq = ajSeqNew();
    ajSeqAssignSeqS(newseq, *featstr);

     /* create a nice name for the new sequence */
    name = ajStrNew();
    ajStrAppendS(&name, ajSeqGetNameS(seq));
    ajStrAppendC(&name, "_");
    value = ajStrNew();
    ajStrFromInt(&value, firstpos+1);
    ajStrAppendS(&name, value);
    ajStrAppendC(&name, "_");
    ajStrFromInt(&value, lastpos+1);
    ajStrAppendS(&name, value);

    /* add the type of feature to the name, if required */
    if(featinname)
    {
    	ajStrAppendC(&name, "_");
    	ajStrAppendS(&name, type);
    }

    ajSeqAssignNameS(newseq, name);

    /* set the sequence description with the 'type' added */
    desc = ajStrNew();
    ajStrAppendC(&desc, "[");
    ajStrAppendS(&desc, type);
    ajStrAppendC(&desc, "] ");
    if(ajStrGetLen(describestr))
    	ajStrAppendS(&desc, describestr);

    ajStrAppendS(&desc, ajSeqGetDescS(seq));
    ajSeqAssignDescS(newseq, desc);

    /* set the type */
    if(ajSeqIsNuc(seq))
        ajSeqSetNuc(newseq);
    else
        ajSeqSetProt(newseq);


    /* write the new sequence */
    ajSeqoutWriteSeq(seqout, newseq);


    ajSeqDel(&newseq);
    ajStrDel(&name);
    ajStrDel(&value);
    ajStrDel(&desc);

    return;
}
Пример #6
0
int main(int argc, char **argv)
{
    AjPAlign align;
    AjPSeq a;
    AjPSeq b;
    AjPSeqout seqout;

    AjPStr m;
    AjPStr n;

    AjPStr merged = NULL;

    ajuint lena;
    ajuint lenb;

    const char   *p;
    const char   *q;

    ajint start1 = 0;
    ajint start2 = 0;

    float *path;
    ajint *compass;

    AjPMatrixf matrix;
    AjPSeqCvt  cvt = 0;
    float **sub;

    float gapopen;
    float gapextend;
    ajulong maxarr = 1000;
    ajulong len;		  /* arbitrary. realloc'd if needed */
    size_t  stlen;

    float score;
    ajint begina;
    ajint beginb;

    embInit("merger", argc, argv);

    a         = ajAcdGetSeq("asequence");
    b         = ajAcdGetSeq("bsequence");
    seqout    = ajAcdGetSeqout("outseq");
    matrix    = ajAcdGetMatrixf("datafile");
    gapopen   = ajAcdGetFloat("gapopen");
    gapextend = ajAcdGetFloat("gapextend");
    align     = ajAcdGetAlign("outfile");

    gapopen = ajRoundFloat(gapopen, 8);
    gapextend = ajRoundFloat(gapextend, 8);

    AJCNEW(path, maxarr);
    AJCNEW(compass, maxarr);

    /*
    **  make the two sequences lowercase so we can show which one we are
    **  using in the merge by uppercasing it
    */

    ajSeqFmtLower(a);
    ajSeqFmtLower(b);

    m = ajStrNew();
    n = ajStrNew();

    sub = ajMatrixfGetMatrix(matrix);
    cvt = ajMatrixfGetCvt(matrix);

    begina = ajSeqGetBegin(a);
    beginb = ajSeqGetBegin(b);

    lena = ajSeqGetLen(a);
    lenb = ajSeqGetLen(b);

    if(lenb > (ULONG_MAX/(ajulong)(lena+1)))
	ajFatal("Sequences too big. Try 'supermatcher'");

    len  = lena*lenb;

    if(len>maxarr)
    {

	ajDebug("merger: resize path, len to %d (%d * $d)\n",
		len, lena, lenb);

	stlen = (size_t) len;
        AJCRESIZE(path,stlen);
        AJCRESIZE(compass,stlen);
        maxarr=len;
    }


    p = ajSeqGetSeqC(a);
    q = ajSeqGetSeqC(b);

    ajStrAssignC(&m,"");
    ajStrAssignC(&n,"");

    score = embAlignPathCalc(p,q,lena,lenb,gapopen,gapextend,path,sub,cvt,
		     compass, ajFalse);

    /*score = embAlignScoreNWMatrix(path,compass,gapopen,gapextend,
                                  a,b,lena,lenb,sub,cvt,
				  &start1,&start2);*/

    embAlignWalkNWMatrix(path,a,b,&m,&n,lena,lenb, &start1,&start2,gapopen,
			 gapextend,compass);

    /*
    ** now construct the merged sequence, uppercase the bits of the two
    ** input sequences which are used in the merger
    */
    merger_Merge(align, &merged,p,q,m,n,start1,start2,
		 ajSeqGetNameC(a),ajSeqGetNameC(b));

    embAlignReportGlobal(align, a, b, m, n,
			 start1, start2, gapopen, gapextend,
			 score, matrix, begina, beginb);

    ajAlignWrite(align);
    ajAlignReset(align);

    /* write the merged sequence */
    ajSeqAssignSeqS(a, merged);
    ajSeqoutWriteSeq(seqout, a);
    ajSeqoutClose(seqout);
    ajSeqoutDel(&seqout);

    ajSeqDel(&a);
    ajSeqDel(&b);

    ajAlignClose(align);
    ajAlignDel(&align);
    ajStrDel(&merged);

    AJFREE(compass);
    AJFREE(path);

    ajStrDel(&n);
    ajStrDel(&m);

    embExit();

    return 0;
}
Пример #7
0
int main(int argc, char **argv)
{

    AjPSeqset seqset = NULL;
    AjPStr refseq;	/* input name/number of reference sequence */
    ajint  nrefseq;	/* numeric reference sequence */
    AjPMatrix matrix;	/* scoring matrix structure */
    ajint **sub;	/* integer scoring matrix */
    AjPSeqCvt cvt = 0;	/* conversion table for scoring matrix */
    float identity;
    ajint ident;
    float fplural;
    AjPStr cons;
    AjPSeq consensus;

    const AjPSeq ref;
    const AjPSeq seq;
    ajuint i;

    AjBool html;
    AjBool doheader;
    AjBool dousa;
    AjBool doname;
    AjBool doseqlength;
    AjBool doalignlength;
    AjBool dogaps;
    AjBool dogapcount;
    AjBool doidcount;
    AjBool dosimcount;
    AjBool dodifcount;
    AjBool dochange;
    AjBool dodesc;
    AjBool dowt;

    ajint  seqlength;
    ajint  alignlength;
    ajint  gaps;
    ajint  gapcount;
    ajint  idcount;
    ajint  simcount;
    ajint  difcount;
    float  change;

    AjPFile outfile;

    const AjPStr usa;
    const AjPStr name;
    AjPStr altusa;			/* default name when the real name
					   is not known */
    AjPStr altname;

    AjPStr xxx = NULL;

    embInit("infoalign", argc, argv);


    seqset  = ajAcdGetSeqset("sequence");
    refseq  = ajAcdGetString("refseq");
    matrix  = ajAcdGetMatrix("matrix");

    ajSeqsetFill(seqset);

    outfile = ajAcdGetOutfile("outfile");

    html          = ajAcdGetBoolean("html");
    doheader      = ajAcdGetBoolean("heading");
    dousa         = ajAcdGetBoolean("usa");
    doname        = ajAcdGetBoolean("name");
    doseqlength   = ajAcdGetBoolean("seqlength");
    doalignlength = ajAcdGetBoolean("alignlength");
    dogaps        = ajAcdGetBoolean("gaps");
    dogapcount    = ajAcdGetBoolean("gapcount");
    doidcount     = ajAcdGetBoolean("idcount");
    dosimcount    = ajAcdGetBoolean("simcount");
    dodifcount    = ajAcdGetBoolean("diffcount");
    dochange      = ajAcdGetBoolean("change");
    dodesc        = ajAcdGetBoolean("description");
    dowt          = ajAcdGetBoolean("weight");

    /* consensus parameters */
    fplural   = ajAcdGetFloat("plurality");
    identity  = ajAcdGetFloat("identity");


    cons      = ajStrNew();
    consensus = ajSeqNew();
    altusa    = ajStrNewC("-");
    altname   = ajStrNewC("-");


    /* get conversion table and scoring matrix */
    cvt = ajMatrixGetCvt(matrix);
    sub = ajMatrixGetMatrix(matrix);

    /* get the number of the reference sequence */
    nrefseq = infoalign_Getrefseq(refseq, seqset);

    /* change the % plurality to the fraction of absolute total weight */
    fplural = ajSeqsetGetTotweight(seqset) * fplural / 100;

    /*
    ** change the % identity to the number of identical sequences at a
    ** position required for consensus
    */
    ident = ajSeqsetGetSize(seqset) * (ajint)identity / 100;

    /* get the consensus sequence */
    embConsCalc(seqset, matrix, ajSeqsetGetSize(seqset), ajSeqsetGetLen(seqset),
		 fplural, 0.0, ident, ajFalse, &cons);
    ajSeqAssignSeqS(consensus, cons);

    ajSeqAssignNameS(consensus,(xxx=ajStrNewC("Consensus")));

    /* get the reference sequence */
    if(nrefseq == -1)
	ref = consensus;
    else
	ref = ajSeqsetGetseqSeq(seqset, nrefseq);


    /* start the HTML table */
    if(html)
	ajFmtPrintF(outfile,"<table border cellpadding=4 bgcolor="
		    "\"#FFFFF0\">\n");

    /* print the header information */
    if(doheader)
    {
	/* start the HTML table title line and output the Name header */
	if(html)			
	    ajFmtPrintF(outfile, "<tr>");
	else
	    ajFmtPrintF(outfile, "%s", "# ");

	if(dousa)
	{
	    if(html)
		ajFmtPrintF(outfile, "<th>USA</th>");
	    else
		ajFmtPrintF(outfile, "%-16s", "USA");
	}

	if(doname)
	{
	    if(html)
		ajFmtPrintF(outfile, "<th>Name</th>");
	    else
		ajFmtPrintF(outfile, "%-12s", "Name");
	}

	if(doseqlength)
	{
	    if(html)
		ajFmtPrintF(outfile, "<th>Sequence Length</th>");
	    else
		ajFmtPrintF(outfile, "SeqLen\t");
	}

	if(doalignlength)
	{
	    if(html)
		ajFmtPrintF(outfile, "<th>Aligned Length</th>");
	    else
		ajFmtPrintF(outfile, "AlignLen\t");
	}

	if(dogaps)
	{
	    if(html)
		ajFmtPrintF(outfile, "<th>Gaps</th>");
	    else
		ajFmtPrintF(outfile, "Gaps\t");
	}

	if(dogapcount)
	{
	    if(html)
		ajFmtPrintF(outfile, "<th>Gap Length</th>");
	    else
		ajFmtPrintF(outfile, "GapLen\t");
	}

	if(doidcount)
	{
	    if(html)
		ajFmtPrintF(outfile, "<th>Identity</th>");
	    else
		ajFmtPrintF(outfile, "Ident\t");
	}

	if(dosimcount)
	{
	    if(html)
		ajFmtPrintF(outfile, "<th>Similarity</th>");
	    else
		ajFmtPrintF(outfile, "Similar\t");
	}

	if(dodifcount)
	{
	    if(html)
		ajFmtPrintF(outfile, "<th>Difference</th>");
	    else
		ajFmtPrintF(outfile, "Differ\t");
	}

	if(dochange)
	{
	    if(html)
		ajFmtPrintF(outfile, "<th>%% Change</th>");
	    else
		ajFmtPrintF(outfile, "%% Change\t");
	}

        if(dowt)
        {
            if(html)
                ajFmtPrintF(outfile, "<th>Weight</th>");
            else
                ajFmtPrintF(outfile, "Weight\t");
        }


	if(dodesc)
	{
	    if(html)
		ajFmtPrintF(outfile, "<th>Description</th>");
	    else
		ajFmtPrintF(outfile, "Description");
	}

	/* end the HTML table title line */
	if(html)
	    ajFmtPrintF(outfile, "</tr>\n");
	else
	    ajFmtPrintF(outfile, "\n");
    }


    for(i=0; i<ajSeqsetGetSize(seqset); i++)
    {
    	seq = ajSeqsetGetseqSeq(seqset, i);

	/* get the usa ('-' if unknown) */
	usa = ajSeqGetUsaS(seq);
	if(ajStrGetLen(usa) == 0)
	    usa = altusa;

	/* get the name ('-' if unknown) */
	name = ajSeqGetNameS(seq);
	if(ajStrGetLen(name) == 0)
	    name = altname;

	/* get the stats from the comparison to the reference sequence */
	infoalign_Compare(ref, seq, sub, cvt, &seqlength, &alignlength,
			  &gaps, &gapcount, &idcount, &simcount,
			  &difcount, &change);

	/* start table line */
	if(html)
	    ajFmtPrintF(outfile, "<tr>");

	if(dousa)
	    infoalign_OutputStr(outfile, usa, html,
				(dodesc || dowt || dochange ||
				 dodifcount || dosimcount ||
				 doidcount || dogapcount || dogaps ||
				 doseqlength || doalignlength || doname), 18);
	
	if(doname)
	    infoalign_OutputStr(outfile, name, html,
				(dodesc || dowt || dochange ||
				 dodifcount || dosimcount ||
				 doidcount || dogapcount || dogaps ||
				 doseqlength || doalignlength), 14);
	
	if(doseqlength)
	    infoalign_OutputInt(outfile, seqlength, html,
				(dodesc || dowt || dochange ||
				 dodifcount || dosimcount ||
				 doidcount || dogapcount ||
				 dogaps || doalignlength));
	
	if(doalignlength)
	    infoalign_OutputInt(outfile, alignlength, html,
				(dodesc || dowt || dochange ||
				 dodifcount || dosimcount ||
				 doidcount || dogapcount || dogaps));
	
	if(dogaps)
	    infoalign_OutputInt(outfile, gaps, html,
				(dodesc || dowt || dochange ||
				 dodifcount || dosimcount ||
				 doidcount || dogapcount));
	
	if(dogapcount)
	    infoalign_OutputInt(outfile, gapcount, html,
				(dodesc || dowt || dochange ||
				 dodifcount || dosimcount ||
				 doidcount));
	
	if(doidcount)
	    infoalign_OutputInt(outfile, idcount, html,
				(dodesc || dowt || dochange ||
                                 dodifcount || dosimcount));
	
	if(dosimcount)
	    infoalign_OutputInt(outfile, simcount, html, 
                                (dodesc || dowt || dochange ||
				 dodifcount));
	
	if(dodifcount)
	    infoalign_OutputInt(outfile, difcount, html, 
           		        (dodesc || dowt || dochange));
	
	if(dochange)
	    infoalign_OutputFloat(outfile, change, html, (dodesc || dowt) );
	
        if(dowt)
            infoalign_OutputFloat(outfile, ajSeqsetGetseqWeight(seqset,i), html, 
            	dodesc);

	if(dodesc)
	    infoalign_OutputStr(outfile, ajSeqGetDescS(seq), html, ajFalse, 
	    	NOLIMIT);
	
	/* end table line */
	if(html)
	    ajFmtPrintF(outfile, "</tr>\n");
	else
	    ajFmtPrintF(outfile, "\n");
    }
    
    
    /* end the HTML table */
    if(html)
	ajFmtPrintF(outfile, "</table>\n");
    
    ajFileClose(&outfile);
    
    /* tidy up */
    ajStrDel(&altusa);
    ajStrDel(&altname);
    ajStrDel(&xxx);
    ajSeqDel(&consensus);

    ajSeqsetDel(&seqset);
    ajStrDel(&refseq);
    ajMatrixDel(&matrix);
    ajStrDel(&cons);

    embExit();
    return 0;
}
Пример #8
0
int main(int argc, char **argv)
{
    AjPSeqall	seqall;
    AjPSeqout	seqout;
    AjPSeq seq = NULL;
    AjPStr str = NULL;
    AjPStr desc = NULL;
    ajint  tail3;
    ajint  tail5 = 0;
    ajint  minlength;
    ajint  mismatches;
    AjBool reverse;
    AjBool fiveprime;
    AjBool cvttolower;

    embInit("trimest", argc, argv);

    seqall 	= ajAcdGetSeqall("sequence");
    seqout 	= ajAcdGetSeqoutall("outseq");
    minlength 	= ajAcdGetInt("minlength");
    mismatches 	= ajAcdGetInt("mismatches");
    reverse	= ajAcdGetBoolean("reverse");
    fiveprime	= ajAcdGetBoolean("fiveprime");
    cvttolower  = ajAcdGetToggle("tolower");
    

    str = ajStrNew();

    while(ajSeqallNext(seqall, &seq))
    {
        /* get sequence description */
        ajStrAssignS(&desc, ajSeqGetDescS(seq));

        /* get positions to cut in 5' poly-T and 3' poly-A tails */
	if(fiveprime)
	    tail5 = trimest_get_tail(seq, 5, minlength, mismatches);
	tail3 = trimest_get_tail(seq, 3, minlength, mismatches);

	/* get a COPY of the sequence string */
	ajStrAssignS(&str, ajSeqGetSeqS(seq));

        /* cut off longest of 3' or 5' tail */
	if(tail5 > tail3)
	{
	    /* if 5' poly-T tail, then reverse the sequence */
	    ajDebug("Tail=%d\n", tail5);
            if(cvttolower)
                trimest_tolower(&str, 0, tail5-1);
            else
	        ajStrKeepRange(&str, tail5, ajSeqGetLen(seq)-1);
	    ajStrAppendC(&desc, " [poly-T tail removed]");

	}
	else if(tail3 > tail5)
	{
	    /* remove 3' poly-A tail */
	    ajDebug("Tail=%d\n", tail3);
            if(cvttolower)
                trimest_tolower(&str, ajSeqGetLen(seq)-tail3,
				ajSeqGetLen(seq));
            else
	        ajStrKeepRange(&str, 0, ajSeqGetLen(seq)-tail3-1);
            ajStrAppendC(&desc, " [poly-A tail removed]");

	}

        /* write sequence out */
	ajSeqAssignSeqS(seq, str);

	/* reverse complement if poly-T found */
	if(tail5 > tail3 && reverse)
	{
	    ajSeqReverseForce(seq);
	    ajStrAppendC(&desc, " [reverse complement]");
	}

        /* set description */
        ajSeqAssignDescS(seq, desc);

	ajSeqoutWriteSeq(seqout, seq);
    }

    ajSeqoutClose(seqout);

    ajStrDel(&str);
    ajStrDel(&desc);

    ajSeqallDel(&seqall);
    ajSeqDel(&seq);
    ajSeqoutDel(&seqout);

    embExit();

    return 0;
}
Пример #9
0
int main(int argc, char **argv)
{
    AjPSeqout outseq = NULL;
    AjPList list     = NULL;
    AjPSeq seq       = NULL;
    AjPStr insert    = NULL;
    AjPStr seqstr    = NULL;
    AjPStr* seqr     = NULL;
    AjPFile data     = NULL;
    ajint start   = 0;
    ajint length  = 0;
    ajint amount  = 0;
    ajint scmax   = 0;
    ajint extra   = 0;

    embInit("makeprotseq", argc, argv);

    data     = ajAcdGetInfile("pepstatsfile");
    insert   = ajAcdGetString("insert");
    start    = ajAcdGetInt("start");
    length   = ajAcdGetInt("length");
    amount   = ajAcdGetInt("amount");
    outseq   = ajAcdGetSeqoutall("outseq");

    list = ajListstrNew();

    /* this is checked by acd
    if(amount <=0 || length <= 0)
    ajFatal("Amount or length is 0 or less. "
                 "Unable to create any sequences"); */

    /* if insert, make sure sequence is large enough */
    if(ajStrGetLen(insert))
    {
        length -= ajStrGetLen(insert);
        /* start= start <= 1 ? 0 : --start; */ /* checked in acd */
        start--;

        if(length <= 0)
            ajFatal("Sequence smaller than inserted part. "
                    "Unable to create sequences.");
    }

    /* make the list of AjPStr to be used in sequence creation */
    if(data)
    {
        ajDebug("Distribution datafile '%s' given checking type\n",
                ajFileGetPrintnameC(data));
        seqstr = ajStrNew();
        ajReadlineTrim(data,&seqstr);

        if(ajStrFindC(seqstr,"PEPSTATS") == 0)
        {
            makeprotseq_parse_pepstats(&list,data);
        }
        else
        {
            ajWarn("Not pepstats file. Making completely random sequences.");
            makeprotseq_default_chars(&list);
        }

        ajStrDel(&seqstr);
        ajFileClose(&data);
    }
    else
        makeprotseq_default_chars(&list);

    /* if insert, make sure type is correct */
    /* typecheking code is not working, uncomment and test after it is
    if(ajStrGetLen(insert))
    {
    seqstr = ajStrNew();
    if(prot)
        ajStrAssignC(&seqstr,"pureprotein");
    if(!ajSeqTypeCheckS(&insert,seqstr))
        ajFatal("Insert not the same sequence type as sequence itself.");
    ajStrDel(&seqstr);
    } */

    /* array allows fast creation of a sequences */
    scmax = (ajuint) ajListstrToarray(list,&seqr);
    if(!scmax)
        ajFatal("No strings in list. No characters to make the sequence.");

    ajDebug("Distribution array done.\nscmax '%d', extra '%d', first '%S'\n",
            scmax,extra,seqr[0]);

    ajRandomSeed();

    while(amount-- > 0)
    {
        seqstr = makeprotseq_random_sequence(seqr,scmax,length);

        if(ajStrGetLen(insert))
            ajStrInsertS(&seqstr,start,insert);

        ajStrFmtLower(&seqstr);
        seq = ajSeqNew();

        ajSeqAssignSeqS(seq, seqstr);
        ajSeqSetProt(seq);

        ajSeqoutWriteSeq(outseq, seq);
        ajSeqDel(&seq);
        ajStrDel(&seqstr);
    }

    ajSeqoutClose(outseq);
    ajSeqoutDel(&outseq);
    ajListstrFreeData(&list);
    ajStrDel(&insert);
    AJFREE(seqr);

    embExit();

    return 0;
}
Пример #10
0
AjPSeqout get_orf(
  AjPSeqout seqout,
  AjPSeqall seqall,
  AjPStr tablestr,
  ajuint minsize,
  ajuint maxsize,
  AjPStr findstr,
  AjBool methionine,
  AjBool circular,
  AjBool reverse,
  ajint around
) {
  ajint table;
  ajint find;
  AjPSeq seq = NULL;
  AjPTrn trnTable;
  AjPStr sseq = NULL;    /* sequence string */

  /* ORF number to append to name of sequence to create unique name */
  ajint orf_no;


  AjBool sense;    /* ajTrue = forward sense */
  ajint len;

  /* initialise the translation table */
  ajStrToInt(tablestr, &table);
  trnTable = ajTrnNewI(table);

  /* what sort of ORF are we looking for */
  ajStrToInt(findstr, &find);

  /*
  ** get the minimum size converted to protein length if storing
  ** protein sequences
  */
  if (find == P_STOP2STOP || find == P_START2STOP || find == AROUND_START) {
    minsize /= 3;
    maxsize /= 3;
  }

  while (ajSeqallNext(seqall, &seq)) {
    orf_no = 1;           /* number of the next ORF */
    sense = ajTrue;           /* forward sense initially */

    /* get the length of the sequence */
    len = ajSeqGetLen(seq);

    /*
    ** if the sequence is circular, append it to itself to triple its
    **  length so can deal easily with wrapped ORFs, but don't update
    ** len
    */
    if (circular) {
      ajStrAssignS(&sseq, ajSeqGetSeqS(seq));
      ajStrAppendS(&sseq, ajSeqGetSeqS(seq));
      ajStrAppendS(&sseq, ajSeqGetSeqS(seq));
      ajSeqAssignSeqS(seq, sseq);
    }

    /* find the ORFs */
    getorf_FindORFs(seq, len, trnTable, minsize, maxsize, seqout, sense,
                    circular, find, &orf_no, methionine, around, &record);

    /* now reverse complement the sequence and do it again */
    if (reverse) {
      sense = ajFalse;
      ajSeqReverseForce(seq);
      getorf_FindORFs(seq, len, trnTable, minsize, maxsize, seqout, sense,
                      circular, find, &orf_no, methionine,
                      around);
    }
  }
  ajTrnDel(&trnTable);
  ajSeqDel(&seq);
  ajStrDel(&sseq);
}