예제 #1
0
파일: splitter.c 프로젝트: WenchaoLin/JAMg
static void splitter_ProcessChunk (AjPSeqout seqout, const AjPSeq seq,
                                   ajuint start, ajuint end, const AjPStr name,
                                   AjBool feature)
{
    AjPStr str;

    AjPFeattable new_feattable = NULL;
    AjPSeq subseq;

    ajDebug("splitter_ProcessChunk %d..%d '%S' %B\n",
            start, end, name, feature);

    str    = ajStrNew();
    subseq = ajSeqNew();
  
  
    new_feattable = ajFeattableNew(name);
    subseq->Fttable = new_feattable;
    ajFeattableSetNuc(new_feattable);

    ajStrAssignSubC(&str,ajSeqGetSeqC(seq),start,end);
    ajSeqAssignSeqS(subseq,str);

    if(feature)
        splitter_AddSubSeqFeat(subseq->Fttable,start,end,seq);

    ajSeqAssignNameS(subseq, name);
    splitter_write(seqout,subseq,seq);

    ajStrDel(&str);
    ajSeqDel(&subseq);

    return;
}
예제 #2
0
static void extractfeat_WriteOut(AjPSeqout seqout, AjPStr *featstr,
				 AjBool compall, AjBool sense, ajint firstpos,
				 ajint lastpos, ajint before, ajint after,
				 const AjPSeq seq, AjBool remote,
				 const AjPStr type,
				 AjBool featinname, const AjPStr describestr)
{
    AjPSeq newseq = NULL;
    AjPStr name   = NULL;	/* new name of the sequence */
    AjPStr value  = NULL;	/* string value of start or end position */
    AjPStr desc   = NULL;	/* sequence description */
    AjBool forward = sense;

    if(compall)
        forward = ajFalse;

    ajDebug("WriteOut %S_%d_%d [%S] %d all:%B fwd:%B remote:%B\n",
           ajSeqGetNameS(seq), firstpos+1, lastpos+1, type,
           ajStrGetLen(*featstr), compall, sense, remote);
 
    /* see if there is a sequence to be written out */
    if(!ajStrGetLen(*featstr))
    {
        ajWarn("feature %S_%d_%d [%S] "
               "not written out because it has zero length\n",
               ajSeqGetNameS(seq), firstpos+1, lastpos+1, type);
        ajDebug("feature not written out because it has length=0 "
		"(probably first time round)\n");
    	return;
    }

    /* see if must abort because there were Remote IDs in the features */
    if(remote)
    {
        ajWarn("feature not written out because it has Remote IDs\n");
        ajDebug("feature not written out because it has Remote IDs\n");
        return;
    }

    ajDebug("feature = %d bases\n", ajStrGetLen(*featstr));

    /* featstr may be edited, so it is a AjPStr* */
    extractfeat_BeforeAfter (seq, featstr, firstpos, lastpos, before,
			     after, forward);

    ajDebug("feature+before/after = %d bases\n", ajStrGetLen(*featstr));

    /* set the extracted sequence */
    newseq = ajSeqNew();
    ajSeqAssignSeqS(newseq, *featstr);

     /* create a nice name for the new sequence */
    name = ajStrNew();
    ajStrAppendS(&name, ajSeqGetNameS(seq));
    ajStrAppendC(&name, "_");
    value = ajStrNew();
    ajStrFromInt(&value, firstpos+1);
    ajStrAppendS(&name, value);
    ajStrAppendC(&name, "_");
    ajStrFromInt(&value, lastpos+1);
    ajStrAppendS(&name, value);

    /* add the type of feature to the name, if required */
    if(featinname)
    {
    	ajStrAppendC(&name, "_");
    	ajStrAppendS(&name, type);
    }

    ajSeqAssignNameS(newseq, name);

    /* set the sequence description with the 'type' added */
    desc = ajStrNew();
    ajStrAppendC(&desc, "[");
    ajStrAppendS(&desc, type);
    ajStrAppendC(&desc, "] ");
    if(ajStrGetLen(describestr))
    	ajStrAppendS(&desc, describestr);

    ajStrAppendS(&desc, ajSeqGetDescS(seq));
    ajSeqAssignDescS(newseq, desc);

    /* set the type */
    if(ajSeqIsNuc(seq))
        ajSeqSetNuc(newseq);
    else
        ajSeqSetProt(newseq);


    /* write the new sequence */
    ajSeqoutWriteSeq(seqout, newseq);


    ajSeqDel(&newseq);
    ajStrDel(&name);
    ajStrDel(&value);
    ajStrDel(&desc);

    return;
}
예제 #3
0
파일: infoalign.c 프로젝트: WenchaoLin/JAMg
int main(int argc, char **argv)
{

    AjPSeqset seqset = NULL;
    AjPStr refseq;	/* input name/number of reference sequence */
    ajint  nrefseq;	/* numeric reference sequence */
    AjPMatrix matrix;	/* scoring matrix structure */
    ajint **sub;	/* integer scoring matrix */
    AjPSeqCvt cvt = 0;	/* conversion table for scoring matrix */
    float identity;
    ajint ident;
    float fplural;
    AjPStr cons;
    AjPSeq consensus;

    const AjPSeq ref;
    const AjPSeq seq;
    ajuint i;

    AjBool html;
    AjBool doheader;
    AjBool dousa;
    AjBool doname;
    AjBool doseqlength;
    AjBool doalignlength;
    AjBool dogaps;
    AjBool dogapcount;
    AjBool doidcount;
    AjBool dosimcount;
    AjBool dodifcount;
    AjBool dochange;
    AjBool dodesc;
    AjBool dowt;

    ajint  seqlength;
    ajint  alignlength;
    ajint  gaps;
    ajint  gapcount;
    ajint  idcount;
    ajint  simcount;
    ajint  difcount;
    float  change;

    AjPFile outfile;

    const AjPStr usa;
    const AjPStr name;
    AjPStr altusa;			/* default name when the real name
					   is not known */
    AjPStr altname;

    AjPStr xxx = NULL;

    embInit("infoalign", argc, argv);


    seqset  = ajAcdGetSeqset("sequence");
    refseq  = ajAcdGetString("refseq");
    matrix  = ajAcdGetMatrix("matrix");

    ajSeqsetFill(seqset);

    outfile = ajAcdGetOutfile("outfile");

    html          = ajAcdGetBoolean("html");
    doheader      = ajAcdGetBoolean("heading");
    dousa         = ajAcdGetBoolean("usa");
    doname        = ajAcdGetBoolean("name");
    doseqlength   = ajAcdGetBoolean("seqlength");
    doalignlength = ajAcdGetBoolean("alignlength");
    dogaps        = ajAcdGetBoolean("gaps");
    dogapcount    = ajAcdGetBoolean("gapcount");
    doidcount     = ajAcdGetBoolean("idcount");
    dosimcount    = ajAcdGetBoolean("simcount");
    dodifcount    = ajAcdGetBoolean("diffcount");
    dochange      = ajAcdGetBoolean("change");
    dodesc        = ajAcdGetBoolean("description");
    dowt          = ajAcdGetBoolean("weight");

    /* consensus parameters */
    fplural   = ajAcdGetFloat("plurality");
    identity  = ajAcdGetFloat("identity");


    cons      = ajStrNew();
    consensus = ajSeqNew();
    altusa    = ajStrNewC("-");
    altname   = ajStrNewC("-");


    /* get conversion table and scoring matrix */
    cvt = ajMatrixGetCvt(matrix);
    sub = ajMatrixGetMatrix(matrix);

    /* get the number of the reference sequence */
    nrefseq = infoalign_Getrefseq(refseq, seqset);

    /* change the % plurality to the fraction of absolute total weight */
    fplural = ajSeqsetGetTotweight(seqset) * fplural / 100;

    /*
    ** change the % identity to the number of identical sequences at a
    ** position required for consensus
    */
    ident = ajSeqsetGetSize(seqset) * (ajint)identity / 100;

    /* get the consensus sequence */
    embConsCalc(seqset, matrix, ajSeqsetGetSize(seqset), ajSeqsetGetLen(seqset),
		 fplural, 0.0, ident, ajFalse, &cons);
    ajSeqAssignSeqS(consensus, cons);

    ajSeqAssignNameS(consensus,(xxx=ajStrNewC("Consensus")));

    /* get the reference sequence */
    if(nrefseq == -1)
	ref = consensus;
    else
	ref = ajSeqsetGetseqSeq(seqset, nrefseq);


    /* start the HTML table */
    if(html)
	ajFmtPrintF(outfile,"<table border cellpadding=4 bgcolor="
		    "\"#FFFFF0\">\n");

    /* print the header information */
    if(doheader)
    {
	/* start the HTML table title line and output the Name header */
	if(html)			
	    ajFmtPrintF(outfile, "<tr>");
	else
	    ajFmtPrintF(outfile, "%s", "# ");

	if(dousa)
	{
	    if(html)
		ajFmtPrintF(outfile, "<th>USA</th>");
	    else
		ajFmtPrintF(outfile, "%-16s", "USA");
	}

	if(doname)
	{
	    if(html)
		ajFmtPrintF(outfile, "<th>Name</th>");
	    else
		ajFmtPrintF(outfile, "%-12s", "Name");
	}

	if(doseqlength)
	{
	    if(html)
		ajFmtPrintF(outfile, "<th>Sequence Length</th>");
	    else
		ajFmtPrintF(outfile, "SeqLen\t");
	}

	if(doalignlength)
	{
	    if(html)
		ajFmtPrintF(outfile, "<th>Aligned Length</th>");
	    else
		ajFmtPrintF(outfile, "AlignLen\t");
	}

	if(dogaps)
	{
	    if(html)
		ajFmtPrintF(outfile, "<th>Gaps</th>");
	    else
		ajFmtPrintF(outfile, "Gaps\t");
	}

	if(dogapcount)
	{
	    if(html)
		ajFmtPrintF(outfile, "<th>Gap Length</th>");
	    else
		ajFmtPrintF(outfile, "GapLen\t");
	}

	if(doidcount)
	{
	    if(html)
		ajFmtPrintF(outfile, "<th>Identity</th>");
	    else
		ajFmtPrintF(outfile, "Ident\t");
	}

	if(dosimcount)
	{
	    if(html)
		ajFmtPrintF(outfile, "<th>Similarity</th>");
	    else
		ajFmtPrintF(outfile, "Similar\t");
	}

	if(dodifcount)
	{
	    if(html)
		ajFmtPrintF(outfile, "<th>Difference</th>");
	    else
		ajFmtPrintF(outfile, "Differ\t");
	}

	if(dochange)
	{
	    if(html)
		ajFmtPrintF(outfile, "<th>%% Change</th>");
	    else
		ajFmtPrintF(outfile, "%% Change\t");
	}

        if(dowt)
        {
            if(html)
                ajFmtPrintF(outfile, "<th>Weight</th>");
            else
                ajFmtPrintF(outfile, "Weight\t");
        }


	if(dodesc)
	{
	    if(html)
		ajFmtPrintF(outfile, "<th>Description</th>");
	    else
		ajFmtPrintF(outfile, "Description");
	}

	/* end the HTML table title line */
	if(html)
	    ajFmtPrintF(outfile, "</tr>\n");
	else
	    ajFmtPrintF(outfile, "\n");
    }


    for(i=0; i<ajSeqsetGetSize(seqset); i++)
    {
    	seq = ajSeqsetGetseqSeq(seqset, i);

	/* get the usa ('-' if unknown) */
	usa = ajSeqGetUsaS(seq);
	if(ajStrGetLen(usa) == 0)
	    usa = altusa;

	/* get the name ('-' if unknown) */
	name = ajSeqGetNameS(seq);
	if(ajStrGetLen(name) == 0)
	    name = altname;

	/* get the stats from the comparison to the reference sequence */
	infoalign_Compare(ref, seq, sub, cvt, &seqlength, &alignlength,
			  &gaps, &gapcount, &idcount, &simcount,
			  &difcount, &change);

	/* start table line */
	if(html)
	    ajFmtPrintF(outfile, "<tr>");

	if(dousa)
	    infoalign_OutputStr(outfile, usa, html,
				(dodesc || dowt || dochange ||
				 dodifcount || dosimcount ||
				 doidcount || dogapcount || dogaps ||
				 doseqlength || doalignlength || doname), 18);
	
	if(doname)
	    infoalign_OutputStr(outfile, name, html,
				(dodesc || dowt || dochange ||
				 dodifcount || dosimcount ||
				 doidcount || dogapcount || dogaps ||
				 doseqlength || doalignlength), 14);
	
	if(doseqlength)
	    infoalign_OutputInt(outfile, seqlength, html,
				(dodesc || dowt || dochange ||
				 dodifcount || dosimcount ||
				 doidcount || dogapcount ||
				 dogaps || doalignlength));
	
	if(doalignlength)
	    infoalign_OutputInt(outfile, alignlength, html,
				(dodesc || dowt || dochange ||
				 dodifcount || dosimcount ||
				 doidcount || dogapcount || dogaps));
	
	if(dogaps)
	    infoalign_OutputInt(outfile, gaps, html,
				(dodesc || dowt || dochange ||
				 dodifcount || dosimcount ||
				 doidcount || dogapcount));
	
	if(dogapcount)
	    infoalign_OutputInt(outfile, gapcount, html,
				(dodesc || dowt || dochange ||
				 dodifcount || dosimcount ||
				 doidcount));
	
	if(doidcount)
	    infoalign_OutputInt(outfile, idcount, html,
				(dodesc || dowt || dochange ||
                                 dodifcount || dosimcount));
	
	if(dosimcount)
	    infoalign_OutputInt(outfile, simcount, html, 
                                (dodesc || dowt || dochange ||
				 dodifcount));
	
	if(dodifcount)
	    infoalign_OutputInt(outfile, difcount, html, 
           		        (dodesc || dowt || dochange));
	
	if(dochange)
	    infoalign_OutputFloat(outfile, change, html, (dodesc || dowt) );
	
        if(dowt)
            infoalign_OutputFloat(outfile, ajSeqsetGetseqWeight(seqset,i), html, 
            	dodesc);

	if(dodesc)
	    infoalign_OutputStr(outfile, ajSeqGetDescS(seq), html, ajFalse, 
	    	NOLIMIT);
	
	/* end table line */
	if(html)
	    ajFmtPrintF(outfile, "</tr>\n");
	else
	    ajFmtPrintF(outfile, "\n");
    }
    
    
    /* end the HTML table */
    if(html)
	ajFmtPrintF(outfile, "</table>\n");
    
    ajFileClose(&outfile);
    
    /* tidy up */
    ajStrDel(&altusa);
    ajStrDel(&altname);
    ajStrDel(&xxx);
    ajSeqDel(&consensus);

    ajSeqsetDel(&seqset);
    ajStrDel(&refseq);
    ajMatrixDel(&matrix);
    ajStrDel(&cons);

    embExit();
    return 0;
}
예제 #4
0
파일: tranalign.c 프로젝트: WenchaoLin/JAMg
int main(int argc, char **argv)
{
    AjPSeqall nucseq;		/* input nucleic sequences */
    AjPSeqset protseq;		/* input aligned protein sequences */
    AjPSeqout seqout;
    AjPSeq nseq;		/* next nucleic sequence to align */
    const AjPSeq pseq;		/* next protein sequence use in alignment */
    AjPTrn trnTable;
    AjPSeq pep;			/* translation of nseq */
    AjPStr tablelist;
    ajint table;
    AjPSeqset outseqset;	/* set of aligned nucleic sequences */
    ajint proteinseqcount = 0;
    AjPStr degapstr = NULL;
    /* used to check if it matches with START removed */
    AjPStr degapstr2 = NULL;
    AjPStr codon = NULL;	/* holds temporary codon to check if is START */
    char aa;			/* translated putative START codon */
    ajint type;			/* returned type of the putative START codon */
    /* start position of guide protein in translation */
    ajlong pos = 0;
    AjPSeq newseq = NULL;	/* output aligned nucleic sequence */
    ajint frame;

    embInit("tranalign", argc, argv);

    nucseq    = ajAcdGetSeqall("asequence");
    protseq   = ajAcdGetSeqset("bsequence");
    tablelist = ajAcdGetListSingle("table");
    seqout    = ajAcdGetSeqoutset("outseq");

    outseqset = ajSeqsetNew();
    degapstr  = ajStrNew();

    /* initialise the translation table */
    ajStrToInt(tablelist, &table);
    trnTable = ajTrnNewI(table);

    ajSeqsetFill(protseq);

    while(ajSeqallNext(nucseq, &nseq))
    {
    	if((pseq = ajSeqsetGetseqSeq(protseq, proteinseqcount++)) == NULL)
    	    ajErr("No guide protein sequence available for "
		  "nucleic sequence %S",
		  ajSeqGetNameS(nseq));

	ajDebug("Aligning %S and %S\n",
		ajSeqGetNameS(nseq), ajSeqGetNameS(pseq));

        /* get copy of pseq string with no gaps */
        ajStrAssignS(&degapstr, ajSeqGetSeqS(pseq));
        ajStrRemoveGap(&degapstr);

        /*
	** for each translation frame look for subset of pep that
	** matches pseq
	*/
        for(frame = 1; frame <4; frame++)
	{
	    ajDebug("trying frame %d\n", frame);
            pep = ajTrnSeqOrig(trnTable, nseq, frame);
            degapstr2 = ajStrNew();
            ajStrAssignRef(&degapstr2, degapstr);
            pos = ajStrFindCaseS(ajSeqGetSeqS(pep), degapstr);

            /* 
            ** we might have a START codon that should be translated as 'M'
            ** we need to check if there is a match after a possible START
            ** codon 
            */
            if(pos == -1 && ajStrGetLen(degapstr) > 1 && 
                (ajStrGetPtr(degapstr)[0] == 'M' ||
		 ajStrGetPtr(degapstr)[0] == 'm'))
	      {
                /* see if pep minus the first character is a match */
                ajStrCutStart(&degapstr2, 1);
                pos = ajStrFindCaseS(ajSeqGetSeqS(pep), degapstr2); 

                /*
		** pos is >= 1 if we have a match that is after the first
		** residue
		*/
                if(pos >= 1)
		{
                    /* point back at the putative START Methionine */
                    pos--;
                    /* test if first codon is a START */
                    codon = ajStrNew();
                    ajStrAssignSubS(&codon, ajSeqGetSeqS(nseq), 
                                (pos*3)+frame-1, (pos*3)+frame+2);
                    type = ajTrnCodonstrTypeS(trnTable, codon, &aa);

                    if(type != 1)
                    {
                        /* first codon is not a valid START, force a mismatch */
                        pos = -1;
                    }
                    ajStrDel(&codon);
                
            	}
		else
		{
                    /* force 'pos == 0' to be treated as a mismatch */
            	    pos = -1;
		}
            }

            ajStrDel(&degapstr2);
            ajSeqDel(&pep);

            if(pos != -1)
            	break;
        }

        if(pos == -1)
	    ajErr("Guide protein sequence %S not found in nucleic sequence %S",
		  ajSeqGetNameS(pseq), ajSeqGetNameS(nseq));
	else
	{
	    ajDebug("got a match with frame=%d\n", frame);
            /* extract the coding region of nseq with gaps */
            newseq = ajSeqNew();
            ajSeqSetNuc(newseq);
            ajSeqAssignNameS(newseq, ajSeqGetNameS(nseq));
            ajSeqAssignDescS(newseq, ajSeqGetDescS(nseq));
            tranalign_AddGaps(newseq, nseq, pseq, (pos*3)+frame-1);

            /* output the gapped nucleic sequence */
            ajSeqsetApp(outseqset, newseq);

            ajSeqDel(&newseq);
        }

        ajStrRemoveWhiteExcess(&degapstr);
    }

    ajSeqoutWriteSet(seqout, outseqset);
    ajSeqoutClose(seqout);

    ajTrnDel(&trnTable);
    ajSeqsetDel(&outseqset);
    ajStrDel(&degapstr);
    ajStrDel(&degapstr2);

    ajSeqallDel(&nucseq);
    ajSeqDel(&nseq);
    ajSeqoutDel(&seqout);
    ajSeqsetDel(&protseq);
    ajStrDel(&tablelist);

    embExit();

    return 0;
}
예제 #5
0
파일: descseq.c 프로젝트: WenchaoLin/JAMg
int main(int argc, char **argv)
{
    AjPSeqout seqout;
    AjPSeq seq;
    AjPStr name = NULL;
    AjPStr desc = NULL;
    AjPStr temp = NULL;
    AjBool append;

    embInit("descseq", argc, argv);

    seqout = ajAcdGetSeqout("outseq");
    seq    = ajAcdGetSeq("sequence");
    append = ajAcdGetBoolean("append");
    name   = ajAcdGetString("name");
    desc   = ajAcdGetString("description");


    /* if appending, then do this */
    if(append)
    {
	/* is there a name? */
	if(ajStrGetLen(name))
	{
	    ajStrAssignS(&temp, ajSeqGetNameS(seq));
	    ajStrAppendS(&temp, name);
	    ajSeqAssignNameS(seq, temp);
	}

	/* is there a description? */
	if(ajStrGetLen(desc))
	{
	    ajStrAssignS(&temp, ajSeqGetDescS(seq));
	    ajStrAppendS(&temp, desc);
	    ajSeqAssignDescS(seq, temp);
	}

	/* otherwise, just overwrite the existing values */
    }
    else
    {
	/* is there a name? */
	if(ajStrGetLen(name))
	    ajSeqAssignNameS(seq, name);

	/* is there a description? */
	if(ajStrGetLen(desc))
	    ajSeqAssignDescS(seq, desc);
    }

    ajSeqoutWriteSeq(seqout, seq);
    ajSeqoutClose(seqout);

    ajSeqDel(&seq);
    ajSeqoutDel(&seqout);
    ajStrDel(&name);
    ajStrDel(&desc);
    ajStrDel(&temp);

    embExit();

    return 0;
}