Example #1
0
static void splitter_write(AjPSeqout default_seqout,
                           AjPSeq subseq, const AjPSeq seq)
{
    /* set the description of the subsequence */
    ajSeqAssignDescS(subseq, ajSeqGetDescS(seq));

    /* set the type of the subsequence */
    ajSeqType(subseq);

    ajSeqoutWriteSeq(default_seqout, subseq);

    return;
}
Example #2
0
int main(int argc, char **argv)
{
    ajint begin, end;
    AjPSeqall seqall;
    AjPSeq seq;
    EmbPShow ss;
    AjPFile outfile;
    AjPStr tablename;
    ajint table;
    AjPRange uppercase;
    AjPRange highlight;
    AjBool threeletter;
    AjBool numberseq;
    AjBool nameseq;
    ajint width;
    ajint length;
    ajint margin;
    AjBool description;
    ajint offset;
    AjBool html;
    AjPStr descriptionline;
    ajint orfminsize;
    AjPTrn trnTable;
    AjBool translation;
    AjBool reverse;
    AjBool cutlist;
    AjBool flat;
    EmbPMatMatch mm = NULL;

    AjPStr *framelist;
    AjBool frames[6];   /* frames to be translated 1 to 3, -1 to -3 */
	 
    /* stuff for tables and lists of enzymes and hits */
    ajint default_mincuts = 1;
    ajint default_maxcuts = 2000000000;
    AjPTable hittable; /* enzyme hits */

    /* stuff lifted from Alan's 'restrict.c' */
    AjPStr enzymes = NULL;
    ajint mincuts;
    ajint maxcuts;
    ajint sitelen;
    AjBool single;
    AjBool blunt;
    AjBool sticky;
    AjBool ambiguity;
    AjBool plasmid;
    AjBool commercial;
    AjBool limit;
    AjBool methyl;
    AjPFile enzfile  = NULL;
    AjPFile equfile  = NULL;
    AjPFile methfile = NULL;
    AjPTable retable = NULL;
    ajint hits;
    AjPList restrictlist = NULL;

    embInit("remap", argc, argv);

    seqall      = ajAcdGetSeqall("sequence");
    outfile     = ajAcdGetOutfile("outfile");
    tablename   = ajAcdGetListSingle("table");
    uppercase   = ajAcdGetRange("uppercase");
    highlight   = ajAcdGetRange("highlight");
    threeletter = ajAcdGetBoolean("threeletter");
    numberseq   = ajAcdGetBoolean("number");
    width       = ajAcdGetInt("width");
    length      = ajAcdGetInt("length");
    margin      = ajAcdGetInt("margin");
    nameseq     = ajAcdGetBoolean("name");
    description = ajAcdGetBoolean("description");
    offset      = ajAcdGetInt("offset");
    html        = ajAcdGetBoolean("html");
    orfminsize  = ajAcdGetInt("orfminsize");
    translation = ajAcdGetBoolean("translation");
    reverse     = ajAcdGetBoolean("reverse");
    cutlist     = ajAcdGetBoolean("cutlist");
    flat        = ajAcdGetBoolean("flatreformat");
    framelist   = ajAcdGetList("frame");
    
    /*  restriction enzyme stuff */
    mincuts    = ajAcdGetInt("mincuts");
    maxcuts    = ajAcdGetInt("maxcuts");
    sitelen    = ajAcdGetInt("sitelen");
    single     = ajAcdGetBoolean("single");
    blunt      = ajAcdGetBoolean("blunt");
    sticky     = ajAcdGetBoolean("sticky");
    ambiguity  = ajAcdGetBoolean("ambiguity");
    plasmid    = ajAcdGetBoolean("plasmid");
    commercial = ajAcdGetBoolean("commercial");
    limit      = ajAcdGetBoolean("limit");
    enzymes    = ajAcdGetString("enzymes");
    methfile   = ajAcdGetDatafile("mfile");
    methyl     = ajAcdGetBoolean("methylation");
    
    if(!blunt  && !sticky)
	ajFatal("Blunt/Sticky end cutters shouldn't both be disabled.");

    /* get the number of the genetic code used */
    ajStrToInt(tablename, &table);
    trnTable = ajTrnNewI(table);

    /* read the local file of enzymes names */
    remap_read_file_of_enzyme_names(&enzymes);

    /* get the frames to be translated */
    remap_GetFrames(framelist, frames);
	 
    while(ajSeqallNext(seqall, &seq))
    {
	/* get begin and end positions */
	begin = ajSeqGetBegin(seq)-1;
	end   = ajSeqGetEnd(seq)-1;

	/* do the name and description */
	if(nameseq)
	{
	    if(html)
		ajFmtPrintF(outfile, "<H2>%S</H2>\n",
				   ajSeqGetNameS(seq));
	    else
		ajFmtPrintF(outfile, "%S\n", ajSeqGetNameS(seq));
	}

	if(description)
	{
	    /*
	    **  wrap the description line at the width of the sequence
	    **  plus margin
	    */
	    if(html)
		ajFmtPrintF(outfile, "<H3>%S</H3>\n",
				   ajSeqGetDescS(seq));
	    else
	    {
		descriptionline = ajStrNew();
		ajStrAssignS(&descriptionline, ajSeqGetDescS(seq));
		ajStrFmtWrap(&descriptionline, width+margin);
		ajFmtPrintF(outfile, "%S\n", descriptionline);
		ajStrDel(&descriptionline);
	    }
	}

	/* get the restriction cut sites */
	/*
	**  most of this is lifted from the program 'restrict.c' by Alan
	**  Bleasby
	 */
	if(single)
	    maxcuts=mincuts=1;
	retable = ajTablestrNew(EQUGUESS);
	enzfile = ajDatafileNewInNameC(ENZDATA);
	if(!enzfile)
	    ajFatal("Cannot locate enzyme file. Run REBASEEXTRACT");

	if(limit)
	{
	    equfile = ajDatafileNewInNameC(EQUDATA);
	    if(!equfile)
		limit = ajFalse;
	    else
		remap_read_equiv(&equfile, &retable, commercial);
	}

	ajFileSeek(enzfile, 0L, 0);
	restrictlist = ajListNew();
	/* search for hits, but don't use mincuts and maxcuts criteria yet */
	hits = embPatRestrictMatch(seq, begin+1, end+1, enzfile, methfile,
                                   enzymes, sitelen,plasmid, ambiguity,
                                   default_mincuts, default_maxcuts, blunt,
                                   sticky, commercial, methyl,
				   restrictlist);

	ajDebug("Remap found %d hits\n", hits);

	if(hits)
	{
	    /* this bit is lifted from printHits */
	    embPatRestrictRestrict(restrictlist, hits, !limit,
					  ajFalse);
	    if(limit)
		remap_RestrictPreferred(restrictlist,retable);
	}


	ajFileClose(&enzfile);
	ajFileClose(&methfile);


	/*
	** Remove those violating the mincuts and maxcuts
	** criteria, but save them in hittable for printing out later.
	** Keep a count of how many hits each enzyme gets in hittable.
	*/
        hittable = ajTablestrNewCase(TABLEGUESS);
	remap_RemoveMinMax(restrictlist, hittable, mincuts, maxcuts);


	/* make the Show Object */
	ss = embShowNew(seq, begin, end, width, length, margin, html, offset);

	if(html)
	    ajFmtPrintF(outfile, "<PRE>");

	/* create the format to display */
	embShowAddBlank(ss);
	embShowAddRE(ss, 1, restrictlist, plasmid, flat);
	embShowAddSeq(ss, numberseq, threeletter, uppercase, highlight);

	if(!numberseq)
	    embShowAddTicknum(ss);
	embShowAddTicks(ss);

	if(reverse)
	{
	    embShowAddComp(ss, numberseq);
	    embShowAddRE(ss, -1, restrictlist, plasmid, flat);
	}


	if(translation)
	{
	    if(reverse)
		embShowAddBlank(ss);

            if(frames[0])	    
	      embShowAddTran(ss, trnTable, 1, threeletter,
			     numberseq, NULL, orfminsize,
			     AJFALSE, AJFALSE, AJFALSE, AJFALSE);
            if(frames[1])
	      embShowAddTran(ss, trnTable, 2, threeletter,
			     numberseq, NULL, orfminsize,
			     AJFALSE, AJFALSE, AJFALSE, AJFALSE);
            if(frames[2])
	      embShowAddTran(ss, trnTable, 3, threeletter,
			     numberseq, NULL, orfminsize,
			     AJFALSE, AJFALSE, AJFALSE, AJFALSE);
	    
	    if(reverse)
	    {
		embShowAddTicks(ss);
                if(frames[5])
		  embShowAddTran(ss, trnTable, -3, threeletter,
			         numberseq, NULL, orfminsize,
			         AJFALSE, AJFALSE, AJFALSE, AJFALSE);
                if(frames[4])
		  embShowAddTran(ss, trnTable, -2, threeletter,
			         numberseq, NULL, orfminsize,
			         AJFALSE, AJFALSE, AJFALSE, AJFALSE);
                if(frames[3])
		  embShowAddTran(ss, trnTable, -1, threeletter,
			         numberseq, NULL, orfminsize,
			         AJFALSE, AJFALSE, AJFALSE, AJFALSE);
	    }
	}

	embShowPrint(outfile, ss);

	/* display a list of the Enzymes that cut and don't cut */
	if(cutlist)
	{
	    remap_CutList(outfile, hittable,
	    		limit, html, mincuts, maxcuts);
	    remap_NoCutList(outfile, hittable, html, enzymes, blunt,
			sticky, sitelen, commercial, ambiguity, 
			limit, retable);
	}

	/* add a gratuitous newline at the end of the sequence */
	ajFmtPrintF(outfile, "\n");

	/* tidy up */
	embShowDel(&ss);

	while(ajListPop(restrictlist,(void **)&mm))
	    embMatMatchDel(&mm);
	ajListFree(&restrictlist);

        remap_DelTable(&hittable);

	ajTablestrFree(&retable);
    }


    ajTrnDel(&trnTable);

    ajSeqallDel(&seqall);
    ajSeqDel(&seq);
    ajFileClose(&outfile);
    ajStrDel(&tablename);
    ajStrDel(&enzymes);
    ajStrDelarray(&framelist);

    ajRangeDel(&uppercase);
    ajRangeDel(&highlight);

    embExit();

    return 0;
}
Example #3
0
static void extractfeat_WriteOut(AjPSeqout seqout, AjPStr *featstr,
				 AjBool compall, AjBool sense, ajint firstpos,
				 ajint lastpos, ajint before, ajint after,
				 const AjPSeq seq, AjBool remote,
				 const AjPStr type,
				 AjBool featinname, const AjPStr describestr)
{
    AjPSeq newseq = NULL;
    AjPStr name   = NULL;	/* new name of the sequence */
    AjPStr value  = NULL;	/* string value of start or end position */
    AjPStr desc   = NULL;	/* sequence description */
    AjBool forward = sense;

    if(compall)
        forward = ajFalse;

    ajDebug("WriteOut %S_%d_%d [%S] %d all:%B fwd:%B remote:%B\n",
           ajSeqGetNameS(seq), firstpos+1, lastpos+1, type,
           ajStrGetLen(*featstr), compall, sense, remote);
 
    /* see if there is a sequence to be written out */
    if(!ajStrGetLen(*featstr))
    {
        ajWarn("feature %S_%d_%d [%S] "
               "not written out because it has zero length\n",
               ajSeqGetNameS(seq), firstpos+1, lastpos+1, type);
        ajDebug("feature not written out because it has length=0 "
		"(probably first time round)\n");
    	return;
    }

    /* see if must abort because there were Remote IDs in the features */
    if(remote)
    {
        ajWarn("feature not written out because it has Remote IDs\n");
        ajDebug("feature not written out because it has Remote IDs\n");
        return;
    }

    ajDebug("feature = %d bases\n", ajStrGetLen(*featstr));

    /* featstr may be edited, so it is a AjPStr* */
    extractfeat_BeforeAfter (seq, featstr, firstpos, lastpos, before,
			     after, forward);

    ajDebug("feature+before/after = %d bases\n", ajStrGetLen(*featstr));

    /* set the extracted sequence */
    newseq = ajSeqNew();
    ajSeqAssignSeqS(newseq, *featstr);

     /* create a nice name for the new sequence */
    name = ajStrNew();
    ajStrAppendS(&name, ajSeqGetNameS(seq));
    ajStrAppendC(&name, "_");
    value = ajStrNew();
    ajStrFromInt(&value, firstpos+1);
    ajStrAppendS(&name, value);
    ajStrAppendC(&name, "_");
    ajStrFromInt(&value, lastpos+1);
    ajStrAppendS(&name, value);

    /* add the type of feature to the name, if required */
    if(featinname)
    {
    	ajStrAppendC(&name, "_");
    	ajStrAppendS(&name, type);
    }

    ajSeqAssignNameS(newseq, name);

    /* set the sequence description with the 'type' added */
    desc = ajStrNew();
    ajStrAppendC(&desc, "[");
    ajStrAppendS(&desc, type);
    ajStrAppendC(&desc, "] ");
    if(ajStrGetLen(describestr))
    	ajStrAppendS(&desc, describestr);

    ajStrAppendS(&desc, ajSeqGetDescS(seq));
    ajSeqAssignDescS(newseq, desc);

    /* set the type */
    if(ajSeqIsNuc(seq))
        ajSeqSetNuc(newseq);
    else
        ajSeqSetProt(newseq);


    /* write the new sequence */
    ajSeqoutWriteSeq(seqout, newseq);


    ajSeqDel(&newseq);
    ajStrDel(&name);
    ajStrDel(&value);
    ajStrDel(&desc);

    return;
}
Example #4
0
int main(int argc, char **argv)
{

    AjPSeqset seqset = NULL;
    AjPStr refseq;	/* input name/number of reference sequence */
    ajint  nrefseq;	/* numeric reference sequence */
    AjPMatrix matrix;	/* scoring matrix structure */
    ajint **sub;	/* integer scoring matrix */
    AjPSeqCvt cvt = 0;	/* conversion table for scoring matrix */
    float identity;
    ajint ident;
    float fplural;
    AjPStr cons;
    AjPSeq consensus;

    const AjPSeq ref;
    const AjPSeq seq;
    ajuint i;

    AjBool html;
    AjBool doheader;
    AjBool dousa;
    AjBool doname;
    AjBool doseqlength;
    AjBool doalignlength;
    AjBool dogaps;
    AjBool dogapcount;
    AjBool doidcount;
    AjBool dosimcount;
    AjBool dodifcount;
    AjBool dochange;
    AjBool dodesc;
    AjBool dowt;

    ajint  seqlength;
    ajint  alignlength;
    ajint  gaps;
    ajint  gapcount;
    ajint  idcount;
    ajint  simcount;
    ajint  difcount;
    float  change;

    AjPFile outfile;

    const AjPStr usa;
    const AjPStr name;
    AjPStr altusa;			/* default name when the real name
					   is not known */
    AjPStr altname;

    AjPStr xxx = NULL;

    embInit("infoalign", argc, argv);


    seqset  = ajAcdGetSeqset("sequence");
    refseq  = ajAcdGetString("refseq");
    matrix  = ajAcdGetMatrix("matrix");

    ajSeqsetFill(seqset);

    outfile = ajAcdGetOutfile("outfile");

    html          = ajAcdGetBoolean("html");
    doheader      = ajAcdGetBoolean("heading");
    dousa         = ajAcdGetBoolean("usa");
    doname        = ajAcdGetBoolean("name");
    doseqlength   = ajAcdGetBoolean("seqlength");
    doalignlength = ajAcdGetBoolean("alignlength");
    dogaps        = ajAcdGetBoolean("gaps");
    dogapcount    = ajAcdGetBoolean("gapcount");
    doidcount     = ajAcdGetBoolean("idcount");
    dosimcount    = ajAcdGetBoolean("simcount");
    dodifcount    = ajAcdGetBoolean("diffcount");
    dochange      = ajAcdGetBoolean("change");
    dodesc        = ajAcdGetBoolean("description");
    dowt          = ajAcdGetBoolean("weight");

    /* consensus parameters */
    fplural   = ajAcdGetFloat("plurality");
    identity  = ajAcdGetFloat("identity");


    cons      = ajStrNew();
    consensus = ajSeqNew();
    altusa    = ajStrNewC("-");
    altname   = ajStrNewC("-");


    /* get conversion table and scoring matrix */
    cvt = ajMatrixGetCvt(matrix);
    sub = ajMatrixGetMatrix(matrix);

    /* get the number of the reference sequence */
    nrefseq = infoalign_Getrefseq(refseq, seqset);

    /* change the % plurality to the fraction of absolute total weight */
    fplural = ajSeqsetGetTotweight(seqset) * fplural / 100;

    /*
    ** change the % identity to the number of identical sequences at a
    ** position required for consensus
    */
    ident = ajSeqsetGetSize(seqset) * (ajint)identity / 100;

    /* get the consensus sequence */
    embConsCalc(seqset, matrix, ajSeqsetGetSize(seqset), ajSeqsetGetLen(seqset),
		 fplural, 0.0, ident, ajFalse, &cons);
    ajSeqAssignSeqS(consensus, cons);

    ajSeqAssignNameS(consensus,(xxx=ajStrNewC("Consensus")));

    /* get the reference sequence */
    if(nrefseq == -1)
	ref = consensus;
    else
	ref = ajSeqsetGetseqSeq(seqset, nrefseq);


    /* start the HTML table */
    if(html)
	ajFmtPrintF(outfile,"<table border cellpadding=4 bgcolor="
		    "\"#FFFFF0\">\n");

    /* print the header information */
    if(doheader)
    {
	/* start the HTML table title line and output the Name header */
	if(html)			
	    ajFmtPrintF(outfile, "<tr>");
	else
	    ajFmtPrintF(outfile, "%s", "# ");

	if(dousa)
	{
	    if(html)
		ajFmtPrintF(outfile, "<th>USA</th>");
	    else
		ajFmtPrintF(outfile, "%-16s", "USA");
	}

	if(doname)
	{
	    if(html)
		ajFmtPrintF(outfile, "<th>Name</th>");
	    else
		ajFmtPrintF(outfile, "%-12s", "Name");
	}

	if(doseqlength)
	{
	    if(html)
		ajFmtPrintF(outfile, "<th>Sequence Length</th>");
	    else
		ajFmtPrintF(outfile, "SeqLen\t");
	}

	if(doalignlength)
	{
	    if(html)
		ajFmtPrintF(outfile, "<th>Aligned Length</th>");
	    else
		ajFmtPrintF(outfile, "AlignLen\t");
	}

	if(dogaps)
	{
	    if(html)
		ajFmtPrintF(outfile, "<th>Gaps</th>");
	    else
		ajFmtPrintF(outfile, "Gaps\t");
	}

	if(dogapcount)
	{
	    if(html)
		ajFmtPrintF(outfile, "<th>Gap Length</th>");
	    else
		ajFmtPrintF(outfile, "GapLen\t");
	}

	if(doidcount)
	{
	    if(html)
		ajFmtPrintF(outfile, "<th>Identity</th>");
	    else
		ajFmtPrintF(outfile, "Ident\t");
	}

	if(dosimcount)
	{
	    if(html)
		ajFmtPrintF(outfile, "<th>Similarity</th>");
	    else
		ajFmtPrintF(outfile, "Similar\t");
	}

	if(dodifcount)
	{
	    if(html)
		ajFmtPrintF(outfile, "<th>Difference</th>");
	    else
		ajFmtPrintF(outfile, "Differ\t");
	}

	if(dochange)
	{
	    if(html)
		ajFmtPrintF(outfile, "<th>%% Change</th>");
	    else
		ajFmtPrintF(outfile, "%% Change\t");
	}

        if(dowt)
        {
            if(html)
                ajFmtPrintF(outfile, "<th>Weight</th>");
            else
                ajFmtPrintF(outfile, "Weight\t");
        }


	if(dodesc)
	{
	    if(html)
		ajFmtPrintF(outfile, "<th>Description</th>");
	    else
		ajFmtPrintF(outfile, "Description");
	}

	/* end the HTML table title line */
	if(html)
	    ajFmtPrintF(outfile, "</tr>\n");
	else
	    ajFmtPrintF(outfile, "\n");
    }


    for(i=0; i<ajSeqsetGetSize(seqset); i++)
    {
    	seq = ajSeqsetGetseqSeq(seqset, i);

	/* get the usa ('-' if unknown) */
	usa = ajSeqGetUsaS(seq);
	if(ajStrGetLen(usa) == 0)
	    usa = altusa;

	/* get the name ('-' if unknown) */
	name = ajSeqGetNameS(seq);
	if(ajStrGetLen(name) == 0)
	    name = altname;

	/* get the stats from the comparison to the reference sequence */
	infoalign_Compare(ref, seq, sub, cvt, &seqlength, &alignlength,
			  &gaps, &gapcount, &idcount, &simcount,
			  &difcount, &change);

	/* start table line */
	if(html)
	    ajFmtPrintF(outfile, "<tr>");

	if(dousa)
	    infoalign_OutputStr(outfile, usa, html,
				(dodesc || dowt || dochange ||
				 dodifcount || dosimcount ||
				 doidcount || dogapcount || dogaps ||
				 doseqlength || doalignlength || doname), 18);
	
	if(doname)
	    infoalign_OutputStr(outfile, name, html,
				(dodesc || dowt || dochange ||
				 dodifcount || dosimcount ||
				 doidcount || dogapcount || dogaps ||
				 doseqlength || doalignlength), 14);
	
	if(doseqlength)
	    infoalign_OutputInt(outfile, seqlength, html,
				(dodesc || dowt || dochange ||
				 dodifcount || dosimcount ||
				 doidcount || dogapcount ||
				 dogaps || doalignlength));
	
	if(doalignlength)
	    infoalign_OutputInt(outfile, alignlength, html,
				(dodesc || dowt || dochange ||
				 dodifcount || dosimcount ||
				 doidcount || dogapcount || dogaps));
	
	if(dogaps)
	    infoalign_OutputInt(outfile, gaps, html,
				(dodesc || dowt || dochange ||
				 dodifcount || dosimcount ||
				 doidcount || dogapcount));
	
	if(dogapcount)
	    infoalign_OutputInt(outfile, gapcount, html,
				(dodesc || dowt || dochange ||
				 dodifcount || dosimcount ||
				 doidcount));
	
	if(doidcount)
	    infoalign_OutputInt(outfile, idcount, html,
				(dodesc || dowt || dochange ||
                                 dodifcount || dosimcount));
	
	if(dosimcount)
	    infoalign_OutputInt(outfile, simcount, html, 
                                (dodesc || dowt || dochange ||
				 dodifcount));
	
	if(dodifcount)
	    infoalign_OutputInt(outfile, difcount, html, 
           		        (dodesc || dowt || dochange));
	
	if(dochange)
	    infoalign_OutputFloat(outfile, change, html, (dodesc || dowt) );
	
        if(dowt)
            infoalign_OutputFloat(outfile, ajSeqsetGetseqWeight(seqset,i), html, 
            	dodesc);

	if(dodesc)
	    infoalign_OutputStr(outfile, ajSeqGetDescS(seq), html, ajFalse, 
	    	NOLIMIT);
	
	/* end table line */
	if(html)
	    ajFmtPrintF(outfile, "</tr>\n");
	else
	    ajFmtPrintF(outfile, "\n");
    }
    
    
    /* end the HTML table */
    if(html)
	ajFmtPrintF(outfile, "</table>\n");
    
    ajFileClose(&outfile);
    
    /* tidy up */
    ajStrDel(&altusa);
    ajStrDel(&altname);
    ajStrDel(&xxx);
    ajSeqDel(&consensus);

    ajSeqsetDel(&seqset);
    ajStrDel(&refseq);
    ajMatrixDel(&matrix);
    ajStrDel(&cons);

    embExit();
    return 0;
}
Example #5
0
int main(int argc, char **argv)
{
    AjPSeqall nucseq;		/* input nucleic sequences */
    AjPSeqset protseq;		/* input aligned protein sequences */
    AjPSeqout seqout;
    AjPSeq nseq;		/* next nucleic sequence to align */
    const AjPSeq pseq;		/* next protein sequence use in alignment */
    AjPTrn trnTable;
    AjPSeq pep;			/* translation of nseq */
    AjPStr tablelist;
    ajint table;
    AjPSeqset outseqset;	/* set of aligned nucleic sequences */
    ajint proteinseqcount = 0;
    AjPStr degapstr = NULL;
    /* used to check if it matches with START removed */
    AjPStr degapstr2 = NULL;
    AjPStr codon = NULL;	/* holds temporary codon to check if is START */
    char aa;			/* translated putative START codon */
    ajint type;			/* returned type of the putative START codon */
    /* start position of guide protein in translation */
    ajlong pos = 0;
    AjPSeq newseq = NULL;	/* output aligned nucleic sequence */
    ajint frame;

    embInit("tranalign", argc, argv);

    nucseq    = ajAcdGetSeqall("asequence");
    protseq   = ajAcdGetSeqset("bsequence");
    tablelist = ajAcdGetListSingle("table");
    seqout    = ajAcdGetSeqoutset("outseq");

    outseqset = ajSeqsetNew();
    degapstr  = ajStrNew();

    /* initialise the translation table */
    ajStrToInt(tablelist, &table);
    trnTable = ajTrnNewI(table);

    ajSeqsetFill(protseq);

    while(ajSeqallNext(nucseq, &nseq))
    {
    	if((pseq = ajSeqsetGetseqSeq(protseq, proteinseqcount++)) == NULL)
    	    ajErr("No guide protein sequence available for "
		  "nucleic sequence %S",
		  ajSeqGetNameS(nseq));

	ajDebug("Aligning %S and %S\n",
		ajSeqGetNameS(nseq), ajSeqGetNameS(pseq));

        /* get copy of pseq string with no gaps */
        ajStrAssignS(&degapstr, ajSeqGetSeqS(pseq));
        ajStrRemoveGap(&degapstr);

        /*
	** for each translation frame look for subset of pep that
	** matches pseq
	*/
        for(frame = 1; frame <4; frame++)
	{
	    ajDebug("trying frame %d\n", frame);
            pep = ajTrnSeqOrig(trnTable, nseq, frame);
            degapstr2 = ajStrNew();
            ajStrAssignRef(&degapstr2, degapstr);
            pos = ajStrFindCaseS(ajSeqGetSeqS(pep), degapstr);

            /* 
            ** we might have a START codon that should be translated as 'M'
            ** we need to check if there is a match after a possible START
            ** codon 
            */
            if(pos == -1 && ajStrGetLen(degapstr) > 1 && 
                (ajStrGetPtr(degapstr)[0] == 'M' ||
		 ajStrGetPtr(degapstr)[0] == 'm'))
	      {
                /* see if pep minus the first character is a match */
                ajStrCutStart(&degapstr2, 1);
                pos = ajStrFindCaseS(ajSeqGetSeqS(pep), degapstr2); 

                /*
		** pos is >= 1 if we have a match that is after the first
		** residue
		*/
                if(pos >= 1)
		{
                    /* point back at the putative START Methionine */
                    pos--;
                    /* test if first codon is a START */
                    codon = ajStrNew();
                    ajStrAssignSubS(&codon, ajSeqGetSeqS(nseq), 
                                (pos*3)+frame-1, (pos*3)+frame+2);
                    type = ajTrnCodonstrTypeS(trnTable, codon, &aa);

                    if(type != 1)
                    {
                        /* first codon is not a valid START, force a mismatch */
                        pos = -1;
                    }
                    ajStrDel(&codon);
                
            	}
		else
		{
                    /* force 'pos == 0' to be treated as a mismatch */
            	    pos = -1;
		}
            }

            ajStrDel(&degapstr2);
            ajSeqDel(&pep);

            if(pos != -1)
            	break;
        }

        if(pos == -1)
	    ajErr("Guide protein sequence %S not found in nucleic sequence %S",
		  ajSeqGetNameS(pseq), ajSeqGetNameS(nseq));
	else
	{
	    ajDebug("got a match with frame=%d\n", frame);
            /* extract the coding region of nseq with gaps */
            newseq = ajSeqNew();
            ajSeqSetNuc(newseq);
            ajSeqAssignNameS(newseq, ajSeqGetNameS(nseq));
            ajSeqAssignDescS(newseq, ajSeqGetDescS(nseq));
            tranalign_AddGaps(newseq, nseq, pseq, (pos*3)+frame-1);

            /* output the gapped nucleic sequence */
            ajSeqsetApp(outseqset, newseq);

            ajSeqDel(&newseq);
        }

        ajStrRemoveWhiteExcess(&degapstr);
    }

    ajSeqoutWriteSet(seqout, outseqset);
    ajSeqoutClose(seqout);

    ajTrnDel(&trnTable);
    ajSeqsetDel(&outseqset);
    ajStrDel(&degapstr);
    ajStrDel(&degapstr2);

    ajSeqallDel(&nucseq);
    ajSeqDel(&nseq);
    ajSeqoutDel(&seqout);
    ajSeqsetDel(&protseq);
    ajStrDel(&tablelist);

    embExit();

    return 0;
}
Example #6
0
static void primersearch_store_hits(const Primer primdata,
			       AjPList fhits, AjPList rhits,
			       const AjPSeq seq, AjBool reverse)
{
    ajint amplen = 0;
    AjIList fi;
    AjIList ri;

    PHit primerhit = NULL;

    fi = ajListIterNewread(fhits);
    while(!ajListIterDone(fi))
    {
	EmbPMatMatch fm = NULL;
	EmbPMatMatch rm = NULL;
	amplen = 0;

	fm = ajListIterGet(fi);
	ri = ajListIterNewread(rhits);
	while(!ajListIterDone(ri))
	{
	    ajint seqlen = ajSeqGetLen(seq);
	    ajint s = (fm->start);
	    ajint e;

	    rm = ajListIterGet(ri);
	    e = (rm->start-1);
	    amplen = seqlen-(s-1)-e;

	    if (amplen > 0)	   /* no point making a hit if -ve length! */
	    {
		primerhit = NULL;
		AJNEW(primerhit);
		primerhit->desc=NULL;	 /* must be NULL for ajStrAss */
		primerhit->seqname=NULL; /* must be NULL for ajStrAss */
		primerhit->acc=NULL;
		primerhit->forward=NULL;
		primerhit->reverse=NULL;
		ajStrAssignC(&primerhit->seqname,ajSeqGetNameC(seq));
		ajStrAssignS(&primerhit->desc, ajSeqGetDescS(seq));
		ajStrAssignS(&primerhit->acc, ajSeqGetAccS(seq));
		primerhit->forward_pos = fm->start;
		primerhit->reverse_pos = rm->start;
		primerhit->forward_mismatch = fm->mm;
		primerhit->reverse_mismatch = rm->mm;
		primerhit->amplen = amplen;
		if(!reverse)
		{
		    ajStrAssignS(&primerhit->forward,
				 primdata->forward->patstr);
		    ajStrAssignS(&primerhit->reverse,
				 primdata->reverse->patstr);
		}
		else
		{
		    ajStrAssignS(&primerhit->forward,
				 primdata->reverse->patstr);
		    ajStrAssignS(&primerhit->reverse,
				 primdata->forward->patstr);
		}
		ajListPushAppend(primdata->hitlist, primerhit);


	    }
	}
	/*
	**  clean up rListIter here as it will be new'ed again next
	**  time through
	*/
	ajListIterDel(&ri);
    }

    ajListIterDel(&fi);
    return;
}
Example #7
0
int main(int argc, char **argv)
{
    AjPSeqout seqout;
    AjPSeq seq;
    AjPStr name = NULL;
    AjPStr desc = NULL;
    AjPStr temp = NULL;
    AjBool append;

    embInit("descseq", argc, argv);

    seqout = ajAcdGetSeqout("outseq");
    seq    = ajAcdGetSeq("sequence");
    append = ajAcdGetBoolean("append");
    name   = ajAcdGetString("name");
    desc   = ajAcdGetString("description");


    /* if appending, then do this */
    if(append)
    {
	/* is there a name? */
	if(ajStrGetLen(name))
	{
	    ajStrAssignS(&temp, ajSeqGetNameS(seq));
	    ajStrAppendS(&temp, name);
	    ajSeqAssignNameS(seq, temp);
	}

	/* is there a description? */
	if(ajStrGetLen(desc))
	{
	    ajStrAssignS(&temp, ajSeqGetDescS(seq));
	    ajStrAppendS(&temp, desc);
	    ajSeqAssignDescS(seq, temp);
	}

	/* otherwise, just overwrite the existing values */
    }
    else
    {
	/* is there a name? */
	if(ajStrGetLen(name))
	    ajSeqAssignNameS(seq, name);

	/* is there a description? */
	if(ajStrGetLen(desc))
	    ajSeqAssignDescS(seq, desc);
    }

    ajSeqoutWriteSeq(seqout, seq);
    ajSeqoutClose(seqout);

    ajSeqDel(&seq);
    ajSeqoutDel(&seqout);
    ajStrDel(&name);
    ajStrDel(&desc);
    ajStrDel(&temp);

    embExit();

    return 0;
}
Example #8
0
int main(int argc, char **argv)
{
    AjPSeqall	seqall;
    AjPSeqout	seqout;
    AjPSeq seq = NULL;
    AjPStr str = NULL;
    AjPStr desc = NULL;
    ajint  tail3;
    ajint  tail5 = 0;
    ajint  minlength;
    ajint  mismatches;
    AjBool reverse;
    AjBool fiveprime;
    AjBool cvttolower;

    embInit("trimest", argc, argv);

    seqall 	= ajAcdGetSeqall("sequence");
    seqout 	= ajAcdGetSeqoutall("outseq");
    minlength 	= ajAcdGetInt("minlength");
    mismatches 	= ajAcdGetInt("mismatches");
    reverse	= ajAcdGetBoolean("reverse");
    fiveprime	= ajAcdGetBoolean("fiveprime");
    cvttolower  = ajAcdGetToggle("tolower");
    

    str = ajStrNew();

    while(ajSeqallNext(seqall, &seq))
    {
        /* get sequence description */
        ajStrAssignS(&desc, ajSeqGetDescS(seq));

        /* get positions to cut in 5' poly-T and 3' poly-A tails */
	if(fiveprime)
	    tail5 = trimest_get_tail(seq, 5, minlength, mismatches);
	tail3 = trimest_get_tail(seq, 3, minlength, mismatches);

	/* get a COPY of the sequence string */
	ajStrAssignS(&str, ajSeqGetSeqS(seq));

        /* cut off longest of 3' or 5' tail */
	if(tail5 > tail3)
	{
	    /* if 5' poly-T tail, then reverse the sequence */
	    ajDebug("Tail=%d\n", tail5);
            if(cvttolower)
                trimest_tolower(&str, 0, tail5-1);
            else
	        ajStrKeepRange(&str, tail5, ajSeqGetLen(seq)-1);
	    ajStrAppendC(&desc, " [poly-T tail removed]");

	}
	else if(tail3 > tail5)
	{
	    /* remove 3' poly-A tail */
	    ajDebug("Tail=%d\n", tail3);
            if(cvttolower)
                trimest_tolower(&str, ajSeqGetLen(seq)-tail3,
				ajSeqGetLen(seq));
            else
	        ajStrKeepRange(&str, 0, ajSeqGetLen(seq)-tail3-1);
            ajStrAppendC(&desc, " [poly-A tail removed]");

	}

        /* write sequence out */
	ajSeqAssignSeqS(seq, str);

	/* reverse complement if poly-T found */
	if(tail5 > tail3 && reverse)
	{
	    ajSeqReverseForce(seq);
	    ajStrAppendC(&desc, " [reverse complement]");
	}

        /* set description */
        ajSeqAssignDescS(seq, desc);

	ajSeqoutWriteSeq(seqout, seq);
    }

    ajSeqoutClose(seqout);

    ajStrDel(&str);
    ajStrDel(&desc);

    ajSeqallDel(&seqall);
    ajSeqDel(&seq);
    ajSeqoutDel(&seqout);

    embExit();

    return 0;
}
Example #9
0
void emboss_copy(AjPSeqset seqset, char ***retseqs, AINFO *info)
{
    ajint n;
    ajint maxlen;
    ajint len;
    char **seqs;
    const AjPSeq seq = NULL;
    ajint i=0;
    const AjPStr fmt=NULL;
    const char *p=NULL;
    char  c='\0';
    /*
    char *q=NULL;
    AjPSelexseq   sqdata=NULL;
    AjPSelexdata sdata=NULL;
    */
    ajint cnt=0;
    info->name = NULL;
    info->rf=NULL;
    info->cs=NULL;
    info->desc=NULL;
    info->acc=NULL;
    info->au=NULL;
    info->flags=0;

    AjPStr tmpstr = NULL;

    ajSeqsetFill(seqset);

    fmt = ajSeqsetGetFormat(seqset);
    n = ajSeqsetGetSize(seqset);
    ajSeqsetFmtUpper(seqset);

    maxlen = ajSeqsetGetLen(seqset);


    /* First allocate and copy sequences */
    AJCNEW0(seqs,n);
    for(i=0; i<n; ++i)
    {
        seqs[i] = ajCharNewRes(maxlen+1);
        strcpy(seqs[i],ajSeqGetSeqC(ajSeqsetGetseqSeq(seqset,i)));
    }

    info->sqinfo = (SQINFO *) calloc (sizeof(SQINFO), n);

    for(i=0; i<n; ++i)
    {
        info->sqinfo[i].flags = 0;
        strcpy(info->sqinfo[i].name,"");
        strcpy(info->sqinfo[i].id,"");
        strcpy(info->sqinfo[i].acc,"");
        strcpy(info->sqinfo[i].desc,"");
        info->sqinfo[i].len = 0;
        info->sqinfo[i].start = 0;
        info->sqinfo[i].stop = 0;
        info->sqinfo[i].olen = 0;
        info->sqinfo[i].type = 0;
        info->sqinfo[i].ss = NULL;
        info->sqinfo[i].sa =NULL;
    }

    AJCNEW0(info->wgt,n);

    for(i=0; i<n; ++i)
    {
        info->sqinfo[i].flags = 0;
        info->wgt[i] = ajSeqsetGetseqWeight(seqset,i);
    }
    info->nseq = n;
    info->alen = maxlen;

    for(i=0; i<n; ++i)
    {
        seq = ajSeqsetGetseqSeq(seqset,i);
        if((len=ajStrGetLen(ajSeqGetNameS(seq))))
        {
            if(len>= SQINFO_NAMELEN)
                len = SQINFO_NAMELEN - 1;
            ajStrAssignSubS(&tmpstr, ajSeqGetNameS(seq), 0, len);
            strcpy(info->sqinfo[i].id,ajStrGetPtr(tmpstr));
            info->sqinfo[i].flags |= SQINFO_ID;
            strcpy(info->sqinfo[i].name,ajStrGetPtr(tmpstr));
            info->sqinfo[i].flags |= SQINFO_NAME;
        }
        if((len=ajStrGetLen(ajSeqGetAccS(seq))))
        {
            if(len>= SQINFO_NAMELEN)
                len = SQINFO_NAMELEN - 1;
            ajStrAssignSubS(&tmpstr, ajSeqGetAccS(seq), 0, len);
            strcpy(info->sqinfo[i].acc,ajStrGetPtr(tmpstr));
            info->sqinfo[i].flags |= SQINFO_ACC;
        }
    }
    seq = ajSeqsetGetseqSeq(seqset,0);
    info->cs = ajCharNewS(ajSeqGetSeqS(seq));
    info->name = ajCharNewS(ajSeqGetNameS(seq));
    info->acc = ajCharNewS(ajSeqGetAccS(seq));
    info->desc = ajCharNewS(ajSeqGetDescS(seq));
    info->rf = ajCharNewS(ajSeqGetSeqS(seq));

    /*
        info->rf = ajCharNewS(seq);

    	len = ajStrGetLen(seq->Selexdata->name);
    	info->name = ajCharNewRes(len+1);
    	strcpy(info->name,ajStrGetPtr(seq->Selexdata->name));
    	len = ajStrGetLen(seq->Selexdata->de);
    	info->desc = ajCharNewRes(len+1);

    	sdata = seq->Selexdata;
    	strcpy(info->desc,ajStrGetPtr(sdata->de));
    	len = ajStrGetLen(sdata->ac);
    	info->acc = ajCharNewRes(len+1);
    	strcpy(info->acc,ajStrGetPtr(sdata->ac));
    	len = ajStrGetLen(sdata->au);
    	info->au = ajCharNewRes(len+1);
    	strcpy(info->au,ajStrGetPtr(sdata->au));
    	if(sdata->tc[0] || sdata->tc[1])
    	{
    	    info->flags |= AINFO_TC;
    	    info->tc1 = sdata->tc[0];
    	    info->tc2 = sdata->tc[1];
    	}
    	if(sdata->nc[0] || sdata->nc[1])
    	{
    	    info->flags |= AINFO_NC;
    	    info->nc1 = sdata->nc[0];
    	    info->nc2 = sdata->nc[1];
    	}
    	if(sdata->ga[0] || sdata->ga[1])
    	{
    	    info->flags |= AINFO_GA;
    	    info->ga1 = sdata->ga[0];
    	    info->ga2 = sdata->ga[1];
    	}

    	for(i=0;i<n;++i)
    	{
    	    seq = ajSeqsetGetseqSeq(seqset,i);
    	    sqdata = seq->Selexdata->sq;
    	    if((len=ajStrGetLen(sqdata->name)))
    	    {
    		if(len<64)
    		    strcpy(info->sqinfo[i].name,ajStrGetPtr(sqdata->name));
    		else
    		    strncpy(info->sqinfo[i].name,ajStrGetPtr(sqdata->name),63);
    		info->sqinfo[i].name[63]='\0';
    		info->sqinfo[i].flags |= SQINFO_NAME;
    	    }
    / *
    	    if((len=ajStrGetLen(sqdata->id)))
    	    {
    		if(len<64)
    		    strcpy(info->sqinfo[i].id,ajStrGetPtr(sqdata->id));
    		else
    		    strncpy(info->sqinfo[i]->id,ajStrGetPtr(sqdata->id),63);
    		info->sqinfo[i].id[63]='\0';
    		info->sqinfo[i].flags |= SQINFO_ID;
    	    }
    * /

    	    strcpy(info->sqinfo[i].id,info->sqinfo[i].name);
    	    info->sqinfo[i].flags |= SQINFO_ID;
    	    if((len=ajStrGetLen(sqdata->ac)))
    	    {
    		if(len<64)
    		    strcpy(info->sqinfo[i].acc,ajStrGetPtr(sqdata->ac));
    		else
    		    strncpy(info->sqinfo[i].acc,ajStrGetPtr(sqdata->ac),63);
    		info->sqinfo[i].acc[63]='\0';
    		info->sqinfo[i].flags |= SQINFO_ACC;
    	    }
    	    if((len=ajStrGetLen(sqdata->de)))
    	    {
    		if(len<127)
    		    strcpy(info->sqinfo[i].desc,ajStrGetPtr(sqdata->de));
    		else
    		    strncpy(info->sqinfo[i].desc,ajStrGetPtr(sqdata->de),127);
    		info->sqinfo[i].desc[127]='\0';
    		info->sqinfo[i].flags |= SQINFO_DESC;
    	    }
    	    if(sqdata->start || sqdata->stop || sqdata ->len)
    	    {
    		info->sqinfo[i].start = sqdata->start;
    		info->sqinfo[i].stop  = sqdata->stop;
    		info->sqinfo[i].olen  = sqdata->len;
    		info->sqinfo[i].flags |= SQINFO_START;
    		info->sqinfo[i].flags |= SQINFO_STOP;
    		info->sqinfo[i].flags |= SQINFO_OLEN;
    	    }

    	    if(ajStrGetLen(seq->Selexdata->ss))
    	    {

    		info->sqinfo[i].ss = ajCharNewRes(maxlen+1);
    		p = ajStrGetPtr(seq->Selexdata->ss);
    		q = info->sqinfo[i].ss;
    		while((c==*p))
    		{
    		    if(c=='.' || c==' ' || c=='_' || c=='-')
    			*q++ = c;
    		    ++p;
    		}
    		*q = '\0';
    		info->sqinfo[i].flags |= SQINFO_SS;
    	    }
    	}
        }
    / *
        }
    */


    for(i=0; i<n; ++i)
    {
        info->sqinfo[i].type = kOtherSeq;
        if(ajSeqsetIsDna(seqset))
            info->sqinfo[i].type = kDNA;
        if(ajSeqsetIsRna(seqset))
            info->sqinfo[i].type = kRNA;
        if(ajSeqsetIsProt(seqset))
            info->sqinfo[i].type = kAmino;
        info->sqinfo[i].flags |= SQINFO_TYPE;

        seq = ajSeqsetGetseqSeq(seqset,i);

        p = ajSeqGetSeqC(seq);
        cnt = 0;
        while((c=*p))
        {
            if(!(c=='.' || c==' ' || c=='_' || c=='-' || c=='~'))
                ++cnt;
            ++p;
        }
        info->sqinfo[i].len = cnt;
        info->sqinfo[i].flags |= SQINFO_LEN;
    }


    *retseqs = seqs;
    ajStrDel(&tmpstr);

    return;
}
Example #10
0
int main(ajint argc, char **argv)
{

    AjPFile datafile;
    AjPFile outf = NULL;
    AjPSeqall seqall;
    AjPSeq ajseq = NULL;
    ajuint i;
    ajint verb;
    ajint window;
    ajint pt;
    ajint which;
    ajint weighted;
    ajint t  = 0;
    ajint tc = 0;
    ajint mode;
    ajint min_seg;
    const AjPStr seqdes;

    float min_P;

    struct hept_pref *h;


    embInit("newcoils",argc,argv);

    window   = ajAcdGetInt("window");
    weighted = ajAcdGetInt("weighted");
    verb     = ajAcdGetInt("verb");
    mode     = ajAcdGetInt("mode");
    min_P    = ajAcdGetFloat("minp");
    min_seg  = ajAcdGetInt("minseg");
    outf     = ajAcdGetOutfile("outfile");
    datafile = ajAcdGetDatafile("datafile");
    seqall   = ajAcdGetSeqall("sequence");

    ajseq = ajSeqNew();

    h = newcoils_read_matrix(datafile);

    if(verb)
    {
	for(i=0; i<strlen(NCAAs); ++i)
	    if(NCAAs[i] != '_')
	    {
		pt = (int)(NCAAs[i]-'A');
		ajFmtPrintF(outf,"AA %c %4.2f %4.2f %4.2f %4.2f %4.2f %4.2f "
			    "%4.2f\n",NCAAs[i],h->m[pt][0],h->m[pt][1],
			    h->m[pt][2],h->m[pt][3],h->m[pt][4],
			    h->m[pt][5],h->m[pt][6]);
	    }

	for(i=0; i<(ajuint)h->n; ++i)
	    ajFmtPrintF(outf,"Window %4d %1d %f %f %f %f %f\n",h->f[i].win,
			h->f[i].w,h->f[i].m_cc,h->f[i].sd_cc,h->f[i].m_g,
			h->f[i].sd_g,h->f[i].sc);
    }

    /* See if there is a file for the chosen window length/weight scheme */
    which = -1;
    for(i=0; i<(ajuint)h->n; ++i)
    {
	if((h->f[i].win == window) && (h->f[i].w == weighted))
	{				/* match */
	    if(verb)
		ajFmtPrintF(outf,"Found fitting data for win %4d w %d\n",
			    window,weighted);
	    which = i;
	}
    }

    while(ajSeqallNext(seqall, &ajseq))
    {

	seqdes = ajSeqGetDescS(ajseq);
	newcoils_pred_coils(outf,ajSeqGetSeqC(ajseq),ajSeqGetNameC(ajseq),
			    seqdes,h,window,
			    which,weighted,mode,min_P,&t,&tc,min_seg); 

    }

    if (outf)
	ajFileClose(&outf);

    ajSeqDel(&ajseq);

    embExit();
    return 0;
}