Пример #1
0
static void remap_RenamePreferred(const AjPList list, const AjPTable table, 
				   AjPList newlist) 
{
    AjIList iter = NULL;
    AjPStr key   = NULL;
    const AjPStr value = NULL;
    AjPStr name  = NULL;

    iter = ajListIterNewread(list);
               
    while((key = (AjPStr)ajListIterGet(iter)))
    {
        /* 
        ** If a key-value entry found, write the new value to the new list
        ** else write the old key name to the new list
        */
        value = ajTableFetchS(table, key);
        name = ajStrNew();
        if(value)
	{
	    ajDebug("Rename: %S renamed to %S\n", key, value);
            ajStrAssignS(&name, value);
	}
	else
	{
	    ajDebug("Rename: %S not found\n", key);
	    ajStrAssignS(&name, key);
	}
        ajListstrPushAppend(newlist, name);
    }
                      
    ajListIterDel(&iter);

    return; 
}
Пример #2
0
static void remap_RemoveMinMax(AjPList restrictlist,
	AjPTable hittable, ajint mincuts, ajint maxcuts)
{

    AjIList miter;		/* iterator for matches list */
    EmbPMatMatch m = NULL;	/* restriction enzyme match structure */
    PValue value;
    AjPStr key  = NULL;
    AjPStr keyv = NULL;


    key = ajStrNew();

    /* if no hits then ignore much of this routine */
    if(ajListGetLength(restrictlist))
    {
        /* count the enzymes */
	miter = ajListIterNewread(restrictlist);
	while((m = ajListIterGet(miter)) != NULL)
	{
	    ajStrAssignS(&key, m->cod);

	    /* increment the count of key */
	    value = (PValue) ajTableFetchmodS(hittable, key);
	    if(value == NULL)
	    {
		AJNEW0(value);
		value->count = 1;
		value->iso = ajStrNew();
		ajStrAssignS(&(value->iso), m->iso);
		keyv = ajStrNew();
		ajStrAssignS(&keyv,key);
		ajTablePut(hittable, (void *)keyv, (void *)value);
	    }
	    else
		value->count++;
	}
	ajListIterDel(&miter);


	/* now remove enzymes from restrictlist if <mincuts | >maxcuts */
	miter = ajListIterNew(restrictlist);
	while((m = ajListIterGet(miter)) != NULL)
	{
	    value = (PValue) ajTableFetchmodS(hittable, (m->cod));
            if(value->count < mincuts || value->count > maxcuts)
	    {
            	ajListIterRemove(miter);
                embMatMatchDel(&m);
            }
	}
	ajListIterDel(&miter);
    }

    ajStrDel(&key);

    return;
}
Пример #3
0
static void primersearch_print_hits(const AjPList primerList, AjPFile outf)
{
    /* iterate through list of hits */
    AjIList lIter;

    ajint count = 1;
    lIter = ajListIterNewread(primerList);
    while(!ajListIterDone(lIter))
    {
	Primer primer = ajListIterGet(lIter);
	AjIList hIter = ajListIterNewread(primer->hitlist);
	count = 1;

	ajFmtPrintF(outf, "\nPrimer name %s\n", ajStrGetPtr(primer->Name));

	while(!ajListIterDone(hIter))
	{
	    PHit hit = ajListIterGet(hIter);
	    ajFmtPrintF(outf, "Amplimer %d\n", count);
	    ajFmtPrintF(outf, "\tSequence: %s %s \n\t%s\n",
			ajStrGetPtr(hit->seqname),
			ajStrGetPtr(hit->acc), ajStrGetPtr(hit->desc));
	    ajFmtPrintF(outf, "\t%s hits forward strand at %d with %d "
			"mismatches\n",
			ajStrGetPtr(hit->forward), hit->forward_pos,
			hit->forward_mismatch);
	    ajFmtPrintF(outf, "\t%s hits reverse strand at [%d] with %d "
			"mismatches\n",
			ajStrGetPtr(hit->reverse), (hit->reverse_pos),
			(hit->reverse_mismatch));
	    ajFmtPrintF(outf, "\tAmplimer length: %d bp\n", hit->amplen);
	    count++;
	}
	ajListIterDel(&hIter);
    }
    ajListIterDel(&lIter);

    return;
}
Пример #4
0
static void splitter_AddSubSeqFeat(AjPFeattable ftable, ajuint start,
                                   ajuint end, const AjPSeq oldseq)
{
    AjPFeattable old_feattable = NULL;
    AjIList iter = NULL;

    old_feattable = ajSeqGetFeatCopy(oldseq);

    if(!old_feattable)
        return;

    iter = ajListIterNewread(old_feattable->Features);

    while(!ajListIterDone(iter))
    {
        AjPFeature gf = ajListIterGet(iter);

        AjPFeature copy = NULL;


        if (((ajFeatGetEnd(gf) < start + 1) &&
             (gf->End2 == 0 || gf->End2 < start + 1)) ||
            ((ajFeatGetStart(gf) > end + 1) &&
             (gf->Start2 == 0 || gf->Start2 > end + 1)))
        {
            continue;
        }

        copy = ajFeatNewFeat(gf);
        copy->Start = copy->Start - start;
        copy->End = copy->End - start;

        if (copy->Start2 > 0)
            copy->Start2 = copy->Start2 - start;

        if (copy->End2 > 0)
            copy->End2 = copy->End2 - start;

        ajFeatTrimOffRange (copy, 0, 1, end - start + 1, AJTRUE, AJTRUE);

        ajFeattableAdd(ftable, copy);
    }

    ajFeattableDel(&old_feattable);
    ajListIterDel(&iter);

    return;
}
Пример #5
0
static void primersearch_clean_hitlist(AjPList* hlist)
{
    AjIList lIter;

    lIter = ajListIterNewread(*hlist);
    while(!ajListIterDone(lIter))
    {
	EmbPMatMatch fm = ajListIterGet(lIter);
	embMatMatchDel(&fm);
    }
    ajListFree(hlist);
    ajListFree(hlist);
    ajListIterDel(&lIter);

    return;
}
Пример #6
0
static void supermatcher_matchListOrder(void **x,void *cl)
{
    EmbPWordMatch p;
    AjPList ordered;
    ajint offset;
    AjIList listIter;
    concat *con;
    concat *c=NULL;

    p = (EmbPWordMatch)*x;
    ordered = (AjPList) cl;

    offset = (*p).seq1start-(*p).seq2start;

    /* iterate through ordered list to find if it exists already*/
    listIter = ajListIterNewread(ordered);

    while(!ajListIterDone( listIter))
    {
	con = ajListIterGet(listIter);
	if(con->offset == offset)
	{
	    /* found so add count and set offset to the new value */
	    con->offset = offset;
	    con->total+= (*p).length;
	    con->count++;
	    ajListPushAppend(con->list,p);
	    ajListIterDel(&listIter);
	    return;
	}
    }
    ajListIterDel(&listIter);

    /* not found so add it */
    AJNEW(c);
    c->offset = offset;
    c->total  = (*p).length;
    c->count  = 1;
    c->list   = ajListNew();
    ajListPushAppend(c->list,p);
    ajListPushAppend(ordered, c);

    return;
}
Пример #7
0
static void primersearch_primer_search(const AjPList primerList,
				       const AjPSeq seq)
{
    AjIList listIter;

    /* test each list node against this sequence */
    listIter = ajListIterNewread(primerList);
    while(!ajListIterDone(listIter))
    {
	Primer curr_primer = ajListIterGet(listIter);

	primersearch_scan_seq(curr_primer, seq, AJFALSE);
	primersearch_scan_seq(curr_primer, seq, AJTRUE);
    }

    ajListIterDel(&listIter);

    return;
}
Пример #8
0
AjBool ajRestermlistClone(const AjPList src, AjPList dest)
{
    AjIList iter;
    AjPResterm termout = NULL;
    AjPResterm termin = NULL;

    if(ajListGetLength(dest))
	return ajFalse;

    iter = ajListIterNewread(src);

    while ((termin = (AjPResterm) ajListIterGet(iter)))
    {
	termout = ajRestermNewResterm(termin);
	ajListPushAppend(dest, termout);
    }

    ajListIterDel(&iter);

    return ajTrue;
}
Пример #9
0
AjBool ajResquerylistClone(const AjPList src, AjPList dest)
{
    AjIList iter;
    AjPResquery qryout = NULL;
    AjPResquery qryin = NULL;

    if(ajListGetLength(dest))
	return ajFalse;

    iter = ajListIterNewread(src);

    while ((qryin = (AjPResquery) ajListIterGet(iter)))
    {
	qryout = ajResqueryNewResquery(qryin);
	ajListPushAppend(dest, qryout);
    }

    ajListIterDel(&iter);

    return ajTrue;
}
Пример #10
0
static void primersearch_free_primer(void **x, void *cl)
{
    Primer* p;
    Primer primdata;
    AjIList lIter;

    (void) cl;				/* make it used */

    p = (Primer*) x;
    primdata = *p;

    primersearch_free_pguts(&primdata->forward);
    primersearch_free_pguts(&primdata->reverse);
    ajStrDel(&primdata->Name);

    /* clean up hitlist */
    lIter = ajListIterNewread(primdata->hitlist);
    while(!ajListIterDone(lIter))
    {
	PHit phit = ajListIterGet(lIter);
	ajStrDel(&phit->forward);
	ajStrDel(&phit->reverse);
	ajStrDel(&phit->seqname);
	ajStrDel(&phit->acc);
	ajStrDel(&phit->desc);

	AJFREE(phit);
    }

    ajListFree(&primdata->hitlist);
    ajListFree(&primdata->hitlist);
    ajListIterDel(&lIter);

    AJFREE(primdata);

    return;
}
Пример #11
0
int main(int argc, char **argv)
{
    AjPList list = NULL;
    AjPSeq seq;
    AjPSeq seq2;
    AjPStr aa0str = 0;
    AjPStr aa1str = 0;
    const char *s1;
    const char *s2;
    char *strret = NULL;
    ajuint i;
    ajuint j;
    ajuint k;
    ajint l;
    ajint abovethresh;
    ajint total;
    ajint starti = 0;
    ajint startj = 0;
    ajint windowsize;
    float thresh;
    AjPGraph graph   = NULL;
    AjPGraph xygraph = NULL;
    float flen1;
    float flen2;
    ajuint len1;
    ajuint len2;

    AjPTime ajtime = NULL;
    time_t tim;
    AjBool boxit=AJTRUE;
    /* Different ticks as they need to be different for x and y due to
       length of string being important on x */
    ajuint acceptableticksx[]=
    {
	1,10,50,100,500,1000,1500,10000,
	500000,1000000,5000000
    };
    ajuint acceptableticks[]=
    {
	1,10,50,100,200,500,1000,2000,5000,10000,15000,
	500000,1000000,5000000
    };
    ajint numbofticks = 10;
    float xmargin;
    float ymargin;
    float ticklen;
    float tickgap;
    float onefifth;
    float k2;
    float max;
    char ptr[10];
    AjPMatrix matrix = NULL;
    ajint** sub;
    AjPSeqCvt cvt;
    AjPStr  subt = NULL;

    ajint b1;
    ajint b2;
    ajint e1;
    ajint e2;
    AjPStr se1;
    AjPStr se2;
    ajint ithresh;
    AjBool stretch;
    PPoint ppt = NULL;
    float xa[1];
    float ya[1];
    AjPGraphdata gdata=NULL;
    AjPStr tit   = NULL;
    AjIList iter = NULL;
    float x1 = 0.;
    float x2 = 0.;
    float y1 = 0.;
    float y2 = 0.;
    ajuint tui;
    
    se1 = ajStrNew();
    se2 = ajStrNew();

    embInit("dotmatcher", argc, argv);
    
    seq        = ajAcdGetSeq("asequence");
    seq2       = ajAcdGetSeq("bsequence");
    stretch    = ajAcdGetToggle("stretch");
    graph      = ajAcdGetGraph("graph");
    xygraph    = ajAcdGetGraphxy("xygraph");
    windowsize = ajAcdGetInt("windowsize");
    ithresh    = ajAcdGetInt("threshold");
    matrix     = ajAcdGetMatrix("matrixfile");
    
    sub = ajMatrixGetMatrix(matrix);
    cvt = ajMatrixGetCvt(matrix);
    
    thresh = (float)ithresh;

    ajtime = ajTimeNew();

    tim = time(0);
    ajTimeSetLocal(ajtime, tim);
    
    b1 = ajSeqGetBegin(seq);
    b2 = ajSeqGetBegin(seq2);
    e1 = ajSeqGetEnd(seq);
    e2 = ajSeqGetEnd(seq2);
    len1 = ajSeqGetLen(seq);
    len2 = ajSeqGetLen(seq2);
    tui   = ajSeqGetLen(seq);
    flen1 = (float) tui;
    tui   = ajSeqGetLen(seq2);
    flen2 = (float) tui;
    
    ajStrAssignSubC(&se1,ajSeqGetSeqC(seq),b1-1,e1-1);
    ajStrAssignSubC(&se2,ajSeqGetSeqC(seq2),b2-1,e2-1);
    ajSeqAssignSeqS(seq,se1);
    ajSeqAssignSeqS(seq2,se2);
    
    
    s1 = ajStrGetPtr(ajSeqGetSeqS(seq));
    s2 = ajStrGetPtr(ajSeqGetSeqS(seq2));
    
    
    aa0str = ajStrNewRes(1+len1); /* length plus trailing blank */
    aa1str = ajStrNewRes(1+len2);
    
    list = ajListNew();
    
    
    for(i=0;i<len1;i++)
	ajStrAppendK(&aa0str,(char)ajSeqcvtGetCodeK(cvt, *s1++));
    
    for(i=0;i<len2;i++)
	ajStrAppendK(&aa1str,(char)ajSeqcvtGetCodeK(cvt, *s2++));
    
    max = (float)len1;
    if(len2 > max)
	max = (float) len2;
    
    xmargin = ymargin = max *(float)0.15;
    ticklen = xmargin*(float)0.1;
    onefifth  = xmargin*(float)0.2;
    
    subt = ajStrNewC((strret=
		      ajFmtString("(windowsize = %d, threshold = %3.2f  %D)",
				  windowsize,thresh,ajtime)));
    
    

    if(!stretch)
    {
	if( ajStrGetLen(ajGraphGetSubtitleS(graph)) <=1)
	    ajGraphSetSubtitleS(graph,subt);

	ajGraphOpenWin(graph, (float)0.0-ymargin,(max*(float)1.35)+ymargin,
		       (float)0.0-xmargin,(float)max+xmargin);

	ajGraphicsDrawposTextAtmid(flen1*(float)0.5,
                                   (float)0.0-(xmargin/(float)2.0),
		       ajGraphGetXlabelC(graph));
	ajGraphicsDrawposTextAtlineJustify((float)0.0-(xmargin*(float)0.75),
                                           flen2*(float)0.5,
			(float)0.0-(xmargin*(float)0.75),flen1,
			ajGraphGetYlabelC(graph),0.5);

	ajGraphicsSetCharscale(0.5);
    }
    
    
    
    s1= ajStrGetPtr(aa0str);
    s2 = ajStrGetPtr(aa1str);
    
    for(j=0; (ajint)j < (ajint)len2-windowsize;j++)
    {
	i =0;
	total = 0;
	abovethresh =0;

	k = j;
	for(l=0;l<windowsize;l++)
	    total = total + sub[(ajint)s1[i++]][(ajint)s2[k++]];

	if(total >= thresh)
	{
	    abovethresh=1;
	    starti = i-windowsize;
	    startj = k-windowsize;
	}

	while(i < len1 && k < len2)
	{
	    total = total - sub[(ajint)s1[i-windowsize]]
		[(ajint)s2[k-windowsize]];
	    total = total + sub[(ajint)s1[i]][(ajint)s2[k]];

	    if(abovethresh)
	    {
		if(total < thresh)
		{
		    abovethresh = 0;
		    /* draw the line */
		    dotmatcher_pushpoint(&list,(float)starti,(float)startj,
					 (float)i-1,(float)k-1,stretch);
		}
	    }
	    else if(total >= thresh)
	    {
		starti = i-windowsize;
		startj = k-windowsize;
		abovethresh= 1;
	    }
	    i++;
	    k++;
	}

	if(abovethresh)
	    /* draw the line */
	    dotmatcher_pushpoint(&list,(float)starti,(float)startj,
				 (float)i-1,(float)k-1,
				 stretch);
    }
    
    for(i=0; (ajint)i < (ajint)len1-windowsize;i++)
    {
	j = 0;
	total = 0;
	abovethresh =0;

	k = i;
	for(l=0;l<windowsize;l++)
	    total = total + sub[(ajint)s1[k++]][(ajint)s2[j++]];

	if(total >= thresh)
	{
	    abovethresh=1;
	    starti = k-windowsize;
	    startj = j-windowsize;
	}

	while(k < len1 && j < len2)
	{
	    total = total - sub[(ajint)s1[k-windowsize]]
		[(ajint)s2[j-windowsize]];
	    total = total + sub[(ajint)s1[k]][(ajint)s2[j]];

	    if(abovethresh)
	    {
		if(total < thresh)
		{
		    abovethresh = 0;
		    /* draw the line */
		    dotmatcher_pushpoint(&list,(float)starti,(float)startj,
					 (float)k-1,(float)j-1,stretch);
		}
	    }
	    else if(total >= thresh)
	    {
		starti = k-windowsize;
		startj = j-windowsize;
		abovethresh= 1;
	    }
	    j++;
	    k++;
	}

	if(abovethresh)
	    /* draw the line */
	    dotmatcher_pushpoint(&list,(float)starti,(float)startj,
				 (float)k-1,(float)j-1,
				 stretch);
    }
    
    if(boxit && !stretch)
    {
	ajGraphicsDrawposRect(0.0,0.0,flen1, flen2);

	i=0;
	while(acceptableticksx[i]*numbofticks < len1)
	    i++;

	if(i<=13)
	    tickgap = (float)acceptableticksx[i];
	else
	    tickgap = (float)acceptableticksx[10];
	ticklen   = xmargin*(float)0.1;
	onefifth  = xmargin*(float)0.2;

	if(len2/len1 > 10 )
	{
	    /* if a lot smaller then just label start and end */
	    ajGraphicsDrawposLine((float)0.0,(float)0.0,(float)0.0,(float)0.0-ticklen);
	    sprintf(ptr,"%d",b1-1);
	    ajGraphicsDrawposTextAtmid((float)0.0,(float)0.0-(onefifth),ptr);

	    ajGraphicsDrawposLine(flen1,(float)0.0,
			flen1,(float)0.0-ticklen);
	    sprintf(ptr,"%d",len1+b1-1);
	    ajGraphicsDrawposTextAtmid(flen1,(float)0.0-(onefifth),ptr);

	}
	else
	    for(k2=0.0;k2<len1;k2+=tickgap)
	    {
		ajGraphicsDrawposLine(k2,(float)0.0,k2,(float)0.0-ticklen);
		sprintf(ptr,"%d",(ajint)k2+b1-1);
		ajGraphicsDrawposTextAtmid(k2,(float)0.0-(onefifth),ptr);
	    }

	i = 0;
	while(acceptableticks[i]*numbofticks < len2)
	    i++;

	tickgap   = (float)acceptableticks[i];
	ticklen   = ymargin*(float)0.01;
	onefifth  = ymargin*(float)0.02;

	if(len1/len2 > 10 )
	{
	    /* if a lot smaller then just label start and end */
	    ajGraphicsDrawposLine((float)0.0,(float)0.0,(float)0.0-ticklen,(float)0.0);
	    sprintf(ptr,"%d",b2-1);
	    ajGraphicsDrawposTextAtend((float)0.0-(onefifth),(float)0.0,ptr);

	    ajGraphicsDrawposLine((float)0.0,flen2,(float)0.0-ticklen,
			flen2);
	    sprintf(ptr,"%d",len2+b2-1);
	    ajGraphicsDrawposTextAtend((float)0.0-(onefifth),flen2,ptr);
	}
	else
	    for(k2=0.0;k2<len2;k2+=tickgap)
	    {
		ajGraphicsDrawposLine((float)0.0,k2,(float)0.0-ticklen,k2);
		sprintf(ptr,"%d",(ajint)k2+b2-1);
		ajGraphicsDrawposTextAtend((float)0.0-(onefifth),k2,ptr);
	    }
    }
    
    
    if(!stretch)
	ajGraphicsClose();
    else			/* the xy graph for -stretch */
    {
	tit = ajStrNew();
	ajFmtPrintS(&tit,"%S",ajGraphGetTitleS(xygraph));


	gdata = ajGraphdataNewI(1);
	xa[0] = (float)b1;
	ya[0] = (float)b2;

	ajGraphSetTitleC(xygraph,ajStrGetPtr(tit));

	ajGraphSetXlabelC(xygraph,ajSeqGetNameC(seq));
	ajGraphSetYlabelC(xygraph,ajSeqGetNameC(seq2));

	ajGraphdataSetTypeC(gdata,"2D Plot Float");
	ajGraphdataSetTitleS(gdata,subt);
	ajGraphdataSetMinmax(gdata,(float)b1,(float)e1,(float)b2,
			       (float)e2);
	ajGraphdataSetTruescale(gdata,(float)b1,(float)e1,(float)b2,
			       (float)e2);
	ajGraphxySetXstartF(xygraph,(float)b1);
	ajGraphxySetXendF(xygraph,(float)e1);
	ajGraphxySetYstartF(xygraph,(float)b2);
	ajGraphxySetYendF(xygraph,(float)e2);

	ajGraphxySetXrangeII(xygraph,b1,e1);
	ajGraphxySetYrangeII(xygraph,b2,e2);


	if(list)
	{
	    iter = ajListIterNewread(list);
	    while((ppt = ajListIterGet(iter)))
	    {
		x1 = ppt->x1+b1-1;
		y1 = ppt->y1+b2-1;
		x2 = ppt->x2+b1-1;
		y2 = ppt->y2+b2-1;
		ajGraphAddLine(xygraph,x1,y1,x2,y2,0);
		AJFREE(ppt);
	    }
	    ajListIterDel(&iter);
	}

	ajGraphdataAddXY(gdata,xa,ya);
	ajGraphDataReplace(xygraph,gdata);


	ajGraphxyDisplay(xygraph,ajFalse);
	ajGraphicsClose();

	ajStrDel(&tit);
    }
    
    
    
    ajListFree(&list);

    ajSeqDel(&seq);
    ajSeqDel(&seq2);
    ajGraphxyDel(&graph);
    ajGraphxyDel(&xygraph);
    ajMatrixDel(&matrix);
    ajTimeDel(&ajtime);
    
    /* deallocate memory */
    ajStrDel(&aa0str);
    ajStrDel(&aa1str);
    ajStrDel(&se1);
    ajStrDel(&se2);
    ajStrDel(&subt);

    AJFREE(strret);			/* created withing ajFmtString */
    
    embExit();

    return 0;
}
Пример #12
0
static void extractfeat_FeatSeqExtract(const AjPSeq seq, AjPSeqout seqout,
				       AjPFeattable featab, ajint before,
				       ajint after, AjBool join,
				       AjBool featinname,
				       const AjPStr describe)
{
    AjIList iter = NULL;
    AjPFeature gf = NULL;
    AjBool  single;		/* ajtrue = is not a multiple */
    AjBool  parent;		/* ajtrue = is a parent of a multiple */
    AjBool  child;		/* ajTrue = is a child of a multiple */
    AjBool  compall;		/* ajTrue = reverse comp all of join */
    AjBool  sense;		/* ajTrue = forward sense */
    AjBool  remote;		/* ajTrue = remote ID */
    AjPStr  type = NULL;	/* name of feature */
    AjPStr  featseq = NULL;	/* feature sequence string */
    AjPStr  tmpseq = NULL;	/* temporary sequence string */
    ajint   firstpos;
    ajint   lastpos;	        /* bounds of feature in sequence */
    AjPStr  describeout = NULL;	/* tag names/values to add to descriptions */
    ajuint count = 0;

    /* For all features... */
    if(featab && ajFeattableGetSize(featab))
    {
	/* initialise details of a feature */
        featseq = ajStrNew();
        tmpseq  = ajStrNew();
        type    = ajStrNew();
        remote  = ajFalse;
        compall = ajFalse;
        sense   = ajTrue;
        firstpos = 0;
        lastpos  = 0;
        describeout = ajStrNew();


	iter = ajListIterNewread(featab->Features);
	while(!ajListIterDone(iter))
	{
	    gf = ajListIterGet(iter) ;

	    /*
	    ** Determine what sort of thing this feature is. Only one of
	    ** these will be true.
	    ** True if this is part of a multiple join and it is not
	    ** the parent
	    */
	    child = ajFalse;

	    /* True if this is part of a multiple join and it is the parent */
	    parent = ajFalse;

	    /* True if this is not part of a multiple join */
	    single = ajFalse;

            if(ajFeatIsMultiple(gf))
	    {
            	if(ajFeatIsChild(gf))
            	    child = ajTrue;
		else
            	    parent = ajTrue;
            }
	    else
            	single = ajTrue;

	    /* 
	    ** If not wish to assembling joins(), then force all features
	    ** to be treated as single 
	    */
	    if(!join)
	    {
	    	child = ajFalse;
	    	parent = ajFalse;
	    	single = ajTrue;
	    }



	    ajDebug("feature %S %d-%d is parent %B, child %B, single %B\n",
		    ajFeatGetType(gf), ajFeatGetStart(gf), ajFeatGetEnd(gf),
		    parent, child, single);
/*
	    ajUser("feature %S %d-%d is parent %B, child %B, single %B",
		    ajFeatGetType(gf), ajFeatGetStart(gf), ajFeatGetEnd(gf),
		    parent, child, single);
*/
	    /*
	    ** If single or parent, write out any stored previous feature
	    ** sequence
	    */	    
            if(count++ && !child)
	    {
            	extractfeat_WriteOut(seqout, &featseq, compall, sense,
				     firstpos, lastpos, before, after, seq,
				     remote, type, 
				     featinname, describeout);

                /* reset joined feature information */
                ajStrSetClear(&featseq);
                ajStrSetClear(&tmpseq);
                ajStrSetClear(&type);
		ajStrSetClear(&describeout);
                remote = ajFalse;
                compall = ajFalse;
                sense = ajTrue;
                firstpos = 0;
                lastpos = 0;
            }


	    /* if parent, note if have Complemented Join */
            if(parent)
                compall = ajFeatIsCompMult(gf);

	    /*
	    ** Get the sense of the feature
	    ** NB.  if complementing several joined features, then pretend they
	    ** are forward sense until its possible to  reverse-complement
	    ** them all together.
	    */
	    if(!compall && ajFeatGetStrand(gf) == '-')
	        sense = ajFalse;
	    
	    /* get 'type' name of feature */
	    if(single || parent)
	    	ajStrAssignS(&type, ajFeatGetType(gf));
	    
	    /*
	    ** if single or parent, get 'before' + 'after' sequence
	    ** positions
	    */
            if(single || parent)
	    {
                firstpos = ajFeatGetStart(gf)-1;
                lastpos = ajFeatGetEnd(gf)-1;
            }
	    
	    /* if child, update the boundary positions */
            if(child)
	    {
                if(sense)
                    lastpos = ajFeatGetEnd(gf)-1;
		else
		    firstpos = ajFeatGetStart(gf)-1;
            }
	    
            extractfeat_MatchPatternDescribe(gf, describe, &describeout);
	    
	    /* get feature sequence(complement if required) */
            if(!child)
            {
                if(join)
                    ajFeatGetSeqJoin(gf, featab, seq, &tmpseq);
                else
                    ajFeatGetSeq(gf, seq, &tmpseq);
                ajDebug("extracted feature = %d bases\n", ajStrGetLen(tmpseq));
                /*ajUser("extracted feature = %d bases", ajStrGetLen(tmpseq));*/
            	ajStrAssignS(&featseq, tmpseq);
	    }
	}
	ajListIterDel(&iter) ;
	
	/*
	** write out any previous sequence(s)
	** - add before + after, complement all
	*/
        extractfeat_WriteOut(seqout, &featseq, compall, sense,
			     firstpos, lastpos, before, after,
			     seq, remote, type, 
			     featinname, describeout);
	
        ajStrDel(&featseq);
        ajStrDel(&tmpseq);
        ajStrDel(&type);
        ajStrDel(&describeout);
    }
    
    return;
}
Пример #13
0
static AjBool assemoutWriteNextBam(AjPOutfile outfile, const AjPAssem assem)
{
    AjPFile outf = ajOutfileGetFile(outfile);
    AjPSeqBamHeader header = NULL;
    AjPAssemContig c = NULL;
    AjPSeqBam bam;
    AjPAssemRead   r = NULL;
    AjPAssemContig* contigs = NULL;
    AjPAssemTag    t = NULL;
    AjIList j = NULL;
    AjPSeqBamBgzf gzfile = NULL;
    AjPStr headertext=NULL;
    const AjPStr rgheadertext=NULL;
    AjBool ret = ajTrue;
    ajint i=0;
    ajulong ncontigs=0UL;

    if(!outf) return ajFalse;
    if(!assem) return ajFalse;

    if(!assem->Hasdata)
    {
        if(ajListGetLength(assem->ContigsOrder))
            ncontigs = ajListToarray(assem->ContigsOrder, (void***)&contigs);
        else
            ncontigs = ajTableToarrayValues(assem->Contigs, (void***)&contigs);

        ajFmtPrintS(&headertext, "@HD\tVN:1.3\tSO:%s\n",
                    ajAssemGetSortorderC(assem));
        header = ajSeqBamHeaderNewN((ajuint) ncontigs);

        gzfile = ajSeqBamBgzfNew(ajFileGetFileptr(outf), "w");
        outfile->OutData = gzfile;

        while (contigs[i])   /* contigs */
        {
            c = contigs[i];

            if(ajStrMatchC(c->Name, "*"))
            {
                i++;
                continue;
            }

            header->target_name[i] = strdup(ajStrGetPtr(c->Name));
            header->target_len[i++] = c->Length;

            ajFmtPrintAppS(&headertext, "@SQ\tSN:%S\tLN:%d",
                           c->Name, c->Length);

            if(c->URI)
                ajFmtPrintAppS(&headertext, "\tUR:%S", c->URI);

            if(c->MD5)
                ajFmtPrintAppS(&headertext, "\tM5:%S", c->MD5);

            if(c->Species)
                ajFmtPrintAppS(&headertext, "\tSP:%S", c->Species);

            ajFmtPrintAppS(&headertext, "\n");


            j = ajListIterNewread(c->Tags);
            while (!ajListIterDone(j))
            {
                t = ajListIterGet(j);
                ajFmtPrintAppS(&headertext,
                               "@CO\t%S %u %u %S\n", t->Name, t->x1, t->y1,
                               t->Comment);
            }
            ajListIterDel(&j);
        }

        rgheadertext = assemSAMGetReadgroupHeaderlines(assem);
        if(rgheadertext)
            ajStrAppendS(&headertext, rgheadertext);

        ajSeqBamHeaderSetTextC(header, ajStrGetPtr(headertext));
        ajSeqBamHeaderWrite(gzfile, header);

        ajSeqBamHeaderDel(&header);
        ajStrDel(&headertext);

        AJFREE(contigs);

        if(!assem->BamHeader)
            return ajTrue;
    }

    /* data */

    gzfile = outfile->OutData;

    AJNEW0(bam);
    bam->m_data=10;
    AJCNEW0(bam->data, bam->m_data);

    j = ajListIterNewread(assem->Reads);

    while (!ajListIterDone(j))  /* reads */
    {
	r = ajListIterGet(j);
	assemoutWriteBamAlignment(gzfile, r, bam);
    }

    ajListIterDel(&j);

    AJFREE(bam->data);
    AJFREE(bam);

    /* ajSeqBamBgzfClose(gzfile);*/

    return ret;
}
Пример #14
0
void ajResourceTrace(const AjPResource thys)
{
    AjIList iter;
    AjPStr tmpstr;
    ajuint i;
    AjPReslink reslink = NULL;
    AjPResquery resqry = NULL;
    AjPResterm resterm = NULL;
    if(!thys)
    {
        ajDebug("ajResourceTrace NULL\n");
        return;
    }

    ajDebug("\najResourceTrace\n");
    ajDebug("        Id: %S\n", thys->Id);
    ajDebug("       Acc: %S\n", thys->Acc);
    ajDebug("      Name: %S\n", thys->Name);
    ajDebug("      Desc: %S\n", thys->Desc);
    ajDebug("       Url: %S\n", thys->Url);

    ajDebug("     Idalt: %Lu\n", ajListGetLength(thys->Idalt));
    if(ajListGetLength(thys->Idalt))
    {
        i=0;
        iter = ajListIterNewread(thys->Idalt);
        while(!ajListIterDone(iter))
        {
            tmpstr = ajListIterGet(iter);
            ajDebug("%15d: %S\n", ++i, tmpstr);
        }
        ajListIterDel(&iter);
    }
    
    ajDebug("       Cat: %Lu\n", ajListGetLength(thys->Cat));
    if(ajListGetLength(thys->Cat))
    {
        i = 0;
        iter = ajListIterNewread(thys->Cat);
        while(!ajListIterDone(iter))
        {
            tmpstr = ajListIterGet(iter);
            ajDebug("%15d: %S\n", ++i, tmpstr);
        }
        ajListIterDel(&iter);
    }
    
    ajDebug("     Taxon: %Lu\n", ajListGetLength(thys->Taxon));
    if(ajListGetLength(thys->Taxon))
    {
        i = 0;
        iter = ajListIterNewread(thys->Taxon);
        while(!ajListIterDone(iter))
        {
            resterm = ajListIterGet(iter);
            ajDebug("%15d: %S | %S\n", ++i,  resterm->Id, resterm->Name);
        }
        ajListIterDel(&iter);
    }
    
    ajDebug("   Edamtpc: %Lu\n", ajListGetLength(thys->Edamtpc));
    if(ajListGetLength(thys->Edamtpc))
    {
        i = 0;
        iter = ajListIterNewread(thys->Edamtpc);
        while(!ajListIterDone(iter))
        {
            resterm = ajListIterGet(iter);
            ajDebug("%15d: %S | %S\n", ++i,  resterm->Id, resterm->Name);
        }
        ajListIterDel(&iter);
    }
    
    ajDebug("   Edamdat: %Lu\n", ajListGetLength(thys->Edamdat));
    if(ajListGetLength(thys->Edamdat))
    {
        i = 0;
        iter = ajListIterNewread(thys->Edamdat);
        while(!ajListIterDone(iter))
        {
            resterm = ajListIterGet(iter);
            ajDebug("%15d: %S | %S\n", ++i, resterm->Id, resterm->Name);
        }
        ajListIterDel(&iter);
    }
    
    ajDebug("    Edamid: %Lu\n", ajListGetLength(thys->Edamid));
    if(ajListGetLength(thys->Edamid))
    {
        i = 0;
        iter = ajListIterNewread(thys->Edamid);
        while(!ajListIterDone(iter))
        {
            resterm = ajListIterGet(iter);
            ajDebug("%15d: %S | %S\n", ++i,  resterm->Id, resterm->Name);
        }
        ajListIterDel(&iter);
    }
    
    ajDebug("   Edamfmt: %Lu\n", ajListGetLength(thys->Edamfmt));
    if(ajListGetLength(thys->Edamfmt))
    {
        i = 0;
        iter = ajListIterNewread(thys->Edamfmt);
        while(!ajListIterDone(iter))
        {
            resterm = ajListIterGet(iter);
            ajDebug("%15d: %S | %S\n", ++i,  resterm->Id, resterm->Name);
        }
        ajListIterDel(&iter);
    }
    
    ajDebug("      Xref: %Lu\n", ajListGetLength(thys->Xref));
    if(ajListGetLength(thys->Xref))
    {
        i = 0;
        iter = ajListIterNewread(thys->Xref);
        while(!ajListIterDone(iter))
        {
            reslink = ajListIterGet(iter);
            ajDebug("%15d: %S | %S\n", ++i, reslink->Source, reslink->Term);
        }
        ajListIterDel(&iter);
    }
    
    ajDebug("     Query: %Lu\n", ajListGetLength(thys->Query));
    if(ajListGetLength(thys->Query))
    {
        i = 0;
        iter = ajListIterNewread(thys->Query);
        while(!ajListIterDone(iter))
        {
            resqry = ajListIterGet(iter);
            ajDebug("%15d: %S | %S | %S | %S\n",
                    ++i, resqry->Datatype, resqry->Format,
                    resqry->Term, resqry->Url);
        }
        ajListIterDel(&iter);
    }
    
    ajDebug("  Example: %Lu\n", ajListGetLength(thys->Example));
    if(ajListGetLength(thys->Example))
    {
        i = 0;
        iter = ajListIterNewread(thys->Example);
        while(!ajListIterDone(iter))
        {
            tmpstr = ajListIterGet(iter);
            ajDebug("%15d: %S\n", ++i, tmpstr);
        }
        ajListIterDel(&iter);
    }
    
    return;
}
Пример #15
0
static AjBool assemoutWriteNextSam(AjPOutfile outfile, const AjPAssem assem)
{
    AjPFile outf = ajOutfileGetFile(outfile);
    AjPAssemContig c = NULL;
    AjPAssemRead   r = NULL;
    AjPAssemTag    t = NULL;
    AjPAssemContig* contigs = NULL;
    AjIList j = NULL;
    AjPStr argstr = NULL;
    const AjPStr headertext = NULL;
    ajint n = 0;
    ajulong i = 0UL;
    AjBool ret = ajTrue;

    if(!outf || !assem)
	return ajFalse;

    ajDebug("assemoutWriteSam: # of contigs = %d\n", n);

    if(!assem->Hasdata)
    {
        ajFmtPrintF(outf, "@HD\tVN:1.3\tSO:%s\n", ajAssemGetSortorderC(assem));

        /* Program record */
        argstr = ajStrNewS(ajUtilGetCmdline());
        ajStrExchangeKK(&argstr, '\n', ' ');
        ajFmtPrintF(outf, "@PG\tID:%S\tVN:%S\tCL:%S\n",
                    ajUtilGetProgram(), ajNamValueVersion(), argstr);
        ajStrDel(&argstr);


        if(ajListGetLength(assem->ContigsOrder))
            ajListToarray(assem->ContigsOrder, (void***)&contigs);
        else
            ajTableToarrayValues(assem->Contigs, (void***)&contigs);

        while (contigs[i])   /* contigs */
        {
            c = contigs[i++];

            if(!ajStrMatchC(c->Name, "*"))
            {
                ajFmtPrintF(outf, "@SQ\tSN:%S\tLN:%d", c->Name, c->Length);

                if(c->URI)
                    ajFmtPrintF(outf, "\tUR:%S", c->URI);

                if(c->MD5)
                    ajFmtPrintF(outf, "\tM5:%S", c->MD5);

                if(c->Species)
                    ajFmtPrintF(outf, "\tSP:%S", c->Species);

                ajFmtPrintF(outf, "\n");

                j = ajListIterNewread(c->Tags);
                while (!ajListIterDone(j))
                {
                    t = ajListIterGet(j);
                    ajFmtPrintF(outf, "@CO\t%S %u %u %S\n",
                                t->Name, t->x1, t->y1,
                                t->Comment);
                }
                ajListIterDel(&j);
            }
        }

        headertext = assemSAMGetReadgroupHeaderlines(assem);
        if(headertext)
            ajFmtPrintF(outf,"%S", headertext);

        AJFREE(contigs);

        if(!assem->BamHeader)
            return ajTrue;
    }


    /* data */

    j = ajListIterNewread(assem->Reads);
    if(ajListGetLength(assem->ContigsOrder))
        i = ajListToarray(assem->ContigsOrder, (void***)&contigs);
    else
        i = ajTableToarrayValues(assem->Contigs, (void***)&contigs);

    while (!ajListIterDone(j))  /* reads */
    {
	r = ajListIterGet(j);
	assemoutWriteSamAlignment(outf, r, contigs, (ajuint) i);
    }

    ajListIterDel(&j);
    AJFREE(contigs);

    return ret;
}
Пример #16
0
static AjBool assemoutWriteNextMaf(AjPOutfile outfile, const AjPAssem assem)
{
    AjPFile outf = ajOutfileGetFile(outfile);
    AjPAssemContig c = NULL;
    AjPAssemRead   r = NULL;
    AjPAssemTag    t = NULL;
    ajint i = 0;
    ajint nreads = 0;
    AjIList j = NULL;
    AjIList k = NULL;
    AjIList reads = NULL;
    AjPAssemContig* contigs = NULL;

    if(!outf) return ajFalse;
    if(!assem) return ajFalse;

    if(!assem->Hasdata)
    {
        ajTableToarrayValues(assem->Contigs, (void***)&contigs);
        for (;contigs[i];i++)
        {
            c = contigs[i];
            ajFmtPrintF(outf, "CO\t%S\n", c->Name);
            ajFmtPrintF(outf, "NR\t%d\n", c->Nreads);
            ajFmtPrintF(outf, "LC\t%d\n", c->Length);

            j = ajListIterNewread(c->Tags);
            while (!ajListIterDone(j))
            {
                t = ajListIterGet(j);
                ajFmtPrintF(outf, "CT\t%S %u %u %S\n", t->Name, t->x1, t->y1,
                            t->Comment);
            }
            ajListIterDel(&j);

            ajFmtPrintF(outf, "CS\t%S\n", c->Consensus);
            ajFmtPrintF(outf, "CQ\t%S\n", c->ConsensusQ);

            ajFmtPrintF(outf, "\\\\\n");
        }
        AJFREE(contigs);
        
        return ajTrue;
    }
    
    /* data */

    reads = ajListIterNewread(assem->Reads);

    while (!ajListIterDone(reads))
    {
        nreads++;
        r = ajListIterGet(reads);

        if(r->Reference !=i)
            ajErr("different reference/contig number(%d) than expected(%d)"
                  "\nreads were expected to be sorted by contigs");

        ajFmtPrintF(outf, "RD\t%S\n", r->Name);
        ajFmtPrintF(outf, "RS\t%S\n", r->Seq);
        ajFmtPrintF(outf, "RQ\t%S\n", r->SeqQ);
        ajFmtPrintF(outf, "TN\t%S\n", r->Template);

        if(r->Direction)
            ajFmtPrintF(outf, "DI\t%c\n", r->Direction);

        if(r->TemplateSizeMin)
            ajFmtPrintF(outf, "TF\t%d\n", r->TemplateSizeMin);

        if(r->TemplateSizeMax)
            ajFmtPrintF(outf, "TT\t%d\n", r->TemplateSizeMax);

        if(r->File)
            ajFmtPrintF(outf, "SF\t%S\n", r->File);

        if(r->VectorLeft)
            ajFmtPrintF(outf, "SL\t%d\n", r->VectorLeft);

        if(r->VectorRight)
            ajFmtPrintF(outf, "SR\t%d\n", r->VectorRight);

        if(r->QualLeft)
            ajFmtPrintF(outf, "QL\t%d\n", r->QualLeft);

        if(r->QualRight)
            ajFmtPrintF(outf, "QR\t%d\n", r->QualRight);

        if(r->ClipLeft)
            ajFmtPrintF(outf, "SL\t%d\n", r->ClipLeft);

        if(r->ClipRight)
            ajFmtPrintF(outf, "SR\t%d\n", r->ClipRight);

        k = ajListIterNewread(r->Tags);

        while (!ajListIterDone(k))
        {
            t = ajListIterGet(k);

            ajFmtPrintF(outf, "RT\t%S %u %u\n", t->Name,
			t->x1, t->y1);
        }

        ajListIterDel(&k);

        ajFmtPrintF(outf, "ST\t%S\n", r->Technology);
        ajFmtPrintF(outf, "ER\n");
        ajFmtPrintF(outf, "AT\t%u %u %u %u\n", r->x1, r->y1, r->x2, r->y2);
    }

    ajListIterDel(&j);
    ajListIterDel(&reads);

    if(!nreads)
    {
        ajFmtPrintF(outf, "//\n");
        ajFmtPrintF(outf, "EC\n");
    }
    

    return ajTrue;
}
Пример #17
0
static AjBool assemoutWriteBamAlignment(AjPSeqBamBgzf gzfile,
					const AjPAssemRead r,
					AjPSeqBam bam)
{
    AjPSeqBamCore c;
    AjPAssemTag tag;
    unsigned char *dpos;
    const char *s;
    ajuint ilen;
    ajuint slen;
    ajuint i;
    AjIList l = NULL;

    /* optional fields */
    ajint tagvalsize = 0;
    const unsigned char* tagval = 0;
    ajint intval =0;

    /* processing cigar strings*/
    char *t;
    int op;
    long x;


    unsigned char bam_nt16_table[256] =
    {
     15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
     15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
     15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
     1, 2, 4, 8, 15,15,15,15, 15,15,15,15, 15, 0 /*=*/,15,15,
     15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15,
     15,15, 5, 6,  8,15, 7, 9, 15,10,15,15, 15,15,15,15,
     15, 1,14, 2, 13,15,15, 4, 11,15,15,12, 15, 3,15,15,
     15,15, 5, 6,  8,15, 7, 9, 15,10,15,15, 15,15,15,15,
     15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
     15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
     15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
     15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
     15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
     15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
     15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15,
     15,15,15,15, 15,15,15,15, 15,15,15,15, 15,15,15,15
    };

    /* bam_write1 for each alignment */

    c = &bam->core;

    ilen = ajStrGetLen(r->Seq);

    c->tid = (int) r->Reference;

    if(r->Flag & BAM_FREVERSE)
	c->pos = r->y1-1;
    else
	c->pos = r->x1-1; /* BAM format is zero based;
                             -1 is translated to 0, meaning unmapped */
    c->bin = 0;
    c->qual = r->MapQ;
    c->l_qname = 1 + ajStrGetLen(r->Name);
    c->flag = r->Flag;
    c->n_cigar = 0;
    c->l_qseq = ilen;
    c->mtid = (int) r->Rnext;
    c->mpos = (int) r->Pnext-1;
    c->isize = r->Tlen;


    /* get cigar string length */
    s = ajStrGetPtr(r->Cigar);
    if (strcmp(s,"*")) /* '*' means unavailable  */
    {
	for (; *s; ++s)
	{
	  if ((isalpha((int)*s)) || (*s=='='))
		++c->n_cigar;
	    else
	      if (!isdigit((int)*s))
		    ajWarn("invalid CIGAR character: %c\n", *s);
	}
    }


    bam->data_len = c->n_cigar*4 + c->l_qname +
        (ilen + 1)/2 + ilen;

    /* allocation for optional tags are made as they are appended */

    if(bam->data_len > bam->m_data)
    {
        AJCRESIZE0(bam->data,bam->m_data, bam->data_len);
        bam->m_data = bam->data_len;
    }

    dpos = bam->data;

    /* copy query name to bam->data */
    memcpy(dpos, ajStrGetPtr(r->Name), c->l_qname);
    dpos += c->l_qname;

    /* copy cigar string to bam->data */
    s = ajStrGetPtr(r->Cigar);
    for (i = 0; i != c->n_cigar; ++i)
    {
	x = strtol(s, &t, 10);
	op = toupper((int)*t);
	if (op == 'M') op = BAM_CMATCH;
	else if (op == 'I') op = BAM_CINS;
	else if (op == 'D') op = BAM_CDEL;
	else if (op == 'N') op = BAM_CREF_SKIP;
	else if (op == 'S') op = BAM_CSOFT_CLIP;
	else if (op == 'H') op = BAM_CHARD_CLIP;
	else if (op == 'P') op = BAM_CPAD;
	else if (op == '=') op = BAM_CEQUAL;
	else if (op == 'X') op = BAM_CDIFF;
	else ajWarn("invalid CIGAR operation: %c",op);
	s = t + 1;
	((ajuint*)dpos)[i] = x << BAM_CIGAR_SHIFT | op;
    }

    if (*s && c->n_cigar)
	ajWarn("unmatched CIGAR operation: %c", *s);

    c->bin = ajSeqBamReg2bin(c->pos, ajSeqBamCalend(c, MAJSEQBAMCIGAR(bam)));

    dpos += c->n_cigar*4;


    /* copy sequence string to bam->data */
    s = ajStrGetPtr(r->Seq);
    slen = (ilen+1)/2;
    for (i = 0; i < slen; ++i)
        dpos[i] = 0;
    for (i = 0; i < ilen; ++i)
        dpos[i/2] |= bam_nt16_table[(ajuint)s[i]] << 4*(1-i%2);
    dpos += slen;

    /* copy quality values to bam->data */
    if(r->SeqQ && !ajStrMatchC(r->SeqQ, "*"))
    {
	s = ajStrGetPtr(r->SeqQ);

	for(i=0;i<ilen;i++)
	    dpos[i]= s[i]-33;

    }
    else
	for(i=0;i<ilen;i++)
	    dpos[i]= 0xff;



    l = ajListIterNewread(r->Tags);
    bam->l_aux=0;
    while (!ajListIterDone(l))
    {

	tag = ajListIterGet(l);

	/* TODO: array type 'B' and other types */

	if(tag->type == 'i' || tag->type == 'I')
	{
	    tagvalsize = 4;
	    ajStrToInt(tag->Comment, &intval);
	    tagval = (unsigned char*)&intval;
	}
	else if(tag->type =='s' || tag->type =='S')
	{
	    tagvalsize = 2;
	    ajStrToInt(tag->Comment, &intval);
	    tagval = (unsigned char*)&intval;
	}
	else if(tag->type =='c' || tag->type =='C')
	{
	    tagvalsize = 1;
	    ajStrToInt(tag->Comment, &intval);
	    tagval = (unsigned char*)&intval;
	}
	else if(tag->type =='A')
	{
	    tagvalsize = 1;
	    tagval = (const unsigned char*)ajStrGetPtr(tag->Comment);
	}
	else if(tag->type =='Z')
	{
	    tagvalsize = ajStrGetLen(tag->Comment)+1;
	    tagval = (const unsigned char*)ajStrGetPtr(tag->Comment);
	}
	else
	{
	    ajWarn("tag type '%c' not yet supported",tag->type);
	    continue;
	}
	ajSeqBamAuxAppend(bam,
		ajStrGetPtr(tag->Name),
		tag->type,
		tagvalsize,
		tagval);


    }
    ajListIterDel(&l);



    ajSeqBamWrite(gzfile, bam);

    return ajTrue;
}
Пример #18
0
void ajResourceDel(AjPResource *Presource)
{
    AjPResource thys;
    AjPReslink  reslink;
    AjPResquery resqry;
    AjPResterm  resterm;
    AjIList iter;

    if(!Presource) return;
    if(!(*Presource)) return;

    thys = *Presource;

    ajStrDel(&thys->Id);
    ajListstrFreeData(&thys->Idalt);
    ajStrDel(&thys->Acc);
    ajStrDel(&thys->Name);
    ajStrDel(&thys->Desc);
    ajStrDel(&thys->Url);
    ajStrDel(&thys->Urllink);
    ajStrDel(&thys->Urlrest);
    ajStrDel(&thys->Urlsoap);
    ajListstrFreeData(&thys->Cat);

    iter = ajListIterNewread(thys->Taxon);
    while(!ajListIterDone(iter))
    {
        resterm = ajListIterGet(iter);
        ajRestermDel(&resterm);
    }
    ajListIterDel(&iter);
    ajListFree(&thys->Taxon);

    iter = ajListIterNewread(thys->Edamdat);
    while(!ajListIterDone(iter))
    {
        resterm = ajListIterGet(iter);
        ajRestermDel(&resterm);
    }
    ajListIterDel(&iter);
    ajListFree(&thys->Edamdat);

    iter = ajListIterNewread(thys->Edamfmt);
    while(!ajListIterDone(iter))
    {
        resterm = ajListIterGet(iter);
        ajRestermDel(&resterm);
    }
    ajListIterDel(&iter);
    ajListFree(&thys->Edamfmt);

    iter = ajListIterNewread(thys->Edamid);
    while(!ajListIterDone(iter))
    {
        resterm = ajListIterGet(iter);
        ajRestermDel(&resterm);
    }
    ajListIterDel(&iter);
    ajListFree(&thys->Edamid);

    iter = ajListIterNewread(thys->Edamtpc);
    while(!ajListIterDone(iter))
    {
        resterm = ajListIterGet(iter);
        ajRestermDel(&resterm);
    }
    ajListIterDel(&iter);
    ajListFree(&thys->Edamtpc);

    iter = ajListIterNewread(thys->Xref);
    while(!ajListIterDone(iter))
    {
        reslink = ajListIterGet(iter);
        ajReslinkDel(&reslink);
    }
    ajListIterDel(&iter);
    ajListFree(&thys->Xref);

    iter = ajListIterNewread(thys->Query);
    while(!ajListIterDone(iter))
    {
        resqry = ajListIterGet(iter);
        ajResqueryDel(&resqry);
    }
    ajListIterDel(&iter);
    ajListFree(&thys->Query);

    ajListstrFreeData(&thys->Example);
    ajStrDel(&thys->Db);
    ajStrDel(&thys->Setdb);
    ajStrDel(&thys->Full);
    ajStrDel(&thys->Qry);
    ajStrDel(&thys->Formatstr);
    ajStrDel(&thys->Filename);

    ajStrDel(&thys->TextPtr);

    AJFREE(*Presource);
    *Presource = NULL;

    return;
}
Пример #19
0
int main(int argc, char **argv)
{
    AjPSeqset seqset;
    AjPSeqall seqall;
    AjPSeq queryseq;
    const AjPSeq targetseq;
    ajint wordlen;
    AjPTable wordsTable = NULL;
    AjPList* matchlist = NULL;
    AjPFile logfile;
    AjPFeattable* seqsetftables = NULL;
    AjPFeattable seqallseqftable = NULL;
    AjPFeattabOut ftoutforseqsetseq = NULL;
    AjPFeattabOut ftoutforseqallseq = NULL;
    AjPAlign align = NULL;
    AjIList iter = NULL;
    ajint targetstart;
    ajint querystart;
    ajint len;
    ajuint i, j;
    ajulong nAllMatches = 0;
    ajulong sumAllScore = 0;
    AjBool dumpAlign = ajTrue;
    AjBool dumpFeature = ajTrue;
    AjBool checkmode = ajFalse;
    EmbPWordRK* wordsw = NULL;
    ajuint npatterns = 0;
    ajuint seqsetsize;
    ajuint nmatches;
    ajuint* nmatchesseqset;
    ajuint* lastlocation; /* Cursors for Rabin-Karp search. */
                          /* Shows until what point the query sequence was
                           *  scanned for a pattern sequences in the seqset.
                          */
    char* paddedheader = NULL;
    const char* header;
    AjPStr padding;

    header = "Pattern %S  #pat-sequences  #all-matches  avg-match-length\n";
    padding = ajStrNew();

    embInit("wordmatch", argc, argv);

    wordlen = ajAcdGetInt("wordsize");
    seqset  = ajAcdGetSeqset("asequence");
    seqall  = ajAcdGetSeqall("bsequence");
    logfile = ajAcdGetOutfile("logfile");
    dumpAlign = ajAcdGetToggle("dumpalign");
    dumpFeature = ajAcdGetToggle("dumpfeat");

    if(dumpAlign)
    {
        align = ajAcdGetAlign("outfile");
        ajAlignSetExternal(align, ajTrue);
    }

    seqsetsize = ajSeqsetGetSize(seqset);
    ajSeqsetTrim(seqset);
    AJCNEW0(matchlist, seqsetsize);
    AJCNEW0(seqsetftables, seqsetsize);
    AJCNEW0(nmatchesseqset, seqsetsize);

    if (dumpFeature)
    {
        ftoutforseqsetseq =  ajAcdGetFeatout("aoutfeat");
        ftoutforseqallseq =  ajAcdGetFeatout("boutfeat");
    }

    checkmode = !dumpFeature && !dumpAlign;
    embWordLength(wordlen);

    ajFmtPrintF(logfile, "Small sequence/file for constructing"
	    " target patterns: %S\n", ajSeqsetGetUsa(seqset));
    ajFmtPrintF(logfile, "Large sequence/file to be scanned"
	    " for patterns: %S\n", ajSeqallGetUsa(seqall));
    ajFmtPrintF(logfile, "Number of sequences in the patterns file: %u\n",
            seqsetsize);
    ajFmtPrintF(logfile, "Pattern/word length: %u\n", wordlen);

    for(i=0;i<seqsetsize;i++)
    {
        targetseq = ajSeqsetGetseqSeq(seqset, i);
        embWordGetTable(&wordsTable, targetseq);
    }

    AJCNEW0(lastlocation, seqsetsize);

    if(ajTableGetLength(wordsTable)>0)
    {
        npatterns = embWordRabinKarpInit(wordsTable,
                                       &wordsw, wordlen, seqset);
        ajFmtPrintF(logfile, "Number of patterns/words found: %u\n", npatterns);

        while(ajSeqallNext(seqall,&queryseq))
        {
            for(i=0;i<seqsetsize;i++)
            {
                lastlocation[i]=0;

                if (!checkmode)
                    matchlist[i] = ajListstrNew();
            }

            nmatches = embWordRabinKarpSearch(
                    ajSeqGetSeqS(queryseq), seqset,
                    (EmbPWordRK const *)wordsw, wordlen, npatterns,
                    matchlist, lastlocation, checkmode);
            nAllMatches += nmatches;

            if (checkmode)
        	continue;

            for(i=0;i<seqsetsize;i++)
            {
                if(ajListGetLength(matchlist[i])>0)
                {
                    iter = ajListIterNewread(matchlist[i]) ;

                    while(embWordMatchIter(iter, &targetstart, &querystart, &len,
                            &targetseq))
                    {
                        if(dumpAlign)
                        {
                            ajAlignDefineSS(align, targetseq, queryseq);
                            ajAlignSetScoreI(align, len);
                            /* ungapped alignment means same length
                             *  for both sequences
                            */
                            ajAlignSetSubRange(align, targetstart, 1, len,
                                    ajSeqIsReversed(targetseq),
                                    ajSeqGetLen(targetseq),
                                    querystart, 1, len,
                                    ajSeqIsReversed(queryseq),
                                    ajSeqGetLen(queryseq));
                        }
                    }

                    if(dumpAlign)
                    {
                	ajAlignWrite(align);
                	ajAlignReset(align);
                    }

                    if(ajListGetLength(matchlist[i])>0 && dumpFeature)
                    {
                        embWordMatchListConvToFeat(matchlist[i],
                                                   &seqsetftables[i],
                                                   &seqallseqftable,
                                                   targetseq, queryseq);
                        ajFeattableWrite(ftoutforseqallseq, seqallseqftable);
                        ajFeattableDel(&seqallseqftable);
                    }

                    ajListIterDel(&iter);
                }

                embWordMatchListDelete(&matchlist[i]);
            }
        }

        /* search completed, now report statistics */
        for(i=0;i<npatterns;i++)
        {
            sumAllScore += wordsw[i]->lenMatches;

            for(j=0;j<wordsw[i]->nseqs;j++)
        	nmatchesseqset[wordsw[i]->seqindxs[j]] +=
        		wordsw[i]->nSeqMatches[j];
        }

        ajFmtPrintF(logfile, "Number of sequences in the file scanned "
                "for patterns: %u\n", ajSeqallGetCount(seqall));
        ajFmtPrintF(logfile, "Number of all matches: %Lu"
                " (wordmatch finds exact matches only)\n", nAllMatches);

        if(nAllMatches>0)
        {
            ajFmtPrintF(logfile, "Sum of match lengths: %Lu\n", sumAllScore);
            ajFmtPrintF(logfile, "Average match length: %.2f\n",
        	    sumAllScore*1.0/nAllMatches);

            ajFmtPrintF(logfile, "\nDistribution of the matches among pattern"
        	    " sequences:\n");
            ajFmtPrintF(logfile, "-----------------------------------------"
        	    "-----------\n");

            for(i=0;i<ajSeqsetGetSize(seqset);i++)
            {
        	if (nmatchesseqset[i]>0)
        	    ajFmtPrintF(logfile, "%-42s: %8u\n",
        	                ajSeqGetNameC(ajSeqsetGetseqSeq(seqset, i)),
        	                nmatchesseqset[i]);

        	ajFeattableWrite(ftoutforseqsetseq, seqsetftables[i]);
        	ajFeattableDel(&seqsetftables[i]);
            }

            ajFmtPrintF(logfile, "\nPattern statistics:\n");
            ajFmtPrintF(logfile, "-------------------\n");
            if(wordlen>7)
        	ajStrAppendCountK(&padding, ' ', wordlen-7);
            paddedheader = ajFmtString(header,padding);
            ajFmtPrintF(logfile, paddedheader);

            for(i=0;i<npatterns;i++)
        	if (wordsw[i]->nMatches>0)
        	    ajFmtPrintF(logfile, "%-7s: %12u  %12u %17.2f\n",
        	                wordsw[i]->word->fword, wordsw[i]->nseqs,
        	                wordsw[i]->nMatches,
        	                wordsw[i]->lenMatches*1.0/wordsw[i]->nMatches);
        }

    }

    for(i=0;i<npatterns;i++)
    {
        for(j=0;j<wordsw[i]->nseqs;j++)
            AJFREE(wordsw[i]->locs[j]);

        AJFREE(wordsw[i]->locs);
        AJFREE(wordsw[i]->seqindxs);
        AJFREE(wordsw[i]->nnseqlocs);
        AJFREE(wordsw[i]->nSeqMatches);
        AJFREE(wordsw[i]);
    }

    embWordFreeTable(&wordsTable);

    AJFREE(wordsw);
    AJFREE(matchlist);
    AJFREE(lastlocation);
    AJFREE(nmatchesseqset);
    AJFREE(seqsetftables);

    if(dumpAlign)
    {
        ajAlignClose(align);
        ajAlignDel(&align);
    }

    if(dumpFeature)
    {
        ajFeattabOutDel(&ftoutforseqsetseq);
        ajFeattabOutDel(&ftoutforseqallseq);
    }

    ajFileClose(&logfile);

    ajSeqallDel(&seqall);
    ajSeqsetDel(&seqset);
    ajSeqDel(&queryseq);
    ajStrDel(&padding);
    AJFREE(paddedheader);

    embExit();

    return 0;
}
Пример #20
0
AjBool ajResourceGetDbdata(const AjPResource resource, AjPQuery qry,
                           AjBool findformat(const AjPStr format,
                                             ajint *iformat))
{
    AjIList iter;
    AjPResquery resqry = NULL;
    ajint format;
    AjBool ret = ajFalse;

    if(!resource)
        return ajFalse;

    qry->InDrcat = ajTrue;

    ajStrAssignC(&qry->DbType, ajNamQueryGetDatatypeC(qry));

    ajDebug("ajResourceGetDbdata dbtype %S %d\n", qry->DbType, qry->DataType);

    if(qry->DataType == AJDATATYPE_URL)
    {
        ajStrAssignC(&qry->Method, "urlonly");
        qry->QueryType = AJQUERY_ENTRY;
        qry->HasAcc = ajFalse;

        iter = ajListIterNewread(resource->Query);
        while(!ajListIterDone(iter)) 
        {
            resqry = ajListIterGet(iter);

            ajDebug("ajResourceGetDbdata test fmt: '%S' edam: '%S'\n",
                    resqry->Format, resqry->FormatTerm);

            if(ajStrMatchC(resqry->FormatTerm, "2331"))
            {
                ajDebug("     OK fmt: '%S' edam: '%S' url '%S'\n",
                        resqry->Format, resqry->FormatTerm, resqry->Url);
                ajStrAssignS(&qry->Formatstr, resqry->FormatTerm);
                ajStrAssignS(&qry->DbUrl, resqry->Url);
                ret = ajTrue;
            }
        }
        ajListIterDel(&iter);
    }
    else 
    {
        ajStrAssignC(&qry->Method, "url");
        qry->QueryType = AJQUERY_ENTRY;
        qry->HasAcc = ajFalse;

        iter = ajListIterNewread(resource->Query);
        while(!ajListIterDone(iter)) 
        {
            resqry = ajListIterGet(iter);

            ajDebug("ajResourceGetDbdata test fmt: '%S' edam: '%S'\n",
                    resqry->Format, resqry->FormatTerm);

            if(findformat(resqry->Format, &format))
            {
                ajDebug("     OK fmt: '%S' url '%S'\n",
                        resqry->Format, resqry->Url);
                ajStrAssignS(&qry->Formatstr, resqry->Format);
                ajStrAssignS(&qry->DbUrl, resqry->Url);
                ret = ajTrue;
            }
            if(ajStrGetLen(resqry->FormatTerm) &&
               findformat(resqry->FormatTerm, &format))
            {
                ajDebug("     OK edam: '%S' url '%S'\n",
                        resqry->FormatTerm, resqry->Url);
                ajStrAssignS(&qry->Formatstr, resqry->FormatTerm);
                ajStrAssignS(&qry->DbUrl, resqry->Url);
                ret = ajTrue;
            }
        }
        ajListIterDel(&iter);
    }

    return ret;
}
Пример #21
0
int main(ajint argc, char **argv)
{
    AjPList  ccfin        = NULL;  /* List of CCF (input) files.             */

    AjPDir   pdbin        = NULL;  /* Path of pdb input files.               */
    AjPStr   pdbprefix    = NULL;  /* Prefix of pdb input files.             */
    AjPStr   pdb_name     = NULL;  /* Full name (path/name/extension) of 
					 pdb format input file.              */

    AjPDirout ccfout     = NULL;   /* Path of coordinate output file.        */
    AjPStr   randomname  = NULL;   /* Name for temp file tempf.              */
    AjPStr   ccf_this    = NULL; 
    AjPStr   exec        = NULL; 
    AjPStr   naccess_str = NULL; 
    AjPStr   line        = NULL;
    AjPStr   syscmd      = NULL;   /* Command line arguments.                */
    AjPStr  *mode        = NULL;   /* Mode of operation from acd.            */

    AjPFile  errf        = NULL;   /* pdbplus error file pointer.            */
    AjPFile  serrf       = NULL;   /* stride error file pointer.             */
    AjPFile  nerrf       = NULL;   /* stride error file pointer.             */
    AjPFile  tempf       = NULL;   /* Temp file for holding STRIDE output.   */
    AjPFile  ccf_inf     = NULL;   /* Protein coordinate input file.         */
    AjPFile  ccf_outf    = NULL;   /* Protein coordinate output file.        */

    AjIList  iter        = NULL; 

    AjBool   done_naccess= ajFalse;
    AjBool   done_stride = ajFalse;
    AjBool   found       = ajFalse;
    AjPResidue temp_res  = NULL;  /* Pointer to Residue object.                */
    AjPPdb   pdb_old     = NULL;  /* Pointer to PDB object - without new
				     stride elements.                       */
    AjPPdb   pdb         = NULL;  /* Pointer to PDB object.                 */
    ajint    idn         = 0;     /* Chain identifier as a number (1,2,...) */
    ajint    chain_num   = 0;     /* Chain identifier index (0,1,...).      */
    ajint    tS          = 0;     /* User-defined threshold size for SSEs.  */
    ajint    nostride    = 0;     /* No. times stride failed                */
    ajint    nonaccess   = 0;     /* No. times naccess failed               */
    ajint    nofile      = 0;     /* No. times of file error                */

    /* Variables for each item that will be parsed from the ASG line. */
    AjPStr   res      = NULL;  /* Residue id from STRIDE ASG line (ALA etc). */
    AjPStr   res_num  = NULL;  /* PDB residue number from STRIDE ASG line.   */
    char     pcid     = ' ';   /* Protein chain identifier from STRIDE or 
				  NACESS output (A,B, etc).                  */
    char     ss       = ' ';   /* One-letter secondary structure code from 
				  STRIDE ASG line.                           */
    float    ph       = 0.0;   /* Phi angle from STRIDE ASG line.            */
    float    ps       = 0.0;   /* Psi angle from STRIDE ASG line.            */
    float    sa       = 0.0;   /* Residue solvent accessible area from STRIDE 
				  ASG line.                                  */
    float    f1       = 0;
    float    f2       = 0;
    float    f3       = 0;
    float    f4       = 0;
    float    f5       = 0;
    float    f6       = 0;
    float    f7       = 0;
    float    f8       = 0;
    float    f9       = 0;
    float    f10      = 0;





    /* Allocate strings; this section is used for variables that are 
       allocated once only. */
    pdb_name       = ajStrNew();
    res            = ajStrNew();
    res_num        = ajStrNew();
    randomname     = ajStrNew();
    syscmd         = ajStrNew();
    line           = ajStrNew();  
    naccess_str    = ajStrNew();
    exec           = ajStrNew();





    /* Read data from acd. */
    embInitPV("pdbplus",argc,argv,"STRUCTURE",VERSION);

    ccfin        = ajAcdGetDirlist("ccfinpath");  
    pdbin        = ajAcdGetDirectory("pdbindir"); 
    pdbprefix    = ajAcdGetString("pdbprefix");
    ccfout       = ajAcdGetOutdir("ccfoutdir");
    mode         = ajAcdGetList("mode");
    errf         = ajAcdGetOutfile("logfile");
    if(ajStrGetCharFirst(*mode) != '2')
	serrf    = ajAcdGetOutfile("slogfile");
    if(ajStrGetCharFirst(*mode) != '1')
	nerrf    = ajAcdGetOutfile("nlogfile");
    tS           = ajAcdGetInt("thresholdsize");
 

    


    
    ajRandomSeed();
    ajFilenameSetTempname(&randomname); 




    
    /* 
     **  Start of main application loop. 
     **  Process each PDB/ protein coordinate file (EMBL format) in turn. 
     */ 
    
    while(ajListPop(ccfin,(void **)&ccf_this))
    {
        /* Open protein coordinate file.  If it cannot be opened, write a 
           message to the error file, delete ccf_this and continue. */

        if((ccf_inf = ajFileNewInNameS(ccf_this)) == NULL)   
	{
	    ajWarn("%s%S\n//\n", 
		   "clean coordinate file not found: ", ccf_this);
	    
	    ajFmtPrintF(errf, "%s%S\n//\n", 
                        "clean coordinate file not found: ", ccf_this); 
            ajStrDel(&ccf_this); 
	    nofile++;
	    continue; 
        }       

        ajFmtPrint("Processing %S\n", ccf_this);
	fflush(stdout);

        /* Parse protein coordinate data (from clean format file) into 
	   AjPPdb object.  ajPdbReadAllModelsNew will create the AjPPdb object. */
      if(!(pdb_old=ajPdbReadAllModelsNew(ccf_inf)))
        {
	    ajWarn("ERROR Clean coordinate file read" 
		   "error: %S\n//\n", ccf_this);
            ajFmtPrintF(errf, "ERROR Clean coordinate file read" 
			"error: %S\n//\n", ccf_this);
            ajFileClose(&ccf_inf);
            ajStrDel(&ccf_this); 
	    nofile++;
            continue;
        }

        ajFileClose(&ccf_inf);
        ajPdbCopy(&pdb, pdb_old); 
        ajPdbDel(&pdb_old); 

        /* Construct name of corresponding PDB file.
	    NACCESS does *not* generate an output file if the path is './' e.g. 
	    naccess ./1rbp.ent , therefore replace './' with null. */
	ajStrAssignS(&pdb_name, ajDirGetPath(pdbin));
	if(ajStrMatchC(pdb_name, "./") || ajStrMatchC(pdb_name, "."))
	    ajStrAssignC(&pdb_name, "");
	
        ajStrAppendS(&pdb_name, pdbprefix);
	ajStrFmtLower(&pdb->Pdb);
        ajStrAppendS(&pdb_name, pdb->Pdb);
        ajStrAppendC(&pdb_name, ".");
	ajStrAppendS(&pdb_name, ajDirGetExt(pdbin));
	

        /* Check corresponding PDB file exists for reading using ajFileStat. */
	if(!(ajFilenameExistsRead(pdb_name)))
        {
            ajFmtPrintF(errf, "%s%S\n//\n", "PDB file not found: ", pdb_name);
            ajWarn("%s%S\n//\n", "PDB file not found: ", pdb_name);
            ajStrDel(&ccf_this); 
            ajPdbDel(&pdb);
	    nofile++;
            continue;
        }
        
	if(ajStrGetCharFirst(*mode) != '2')
        {        
	    /* 
	     **  Create a string containing the STRIDE command line (it needs
	     **  PDB file name & name of temp output file).
	     **  Call STRIDE by using ajSystem.
	     */
	    
	    ajFmtPrintS(&syscmd, "%S %S -f%S >> %s 2>&1",  
			ajAcdGetpathC("stride"),
                        pdb_name, randomname, ajFileGetNameC(serrf));
	    ajFmtPrint("%S %S -f%S >> %s 2>&1\n",  
		       ajAcdGetpathC("stride"),
                       pdb_name, randomname,ajFileGetNameC(serrf));
	    system(ajStrGetPtr(syscmd));  

	    
	    /* Open the stride output file */
	    if (((tempf = ajFileNewInNameS(randomname)) == NULL))
	    {
		ajWarn("%s%S\n//\n", 
		       "no stride output for: ", pdb_name); 
		ajFmtPrintF(errf, "%s%S\n//\n", 
			    "no stride output for: ", pdb_name); 
		nostride++;
		ajStrDel(&ccf_this);
		ajPdbDel(&pdb); 
		continue; 
	    } 
	    else
	      ajFmtPrintF(errf, "%s%S\n//\n", 
			  "stride output for: ", pdb_name); 

	    
	    done_stride = ajFalse;

	    /* Parse STRIDE output from temp output file a line at a time. */
	    while(ajReadlineTrim(tempf,&line))
	    {       
		if(ajStrPrefixC(line,"ASG"))    
		{
		    ajFmtScanS(line, "%*S %S  %c %S %*d %c %*S %f %f %f %*S", 
			       &res, &pcid, &res_num, &ss, &ph, &ps, &sa);
                
		    /* 
		     **  Populate pdbplus object with the data from this parsed
		     **  line. This means first identifying the chain, then 
		     **  finding the residue. 
		     */
                
		    /* Determine the chain number. ajDmxPdbplusChain does not 
		       recognise '-', so change '-' to '.'  */
		    if (pcid == '-')
			pcid = '.'; 

		    /* Get chain number from the chain identifier. */
		    if(!ajPdbChnidToNum(pcid, pdb, &idn)) 
		    {
			ajWarn("Could not convert chain id %c to chain"
			       " number in pdb file %S\n//\n", 
			       pcid, pdb_name);
			ajFmtPrintF(errf, "Could not convert chain id %c "
				    "to chain number in pdb file %S\n//\n", 
				    pcid, pdb_name);
			continue;
		    }
                    
		    /* 
		     **  The chain number that will get written starts at 1, but
		     **  we want an index into an array which must start at 0, 
		     **  so subtract 1 from the chain number to get the index. 
		     */
		    chain_num = idn-1; 
                  
		    /* 
		     **   Iiterate through the list of residues in the Pdb object,
		     **   found switches to true when first residue corresponding 
		     **   to the line is found. 
		     */

		    /* iter = ajListIterNewread(pdb->Chains[chain_num]->Atoms); */
		    iter = ajListIterNewread(pdb->Chains[chain_num]->Residues);
		    found = ajFalse; 

		    while((temp_res = (AjPResidue)ajListIterGet(iter)))
		    {
		        /* If we have found the residue we want */
			if((ajStrMatchS(res_num, temp_res->Pdb) && 
			    ajStrMatchS(res, temp_res->Id3)))
			{
                       	    done_stride = ajTrue;
			    found = ajTrue;
			    temp_res->eStrideType = ss;
			    temp_res->Phi  = ph;
			    temp_res->Psi  = ps;
			    temp_res->Area = sa;
			}                 
			/* If the matching residue has been processed
			   move on to next ASG line, next residue. */
			else if(found == ajTrue) 
			    break;	
			else 
			/* Matching residue not found yet. */       
			    continue;	
		    }
		    ajListIterDel(&iter);
		} /* End of if ASG loop. */ 
	    } /* End of while line loop. */
	    

	    if(done_stride)
	      ajFmtPrintF(errf, "%s%S\n//\n", 
			  "stride data for: ", pdb_name); 
	    else
	      {
		ajFmtPrintF(errf, "%s%S\n//\n", 
			    "no stride data for: ", pdb_name); 
		ajWarn("%s%S\n//\n", "no stride data for: ", pdb_name);
		nostride++;
	      }


	    /* Close STRIDE temp file. & tidy up. */
	    ajFileClose(&tempf);

	    /* Remove temporary file (stride output file). */
	    ajFmtPrintS(&exec, "rm %S", randomname); 
	    ajSysSystem(exec); 
	    
	    /* 
	     **  Calculate element serial numbers (eStrideNum)& ammend residue
	     **  objects, count no's of elements and ammend chain object 
	     **  (numHelices, num Strands). 
	     */
	    pdbplus_sort(pdb, tS);
	}
	

	if(ajStrGetCharFirst(*mode) != '1')
        {        
	    /* 
	     **   Create a string containing the NACCESS command line (it needs
	     **   PDB file name & name of temp output file) & call NACCESS.
	     **   If e.g. /data/structure/pdbfred.ent was parsed and the program
	     **   was run from /stuff, then /stuff/fred.asa and /stuff/fred.rsa
	     **   would be written.  These must be deleted once parsed (only
	     **   use the .rsa file here). 
	     */
	    
	    ajFmtPrintS(&syscmd, "%S %S  >> %s 2>&1",  
			ajAcdGetpathC("naccess"), pdb_name, 
			ajFileGetNameC(nerrf));
	    ajFmtPrint("%S %S  >> %s 2>&1\n",  
		       ajAcdGetpathC("naccess"), pdb_name, 
		       ajFileGetNameC(nerrf));
	    system(ajStrGetPtr(syscmd));  


	    
	    ajStrAssignS(&naccess_str, pdbprefix);
	    ajStrAppendS(&naccess_str, pdb->Pdb);
	    ajStrAppendC(&naccess_str, ".rsa");
	    
	    /* Open the NACCESS output file. */
	    if (((tempf = ajFileNewInNameS(naccess_str)) == NULL))
	    {
		ajFmtPrintF(errf, "%s%S\n//\n", 
			    "no naccess output for: ", pdb_name); 
		ajWarn("%s%S\n//\n", "no naccess output for: ", pdb_name);
		nonaccess++;
		ajStrDel(&ccf_this);
		ajPdbDel(&pdb); 
		continue; 
	    }	 
	    else
	      ajFmtPrintF(errf, "%s%S\n//\n", 
			  "naccess output for: ", pdb_name); 


	    done_naccess = ajFalse;
	    /* Parse NACCESS output from temp output file a line at a time. */	    
	    while(ajReadlineTrim(tempf,&line))
	    {       
		if(ajStrPrefixC(line,"RES"))    
		{
		    /* Read data from lines. */
		    if((pcid = line->Ptr[8]) == ' ')
		      ajFmtScanS(line, "%*S %S %S %f %f %f "
				 "%f %f %f %f %f %f %f", 
				 &res, &res_num, &f1, &f2, &f3, &f4, &f5, 
				 &f6, &f7, &f8, &f9, &f10);
		    else
		      ajFmtScanS(line, "%*S %S %*c %S %f %f "
				 "%f %f %f %f %f %f %f %f", 
				 &res, &res_num, &f1, &f2, &f3, &f4, &f5, 
				 &f6, &f7, &f8, &f9, &f10);

		    /* Identify the chain, then finding all the residues 
		       corresponding to the residue. */
                
		    /* Get the chain number from the chain identifier. */
		    if(!ajPdbChnidToNum(pcid, pdb, &idn))
		    {
                        ajWarn("Could not convert chain id %c to chain"
				    " number in pdb file %S\n//\n", 
			       pcid, pdb_name);	
			ajFmtPrintF(errf, "Could not convert chain id"
				    " %c to chain number in pdb file %S\n//\n",
				    pcid, pdb_name);
			continue;
		    }
                    

                  
		    /* 
		     **  Chain number will start at 1, but we want an index 
		     **  into an array which must start at 0, so subtract 1 
		     **  from the chain number to get the index.
		     */
		    chain_num = idn-1; 



		    /* 
		     **   Iiterate through the list of residues in the Pdb object,
		     **   temp_res is an AjPResidue used to point to the current
		     **   residue.
		     **   ajBool found switches to true when first residue 
		     **   corresponding to the line is found. 
		     */
		    iter = ajListIterNewread(pdb->Chains[chain_num]->Residues);

		    found = ajFalse; 
		    while((temp_res = (AjPResidue)ajListIterGet(iter)))
		    {
			/* If we have found the residue we want, write the residue 
			   object. */
			if((ajStrMatchS(res_num, temp_res->Pdb) && 
			    ajStrMatchS(res, temp_res->Id3)))
                        {
			    found = ajTrue;
			    done_naccess = ajTrue;
			    temp_res->all_abs  = f1;
			    temp_res->all_rel  = f2;
			    temp_res->side_abs = f3;
			    temp_res->side_rel = f4;
			    temp_res->main_abs = f5;
			    temp_res->main_rel = f6;
			    temp_res->npol_abs = f7;
			    temp_res->npol_rel = f8;
			    temp_res->pol_abs  = f9;
			    temp_res->pol_rel  = f10;

			}      
			/* If the matching residues have all been processed. 
			   move on to next ASG line, next residue. */
			else if(found == ajTrue) 
			    break;	
			else 
			    /* Matching residues not found yet, move on to next 
			       residue. */
			    continue;	 
		    }
		    ajListIterDel(&iter);
		} 
	    } 
	    
	    if(done_naccess)
		ajFmtPrintF(errf, "%s%S\n//\n", 
			    "naccess data for: ", pdb_name); 
	    else
	    {
		ajFmtPrintF(errf, "%s%S\n//\n", 
			    "no naccess data for: ", pdb_name); 
		ajWarn("%s%S\n//\n", "no naccess data for: ", pdb_name);
		nonaccess++;
	    }

	    /* Remove temporary file (naccess output files). */
	    ajFileClose(&tempf);
	    
	    ajFmtPrintS(&exec, "rm %S", naccess_str); 
	    ajSysSystem(exec); 

	    ajStrAssignS(&naccess_str, pdbprefix);
	    ajStrAppendS(&naccess_str, pdb->Pdb);
	    ajStrAppendC(&naccess_str, ".asa");
	    ajFmtPrintS(&exec, "rm %S", naccess_str);
	    ajSysSystem(exec); 

	    ajStrAssignS(&naccess_str, pdbprefix);
	    ajStrAppendS(&naccess_str, pdb->Pdb);
	    ajStrAppendC(&naccess_str, ".log");
	    ajFmtPrintS(&exec, "rm %S", naccess_str);
	    ajSysSystem(exec); 
	}

        /* Open CCF (output) file. */
        ccf_outf = ajFileNewOutNameDirS(pdb->Pdb, ccfout);
	
        
        /* Write AjPPdb object to the output file in clean format. */
        if(!ajPdbWriteAll(ccf_outf, pdb))
        {               
	    ajWarn("%s%S\n//\n","Could not write results file for: ", 
                        pdb->Pdb);  
	    
	    ajFmtPrintF(errf,"%s%S\n//\n","Could not write results file for ", 
                        pdb->Pdb);

        }	
	ajFileClose(&ccf_outf);
        ajPdbDel(&pdb);
        ajStrDel(&ccf_this);
    } /* End of main application loop. */


    ajFmtPrint("STRIDE  failures: %d\n", nostride);
    ajFmtPrint("NACCESS failures: %d\n", nonaccess);
    ajFmtPrintF(errf, "\n\nSTRIDE  failures: %d\nNACCESS failures: %d\n",
		nostride, nonaccess);
    

    

    ajListFree(&ccfin);
    ajDirDel(&pdbin);
    ajStrDel(&pdbprefix);
    ajStrDel(&pdb_name);
    ajDiroutDel(&ccfout);
    ajStrDel(&res);
    ajStrDel(&res_num);
    ajStrDel(&randomname);
    ajStrDel(&line);
    ajStrDel(&naccess_str);
    ajStrDel(&exec);
    ajStrDel(&syscmd);
  
    ajFileClose(&errf);
    if(ajStrGetCharFirst(*mode) != '2')
	ajFileClose(&serrf);
    if(ajStrGetCharFirst(*mode) != '1')
	ajFileClose(&nerrf);

    ajStrDel(&mode[0]);
    AJFREE(mode);




    
    ajExit();
    return 0;
} 
Пример #22
0
static void primersearch_store_hits(const Primer primdata,
			       AjPList fhits, AjPList rhits,
			       const AjPSeq seq, AjBool reverse)
{
    ajint amplen = 0;
    AjIList fi;
    AjIList ri;

    PHit primerhit = NULL;

    fi = ajListIterNewread(fhits);
    while(!ajListIterDone(fi))
    {
	EmbPMatMatch fm = NULL;
	EmbPMatMatch rm = NULL;
	amplen = 0;

	fm = ajListIterGet(fi);
	ri = ajListIterNewread(rhits);
	while(!ajListIterDone(ri))
	{
	    ajint seqlen = ajSeqGetLen(seq);
	    ajint s = (fm->start);
	    ajint e;

	    rm = ajListIterGet(ri);
	    e = (rm->start-1);
	    amplen = seqlen-(s-1)-e;

	    if (amplen > 0)	   /* no point making a hit if -ve length! */
	    {
		primerhit = NULL;
		AJNEW(primerhit);
		primerhit->desc=NULL;	 /* must be NULL for ajStrAss */
		primerhit->seqname=NULL; /* must be NULL for ajStrAss */
		primerhit->acc=NULL;
		primerhit->forward=NULL;
		primerhit->reverse=NULL;
		ajStrAssignC(&primerhit->seqname,ajSeqGetNameC(seq));
		ajStrAssignS(&primerhit->desc, ajSeqGetDescS(seq));
		ajStrAssignS(&primerhit->acc, ajSeqGetAccS(seq));
		primerhit->forward_pos = fm->start;
		primerhit->reverse_pos = rm->start;
		primerhit->forward_mismatch = fm->mm;
		primerhit->reverse_mismatch = rm->mm;
		primerhit->amplen = amplen;
		if(!reverse)
		{
		    ajStrAssignS(&primerhit->forward,
				 primdata->forward->patstr);
		    ajStrAssignS(&primerhit->reverse,
				 primdata->reverse->patstr);
		}
		else
		{
		    ajStrAssignS(&primerhit->forward,
				 primdata->reverse->patstr);
		    ajStrAssignS(&primerhit->reverse,
				 primdata->forward->patstr);
		}
		ajListPushAppend(primdata->hitlist, primerhit);


	    }
	}
	/*
	**  clean up rListIter here as it will be new'ed again next
	**  time through
	*/
	ajListIterDel(&ri);
    }

    ajListIterDel(&fi);
    return;
}
Пример #23
0
static AjBool assemoutWriteSamAlignment(AjPFile outf, const AjPAssemRead r,
					AjPAssemContig const * contigs,
					ajint ncontigs)
{
    AjPAssemTag    t = NULL;
    AjIList l = NULL;
    AjPStr qualstr = NULL;
    AjPStr tmp  = NULL;
    ajint  POS  = 0;
    AjPStr CIGAR = NULL;
    const char* RNEXT = NULL;
    AjPStr SEQ  = NULL;
    AjPStr QUAL = NULL;
    AjPStr SEQunpadded  = NULL;
    AjPStr QUALunpadded = NULL;
    AjPStr consensus = NULL;
    AjBool rc= ajFalse;
    AjBool ret = ajTrue;
    const char* refseq = NULL;
    const AjPAssemContig contig = NULL;

    ajuint k = 0;

    if(r->Reference>=ncontigs)
	ajDie("assemoutWriteSamAlignment: reference sequence number"
		" '%d' is larger than or equal to known number of reference"
		" sequences '%d'. Problem while processing read '%S'.",
		r->Reference,
		ncontigs,
		r->Name);

    contig = (r->Reference==-1 ? NULL : contigs[r->Reference]);

    ajStrAssignRef(&SEQ, r->Seq);
    consensus = contig==NULL? NULL : contig->Consensus;

    if (r->Rnext==-1)
	RNEXT= "*";
    else if(r->Rnext==r->Reference)
	RNEXT = "=";
    else
	RNEXT = ajStrGetPtr(contigs[r->Rnext]->Name);

    if (r->Flag & BAM_FREVERSE)
    {
	rc = ajTrue;
	qualstr = ajStrNewS(r->SeqQ);

	if(!r->Reversed)
	{
	    ajStrReverse(&qualstr);
	    ajSeqstrReverse(&SEQ);
	}

	QUAL = qualstr;
	POS = r->y1;
	ajStrAssignSubS(&tmp, SEQ,
		ajStrGetLen(r->Seq) - r->y2,
		ajStrGetLen(r->Seq) - r->x2
	);

    }
    else
    {
	rc= ajFalse;
	POS = r->x1;
	QUAL = r->SeqQ;
	ajStrAssignSubS(&tmp, SEQ,
		r->x2-1,
		r->y2-1
	);
    }

    if(r->Cigar==NULL && consensus)
    {
	refseq = ajStrGetPtr(consensus) + (rc ? r->y1-1 : r->x1-1);

	CIGAR = assemoutMakeCigar(refseq, ajStrGetPtr(tmp));

	SEQunpadded = ajStrNewRes(ajStrGetLen(SEQ));
	QUALunpadded = ajStrNewRes(ajStrGetLen(SEQ));

	for(k=0; k< ajStrGetLen(SEQ); k++)
	{
	    if (ajStrGetCharPos(SEQ, k) == '*')
		continue;

	    ajStrAppendK(&SEQunpadded, ajStrGetCharPos(SEQ, k));
	    ajStrAppendK(&QUALunpadded, ajStrGetCharPos(QUAL, k));
	}

	ajDebug("cigar: %S\n", CIGAR);

	ajStrAssignS(&tmp, CIGAR);

	if(rc)
	{
	    if(r->y2 < (ajint)ajStrGetLen(SEQ))
		ajFmtPrintS(&CIGAR, "%dS%S",
		            ajStrGetLen(SEQ) - r->y2, tmp);
	    if(r->x2 > 1)
		ajFmtPrintAppS(&CIGAR, "%dS", r->x2 - 1);
	}
	else
	{
	    if(r->x2 > 1)
		ajFmtPrintS(&CIGAR, "%dS%S", r->x2 - 1, tmp);
	    if(r->y2 < (ajint)ajStrGetLen(SEQ))
		ajFmtPrintAppS(&CIGAR, "%dS",
		               ajStrGetLen(SEQ) - r->y2);
	}
	ajStrDel(&tmp);
    }
    else if(r->Cigar==NULL)
    {
	ajErr("both CIGAR string and consensus sequence not available");
	ret = ajFalse;
	ajStrAssignK(&CIGAR, '*');
    }
    else if(!ajStrGetLen(r->Cigar))
	ajStrAssignK(&CIGAR, '*');
    else if(ajStrGetLen(r->Cigar))
    {
	if(!ajStrGetLen(SEQ))
	    ajStrAssignK(&SEQ, '*');

	if(!ajStrGetLen(QUAL))
	    ajStrAssignK(&QUAL, '*');
    }

    ajStrDel(&tmp);

    ajFmtPrintF(outf, "%S\t%d\t%s\t%d\t%d\t%S\t%s\t%Ld\t%d\t%S\t%S",
	    r->Name,
	    r->Flag,
	    (contig==NULL ? "*" : ajStrGetPtr(contig->Name)),
	    POS,
	    r->MapQ,
	    (CIGAR ? CIGAR : r->Cigar),
	    RNEXT,
	    r->Pnext,
	    r->Tlen,
	    (r->Cigar ? SEQ  : SEQunpadded),
	    (r->Cigar ? QUAL : QUALunpadded));

    l = ajListIterNewread(r->Tags);
    while (!ajListIterDone(l))
    {
	t = ajListIterGet(l);

	/* TODO: array type, 'B' */

	/* In SAM, all single integer types are mapped to int32_t [SAM spec] */
	ajFmtPrintF(outf, "\t%S:%c:",
		t->Name,
		(t->type == 'c' || t->type == 'C' ||
		 t->type == 's' || t->type == 'S'
				|| t->type == 'I') ? 'i' : t->type
	);

	if(t->x1 || t->y1)
	    ajFmtPrintF(outf, " %u %u", t->x1, t->y1);

	if(t->Comment && ajStrGetLen(t->Comment)>0)
	    ajFmtPrintF(outf, "%S", t->Comment);

    }
    ajListIterDel(&l);

    ajFmtPrintF(outf, "\n");

    if(qualstr)
	ajStrDel(&qualstr);

    ajStrDel(&SEQ);
    ajStrDel(&CIGAR);
    ajStrDel(&SEQunpadded);
    ajStrDel(&QUALunpadded);

    return ret;
}
Пример #24
0
static void remap_RestrictPreferred(const AjPList l, const AjPTable t)
{
    AjIList iter   = NULL;
    EmbPMatMatch m = NULL;
    const AjPStr value   = NULL;
    AjPStr newiso  = NULL;
    AjBool found;		/* name found in isoschizomer list */
        	    
    /* for parsing value->iso string */
    AjPStrTok tok = NULL;
    char tokens[] = " ,";
    AjPStr code = NULL;

    iter = ajListIterNewread(l);
    
    while((m = (EmbPMatMatch)ajListIterGet(iter)))
    {
	found = ajFalse;
	
    	/* get prototype name */
    	value = ajTableFetchS(t, m->cod);
    	if(value) 
    	{
    	    ajStrAssignC(&newiso, "");

	    /* parse isoschizomer names from m->iso */
	    ajStrTokenDel(&tok);
            tok = ajStrTokenNewC(m->iso,  tokens);
            while(ajStrTokenNextParseC(&tok, tokens, &code))
            {
	        if(ajStrGetLen(newiso) > 0)
	            ajStrAppendC(&newiso, ",");

		/* found the prototype name? */
	        if(!ajStrCmpCaseS(code, value)) 
	        {
	            ajStrAppendS(&newiso, m->cod);
	            found = ajTrue;
	        }
		else
	            ajStrAppendS(&newiso, code);
            }
            ajStrTokenDel(&tok);

	    /* if the name was not replaced, then add it in now */
	    if(!found)
	    {
	        if(ajStrGetLen(newiso) > 0)
	            ajStrAppendC(&newiso, ",");

	        ajStrAppendS(&newiso, m->cod);	
	    }
	    
	    ajDebug("RE: %S -> %S iso=%S newiso=%S\n", m->cod, value,
		    m->iso, newiso);

	    /* replace the old iso string with the new one */
	    ajStrAssignS(&m->iso, newiso);

	    /* rename the enzyme to the prototype name */
    	    ajStrAssignS(&m->cod, value);
    	}
    }
    
    ajListIterDel(&iter);     
    ajStrDel(&newiso);
    ajStrDel(&code);
    ajStrTokenDel(&tok);

    return; 
}
Пример #25
0
static void dottup_stretchplot(AjPGraph graph, const AjPList matchlist,
			       const AjPSeq seq1, const AjPSeq seq2,
			       ajint begin1, ajint begin2,
			       ajint end1, ajint end2)
{
    EmbPWordMatch wmp = NULL;
    float xa[1];
    float ya[2];
    AjPGraphdata gdata = NULL;
    AjPStr tit = NULL;
    float x1;
    float y1;
    float x2;
    float y2;
    AjIList iter = NULL;

    tit = ajStrNew();
    ajFmtPrintS(&tit,"%S",ajGraphGetTitleS(graph));


    gdata = ajGraphdataNewI(1);
    xa[0] = (float)begin1;
    ya[0] = (float)begin2;

    ajGraphSetTitleC(graph,ajStrGetPtr(tit));

    ajGraphSetXlabelC(graph,ajSeqGetNameC(seq1));
    ajGraphSetYlabelC(graph,ajSeqGetNameC(seq2));

    ajGraphdataSetTypeC(gdata,"2D Plot Float");
    ajGraphdataSetMinmax(gdata,(float)begin1,(float)end1,(float)begin2,
			   (float)end2);
    ajGraphdataSetTruescale(gdata,(float)begin1,(float)end1,(float)begin2,
			   (float)end2);
    ajGraphxySetXstartF(graph,(float)begin1);
    ajGraphxySetXendF(graph,(float)end1);
    ajGraphxySetYstartF(graph,(float)begin2);
    ajGraphxySetYendF(graph,(float)end2);

    ajGraphxySetXrangeII(graph,begin1,end1);
    ajGraphxySetYrangeII(graph,begin2,end2);


    if(matchlist)
    {
	iter = ajListIterNewread(matchlist);
	while((wmp = ajListIterGet(iter)))
	{
	    x1 = x2 = (float) (wmp->seq1start + begin1);
	    y1 = y2 = (float) (wmp->seq2start + begin2);
	    x2 += (float) wmp->length-1;
	    y2 += (float) wmp->length-1;
	    ajGraphAddLine(graph,x1,y1,x2,y2,0);
	}
	ajListIterDel(&iter);
    }

    ajGraphdataAddXY(gdata,xa,ya);
    ajGraphDataReplace(graph,gdata);


    ajGraphxyDisplay(graph,ajFalse);
    ajGraphicsClose();

    ajStrDel(&tit);

    return;
}
Пример #26
0
static void remap_NoCutList(AjPFile outfile, const AjPTable hittable,
			    AjBool html, const AjPStr enzymes, AjBool blunt,
			    AjBool sticky, ajuint sitelen, AjBool commercial,
			    AjBool ambiguity, AjBool limit,
			    const AjPTable retable)
{

    /* for iterating over hittable */
    PValue value;
    void **keyarray = NULL;			/* array for table */
    void **valarray = NULL;			/* array for table */
    ajint i;

    /* list of enzymes that cut */
    AjPList cutlist;
    AjIList citer;			/* iterator for cutlist */
    AjPStr cutname = NULL;
    AjBool found;

    /* for parsing value->iso string */
    AjPStrTok tok;
    char tokens[] = " ,";
    AjPStr code = NULL;
    const char *p;

    /* for reading in enzymes names */
    AjPFile enzfile = NULL;
    AjPStr *ea;
    ajint ne;				/* number of enzymes */
    AjBool isall = ajTrue;

    /* list of enzymes that don't cut */
    AjPList nocutlist;
    AjIList niter;			/* iterator for nocutlist */
    AjPStr nocutname = NULL;

    /* count of rejected enzymes not matching criteria */
    ajint rejected_count = 0;

    EmbPPatRestrict enz;

    /* for renaming preferred isoschizomers */
    AjPList newlist;

     /*
     **
     ** Make a list of enzymes('cutlist') that hit
     ** including the isoschizomer names
     **
     */
    ajDebug("Make a list of all enzymes that cut\n");


    cutlist   = ajListstrNew();
    nocutlist = ajListstrNew();


    ajTableToarrayKeysValues(hittable, &keyarray, &valarray);
    for(i = 0; keyarray[i]; i++)
    {
        value = (PValue) valarray[i];
        cutname = ajStrNew();
        ajStrAssignRef(&cutname, keyarray[i]);
        ajListstrPushAppend(cutlist, cutname);

        /* Add to cutlist all isoschizomers of enzymes that cut */
        ajDebug("Add to cutlist all isoschizomers of enzymes that cut\n");

        /* start token to parse isoschizomers names */
        tok = ajStrTokenNewC(value->iso,  tokens);
        while(ajStrTokenNextParseC(&tok, tokens, &code))
        {
            cutname = ajStrNew();
            ajStrAssignS(&cutname, code);
            ajListstrPushAppend(cutlist, cutname);
        }
        ajStrTokenDel(&tok);
    }
    ajStrDel(&code);
    AJFREE(keyarray);
    AJFREE(valarray);



     /*
     ** Read in list of enzymes ('nocutlist') - either all or
     ** the input enzyme list.
     ** Exclude those that don't match the selection criteria - count these.
     */

    ajDebug("Read in a list of all input enzyme names\n");

    ne = 0;
    if(!enzymes)
	isall = ajTrue;
    else
    {
	/* get input list of enzymes into ea[] */
	ne = ajArrCommaList(enzymes, &ea);
	if(ajStrMatchCaseC(ea[0], "all"))
	    isall = ajTrue;
	else
	{
	    isall = ajFalse;
	    for(i=0; i<ne; ++i)
		ajStrRemoveWhite(&ea[i]);
	}
    }

    enzfile = ajDatafileNewInNameC(ENZDATA);

    /* push all enzyme names without the required criteria onto nocutlist */

    enz = embPatRestrictNew();
    while(!ajFileIsEof(enzfile))
    {
        if(!embPatRestrictReadEntry(enz, enzfile))
	    continue;

         /* 
	 ** If user entered explicit enzyme list, then check to see if
	 ** this is one of that explicit list 
	 */
        if(!isall)
	{
            found = AJFALSE;
            for(i=0; i<ne; ++i)
                if(ajStrMatchCaseS(ea[i], enz->cod))
		{
		    found = AJTRUE;
                    break;
                }

	    if(!found)			/* not in the explicit list */
		continue;

	    ajDebug("RE %S is in the input explicit list of REs\n", enz->cod);
	}

	/* ignore ncuts==0 as they are unknown */
	if(!enz->ncuts)
	{
	    /* number of cut positions */
            ajDebug("RE %S has an unknown number of cut positions\n",
		    enz->cod);
	    continue;
	}
        ajDebug("RE %S has a known number of cut sites\n", enz->cod);

	if(enz->len < sitelen)
	{
	    /* recognition site length */
            ajDebug("RE %S does not have a long enough recognition site\n", 
		    enz->cod);
	    rejected_count++;
	    continue;
	}

	if(!blunt && enz->blunt)
	{
	    /* blunt/sticky */
            ajDebug("RE %S is blunt\n", enz->cod);
	    rejected_count++;
	    continue;
	}

	if(!sticky && !enz->blunt)
	{
	    /* blunt/sticky */
            ajDebug("RE %S is sticky\n", enz->cod);
	    rejected_count++;
	    continue;
	}

	/* commercially available enzymes have uppercase patterns */
	p = ajStrGetPtr(enz->pat);

         /* 
	 ** The -commercial qualifier is only used if we are searching
	 ** through 'all' of the REBASE database - if we have specified an
	 ** explicit list of enzymes then they are searched for whether or
	 ** not they are commercially available
	 */
	if((*p >= 'a' && *p <= 'z') && commercial && isall)
	{
            ajDebug("RE %S is not commercial\n", enz->cod);
	    rejected_count++;
	    continue;
        }

	if(!ambiguity && remap_Ambiguous(enz->pat)) {
	    ajDebug("RE %S is ambiguous\n", enz->cod);
	    rejected_count++;
	    continue;	
	}

        ajDebug("RE %S matches all required criteria\n", enz->cod);

        code = ajStrNew();
	ajStrAssignS(&code, enz->cod);
	ajListstrPushAppend(nocutlist, code);
    }
    embPatRestrictDel(&enz);
    ajFileClose(&enzfile);


    for(i=0; i<ne; ++i)
	if(ea[i])
	    ajStrDel(&ea[i]);

    if(ne)
	AJFREE(ea);

    /*
     ** Change names of enzymes in the non-cutter list
     ** to that of preferred (prototype) 
     ** enzyme name so that the isoschizomers of cutters
     ** will be removed from the 
     ** non-cutter list in the next bit.
     ** Remove duplicate prototype names.
     */
    if(limit)
    {
        newlist = ajListstrNew();
        remap_RenamePreferred(nocutlist, retable, newlist);
        ajListstrFreeData(&nocutlist);
        nocutlist = newlist;
        ajListSortUnique(nocutlist, remap_cmpcase, remap_strdel);
    }


     /*
     ** Iterate through the list of input enzymes removing those that are in
     ** the cutlist.
     */

    ajDebug("Remove from the nocutlist all enzymes and isoschizomers "
	    "that cut\n");

     /*
     **  This steps down both lists at the same time, comparing names and
     **  iterating to the next name in whichever list whose name compares
     **  alphabetically before the other.  Where a match is found, the
     **  nocutlist item is deleted.
     */

    ajListSort(nocutlist, remap_cmpcase);
    ajListSort(cutlist, remap_cmpcase);

    citer = ajListIterNewread(cutlist);
    niter = ajListIterNew(nocutlist);

    /*
       while((cutname = (AjPStr)ajListIterGet(citer)) != NULL)
       ajDebug("dbg cutname = %S\n", cutname);
       */

    nocutname = (AjPStr)ajListIterGet(niter);
    cutname   = (AjPStr)ajListIterGet(citer);

    ajDebug("initial cutname, nocutname: '%S' '%S'\n", cutname, nocutname);

    while(nocutname != NULL && cutname != NULL)
    {
	i = ajStrCmpCaseS(cutname, nocutname);
	ajDebug("compare cutname, nocutname: %S %S ", cutname, nocutname);
	ajDebug("ajStrCmpCase=%d\n", i);
	if(i == 0)
	{
	    /* match - so remove from nocutlist */
	    ajDebug("ajListstrRemove %S\n", nocutname);
	    ajListstrIterRemove(niter);
	    nocutname = (AjPStr)ajListIterGet(niter);
	     /* 
	     ** Don't increment the cutname list pointer here
	     ** - there may be more than one entry in the nocutname
	     ** list with the same name because we have converted 
	     ** isoschizomers to their preferred name
	     */
	    /* cutname = (AjPStr)ajListIterGet(citer); */
	}
	else if(i == -1)
	    /* cutlist name sorts before nocutlist name */
	    cutname = (AjPStr)ajListIterGet(citer);
	else if(i == 1)
	    /* nocutlist name sorts before cutlist name */
	    nocutname = (AjPStr)ajListIterGet(niter);
    }

    ajListIterDel(&citer);
    ajListIterDel(&niter);
    ajListstrFreeData(&cutlist);


     /* Print the resulting list of those that do not cut*/

    ajDebug("Print out the list\n");

    /* print the title */
    if(html)
	ajFmtPrintF(outfile, "<H2>");
    ajFmtPrintF(outfile, "\n\n# Enzymes that do not cut\n\n");

    if(html)
	ajFmtPrintF(outfile, "</H2>\n");

    if(html)
	ajFmtPrintF(outfile, "<PRE>");

    /*  ajListSort(nocutlist, ajStrVcmp);*/
    niter = ajListIterNewread(nocutlist);
    i = 0;
    while((nocutname = (AjPStr)ajListIterGet(niter)) != NULL)
    {
	ajFmtPrintF(outfile, "%-10S", nocutname);
	/* new line after every 7 names printed */
	if(i++ == 7)
	{
	    ajFmtPrintF(outfile, "\n");
	    i = 0;
	}
    }
    ajListIterDel(&niter);


    /* end the output */
    ajFmtPrintF(outfile, "\n");
    if(html) {ajFmtPrintF(outfile, "</PRE>\n");}



     /*
     ** Print the count of rejected enzymes
     ** N.B. This is the count of ALL rejected enzymes including all
     ** isoschizomers
     */

    if(html)
        ajFmtPrintF(outfile, "<H2>");
    ajFmtPrintF(outfile,
		"\n\n# No. of cutting enzymes which do not match the\n"
		"# SITELEN, BLUNT, STICKY, COMMERCIAL, AMBIGUOUS criteria\n\n");
    if(html)
	ajFmtPrintF(outfile, "</H2>\n");
    ajFmtPrintF(outfile, "%d\n", rejected_count);

    ajDebug("Tidy up\n");
    ajListstrFreeData(&nocutlist);
    ajListstrFreeData(&cutlist);

    return;
}
Пример #27
0
int main(int argc, char **argv)
{
    /* Variable declarations */
    AjPStr   dbname = NULL;
    AjPStr   svrname = NULL;
    AjPFile  outfile = NULL;    
    AjPResource resource = NULL;
    AjPResourcein resourcein = NULL;
    AjPStr resourceqry = NULL;
    AjPStr type     = NULL;
    AjBool id;
    AjBool qry;
    AjBool all;
    AjBool verbose;
    AjPStr methods = NULL;
    AjPStr release = NULL;
    AjPStr comment = NULL;
    AjPStr defined = NULL;
    AjPList list = NULL;
    AjPTagval tagval = NULL;
    AjIList iter = NULL;
    ajuint space = 0;
    AjPList aliaslist = NULL;
    AjIList aliter = NULL;
    const AjPStr alias = NULL;
    ajuint maxlen;
    AjPStr truedbname = NULL;
    AjBool isalias = ajFalse;

    /* ACD processing */
    embInit("dbtell", argc, argv);

    dbname   = ajAcdGetString("database");
    svrname   = ajAcdGetString("server");
    verbose  = ajAcdGetBoolean("full");
    outfile  = ajAcdGetOutfile("outfile");
    
    ajStrAssignS(&truedbname, dbname);
    ajNamAliasDatabase(&truedbname);
    ajStrFmtLower(&truedbname);

    if(!ajStrMatchS(dbname, truedbname))
        isalias = ajTrue;

    /* Application logic */
    /* Check EMBOSS database information.
       Write output file */

    if(ajNamDbDetailsSvr(truedbname, svrname, &type, &id, &qry, &all,
                         &comment, &release, &methods, &defined))
    {
        if(isalias)
            ajFmtPrintF(outfile, "# %S is an alias for %S defined in %S\n",
                        dbname, truedbname, defined);
        else
            ajFmtPrintF(outfile, "# %S is defined in %S\n",
                        truedbname, defined);

        ajFmtPrintF(outfile, "# access levels id: %B query: %B all: %B\n\n",
                    id, qry, all);
        ajFmtPrintF(outfile, "DBNAME %S [\n", truedbname);

        if(ajStrGetLen(svrname))
            list = ajNamDbGetAttrlistSvr(truedbname, svrname);
        else
            list = ajNamDbGetAttrlist(truedbname);

        iter = ajListIterNewread(list);
        while(!ajListIterDone(iter))
        {
            tagval = ajListIterGet(iter);
            space = 15 - ajStrGetLen(ajTagvalGetTag(tagval));
            ajFmtPrintF(outfile, "   %S:%.*s\"%S\"\n",
                        ajTagvalGetTag(tagval),
                        space, "                    ",
                        ajTagvalGetValue(tagval));
            ajTagvalDel(&tagval);
        }
        ajListIterDel(&iter);
        ajListFree(&list);

        ajFmtPrintF(outfile, "]\n");

        if(verbose)
        {
            aliaslist = ajListNew();
            ajNamListFindAliases(truedbname, aliaslist);

            if(ajListGetLength(aliaslist))
            {
                ajFmtPrintF(outfile, "\n");
                    
                aliter = ajListIterNewread(aliaslist);

                maxlen = 1;
                while(!ajListIterDone(aliter))
                {
                    alias = ajListIterGet(aliter);
                    if(MAJSTRGETLEN(alias) > maxlen)
                        maxlen = MAJSTRGETLEN(alias);
                }
                
                ajListIterDel(&aliter);
                aliter = ajListIterNewread(aliaslist);

                while(!ajListIterDone(aliter))
                {
                    alias = ajListIterGet(aliter);

                    if(ajStrFindK(alias, ':') < 0)
                        ajFmtPrintF(outfile, "ALIAS %-*S %S\n",
                                    maxlen, alias, truedbname);
                }

                ajListIterDel(&aliter);
            }

            ajListstrFree(&aliaslist);
        }

        ajStrDel(&type);
        ajStrDel(&methods);
        ajStrDel(&release);
        ajStrDel(&comment);
        ajStrDel(&defined);
    }
    else
    {
/* try looking in DRCAT */
        resourcein = ajResourceinNew();
        resource = ajResourceNew();

        ajFmtPrintS(&resourceqry, "drcat:%S", dbname);
        ajResourceinQryS(resourcein, resourceqry);
        if(ajResourceinRead(resourcein, resource))
        {
            ajFmtPrintF(outfile, "DBNAME %S [\n", dbname);
            ajFmtPrintF(outfile, "   comment: \"defined in DRCAT\"\n");
            ajFmtPrintF(outfile, "]\n");
         }
        ajResourceinDel(&resourcein);
        ajResourceDel(&resource);
        ajStrDel(&resourceqry);
    }

    /* Memory clean-up and exit */

    ajFileClose(&outfile);
    ajStrDel(&truedbname);
    ajStrDel(&dbname);
    ajStrDel(&svrname);

    embExit();

    return 0;
}