コード例 #1
0
ファイル: tfscan.c プロジェクト: WenchaoLin/JAMg
static void tfscan_print_hits(AjPList *l,
			      ajint hits, AjPReport outf,
			      const AjPTable t,
			      const AjPSeq seq, ajuint minlength,
			      const  AjPTable btable)
{
    ajint i;
    EmbPMatMatch m;
    const AjPStr acc = NULL;
    const AjPStr bf  = NULL;
    AjPFeattable ftable = NULL;
    AjPFeature gf  = NULL;
    AjPStr type = ajStrNewC("SO:0000235");
    AjPStr tmpstr = NULL;

    ftable = ajFeattableNewSeq(seq);

    /*ajFmtPrintF(outf,"TFSCAN of %s from %d to %d\n\n",ajStrGetPtr(name),
      begin,end);*/

    for(i=0;i<hits;++i)
    {
	ajListPop(*l,(void **)&m);
	acc = ajTableFetchS(t, m->seqname);

	if(m->len >= minlength)
        {
	    /*ajFmtPrintF(outf,"%-20s %-8s %-5d %-5d %s\n",
                        ajStrGetPtr(m->seqname),
			ajStrGetPtr(acc),m->start,
			m->start+m->len-1,ajStrGetPtr(s));*/
            if(m->forward)
                gf = ajFeatNew(ftable, ajUtilGetProgram(), type,
                               m->start,
                               m->start+m->len-1,
                               (float) m->score,
                               '+',0);
            else
                gf = ajFeatNew(ftable, ajUtilGetProgram(), type,
                               m->start,
                               m->start+m->len-1,
                               (float) m->score,
                               '-',0);
            ajFmtPrintS(&tmpstr, "*acc %S", acc);
            ajFeatTagAddSS(gf, NULL, tmpstr);
            bf  = ajTableFetchS(btable, m->seqname);
            ajFmtPrintS(&tmpstr, "*factor %S", bf);
            ajFeatTagAddSS(gf, NULL, tmpstr);
        }

	embMatMatchDel(&m);
    }

    ajReportWrite(outf, ftable, seq);
    ajFeattableDel(&ftable);
    ajStrDel(&tmpstr);
    ajStrDel(&type);

    return;
}
コード例 #2
0
ファイル: patmatmotifs.c プロジェクト: WenchaoLin/JAMg
int main(int argc, char **argv)
{

    AjPFile inf	 = NULL;
    AjPFile inf2 = NULL;
    AjPFeattable tab = NULL;
    AjPReport report = NULL;

    AjPSeq sequence = NULL;

    AjPStr redatanew = NULL;
    AjPStr str	     = NULL;
    AjPStr regexp    = NULL;
    AjPStr temp	     = NULL;
    AjPStr text      = NULL;
    AjPStr docdata   = NULL;
    AjPStr data      = NULL;
    AjPStr accession = NULL;
    AjPStr name      = NULL;
    EmbPPatMatch match = NULL;
    AjPStr savereg = NULL;
    AjPStr fthit   = NULL;

    AjBool full;
    AjBool prune;

    ajint i;
    ajint number;
    ajint start;
    ajint end;
    ajint length;
    ajint zstart;
    ajint zend;
    const char *p;
    ajint seqlength;
    AjPStr tmpstr  = NULL;
    AjPStr tailstr = NULL;
    AjPFeature gf;

    embInit("patmatmotifs", argc, argv);

    ajStrAssignC(&fthit, "SO:0001067");

    savereg   = ajStrNew();
    str       = ajStrNew();
    regexp    = ajStrNew();
    temp      = ajStrNew();
    data      = ajStrNew();
    accession = ajStrNew();
    text      = ajStrNew();
    name      = ajStrNew();

    sequence = ajAcdGetSeq("sequence");
    report   = ajAcdGetReport("outfile");
    full     = ajAcdGetBoolean("full");
    prune    = ajAcdGetBoolean("prune");

    ajSeqFmtUpper(sequence);		/* prosite regexs are all upper case */
    tab = ajFeattableNewSeq(sequence);
    ajStrAssignC(&tailstr, "");

    seqlength = ajStrGetLen(str);
    str       = ajSeqGetSeqCopyS(sequence);

    redatanew = ajStrNewC("PROSITE/prosite.lines");
    docdata   = ajStrNewC("PROSITE/");

    inf = ajDatafileNewInNameS(redatanew);
    if(!inf)
	ajFatal("Either EMBOSS_DATA undefined or PROSEXTRACT needs running");

    ajFmtPrintAppS(&tmpstr, "Full: %B\n", full);
    ajFmtPrintAppS(&tmpstr, "Prune: %B\n", prune);
    ajFmtPrintAppS(&tmpstr, "Data_file: %F\n", inf);
    ajReportSetHeaderS(report, tmpstr);

    while(ajReadlineTrim(inf, &regexp))
    {
	p=ajStrGetPtr(regexp);
	if(*p && *p!=' ' && *p!='^')
	{
	    p=ajSysFuncStrtok(p," ");
	    ajStrAssignC(&name,p);
	    if(prune)
		if(ajStrMatchCaseC(name,"myristyl") ||
		   ajStrMatchCaseC(name,"asn_glycosylation") ||
		   ajStrMatchCaseC(name,"camp_phospho_site") ||
		   ajStrMatchCaseC(name,"pkc_phospho_site") ||
		   ajStrMatchCaseC(name,"ck2_phospho_site") ||
		   ajStrMatchCaseC(name,"tyr_phospho_site"))
		{
		    for(i=0;i<4;++i)
			ajReadlineTrim(inf, &regexp);
		    continue;
		}
	    p=ajSysFuncStrtok(NULL," ");
	    ajStrAssignC(&accession,p);
	}

	if(ajStrPrefixC(regexp, "^"))
	{
	    p = ajStrGetPtr(regexp);

	    ajStrAssignC(&temp,p+1);
	    ajStrAssignC(&savereg,p+1);

	    match = embPatMatchFind(temp, str, ajFalse, ajFalse);
	    number = embPatMatchGetNumber(match);

	    for(i=0; i<number; i++)
	    {
		seqlength = ajStrGetLen(str);

		start = 1+embPatMatchGetStart(match, i);

		end = 1+embPatMatchGetEnd(match, i);

		length = embPatMatchGetLen(match, i);

		gf = ajFeatNew(tab, NULL, fthit, start, end,
			       (float) length, ' ', 0);

		ajFmtPrintS(&tmpstr, "*motif %S", name);
		ajFeatTagAddSS(gf, NULL, tmpstr);

		if(start-5<0)
		    zstart = 0;
		else
		    zstart = start-5;

		if(end+5> seqlength)
		    zend = end;
		else
		    zend = end+5;


		ajStrAssignSubS(&temp, str, zstart, zend);
	    }


	    if(full && number)
	    {
		ajStrAssignC(&redatanew,ajStrGetPtr(docdata));
		ajStrAppendC(&redatanew,ajStrGetPtr(accession));
		inf2 = ajDatafileNewInNameS(redatanew);
		if(!inf2)
		    continue;

		/*
		** Insert Prosite documentation from files made by
		** prosextract.c
		*/
		ajFmtPrintAppS(&tailstr, "Motif: %S\n", name);
		ajFmtPrintAppS(&tailstr, "Count: %d\n\n", number);
		while(ajReadlineTrim(inf2, &text))
		    ajFmtPrintAppS(&tailstr, "%S\n", text);

		ajFmtPrintAppS(&tailstr, "\n***************\n\n");
		ajFileClose(&inf2);

	    }
	    embPatMatchDel(&match);
	}
    }

    ajReportSetTailS(report,tailstr);
    ajReportWrite(report, tab, sequence);

    ajReportDel(&report);
    ajFeattableDel(&tab);

    ajStrDel(&temp);
    ajStrDel(&regexp);
    ajStrDel(&savereg);
    ajStrDel(&str);
    ajStrDel(&data);
    ajStrDel(&docdata);
    ajStrDel(&text);
    ajStrDel(&redatanew);
    ajStrDel(&accession);
    ajSeqDel(&sequence);
    ajStrDel(&tailstr);
    ajStrDel(&fthit);
    ajStrDel(&name);
    ajStrDel(&tmpstr);

    ajFeattableDel(&tab);
    ajFileClose(&inf);

    embExit();

    return 0;
}
コード例 #3
0
void embPatternRegexSearch (AjPFeattable ftable, const AjPSeq seq,
			    const AjPPatternRegex pat, AjBool reverse)
{
    ajint pos=0;
    ajint off;
    ajint len;
    AjPFeature sf    = NULL;
    AjPStr substr    = NULL;
    AjPStr seqstr    = NULL;
    AjPStr tmpstr = NULL;
    AjPStr tmp       = ajStrNew();
    AjPRegexp patexp = ajPatternRegexGetCompiled(pat);
    ajint adj;
    AjBool isreversed;
    AjPSeq revseq;
    ajint seqlen;

    seqlen = ajSeqGetLen(seq);
    if(!seqlen)
        return;

    isreversed = ajSeqIsReversedTrue(seq);

    if(isreversed)
	seqlen += ajSeqGetOffset(seq);

    pos = ajSeqGetBeginTrue(seq);
    adj = ajSeqGetEndTrue(seq);

    if(!ajStrGetLen(featMotifProt))
        ajStrAssignC(&featMotifProt, "SO:0001067");

    if(!ajStrGetLen(featMotifNuc))
        ajStrAssignC(&featMotifNuc, "SO:0000714");

    /*ajDebug("embPatternRegexSearch pos: %d adj: %d reverse: %B\n",
	   pos, adj, reverse, isreversed);*/
    /*ajDebug("seqlen:%d len: %d offset: %d offend: %d begin: %d end: %d\n",
	   seqlen , ajSeqGetLen(seq), ajSeqGetOffset(seq),
	   ajSeqGetOffend(seq), ajSeqGetBegin(seq), ajSeqGetEnd(seq));*/

    if (reverse)
    {
        revseq = ajSeqNewSeq(seq);
        ajStrAssignSubS(&seqstr, ajSeqGetSeqS(revseq), pos-1, adj-1);
        ajSeqstrReverse(&seqstr);
    }

    ajStrAssignSubS(&seqstr, ajSeqGetSeqS(seq), pos-1, adj-1);

    ajStrFmtUpper(&seqstr);

    while(ajStrGetLen(seqstr) && ajRegExec(patexp, seqstr))
    {
	off = ajRegOffset(patexp);
	len = ajRegLenI(patexp, 0);

	if(off || len)
	{
	    ajRegSubI(patexp, 0, &substr);
	    ajRegPost(patexp, &tmp);
	    ajStrAssignS(&seqstr, substr);
            ajStrAppendS(&seqstr, tmp);
	    pos += off;

	    /*ajDebug("match pos: %d adj: %d len: %d off:%d\n",
                    pos, adj, len, off);*/
            if (reverse)
                sf = ajFeatNew(ftable, NULL, featMotifNuc,
                                   adj - pos - len + 2,
                                   adj - pos + 1,
                                   0.0, '-', 0);
	    else
            {
                if(ajSeqIsProt(seq) || ajFeattableIsProt(ftable))
                    sf = ajFeatNewProt(ftable, NULL, featMotifProt,
                                       pos, pos + len - 1,
                                       0.0);
                else
                    sf = ajFeatNew(ftable, NULL, featMotifNuc,
                                   pos, pos + len - 1,
                                   0.0, '.', 0);
            }
            
	    if(isreversed)
		ajFeatReverse(sf, seqlen);

	    ajFmtPrintS (&tmpstr,"*pat %S: %S",
			 ajPatternRegexGetName(pat),
                         ajPatternRegexGetPattern(pat));
	    ajFeatTagAdd (sf,NULL,tmpstr);
	    pos += 1;
	    ajStrCutStart(&seqstr, 1);
	}
	else
	{
	    pos++;
	    ajStrCutStart(&seqstr, 1);
	}
    }

    ajStrDel(&tmpstr);
    ajStrDel(&tmp);
    ajStrDel(&substr);
    ajStrDel(&seqstr);

    if(reverse)
	ajSeqDel(&revseq);

    return;
}
コード例 #4
0
void embPatternSeqSearch (AjPFeattable ftable, const AjPSeq seq,
			  const AjPPatternSeq pat, AjBool reverse)
{
    const void *tidy;
    ajuint hits;
    ajuint i;
    AjPPatComp pattern;
    EmbPMatMatch m = NULL;
    AjPFeature sf  = NULL;
    AjPSeq revseq  = NULL;
    AjPList list   = ajListNew();
    AjPStr seqstr  = ajStrNew();
    AjPStr seqname = ajStrNew();
    AjPStr tmp     = ajStrNew();
    ajint adj;
    ajint begin;
    AjBool isreversed;
    ajint seqlen;

    seqlen = ajSeqGetLen(seq);
    if(!seqlen)
        return;

    isreversed = ajSeqIsReversedTrue(seq);

    if(isreversed)
	seqlen += ajSeqGetOffset(seq);

    begin = ajSeqGetBeginTrue(seq);
    adj = ajSeqGetEndTrue(seq);

    if(!ajStrGetLen(featMotifProt))
        ajStrAssignC(&featMotifProt, "SO:0001067");

    if(!ajStrGetLen(featMotifNuc))
        ajStrAssignC(&featMotifNuc, "SO:0000714");

    ajStrAssignS(&seqname,ajSeqGetNameS(seq));
    pattern = ajPatternSeqGetCompiled(pat);

    if (reverse)
    {
        revseq = ajSeqNewSeq(seq);
        ajStrAssignSubS(&seqstr, ajSeqGetSeqS(revseq),
			begin-1,adj-1);
        ajSeqstrReverse(&seqstr);
    }
    else
        ajStrAssignSubS(&seqstr, ajSeqGetSeqS(seq),
			begin-1,adj-1);

    ajStrFmtUpper(&seqstr);
    /*ajDebug("seqlen:%d len: %d offset: %d offend: %d begin: %d end: %d\n"
	   "'%S'\n",
	   seqlen , ajSeqGetLen(seq), ajSeqGetOffset(seq),
	   ajSeqGetOffend(seq), ajSeqGetBegin(seq), ajSeqGetEnd(seq),
	   seqstr);*/

    ajDebug("embPatternSeqSearch '%S' protein: %B reverse: %B\n",
	    pattern->pattern, pat->Protein, reverse);
    embPatFuzzSearchII(pattern,begin,seqname,seqstr,list,
                       ajPatternSeqGetMismatch(pat),&hits,&tidy);

    ajDebug ("embPatternSeqSearch: found %d hits\n",hits);

    if(!reverse)
	ajListReverse(list);

    for(i=0;i<hits;++i)
    {
        ajListPop(list,(void **)&m);

 	if (reverse)
	    sf = ajFeatNew(ftable, NULL, featMotifNuc,
                           adj - m->start - m->len + begin + 1,
                           adj - m->start + begin,
                           0.0, '-', 0);
	else
        {
	    if(ajSeqIsProt(seq) || ajFeattableIsProt(ftable))
                sf = ajFeatNewProt(ftable, NULL, featMotifProt,
                                   m->start,
                                   m->start + m->len - 1,
                                   0.0);
            else
                sf = ajFeatNew(ftable, NULL, featMotifNuc,
                               m->start,
                               m->start + m->len - 1,
                               0.0, '.', 0);
        }
        
	if(isreversed)
	    ajFeatReverse(sf, seqlen);

	/*
	ajUser("isrev: %B reverse: %B begin: %d adj: %d "
	       "start: %d len: %d seqlen: %d %d..%d '%c'\n",
	       isreversed, reverse, begin, adj, m->start, m->len, seqlen,
	       sf->Start, sf->End, sf->Strand);
	*/

	ajFeatSetScore(sf, (float) (m->len - m->mm));

        ajFmtPrintS(&tmp, "*pat %S: %S",
                    ajPatternSeqGetName(pat),
                    ajPatternSeqGetPattern(pat));
        ajFeatTagAdd(sf,NULL,tmp);

        if(m->mm)
        {
            ajFmtPrintS(&tmp, "*mismatch %d", m->mm);
            ajFeatTagAdd(sf, NULL, tmp);
        }

        embMatMatchDel(&m);
    }

    ajStrDel(&seqname);
    ajStrDel(&seqstr);
    ajStrDel(&tmp);
    ajListFree(&list);

    if (reverse)
        ajSeqDel(&revseq);

    return;
}
コード例 #5
0
static void cpgreport_cpgsearch(AjPFile outf, ajint from, ajint to,
				const char *p,
				const char *name, ajint begin, ajint score,
				AjPFeattable feattable)
{
    ajint i;
    ajint c;
    ajint z;

    ajint sum;
    ajint ssum;
    ajint lsum;
    ajint t;
    ajint top;

    ajint dcg;
    ajint dgc;
    ajint gc;
    char  strand = '+';
    ajint frame  = 0;
    AjPFeature feature;
    float score2 = 0.0;

    if(!cpgreportSource)
    {
      ajStrAssignC(&cpgreportSource,"cpgreport");
      ajStrAssignC(&cpgreportType,"misc_feature");
    }


    for(i=from,c=to-1,sum=ssum=t=top=0,lsum=-1,z=begin-1;i<to;++i,ssum=sum)
    {
	if(p[i]=='C' && p[i+1]=='G' && c-i)
	    sum += score+1;
	--sum;

	if(sum<0)
	    sum = 0;

	if(!sum && ssum)
	{
	    cpgreport_calcgc(lsum+1,t+2,p,&dcg,&dgc,&gc);
	    if(dgc)
	    {
	        score2 = (float) top;
	        feature = ajFeatNew(feattable, cpgreportSource,
				    cpgreportType,
				    lsum+2+z,t+2+z,
				    score2, strand, frame) ;
		if(!feature)
		    ajDebug("Error feature not added to feature table");

		ajFmtPrintF(outf,"%-20.20s %6d %6d %5d ",name,lsum+2+z,
			    t+2+z,top);
		ajFmtPrintF(outf,"     %5d %5.1f %6.2f\n",
			    dcg,(float)gc*100.0/(float)(t+1-lsum),
			    (float)(dcg/dgc));
	    }
	    else
	    {
		score2   = (float) top;
		feature = ajFeatNew(feattable, cpgreportSource,
				    cpgreportType,
				    lsum+2+z,t+2+z,
				    score2, strand, frame) ;
		ajFmtPrintF(outf,"%-20s %6d %6d %5d ",name,lsum+2+z,t+2+z,
			    top);
		ajFmtPrintF(outf,"     %5d %5.1f    -\n",
			    dcg,(float)gc*100.0/(float)(t+1-lsum));
	    }
	    cpgreport_cpgsearch(outf,t+2,i,p,name,begin,score,
				feattable);
	    sum = ssum = lsum = t = top = 0;
	}

	if(sum>top)
	{
	    t   = i;
	    top = sum;
	}

	if(!sum)
	    lsum = i;
    }


    if(sum)
    {
	cpgreport_calcgc(lsum+1,t+2,p,&dcg,&dgc,&gc);
	if(dgc)
	{
	    ajFmtPrintF(outf,"%-20s %6d %6d %5d ",name,lsum+2+z,t+2+z,
			top);
	    ajFmtPrintF(outf,"     %5d %5.1f %6.2f\n",
			dcg,(float)gc*100.0/(float)(t+1-lsum),
			((float)dcg/(float)dgc));
	    score2   = (float) top;
	    /*score2 = ajFmtPrintS(&score2,"%d.0",top);*/
	    feature  = ajFeatNew(feattable, cpgreportSource,
				 cpgreportType,
				 lsum+2+z,t+2+z,
				 score2, strand, frame);
	}
	else
	{
	    ajFmtPrintF(outf,"%-20s %6d %6d %5d ",name,lsum+2+z,t+2+z,top);
	    ajFmtPrintF(outf,"     %5d %5.1f    -\n",dcg,
			(float)gc*100.0/(float)(t+1-lsum));
	    score2   = (float) top;
	    /*score2 = ajFmtPrintS(&score2,"%d.0",top);*/
	    feature  = ajFeatNew(feattable, cpgreportSource,
				 cpgreportType,
				 lsum+2+z,t+2+z,
				 score2, strand, frame);
	}

	cpgreport_cpgsearch(outf,t+2,to,p,name,begin,score,feattable);
    }

    return;
}