int main(int argc, char **argv)
{

    AjPSeqall seqall;
    AjPFile primfile;
    AjPStr rdline = NULL;

    Primer primdata;
    AjPStrTok handle = NULL;

    AjPList primList = NULL;

    embInit("stssearch", argc, argv);

    primfile = ajAcdGetInfile("infile");
    out      = ajAcdGetOutfile("outfile");
    seqall   = ajAcdGetSeqall("seqall");

    while(ajReadlineTrim(primfile, &rdline))
    {
	if(ajStrGetCharFirst(rdline) == '#')
	    continue;
	if(ajStrSuffixC(rdline, ".."))
	    continue;

	AJNEW(primdata);
	primdata->Name   = NULL;
	primdata->Oligoa = NULL;
	primdata->Oligob = NULL;

	handle = ajStrTokenNewC(rdline, " \t");
	ajStrTokenNextParse(&handle, &primdata->Name);

	if(!(nprimers % 1000))
	    ajDebug("Name [%d]: '%S'\n", nprimers, primdata->Name);

	ajStrTokenNextParse(&handle, &primdata->Oligoa);
	ajStrFmtUpper(&primdata->Oligoa);
	primdata->Prima = ajRegComp(primdata->Oligoa);

	ajStrTokenNextParse(&handle, &primdata->Oligob);
	ajStrFmtUpper(&primdata->Oligob);
	primdata->Primb = ajRegComp(primdata->Oligob);
	ajStrTokenDel(&handle);

	if(!nprimers)
	    primList = ajListNew();

	ajListPushAppend(primList, primdata);
	nprimers++;
    }

    if(!nprimers)
	ajFatal("No primers read\n");

    ajDebug("%d primers read\n", nprimers);

    while(ajSeqallNext(seqall, &seq))
    {
	ajSeqFmtUpper(seq);
	ajStrAssignS(&seqstr, ajSeqGetSeqS(seq));
	ajStrAssignS(&revstr, ajSeqGetSeqS(seq));
	ajSeqstrReverse(&revstr);
	ajDebug("Testing: %s\n", ajSeqGetNameC(seq));
	ntests = 0;
	ajListMap(primList, stssearch_primTest, NULL);
    }

    ajFileClose(&out);

    ajSeqallDel(&seqall);
    ajSeqDel(&seq);
    ajFileClose(&out);
    ajStrDel(&revstr);
    ajStrDel(&seqstr);
    ajFileClose(&primfile);
    ajListMap(primList, stssearch_primDel, NULL);
    ajListFree(&primList);
    ajStrDel(&rdline);


    embExit();

    return 0;
}
示例#2
0
int main(int argc, char **argv)
{

    AjPFile inf	 = NULL;
    AjPFile inf2 = NULL;
    AjPFeattable tab = NULL;
    AjPReport report = NULL;

    AjPSeq sequence = NULL;

    AjPStr redatanew = NULL;
    AjPStr str	     = NULL;
    AjPStr regexp    = NULL;
    AjPStr temp	     = NULL;
    AjPStr text      = NULL;
    AjPStr docdata   = NULL;
    AjPStr data      = NULL;
    AjPStr accession = NULL;
    AjPStr name      = NULL;
    EmbPPatMatch match = NULL;
    AjPStr savereg = NULL;
    AjPStr fthit   = NULL;

    AjBool full;
    AjBool prune;

    ajint i;
    ajint number;
    ajint start;
    ajint end;
    ajint length;
    ajint zstart;
    ajint zend;
    const char *p;
    ajint seqlength;
    AjPStr tmpstr  = NULL;
    AjPStr tailstr = NULL;
    AjPFeature gf;

    embInit("patmatmotifs", argc, argv);

    ajStrAssignC(&fthit, "SO:0001067");

    savereg   = ajStrNew();
    str       = ajStrNew();
    regexp    = ajStrNew();
    temp      = ajStrNew();
    data      = ajStrNew();
    accession = ajStrNew();
    text      = ajStrNew();
    name      = ajStrNew();

    sequence = ajAcdGetSeq("sequence");
    report   = ajAcdGetReport("outfile");
    full     = ajAcdGetBoolean("full");
    prune    = ajAcdGetBoolean("prune");

    ajSeqFmtUpper(sequence);		/* prosite regexs are all upper case */
    tab = ajFeattableNewSeq(sequence);
    ajStrAssignC(&tailstr, "");

    seqlength = ajStrGetLen(str);
    str       = ajSeqGetSeqCopyS(sequence);

    redatanew = ajStrNewC("PROSITE/prosite.lines");
    docdata   = ajStrNewC("PROSITE/");

    inf = ajDatafileNewInNameS(redatanew);
    if(!inf)
	ajFatal("Either EMBOSS_DATA undefined or PROSEXTRACT needs running");

    ajFmtPrintAppS(&tmpstr, "Full: %B\n", full);
    ajFmtPrintAppS(&tmpstr, "Prune: %B\n", prune);
    ajFmtPrintAppS(&tmpstr, "Data_file: %F\n", inf);
    ajReportSetHeaderS(report, tmpstr);

    while(ajReadlineTrim(inf, &regexp))
    {
	p=ajStrGetPtr(regexp);
	if(*p && *p!=' ' && *p!='^')
	{
	    p=ajSysFuncStrtok(p," ");
	    ajStrAssignC(&name,p);
	    if(prune)
		if(ajStrMatchCaseC(name,"myristyl") ||
		   ajStrMatchCaseC(name,"asn_glycosylation") ||
		   ajStrMatchCaseC(name,"camp_phospho_site") ||
		   ajStrMatchCaseC(name,"pkc_phospho_site") ||
		   ajStrMatchCaseC(name,"ck2_phospho_site") ||
		   ajStrMatchCaseC(name,"tyr_phospho_site"))
		{
		    for(i=0;i<4;++i)
			ajReadlineTrim(inf, &regexp);
		    continue;
		}
	    p=ajSysFuncStrtok(NULL," ");
	    ajStrAssignC(&accession,p);
	}

	if(ajStrPrefixC(regexp, "^"))
	{
	    p = ajStrGetPtr(regexp);

	    ajStrAssignC(&temp,p+1);
	    ajStrAssignC(&savereg,p+1);

	    match = embPatMatchFind(temp, str, ajFalse, ajFalse);
	    number = embPatMatchGetNumber(match);

	    for(i=0; i<number; i++)
	    {
		seqlength = ajStrGetLen(str);

		start = 1+embPatMatchGetStart(match, i);

		end = 1+embPatMatchGetEnd(match, i);

		length = embPatMatchGetLen(match, i);

		gf = ajFeatNew(tab, NULL, fthit, start, end,
			       (float) length, ' ', 0);

		ajFmtPrintS(&tmpstr, "*motif %S", name);
		ajFeatTagAddSS(gf, NULL, tmpstr);

		if(start-5<0)
		    zstart = 0;
		else
		    zstart = start-5;

		if(end+5> seqlength)
		    zend = end;
		else
		    zend = end+5;


		ajStrAssignSubS(&temp, str, zstart, zend);
	    }


	    if(full && number)
	    {
		ajStrAssignC(&redatanew,ajStrGetPtr(docdata));
		ajStrAppendC(&redatanew,ajStrGetPtr(accession));
		inf2 = ajDatafileNewInNameS(redatanew);
		if(!inf2)
		    continue;

		/*
		** Insert Prosite documentation from files made by
		** prosextract.c
		*/
		ajFmtPrintAppS(&tailstr, "Motif: %S\n", name);
		ajFmtPrintAppS(&tailstr, "Count: %d\n\n", number);
		while(ajReadlineTrim(inf2, &text))
		    ajFmtPrintAppS(&tailstr, "%S\n", text);

		ajFmtPrintAppS(&tailstr, "\n***************\n\n");
		ajFileClose(&inf2);

	    }
	    embPatMatchDel(&match);
	}
    }

    ajReportSetTailS(report,tailstr);
    ajReportWrite(report, tab, sequence);

    ajReportDel(&report);
    ajFeattableDel(&tab);

    ajStrDel(&temp);
    ajStrDel(&regexp);
    ajStrDel(&savereg);
    ajStrDel(&str);
    ajStrDel(&data);
    ajStrDel(&docdata);
    ajStrDel(&text);
    ajStrDel(&redatanew);
    ajStrDel(&accession);
    ajSeqDel(&sequence);
    ajStrDel(&tailstr);
    ajStrDel(&fthit);
    ajStrDel(&name);
    ajStrDel(&tmpstr);

    ajFeattableDel(&tab);
    ajFileClose(&inf);

    embExit();

    return 0;
}
示例#3
0
int main(int argc, char **argv)
{
    AjPSeqall seqall;
    AjPSeq seq     = NULL;
    AjPStr seqcmp  = NULL;
    AjPStr enzymes = NULL;
    AjPFile outf   = NULL;
    ajint begin;
    ajint end;
    ajint min;
    ajint max;
    ajint sitelen;
    AjBool alpha;
    AjBool single;
    AjBool blunt;
    AjBool ambiguity;
    AjBool sticky;
    AjBool plasmid;
    AjBool threeprime;
    AjBool commercial;
    AjBool html;
    AjBool limit;
    AjBool frags;
    AjBool methyl;
    AjPFile dfile;

    AjPFile enzfile  = NULL;
    AjPFile equfile  = NULL;
    AjPFile methfile = NULL;
    
    AjPStr name = NULL;

    AjPTable table = NULL;

    ajint hits;

    AjPList l = NULL;

    embInit("restover", argc, argv);

    seqall    = ajAcdGetSeqall("sequence");
    seqcmp    = ajAcdGetString("seqcomp");
    ajStrFmtUpper(&seqcmp);
    outf      = ajAcdGetOutfile("outfile");

    /*
    ** Some of these are not needed but I left them in case someone wants to
    ** use them some time ...
    */
    enzymes   = ajStrNewC("all");

    min        = ajAcdGetInt("min");
    max        = ajAcdGetInt("max");
    sitelen    = 2;
    threeprime = ajAcdGetBoolean("threeprime");
    blunt      = ajAcdGetBoolean("blunt");
    sticky     = ajAcdGetBoolean("sticky");
    single     = ajAcdGetBoolean("single");
    html       = ajAcdGetBoolean("html");
    alpha      = ajAcdGetBoolean("alphabetic");
    ambiguity  = ajAcdGetBoolean("ambiguity");
    plasmid    = ajAcdGetBoolean("plasmid");
    commercial = ajAcdGetBoolean("commercial");
    limit      = ajAcdGetBoolean("limit");
    frags      = ajAcdGetBoolean("fragments");
    methyl     = ajAcdGetBoolean("methylation");
    dfile      = ajAcdGetDatafile("datafile");
    methfile   = ajAcdGetDatafile("mfile");

    if(single)
	max = min = 1;

    table = ajTablestrNew(EQUGUESS);
    l = ajListNew();

    if(threeprime)
	ajStrReverse(&seqcmp);

    /* read the local file of enzymes names */
    restover_read_file_of_enzyme_names(&enzymes);

    if(!dfile)
    {
	enzfile = ajDatafileNewInNameC(ENZDATA);
	if(!enzfile)
	    ajFatal("Cannot locate enzyme file. Run REBASEEXTRACT");
    }
    else
    {
	enzfile = dfile;
    }



    if(limit)
    {
	equfile = ajDatafileNewInNameC(EQUDATA);
	if(!equfile)
	    limit=ajFalse;
	else
	{
	    restover_read_equiv(equfile,table);
	    ajFileClose(&equfile);
	}
    }



    while(ajSeqallNext(seqall, &seq))
    {
	begin = ajSeqallGetseqBegin(seqall);
	end   = ajSeqallGetseqEnd(seqall);
	ajFileSeek(enzfile,0L,0);
	ajSeqFmtUpper(seq);

	hits = embPatRestrictMatch(seq,begin,end,enzfile,methfile,enzymes,
                                   sitelen,plasmid,ambiguity,min,max,blunt,
                                   sticky,commercial,methyl,l);
	ajDebug("hits:%d listlen:%u\n", hits, ajListGetLength(l));
	if(hits)
	{
	    name = ajStrNewC(ajSeqGetNameC(seq));
	    restover_printHits(seq, seqcmp, outf,l,name,hits,begin,end,
			       min,max,plasmid,
			       sitelen,limit,table,alpha,frags,
			       html);
	    ajStrDel(&name);
	}

	ajListFree(&l);
    }


    ajListFree(&l);
    ajSeqDel(&seq);
    ajFileClose(&outf);
    ajFileClose(&dfile);
    ajFileClose(&enzfile);
    ajFileClose(&equfile);
    ajFileClose(&methfile);

    ajSeqallDel(&seqall);
    ajStrDel(&seqcmp);
    ajStrDel(&enzymes);
    ajStrDel(&name);

    ajTablestrFree(&table);

    embExit();

    return 0;
}