Пример #1
0
/* Program entry point for "cutgextract".
**
** Pops every name from the "directory" list, and for each one whose base
** name matches the "wildspec" wildcard reads records with
** cutgextract_next() and cutgextract_readcodons(), accumulating codon
** counts per table key.  The key is the "filename" option when it is not
** empty, otherwise the name returned by cutgextract_next().
**
** Afterwards one "CODONS/<key>.cut" data file is written per key, and one
** summary line per key is written to the "outfile" log.
**
** @param [r] argc [int] Argument count, handed to embInit
** @param [r] argv [char **] Argument vector, handed to embInit
** @return [int] Always 0; a file that cannot be opened is reported
**               through ajFatal
*/
int main(int argc, char **argv)
{
    /* Codon for each position of a table value's Count[] array */
    const char *codons[]=
    {
        "TAG","TAA","TGA","GCG","GCA","GCT","GCC","TGT", /* 00-07 */
        "TGC","GAT","GAC","GAA","GAG","TTT","TTC","GGT", /* 08-15 */
        "GGG","GGA","GGC","CAT","CAC","ATA","ATT","ATC", /* 16-23 */
        "AAA","AAG","CTA","TTA","TTG","CTT","CTC","CTG", /* 24-31 */
        "ATG","AAT","AAC","CCG","CCA","CCT","CCC","CAA", /* 32-39 */
        "CAG","CGT","CGA","CGC","AGG","AGA","CGG","TCG", /* 40-47 */
        "TCA","AGT","TCT","TCC","AGC","ACG","ACT","ACA", /* 48-55 */
        "ACC","GTA","GTT","GTC","GTG","TGG","TAT","TAC"  /* 56-63 */
    };

    /* One letter per entry of codons[]; '*' is used for the first three */
    const char *aa=
        "***AAAACCDDEEFFGGGGHHIIIKKLLLLLLMNNPPPPQQRRRRRRSSSSSSTTTTVVVVWYY";

    /* Command line options */
    AjPList files      = NULL;
    AjPStr  wildfile   = NULL;
    AjPStr  release    = NULL;
    AjPFile logfile    = NULL;
    AjPStr  wildorg    = NULL;
    AjPStr  fixedname  = NULL;
    AjBool  allrecords = AJFALSE;

    /* Input side */
    AjPFile infile   = NULL;
    AjPStr  path     = NULL;
    AjPStr  leaf     = NULL;
    AjPStr  division = NULL;
    AjPStr  species  = NULL;
    AjPStr  doc      = NULL;
    char   *recname  = NULL;
    AjPStr  lookup   = NULL;
    AjPStr  newkey   = NULL;
    ajint   before[3];
    ajint   stops;
    ajint   istop = 0;

    /* Accumulated counts */
    AjPTable     orgtable = NULL;
    CutgPValues  val      = NULL;
    AjPStr      *keys     = NULL;
    CutgPValues *vals     = NULL;

    /* Output side */
    AjPFile cutfile = NULL;
    AjPStr  outname = NULL;
    AjPStr  cutname = NULL;
    AjPCod  cod     = NULL;
    ajint   total   = 0;
    ajint   idx     = 0;
    ajint   icod    = 0;
    ajint   slot    = 0;

    embInit("cutgextract",argc,argv);

    lookup  = ajStrNew();
    outname = ajStrNew();

    orgtable = ajTablestrNewLen(TABLE_ESTIMATE);

    files      = ajAcdGetDirlist("directory");
    wildfile   = ajAcdGetString("wildspec");
    release    = ajAcdGetString("release");
    logfile    = ajAcdGetOutfile("outfile");
    wildorg    = ajAcdGetString("species");
    fixedname  = ajAcdGetString("filename");
    allrecords = ajAcdGetBoolean("allrecords");

    /* Release label becomes "CUTG" + user value with whitespace removed */
    ajStrInsertC(&release, 0, "CUTG");
    ajStrRemoveWhite(&release);

    /* ---- Pass 1: read every matching input file ---- */
    while(ajListPop(files,(void **)&path))
    {
        ajStrAssignS(&leaf, path);
        ajFilenameTrimPath(&leaf);
        ajDebug("Testing file '%S'\n", path);

        if(!ajStrMatchWildS(leaf,wildfile))
        {
            ajStrDel(&path);
            continue;
        }

        ajDebug("... matched wildcard '%S'\n", wildfile);

        infile = ajFileNewInNameS(path);
        if(!infile)
            ajFatal("cannot open file %S",path);

        /* Division is derived from the name of the open file */
        ajFmtPrintS(&division, "%F", infile);
        ajFilenameTrimAll(&division);

        for(;;)
        {
            recname = cutgextract_next(infile, wildorg, &species, &doc);
            if(!recname)
                break;

            if(ajStrGetLen(fixedname))
                ajStrAssignS(&lookup,fixedname);
            else
                ajStrAssignC(&lookup,recname);

            /* Create the table entry the first time a key is seen */
            val = ajTableFetch(orgtable,lookup);
            if(!val)
            {
                newkey = ajStrNewS(lookup);
                AJNEW0(val);
                ajStrAssignS(&val->Species,species);
                ajStrAssignS(&val->Division, division);
                ajTablePut(orgtable,(void *)newkey,(void *)val);
            }

            /* Remember the first three counts so the warning below can
            ** report how much each one changed for this record */
            for(istop = 0; istop < 3; istop++)
                before[istop] = val->Count[istop];

            stops = cutgextract_readcodons(infile,allrecords, val->Count);
            if(stops < 1)
            {
                val->Skip++;
                continue;
            }

            /* One CDS is counted per stop codon reported */
            val->CdsCount += stops;

            if(stops > 1)
            {
                val->Warn++;
                ajWarn("Found %d stop codons (%d %d %d) for CDS '%S'",
                       stops,
                       val->Count[0] - before[0],
                       val->Count[1] - before[1],
                       val->Count[2] - before[2],
                       cutgextractSavepid);
            }
        }

        ajStrDel(&path);
        ajFileClose(&infile);
    }

    /* ---- Pass 2: write one codon usage file per table key ---- */
    ajTableToarrayKeysValues(orgtable,(void***) &keys, (void***) &vals);

    /* The loop stops at the first NULL entry of the key array */
    for(idx = 0; keys[idx]; idx++)
    {
        cutname = keys[idx];
        val     = (CutgPValues) vals[idx];
        cod     = ajCodNew();
        total   = 0;

        for(icod = 0; icod < CODONS; ++icod)
        {
            total += val->Count[icod];
            slot = ajCodIndexC(codons[icod]);
            cod->num[slot] = val->Count[icod];

            /* '*' is stored as 27, letters as their offset from 'A' */
            cod->aa[slot] = (aa[icod] == '*') ? 27 : aa[icod] - 'A';
        }
        ajCodCalcUsage(cod,total);

        ajStrAppendC(&cutname, ".cut");

        /* Warnings are reported with allrecords, skips without it */
        if(allrecords && val->Warn)
            ajFmtPrintF(logfile, "Writing %S CDS: %d Warnings: %d\n",
                        cutname, val->CdsCount, val->Warn);
        else if(!allrecords && val->Skip)
            ajFmtPrintF(logfile, "Writing %S CDS: %d Skipped: %d\n",
                        cutname, val->CdsCount, val->Skip);
        else
            ajFmtPrintF(logfile, "Writing %S CDS: %d\n",
                        cutname, val->CdsCount);

        ajFmtPrintS(&outname,"CODONS/%S",cutname);
        cutfile = ajDatafileNewOutNameS(outname);
        if(!cutfile)
            ajFatal("Cannot open output file %S",outname);

        ajCodSetNameS(cod, cutname);
        ajCodSetSpeciesS(cod, val->Species);
        ajCodSetDivisionS(cod, val->Division);
        ajCodSetReleaseS(cod, release);
        ajCodSetNumcds(cod, val->CdsCount);
        ajCodSetNumcodons(cod, total);

        ajCodWrite(cod, cutfile);
        ajFileClose(&cutfile);

        /* Key and value are released here, not by the table */
        ajStrDel(&cutname);
        ajStrDel(&val->Division);
        ajStrDel(&val->Doc);
        ajStrDel(&val->Species);
        AJFREE(val);
        ajCodDel(&cod);
    }

    AJFREE(keys);
    AJFREE(vals);

    /* ---- Clean up ---- */
    ajTableFree(&orgtable);
    ajListFree(&files);
    ajStrDel(&wildfile);
    ajStrDel(&release);
    ajStrDel(&wildorg);
    ajStrDel(&fixedname);
    ajFileClose(&logfile);

    ajStrDel(&cutgextractSavepid);
    ajStrDel(&cutgextractLine);
    ajStrDel(&cutgextractOrg);

    ajStrDel(&outname);
    ajStrDel(&lookup);
    ajStrDel(&species);
    ajStrDel(&doc);
    ajStrDel(&division);
    ajStrDel(&leaf);

    embExit();

    return 0;
}
Пример #2
0
/* Program entry point for "prosextract".
**
** Pass 1 reads "prosite.dat" from the "prositedir" directory.  For every
** entry whose ID line ends in "PATTERN." it writes to the data file
** "PROSITE/prosite.lines": the second token of the ID, AC and DE lines,
** the concatenated second tokens of the PA lines, and the regular
** expression returned by embPatPrositeToRegExp for that pattern.
**
** Pass 2 reads "prosite.doc" from the same directory.  Each "{PS" line
** opens a data file "PROSITE/<name>", where <name> is the text after the
** '{' up to the first ';'.  The lines between the next "{BEGIN}" and
** "{END}" are copied into it.  When several "{PS" lines precede one text
** block, the input is rewound so that each of them gets its own copy.
**
** @param [r] argc [int] Argument count, handed to embInit
** @param [r] argv [char **] Argument vector, handed to embInit
** @return [int] Always 0; a file that cannot be opened is reported
**               through ajFatal
*/
int main(int argc, char **argv)
{
    AjPFile infdat = NULL;
    AjPFile infdoc = NULL;
    AjPFile outf   = NULL;
    AjPFile outs   = NULL;

    AjBool  haspattern;

    const char   *p;


    AjPStr line  = NULL;
    AjPStr text  = NULL;
    AjPStr dirname  = NULL;
    AjPStr filename = NULL;
    AjPStr id    = NULL;
    AjPStr ac    = NULL;
    AjPStr de    = NULL;
    AjPStr pa    = NULL;
    AjPStr fn    = NULL;
    AjPStr re    = NULL;
    AjPStr fname = NULL;
    AjBool flag;
    AjBool isopen;
    AjBool goback;

    ajlong storepos = 0L;


    embInit("prosextract", argc, argv);

    dirname = ajAcdGetDirectoryName("prositedir");

    line = ajStrNew();
    text = ajStrNew();

    id = ajStrNew();
    ac = ajStrNew();
    de = ajStrNew();
    pa = ajStrNew();


    /* ---- Pass 1: prosite.dat -> PROSITE/prosite.lines ---- */

    fn=ajStrNew();
    ajStrAssignS(&fn,dirname);
    ajStrAppendC(&fn,"prosite.dat");
    if(!(infdat=ajFileNewInNameS(fn)))
        ajFatal("Cannot open file %S",fn);
    ajStrDel(&fn);


    fn=ajStrNewC("PROSITE/prosite.lines");
    outf = ajDatafileNewOutNameS(fn);
    /* Every line below is written to outf, so a failed open is fatal */
    if(!outf)
        ajFatal("Cannot open output file %S",fn);
    ajStrDel(&fn);


    haspattern = ajFalse;

    while(ajReadlineTrim(infdat, &line) )
    {
        if(ajStrPrefixC(line, "ID"))
        {
            if(ajStrSuffixC(line,"PATTERN."))
            {
                haspattern = ajTrue;
                /* save id: second token of the line */
                p = ajStrGetPtr(line);
                p = ajSysFuncStrtok(p," \t;");
                p = ajSysFuncStrtok(NULL," \t;");
                ajStrAssignC(&id,p);
                ajFmtPrintF(outf, "%S ", id);
                continue;
            }
            else
            {
                haspattern = ajFalse;
                continue;
            }
        }

        /* Ignore all lines of entries that are not patterns */
        if(!haspattern)
            continue;


        if(ajStrPrefixC(line, "AC") )
        {
            p = ajStrGetPtr(line);
            p = ajSysFuncStrtok(p, " \t;");
            p = ajSysFuncStrtok(NULL, " \t;");
            ajStrAssignC(&ac,p);
            ajFmtPrintF(outf, "%S\n ", ac);
            continue;
        }

        if(ajStrPrefixC(line, "DE") )
        {
            p = ajStrGetPtr(line);
            p = ajSysFuncStrtok(p, " \t.");
            p = ajSysFuncStrtok(NULL, " \t.");
            ajStrAssignC(&de,p);
            ajFmtPrintF(outf, "%S\n ", de);
            continue;
        }


        if(ajStrPrefixC(line, "PA"))
        {
            ajStrAssignC(&pa,"");

            /* A pattern may span several consecutive PA lines */
            while(ajStrPrefixC(line,"PA"))
            {
                p = ajStrGetPtr(line);
                p = ajSysFuncStrtok(p, " \t.");
                p = ajSysFuncStrtok(NULL, " \t.");
                ajStrAppendC(&pa,p);

                /* Stop at end of input instead of relying on what a
                ** failed read leaves in line */
                if(!ajReadlineTrim(infdat, &line))
                    break;
            }

            ajFmtPrintF(outf, "%S\n", pa);
            re = embPatPrositeToRegExp(pa);
            ajFmtPrintF(outf, "^%S\n\n", re);
            ajStrDel(&re);

            /* NOTE(review): the first non-PA line read above is not
            ** examined; the outer loop reads the line after it.
            ** Confirm that line never needs processing. */
            continue;
        }
    }


    /* ---- Pass 2: prosite.doc -> one PROSITE/<name> file per "{PS" ---- */

    fn = ajStrNew();
    ajStrAssignS(&fn,dirname);
    ajStrAppendC(&fn,"prosite.doc");
    if(!(infdoc=ajFileNewInNameS(fn)))
        ajFatal("Cannot open file %S",fn);
    ajStrDel(&fn);


    fname  = ajStrNewC("PROSITE/");
    flag   = ajFalse;
    isopen = ajFalse;
    goback = ajFalse;


    while(ajReadlineTrim(infdoc, &text))
    {
        /* A further "{PS" line while a file is already open: remember to
        ** rewind after the text block so this line gets a copy too */
        if(ajStrPrefixC(text, "{PS") && isopen && !goback)
            goback = ajTrue;

        if(ajStrPrefixC(text, "{PS") && !isopen)
        {
            /* Presumably the read position just after this "{PS" line;
            ** it is the target of the ajFileSeek below */
            storepos = ajFileResetPos(infdoc);
            /* save out the documentation text to acc numbered outfiles . */
            p = ajStrGetPtr(text)+1;
            p = ajSysFuncStrtok(p, ";");
            ajStrAssignS(&filename, fname);
            ajStrAppendC(&filename, p);

            outs = ajDatafileNewOutNameS(filename);
            if(!outs)
                ajFatal("Cannot open output file %S",filename);
            flag   = ajTrue;
            isopen = ajTrue;
            continue;
        }


        if(ajStrPrefixC(text, "{BEGIN}") && flag)
        {
            /* Copy the text block up to, but not including, "{END}" */
            while(ajReadlineTrim(infdoc, &text))
            {
                if(ajStrPrefixC(text,"{END}"))
                    break;

                ajFmtPrintF(outs, "%S\n", text);
            }
            ajFileClose(&outs);
            isopen = ajFalse;

            if(goback)
            {
                goback = ajFalse;
                ajFileSeek(infdoc,storepos,0);
            }

        }
    }

    /* Input ended after a "{PS" line with no "{BEGIN}": the file opened
    ** for it has not been closed yet */
    if(isopen)
        ajFileClose(&outs);

    ajStrDel(&line);
    ajStrDel(&text);
    ajStrDel(&dirname);
    ajStrDel(&filename);

    ajStrDel(&id);
    ajStrDel(&ac);
    ajStrDel(&de);
    ajStrDel(&pa);
    ajStrDel(&re);
    ajStrDel(&fname);


    ajFileClose(&infdat);
    ajFileClose(&infdoc);
    ajFileClose(&outf);

    embExit();

    return 0;
}