int main(int argc, char **argv) { const char *codons[]= { "TAG","TAA","TGA","GCG","GCA","GCT","GCC","TGT", /* 00-07 */ "TGC","GAT","GAC","GAA","GAG","TTT","TTC","GGT", /* 08-15 */ "GGG","GGA","GGC","CAT","CAC","ATA","ATT","ATC", /* 16-23 */ "AAA","AAG","CTA","TTA","TTG","CTT","CTC","CTG", /* 24-31 */ "ATG","AAT","AAC","CCG","CCA","CCT","CCC","CAA", /* 32-39 */ "CAG","CGT","CGA","CGC","AGG","AGA","CGG","TCG", /* 40-47 */ "TCA","AGT","TCT","TCC","AGC","ACG","ACT","ACA", /* 48-55 */ "ACC","GTA","GTT","GTC","GTG","TGG","TAT","TAC" /* 56-63 */ }; const char *aa= "***AAAACCDDEEFFGGGGHHIIIKKLLLLLLMNNPPPPQQRRRRRRSSSSSSTTTTVVVVWYY"; AjPFile inf = NULL; AjPFile outf = NULL; char *entryname = NULL; AjPStr fname = NULL; AjPStr key = NULL; AjPStr tmpkey = NULL; AjBool allrecords = AJFALSE; AjPTable table = NULL; ajint i = 0; ajint j = 0; ajint k = 0; ajint x = 0; ajint savecount[3]; AjPStr *keyarray = NULL; CutgPValues *valarray = NULL; AjPCod codon = NULL; ajint sum = 0; char c; AjPList flist = NULL; AjPFile logf = NULL; AjPStr entry = NULL; AjPStr baseentry = NULL; AjPStr wild = NULL; AjPStr division = NULL; AjPStr release = NULL; AjPStr wildspecies = NULL; CutgPValues value = NULL; AjPStr docstr = NULL; AjPStr species = NULL; AjPStr filename = NULL; ajint nstops; embInit("cutgextract",argc,argv); tmpkey = ajStrNew(); fname = ajStrNew(); table = ajTablestrNewLen(TABLE_ESTIMATE); flist = ajAcdGetDirlist("directory"); wild = ajAcdGetString("wildspec"); release = ajAcdGetString("release"); logf = ajAcdGetOutfile("outfile"); wildspecies = ajAcdGetString("species"); filename = ajAcdGetString("filename"); allrecords = ajAcdGetBoolean("allrecords"); ajStrInsertC(&release, 0, "CUTG"); ajStrRemoveWhite(&release); while(ajListPop(flist,(void **)&entry)) { ajStrAssignS(&baseentry, entry); ajFilenameTrimPath(&baseentry); ajDebug("Testing file '%S'\n", entry); if(!ajStrMatchWildS(baseentry,wild)) { ajStrDel(&entry); continue; } ajDebug("... matched wildcard '%S'\n", wild); inf = ajFileNewInNameS(entry); if(!inf) ajFatal("cannot open file %S",entry); ajFmtPrintS(&division, "%F", inf); ajFilenameTrimAll(&division); while((entryname = cutgextract_next(inf, wildspecies, &species, &docstr))) { if(ajStrGetLen(filename)) ajStrAssignS(&tmpkey,filename); else ajStrAssignC(&tmpkey,entryname); /* See if organism is already in the table */ value = ajTableFetch(table,tmpkey); if(!value) /* Initialise */ { key = ajStrNewS(tmpkey); AJNEW0(value); ajStrAssignS(&value->Species,species); ajStrAssignS(&value->Division, division); ajTablePut(table,(void *)key,(void *)value); } for(k=0;k<3;k++) savecount[k] = value->Count[k]; nstops = cutgextract_readcodons(inf,allrecords, value->Count); if(nstops < 1) { value->Skip++; continue; } value->CdsCount++; if(nstops>1) { value->CdsCount += (nstops - 1); value->Warn++; ajWarn("Found %d stop codons (%d %d %d) for CDS '%S'", nstops, value->Count[0] - savecount[0], value->Count[1] - savecount[1], value->Count[2] - savecount[2], cutgextractSavepid); } } ajStrDel(&entry); ajFileClose(&inf); } ajTableToarrayKeysValues(table,(void***) &keyarray, (void***) &valarray); i = 0; while(keyarray[i]) { key = keyarray[i]; value = (CutgPValues) valarray[i++]; codon = ajCodNew(); sum = 0; for(j=0;j<CODONS;++j) { sum += value->Count[j]; x = ajCodIndexC(codons[j]); codon->num[x] = value->Count[j]; c = aa[j]; if(c=='*') codon->aa[x] = 27; else codon->aa[x] = c-'A'; } ajCodCalcUsage(codon,sum); ajStrAppendC(&key, ".cut"); if(allrecords) { if(value->Warn) ajFmtPrintF(logf, "Writing %S CDS: %d Warnings: %d\n", key, value->CdsCount, value->Warn); else ajFmtPrintF(logf, "Writing %S CDS: %d\n", key, value->CdsCount); } else { if(value->Skip) ajFmtPrintF(logf, "Writing %S CDS: %d Skipped: %d\n", key, value->CdsCount, value->Skip); else ajFmtPrintF(logf, "Writing %S CDS: %d\n", key, value->CdsCount); } ajFmtPrintS(&fname,"CODONS/%S",key); outf = ajDatafileNewOutNameS(fname); if(!outf) ajFatal("Cannot open output file %S",fname); ajCodSetNameS(codon, key); ajCodSetSpeciesS(codon, value->Species); ajCodSetDivisionS(codon, value->Division); ajCodSetReleaseS(codon, release); ajCodSetNumcds(codon, value->CdsCount); ajCodSetNumcodons(codon, sum); ajCodWrite(codon, outf); ajFileClose(&outf); ajStrDel(&key); ajStrDel(&value->Division); ajStrDel(&value->Doc); ajStrDel(&value->Species); AJFREE(value); ajCodDel(&codon); } AJFREE(keyarray); AJFREE(valarray); ajTableFree(&table); ajListFree(&flist); ajStrDel(&wild); ajStrDel(&release); ajStrDel(&wildspecies); ajStrDel(&filename); ajFileClose(&logf); ajStrDel(&cutgextractSavepid); ajStrDel(&cutgextractLine); ajStrDel(&cutgextractOrg); ajStrDel(&fname); ajStrDel(&tmpkey); ajStrDel(&species); ajStrDel(&docstr); ajStrDel(&division); ajStrDel(&baseentry); embExit(); return 0; }
int main(int argc, char **argv) { AjPFile infdat = NULL; AjPFile infdoc = NULL; AjPFile outf = NULL; AjPFile outs = NULL; AjBool haspattern; const char *p; AjPStr line = NULL; AjPStr text = NULL; AjPStr dirname = NULL; AjPStr filename = NULL; AjPStr id = NULL; AjPStr ac = NULL; AjPStr de = NULL; AjPStr pa = NULL; AjPStr ps = NULL; AjPStr fn = NULL; AjPStr re = NULL; AjPStr fname = NULL; AjBool flag; AjBool isopen; AjBool goback; ajlong storepos = 0L; embInit("prosextract", argc, argv); dirname = ajAcdGetDirectoryName("prositedir"); line = ajStrNew(); text = ajStrNew(); id = ajStrNew(); ac = ajStrNew(); de = ajStrNew(); pa = ajStrNew(); ps = ajStrNew(); fn=ajStrNew(); ajStrAssignS(&fn,dirname); ajStrAppendC(&fn,"prosite.dat"); if(!(infdat=ajFileNewInNameS(fn))) ajFatal("Cannot open file %S",fn); ajStrDel(&fn); fn=ajStrNewC("PROSITE/prosite.lines"); outf = ajDatafileNewOutNameS(fn); ajStrDel(&fn); haspattern = ajFalse; while(ajReadlineTrim(infdat, &line) ) { if(ajStrPrefixC(line, "ID")) { if(ajStrSuffixC(line,"PATTERN.")) { haspattern = ajTrue; /*save id*/ p = ajStrGetPtr(line); p = ajSysFuncStrtok(p," \t;"); p = ajSysFuncStrtok(NULL," \t;"); ajStrAssignC(&id,p); ajFmtPrintF(outf, "%S ", id); continue; } else { haspattern = ajFalse; continue; } } if(!haspattern) continue; if(ajStrPrefixC(line, "AC") ) { p = ajStrGetPtr(line); p = ajSysFuncStrtok(p, " \t;"); p = ajSysFuncStrtok(NULL, " \t;"); ajStrAssignC(&ac,p); ajFmtPrintF(outf, "%S\n ", ac); continue; } if(ajStrPrefixC(line, "DE") ) { p = ajStrGetPtr(line); p = ajSysFuncStrtok(p, " \t."); p = ajSysFuncStrtok(NULL, " \t."); ajStrAssignC(&de,p); ajFmtPrintF(outf, "%S\n ", de); continue; } if(ajStrPrefixC(line, "PA")) { ajStrAssignC(&pa,""); while(ajStrPrefixC(line,"PA")) { p = ajStrGetPtr(line); p = ajSysFuncStrtok(p, " \t."); p = ajSysFuncStrtok(NULL, " \t."); ajStrAppendC(&pa,p); ajReadlineTrim(infdat, &line); } ajFmtPrintF(outf, "%S\n", pa); re = embPatPrositeToRegExp(pa); ajFmtPrintF(outf, "^%S\n\n", re); ajStrDel(&re); continue; } } /* Finished processing prosite.dat so look at prosite.doc */ fn = ajStrNew(); ajStrAssignS(&fn,dirname); ajStrAppendC(&fn,"prosite.doc"); if(!(infdoc=ajFileNewInNameS(fn))) ajFatal("Cannot open file %S",fn); ajStrDel(&fn); fname = ajStrNewC("PROSITE/"); flag = ajFalse; isopen = ajFalse; goback = ajFalse; while(ajReadlineTrim(infdoc, &text)) { if(ajStrPrefixC(text, "{PS") && isopen && !goback) goback = ajTrue; if(ajStrPrefixC(text, "{PS") && !isopen) { storepos = ajFileResetPos(infdoc); /* save out the documentation text to acc numbered outfiles . */ p = ajStrGetPtr(text)+1; p = ajSysFuncStrtok(p, ";"); ajStrAssignS(&filename, fname); ajStrAppendC(&filename, p); outs = ajDatafileNewOutNameS(filename); flag = ajTrue; isopen = ajTrue; continue; } if(ajStrPrefixC(text, "{BEGIN}") && flag) { while(ajReadlineTrim(infdoc, &text)) { if(ajStrPrefixC(text,"{END}")) break; ajFmtPrintF(outs, "%S\n", text); } ajFileClose(&outs); isopen = ajFalse; if(goback) { goback = ajFalse; ajFileSeek(infdoc,storepos,0); } } } ajStrDel(&line); ajStrDel(&text); ajStrDel(&dirname); ajStrDel(&filename); ajStrDel(&id); ajStrDel(&ac); ajStrDel(&de); ajStrDel(&pa); ajStrDel(&re); ajStrDel(&ps); ajStrDel(&fname); ajFileClose(&infdat); ajFileClose(&infdoc); ajFileClose(&outf); embExit(); return 0; }