int main(int argc, char *argv[]) { embInitPV("genret", argc, argv, "GEMBASSY", "1.0.3"); AjPSeqall seqall; AjPSeq seq = NULL; AjPStr inseq = NULL; AjPStr gene = NULL; AjPStr access = NULL; AjBool accid = ajTrue; AjPStr argument = NULL; AjPFile outfile = NULL; AjPStr seqid = NULL; AjPStr restid = NULL; AjBool valid = ajFalse; AjBool isseq = ajFalse; AjBool isgbk = ajFalse; AjPFilebuff buff = NULL; AjPFile tmpfile = NULL; AjPStr tmpname = NULL; AjPStr regexstr = NULL; AjPStrTok token = NULL; AjPRegexp regex = NULL; AjPStr url = NULL; AjPStr base = NULL; AjPStr head = NULL; AjPStr line = NULL; seqall = ajAcdGetSeqall("sequence"); access = ajAcdGetString("access"); gene = ajAcdGetString("gene"); argument = ajAcdGetString("argument"); accid = ajAcdGetBoolean("accid"); outfile = ajAcdGetOutfile("outfile"); if( ajStrMatchC(access, "translation") || ajStrMatchC(access, "get_exon") || ajStrMatchC(access, "get_exons") || ajStrMatchC(access, "get_cdsseq") || ajStrMatchC(access, "get_gbkseq") || ajStrMatchC(access, "get_geneseq") || ajStrMatchC(access, "get_intron") || ajStrMatchC(access, "getseq") || ajStrMatchC(access, "seq") || ajStrMatchC(access, "around_startcodon") || ajStrMatchC(access, "around_stopcodon") || ajStrMatchC(access, "before_startcodon") || ajStrMatchC(access, "before_stopcodon") || ajStrMatchC(access, "after_startcodon") || ajStrMatchC(access, "after_stopcodon") ) { isseq = ajTrue; } else if(ajStrMatchC(access, "annotate") || ajStrMatchC(access, "output")) { isgbk = ajTrue; } else { ajFmtPrintF(outfile, "gene,%S\n", access); } base = ajStrNewC("rest.g-language.org"); ajStrExchangeCC(&argument, " ", "/"); ajStrExchangeCC(&argument, ",", "/"); ajStrExchangeCC(&argument, "\t", "/"); ajStrExchangeCC(&argument, "\r", "/"); ajStrExchangeCC(&argument, "\n", "/"); if(ajStrMatchC(gene, "*")) { ajStrInsertK(&gene, 0, '.'); } if(ajStrPrefixC(gene, "@") || ajStrPrefixC(gene, "list::")) { ajStrExchangeCC(&gene, "@", ""); ajStrExchangeCC(&gene, "list::", ""); ajStrAssignS(&tmpname, gene); tmpfile = ajFileNewInNameS(tmpname); if(!tmpfile) { ajDie("List file (%S) open error\n", tmpname); } gene = ajStrNew(); while(ajReadline(tmpfile, &line)) { ajStrAppendS(&gene, line); } ajFileClose(&tmpfile); ajStrDel(&tmpname); ajStrDel(&line); } tmpname = ajStrNew(); gAssignUniqueName(&tmpname); while(ajSeqallNext(seqall, &seq)) { inseq = ajStrNew(); if(!accid) { if(gFormatGenbank(seq, &inseq)) { tmpfile = ajFileNewOutNameS(tmpname); if(!tmpfile) { ajDie("Output file (%S) open error\n", tmpname); } ajFmtPrintF(tmpfile, "%S", inseq); ajFileClose(&tmpfile); ajFmtPrintS(&url, "http://%S/upload/upl.pl", base); gFilePostSS(url, tmpname, &restid); ajStrDel(&url); ajSysFileUnlinkS(tmpname); } else { ajWarn("Sequence does not have features\n" "Proceeding with sequence accession ID\n"); accid = ajTrue; } } ajStrAssignS(&seqid, ajSeqGetAccS(seq)); if(ajStrGetLen(seqid) == 0) { ajStrAssignS(&seqid, ajSeqGetNameS(seq)); } if(ajStrGetLen(seqid) == 0) { ajWarn("No valid header information\n"); } if(accid) { ajStrAssignS(&restid, seqid); if(ajStrGetLen(seqid) == 0) { ajDie("Cannot proceed without header with -accid\n"); } if(!gValID(seqid)) { ajDie("Invalid accession ID:%S, exiting\n", seqid); } } url = ajStrNew(); if(isgbk) { ajFmtPrintS(&url, "http://%S/%S/%S", base, restid, access); } else { ajFmtPrintS(&url, "http://%S/%S/*/%S/%S", base, restid, access, argument); } if(!gFilebuffURLS(url, &buff)) { ajDie("GET error from %S\n", url); } while(ajBuffreadLine(buff, &line)) { if(isgbk){ ajFmtPrintF(outfile, "%S", line); continue; } ajStrRemoveLastNewline(&line); regex = ajRegCompC("^>"); if(ajRegExec(regex, line)) { head = ajStrNew(); ajStrAssignS(&head, line); ajStrTrimStartC(&head, ">"); valid = ajFalse; token = ajStrTokenNewC(ajStrNewS(gene), " ,\t\r\n"); while(ajStrTokenNextParse(token, ®exstr)) { if(ajStrGetLen(regexstr)) { regex = ajRegComp(regexstr); if(ajRegExec(regex, line)) { valid = ajTrue; if(ajStrIsAlnum(regexstr)) { ajStrExchangeSC(&gene, regexstr, ""); } } ajRegFree(®ex); } } } else { if(valid) { if(isseq) { ajStrFmtWrap(&line, 60); ajFmtPrintF(outfile, ">%S\n%S\n", head, line); } else { ajFmtPrintF(outfile, "%S,%S\n", head, line); } valid = ajFalse; } } } ajFileClose(&outfile); ajStrDel(&restid); ajStrDel(&seqid); ajStrDel(&inseq); } ajSeqallDel(&seqall); ajSeqDel(&seq); ajStrDel(&access); ajStrDel(&gene); embExit(); }
int main(int argc, char** argv) { AjPFile outf = NULL; AjPFile cachef = NULL; AjIList iterator = NULL; AjPList aliases = NULL; AjPList dbas = NULL; AjPList species = NULL; AjPStr alias = NULL; AjPStr dbname = NULL; AjPStr spname = NULL; AjPStr svrname = NULL; AjPStr svrurl = NULL; AjPStr dbcurl = NULL; AjPTime svrtime = NULL; EnsEDatabaseadaptorGroup dbag = ensEDatabaseadaptorGroupNULL; EnsPDatabaseadaptor dba = NULL; EnsPDatabaseconnection dbc = NULL; embInit("cacheensembl", argc, argv); ensInit(); svrname = ajAcdGetString("servername"); outf = ajAcdGetOutfile("outfile"); cachef = ajAcdGetOutfile("cachefile"); dbcurl = ajStrNew(); svrurl = ajStrNew(); dbname = ajStrNew(); ajNamSvrGetUrl(svrname, &svrurl); if(!svrurl) ajFatal("Could not resolve server name '%S'.", svrname); dbc = ensDatabaseconnectionNewUrl(svrurl); ensRegistryLoadDatabaseconnection(dbc); ensDatabaseconnectionDel(&dbc); /* Write the server file header. */ svrtime = ajTimeNewTodayFmt("cachefile"); ajFmtPrintF(cachef, "# %S %D\n", ajFileGetNameS(cachef), svrtime); ajTimeDel(&svrtime); ajFmtPrintF(cachef, "# Automatically generated by cacheensembl " "for server '%S'.\n\n", svrname); /* ** Get all Ensembl Database Adaptor objects and write them as ** EMBOSS Database definitions. */ aliases = ajListstrNew(); dbas = ajListNew(); species = ajListstrNew(); ensRegistryRetrieveAllSpecies(species); while(ajListstrPop(species, &spname)) { ensRegistryGetAllDatabaseadaptors(ensEDatabaseadaptorGroupNULL, spname, dbas); while(ajListPop(dbas, (void**) &dba)) { dbag = ensDatabaseadaptorGetGroup(dba); if(dbag == ensEDatabaseadaptorGroupNULL) { ajDebug("cacheensembl main got unexpected " "Ensembl Database Adaptor Group %d.\n", dbag); continue; } ajStrAssignS(&dbname, ensDatabaseadaptorGetSpecies(dba)); if(dbag != ensEDatabaseadaptorGroupCore) { ajStrAppendC(&dbname, "_"); ajStrAppendC(&dbname, ensDatabaseadaptorGroupToChar(dbag)); } dbc = ensDatabaseadaptorGetDatabaseconnection(dba); ensDatabaseconnectionFetchUrl(dbc, &dbcurl); if(outf) ajFmtPrintF(outf, "%S\n", dbname); ajFmtPrintF(cachef, "DBNAME %S [\n", dbname); ajFmtPrintF(cachef, " release: \"%s\"\n", ensSoftwareGetVersion()); ajFmtPrintF(cachef, " server: \"%S\"\n", svrname); ajFmtPrintF(cachef, " url: \"%S\"\n", dbcurl); ajFmtPrintF(cachef, "]\n"); ajFmtPrintF(cachef, "\n"); if(dbag != ensEDatabaseadaptorGroupCore) continue; ensRegistryAliasFetchAllbySpecies( ensDatabaseadaptorGetSpecies(dba), aliases); /* ** Format all aliases to lower case, ** sort them alphabetically and remove duplicates. */ iterator = ajListIterNew(aliases); while(!ajListIterDone(iterator)) { alias = ajListstrIterGet(iterator); ajStrFmtLower(&alias); } ajListIterDel(&iterator); ajListSortUnique(aliases, cacheensembl_stringcompare, cacheensembl_stringdelete); alias = NULL; if(ajListGetLength(aliases) > 0) { while(ajListstrPop(aliases, &alias)) { /* ** Reject any aliases with other than alpha-numeric ** characters like white space. */ if(ajStrIsAlnum(alias)) ajFmtPrintF(cachef, "ALIAS %S %S\n", alias, ensDatabaseadaptorGetSpecies(dba)); ajStrDel(&alias); } ajFmtPrintF(cachef, "\n"); } /* Ensembl Database Adaptor objects *must not* be deleted. */ } ajStrDel(&spname); } ajListstrFree(&aliases); ajListFree(&dbas); ajStrDel(&dbcurl); ajStrDel(&svrurl); ajStrDel(&dbname); ajStrDel(&svrname); ajFileClose(&outf); ajFileClose(&cachef); embExit(); return EXIT_SUCCESS; }