int main(int argc, char **argv) { AjPSeqset seqset; AjPSeqall seqall; AjPSeq seq; ajint i = 0; AjPStr kimout = NULL; AjPStr dir = NULL; AjPFile obofile = NULL; AjPFile resfile = NULL; AjPDir taxdir = NULL; embInit("ajtest", argc, argv); seqall = ajAcdGetSeqall ("sequence"); seqset = ajAcdGetSeqset ("bsequence"); dir = ajAcdGetOutdirName("outdir"); obofile = ajAcdGetInfile ("obofile"); taxdir = ajAcdGetDirectory ("taxdir"); resfile = ajAcdGetInfile ("dbxreffile"); ajUser("Directory '%S'", dir); ajUser("Set of %d", ajSeqsetGetSize(seqset)); while(ajSeqallNext (seqall, &seq)) { ajUser ("%3d <%S>", i++, ajSeqGetUsaS(seq)); ajFmtPrintS(&kimout, "kim%d.out", i); ajtest_kim (kimout, seq); } ajSeqDel(&seq); ajSeqallDel(&seqall); ajSeqsetDel(&seqset); ajStrDel(&kimout); ajStrDel(&dir); if(taxdir) ajTaxLoad(taxdir); ajDirDel(&taxdir); if(obofile) ajOboParseObofile(obofile, ""); ajFileClose(&obofile); if(resfile) ajResourceParse(resfile, ""); ajFileClose(&resfile); embExit(); return 0; }
int main(int argc, char **argv) { AjPList idlist; AjPList* fieldList = NULL; AjBool systemsort; AjBool cleanup; ajint blastv = 0; char dbtype = '\0'; ajuint maxindex; ajuint maxidlen = 0; ajuint maxlen; AjPStr version = NULL; AjPStr seqtype = NULL; AjPFile elistfile = NULL; AjPFile* alistfile = NULL; AjPStr dbname = NULL; AjPStr release = NULL; AjPStr datestr = NULL; AjPStr sortopt = NULL; void **entryIds = NULL; AjBool usesrc = AJTRUE; AjPStr directory; AjPStr indexdir; AjPStr filename; AjPStr exclude; AjPStr curfilename = NULL; AjPStr idformat = NULL; EmbPEntry entry; PBlastDb db = NULL; ajuint idCount = 0; ajuint idDone; AjPList listTestFiles = NULL; void ** testFiles = NULL; ajuint nfiles; ajuint ifile; ajuint jfile; ajuint filesize; short recsize; ajuint maxfilelen = 20; char date[4] = { 0,0,0,0 }; AjPStr tmpfname = NULL; AjPStr* fields = NULL; AjPFile entFile = NULL; AjPStr* divfiles = NULL; ajint* maxFieldLen = NULL; ajuint ifield = 0; ajuint nfields = 0; AjPFile logfile = NULL; ajuint* countField = NULL; ajuint* fieldTot = NULL; ajuint idCountFile = 0; ajuint i = 0; embInit("dbiblast", argc, argv); idformat = ajStrNewC("NCBI"); fields = ajAcdGetList("fields"); directory = ajAcdGetDirectoryName("directory"); indexdir = ajAcdGetOutdirName("indexoutdir"); filename = ajAcdGetString("filenames"); exclude = ajAcdGetString("exclude"); dbname = ajAcdGetString("dbname"); release = ajAcdGetString("release"); datestr = ajAcdGetString("date"); systemsort = ajAcdGetBoolean("systemsort"); cleanup = ajAcdGetBoolean("cleanup"); sortopt = ajAcdGetString("sortoptions"); maxindex = ajAcdGetInt("maxindex"); version = ajAcdGetListSingle("blastversion"); seqtype = ajAcdGetListSingle("seqtype"); usesrc = ajAcdGetBoolean("sourcefile"); logfile = ajAcdGetOutfile("outfile"); while(fields[nfields]) /* array ends with a NULL */ nfields++; if(nfields) { AJCNEW(maxFieldLen, nfields); AJCNEW0(countField, nfields); AJCNEW0(fieldTot, nfields); for(ifield=0; ifield < nfields; ifield++) maxFieldLen[ifield] = (ajint) maxindex * -1; if(systemsort) AJCNEW(alistfile, nfields); else { AJCNEW(fieldList, nfields); for(ifield=0; ifield < nfields; ifield++) fieldList[ifield] = ajListNew(); } } if(ajStrMatchC(datestr, "00/00/00")) ajFmtPrintS(&datestr, "%D", ajTimeRefTodayFmt("dbindex")); ajStrRemoveWhite(&dbname); /* used for temp filenames */ embDbiDateSet(datestr, date); idlist = ajListNew(); if(ajUtilGetBigendian()) readReverse = ajFalse; else readReverse = ajTrue; ajStrToInt(version, &blastv); dbtype = ajStrGetCharFirst(seqtype); ajDebug("reading '%S/%S'\n", directory, filename); ajDebug("writing '%S/'\n", indexdir); listTestFiles = embDbiFileListExc(directory, filename, exclude); ajListSort(listTestFiles, ajStrVcmp); nfiles = ajListToarray(listTestFiles, &testFiles); if(!nfiles) ajDie("No input files in '%S' matched filename '%S'", directory, filename); embDbiLogHeader(logfile, dbname, release, datestr, indexdir, maxindex); embDbiLogFields(logfile, fields, nfields); embDbiLogSource(logfile, directory, filename, exclude, (AjPStr*) testFiles, nfiles); embDbiLogCmdline(logfile); AJCNEW0(divfiles, nfiles); /* ** process each input file, one at a time */ jfile = 0; for(ifile=0; ifile < nfiles; ifile++) { curfilename = (AjPStr) testFiles[ifile]; if(!dbiblast_blastopenlib(curfilename, usesrc, blastv, dbtype, &db)) continue; /* could be the wrong file type with "*.*" */ ajDebug("processing filename '%S' ...\n", curfilename); ajDebug("processing file '%S' ...\n", db->TFile->Name); ajStrAssignS(&divfiles[jfile], db->TFile->Name); ajFilenameTrimPath(&divfiles[jfile]); if(ajStrGetLen(divfiles[jfile]) >= maxfilelen) maxfilelen = ajStrGetLen(divfiles[jfile]) + 1; if(systemsort) /* elistfile for entries, alist for fields */ elistfile = embDbiSortOpen(alistfile, jfile, dbname, fields, nfields); idCountFile = 0; for(i=0;i<nfields;i++) countField[i] = 0; while((entry=dbiblast_nextblastentry(db, jfile, idformat, systemsort, fields, maxFieldLen, &maxidlen, countField, elistfile, alistfile))) { idCountFile++; if(!systemsort) /* save the entry data in lists */ { embDbiMemEntry(idlist, fieldList, nfields, entry, jfile); } } idCount += idCountFile; if(systemsort) { embDbiSortClose(&elistfile, alistfile, nfields); /* lost the entry, so can't free it :-) */ } embDbiLogFile(logfile, curfilename, idCountFile, fields, countField, nfields); dbiblast_dbfree(&db); jfile++; } nfiles = jfile; /* ** write the division.lkp file */ embDbiWriteDivision(indexdir, dbname, release, date, maxfilelen, nfiles, divfiles, NULL); /* ** Write the entryname.idx index */ ajStrAssignC(&tmpfname, "entrynam.idx"); entFile = ajFileNewOutNamePathS(tmpfname, indexdir); recsize = maxidlen+10; filesize = 300 + (idCount*(ajint)recsize); embDbiHeader(entFile, filesize, idCount, recsize, dbname, release, date); if(systemsort) idDone = embDbiSortWriteEntry(entFile, maxidlen, dbname, nfiles, cleanup, sortopt); else /* save entries in entryIds array */ { idDone = embDbiMemWriteEntry(entFile, maxidlen, idlist, &entryIds); if(idDone != idCount) ajFatal("Duplicates not allowed for in-memory processing"); } embDbiHeaderSize(entFile, 300+(idDone*(ajint)recsize), idDone); ajFileClose(&entFile); /* ** Write the fields index files */ for(ifield=0; ifield < nfields; ifield++) { if(maxindex) maxlen = maxindex; else { if(maxFieldLen[ifield] >= 0) maxlen = maxFieldLen[ifield]; else maxlen = - maxFieldLen[ifield]; } if(systemsort) fieldTot[ifield] = embDbiSortWriteFields(dbname, release, date, indexdir, fields[ifield], maxlen, nfiles, idCount, cleanup, sortopt); else fieldTot[ifield] = embDbiMemWriteFields(dbname, release, date, indexdir, fields[ifield], maxlen, fieldList[ifield], entryIds); } embDbiLogFinal(logfile,maxindex, maxFieldLen, fields, fieldTot, nfields, nfiles, idDone, idCount); if(systemsort) embDbiRmEntryFile(dbname, cleanup); ajListMap(idlist, embDbiEntryDelMap, NULL); ajListFree(&idlist); AJFREE(entryIds); ajStrDelarray(&fields); for(i=0;i<nfields;i++) { if(systemsort) { ajFileClose(&alistfile[i]); } else { ajListMap(fieldList[i], embDbiFieldDelMap, NULL); ajListFree(&fieldList[i]); } } AJFREE(alistfile); AJFREE(fieldList); ajStrDel(&version); ajStrDel(&seqtype); ajFileClose(&elistfile); for(i=0;i<nfiles;i++) { ajStrDel(&divfiles[i]); } AJFREE(countField); AJFREE(fieldTot); ajStrDel(&dbname); ajStrDel(&release); ajStrDel(&datestr); ajStrDel(&sortopt); ajStrDel(&directory); ajStrDel(&indexdir); ajStrDel(&filename); ajStrDel(&exclude); ajStrDel(&idformat); ajStrDel(&tmpfname); AJFREE(maxFieldLen); ajFileClose(&logfile); ajListstrFreeData(&listTestFiles); ajStrDel(&t); ajStrDel(&id); ajStrDel(&acc); ajStrDel(&hline); ajStrDel(&tmpdes); ajStrDel(&tmpfd); ajStrDel(&tmpgi); ajStrDel(&tmpdb); ajStrDel(&tmpac); ajStrDel(&tmpsv); ajRegFree(&wrdexp); embDbiEntryDel(&dbiblastEntry); if(fdl) { for(i=0; i < nfields; i++) ajListFree(&fdl[i]); AJFREE(fdl); } for(i=0;i<nfiles;i++) { ajStrDel(&divfiles[i]); } AJFREE(divfiles); AJFREE(testFiles); embExit(); return 0; }
int main(int argc, char **argv) { AjPList idlist; AjPList* fieldList = NULL; AjBool systemsort; AjBool cleanup; ajuint maxindex; ajuint maxidlen = 0; ajuint maxlen; AjPFile elistfile = NULL; AjPFile* alistfile = NULL; AjPStr dbname = NULL; AjPStr release = NULL; AjPStr datestr = NULL; AjPStr sortopt = NULL; void **entryIds = NULL; AjPStr directory; AjPStr indexdir; AjPStr filename; AjPStr exclude; AjPStr curfilename = NULL; AjPFile libr=NULL; AjPStr idformat = NULL; EmbPEntry entry; ajuint idtype = 0; ajuint idCount = 0; ajuint idDone; AjPList listInputFiles = NULL; void ** inputFiles = NULL; ajuint nfiles; ajuint ifile; ajuint filesize; short recsize; ajuint maxfilelen = 20; char date[4] = { 0,0,0,0 }; AjPStr tmpfname = NULL; AjPStr* fields = NULL; AjPFile entFile = NULL; AjPStr* divfiles = NULL; AjPRegexp regIdExp = NULL; ajint* maxFieldLen = NULL; ajuint ifield = 0; ajuint nfields = 0; AjPFile logfile = NULL; ajuint* countField = NULL; ajuint* fieldTot = NULL; ajuint idCountFile = 0; ajuint i; embInit("dbifasta", argc, argv); idformat = ajAcdGetListSingle("idformat"); fields = ajAcdGetList("fields"); directory = ajAcdGetDirectoryName("directory"); indexdir = ajAcdGetOutdirName("indexoutdir"); filename = ajAcdGetString("filenames"); exclude = ajAcdGetString("exclude"); dbname = ajAcdGetString("dbname"); release = ajAcdGetString("release"); datestr = ajAcdGetString("date"); systemsort = ajAcdGetBoolean("systemsort"); cleanup = ajAcdGetBoolean("cleanup"); sortopt = ajAcdGetString("sortoptions"); maxindex = ajAcdGetInt("maxindex"); logfile = ajAcdGetOutfile("outfile"); while(fields[nfields]) /* array ends with a NULL */ nfields++; if(nfields) { AJCNEW(maxFieldLen, nfields); AJCNEW0(countField, nfields); AJCNEW0(fieldTot, nfields); for(ifield=0; ifield < nfields; ifield++) maxFieldLen[ifield] = (ajint)maxindex * -1; if(systemsort) AJCNEW(alistfile, nfields); else { AJCNEW(fieldList, nfields); for(ifield=0; ifield < nfields; ifield++) fieldList[ifield] = ajListNew(); } } if(ajStrMatchC(datestr, "00/00/00")) ajFmtPrintS(&datestr, "%D", ajTimeRefTodayFmt("dbindex")); ajStrRemoveWhite(&dbname); /* used for temp filenames */ embDbiDateSet(datestr, date); idlist = ajListNew(); regIdExp = dbifasta_getExpr(idformat, &idtype); ajDebug("reading '%S/%S'\n", directory, filename); ajDebug("writing '%S/'\n", indexdir); listInputFiles = embDbiFileListExc(directory, filename, exclude); ajListSort(listInputFiles, &ajStrVcmp); nfiles = (ajuint) ajListToarray(listInputFiles, &inputFiles); if(!nfiles) ajDie("No input files in '%S' matched filename '%S'", directory, filename); embDbiLogHeader(logfile, dbname, release, datestr, indexdir, maxindex); embDbiLogFields(logfile, fields, nfields); embDbiLogSource(logfile, directory, filename, exclude, (AjPStr*) inputFiles, nfiles); embDbiLogCmdline(logfile); AJCNEW0(divfiles, nfiles); /* ** process each input file, one at a time */ for(ifile=0; ifile < nfiles; ifile++) { ajStrAssignS(&curfilename,(AjPStr) inputFiles[ifile]); embDbiFlatOpenlib(curfilename, &libr); ajFilenameTrimPath(&curfilename); if(ajStrGetLen(curfilename) >= maxfilelen) maxfilelen = ajStrGetLen(curfilename) + 1; ajDebug("processing filename '%S' ...\n", curfilename); ajDebug("processing file '%F' ...\n", libr); ajStrAssignS(&divfiles[ifile], curfilename); if(systemsort) /* elistfile for entries, alist for fields */ elistfile = embDbiSortOpen(alistfile, ifile, dbname, fields, nfields); idCountFile = 0; for(i=0;i<nfields;i++) countField[i] = 0; while((entry=dbifasta_NextFlatEntry(libr, ifile, regIdExp, idtype, systemsort, fields, maxFieldLen, &maxidlen, countField, elistfile, alistfile))) { idCountFile++; if(!systemsort) /* save the entry data in lists */ embDbiMemEntry(idlist, fieldList, nfields, entry, ifile); entry = NULL; } idCount += idCountFile; if(systemsort) { embDbiSortClose(&elistfile, alistfile, nfields); AJFREE(entry); } else { embDbiEntryDel(&dbifastaGEntry); } embDbiLogFile(logfile, curfilename, idCountFile, fields, countField, nfields); } /* write the division.lkp file */ embDbiWriteDivision(indexdir, dbname, release, date, maxfilelen, nfiles, divfiles, NULL); /* Write the entryname.idx index */ ajStrAssignC(&tmpfname, "entrynam.idx"); entFile = ajFileNewOutNamePathS(tmpfname, indexdir); recsize = maxidlen+10; filesize = 300 + (idCount*(ajint)recsize); embDbiHeader(entFile, filesize, idCount, recsize, dbname, release, date); if(systemsort) idDone = embDbiSortWriteEntry(entFile, maxidlen, dbname, nfiles, cleanup, sortopt); else /* save entries in entryIds array */ { idDone = embDbiMemWriteEntry(entFile, maxidlen, idlist, &entryIds); if(idDone != idCount) ajFatal("Duplicates not allowed for in-memory processing"); } embDbiHeaderSize(entFile, 300+(idDone*(ajint)recsize), idDone); ajFileClose(&entFile); /* Write the fields index files */ for(ifield=0; ifield < nfields; ifield++) { if(maxindex) maxlen = maxindex; else { if(maxFieldLen[ifield] >= 0) maxlen = maxFieldLen[ifield]; else maxlen = - maxFieldLen[ifield]; } if(systemsort) fieldTot[ifield] = embDbiSortWriteFields(dbname, release, date, indexdir, fields[ifield], maxlen, nfiles, idCount, cleanup, sortopt); else fieldTot[ifield] = embDbiMemWriteFields(dbname, release, date, indexdir, fields[ifield], maxlen, fieldList[ifield], entryIds); } embDbiLogFinal(logfile,maxindex, maxFieldLen, fields, fieldTot, nfields, nfiles, idDone, idCount); if(systemsort) embDbiRmEntryFile(dbname, cleanup); ajStrDel(&idformat); ajStrDelarray(&fields); ajStrDel(&filename); ajStrDel(&exclude); ajStrDel(&dbname); ajStrDel(&release); ajStrDel(&datestr); ajStrDel(&sortopt); ajStrDel(&directory); ajStrDel(&indexdir); ajStrDel(&tmpfname); ajFileClose(&libr); ajFileClose(&logfile); for(i=0;i<nfields;i++) { if(systemsort) { ajFileClose(&alistfile[i]); } else { ajListMap(fieldList[i], &embDbiFieldDelMap, NULL); ajListFree(&fieldList[i]); } } AJFREE(alistfile); AJFREE(fieldList); AJFREE(maxFieldLen); AJFREE(countField); AJFREE(fieldTot); for(i=0;i<nfiles;i++) { ajStrDel(&divfiles[i]); } AJFREE(divfiles); AJFREE(inputFiles); embDbiEntryDel(&dbifastaGEntry); ajStrDel(&dbifastaGRline); ajStrDel(&dbifastaGTmpId); if(dbifastaGFdl) { for(i=0; i < nfields; i++) ajListFree(&dbifastaGFdl[i]); AJFREE(dbifastaGFdl); } ajListMap(idlist, &embDbiEntryDelMap, NULL); ajListFree(&idlist); ajListstrFreeData(&listInputFiles); AJFREE(entryIds); ajRegFree(&dbifastaGIdexp); ajRegFree(&dbifastaGWrdexp); ajRegFree(®IdExp); ajStrDel(&dbifastaGTmpAc); ajStrDel(&dbifastaGTmpSv); ajStrDel(&dbifastaGTmpGi); ajStrDel(&dbifastaGTmpDb); ajStrDel(&dbifastaGTmpDes); ajStrDel(&dbifastaGTmpFd); ajStrDel(&curfilename); embExit(); return 0; }
int main(int argc, char **argv) { EmbPBtreeEntry entry = NULL; AjPStr dbname = NULL; AjPStr dbrs = NULL; AjPStr release = NULL; AjPStr datestr = NULL; AjBool statistics; AjBool compressed; AjPStr directory; AjPStr indexdir; AjPStr filename; AjPStr exclude; AjPStr dbtype = NULL; AjPFile outf = NULL; AjPStr *fieldarray = NULL; ajint nfields; ajint nfiles; AjPStr tmpstr = NULL; AjPStr thysfile = NULL; ajint i; AjPFile inf = NULL; AjPStr word = NULL; AjPBtId idobj = NULL; AjPBtPri priobj = NULL; AjPBtHybrid hyb = NULL; ajulong nentries = 0L; ajulong ientries = 0L; AjPTime starttime = NULL; AjPTime begintime = NULL; AjPTime nowtime = NULL; ajlong startclock = 0; ajlong beginclock = 0; ajlong nowclock = 0; ajulong idcache=0L, idread = 0L, idwrite = 0L, idsize= 0L; ajulong accache=0L, acread = 0L, acwrite = 0L, acsize= 0L; ajulong svcache=0L, svread = 0L, svwrite = 0L, svsize= 0L; ajulong kwcache=0L, kwread = 0L, kwwrite = 0L, kwsize= 0L; ajulong decache=0L, deread = 0L, dewrite = 0L, desize= 0L; ajulong txcache=0L, txread = 0L, txwrite = 0L, txsize= 0L; double tdiff = 0.0; ajint days = 0; ajint hours = 0; ajint mins = 0; embInit("dbxflat", argc, argv); dbtype = ajAcdGetListSingle("idformat"); fieldarray = ajAcdGetList("fields"); directory = ajAcdGetDirectoryName("directory"); outf = ajAcdGetOutfile("outfile"); indexdir = ajAcdGetOutdirName("indexoutdir"); filename = ajAcdGetString("filenames"); exclude = ajAcdGetString("exclude"); dbname = ajAcdGetString("dbname"); dbrs = ajAcdGetString("dbresource"); release = ajAcdGetString("release"); datestr = ajAcdGetString("date"); statistics = ajAcdGetBoolean("statistics"); compressed = ajAcdGetBoolean("compressed"); entry = embBtreeEntryNew(); if(compressed) embBtreeEntrySetCompressed(entry); tmpstr = ajStrNew(); idobj = ajBtreeIdNew(); priobj = ajBtreePriNew(); hyb = ajBtreeHybNew(); nfields = embBtreeSetFields(entry,fieldarray); embBtreeSetDbInfo(entry,dbname,dbrs,datestr,release,dbtype,directory, indexdir); for(i=0; i< nfields; i++) { if(ajStrMatchC(fieldarray[i], "acc")) { accfield = embBtreeGetFieldS(entry, fieldarray[i]); if(compressed) embBtreeFieldSetCompressed(accfield); } else if(ajStrMatchC(fieldarray[i], "sv")) { svfield = embBtreeGetFieldS(entry, fieldarray[i]); if(compressed) embBtreeFieldSetCompressed(svfield); } else if(ajStrMatchC(fieldarray[i], "des")) { desfield = embBtreeGetFieldS(entry, fieldarray[i]); if(compressed) embBtreeFieldSetCompressed(desfield); } else if(ajStrMatchC(fieldarray[i], "key")) { keyfield = embBtreeGetFieldS(entry, fieldarray[i]); if(compressed) embBtreeFieldSetCompressed(keyfield); } else if(ajStrMatchC(fieldarray[i], "org")) { orgfield = embBtreeGetFieldS(entry, fieldarray[i]); if(compressed) embBtreeFieldSetCompressed(orgfield); } else if(!ajStrMatchC(fieldarray[i], "id")) ajErr("Unknown field '%S' specified for indexing", fieldarray[i]); } embBtreeGetRsInfo(entry); nfiles = embBtreeGetFiles(entry,directory,filename,exclude); if(!nfiles) ajDie("No input files in '%S' matched filename '%S'", directory, filename); embBtreeWriteEntryFile(entry); embBtreeOpenCaches(entry); starttime = ajTimeNewToday(); ajFmtPrintF(outf, "Processing directory: %S\n", directory); for(i=0;i<nfiles;++i) { begintime = ajTimeNewToday(); beginclock = ajClockNow(); ajListPop(entry->files,(void **)&thysfile); ajListPushAppend(entry->files,(void *)thysfile); ajFmtPrintS(&tmpstr,"%S%S",entry->directory,thysfile); if(!(inf=ajFileNewInNameS(tmpstr))) ajFatal("Cannot open input file %S\n",tmpstr); ajFilenameTrimPath(&tmpstr); ajFmtPrintF(outf,"Processing file: %S",tmpstr); ientries = 0L; while(dbxflat_NextEntry(entry,inf)) { ++ientries; if(entry->do_id) { if(ajStrGetLen(entry->id) > entry->idlen) { if(ajStrGetLen(entry->id) > maxidlen) { ajWarn("id '%S' too long, truncating to idlen %d", entry->id, entry->idlen); maxidlen = ajStrGetLen(entry->id); } idtrunc++; ajStrKeepRange(&entry->id,0,entry->idlen-1); } ajStrFmtLower(&entry->id); ajStrAssignS(&hyb->key1,entry->id); hyb->dbno = i; hyb->offset = entry->fpos; hyb->dups = 0; ajBtreeHybInsertId(entry->idcache,hyb); ++idtot; } if(accfield) { while(ajListPop(accfield->data,(void **)&word)) { ajStrFmtLower(&word); ajStrAssignS(&hyb->key1,word); hyb->dbno = i; hyb->offset = entry->fpos; hyb->dups = 0; ajBtreeHybInsertId(accfield->cache,hyb); ++acctot; ajStrDel(&word); } } if(svfield) { while(ajListPop(svfield->data,(void **)&word)) { ajStrFmtLower(&word); ajStrAssignS(&hyb->key1,word); hyb->dbno = i; hyb->offset = entry->fpos; hyb->dups = 0; ajBtreeHybInsertId(svfield->cache,hyb); ++svtot; ajStrDel(&word); } } if(keyfield) { while(ajListPop(keyfield->data,(void **)&word)) { ajStrFmtLower(&word); ajStrAssignS(&priobj->id,entry->id); ajStrAssignS(&priobj->keyword,word); priobj->treeblock = 0; ajBtreeInsertKeyword(keyfield->cache, priobj); ++keytot; ajStrDel(&word); } } if(desfield) { while(ajListPop(desfield->data,(void **)&word)) { ajStrFmtLower(&word); ajStrAssignS(&priobj->id,entry->id); ajStrAssignS(&priobj->keyword,word); priobj->treeblock = 0; if(ajBtreeInsertKeyword(desfield->cache, priobj)) ++destot; ajStrDel(&word); } } if(orgfield) { while(ajListPop(orgfield->data,(void **)&word)) { ajStrFmtLower(&word); ajStrAssignS(&priobj->id,entry->id); ajStrAssignS(&priobj->keyword,word); priobj->treeblock = 0; ajBtreeInsertKeyword(orgfield->cache, priobj); ++orgtot; ajStrDel(&word); } } } ajFileClose(&inf); nentries += ientries; nowtime = ajTimeNewToday(); nowclock = ajClockNow(); ajFmtPrintF(outf, " entries: %Lu (%Lu) time: %.1f/%.1fs (%.1f/%.1fs)\n", nentries, ientries, ajClockDiff(startclock,nowclock), ajTimeDiff(starttime, nowtime), ajClockDiff(beginclock,nowclock), ajTimeDiff(begintime, nowtime)); if(statistics) { if(entry->do_id) ajBtreeCacheStatsOut(outf, entry->idcache, &idcache, &idread, &idwrite, &idsize); if(accfield) ajBtreeCacheStatsOut(outf, accfield->cache, &accache, &acread, &acwrite, &acsize); if(svfield) ajBtreeCacheStatsOut(outf, svfield->cache, &svcache, &svread, &svwrite, &svsize); if(keyfield) ajBtreeCacheStatsOut(outf, keyfield->cache, &kwcache, &kwread, &kwwrite, &kwsize); if(desfield) ajBtreeCacheStatsOut(outf, desfield->cache, &decache, &deread, &dewrite, &desize); if(orgfield) ajBtreeCacheStatsOut(outf, orgfield->cache, &txcache, &txread, &txwrite, &txsize); } ajTimeDel(&begintime); ajTimeDel(&nowtime); } embBtreeDumpParameters(entry); embBtreeCloseCaches(entry); nowtime = ajTimeNewToday(); tdiff = ajTimeDiff(starttime, nowtime); days = (ajint) (tdiff/(24.0*3600.0)); tdiff -= (24.0*3600.0)*(double)days; hours = (ajint) (tdiff/3600.0); tdiff -= 3600.0*(double)hours; mins = (ajint) (tdiff/60.0); tdiff -= 60.0 * (double) mins; if(days) ajFmtPrintF(outf, "Total time: %d %02d:%02d:%04.1f\n", days, hours, mins, tdiff); else if (hours) ajFmtPrintF(outf, "Total time: %d:%02d:%04.1f\n", hours, mins, tdiff); else ajFmtPrintF(outf, "Total time: %d:%04.1f\n", mins, tdiff); ajTimeDel(&nowtime); ajTimeDel(&starttime); if(maxidlen) { ajFmtPrintF(outf, "Resource idlen truncated %u IDs. " "Maximum ID length was %u.", idtrunc, maxidlen); ajWarn("Resource idlen truncated %u IDs. Maximum ID length was %u.", idtrunc, maxidlen); } ajFileClose(&outf); embBtreeEntryDel(&entry); ajStrDel(&tmpstr); ajStrDel(&filename); ajStrDel(&exclude); ajStrDel(&dbname); ajStrDel(&dbrs); ajStrDel(&release); ajStrDel(&datestr); ajStrDel(&directory); ajStrDel(&indexdir); ajStrDel(&dbtype); nfields = 0; while(fieldarray[nfields]) ajStrDel(&fieldarray[nfields++]); AJFREE(fieldarray); ajBtreeIdDel(&idobj); ajBtreePriDel(&priobj); ajBtreeHybDel(&hyb); ajRegFree(&dbxflat_wrdexp); embExit(); return 0; }
int main(int argc, char **argv) { EmbPBtreeEntry entry = NULL; AjPStr dbname = NULL; AjPStr dbrs = NULL; AjPStr release = NULL; AjPStr datestr = NULL; AjPStr directory; AjPStr indexdir; AjPStr filename; AjPStr exclude; AjPStr dbtype = NULL; AjPFile outf = NULL; AjPStr *fieldarray = NULL; ajint nfields; ajint nfiles; AjPStr tmpstr = NULL; AjPStr thysfile = NULL; ajint i; AjPFile inf = NULL; AjPStr word = NULL; AjPBtId idobj = NULL; AjPBtPri priobj = NULL; AjPBtHybrid hyb = NULL; ajulong nentries = 0L; ajulong ientries = 0L; AjPTime starttime = NULL; AjPTime begintime = NULL; AjPTime nowtime = NULL; ajlong startclock = 0; ajlong beginclock = 0; ajlong nowclock = 0; embInit("dbxflat", argc, argv); dbtype = ajAcdGetListSingle("idformat"); fieldarray = ajAcdGetList("fields"); directory = ajAcdGetDirectoryName("directory"); outf = ajAcdGetOutfile("outfile"); indexdir = ajAcdGetOutdirName("indexoutdir"); filename = ajAcdGetString("filenames"); exclude = ajAcdGetString("exclude"); dbname = ajAcdGetString("dbname"); dbrs = ajAcdGetString("dbresource"); release = ajAcdGetString("release"); datestr = ajAcdGetString("date"); entry = embBtreeEntryNew(); tmpstr = ajStrNew(); idobj = ajBtreeIdNew(); priobj = ajBtreePriNew(); hyb = ajBtreeHybNew(); nfields = embBtreeSetFields(entry,fieldarray); embBtreeSetDbInfo(entry,dbname,dbrs,datestr,release,dbtype,directory, indexdir); embBtreeGetRsInfo(entry); nfiles = embBtreeGetFiles(entry,directory,filename,exclude); embBtreeWriteEntryFile(entry); embBtreeOpenCaches(entry); starttime = ajTimeNewToday(); ajFmtPrintF(outf, "Processing directory: %S\n", directory); for(i=0;i<nfiles;++i) { begintime = ajTimeNewToday(); beginclock = ajClockNow(); ajListPop(entry->files,(void **)&thysfile); ajListPushAppend(entry->files,(void *)thysfile); ajFmtPrintS(&tmpstr,"%S%S",entry->directory,thysfile); if(!(inf=ajFileNewInNameS(tmpstr))) ajFatal("Cannot open input file %S\n",tmpstr); ajFilenameTrimPath(&tmpstr); ajFmtPrintF(outf,"Processing file: %S",tmpstr); ientries = 0L; while(dbxflat_NextEntry(entry,inf)) { ++ientries; if(entry->do_id) { if(ajStrGetLen(entry->id) > entry->idlen) { if(ajStrGetLen(entry->id) > maxidlen) { ajWarn("id '%S' too long, truncating to idlen %d", entry->id, entry->idlen); maxidlen = ajStrGetLen(entry->id); } idtrunc++; ajStrKeepRange(&entry->id,0,entry->idlen-1); } ajStrFmtLower(&entry->id); ajStrAssignS(&hyb->key1,entry->id); hyb->dbno = i; hyb->offset = entry->fpos; hyb->dups = 0; ajBtreeHybInsertId(entry->idcache,hyb); } if(entry->do_accession) { while(ajListPop(entry->ac,(void **)&word)) { ajStrFmtLower(&word); ajStrAssignS(&hyb->key1,word); hyb->dbno = i; hyb->offset = entry->fpos; hyb->dups = 0; ajBtreeHybInsertId(entry->accache,hyb); ajStrDel(&word); } } if(entry->do_sv) { while(ajListPop(entry->sv,(void **)&word)) { ajStrFmtLower(&word); ajStrAssignS(&hyb->key1,word); hyb->dbno = i; hyb->offset = entry->fpos; hyb->dups = 0; ajBtreeHybInsertId(entry->svcache,hyb); ajStrDel(&word); } } if(entry->do_keyword) { while(ajListPop(entry->kw,(void **)&word)) { ajStrFmtLower(&word); ajStrAssignS(&priobj->id,entry->id); ajStrAssignS(&priobj->keyword,word); priobj->treeblock = 0; ajBtreeInsertKeyword(entry->kwcache, priobj); ajStrDel(&word); } } if(entry->do_description) { while(ajListPop(entry->de,(void **)&word)) { ajStrFmtLower(&word); ajStrAssignS(&priobj->id,entry->id); ajStrAssignS(&priobj->keyword,word); priobj->treeblock = 0; ajBtreeInsertKeyword(entry->decache, priobj); ajStrDel(&word); } } if(entry->do_taxonomy) { while(ajListPop(entry->tx,(void **)&word)) { ajStrFmtLower(&word); ajStrAssignS(&priobj->id,entry->id); ajStrAssignS(&priobj->keyword,word); priobj->treeblock = 0; ajBtreeInsertKeyword(entry->txcache, priobj); ajStrDel(&word); } } } ajFileClose(&inf); nentries += ientries; nowtime = ajTimeNewToday(); nowclock = ajClockNow(); ajFmtPrintF(outf, " entries: %Lu (%Lu) time: %.1f/%.1fs (%.1f/%.1fs)\n", nentries, ientries, ajClockDiff(startclock,nowclock), ajTimeDiff(starttime, nowtime), ajClockDiff(beginclock,nowclock), ajTimeDiff(begintime, nowtime)); ajTimeDel(&begintime); ajTimeDel(&nowtime); } embBtreeDumpParameters(entry); embBtreeCloseCaches(entry); nowtime = ajTimeNewToday(); ajFmtPrintF(outf, "Total time: %.1fs\n", ajTimeDiff(starttime, nowtime)); ajTimeDel(&nowtime); ajTimeDel(&starttime); if(maxidlen) { ajFmtPrintF(outf, "Resource idlen truncated %u IDs. " "Maximum ID length was %u.", idtrunc, maxidlen); ajWarn("Resource idlen truncated %u IDs. Maximum ID length was %u.", idtrunc, maxidlen); } ajFileClose(&outf); embBtreeEntryDel(&entry); ajStrDel(&tmpstr); ajStrDel(&filename); ajStrDel(&exclude); ajStrDel(&dbname); ajStrDel(&dbrs); ajStrDel(&release); ajStrDel(&datestr); ajStrDel(&directory); ajStrDel(&indexdir); ajStrDel(&dbtype); nfields = 0; while(fieldarray[nfields]) ajStrDel(&fieldarray[nfields++]); AJFREE(fieldarray); ajBtreeIdDel(&idobj); ajBtreePriDel(&priobj); ajBtreeHybDel(&hyb); ajRegFree(&dbxflat_wrdexp); embExit(); return 0; }