Ejemplo n.º 1
0
int main(int argc, char **argv)
{
    EmbPBtreeEntry entry = NULL;
    
    AjPStr dbname   = NULL;
    AjPStr dbrs     = NULL;
    AjPStr release  = NULL;
    AjPStr datestr  = NULL;
    AjBool statistics;
    AjBool compressed;

    AjPStr directory;
    AjPStr indexdir;
    AjPStr filename;
    AjPStr exclude;
    AjPStr dbtype = NULL;
    AjPFile outf = NULL;

    AjPStr *fieldarray = NULL;
    
    ajint nfields;
    ajint nfiles;

    AjPStr tmpstr = NULL;
    AjPStr thysfile = NULL;
    
    ajint i;
    AjPFile inf = NULL;

    AjPStr word = NULL;
    
    AjPBtId  idobj  = NULL;
    AjPBtPri priobj = NULL;
    AjPBtHybrid hyb = NULL;
    
    ajulong nentries = 0L;
    ajulong ientries = 0L;
    AjPTime starttime = NULL;
    AjPTime begintime = NULL;
    AjPTime nowtime = NULL;
    ajlong startclock = 0;
    ajlong beginclock = 0;
    ajlong nowclock = 0;
    
    ajulong idcache=0L, idread = 0L, idwrite = 0L, idsize= 0L;
    ajulong accache=0L, acread = 0L, acwrite = 0L, acsize= 0L;
    ajulong svcache=0L, svread = 0L, svwrite = 0L, svsize= 0L;
    ajulong kwcache=0L, kwread = 0L, kwwrite = 0L, kwsize= 0L;
    ajulong decache=0L, deread = 0L, dewrite = 0L, desize= 0L;
    ajulong txcache=0L, txread = 0L, txwrite = 0L, txsize= 0L;

    double tdiff = 0.0;
    ajint days = 0;
    ajint hours = 0;
    ajint mins = 0;
    
    embInit("dbxflat", argc, argv);

    dbtype     = ajAcdGetListSingle("idformat");
    fieldarray = ajAcdGetList("fields");
    directory  = ajAcdGetDirectoryName("directory");
    outf       = ajAcdGetOutfile("outfile");
    indexdir   = ajAcdGetOutdirName("indexoutdir");
    filename   = ajAcdGetString("filenames");
    exclude    = ajAcdGetString("exclude");
    dbname     = ajAcdGetString("dbname");
    dbrs       = ajAcdGetString("dbresource");
    release    = ajAcdGetString("release");
    datestr    = ajAcdGetString("date");
    statistics = ajAcdGetBoolean("statistics");
    compressed = ajAcdGetBoolean("compressed");

    entry = embBtreeEntryNew();
    if(compressed)
        embBtreeEntrySetCompressed(entry);

    tmpstr = ajStrNew();
    
    idobj   = ajBtreeIdNew();
    priobj  = ajBtreePriNew();
    hyb     = ajBtreeHybNew();
    

    nfields = embBtreeSetFields(entry,fieldarray);
    embBtreeSetDbInfo(entry,dbname,dbrs,datestr,release,dbtype,directory,
		      indexdir);

    for(i=0; i< nfields; i++)
    {
        if(ajStrMatchC(fieldarray[i], "acc"))
        {
            accfield = embBtreeGetFieldS(entry, fieldarray[i]);
            if(compressed)
                embBtreeFieldSetCompressed(accfield);
        }
        else if(ajStrMatchC(fieldarray[i], "sv"))
        {
            svfield = embBtreeGetFieldS(entry, fieldarray[i]);
            if(compressed)
                embBtreeFieldSetCompressed(svfield);
        }
        else if(ajStrMatchC(fieldarray[i], "des"))
        {
            desfield = embBtreeGetFieldS(entry, fieldarray[i]);
            if(compressed)
                embBtreeFieldSetCompressed(desfield);
        }
        else if(ajStrMatchC(fieldarray[i], "key"))
        {
            keyfield = embBtreeGetFieldS(entry, fieldarray[i]);
            if(compressed)
                embBtreeFieldSetCompressed(keyfield);
        }
        else if(ajStrMatchC(fieldarray[i], "org"))
        {
            orgfield = embBtreeGetFieldS(entry, fieldarray[i]);
            if(compressed)
                embBtreeFieldSetCompressed(orgfield);
        }
        else if(!ajStrMatchC(fieldarray[i], "id"))
            ajErr("Unknown field '%S' specified for indexing", fieldarray[i]);
    }

    embBtreeGetRsInfo(entry);

    nfiles = embBtreeGetFiles(entry,directory,filename,exclude);
    if(!nfiles)
        ajDie("No input files in '%S' matched filename '%S'",
              directory, filename);

    embBtreeWriteEntryFile(entry);

    embBtreeOpenCaches(entry);

    starttime = ajTimeNewToday();

    ajFmtPrintF(outf, "Processing directory: %S\n", directory);

    for(i=0;i<nfiles;++i)
    {
        begintime = ajTimeNewToday();
        beginclock = ajClockNow();

	ajListPop(entry->files,(void **)&thysfile);
	ajListPushAppend(entry->files,(void *)thysfile);
	ajFmtPrintS(&tmpstr,"%S%S",entry->directory,thysfile);
	if(!(inf=ajFileNewInNameS(tmpstr)))
	    ajFatal("Cannot open input file %S\n",tmpstr);
	ajFilenameTrimPath(&tmpstr);
	ajFmtPrintF(outf,"Processing file: %S",tmpstr);

	ientries = 0L;

	while(dbxflat_NextEntry(entry,inf))
	{
	    ++ientries;

	    if(entry->do_id)
	    {
                if(ajStrGetLen(entry->id) > entry->idlen)
                {
                    if(ajStrGetLen(entry->id) > maxidlen)
                    {
                        ajWarn("id '%S' too long, truncating to idlen %d",
                               entry->id, entry->idlen);
                        maxidlen = ajStrGetLen(entry->id);
                    }
                    idtrunc++;
                    ajStrKeepRange(&entry->id,0,entry->idlen-1);
                }
    
		ajStrFmtLower(&entry->id);
		ajStrAssignS(&hyb->key1,entry->id);
		hyb->dbno = i;
		hyb->offset = entry->fpos;
		hyb->dups = 0;
		ajBtreeHybInsertId(entry->idcache,hyb);
                ++idtot;
	    }

	    if(accfield)
	    {
                while(ajListPop(accfield->data,(void **)&word))
                {
		    ajStrFmtLower(&word);
                    ajStrAssignS(&hyb->key1,word);
                    hyb->dbno = i;
		    hyb->offset = entry->fpos;
		    hyb->dups = 0;
		    ajBtreeHybInsertId(accfield->cache,hyb);
                    ++acctot;
		    ajStrDel(&word);
                }
	    }

	    if(svfield)
	    {
                while(ajListPop(svfield->data,(void **)&word))
                {
		    ajStrFmtLower(&word);
                    ajStrAssignS(&hyb->key1,word);
                    hyb->dbno = i;
		    hyb->offset = entry->fpos;
		    hyb->dups = 0;
		    ajBtreeHybInsertId(svfield->cache,hyb);
                    ++svtot;
		    ajStrDel(&word);
                }
	    }

	    if(keyfield)
	    {
                while(ajListPop(keyfield->data,(void **)&word))
                {
		    ajStrFmtLower(&word);
		    ajStrAssignS(&priobj->id,entry->id);
                    ajStrAssignS(&priobj->keyword,word);
                    priobj->treeblock = 0;
                    ajBtreeInsertKeyword(keyfield->cache, priobj);
                    ++keytot;
		    ajStrDel(&word);
                }
	    }

	    if(desfield)
	    {
                while(ajListPop(desfield->data,(void **)&word))
                {
		    ajStrFmtLower(&word);
		    ajStrAssignS(&priobj->id,entry->id);
                    ajStrAssignS(&priobj->keyword,word);
                    priobj->treeblock = 0;
                    if(ajBtreeInsertKeyword(desfield->cache, priobj))
                        ++destot;
		    ajStrDel(&word);
                }
	    }

	    if(orgfield)
	    {
                while(ajListPop(orgfield->data,(void **)&word))
                {
		    ajStrFmtLower(&word);
		    ajStrAssignS(&priobj->id,entry->id);
                    ajStrAssignS(&priobj->keyword,word);
                    priobj->treeblock = 0;
                    ajBtreeInsertKeyword(orgfield->cache, priobj);
                    ++orgtot;
		    ajStrDel(&word);
                }
	    }
	}
	
	ajFileClose(&inf);
	nentries += ientries;
	nowtime = ajTimeNewToday();
        nowclock = ajClockNow();
	ajFmtPrintF(outf, " entries: %Lu (%Lu) time: %.1f/%.1fs (%.1f/%.1fs)\n",
		    nentries, ientries,
		    ajClockDiff(startclock,nowclock),
                    ajTimeDiff(starttime, nowtime),
		    ajClockDiff(beginclock,nowclock),
                    ajTimeDiff(begintime, nowtime));
        if(statistics)
        {
            if(entry->do_id)
                ajBtreeCacheStatsOut(outf, entry->idcache,
                                     &idcache, &idread, &idwrite, &idsize);
            if(accfield)
                ajBtreeCacheStatsOut(outf, accfield->cache,
                                     &accache, &acread, &acwrite, &acsize);
            if(svfield)
                ajBtreeCacheStatsOut(outf, svfield->cache,
                                     &svcache, &svread, &svwrite, &svsize);
            if(keyfield)
                ajBtreeCacheStatsOut(outf, keyfield->cache,
                                     &kwcache, &kwread, &kwwrite, &kwsize);
            if(desfield)
                ajBtreeCacheStatsOut(outf, desfield->cache,
                                     &decache, &deread, &dewrite, &desize);
            if(orgfield)
                ajBtreeCacheStatsOut(outf, orgfield->cache,
                                     &txcache, &txread, &txwrite, &txsize);
        }

	ajTimeDel(&begintime);
	ajTimeDel(&nowtime);
    }
    


    embBtreeDumpParameters(entry);
    embBtreeCloseCaches(entry);
    
    nowtime = ajTimeNewToday();
    tdiff = ajTimeDiff(starttime, nowtime);
    days = (ajint) (tdiff/(24.0*3600.0));
    tdiff -= (24.0*3600.0)*(double)days;
    hours = (ajint) (tdiff/3600.0);
    tdiff -= 3600.0*(double)hours;
    mins = (ajint) (tdiff/60.0);
    tdiff -= 60.0 * (double) mins;
    if(days)
        ajFmtPrintF(outf, "Total time: %d %02d:%02d:%04.1f\n",
                    days, hours, mins, tdiff);
    else if (hours)
        ajFmtPrintF(outf, "Total time: %d:%02d:%04.1f\n",
                    hours, mins, tdiff);
    else 
        ajFmtPrintF(outf, "Total time: %d:%04.1f\n",
                    mins, tdiff);
    ajTimeDel(&nowtime);
    ajTimeDel(&starttime);

    if(maxidlen)
    {
        ajFmtPrintF(outf,
                    "Resource idlen truncated %u IDs. "
                    "Maximum ID length was %u.",
                    idtrunc, maxidlen);
        ajWarn("Resource idlen truncated %u IDs. Maximum ID length was %u.",
               idtrunc, maxidlen);
    }

    ajFileClose(&outf);
    embBtreeEntryDel(&entry);
    ajStrDel(&tmpstr);
    ajStrDel(&filename);
    ajStrDel(&exclude);
    ajStrDel(&dbname);
    ajStrDel(&dbrs);
    ajStrDel(&release);
    ajStrDel(&datestr);
    ajStrDel(&directory);
    ajStrDel(&indexdir);
    ajStrDel(&dbtype);    

    nfields = 0;
    while(fieldarray[nfields])
	ajStrDel(&fieldarray[nfields++]);
    AJFREE(fieldarray);


    ajBtreeIdDel(&idobj);
    ajBtreePriDel(&priobj);
    ajBtreeHybDel(&hyb);

    ajRegFree(&dbxflat_wrdexp);
    embExit();

    return 0;
}
Ejemplo n.º 2
0
int main(int argc, char **argv)
{
    EmbPBtreeEntry entry = NULL;
    
    AjPStr dbname   = NULL;
    AjPStr dbrs     = NULL;
    AjPStr release  = NULL;
    AjPStr datestr  = NULL;

    AjPStr directory;
    AjPStr indexdir;
    AjPStr filename;
    AjPStr exclude;
    AjPStr dbtype = NULL;
    AjPFile outf = NULL;

    AjPStr *fieldarray = NULL;
    
    ajint nfields;
    ajint nfiles;

    AjPStr tmpstr = NULL;
    AjPStr thysfile = NULL;
    
    ajint i;
    AjPFile inf = NULL;

    AjPStr word = NULL;
    
    AjPBtId  idobj  = NULL;
    AjPBtPri priobj = NULL;
    AjPBtHybrid hyb = NULL;
    
    ajulong nentries = 0L;
    ajulong ientries = 0L;
    AjPTime starttime = NULL;
    AjPTime begintime = NULL;
    AjPTime nowtime = NULL;
    ajlong startclock = 0;
    ajlong beginclock = 0;
    ajlong nowclock = 0;
    
    embInit("dbxflat", argc, argv);

    dbtype     = ajAcdGetListSingle("idformat");
    fieldarray = ajAcdGetList("fields");
    directory  = ajAcdGetDirectoryName("directory");
    outf       = ajAcdGetOutfile("outfile");
    indexdir   = ajAcdGetOutdirName("indexoutdir");
    filename   = ajAcdGetString("filenames");
    exclude    = ajAcdGetString("exclude");
    dbname     = ajAcdGetString("dbname");
    dbrs       = ajAcdGetString("dbresource");
    release    = ajAcdGetString("release");
    datestr    = ajAcdGetString("date");

    entry = embBtreeEntryNew();
    tmpstr = ajStrNew();
    
    idobj   = ajBtreeIdNew();
    priobj  = ajBtreePriNew();
    hyb     = ajBtreeHybNew();
    

    nfields = embBtreeSetFields(entry,fieldarray);
    embBtreeSetDbInfo(entry,dbname,dbrs,datestr,release,dbtype,directory,
		      indexdir);

    embBtreeGetRsInfo(entry);

    nfiles = embBtreeGetFiles(entry,directory,filename,exclude);
    embBtreeWriteEntryFile(entry);

    embBtreeOpenCaches(entry);

    starttime = ajTimeNewToday();

    ajFmtPrintF(outf, "Processing directory: %S\n", directory);

    for(i=0;i<nfiles;++i)
    {
        begintime = ajTimeNewToday();
        beginclock = ajClockNow();

	ajListPop(entry->files,(void **)&thysfile);
	ajListPushAppend(entry->files,(void *)thysfile);
	ajFmtPrintS(&tmpstr,"%S%S",entry->directory,thysfile);
	if(!(inf=ajFileNewInNameS(tmpstr)))
	    ajFatal("Cannot open input file %S\n",tmpstr);
	ajFilenameTrimPath(&tmpstr);
	ajFmtPrintF(outf,"Processing file: %S",tmpstr);

	ientries = 0L;

	while(dbxflat_NextEntry(entry,inf))
	{
	    ++ientries;
	    if(entry->do_id)
	    {
                if(ajStrGetLen(entry->id) > entry->idlen)
                {
                    if(ajStrGetLen(entry->id) > maxidlen)
                    {
                        ajWarn("id '%S' too long, truncating to idlen %d",
                               entry->id, entry->idlen);
                        maxidlen = ajStrGetLen(entry->id);
                    }
                    idtrunc++;
                    ajStrKeepRange(&entry->id,0,entry->idlen-1);
                }
    
		ajStrFmtLower(&entry->id);
		ajStrAssignS(&hyb->key1,entry->id);
		hyb->dbno = i;
		hyb->offset = entry->fpos;
		hyb->dups = 0;
		ajBtreeHybInsertId(entry->idcache,hyb);
	    }

	    if(entry->do_accession)
	    {
                while(ajListPop(entry->ac,(void **)&word))
                {
		    ajStrFmtLower(&word);
                    ajStrAssignS(&hyb->key1,word);
                    hyb->dbno = i;
		    hyb->offset = entry->fpos;
		    hyb->dups = 0;
		    ajBtreeHybInsertId(entry->accache,hyb);
		    ajStrDel(&word);
                }
	    }

	    if(entry->do_sv)
	    {
                while(ajListPop(entry->sv,(void **)&word))
                {
		    ajStrFmtLower(&word);
                    ajStrAssignS(&hyb->key1,word);
                    hyb->dbno = i;
		    hyb->offset = entry->fpos;
		    hyb->dups = 0;
		    ajBtreeHybInsertId(entry->svcache,hyb);
		    ajStrDel(&word);
                }
	    }

	    if(entry->do_keyword)
	    {
                while(ajListPop(entry->kw,(void **)&word))
                {
		    ajStrFmtLower(&word);
		    ajStrAssignS(&priobj->id,entry->id);
                    ajStrAssignS(&priobj->keyword,word);
                    priobj->treeblock = 0;
                    ajBtreeInsertKeyword(entry->kwcache, priobj);
		    ajStrDel(&word);
                }
	    }

	    if(entry->do_description)
	    {
                while(ajListPop(entry->de,(void **)&word))
                {
		    ajStrFmtLower(&word);
		    ajStrAssignS(&priobj->id,entry->id);
                    ajStrAssignS(&priobj->keyword,word);
                    priobj->treeblock = 0;
                    ajBtreeInsertKeyword(entry->decache, priobj);
		    ajStrDel(&word);
                }
	    }

	    if(entry->do_taxonomy)
	    {
                while(ajListPop(entry->tx,(void **)&word))
                {
		    ajStrFmtLower(&word);
		    ajStrAssignS(&priobj->id,entry->id);
                    ajStrAssignS(&priobj->keyword,word);
                    priobj->treeblock = 0;
                    ajBtreeInsertKeyword(entry->txcache, priobj);
		    ajStrDel(&word);
                }
	    }
	}
	
	ajFileClose(&inf);
	nentries += ientries;
	nowtime = ajTimeNewToday();
        nowclock = ajClockNow();
	ajFmtPrintF(outf, " entries: %Lu (%Lu) time: %.1f/%.1fs (%.1f/%.1fs)\n",
		    nentries, ientries,
		    ajClockDiff(startclock,nowclock),
                    ajTimeDiff(starttime, nowtime),
		    ajClockDiff(beginclock,nowclock),
                    ajTimeDiff(begintime, nowtime));
	ajTimeDel(&begintime);
	ajTimeDel(&nowtime);
    }
    


    embBtreeDumpParameters(entry);
    embBtreeCloseCaches(entry);
    
    nowtime = ajTimeNewToday();
    ajFmtPrintF(outf, "Total time: %.1fs\n", ajTimeDiff(starttime, nowtime));
    ajTimeDel(&nowtime);
    ajTimeDel(&starttime);

    if(maxidlen)
    {
        ajFmtPrintF(outf,
                    "Resource idlen truncated %u IDs. "
                    "Maximum ID length was %u.",
                    idtrunc, maxidlen);
        ajWarn("Resource idlen truncated %u IDs. Maximum ID length was %u.",
               idtrunc, maxidlen);
    }
    
    ajFileClose(&outf);
    embBtreeEntryDel(&entry);
    ajStrDel(&tmpstr);
    ajStrDel(&filename);
    ajStrDel(&exclude);
    ajStrDel(&dbname);
    ajStrDel(&dbrs);
    ajStrDel(&release);
    ajStrDel(&datestr);
    ajStrDel(&directory);
    ajStrDel(&indexdir);
    ajStrDel(&dbtype);    

    nfields = 0;
    while(fieldarray[nfields])
	ajStrDel(&fieldarray[nfields++]);
    AJFREE(fieldarray);


    ajBtreeIdDel(&idobj);
    ajBtreePriDel(&priobj);
    ajBtreeHybDel(&hyb);

    ajRegFree(&dbxflat_wrdexp);
    embExit();

    return 0;
}