/// Looks up the ordinal position of `term` in the term enumeration.
///
/// @param term  The term to locate.
/// @return The enumerator's `position` when it stops exactly on `term`,
///         or -1 when the reader holds no terms or `term` is not found.
int64_t TermInfosReader::getPosition(TermPtr term) {
    if (_size == 0) {
        return -1;
    }

    ensureIndexIsRead();

    // Jump to the index entry chosen by getIndexOffset() for this term, then
    // walk forward from there.
    const int32_t startOffset = getIndexOffset(term);
    SegmentTermEnumPtr cursor(getThreadResources()->termEnum);
    seekEnum(cursor, startOffset);

    // Advance while the cursor is still strictly before the wanted term and
    // the enumeration has more entries.
    for (;;) {
        if (term->compareTo(cursor->term()) <= 0) {
            break;
        }
        if (!cursor->next()) {
            break;
        }
    }

    if (term->compareTo(cursor->term()) != 0) {
        return -1;
    }
    return cursor->position;
}
GammaPDIndexDecoder(std::vector<std::string> const & rVfn) : Vfn(rVfn), valuesPerFile(0), blocksPerFile(0), indexEntriesPerFile(0) { uint64_t o = 0; for ( uint64_t i = 0; i < Vfn.size(); ++i ) { libmaus2::aio::InputStreamInstance ISI(Vfn[i]); uint64_t const vpf = getNumValues(ISI); if ( vpf ) { valuesPerFile.push_back(vpf); blocksPerFile.push_back(getNumBlocks(ISI)); indexEntriesPerFile.push_back(blocksPerFile.back()+1); indexOffset.push_back(getIndexOffset(ISI)); Vfn[o++] = Vfn[i]; } } // for prefix sum valuesPerFile.push_back(0); Vfn.resize(o); libmaus2::util::PrefixSums::prefixSums(valuesPerFile.begin(),valuesPerFile.end()); }
/// Returns the TermInfo for `term`, or a null pointer when the reader is
/// empty or the term is not present.
///
/// @param term      The term to look up.
/// @param useCache  When true, the per-thread TermInfo cache is consulted
///                  first and populated on a successful lookup.
TermInfoPtr TermInfosReader::get(TermPtr term, bool useCache) {
    if (_size == 0) {
        return TermInfoPtr();
    }

    ensureIndexIsRead();

    TermInfoPtr found;
    TermInfosReaderThreadResourcesPtr threadState(getThreadResources());
    TermInfoCachePtr cache;

    if (useCache) {
        // A recent lookup of the same term may already be cached.
        cache = threadState->termInfoCache;
        found = cache->get(term);
        if (found) {
            return found;
        }
    }

    // Sequential-access shortcut: try to reuse the thread's enumerator from
    // where it currently stands instead of seeking.
    SegmentTermEnumPtr cursor = threadState->termEnum;

    bool atOrPastCursor = false;
    if (cursor->term()) {
        const bool afterPrevious = cursor->prev() && term->compareTo(cursor->prev()) > 0;
        atOrPastCursor = afterPrevious || term->compareTo(cursor->term()) >= 0;
    }

    if (atOrPastCursor) {
        const int32_t nextIndexSlot = static_cast<int32_t>(cursor->position / totalIndexInterval) + 1;

        // Only scan if the term lies before the next index term (or there is
        // no next index term).
        const bool beforeEndOfBlock = indexTerms.size() == nextIndexSlot ||
                                      term->compareTo(indexTerms[nextIndexSlot]) < 0;

        if (beforeEndOfBlock) {
            const int32_t numScans = cursor->scanTo(term);

            const bool exactHit = cursor->term() && term->compareTo(cursor->term()) == 0;
            if (!exactHit) {
                found.reset();
                return found;
            }

            found = cursor->termInfo();
            // Cache only when the scan skipped more than one dictionary entry,
            // so queries that step through many terms in order (range or
            // wildcard style) do not flush the cache.
            if (cache && numScans > 1) {
                cache->put(term, found);
            }
            return found;
        }
    }

    // Random access: seek to the index entry for the term, then scan.
    seekEnum(cursor, getIndexOffset(term));
    cursor->scanTo(term);

    const bool exactHit = cursor->term() && term->compareTo(cursor->term()) == 0;
    if (!exactHit) {
        found.reset();
        return found;
    }

    found = cursor->termInfo();
    if (cache) {
        cache->put(term, found);
    }
    return found;
}
// // here are the private guts // rampInfo* cRamp::do_ramp( ramp_fileoffset_t arg , eWhatToRead what ) { switch( what ) { case RAMP_RUNINFO: case RAMP_HEADER: case RAMP_PEAKS: case RAMP_INSTRUMENT: break; // OK default: std::cerr << "unknown read type!\n"; return NULL; break; } rampInfo* returnPtr=NULL; if ((RAMP_RUNINFO != what) && (RAMP_INSTRUMENT != what) && !m_scanOffsets) { int iLastScan = 0; // we need the index to get anything besides the header ramp_fileoffset_t indexOffset = getIndexOffset(m_handle); m_scanOffsets = readIndex(m_handle, indexOffset, &iLastScan); if (iLastScan >= m_runInfo->m_data.scanCount) { if (!m_declaredScansOnly) { m_runInfo->m_data.scanCount = iLastScan; } else { // get rid of all the fake entries created for (int n=1;n<=iLastScan;n++) { // ramp is 1 based if (m_scanOffsets[n]==-1) { // find a run of fakes int m; for (m=n+1;(m<=iLastScan)&&(m_scanOffsets[m]==-1);m++); if (m<=iLastScan) { memmove(m_scanOffsets+n,m_scanOffsets+m, sizeof(ramp_fileoffset_t)*((iLastScan-m)+1)); } iLastScan-=(m-n); } } } } // HENRY - store last scan explicitly. m_lastScan = iLastScan; // END HENRY } // HENRY -- arg is out of bounds. instead of creating havoc in RAMP, let's just kill it here. 
if (RAMP_RUNINFO != what && (RAMP_INSTRUMENT != what) && (arg > m_runInfo->m_data.scanCount || arg < 1)) { return (NULL); } if (m_scanOffsets || (RAMP_RUNINFO == what) || (RAMP_INSTRUMENT == what)) { ramp_fileoffset_t scanOffset=-1; if (RAMP_RUNINFO == what || RAMP_INSTRUMENT == what) { scanOffset = 0; // read from head of file } else { scanOffset = m_scanOffsets[arg]; // ramp is one-based } if (scanOffset >= 0) { // ----------------------------------------------------------------------- // And now we can parse the info we were looking for // ----------------------------------------------------------------------- // Ok now we have to copy everything in our structure switch( what ) { case RAMP_RUNINFO: returnPtr = new rampRunInfo( m_handle ); break; case RAMP_HEADER: returnPtr = new rampScanInfo( m_handle, scanOffset, (int)arg ); if (returnPtr) { #ifdef HAVE_PWIZ_MZML_LIB if (!m_handle->mzML) // rampadapter already set this for us #endif ((rampScanInfo *)returnPtr)->m_data.filePosition = scanOffset; // for future reference // HENRY -- error checking here if (((rampScanInfo*)returnPtr)->m_data.acquisitionNum < 0) { // something failed in RAMP, possibly because it's a missing scan delete ((rampScanInfo*)returnPtr); returnPtr = NULL; } } break; case RAMP_PEAKS: returnPtr = new rampPeakList( m_handle, scanOffset); // HENRY -- error checking here if (returnPtr && ((rampPeakList*)returnPtr)->getPeakCount() <= 0) { // something failed in RAMP, possibly because it's a missing scan delete ((rampPeakList*)returnPtr); returnPtr = NULL; } break; // HENRY -- add the instrument info reading functionality (present in RAMP, but not provided in cRAMP before) case RAMP_INSTRUMENT: returnPtr = new rampInstrumentInfo(m_handle); if (((rampInstrumentInfo*)returnPtr)->m_instrumentStructPtr == NULL) { delete ((rampInstrumentInfo*)returnPtr); returnPtr = NULL; } break; } } } return returnPtr; }