Ejemplo n.º 1
0
 /// Looks up the position of the given term in the term enumeration.
 ///
 /// @param term  the term to locate.
 /// @return the enumerator's position when it lands exactly on `term`;
 ///         -1 when the reader is empty or the term is not found.
 int64_t TermInfosReader::getPosition(TermPtr term)
 {
     // Nothing to search in an empty reader.
     if (_size == 0)
         return -1;

     ensureIndexIsRead();
     const int32_t startOffset = getIndexOffset(term);

     // Position this thread's enumerator at the index entry chosen for the term.
     SegmentTermEnumPtr termEnum(getThreadResources()->termEnum);
     seekEnum(termEnum, startOffset);

     // Step forward while the enumerator is still before the term and has more entries.
     for (;;)
     {
         if (term->compareTo(termEnum->term()) <= 0)
             break;
         if (!termEnum->next())
             break;
     }

     if (term->compareTo(termEnum->term()) != 0)
         return -1;
     return termEnum->position;
 }
Ejemplo n.º 2
0
			GammaPDIndexDecoder(std::vector<std::string> const & rVfn)
			: Vfn(rVfn), valuesPerFile(0), blocksPerFile(0), indexEntriesPerFile(0)
			{
				uint64_t o = 0;
				for ( uint64_t i = 0; i < Vfn.size(); ++i )
				{
					libmaus2::aio::InputStreamInstance ISI(Vfn[i]);
					uint64_t const vpf = getNumValues(ISI);

					if ( vpf )
					{
						valuesPerFile.push_back(vpf);
						blocksPerFile.push_back(getNumBlocks(ISI));
						indexEntriesPerFile.push_back(blocksPerFile.back()+1);
						indexOffset.push_back(getIndexOffset(ISI));
						Vfn[o++] = Vfn[i];
					}
				}
				// for prefix sum
				valuesPerFile.push_back(0);
				Vfn.resize(o);

				libmaus2::util::PrefixSums::prefixSums(valuesPerFile.begin(),valuesPerFile.end());
			}
Ejemplo n.º 3
0
 /// Returns the TermInfo for the given term, or an empty pointer when the
 /// reader is empty or the term is not present.
 ///
 /// @param term      the term to look up.
 /// @param useCache  when true, the per-thread TermInfo cache is consulted
 ///                  first and updated with successful lookups.
 TermInfoPtr TermInfosReader::get(TermPtr term, bool useCache)
 {
     if (_size == 0)
         return TermInfoPtr();

     ensureIndexIsRead();

     TermInfosReaderThreadResourcesPtr threadState(getThreadResources());
     TermInfoCachePtr lookupCache;
     TermInfoPtr found;

     if (useCache)
     {
         // a recently looked up term can be answered straight from the cache
         lookupCache = threadState->termInfoCache;
         found = lookupCache->get(term);
         if (found)
             return found;
     }

     SegmentTermEnumPtr termEnum = threadState->termEnum;

     // Sequential access optimisation: decide whether the requested term lies
     // at or past the cached enumerator's current location.
     bool atOrPastCurrent = false;
     if (termEnum->term())
     {
         if (termEnum->prev() && term->compareTo(termEnum->prev()) > 0)
             atOrPastCurrent = true;
         else
             atOrPastCurrent = term->compareTo(termEnum->term()) >= 0;
     }

     if (atOrPastCurrent)
     {
         int32_t nextIndexSlot = (int32_t)(termEnum->position / totalIndexInterval) + 1;

         // ... and it must still fall before the end of the current index block
         if (indexTerms.size() == nextIndexSlot ||
             term->compareTo(indexTerms[nextIndexSlot]) < 0)
         {
             // scan forward from where the enumerator already is; no seek needed
             int32_t scanCount = termEnum->scanTo(term);

             if (!termEnum->term() || term->compareTo(termEnum->term()) != 0)
             {
                 found.reset();
                 return found;
             }

             found = termEnum->termInfo();

             // Only cache when the scan skipped more than one dictionary entry, so
             // that queries walking many terms in order (range or wildcard queries)
             // do not flush the cache.
             if (lookupCache && scanCount > 1)
                 lookupCache->put(term, found);

             return found;
         }
     }

     // Random access: seek to the index entry for the term, then scan to it.
     seekEnum(termEnum, getIndexOffset(term));
     termEnum->scanTo(term);

     if (!termEnum->term() || term->compareTo(termEnum->term()) != 0)
     {
         found.reset();
         return found;
     }

     found = termEnum->termInfo();
     if (lookupCache)
         lookupCache->put(term, found);
     return found;
 }
//
// here are the private guts
//
// Reads one piece of information from the open file and returns it as a
// newly allocated rampInfo-derived object (the caller receives the raw pointer).
//
//   arg  - one-based scan number for RAMP_HEADER / RAMP_PEAKS reads; not used
//          to locate data for RAMP_RUNINFO / RAMP_INSTRUMENT reads.
//   what - which kind of information to read.
//
// Returns NULL when `what` is not a known read type, when `arg` is outside
// 1..scanCount for a per-scan read, when no scan index is available, when the
// scan's stored offset is negative, or when the object that was read looks
// invalid (negative acquisition number, no peaks, no instrument struct).
rampInfo* cRamp::do_ramp( ramp_fileoffset_t arg , eWhatToRead	what )
{
   // Only the four known read types are accepted.
   if ((RAMP_RUNINFO != what) && (RAMP_HEADER != what) &&
       (RAMP_PEAKS != what) && (RAMP_INSTRUMENT != what)) {
      std::cerr << "unknown read type!\n";
      return NULL;
   }

   // Run info and instrument info are read from the head of the file; every
   // other read is addressed through the scan index.
   const bool perScanRead = (RAMP_RUNINFO != what) && (RAMP_INSTRUMENT != what);

   if (perScanRead && !m_scanOffsets) {
      // The index has not been loaded yet; it is needed for any per-scan read.
      int lastScan = 0;
      ramp_fileoffset_t indexOffset = getIndexOffset(m_handle);
      m_scanOffsets = readIndex(m_handle, indexOffset, &lastScan);

      if (lastScan >= m_runInfo->m_data.scanCount) {
         if (m_declaredScansOnly) {
            // Squeeze out the placeholder entries (offset -1) so that only
            // real scans remain in the index.
            int slot = 1; // ramp is 1 based
            while (slot <= lastScan) {
               if (m_scanOffsets[slot] != -1) {
                  ++slot;
                  continue;
               }
               // locate the end of this run of placeholders
               int next = slot + 1;
               while ((next <= lastScan) && (m_scanOffsets[next] == -1)) {
                  ++next;
               }
               if (next <= lastScan) {
                  // slide the remaining entries down over the run
                  memmove(m_scanOffsets+slot,m_scanOffsets+next,
                    sizeof(ramp_fileoffset_t)*((lastScan-next)+1));
               }
               lastScan -= (next-slot);
               ++slot;
            }
         } else {
            // the index knows about more scans than the run info declared
            m_runInfo->m_data.scanCount = lastScan;
         }
      }

      // remember the last scan number explicitly
      m_lastScan = lastScan;
   }

   // A per-scan read with an out-of-range scan number is rejected here
   // instead of being passed further down.
   if (perScanRead && (arg > m_runInfo->m_data.scanCount || arg < 1)) {
      return (NULL);
   }

   // Per-scan reads cannot proceed without an index.
   if (perScanRead && !m_scanOffsets) {
      return NULL;
   }

   ramp_fileoffset_t scanOffset = 0; // read from head of file
   if (perScanRead) {
      scanOffset = m_scanOffsets[arg]; // ramp is one-based
   }

   // a negative offset means there is nothing to read for this scan
   if (scanOffset < 0) {
      return NULL;
   }

   // Build the requested object and sanity check it.
   rampInfo* result = NULL;

   if (RAMP_RUNINFO == what) {
      result = new rampRunInfo( m_handle );
   } else if (RAMP_HEADER == what) {
      rampScanInfo* scan = new rampScanInfo( m_handle, scanOffset, (int)arg );
      if (scan) {
#ifdef HAVE_PWIZ_MZML_LIB
         if (!m_handle->mzML) // rampadapter already set this for us
#endif
         scan->m_data.filePosition = scanOffset; // for future reference

         if (scan->m_data.acquisitionNum < 0) {
            // the read failed, possibly because the scan is missing
            delete scan;
            scan = NULL;
         }
      }
      result = scan;
   } else if (RAMP_PEAKS == what) {
      rampPeakList* peaks = new rampPeakList( m_handle, scanOffset);
      if (peaks && peaks->getPeakCount() <= 0) {
         // the read failed, possibly because the scan is missing
         delete peaks;
         peaks = NULL;
      }
      result = peaks;
   } else if (RAMP_INSTRUMENT == what) {
      rampInstrumentInfo* instrument = new rampInstrumentInfo(m_handle);
      if (instrument->m_instrumentStructPtr == NULL) {
         delete instrument;
         instrument = NULL;
      }
      result = instrument;
   }

   return result;
}