void Indexdb::deepVerify ( char *coll ) {
	log ( LOG_INFO, "db: Deep Verifying Indexdb for coll %s...", coll );
	g_threads.disableThreads();

	Msg5 msg5;
	Msg5 msg5b;
	RdbList list;
	key_t startKey;
	key_t endKey;
	startKey.setMin();
	endKey.setMax();
	//long minRecSizes = 64000;
	
	collnum_t collnum = g_collectiondb.getCollnum(coll);
	RdbBase *rdbBase = g_indexdb.m_rdb.getBase(collnum);
	long numFiles = rdbBase->getNumFiles();
	long currentFile = 0;
	
deepLoop:
	// done after scanning all files
	if ( currentFile >= numFiles ) {
		g_threads.enableThreads();
		log ( LOG_INFO, "db: Finished deep verify for %li files.",
				numFiles );
		return;
	}
	// scan this file
	if ( ! msg5.getList ( RDB_INDEXDB   ,
			      coll          ,
			      &list         ,
			      startKey      ,
			      endKey        ,
			      64000         , // minRecSizes   ,
			      true          , // includeTree   ,
			      false         , // add to cache?
			      0             , // max cache age
			      currentFile   , // startFileNum  ,
			      1             , // numFiles      ,
			      NULL          , // state
			      NULL          , // callback
			      0             , // niceness
			      false         , // err correction?
			      NULL          ,
			      0             ,
			      -1            ,
			      true          ,
			      -1LL          ,
			      &msg5b        ,
			      false         )) {
		g_threads.enableThreads();
		log("db: HEY! it did not block");
		return;
	}

	long count = 0;
	long got   = 0;
	for ( list.resetListPtr() ; ! list.isExhausted() ;
	      list.skipCurrentRecord() ) {
		key_t k = list.getCurrentKey();
		count++;
		//unsigned long groupId = k.n1 & g_hostdb.m_groupMask;
		unsigned long groupId = getGroupId ( RDB_INDEXDB , &k );
		if ( groupId == g_hostdb.m_groupId ) got++;
	}
	if ( got != count ) {
		BigFile *f = rdbBase->getFile(currentFile);
		log ("db: File %s: Out of first %li records in indexdb, "
		     "only %li belong to our group.",
		     f->getFilename(),count,got );
	}
	//else
	//	log ( LOG_INFO, "db: File %li: Indexdb passed verification "
	//	      "successfully for %li recs.",currentFile,count );
	// next file
	currentFile++;
	goto deepLoop;
}
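
// The ownership check in deepVerify() above routes each key to a shard
// group via getGroupId(); the commented-out line suggests the mapping is
// just the key's high bits ANDed with a host-wide mask. A minimal
// standalone sketch of that masking idea, with hypothetical mask and
// group id values that are NOT from the source:
#include <stdio.h>

static unsigned long s_groupMask = 0xffff0000UL; // hypothetical mask
static unsigned long s_myGroupId = 0x00030000UL; // hypothetical group id

// true if the key's high bits land in this host's group
static bool ownsRec ( unsigned long keyHigh ) {
	return ( keyHigh & s_groupMask ) == s_myGroupId;
}

int main ( ) {
	printf("owns=%d\n", (int)ownsRec(0x0003abcdUL)); // owns=1
	printf("owns=%d\n", (int)ownsRec(0x0004abcdUL)); // owns=0
	return 0;
}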
// . but now that we may get a list remotely to fix data corruption,
//   this may indeed block
bool Msg3::doneScanning ( ) {
	QUICKPOLL(m_niceness);
	// . did we have any error on any scan?
	// . if so, repeat ALL of the scans
	g_errno = m_errno;
	// 2 retries is the default
	int32_t max = 2;
	// see if explicitly provided by the caller
	if ( m_maxRetries >= 0 ) max = m_maxRetries;
	// now use -1 (no max) as the default no matter what
	max = -1;
	// ENOMEM is particularly contagious, so watch out with it...
	if ( g_errno == ENOMEM && m_maxRetries == -1 ) max = 0;
	// msg0 sets maxRetries to 2, don't let max stay set to -1
	if ( g_errno == ENOMEM && m_maxRetries != -1 ) max = m_maxRetries;
	// when a thread cannot alloc enough read buf it keeps the read buf
	// set to NULL and BigFile.cpp sets g_errno to EBUFTOOSMALL
	if ( g_errno == EBUFTOOSMALL && m_maxRetries == -1 ) max = 0;
	// msg0 sets maxRetries to 2, don't let max stay set to -1
	if ( g_errno == EBUFTOOSMALL && m_maxRetries != -1 ) max = m_maxRetries;
	// . if no thread slots available, that hogs up serious memory.
	//   the size of Msg3 is 82k, so having just 5000 of them is over 400MB.
	// . i just made Msg3 alloc mem when it needs more than about 2k
	//   so this problem is greatly reduced, therefore let's keep 
	//   retrying... forever if no thread slots in thread queue since
	//   we become the thread queue in a way.
	if ( g_errno == ENOTHREADSLOTS ) max = -1;
	// this is set above if the map has the same consecutive key repeated
	// and the read is enormous
	if ( g_errno == ECORRUPTDATA ) max = 0;
	// usually bad disk failures, don't retry those forever
	//if ( g_errno == EIO ) max = 3;
	// no, now our hitachis return these even when they're good so
	// we have to keep retrying forever
	if ( g_errno == EIO ) max = -1;
	// count these so we do not take drives offline just because
	// kernel ring buffer complains...
	if ( g_errno == EIO ) g_numIOErrors++;
	// bail early on high priority reads for these errors
	if ( g_errno == EDISKSTUCK && m_niceness == 0 ) max = 0;
	if ( g_errno == EIO        && m_niceness == 0 ) max = 0;

	// how does this happen? we should never bail out on a low priority
	// disk read... we just wait for it to complete...
	if ( g_errno == EDISKSTUCK && m_niceness != 0 ) { char *xx=NULL;*xx=0;}

	// on I/O, give up and call it corrupt after a while. some hitachis
	// have I/O errors on little spots, like gk88, maybe we can fix him
	if ( g_errno == EIO && m_retryNum >= 5 ) {
		m_errno = ECORRUPTDATA;
		m_hadCorruption = true;
		// do not do any retries any more
		max = 0;
	}

	// convert m_errno to ECORRUPTDATA if it is EBUFTOOSMALL and the
	// max of the bytesToRead are over 500MB.
	// if bytesToRead was ludicrous, then assume that the data file
	// was corrupted, the map was regenerated and it patched
	// over the corrupted bits which were 500MB or more in size.
	// we cannot practically allocate that much, so let's just
	// give back an empty buffer. treat it like corruption...
	// the way it patches is to store the same key over all the corrupted
	// pages, which can get pretty big. so if you read a range with that
	// key you will be hurting!!
	// this may be the same scenario as when the rdbmap has consecutive
	// same keys. see above where we set m_errno to ECORRUPTDATA...
	if ( g_errno == EBUFTOOSMALL ) { 
		int32_t biggest = 0;
		for ( int32_t i = 0 ; i < m_numFileNums ; i++ ) {
			if ( m_scans[i].m_bytesToRead < biggest ) continue;
			biggest = m_scans[i].m_bytesToRead;
		}
		if ( biggest > 500000000 ) {
			log("db: Max read size was %" PRId32" > 500000000. Assuming "
			    "corrupt data in data file.",biggest);
			m_errno = ECORRUPTDATA;
			m_hadCorruption = true;
			// do not do any retries on this, the read was > 500MB
			max = 0;
		}
	}

	// if shutting down gb then force retries to 0 so we can shut down,
	// since it can't shut down until all threads are out of the queue i think
	if ( g_process.m_mode == EXIT_MODE && max < 0 ) {
		//log("msg3: forcing retries to 0 because shutting down");
		max = 0;
	}

	// get base, returns NULL and sets g_errno to ENOCOLLREC on error
	RdbBase *base = getRdbBase( m_rdbId, m_collnum );
	if ( ! base ) {
		return true;
	}

	// this really slows things down because it blocks the cpu so
	// leave it out for now
#ifdef GBSANITYCHECK
	// check for corruption here, do not do it again in Msg5 if we pass
	if ( ! g_errno ) { // && g_conf.m_doErrorCorrection ) {
		int32_t i;
		for ( i = 0 ; i < m_numFileNums ; i++ )
			if ( ! m_lists[i].checkList_r ( false, false ) ) break;
		if ( i < m_numFileNums ) {
			g_errno = ECORRUPTDATA;
			m_errno = ECORRUPTDATA;
			max     = g_conf.m_corruptRetries; // try 100 times
			log("db: Encountered corrupt list in file %s.",
			    base->getFile(m_fileNums[i])->getFilename());
		}
		else
			m_listsChecked = true;
	}
#endif

	// try to fix this error i've seen
	if ( g_errno == EBADENGINEER && max == -1 )
		max = 100;

	// . if we had a ETRYAGAIN error, then try again now
	// . it usually means the whole file or a part of it was deleted 
	//   before we could finish reading it, so we should re-read all now
	// . RdbMerge deletes BigFiles after it merges them and also chops
	//   off file heads
	// . now that we have threads i'd imagine we'd get EBADFD or something
	// . i've also seen "illegal seek" as well
	if ( m_errno && (m_retryNum < max || max < 0) &&
	     // this will complete in due time, we can't call a sleep wrapper
	     // on it because the read is really still pending...
	     m_errno != EDISKSTUCK ) {
		// print the error
		static time_t s_time  = 0;
		time_t now = getTime();
		if ( now - s_time > 5 || g_errno != ENOTHREADSLOTS ) {
			log("net: Had error reading %s: %s. Retrying. "
			    "(retry #%" PRId32")", 
			    base->m_dbname,mstrerror(m_errno) , m_retryNum );
			s_time = now;
		}
		// send email alert if in an infinite loop, but don't send
		// more than once every 2 hours
		static int32_t s_lastSendTime = 0;
		if ( m_retryNum == 100 && getTime() - s_lastSendTime > 3600*2){
			// remove this for now it is going off all the time
			//g_pingServer.sendEmail(NULL,//g_hostdb.getMyHost(),
			//		       "100 read retries",true);
			s_lastSendTime = getTime();
		}
		// clear g_errno because we must before the call to readList()
		g_errno = 0;
		// free the list buffer since if we have 1000 Msg3s retrying
		// it will totally use all of our memory
		for ( int32_t i = 0 ; i < m_numChunks ; i++ ) 
			m_lists[i].destructor();
		// count retries
		m_retryNum++;
		// backoff scheme, wait 200ms more each retry (sketched after this function)
		int32_t wait ;
		if ( m_retryNum == 1 ) wait = 10;
		else                   wait = 200 * m_retryNum;
		// . don't wait more than 10 secs between tries
		// . i've seen gf0 and gf16 get mega saturated
		if ( wait > 10000 ) wait = 10000;
		// register a sleep callback for the computed wait
		if ( g_loop.registerSleepCallback ( wait  , // ms
						    this  ,
						    doneSleepingWrapper3,
						    m_niceness))
			return false;
		// otherwise, registration failed
		log(
		    "net: Failed to register sleep callback for retry. "
		    "Abandoning read. This is bad.");
		// return, g_errno should be set
		g_errno = EBUFTOOSMALL;
		m_errno = EBUFTOOSMALL;
		return true;
	}

	// if we got an error and should not retry any more then give up
	if ( g_errno ) {
		log(
		    "net: Had error reading %s: %s. Giving up after %" PRId32" "
		    "retries.",
		    base->m_dbname,mstrerror(g_errno) , m_retryNum );
		return true;
	}

	// note it if the retry finally worked
	if ( m_retryNum > 0 ) 
		log(LOG_INFO,"disk: Read succeeded after retrying %" PRId32" times.",
		    (int32_t)m_retryNum);

	// count total bytes for logging
	int32_t count = 0;
	// . constrain all lists to make merging easier
	// . if we have only one list, then that's nice cuz the constrain
	//   will allow us to send it right away w/ zero copying
	// . if we have only 1 list, it won't be merged into a final list,
	//   that is, we'll just set m_list = &m_lists[i]
	for ( int32_t i = 0 ; i < m_numFileNums ; i++ ) {
		QUICKPOLL(m_niceness);
		// count total bytes for logging
		count += m_lists[i].getListSize();
		// . hint offset is relative to the offset of first key we read
		// . if that key was only 6 bytes RdbScan shifts the list buf
		//   down 6 bytes to make the first key 12 bytes... a 
		//   requirement for all RdbLists
		// . don't inc it, though, if it was 0, pointing to the start
		//   of the list because our shift won't affect that
		if ( m_scans[i].m_shifted == 6 && m_hintOffsets[i] > 0 ) 
			m_hintOffsets[i] += 6;
		// posdb double compression
		if ( m_scans[i].m_shifted == 12 && m_hintOffsets[i] > 0 ) 
			m_hintOffsets[i] += 12;
		// . don't constrain on minRecSizes here because it may
		//   make our endKey smaller, which will cause problems
		//   when Msg5 merges these lists.
		// . If all lists have different endKeys RdbList's merge
		//   chooses the min and will merge in recs beyond that
		//   causing a bad list BECAUSE we don't check to make
		//   sure that recs we are adding are below the endKey
		// . if we only read from one file then constrain based 
		//   on minRecSizes so we can send the list back w/o merging
		//   OR if just merging with RdbTree's list
		int32_t mrs ;
		// . constrain to m_minRecSizesOrig, not m_minRecSizes cuz 
		//   that  could be adjusted by compensateForNegativeRecs()
		// . but, really, they should be the same if we only read from
		//   the root file
		if ( m_numFileNums == 1 ) mrs = m_minRecSizesOrig;
		else                      mrs = -1;
		// . this returns false and sets g_errno on error
		// . like if data is corrupt
		BigFile *ff = base->getFile(m_fileNums[i]);
		// if we did a merge really quickly and deleted one of the
		// files we were reading, i've seen 'ff' be NULL
		char *filename = "lostfilename";
		if ( ff ) filename = ff->getFilename();

		// compute cache info
		RdbCache *rpc = getDiskPageCache ( m_rdbId );
		if ( ! m_allowPageCache ) rpc = NULL;
		int64_t vfd = -1; // stays -1 if 'ff' is NULL
		if ( ff ) vfd = ff->getVfd();
		key192_t ck ;
		if ( ff )
			ck = makeCacheKey ( vfd ,
					    m_scans[i].m_offset ,
					    m_scans[i].m_bytesToRead );
		if ( m_validateCache && ff && rpc && vfd != -1 ) {
			bool inCache;
			char *rec; int32_t recSize;
			inCache = rpc->getRecord ( (collnum_t)0 , // collnum
						   (char *)&ck , 
						   &rec , 
						   &recSize ,
						   true , // copy?
						   -1 , // maxAge, none 
						   true ); // inccounts?
			if ( inCache && 
			     // 1st byte is RdbScan::m_shifted
			     ( m_lists[i].m_listSize != recSize-1 ||
			       memcmp ( m_lists[i].m_list , rec+1,recSize-1) ||
			       *rec != m_scans[i].m_shifted ) ) {
				log("msg3: cache did not validate");
				char *xx=NULL;*xx=0;
			}
			// only free the copy if getRecord() actually hit
			if ( inCache ) mfree ( rec , recSize , "vca" );
		}


		///////
		//
		// STORE IN PAGE CACHE
		//
		///////
		// store what we read in the cache. don't bother storing
		// if it was a retry, just in case something strange happened.
		// storing before the constrain call is more efficient.
		if ( m_retryNum<=0 && ff && rpc && vfd != -1 &&
		     ! m_scans[i].m_inPageCache )
			rpc->addRecord ( (collnum_t)0 , // collnum
					 (char *)&ck , 
					 // rec1 is this little thingy
					 &m_scans[i].m_shifted,
					 1,
					 // rec2
					 m_lists[i].getList() ,
					 m_lists[i].getListSize() ,
					 0 ); // timestamp. 0 = now

		QUICKPOLL(m_niceness);

		// if from our 'page' cache, no need to constrain
		if ( ! m_lists[i].constrain ( m_startKey       ,
					      m_constrainKey   , // m_endKey
					      mrs           , // m_minRecSizes
					      m_hintOffsets[i] ,
					      //m_hintKeys   [i] ,
					      &m_hintKeys   [i*m_ks] ,
					      filename,//ff->getFilename() ,
					      m_niceness ) ) {
			log("net: Had error while constraining list read from "
			    "%s: %s/%s. vfd=%" PRId32" parts=%" PRId32". "
			    "This is likely caused by corrupted "
			    "data on disk.", 
			    mstrerror(g_errno), ff->getDir(),
			    ff->getFilename(), ff->m_vfd , 
			    (int32_t)ff->m_numParts );
			continue;
		}
	}

	// print the time
	if ( g_conf.m_logTimingDb ) {
		int64_t now = gettimeofdayInMilliseconds();
		int64_t took = now - m_startTime;
		log(LOG_TIMING,
		    "net: Took %" PRId64" ms to read %" PRId32" lists of %" PRId32" bytes total"
		     " from %s (niceness=%" PRId32").",
		     took,m_numFileNums,count,base->m_dbname,m_niceness);
	}
	return true;
}
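
// A standalone sketch of the retry backoff used by doneScanning() above:
// the first retry waits 10ms, each later retry n waits 200*n ms, capped
// at 10 seconds so a saturated drive is retried but not hammered. The
// helper name getRetryWait() is hypothetical, not from the source.
#include <stdio.h>

static long getRetryWait ( long retryNum ) {
	long wait;
	if ( retryNum == 1 ) wait = 10;
	else                 wait = 200 * retryNum;
	if ( wait > 10000 ) wait = 10000;
	return wait;
}

int main ( ) {
	long tries[] = { 1, 2, 5, 50, 100 };
	for ( int i = 0 ; i < 5 ; i++ )
		// prints 10, 400, 1000, 10000, 10000
		printf("retry %li waits %li ms\n",
		       tries[i], getRetryWait(tries[i]));
	return 0;
}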
// . sets m_fileOffset and m_bf
// . returns false and sets g_errno on error
// . returns false if nothing to read too... but does not set g_errno
bool ImportState::setCurrentTitleFileAndOffset ( ) {

	// leave m_bf and m_fileOffset alone if there is more to read
	if ( m_fileOffset < m_bfFileSize )
		return true;

	CollectionRec *cr = g_collectiondb.getRec ( m_collnum );
	if ( ! cr ) return false;

	log("import: import finding next file");
	
	// if ( m_offIsValid ) {
	// 	//*off = m_fileOffset;
	// 	return &m_bf; 
	// }
	//m_offIsValid = true;

	// look for titledb0001.dat etc. files in the 
	// workingDir/inject/ subdir
	SafeBuf ddd;
	ddd.safePrintf("%sinject",cr->m_importDir.getBufStart());
	// now use the one provided. we should also provide the # of threads
	if ( cr->m_importDir.getBufStart() && 
	     cr->m_importDir.getBufStart()[0] ) {
		ddd.reset();
		ddd.safeStrcpy ( cr->m_importDir.getBufStart() );
	}

	//
	// assume we are the first filename
	// set s_fileId to the minimum
	//
	Dir dir;
	dir.set(ddd.getBufStart());

	if ( ! dir.open() ) return false;

	// assume none
	long minFileId = -1;

	// getNextFilename() writes into this
	char pattern[64]; strcpy ( pattern , "titledb*" );
	char *filename;
	while ( ( filename = dir.getNextFilename ( pattern ) ) ) {
		// filename must be a certain length
		long filenameLen = gbstrlen(filename);
		// we need at least "titledb0001.dat"
		if ( filenameLen < 15 ) continue;
		// ensure filename starts w/ our m_dbname
		if ( strncmp ( filename , "titledb", 7 ) != 0 )
			continue;
		// skip if not .dat file
		if ( ! strstr ( filename , ".dat" ) )
			continue;
		// then a 4 digit number should follow
		char *s = filename + 7;
		if ( ! isdigit(*(s+0)) ) continue;
		if ( ! isdigit(*(s+1)) ) continue;
		if ( ! isdigit(*(s+2)) ) continue;
		if ( ! isdigit(*(s+3)) ) continue;
		// convert digit to id
		long id = atol(s);
		// . do not accept files we've already processed
		// . -1 means we haven't processed any yet
		if ( m_bfFileId >= 0 && id <= m_bfFileId ) continue;
		// the min of those we haven't yet processed/injected
		if ( id < minFileId || minFileId < 0 ) minFileId = id;
	}

	// get where we left off
	if ( ! m_loadedPlaceHolder ) {
		// read where we left off from file if possible (format sketched after this function)
		char fname[256];
		sprintf(fname,"%slasttitledbinjectinfo.dat",g_hostdb.m_dir);
		SafeBuf ff;
		ff.fillFromFile(fname);
		if ( ff.length() > 1 ) {
			m_loadedPlaceHolder = true;
			// get the placeholder
			sscanf ( ff.getBufStart() 
				 , "%llu,%lu"
				 , &m_fileOffset
				 , &minFileId
				 );
		}
	}

	// if no files! return false to indicate we are done
	if ( minFileId == -1 ) return false;

	// set up s_bf then
	//if ( m_bfFileId != minFileId ) {
	SafeBuf tmp;
	tmp.safePrintf("titledb%04li-000.dat"
		       //,dir.getDirname()
		       ,minFileId);
	m_bf.set ( dir.getDirname() ,tmp.getBufStart() );
	if ( ! m_bf.open( O_RDONLY ) ) {
		log("inject: import: could not open %s%s for reading",
		    dir.getDirname(),tmp.getBufStart());
		return false;
	}
	m_bfFileId = minFileId;
	// reset ptr into file
	//*off = 0;
	// and set this
	m_bfFileSize = m_bf.getFileSize();

	m_fileOffset = 0;
	//}

	log("import: importing from file %s",m_bf.getFilename());

	return true;//&m_bf;
}
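
// setCurrentTitleFileAndOffset() above resumes from a one-line
// placeholder file, "<dir>lasttitledbinjectinfo.dat", parsed back with
// sscanf("%llu,%lu"). A hypothetical sketch of the writing side, for
// illustration only (savePlaceHolder() does not exist in the source):
#include <stdio.h>

static bool savePlaceHolder ( const char *dir ,
			      unsigned long long fileOffset ,
			      unsigned long fileId ) {
	char fname[256];
	snprintf(fname,sizeof(fname),"%slasttitledbinjectinfo.dat",dir);
	FILE *f = fopen(fname,"w");
	if ( ! f ) return false;
	// same "%llu,%lu" layout the reader parses back with sscanf()
	fprintf(f,"%llu,%lu",fileOffset,fileId);
	fclose(f);
	return true;
}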
// . return false if blocked, true otherwise
// . sets g_errno on error
bool RdbMerge::getNextList ( ) {
	// return true if g_errno is set
	if ( g_errno || m_doneMerging ) return true;
	// it's suspended so we count this as blocking
	if ( m_isSuspended ) {
		m_isReadyToSave = true;
		return false;
	}
	// if the power is off, suspend the merging
	if ( ! g_process.m_powerIsOn ) {
		m_isReadyToSave = true;
		doSleep();
		return false;
	}
	// no chop threads
	m_numThreads = 0;
	// get base, returns NULL and sets g_errno to ENOCOLLREC on error
	RdbBase *base; if (!(base=getRdbBase(m_rdbId,m_collnum))) return true;
	// . if a contributor has just surpassed a "part" in his BigFile
	//   then we can delete that part from the BigFile and the map
	for ( long i = m_startFileNum ; i < m_startFileNum + m_numFiles; i++ ){
		RdbMap    *map    = base->m_maps[i];
		long       page   = map->getPage ( m_startKey );
		long long  offset = map->getAbsoluteOffset ( page );
		BigFile   *file   = base->m_files[i];
		long       part   = file->getPartNum ( offset ) ;
		if ( part == 0 ) continue;
		// i've seen this bug happen if we chop a part off on our
		// last dump and the merge never completes for some reason...
		// so if we're in the last part then don't chop the part before us
		if ( part >= file->m_maxParts - 1 ) continue;
		// if we already unlinked part # (part-1) then continue
		if ( ! file->doesPartExist ( part - 1 ) ) continue;
		// . otherwise, excise from the map
		// . we must be able to chop the mapped segments corresponding
		//   EXACTLY to the part file
		// . therefore, PAGES_PER_SEGMENT define'd in RdbMap.h must
		//   evenly divide MAX_PART_SIZE in BigFile.h
		// . i do this check in RdbMap.cpp; a compile-time sketch follows this function
		if ( ! map->chopHead ( MAX_PART_SIZE ) ) {
			// we had an error!
			log("db: Failed to remove data from map for "
			    "%s.part%li.",
			    file->getFilename(),part);
			return true;
		}
		// . also, unlink any part files BELOW part # "part"
		// . this returns false if it blocked, true otherwise
		// . this sets g_errno on error
		// . now we just unlink part file #(part-1) explicitly
		if ( ! file->chopHead ( part - 1 , chopWrapper , this ) ) 
			m_numThreads++;
		if ( ! g_errno ) continue;
		log("db: Failed to unlink file %s.part%li.",
		    file->getFilename(),part);
		return true;
	}
	// wait for file to be unlinked before getting list
	if ( m_numThreads > 0 ) return false;
	// otherwise, get it now
	return getAnotherList ( );
}
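
// The chopHead() alignment requirement above (map segments must line up
// exactly with BigFile part boundaries) can also be pinned down at
// compile time. A sketch assuming C++11 static_assert; the constants
// below are hypothetical, the real ones live in RdbMap.h and BigFile.h
// and the real check is done at runtime in RdbMap.cpp:
#define PAGES_PER_SEGMENT 8192                  // hypothetical
#define MAX_PART_SIZE     (512LL*1024*1024)     // hypothetical 512MB

static_assert ( MAX_PART_SIZE % PAGES_PER_SEGMENT == 0 ,
		"PAGES_PER_SEGMENT must evenly divide MAX_PART_SIZE" );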
//
// . ENTRY POINT FOR IMPORTING TITLEDB RECS FROM ANOTHER CLUSTER
// . when user clicks 'begin' in import page we come here..
// . so when that parm changes in Parms.cpp we sense that and call
//   beginImport(CollectionRec *cr)
// . or on startup we call resumeImports to check each coll for 
//   an import in progress.
// . search for files named titledb*.dat
// . if none found just return
// . when msg7 inject completes it calls this
// . call this from sleep wrapper in Process.cpp
// . returns false if would block (outstanding injects), true otherwise
// . sets g_errno on error
bool ImportState::importLoop ( ) {

	CollectionRec *cr = g_collectiondb.getRec ( m_collnum );

	if ( ! cr || g_hostdb.m_hostId != 0 ) { 
		// if coll was deleted!
		log("import: collnum %li deleted while importing into",
		    (long)m_collnum);
		//if ( m_numOut > m_numIn ) return true;
		// delete the entire import state i guess
		// what happens if we have a msg7 reply come back in?
		// it should see the collrec is NULL and just fail.
		mdelete ( this, sizeof(ImportState) , "impstate");
		delete (this);
		return true;
	}

 INJECTLOOP:

	// stop if waiting on outstanding injects
	long long out = m_numOut - m_numIn;
	if ( out >= cr->m_numImportInjects ) {
		g_errno = 0;
		return false;
	}
	

	if ( ! cr->m_importEnabled ) {
		// wait for all to return
		if ( out > 0 ) return false;
		// then delete it
		log("import: collnum %li import loop disabled",
		    (long)m_collnum);
		mdelete ( this, sizeof(ImportState) , "impstate");
		delete (this);
		return true;
	}




	// scan each titledb file scanning titledb0001.dat first,
	// titledb0003.dat second etc.

	//long long offset = -1;
	// . when offset is too big for current m_bigFile file then
	//   we go to the next and set offset to 0.
	// . sets m_bf and m_fileOffset
	if ( ! setCurrentTitleFileAndOffset ( ) ) {//cr  , -1 );
		log("import: import: no files to read");
		//goto INJECTLOOP;
		return true;
	}



	// this is -1 if none remain!
	if ( m_fileOffset == -1 ) {
		log("import: import fileoffset is -1. done.");
		return true;
	}

	long long saved = m_fileOffset;

	//Msg7 *msg7;
	//GigablastRequest *gr;
	//SafeBuf *sbuf = NULL;

	long need = 12;
	long dataSize = -1;
	//XmlDoc xd;
	key_t tkey;
	bool status;
	SafeBuf tmp;
	SafeBuf *sbuf = &tmp;
	long long docId;
	long shardNum;
	long key;
	Multicast *mcast;
	char *req;
	long reqSize;

	if ( m_fileOffset >= m_bfFileSize ) {
		log("inject: import: done processing file %li %s",
		    m_bfFileId,m_bf.getFilename());
		goto nextFile;
	}
	
	// read in title rec key and data size
	status = m_bf.read ( &tkey, sizeof(key_t) , m_fileOffset );
	
	//if ( n != 12 ) goto nextFile;
	if ( g_errno ) {
		log("inject: import: reading file error: %s. advancing "
		    "to next file",mstrerror(g_errno));
		goto nextFile;
	}

	m_fileOffset += 12;

	// if negative key, skip
	if ( (tkey.n0 & 0x01) == 0 ) {
		goto INJECTLOOP;
	}

	// if non-negative then read in size
	status = m_bf.read ( &dataSize , 4 , m_fileOffset );
	if ( g_errno ) {
		log("main: failed to read in title rec "
		    "file. %s. Skipping file %s",
		    mstrerror(g_errno),m_bf.getFilename());
		goto nextFile;
	}
	m_fileOffset += 4;
	need += 4;
	need += dataSize;
	need += 4; // collnum, first 4 bytes
	if ( dataSize < 0 || dataSize > 500000000 ) {
		log("main: could not scan in titledb rec of "
		    "corrupt dataSize of %li. BAILING ENTIRE "
		    "SCAN of file %s",dataSize,m_bf.getFilename());
		goto nextFile;
	}

	//gr = &msg7->m_gr;

	//XmlDoc *xd = getAvailXmlDoc();
	//msg7 = getAvailMsg7();
	mcast = getAvailMulticast();

	// if none, must have to wait for some to come back to us
	if ( ! mcast ) {
		// restore file offset
		//m_fileOffset = saved;
		// no, must have been an oom or something
		log("import: import no mcast available");
		return true;//false;
	}
	
	// this is for holding a compressed titlerec
	//sbuf = &mcast->m_sbuf;//&gr->m_sbuf;

	// point to start of buf
	sbuf->reset();

	// ensure we have enough room
	sbuf->reserve ( need );

	// collnum first 4 bytes (request layout sketched after this function)
	sbuf->pushLong( (long)m_collnum );

	// store title key
	sbuf->safeMemcpy ( &tkey , sizeof(key_t) );

	// then datasize if any. neg rec will have -1 datasize
	if ( dataSize >= 0 ) 
		sbuf->pushLong ( dataSize );

	// then read data rec itself into it, compressed titlerec part
	if ( dataSize > 0 ) {
		// read in the titlerec after the key/datasize
		status = m_bf.read ( sbuf->getBuf() ,
				     dataSize ,
				     m_fileOffset );
		if ( g_errno ) { // n != dataSize ) {
			log("main: failed to read in title rec "
			    "file. %s. Skipping file %s",
			    mstrerror(g_errno),m_bf.getFilename());
			// essentially free up this msg7 now
			//msg7->m_inUse = false;
			//msg7->reset();
			goto nextFile;
		}
		// advance
		m_fileOffset += dataSize;
		// it's good, count it
		sbuf->m_length += dataSize;
	}

	// set xmldoc from the title rec
	//xd->set ( sbuf.getBufStart() );
	//xd->m_masterState = NULL;
	//xd->m_masterCallback ( titledbInjectLoop );

	// we use this so we know where the doc we are injecting
	// was in the foreign titledb file. so we can update our bookmark
	// code.
	mcast->m_hackFileOff = saved;//m_fileOffset;
	mcast->m_hackFileId  = m_bfFileId;

	//
	// inject a title rec buf this time, we are doing an import
	// FROM A TITLEDB FILE!!!
	//
	//gr->m_titleRecBuf = &sbuf;

	// break it down into gw
	// xd.set2 ( sbuf.getBufStart() ,
	// 	  sbuf.length() , // max size
	// 	  cr->m_coll, // use our coll
	// 	  NULL , // pbuf for page parser
	// 	  1 , // niceness
	// 	  NULL ); //sreq );

	// // note it
	// log("import: importing %s",xd.m_firstUrl.getUrl());

	// now we can set gr for the injection
	// TODO: inject the whole "sbuf" so we get sitenuminlinks etc
	// all exactly the same...
	// gr->m_url = xd.getFirstUrl()->getUrl();
	// gr->m_queryToScrape = NULL;
	// gr->m_contentDelim = 0;
	// gr->m_contentTypeStr = g_contentTypeStrings [xd.m_contentType];
	// gr->m_contentFile = NULL;
	// gr->m_content = xd.ptr_utf8Content;
	// gr->m_diffbotReply = NULL;
	// gr->m_injectLinks = false;
	// gr->m_spiderLinks = true;
	// gr->m_shortReply = false;
	// gr->m_newOnly = false;
	// gr->m_deleteUrl = false;
	// gr->m_recycle = true; // recycle content? or sitelinks?
	// gr->m_dedup = false;
	// gr->m_hasMime = false;
	// gr->m_doConsistencyTesting = false;
	// gr->m_getSections = false;
	// gr->m_gotSections = false;
	// gr->m_charset = xd.m_charset;
	// gr->m_hopCount = xd.m_hopCount;


	//
	// point to next doc in the titledb file
	//
	//m_fileOffset += need;

	// get docid from key
	docId = g_titledb.getDocIdFromKey ( &tkey );

	// get shard that holds the titlerec for it
	shardNum = g_hostdb.getShardNumFromDocId ( docId );

	// for selecting which host in the shard receives it
	key = (long)docId;


	m_numOut++;

	// then index it. master callback will be called
	//if ( ! xd->index() ) return false;

	// TODO: make this forward the request to an appropriate host!!
	// . gr->m_sbuf is set to the titlerec so this should handle that
	//   and use XmlDoc::set4() or whatever
	// if ( msg7->injectTitleRec ( msg7 , // state
	// 			    gotMsg7ReplyWrapper , // callback
	// 			    cr )) {
	// 	// it didn't block somehow...
	// 	msg7->m_inUse = false;
	// 	msg7->gotMsg7Reply();
	// }


	req = sbuf->getBufStart();
	reqSize = sbuf->length();

	if ( reqSize != need ) { char *xx=NULL;*xx=0 ; }

	// do not free it, let multicast free it after sending it
	sbuf->detachBuf();


	if ( ! mcast->send ( req ,
			     reqSize ,
			     0x07 ,
			     true , // ownmsg?
			     shardNum,
			     false, // send to whole shard?
			     key , // for selecting host in shard
			     mcast , // state
			     NULL , // state2
			     gotMulticastReplyWrapper ,
			     999999 ) ) { // total timeout in seconds
		log("import: import mcast had error: %s",mstrerror(g_errno));
		m_numIn++;
	}

	goto INJECTLOOP;

 nextFile:
	// invalidate this flag
	//m_offIsValid = false;
	// . and call this function. we add one to m_bfFileId so we
	//   do not re-get the file we just injected.
	// . sets m_bf and m_fileOffset
	// . returns false if nothing to read
	if ( ! setCurrentTitleFileAndOffset ( ) ) { //cr , m_bfFileId+1 );
		log("import: import: no files left to read");
		//goto INJECTLOOP;
		return true;
	}

	// if it returns NULL we are done!
	log("main: titledb injection loop completed. waiting for "
	    "outstanding injects to return.");
		
	if ( m_numOut > m_numIn )
		return false;

	log("main: all injects have returned. DONE.");

	// dummy return
	return true;
}
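
// A sketch of the inject request layout that importLoop() above builds
// into sbuf, making the "need" arithmetic explicit. Offsets assume the
// 12-byte key_t that the sizeof(key_t) reads imply:
//
//   offset  0 :  4 bytes  collnum
//   offset  4 : 12 bytes  titledb key (tkey)
//   offset 16 :  4 bytes  dataSize (for non-negative recs)
//   offset 20 :  dataSize bytes of compressed title rec
//
// so the total matches the reqSize != need sanity check at send time:
static long getInjectRequestSize ( long dataSize ) {
	long need = 12;   // the title rec key
	need += 4;        // the dataSize field itself
	need += dataSize; // the compressed title rec
	need += 4;        // collnum, first 4 bytes of the request
	return need;      // 20 + dataSize
}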