// scan all Rdb databases and ensure no recs (it was a clean delete)
bool checkRdbLists ( long *rdbId ) {
	CollectionRec *cr = g_collectiondb.getRec("qatest123");
	if ( ! cr ) return true;
	collnum_t cn = cr->m_collnum;
	for ( ; *rdbId + 1 < RDB_END ; ) {
		// pre-inc it (the +1 in the loop test above keeps the
		// incremented value a valid rdbid below RDB_END)
		*rdbId = *rdbId + 1;
		char minKey[MAX_KEY_BYTES];
		char maxKey[MAX_KEY_BYTES];
		KEYMIN(minKey,MAX_KEY_BYTES);
		KEYMAX(maxKey,MAX_KEY_BYTES);
		if ( ! s_msg0.getList ( 0 , // hostid
					0 , // ip
					0 , // port
					0 , // cacheage
					false, // addtocache
					*rdbId , // rdbid
					cn , // collnum
					&s_list ,
					minKey ,
					maxKey ,
					1000 , // minrecsizes
					rdbId , // state
					gotList33,
					0 // niceness
					) )
			return false;
	}
	return true;
}
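// gotList33() is defined elsewhere; a rough sketch of the contract it has
// to satisfy (the body below is illustrative, not the actual code):
//
// void gotList33 ( void *state ) {
// 	long *rdbId = (long *)state;
// 	// a clean delete means every rdb returns an empty list
// 	if ( ! s_list.isEmpty() )
// 		log("qa: rdbid %li still has %li bytes of recs",
// 		    *rdbId , (long)s_list.getListSize() );
// 	// resume the scan where we left off
// 	checkRdbLists ( rdbId );
// }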
// . return false if blocked, true otherwise
// . set g_errno on error
// . list should be truncated, possibly have all negative keys removed,
//   and de-duped thanks to RdbList::indexMerge_r() and RdbList::merge_r()
bool RdbMerge::dumpList ( ) {
	// return true on g_errno
	if ( g_errno ) return true;

	// . it's suspended so we count this as blocking
	// . resumeMerge() will call getNextList() again, not dumpList() so
	//   don't advance m_startKey
	if ( m_isSuspended ) {
		m_isReadyToSave = true;
		return false;
	}

	// . set the list to only those records that should be in our group
	// . filter the records that don't belong in this group via groupId
	//filterList ( &m_list );

	// . compute the new m_startKey to get the next list from disk
	// . m_list was formed via RdbList::merge() 
	// . m_list may be empty because of negative/positive collisions
	//   but there may still be data left
	//m_startKey = m_list.getLastKey() ;
	//m_list.getLastKey(m_startKey) ;
	// if we use getLastKey() for this the merge completes but then
	// tries to merge two empty lists and cores in the merge function.
	// it seems to rely on endKey rollover only, and not on reading
	// less than minRecSizes, to determine when to stop the merge.
	m_list.getEndKey(m_startKey) ;
	//m_startKey += (uint32_t)1;
	KEYADD(m_startKey,m_ks);
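	// (KEYADD() is assumed here to treat the key as a little-endian
	//  m_ks-byte integer and add 1 with carry, roughly:
	//    for ( char i = 0 ; i < ks ; i++ ) if ( ++key[i] != 0 ) break;
	//  so rolling all the way back to zeroes means we passed the max
	//  key, which is exactly what the KEYCMP() check below tests for.)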

	/////
	//
	// dedup for spiderdb before we dump it. try to save disk space.
	//
	/////
	if ( m_rdbId == RDB_SPIDERDB )
		// removeNegRecs? = false
		dedupSpiderdbList(&m_list, false);

	// if the startKey rolled over we're done
	//if ( m_startKey.n0 == 0LL && m_startKey.n1 == 0 ) m_doneMerging=true;
	if ( KEYCMP(m_startKey,KEYMIN(),m_ks)==0 ) m_doneMerging = true;
	// debug msg
	log(LOG_DEBUG,"db: Dumping list.");
	// debug msg
	//fprintf(stderr,"list startKey.n1=%" PRIu32",n0=%" PRIu64", endKey.n1=%" PRIu32",n0=%" PRIu64","
	//	" size=%" PRId32"\n",
	//	m_list.getStartKey().n1, 
	//	m_list.getStartKey().n0, 
	//	m_list.getLastKey().n1, 
	//	m_list.getLastKey().n0,  m_list.getListSize() );
	// . send the whole list to the dump
	// . it returns false if blocked, true otherwise
	// . it sets g_errno on error
	// . it calls dumpListWrapper when done dumping
	// . return true if m_dump had an error or it did not block
	// . if it gets an EFILECLOSED error it will keep retrying forever
	return m_dump.dumpList ( &m_list , m_niceness , false/*recall?*/ ) ;
}
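// dumpListWrapper() and getNextList() live elsewhere in this file; a rough
// sketch of the read/write loop they form (illustrative only, error
// handling via g_errno omitted):
//
// void dumpListWrapper ( void *state ) {
// 	RdbMerge *THIS = (RdbMerge *)state;
// 	// alternate reads (getNextList) and writes (dumpList) until one
// 	// of them blocks; its callback then re-enters this loop
// 	for ( ; ; ) {
// 		if ( THIS->m_doneMerging ) { THIS->doneMerging(); return; }
// 		if ( ! THIS->getNextList() ) return; // blocked on read
// 		if ( ! THIS->dumpList   () ) return; // blocked on write
// 	}
// }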
// . buffer is used for reading and writing
// . return false if blocked, true otherwise
// . sets g_errno on error
// . if niceness is 0 merge will block, otherwise will not block
// . we now use niceness of 1 which should spawn threads that don't allow
//   niceness 2 threads to launch while they're running
// . the spider process now uses mostly niceness 2
// . we need the merge to take priority over spider processes on disk otherwise
//   there's too much contention from spider lookups on disk for the merge
//   to finish in a decent amount of time and we end up getting too many files!
bool RdbMerge::merge ( char     rdbId        ,
		       //char    *coll         , //RdbBase *base         , 
		       collnum_t collnum,
		       BigFile *target       , 
		       RdbMap  *targetMap    ,
		       long     id2          , // target's secondary id
		       long     startFileNum , 
		       long     numFiles     ,
		       long     niceness     ,
		       class DiskPageCache *pc   ,
		       long long maxTargetFileSize ,
		       char     keySize      ) {
	// reset ourselves
	reset();
	// set it
	m_rdbId = rdbId;
	Rdb *rdb = getRdbFromId ( rdbId );
	// get base, returns NULL and sets g_errno to ENOCOLLREC on error
	RdbBase *base; if (!(base=getRdbBase(m_rdbId,collnum))) return true;
	// don't breech the max
	//if ( numFiles > m_maxFilesToMerge ) numFiles = m_maxFilesToMerge;
	// reset this map! its m_crcs need to be reset
	//targetMap->reset();
	// remember some parms
	//if ( ! coll && rdb->m_isCollectionLess )
	//	strcpy ( m_coll , rdb->m_dbname );
	//else
	//	strcpy ( m_coll , coll );

	m_collnum = collnum;
	if ( rdb->m_isCollectionLess ) m_collnum = 0;

	m_target          = target;
	m_targetMap       = targetMap;
	m_id2             = id2;
	m_startFileNum    = startFileNum;
	m_numFiles        = numFiles;
	m_dedup           = base->m_dedup;
	m_fixedDataSize   = base->m_fixedDataSize;
	m_niceness        = niceness;
	m_pc              = pc;
	m_maxTargetFileSize = maxTargetFileSize;
	m_doneMerging     = false;
	m_ks              = keySize;
	// . set the key range we want to retrieve from the files
	// . just get from the files, not tree (not cache?)
	//m_startKey.setMin();
	//m_endKey.setMax();
	KEYMIN(m_startKey,m_ks);
	KEYMAX(m_endKey,m_ks);
	// if we're resuming a killed merge, set m_startKey to last
	// key the map knows about.
	// the dump will start dumping at the end of the targetMap's data file.
	if ( m_targetMap->getNumRecs() > 0 ) {
		log(LOG_INIT,"db: Resuming a killed merge.");
		//m_startKey = m_targetMap->getLastKey();
		m_targetMap->getLastKey(m_startKey);
		//m_startKey += (unsigned long) 1;
		KEYADD(m_startKey,m_ks);
		// if power goes out and we are not doing synchronous writes
		// then we could have completely lost some data and unlinked
		// a part file from the file being merged, so that the data is
		// gone. to be able to resume merging, we must increment the
		// startKey until it references a valid offset in all the 
		// files being merged. invalid offsets will reference parts 
		// that have been chopped.
		/*
		RdbMap  **maps  = rdb->getMaps();
		BigFile **files = rdb->getFiles();
		for ( long i=m_startFileNum;i<m_startFileNum+m_numFiles;i++){
			long long minOff = 0LL;
			long k = 0;
			while ( k < files[i]->m_maxParts &&
				!   files[i]->m_files[k]    ) {
				k++;
				minOff += MAX_PART_SIZE;
			}
			long pn0 = maps[i]->getPage ( m_startKey );
			long pn  = pn0;
			while ( maps[i]->getAbsoluteOffset(pn) < minOff ) pn++;
			if ( pn != pn0 ) {
				log("db: Lost data during merge. Starting "
				    "merge at page number %li from %li for "
				    "file.",pn,pn0);
				m_startKey = maps[i]->getKey ( pn );
			}
		}
		*/
	}
	// free our list's memory, just in case
	//m_list.freeList();
	// . we may have multiple hosts running on the same cpu/hardDrive
	// . therefore, to maximize disk space, we should only have 1 merge
	//   at a time going on between these hosts
	// . now tfndb has its own merge class since titledb merge writes url recs
	/*
	if ( s_isMergeLocked ) {
		//log("RdbMerge::merge: someone else merging sleeping.");
		log("RdbMerge::merge: someone else merging. bad engineer.");
		return false;
		// if it fails then sleep until it works
		//returng_loop.registerSleepCallback(5000,this,getLockWrapper);
	}
	*/
	return gotLock();
}
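// a hypothetical caller (all values illustrative; in practice RdbBase sets
// this up when it decides a set of files needs merging):
//
// if ( m_merge.merge ( RDB_TITLEDB ,
// 		        collnum     ,
// 		        targetFile  , // BigFile the merged recs go into
// 		        targetMap   ,
// 		        0           , // id2, target's secondary id
// 		        2           , // startFileNum
// 		        4           , // numFiles
// 		        1           , // niceness (see note above merge())
// 		        NULL        , // DiskPageCache
// 		        0x7fffffffffffffffLL , // maxTargetFileSize
// 		        12          ) ) // keySize, assuming 12-byte keys
// 	// returned true: nothing blocked, so check g_errno for errors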
// . returns false if blocked, true otherwise
// . sets g_errno on error
bool RdbMerge::gotLock ( ) {
	// get total recSizes of files we're merging
	//long totalSize = 0;
	//for ( long i=m_startFileNum ; i < m_startFileNum + m_numFiles ; i++ )
	//totalSize += m_base->m_files[i]->getSize();
	// . grow the map now so it doesn't have to keep growing dynamically
	//   which wastes memory
	// . setMapSize() returns false and sets g_errno on error
	// . we return true if it had an error
	//if ( ! m_targetMap->setMapSizeFromFileSize ( totalSize ) ) {
	//log("RdbMerge::getLockFile: targetMap setMapSize failed");
	//return true;
	//}

	// . get last mapped offset
	// . this may actually be smaller than the file's actual size
	//   but the excess is not in the map, so we need to do it again
	long long startOffset = m_targetMap->getFileSize();

	// if startOffset is > 0 use the last key as RdbDump::m_prevLastKey
	// so it can compress the next key it dumps, provided m_useHalfKeys
	// is true (key compression) and the next key has the same top 6 bytes
	// as m_prevLastKey
	//key_t prevLastKey;
	//if ( startOffset > 0 ) prevLastKey = m_targetMap->getLastKey();
	//else                   prevLastKey.setMin();
	char prevLastKey[MAX_KEY_BYTES];
	if ( startOffset > 0 ) m_targetMap->getLastKey(prevLastKey);
	else                   KEYMIN(prevLastKey,m_ks);
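	// (half-key compression, as assumed here: a key whose top 6 bytes
	//  match the previously written key is stored as just its low 6
	//  bytes plus a bit marking it as a "half key"; m_prevLastKey
	//  seeds that comparison across the resume boundary)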

	// get base, returns NULL and sets g_errno to ENOCOLLREC on error
	RdbBase *base; if (!(base=getRdbBase(m_rdbId,m_collnum))) return true;

	// . set up a file to dump the records into
	// . returns false and sets g_errno on error
	// . this will open m_target as O_RDWR | O_NONBLOCK | O_ASYNC ...
	m_dump.set ( m_collnum          ,
		     m_target           ,
		     m_id2              ,
		     //m_startFileNum - 1 , // merge fileNum in Rdb::m_files[]
		     (m_rdbId == RDB_TITLEDB || m_rdbId == RDB2_TITLEDB2) , // isTitledb?
		     NULL         , // buckets to dump is NULL, we call dumpList
		     NULL         , // tree to dump is NULL, we call dumpList
		     m_targetMap  ,
		     NULL         , // for caching dumped tree
		     0            , // m_maxBufSize. not needed if no tree! 
		     true         , // orderedDump?
		     m_dedup      ,
		     m_niceness   , // niceness of dump
		     this         , // state
		     dumpListWrapper ,
		     base->useHalfKeys() ,
		     startOffset  ,
		     prevLastKey  ,
		     m_ks         ,
		     m_pc         ,
		     m_maxTargetFileSize ,
		     NULL                ); // set m_base::m_needsToSave? no.
	// what kind of error?
	if ( g_errno ) {
		log("db: gotLock: %s.", mstrerror(g_errno) );
		return true;
	}
	// . create a new msg3
	// . don't keep static because it contains a msg3, treeList & diskList
	// . these can take up many megs of mem
	// . yes, but we need to avoid fragmentation, so hold on to our mem!
	//m_msg3 = new (Msg3);
	//if ( ! m_msg3 ) return false;
	// we're now merging since the dump was set up successfully
	m_isMerging     = true;
	// make it suspended for now
	m_isSuspended   = true;
	// grab the lock
	//s_isMergeLocked = true;
	// . this unsuspends it
	// . this returns false on error and sets g_errno
	// . it returns true if blocked or merge completed successfully
	return resumeMerge ( );
}
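// resumeMerge() is defined elsewhere; per the comment above it unsuspends
// the merge and kicks off the first getNextList()/dumpList() cycle (see
// the loop sketch above RdbMerge::merge()).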
// . return false if blocked, true otherwise
// . sets g_errno on error
bool RdbDump::set ( //char     *coll          ,
		    collnum_t collnum       ,
		    BigFile  *file          ,
		    int32_t   id2           , // in Rdb::m_files[] array
		    bool      isTitledb     ,
		    RdbBuckets *buckets     , // optional buckets to dump
		    RdbTree  *tree          , // optional tree to dump
		    RdbMap   *map           ,
		    RdbCache *cache         ,
		    int32_t   maxBufSize    ,
		    bool      orderedDump   , // dump in order of keys?
		    bool      dedup         , // 4 RdbCache::incorporateList()
		    int32_t   niceness      ,
		    void     *state         ,
		    void     (* callback) ( void *state ) ,
		    bool      useHalfKeys   ,
		    int64_t   startOffset   ,
		    //key_t   prevLastKey   ,
		    char     *prevLastKey   ,
		    char      keySize       ,
		    //class DiskPageCache *pc ,
		    void     *pc            ,
		    int64_t   maxFileSize   ,
		    Rdb      *rdb           ) {

	if ( ! orderedDump ) {
		log(LOG_LOGIC,"db: RdbDump does not support non-ordered.");
		// intentional null deref so we core dump on this logic error
		char *xx = NULL; *xx = 0;
	}
	//if ( ! coll &&
	//if ( ! coll && rdb->m_isCollectionLess )
	//	strcpy(m_coll,rdb->m_dbname);

	m_collnum = collnum;

	// use 0 for collectionless
	if ( rdb && rdb->m_isCollectionLess ) m_collnum = 0;

	// are we like catdb/statsdb etc.?
	m_doCollCheck = true;
	if ( rdb && rdb->m_isCollectionLess ) m_doCollCheck = false;
	// RdbMerge also calls us but rdb is always set to NULL and it was
	// causing a merge on catdb (collectionless) to screw up
	if ( ! rdb ) m_doCollCheck = false;

	/*
	if ( ! coll && g_catdb.getRdb() == rdb )
		strcpy(m_coll, "catdb");
	else if ( ! coll && g_statsdb.getRdb() == rdb )
		strcpy(m_coll, "statsdb");
	else if ( ! coll && g_accessdb.getRdb() == rdb )
		strcpy(m_coll, "accessdb");
	*/
	//else
	//	strcpy ( m_coll , coll );
	m_file          = file;
	m_id2           = id2;
	m_isTitledb     = isTitledb;
	m_buckets       = buckets;
	m_tree          = tree;
	m_map           = map;
	m_cache         = cache;
	m_orderedDump   = orderedDump;
	m_dedup         = dedup;
	m_state         = state;
	m_callback      = callback;
	m_list          = NULL;
	m_niceness      = niceness;
	m_tried         = false;
	m_isSuspended   = false;
	m_ks            = keySize;
	m_addToMap      = true;

	// reset this in case we run out of mem, it doesn't get set properly
	// and needs to be NULL for RdbMem's call to getLastKeyInQueue()
	m_lastKeyInQueue  = NULL;
	KEYMIN(m_firstKeyInQueue,m_ks);

	m_isDumping     = false;
	m_writing       = false;
	m_buf           = NULL;
	m_verifyBuf     = NULL;
	m_maxBufSize    = maxBufSize;
	m_offset        = startOffset ;
	m_rolledOver    = false; // true if m_nextKey rolls over back to 0
	//m_nextKey       = 0 ; // used in dumpTree()
	KEYMIN(m_nextKey,m_ks);
	m_nextNode      = 0 ; // used in dumpTree()
	// if we're dumping indexdb, allow half keys
	m_useHalfKeys  = useHalfKeys;
	//m_prevLastKey  = prevLastKey;
	KEYSET(m_prevLastKey,prevLastKey,m_ks);
	// for setting m_rdb->m_needsSave after deleting the dump list
	m_rdb = rdb;
	// . don't dump to a pre-existing file
	// . seems like Rdb.cpp makes a new BigFile before calling this
	// . now we can resume merges, so we can indeed dump to the END
	//   of a pre-existing file, but not when dumping a tree!
	//if ( m_file->doesExist() > 0 ) {
	if ( (m_tree || m_buckets) && m_file->getFileSize() > 0 ) {
		g_errno = EEXIST;
		log("db: Could not dump to %s. File exists.",
		    m_file->getFilename());
		return true;
	}
	// . NOTE: MAX_PART_SIZE in BigFile must be defined to be bigger than
	//   anything we actually dump since we only anticipate spanning 1 file
	//   and so only register the first file's fd for write callbacks
	//if ( m_tree && m_tree->getMaxMem() > MAX_PART_SIZE ) 
	//return log("RdbDump::dump: tree bigger than file part size");
	// . open the file nonblocking, sync with disk, read/write
	// . NOTE: O_SYNC doesn't work too well over NFS
	// . we need O_SYNC when dumping trees only because we delete the
	//   nodes/records as we dump them 
	// . ensure this sets g_errno for us
	// . TODO: open might not block! fix that!
	int32_t flags = O_RDWR | O_CREAT ;
	// a niceness bigger than 0 means to do non-blocking dumps
	if ( niceness > 0 ) flags |=  O_ASYNC | O_NONBLOCK ;
	if ( ! m_file->open ( flags , pc , maxFileSize ) ) return true;
	// . get the file descriptor of the first real file in BigFile
	// . we should only dump to the first file in BigFile otherwise,
	//   we'd have to juggle fd registration
	m_fd = m_file->getfd ( 0 , false /*for reading?*/ );
	if ( m_fd < 0 ) {
		log(LOG_LOGIC,"db: dump: Bad fd of first file in BigFile.") ;
		return true;
	}
	// debug test
	//char buf1[10*1024];
	//int32_t n1 = m_file->write ( buf1 , 10*1024 , 0 );
	//log("bytes written=%"INT32"\n",n1);
	// we're now considered to be in dumping state
	m_isDumping = true;
	// . if no tree was provided to dump it must be RdbMerge calling us
	// . he'll want to call dumpList() on his own
	if ( ! m_tree && !m_buckets ) return true;
	// how many recs in the tree/buckets?
	int32_t nr = 0;
	const char *structureName = "unknown";
	if ( m_tree ) {
		nr = m_tree->getNumUsedNodes();
		structureName = "tree";
	}
	else if ( m_buckets ) {
		nr = m_buckets->getNumKeys();
		structureName = "buckets";
	}
	// debug msg
	log(LOG_INFO,"db: Dumping %"INT32" recs from %s to files.",
	    nr, structureName);
	//    nr , m_file->getFilename() );
	// keep a total count for reporting when done
	m_totalPosDumped = 0;
	m_totalNegDumped = 0;

	// we have our own flag here since m_dump::m_isDumping gets
	// set to true between collection dumps, RdbMem.cpp needs
	// a flag that doesn't do that... see RdbDump.cpp.
	// this was in Rdb.cpp but when threads were turned off it was
	// NEVER getting set and resulted in corruption in RdbMem.cpp.
	m_rdb->m_inDumpLoop = true;

	// . start dumping the tree 
	// . return false if it blocked
	if ( ! dumpTree ( false ) ) return false;
	// no longer dumping
	doneDumping();
	// return true since we didn't block
	return true;
}
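// the two calling patterns for set(), for reference:
// . Rdb passes a tree (or buckets): set() drives dumpTree() itself and
//   returns false if that dump blocked
// . RdbMerge passes NULL for both: set() returns true right after opening
//   the file, and the merger then feeds it lists via dumpList() (see
//   RdbMerge::dumpList() above)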