// . returns true if all done (or if we hit an error)
// . returns false if it blocked; the callback re-enters this loop
bool Test::injectLoop ( ) {

	long  dlen   ;
	char *dom    ;
	long  fakeIp ;

 loop:
	// advance to next url
	for ( ; m_urlPtr < m_urlEnd && ! *m_urlPtr ; m_urlPtr++ ) ;
	// all done?
	if ( m_urlPtr >= m_urlEnd ) {
		// flush em out
		if ( ! flushMsg4Buffers ( this , injectedWrapper ) ) 
			return false;
		// note it
		m_isAdding = false;
		// all done
		return true;
	}
	// error means all done
	if ( m_errno ) { m_isAdding = false; return true; }
	// point to it
	char *u = m_urlPtr;
	// advance to point to the next url for the next loop!
	for ( ; m_urlPtr < m_urlEnd && *m_urlPtr ; m_urlPtr++ ) ;

	// hash it
	long long h = hash64b ( u );
	// dedup it lest we freeze up and stopIt() never gets called because
	// m_urlsAdded is never decremented all the way to zero in Spider.cpp
	if ( m_dt.isInTable ( &h ) ) goto loop;
	// add it. return true with g_errno set on error
	if ( ! m_dt.addKey ( &h ) ) goto hadError;

	// make the SpiderRequest from it
	m_sreq.reset();
	// url
	strcpy ( m_sreq.m_url , u );
	// get domain of url
	dom = getDomFast ( m_sreq.m_url , &dlen );
	// make a fake ip
	fakeIp = 0x123456;
	// use domain if we got that
	if ( dom && dlen ) fakeIp = hash32 ( dom , dlen );
	// first ip is fake
	m_sreq.m_firstIp = fakeIp; // 0x123456;
	// these too
	m_sreq.m_domHash32  = fakeIp;
	m_sreq.m_hostHash32 = fakeIp;
	m_sreq.m_siteHash32 = fakeIp;
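	// note: hashing the domain means all urls from the same domain get
	// the same fake first ip, so they presumably land in the same
	// spiderdb shard and share the same per-ip spider queue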
	m_sreq.m_probDocId = g_titledb.getProbableDocId( m_sreq.m_url );
	// this crap is fake
	m_sreq.m_isInjecting = 1;
	// use test-spider subdir for storing pages and spider times?
	if ( g_conf.m_testSpiderEnabled ) m_sreq.m_useTestSpiderDir = 1;
	// use this later
	m_sreq.m_hasContent = 0;
	// injected requests use this as the spider time i guess
	// so we can sort them by this
	m_sreq.m_addedTime = ++s_count;

	// no, because to compute XmlDoc::m_min/maxPubDate we need this to
	// be valid for our test run.. no no we will fix it to be
	// basically 2 days before spider time in the code...
	//m_sreq.m_addedTime = spiderTime;

	m_sreq.m_fakeFirstIp = 1;

	// make the key (parentDocId=0)
	m_sreq.setKey ( fakeIp, 0LL , false );
	// test it
	if ( g_spiderdb.getFirstIp(&m_sreq.m_key) != fakeIp ) {
		char *xx=NULL;*xx=0;}
	// sanity check. check for http(s)://
	if ( m_sreq.m_url[0] != 'h' ) { char *xx=NULL;*xx=0; }

	// reset this
	g_errno = 0;

	// count it
	m_urlsAdded++;

	// note it
	//log("crazyout: %s",m_sreq.m_url );
	logf(LOG_DEBUG,"spider: injecting test url %s",m_sreq.m_url);

	// the receiving end will realize that we are injecting into the test
	// collection and use the "/test/" subdir to load the file
	// "ips.txt" to do our ip lookups, and search for any downloads in
	// that subdirectory as well.
	if ( ! m_msg4.addMetaList ( (char *)&m_sreq     ,
				    m_sreq.getRecSize() ,
				    m_coll              ,
				    NULL                ,
				    injectedWrapper     ,
				    MAX_NICENESS        ,
				    RDB_SPIDERDB        ) )
		// return false if blocked
		return false;
	// error?
	if ( g_errno ) {
		// jump down here from above on error
	hadError:
		// save it
		m_errno = g_errno;
		// flag it
		m_isAdding = false;
		// note it
		log("test: inject had error: %s",mstrerror(g_errno));
		// stop, we are all done!
		return true;
	}
	// add the next spider request
	goto loop;
}
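// A sketch of the calling convention assumed above (the real driver and
// "injectedWrapper" live elsewhere in the test harness):
//
//   static void injectedWrapper ( void *state ) {
//           Test *THIS = (Test *)state;
//           // re-enter the loop when an addMetaList() or flush completes;
//           // it returns false if it blocked again
//           if ( ! THIS->injectLoop() ) return;
//           // all urls injected, or THIS->m_errno is set
//   }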
bool sendPageCloneColl ( TcpSocket *s , HttpRequest *r ) {

	char format = r->getReplyFormat();

	char *coll = r->getString("c");

	if ( format == FORMAT_XML || format == FORMAT_JSON ) {
		if ( ! coll ) {
			g_errno = EBADENGINEER;
			char *msg = "no c parm provided";
			return g_httpServer.sendErrorReply(s,g_errno,msg,NULL);
		}
		return g_httpServer.sendSuccessReply(s,format);
	}

	char  buf [ 64*1024 ];
	SafeBuf p(buf, 64*1024);

	// print standard header
	g_pages.printAdminTop ( &p , s , r );

	char *msg = NULL;
	if ( g_errno ) msg = mstrerror(g_errno);

	if ( msg ) {
		p.safePrintf (
			  "<center>\n"
			  "<font color=red>"
			  "<b>Error cloning collection: %s. "
			  "See log file for details.</b>"
			  "</font>"
			  "</center><br>\n",msg);
	}

	// print the clone box

	p.safePrintf (
		      "<center>\n<table %s>\n"
		      "<tr class=hdrow><td colspan=2>"
		      "<center><b>Clone Collection</b></center>"
		      "</td></tr>\n",
		      TABLE_STYLE);

	p.safePrintf (
		      "<tr bgcolor=#%s>"
		      "<td><b>clone settings from this collection</b>"
		      "<br><font size=1>Copy settings FROM this "
		      "pre-existing collection into the currently "
		      "selected collection."
		      "</font></td>\n"
		      "<td><input type=text name=clonecoll size=30>"
		      "</td>"
		      "</tr>"

		      , LIGHT_BLUE
		      );

	p.safePrintf ( "</table></center><br>\n");
	// wrap up the form started by printAdminTop
	g_pages.printAdminBottom ( &p );
	long bufLen = p.length();
	return g_httpServer.sendDynamicPage (s,p.getBufStart(),bufLen);

}
bool PageNetTest::gotResults( TcpSocket *s ) {
	char *buf;
	long  bufLen, bufMaxLen;
	HttpMime mime;

	if ( g_errno ) {
		log( "net: nettest: g_errno: %s", mstrerror(g_errno) );
		g_errno = 0;
		return false;
	}
	if ( !s ) return false;


	buf       = s->m_readBuf;
	bufLen    = s->m_readOffset;
	bufMaxLen = s->m_readBufSize;

	char temp[64];
	long len = sprintf(temp, "http://%s:%i/get?rnettest=1", 
			   iptoa(s->m_ip), s->m_port);
	Url u;
	u.set( temp, len );
	if ( !mime.set ( buf, bufLen, &u ) ) {		
		log( "net: nettest: MIME.set() failed." );
		return false;
	}

	if ( mime.getHttpStatus() != 200 ) {
		log( "net: nettest: MIME.getHttpStatus() failed." );
		return false;
	}

	long state = 0;
	long hostId = 0;
	long testId = 0;

	if( !bufLen ) log( LOG_INFO, "net: nettest: we got an empty doc." );

	buf += mime.getMimeLen();
	bufLen -= mime.getMimeLen();

	for( long i = 0; i < bufLen; i++ ){		
		if( buf[i] == ' '  ) continue;
		if( buf[i] == '\r' ) continue;
		if( buf[i] == '\n' ) continue;
		if( buf[i] <  '0'  ) continue;

		if( state == 0 ) {
			hostId = atoi(&buf[i]);
			log( LOG_DEBUG, "net: nettest: host id is %ld",
			     hostId);
			state = 1;
		}
		else if( state == 1 ) {
			testId = atoi(&buf[i]);
			log( LOG_DEBUG, "net: nettest: test id is %ld",
			     testId);
			state = 2;
		}
		else if( state == 2 ){
			if( ((testId < hostId) || !hostId) && (testId) ) {
				if( !m_hostRates[0][hostId] )
					m_hostRates[0][hostId] = atoi(&buf[i]);
				else 
					m_hostRates[2][hostId] = atoi(&buf[i]);
			}
			else {
				if( !m_hostRates[2][hostId] )
					m_hostRates[2][hostId] = atoi(&buf[i]);
				else
					m_hostRates[0][hostId] = atoi(&buf[i]);
			}
			state = 3;
			log( LOG_DEBUG, "net: nettest: send rate is %d",
			     atoi(&buf[i]));
		}
		else if( state == 3 ) {
			if( ((testId < hostId) || !hostId) && (testId) ) {
				if( !m_hostRates[1][hostId] )
					m_hostRates[1][hostId] = atoi(&buf[i]);
				else
					m_hostRates[3][hostId] = atoi(&buf[i]);
			}
			else {
				if( !m_hostRates[3][hostId] )
					m_hostRates[3][hostId] = atoi(&buf[i]);
				else
					m_hostRates[1][hostId] = atoi(&buf[i]);				
			}
			state = 0;
			log( LOG_DEBUG, "net: nettest: rcv rate is %d",
			     atoi(&buf[i]));
		}

		// skip the rest of this number, but guard against reading
		// one past the end of the buffer
		while( i+1 < bufLen && buf[i+1] >= '0' ) i++;
	}


	if( m_numResultsSent < g_hostdb.getNumHosts() )
		return collectResults();

	if( ++m_numResultsRecv < m_numResultsSent )
		return false;
	
	return true;
}
//
// . ENTRY POINT FOR IMPORTING TITLEDB RECS FROM ANOTHER CLUSTER
// . when user clicks 'begin' in import page we come here..
// . so when that parm changes in Parms.cpp we sense that and call
//   beginImport(CollectionRec *cr)
// . or on startup we call resumeImports to check each coll for 
//   an import in progress.
// . search for files named titledb*.dat
// . if none found just return
// . when a msg7 inject completes it calls this
// . call this from sleep wrapper in Process.cpp
// . returns false if would block (outstanding injects), true otherwise
// . sets g_errno on error
bool ImportState::importLoop ( ) {

	CollectionRec *cr = g_collectiondb.getRec ( m_collnum );

	if ( ! cr || g_hostdb.m_hostId != 0 ) { 
		// if coll was deleted (or we are not host #0)
		log("import: collnum %li deleted (or not host #0) while "
		    "importing. stopping.",
		    (long)m_collnum);
		//if ( m_numOut > m_numIn ) return true;
		// delete the entire import state i guess
		// what happens if we have a msg7 reply come back in?
		// it should see the collrec is NULL and just fail.
		mdelete ( this, sizeof(ImportState) , "impstate");
		delete (this);
		return true;
	}

 INJECTLOOP:

	// stop if waiting on outstanding injects
	long long out = m_numOut - m_numIn;
	if ( out >= cr->m_numImportInjects ) {
		g_errno = 0;
		return false;
	}
	

	if ( ! cr->m_importEnabled ) {
		// wait for all to return
		if ( out > 0 ) return false;
		// then delete it
		log("import: collnum %li import loop disabled",
		    (long)m_collnum);
		mdelete ( this, sizeof(ImportState) , "impstate");
		delete (this);
		return true;
	}




	// scan each titledb file scanning titledb0001.dat first,
	// titledb0003.dat second etc.

	//long long offset = -1;
	// . when offset is too big for current m_bigFile file then
	//   we go to the next and set offset to 0.
	// . sets m_bf and m_fileOffset
	if ( ! setCurrentTitleFileAndOffset ( ) ) { // was: ( cr , -1 )
		log("import: no files to read");
		//goto INJECTLOOP;
		return true;
	}



	// this is -1 if none remain!
	if ( m_fileOffset == -1 ) {
		log("import: import fileoffset is -1. done.");
		return true;
	}

	long long saved = m_fileOffset;

	//Msg7 *msg7;
	//GigablastRequest *gr;
	//SafeBuf *sbuf = NULL;

	long need = 12;
	long dataSize = -1;
	//XmlDoc xd;
	key_t tkey;
	bool status;
	SafeBuf tmp;
	SafeBuf *sbuf = &tmp;
	long long docId;
	long shardNum;
	long key;
	Multicast *mcast;
	char *req;
	long reqSize;

	if ( m_fileOffset >= m_bfFileSize ) {
		log("inject: import: done processing file %li %s",
		    m_bfFileId,m_bf.getFilename());
		goto nextFile;
	}
	
	// read in title rec key and data size
	status = m_bf.read ( &tkey, sizeof(key_t) , m_fileOffset );
	
	//if ( n != 12 ) goto nextFile;
	if ( g_errno ) {
		log("inject: import: reading file error: %s. advancing "
		    "to next file",mstrerror(g_errno));
		goto nextFile;
	}

	m_fileOffset += 12;

	// if negative (delete) key, skip. low bit clear means negative.
	if ( (tkey.n0 & 0x01) == 0 ) {
		goto INJECTLOOP;
	}

	// if non-negative then read in size
	status = m_bf.read ( &dataSize , 4 , m_fileOffset );
	if ( g_errno ) {
		log("main: failed to read in title rec "
		    "file. %s. Skipping file %s",
		    mstrerror(g_errno),m_bf.getFilename());
		goto nextFile;
	}
	m_fileOffset += 4;
	need += 4;
	need += dataSize;
	need += 4; // collnum, first 4 bytes
	if ( dataSize < 0 || dataSize > 500000000 ) {
		log("main: could not scan in titledb rec of "
		    "corrupt dataSize of %li. BAILING ENTIRE "
		    "SCAN of file %s",dataSize,m_bf.getFilename());
		goto nextFile;
	}

	//gr = &msg7->m_gr;

	//XmlDoc *xd = getAvailXmlDoc();
	//msg7 = getAvailMsg7();
	mcast = getAvailMulticast();

	// if none, must have to wait for some to come back to us
	if ( ! mcast ) {
		// restore file offset
		//m_fileOffset = saved;
		// no, must have been an oom or something
		log("import: import no mcast available");
		return true;//false;
	}
	
	// this is for holding a compressed titlerec
	//sbuf = &mcast->m_sbuf;//&gr->m_sbuf;

	// point to start of buf
	sbuf->reset();

	// ensure we have enough room
	sbuf->reserve ( need );

	// collnum first 4 bytes
	sbuf->pushLong( (long)m_collnum );

	// store title key
	sbuf->safeMemcpy ( &tkey , sizeof(key_t) );

	// then datasize if any. neg rec will have -1 datasize
	if ( dataSize >= 0 ) 
		sbuf->pushLong ( dataSize );

	// then read data rec itself into it, compressed titlerec part
	if ( dataSize > 0 ) {
		// read in the titlerec after the key/datasize
		status = m_bf.read ( sbuf->getBuf() ,
				     dataSize ,
				     m_fileOffset );
		if ( g_errno ) { // n != dataSize ) {
			log("main: failed to read in title rec "
			    "file. %s. Skipping file %s",
			    mstrerror(g_errno),m_bf.getFilename());
			// essentially free up this msg7 now
			//msg7->m_inUse = false;
			//msg7->reset();
			goto nextFile;
		}
		// advance
		m_fileOffset += dataSize;
		// it's good, count it
		sbuf->m_length += dataSize;
	}

	// set xmldoc from the title rec
	//xd->set ( sbuf.getBufStart() );
	//xd->m_masterState = NULL;
	//xd->m_masterCallback ( titledbInjectLoop );

	// we use this so we know where the doc we are injecting
	// was in the foreign titledb file. so we can update our bookmark
	// code.
	mcast->m_hackFileOff = saved;//m_fileOffset;
	mcast->m_hackFileId  = m_bfFileId;

	//
	// inject a title rec buf this time, we are doing an import
	// FROM A TITLEDB FILE!!!
	//
	//gr->m_titleRecBuf = &sbuf;

	// break it down into gw
	// xd.set2 ( sbuf.getBufStart() ,
	// 	  sbuf.length() , // max size
	// 	  cr->m_coll, // use our coll
	// 	  NULL , // pbuf for page parser
	// 	  1 , // niceness
	// 	  NULL ); //sreq );

	// // note it
	// log("import: importing %s",xd.m_firstUrl.getUrl());

	// now we can set gr for the injection
	// TODO: inject the whole "sbuf" so we get sitenuminlinks etc
	// all exactly the same...
	// gr->m_url = xd.getFirstUrl()->getUrl();
	// gr->m_queryToScrape = NULL;
	// gr->m_contentDelim = 0;
	// gr->m_contentTypeStr = g_contentTypeStrings [xd.m_contentType];
	// gr->m_contentFile = NULL;
	// gr->m_content = xd.ptr_utf8Content;
	// gr->m_diffbotReply = NULL;
	// gr->m_injectLinks = false;
	// gr->m_spiderLinks = true;
	// gr->m_shortReply = false;
	// gr->m_newOnly = false;
	// gr->m_deleteUrl = false;
	// gr->m_recycle = true; // recycle content? or sitelinks?
	// gr->m_dedup = false;
	// gr->m_hasMime = false;
	// gr->m_doConsistencyTesting = false;
	// gr->m_getSections = false;
	// gr->m_gotSections = false;
	// gr->m_charset = xd.m_charset;
	// gr->m_hopCount = xd.m_hopCount;


	//
	// point to next doc in the titledb file
	//
	//m_fileOffset += need;

	// get docid from key
	docId = g_titledb.getDocIdFromKey ( &tkey );

	// get shard that holds the titlerec for it
	shardNum = g_hostdb.getShardNumFromDocId ( docId );

	// for selecting which host in the shard receives it
	key = (long)docId;


	m_numOut++;

	// then index it. master callback will be called
	//if ( ! xd->index() ) return false;

	// TODO: make this forward the request to an appropriate host!!
	// . gr->m_sbuf is set to the titlerec so this should handle that
	//   and use XmlDoc::set4() or whatever
	// if ( msg7->injectTitleRec ( msg7 , // state
	// 			    gotMsg7ReplyWrapper , // callback
	// 			    cr )) {
	// 	// it didn't block somehow...
	// 	msg7->m_inUse = false;
	// 	msg7->gotMsg7Reply();
	// }


	req = sbuf->getBufStart();
	reqSize = sbuf->length();

	if ( reqSize != need ) { char *xx=NULL;*xx=0 ; }

	// do not free it, let multicast free it after sending it
	sbuf->detachBuf();


	if ( ! mcast->send ( req ,
			     reqSize ,
			     0x07 ,
			     true , // ownmsg?
			     shardNum,
			     false, // send to whole shard?
			     key , // for selecting host in shard
			     mcast , // state
			     NULL , // state2
			     gotMulticastReplyWrapper ,
			     999999 ) ) { // total timeout in seconds
		log("import: import mcast had error: %s",mstrerror(g_errno));
		m_numIn++;
	}

	goto INJECTLOOP;

 nextFile:
	// invalidate this flag
	//m_offIsValid = false;
	// . and call this function. we add one to m_bfFileId so we
	//   do not re-get the file we just injected.
	// . sets m_bf and m_fileOffset
	// . returns false if nothing to read
	if ( ! setCurrentTitleFileAndOffset ( ) ) { // was: (cr, m_bfFileId+1)
		log("import: no files left to read");
	
		//goto INJECTLOOP;
		return true;
	}

	// otherwise we are done reading; wait on outstanding injects
	log("main: titledb injection loop completed. waiting for "
	    "outstanding injects to return.");
		
	if ( m_numOut > m_numIn )
		return false;

	log("main: all injects have returned. DONE.");

	// dummy return
	return true;
}
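// A sketch of how this loop is re-entered (assumed from the comments at
// the top: the Process.cpp sleep wrapper and the multicast reply path
// both call back into it):
//
//   static void gotMulticastReplyWrapper ( void *state , void *state2 ) {
//           Multicast *mcast = (Multicast *)state;
//           // ... update the bookmark from mcast->m_hackFileId/Off,
//           //     bump m_numIn, recycle the multicast ...
//           // then call importLoop() again to launch more injects
//   }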
// . returns false if blocked, true otherwise
// . sets g_errno on error
// . we are called by Parms::sendPageGeneric() to handle this request
//   which was called by Pages.cpp's sendDynamicReply() when it calls 
//   pg->function() which is called by HttpServer::sendReply(s,r) when it 
//   gets an http request
// . so "hr" is on the stack in HttpServer::requestHandler() which calls
//   HttpServer::sendReply() so we gotta copy it here
bool sendPageInject ( TcpSocket *sock , HttpRequest *hr ) {

	if ( ! g_conf.m_injectionEnabled ) {
		g_errno = EBADENGINEER;
		log("inject: injection disabled");
		return g_httpServer.sendErrorReply(sock,500,"injection is "
						   "disabled by "
						   "the administrator in "
						   "the master "
						   "controls");
	}



	// get the collection
	// make a new state
	Msg7 *msg7;
	try { msg7= new (Msg7); }
	catch ( ... ) { 
		g_errno = ENOMEM;
		log("PageInject: new(%i): %s", 
		    (int)sizeof(Msg7),mstrerror(g_errno));
	       return g_httpServer.sendErrorReply(sock,500,mstrerror(g_errno));
	}
	mnew ( msg7, sizeof(Msg7) , "PageInject" );

	msg7->m_socket = sock;

	char format = hr->getReplyFormat();

	// no url parm?
	if ( format != FORMAT_HTML && ! hr->getString("c",NULL) ) {
		g_errno = ENOCOLLREC;
		char *msg = mstrerror(g_errno);
		return g_httpServer.sendErrorReply(sock,g_errno,msg,NULL);
	}

	// set this. also sets gr->m_hr
	GigablastRequest *gr = &msg7->m_gr;
	// this will fill in GigablastRequest so all the parms we need are set
	g_parms.setGigablastRequest ( sock , hr , gr );

	// if content is "" make it NULL so XmlDoc will download it
	// if user really wants empty content they can put a space in there
	// TODO: update help then...
	if ( gr->m_content && ! gr->m_content[0]  )
		gr->m_content = NULL;

	if ( gr->m_contentFile && ! gr->m_contentFile[0]  )
		gr->m_contentFile = NULL;

	if ( gr->m_contentDelim && ! gr->m_contentDelim[0] )
		gr->m_contentDelim = NULL;

	// set this to  false
	gr->m_gotSections = false;

	// if we had a delimiter but no content, zero it out...
	char *content = gr->m_content;
	if ( ! content ) content = gr->m_contentFile;
	if ( ! content ) gr->m_contentDelim = NULL;

	// get collection rec
	CollectionRec *cr = g_collectiondb.getRec ( gr->m_coll );
	// bitch if no collection rec found
	if ( ! cr ) {
		g_errno = ENOCOLLREC;
		//log("build: Injection from %s failed. "
		//    "Collection \"%s\" does not exist.",
		//    iptoa(s->m_ip),coll);
		// g_errno should be set so it will return an error response
		return sendReply ( msg7 );
	}




	// a scrape request?
	if ( gr->m_queryToScrape && gr->m_queryToScrape[0] ) {
		//char *uf="http://www.google.com/search?num=50&"
		//	"q=%s&scoring=d&filter=0";
		msg7->m_linkDedupTable.set(4,0,512,NULL,0,false,0,"ldtab");
		if ( ! msg7->scrapeQuery ( ) ) return false;
		return sendReply ( msg7 );
	}

	// if no url do not inject
	if ( ! gr->m_url || gr->m_url[0] == '\0' ) 
		return sendReply ( msg7 );

	// call sendReply() when inject completes
	if ( ! msg7->inject ( msg7 , sendReplyWrapper ) )
		return false;

	// it did not block, i guess we are done
	return sendReply ( msg7 );
}
// . THIS Msg0 class must be alloc'd, i.e. not on the stack, etc.
// . if list is stored locally this tries to get it locally
// . otherwise tries to get the list from the network
// . returns false if blocked, true otherwise
// . sets g_errno on error
// . NOTE: i was having problems with queries being cached too long, you
//   see the cache here is a NETWORK cache, so when the machines that owns
//   the list updates it on disk it can't flush our cache... so use a small
//   maxCacheAge of like 30 seconds or so...
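// . a minimal usage sketch (hypothetical caller; parm values are
//   illustrative only, see the full signature below):
//
//     if ( msg0->getList ( -1 , 0 , 0 ,   // hostId=-1: pick host for us
//                          0 , false ,    // no caching
//                          RDB_POSDB , coll , &list ,
//                          startKey , endKey , minRecSizes ,
//                          state , gotListWrapper , niceness , ... ) )
//             gotListWrapper ( state );   // did not block; list is set
//     // otherwise gotListWrapper is called when the list arrives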
bool Msg0::getList ( long long hostId      , // host to ask (-1 if none)
		     long      ip          , // info on hostId
		     short     port        ,
		     long      maxCacheAge , // max cached age in seconds
		     bool      addToCache  , // add net recv'd list to cache?
		     char      rdbId       , // specifies the rdb
		     char     *coll        ,
		     RdbList  *list        ,
		     //key_t     startKey    , 
		     //key_t     endKey      , 
		     char     *startKey    ,
		     char     *endKey      ,
		     long      minRecSizes ,  // use -1 for no max
		     void     *state       ,
		     void    (* callback)(void *state ),//, RdbList *list ) ,
		     long      niceness    ,
		     bool      doErrorCorrection ,
		     bool      includeTree ,
		     bool      doMerge     ,
		     long      firstHostId   ,
		     long      startFileNum  ,
		     long      numFiles      ,
		     long      timeout       ,
		     long long syncPoint     ,
		     long      preferLocalReads ,
		     Msg5     *msg5             ,
		     Msg5     *msg5b            ,
		     bool      isRealMerge      ,
//#ifdef SPLIT_INDEXDB
		     bool      allowPageCache    ,
		     bool      forceLocalIndexdb ,
		     bool      noSplit , // doIndexdbSplit    ,
		     long      forceParitySplit  ) {
//#else
//		     bool      allowPageCache ) {
//#endif
	// this is obsolete! mostly, but we need it for PageIndexdb.cpp to 
	// show a "termlist" for a given query term in its entirety so you 
	// don't have to check each machine in the network. if this is true it
	// means to query each split and merge the results together into a
	// single unified termlist. only applies to indexdb/datedb.
	//if ( doIndexdbSplit ) { char *xx = NULL; *xx = 0; }
	// note this because if caller is wrong it hurts performance major!!
	//if ( doIndexdbSplit ) 
	//	logf(LOG_DEBUG,"net: doing msg0 with indexdb split true");
	// warning
	if ( ! coll ) log(LOG_LOGIC,"net: NULL collection. msg0.");

	//if ( doIndexdbSplit ) { char *xx=NULL;*xx=0; }

	// reset the list they passed us
	list->reset();
	// get keySize of rdb
	m_ks = getKeySizeFromRdbId ( rdbId );
	// if startKey > endKey, don't read anything
	//if ( startKey > endKey ) return true;
	if ( KEYCMP(startKey,endKey,m_ks)>0 ) { char *xx=NULL;*xx=0; } // was: return true
	// . reset hostid if it is dead
	// . this is causing UOR queries to take forever when we have a dead
	if ( hostId >= 0 && g_hostdb.isDead ( hostId ) ) hostId = -1;
	// no longer accept negative minrecsize
	if ( minRecSizes < 0 ) {
		g_errno = EBADENGINEER;
		log(LOG_LOGIC,
		    "net: msg0: Negative minRecSizes no longer supported.");
		char *xx=NULL;*xx=0;
		return true;
	}

	// debug msg
	//if ( niceness != 0 ) log("HEY start");
	// ensure startKey last bit clear, endKey last bit set
	//if ( (startKey.n0 & 0x01) == 0x01 ) 
	//	log("Msg0::getList: warning startKey lastbit set"); 
	//if ( (endKey.n0   & 0x01) == 0x00 ) 
	//	log("Msg0::getList: warning endKey lastbit clear"); 
	// remember these
	m_state         = state;
	m_callback      = callback;
	m_list          = list;
	m_hostId        = hostId;
	m_niceness      = niceness;
	//m_ip            = ip;
	//m_port          = port;
	m_addToCache    = addToCache;
	// . these define our request 100%
	//m_startKey      = startKey;
	//m_endKey        = endKey;
	KEYSET(m_startKey,startKey,m_ks);
	KEYSET(m_endKey,endKey,m_ks);
	m_minRecSizes   = minRecSizes;
	m_rdbId         = rdbId;
	m_coll          = coll;
	m_isRealMerge   = isRealMerge;
	m_allowPageCache = allowPageCache;

	// . group to ask is based on the first key 
	// . we only do 1 group per call right now
	// . groupMask must turn on higher bits first (count downwards kinda)
	// . titledb and spiderdb use special masks to get groupId

	// did they force it? core until i figure out what this is
	if ( forceParitySplit >= 0 ) 
		m_groupId =  g_hostdb.getGroupId ( forceParitySplit );
	else
		m_groupId = getGroupId ( m_rdbId , startKey , ! noSplit );
	// how is this used?
	if ( forceLocalIndexdb ) m_groupId = g_hostdb.m_groupId;

	// . store these parameters
	// . get a handle to the rdb in case we can satisfy locally
	// . returns NULL and sets g_errno on error
	QUICKPOLL((m_niceness));
	Rdb *rdb = getRdbFromId ( m_rdbId );
	if ( ! rdb ) return true;
	// we need the fixedDataSize
	m_fixedDataSize = rdb->getFixedDataSize();
	m_useHalfKeys   = rdb->useHalfKeys();
	// . debug msg
	// . Msg2 does this when checking for a cached compound list.
	//   compound lists do not actually exist, they are merges of smaller
	//   UOR'd lists.
	if ( maxCacheAge != 0 && ! addToCache && (numFiles > 0 || includeTree))
		log(LOG_LOGIC,"net: msg0: "
		    "Weird. check but don't add... rdbid=%li.",(long)m_rdbId);
	// set this here since we may not call msg5 if list not local
	//m_list->setFixedDataSize ( m_fixedDataSize );

	// . now that we do load balancing we don't want to do a disk lookup
	//   even if local if we are merging or dumping
	// . UNLESS g_conf.m_preferLocalReads is true
	if ( preferLocalReads == -1 ) 
		preferLocalReads = g_conf.m_preferLocalReads;

	// . always prefer local for full split clusterdb
	// . and keep the tfndb/titledb lookups in the same stripe
	// . so basically we can't do biased caches if fully split
	//if ( g_conf.m_fullSplit ) preferLocalReads = true;
	preferLocalReads = true;

	// is it stored locally?
	bool isLocal = ( m_hostId == -1 && g_hostdb.m_groupId == m_groupId );
	// only do local lookups if this is true
	if ( ! preferLocalReads ) isLocal = false;

	/*
	m_numSplit = 1;
	if ( g_hostdb.m_indexSplits > 1 &&
	     ( rdbId == RDB_POSDB || rdbId==RDB_DATEDB)&&
	     ! forceLocalIndexdb && doIndexdbSplit ) {
		isLocal  = false;
		//m_numSplit = INDEXDB_SPLIT;
		m_numSplit = g_hostdb.m_indexSplits;
		char *xx=NULL;*xx=0;
	}
	*/
	/*
	long long singleDocIdQuery = 0LL;
	if ( rdbId == RDB_POSDB ) {
		long long d1 = g_posdb.getDocId(m_startKey);
		long long d2 = g_posdb.getDocId(m_endKey);
		if ( d1+1 == d2 ) singleDocIdQuery = d1;
	}

	// . try the LOCAL termlist cache
	// . so when msg2 is evaluating a gbdocid:| query and it has to
	//   use msg0 to go across the network to get the same damn termlist
	//   over and over again for the same docid, this will help alot.
	// . ideally it'd be nice if the seo pipe in xmldoc.cpp can try to
	//   send the same gbdocid:xxxx docids to the same hosts. maybe hash
	//   based on docid into the list of hosts and if that host is busy
	//   just chain until we find someone not busy.
	if ( singleDocIdQuery &&
	     getListFromTermListCache ( coll,
					m_startKey,
					m_endKey,
					maxCacheAge,
					list ) )
		// found!
		return true;
	*/

	// but always local if only one host
	if ( g_hostdb.getNumHosts() == 1 ) isLocal = true;

	// force a msg0 if doing a docid restrictive query like
	// gbdocid:xxxx|<query> so we call cacheTermLists() 
	//if ( singleDocIdQuery ) isLocal = false;

	// . if the group is local then do it locally
	// . Msg5::getList() returns false if blocked, true otherwise
	// . Msg5::getList() sets g_errno on error
	// . don't do this if m_hostId was specified
	if ( isLocal ) { // && !g_conf.m_interfaceMachine ) {
		if ( msg5 ) {
			m_msg5 = msg5;
			m_deleteMsg5 = false;
		}
		else {
			try { m_msg5 = new ( Msg5 ); } 
			catch ( ... ) {
				g_errno = ENOMEM;
				log("net: Local alloc for disk read failed "
				    "while tring to read data for %s. "
				    "Trying remote request.",
				    getDbnameFromId(m_rdbId));
				goto skip;
			}
			mnew ( m_msg5 , sizeof(Msg5) , "Msg0" );
			m_deleteMsg5 = true;
		}

		QUICKPOLL(m_niceness);
		// same for msg5b
		if ( msg5b ) {
			m_msg5b = msg5b;
			m_deleteMsg5b = false;
		}
		else if ( m_rdbId == RDB_TITLEDB ) {
			try { m_msg5b = new ( Msg5 ); } 
			catch ( ... ) {
				g_errno = ENOMEM;
				log("net: Local alloc for disk read failed "
				    "while tring to read data for %s. "
				    "Trying remote request. 2.",
				    getDbnameFromId(m_rdbId));
				goto skip;
			}
			mnew ( m_msg5b , sizeof(Msg5) , "Msg0b" );
			m_deleteMsg5b = true;
		}
		QUICKPOLL(m_niceness);
		if ( ! m_msg5->getList ( rdbId,
					 coll ,
					 m_list ,
					 m_startKey ,
					 m_endKey   ,
					 m_minRecSizes ,
					 includeTree   , // include Tree?
					 addToCache    , // addToCache?
					 maxCacheAge   ,
					 startFileNum  , 
					 numFiles      ,
					 this ,
					 gotListWrapper2   ,
					 niceness          ,
					 doErrorCorrection ,
					 NULL , // cacheKeyPtr
					 0    , // retryNum
					 -1   , // maxRetries
					 true , // compensateForMerge
					 syncPoint ,
					 m_msg5b   ,
					 m_isRealMerge ,
					 m_allowPageCache ) ) return false;
		// nuke it
		reset();
		return true;
	}
skip:
	// debug msg
	if ( g_conf.m_logDebugQuery )
		log(LOG_DEBUG,"net: msg0: Sending request for data to "
		    "group=%li listPtr=%li minRecSizes=%li termId=%llu "
		    //"startKey.n1=%lx,n0=%llx (niceness=%li)",
		    "startKey.n1=%llx,n0=%llx (niceness=%li)",
		    g_hostdb.makeHostId ( m_groupId ) ,(long)m_list,
		    m_minRecSizes, g_posdb.getTermId(m_startKey) , 
		    //m_startKey.n1,m_startKey.n0 , (long)m_niceness);
		    KEY1(m_startKey,m_ks),KEY0(m_startKey),
		    (long)m_niceness);

	char *replyBuf = NULL;
	long  replyBufMaxSize = 0;
	bool  freeReply = true;

	// adjust niceness for net transmission
	bool realtime = false;
	//if ( minRecSizes + 32 < TMPBUFSIZE ) realtime = true;

	// if we're niceness 0 we need to pre-allocate for reply since it
	// might be received within the asynchronous signal handler which
	// cannot call mmalloc()
	if ( realtime ) { // niceness <= 0 || netnice == 0 ) {
		// . we should not get back more than minRecSizes bytes since 
		//   we are now performing merges
		// . it should not slow things down too much since the hashing
		//   is 10 times slower than merging anyhow...
		// . CAUTION: if rdb is not fixed-datasize then this will
		//            not work for us! it can exceed m_minRecSizes.
		replyBufMaxSize = m_minRecSizes ;
		// . get a little extra to fix the error where we ask for 64 
		//   but get 72
		// . where is that coming from?
		// . when getting titleRecs we often exceed the minRecSizes 
		// . ?Msg8? was having trouble. was short 32 bytes sometimes.
		replyBufMaxSize += 36;
		// why add ten percent?
		//replyBufMaxSize *= 110 ;
		//replyBufMaxSize /= 100 ;
		// make a buffer to hold the reply
//#ifdef SPLIT_INDEXDB
/*
		if ( m_numSplit > 1 ) {
			m_replyBufSize = replyBufMaxSize * m_numSplit;
			replyBuf = (char *) mmalloc(m_replyBufSize, "Msg0");
			m_replyBuf  = replyBuf;
			freeReply = false;
		}
		else
*/
//#endif
			replyBuf = (char *) mmalloc(replyBufMaxSize , "Msg0");
		// g_errno is set and we return true if it failed
		if ( ! replyBuf ) {
			log("net: Failed to pre-allocate %li bytes to hold "
			    "data read remotely from %s: %s.",
			    replyBufMaxSize,getDbnameFromId(m_rdbId),
			    mstrerror(g_errno));
			return true;
		}
	}

	// . make a request with the info above (note: not in network order)
	// . IMPORTANT!!!!! if you change this change 
	//   Multicast.cpp::sleepWrapper1 too!!!!!!!!!!!!
	//   no, not anymore, we commented out that request peeking code
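	// the serialized request layout (from the writes below):
	//   8 bytes  syncPoint
	//   4 bytes  minRecSizes
	//   4 bytes  startFileNum
	//   4 bytes  numFiles
	//   4 bytes  maxCacheAge
	//   1 byte   rdbId
	//   1 byte   addToCache
	//   1 byte   doErrorCorrection
	//   1 byte   includeTree
	//   1 byte   niceness
	//   1 byte   allowPageCache
	//   m_ks     startKey
	//   m_ks     endKey
	//   NULL-terminated collection name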
	char *p = m_request;
	*(long long *) p = syncPoint        ; p += 8;
	//*(key_t     *) p = m_startKey       ; p += sizeof(key_t);
	//*(key_t     *) p = m_endKey         ; p += sizeof(key_t);
	*(long      *) p = m_minRecSizes    ; p += 4;
	*(long      *) p = startFileNum     ; p += 4;
	*(long      *) p = numFiles         ; p += 4;
	*(long      *) p = maxCacheAge      ; p += 4;
	*p               = m_rdbId          ; p++;
	*p               = addToCache       ; p++;
	*p               = doErrorCorrection; p++;
	*p               = includeTree      ; p++;
	*p               = (char)niceness   ; p++;
	*p               = (char)m_allowPageCache; p++;
	KEYSET(p,m_startKey,m_ks);            p += m_ks;
	KEYSET(p,m_endKey,m_ks);              p += m_ks;
	// NULL terminated collection name
	strcpy ( p , coll ); p += gbstrlen ( coll ); *p++ = '\0';
	m_requestSize    = p - m_request;
	// ask an individual host for this list if hostId is NOT -1
	if ( m_hostId != -1 ) {
		// get Host
		Host *h = g_hostdb.getHost ( m_hostId );
		if ( ! h ) { 
			g_errno = EBADHOSTID; 
			log(LOG_LOGIC,"net: msg0: Bad hostId of %lli.",
			    m_hostId);
			return true;
		}
		// if niceness is 0, use the higher priority udpServer
		UdpServer *us ;
		unsigned short port;
		QUICKPOLL(m_niceness);
		//if ( niceness <= 0 || netnice == 0 ) { 
		//if ( realtime ) {
		//	us = &g_udpServer2; port = h->m_port2; }
		//else                 { 
		us = &g_udpServer ; port = h->m_port ; 
		// . returns false on error and sets g_errno, true otherwise
		// . calls callback when reply is received (or error)
		// . we return true if it returns false
		if ( ! us->sendRequest ( m_request     ,
					 m_requestSize ,
					 0x00          , // msgType
					 h->m_ip       ,
					 port          ,
					 m_hostId      ,
					 NULL          , // the slotPtr
					 this          ,
					 gotSingleReplyWrapper ,
					 timeout       ,
					 -1            , // backoff
					 -1            , // maxwait
					 replyBuf      ,
					 replyBufMaxSize ,
					 m_niceness     ) ) // cback niceness
			return true;
		// return false cuz it blocked
		return false;
	}
	// timing debug
	if ( g_conf.m_logTimingNet )
		m_startTime = gettimeofdayInMilliseconds();
	else
		m_startTime = 0;
	//if ( m_rdbId == RDB_INDEXDB ) log("Msg0:: getting remote indexlist. "
	//			"termId=%llu, "
	//			"groupNum=%lu",
	//			g_indexdb.getTermId(m_startKey) ,
	//			g_hostdb.makeHostId ( m_groupId ) );

	/*
	// make the cache key so we can see what remote host cached it, if any
	char cacheKey[MAX_KEY_BYTES];
	//key_t cacheKey = makeCacheKey ( startKey     ,
	makeCacheKey ( startKey     ,
		       endKey       ,
		       includeTree  ,
		       minRecSizes  ,
		       startFileNum ,
		       numFiles     ,
		       cacheKey     ,
		       m_ks         );
	*/

	// . get the top long of the key
	// . i guess this will work for 128 bit keys... hmmmmm
	long keyTop = hash32 ( (char *)startKey , m_ks );

	/*
	// allocate space
	if ( m_numSplit > 1 ) {
		long  need = m_numSplit * sizeof(Multicast) ;
		char *buf  = (char *)mmalloc ( need,"msg0mcast" );
		if ( ! buf ) return true;
		m_mcasts = (Multicast *)buf;
		for ( long i = 0; i < m_numSplit ; i++ )
			m_mcasts[i].constructor();
	}
	*/

        // . otherwise, multicast to a host in group "groupId"
	// . returns false and sets g_errno on error
	// . calls callback on completion
	// . select first host to send to in group based on upper 32 bits
	//   of termId (m_startKey.n1)
//#ifdef SPLIT_INDEXDB
	// . need to send out to all the indexdb split hosts
	m_numRequests = 0;
	m_numReplies  = 0;
	//for ( long i = 0; i < m_numSplit; i++ ) {

	QUICKPOLL(m_niceness);
	long gr;
	char *buf;
	/*
	if ( m_numSplit > 1 ) {
		gr  = g_indexdb.getSplitGroupId ( baseGroupId, i );
		buf = &replyBuf[i*replyBufMaxSize];
	}
	else {
	*/
	gr  = m_groupId;
	buf = replyBuf;
	//}

	// get the multicast
	Multicast *m = &m_mcast;
	//if ( m_numSplit > 1 ) m = &m_mcasts[i];

        if ( ! m->send ( m_request    , 
//#else
//        if ( ! m_mcast.send ( m_request    , 
//#endif
			      m_requestSize, 
			      0x00         , // msgType 0x00
			      false        , // does multicast own request?
//#ifdef SPLIT_INDEXDB
			      gr           , // group + offset
//#else
//			      m_groupId    , // group to send to (groupKey)
//#endif
			      false        , // send to whole group?
			      //m_startKey.n1, // key is passed on startKey
			      keyTop       , // key is passed on startKey
			      this         , // state data
			      NULL         , // state data
			      gotMulticastReplyWrapper0 ,
			      timeout      , // timeout in seconds (was 30)
			      niceness     ,
			      realtime     ,
			      firstHostId  ,
//#ifdef SPLIT_INDEXDB
//			      &replyBuf[i*replyBufMaxSize] ,
//#else
//			      replyBuf        ,
//#endif
			      buf             ,
			      replyBufMaxSize ,
			      freeReply       , // free reply buf?
			      true            , // do disk load balancing?
			      maxCacheAge     ,
			      //(key_t *)cacheKey        ,
			      // multicast uses it for determining the best
			      // host to send the request to when doing 
			      // disk load balancing. if the host has our 
			      // data cached, then it will probably get to
			      // handle the request. for now let's just assume
			      // this is a 96-bit key. TODO: fix...
			 0 , // *(key_t *)cacheKey        ,
			      rdbId           ,
			      minRecSizes     ) ) {
		log("net: Failed to send request for data from %s in group "
		    "#%li over network: %s.",
		    getDbnameFromId(m_rdbId),m_groupId, mstrerror(g_errno));
		// no, multicast will free this when it is destroyed
		//if (replyBuf) mfree ( replyBuf , replyBufMaxSize , "Msg22" );
		// but speed it up
//#ifdef SPLIT_INDEXDB
		m_errno = g_errno;
		m->reset();
		if ( m_numRequests > 0 )
			return false;
//#else
//		m_mcast.reset();
//#endif
		return true;
	}
//#ifdef SPLIT_INDEXDB
	m_numRequests++;

//#endif
	// we blocked
	return false;
}
// . but now that we may get a list remotely to fix data corruption,
//   this may indeed block
bool Msg3::doneScanning ( ) {
	QUICKPOLL(m_niceness);
	// . did we have any error on any scan?
	// . if so, repeat ALL of the scans
	g_errno = m_errno;
	// 2 retry is the default
	long max = 2;
	// see if explicitly provided by the caller
	if ( m_maxRetries >= 0 ) max = m_maxRetries;
	// now use -1 (no max) as the default no matter what
	max = -1;
	// ENOMEM is particularly contagious, so watch out with it...
	if ( g_errno == ENOMEM && m_maxRetries == -1 ) max = 0;
	// msg0 sets maxRetries to 2, don't let max stay set to -1
	if ( g_errno == ENOMEM && m_maxRetries != -1 ) max = m_maxRetries;
	// when thread cannot alloc enough read buf it keeps the read buf
	// set to NULL and BigFile.cpp sets g_errno to EBUFTOOSMALL
	if ( g_errno == EBUFTOOSMALL && m_maxRetries == -1 ) max = 0;
	// msg0 sets maxRetries to 2, don't let max stay set to -1
	if ( g_errno == EBUFTOOSMALL && m_maxRetries != -1 ) max = m_maxRetries;
	// . if no thread slots available, that hogs up serious memory.
	//   the size of Msg3 is 82k, so having just 5000 of them is 430MB.
	// . i just made Msg3 alloc mem when it needs more than about 2k
	//   so this problem is greatly reduced, therefore let's keep 
	//   retrying... forever if no thread slots in thread queue since
	//   we become the thread queue in a way.
	if ( g_errno == ENOTHREADSLOTS ) max = -1;
	// this is set above if the map has the same consecutive key repeated
	// and the read is enormous
	if ( g_errno == ECORRUPTDATA ) max = 0;
	// usually bad disk failures, don't retry those forever
	//if ( g_errno == EIO ) max = 3;
	// no, now our hitachis return these even when they're good so
	// we have to keep retrying forever
	if ( g_errno == EIO ) max = -1;
	// count these so we do not take drives offline just because
	// kernel ring buffer complains...
	if ( g_errno == EIO ) g_numIOErrors++;
	// bail early on high priority reads for these errors
	if ( g_errno == EDISKSTUCK && m_niceness == 0 ) max = 0;
	if ( g_errno == EIO        && m_niceness == 0 ) max = 0;

	// how does this happen? we should never bail out on a low priority
	// disk read... we just wait for it to complete...
	if ( g_errno == EDISKSTUCK && m_niceness != 0 ) { char *xx=NULL;*xx=0;}

	// on I/O errors, give up and call it corrupt after a while. some
	// hitachis have I/O errors on little spots, like gk88, maybe we
	// can fix him
	if ( g_errno == EIO && m_retryNum >= 5 ) {
		m_errno = ECORRUPTDATA;
		m_hadCorruption = true;
		// do not do any retries any more
		max = 0;
	}

	// convert m_errno to ECORRUPTDATA if it is EBUFTOOSMALL and the
	// max of the bytesToRead are over 500MB.
	// if bytesToRead was ludicrous, then assume that the data file
	// was corrupted, the map was regenerated and it patched
	// over the corrupted bits which were 500MB or more in size.
	// we cannot practically allocate that much, so let's just
	// give back an empty buffer. treat it like corruption...
	// the way it patches is to store the same key over all the corrupted
	// pages, which can get pretty big. so if you read a range with that
	// key you will be hurting!!
	// this may be the same scenario as when the rdbmap has consecutive
	// same keys. see above where we set m_errno to ECORRUPTDATA...
	if ( g_errno == EBUFTOOSMALL ) { 
		long biggest = 0;
		for ( long i = 0 ; i < m_numFileNums ; i++ ) {
			if ( m_scans[i].m_bytesToRead < biggest ) continue;
			biggest = m_scans[i].m_bytesToRead;
		}
		if ( biggest > 500000000 ) {
			log("db: Max read size was %li > 500000000. Assuming "
			    "corrupt data in data file.",biggest);
			m_errno = ECORRUPTDATA;
			m_hadCorruption = true;
			// do not do any retries on this, the read was > 500MB
			max = 0;
		}
	}

	// get base, returns NULL and sets g_errno to ENOCOLLREC on error
	RdbBase *base; if (!(base=getRdbBase(m_rdbId,m_coll))) return true;

	// this really slows things down because it blocks the cpu so
	// leave it out for now
#ifdef _SANITY_CHECK_
	// check for corruption here, do not do it again in Msg5 if we pass
	if ( ! g_errno ) { // && g_conf.m_doErrorCorrection ) {
		long i;
		for ( i = 0 ; i < m_numFileNums ; i++ )
			if ( ! m_lists[i].checkList_r ( false, false ) ) break;
		if ( i < m_numFileNums ) {
			g_errno = ECORRUPTDATA;
			m_errno = ECORRUPTDATA;
			max     = g_conf.m_corruptRetries; // try 100 times
			log("db: Encountered corrupt list in file %s.",
			    base->getFile(m_fileNums[i])->getFilename());
		}
		else
			m_listsChecked = true;
	}
#endif

	// . if we had a ETRYAGAIN error, then try again now
	// . it usually means the whole file or a part of it was deleted 
	//   before we could finish reading it, so we should re-read all now
	// . RdbMerge deletes BigFiles after it merges them and also chops
	//   off file heads
	// . now that we have threads i'd imagine we'd get EBADFD or something
	// . i've also seen "illegal seek" as well
	if ( m_errno && (m_retryNum < max || max < 0) &&
	     // this will complete in due time, we can't call a sleep wrapper
	     // on it because the read is really still pending...
	     m_errno != EDISKSTUCK ) {
		// print the error
		static time_t s_time  = 0;
		time_t now = getTime();
		if ( now - s_time > 5 || g_errno != ENOTHREADSLOTS ) {
			log("net: Had error reading %s: %s. Retrying. "
			    "(retry #%li)", 
			    base->m_dbname,mstrerror(g_errno) , m_retryNum );
			s_time = now;
		}
		// send email alert if in an infinite loop, but don't send
		// more than once every 2 hours
		static long s_lastSendTime = 0;
		if ( m_retryNum == 100 && getTime() - s_lastSendTime > 3600*2){
			// remove this for now it is going off all the time
			//g_pingServer.sendEmail(NULL,//g_hostdb.getMyHost(),
			//		       "100 read retries",true);
			s_lastSendTime = getTime();
		}
		// clear g_errno cuz we should before the call to readList()
		g_errno = 0;
		// free the list buffer since if we have 1000 Msg3s retrying
		// it will totally use all of our memory
		for ( long i = 0 ; i < m_numChunks ; i++ ) 
			m_lists[i].destructor();
		// count retries
		m_retryNum++;
		// backoff scheme, wait 200ms more each retry after the first
		long wait ;
		if ( m_retryNum == 1 ) wait = 10;
		else                   wait = 200 * m_retryNum;
		// . don't wait more than 10 secs between tries
		// . i've seen gf0 and gf16 get mega saturated
		if ( wait > 10000 ) wait = 10000;
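		// e.g. retry #1 waits 10ms, #2 400ms, #3 600ms, ... ,
		// hitting the 10000ms cap at retry #50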
		// register a sleep callback to retry after the backoff
		if ( g_loop.registerSleepCallback ( wait  , // ms
						    this  ,
						    doneSleepingWrapper3,
						    m_niceness))
			return false;
		// otherwise, registration failed
		log(
		    "net: Failed to register sleep callback for retry. "
		    "Abandoning read. This is bad.");
		// return, g_errno should be set
		g_errno = EBUFTOOSMALL;
		m_errno = EBUFTOOSMALL;
		return true;
	}

	// if we got an error and should not retry any more then give up
	if ( g_errno ) {
		log(
		    "net: Had error reading %s: %s. Giving up after %li "
		    "retries.",
		    base->m_dbname,mstrerror(g_errno) , m_retryNum );
		return true;
	}

	// note it if the retry finally worked
	if ( m_retryNum > 0 ) 
		log(LOG_INFO,"disk: Read succeeded after retrying %li times.",
		    (long)m_retryNum);

	// count total bytes for logging
	long count = 0;
	// . constrain all lists to make merging easier
	// . if we have only one list, then that's nice cuz the constrain
	//   will allow us to send it right away w/ zero copying
	// . if we have only 1 list, it won't be merged into a final list,
	//   that is, we'll just set m_list = &m_lists[i]
	for ( long i = 0 ; i < m_numFileNums ; i++ ) {
		QUICKPOLL(m_niceness);
		// count total bytes for logging
		count += m_lists[i].getListSize();
		// . hint offset is relative to the offset of the first key
		//   we read
		// . if that key was only 6 bytes RdbScan shifts the list buf
		//   down 6 bytes to make the first key 12 bytes... a
		//   requirement for all RdbLists
		// . don't inc it, though, if it was 0, pointing to the start
		//   of the list because our shift won't affect that
		if ( m_scans[i].m_shifted == 6 && m_hintOffsets[i] > 0 ) 
			m_hintOffsets[i] += 6;
		// posdb double compression
		if ( m_scans[i].m_shifted == 12 && m_hintOffsets[i] > 0 ) 
			m_hintOffsets[i] += 12;
		// . don't constrain on minRecSizes here because it may
		//   make our endKey smaller, which will cause problems
		//   when Msg5 merges these lists.
		// . If all lists have different endKeys RdbList's merge
		//   chooses the min and will merge in recs beyond that
		//   causing a bad list BECAUSE we don't check to make
		//   sure that recs we are adding are below the endKey
		// . if we only read from one file then constrain based 
		//   on minRecSizes so we can send the list back w/o merging
		//   OR if just merging with RdbTree's list
		long mrs ;
		// . constrain to m_minRecSizesOrig, not m_minRecSizes cuz 
		//   that  could be adjusted by compensateForNegativeRecs()
		// . but, really, they should be the same if we only read from
		//   the root file
		if ( m_numFileNums == 1 ) mrs = m_minRecSizesOrig;
		else                      mrs = -1;
		// . this returns false and sets g_errno on error
		// . like if data is corrupt
		BigFile *ff = base->getFile(m_fileNums[i]);
		if ( ! m_lists[i].constrain ( m_startKey       ,
					      m_constrainKey   , // m_endKey
					      mrs           , // m_minRecSizes
					      m_hintOffsets[i] ,
					      //m_hintKeys   [i] ,
					      &m_hintKeys   [i*m_ks] ,
					      ff->getFilename() ,
					      m_niceness ) ) {
			log("net: Had error while constraining list read from "
			    "%s: %s. This is likely caused by corrupted "
			    "data on disk.", 
			    mstrerror(g_errno), ff->getFilename());
		}
	}

	// print the time
	if ( g_conf.m_logTimingDb ) {
		long long now = gettimeofdayInMilliseconds();
		long long took = now - m_startTime;
		log(LOG_TIMING,
		    "net: Took %lli ms to read %li lists of %li bytes total"
		     " from %s (niceness=%li).",
		     took,m_numFileNums,count,base->m_dbname,m_niceness);
	}
	return true;
}
// return false if blocked, true otherwise
bool Msg39::addedLists ( ) {

	if ( m_posdbTable.m_t1 ) {
		// . measure time to add the lists in bright green
		// . use darker green if rat is false (default OR)
		long color;
		//char *label;
		color = 0x0000ff00 ;
		//label = "termlist_intersect";
		g_stats.addStat_r ( 0 , 
				    m_posdbTable.m_t1 , 
				    m_posdbTable.m_t2 , color );
	}


	// accumulate total hits count over each docid split
	m_numTotalHits += m_posdbTable.m_docIdVoteBuf.length() / 6;

	// before wrapping up, complete our docid split loops!
	// so do not send the reply back yet... send reply back from
	// the docid loop function... doDocIdSplitLoop()
	if ( m_numDocIdSplits >= 2 ) return true;


	// . save some memory,free m_topDocIdPtrs2,m_topScores2,m_topExplicits2
	// . the m_topTree should have been filled from the call to
	//   IndexTable2::fillTopDocIds() and it no longer has ptrs to the
	//   docIds, but has the docIds themselves
	//m_posdbTable.freeMem();

	// error?
	if ( m_posdbTable.m_errno ) {
		// we do not need to store the intersection i guess...??
		m_posdbTable.freeMem();
		g_errno = m_posdbTable.m_errno;
		log("query: posdbtable had error = %s",mstrerror(g_errno));
		sendReply ( m_slot , this , NULL , 0 , 0 ,true);
		return true;
	}


	// should we put cluster recs in the tree?
	//m_gotClusterRecs = ( g_conf.m_fullSplit && m_r->m_doSiteClustering );
	m_gotClusterRecs = ( m_r->m_doSiteClustering );
	
	// . before we send the top docids back, lookup their site hashes
	//   in clusterdb so we can do filtering at this point.
	//   BUT only do this if we are in a "full split" config, because that
	//   way we can guarantee all clusterdb recs are local (on this host)
	//   and should be in the page cache. the page cache should do ultra
	//   quick lookups and no memcpy()'s for this operation. it should
	//   be <<1ms to lookup thousands of docids.
	// . when doing innerLoopSiteClustering we always use top tree now
	//   because our number of "top docids" can be somewhat unpredictably 
	//   large due to having a ton of results with the same "domain hash" 
	//   (see the "vcount" in IndexTable2.cpp)
	// . do NOT do if we are just "getting weights", phr and aff weights
	if ( m_gotClusterRecs ) {
		// . set the clusterdb recs in the top tree
		return setClusterRecs ( ) ;
	}

	// if we did not call setClusterRecs, go on to estimate the hits
	estimateHits();
	return true;
}
// . send an add command to all machines in the appropriate group
// . returns false if blocked, true otherwise
// . sets g_errno on error
// . groupId is -1 if we choose it automatically
// . if waitForReply is false we return true right away, but we can only
//   launch MAX_MSG1S requests without waiting for replies, and
//   when the reply does come back we do NOT call the callback
bool Msg1::addList ( RdbList      *list              ,
		     char          rdbId             ,
		     collnum_t collnum, // char         *coll              ,
		     void         *state             ,
		     void (* callback)(void *state)  ,
		     bool          forceLocal        ,
		     int32_t          niceness          ,
		     bool          injecting         ,
		     bool          waitForReply      ,
		     bool         *inTransit         ) {
	// warning
	if ( collnum<0 ) log(LOG_LOGIC,"net: bad collection. msg1.cpp.");
	// if list has no records in it return true
	if ( ! list || list->isEmpty() ) return true;
	// sanity check
	if ( list->m_ks !=  8 &&
	     list->m_ks != 12 &&
	     list->m_ks != 16 &&
	     list->m_ks != 24 ) { 
		g_process.shutdownAbort(true); }
	// start at the beginning
	list->resetListPtr();
	// if caller does not want reply try to accommodate him
	if ( ! waitForReply && list != &m_ourList ) {
		Msg1 *Y = getMsg1();
		if ( ! Y ) { 
			waitForReply = true; 
			log(LOG_DEBUG,"net: msg1: "
			    "No floating request slots "
			    "available for adding data. "
			    "Blocking on reply."); 
			goto skip; 
		}
		// steal the list, we don't want caller to free it
		gbmemcpy ( &Y->m_ourList , list , sizeof(RdbList) );
		
 		QUICKPOLL(niceness);
		
		// if list is small enough use our buf
		if ( ! list->m_ownData && list->m_listSize <= MSG1_BUF_SIZE ) {
			gbmemcpy ( Y->m_buf , list->m_list , list->m_listSize );
			Y->m_ourList.m_list    = Y->m_buf;
			Y->m_ourList.m_listEnd = Y->m_buf + list->m_listSize;
			Y->m_ourList.m_alloc   = NULL;
			Y->m_ourList.m_ownData = false;
		}
		// otherwise, we cannot copy it and i don't want to mdup it...
		else if ( ! list->m_ownData ) {
			log(LOG_LOGIC,"net: msg1: List must own data. Bad "
			    "engineer.");
			g_process.shutdownAbort(true); 
		}
		// lastly, if it was a clean steal, don't let list free it
		else list->m_ownData = false;
		// reset m_listPtr and m_listPtrHi so we pass the isExhausted()
		// check in sendSomeOfList() below
		Y->m_ourList.resetListPtr();
		// sanity test
		if ( Y->m_ourList.isExhausted() ) {
			log(LOG_LOGIC,"net: msg1: List is exhausted. "
			    "Bad engineer."); 
			g_process.shutdownAbort(true); }
		// now re-call. note: this local shadows the "inTransit" parm
		bool inTransit;
		bool status = Y->addList ( &Y->m_ourList ,
					   rdbId         ,
					   collnum       ,
					   Y             , // state
					   returnMsg1    , // callback
					   forceLocal    ,
					   niceness      ,
					   injecting     ,
					   waitForReply  ,
					   &inTransit    ) ;
		// if we really blocked return false
		if ( ! status ) return false;
		// otherwise, it may have returned true because waitForReply
		// is false, but the request may still be in transit
		if ( inTransit ) return true;
		// debug msg
		//log("did not block, listSize=%" PRId32,m->m_ourList.m_listSize);
		// we did it without blocking, but it is still in transit
		// unless there was an error
		if ( g_errno ) log("net: Adding data to %s had error: %s.",
				   getDbnameFromId(rdbId),
				   mstrerror(g_errno));
		// otherwise, if not in transit and no g_errno then it must
		// have really completed without blocking. in which case
		// we are done with "Y"
		returnMsg1 ( (void *)Y );
		return true;
	}
 skip:
	// remember these vars
	m_list          = list;
	m_rdbId         = rdbId;
	m_collnum       = collnum;
	m_state         = state;
	m_callback      = callback;
	m_forceLocal    = forceLocal;
	m_niceness      = niceness;
	m_injecting     = injecting;
	m_waitForReply  = waitForReply;

	QUICKPOLL(niceness);
	// reset m_listPtr to point to first record again
	list->resetListPtr();
	// is the request in transit? assume not (assume did not block)
	if ( inTransit ) *inTransit = false;
	// . not all records in the list may belong to the same group
	// . records should be sorted by key so we don't need to sort them
	// . if this did not block, return true
	if ( sendSomeOfList ( ) ) return true;
	// it is in transit
	if ( inTransit ) *inTransit = true;
	// if we should waitForReply return false
	if ( m_waitForReply ) return false;
	// tell caller we did not block on the reply, even though we did
	return true;
}
// . returns false if blocked, true otherwise
// . sets g_errno on error
bool Msg39::getLists () {

	if ( m_debug ) m_startTime = gettimeofdayInMilliseconds();
	// . ask Indexdb for the IndexLists we need for these termIds
	// . each rec in an IndexList is a termId/score/docId tuple

	//
	// restrict to docid range?
	//
	// . get the docid start and end
	// . do docid partitioning so we can send to all hosts
	//   in the network, not just one stripe
	long long docIdStart = 0;
	long long docIdEnd = MAX_DOCID;
	// . restrict to this docid?
	// . will really make gbdocid:| searches much faster!
	long long dr = m_tmpq.m_docIdRestriction;
	if ( dr ) {
		docIdStart = dr;
		docIdEnd   = dr + 1;
	}
	// . override
	// . this is set from Msg39::doDocIdSplitLoop() to compute 
	//   search results in stages, so that we do not load massive
	//   termlists into memory and go OOM (out of memory)
	if ( m_r->m_minDocId != -1 ) docIdStart = m_r->m_minDocId;
	if ( m_r->m_maxDocId != -1 ) docIdEnd   = m_r->m_maxDocId+1;
	
	// if we have twins, then make sure the twins read different
	// pieces of the same docid range to make things 2x faster
	bool useTwins = false;
	if ( g_hostdb.getNumStripes() == 2 ) useTwins = true;
	if ( useTwins ) {
		long long delta2 = ( docIdEnd - docIdStart ) / 2;
		if ( m_r->m_stripe == 0 ) docIdEnd = docIdStart + delta2;
		else                      docIdStart = docIdStart + delta2;
	}
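	// e.g. with the full range [0, MAX_DOCID) and two stripes,
	// stripe 0 reads docids [0, MAX_DOCID/2) and stripe 1 reads
	// [MAX_DOCID/2, MAX_DOCID), halving each host's read load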
	// TODO: add triplet support later for this to split the
	// read 3 ways. 4 ways for quads, etc.
	if ( g_hostdb.getNumStripes() >= 3 ) { char *xx=NULL;*xx=0;}
	// do not go over MAX_DOCID  because it gets masked and
	// ends up being 0!!! and we get empty lists
	if ( docIdEnd > MAX_DOCID ) docIdEnd = MAX_DOCID;
	// remember so Msg2.cpp can use them to restrict the termlists 
	// from "whiteList" as well
	m_docIdStart = docIdStart;
	m_docIdEnd   = docIdEnd;
	

	//
	// set startkey/endkey for each term/termlist
	//
	for ( long i = 0 ; i < m_tmpq.getNumTerms() ; i++ ) {
		// breathe
		QUICKPOLL ( m_r->m_niceness );
		// shortcuts
		QueryTerm *qterm = &m_tmpq.m_qterms[i];
		char *sk = qterm->m_startKey;
		char *ek = qterm->m_endKey;
		// get the term id
		long long tid = m_tmpq.getTermId(i);
		// if only 1 stripe
		//if ( g_hostdb.getNumStripes() == 1 ) {
		//	docIdStart = 0;
		//	docIdEnd   = MAX_DOCID;
		//}
		// store now in qterm
		g_posdb.makeStartKey ( sk , tid , docIdStart );
		g_posdb.makeEndKey   ( ek , tid , docIdEnd   );
		qterm->m_ks = sizeof(POSDBKEY);//key144_t);
	}
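	// so each termlist read is bounded by keys of the form
	// (termId, docIdStart) and (termId, docIdEnd): the same termId
	// with the docid range clamped per stripe/split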

	// debug msg
	if ( m_debug || g_conf.m_logDebugQuery ) {
		for ( long i = 0 ; i < m_tmpq.getNumTerms() ; i++ ) {
			// get the term in utf8
			//char bb[256];
			QueryTerm *qt = &m_tmpq.m_qterms[i];
			//utf16ToUtf8(bb, 256, qt->m_term, qt->m_termLen);
			char *tpc = qt->m_term + qt->m_termLen;
			char  tmp = *tpc;
			*tpc = '\0';
			char sign = qt->m_termSign;
			if ( sign == 0 ) sign = '0';
			QueryWord *qw = qt->m_qword;
			long wikiPhrId = qw->m_wikiPhraseId;
			if ( m_tmpq.isPhrase(i) ) wikiPhrId = 0;
			char leftwikibigram = 0;
			char rightwikibigram = 0;
			if ( qt->m_leftPhraseTerm &&
			     qt->m_leftPhraseTerm->m_isWikiHalfStopBigram )
				leftwikibigram = 1;
			if ( qt->m_rightPhraseTerm &&
			     qt->m_rightPhraseTerm->m_isWikiHalfStopBigram )
				rightwikibigram = 1;
			/*
			char c = m_tmpq.getTermSign(i);
			char tt[512];
			long ttlen = m_tmpq.getTermLen(i);
			if ( ttlen > 254 ) ttlen = 254;
			if ( ttlen < 0   ) ttlen = 0;
			// old:painful: convert each term from unicode to ascii
			memcpy ( tt , m_tmpq.getTerm(i) , ttlen );
			*/
			long isSynonym = 0;
			QueryTerm *st = qt->m_synonymOf;
			if ( st ) isSynonym = true;
			SafeBuf sb;
			// now we can display it
			//tt[ttlen]='\0';
			//if ( c == '\0' ) c = ' ';
			sb.safePrintf(
			     "query: msg39: [%lu] query term #%li \"%s\" "
			     "phr=%li termId=%llu rawTermId=%llu "
			     //"estimatedTermFreq=%lli (+/- ~16000) "
			     "tfweight=%.02f "
			     "sign=%c "
			     "numPlusses=%hhu "
			     "required=%li "
			     "fielcode=%li "

			     "ebit=0x%0llx "
			     "impBits=0x%0llx "

			     "wikiphrid=%li "
			     "leftwikibigram=%li "
			     "rightwikibigram=%li "
			     //"range.startTermNum=%hhi range.endTermNum=%hhi "
			     //"minRecSizes=%li "
			     "readSizeInBytes=%li "
			     //"ebit=0x%llx "
			     //"impBits=0x%llx "
			     "hc=%li "
			     "component=%li "
			     "otermLen=%li "
			     "isSynonym=%li "
			     "querylangid=%li ",
			     (long)this ,
			     i          ,
			     qt->m_term,//bb ,
			     (long)m_tmpq.isPhrase (i) ,
			     m_tmpq.getTermId      (i) ,
			     m_tmpq.getRawTermId   (i) ,
			     ((float *)m_r->ptr_termFreqWeights)[i] ,
			     sign , //c ,
			     0 , 
			     (long)qt->m_isRequired,
			     (long)qt->m_fieldCode,

			     (long long)qt->m_explicitBit  ,
			     (long long)qt->m_implicitBits ,

			     wikiPhrId,
			     (long)leftwikibigram,
			     (long)rightwikibigram,
			     ((long *)m_r->ptr_readSizes)[i]         ,
			     //(long long)m_tmpq.m_qterms[i].m_explicitBit  ,
			     //(long long)m_tmpq.m_qterms[i].m_implicitBits ,
			     (long)m_tmpq.m_qterms[i].m_hardCount ,
			     (long)m_tmpq.m_componentCodes[i],
			     (long)m_tmpq.getTermLen(i) ,
			     isSynonym,
			     (long)m_tmpq.m_langId); // ,tt
			// put it back
			*tpc = tmp;
			if ( st ) {
				long stnum = st - m_tmpq.m_qterms;
				sb.safePrintf("synofterm#=%li",stnum);
				//sb.safeMemcpy(st->m_term,st->m_termLen);
				sb.pushChar(' ');
				sb.safePrintf("synwid0=%lli ",qt->m_synWids0);
				sb.safePrintf("synwid1=%lli ",qt->m_synWids1);
				sb.safePrintf("synalnumwords=%li ",
					      qt->m_numAlnumWordsInSynonym);
				// like for synonym "nj" it's base,
				// "new jersey" has 2 alnum words!
				sb.safePrintf("synbasealnumwords=%li ",
					      qt->m_numAlnumWordsInBase);
			}
			logf(LOG_DEBUG,"%s",sb.getBufStart());

		}
		m_tmpq.printBooleanTree();
	}
	// timestamp log
	if ( m_debug ) 
		log(LOG_DEBUG,"query: msg39: [%lu] Getting %li index lists ",
		     (long)this,m_tmpq.getNumTerms());
	// . now get the index lists themselves
	// . return if it blocked
	// . historically we did not merge (last parm): the lists we received
	//   were an appending of a bunch of lists, so keys were not in order.
	//   merging seemed unnecessary because we hash the keys anyway, and
	//   merging takes up valuable cpu time
	// . caution: the index lists returned from Msg2 are now compressed
	// . now i'm merging because 1) it's 10 times faster than hashing
	//   anyway, 2) the reply buf should now always be <= minRecSizes so
	//   we can pre-allocate one better, and 3) this should fix the
	//   yahoo.com reindex bug
	char rdbId = RDB_POSDB;

	// . TODO: MDW: fix
	// . partap says there is a bug in this??? we can't cache UOR'ed lists?
	bool checkCache = false;
	// the split is our own shard number
	//long split = g_hostdb.m_myHost->m_group;
	long split = g_hostdb.m_myHost->m_shardNum;
	// call msg2
	if ( ! m_msg2.getLists ( rdbId                      ,
				 m_r->ptr_coll              ,
				 m_r->m_maxAge              ,
				 m_r->m_addToCache          ,
				 //m_tmpq.m_qterms ,
				 &m_tmpq,
				 m_r->ptr_whiteList,
				 // we need to restrict docid range for
				 // whitelist as well! this is from
				 // doDocIdSplitLoop()
				 m_docIdStart,
				 m_docIdEnd,
				 // how much of each termlist to read in bytes
				 (long *)m_r->ptr_readSizes ,
				 //m_tmpq.getNumTerms()       , // numLists
				 m_lists                    ,
				 this                       ,
				 gotListsWrapper            ,
				 m_r                        ,
				 m_r->m_niceness            ,
				 true                       , // do merge?
				 m_debug                  ,
				 NULL                       ,  // best hostids
				 m_r->m_restrictPosdbForQuery  ,
				 split                      ,
				 checkCache                 )) {
		m_blocked = true;
		return false;
	}

	// error?
	if ( g_errno ) { 
		log("msg39: Had error getting termlists2: %s.",
		    mstrerror(g_errno));
		// don't bail out here because we are in docIdSplitLoop()
		//sendReply (m_slot,this,NULL,0,0,true);
		return true; 
	}
	
	return gotLists ( true );
}
// . now come here when we got the necessary index lists
// . returns false if blocked, true otherwise
// . sets g_errno on error
bool Msg39::gotLists ( bool updateReadInfo ) {
	// bail on error
	if ( g_errno ) { 
		log("msg39: Had error getting termlists: %s.",
		    mstrerror(g_errno));
		//sendReply (m_slot,this,NULL,0,0,true);
		return true; 
	}
	// timestamp log
	if ( m_debug ) {
		log(LOG_DEBUG,"query: msg39: [%lu] Got %li lists in %lli ms"
		    , (long)this,m_tmpq.getNumTerms(),
		     gettimeofdayInMilliseconds() - m_startTime);
		m_startTime = gettimeofdayInMilliseconds();
	}

	// breathe
	QUICKPOLL ( m_r->m_niceness );

	// . set the IndexTable so it can set its score weights from the
	//   termFreqs of each termId in the query
	// . this now takes into account the special termIds used for sorting
	//   by date (0xdadadada and 0xdadadad2 & TERMID_MASK)
	// . it should weight them so much so that the summation of scores
	//   from other query terms cannot make up for a lower date score
	// . this will actually calculate the top
	// . this might also change m_tmpq.m_termSigns 
	// . this won't do anything if it was already called
	m_posdbTable.init ( &m_tmpq                ,
			    m_debug              ,
			    this                   ,
			    &m_tt                  ,
			    m_r->ptr_coll          , 
			    &m_msg2 , // m_lists                ,
			    //m_tmpq.m_numTerms      , // m_numLists
			    m_r                              );

	// breathe
	QUICKPOLL ( m_r->m_niceness );

	// . we have to do this here now too
	// . but if we are getting weights, we don't need m_tt!
	// . actually we were using it before for rat=0/bool queries but
	//   i got rid of NO_RAT_SLOTS
	if ( ! m_allocedTree && ! m_posdbTable.allocTopTree() ) {
		if ( ! g_errno ) { char *xx=NULL;*xx=0; }
		//sendReply ( m_slot , this , NULL , 0 , 0 , true);
		return true;
	}

	// we have to allocate this with each call because each call can
	// be a different docid range from doDocIdSplitLoop.
	if ( ! m_posdbTable.allocWhiteListTable() ) {
		log("msg39: Had error allocating white list table: %s.",
		    mstrerror(g_errno));
		if ( ! g_errno ) { char *xx=NULL;*xx=0; }
		//sendReply (m_slot,this,NULL,0,0,true);
		return true; 
	}


	// do not re do it if doing docid range splitting
	m_allocedTree = true;


	// . now we must call this separately here, not in allocTopTree()
	// . we have to re-set the QueryTermInfos with each docid range split
	//   since it will set the list ptrs from the msg2 lists
	if ( m_r->m_useNewAlgo && ! m_posdbTable.setQueryTermInfo () ) {
		return true;
	}

	// timestamp log
	if ( m_debug ) {
		log(LOG_DEBUG,"query: msg39: [%lu] Preparing to intersect "
		     "took %lli ms",
		     (long)this, gettimeofdayInMilliseconds() - m_startTime );
		m_startTime = gettimeofdayInMilliseconds();
	}

	// time it
	long long start = gettimeofdayInMilliseconds();
	long long diff;

	// . don't bother making a thread if lists are small
	// . look at STAGE? in IndexReadInfo.cpp to see how we read in stages
	// . it's always saying msg39 handler is hogging cpu...could this be it
	//if ( m_msg2.getTotalRead() < 2000*8 ) goto skipThread;

	// debug
	//goto skipThread;

	// . NOW! let's do this in a thread so we can continue to service
	//   incoming requests
	// . don't launch more than 1 thread at a time for this
	// . set callback when thread done

	// breathe
	QUICKPOLL ( m_r->m_niceness );

	// . create the thread
	// . only one of these type of threads should be launched at a time
	if ( g_threads.call ( INTERSECT_THREAD  , // threadType
			      m_r->m_niceness   ,
			      this              , // top 4 bytes must be cback
			      threadDoneWrapper ,
			      addListsWrapper   ) ) {
		m_blocked = true;
		return false;
	}
	// if it failed
	//log(LOG_INFO,"query: Intersect thread creation failed. Doing "
	//    "blocking. Hurts performance.");
	// check tree
	if ( m_tt.m_nodes == NULL ) {
		log(LOG_LOGIC,"query: msg39: Badness."); 
		char *xx = NULL; *xx = 0; }

	// sometimes we skip the thread
	//skipThread:
	// . addLists() should never have a problem
	// . g_errno should be set by prepareToAddLists() above if there is
	//   going to be a problem
	//if ( m_r->m_useNewAlgo )
	m_posdbTable.intersectLists10_r ( );
	//else
	//	m_posdbTable.intersectLists9_r ( );

	// time it
	diff = gettimeofdayInMilliseconds() - start;
	if ( diff > 10 ) log("query: Took %lli ms for intersection",diff);
	// returns false if blocked, true otherwise
	return addedLists ();
}
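// . illustrative sketch only, never called: the range arithmetic that
//   doDocIdSplitLoop() below applies one iteration at a time, pulled out
//   as a pure helper. assumes MAX_DOCID is the inclusive top docid and
//   nsplits >= 1.
// . split "i" covers [d0,d1), with the final split clamped up to
//   MAX_DOCID so integer-division rounding never drops the tail docids.
static void getSplitRange ( long long  nsplits ,
			    long long  i       , // which split, 0-based
			    long long *d0      ,
			    long long *d1      ) {
	long long delta = MAX_DOCID / nsplits;
	*d0 = i * delta;
	*d1 = *d0 + delta;
	// same rounding fix as in the loop below
	if ( *d1 + 20LL > MAX_DOCID ) *d1 = MAX_DOCID;
}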
// . returns false if blocked, true if done
// . to avoid running out of memory, generate the search results for
//   multiple smaller docid-ranges, one range at a time.
bool Msg39::doDocIdSplitLoop ( ) {
	long long delta = MAX_DOCID / (long long)m_numDocIdSplits;
	for ( ; m_ddd < m_dddEnd ; ) {
		// the starting docid...
		long long d0 = m_ddd;
		// advance to point to the exclusive endpoint
		m_ddd += delta;
		// d1 is the exclusive endpoint of this range; it becomes
		// the inclusive starting docid of the next iteration
		long long d1 = m_ddd;
		// fix rounding errors
		if ( d1 + 20LL > MAX_DOCID ) {
			d1    = MAX_DOCID;
			m_ddd = MAX_DOCID;
		}
		// fix it
		m_r->m_minDocId = d0;
		m_r->m_maxDocId = d1; // -1; // exclude d1
		// allow posdbtable re-initialization each time to set
		// the msg2 termlist ptrs anew, otherwise we core in
		// call to PosdbTable::init() below
		//m_posdbTable.m_initialized = false;
		// reset ourselves, partially, anyway, not tmpq etc.
		reset2();
		// debug log
		log("msg39: docid split phase %lli-%lli",d0,d1);
		// wtf?
		if ( d0 >= d1 ) break;
		// use this
		//m_debug = true;
		//log("call1");
		// . get the lists
		// . i think this always should block!
		// . it will also intersect the termlists to get the search
		//   results and accumulate the winners into the "tree"
		if ( ! getLists() ) return false;
		//log("call2 g_errno=%li",(long)g_errno);
		// if there was an error, stop!
		if ( g_errno ) break;
	}

	// return error reply if we had an error
	if ( g_errno ) {
		log("msg39: Had error3: %s.", mstrerror(g_errno));
		sendReply (m_slot,this,NULL,0,0 , true);
		return true; 
	}

	if ( m_debug ) 
		log("msg39: done with all docid range splits");

	// all done. this will send reply back
	//estimateHits();
	//addedLists();

	// should we put cluster recs in the tree?
	//m_gotClusterRecs = ( g_conf.m_fullSplit && m_r->m_doSiteClustering );
	m_gotClusterRecs = ( m_r->m_doSiteClustering );
	
	// . before we send the top docids back, lookup their site hashes
	//   in clusterdb so we can do filtering at this point.
	//   BUT only do this if we are in a "full split" config, because that
	//   way we can guarantee all clusterdb recs are local (on this host)
	//   and should be in the page cache. the page cache should do ultra
	//   quick lookups and no memcpy()'s for this operation. it should
	//   be <<1ms to lookup thousands of docids.
	// . when doing innerLoopSiteClustering we always use top tree now
	//   because our number of "top docids" can be somewhat unpredictably 
	//   large due to having a ton of results with the same "domain hash" 
	//   (see the "vcount" in IndexTable2.cpp)
	// . do NOT do if we are just "getting weights", phr and aff weights
	if ( m_gotClusterRecs ) {
		// . set the clusterdb recs in the top tree
		// . this calls estimateHits() in its reply wrapper when done
		return setClusterRecs ( ) ;
	}

	// if we did not call setClusterRecs, go on to estimate the hits
	estimateHits();

	// no block, we are done
	return true;
}
void Msg39::getDocIds2 ( Msg39Request *req ) {

	// flag it as in use
	m_inUse = true;

	// store it, might be redundant if called from getDocIds() above
	m_r = req;

	// a handy thing
	m_debug = false;
	if ( m_r->m_debug          ) m_debug = true;
	if ( g_conf.m_logDebugQuery  ) m_debug = true;
	if ( g_conf.m_logTimingQuery ) m_debug = true;

	// ensure its size is ok
	if ( m_r->size_coll <= 0 ) {
		g_errno = ENOCOLLREC;
		log(LOG_LOGIC,"query: msg39: getDocIds: %s." , 
		    mstrerror(g_errno) );
		sendReply ( m_slot , this , NULL , 0 , 0 , true );
		return ; 
	}

	CollectionRec *cr = g_collectiondb.getRec ( m_r->ptr_coll );
	if ( ! cr ) {
		g_errno = ENOCOLLREC;
		log(LOG_LOGIC,"query: msg39: getDocIds: %s." , 
		    mstrerror(g_errno) );
		sendReply ( m_slot , this , NULL , 0 , 0 , true );
		return ; 
	}

	// . set our m_q class
	// . m_boolFlag is either 1 or 0 in this case, the caller did the
	//   auto-detect (boolFlag of 2) before calling us
	// . this now calls Query::addCompoundTerms() for us
	if ( ! m_tmpq.set2 ( m_r->ptr_query  , 
			     m_r->m_language ,
			     m_r->m_queryExpansion ,
			     m_r->m_useQueryStopWords ) ) {
		log(LOG_LOGIC,"query: msg39: setQuery: %s." , 
		    mstrerror(g_errno) );
		sendReply ( m_slot , this , NULL , 0 , 0 , true );
		return ; 
	}

	QUICKPOLL ( m_r->m_niceness );

	// set m_errno
	if ( m_tmpq.m_truncated ) m_errno = EQUERYTRUNCATED;
	// ensure matches with the msg3a sending us this request
	if ( m_tmpq.getNumTerms() != m_r->m_nqt ) {
		g_errno = EBADENGINEER;
		log("query: Query parsing inconsistency for q=%s. "
		    "langid=%li. Check langids and m_queryExpansion parms "
		    "which are the only parms that could be different in "
		    "Query::set2()."
		    ,m_tmpq.m_orig
		    ,(long)m_r->m_language
		    );
		sendReply ( m_slot , this , NULL , 0 , 0 , true );
		return ; 
	}
	// debug
	if ( m_debug )
		logf(LOG_DEBUG,"query: msg39: [%lu] Got request "
		     "for q=%s", (long) this,m_tmpq.m_orig);

	// reset this
	m_tt.reset();

	QUICKPOLL ( m_r->m_niceness );

	// assume not doing special docid splitting
	m_numDocIdSplits = 1;

	// . do not do splits if caller is already specifying a docid range
	//   like for gbdocid: queries i guess.
	// . make sure m_msg2 is non-NULL, because if it is NULL we are
	//   evaluating a query for a single docid for seo tools
	if ( m_r->m_minDocId == -1 ) { // && m_msg2 ) {
		long nt = m_tmpq.getNumTerms();
		m_numDocIdSplits = nt / 2;
		if ( m_numDocIdSplits == 0 ) m_numDocIdSplits = 1;
	}

	//if ( ! g_conf.m_doDocIdRangeSplitting )
	//	m_numDocIdSplits = 1;

	// limit to 10
	if ( m_numDocIdSplits > 10 ) 
		m_numDocIdSplits = 10;
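	// e.g. a 7-term query gets 7/2 = 3 splits, a 1- or 2-term query
	// gets just 1 split (and skips the split loop entirely below),
	// and a 30-term query would want 15 but is capped at 10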

	// . if caller already specified a docid range, then be loyal to that!
	// . or if we do not have enough query terms to warrant splitting
	if ( m_numDocIdSplits == 1 ) {
		getLists();
		return;
	}

	// . set up docid range cursor
	// . do twin splitting
	if ( m_r->m_stripe == 1 ) {
		m_ddd = MAX_DOCID / 2LL;
		m_dddEnd = MAX_DOCID + 1LL;
	}
	else if ( m_r->m_stripe == 0 ) {
		m_ddd = 0;
		m_dddEnd = MAX_DOCID / 2LL;
	}
	// support triplets, etc. later
	else {
		char *xx=NULL;*xx=0; 
	}
	// do not do twin splitting if only one host per group
	if ( g_hostdb.getNumStripes() == 1 ) {
		m_ddd    = 0;
		m_dddEnd = MAX_DOCID;
	}
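	// cursor examples (doDocIdSplitLoop() walks [m_ddd,m_dddEnd) in
	// delta-sized steps):
	//   2 stripes: stripe 0 -> [0, MAX_DOCID/2)
	//              stripe 1 -> [MAX_DOCID/2, MAX_DOCID+1)
	//   1 stripe : [0, MAX_DOCID) and no twin splitting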


	// . otherwise, to prevent oom, split up docids into ranges
	//   and get winners of each range.
	if ( ! doDocIdSplitLoop() ) return;

	// error?
	if ( g_errno ) {
		log(LOG_LOGIC,"query: msg39: doDocIdSplitLoop: %s." , 
		    mstrerror(g_errno) );
		sendReply ( m_slot , this , NULL , 0 , 0 , true );
		return ; 
	}
	// it might not have blocked! if all lists in tree and used no thread
	// it will come here after sending the reply and destroying "this"
	return;
}
void sendReply ( void *state ) {

	StateStatsdb *st = (StateStatsdb *)state;

	if ( g_errno ) {
		g_httpServer.sendErrorReply(st->m_socket,
					    500,mstrerror(g_errno));
		return;
	}

	TcpSocket *s = st->m_socket;

	SafeBuf buf( 1024*32 );
	SafeBuf tmpBuf( 1024 );

	//
	// take these out until we need them!
	//
	/*
	// print the top of the page
	tmpBuf.safePrintf( 
			  //"<style type=\"text/css\">"
			  //"@import url(/styles/statsdb.css);</style>\n"
		"<script type=\"text/javascript\" "
		"src=\"/scripts/statsdb.js\"></script>\n"
		"<!-- DHTML Calendar -->"
		"<style type=\"text/css\">"
		"@import url(/jsc/calendar-win2k-1.css);"
		"</style>\n"
		"<script type=\"text/javascript\" "
		"src=\"/jsc/calendar.js\"></script>\n"
		"<script type=\"text/javascript\" "
		"src=\"/jsc/lang/calendar-en.js\"></script>\n"
		"<script type=\"text/javascript\" "
		"src=\"/jsc/calendar-setup.js\"></script>\n"
	);
	*/

	// make the query string
	char qs[1024];
	sprintf(qs,"&date_period=%li&date_units=%li&samples=%li",
		st->m_datePeriod,
		st->m_dateUnits,
		st->m_samples);

	// print standard header
	g_pages.printAdminTop ( &buf , st->m_socket , &st->m_request ,
				qs );

	buf.cat ( tmpBuf );

	//g_pages.printAdminTop2 ( &buf , st->m_socket , &st->m_request, NULL ,
	//			 tmpBuf.getBufStart(), tmpBuf.length() ); 

	// write the controls section of the page
	writeControls( &buf, st );

	// Debug print of CGI parameters and errors
	char startTimeStr[30];
	char endTimeStr[30];

	strncpy( startTimeStr, ctime( &st->m_startDate ), 30 );
	strncpy( endTimeStr, ctime( &st->m_endDate ), 30 );

	buf.safePrintf("<center>\n");

	if ( ! g_conf.m_useStatsdb ) 
		buf.safePrintf("<font color=red><b>Statsdb disabled. "
			       "Turn on in the master controls.</b>"
			       "</font>\n" );

	buf.safePrintf("<table cellpadding=10 border=0>\n");

	buf.safePrintf("<tr><td>"
		       "<center>"
		       "<img src=\"/stats%li.gif\" height=%li width=%li "
		       "border=\"0px\">"
		       "</center>"
		       //"class=\"statsdb_image\">"
		       "</td></tr>\n",
		       st->m_hostId,
		       g_statsdb.getImgHeight(),
		       g_statsdb.getImgWidth());

	// the map key
	buf.safePrintf("<tr><td>");
	buf.cat ( st->m_sb2 );
	buf.safePrintf("</td></tr>\n");

	buf.safePrintf( "</table>\n" );

	buf.safePrintf("</center>");

	// print the bottom of the page
	g_pages.printAdminBottom2( &buf );
	
	g_errno = 0;
	mdelete ( st, sizeof(StateStatsdb), "PageStatsdb" );
	delete st;

	g_httpServer.sendDynamicPage ( s, buf.getBufStart(), buf.length() );
}
// close the least used of all the file descriptors.
// we don't touch files opened for writing, however.
bool File::closeLeastUsed () {

	int64_t min  = 0; // timestamp of the best candidate so far
	int     mini = -1; // fd of the best candidate, -1 = none yet
	int64_t now = gettimeofdayInMillisecondsLocal();


	int32_t notopen = 0;
	int32_t writing = 0;
	int32_t unlinking = 0;
	int32_t young = 0;

	// get the least used of all the actively opened file descriptors.
	// we can't get files that were opened for writing!!!
	int i;
	for ( i = 0 ; i < MAX_NUM_FDS ; i++ ) {
		//if ( s_fds   [ i ] < 0        ) continue;
		if ( ! s_open[i] ) { notopen++; continue; }
		// fds opened for writing are not candidates, because if
		// we close on a threaded write, that fd may be used to
		// re-open another file which gets garbled!
		if ( s_writing [ i ] ) { writing++; continue; }
		// do not close guys being unlinked they are in the middle
		// of being closed ALREADY in close1_r(). There should only be 
		// like one unlink thread allowed to be active at a time so we 
		// don't have to worry about it hogging all the fds.
		if ( s_unlinking [ i ] ) { unlinking++; continue; }
		// when we get like 1000 reads queued up, it uses a *lot* of
		// memory and we can end up never being able to complete a
		// read because the descriptors are always getting closed on
		// us, so do a hack fix and do not close descriptors that
		// were used within the last couple of milliseconds
		if ( s_timestamps [ i ] == now ) { young++; continue; }
		if ( s_timestamps [ i ] == now - 1 ) { young++; continue; }
		if ( mini == -1 || s_timestamps [ i ] < min ) {
			min  = s_timestamps [ i ];
			mini = i;
		}
	}
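	// "mini" is now a classic LRU pick: the open, readable,
	// not-unlinking, not-too-young fd with the oldest timestamp.
	// e.g. with timestamps {fd 3: 900, fd 5: 850, fd 7: now} we pick
	// fd 5, since fd 7 is too young and 850 < 900.
	// (illustrative fds/timestamps only)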

	/*
	// use the new linked list of active file descriptors
	// . file at tail is the most active
	File *f = s_activeHead;

	// if nothing to do return true
	//if ( ! f ) return true;

	int32_t mini2 = -1;

	// close the head if not writing
	for ( ; f ; f = f->m_nextActive ) {
		mini2 = f->m_vfd;
		// how can this be?
		if ( s_fds [ mini2 ] < 0 ) { char *xx=NULL;*xx=0; }
		if ( s_writing [ mini2 ] ) continue;
		if ( s_unlinking [ mini2 ] ) continue;
		// when we got like 1000 reads queued up, it uses a *lot* of
		// memory and we can end up never being able to complete a
		// read because the descriptors are always getting closed on us
		// so do a hack fix and do not close descriptors that are
		// about .5 seconds old on avg.
		if ( s_timestamps [ mini2 ] >= now - 1000 ) continue;
		break;
	}

	// debug why it doesn't work right
	if ( mini != mini2 ) {
		int fd1 = -1;
		int fd2 = -1;
		if ( mini >= 0 ) fd1 = s_fds[mini];
		if ( mini2 >= 0 ) fd2 = s_fds[mini2];
		int32_t age = now - s_timestamps[mini] ;
		log("File: linkedlistfd=%i != rightfd=%i agems=%i",fd1,fd2,
		    (int)age);
	}
	*/

	// if nothing to free then return false
	if ( mini == -1 ) 
		return log("File: closeLeastUsed: failed. All %"INT32" "
			   "descriptors "
			   "are unavailable to be closed and re-used to read "
			   "from another file. notopen=%i writing=%i "
			   "unlinking=%i young=%i"
			   ,(int32_t)s_maxNumOpenFiles
			   ,notopen
			   ,writing
			   ,unlinking
			   ,young );


	int fd = mini; // the s_* arrays are indexed by fd, so mini IS the fd

	// always block on close
	//int fd    = s_fds[mini];
	int flags = fcntl ( fd , F_GETFL ) ;
	// turn off these 2 flags on fd to make sure
	flags &= ~( O_NONBLOCK | O_ASYNC );
 retry27:
	// return false on error
	if ( fcntl ( fd, F_SETFL, flags ) < 0 ) {
		// valgrind
		if ( errno == EINTR ) goto retry27;
		//char *xx = NULL; *xx = 1;
		log("disk: fcntl(%i): %s",fd,mstrerror(errno));
		// return false;
		errno = 0;
	}

	// . tally up another close for this fd, if any
	// . so if an open happens shortly here after, and 
	//   gets this fd, then any read that was started 
	//   before that open will know it!
	//s_closeCounts [ fd ]++;
	// otherwise we gotta really close it
 again:
	if ( fd == 0 ) log("disk: closing3 fd of 0");
	int status = ::close ( fd );
	if ( status == -1 && errno == EINTR ) goto again;

	// -1 means can be reopened because File::close() wasn't called.
	// we're just conserving file descriptors
	//s_fds [ mini ] = -1;

	// if the real close was successful then decrement the # of open files
	if ( status == 0 ) {
		// it's not open
		s_open     [ fd ] = 0;
		// if someone is trying to read on this let them know
		s_closeCounts [ fd ]++;

		s_numOpenFiles--;

		File *f = s_filePtrs [ fd ];
		// don't let him use the stolen fd
		if ( f ) f->m_fd = -1 ;

		// debug msg
		if ( g_conf.m_logDebugDisk ) {
			File *f = s_filePtrs [ fd ];
			char *fname = "";
			if ( f ) fname = f->getFilename();
			logf(LOG_DEBUG,"disk: force closed fd %i for"
			     " %s. age=%"INT64" #openfiles=%i this=0x%"PTRFMT,
			     fd,fname,now-s_timestamps[mini],
			     (int)s_numOpenFiles,
			     (PTRTYPE)this);
		}

		// no longer the owner
		s_filePtrs [ fd ] = NULL;

		// excise from linked list of active files
		//rmFileFromLinkedList ( f );
		// getfd() may not execute in time to inc the closeCount
		// so do it here. test by setting the max open files to like
		// 10 or so and spidering heavily.
		//s_closeCounts [ fd ]++;
	}


	if ( status == -1 ) 
		return log("disk: close(%i) : %s", fd , strerror(errno));

	if ( g_conf.m_logDebugDisk ) sanityCheck();

	return true;
}	
// . return false if blocked, true otherwise
// . sets g_errno on error
bool Msg1::sendData ( uint32_t shardNum, char *listData , int32_t listSize) {
	// debug msg
	//log("sendData: mcast=%" PRIu32" listSize=%" PRId32,
	//    (int32_t)&m_mcast,(int32_t)listSize);

	// bail if this is an interface machine; don't write to the main
	// cluster
	if ( g_conf.m_interfaceMachine ) return true;
	// return true if no data
	if ( listSize == 0 ) return true;
	// how many hosts in this group
	//int32_t numHosts = g_hostdb.getNumHostsPerShard();
	// . NOTE: for now i'm removing this until I handle ETRYAGAIN errors
	//         properly... by waiting and retrying...
	// . if this is local data just for us just do an addList to OUR rdb
	/*
	if ( groupId == g_hostdb.m_groupId  && numHosts == 1 ) {
		// this sets g_errno on error
		Msg0 msg0;
		Rdb *rdb = msg0.getRdb ( (char) m_rdbId );
		if ( ! rdb ) return true;
		// make a list from this data
		RdbList list;
		list.set (listData,listSize,listSize,rdb->getFixedDataSize(),
			  false) ; // ownData?
		// this returns false and sets g_errno on error
		rdb->addList ( &list );
		// . if we got a ETRYAGAIN cuz the buffer we add to was full
		//   then we should sleep and try again!
		// . return false cuz this blocks for a period of time
		//   before trying again
		if ( g_errno == ETRYAGAIN ) {
			// try adding again in 1 second
			registerSleepCallback ( 1000, slot, tryAgainWrapper1 );
			// return now
			return false;
		}
		// . always return true cuz we did not block
		// . g_errno may be set
		return true;
	}
	*/
	// if the data is being added to our group, don't send ourselves
	// a msg1, if we can add it right now
	// MDW: crap this is getting ETRYAGAIN and it isn't being tried again
	// i guess and Spider.cpp fails to add to doledb but the doleiptable
	// maintains a positive count, thereby hanging the spiders. let's
	// just always go through multicast so it will auto-retry ETRYAGAIN
	/*
	bool sendToSelf = true;
	if ( shardNum == getMyShardNum() &&
	     ! g_conf.m_interfaceMachine ) {
		// get the rdb to which it belongs, use Msg0::getRdb()
		Rdb *rdb = getRdbFromId ( (char) m_rdbId );
		if ( ! rdb ) goto skip;
		// key size
		int32_t ks = getKeySizeFromRdbId ( m_rdbId );
		// reset g_errno
		g_errno = 0;
		// . make a list from this data
		// . skip over the first 4 bytes which is the rdbId
		// . TODO: embed the rdbId in the msgtype or something...
		RdbList list;
		// set the list
		list.set ( listData ,
			   listSize ,
			   listData ,
			   listSize ,
			   rdb->getFixedDataSize() ,
			   false                   ,  // ownData?
			   rdb->useHalfKeys()      ,
			   ks                      ); 
		// note that
		//log("msg1: local addlist niceness=%" PRId32,m_niceness);
		// this returns false and sets g_errno on error
		rdb->addList ( m_coll , &list , m_niceness );
		// if titledb, add tfndb recs to map the title recs
		//if ( ! g_errno && rdb == g_titledb.getRdb() && m_injecting ) 
		//	// this returns false and sets g_errno on error
		//	updateTfndb ( m_coll , &list , true , m_niceness);
		// if no error, no need to use a Msg1 UdpSlot for ourselves
		if ( ! g_errno ) sendToSelf = false;
		else {
			log("rdb: msg1 coll=%s rdb=%s had error: %s",
			    m_coll,rdb->m_dbname,mstrerror(g_errno));
			// this is messing up generate catdb's huge rdblist add
			// why did we put it in there??? from msg9b.cpp
			//return true;
		}
		
 		QUICKPOLL(m_niceness);
		// if we're the only one in the group, bail, we're done
		if ( ! sendToSelf &&
		     g_hostdb.getNumHostsPerShard() == 1 ) return true;
	}
skip:
	*/
	// . make an add record request to multicast to a bunch of machines
	// . this will alloc new space, returns NULL on failure
	//char *request = makeRequest ( listData, listSize, groupId , 
	//m_rdbId , &requestLen );
	//int32_t collLen = strlen ( m_coll );
	// . returns NULL and sets g_errno on error
	// . calculate total size of the record
	// . 1 byte for rdbId, 1 byte for flags,
	//   then the collnum_t, then the list
	int32_t requestLen = 1 + 1 + sizeof(collnum_t) + listSize ;
	// make the request
	char *request = (char *) mmalloc ( requestLen ,"Msg1" );
	if ( ! request ) return true;
	char *p = request;
	// store the rdbId at top of request
	*p++ = m_rdbId;
	// then the flags
	*p = 0;
	if ( m_injecting ) *p |= 0x80;
	p++;
	// then collection name
	//gbmemcpy ( p , m_coll , collLen );
	//p += collLen;
	//*p++ = '\0';
	*(collnum_t *)p = m_collnum;
	p += sizeof(collnum_t);
	// sanity check
	//if ( collLen <= 0 ) {
	//	log(LOG_LOGIC,"net: No collection specified for list add.");
	//	//g_process.shutdownAbort(true);
	//	g_errno = ENOCOLLREC;
	//	return true;
	//}
	//if ( m_deleteRecs    ) request[1] |= 0x80;
	//if ( m_overwriteRecs ) request[1] |= 0x40;
	// store the list after coll
	gbmemcpy ( p , listData , listSize );
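	// the serialized request now looks like this on the wire:
	//   [rdbId:1][flags:1][collnum:sizeof(collnum_t)][listData:listSize]
	// where the 0x80 flag bit marks an injection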
 	QUICKPOLL(m_niceness);
	// for small packets
	//int32_t niceness = 2;
	//if ( requestLen < TMPBUFSIZE - 32 ) niceness = 0;
	//log("msg1: sending mcast niceness=%" PRId32,m_niceness);
	// . multicast to all hosts in group "groupId"
	// . multicast::send() returns false and sets g_errno on error
	// . we return false if we block, true otherwise
	// . will loop indefinitely if a host in this group is down
	key_t k; k.setMin();
	if ( m_mcast.send ( request    , // sets mcast->m_msg    to this
			    requestLen , // sets mcast->m_msgLen to this
			    msg_type_1       ,
			    true       , // does multicast own msg?
			    shardNum   , // group to send to (groupKey)
			    true       , // send to whole group?
			    0          , // key is useless for us
			    this       , // state data
			    NULL       , // state data 2
			    gotReplyWrapper1 ,
			    multicast_msg1_senddata_timeout         , // timeout
			    m_niceness , // niceness 
			    -1    , // first host to try
			    NULL  , // replyBuf        = NULL ,
			    0     , // replyBufMaxSize = 0 ,
			    true  , // freeReplyBuf    = true ,
			    false , // doDiskLoadBalancing = false ,
			    -1    , // no max cache age limit
			    //(key_t)0 , // cache key
			    k    , // cache key
			    RDB_NONE , // bogus rdbId
			    -1    , // unknown minRecSizes read size
			    true )) // sendToSelf ))
		return false;

 	QUICKPOLL(m_niceness);
	// g_errno should be set
	log("net: Had error when sending request to add data to %s in shard "
	    "#%" PRIu32": %s.", getDbnameFromId(m_rdbId),shardNum,mstrerror(g_errno));
	return true;	
}
// . reply to a request for an RdbList
// . MUST call g_udpServer::sendReply or sendErrorReply() so slot can
//   be destroyed
void handleRequest0 ( UdpSlot *slot , long netnice ) {
	// if niceness is 0, use the higher priority udpServer
	UdpServer *us = &g_udpServer;
	//if ( netnice == 0 ) us = &g_udpServer2;
	// get the request
	char *request     = slot->m_readBuf;
	long  requestSize = slot->m_readBufSize;
	// collection is now stored in the request, so i commented this out
	//if ( requestSize != MSG0_REQ_SIZE ) {
	//	log("net: Received bad data request size of %li bytes. "
	//	    "Should be %li.", requestSize ,(long)MSG0_REQ_SIZE);
	//	us->sendErrorReply ( slot , EBADREQUESTSIZE );
	//	return;
	//}
	// parse the request
	char *p                  = request;
	long long syncPoint          = *(long long *)p ; p += 8;
	//key_t     startKey           = *(key_t     *)p ; p += sizeof(key_t);
	//key_t     endKey             = *(key_t     *)p ; p += sizeof(key_t);
	long      minRecSizes        = *(long      *)p ; p += 4;
	long      startFileNum       = *(long      *)p ; p += 4;
	long      numFiles           = *(long      *)p ; p += 4;
	long      maxCacheAge        = *(long      *)p ; p += 4;
	char      rdbId              = *p++;
	char      addToCache         = *p++;
	char      doErrorCorrection  = *p++;
	char      includeTree        = *p++;
	// this was messing up our niceness conversion logic
	long      niceness           = slot->m_niceness;//(long)(*p++);
	// still need to skip it though!
	p++;
	bool      allowPageCache     = (bool)(*p++);
	char ks = getKeySizeFromRdbId ( rdbId );
	char     *startKey           = p; p+=ks;
	char     *endKey             = p; p+=ks;
	// then null terminated collection
	char     *coll               = p;
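	// so the request layout we just walked is:
	//   [syncPoint:8][minRecSizes:4][startFileNum:4][numFiles:4]
	//   [maxCacheAge:4][rdbId:1][addToCache:1][doErrorCorrection:1]
	//   [includeTree:1][niceness:1, skipped][allowPageCache:1]
	//   [startKey:ks][endKey:ks][coll:NUL-terminated]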


	// bail on any leftover error (the old XmlDoc::cacheTermLists()
	// logic that set this is commented out below)
	if ( g_errno ) {
		us->sendErrorReply ( slot , g_errno ); return;}

	// is this being called from callWaitingHandlers()
	//bool isRecall = (netnice == 99);

	// . get the rdb we need to get the RdbList from
	// . returns NULL and sets g_errno on error
	//Msg0 msg0;
	//Rdb *rdb = msg0.getRdb ( rdbId );
	Rdb *rdb = getRdbFromId ( rdbId );
	if ( ! rdb ) { 
		us->sendErrorReply ( slot , EBADRDBID ); return;}

	// keep track of stats
	rdb->readRequestGet ( requestSize );

	/*
	// keep track of stats
	if ( ! isRecall ) rdb->readRequestGet ( requestSize );

	long long singleDocId2 = 0LL;
	if ( rdbId == RDB_POSDB && maxCacheAge ) {
		long long d1 = g_posdb.getDocId(startKey);
		long long d2 = g_posdb.getDocId(endKey);
		if ( d1+1 == d2 ) singleDocId2 = d1;
	}

	// have we parsed this docid and cached its termlists?
	bool shouldBeCached2 = false;
	if ( singleDocId2 && 
	     isDocIdInTermListCache ( singleDocId2 , coll ) ) 
		shouldBeCached2 = true;

	// if in the termlist cache, send it back right away
	char *trec;
	long trecSize;
	if ( singleDocId2 &&
	     getRecFromTermListCache(coll,
				     startKey,
				     endKey,
				     maxCacheAge,
				     &trec,
				     &trecSize) ) {
		// if in cache send it back!
		us->sendReply_ass(trec,trecSize,trec,trecSize,slot);
		return;
	}

	// if should be cached but was not found then it's probably a
	// synonym form not in the doc content. make an empty list then.
	if ( shouldBeCached2 ) {
		// send back an empty termlist
		us->sendReply_ass(NULL,0,NULL,0,slot);
		return;
	}

	// MUST be in termlist cache! if not in there it is a probably
	// a synonym term termlist of a word in the doc.
	if ( isRecall ) {
		// send back an empty termlist
		us->sendReply_ass(NULL,0,NULL,0,slot);
		return;
	}
	
	// init waiting table?
	static bool s_waitInit = false;
	if ( ! s_waitInit ) {
		// do not repeat
		s_waitInit = true;
		// niceness = 0
		if ( ! g_waitingTable.set(8,4,2048,NULL,0,true,0,"m5wtbl")){
			log("msg5: failed to init waiting table");
			// error kills us!
			us->sendErrorReply ( slot , EBADRDBID ); 
			return;
		}
	}

	// wait in waiting table?
	if ( singleDocId2 && g_waitingTable.isInTable ( &singleDocId2 ) ) {
		g_waitingTable.addKey ( &singleDocId2 , &slot );
		return;
	}

	// if it's for a special gbdocid: query then cache ALL termlists
	// for this docid into g_termListCache right now
	if ( singleDocId2 ) {
		// have all further incoming requests for this docid
		// wait in the waiting table
		g_waitingTable.addKey ( &singleDocId2 , &slot );
		// load the title rec and store its posdb termlists in cache
		XmlDoc *xd;
		try { xd = new ( XmlDoc ); }
		catch ( ... ) {
			g_errno = ENOMEM;
			us->sendErrorReply ( slot , g_errno );
			return;
		}
		mnew ( xd, sizeof(XmlDoc),"msg0xd");
		// always use niceness 1 now even though we use niceness 0
		// to make the cache hits fast
		//niceness = 1;
		// . load the old title rec first and just recycle all
		// . typically there might be a few hundred related docids
		//   each with 50,000 matching queries on average to evaluate
		//   with the gbdocid:xxxx| restriction?
		if ( ! xd->set3 ( singleDocId2 , coll , niceness ) ) {
			us->sendErrorReply ( slot , g_errno ); return;}
		// init the new xmldoc
		xd->m_callback1 = callWaitingHandlers;
		xd->m_state     = xd;
		// . if this blocks then return
		// . should call loadOldTitleRec() and get JUST the posdb recs
		//   by setting m_useTitledb, etc. to false. then it should
		//   make posdb termlists with the compression using
		//   RdbList::addRecord() and add those lists to 
		//   g_termListCache
		if ( ! xd->cacheTermLists ( ) ) return;
		// otherwise, it completed right away!
		callWaitingHandlers ( xd );
		return;
	}
	*/

	/*
	// init special sectiondb cache?
	if ( rdbId == RDB_SECTIONDB && ! s_initCache ) {
		// try to init cache
		if ( ! s_sectiondbCache.init ( 20000000 , // 20MB max mem
					       -1       , // fixed data size
					       false    , // support lists?
					       20000    , // 20k max recs
					       false    , // use half keys?
					       "secdbche", // dbname
					       false, // load from disk?
					       sizeof(key128_t), //cachekeysize
					       0 , // data key size
					       20000 )) // numPtrs max
			log("msg0: failed to init sectiondb cache: %s",
			    mstrerror(g_errno));
		else
			s_initCache = true;
	}

	// check the sectiondb cache
	if ( rdbId == RDB_SECTIONDB ) {
		//long long sh48 = g_datedb.getTermId((key128_t *)startKey);
		// use the start key now!!!
		char *data;
		long  dataSize;
		if (s_sectiondbCache.getRecord ( coll,
						 startKey,//&sh48,
						 &data,
						 &dataSize,
						 true, // docopy?
						 600, // maxage (10 mins)
						 true, // inc counts?
						 NULL, // cachedtime
						 true // promoteRec?
						 )){
			// debug
			//log("msg0: got sectiondblist in cache datasize=%li",
			//    dataSize);
			// send that back
			g_udpServer.sendReply_ass ( data            ,
						    dataSize        ,
						    data            ,
						    dataSize        ,
						    slot            ,
						    60              ,
						    NULL            ,
						    doneSending_ass ,
						    -1              ,
						    -1              ,
						    true            );
			return;
		}
	}
	*/

	// . do a local get
	// . create a msg5 to get the list
	State00 *st0 ;
	try { st0 = new (State00); }
	catch ( ... ) { 
		g_errno = ENOMEM;
		log("Msg0: new(%i): %s", sizeof(State00),mstrerror(g_errno));
		us->sendErrorReply ( slot , g_errno ); 
		return; 
	}
	mnew ( st0 , sizeof(State00) , "State00" );
	// timing debug
	if ( g_conf.m_logTimingNet )
		st0->m_startTime = gettimeofdayInMilliseconds();
	// save slot in state
	st0->m_slot = slot;
	// save udp server to send back reply on
	st0->m_us = us;
	// init this one
	st0->m_niceness = niceness;
	st0->m_rdbId    = rdbId;

	QUICKPOLL(niceness);

	// debug msg
	if ( maxCacheAge != 0 && ! addToCache )
		log(LOG_LOGIC,"net: msg0: check but don't add... rdbid=%li.",
		    (long)rdbId);
	// . if this request came over on the high priority udp server
	//   make sure the priority gets passed along
	// . return if this blocks
	// . we'll call sendReply later
	if ( ! st0->m_msg5.getList ( rdbId             ,
				     coll              ,
				     &st0->m_list      ,
				     startKey          ,
				     endKey            ,
				     minRecSizes       ,
				     includeTree       , // include tree?
				     addToCache        , // addToCache?
				     maxCacheAge       ,
				     startFileNum      , 
				     numFiles          ,
				     st0               ,
				     gotListWrapper    ,
				     niceness          ,
				     doErrorCorrection ,
				     NULL , // cacheKeyPtr
				     0    , // retryNum
				     2    , // maxRetries
				     true , // compensateForMerge
				     syncPoint ,
				     &st0->m_msg5b ,
				     false,
				     allowPageCache ) )
		return;
	// call the wrapper ourselves
	gotListWrapper ( st0 , NULL , NULL );
}
// g_errno may be set when this is called
void addedList ( UdpSlot *slot , Rdb *rdb ) {
	// no memory means to try again
	if ( g_errno == ENOMEM ) g_errno = ETRYAGAIN;
	// doing a full rebuild will add collections
	if ( g_errno == ENOCOLLREC &&
	     g_repairMode > 0 )
	     //g_repair.m_fullRebuild )
		g_errno = ETRYAGAIN;

	// it seems like someone can delete a collection and there can
	// be adds in transit to doledb and it logs
	// "doledb bad collnum of 30110"
	// so just absorb those
	if ( g_errno == ENOCOLLREC ) {
		log("msg1: missing collrec to add to to %s. just dropping.",
		    rdb->m_dbname);
		g_errno = 0;
	}

	// . if we got a ETRYAGAIN cuz the buffer we add to was full
	//   then we should sleep and try again!
	// . return false cuz this blocks for a period of time
	//   before trying again
	// . but now to free the udp slot when we are doing an urgent merge
	//   let's send an error back!
	//if ( g_errno == ETRYAGAIN ) {
		// debug msg
		//log("REGISTERING SLEEP CALLBACK");
		// try adding again in 1 second
	//	g_loop.registerSleepCallback ( 1000, slot, tryAgainWrapper );
		// return now
	//	return;
	//}
	// random test
	//if ( (rand() % 10) == 1 ) g_errno = ETRYAGAIN;
	//int32_t niceness = slot->getNiceness() ;
	// select udp server based on niceness
	UdpServer *us = &g_udpServer ;
	//if ( niceness == 0 ) us = &g_udpServer2;
	//else                 us = &g_udpServer ;
	// chalk it up
	rdb->sentReplyAdd ( 0 );
	// are we done
	if ( ! g_errno ) {
		// . send an empty (non-error) reply as verification
		// . slot should be auto-nuked on transmission/timeout of reply
		// . udpServer should free the readBuf
		us->sendReply_ass ( NULL , 0 , NULL , 0 , slot ) ;
		return;
	}
	// on other errors just send the err code back
	log(LOG_ERROR,"%s:%s:%d: call sendErrorReply. error=%s", __FILE__, __func__, __LINE__, mstrerror(g_errno));
	us->sendErrorReply ( slot , g_errno );
}
// . return false if blocked, true otherwise
// . set g_errno on error
// . read list of keys in [startKey,endKey] range
// . read at least "minRecSizes" bytes of keys in that range
// . the "m_endKey" of resulting, merged list may have a smaller endKey
//   than the argument, "endKey" due to limitation by "minRecSizes"
// . resulting list will contain ALL keys between ITS [m_startKey,m_endKey]
// . final merged list "should" try to have a size of at least "minRecSizes"
//   but due to negative/postive rec elimination may be less
// . the endKey of the lists we read may be <= "endKey" provided
// . we try to shrink the endKey if minRecSizes is >= 0 in order to
//   avoid excessive reading
// . by shrinking the endKey we cannot take into account the size of deleted
//   records, so therefore we may fall short of "minRecSizes" in actuality,
//   in fact, the returned list may even be empty with a shrunken endKey
// . we merge all lists read from disk into the provided "list"
// . caller should call Msg3::getList(long i) and Msg3::getNumLists()
//   to retrieve the lists
// . this makes the query engine faster since we don't need to merge the docIds
//   and can just send them across the network separately and they will be
//   hashed into IndexTable's table w/o having to do time-wasting merging.
// . caller can specify array of filenums to read from so incremental syncing
//   in Sync class can just read from titledb*.dat files that were formed
//   since the last sync point.
bool Msg3::readList  ( char           rdbId         ,
		       char          *coll          ,
		       //key_t          startKey      , 
		       //key_t          endKey        , 
		       char          *startKeyArg      , 
		       char          *endKeyArg        , 
		       long           minRecSizes   , // max size of scan
		       long           startFileNum  , // first file to scan
		       long           numFiles      , // rel. to startFileNum
		       void          *state         , // for callback
		       void        (* callback ) ( void *state ) ,
		       long           niceness      ,
		       long           retryNum      ,
		       long           maxRetries    ,
		       bool           compensateForMerge ,
		       long long      syncPoint     ,
		       bool           justGetEndKey ,
		       bool           allowPageCache ,
		       bool           hitDisk        ) {
	// clear, this MUST be done so if we return true g_errno is correct
	g_errno = 0;
	// assume lists are not checked for corruption
	m_listsChecked = false;
	// warn
	if ( minRecSizes < -1 ) {
		log(LOG_LOGIC,"db: Msg3 got minRecSizes of %li, changing "
		    "to -1.",minRecSizes);
		minRecSizes = -1;
	}
	// reset m_alloc and data in all lists in case we are a re-call
	reset();
	// warning
	if ( ! coll ) log(LOG_LOGIC,"net: NULL collection. msg3.");
	// remember the callback
	m_rdbId              = rdbId;
	m_coll               = coll;
	m_callback           = callback;
	m_state              = state;
	m_niceness           = niceness;
	m_numScansCompleted  = 0;
	m_retryNum           = retryNum;
	m_maxRetries         = maxRetries;
	m_compensateForMerge = compensateForMerge;
	m_allowPageCache     = allowPageCache;
	m_hitDisk            = hitDisk;
	m_hadCorruption      = false;
	// get keySize of rdb
	m_ks = getKeySizeFromRdbId ( m_rdbId );
	// reset the group error
	m_errno    = 0;
	// . reset all our lists 
	// . these are reset in the call to RdbScan::setRead() below
	//for ( long i = 0 ; i < MAX_RDB_FILES ; i++ ) m_lists[i].reset();
	// . ensure startKey last bit clear, endKey last bit set
	// . no! this warning is now only in Msg5
	// . if RdbMerge is merging some files, not involving the root 
	//   file, then we can expect to get a lot of unmatched negative recs.
	// . as a consequence, our endKeys may often be negative. This means
	//   it may not annihilate with the positive key, but we should only
	//   miss like this at the boundaries of the lists we fetch.
	// . so in that case RdbList::merge will stop merging once the
	//   minRecSizes limit is reached even if it means ending on a negative
	//   rec key
	//if ( (startKey.n0 & 0x01) == 0x01 ) 
	if ( !KEYNEG(startKeyArg) )
		log(LOG_REMIND,"net: msg3: StartKey lastbit set."); 
	if (  KEYNEG(endKeyArg) )
		log(LOG_REMIND,"net: msg3: EndKey lastbit clear."); 

	// declare vars here because of 'goto skip' below
	long max ;

	// get base, returns NULL and sets g_errno to ENOCOLLREC on error
	RdbBase *base; if (!(base=getRdbBase(m_rdbId,m_coll))) return true;

	// if caller specified exactly
	/*
	m_syncPoint = syncPoint;
	if ( syncPoint != -1 && syncPoint != 0 ) {
		// . store them all
		// . what if we merged one of these files (or are merging)???
		// . then sync class should not discard syncpoints until no
		//   longer syncing and we'll know about it
		// . this should compensate for merges by including any files
		//   that are merging a file in m_fileNums
		m_numFileNums = g_sync.getFileNums ( m_rdbId       ,
						     m_coll        ,
						     m_syncPoint   ,
						     m_fileNums    , 
						     MAX_RDB_FILES );
		log("NOOOOOO. we do not alloc if we go to skip!!");
		char *xx = NULL; *xx = 0;
		// bring back the comment below... i removed it because i added
		// "long chunk" et al below and didn't want to move them.
		//if ( m_numFileNums > 0 ) goto skip;
		log("net: Trying to read data in %s from files generated after"
		    " a sync point %llu in \"sync\" file, but none found.",
		    base->m_dbname,m_syncPoint);
		return true;
	}
	// should we read all?
	if ( m_syncPoint == 0 ) {
		numFiles     = -1;
		startFileNum =  0;
	}
	*/

	// store the file numbers in the array, these are the files we read
	m_numFileNums = 0;

	// save startFileNum here, just for recall
	m_startFileNum = startFileNum;
	m_numFiles     = numFiles;

	// . if we have a merge going on, we may have to change startFileNum
	// . if some files get unlinked because merge completes then our 
	//   reads will detect the error and loop back here
	// . we launch our reads right after this without giving up the cpu
	//   and we use file descriptors, so any changes to Rdb::m_files[]
	//   should not hurt us
	// . WARNING: just make sure you don't lose control of cpu until after
	//   you call RdbScan::set()
	// . we use hasMergeFile() instead of isMerging() because he may not 
	//   be merging cuz he got suspended or he restarted and
	//   hasn't called attemptMerge() yet, but he may still contain it
	if ( g_conf.m_logDebugQuery )
		log(LOG_DEBUG,
		    "net: msg3: "
		    "c=%li hmf=%li sfn=%li msfn=%li nf=%li db=%s.",
		     (long)compensateForMerge,(long)base->hasMergeFile(),
		     (long)startFileNum,(long)base->m_mergeStartFileNum-1,
		     (long)numFiles,base->m_dbname);
	long pre = -10;
	if ( compensateForMerge && base->hasMergeFile() && 
	     startFileNum >= base->m_mergeStartFileNum - 1 &&
	     (startFileNum > 0 || numFiles != -1) ) {
		// now also include the file being merged into, but only
		// if we are reading from a file being merged...
		if ( startFileNum < base->m_mergeStartFileNum +
		     base->m_numFilesToMerge - 1 )
			//m_fileNums [ m_numFileNums++ ] =
			//	base->m_mergeStartFileNum - 1;
			pre = base->m_mergeStartFileNum - 1;
		// debug msg
		if ( g_conf.m_logDebugQuery )
			log(LOG_DEBUG,
			   "net: msg3: startFileNum from %li to %li (pre=%li)",
			    startFileNum,startFileNum+1,pre);
		// if merge file was inserted before us, inc our file number
		startFileNum++;
	}
	// adjust num files if we need to, as well
	if ( compensateForMerge && base->hasMergeFile() && 
	     startFileNum < base->m_mergeStartFileNum - 1 &&
	     numFiles != -1 &&
	     startFileNum + numFiles - 1 >= base->m_mergeStartFileNum - 1 ) {
		// debug msg
		if ( g_conf.m_logDebugQuery )
			log(LOG_DEBUG,"net: msg3: numFiles up one.");
		// if merge file was inserted before us, inc our file number
		numFiles++;
	}

	// . how many rdb files does this base have?
	// . IMPORTANT: this can change since files are unstable because they
	//   might have all got merged into one!
	// . so do this check to make sure we're safe... especially if
	//   there was an error before and we called readList() on ourselves
	max = base->getNumFiles();
	// -1 means we should scan ALL the files in the base
	if ( numFiles == -1 ) numFiles = max;
	// limit it by startFileNum, however
	if ( numFiles > max - startFileNum ) numFiles = max - startFileNum;
	// set g_errno and return true if it is < 0
	if ( numFiles < 0 ) { 
		log(LOG_LOGIC,
		   "net: msg3: readList: numFiles = %li < 0 (max=%li)(sf=%li)",
		    numFiles , max , startFileNum );
		g_errno = EBADENGINEER; 
		// force core dump
		//char *p = NULL;
		//*p = 0;
		return true; 
	}

	// . allocate buffer space
	// . m_scans, m_startpg, m_endpg, m_hintKeys, m_hintOffsets,
	//   m_fileNums, m_lists, m_tfns
	long chunk = sizeof(RdbScan) + // m_scans
		4 +                    // m_startpg
		4 +                    // m_endpg
		//sizeof(key_t) +        // m_hintKeys
		m_ks +                 // m_hintKeys
		4 +                    // m_hintOffsets
		4 +                    // m_fileNums
		sizeof(RdbList) +      // m_lists
		4 ;                    // m_tfns
	long nn   = numFiles;
	if ( pre != -10 ) nn++;
	m_numChunks = nn;
	long need = nn * (chunk);
	m_alloc = m_buf;
	if ( need > (long)MSG3_BUF_SIZE ) {
		m_allocSize = need;
		m_alloc = (char *)mcalloc ( need , "Msg3" );
		if ( ! m_alloc ) {
			log("disk: Could not allocate %li bytes read "
			    "structures to read %s.",need,base->m_dbname);
			return true;
		}
	}
	char *p = m_alloc;
	m_scans       = (RdbScan *)p; p += nn * sizeof(RdbScan);
	m_startpg     = (long    *)p; p += nn * 4;
	m_endpg       = (long    *)p; p += nn * 4;
	//m_hintKeys    = (key_t   *)p; p += nn * sizeof(key_t);
	m_hintKeys    = (char    *)p; p += nn * m_ks;
	m_hintOffsets = (long    *)p; p += nn * 4;
	m_fileNums    = (long    *)p; p += nn * 4;
	m_lists       = (RdbList *)p; p += nn * sizeof(RdbList);
	m_tfns        = (long    *)p; p += nn * 4;
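	// e.g. with nn = 3 files to read, the single buffer is carved into
	// eight parallel 3-entry arrays in the order above: RdbScans,
	// startpg longs, endpg longs, m_ks-byte hint keys, hint offsets,
	// file nums, RdbLists, then tfns (illustrative count only)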
	// sanity check
	if ( p - m_alloc != need ) {
		log(LOG_LOGIC,"disk: Bad malloc in Msg3.cpp.");
		char *xx = NULL; *xx = 0;
	}
	// call constructors
	for ( long i = 0 ; i < nn ; i++ ) m_lists[i].constructor();
	// make fix from up top
	if ( pre != -10 ) m_fileNums [ m_numFileNums++ ] = pre;

	// store them all
	for ( long i = startFileNum ; i < startFileNum + numFiles ; i++ )
		m_fileNums [ m_numFileNums++ ] = i;
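	// e.g. startFileNum=2, numFiles=3 -> m_fileNums = {2,3,4}, with the
	// merge target file prepended first if "pre" was set above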

	// we skip down to here when a syncPoint was used to set the
	// m_fileNums/m_numFileNums array of files to read from
// JAB: warning abatement
// skip:
	// . remove file nums that are being unlinked after a merge now
	// . keep it here (below skip: label) so sync point reads can use it
	long n = 0;
	for ( long i = 0 ; i < m_numFileNums ; i++ ) {
		// skip those that are being unlinked after the merge
		if ( base->m_isUnlinking && 
		     m_fileNums[i] >= base->m_mergeStartFileNum &&
		     m_fileNums[i] <  base->m_mergeStartFileNum + 
		                      base->m_numFilesToMerge      )
			continue;
		// otherwise, keep it
		m_fileNums[n++] = m_fileNums[i];
	}
	m_numFileNums = n;

	// . if root file is being merged, he's file #0, & root file is file #1
	// . this is a hack so caller gets what he wants
	//if ( startFileNum == 0 && base->getFileId(0) == 0 && numFiles == 1 )
	//	numFiles = 2;

	// remember the file range we should scan
	m_numScansStarted    = 0;
	m_numScansCompleted  = 0;
	//m_startKey           = startKey;
	//m_endKey             = endKey;
	//m_constrainKey       = endKey; // set in case justGetEndKey is true
	KEYSET(m_startKey,startKeyArg,m_ks);
	KEYSET(m_endKey,endKeyArg,m_ks);
	KEYSET(m_constrainKey,endKeyArg,m_ks); // set in case justGetEndKey
					       // is true
	m_minRecSizes        = minRecSizes;
	m_compensateForMerge = compensateForMerge;
	// bail if 0 files to scan -- no! need to set startKey/endKey
	if ( numFiles == 0 ) return true;
	// don't read anything if endKey < startKey
	//if ( m_startKey > m_endKey ) return true;
	if ( KEYCMP(m_startKey,m_endKey,m_ks)>0 ) return true;
	// keep the original intact in case g_errno == ETRYAGAIN
	//m_endKeyOrig        = endKey;
	KEYSET(m_endKeyOrig,endKeyArg,m_ks);
	m_minRecSizesOrig   = minRecSizes;
	// start reading at this key
	m_fileStartKey = startKeyArg;
	// start the timer, keep it fast for clusterdb though
	if ( g_conf.m_logTimingDb ) m_startTime = gettimeofdayInMilliseconds();
	// translate base to an id, for the sake of m_msg0
	//char baseId = m_msg0->getRdbId ( base );
	// map ptrs
	RdbMap **maps = base->getMaps();
	// . we now boost m_minRecSizes to account for negative recs 
	// . but not if only reading one list, cuz it won't get merged and
	//   it will be too big to send back
	if ( m_numFileNums > 1 ) compensateForNegativeRecs ( base );
	// . often endKey is too big for an efficient read of minRecSizes bytes
	//   because we end up reading too much from all the files
	// . this will set m_startpg[i], m_endpg[i] for each RdbScan/RdbFile
	//   to ensure we read "minRecSizes" worth of records, not much more
	// . returns the new endKey for all ranges
	// . now this just overwrites m_endKey
	//m_endKey = setPageRanges ( base           ,
	setPageRanges ( base           ,
			m_fileNums     ,
			m_numFileNums  ,
			m_fileStartKey , // start reading @ key
			m_endKey       , // stop reading @ key
			m_minRecSizes  );

	// . NEVER let m_endKey be a negative key, because it will 
	//   always be unmatched, since delbit is cleared
	// . adjusting it here ensures our generated hints are valid
	// . we will use this key to call constrain() with
	//m_constrainKey = m_endKey;
	//if ( ( m_constrainKey.n0 & 0x01) == 0x00 ) 
	//	m_constrainKey -= (unsigned long)1;
	KEYSET(m_constrainKey,m_endKey,m_ks);
	if ( KEYNEG(m_constrainKey) )
		KEYSUB(m_constrainKey,1,m_ks);

	// if m_endKey splits some keys that should be together, we need to
	// decrease it so such a split doesn't happen. 
	//if ( m_endKey != m_endKeyOrig && m_rdbId==RDB_TFNDB && numFiles > 0){
	/*
	if ( KEYCMP(m_endKey,m_endKeyOrig,m_ks)!=0 && m_rdbId==RDB_TFNDB && 
	     numFiles > 0 ) {
		// . drop the docid down one and max out the tfn...
		// . we may lose some recs when we call constrain, but at least
		//   we are guaranteed not to split a sequence with the same
		//   docid but different tfns... thus the disk merge will
		//   then work correctly. before we were splitting these
		//   sequence between successive disk reads and they were not
		//   getting annihilated together in the call to indexMerge_r()
		long long d = g_tfndb.getDocId ( (key_t *)&m_endKey );
		if ( d > 0 ) d = d - 1LL;
		//m_constrainKey = g_tfndb.makeMaxKey(d);
		*(key_t *)m_constrainKey = g_tfndb.makeMaxKey(d);
		// set the half bit on
		//m_constrainKey.n0 |= 0x02;
		*m_constrainKey |= 0x02;
		// note it
		//logf(LOG_DEBUG,"oldukey.n1=%lx n0=%llx new.n1=%lx n0=%llx",
		//     m_endKey.n1,m_endKey.n0,
		//     m_constrainKey.n1,m_constrainKey.n0);
	}
	*/

	// Msg5 likes to get the endkey for getting the list from the tree
	if ( justGetEndKey ) return true;

	// sanity check
	if ( m_numFileNums > nn ) {
		log(LOG_LOGIC,"disk: Failed sanity check in Msg3.");
		char *xx = NULL; *xx = 0;
	}

	// debug msg
	//log("msg3 getting list (msg5=%lu)",m_state);
	// . MDW removed this -- go ahead an end on a delete key
	// . RdbMerge might not pick it up this round, but oh well
	// . so we can have both positive and negative co-existing in same file
	// make sure the last bit is set so we don't end on a delete key
	//m_endKey.n0 |= 0x01LL;
	// . now start reading/scanning the files
	// . our m_scans array starts at 0
	for ( long i = 0 ; i < m_numFileNums ; i++ ) {
		// get the page range
		//long p1 = m_startpg [ i ];
		//long p2 = m_endpg   [ i ];
		//#ifdef _SANITYCHECK_
		long fn = m_fileNums[i];
		// this can happen somehow!
		if ( fn < 0 ) {
			log(LOG_LOGIC,"net: msg3: fn=%li. Bad engineer.",fn);
			continue;
		}
		// sanity check
		if ( i > 0 && m_fileNums[i-1] >= fn ) {
			log(LOG_LOGIC,
			    "net: msg3: files must be read in order "
			    "from oldest to newest so RdbList::indexMerge_r "
			    "works properly. Otherwise, corruption will "
			    "result. ");
			char *xx = NULL; *xx = 0;
			return true;
		}
		// . sanity check?
		// . no, we must get again since we turn on endKey's last bit
		long p1 , p2;
		maps[fn]->getPageRange ( m_fileStartKey , 
					m_endKey       , 
					&p1            , 
					&p2            ,
					NULL           );
		//if ( p1 != p1c || p2 != p2c ) {
		//	fprintf(stderr,"Msg3::bad page range\n");
		//	sleep(50000);
		//}
		// sanity check, each endpg's key should be > endKey
		//if ( p2 < maps[fn]->getNumPages() && 
		//     maps[fn]->getKey ( p2 ) <= m_endKey ) {
		//	fprintf(stderr,"Msg3::bad page range 2\n");
		//	sleep(50000);
		//}
		//#endif
		//long p1 , p2; 
		//maps[fn]->getPageRange (startKey,endKey,minRecSizes,&p1,&p2);
		// now get some read info
		long long offset      = maps[fn]->getAbsoluteOffset ( p1 );
		long      bytesToRead = maps[fn]->getRecSizes ( p1, p2, false);
		// max out the endkey for this list
		// debug msg
		//#ifdef _DEBUG_		
		//if ( minRecSizes == 2000000 ) 
		//log("Msg3:: reading %li bytes from file #%li",bytesToRead,i);
		//#endif
		// inc our m_numScans
		m_numScansStarted++;
		// . keep stats on our disk accesses
		// . count disk seeks (assuming no fragmentation)
		// . count disk bytes read
		if ( bytesToRead > 0 ) {
			base->m_rdb->didSeek (             );
			base->m_rdb->didRead ( bytesToRead );
		}
		// . the startKey may be different for each RdbScan class
		// . RdbLists must have all keys within their [startKey,endKey]
		// . therefore set startKey individually from first page in map
		// . this endKey must be >= m_endKey 
		// . this startKey must be < m_startKey
		//key_t startKey = maps[fn]->getKey ( p1 );
		//key_t endKey   = maps[fn]->getKey ( p2 );
		char startKey2 [ MAX_KEY_BYTES ];
		char endKey2   [ MAX_KEY_BYTES ];
		maps[fn]->getKey ( p1 , startKey2 );
		maps[fn]->getKey ( p2 , endKey2 );
		//char *startKey = maps[fn]->getKeyPtr ( p1 );
		//char *endKey   = maps[fn]->getKeyPtr ( p2 );
		// store in here
		m_startpg [ i ] = p1;
		m_endpg   [ i ] = p2;

		// . we read UP TO that endKey, so reduce by 1
		// . but iff p2 is NOT the last page in the map/file
		// . maps[fn]->getKey(lastPage) will return the LAST KEY
		//   and maps[fn]->getOffset(lastPage) the length of the file
		//if ( maps[fn]->getNumPages()!=p2) endKey -=(unsigned long)1;
		if ( maps[fn]->getNumPages() != p2 ) KEYSUB(endKey2,1,m_ks);
		// otherwise, if we're reading all pages, then force the
		// endKey to virtual infinity
		//else endKey.setMax();
		else KEYMAX(endKey2,m_ks);

		// . set up the hints
		// . these are only used if we are only reading from 1 file
		// . these are used to call constrain() so we can constrain
		//   the end of the list w/o looping through all the recs
		//   in the list
		long h2 = p2 ;
		// decrease by one page if we're on the last page
		if ( h2 > p1 && maps[fn]->getNumPages() == h2 ) h2--;
		// . decrease hint page until key is <= endKey on that page
		//   AND offset is NOT -1 because the old way would give
		//   us hints past the endkey
		// . also decrease so we can constrain on minRecSizes in
		//   case we're the only list being read
		// . use >= m_minRecSizes instead of >, otherwise we may
		//   never be able to set "size" in RdbList::constrain()
		//   because "p" could equal "maxPtr" right away
		while ( h2 > p1 && 
			//( maps[fn]->getKey   (h2) > m_constrainKey ||
		      (KEYCMP(maps[fn]->getKeyPtr(h2),m_constrainKey,m_ks)>0||
			  maps[fn]->getOffset(h2) == -1            ||
			  maps[fn]->getAbsoluteOffset(h2) - offset >=
			  m_minRecSizes ) )
			h2--;
		// now set the hint
		m_hintOffsets [ i ] = maps[fn]->getAbsoluteOffset ( h2 ) -
			              maps[fn]->getAbsoluteOffset ( p1 ) ;
		//m_hintKeys    [ i ] = maps[fn]->getKey            ( h2 );
		KEYSET(&m_hintKeys[i*m_ks],maps[fn]->getKeyPtr(h2),m_ks);
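		// the (offset,key) hint pair lets RdbList::constrain() jump
		// straight to a known record boundary near the tail of a
		// single-file read rather than parsing every rec from the
		// start of the list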

		// reset g_errno before calling setRead()
		g_errno = 0;
		// . this fix is now in RdbList::checklist_r()
		// . we can now have dup keys, so, we may read in
		//   a rec with key "lastMinKey" even though we don't read
		//   in the first key on the end page, so don't subtract 1...
		//if ( endKey != m_endKeyOrig ) 
		//	endKey += (unsigned long) 1;

		// timing debug
		if ( g_conf.m_logTimingDb )
			log(LOG_TIMING,
			    "net: msg: reading %li bytes from %s file #%li "
			     "(niceness=%li)",
			     bytesToRead,base->m_dbname,i,m_niceness);

		// set the tfn
		if ( m_rdbId == RDB_TITLEDB )
			m_tfns[i] = base->getFileId2(m_fileNums[i]);

		// log huge reads, those hurt us
		if ( bytesToRead > 150000000 ) {
			logf(LOG_INFO,"disk: Reading %li bytes at offset %lli "
			    "from %s.",
			    bytesToRead,offset,base->m_dbname);
		}

		// if any keys in the map are the same report corruption
		char tmpKey    [MAX_KEY_BYTES];
		char lastTmpKey[MAX_KEY_BYTES];
		long ccount = 0;
		if ( bytesToRead     > 10000000      && 
		     bytesToRead / 2 > m_minRecSizes &&
		     base->m_fixedDataSize >= 0        ) {
			for ( long pn = p1 ; pn <= p2 ; pn++ ) {
				maps[fn]->getKey ( pn , tmpKey );
				// lastTmpKey is unset on the first page,
				// so only compare from the second page on
				if ( pn > p1 &&
				     KEYCMP(tmpKey,lastTmpKey,m_ks) == 0 )
					ccount++;
				memcpy(lastTmpKey,tmpKey,m_ks);
			}
		}
		if ( ccount > 10 ) {
			logf(LOG_INFO,"disk: Reading %li bytes from %s file #"
			     "%li when min "
			     "required is %li. Map is corrupt and has %li "
			     "identical consecutive page keys because the "
			     "map was \"repaired\" because out of order keys "
			     "in the index.",
			     (long)bytesToRead,
			     base->m_dbname,fn,
			     (long)m_minRecSizes,
			     (long)ccount);
			m_numScansCompleted++;
			m_errno = ECORRUPTDATA;
			m_hadCorruption = true;
			//m_maxRetries = 0;
			break;
		}

		// . do the scan/read of file #i
		// . this returns false if blocked, true otherwise
		// . this will set g_errno on error
		bool done = m_scans[i].setRead ( base->getFile(m_fileNums[i]),
						 base->m_fixedDataSize  ,
						 offset                 ,
						 bytesToRead            ,
						 startKey2              ,
						 endKey2                ,
						 m_ks                   ,
						 &m_lists[i]            ,
						 this                   ,
						 doneScanningWrapper    ,
						 base->useHalfKeys()    ,
						 m_rdbId                ,
						 m_niceness             ,
						 m_allowPageCache       ,
						 m_hitDisk              ) ;
		// . damn, usually the above will indirectly launch a thread
		//   to do the reading, but it sets g_errno to EINTR,
		//   "interrupted system call"!
		// . i guess the thread does the read w/o blocking and then
		//   queues the signal on g_loop's queue before it exits
		// . try ignoring, and keep going
		if ( g_errno == EINTR ) {
			log("net: Interrupted system call while reading file. "
			    "Ignoring.");
			g_errno = 0;
		}
		// debug msg
		//fprintf(stderr,"Msg3:: reading %li bytes from file #%li,"
		//	"done=%li,offset=%lli,g_errno=%s,"
		//	"startKey=n1=%lu,n0=%llu,  "
		//	"endKey=n1=%lu,n0=%llu\n",
		//	bytesToRead,i,(long)done,offset,mstrerror(g_errno),
		//	m_startKey,m_endKey);
		//if ( bytesToRead == 0 )
		//	fprintf(stderr,"shit\n");
		// if it did not block then it completed, so count it
		if ( done ) m_numScansCompleted++;
		// break on an error, and remember g_errno in case we block
		if ( g_errno && g_errno != ENOTHREADSLOTS ) { 
			long tt = LOG_WARN;
			if ( g_errno == EFILECLOSED ) tt = LOG_INFO;
			log(tt,"disk: Reading %s had error: %s.",
			    base->m_dbname, mstrerror(g_errno));
			m_errno = g_errno; 
			break; 
		}
	}
	// debug test
	//if ( rand() % 100 <= 10 ) m_errno = EIO;

	// if we blocked, return false
	if ( m_numScansCompleted < m_numScansStarted ) return false;
	// . if all scans completed without blocking then wrap it up & ret true
	// . doneScanning may now block if it finds data corruption and must
	//   get the list remotely
	return doneScanning();
}
// . returns false if blocked, true otherwise
// . sets g_errno on error
// . dumps the RdbTree, m_tree, into m_file
// . also sets and writes the RdbMap for m_file
// . we methodically get RdbLists from the RdbTree 
// . dumped recs are ordered by key if "orderedDump" was true in call to set()
//   otherwise, lists are ordered by node #
// . we write each list of recs to the file until the whole tree has been done
// . we delete all records in list from the tree after we've written the list
// . if a cache was provided we incorporate the list into the cache before
//   deleting it from the tree to keep the cache in sync. NO we do NOT!
// . called again by writeBuf() when it's done writing the whole list
bool RdbDump::dumpTree ( bool recall ) {
	// set up some vars
	//int32_t  nextNode;
	//key_t maxEndKey;
	//maxEndKey.setMax();
	char maxEndKey[MAX_KEY_BYTES];
	KEYMAX(maxEndKey,m_ks);
	// if dumping statsdb, we can only dump records 30 seconds old or
	// more because Statsdb.cpp can "back modify" newer records in the
	// tree: a query that took 10 seconds may come in and need to add a
	// partial stat to each of the buckets for those 10 seconds.
	// we use global time at this juncture
	if ( m_rdb->m_rdbId == RDB_STATSDB ) {
		int32_t nowSecs = getTimeGlobal();
		StatKey *sk = (StatKey *)maxEndKey;
		sk->m_zero      = 0x01;
		sk->m_labelHash = 0xffffffff;
		// leave last 60 seconds in there just to be safe
		sk->m_time1     = nowSecs - 60;
	}

	// this list will hold the list of nodes/recs from m_tree
	m_list = &m_ourList;
	// convert coll to collnum
	//collnum_t collnum = g_collectiondb.getCollnum ( m_coll );
	// a collnum of -1 is for collectionless rdbs
	//if ( collnum < 0 ) {
	//	//if ( g_catdb->getRdb() == m_rdb )
	//	if ( ! m_rdb->m_isCollectionLess ) {
	//		char *xx=NULL;*xx=0; //return true;
	//	}
	//	g_errno = 0;
	//	collnum = 0;
	//}
	// getMemOccupiedForList2() can take some time, so breathe
	int32_t niceness = 1;
 loop:
	// if the lastKey was the max end key last time then we're done
	if ( m_rolledOver     ) return true;
	// this is set to -1 when we're done with our unordered dump
	if ( m_nextNode == -1 ) return true;
	// . NOTE: list's buffer space should be re-used!! (TODO)
	// . "lastNode" is set to the last node # in the list
	bool status = true;
	//if ( ! m_orderedDump ) {
	//	status = ((RdbTree *)m_tree)->getListUnordered ( m_nextNode ,
	//							 m_maxBufSize ,
	//							 m_list , 
	//							 &nextNode );
	//	// this is -1 when no more nodes are left
	//	m_nextNode = nextNode;
	//}
	// "lastKey" is set to the last key in the list
	//else {
	{

		// can we remove neg recs?
		// class RdbBase *base = m_rdb->getBase(m_collnum);
		// bool removeNegRecs = false;
		// if ( base->m_numFiles <= 0 ) removeNegRecs = true;

		if ( recall ) goto skip;

		// debug msg
		//log("RdbDump:: getting list");
		m_t1 = gettimeofdayInMilliseconds();
		if ( m_tree )
			status = m_tree->getList ( m_collnum     ,
						   m_nextKey     ,
						   maxEndKey     ,
						   m_maxBufSize  , // max recSizes
						   m_list        ,
						   &m_numPosRecs ,
						   &m_numNegRecs ,
						   m_useHalfKeys ,
						   niceness      );
		else if ( m_buckets )
			status = m_buckets->getList ( m_collnum     ,
						      m_nextKey     ,
						      maxEndKey     ,
						      m_maxBufSize  , // max recSizes
						      m_list        ,
						      &m_numPosRecs ,
						      &m_numNegRecs ,
						      m_useHalfKeys );


		// don't dump out any neg recs if it is our first time dumping
		// to a file for this rdb/coll. TODO: implement this later.
		//if ( removeNegRecs )
		//	m_list.removeNegRecs();

 		// if(!m_list->checkList_r ( false , // removeNegRecs?
 		// 			 false , // sleep on problem?
 		// 			 m_rdb->m_rdbId )) {
 		// 	log("db: list to dump is not sane!");
		// 	char *xx=NULL;*xx=0;
 		// }


	skip:
		int64_t t2;
		//key_t lastKey;
		char *lastKey;
		// if error getting list (out of memory?)
		if ( ! status ) goto hadError;
		// debug msg
		t2 = gettimeofdayInMilliseconds();
		log(LOG_INFO,"db: Get list took %"INT64" ms. "
		    "%"INT32" positive. %"INT32" negative.",
		    t2 - m_t1 , m_numPosRecs , m_numNegRecs );
		// keep a total count for reporting when done
		m_totalPosDumped += m_numPosRecs;
		m_totalNegDumped += m_numNegRecs;
		// . check the list we got from the tree for problems
		// . ensures keys are ordered from lowest to highest as well
		//#ifdef GBSANITYCHECK
		if ( g_conf.m_verifyWrites ) {
			char *s = "none";
			if ( m_rdb ) s = getDbnameFromId(m_rdb->m_rdbId);
			log("dump: verifying list before dumping (rdb=%s)",s);
			m_list->checkList_r ( false , // removeNegRecs?
					      false , // sleep on problem?
					      m_rdb->m_rdbId );
		}
		// if list is empty, we're done!
		if ( status && m_list->isEmpty() ) {
			// consider that a rollover?
			if ( m_rdb->m_rdbId == RDB_STATSDB )
				m_rolledOver = true;
			return true;
		}
		// get the last key of the list
		lastKey = m_list->getLastKey();
		// advance m_nextKey
		//m_nextKey  = lastKey ;
		//m_nextKey += (uint32_t)1;
		//if ( m_nextKey < lastKey ) m_rolledOver = true;
		KEYSET(m_nextKey,lastKey,m_ks);
		KEYADD(m_nextKey,1,m_ks);
		if (KEYCMP(m_nextKey,lastKey,m_ks)<0) m_rolledOver = true;
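		// adding one can wrap m_nextKey around past the max key, in
		// which case it compares below lastKey; that wrap means we
		// have dumped through the end of the keyspace, so the next
		// pass through the loop finishes up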
		// debug msg
		//log(0,"RdbDump:lastKey.n1=%"UINT32",n0=%"UINT64"",lastKey.n1,lastKey.n0);
		//log(0,"RdbDump:next.n1=%"UINT32",n0=%"UINT64"",m_nextKey.n1,m_nextKey.n0);
	}
	// . return true on error, g_errno should have been set
	// . this is probably out of memory error
	if ( ! status ) {
	hadError:
		log("db: Had error getting data for dump: %s. Retrying.", 
		    mstrerror(g_errno));
		// debug msg
		//log("RdbDump::getList: sleeping and retrying");
		// retry for the remaining two types of errors
		if (!g_loop.registerSleepCallback(1000,this,tryAgainWrapper2)){
			log("db: Retry failed. Could not register callback.");
			return true;
		}
		// wait for sleep
		return false;
	}
	// if list is empty, we're done!
	if ( m_list->isEmpty() ) return true;
	// . set m_firstKeyInQueue and m_lastKeyInQueue
	// . this doesn't work if you're doing an unordered dump, but we should
	//   not allow adds when closing
	m_lastKeyInQueue  = m_list->getLastKey();
	//m_firstKeyInQueue = m_list->getCurrentKey();
	m_list->getCurrentKey(m_firstKeyInQueue);
	// . write this list to disk
	// . returns false if blocked, true otherwise
	// . sets g_errno on error
	// . if this blocks it should call us (dumpTree() back)
	if ( ! dumpList ( m_list , m_niceness , false ) ) return false;
	// close up shop on a write/dumpList error
	if ( g_errno ) return true;
	// . if dumpList() did not block then keep on truckin'
	// . otherwise, wait for callback of dumpTree()
	goto loop;
}
// . m_key bitmap in statsdb:
//   tttttttt tttttttt tttttttt tttttttt  t = time in milliseconds, t1
//   tttttttt tttttttt tttttttt tttttttt
//   hhhhhhhh hhhhhhhh hhhhhhhh hhhhhhhh  h = hash32 of m_title
// . returns false if could not add stat, true otherwise
// . do not set g_errno if we return false just to keep things simple
// . we only add the stat to our local statsdb rdb, but because
//   we might be dumping statsdb to disk or something it is possible
//   we get an ETRYAGAIN error, so we try to accumulate stats in a
//   local buffer in that case
// . "label" is something like "queryLatency" or whatever
// . [t1,t2] are the time endpoints for the operation being measured
// . "value" is usually "numBytes", or a quantity indicator of whatever
//   was processed.
// . oldVal and newVal reflect a state change, like maybe changing the
//   value of a parm. typically for such things t1 equals t2
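// . in the code below those fields live in a StatKey: m_zero holds 0x01
//   (or a userId with its low bit forced on), m_time1 the second-aligned
//   timestamp, and m_labelHash the hash32 of the label (or the parmHash)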
bool Statsdb::addStat ( int32_t        niceness ,
			const char    *label    ,
			int64_t   t1Arg    ,
			int64_t   t2Arg    ,
			float     value    , // y-value really, "numBytes"
			int32_t   parmHash ,
			float     oldVal   ,
			float     newVal   ,
			int32_t   userId32 ) {

	if ( ! g_conf.m_useStatsdb ) return true;

	// so Process.cpp can turn it off when dumping core
	if ( m_disabled ) return true;

	// not thread safe!
	//if ( g_threads.amThread() ) { 
	//	log("statsdb: called from thread");
	//	g_process.shutdownAbort(true); 
	//}

	// . for now we can only add stats if we are synced with host #0 clock
	// . this is kinda a hack and it would be nice to not miss stats!
	if ( ! isClockInSync() ) return true;

	RdbTree *tree = &m_rdb.m_tree;
	// do not add stats to our tree if it is loading
	if ( tree->m_isLoading ) return true;

	// convert into host #0 synced time
	t1Arg = localToGlobalTimeMilliseconds ( t1Arg );
	t2Arg = localToGlobalTimeMilliseconds ( t2Arg );

	// sanity check
	if ( ! label ) { g_process.shutdownAbort(true); }

	int32_t labelHash;
	if ( parmHash ) labelHash = parmHash;
	else            labelHash = hash32n ( label );

	// fix it for parm changes, and docs_indexed stat, etc.
	if ( t1Arg == t2Arg ) t2Arg++;

	// how many SECONDS did the op take? (convert from ms to secs)
	float dtms   = (t2Arg - t1Arg);
	float dtSecs = dtms / 1000.0;

	// we have already flushed stats 30+ seconds old, so if this op took
	// 30 seconds, discard it!
	if ( dtSecs >= 30 ) {
		//log("statsdb: stat is %" PRId32" secs > 30 secs old, discarding.",
		//   (int32_t)dtSecs);
		return true;
	}

	int64_t nextup;

	// loop over all "second" buckets
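	// a worked example with illustrative numbers: an op spanning
	// t1Arg=1500ms to t2Arg=3200ms (dtms=1700) hits three second-aligned
	// slices, [1500,2000) [2000,3000) [3000,3200), weighted roughly
	// .29/.59/.12, so each second's bucket accumulates its share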
	for ( int64_t tx = t1Arg ; tx < t2Arg ; tx = nextup ) {
		// get next second-aligned point in milliseconds
		nextup = ((tx +1000)/ 1000) * 1000;
		// truncate if we need to
		if ( nextup > t2Arg ) nextup = t2Arg;
		// . how much of the stat is in this time interval?
		// . like if operation took 3 seconds, we might cover
		//   50% of the first 1-second interval. so we use this
		//   as a weight for the stats we keep for that particular
		//   second. then we can plot a point for each second
		//   in time which is an average of all the queries that
		//   were in progress at that second.
		float fractionTime = ((float)(nextup - tx)) / dtms;

		// . get the time point bucket in which this stat belongs
		// . every "second" in time has a bucket
		uint32_t t1 = tx / 1000;

		StatKey sk;
		memset(&sk,0,sizeof(sk));
		sk.m_zero      = 0x01; // make it a positive key
		sk.m_time1     = t1;
		sk.m_labelHash = labelHash;

		// so we can show just the stats for a particular user...
		if ( userId32 ) {
			sk.m_zero = userId32;
			// make it positive
			sk.m_zero |= 0x01; 
		}

		// if we already have added a bucket for this "second" then
		// get it from the tree so we can add to its accumulated stats.
		int32_t node1 = tree->getNode ( 0 , (char *)&sk );
		int32_t node2;

		StatData *sd;

		// get that stat, see if we are accumulating it already
		if ( node1 >= 0 ) 
			sd = (StatData *)tree->getData ( node1 );

		// make a new one if not there
		else {
			StatData tmp;
			// init it
			memset(&tmp,0,sizeof(tmp));
			tmp.m_totalOps      = 0.0;
			tmp.m_totalQuantity = 0.0;
			tmp.m_totalTime     = 0.0;

			// save this
			int32_t saved = g_errno;
			// need to add using rdb so it can gbmemcpy the data
			if ( ! m_rdb.addRecord ( (collnum_t)0 ,
						 (char *)&sk,
						 (char *)&tmp,
						 sizeof(StatData),
						 niceness ) ) {
				if ( g_errno != ETRYAGAIN )
					log("statsdb: add rec failed: %s",
					    mstrerror(g_errno));
				// caller does not care about g_errno
				g_errno = saved;
				return false;
			}
			// caller does not care about g_errno
			g_errno = saved;
			// get the node in the tree
			//sd = (StatData *)tree->getData ( node1 );
			// must be there!
			node2 = tree->getNode ( 0 , (char *)&sk );
			// must be there!
			if ( node2 < 0 ) { g_process.shutdownAbort(true); }
			// point to it
			sd = (StatData *)tree->getData ( node2 );
		}

		// use the milliseconds elapsed as the value if none given
		//if ( value == 0 && ! parmHash )
		//	value = t2Arg - t1Arg;

		// if we got it for this time, accumulate it
		// convert x into pixel position
		sd->m_totalOps      += 1      * fractionTime;
		sd->m_totalQuantity += value  * fractionTime;
		sd->m_totalTime     += dtSecs * fractionTime;
		
		if ( ! parmHash ) continue;

		sd->m_totalOps = 0;
		sd->m_totalQuantity = oldVal;
		sd->m_newVal        = newVal;
		// no fractions for this!
		break;
	}

	//logf(LOG_DEBUG,"statsdb: sp=0x%" PRIx32,(int32_t)sp);

	return true;
}	
// . return false if blocked, true otherwise
// . sets g_errno on error
// . this one is also called by RdbMerge to dump lists
bool RdbDump::dumpList ( RdbList *list , int32_t niceness , bool recall ) {

	// if we had a write error and are being recalled...
	if ( recall ) { m_offset -= m_bytesToWrite; goto recallskip; }
	// assume we don't hack the list
	m_hacked = false;
	m_hacked12 = false;
	// save ptr to list... why?
	m_list = list;
	// nothing to do if list is empty
	if ( m_list->isEmpty() ) return true;
	// we're now in dump mode again 
	m_isDumping = true;
	//#ifdef GBSANITYCHECK
	// don't check list if we're dumping an unordered list from tree!
	if ( g_conf.m_verifyWrites && m_orderedDump ) {
		m_list->checkList_r ( false /*removedNegRecs?*/ );
		// print list stats
		// log("dump: sk=%s ",KEYSTR(m_list->m_startKey,m_ks));
		// log("dump: ek=%s ",KEYSTR(m_list->m_endKey,m_ks));
	}
	//#endif

	// before calling RdbMap::addList(), always reset list ptr
	// since we no longer call this in RdbMap::addList() so we don't
	// mess up the possible HACK below
	m_list->resetListPtr();

	// . SANITY CHECK
	// . ensure first key is >= last key added to the map
	if ( m_offset > 0 && m_map ) {
		//key_t k       = m_list->getCurrentKey();
		char k[MAX_KEY_BYTES];
		m_list->getCurrentKey(k);
		//key_t lastKey = m_map->getLastKey    (); // m_lastKey
		char lastKey[MAX_KEY_BYTES];
		m_map->getLastKey(lastKey);
		//char *lastKey = m_map->getLastKey();
		//if ( k <= lastKey ) {
		if ( KEYCMP(k,lastKey,m_ks)<=0 ) {
			log(LOG_LOGIC,"db: Dumping list key out of order. "
			    //"lastKey.n1=%"XINT32" n0=%"XINT64" k.n1=%"XINT32" n0=%"XINT64"",
			    //lastKey.n1,lastKey.n0,k.n1,k.n0);
			    "lastKey=%s k=%s",
			    KEYSTR(lastKey,m_ks),
			    KEYSTR(k,m_ks));
			g_errno = EBADENGINEER;
			//return true;
			char *xx = NULL; *xx = 0;
		}
	}

	if ( g_conf.m_verifyWrites ) {
		char rdbId = 0;
		if ( m_rdb ) rdbId = m_rdb->m_rdbId;
		m_list->checkList_r(false,false,rdbId);//RDB_POSDB);
		m_list->resetListPtr();
	}
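	// background for the two "HACK" blocks below, as the bit-twiddling
	// implies: posdb keys are 18 bytes but prefix-compressed on disk.
	// a key sharing its top 6 bytes with the previous key is stored as
	// 12 bytes with the 0x02 "half" bit set in its lowest byte; one
	// sharing its top 12 bytes is stored as 6 bytes with both 0x02 and
	// 0x04 set. the first key of each dumped list must follow the same
	// rule relative to the last key of the previous dump, hence the
	// byte shuffling below.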

	// HACK! POSDB
	if ( m_ks == 18 && m_orderedDump && m_offset > 0 ) {
		char k[MAX_KEY_BYTES];
		m_list->getCurrentKey(k);
		// . same top 6 bytes as last key we added?
		// . if so, we should only add 6 bytes from this key, not 12
		//   so on disk it is compressed consistently
		if ( memcmp ( (k             ) + (m_ks-12) ,
			      (m_prevLastKey ) + (m_ks-12) , 12 ) == 0 ) {
			char tmp[MAX_KEY_BYTES];
			char *p = m_list->getList();
			// swap high 12 bytes with low 6 bytes for first key
			gbmemcpy ( tmp   , p            , m_ks-12 );
			gbmemcpy ( p     , p + (m_ks-12) ,      12 );
			gbmemcpy ( p + 12, tmp          , m_ks-12 );
			// big hack here
			m_list->m_list         = p + 12;
			m_list->m_listPtr      = p + 12;
			m_list->m_listPtrLo    = p ;
			m_list->m_listPtrHi    = p + 6;
			m_list->m_listSize    -= 12 ;
			// turn on both bits to indicate double compression
			*(p+12) |= 0x06;
			m_hacked12 = true;
		}
	}

	// . HACK
	// . if we're doing an ordered dump then hack the list's first 12 byte
	//   key to make it a 6 byte iff the last key we dumped last time
	//   shares the same top 6 bytes as the first key of this list
	// . this way we maintain compression consistency on the disk
	//   so IndexTable.cpp can expect all 6 byte keys for the same termid
	//   and RdbList::checkList_r() can expect the half bits to always be
	//   on when they can be on
	// . IMPORTANT: calling m_list->resetListPtr() will mess this HACK up!!
	if ( m_useHalfKeys && m_orderedDump && m_offset > 0 && ! m_hacked12 ) {
		//key_t k = m_list->getCurrentKey();	
		char k[MAX_KEY_BYTES];
		m_list->getCurrentKey(k);
		// . same top 6 bytes as last key we added?
		// . if so, we should only add 6 bytes from this key, not 12
		//   so on disk it is compressed consistently
		//if ( memcmp ( ((char *)&k             ) + 6 ,
		//	      ((char *)&m_prevLastKey ) + 6 , 6 ) == 0 ) {
		if ( memcmp ( (k             ) + (m_ks-6) ,
			      (m_prevLastKey ) + (m_ks-6) , 6 ) == 0 ) {
			m_hacked = true;
			//char tmp[6];
			char tmp[MAX_KEY_BYTES];
			char *p = m_list->getList();
			//gbmemcpy ( tmp   , p     , 6 );
			//gbmemcpy ( p     , p + 6 , 6 );
			//gbmemcpy ( p + 6 , tmp   , 6 );
			gbmemcpy ( tmp   , p            , m_ks-6 );
			gbmemcpy ( p     , p + (m_ks-6) ,      6 );
			gbmemcpy ( p + 6 , tmp          , m_ks-6 );
			// big hack here
			m_list->m_list       = p + 6;
			m_list->m_listPtr    = p + 6;
			// make this work for POSDB, too
			m_list->m_listPtrLo  = p + 6 + 6;
			m_list->m_listPtrHi  = p ;
			m_list->m_listSize  -= 6 ;
			// hack on the half bit, too
			*(p+6) |= 0x02;
		}
	}

	// update old last key
	//m_prevLastKey = m_list->getLastKey();
	m_list->getLastKey(m_prevLastKey);

	// now write it to disk
	m_buf          = m_list->getList    ();
	m_bytesToWrite = m_list->getListSize();
	//#ifdef GBSANITYCHECK
	//if (m_list->getListSize()!=m_list->getListEnd() - m_list->getList()){
	//	log("RdbDump::dumpList: major problem here!");
	//	sleep(50000);
	//}
	//#endif
 recallskip:
	// make sure we have enough mem to add to map after a successful
	// dump up here, otherwise, if we write it and fail to add to map
	// the map is not in sync if we core thereafter
	if ( m_addToMap && m_map && ! m_map->prealloc ( m_list ) ) {
		log("db: Failed to prealloc list into map: %s.",
		    mstrerror(g_errno));
		// g_errno should be set to something if that failed
		if ( ! g_errno ) { char *xx = NULL; *xx = 0; }
		return true;
	}
	// tab to the old offset
	int64_t offset = m_offset;
	// might as well update the offset now, even before write is done
	m_offset += m_bytesToWrite ;
	// write thread is out
	m_writing = true;
	//m_bytesWritten = 0;

	// sanity check
	//log("dump: writing %"INT32" bytes at offset %"INT64"",m_bytesToWrite,offset);

	// . if we're called by RdbMerge directly use m_callback/m_state
	// . otherwise, use doneWritingWrapper() which will call dumpTree()
	// . BigFile::write() return 0 if blocked,-1 on error,>0 on completion
	// . it also sets g_errno on error
	bool isDone = m_file->write ( m_buf          ,
				      m_bytesToWrite ,
				      offset         ,
				      &m_fstate      ,
				      this           ,
				      doneWritingWrapper ,
				      niceness         );
	// debug msg
	//log("RdbDump dumped %"INT32" bytes, done=%"INT32"\n",
	//	m_bytesToWrite,isDone); 
	// return false if it blocked
	if ( ! isDone ) return false;
	// done writing
	m_writing = false;
	// return true on error
	if ( g_errno    ) return true;
	// . delete list from tree, incorporate list into cache, add to map
	// . returns false if blocked, true otherwise, sets g_errno on error
	// . will only block in calling updateTfndb()
	return doneDumpingList ( true );
}
bool sendReply ( void *state ) {
	// get the state properly
	Msg7 *msg7= (Msg7 *) state;

	GigablastRequest *gr = &msg7->m_gr;

	// extract info from state
	TcpSocket *sock = gr->m_socket;

	XmlDoc *xd = &msg7->m_xd;
	// log it
	//if ( msg7->m_url[0] ) xd->logIt();

	// msg7 has the docid for what we injected, iff g_errno is not set
	//long long docId  = msg7->m_msg7.m_docId;
	//long      hostId = msg7->m_msg7.m_hostId;
	long long docId  = xd->m_docId;
	long      hostId = 0;//msg7->m_msg7.m_hostId;

	// set g_errno to index code
	if ( xd->m_indexCodeValid && xd->m_indexCode && ! g_errno )
		g_errno = xd->m_indexCode;

	char format = gr->m_hr.getReplyFormat();

	// no url parm?
	if ( ! g_errno && ! gr->m_url && format != FORMAT_HTML )
		g_errno = EMISSINGINPUT;

	if ( g_errno && g_errno != EDOCUNCHANGED ) {
		long save = g_errno;
		mdelete ( msg7, sizeof(Msg7) , "PageInject" );
		delete (msg7);
		g_errno = save;
		char *msg = mstrerror(g_errno);
		return g_httpServer.sendErrorReply(sock,save,msg,NULL);
	}

	char abuf[320];
	SafeBuf am(abuf,320,0,false);
	am.setLabel("injbuf");
	char *ct = NULL;

	// a success reply, include docid and url i guess
	if ( format == FORMAT_XML ) {
		am.safePrintf("<response>\n");
		am.safePrintf("\t<statusCode>%li</statusCode>\n",
			      (long)g_errno);
		am.safePrintf("\t<statusMsg><![CDATA[");
		am.cdataEncode(mstrerror(g_errno));
		am.safePrintf("]]></statusMsg>\n");
		am.safePrintf("\t<docId>%lli</docId>\n",xd->m_docId);
		if ( gr->m_getSections ) {
			SafeBuf *secBuf = xd->getInlineSectionVotingBuf();
			am.safePrintf("\t<htmlSrc><![CDATA[");
			if ( secBuf->length() ) 
				am.cdataEncode(secBuf->getBufStart());
			am.safePrintf("]]></htmlSrc>\n");
		}
		am.safePrintf("</response>\n");
		ct = "text/xml";
	}

	if ( format == FORMAT_JSON ) {
		am.safePrintf("{\"response\":{\n");
		am.safePrintf("\t\"statusCode\":%li,\n",(long)g_errno);
		am.safePrintf("\t\"statusMsg\":\"");
		am.jsonEncode(mstrerror(g_errno));
		am.safePrintf("\",\n");
		am.safePrintf("\t\"docId\":%lli,\n",xd->m_docId);
		if ( gr->m_getSections ) {
			SafeBuf *secBuf = xd->getInlineSectionVotingBuf();
			am.safePrintf("\t\"htmlSrc\":\"");
			if ( secBuf->length() ) 
				am.jsonEncode(secBuf->getBufStart());
			am.safePrintf("\",\n");
		}
		// subtract ",\n"
		am.m_length -= 2;
		am.safePrintf("\n}\n}\n");
		ct = "application/json";
	}
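	// a successful JSON reply then looks roughly like (docId made up):
	// {"response":{
	// 	"statusCode":0,
	// 	"statusMsg":"Success",
	// 	"docId":123456
	// }
	// }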

	if ( format == FORMAT_XML || format == FORMAT_JSON ) {
		mdelete ( msg7, sizeof(Msg7) , "PageInject" );
		delete (msg7);
		return g_httpServer.sendDynamicPage(sock,
						    am.getBufStart(),
						    am.length(),
						    0,
						    false,
						    ct );
	}

	//
	// debug
	//

	/*
	// now get the meta list, in the process it will print out a 
	// bunch of junk into msg7->m_pbuf
	if ( xd->m_docId ) {
		char *metalist = xd->getMetaList ( 1,1,1,1,1,1 );
		if ( ! metalist || metalist==(void *)-1){char *xx=NULL;*xx=0;}
		// print it out
		SafeBuf *pbuf = &msg7->m_sbuf;
		xd->printDoc( pbuf );
		bool status = g_httpServer.sendDynamicPage( msg7->m_socket , 
							   pbuf->getBufStart(),
							    pbuf->length() ,
							    -1, //cachtime
							    false ,//postreply?
							    NULL, //ctype
							    -1 , //httpstatus
							    NULL,//cookie
							    "utf-8");
		// delete the state now
		mdelete ( st , sizeof(Msg7) , "PageInject" );
		delete (st);
		// return the status
		return status;
	}
	*/
	//
	// end debug
	//

	char *url = gr->m_url;
	
	// . if we're talking w/ a robot he doesn't care about this crap
	// . send him back the error code (0 means success)
	if ( url && gr->m_shortReply ) {
		char buf[1024*32];
		char *p = buf;
		// return docid and hostid
		if ( ! g_errno ) p += sprintf ( p , 
					   "0,docId=%lli,hostId=%li," , 
					   docId , hostId );
		// print error number here
		else  p += sprintf ( p , "%li,0,0,", (long)g_errno );
		// print error msg out, too or "Success"
		p += sprintf ( p , "%s", mstrerror(g_errno));
		mdelete ( msg7, sizeof(Msg7) , "PageInject" );
		delete (msg7);
		return g_httpServer.sendDynamicPage ( sock,buf, gbstrlen(buf) ,
						      -1/*cachetime*/);
	}

	SafeBuf sb;

	// print admin bar
	g_pages.printAdminTop ( &sb, sock , &gr->m_hr );

	// print a response msg if rendering the page after a submission
	if ( g_errno )
		sb.safePrintf ( "<center>Error injecting url: <b>%s[%i]</b>"
				"</center>", 
				mstrerror(g_errno) , g_errno);
	else if ( (gr->m_url&&gr->m_url[0]) ||
		  (gr->m_queryToScrape&&gr->m_queryToScrape[0]) )
		sb.safePrintf ( "<center><b>Sucessfully injected %s"
				"</center><br>"
				, xd->m_firstUrl.m_url
				);


	// print the table of injection parms
	g_parms.printParmTable ( &sb , sock , &gr->m_hr );


	// clear g_errno, if any, so our reply send goes through
	g_errno = 0;
	// calculate buffer length
	//long bufLen = p - buf;
	// nuke state
	mdelete ( msg7, sizeof(Msg7) , "PageInject" );
	delete (msg7);
	// . send this page
	// . encapsulates in html header and tail
	// . make a Mime
	// . i thought we need -2 for cacheTime, but i guess not
	return g_httpServer.sendDynamicPage (sock, 
					     sb.getBufStart(),
					     sb.length(), 
					     -1/*cachetime*/);
}
// . delete list from tree, incorporate list into cache, add to map
// . returns false if blocked, true otherwise, sets g_errno on error
bool RdbDump::doneDumpingList ( bool addToMap ) {
	// we can get suspended when gigablast is shutting down, in which
	// case the map may have been deleted. only RdbMerge suspends its
	// m_dump class, not Rdb::m_dump. return false so caller never
	// gets called back. we can not resume from this suspension!
	//if ( m_isSuspended ) return false;
	// . if error was EFILECLOSE (file got closed before we wrote to it)
	//   then try again. file can close because fd pool needed more fds
	// . we cannot do this retry in BigFile.cpp because the BigFile
	//   may have been deleted/unlinked from a merge, but we could move
	//   this check to Msg3... and do it for writes, too...
	// . seem to be getting EBADFD errors now, too (what code is it?)
	//   i don't remember, just do it on *all* errors for now!
	//if ( g_errno == EFILECLOSED || g_errno == EBADFD ) {
	if ( g_errno && ! m_isSuspended ) {
		log(LOG_INFO,"db: Had error dumping data: %s. Retrying.",
		    mstrerror(g_errno));
		// . deal with the EBADF bug, it will loop forever on this
		// . i still don't know how the fd gets closed and s_fds[vfd]
		//   is not set to -1?!?!?!
		if ( g_errno == EBADF ) {
			// note it
			log(LOG_LOGIC,"db: setting fd for vfd to -1.");
			// mark our fd as not there...
			//int32_t i=(m_offset-m_bytesToWrite) / MAX_PART_SIZE;
			// sets s_fds[vfd] to -1
			// MDW: no, can't do this now
			// if ( m_file->m_files[i] )
			// 	releaseVfd ( m_file->m_files[i]->m_vfd );
		}
		//log("RdbDump::doneDumpingList: retrying.");
		return dumpList ( m_list , m_niceness , true );
	}
	// bail on error
	if ( g_errno ) {
		log("db: Had error dumping data: %s.", mstrerror(g_errno));
		//log("RdbDump::doneDumpingList: %s",mstrerror(g_errno));
		return true;
	}
	// . don't delete the list if we were dumping an unordered list
	// . we only dump unordered lists when we do a save
	// . it saves time not having to delete the list and it also allows
	//   us to do saves without deleting our data! good!
	if ( ! m_orderedDump ) return true; //--turn this off until save works

	// save for verify routine
	m_addToMap = addToMap;

	// should we verify what we wrote? useful for preventing disk 
	// corruption from those pesky Western Digitals and Maxtors?
	if ( g_conf.m_verifyWrites ) {
		// a debug message, if log disk debug messages is enabled
		log(LOG_DEBUG,"disk: Verifying %"INT32" bytes written.",
		    m_bytesToWrite);
		// make a read buf
		if ( m_verifyBuf && m_verifyBufSize < m_bytesToWrite ) {
			mfree ( m_verifyBuf , m_verifyBufSize , "RdbDump3" );
			m_verifyBuf = NULL;
			m_verifyBufSize = 0;
		}
		if ( ! m_verifyBuf ) {
			m_verifyBuf = (char *)mmalloc ( m_bytesToWrite , 
							"RdbDump3" );
			m_verifyBufSize = m_bytesToWrite;
		}
		// out of mem? if so, skip the write verify
		if ( ! m_verifyBuf ) return doneReadingForVerify();
		// read what we wrote
		bool isDone = m_file->read ( m_verifyBuf    ,
					     m_bytesToWrite ,
					     m_offset - m_bytesToWrite ,
					     &m_fstate      ,
					     this           ,
					     doneReadingForVerifyWrapper ,
					     m_niceness      );
		// debug msg
		//log("RdbDump dumped %"INT32" bytes, done=%"INT32"\n",
		//	m_bytesToWrite,isDone); 
		// return false if it blocked
		if ( ! isDone ) return false;
	}
	return doneReadingForVerify();
}
bool sendPageAddDelColl ( TcpSocket *s , HttpRequest *r , bool add ) {
	// get collection name
	//long  nclen;
	//char *nc   = r->getString ( "nc" , &nclen );
	//long  cpclen;
	//char *cpc  = r->getString ( "cpc" , &cpclen );

	g_errno = 0;

	//bool cast = r->getLong("cast",0);

	char *msg = NULL;

	// if any host in network is dead, do not do this
	//if ( g_hostdb.hasDeadHost() ) msg = "A host in the network is dead.";

	// . are we adding a collection?
	// . return if error adding, might already exist!
	// . g_errno should be set
	// . WE DO NOT NEED THIS ANYMORE. Pages.cpp now broadcasts
	//   addcoll as CommandAddColl() parm.
	/*
	if ( nclen > 0 && add && ! cast ) {
		// do not allow "main" that is used for the "" collection
		// for backwards compatibility
		//if ( strcmp ( nc , "main" ) != 0 ) 
		g_collectiondb.addRec (nc,cpc,cpclen,true,(collnum_t)-1,
				       false , // isdump?
				       true  ) ;// save it?
		//else 
		//	log("admin: \"main\" collection is forbidden.");
	}

	if ( ! add && ! cast ) g_collectiondb.deleteRecs ( r )   ;
	*/

	char format = r->getReplyFormat();


	if ( format == FORMAT_XML || format == FORMAT_JSON ) {
		// no addcoll given?
		long  page = g_pages.getDynamicPageNumber ( r );
		char *addcoll = r->getString("addcoll",NULL);
		char *delcoll = r->getString("delcoll",NULL);
		if ( ! addcoll ) addcoll = r->getString("addColl",NULL);
		if ( ! delcoll ) delcoll = r->getString("delColl",NULL);
		if ( page == PAGE_ADDCOLL && ! addcoll ) {
			g_errno = EBADENGINEER;
			char *msg = "no addcoll parm provided";
			return g_httpServer.sendErrorReply(s,g_errno,msg,NULL);
		}
		if ( page == PAGE_DELCOLL && ! delcoll ) {
			g_errno = EBADENGINEER;
			char *msg = "no delcoll parm provided";
			return g_httpServer.sendErrorReply(s,g_errno,msg,NULL);
		}
		return g_httpServer.sendSuccessReply(s,format);
	}

	// error?
	char *action = r->getString("action",NULL);
	char *addColl = r->getString("addcoll",NULL);



	char  buf [ 64*1024 ];
	SafeBuf p(buf, 64*1024);


	//
	// CLOUD SEARCH ENGINE SUPPORT - GIGABOT ERRORS
	//

	SafeBuf gtmp;
	char *gmsg = NULL;
	// is it too big?
	if ( action && addColl && gbstrlen(addColl) > MAX_COLL_LEN ) {
		gtmp.safePrintf("search engine name is too long");
		gmsg = gtmp.getBufStart();
	}
	// from Collectiondb.cpp::addNewColl() ensure coll name is legit
	char *x = addColl;
	for ( ; x && *x ; x++ ) {
		if ( is_alnum_a(*x) ) continue;
		if ( *x == '-' ) continue;
		if ( *x == '_' ) continue; // underscore now allowed
		break;
	}
	if ( x && *x ) {
		g_errno = EBADENGINEER;
		gtmp.safePrintf("<font color=red>Error. \"%s\" is a "
				"malformed name because it "
				"contains the '%c' character.</font><br><br>",
				addColl,*x);
		gmsg = gtmp.getBufStart();
	}

	//
	// END GIGABOT ERRORS
	//



	//
	// CLOUD SEARCH ENGINE SUPPORT
	//
	// if added the coll successfully, do not print same page, jump to
	// printing the basic settings page so they can add sites to it.
	// crap, this GET request, "r", is missing the "c" parm sometimes.
	// we need to use the "addcoll" parm anyway. maybe print a meta
	// redirect then?
	char guide = r->getLong("guide",0);
	// do not redirect if gmsg is set, there was a problem with the name
	if ( action && ! msg && format == FORMAT_HTML && guide && ! gmsg ) {
		//return g_parms.sendPageGeneric ( s, r, PAGE_BASIC_SETTINGS );
		// just redirect to it
		if ( addColl )
			p.safePrintf("<meta http-equiv=Refresh "
				      "content=\"0; URL=/admin/settings"
				      "?guide=1&c=%s\">",
				      addColl);
		return g_httpServer.sendDynamicPage (s,
						     p.getBufStart(),
						     p.length());
	}


	// print standard header
	g_pages.printAdminTop ( &p , s , r , NULL, 
				"onload=document."
				"getElementById('acbox').focus();");


	// gigabot error?
	//if ( gmsg ) 
	//	p.safePrintf("Gigabot says: %s<br><br>",gmsg);



	//long  page     = g_pages.getDynamicPageNumber ( r );
	//char *coll     = r->getString    ( "c"    );
	//char *pwd      = r->getString    ( "pwd" );
	//char *username = g_users.getUsername( r );
	//long  user = g_pages.getUserType ( s , r );
	//if ( ! coll )  coll = "";

	//if ( ! nc   )    nc = "";
	//if ( ! pwd  )   pwd = "";

	if ( g_errno ) msg = mstrerror(g_errno);





	if ( msg && ! guide ) {
		char *cc = "deleting";
		if ( add ) cc = "adding";
		p.safePrintf (
			  "<center>\n"
			  "<font color=red>"
			  "<b>Error %s collection: %s. "
			  "See log file for details.</b>"
			  "</font>"
			  "</center><br>\n",cc,msg);
	}

	//
	// CLOUD SEARCH ENGINE SUPPORT
	//
	if ( add && guide )
		printGigabotAdvice ( &p , PAGE_ADDCOLL , r , gmsg );



	// print the add collection box
	if ( add /*&& (! nc[0] || g_errno ) */ ) {

		char *t1 = "Add Collection";
		if ( guide ) t1 = "Add Search Engine";

		p.safePrintf (
			  "<center>\n<table %s>\n"
			   "<tr class=hdrow><td colspan=2>"
			  "<center><b>%s</b></center>"
			  "</td></tr>\n"
			  ,TABLE_STYLE
			  ,t1
			      );
		char *t2 = "collection";
		if ( guide ) t2 = "search engine";
		char *str = addColl;
		if ( ! addColl ) str = "";
		p.safePrintf (
			      "<tr bgcolor=#%s>"
			      "<td><b>name of new %s to add</td>\n"
			      "<td><input type=text name=addcoll size=30 "
			      "id=acbox "
			      "value=\"%s\">"
			      "</td></tr>\n"
			      , LIGHT_BLUE
			      , t2 
			      , str
			      );

		// don't show the clone box if we are under gigabot the guide
		if ( ! guide )
			p.safePrintf(
				     "<tr bgcolor=#%s>"
				     "<td><b>clone settings from this "
				     "collection</b>"
				     "<br><font size=1>Copy settings from "
				     "this pre-existing collection. Leave "
				     "blank to "
				     "accept default values.</font></td>\n"
				     "<td><input type=text name=clonecoll "
				     "size=30>"
				     "</td>"
				     "</tr>"
				     , LIGHT_BLUE
				     );
		// now list collections from which to copy the config
		//p.safePrintf (
		//	  "<tr><td><b>copy configuration from this "
		//	  "collection</b><br><font size=1>Leave blank to "
		//	  "accept default values.</font></td>\n"
		//	  "<td><input type=text name=cpc value=\"%s\" size=30>"
		//	  "</td></tr>\n",coll);
		p.safePrintf ( "</table></center><br>\n");

		// wrap up the form started by printAdminTop
		g_pages.printAdminBottom ( &p );
		long bufLen = p.length();
		return g_httpServer.sendDynamicPage (s,p.getBufStart(),bufLen);
	}

	// if we added a collection, print its page
	//if ( add && nc[0] && ! g_errno ) 
	//	return g_parms.sendPageGeneric2 ( s , r , PAGE_SEARCH ,
	//					  nc , pwd );

	if ( g_collectiondb.m_numRecsUsed <= 0 ) goto skip;

	// print all collections out in a checklist so you can check the
	// ones you want to delete, the values will be the id of that collectn
	p.safePrintf (
		  "<center>\n<table %s>\n"
		  "<tr class=hdrow><td><center><b>Delete Collections"
		  "</b></center></td></tr>\n"
		  "<tr bgcolor=#%s><td>"
		  "<center><b>Select the collections you wish to delete. "
		  //"<font color=red>This feature is currently under "
		  //"development.</font>"
		  "</b></center></td></tr>\n"
		  "<tr bgcolor=#%s><td>"
		  // table within a table
		  "<center><table width=20%%>\n",
		  TABLE_STYLE,
		  LIGHT_BLUE,
		  DARK_BLUE
		      );

	for ( long i = 0 ; i < g_collectiondb.m_numRecs ; i++ ) {
		CollectionRec *cr = g_collectiondb.m_recs[i];
		if ( ! cr ) continue;
		p.safePrintf (
			  "<tr bgcolor=#%s><td>"
			  "<input type=checkbox name=delcoll value=\"%s\"> "
			  "%s</td></tr>\n",
			  DARK_BLUE,
			  cr->m_coll,cr->m_coll);
	}
	p.safePrintf( "</table></center></td></tr></table><br>\n" );
skip:
	// wrap up the form started by printAdminTop
	g_pages.printAdminBottom ( &p );
	long bufLen = p.length();
	return g_httpServer.sendDynamicPage (s,p.getBufStart(),bufLen);
}
bool AutoBan::printTable( TcpSocket *s , HttpRequest *r ) {
	SafeBuf sb(512 * 512,"autobbuf");
	//read in all of the possible cgi parms off the bat:
	//long  user     = g_pages.getUserType( s , r );
	//char *username = g_users.getUsername(r);
	//char *pwd  = r->getString ("pwd");

	char *coll = r->getString ("c");

	long banIpsLen;
	char *banIps = r->getString ("banIps" , &banIpsLen , NULL);

	long allowIpsLen;
	char *allowIps = r->getString ("allowIps" , &allowIpsLen , NULL);

 	long clearLen;
 	char *clear = r->getString ("clear" , &clearLen , NULL);

	bool changed = false;

 	long validCodesLen;
 	char *validCodes = r->getString ("validCodes", &validCodesLen, NULL);

	long showAllIps = r->getLong("showAllIps", 0);
	long showLongView = r->getLong("longview", 0);

	// do it all from parm now
	//long banRegexLen;
	//char *banRegex = r->getString("banRegex", &banRegexLen, NULL);
	

// 	char *ss = sb.getBuf();
// 	char *ssend = sb.getBufEnd();
	g_pages.printAdminTop ( &sb, s , r );

	//sb.incrementLength(sss - ss);

	// MDW: moved to here

	long now = getTime();
	
	long days;
	long hours;
	long minutes;
	long secs;
	long msecs;

	if(r->getLong("resetcodes", 0)) {
		setCodesFromConf();
	}

	sb.safePrintf("\n<br><br><table %s>\n",TABLE_STYLE);

	getCalendarFromMs((now - m_codeResetTime) * 1000,
			  &days, 
			  &hours, 
			  &minutes, 
			  &secs,
			  &msecs);
	sb.safePrintf("<tr><td colspan=18 bgcolor=#%s>"
		      "<center><b>Code Usage "
		      "(<a href=\"/admin/"
		      "autoban?c=%s&resetcodes=1\">reset</a> "
		      "%li days %li hours %li "
		      "minutes %li sec ago)"
		      "</b></center></td></tr>", 
		      DARK_BLUE,
		      coll,
		      days, 
		      hours, 
		      minutes, 
		      secs);
	sb.safePrintf("<tr bgcolor=#%s>"
		      "<td><center><b>Code</b></center></td>"
		      "<td><center><b>IP</b></center></td>"
		      "<td><center><b>Query Count</b></center></td>"

		      "<td><center><b>Bytes Read</b></center></td>"
		      "<td><center><b>Bytes Sent</b></center></td>"
		      
		      "<td><center><b>Outstanding Count</b></center></td>"
		      "<td><center><b>Most Ever Outstanding</b></center></td>"
		      "<td><center><b>Max Outstanding</b></center></td>"
		      "</tr>", 
		      LIGHT_BLUE);


	for(long i = 0; i < m_ht.getNumSlots(); i++) {
		if ( m_ht.getKey ( i ) == 0 ) continue;
		CodeVal *cv = m_ht.getValuePointerFromSlot ( i );
		if ( ! cv ) continue;
		
		sb.safePrintf("<tr>");
		sb.safePrintf("<td>");
		sb.copyToken(cv->m_code);//m_codeVals[i].m_code);
		sb.safePrintf("</td>");
		sb.safePrintf("<td><center>%s</center> </td>",
			      iptoa(cv->m_ip));
		sb.safePrintf("<td><center>%lli</center></td>", 
			      cv->m_count);

		sb.safePrintf("<td><center>%lli</center></td>", 
			      cv->m_bytesRead);
		sb.safePrintf("<td><center>%lli</center></td>", 
			      cv->m_bytesSent);

		sb.safePrintf("<td><center>%li</center></td>", 
			      cv->m_outstanding);
		sb.safePrintf("<td><center>%li</center></td>", 
			      cv->m_maxEver);
		if ( cv->m_maxOutstanding != 50 )
			sb.safePrintf("<td><center><b>%li</b></center></td>", 
				      cv->m_maxOutstanding);
		else
			sb.safePrintf("<td><center>%li</center></td>", 
				      cv->m_maxOutstanding);

		sb.safePrintf("</tr>");
		
	}
	sb.safePrintf ("</table><br><br>\n" );


 	if(clear && clearLen < 64) {
 		long ip = atoip(clear, clearLen);
 		if(ip) {
			removeIp(ip);
			char *beginning;
			char ipbuf[64];//gotta NULL terminate for strstr
			memcpy(ipbuf, clear, clearLen);
			ipbuf[clearLen] = '\0';
			beginning = findToken(g_conf.m_banIps, ipbuf, 
					      clearLen);
			if(beginning) {
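				// shift the tail of the string left over the
				// removed token; the original NUL terminator
				// is copied down with the tail, so the
				// shortened string stays terminated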
				char *to = beginning;
				char *from = beginning + clearLen;
				while(*to) *to++ = *from++;
			}
			beginning = findToken(g_conf.m_allowIps, ipbuf,
					      clearLen);
			if(beginning) {
				char *to = beginning;
				char *from = beginning + clearLen;
				while(*to) *to++ = *from++;
			}
			changed = true;
 		}
 	}

 	long allowLen;
 	char *allow = r->getString ( "allow" , &allowLen , NULL );
 	if(allow && allowLen < 64) {
 		long ip = atoip(allow, allowLen);
		
 		if(ip) {
			char *beginning;
			char ipbuf[64];//gotta NULL terminate for strstr
			memcpy(ipbuf, allow, allowLen);
			ipbuf[allowLen] = '\0';
			beginning = findToken(g_conf.m_allowIps, ipbuf, 
					      allowLen);
			if(!beginning) {
				// it's not present, so add it.
				char *p = g_conf.m_allowIps;
				while(*p) p++;
				if(p - g_conf.m_allowIps + allowLen + 2 
				   < AUTOBAN_TEXT_SIZE) {
					*p++ = '\n';
					memcpy(p, ipbuf,allowLen);
					*(p + allowLen) = '\0';
				}
				else {
					sb.safePrintf("<font color=red>"
						      "Not enough stack space "
						      "to fit allowIps.  "
						      "Increase "
						      "AUTOBAN_TEXT_SIZE in "
						      "Conf.h. "
						      "Had %i need %li."
						      "</font>", 
						      AUTOBAN_TEXT_SIZE,
						      p - g_conf.m_allowIps + 
						      allowLen + 2);
					goto dontRemove1;
				}
			}
			beginning = findToken(g_conf.m_banIps, ipbuf, 
					      allowLen);
			if(beginning) {
				//remove it from banned if present.
				char *to = beginning;
				char *from = beginning + allowLen;
				while(*to) *to++ = *from++;
			}

			changed = true;
 		}
 	}
 dontRemove1:
 	long denyLen;
 	char *deny = r->getString ( "deny" , &denyLen , NULL );
 	if(deny && denyLen < 64) {
 		long ip = atoip(deny, denyLen);
		
 		if(ip) {
			char *beginning;
			char ipbuf[64];//gotta NULL terminate for strstr
			memcpy(ipbuf, deny, denyLen);
			ipbuf[denyLen] = '\0';
			beginning = findToken(g_conf.m_banIps, ipbuf, denyLen);
			if(!beginning) {
				// it's not present, so add it.
				char *p =g_conf.m_banIps;
				while(*p) p++;
				if(p - g_conf.m_banIps + denyLen + 2 < 
				   AUTOBAN_TEXT_SIZE) {
					*p++ = '\n';
					memcpy(p, ipbuf,denyLen);
					*(p + denyLen) = '\0';
				}
				else {
					sb.safePrintf("<font color=red>Not "
						      "enough stack space "
						      "to fit bannedIPs.  "
						      "Increase "
						      "AUTOBAN_TEXT_SIZE in "
						      "Conf.h. "
						      "Had %i need %li."
						      "</font>", 
						      AUTOBAN_TEXT_SIZE,
						      p - g_conf.m_banIps +
						      denyLen + 2);
					goto dontRemove2;
				}
			}
			beginning = findToken(g_conf.m_allowIps, ipbuf,
					      denyLen);
			if(beginning) {
				//remove it from allowed list if present.
				char *to = beginning;
				char *from = beginning + denyLen;
				while(*to) *to++ = *from++;
			}
			changed = true;
 		}
 	}
 dontRemove2:

	if(!g_conf.m_doAutoBan) {
		sb.safePrintf("<center><font color=red><b>Autoban is disabled, "
			      "turn it on in Master Controls.</b></font></center><br>");
	}

 	if(validCodes) {
		if(validCodesLen >= AUTOBAN_TEXT_SIZE) {
			sb.safePrintf("<font color=red>Not enough stack space "
				      "to fit codes.  "
				      "Increase AUTOBAN_TEXT_SIZE in Conf.h. "
				      "Had %i need %li.</font>", 
				      AUTOBAN_TEXT_SIZE,
				      validCodesLen);
			validCodes = NULL;
			validCodesLen = 0;
		}
		else {
			memcpy(g_conf.m_validCodes, validCodes, validCodesLen);
			g_conf.m_validCodes[validCodesLen] = '\0';
			trimWhite(g_conf.m_validCodes);
			setCodesFromConf();
		}
	}



	//first remove all of the ips in the conf, then add the passed in 
	//  ones to the conf parm; 
	if (banIps) {
		//ack, the browser puts in crlf when this comes back, so
		//we will have a longer string here than the one we sent 
		//out. trim back all extraneous whitespace before we do
		//bounds checking.
		trimWhite(banIps);
		banIpsLen = gbstrlen(banIps);
		if(banIpsLen >= AUTOBAN_TEXT_SIZE) {
			sb.safePrintf("<font color=red>Not enough stack space "
				      "to fit bannedIps.  "
				      "Increase AUTOBAN_TEXT_SIZE in Conf.h. "
				      "Had %i need %li.</font>", 
				      AUTOBAN_TEXT_SIZE,
				      banIpsLen);
			banIpsLen = AUTOBAN_TEXT_SIZE - 1;
		}
		for(long i = 0; i < m_tableSize; i++) {
			if(m_detectKeys[i] == 0) continue;
			//check the 'set from conf' bit, and clear those.
			if(m_detectVals[i].m_flags & FROMCONF) {
				removeIp(m_detectKeys[i]);
			}
		}
		memcpy(g_conf.m_banIps, banIps, banIpsLen);
		g_conf.m_banIps[banIpsLen] = '\0';
		changed = true;
	}
	if (allowIps) {
		trimWhite(allowIps);
		allowIpsLen = gbstrlen(allowIps);

		if(allowIpsLen >= AUTOBAN_TEXT_SIZE) {
			sb.safePrintf("<font color=red>Not enough stack space "
				      "to fit allowIps.  "
				      "Increase AUTOBAN_TEXT_SIZE in Conf.h. "
				      "Had %i need %li.</font>", 
				      AUTOBAN_TEXT_SIZE,
				      allowIpsLen);
			allowIpsLen = AUTOBAN_TEXT_SIZE - 1;
		}
		for(long i = 0; i < m_tableSize; i++) {
			if(m_detectKeys[i] == 0) continue;
			//check the 'set from conf' bit, and clear those.
			if(m_detectVals[i].m_flags & FROMCONF) {
				removeIp(m_detectKeys[i]);
			}
		}
		memcpy(g_conf.m_allowIps, allowIps, allowIpsLen);
		g_conf.m_allowIps[allowIpsLen] = '\0';
		changed = true;
	}
	if(changed) {
		trimWhite(g_conf.m_allowIps);
		trimWhite(g_conf.m_banIps);
		setFromConf();
	}



	sb.safePrintf("\n<table %s>\n",TABLE_STYLE);
	sb.safePrintf("<tr><td colspan=2 bgcolor=#%s>"
		      "<center><b>Add IPs</b></center></td></tr>", 
		      DARK_BLUE);

// 	ss = sb.getBuf();
// 	ssend = sb.getBufEnd();
	g_parms.printParms (&sb, s, r);
	//	sb.incrementLength(sss - ss);



	sb.safePrintf ("<tr><td>"
		       "<center>" 
		       "<input type=submit value=\"Update\" "
		       "method=\"POST\" border=0>"
		       "</center></td></tr>");

	sb.safePrintf ("</table><br><br>\n" );



	if(!showLongView) {
		sb.safePrintf("<b><a href=\"autoban"
			      "?c=%s"
			      "&showAllIps=%li"
			      "&longview=1\">Show watched ips table...</a></b>",
			      coll,
			      showAllIps);
		return g_httpServer.sendDynamicPage ( s , 
						      sb.getBufStart() , 
						      sb.length() , 
						      -1 , 
						      false);
	}

	/////////////////////////////////////////////////////////////////////

	sb.safePrintf("\n<table %s>\n",TABLE_STYLE);

	sb.safePrintf("<tr><td colspan=3 bgcolor=#%s>"
		      "<center><b>Watched Ips</b></center></td></tr>", 
		      DARK_BLUE);

	sb.safePrintf("<tr bgcolor=#%s>"
		      "<td><center><b>IP</b></center></td>"
		      "<td><center><b>Description</b></center></td>"
		      //		      "<td><center><b>Time Added</b></center></td>"
		      "<td><center><b>Allow/Deny/Clear</b></center></td>"
		      "</tr>", 
		      LIGHT_BLUE);




	long *sortedIndices = (long*)mmalloc(m_tableSize * sizeof(long), 
					     "AutoBanH");

	if(!sortedIndices) {
		return g_httpServer.sendErrorReply(s,500,mstrerror(ENOMEM));
	}

	long numEntries = 0;
	for(long i = 0; i < m_tableSize; i++) {
		if(m_detectKeys[i] == 0) continue;
		sortedIndices[numEntries++] = i;
	}
	SorterTable = m_detectKeys;

        gbsort(sortedIndices, numEntries, sizeof(long), ip_cmp);
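	// ip_cmp presumably compares two indices indirectly through the
	// global SorterTable pointer set above, which is why m_detectKeys
	// is parked in a global before the sort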


	//let's put each class of watched ip in its own safebuf then cat
	//them together at the end.
	
	SafeBuf allowed;
	SafeBuf banned; 
	SafeBuf feedLeachers; 
	SafeBuf cowBots; 
	SafeBuf *e;

	for(long j = 0; j < numEntries; j++) {
		long i = sortedIndices[j];
		if(m_detectKeys[i] == 0) continue;
		//if(!(m_detectVals[i].m_flags & FROMCONF)) continue;
		bool allow =  m_detectVals[i].m_flags & ALLOW && 
			m_detectVals[i].m_flags & FROMCONF;
		bool deny  =  m_detectVals[i].m_flags & DENY && 
			m_detectVals[i].m_flags & FROMCONF;
		bool explicitban = deny && m_detectVals[i].m_flags & FROMCONF;
		unsigned short dayCount = m_detectVals[i].m_dayCount;
		unsigned char minuteCount = m_detectVals[i].m_minuteCount;

		bool day =    dayCount >= g_conf.m_numFreeQueriesPerDay;
		bool minute = minuteCount >= g_conf.m_numFreeQueriesPerMinute;

		char *description;
		char *color;

		if(allow) {
			color = GREEN;
			description = "Allowed";
			e = &allowed;
		} 
		else if(explicitban) {
			color = RED;
			description = "Banned";
			e = &banned;
		}
		else if(minute) {
			color = RED;
			description = "Cow Bot";
			e = &cowBots;
		}
		else if(day) {
			color = RED;
			description = "Feed Leacher";
			e = &feedLeachers;
		}
		else {
			//this can happen when someone was banned due to 
			//exceeding the quota, then the quota was lowered.
			
			m_detectVals[i].m_flags &= ~DENY;
			//log("autoban: ohshit-banning %s",iptoa(s->m_ip));
			continue;
		}

		
		e->safePrintf("<tr>");

		e->safePrintf("<td bgcolor=#%s><center>%s</center></td><td>"
			      "<center>%s</center></td>"

// 			      "<td><center>"
// 			      "%li days %li hrs %li min ago"
// 			      "</center></td>"

			      "<td><center><a href=\"/admin/"
			      "autoban?c=%s&allow=%s&showAllIps=%li\">" 
			      "allow/</a>"

			      "<a href=\"/admin/"
			      "autoban?c=%s&deny=%s&showAllIps=%li\">" 
			      "deny/</a>"

			      "<a href=\"/admin/"
			      "autoban?c=%s&clear=%s&showAllIps=%li\">"
			      "clear</a></center>"
			      "</td>",color, 
			      iptoa(m_detectKeys[i]),
			      description,

			      //      days,hours,minutes,

			      coll,
			      iptoa(m_detectKeys[i]),
			      showAllIps,
			      coll,
			      iptoa(m_detectKeys[i]),
			      showAllIps,
			      coll,
			      iptoa(m_detectKeys[i]),
			      showAllIps);
		e->safePrintf("</tr>");
	}

	sb.cat(allowed);
	sb.cat(banned); 
	sb.cat(feedLeachers); 
	sb.cat(cowBots); 

	sb.safePrintf ("</table><br><br>\n" );


	// MDW moved from here

	sb.safePrintf("\n<br><br><table %s>\n",TABLE_STYLE);

	sb.safePrintf("<tr><td colspan=5 bgcolor=#%s>"
		      "<center><b>Control Panel</b></center></td></tr>", 
		      DARK_BLUE);

	sb.safePrintf("<tr>"
		      "<td bgcolor=#%s><center><b>Show Ips by Number of Queries"
		      "</b></center></td>",
		      LIGHT_BLUE);
	sb.safePrintf("<td><center><font color=red><b><a href=\"/admin/"
		      "autoban?c=%s&showAllIps=0\">"
		      "0 Queries</a></b>"
		      "</font></center></td>",
		      coll);
	sb.safePrintf("<td><center><font color=red><b><a href=\"/admin/"
		      "autoban?c=%s&showAllIps=1\">"
		      "1 Query</a></b>"
		      "</font></center></td>",
		      coll);
	sb.safePrintf("<td><center><font color=red><b><a href=\"/admin/"
		      "autoban?c=%s&showAllIps=10\">"
		      "10 Queries</a></b>"
		      "</font></center></td>",
		      coll);
	sb.safePrintf("<td><center><font color=red><b><a href=\"/admin/"
		      "autoban?c=%s&showAllIps=100\">"
		      "100 Queries</a></b>"
		      "</font></center></td></tr>",
		      coll);

	sb.safePrintf ("</table><br><br>\n");



	if(!showAllIps) {

		char *ss = sb.getBufStart();
		long sslen = sb.length();
		mfree(sortedIndices, m_tableSize * sizeof(long),"AutoBanH");

		return g_httpServer.sendDynamicPage ( s , ss , sslen , -1 , false);
	}
	

	sb.safePrintf("\n<br><br><table %s>\n",TABLE_STYLE);

	sb.safePrintf("<tr><td colspan=6 bgcolor=#%s>"
		      "<center><b>Queries Today</b></center></td></tr>", 
		      DARK_BLUE);

	sb.safePrintf("<tr bgcolor=#%s>"
		      "<td><center><b>IP</b></center></td>"
		      "<td><center><b>Minute count</b></center></td>"
		      "<td><center><b>Day count</b></center></td>"
		      "<td><center><b>Time Until Reset</b></center></td>"
		      "<td><center><b>Times Banned</b></center></td>"
		      "<td><center><b>Allow/Deny</b></center></td>"
		      "</tr>", 
		      LIGHT_BLUE);


	char minBuf[128];
	char dayBuf[128];
	unsigned long lastIpGroup = 0;
	for(long j = 0; j < numEntries; j++) {
		long i = sortedIndices[j];
		long  dayCount = m_detectVals[i].m_dayCount;
		unsigned char minuteCount = m_detectVals[i].m_minuteCount;

		if(!(m_detectVals[i].m_flags & FROMCONF)) {
			if(m_detectVals[i].m_minuteExpires < now) 
				minuteCount = 0;
			if(!(m_detectVals[i].m_flags & DENY) && 
			   m_detectVals[i].m_dayExpires < now) 
				dayCount = 0;
		}
		// a hack: showAllIps doubles as the minimum day count to
		// display (0/1/10/100 via the control panel links above)
		if ( dayCount < showAllIps ) continue;

		char *color = YELLOW;
		
		if(m_detectVals[i].m_flags & ALLOW) {
			color = GREEN;
			snprintf(minBuf, 128, "--");
			snprintf(dayBuf, 128, "%li", dayCount);
		}
		else if(m_detectVals[i].m_flags & DENY) {
			color = RED;
			snprintf(minBuf, 128, "--");
			snprintf(dayBuf, 128, "%li", dayCount);
		} 
		else {
			snprintf(minBuf, 128, "%li", (long)minuteCount);
			snprintf(dayBuf, 128, "%li", (long)dayCount);
		}

		unsigned long thisIpGroup = (unsigned long)m_detectKeys[i] & 
			0x00ffffff;

		sb.safePrintf("<tr><center>");

		if(m_detectVals[i].m_flags & FROMCONF) {
			sb.safePrintf("<td bgcolor=#%s><center>%s%s%s</center></td>"
				      "<td><center>%s</center> </td>"
				      "<td><center>%s</center></td>" 
				      "<td><center><font color=red>"
				      "<b>NEVER</b>"
				      "</font></center></td>"
				      "<td><center>--</center></td>",
				      color, 
				      (thisIpGroup == lastIpGroup)?"<b>":"",
				      iptoa(m_detectKeys[i]),
				      (thisIpGroup == lastIpGroup)?"</b>":"",
				      minBuf,
				      dayBuf);
		}
		else {
			//they haven't done a query since being unbanned,
			//unban them now so we don't get negative resets displayed.
			/*
			  no, don't unban the bots!!! MDW yippy project
			if(m_detectVals[i].m_dayExpires < now) {
				m_detectVals[i].m_flags &= ~DENY; 
				//log("autoban: dayexpire-unbanning %s",
				//    iptoa(ip));
				m_detectVals[i].m_dayExpires = now + ONE_DAY;
				m_detectVals[i].m_minuteExpires = now + 60;
				m_detectVals[i].m_dayCount = 0;
				m_detectVals[i].m_minuteCount = 0;
				sb.safePrintf("</center></tr>");
				continue;
			}
			*/

			getCalendarFromMs((m_detectVals[i].m_dayExpires - now)* 1000,
					  &days, 
					  &hours, 
					  &minutes, 
					  &secs,
					  &msecs);

			sb.safePrintf("<td bgcolor=#%s><center>%s%s%s</center></td>"
				      "<td><center>%s</center> </td>"
				      "<td><center>%s</center></td>" 
				      "<td><center><font color=red>"
				      "<b>%li days %li hrs %li min %li sec</b>"
				      "</font></center></td>"
				      "<td><center>%i</center></td>",
				      color, 
				      (thisIpGroup == lastIpGroup)?"<b>":"",
				      iptoa(m_detectKeys[i]),
				      (thisIpGroup == lastIpGroup)?"</b>":"",
				      minBuf,
				      dayBuf,
				      days, hours, minutes, secs,
				      m_detectVals[i].m_timesBanned);
		}
		sb.safePrintf("<td><center>"
			      "<a href=\"/admin/"
			      "autoban?c=%s&allow=%s&showAllIps=%li\">" 
			      "allow/</a>"
			      "<a href=\"/admin/"
			      "autoban?c=%s&deny=%s&showAllIps=%li\">" 
			      "deny</a></center>"
			      "</td>",
			      coll,
			      iptoa(m_detectKeys[i]),
			      showAllIps,
			      coll,
			      iptoa(m_detectKeys[i]),
			      showAllIps);

		sb.safePrintf("</center></tr>");
		lastIpGroup = thisIpGroup;
	}


	sb.safePrintf ("</table><br><br>\n" );


	char *ss = sb.getBufStart();
	long sslen = sb.length();

	mfree(sortedIndices, m_tableSize * sizeof(long),"AutoBanH");

	return g_httpServer.sendDynamicPage ( s , ss , sslen , -1 , false);
}
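// ip_cmp() is defined elsewhere in this file. As a rough sketch (an
// assumption, not the actual implementation), a gbsort()/qsort-style
// comparator over the file-scope SorterTable would order the indices so
// that ips sharing their first three octets (the 0x00ffffff group used
// for the bolding logic above) land next to each other:
/*
static int ip_cmp_sketch ( const void *a , const void *b ) {
	unsigned long ipA = (unsigned long)SorterTable[*(long *)a];
	unsigned long ipB = (unsigned long)SorterTable[*(long *)b];
	// ips are stored in network byte order, so the low three bytes
	// are the first three octets
	unsigned long gA  = ipA & 0x00ffffff;
	unsigned long gB  = ipB & 0x00ffffff;
	if ( gA  != gB  ) return ( gA  < gB  ) ? -1 : 1;
	if ( ipA != ipB ) return ( ipA < ipB ) ? -1 : 1;
	return 0;
}
*/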
///////////
//
// main > Basic > Status
//
///////////
bool sendPageBasicStatus ( TcpSocket *socket , HttpRequest *hr ) {
	char  buf [ 128000 ];
	SafeBuf sb(buf,128000);
	sb.reset();

	char format = hr->getReplyFormat();


	// true = usedefault coll?
	CollectionRec *cr = g_collectiondb.getRec ( hr , true );
	if ( ! cr ) {
		g_httpServer.sendErrorReply(socket,500,"invalid collection");
		return true;
	}

	if ( format == FORMAT_JSON || format == FORMAT_XML) {
		// this is in PageCrawlBot.cpp
		printCrawlDetails2 ( &sb , cr , format );
		char *ct = "text/xml";
		if ( format == FORMAT_JSON ) ct = "application/json";
		return g_httpServer.sendDynamicPage (socket, 
						     sb.getBufStart(), 
						     sb.length(),
						     0, // cachetime
						     false,//POSTReply        ,
						     ct);
	}

	// print standard header 
	if ( format == FORMAT_HTML )
		// this prints the <form tag as well
		g_pages.printAdminTop ( &sb , socket , hr );

	// table to split between widget and stats in left and right panes
	if ( format == FORMAT_HTML ) {
		sb.safePrintf("<TABLE id=pane>"
			      "<TR><TD valign=top>");
	}

	// initialized so non-html formats never leave these undefined
	int32_t savedLen1 = 0, savedLen2 = 0;

	//
	// widget
	//
	// put the widget in here, just sort results by spidered date
	//
	// the scripts do "infinite" scrolling both up and down.
	// but if you are at the top then new results will load above
	// you and we try to maintain your current visual state even though
	// the scrollbar position will change.
	//
	if ( format == FORMAT_HTML ) {

		// save position so we can output the widget code
		// so user can embed it into their own web page
		savedLen1 = sb.length();
		
		printScrollingWidget ( &sb , cr );

		savedLen2 = sb.length();

	}

	// the right table pane is the crawl stats
	if ( format == FORMAT_HTML ) {
		sb.safePrintf("</TD><TD valign=top>");
	}


	//
	// show stats
	//
	if ( format == FORMAT_HTML ) {

		char *seedStr = cr->m_diffbotSeeds.getBufStart();
		if ( ! seedStr ) seedStr = "";

		SafeBuf tmp;
		int32_t crawlStatus = -1;
		getSpiderStatusMsg ( cr , &tmp , &crawlStatus );
		CrawlInfo *ci = &cr->m_localCrawlInfo;
		int32_t sentAlert = (int32_t)ci->m_sentCrawlDoneAlert;
		if ( sentAlert ) sentAlert = 1;

		//sb.safePrintf(
		//	      "<form method=get action=/crawlbot>"
		//	      "%s"
		//	      , sb.getBufStart() // hidden input token/name/..
		//	      );

		char *hurts = "No";
		if ( cr->m_globalCrawlInfo.m_hasUrlsReadyToSpider )
			hurts = "Yes";

		sb.safePrintf(//"<TABLE border=0>"
			      //"<TR><TD valign=top>"

			      "<table id=stats border=0 cellpadding=5>"

			      "<tr>"
			      "<td><b>Crawl Status Code:</td>"
			      "<td>%" PRId32"</td>"
			      "</tr>"

			      "<tr>"
			      "<td><b>Crawl Status Msg:</td>"
			      "<td>%s</td>"
			      "</tr>"

			      //"<tr>"
			      //"<td><b>Rounds Completed:</td>"
			      //"<td>%" PRId32"</td>"
			      //"</tr>"

			      "<tr>"
			      "<td><b>Has Urls Ready to Spider:</td>"
			      "<td>%s</td>"
			      "</tr>"


			      // this will  have to be in crawlinfo too!
			      //"<tr>"
			      //"<td><b>pages indexed</b>"
			      //"<td>%" PRId64"</td>"
			      //"</tr>"

			      "<tr>"
			      "<td><b><nobr>URLs Harvested</b> "
			      "(may include dups)</nobr></td>"
			      "<td>%" PRId64"</td>"
     
			      "</tr>"

			      //"<tr>"
			      //"<td><b>URLs Examined</b></td>"
			      //"<td>%" PRId64"</td>"
			      //"</tr>"

			      "<tr>"
			      "<td><b>Page Crawl Attempts</b></td>"
			      "<td>%" PRId64"</td>"
			      "</tr>"

			      "<tr>"
			      "<td><b>Page Crawl Successes</b></td>"
			      "<td>%" PRId64"</td>"
			      "</tr>"
			      , crawlStatus
			      , tmp.getBufStart()
			      //, cr->m_spiderRoundNum
			      //, cr->m_globalCrawlInfo.m_hasUrlsReadyToSpider
			      , hurts

			      , cr->m_globalCrawlInfo.m_urlsHarvested
			      //, cr->m_globalCrawlInfo.m_urlsConsidered

			      , cr->m_globalCrawlInfo.m_pageDownloadAttempts
			      , cr->m_globalCrawlInfo.m_pageDownloadSuccesses
			      );


		//
		// begin status code breakdown
		//
		for ( int32_t i = 0 ; i < 65536 ; i++ ) {
			if ( g_stats.m_allErrorsNew[i] == 0 &&
			     g_stats.m_allErrorsOld[i] == 0 )
				continue;
			sb.safePrintf (
				       "<tr>"
				       "<td><b> &nbsp; <a href=/search?c=%s&q="
				       "gbstatusmsg%%3A"
				       "%%22"
				       ,
				       cr->m_coll );
			sb.urlEncode(mstrerror(i));
			sb.safePrintf ("%%22>"
				       "%s"
				       "</a>"
				       "</b></td>"
				       "<td>%" PRId64"</td>"
				       "</tr>\n" ,
				       mstrerror(i),
				       g_stats.m_allErrorsNew[i] +
				       g_stats.m_allErrorsOld[i] );
		}
		//
		// end status code breakdown
		//


		char tmp3[64];
		time_t tt = (time_t)cr->m_diffbotCrawlStartTime;
		struct tm *timeStruct = localtime(&tt);
		// Jan 01 1970 at 10:30:00
		if ( timeStruct )
			strftime (tmp3,64,"%b %d %Y at %H:%M:%S",timeStruct);
		else
			strcpy ( tmp3 , "(invalid time)" );
		sb.safePrintf("<tr><td><b>Collection Created</b></td>"
			      "<td>%s (local time)</td></tr>",tmp3);



		
		// print link to embed the code in their own site
		SafeBuf embed;
		embed.htmlEncode(sb.getBufStart()+savedLen1,
				 savedLen2-savedLen1,
				 false); // encodePoundSign #?
		// convert all single-quotes to double-quotes so the markup
		// survives inside php's echo '...'; statement printed below
		embed.replaceChar('\'','\"');

		sb.safePrintf("<tr>"
			      "<td valign=top>"
			      "<a onclick=\""
			      "var dd=document.getElementById('hcode');"
			      "if ( dd.style.display=='none' ) "
			      "dd.style.display=''; "
			      "else "
			      "dd.style.display='none';"
			      "\" style=color:blue;>"
			      "<u>"
			      "show Widget HTML code"
			      "</u>"
			      "</a>"
			      "</td><td>"
			      "<div id=hcode style=display:none;"
			      "max-width:800px;>"
			      "%s"
			      "</div>"
			      "</td></tr>"
			      , embed.getBufStart() );

		sb.safePrintf("<tr>"
			      "<td valign=top>"
			      "<a onclick=\""
			      "var dd=document.getElementById('pcode');"
			      "if ( dd.style.display=='none' ) "
			      "dd.style.display=''; "
			      "else "
			      "dd.style.display='none';"
			      "\" style=color:blue;>"
			      "<u>"
			      "show Widget PHP code"
			      "</u>"
			      "</a>"
			      "</td>"
			      "<td>"
			      "<div id=pcode style=display:none;"
			      "max-width:800px;>"
			      "<i>"
			      "echo '"
			      "%s"
			      "';"
			      "</i>"
			      "</div>"
			      "</td></tr>"
			      , embed.getBufStart() );


		sb.safePrintf("</table>\n\n");

	}

	// end the right table pane
	if ( format == FORMAT_HTML ) {
		sb.safePrintf("</TD></TR></TABLE>");
	}


	//if ( format != FORMAT_JSON )
	//	// wrap up the form, print a submit button
	//	g_pages.printAdminBottom ( &sb );

	return g_httpServer.sendDynamicPage (socket, 
					     sb.getBufStart(), 
					     sb.length(),
					     0); // cachetime
}
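// For reference, the "show Widget HTML code" row above expands (with the
// %s filled in) to markup like the following; the anchor's inline
// javascript just toggles the div's display property client-side:
//
//   <a onclick="var dd=document.getElementById('hcode');
//       if ( dd.style.display=='none' ) dd.style.display='';
//       else dd.style.display='none';" style=color:blue;>
//     <u>show Widget HTML code</u></a>
//   <div id=hcode style=display:none;max-width:800px;>
//     ...html-encoded widget markup from printScrollingWidget()...
//   </div>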
bool Msge0::sendMsg8a ( long i ) {
	// handle errors
	if ( g_errno && ! m_errno ) m_errno = g_errno;
	g_errno = 0;
	Msg8a  *m   = &m_msg8as[i];
	//TagRec *m = &m_tagRecs[i];
	// save state into Msg8a
	m->m_state2 =  this;
	m->m_state3 = (void *)i;

	// reserve room for one TagRec in the current slab
	long need = sizeof(TagRec);
	// sanity check
	if ( need > SLAB_SIZE ) { char *xx=NULL;*xx=0; }
	// how much space left in the latest buffer
	if ( m_slabPtr + need > m_slabEnd ) {
		// inc the buffer number
		m_slabNum++;
		// allocate a new 8k buffer
		m_slab[m_slabNum] = (char *)mmalloc (SLAB_SIZE,"msgeslab");
		// failed?
		if ( ! m_slab[m_slabNum] ) {
			// do not free if null above
			m_slabNum--;
			// count as reply
			m_numReplies++;
			// make it available again
			m_used[i] = false;
			// record error
			if ( ! m_errno ) m_errno = g_errno;
			// error out
			log("msge0: slab alloc: %s",mstrerror(g_errno));
			return true;
		}
		// the failure branch above already returned, so the new
		// slab is guaranteed non-NULL here; point the bump pointer
		// at the start of it
		m_slabPtr = m_slab[m_slabNum];
		m_slabEnd = m_slabPtr + SLAB_SIZE;
	}
	// we are processing the nth url
	long n = m_ns[i];
	// now use it
	m_tagRecPtrs[n] = (TagRec *)m_slabPtr;
	// constructor
	m_tagRecPtrs[n]->constructor();
	// advance it
	m_slabPtr += sizeof(TagRec);

	// skip for debug
	//return doneSending(i);
	
	// . this now employs the tagdb filters table for lookups
	// . that is really a hack until we find a way to identify subsites
	//   on a domain automatically, like blogspot.com/users/harry/ is a 
	//   subsite.
	if ( ! m->getTagRec ( &m_urls[i]    ,
			      NULL, // sites[i] ,
			      m_collnum        ,
			      // if domain is banned, we will miss that here!
			      true          , // skip domain lookup?
			      m_niceness    ,
			      m             , // state
			      gotTagRecWrapper ,
			      m_tagRecPtrs[n]) )
		return false;
	return doneSending ( i );
}
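// The slab logic in sendMsg8a() is a simple bump allocator: TagRecs are
// carved sequentially out of 8k slabs, so we pay one malloc per slab
// instead of one per TagRec. A minimal standalone sketch of the same
// pattern (hypothetical names, not Msge0's actual members):
/*
#define SLAB_SIZE (8*1024)
struct SlabPool {
	char *m_slab[64];
	long  m_slabNum;        // init to -1
	char *m_ptr;            // init to NULL
	char *m_end;            // init to NULL
	void *alloc ( long need ) {
		// a single allocation can never straddle two slabs
		if ( need > SLAB_SIZE ) return NULL;
		// grab a fresh slab if the current one is absent or full
		if ( ! m_ptr || m_ptr + need > m_end ) {
			char *s = (char *)mmalloc ( SLAB_SIZE , "slab" );
			if ( ! s ) return NULL;
			m_slab[++m_slabNum] = s;
			m_ptr = s;
			m_end = s + SLAB_SIZE;
		}
		// bump the pointer past the reservation
		void *ret = m_ptr;
		m_ptr += need;
		return ret;
	}
};
*/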
void Scraper::gotPhrase ( ) {
	// error getting random phrase? just log it; we still scrape
	if ( g_errno ) log("scraper: got error getting random phrase: %s",
			   mstrerror(g_errno));

	CollectionRec *cr = g_collectiondb.getRec ( m_coll );
	// the collection may have been deleted out from under us
	if ( ! cr ) return;

	// if all three query types are disabled, the loop below would
	// never terminate, so bail out up front
	if ( ! cr->m_scrapingEnabledWeb  &&
	     ! cr->m_scrapingEnabledNews &&
	     ! cr->m_scrapingEnabledBlogs ) return;

 loop:
	// what type of query should we do?
	m_qtype = rand() % 3;

	// make sure web, news, blog is enabled
	if ( m_qtype == 0 && ! cr->m_scrapingEnabledWeb   ) goto loop;
	if ( m_qtype == 1 && ! cr->m_scrapingEnabledNews  ) goto loop;
	if ( m_qtype == 2 && ! cr->m_scrapingEnabledBlogs ) goto loop;

	// scraping is off when repairing obviously
	if ( g_repairMode ) return;

	// get it
	char *s = g_wiki.m_randPhrase;
	// convert _'s to spaces
	for ( char *p = s ; *p ; p++ )
		if ( *p == '_' ) *p = ' ';
	// . url encode the random phrase
	// . truncate it to 200 bytes to keep things sane
	// . Wiki::doneReadingWiki() keeps it below 128 i think anyway
	char qe[400];
	urlEncode(qe, 200, s , gbstrlen(s) );
	char *end = qe + 390;

	// half the time append a random word from the dictionary so that
	// we discover those tail-end sites better
	if ( m_qtype == 0 && (rand() % 2) ) { 
		// point into it for appending
		char *p = qe + gbstrlen(qe);
		// add a space, url encoded
		*p++ = '+';
		// append a random word to it from dictionary
		char *rw = g_speller.getRandomWord();
		// append that in
		urlEncode( p , end - p - 1 , rw , gbstrlen(rw) );
	}

	// make a query to scrape
	char buf[2048];

	char *uf ;
	if      ( m_qtype == 0 )
		uf="http://www.google.com/search?num=50&q=%s&scoring=d"
			"&filter=0";
	// google news query? sort by date.
	else if ( m_qtype == 1 )
		uf="http://news.google.com/news?num=50&q=%s&sort=n"
			"&filter=0";
	// google blog query?
	else if ( m_qtype == 2 ) 
		uf="http://www.google.com/blogsearch?num=50&q=%s&scoring=d"
			"&filter=0";
	// sanity check
	else { char *xx=NULL;*xx=0; }

	// make the url we will download
	sprintf ( buf , uf , qe );

	SpiderRequest sreq;
	// set the SpiderRequest to the fully-expanded url in buf, not the
	// printf format string still sitting in uf
	strcpy(sreq.m_url, buf);
	// . tell it to only add the hosts of each outlink for now!
	// . that will be passed on to when XmlDoc calls Links::set() i guess
	// . xd will not reschedule the scraped url into spiderdb either
	sreq.m_isScraping = 1;
	sreq.m_fakeFirstIp = 1;
	// hash the full url so each distinct query gets its own fake ip
	long firstIp = hash32n(buf);
	if ( firstIp == 0 || firstIp == -1 ) firstIp = 1;
	sreq.m_firstIp = firstIp;
	// parent docid is 0
	sreq.setKey(firstIp,0LL,false);

	// forceDEl = false, niceness = 0
	m_xd.set4 ( &sreq , NULL , m_coll , NULL , 0 ); 

	//m_xd.m_isScraping = true;

	// download without throttling
	//m_xd.m_throttleDownload = false;

	// disregard this
	m_xd.m_useRobotsTxt = false;

	// call this when index completes
	m_xd.setCallback ( NULL , indexedDocWrapper );

	// assume it blocked
	m_numSent++;

	// scraper is special
	m_xd.m_usePosdb     = false;
	m_xd.m_useDatedb    = false;
	m_xd.m_useClusterdb = false;
	m_xd.m_useLinkdb    = false;
	m_xd.m_useSpiderdb  = true; // only this one i guess
	m_xd.m_useTitledb   = false;
	m_xd.m_useTagdb     = false;
	m_xd.m_usePlacedb   = false;
	//m_xd.m_useTimedb    = false;
	//m_xd.m_useSectiondb = false;
	//m_xd.m_useRevdb     = false;

	// . return false if this blocks
	// . will add the spider recs to spiderdb of the outlinks
	// . will add "ingoogle", etc. tags for each outlink
	if ( ! m_xd.indexDoc ( ) ) return ;

	// we didn't block
	indexedDoc ( );
}
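// An illustrative expansion of the url construction above for a web
// query (m_qtype == 0) with phrase "whales" and random word "krill"
// (values made up for the example):
//
//   uf  = "http://www.google.com/search?num=50&q=%s&scoring=d&filter=0"
//   qe  = "whales+krill"
//   buf = "http://www.google.com/search?num=50&q=whales+krill"
//         "&scoring=d&filter=0"
//
// buf, the expanded url, is what goes into sreq.m_url and into hash32n()
// for the fake first ip; copying uf would spider the literal format
// string and give every query of one type the same ip key.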
// . serialize the Msg20Reply from this XmlDoc and send it back on the slot
// . always returns true; on error it sends an error reply instead
bool Msg20Reply::sendReply ( XmlDoc *xd ) {

	// get it
	UdpSlot *slot = (UdpSlot *)xd->m_slot;

	if ( g_errno ) {
		// extract titleRec ptr
		log("query: Had error generating msg20 reply for d=%"INT64": "
		    "%s",xd->m_docId, mstrerror(g_errno));
		// don't forget to delete this list
	haderror:
		mdelete ( xd, sizeof(XmlDoc) , "Msg20" );
		delete ( xd );
		g_udpServer.sendErrorReply ( slot , g_errno ) ;
		return true;
	}

	// now create a buffer to store title/summary/url/docLen and send back
	int32_t  need = getStoredSize();
	char *buf  = (char *)mmalloc ( need , "Msg20Reply" );
	if ( ! buf ) goto haderror;

	// should never have an error!
	int32_t used = serialize ( buf , need );

	// sanity
	if ( used != need ) { char *xx=NULL;*xx=0; }

	// sanity check, no, might have been banned/filtered above around
	// line 956 and just called sendReply directly
	//if ( st->m_memUsed == 0 ) { char *xx=NULL;*xx=0; }

	// use blue for our color
	int32_t color = 0x0000ff;
	// but use dark blue for niceness > 0
	if ( xd->m_niceness > 0 ) color = 0x0000b0;

	//Msg20Reply *tt = (Msg20Reply *)buf;

	// sanity check
	if ( ! xd->m_utf8ContentValid ) { char *xx=NULL;*xx=0; }
	// content length (minus the terminating null); the sanity check
	// above guarantees m_utf8ContentValid is set
	int32_t clen = xd->size_utf8Content - 1;
	// show it in performance graph
	if ( xd->m_startTimeValid ) 
		g_stats.addStat_r ( clen                         ,
				    xd->m_startTime              , 
				    gettimeofdayInMilliseconds() ,
				    color                        );
	
	
	// . del the list at this point, we've copied all the data into reply
	// . this will free a non-null State20::m_ps (ParseState) for us
	mdelete ( xd , sizeof(XmlDoc) , "xd20" );
	delete ( xd );
	
	g_udpServer.sendReply_ass ( buf , need , buf , need , slot );

	return true;
}
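// getStoredSize()/serialize() above form a strict contract: the former
// must return exactly the byte count the latter writes, which is why a
// used != need mismatch is a fatal sanity check. A sketch of the calling
// pattern for any class honoring that contract ("reply" is hypothetical;
// error handling elided):
/*
	long  need = reply->getStoredSize();
	char *buf  = (char *)mmalloc ( need , "reply" );
	if ( ! buf ) return sendErrorReply ( slot , g_errno );
	long  used = reply->serialize ( buf , need );
	if ( used != need ) { char *xx=NULL;*xx=0; }
	// the udp layer presumably takes ownership of buf (passed both as
	// the data to send and as the buffer to free when done)
	g_udpServer.sendReply_ass ( buf , need , buf , need , slot );
*/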