// Finish an add request: normalize g_errno, then send the reply on "slot".
// . "slot" is the UdpSlot that the (error) reply is sent on
// . "rdb" has sentReplyAdd(0) called on it before the reply goes out
// . g_errno may be set when this is called
// . NOTE(review): in this view the function has an unterminated log() call,
//   unbalanced braces and no visible return after the success reply; lines
//   appear to be missing -- confirm against the complete file before editing
void addedList ( UdpSlot *slot , Rdb *rdb ) {
	// no memory means to try again
	if ( g_errno == ENOMEM ) g_errno = ETRYAGAIN;
	// doing a full rebuild will add collections, so while g_repairMode
	// is on an ENOCOLLREC is reported as ETRYAGAIN instead
	if ( g_errno == ENOCOLLREC &&
	     g_repairMode > 0 )
	     //g_repair.m_fullRebuild )
		g_errno = ETRYAGAIN;

	// it seems like someone can delete a collection and there can
	// be adds in transit to doledb and it logs
	// "doledb bad collnum of 30110"
	// so just absorb those
	if ( g_errno == ENOCOLLREC ) {
		log("msg1: missing collrec to add to to %s. just dropping.",
		// NOTE(review): the argument for "%s" and the closing ");"
		// of the log() call above are not visible here
		g_errno = 0;

	// . if we got a ETRYAGAIN cuz the buffer we add to was full
	//   then we should sleep and try again!
	// . return false cuz this blocks for a period of time
	//   before trying again
	// . but now to free the udp slot when we are doing an urgent merge
	//   let's send an error back!
	//if ( g_errno == ETRYAGAIN ) {
		// debug msg
		// try adding again in 1 second
	//	g_loop.registerSleepCallback ( 1000, slot, tryAgainWrapper );
		// return now
	//	return;
	// random test
	//if ( (rand() % 10) == 1 ) g_errno = ETRYAGAIN;
	//int32_t niceness = slot->getNiceness() ;
	// select udp server based on niceness
	// (the niceness-based choice is commented out below, so g_udpServer
	// is always used)
	UdpServer *us = &g_udpServer ;
	//if ( niceness == 0 ) us = &g_udpServer2;
	//else                 us = &g_udpServer ;
	// chalk it up
	rdb->sentReplyAdd ( 0 );
	// are we done
	if ( ! g_errno ) {
		// . send an empty (non-error) reply as verification
		// . slot should be auto-nuked on transmission/timeout of reply
		// . udpServer should free the readBuf
		us->sendReply_ass ( NULL , 0 , NULL , 0 , slot ) ;
		// NOTE(review): no return or closing brace is visible after
		// the success reply -- presumably the function returns here;
		// verify, otherwise the error reply below would also be sent
	// on other errors just send the err code back
	log(LOG_ERROR,"%s:%s:%d: call sendErrorReply. error=%s", __FILE__, __func__, __LINE__, mstrerror(g_errno));
	us->sendErrorReply ( slot , g_errno );
// Send the Msg39 reply (or an error reply) on "slot" and release "msg39".
// . this must always be called sometime AFTER handleRequest() is called
// . "msg39" may be NULL; when non-NULL it is marked as no longer in use
//   and is mdelete()'d/deleted at the end
// . "reply"/"replyLen"/"replyMaxSize" are handed to sendReply_ass() when
//   g_errno is not set
// . "hadError" must only be true when g_errno is set; otherwise the sanity
//   check below deliberately writes through a NULL pointer
// . NOTE(review): the logf() call, the m_callback block and the final
//   "if ( msg39 )" block are not terminated in this view; lines appear to
//   be missing -- confirm against the complete file before editing
void sendReply ( UdpSlot *slot , Msg39 *msg39 , char *reply , int32_t replyLen ,
		 int32_t replyMaxSize , bool hadError ) {
	// debug msg
	if ( g_conf.m_logDebugQuery || (msg39&&msg39->m_debug) ) 
		logf(LOG_DEBUG,"query: msg39: [%"PTRFMT"] "
		     "Sending reply len=%"INT32".",
		// NOTE(review): the two format arguments and the closing
		// ");" of this logf() call are not visible here

	// sanity: caller claimed an error but g_errno is clear
	if ( hadError && ! g_errno ) { char *xx=NULL;*xx=0; }

	// no longer in use. msg39 will be NULL if ENOMEM or something
	if ( msg39 ) msg39->m_inUse = false;

	// . if we enter from a local call and not from handling a udp slot
	//   then execute this logic here to return control to caller.
	// . do not delete ourselves because we will be re-used probably and
	//   caller handles that now.
	if ( msg39 && msg39->m_callback ) {
		// if we blocked call user callback
		if ( msg39->m_blocked ) msg39->m_callback ( msg39->m_state );
		// if not sending back a udp reply, return now
		// NOTE(review): the return and closing brace this comment
		// refers to are not visible here

	// . now we can free the lists before sending
	// . may help a little bit...
	//if ( msg39 ) {
	//	for ( int32_t j = 0 ; j < msg39->m_msg2.m_numLists ; j++ ) 
	//		msg39->m_lists[j].freeList();
	// get the appropriate UdpServer for this niceness level
	UdpServer *us = &g_udpServer;
	// save g_errno in "err" and clear the global before replying
	int32_t err = g_errno;
	g_errno = 0;
	// send an error reply if g_errno was set, otherwise send "reply"
	if ( err ) us->sendErrorReply ( slot , err ) ; 
	else       us->sendReply_ass ( reply    , 
				       replyLen , 
				       reply    , 
				       replyMaxSize , 
				       slot     );
	// always delete ourselves when done handling the request
	if ( msg39 ) {
		mdelete ( msg39 , sizeof(Msg39) , "Msg39" );
		delete (msg39);
// Handle a msg12 (url lock) request read from "udpSlot".
// . the request type is picked by its size:
//   sizeof(ConfirmRequest) = confirm a lock that is already in
//   g_spiderLoop.m_lockTable, add the negative doledb key and re-add the
//   firstIp to the waiting tree;
//   sizeof(LockRequest) = remove a lock or try to acquire a new one
// . any other size gets an EBADREQUEST error reply
// . the one-byte reply is written into udpSlot->m_tmpBuf: 1 on
//   success/grant (and for every remove op), 0 when the lock is refused
// . "niceness" is only passed to QUICKPOLL() here
// . NOTE(review): several log() calls are unterminated and most error/reply
//   branches have no visible return or closing brace in this view; lines
//   appear to be missing -- confirm against the complete file before editing
void handleRequest12 ( UdpSlot *udpSlot , int32_t niceness ) {
	// get request
	char *request = udpSlot->m_readBuf;
	int32_t  reqSize = udpSlot->m_readBufSize;
	// shortcut
	UdpServer *us = &g_udpServer;
	// breathe
	QUICKPOLL ( niceness );

	// shortcut: the reply byte lives in the slot's own tmp buffer
	char *reply = udpSlot->m_tmpBuf;

	// . is it confirming that he got all the locks?
	// . if so, remove the doledb record and dock the doleiptable count
	//   before adding a waiting tree entry to re-pop the doledb record
	if ( reqSize == sizeof(ConfirmRequest) ) {
		char *msg = NULL;
		ConfirmRequest *cq = (ConfirmRequest *)request;

		// confirm the lock
		HashTableX *ht = &g_spiderLoop.m_lockTable;
		int32_t slot = ht->getSlot ( &cq->m_lockKeyUh48 );
		if ( slot < 0 ) { 
			log("spider: got a confirm request for a key not "
			    "in the table! coll must have been deleted "
			    " or reset "
			    "while lock request was outstanding.");
			g_errno = EBADENGINEER;
			log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
			us->sendErrorReply ( udpSlot , g_errno );
			// NOTE(review): no return/closing brace visible after
			// this error reply; "slot" is negative on this path
			//char *xx=NULL;*xx=0; }
		UrlLock *lock = (UrlLock *)ht->getValueFromSlot ( slot );
		lock->m_confirmed = true;

		// note that
		if ( g_conf.m_logDebugSpider ) // Wait )
			log("spider: got confirm lock request for ip=%s",
			// NOTE(review): the "%s" argument and closing ");"
			// are not visible here

		// get it
		SpiderColl *sc = g_spiderCache.getSpiderColl(cq->m_collnum);
		// make it negative (clear the low bit of n0)
		cq->m_doledbKey.n0 &= 0xfffffffffffffffeLL;
		// and add the negative rec to doledb (deletion operation)
		Rdb *rdb = &g_doledb.m_rdb;
		if ( ! rdb->addRecord ( cq->m_collnum,
					(char *)&cq->m_doledbKey,
					NULL , // data
					0    , //dataSize
					1 )){ // niceness
			// tree is dumping or something, probably ETRYAGAIN
			if ( g_errno != ETRYAGAIN ) {msg = "error adding neg rec to doledb";	log("spider: %s %s",msg,mstrerror(g_errno));
			//char *xx=NULL;*xx=0;
			log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
			us->sendErrorReply ( udpSlot , g_errno );
		// now remove from doleiptable since we removed from doledb
		if ( sc ) sc->removeFromDoledbTable ( cq->m_firstIp );

		// how many spiders outstanding for this coll and IP?
		//int32_t out=g_spiderLoop.getNumSpidersOutPerIp ( cq->m_firstIp);

		// DO NOT add back to waiting tree if max spiders
		// out per ip was 1 OR there was a crawldelay. but better
		// yet, take care of that in the winReq code above.

		// . now add to waiting tree so we add another spiderdb
		//   record for this firstip to doledb
		// . true = callForScan
		// . do not add to waiting tree if we have enough outstanding
		//   spiders for this ip. we will add to waiting tree when
		//   we receive a SpiderReply in addSpiderReply()
		if ( sc && //out < cq->m_maxSpidersOutPerIp &&
		     // this will just return true if we are not the 
		     // responsible host for this firstip
		    // DO NOT populate from this!!! say "false" here...
		     ! sc->addToWaitingTree ( 0 , cq->m_firstIp, false ) &&
		     // must be an error...
		     g_errno ) {
			// NOTE(review): "msg" is still NULL here unless the
			// doledb add above failed with a non-ETRYAGAIN error
			log("spider: %s %s",msg,mstrerror(g_errno));
			log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
			us->sendErrorReply ( udpSlot , g_errno );
		// success!!
		reply[0] = 1;
		us->sendReply_ass ( reply , 1 , reply , 1 , udpSlot );

	// sanity check: anything else must be exactly a LockRequest
	if ( reqSize != sizeof(LockRequest) ) {
		log("spider: bad msg12 request size of %" PRId32,reqSize);
		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
		us->sendErrorReply ( udpSlot , EBADREQUEST );
	// deny it if we are not synced yet! otherwise we core in 
	// getTimeGlobal() below
	if ( ! isClockInSync() ) { 
		// log it so we can debug it
		//log("spider: clock not in sync with host #0. so "
		//    "returning etryagain for lock reply");
		// let admin know why we are not spidering
		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
		us->sendErrorReply ( udpSlot , ETRYAGAIN );

	LockRequest *lr = (LockRequest *)request;
	//uint64_t lockKey = *(int64_t *)request;
	//int32_t lockSequence = *(int32_t *)(request+8);
	// is this a remove operation? assume not
	//bool remove = false;
	// get top bit
	//if ( lockKey & 0x8000000000000000LL ) remove = true;

	// mask it out
	//lockKey &= 0x7fffffffffffffffLL;
	// sanity check, just 6 bytes! (48 bits): the top 16 bits must be clear
	if ( lr->m_lockKeyUh48 &0xffff000000000000LL ) { char *xx=NULL;*xx=0; }
	// note it
	if ( g_conf.m_logDebugSpider )
		log("spider: got msg12 request uh48=%" PRId64" remove=%" PRId32,
		    lr->m_lockKeyUh48, (int32_t)lr->m_removeLock);
	// get time
	int32_t nowGlobal = getTimeGlobal();
	// shortcut
	HashTableX *ht = &g_spiderLoop.m_lockTable;

	// identify the requesting host from the slot's ip and port
	int32_t hostId = g_hostdb.getHostId ( udpSlot->m_ip , udpSlot->m_port );
	// this must be legit - sanity check
	if ( hostId < 0 ) { char *xx=NULL;*xx=0; }

	// remove expired locks from locktable
	removeExpiredLocks ( hostId );

	int64_t lockKey = lr->m_lockKeyUh48;

	// check tree
	int32_t slot = ht->getSlot ( &lockKey ); // lr->m_lockKeyUh48 );
	// put it here
	UrlLock *lock = NULL;
	// if there say no no
	if ( slot >= 0 ) lock = (UrlLock *)ht->getValueFromSlot ( slot );

	// if doing a remove operation and that was our hostid then unlock it
	// (the lock sequence must match as well)
	if ( lr->m_removeLock && 
	     lock && 
	     lock->m_hostId == hostId &&
	     lock->m_lockSequence == lr->m_lockSequence ) {
		// note it for now
		if ( g_conf.m_logDebugSpider )
			log("spider: removing lock for lockkey=%" PRIu64" hid=%" PRId32,
			// NOTE(review): format arguments and closing ");"
			// are not visible here
		// unlock it
		ht->removeSlot ( slot );
		// it is gone
		lock = NULL;
	// ok, at this point all remove ops return
	if ( lr->m_removeLock ) {
		reply[0] = 1;
		us->sendReply_ass ( reply , 1 , reply , 1 , udpSlot );

	// add new lock

	// if lock > 1 hour old then remove it automatically!!
	// (the threshold actually used is MAX_LOCK_AGE)
	if ( lock && nowGlobal - lock->m_timestamp > MAX_LOCK_AGE ) {
		// note it for now
		log("spider: removing lock after %" PRId32" seconds "
		    "for lockKey=%" PRIu64" hid=%" PRId32,
		    (nowGlobal - lock->m_timestamp),
		    // NOTE(review): the remaining arguments and closing ");"
		    // are not visible here
		// unlock it
		ht->removeSlot ( slot );
		// it is gone
		lock = NULL;
	// if lock still there, do not grant another lock
	if ( lock ) {
		// note it for now
		if ( g_conf.m_logDebugSpider )
			log("spider: refusing lock for lockkey=%" PRIu64" hid=%" PRId32,
			// NOTE(review): format arguments and closing ");"
			// are not visible here
		// reply byte 0 = lock refused
		reply[0] = 0;
		us->sendReply_ass ( reply , 1 , reply , 1 , udpSlot );
	// make the new lock
	UrlLock tmp;
	tmp.m_hostId       = hostId;
	tmp.m_lockSequence = lr->m_lockSequence;
	tmp.m_timestamp    = nowGlobal;
	tmp.m_expires      = 0;
	tmp.m_firstIp      = lr->m_firstIp;
	tmp.m_collnum      = lr->m_collnum;

	// when the spider returns we remove its lock on reception of the
	// spiderReply, however, we actually just set the m_expires time
	// to 5 seconds into the future in case there is a current request
	// to get a lock for that url in progress. but, we do need to
	// indicate that the spider has indeed completed by setting
	// m_spiderOutstanding to true. this way, addToWaitingTree() will
	// not count it towards a "max spiders per IP" quota when deciding
	// on if it should add a new entry for this IP.
	tmp.m_spiderOutstanding = true;
	// this is set when all hosts in the group (shard) have granted the
	// lock and the host sends out a confirmLockAcquisition() request.
	// until then we do not know if the lock will be granted by all hosts
	// in the group (shard)
	tmp.m_confirmed    = false;

	// put it into the table
	if ( ! ht->addKey ( &lockKey , &tmp ) ) {
		// return error if that failed!
		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
		us->sendErrorReply ( udpSlot , g_errno );
	// note it for now
	if ( g_conf.m_logDebugSpider )
		log("spider: granting lock for lockKey=%" PRIu64" hid=%" PRId32,
		// NOTE(review): format arguments and closing ");" are not
		// visible here
	// grant the lock: reply byte 1
	reply[0] = 1;
	us->sendReply_ass ( reply , 1 , reply , 1 , udpSlot );
// Callback run with the titledb list read for a Msg22 request.
// . "state" is the State22 for the request; it supplies the request
//   (m_r), the udp slot to reply on (m_slot) and the list that is actually
//   scanned (m_tlist)
// . "list" and "msg5" are not referenced in the visible code
// . scans the title recs for one matching the request (by url hash when
//   the request has a url, otherwise by docid) while keeping track of two
//   candidate "available" docids, ad1 and ad2
// . replies with an empty reply, an 8-byte available docid, or an error
//   reply depending on the request flags and on what was found
// . NOTE(review): in this view the "hadError" label, several closing
//   braces/returns and the call that sends the matching rec are not
//   visible; lines appear to be missing -- confirm against the complete
//   file before editing
void gotTitleList ( void *state , RdbList *list , Msg5 *msg5 ) {

	State22 *st = (State22 *)state;
	// if niceness is 0, use the higher priority udpServer
	UdpServer *us = &g_udpServer;
	// shortcut
	Msg22Request *r = st->m_r;
	// breathe

	// send error reply on error
	if ( g_errno ) { 
		log("db: Had error getting title record from titledb: %s.",
		// NOTE(review): the "%s" argument and closing ");" are not
		// visible here
		if ( ! g_errno ) { char *xx=NULL;*xx=0; }
		us->sendErrorReply ( st->m_slot , g_errno ); 
		mdelete ( st , sizeof(State22) , "Msg22" );
		delete ( st ); 
		return ;

	// convenience var
	RdbList *tlist = &st->m_tlist;

	// set probable docid
	long long pd = 0LL;
	if ( r->m_url[0] ) {
		pd = g_titledb.getProbableDocId(r->m_url);
		// it must agree with the probable docid saved in the state
		if ( pd != st->m_pd ) { 
			log("db: crap probable docids do not match! u=%s",
			// NOTE(review): the "%s" argument and closing ");"
			// are not visible here
			g_errno = EBADENGINEER;
			goto hadError;
		// sanity
		//if ( pd != st->m_pd ) { char *xx=NULL;*xx=0; }

	// the probable docid is the PREFERRED docid in this case
	if ( r->m_getAvailDocIdOnly ) pd = st->m_r->m_docId;

	// . these are both meant to be available docids
	// . if ad2 gets exhausted we use ad1
	long long ad1 = st->m_docId1;
	long long ad2 = pd;

	bool docIdWasFound = false;

	// scan the titleRecs in the list
	for ( ; ! tlist->isExhausted() ; tlist->skipCurrentRecord ( ) ) {
		// breathe
		QUICKPOLL ( r->m_niceness );
		// get the rec
		char *rec     = tlist->getCurrentRec();
		long  recSize = tlist->getCurrentRecSize();
		// get that key
		key_t *k = (key_t *)rec;
		// skip negative recs, first one should not be negative however
		// (a clear low bit in n0 marks a negative rec)
		if ( ( k->n0 & 0x01 ) == 0x00 ) continue;

		// get docid of that titlerec
		long long dd = g_titledb.getDocId(k);

		if ( r->m_getAvailDocIdOnly ) {
			// make sure our available docids are available!
			// (bump a candidate that collides with this rec)
			if ( dd == ad1 ) ad1++;
			if ( dd == ad2 ) ad2++;
		// if we had a url make sure uh48 matches
		else if ( r->m_url[0] ) {
			// get it
			long long uh48 = g_titledb.getUrlHash48(k);
			// sanity check
			if ( st->m_uh48 == 0 ) { char *xx=NULL;*xx=0; }
			// make sure our available docids are available!
			if ( dd == ad1 ) ad1++;
			if ( dd == ad2 ) ad2++;
			// we must match this exactly
			if ( uh48 != st->m_uh48 ) continue;
		// otherwise, check docid
		else {
			// compare that
			if ( r->m_docId != dd ) continue;

		// flag that we matched m_docId
		docIdWasFound = true;

		// do not set back titlerec if just want avail docid
		//if ( r->m_getAvailDocIdOnly ) continue;

		// ok, if just "checking tfndb" no need to go further
		if ( r->m_justCheckTfndb ) {
			// send back a good reply (empty means found!)
			us->sendReply_ass ( NULL,0,NULL,0,st->m_slot);
			// don't forget to free the state
			mdelete ( st , sizeof(State22) , "Msg22" );
			delete ( st );
			// NOTE(review): no return is visible after the state
			// is deleted -- verify

		// use rec as reply
		char *reply = rec;

		// . send this rec back, it's a match
		// . if only one rec in list, steal the list's memory
		if ( recSize != tlist->getAllocSize() ) {
			// otherwise, alloc space for the reply
			reply = (char *)mmalloc (recSize, "Msg22");
			if ( ! reply ) goto hadError;
			memcpy ( reply , rec , recSize );
		// otherwise we send back the whole list!
		else {
			// we stole this from list
			tlist->m_ownData = false;
		// off ya go
		// NOTE(review): the call that sends "reply" is not visible
		// here
		// don't forget to free the state
		mdelete ( st , sizeof(State22) , "Msg22" );
		delete ( st );
		// all done

	// maybe no available docid if we breached our range
	// (0 marks a candidate as unusable)
	if ( ad1 >= pd           ) ad1 = 0LL;
	if ( ad2 >  st->m_docId2 ) ad2 = 0LL;
	// get best: prefer ad2
	long long ad = ad2;
	// but wrap around if we need to
	if ( ad == 0LL ) ad = ad1;
	// if "docId" was unmatched that should be the preferred available
	// docid then...
	//if(! docIdWasFound && r->m_getAvailDocIdOnly && ad != r->m_docId ) { 
	//	char *xx=NULL;*xx=0; }
	// remember it. this might be zero if none exist!
	st->m_availDocId = ad;
	// note it
	if ( ad == 0LL && (r->m_getAvailDocIdOnly || r->m_url[0]) ) 
		log("msg22: avail docid is 0 for pd=%lli!",pd);

	// . ok, return an available docid
	if ( r->m_url[0] || r->m_justCheckTfndb || r->m_getAvailDocIdOnly ) {
		// store docid in reply, using the slot's own tmp buffer
		char *p = st->m_slot->m_tmpBuf;
		// send back the available docid
		*(long long *)p = st->m_availDocId;
		// send it (8 bytes)
		us->sendReply_ass ( p , 8 , p , 8 , st->m_slot );
		// don't forget to free state
		mdelete ( st , sizeof(State22) , "Msg22" );
		delete ( st );
		// NOTE(review): no return is visible after the state is
		// deleted; "r" is still used below -- verify

	// not found! and it was a docid based request...
	log("msg22: could not find title rec for docid %llu",r->m_docId);
	g_errno = ENOTFOUND;
	goto hadError;