// this must always be called sometime AFTER handleRequest() is called
void sendReply ( UdpSlot *slot , Msg39 *msg39 , char *reply , long replyLen ,
		 long replyMaxSize , bool hadError ) {
	// debug msg
	if ( g_conf.m_logDebugQuery || (msg39&&msg39->m_debug) ) 
		logf(LOG_DEBUG,"query: msg39: [%lu] Sending reply len=%li.",
		     (long)msg39,replyLen);

	// sanity
	if ( hadError && ! g_errno ) { char *xx=NULL;*xx=0; }

	// no longer in use. msg39 will be NULL if ENOMEM or something
	if ( msg39 ) msg39->m_inUse = false;

	// . if we enter from a local call and not from handling a udp slot
	//   then execute this logic here to return control to caller.
	// . do not delete ourselves because we will be re-used probably and
	//   caller handles that now.
	if ( msg39 && msg39->m_callback ) {
		// if we blocked call user callback
		if ( msg39->m_blocked ) msg39->m_callback ( msg39->m_state );
		// if not sending back a udp reply, return now
		return;
	}

	// . now we can free the lists before sending
	// . may help a little bit...
	//if ( msg39 ) {
	//	for ( long j = 0 ; j < msg39->m_msg2.m_numLists ; j++ ) 
	//		msg39->m_lists[j].freeList();
	//}
	// get the appropriate UdpServer for this niceness level
	UdpServer *us = &g_udpServer;
	// i guess clear this
	long err = g_errno;
	g_errno = 0;
	// send an error reply if g_errno is set
	if ( err ) us->sendErrorReply ( slot , err ) ; 
	else       us->sendReply_ass ( reply    , 
				       replyLen , 
				       reply    , 
				       replyMaxSize , 
				       slot     );
	// always delete ourselves when done handling the request
	if ( msg39 ) {
		mdelete ( msg39 , sizeof(Msg39) , "Msg39" );
		delete (msg39);
	}
}
Example #2
0
UINT __stdcall UdpServerForIpWorker(PVOID context)
{
	UdpServer server;
	if (server.Init("127.0.0.1", (int)context)) {
		std::vector<UCHAR> buffer;
		buffer.resize(0x1000);
		for (;;) {
			sockaddr_in addr;
			int recv_length = server.Recv((char *)buffer.data(), buffer.size(), &addr);
			if (recv_length == -1) {
				break;
			}

			server.Send("OK", 3, &addr);
		}
	}

	return 0;
}
// g_errno may be set when this is called
void addedList ( UdpSlot *slot , Rdb *rdb ) {
	// no memory means to try again
	if ( g_errno == ENOMEM ) g_errno = ETRYAGAIN;
	// doing a full rebuid will add collections
	if ( g_errno == ENOCOLLREC &&
	     g_repairMode > 0 )
	     //g_repair.m_fullRebuild )
		g_errno = ETRYAGAIN;
	// . if we got a ETRYAGAIN cuz the buffer we add to was full
	//   then we should sleep and try again!
	// . return false cuz this blocks for a period of time
	//   before trying again
	// . but now to free the udp slot when we are doing an urgent merge
	//   let's send an error back!
	//if ( g_errno == ETRYAGAIN ) {
		// debug msg
		//log("REGISTERING SLEEP CALLBACK");
		// try adding again in 1 second
	//	g_loop.registerSleepCallback ( 1000, slot, tryAgainWrapper );
		// return now
	//	return;
	//}
	// random test
	//if ( (rand() % 10) == 1 ) g_errno = ETRYAGAIN;
	//long niceness = slot->getNiceness() ;
	// select udp server based on niceness
	UdpServer *us = &g_udpServer ;
	//if ( niceness == 0 ) us = &g_udpServer2;
	//else                 us = &g_udpServer ;
	// chalk it up
	rdb->sentReplyAdd ( 0 );
	// are we done
	if ( ! g_errno ) {
		// . send an empty (non-error) reply as verification
		// . slot should be auto-nuked on transmission/timeout of reply
		// . udpServer should free the readBuf
		us->sendReply_ass ( NULL , 0 , NULL , 0 , slot ) ;
		return;
	}
	// on other errors just send the err code back
	us->sendErrorReply ( slot , g_errno );
}
Example #4
0
void* UdpServer::startThread(void *obj)
{
	UdpServer *instance = reinterpret_cast<UdpServer *>(obj);

	socklen_t londesde;
	londesde = sizeof(sockaddr_in);

	while(true)
	{
		int size;
		londesde = sizeof(sockaddr_in);

		(instance->getUDPRespSock())->sin_port = htons (instance->getUDPPort());

		//cerr<<"UDPSERV: listening"<<endl;

		if ( (size = recvfrom(instance->getUDPSocket(), instance->getUDPBuffer(), MAXMSGSIZE, 0, (sockaddr *)instance->getUDPRespSock(), &londesde))  < 0 ) {
			cerr<<"UDPSERV: Error calling recvfrom()"<<endl;
			return NULL;
		}

		if(size>0)
			instance->UDPmanageRecv(size);
	}
	
}
// . did we receive a checkoff request from a fellow twin?
// . request is a list of checkoff request keys ("a" keys)
void handleRequest5d ( UdpSlot *slot , long netnice ) {
	// get the sending hostid
	long sid = slot->m_hostId;
	// sanity check
	if ( sid < 0 ) { char *xx=NULL; *xx=0; }
	// get the request buffer
	//key128_t *keys = (key128_t *)slot->m_readBuf;
	long      nk   = slot->m_readBufSize / 16;
	// shortcut
	UdpServer *us = &g_udpServer;
	// if tree gets full, then return false forever
	if ( ! g_syncdb.m_qt.hasRoomForKeys ( nk ) ) { 
		us->sendErrorReply ( slot , ETRYAGAIN );
		return; 
	}
	for ( long i = 0 ; i < nk ; i++ ) {
		// get the key
		key128_t k = g_syncdb.m_keys[i];
		// sanity check. must be a negative key.
		if ( (k.n0 & 0x1) != 0x0 ) { char *xx=NULL;*xx=0; }
		// get the anti key. the "need to recv checkoff request"
		// key which is the positive
		key128_t pk = k;
		// make it positive
		pk.n0 |= 0x01;
		// is it in there?
		long nn = g_syncdb.m_qt.getNode ( 0 , (char *)&pk );
		// if yes, nuke it. they annihilate.
		if ( nn >= 0 ) {
			g_syncdb.m_qt.deleteNode ( nn , true );
			continue;
		}
		// . otherwise, add right to the tree
		// . should always succeed!
		if ( g_syncdb.m_qt.addKey(&k)<0) { char *xx=NULL;*xx=0; }
	}
	// return empty reply to mean success
	us->sendReply_ass ( NULL , 0 , NULL , 0 , slot );
}
Example #6
0
void * UdpServer::OperatorThread(void * pParam)
{
    if(!pParam)
    {
        return 0;
    }

    UdpServer * pThis = (UdpServer*)pParam;
    pthread_mutex_lock(&pThis->mMutex);
    MsgInfo data;
    if(pThis->mDataList.size() > 0)
    {
        data = pThis->mDataList.front();
        pThis->mDataList.pop_front();
        if(pThis->mOperaFunc)
        {
            pThis->mOperaFunc(data.Content, data.Fd, data.Time);
        }
        pThis->OnReceiveMessage(data.Content, data.Fd, data.Time);
    }
    pthread_mutex_unlock(&pThis->mMutex);
    return 0;
}
// . THIS Msg0 class must be alloc'd, i.e. not on the stack, etc.
// . if list is stored locally this tries to get it locally
// . otherwise tries to get the list from the network
// . returns false if blocked, true otherwise
// . sets g_errno on error
// . NOTE: i was having problems with queries being cached too long, you
//   see the cache here is a NETWORK cache, so when the machines that owns
//   the list updates it on disk it can't flush our cache... so use a small
//   maxCacheAge of like , 30 seconds or so...
bool Msg0::getList ( int64_t hostId      , // host to ask (-1 if none)
		     int32_t      ip          , // info on hostId
		     int16_t     port        ,
		     int32_t      maxCacheAge , // max cached age in seconds
		     bool      addToCache  , // add net recv'd list to cache?
		     char      rdbId       , // specifies the rdb
		     //char     *coll        ,
		     collnum_t collnum ,
		     RdbList  *list        ,
		     //key_t     startKey    , 
		     //key_t     endKey      , 
		     char     *startKey    ,
		     char     *endKey      ,
		     int32_t      minRecSizes ,  // use -1 for no max
		     void     *state       ,
		     void    (* callback)(void *state ),//, RdbList *list ) ,
		     int32_t      niceness    ,
		     bool      doErrorCorrection ,
		     bool      includeTree ,
		     bool      doMerge     ,
		     int32_t      firstHostId   ,
		     int32_t      startFileNum  ,
		     int32_t      numFiles      ,
		     int32_t      timeout       ,
		     int64_t syncPoint     ,
		     int32_t      preferLocalReads ,
		     Msg5     *msg5             ,
		     Msg5     *msg5b            ,
		     bool      isRealMerge      ,
//#ifdef SPLIT_INDEXDB
		     bool      allowPageCache    ,
		     bool      forceLocalIndexdb ,
		     bool      noSplit , // doIndexdbSplit    ,
		     int32_t      forceParitySplit  ) {
//#else
//		     bool      allowPageCache ) {
//#endif
	// this is obsolete! mostly, but we need it for PageIndexdb.cpp to 
	// show a "termlist" for a given query term in its entirety so you 
	// don't have to check each machine in the network. if this is true it
	// means to query each split and merge the results together into a
	// single unified termlist. only applies to indexdb/datedb.
	//if ( doIndexdbSplit ) { char *xx = NULL; *xx = 0; }
	// note this because if caller is wrong it hurts performance major!!
	//if ( doIndexdbSplit ) 
	//	logf(LOG_DEBUG,"net: doing msg0 with indexdb split true");
	// warning
	if ( collnum < 0 ) log(LOG_LOGIC,"net: NULL collection. msg0.");

	//if ( doIndexdbSplit ) { char *xx=NULL;*xx=0; }

	// reset the list they passed us
	list->reset();
	// get keySize of rdb
	m_ks = getKeySizeFromRdbId ( rdbId );
	// if startKey > endKey, don't read anything
	//if ( startKey > endKey ) return true;
	if ( KEYCMP(startKey,endKey,m_ks)>0 ) { char *xx=NULL;*xx=0; }//rettrue
	// . reset hostid if it is dead
	// . this is causing UOR queries to take forever when we have a dead
	if ( hostId >= 0 && g_hostdb.isDead ( hostId ) ) hostId = -1;
	// no longer accept negative minrecsize
	if ( minRecSizes < 0 ) {
		g_errno = EBADENGINEER;
		log(LOG_LOGIC,
		    "net: msg0: Negative minRecSizes no longer supported.");
		char *xx=NULL;*xx=0;
		return true;
	}

	// debug msg
	//if ( niceness != 0 ) log("HEY start");
	// ensure startKey last bit clear, endKey last bit set
	//if ( (startKey.n0 & 0x01) == 0x01 ) 
	//	log("Msg0::getList: warning startKey lastbit set"); 
	//if ( (endKey.n0   & 0x01) == 0x00 ) 
	//	log("Msg0::getList: warning endKey lastbit clear"); 
	// remember these
	m_state         = state;
	m_callback      = callback;
	m_list          = list;
	m_hostId        = hostId;
	m_niceness      = niceness;
	//m_ip            = ip;
	//m_port          = port;
	m_addToCache    = addToCache;
	// . these define our request 100%
	//m_startKey      = startKey;
	//m_endKey        = endKey;
	KEYSET(m_startKey,startKey,m_ks);
	KEYSET(m_endKey,endKey,m_ks);
	m_minRecSizes   = minRecSizes;
	m_rdbId         = rdbId;
	m_collnum = collnum;//          = coll;
	m_isRealMerge   = isRealMerge;
	m_allowPageCache = allowPageCache;

	// . group to ask is based on the first key 
	// . we only do 1 group per call right now
	// . groupMask must turn on higher bits first (count downwards kinda)
	// . titledb and spiderdb use special masks to get groupId

	// if diffbot.cpp is reading spiderdb from each shard we have to
	// get groupid from hostid here lest we core in getGroupId() below.
	// it does that for dumping spiderdb to the client browser. they
	// can download the whole enchilada.
	if ( hostId >= 0 && m_rdbId == RDB_SPIDERDB )
		m_shardNum = 0;
	// did they force it? core until i figure out what this is
	else if ( forceParitySplit >= 0 ) 
		//m_groupId =  g_hostdb.getGroupId ( forceParitySplit );
		m_shardNum = forceParitySplit;
	else
		//m_groupId = getGroupId ( m_rdbId , startKey , ! noSplit );
		m_shardNum = getShardNum ( m_rdbId , startKey );

	// if we are looking up a termlist in posdb that is split by termid and
	// not the usual docid then we have to set this posdb key bit that tells
	// us that ...
	if ( noSplit && m_rdbId == RDB_POSDB )
		m_shardNum = g_hostdb.getShardNumByTermId ( startKey );

	// how is this used?
	//if ( forceLocalIndexdb ) m_groupId = g_hostdb.m_groupId;
	if ( forceLocalIndexdb ) m_shardNum = getMyShardNum();

	// . store these parameters
	// . get a handle to the rdb in case we can satisfy locally
	// . returns NULL and sets g_errno on error
	QUICKPOLL((m_niceness));
	Rdb *rdb = getRdbFromId ( m_rdbId );
	if ( ! rdb ) return true;
	// we need the fixedDataSize
	m_fixedDataSize = rdb->getFixedDataSize();
	m_useHalfKeys   = rdb->useHalfKeys();
	// . debug msg
	// . Msg2 does this when checking for a cached compound list.
	//   compound lists do not actually exist, they are merges of smaller
	//   UOR'd lists.
	if ( maxCacheAge != 0 && ! addToCache && (numFiles > 0 || includeTree))
		log(LOG_LOGIC,"net: msg0: "
		    "Weird. check but don't add... rdbid=%"INT32".",(int32_t)m_rdbId);
	// set this here since we may not call msg5 if list not local
	//m_list->setFixedDataSize ( m_fixedDataSize );

	// . now that we do load balancing we don't want to do a disk lookup
	//   even if local if we are merging or dumping
	// . UNLESS g_conf.m_preferLocalReads is true
	if ( preferLocalReads == -1 ) 
		preferLocalReads = g_conf.m_preferLocalReads;

	// . always prefer local for full split clusterdb
	// . and keep the tfndb/titledb lookups in the same stripe
	// . so basically we can't do biased caches if fully split
	//if ( g_conf.m_fullSplit ) preferLocalReads = true;
	preferLocalReads = true;

	// it it stored locally?
	bool isLocal = ( m_hostId == -1 && //g_hostdb.m_groupId == m_groupId );
			 m_shardNum == getMyShardNum() );
	// only do local lookups if this is true
	if ( ! preferLocalReads ) isLocal = false;

	/*
	m_numSplit = 1;
	if ( g_hostdb.m_indexSplits > 1 &&
	     ( rdbId == RDB_POSDB || rdbId==RDB_DATEDB)&&
	     ! forceLocalIndexdb && doIndexdbSplit ) {
		isLocal  = false;
		//m_numSplit = INDEXDB_SPLIT;
		m_numSplit = g_hostdb.m_indexSplits;
		char *xx=NULL;*xx=0;
	}
	*/
	/*
	int64_t singleDocIdQuery = 0LL;
	if ( rdbId == RDB_POSDB ) {
		int64_t d1 = g_posdb.getDocId(m_startKey);
		int64_t d2 = g_posdb.getDocId(m_endKey);
		if ( d1+1 == d2 ) singleDocIdQuery = d1;
	}

	// . try the LOCAL termlist cache
	// . so when msg2 is evaluating a gbdocid:| query and it has to
	//   use msg0 to go across the network to get the same damn termlist
	//   over and over again for the same docid, this will help alot.
	// . ideally it'd be nice if the seo pipe in xmldoc.cpp can try to
	//   send the same gbdocid:xxxx docids to the same hosts. maybe hash
	//   based on docid into the list of hosts and if that host is busy
	//   just chain until we find someone not busy.
	if ( singleDocIdQuery &&
	     getListFromTermListCache ( coll,
					m_startKey,
					m_endKey,
					maxCacheAge,
					list ) )
		// found!
		return true;
	*/

	// but always local if only one host
	if ( g_hostdb.getNumHosts() == 1 ) isLocal = true;

	// force a msg0 if doing a docid restrictive query like
	// gbdocid:xxxx|<query> so we call cacheTermLists() 
	//if ( singleDocIdQuery ) isLocal = false;

	// . if the group is local then do it locally
	// . Msg5::getList() returns false if blocked, true otherwise
	// . Msg5::getList() sets g_errno on error
	// . don't do this if m_hostId was specified
	if ( isLocal ) { // && !g_conf.m_interfaceMachine ) {
		if ( msg5 ) {
			m_msg5 = msg5;
			m_deleteMsg5 = false;
		}
		else {
			try { m_msg5 = new ( Msg5 ); } 
			catch ( ... ) {
				g_errno = ENOMEM;
				log("net: Local alloc for disk read failed "
				    "while tring to read data for %s. "
				    "Trying remote request.",
				    getDbnameFromId(m_rdbId));
				goto skip;
			}
			mnew ( m_msg5 , sizeof(Msg5) , "Msg0" );
			m_deleteMsg5 = true;
		}

		QUICKPOLL(m_niceness);
		// same for msg5b
		if ( msg5b ) {
			m_msg5b = msg5b;
			m_deleteMsg5b = false;
		}
		/*
		else if ( m_rdbId == RDB_TITLEDB ) {
			try { m_msg5b = new ( Msg5 ); } 
			catch ( ... ) {
				g_errno = ENOMEM;
				log("net: Local alloc for disk read failed "
				    "while tring to read data for %s. "
				    "Trying remote request. 2.",
				    getDbnameFromId(m_rdbId));
				goto skip;
			}
			mnew ( m_msg5b , sizeof(Msg5) , "Msg0b" );
			m_deleteMsg5b = true;
		}
		*/
		QUICKPOLL(m_niceness);
		if ( ! m_msg5->getList ( rdbId,
					 m_collnum ,
					 m_list ,
					 m_startKey ,
					 m_endKey   ,
					 m_minRecSizes ,
					 includeTree   , // include Tree?
					 addToCache    , // addToCache?
					 maxCacheAge   ,
					 startFileNum  , 
					 numFiles      ,
					 this ,
					 gotListWrapper2   ,
					 niceness          ,
					 doErrorCorrection ,
					 NULL , // cacheKeyPtr
					 0    , // retryNum
					 -1   , // maxRetries
					 true , // compensateForMerge
					 syncPoint ,
					 NULL,//m_msg5b   ,
					 m_isRealMerge ,
					 m_allowPageCache ) ) return false;
		// nuke it
		reset();
		return true;
	}
skip:
	// debug msg
	if ( g_conf.m_logDebugQuery )
		log(LOG_DEBUG,"net: msg0: Sending request for data to "
		    "shard=%"UINT32" "
		    "listPtr=%"PTRFMT" minRecSizes=%"INT32" termId=%"UINT64" "
		    //"startKey.n1=%"XINT32",n0=%"XINT64" (niceness=%"INT32")",
		    "startKey.n1=%"XINT64",n0=%"XINT64" (niceness=%"INT32")",
		    //g_hostdb.makeHostId ( m_groupId ) ,
		    m_shardNum,
		    (PTRTYPE)m_list,
		    m_minRecSizes, g_posdb.getTermId(m_startKey) , 
		    //m_startKey.n1,m_startKey.n0 , (int32_t)m_niceness);
		    KEY1(m_startKey,m_ks),KEY0(m_startKey),
		    (int32_t)m_niceness);

	char *replyBuf = NULL;
	int32_t  replyBufMaxSize = 0;
	bool  freeReply = true;

	// adjust niceness for net transmission
	bool realtime = false;
	//if ( minRecSizes + 32 < TMPBUFSIZE ) realtime = true;

	// if we're niceness 0 we need to pre-allocate for reply since it
	// might be received within the asynchronous signal handler which
	// cannot call mmalloc()
	if ( realtime ) { // niceness <= 0 || netnice == 0 ) {
		// . we should not get back more than minRecSizes bytes since 
		//   we are now performing merges
		// . it should not slow things down too much since the hashing
		//   is 10 times slower than merging anyhow...
		// . CAUTION: if rdb is not fixed-datasize then this will
		//            not work for us! it can exceed m_minRecSizes.
		replyBufMaxSize = m_minRecSizes ;
		// . get a little extra to fix the error where we ask for 64 
		//   but get 72
		// . where is that coming from?
		// . when getting titleRecs we often exceed the minRecSizes 
		// . ?Msg8? was having trouble. was int16_t 32 bytes sometimes.
		replyBufMaxSize += 36;
		// why add ten percent?
		//replyBufMaxSize *= 110 ;
		//replyBufMaxSize /= 100 ;
		// make a buffer to hold the reply
//#ifdef SPLIT_INDEXDB
/*
		if ( m_numSplit > 1 ) {
			m_replyBufSize = replyBufMaxSize * m_numSplit;
			replyBuf = (char *) mmalloc(m_replyBufSize, "Msg0");
			m_replyBuf  = replyBuf;
			freeReply = false;
		}
		else
*/
//#endif
			replyBuf = (char *) mmalloc(replyBufMaxSize , "Msg0");
		// g_errno is set and we return true if it failed
		if ( ! replyBuf ) {
			log("net: Failed to pre-allocate %"INT32" bytes to hold "
			    "data read remotely from %s: %s.",
			    replyBufMaxSize,getDbnameFromId(m_rdbId),
			    mstrerror(g_errno));
			return true;
		}
	}

	// . make a request with the info above (note: not in network order)
	// . IMPORTANT!!!!! if you change this change 
	//   Multicast.cpp::sleepWrapper1 too!!!!!!!!!!!!
	//   no, not anymore, we commented out that request peeking code
	char *p = m_request;
	*(int64_t *) p = syncPoint        ; p += 8;
	//*(key_t     *) p = m_startKey       ; p += sizeof(key_t);
	//*(key_t     *) p = m_endKey         ; p += sizeof(key_t);
	*(int32_t      *) p = m_minRecSizes    ; p += 4;
	*(int32_t      *) p = startFileNum     ; p += 4;
	*(int32_t      *) p = numFiles         ; p += 4;
	*(int32_t      *) p = maxCacheAge      ; p += 4;
	if ( p - m_request != RDBIDOFFSET ) { char *xx=NULL;*xx=0; }
	*p               = m_rdbId          ; p++;
	*p               = addToCache       ; p++;
	*p               = doErrorCorrection; p++;
	*p               = includeTree      ; p++;
	*p               = (char)niceness   ; p++;
	*p               = (char)m_allowPageCache; p++;
	KEYSET(p,m_startKey,m_ks);          ; p+=m_ks;
	KEYSET(p,m_endKey,m_ks);            ; p+=m_ks;
	// NULL terminated collection name
	//strcpy ( p , coll ); p += gbstrlen ( coll ); *p++ = '\0';
	*(collnum_t *)p = m_collnum; p += sizeof(collnum_t);
	m_requestSize    = p - m_request;
	// ask an individual host for this list if hostId is NOT -1
	if ( m_hostId != -1 ) {
		// get Host
		Host *h = g_hostdb.getHost ( m_hostId );
		if ( ! h ) { 
			g_errno = EBADHOSTID; 
			log(LOG_LOGIC,"net: msg0: Bad hostId of %"INT64".",
			    m_hostId);
			return true;
		}
		// if niceness is 0, use the higher priority udpServer
		UdpServer *us ;
		uint16_t port;
		QUICKPOLL(m_niceness);
		//if ( niceness <= 0 || netnice == 0 ) { 
		//if ( realtime ) {
		//	us = &g_udpServer2; port = h->m_port2; }
		//else                 { 
		us = &g_udpServer ; port = h->m_port ; 
		// . returns false on error and sets g_errno, true otherwise
		// . calls callback when reply is received (or error)
		// . we return true if it returns false
		if ( ! us->sendRequest ( m_request     ,
					 m_requestSize ,
					 0x00          , // msgType
					 h->m_ip       ,
					 port          ,
					 m_hostId      ,
					 NULL          , // the slotPtr
					 this          ,
					 gotSingleReplyWrapper ,
					 timeout       ,
					 -1            , // backoff
					 -1            , // maxwait
					 replyBuf      ,
					 replyBufMaxSize ,
					 m_niceness     ) ) // cback niceness
			return true;
		// return false cuz it blocked
		return false;
	}
	// timing debug
	if ( g_conf.m_logTimingNet )
		m_startTime = gettimeofdayInMilliseconds();
	else
		m_startTime = 0;
	//if ( m_rdbId == RDB_INDEXDB ) log("Msg0:: getting remote indexlist. "
	//			"termId=%"UINT64", "
	//			"groupNum=%"UINT32"",
	//			g_indexdb.getTermId(m_startKey) ,
	//			g_hostdb.makeHostId ( m_groupId ) );

	/*
	// make the cache key so we can see what remote host cached it, if any
	char cacheKey[MAX_KEY_BYTES];
	//key_t cacheKey = makeCacheKey ( startKey     ,
	makeCacheKey ( startKey     ,
		       endKey       ,
		       includeTree  ,
		       minRecSizes  ,
		       startFileNum ,
		       numFiles     ,
		       cacheKey     ,
		       m_ks         );
	*/

	// . get the top int32_t of the key
	// . i guess this will work for 128 bit keys... hmmmmm
	int32_t keyTop = hash32 ( (char *)startKey , m_ks );

	/*
	// allocate space
	if ( m_numSplit > 1 ) {
		int32_t  need = m_numSplit * sizeof(Multicast) ;
		char *buf  = (char *)mmalloc ( need,"msg0mcast" );
		if ( ! buf ) return true;
		m_mcasts = (Multicast *)buf;
		for ( int32_t i = 0; i < m_numSplit ; i++ )
			m_mcasts[i].constructor();
	}
	*/

        // . otherwise, multicast to a host in group "groupId"
	// . returns false and sets g_errno on error
	// . calls callback on completion
	// . select first host to send to in group based on upper 32 bits
	//   of termId (m_startKey.n1)
//#ifdef SPLIT_INDEXDB
	// . need to send out to all the indexdb split hosts
	m_numRequests = 0;
	m_numReplies  = 0;
	//for ( int32_t i = 0; i < m_numSplit; i++ ) {

	QUICKPOLL(m_niceness);
	//int32_t gr;
	char *buf;
	/*
	if ( m_numSplit > 1 ) {
		gr  = g_indexdb.getSplitGroupId ( baseGroupId, i );
		buf = &replyBuf[i*replyBufMaxSize];
	}
	else {
	*/
	//gr  = m_groupId;
	buf = replyBuf;
	//}

	// get the multicast
	Multicast *m = &m_mcast;
	//if ( m_numSplit > 1 ) m = &m_mcasts[i];

        if ( ! m->send ( m_request    , 
//#else
//        if ( ! m_mcast.send ( m_request    , 
//#endif
			      m_requestSize, 
			      0x00         , // msgType 0x00
			      false        , // does multicast own request?
			 m_shardNum ,
//#ifdef SPLIT_INDEXDB
//			      gr           , // group + offset
//#else
//			      m_groupId    , // group to send to (groupKey)
//#endif
			      false        , // send to whole group?
			      //m_startKey.n1, // key is passed on startKey
			      keyTop       , // key is passed on startKey
			      this         , // state data
			      NULL         , // state data
			      gotMulticastReplyWrapper0 ,
			      timeout      , // timeout in seconds (was 30)
			      niceness     ,
			      realtime     ,
			      firstHostId  ,
//#ifdef SPLIT_INDEXDB
//			      &replyBuf[i*replyBufMaxSize] ,
//#else
//			      replyBuf        ,
//#endif
			      buf             ,
			      replyBufMaxSize ,
			      freeReply       , // free reply buf?
			      true            , // do disk load balancing?
			      maxCacheAge     ,
			      //(key_t *)cacheKey        ,
			      // multicast uses it for determining the best
			      // host to send the request to when doing 
			      // disk load balancing. if the host has our 
			      // data cached, then it will probably get to
			      // handle the request. for now let's just assume
			      // this is a 96-bit key. TODO: fix...
			 0 , // *(key_t *)cacheKey        ,
			      rdbId           ,
			      minRecSizes     ) ) {
		log("net: Failed to send request for data from %s in shard "
		    "#%"UINT32" over network: %s.",
		    getDbnameFromId(m_rdbId),m_shardNum, mstrerror(g_errno));
		// no, multicast will free this when it is destroyed
		//if (replyBuf) mfree ( replyBuf , replyBufMaxSize , "Msg22" );
		// but speed it up
//#ifdef SPLIT_INDEXDB
		m_errno = g_errno;
		m->reset();
		if ( m_numRequests > 0 )
			return false;
//#else
//		m_mcast.reset();
//#endif
		return true;
	}
//#ifdef SPLIT_INDEXDB
	m_numRequests++;

//#endif
	// we blocked
	return false;
}
// . reply to a request for an RdbList
// . MUST call g_udpServer::sendReply or sendErrorReply() so slot can
//   be destroyed
void handleRequest0 ( UdpSlot *slot , int32_t netnice ) {
	// if niceness is 0, use the higher priority udpServer
	UdpServer *us = &g_udpServer;
	//if ( netnice == 0 ) us = &g_udpServer2;
	// get the request
	char *request     = slot->m_readBuf;
	int32_t  requestSize = slot->m_readBufSize;
	// collection is now stored in the request, so i commented this out
	//if ( requestSize != MSG0_REQ_SIZE ) {
	//	log("net: Received bad data request size of %"INT32" bytes. "
	//	    "Should be %"INT32".", requestSize ,(int32_t)MSG0_REQ_SIZE);
	//	us->sendErrorReply ( slot , EBADREQUESTSIZE );
	//	return;
	//}
	// parse the request
	char *p                  = request;
	int64_t syncPoint          = *(int64_t *)p ; p += 8;
	//key_t     startKey           = *(key_t     *)p ; p += sizeof(key_t);
	//key_t     endKey             = *(key_t     *)p ; p += sizeof(key_t);
	int32_t      minRecSizes        = *(int32_t      *)p ; p += 4;
	int32_t      startFileNum       = *(int32_t      *)p ; p += 4;
	int32_t      numFiles           = *(int32_t      *)p ; p += 4;
	int32_t      maxCacheAge        = *(int32_t      *)p ; p += 4;
	char      rdbId              = *p++;
	char      addToCache         = *p++;
	char      doErrorCorrection  = *p++;
	char      includeTree        = *p++;
	// this was messing up our niceness conversion logic
	int32_t      niceness           = slot->m_niceness;//(int32_t)(*p++);
	// still need to skip it though!
	p++;
	bool      allowPageCache     = (bool)(*p++);
	char ks = getKeySizeFromRdbId ( rdbId );
	char     *startKey           = p; p+=ks;
	char     *endKey             = p; p+=ks;
	// then null terminated collection
	//char     *coll               = p;
	collnum_t collnum = *(collnum_t *)p; p += sizeof(collnum_t);

	CollectionRec *xcr = g_collectiondb.getRec ( collnum );
	if ( ! xcr ) g_errno = ENOCOLLREC;
	
	// error set from XmlDoc::cacheTermLists()?
	if ( g_errno ) {
		us->sendErrorReply ( slot , EBADRDBID ); return;}

	// is this being called from callWaitingHandlers()
	//bool isRecall = (netnice == 99);

	// . get the rdb we need to get the RdbList from
	// . returns NULL and sets g_errno on error
	//Msg0 msg0;
	//Rdb *rdb = msg0.getRdb ( rdbId );
	Rdb *rdb = getRdbFromId ( rdbId );
	if ( ! rdb ) { 
		us->sendErrorReply ( slot , EBADRDBID ); return;}

	// keep track of stats
	rdb->readRequestGet ( requestSize );

	/*
	// keep track of stats
	if ( ! isRecall ) rdb->readRequestGet ( requestSize );

	int64_t singleDocId2 = 0LL;
	if ( rdbId == RDB_POSDB && maxCacheAge ) {
		int64_t d1 = g_posdb.getDocId(startKey);
		int64_t d2 = g_posdb.getDocId(endKey);
		if ( d1+1 == d2 ) singleDocId2 = d1;
	}

	// have we parsed this docid and cached its termlists?
	bool shouldBeCached2 = false;
	if ( singleDocId2 && 
	     isDocIdInTermListCache ( singleDocId2 , coll ) ) 
		shouldBeCached2 = true;

	// if in the termlist cache, send it back right away
	char *trec;
	int32_t trecSize;
	if ( singleDocId2 &&
	     getRecFromTermListCache(coll,
				     startKey,
				     endKey,
				     maxCacheAge,
				     &trec,
				     &trecSize) ) {
		// if in cache send it back!
		us->sendReply_ass(trec,trecSize,trec,trecSize,slot);
		return;
	}

	// if should be cached but was not found then it's probably a
	// synonym form not in the doc content. make an empty list then.
	if ( shouldBeCached2 ) {
		// send back an empty termlist
		us->sendReply_ass(NULL,0,NULL,0,slot);
		return;
	}

	// MUST be in termlist cache! if not in there it is a probably
	// a synonym term termlist of a word in the doc.
	if ( isRecall ) {
		// send back an empty termlist
		us->sendReply_ass(NULL,0,NULL,0,slot);
		return;
	}
	
	// init waiting table?
	static bool s_waitInit = false;
	if ( ! s_waitInit ) {
		// do not repeat
		s_waitInit = true;
		// niceness = 0
		if ( ! g_waitingTable.set(8,4,2048,NULL,0,true,0,"m5wtbl")){
			log("msg5: failed to init waiting table");
			// error kills us!
			us->sendErrorReply ( slot , EBADRDBID ); 
			return;
		}
	}

	// wait in waiting table?
	if ( singleDocId2 && g_waitingTable.isInTable ( &singleDocId2 ) ) {
		g_waitingTable.addKey ( &singleDocId2 , &slot );
		return;
	}

	// if it's for a special gbdocid: query then cache ALL termlists
	// for this docid into g_termListCache right now
	if ( singleDocId2 ) {
		// have all further incoming requests for this docid
		// wait in the waiting table
		g_waitingTable.addKey ( &singleDocId2 , &slot );
		// load the title rec and store its posdb termlists in cache
		XmlDoc *xd;
		try { xd = new ( XmlDoc ); }
		catch ( ... ) {
			g_errno = ENOMEM;
			us->sendErrorReply ( slot , g_errno );
			return;
		}
		mnew ( xd, sizeof(XmlDoc),"msg0xd");
		// always use niceness 1 now even though we use niceness 0
		// to make the cache hits fast
		//niceness = 1;
		// . load the old title rec first and just recycle all
		// . typically there might be a few hundred related docids
		//   each with 50,000 matching queries on average to evaluate
		//   with the gbdocid:xxxx| restriction?
		if ( ! xd->set3 ( singleDocId2 , coll , niceness ) ) {
			us->sendErrorReply ( slot , g_errno ); return;}
		// init the new xmldoc
		xd->m_callback1 = callWaitingHandlers;
		xd->m_state     = xd;
		// . if this blocks then return
		// . should call loadOldTitleRec() and get JUST the posdb recs
		//   by setting m_useTitledb, etc. to false. then it should
		//   make posdb termlists with the compression using
		//   RdbList::addRecord() and add those lists to 
		//   g_termListCache
		if ( ! xd->cacheTermLists ( ) ) return;
		// otherwise, it completed right away!
		callWaitingHandlers ( xd );
		return;
	}
	*/

	/*
	// init special sectiondb cache?
	if ( rdbId == RDB_SECTIONDB && ! s_initCache ) {
		// try to init cache
		if ( ! s_sectiondbCache.init ( 20000000 , // 20MB max mem
					       -1       , // fixed data size
					       false    , // support lists?
					       20000    , // 20k max recs
					       false    , // use half keys?
					       "secdbche", // dbname
					       false, // load from disk?
					       sizeof(key128_t), //cachekeysize
					       0 , // data key size
					       20000 )) // numPtrs max
			log("msg0: failed to init sectiondb cache: %s",
			    mstrerror(g_errno));
		else
			s_initCache = true;
	}

	// check the sectiondb cache
	if ( rdbId == RDB_SECTIONDB ) {
		//int64_t sh48 = g_datedb.getTermId((key128_t *)startKey);
		// use the start key now!!!
		char *data;
		int32_t  dataSize;
		if (s_sectiondbCache.getRecord ( coll,
						 startKey,//&sh48,
						 &data,
						 &dataSize,
						 true, // docopy?
						 600, // maxage (10 mins)
						 true, // inc counts?
						 NULL, // cachedtime
						 true // promoteRec?
						 )){
			// debug
			//log("msg0: got sectiondblist in cache datasize=%"INT32"",
			//    dataSize);
			// send that back
			g_udpServer.sendReply_ass ( data            ,
						    dataSize        ,
						    data            ,
						    dataSize        ,
						    slot            ,
						    60              ,
						    NULL            ,
						    doneSending_ass ,
						    -1              ,
						    -1              ,
						    true            );
			return;
		}
	}
	*/

	// . do a local get
	// . create a msg5 to get the list
	State00 *st0 ;
	try { st0 = new (State00); }
	catch ( ... ) { 
		g_errno = ENOMEM;
		log("Msg0: new(%"INT32"): %s", 
		    (int32_t)sizeof(State00),mstrerror(g_errno));
		us->sendErrorReply ( slot , g_errno ); 
		return; 
	}
	mnew ( st0 , sizeof(State00) , "State00" );
	// timing debug
	if ( g_conf.m_logTimingNet )
		st0->m_startTime = gettimeofdayInMilliseconds();
	// save slot in state
	st0->m_slot = slot;
	// save udp server to send back reply on
	st0->m_us = us;
	// init this one
	st0->m_niceness = niceness;
	st0->m_rdbId    = rdbId;

	QUICKPOLL(niceness);

	// debug msg
	if ( maxCacheAge != 0 && ! addToCache )
		log(LOG_LOGIC,"net: msg0: check but don't add... rdbid=%"INT32".",
		    (int32_t)rdbId);
	// . if this request came over on the high priority udp server
	//   make sure the priority gets passed along
	// . return if this blocks
	// . we'll call sendReply later
	if ( ! st0->m_msg5.getList ( rdbId             ,
				     collnum           ,
				     &st0->m_list      ,
				     startKey          ,
				     endKey            ,
				     minRecSizes       ,
				     includeTree       , // include tree?
				     addToCache        , // addToCache?
				     maxCacheAge       ,
				     startFileNum      , 
				     numFiles          ,
				     st0               ,
				     gotListWrapper    ,
				     niceness          ,
				     doErrorCorrection ,
				     NULL , // cacheKeyPtr
				     0    , // retryNum
				     2    , // maxRetries
				     true , // compensateForMerge
				     syncPoint ,
				     NULL,//&st0->m_msg5b ,
				     false,
				     allowPageCache ) )
		return;
	// call wrapper ouselves
	gotListWrapper ( st0 , NULL , NULL );
}
// . slot should be auto-nuked upon transmission or error
// . TODO: ensure if this sendReply() fails does it really nuke the slot?
void gotListWrapper ( void *state , RdbList *listb , Msg5 *msg5xx ) {
	// get the state
	State00 *st0 = (State00 *)state;
	// extract the udp slot and list and msg5
	UdpSlot   *slot =  st0->m_slot;
	RdbList   *list = &st0->m_list;
	Msg5      *msg5 = &st0->m_msg5;
	UdpServer *us   =  st0->m_us;
	// sanity check -- ensure they match
	//if ( niceness != st0->m_niceness )
	//	log("Msg0: niceness mismatch");
	// debug msg
	//if ( niceness != 0 ) 
	//	log("HEY! niceness is not 0");
	// timing debug
	if ( g_conf.m_logTimingNet || g_conf.m_logDebugNet ) {
		//log("Msg0:hndled request %"UINT64"",gettimeofdayInMilliseconds());
		int32_t size = -1;
		if ( list ) size     = list->getListSize();
		log(LOG_TIMING|LOG_DEBUG,
		    "net: msg0: Handled request for data. "
		    "Now sending data termId=%"UINT64" size=%"INT32""
		    " transId=%"INT32" ip=%s port=%i took=%"INT64" "
		    "(niceness=%"INT32").",
		    g_posdb.getTermId(msg5->m_startKey),
		    size,slot->m_transId,
		    iptoa(slot->m_ip),slot->m_port,
		    gettimeofdayInMilliseconds() - st0->m_startTime ,
		    st0->m_niceness );
	}
	// debug
	//if ( ! msg5->m_includeTree )
	//	log("hotit\n");
	// on error nuke the list and it's data
	if ( g_errno ) {
		mdelete ( st0 , sizeof(State00) , "Msg0" );
		delete (st0);
		// TODO: free "slot" if this send fails
		us->sendErrorReply ( slot , g_errno );
		return;
	}

	QUICKPOLL(st0->m_niceness);
	// point to the serialized list in "list"
	char *data      = list->getList();
	int32_t  dataSize  = list->getListSize();
	char *alloc     = list->getAlloc();
	int32_t  allocSize = list->getAllocSize();
	// tell list not to free the data since it is a reply so UdpServer
	// will free it when it destroys the slot
	list->setOwnData ( false );
	// keep track of stats
	Rdb *rdb = getRdbFromId ( st0->m_rdbId );
	if ( rdb ) rdb->sentReplyGet ( dataSize );
	// TODO: can we free any memory here???

	// keep track of how long it takes to complete the send
	st0->m_startTime = gettimeofdayInMilliseconds();
	// debug point
	int32_t oldSize = msg5->m_minRecSizes;
	int32_t newSize = msg5->m_minRecSizes + 20;
	// watch for wrap around
	if ( newSize < oldSize ) newSize = 0x7fffffff;
	if ( dataSize > newSize && list->getFixedDataSize() == 0 &&
	     // do not annoy me with these linkdb msgs
	     dataSize > newSize+100 ) 
		log(LOG_LOGIC,"net: msg0: Sending more data than what was "
		    "requested. Ineffcient. Bad engineer. dataSize=%"INT32" "
		    "minRecSizes=%"INT32".",dataSize,oldSize);
	/*
	// always compress these lists
	if ( st0->m_rdbId == RDB_SECTIONDB ) { // && 1 == 3) {

		// get sh48, the sitehash
		key128_t *startKey = (key128_t *)msg5->m_startKey ;
		int64_t sh48 = g_datedb.getTermId(startKey);

		// debug
		//log("msg0: got sectiondblist from disk listsize=%"INT32"",
		//    list->getListSize());

		if ( dataSize > 50000 )
			log("msg0: sending back list rdb=%"INT32" "
			    "listsize=%"INT32" sh48=0x%"XINT64"",
			    (int32_t)st0->m_rdbId,
			    dataSize,
			    sh48);

		// save it
		int32_t origDataSize = dataSize;
		// store compressed list on itself
		char *dst = list->m_list;
		// warn if niceness is 0!
		if ( st0->m_niceness == 0 )
			log("msg0: compressing sectiondb list at niceness 0!");
		// compress the list
		uint32_t lastVoteHash32 = 0LL;
		SectionVote *lastVote = NULL;
		for ( ; ! list->isExhausted() ; list->skipCurrentRecord() ) {
			// breathe
			QUICKPOLL ( st0->m_niceness );
			// get rec
			char *rec = list->getCurrentRec();
			// for ehre
			key128_t *key = (key128_t *)rec;
			// the score is the bit which is was set in 
			// Section::m_flags for that docid
			int32_t secType = g_indexdb.getScore ( (char *)key );
			// 0 means it probably used to count # of voters
			// from this site, so i don't think xmldoc uses
			// that any more
			if ( secType == SV_SITE_VOTER ) continue;
			// treat key like a datedb key and get the taghash
			uint32_t h32 = g_datedb.getDate ( key );
			// get data/vote from the current record in the 
			// sectiondb list
			SectionVote *sv=(SectionVote *)list->getCurrentData ();
			// get the average score for this doc
			float avg = sv->m_score ;
			if ( sv->m_numSampled > 0.0 ) avg /= sv->m_numSampled;
			// if same as last guy, add to it
			if ( lastVoteHash32 == h32 && lastVote ) {
				// turn possible multi-vote into single docid
				// into a single vote, with the score averaged.
				lastVote->m_score += avg;
				lastVote->m_numSampled++;
				continue;
			}
			// otherwise, add in a new guy!
			*(key128_t *)dst = *key;
			dst += sizeof(key128_t);
			// the new vote
			SectionVote *dsv = (SectionVote *)dst;
			dsv->m_score = avg;
			dsv->m_numSampled = 1;
			// set this
			lastVote = dsv;
			lastVoteHash32 = h32;
			// skip over
			dst += sizeof(SectionVote);
		}
		// update the list size now for sending back
		dataSize = dst - data;
		// if the list was over the requested minrecsizes we need
		// to set a flag so that the caller will do a re-call.
		// so making the entire odd, will be the flag.
	        if ( origDataSize > msg5->m_minRecSizes && 
		     dataSize < origDataSize ) {
			*dst++ = '\0';
			dataSize++;
		}

		// debug
		//log("msg0: compressed sectiondblist from disk "
		//    "newlistsize=%"INT32"", dataSize);
		
		// use this timestamp
		int32_t now = getTimeLocal();//Global();
		// finally, cache this sucker
		s_sectiondbCache.addRecord ( msg5->m_coll,
					     (char *)startKey,//(char *)&sh48
					     data, 
					     dataSize ,
					     now );
		// ignore errors
		g_errno = 0;
	}
	*/
		    
	//
	// for linkdb lists, remove all the keys that have the same IP32
	// and store a count of what we removed somewhere
	//
	if ( st0->m_rdbId == RDB_LINKDB ) {
		// store compressed list on itself
		char *dst = list->m_list;
		// keep stats
		int32_t totalOrigLinks = 0;
		int32_t ipDups = 0;
		int32_t lastIp32 = 0;
		char *listEnd = list->getListEnd();
		// compress the list
		for ( ; ! list->isExhausted() ; list->skipCurrentRecord() ) {
			// breathe
			QUICKPOLL ( st0->m_niceness );
			// count it
			totalOrigLinks++;
			// get rec
			char *rec = list->getCurrentRec();
			int32_t ip32 = g_linkdb.getLinkerIp_uk((key224_t *)rec );
			// same as one before?
			if ( ip32 == lastIp32 && 
			     // are we the last rec? include that for
			     // advancing the m_nextKey in Linkdb more 
			     // efficiently.
			     rec + LDBKS < listEnd ) {
				ipDups++;
				continue;
			}
			// store it
			gbmemcpy (dst , rec , LDBKS );
			dst += LDBKS;
			// update it
			lastIp32 = ip32;
		}
		// . if we removed one key, store the stats
		// . caller should recognize reply is not a multiple of
		//   the linkdb key size LDBKS and no its there!
		if ( ipDups ) {
			//*(int32_t *)dst = totalOrigLinks;
			//dst += 4;
			//*(int32_t *)dst = ipDups;
			//dst += 4;
		}
		// update list parms
		list->m_listSize = dst - list->m_list;
		list->m_listEnd  = list->m_list + list->m_listSize;
		data      = list->getList();
		dataSize  = list->getListSize();
	}


	//log("sending replySize=%"INT32" min=%"INT32"",dataSize,msg5->m_minRecSizes);
	// . TODO: dataSize may not equal list->getListMaxSize() so
	//         Mem class may show an imblanace
	// . now g_udpServer is responsible for freeing data/dataSize
	// . the "true" means to call doneSending_ass() from the signal handler
	//   if need be
	st0->m_us->sendReply_ass  ( data            ,
				    dataSize        ,
				    alloc           , // alloc
				    allocSize       , // alloc size
				    slot            ,
				    60              ,
				    st0             ,
				    doneSending_ass ,
				    -1              ,
				    -1              ,
				    true            );
}	
Example #10
0
//接收socket连接线程
void * UdpServer::AcceptThread(void * pParam)
{
    if(0 == pParam)
    {
        cout << "param is null." << endl;
        return 0;
    }
    UdpServer * pThis = (UdpServer*)pParam;

    int maxFd = 0;
    struct sockaddr_in cliAddr;
    while(true)
    {
        FD_ZERO(&pThis->mReadFd);
        FD_SET(pThis->mServerSocket, &pThis->mReadFd);
        maxFd = (pThis->mServerSocket > maxFd)? pThis->mServerSocket : maxFd;

        for(list<int>::iterator itor = pThis->mCliList.begin(); itor != pThis->mCliList.end(); itor++)
        {
            FD_SET(*itor, &pThis->mReadFd);
            maxFd = (*itor > maxFd)? *itor : maxFd;
        }

        int res = select(maxFd + 1, &pThis->mReadFd, 0, 0, NULL);
        if(-1 == res)
        {
            cout << "select failed." << endl;
        }


        if(-1 != pThis->mServerSocket && FD_ISSET(pThis->mServerSocket , &pThis->mReadFd))
        {
            int sin_size = sizeof(struct sockaddr_in);
            int clientSoc = accept(pThis->mServerSocket, (struct sockaddr *)(&cliAddr), (socklen_t*)&sin_size);
            if(-1 == clientSoc)
            {
                cout << "accept error." << endl;
                continue;
            }
            if(DEBUG) cout << "Accept new client : " << clientSoc << endl;
            pThis->mCliList.push_front(clientSoc);

            string ip = inet_ntoa(cliAddr.sin_addr);
            char tmp[16] = {0};
            sprintf(tmp, "%d", ntohs(cliAddr.sin_port));
            string port = tmp;
            if(0 != pThis->mAcceptFunc)
            {
                pThis->mAcceptFunc(ip, port, clientSoc);
            }
            pThis->OnClientConnect(ip, port, clientSoc);
        }

        for(list<int>::iterator itor = pThis->mCliList.begin(); itor != pThis->mCliList.end(); itor++)
        {
            if(-1 != *itor && FD_ISSET(*itor , &pThis->mReadFd))
            {
                char buf[MAX_MESSAGE_LEN] = {0};
                int res = recv(*itor, buf, sizeof(buf), 0);
                if(0 < res)
                {
                    if(DEBUG) cout << "Receive data : " << buf << endl;
                    pthread_mutex_lock(&pThis->mMutex);
                    pThis->mDataList.push_back(MsgInfo(*itor, buf));
                    pthread_mutex_unlock(&pThis->mMutex);
                }
                else if(0 == res)
                {
                    cout << "client quit." << endl;
                    pthread_mutex_lock(&pThis->mMutex);
                    pThis->mCliList.remove(*itor);
                    itor--;
                    pThis->mDisconnectFunc(*itor);
                    pThis->OnClientDisconnect(*itor);
                    pthread_mutex_unlock(&pThis->mMutex);
                }
                else
                {
                    cout << "receive failed." << endl;
                }
            }
        }
    }
}
// . returns false if blocked, true otherwise.
// . returns true and sets g_errno on error
// . before we can spider for a SpiderRequest we must be granted the lock
// . each group shares the same doledb and each host in the group competes
//   for spidering all those urls. 
// . that way if a host goes down is load is taken over
bool Msg12::getLocks ( int64_t uh48, // probDocId , 
		       char *url ,
		       DOLEDBKEY *doledbKey,
		       collnum_t collnum,
		       int32_t sameIpWaitTime,
		       int32_t maxSpidersOutPerIp,
		       int32_t firstIp,
		       void *state ,
		       void (* callback)(void *state) ) {

	// ensure not in use. not msg12 replies outstanding.
	if ( m_numRequests != m_numReplies ) { char *xx=NULL;*xx=0; }

	// no longer use this
	char *xx=NULL;*xx=0;

	// do not use locks for injections
	//if ( m_sreq->m_isInjecting ) return true;
	// get # of hosts in each mirror group
	int32_t hpg = g_hostdb.getNumHostsPerShard();
	// reset
	m_numRequests = 0;
	m_numReplies  = 0;
	m_grants   = 0;
	m_removing = false;
	m_confirming = false;
	// make sure is really docid
	//if ( probDocId & ~DOCID_MASK ) { char *xx=NULL;*xx=0; }
	// . mask out the lower bits that may change if there is a collision
	// . in this way a url has the same m_probDocId as the same url
	//   in the index. i.e. if we add a new spider request for url X and
	//   url X is already indexed, then they will share the same lock 
	//   even though the indexed url X may have a different actual docid
	//   than its probable docid.
	// . we now use probable docids instead of uh48 because query reindex
	//   in PageReindex.cpp adds docid based spider requests and we
	//   only know the docid, not the uh48 because it is creating
	//   SpiderRequests from docid-only search results. having to look
	//   up the msg20 summary for like 1M search results is too painful!
	//m_lockKey = g_titledb.getFirstProbableDocId(probDocId);
	// . use this for locking now, and let the docid-only requests just use
	//   the docid
	m_lockKeyUh48 = makeLockTableKey ( uh48 , firstIp );
	m_url = url;
	m_callback = callback;
	m_state = state;
	m_hasLock = false;
	m_origUh48 = uh48;
	// support ability to spider multiple urls from same ip
	m_doledbKey = *doledbKey;
	m_collnum = collnum;
	m_sameIpWaitTime = sameIpWaitTime;
	m_maxSpidersOutPerIp = maxSpidersOutPerIp;
	m_firstIp = firstIp;

	// sanity check, just 6 bytes! (48 bits)
	if ( uh48 & 0xffff000000000000LL ) { char *xx=NULL;*xx=0; }

	if ( m_lockKeyUh48 & 0xffff000000000000LL ) { char *xx=NULL;*xx=0; }

	// cache time
	int32_t ct = 120;

	// if docid based assume it was a query reindex and keep it short!
	// otherwise we end up waiting 120 seconds for a query reindex to
	// go through on a docid we just spidered. TODO: use m_urlIsDocId
	// MDW: check this out
	if ( url && is_digit(url[0]) ) ct = 2;

	// . this seems to be messing us up and preventing us from adding new
	//   requests into doledb when only spidering a few IPs.
	// . make it random in the case of twin contention
	ct = rand() % 10;

	// . check our cache to avoid repetitive asking
	// . use -1 for maxAge to indicate no max age
	// . returns -1 if not in cache
	// . use maxage of two minutes, 120 seconds
	int32_t lockTime ;
	lockTime = g_spiderLoop.m_lockCache.getLong(0,m_lockKeyUh48,ct,true);
	// if it was in the cache and less than 2 minutes old then return
	// true now with m_hasLock set to false.
	if ( lockTime >= 0 ) {
		if ( g_conf.m_logDebugSpider )
			logf(LOG_DEBUG,"spider: cached missed lock for %s "
			     "lockkey=%" PRIu64, m_url,m_lockKeyUh48);
		return true;
	}

	if ( g_conf.m_logDebugSpider )
		logf(LOG_DEBUG,"spider: sending lock request for %s "
		     "lockkey=%" PRIu64,  m_url,m_lockKeyUh48);

	// now the locking group is based on the probable docid
	//m_lockGroupId = g_hostdb.getGroupIdFromDocId(m_lockKey);
	// ptr to list of hosts in the group
	//Host *hosts = g_hostdb.getGroup ( m_lockGroupId );
	// the same group (shard) that has the spiderRequest/Reply is
	// the one responsible for locking.
	Host *hosts = g_hostdb.getMyShard();

	// shortcut
	UdpServer *us = &g_udpServer;


	static int32_t s_lockSequence = 0;
	// remember the lock sequence # in case we have to call remove locks
	m_lockSequence = s_lockSequence++;

	LockRequest *lr = &m_lockRequest;
	lr->m_lockKeyUh48 = m_lockKeyUh48;
	lr->m_firstIp = m_firstIp;
	lr->m_removeLock = 0;
	lr->m_lockSequence = m_lockSequence;
	lr->m_collnum = collnum;

	// reset counts
	m_numRequests = 0;
	m_numReplies  = 0;

	// point to start of the 12 byte request buffer
	char *request = (char *)lr;//m_lockKey;
	int32_t  requestSize = sizeof(LockRequest);//12;

	// loop over hosts in that shard
	for ( int32_t i = 0 ; i < hpg ; i++ ) {
		// get a host
		Host *h = &hosts[i];
		// skip if dead! no need to get a reply from dead guys
		if ( g_hostdb.isDead (h) ) continue;
		// note it
		if ( g_conf.m_logDebugSpider )
			logf(LOG_DEBUG,"spider: sent lock "
			     "request #%" PRId32" for lockkey=%" PRIu64" %s to "
			     "hid=%" PRId32,m_numRequests,m_lockKeyUh48,
			     m_url,h->m_hostId);
		// send request to him
		if ( ! us->sendRequest ( request      ,
					 requestSize  ,
					 0x12         , // msgType
					 h->m_ip      ,
					 h->m_port    ,
					 h->m_hostId  ,
					 NULL         , // retSlotPtrPtr
					 this         , // state data
					 gotLockReplyWrapper ,
					 udpserver_sendrequest_infinite_timeout ) ) 
			// udpserver returns false and sets g_errno on error
			return true;
		// count them
		m_numRequests++;
	}
	// block?
	if ( m_numRequests > 0 ) return false;
	// i guess nothing... hmmm... all dead?
	//char *xx=NULL; *xx=0; 
	// m_hasLock should be false... all lock hosts seem dead... wait
	if ( g_conf.m_logDebugSpider )
		logf(LOG_DEBUG,"spider: all lock hosts seem dead for %s "
		     "lockkey=%" PRIu64, m_url,m_lockKeyUh48);
	return true;
}
bool Msg12::removeAllLocks ( ) {

	// ensure not in use. not msg12 replies outstanding.
	if ( m_numRequests != m_numReplies ) { char *xx=NULL;*xx=0; }

	// no longer use this
	char *xx=NULL;*xx=0;

	// skip if injecting
	//if ( m_sreq->m_isInjecting ) return true;
	if ( g_conf.m_logDebugSpider )
		logf(LOG_DEBUG,"spider: removing all locks for %s %" PRIu64,
		     m_url,m_lockKeyUh48);
	// we are now removing 
	m_removing = true;

	LockRequest *lr = &m_lockRequest;
	lr->m_lockKeyUh48 = m_lockKeyUh48;
	lr->m_lockSequence = m_lockSequence;
	lr->m_firstIp = m_firstIp;
	lr->m_removeLock = 1;

	// reset counts
	m_numRequests = 0;
	m_numReplies  = 0;

	// make that the request
	// . point to start of the 12 byte request buffer
	// . m_lockSequence should still be valid
	char *request     = (char *)lr;//m_lockKey;
	int32_t  requestSize = sizeof(LockRequest);//12;

	// now the locking group is based on the probable docid
	//uint32_t groupId = g_hostdb.getGroupIdFromDocId(m_lockKeyUh48);
	// ptr to list of hosts in the group
	//Host *hosts = g_hostdb.getGroup ( groupId );
	Host *hosts = g_hostdb.getMyShard();
	// this must select the same group that is going to spider it!
	// i.e. our group! because we check our local lock table to see
	// if a doled url is locked before spidering it ourselves.
	//Host *hosts = g_hostdb.getMyGroup();
	// shortcut
	UdpServer *us = &g_udpServer;
	// set the hi bit though for this one
	//m_lockKey |= 0x8000000000000000LL;
	// get # of hosts in each mirror group
	int32_t hpg = g_hostdb.getNumHostsPerShard();
	// loop over hosts in that shard
	for ( int32_t i = 0 ; i < hpg ; i++ ) {
		// get a host
		Host *h = &hosts[i];
		// skip if dead! no need to get a reply from dead guys
		if ( g_hostdb.isDead ( h ) ) continue;
		// send request to him
		if ( ! us->sendRequest ( request      ,
					 requestSize  ,
					 0x12         , // msgType
					 h->m_ip      ,
					 h->m_port    ,
					 h->m_hostId  ,
					 NULL         , // retSlotPtrPtr
					 this         , // state data
					 gotLockReplyWrapper ,
					 udpserver_sendrequest_infinite_timeout ) ) 
			// udpserver returns false and sets g_errno on error
			return true;
		// count them
		m_numRequests++;
	}
	// block?
	if ( m_numRequests > 0 ) return false;
	// did not block
	return true;
}
Example #13
0
// . THIS Msg0 class must be alloc'd, i.e. not on the stack, etc.
// . if list is stored locally this tries to get it locally
// . otherwise tries to get the list from the network
// . returns false if blocked, true otherwise
// . sets g_errno on error
// . NOTE: i was having problems with queries being cached too long, you
//   see the cache here is a NETWORK cache, so when the machines that owns
//   the list updates it on disk it can't flush our cache... so use a small
//   maxCacheAge of like , 30 seconds or so...
bool Msg0::getList ( int64_t hostId      , // host to ask (-1 if none)
		     int32_t      ip          , // info on hostId
		     int16_t     port        ,
		     int32_t      maxCacheAge , // max cached age in seconds
		     bool      addToCache  , // add net recv'd list to cache?
		     char      rdbId       , // specifies the rdb
		     collnum_t collnum ,
		     RdbList  *list        ,
		     const char     *startKey    ,
		     const char     *endKey      ,
		     int32_t      minRecSizes ,  // use -1 for no max
		     void     *state       ,
		     void    (* callback)(void *state ),//, RdbList *list ) ,
		     int32_t      niceness    ,
		     bool      doErrorCorrection ,
		     bool      includeTree ,
		     bool      doMerge     ,
		     int32_t      firstHostId   ,
		     int32_t      startFileNum  ,
		     int32_t      numFiles      ,
		     int64_t      timeout       ,
		     int64_t syncPoint     ,
		     int32_t      preferLocalReads ,
		     Msg5     *msg5             ,
		     bool      isRealMerge      ,
		     bool      allowPageCache    ,
		     bool      forceLocalIndexdb ,
		     bool      noSplit ,
		     int32_t      forceParitySplit  ) {
	logTrace( g_conf.m_logTraceMsg0, "BEGIN. hostId: %" PRId64", rdbId: %d", hostId, (int)rdbId );

	// warning
	if ( collnum < 0 ) log(LOG_LOGIC,"net: NULL collection. msg0.");

	// reset the list they passed us
	list->reset();
	// get keySize of rdb
	m_ks = getKeySizeFromRdbId ( rdbId );
	
//	if( g_conf.m_logTraceMsg0 ) 
//	{
//		log("%s:%s:%d: rdbId. [%d]", __FILE__,__func__,__LINE__, (int)rdbId);
//		log("%s:%s:%d: m_ks.. [%d]", __FILE__,__func__,__LINE__, (int)m_ks);
//		log("%s:%s:%d: hostId [%" PRId64"]", __FILE__,__func__,__LINE__, hostId);
//	}

	// if startKey > endKey, don't read anything
	//if ( startKey > endKey ) return true;
	if ( KEYCMP(startKey,endKey,m_ks)>0 ) { char *xx=NULL;*xx=0; }//rettrue
	// . reset hostid if it is dead
	// . this is causing UOR queries to take forever when we have a dead
	if ( hostId >= 0 && g_hostdb.isDead ( hostId ) ) hostId = -1;
	// no longer accept negative minrecsize
	if ( minRecSizes < 0 ) {
		g_errno = EBADENGINEER;
		logTrace( g_conf.m_logTraceMsg0, "END" );

		log(LOG_LOGIC, "net: msg0: Negative minRecSizes no longer supported.");
		char *xx=NULL;*xx=0;
	}

	// remember these
	m_state         = state;
	m_callback      = callback;
	m_list          = list;
	m_hostId        = hostId;
	m_niceness      = niceness;
	m_addToCache    = addToCache;
	// . these define our request 100%
	KEYSET(m_startKey,startKey,m_ks);
	KEYSET(m_endKey,endKey,m_ks);
	m_minRecSizes   = minRecSizes;
	m_rdbId         = rdbId;
	m_collnum = collnum;//          = coll;
	m_isRealMerge   = isRealMerge;
	m_allowPageCache = allowPageCache;

	// . group to ask is based on the first key 
	// . we only do 1 group per call right now
	// . groupMask must turn on higher bits first (count downwards kinda)
	// . titledb and spiderdb use special masks to get groupId

	// if diffbot.cpp is reading spiderdb from each shard we have to
	// get groupid from hostid here lest we core in getGroupId() below.
	// it does that for dumping spiderdb to the client browser. they
	// can download the whole enchilada.
	if ( hostId >= 0 && m_rdbId == RDB_SPIDERDB )
		m_shardNum = 0;
	// did they force it? core until i figure out what this is
	else if ( forceParitySplit >= 0 ) 
		//m_groupId =  g_hostdb.getGroupId ( forceParitySplit );
		m_shardNum = forceParitySplit;
	else
		//m_groupId = getGroupId ( m_rdbId , startKey , ! noSplit );
		m_shardNum = getShardNum ( m_rdbId , startKey );

	// if we are looking up a termlist in posdb that is split by termid and
	// not the usual docid then we have to set this posdb key bit that tells
	// us that ...
	if ( noSplit && m_rdbId == RDB_POSDB )
		m_shardNum = g_hostdb.getShardNumByTermId ( startKey );

	// how is this used?
	if ( forceLocalIndexdb ) m_shardNum = getMyShardNum();


//	if( g_conf.m_logTraceMsg0 ) log("%s:%s:%d: shardNum [%" PRId32"]", __FILE__,__func__, __LINE__, m_shardNum);


	// . store these parameters
	// . get a handle to the rdb in case we can satisfy locally
	// . returns NULL and sets g_errno on error
	QUICKPOLL((m_niceness));
	Rdb *rdb = getRdbFromId ( m_rdbId );
	if ( ! rdb ) return true;
	// we need the fixedDataSize
	m_fixedDataSize = rdb->getFixedDataSize();
	m_useHalfKeys   = rdb->useHalfKeys();
	// . debug msg
	// . Msg2 does this when checking for a cached compound list.
	//   compound lists do not actually exist, they are merges of smaller
	//   UOR'd lists.
	if ( maxCacheAge != 0 && ! addToCache && (numFiles > 0 || includeTree)) {
		log( LOG_LOGIC, "net: msg0: Weird. check but don't add... rdbid=%" PRId32".", ( int32_t ) m_rdbId );
	}

	// set this here since we may not call msg5 if list not local
	//m_list->setFixedDataSize ( m_fixedDataSize );

	// . now that we do load balancing we don't want to do a disk lookup
	//   even if local if we are merging or dumping
	// . UNLESS g_conf.m_preferLocalReads is true
	if ( preferLocalReads == -1 ) 
		preferLocalReads = g_conf.m_preferLocalReads;

	// . always prefer local for full split clusterdb
	// . and keep the tfndb/titledb lookups in the same stripe
	// . so basically we can't do biased caches if fully split
	//if ( g_conf.m_fullSplit ) preferLocalReads = true;
	preferLocalReads = true;

	// it it stored locally?
	bool isLocal = ( m_hostId == -1 && //g_hostdb.m_groupId == m_groupId );
			 m_shardNum == getMyShardNum() );
	// only do local lookups if this is true
	if ( ! preferLocalReads ) isLocal = false;

	/*
	int64_t singleDocIdQuery = 0LL;
	if ( rdbId == RDB_POSDB ) {
		int64_t d1 = g_posdb.getDocId(m_startKey);
		int64_t d2 = g_posdb.getDocId(m_endKey);
		if ( d1+1 == d2 ) singleDocIdQuery = d1;
	}

	// . try the LOCAL termlist cache
	// . so when msg2 is evaluating a gbdocid:| query and it has to
	//   use msg0 to go across the network to get the same damn termlist
	//   over and over again for the same docid, this will help alot.
	// . ideally it'd be nice if the seo pipe in xmldoc.cpp can try to
	//   send the same gbdocid:xxxx docids to the same hosts. maybe hash
	//   based on docid into the list of hosts and if that host is busy
	//   just chain until we find someone not busy.
	if ( singleDocIdQuery &&
	     getListFromTermListCache ( coll,
					m_startKey,
					m_endKey,
					maxCacheAge,
					list ) )
		// found!
		return true;
	*/

	// but always local if only one host
	if ( g_hostdb.getNumHosts() == 1 ) isLocal = true;

	// . if the group is local then do it locally
	// . Msg5::getList() returns false if blocked, true otherwise
	// . Msg5::getList() sets g_errno on error
	// . don't do this if m_hostId was specified
	if ( isLocal ) {
		logTrace( g_conf.m_logTraceMsg0, "isLocal" );

		if ( msg5 ) {
			m_msg5 = msg5;
			m_deleteMsg5 = false;
		}
		else {
			try { m_msg5 = new ( Msg5 ); } 
			catch ( ... ) {
				g_errno = ENOMEM;
				log("net: Local alloc for disk read failed "
				    "while tring to read data for %s. "
				    "Trying remote request.",
				    getDbnameFromId(m_rdbId));
				goto skip;
			}
			mnew ( m_msg5 , sizeof(Msg5) , "Msg0::Msg5" );
			m_deleteMsg5 = true;
		}

		QUICKPOLL(m_niceness);
		if ( ! m_msg5->getList ( rdbId,
					 m_collnum ,
					 m_list ,
					 m_startKey ,
					 m_endKey   ,
					 m_minRecSizes ,
					 includeTree   , // include Tree?
					 addToCache    , // addToCache?
					 maxCacheAge   ,
					 startFileNum  , 
					 numFiles      ,
					 this ,
					 gotListWrapper2   ,
					 niceness          ,
					 doErrorCorrection ,
					 NULL , // cacheKeyPtr
					 0    , // retryNum
					 -1   , // maxRetries
					 true , // compensateForMerge
					 syncPoint ,
					 m_isRealMerge ,
					 m_allowPageCache ) ) {
			logTrace( g_conf.m_logTraceMsg0, "END, return false" );
			return false;
		}

		// nuke it
		reset();
		logTrace( g_conf.m_logTraceMsg0, "END, return true" );
		return true;
	}
skip:
	// debug msg
	if ( g_conf.m_logDebugQuery )
		log(LOG_DEBUG,"net: msg0: Sending request for data to "
		    "shard=%" PRIu32" "
		    "listPtr=%" PTRFMT" minRecSizes=%" PRId32" termId=%" PRIu64" "
		    //"startKey.n1=%" PRIx32",n0=%" PRIx64" (niceness=%" PRId32")",
		    "startKey.n1=%" PRIx64",n0=%" PRIx64" (niceness=%" PRId32")",
		    //g_hostdb.makeHostId ( m_groupId ) ,
		    m_shardNum,
		    (PTRTYPE)m_list,
		    m_minRecSizes, g_posdb.getTermId(m_startKey) , 
		    //m_startKey.n1,m_startKey.n0 , (int32_t)m_niceness);
		    KEY1(m_startKey,m_ks),KEY0(m_startKey),
		    (int32_t)m_niceness);

	char *replyBuf = NULL;
	int32_t  replyBufMaxSize = 0;
	bool  freeReply = true;

	// . make a request with the info above (note: not in network order)
	// . IMPORTANT!!!!! if you change this change 
	//   Multicast.cpp::sleepWrapper1 too!!!!!!!!!!!!
	//   no, not anymore, we commented out that request peeking code
	char *p = m_request;
	*(int64_t *) p = syncPoint        ; p += 8;
	//*(key_t     *) p = m_startKey       ; p += sizeof(key_t);
	//*(key_t     *) p = m_endKey         ; p += sizeof(key_t);
	*(int32_t      *) p = m_minRecSizes    ; p += 4;
	*(int32_t      *) p = startFileNum     ; p += 4;
	*(int32_t      *) p = numFiles         ; p += 4;
	*(int32_t      *) p = maxCacheAge      ; p += 4;
	if ( p - m_request != RDBIDOFFSET ) { char *xx=NULL;*xx=0; }
	*p               = m_rdbId          ; p++;
	*p               = addToCache       ; p++;
	*p               = doErrorCorrection; p++;
	*p               = includeTree      ; p++;
	*p               = (char)niceness   ; p++;
	*p               = (char)m_allowPageCache; p++;
	KEYSET(p,m_startKey,m_ks);          ; p+=m_ks;
	KEYSET(p,m_endKey,m_ks);            ; p+=m_ks;
	// NULL terminated collection name
	//strcpy ( p , coll ); p += gbstrlen ( coll ); *p++ = '\0';
	*(collnum_t *)p = m_collnum; p += sizeof(collnum_t);
	m_requestSize    = p - m_request;
	// ask an individual host for this list if hostId is NOT -1
	if ( m_hostId != -1 ) {
		// get Host
		Host *h = g_hostdb.getHost ( m_hostId );
		if ( ! h ) { 
			g_errno = EBADHOSTID; 
			log(LOG_LOGIC,"net: msg0: Bad hostId of %" PRId64".", m_hostId);
			logTrace( g_conf.m_logTraceMsg0, "END, return true. Bad hostId" );
			return true;
		}
		
		// if niceness is 0, use the higher priority udpServer
		UdpServer *us ;
		uint16_t port;
		QUICKPOLL(m_niceness);

		us = &g_udpServer ; port = h->m_port ; 
		// . returns false on error and sets g_errno, true otherwise
		// . calls callback when reply is received (or error)
		// . we return true if it returns false
		if ( ! us->sendRequest ( m_request     ,
					 m_requestSize ,
					 0x00          , // msgType
					 h->m_ip       ,
					 port          ,
					 m_hostId      ,
					 NULL          , // the slotPtr
					 this          ,
					 gotSingleReplyWrapper ,
					 timeout       ,
					 -1            , // backoff
					 -1            , // maxwait
					 replyBuf      ,
					 replyBufMaxSize ,
					 m_niceness     ) ) { // cback niceness
			logTrace( g_conf.m_logTraceMsg0, "END, return true. Request sent" );
			return true;
		}
		
		// return false cuz it blocked
		logTrace( g_conf.m_logTraceMsg0, "END, return false. sendRequest blocked" );
		return false;
	}
	// timing debug
	if ( g_conf.m_logTimingNet )
		m_startTime = gettimeofdayInMilliseconds();
	else
		m_startTime = 0;

	// . get the top int32_t of the key
	// . i guess this will work for 128 bit keys... hmmmmm
	int32_t keyTop = hash32 ( (char *)startKey , m_ks );

	// . otherwise, multicast to a host in group "groupId"
	// . returns false and sets g_errno on error
	// . calls callback on completion
	// . select first host to send to in group based on upper 32 bits
	//   of termId (m_startKey.n1)
	// . need to send out to all the indexdb split hosts
	m_numRequests = 0;
	m_numReplies  = 0;
	//for ( int32_t i = 0; i < m_numSplit; i++ ) {

	QUICKPOLL(m_niceness);
	//int32_t gr;
	char *buf;
	buf = replyBuf;

	// get the multicast
	Multicast *m = &m_mcast;

        if ( ! m->send ( m_request    , 
			      m_requestSize,
			      0x00         , // msgType 0x00
			      false        , // does multicast own request?
			 m_shardNum ,
			      false        , // send to whole group?
			      //m_startKey.n1, // key is passed on startKey
			      keyTop       , // key is passed on startKey
			      this         , // state data
			      NULL         , // state data
			      gotMulticastReplyWrapper0 ,
			      timeout*1000 , // timeout
			      niceness     ,
			      firstHostId  ,
			      buf             ,
			      replyBufMaxSize ,
			      freeReply       , // free reply buf?
			      true            , // do disk load balancing?
			      maxCacheAge     ,
			      //(key_t *)cacheKey        ,
			      // multicast uses it for determining the best
			      // host to send the request to when doing 
			      // disk load balancing. if the host has our 
			      // data cached, then it will probably get to
			      // handle the request. for now let's just assume
			      // this is a 96-bit key. TODO: fix...
			 0 , // *(key_t *)cacheKey        ,
			      rdbId           ,
			      minRecSizes     ) ) 
	{
		log(LOG_ERROR, "net: Failed to send request for data from %s in shard "
		    "#%" PRIu32" over network: %s.",
		    getDbnameFromId(m_rdbId),m_shardNum, mstrerror(g_errno));
		// but speed it up
		m_errno = g_errno;
		m->reset();
		if ( m_numRequests > 0 ) {
			logTrace( g_conf.m_logTraceMsg0, "END - returning false" );
			
			return false;
		}

		logTrace( g_conf.m_logTraceMsg0, "END - returning true" );
		return true;
	}

	m_numRequests++;

	// we blocked
	logTrace( g_conf.m_logTraceMsg0, "END - returning false, blocked" );
	return false;
}
void handleRequest22 ( UdpSlot *slot , int32_t netnice ) {
	// shortcut
	UdpServer *us = &g_udpServer;
	// get the request
	Msg22Request *r = (Msg22Request *)slot->m_readBuf;

	// sanity check
	int32_t  requestSize = slot->m_readBufSize;
	if ( requestSize < r->getMinSize() ) {
		log("db: Got bad request size of %" PRId32" bytes for title record. "
		    "Need at least 28.",  requestSize );
		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
		us->sendErrorReply ( slot , EBADREQUESTSIZE );
		return;
	}

	// get base, returns NULL and sets g_errno to ENOCOLLREC on error
	RdbBase *tbase = getRdbBase( RDB_TITLEDB, r->m_collnum );
	if ( ! tbase ) {
		log("db: Could not get title rec in collection # %" PRId32" because rdbbase is null.", (int32_t)r->m_collnum);
		g_errno = EBADENGINEER;
		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
		us->sendErrorReply ( slot , g_errno ); 
		return; 
	}

	// overwrite what is in there so niceness conversion algo works
	r->m_niceness = netnice;

	// if just checking tfndb, do not do the cache lookup in clusterdb
	if ( r->m_justCheckTfndb ) {
		r->m_maxCacheAge = 0;
	}

	g_titledb.getRdb()->readRequestGet  (requestSize);

	// breathe
	QUICKPOLL ( r->m_niceness);

	// sanity check
	if ( r->m_collnum < 0 ) { char *xx=NULL;*xx=0; }


	// make the state now
	State22 *st ;
	try { st = new (State22); }
	catch ( ... ) {
		g_errno = ENOMEM;
		log("query: Msg22: new(%" PRId32"): %s", (int32_t)sizeof(State22),
		mstrerror(g_errno));
		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
		us->sendErrorReply ( slot , g_errno );
		return;
	}
	mnew ( st , sizeof(State22) , "Msg22" );

	// store ptr to the msg22request
	st->m_r = r;
	// save for sending back reply
	st->m_slot = slot;

	// then tell slot not to free it since m_r references it!
	// so we'll have to free it when we destroy State22
	st->m_slotAllocSize = slot->m_readBufMaxSize;
	st->m_slotReadBuf   = slot->m_readBuf;
	slot->m_readBuf = NULL;

	// . if docId was explicitly specified...
	// . we may get multiple tfndb recs
	if ( ! r->m_url[0] ) {
	   st->m_docId1 = r->m_docId;
	   st->m_docId2 = r->m_docId;
	}

	// but if we are requesting an available docid, it might be taken
	// so try the range
	if ( r->m_getAvailDocIdOnly ) {
	   int64_t pd = r->m_docId;
	   int64_t d1 = g_titledb.getFirstProbableDocId ( pd );
	   int64_t d2 = g_titledb.getLastProbableDocId  ( pd );
	   // sanity - bad url with bad subdomain?
	   if ( pd < d1 || pd > d2 ) { char *xx=NULL;*xx=0; }
	   // make sure we get a decent sample in titledb then in
	   // case the docid we wanted is not available
	   st->m_docId1 = d1;
	   st->m_docId2 = d2;
	}

	// . otherwise, url was given, like from Msg15
	// . we may get multiple tfndb recs
	if ( r->m_url[0] ) {
	   int32_t  dlen = 0;
	   // this causes ip based urls to be inconsistent with the call
	   // to getProbableDocId(url) below
	   char *dom  = getDomFast ( r->m_url , &dlen );
	   // bogus url?
	   if ( ! dom ) {
	       log("msg22: got bad url in request: %s from "
		   "hostid %" PRId32" for msg22 call ",
		   r->m_url,slot->m_host->m_hostId);
	       g_errno = EBADURL;
	       log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
	       us->sendErrorReply ( slot , g_errno );
	       mdelete ( st , sizeof(State22) , "Msg22" );
	       delete ( st );
	       return;
	   }
	   int64_t pd = g_titledb.getProbableDocId (r->m_url,dom,dlen);
	   int64_t d1 = g_titledb.getFirstProbableDocId ( pd );
	   int64_t d2 = g_titledb.getLastProbableDocId  ( pd );
	   // sanity - bad url with bad subdomain?
	   if ( pd < d1 || pd > d2 ) { char *xx=NULL;*xx=0; }
	   // store these
	   st->m_pd     = pd;
	   st->m_docId1 = d1;
	   st->m_docId2 = d2;
	   st->m_uh48   = hash64b ( r->m_url ) & 0x0000ffffffffffffLL;
	}

	QUICKPOLL ( r->m_niceness );

	// make the cacheKey ourself, since Msg5 would make the key wrong
	// since it would base it on startFileNum and numFiles
	key_t cacheKey ; cacheKey.n1 = 0; cacheKey.n0 = r->m_docId;
	// make titledb keys
	key_t startKey = g_titledb.makeFirstKey ( st->m_docId1 );
	key_t endKey   = g_titledb.makeLastKey  ( st->m_docId2 );

	// . load the list of title recs from disk now
	// . our file range should be solid
	// . use 500 million for min recsizes to get all in range
	if ( ! st->m_msg5.getList ( RDB_TITLEDB       ,
				    r->m_collnum ,
				    &st->m_tlist      ,
				    startKey          , // startKey
				    endKey            , // endKey
				    500000000         , // minRecSizes
				    true              , // includeTree
				    false,//r->m_addToCache   , // addToCache?
				    0,//r->m_maxCacheAge  , // max cache age
				    0,//startFileNum      ,
				    -1                 , // numFiles
				    st , // state             ,
				    gotTitleList      ,
				    r->m_niceness     ,
				    true              , // do error correct?
				    &cacheKey         ,
				    0                 , // retry num
				    -1                , // maxRetries
				    true              , // compensate for merge
				    -1LL              ) ) // sync point
		return ;

	// we did not block, nice... in cache?
	gotTitleList ( st , NULL , NULL );
}
// . destroys the slot if false is returned
// . this is registered in Msg1::set() to handle add rdb record msgs
// . seems like we should always send back a reply so we don't leave the
//   requester's slot hanging, unless he can kill it after transmit success???
// . TODO: need we send a reply back on success????
// . NOTE: Must always call g_udpServer::sendReply or sendErrorReply() so
//   read/send bufs can be freed
void handleRequest1 ( UdpSlot *slot , int32_t netnice ) {


	// extract what we read
	char *readBuf     = slot->m_readBuf;
	int32_t  readBufSize = slot->m_readBufSize;
	int32_t niceness = slot->m_niceness;

	// select udp server based on niceness
	UdpServer *us = &g_udpServer;
	// must at least have an rdbId
	if ( readBufSize <= 4 ) {
		g_errno = EREQUESTTOOSHORT;
		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply. Request too short", __FILE__, __func__, __LINE__);
		us->sendErrorReply ( slot , g_errno );
		return;
	}
	char *p    = readBuf;
	char *pend = readBuf + readBufSize;
	// extract rdbId
	char rdbId = *p++;
	// get the rdb to which it belongs, use Msg0::getRdb()
	Rdb *rdb = getRdbFromId ( (char) rdbId );
	if ( ! rdb ) { 
		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply. Bad rdbid", __FILE__, __func__, __LINE__);
		us->sendErrorReply ( slot, EBADRDBID ); 
		return;
	}
	// keep track of stats
	rdb->readRequestAdd ( readBufSize );
	// reset g_errno
	g_errno = 0;
	// are we injecting some title recs?
	bool injecting;
	if ( *p & 0x80 ) injecting = true;
	else             injecting = false;
	p++;
	// then collection
	//char *coll = p;
	//p += strlen (p) + 1;
	collnum_t collnum = *(collnum_t *)p;
	p += sizeof(collnum_t);
	// . make a list from this data
	// . skip over the first 4 bytes which is the rdbId
	// . TODO: embed the rdbId in the msgtype or something...
	RdbList list;
	// set the list
	list.set ( p        , // readBuf     + 4         ,
		   pend - p , // readBufSize - 4         ,
		   p        , // readBuf     + 4         ,
		   pend - p , // readBufSize - 4         ,
		   rdb->getFixedDataSize() ,
		   false                   ,  // ownData?
		   rdb->useHalfKeys()      ,
		   rdb->getKeySize ()      ); 
	// note it
	//log("msg1: handlerequest1 calling addlist niceness=%" PRId32,niceness);
	//log("msg1: handleRequest1 niceness=%" PRId32,niceness);
	// this returns false and sets g_errno on error
	rdb->addList ( collnum , &list , niceness);
	// if titledb, add tfndb recs to map the title recs
	//if ( ! g_errno && rdb == g_titledb.getRdb() && injecting ) 
	//	updateTfndb ( coll , &list , true, 0);
	// but if deleting a "new" and unforced record from spiderdb
	// then only delete tfndb record if it was tfn=255
	//if ( ! g_errno && rdb == g_spiderdb.getRdb() )
	//	updateTfndb2 ( coll , &list , false );
	// retry on some errors
	addedList ( slot , rdb );
}
Example #16
0
// . reply to a request for an RdbList
// . MUST call g_udpServer::sendReply or sendErrorReply() so slot can
//   be destroyed
void handleRequest0 ( UdpSlot *slot , int32_t netnice ) {
	logTrace( g_conf.m_logTraceMsg0, "BEGIN. Got request for an RdbList" );

	// if niceness is 0, use the higher priority udpServer
	UdpServer *us = &g_udpServer;
	//if ( netnice == 0 ) us = &g_udpServer2;
	// get the request
	char *request     = slot->m_readBuf;
	int32_t  requestSize = slot->m_readBufSize;
	// collection is now stored in the request, so i commented this out
	//if ( requestSize != MSG0_REQ_SIZE ) {
	//	log("net: Received bad data request size of %" PRId32" bytes. "
	//	    "Should be %" PRId32".", requestSize ,(int32_t)MSG0_REQ_SIZE);
	//	us->sendErrorReply ( slot , EBADREQUESTSIZE );
	//	return;
	//}
	// parse the request
	char *p                  = request;
	int64_t syncPoint          = *(int64_t *)p ; p += 8;
	//key_t     startKey           = *(key_t     *)p ; p += sizeof(key_t);
	//key_t     endKey             = *(key_t     *)p ; p += sizeof(key_t);
	int32_t      minRecSizes        = *(int32_t      *)p ; p += 4;
	int32_t      startFileNum       = *(int32_t      *)p ; p += 4;
	int32_t      numFiles           = *(int32_t      *)p ; p += 4;
	int32_t      maxCacheAge        = *(int32_t      *)p ; p += 4;
	char      rdbId              = *p++;
	char      addToCache         = *p++;
	char      doErrorCorrection  = *p++;
	char      includeTree        = *p++;
	// this was messing up our niceness conversion logic
	int32_t      niceness           = slot->m_niceness;//(int32_t)(*p++);
	// still need to skip it though!
	p++;
	bool      allowPageCache     = (bool)(*p++);
	char ks = getKeySizeFromRdbId ( rdbId );
	char     *startKey           = p; p+=ks;
	char     *endKey             = p; p+=ks;
	collnum_t collnum = *(collnum_t *)p; p += sizeof(collnum_t);

	CollectionRec *xcr = g_collectiondb.getRec ( collnum );
	if ( ! xcr ) g_errno = ENOCOLLREC;

	if( g_conf.m_logTraceMsg0 ) {
		logTrace( g_conf.m_logTraceMsg0, "rdbId....... %d", (int)rdbId );
		logTrace( g_conf.m_logTraceMsg0, "key size.... %d", (int)ks );
		logTrace( g_conf.m_logTraceMsg0, "startFileNum %" PRId32, startFileNum );
		logTrace( g_conf.m_logTraceMsg0, "numFiles.... %" PRId32, numFiles );
	}

	// error set from XmlDoc::cacheTermLists()?
	if ( g_errno ) {
		logTrace( g_conf.m_logTraceMsg0, "END. Invalid collection" );

		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
		us->sendErrorReply ( slot , EBADRDBID ); 
		return;
	}

	// . get the rdb we need to get the RdbList from
	// . returns NULL and sets g_errno on error
	//Msg0 msg0;
	//Rdb *rdb = msg0.getRdb ( rdbId );
	Rdb *rdb = getRdbFromId ( rdbId );
	if ( ! rdb ) {
		logTrace( g_conf.m_logTraceMsg0, "END. Invalid rdbId" );
		
		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
		us->sendErrorReply ( slot , EBADRDBID ); 
		return;
	}

	// keep track of stats
	rdb->readRequestGet ( requestSize );

	// . do a local get
	// . create a msg5 to get the list
	State00 *st0 ;
	try { st0 = new (State00); }
	catch ( ... ) { 
		g_errno = ENOMEM;
		log("Msg0: new(%" PRId32"): %s", 
		    (int32_t)sizeof(State00),mstrerror(g_errno));
		    
		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
		us->sendErrorReply ( slot , g_errno ); 
		return; 
	}
	mnew ( st0 , sizeof(State00) , "State00" );
	// timing debug
	if ( g_conf.m_logTimingNet )
		st0->m_startTime = gettimeofdayInMilliseconds();
	// save slot in state
	st0->m_slot = slot;
	// save udp server to send back reply on
	st0->m_us = us;
	// init this one
	st0->m_niceness = niceness;
	st0->m_rdbId    = rdbId;

	QUICKPOLL(niceness);

	// debug msg
	if ( maxCacheAge != 0 && ! addToCache ) {
		log( LOG_LOGIC, "net: msg0: check but don't add... rdbid=%" PRId32".", ( int32_t ) rdbId );
	}

	// . if this request came over on the high priority udp server
	//   make sure the priority gets passed along
	// . return if this blocks
	// . we'll call sendReply later
	if ( ! st0->m_msg5.getList ( rdbId             ,
				     collnum           ,
				     &st0->m_list      ,
				     startKey          ,
				     endKey            ,
				     minRecSizes       ,
				     includeTree       , // include tree?
				     addToCache        , // addToCache?
				     maxCacheAge       ,
				     startFileNum      , 
				     numFiles          ,
				     st0               ,
				     gotListWrapper    ,
				     niceness          ,
				     doErrorCorrection ,
				     NULL , // cacheKeyPtr
				     0    , // retryNum
				     2    , // maxRetries
				     true , // compensateForMerge
				     syncPoint ,
				     false,
				     allowPageCache ) ) {
		logTrace( g_conf.m_logTraceMsg0, "END. m_msg5.getList returned false" );
		return;
	}

	// call wrapper ouselves
	logTrace( g_conf.m_logTraceMsg0, "Calling gotListWrapper" );

	gotListWrapper ( st0 , NULL , NULL );

	logTrace( g_conf.m_logTraceMsg0, "END" );
}
Example #17
0
// . slot should be auto-nuked upon transmission or error
// . TODO: ensure if this sendReply() fails does it really nuke the slot?
void gotListWrapper ( void *state , RdbList *listb , Msg5 *msg5xx ) {
	logTrace( g_conf.m_logTraceMsg0, "BEGIN" );
	
	// get the state
	State00 *st0 = (State00 *)state;
	// extract the udp slot and list and msg5
	UdpSlot   *slot =  st0->m_slot;
	RdbList   *list = &st0->m_list;
	Msg5      *msg5 = &st0->m_msg5;
	UdpServer *us   =  st0->m_us;

	// timing debug
	if ( g_conf.m_logTimingNet || g_conf.m_logDebugNet ) {
		//log("Msg0:hndled request %" PRIu64,gettimeofdayInMilliseconds());
		int32_t size = -1;
		if ( list ) size     = list->getListSize();
		log(LOG_TIMING|LOG_DEBUG,
		    "net: msg0: Handled request for data. "
		    "Now sending data termId=%" PRIu64" size=%" PRId32
		    " transId=%" PRId32" ip=%s port=%i took=%" PRId64" "
		    "(niceness=%" PRId32").",
		    g_posdb.getTermId(msg5->m_startKey),
		    size,slot->m_transId,
		    iptoa(slot->m_ip),slot->m_port,
		    gettimeofdayInMilliseconds() - st0->m_startTime ,
		    st0->m_niceness );
	}

	// on error nuke the list and it's data
	if ( g_errno ) {
		mdelete ( st0 , sizeof(State00) , "Msg0" );
		delete (st0);
		// TODO: free "slot" if this send fails
		
		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
		us->sendErrorReply ( slot , g_errno );
		return;
	}

	QUICKPOLL(st0->m_niceness);
	// point to the serialized list in "list"
	char *data      = list->getList();
	int32_t  dataSize  = list->getListSize();
	char *alloc     = list->getAlloc();
	int32_t  allocSize = list->getAllocSize();
	// tell list not to free the data since it is a reply so UdpServer
	// will free it when it destroys the slot
	list->setOwnData ( false );
	// keep track of stats
	Rdb *rdb = getRdbFromId ( st0->m_rdbId );
	if ( rdb ) rdb->sentReplyGet ( dataSize );
	// TODO: can we free any memory here???

	// keep track of how long it takes to complete the send
	st0->m_startTime = gettimeofdayInMilliseconds();
	// debug point
	int32_t oldSize = msg5->m_minRecSizes;
	int32_t newSize = msg5->m_minRecSizes + 20;
	// watch for wrap around
	if ( newSize < oldSize ) newSize = 0x7fffffff;
	if ( dataSize > newSize && list->getFixedDataSize() == 0 &&
	     // do not annoy me with these linkdb msgs
	     dataSize > newSize+100 ) 
		log(LOG_LOGIC,"net: msg0: Sending more data than what was "
		    "requested. Ineffcient. Bad engineer. dataSize=%" PRId32" "
		    "minRecSizes=%" PRId32".",dataSize,oldSize);
		    
	//
	// for linkdb lists, remove all the keys that have the same IP32
	// and store a count of what we removed somewhere
	//
	if ( st0->m_rdbId == RDB_LINKDB ) {
		// store compressed list on itself
		char *dst = list->m_list;
		// keep stats
		int32_t totalOrigLinks = 0;
		int32_t ipDups = 0;
		int32_t lastIp32 = 0;
		char *listEnd = list->getListEnd();
		// compress the list
		for ( ; ! list->isExhausted() ; list->skipCurrentRecord() ) {
			// breathe
			QUICKPOLL ( st0->m_niceness );
			// count it
			totalOrigLinks++;
			// get rec
			char *rec = list->getCurrentRec();
			int32_t ip32 = g_linkdb.getLinkerIp_uk((key224_t *)rec );
			// same as one before?
			if ( ip32 == lastIp32 && 
			     // are we the last rec? include that for
			     // advancing the m_nextKey in Linkdb more 
			     // efficiently.
			     rec + LDBKS < listEnd ) {
				ipDups++;
				continue;
			}
			// store it
			gbmemcpy (dst , rec , LDBKS );
			dst += LDBKS;
			// update it
			lastIp32 = ip32;
		}
		// . if we removed one key, store the stats
		// . caller should recognize reply is not a multiple of
		//   the linkdb key size LDBKS and no its there!
		if ( ipDups ) {
			//*(int32_t *)dst = totalOrigLinks;
			//dst += 4;
			//*(int32_t *)dst = ipDups;
			//dst += 4;
		}
		// update list parms
		list->m_listSize = dst - list->m_list;
		list->m_listEnd  = list->m_list + list->m_listSize;
		data      = list->getList();
		dataSize  = list->getListSize();
	}


	//log("sending replySize=%" PRId32" min=%" PRId32,dataSize,msg5->m_minRecSizes);
	// . TODO: dataSize may not equal list->getListMaxSize() so
	//         Mem class may show an imblanace
	// . now g_udpServer is responsible for freeing data/dataSize
	// . the "true" means to call doneSending_ass() from the signal handler
	//   if need be
	st0->m_us->sendReply_ass( data, dataSize, alloc, allocSize, slot, st0, doneSending_ass, -1, -1, true );

	logTrace( g_conf.m_logTraceMsg0, "END" );
}	
// rcp our files to a new host (was a spare)
void handleRequest55 ( UdpSlot *slot , long netnice ) {
	// who should we copy to?
	char *p    = slot->m_readBuf;
	long  size = slot->m_readBufSize;
	UdpServer *us = &g_udpServer;
	// get hostid
	long hostId = -1;
	if ( size == 4 ) hostId = *(long *)p;
	// panic?
	if ( hostId < 0 || hostId >= g_hostdb.getNumHosts() ) {
		us->sendErrorReply ( slot , EBADENGINEER );
		return; 
	}
	// set this. RdbBase will turn off all merging
	Host *h = g_hostdb.getHost ( hostId );
	// only one at a time now!
	if ( g_hostdb.m_syncHost ) { 
		log ( "sync: Already syncing with another host. Aborting.");
		us->sendErrorReply ( slot , EBADENGINEER );
		return; 
	}
	// can't sync with ourselves
	if ( h == g_hostdb.m_myHost ) { 
		log ( "sync: Cannot sync with yourself. Aborting.");
		us->sendErrorReply ( slot , EBADENGINEER );
		return; 
	}
	if ( ! g_hostdb.isDead ( h ) ) {
		log ( "sync: Cannot sync live host. Aborting.");
		us->sendErrorReply ( slot , EBADENGINEER );
		return; 
	}
	Host *me = g_hostdb.m_myHost;
	if ( me->m_isPermanentOutOfSync ) {
		log ( "sync: Src host is permanently out of sync. Aborting.");
		us->sendErrorReply ( slot , EBADENGINEER );
		return; 
	}
	// now check it for a clean directory
	long ip = h->m_ip;
	// if that is dead use ip #2
	if ( h->m_ping        >= g_conf.m_deadHostTimeout &&
	     h->m_pingShotgun <  g_conf.m_deadHostTimeout   ) 
		ip = h->m_ipShotgun;
	char *ips = iptoa(ip);
	char cmd[1024];
	sprintf ( cmd, "ssh %s \"cd %s; du -b | tail -n 1\" > ./synccheck.txt",
		  ips, h->m_dir );
	log ( LOG_INFO, "init: %s", cmd );
	system(cmd);
	long fd = open ( "./synccheck.txt", O_RDONLY );
	if ( fd < 0 ) {
		log( "sync: Unable to open synccheck.txt. Aborting.");
		us->sendErrorReply ( slot , EBADENGINEER );
		return; 
	}

	long len = read ( fd, cmd, 1023 );
	cmd[len] = '\0';
	close(fd);
	// delete the file to make sure we don't reuse it
	system ( "rm ./synccheck.txt" );
	// check the size
	long checkSize = atol(cmd);
	if ( checkSize > 4096 || checkSize <= 0 ) {
		log("sync: Detected %li bytes in directory to "
		    "sync.  Must be empty.  Is .antiword dir in "
		    "there?", checkSize);
		us->sendErrorReply ( slot , EBADENGINEER );
		return; 
	}
	// RdbBase will not start any new merges if this m_syncHost is set
	g_hostdb.m_syncHost = h;
        // set this flag
	g_hostdb.m_syncHost->m_doingSync = 1;
}
// returns true if all done, false if waiting for more replies
bool Msg12::gotLockReply ( UdpSlot *slot ) {

	// no longer use this
	char *xx=NULL;*xx=0;

	// got reply
	m_numReplies++;
	// don't let udpserver free the request, it's our m_request[]
	slot->m_sendBufAlloc = NULL;
	// check for a hammer reply
	char *reply     = slot->m_readBuf;
	int32_t  replySize = slot->m_readBufSize;
	// if error, treat as a not grant
	if ( g_errno ) {
		bool logIt = true;
		// note it
		if ( g_conf.m_logDebugSpider )
			log("spider: got msg12 reply error = %s",
			    mstrerror(g_errno));
		// if we got an ETRYAGAIN when trying to confirm our lock
		// that means doledb was saving/dumping to disk and we 
		// could not remove the record from doledb and add an
		// entry to the waiting tree, so we need to keep trying
		if ( g_errno == ETRYAGAIN && m_confirming ) {
			// c ount it again
			m_numRequests++;
			// use what we were using
			char *request     = (char *)&m_confirmRequest;
			int32_t  requestSize = sizeof(ConfirmRequest);
			Host *h = g_hostdb.getHost(slot->m_hostId);
			// send request to him
			UdpServer *us = &g_udpServer;
			if ( ! us->sendRequest ( request      ,
						 requestSize  ,
						 0x12         , // msgType
						 h->m_ip      ,
						 h->m_port    ,
						 h->m_hostId  ,
						 NULL         , // retSlotPtrPt
						 this         , // state data
						 gotLockReplyWrapper ,
						 udpserver_sendrequest_infinite_timeout ) ) 
				return false;
			// error?
			// don't spam the log!
			static int32_t s_last = 0;
			int32_t now = getTimeLocal();
			if ( now - s_last >= 1 ) {
				s_last = now;
				log("spider: error re-sending confirm "
				    "request: %s",  mstrerror(g_errno));
			}
		}
		// only log every 10 seconds for ETRYAGAIN
		if ( g_errno == ETRYAGAIN ) {
			static time_t s_lastTime = 0;
			time_t now = getTimeLocal();
			logIt = false;
			if ( now - s_lastTime >= 3 ) {
				logIt = true;
				s_lastTime = now;
			}
		}
		if ( logIt )
			log ( "sploop: host had error getting lock url=%s"
			      ": %s" ,
			      m_url,mstrerror(g_errno) );
	}
	// grant or not
	if ( replySize == 1 && ! g_errno && *reply == 1 ) m_grants++;
	// wait for all to get back
	if ( m_numReplies < m_numRequests ) return false;
	// all done if we were removing
	if ( m_removing ) {
		// note it
		if ( g_conf.m_logDebugSpider )
		      logf(LOG_DEBUG,"spider: done removing all locks "
			   "(replies=%" PRId32") for %s",
			   m_numReplies,m_url);//m_sreq->m_url);
		// we are done
		m_gettingLocks = false;
		return true;
	}
	// all done if we were confirming
	if ( m_confirming ) {
		// note it
		if ( g_conf.m_logDebugSpider )
		      logf(LOG_DEBUG,"spider: done confirming all locks "
			   "for %s uh48=%" PRId64,m_url,m_origUh48);//m_sreq->m_url);
		// we are done
		m_gettingLocks = false;
		// . keep processing
		// . if the collection was nuked from under us the spiderUrl2
		//   will return true and set g_errno
		if ( ! m_callback ) return g_spiderLoop.spiderUrl2();
		// if we had a callback let our parent call it
		return true;
	}

	// if got ALL locks, spider it
	if ( m_grants == m_numReplies ) {
		// note it
		if ( g_conf.m_logDebugSpider )
		      logf(LOG_DEBUG,"spider: got lock for docid=lockkey=%" PRIu64,
			   m_lockKeyUh48);
		// flag this
		m_hasLock = true;
		// we are done
		//m_gettingLocks = false;


		///////
		//
		// now tell our group (shard) to remove from doledb
		// and re-add to waiting tree. the evalIpLoop() function
		// should skip this probable docid because it is in the 
		// LOCK TABLE!
		//
		// This logic should allow us to spider multiple urls
		// from the same IP at the same time.
		//
		///////

		// returns false if would block
		if ( ! confirmLockAcquisition ( ) ) return false;
		// . we did it without blocking, maybe cuz we are a single node
		// . ok, they are all back, resume loop
		// . if the collection was nuked from under us the spiderUrl2
		//   will return true and set g_errno
		if ( ! m_callback ) g_spiderLoop.spiderUrl2 ( );
		// all done
		return true;

	}
	// note it
	if ( g_conf.m_logDebugSpider )
		logf(LOG_DEBUG,"spider: missed lock for %s lockkey=%" PRIu64" "
		     "(grants=%" PRId32")",   m_url,m_lockKeyUh48,m_grants);

	// . if it was locked by another then add to our lock cache so we do
	//   not try to lock it again
	// . if grants is not 0 then one host granted us the lock, but not
	//   all hosts, so we should probably keep trying on it until it is
	//   locked up by one host
	if ( m_grants == 0 ) {
		int32_t now = getTimeGlobal();
		g_spiderLoop.m_lockCache.addLong(0,m_lockKeyUh48,now,NULL);
	}

	// reset again
	m_numRequests = 0;
	m_numReplies  = 0;
	// no need to remove them if none were granted because another
	// host in our group might have it 100% locked. 
	if ( m_grants == 0 ) {
		// no longer in locks operation mode
		m_gettingLocks = false;
		// ok, they are all back, resume loop
		//if ( ! m_callback ) g_spiderLoop.spiderUrl2 ( );
		// all done
		return true;
	}
	// note that
	if ( g_conf.m_logDebugSpider )
		logf(LOG_DEBUG,"spider: sending request to all in shard to "
		     "remove lock uh48=%" PRIu64". grants=%" PRId32,
		     m_lockKeyUh48,(int32_t)m_grants);
	// remove all locks we tried to get, BUT only if from our hostid!
	// no no! that doesn't quite work right... we might be the ones
	// locking it! i.e. another one of our spiders has it locked...
	if ( ! removeAllLocks ( ) ) return false; // true;
	// if did not block, how'd that happen?
	log("sploop: did not block in removeAllLocks: %s",mstrerror(g_errno));
	return true;
}
void handleRequest22 ( UdpSlot *slot , long netnice ) {
	// shortcut
	UdpServer *us = &g_udpServer;
	// get the request
	Msg22Request *r = (Msg22Request *)slot->m_readBuf;
       // get this
	//char *coll = g_collectiondb.getCollName ( r->m_collnum );

	// sanity check
	long  requestSize = slot->m_readBufSize;
	if ( requestSize < r->getMinSize() ) {
		log("db: Got bad request size of %li bytes for title record. "
		    "Need at least 28.",  requestSize );
		us->sendErrorReply ( slot , EBADREQUESTSIZE );
		return;
	}

	// get base, returns NULL and sets g_errno to ENOCOLLREC on error
	RdbBase *tbase; 
	if ( ! (tbase=getRdbBase(RDB_TITLEDB,r->m_collnum) ) ) {
		log("db: Could not get title rec in collection # %li "
		    "because rdbbase is null.",
		    (long)r->m_collnum);
		g_errno = EBADENGINEER;
		us->sendErrorReply ( slot , g_errno ); 
		return; 
	}

	// overwrite what is in there so niceness conversion algo works
	r->m_niceness = netnice;

	// if just checking tfndb, do not do the cache lookup in clusterdb
	if ( r->m_justCheckTfndb ) r->m_maxCacheAge = 0;

	// keep track of stats
	//if    (r->m_justCheckTfndb)
	//       g_tfndb.getRdb()->readRequestGet(requestSize);
	//      else    
       g_titledb.getRdb()->readRequestGet  (requestSize);

       // breathe
       QUICKPOLL ( r->m_niceness);

       // sanity check
       if ( r->m_collnum < 0 ) { char *xx=NULL;*xx=0; }


       // make the state now
       State22 *st ;
       try { st = new (State22); }
       catch ( ... ) {
	       g_errno = ENOMEM;
	       log("query: Msg22: new(%i): %s", sizeof(State22),
		   mstrerror(g_errno));
	       us->sendErrorReply ( slot , g_errno );
	       return;
       }
       mnew ( st , sizeof(State22) , "Msg22" );
       
       // store ptr to the msg22request
       st->m_r = r;
       // save for sending back reply
       st->m_slot = slot;

       // then tell slot not to free it since m_r references it!
       // so we'll have to free it when we destroy State22
       st->m_slotAllocSize = slot->m_readBufMaxSize;
       st->m_slotReadBuf   = slot->m_readBuf;
       slot->m_readBuf = NULL;

       // . make the keys for getting recs from tfndb
       // . url recs map docid to the title file # that contains the titleRec
       //key_t uk1 ;
       //key_t uk2 ;
       // . if docId was explicitly specified...
       // . we may get multiple tfndb recs
       if ( ! r->m_url[0] ) {
	       // there are no del bits in tfndb
	       //uk1 = g_tfndb.makeMinKey ( r->m_docId );
	       //uk2 = g_tfndb.makeMaxKey ( r->m_docId );
	       st->m_docId1 = r->m_docId;
	       st->m_docId2 = r->m_docId;
       }

       // but if we are requesting an available docid, it might be taken
       // so try the range
       if ( r->m_getAvailDocIdOnly ) {
	       long long pd = r->m_docId;
	       long long d1 = g_titledb.getFirstProbableDocId ( pd );
	       long long d2 = g_titledb.getLastProbableDocId  ( pd );
	       // sanity - bad url with bad subdomain?
	       if ( pd < d1 || pd > d2 ) { char *xx=NULL;*xx=0; }
	       // make sure we get a decent sample in titledb then in 
	       // case the docid we wanted is not available
	       st->m_docId1 = d1;
	       st->m_docId2 = d2;
       }

       // . otherwise, url was given, like from Msg15
       // . we may get multiple tfndb recs
       if ( r->m_url[0] ) {
	       long  dlen = 0;
	       // this causes ip based urls to be inconsistent with the call
	       // to getProbableDocId(url) below
	       char *dom  = getDomFast ( r->m_url , &dlen );
	       // bogus url?
	       if ( ! dom ) {
		       log("msg22: got bad url in request: %s",r->m_url);
		       g_errno = EBADURL;
		       us->sendErrorReply ( slot , g_errno ); 
		       mdelete ( st , sizeof(State22) , "Msg22" );
		       delete ( st );
		       return; 
	       }
	       long long pd = g_titledb.getProbableDocId (r->m_url,dom,dlen);
	       long long d1 = g_titledb.getFirstProbableDocId ( pd );
	       long long d2 = g_titledb.getLastProbableDocId  ( pd );
	       // sanity - bad url with bad subdomain?
	       if ( pd < d1 || pd > d2 ) { char *xx=NULL;*xx=0; }
	       // there are no del bits in tfndb
	       //uk1 = g_tfndb.makeMinKey ( d1 );
	       //uk2 = g_tfndb.makeMaxKey ( d2 );
	       // store these
	       st->m_pd     = pd;
	       st->m_docId1 = d1;
	       st->m_docId2 = d2;
	       st->m_uh48   = hash64b ( r->m_url ) & 0x0000ffffffffffffLL;
       }
       
       QUICKPOLL ( r->m_niceness );
       
       /*
       // shortcut
       Rdb *tdb = g_titledb.getRdb();

       // init this
       st->m_tfn2 = -1;
       // skip tfndb lookup if we can. saves some time.
       if ( g_conf.m_readOnlyMode && 
	    // must not be a *url* lookup, it must be a docid lookup
	    ! r->m_url[0] &&
	    // tree must be empty too i guess
	    tdb->getTree()->getNumUsedNodes() ==0 ) {
	       // the RdbBase contains the BigFiles for tfndb
	       RdbBase *base = tdb->m_bases[r->m_collnum];
	       // can only have one titledb file
	       if ( base->getNumFiles() == 1 ) {
		       // now we can get RdbBase
		       st->m_tfn2 = base->m_fileIds2[0];
		       // sanity check
		       if ( st->m_tfn2 < 0 ) { char *xx = NULL; *xx = 0; }
	       }
       }

       // check the tree for this docid
       RdbTree *tt = tdb->getTree();
       // make titledb keys
       key_t startKey = g_titledb.makeFirstKey ( st->m_docId1 );
       key_t endKey   = g_titledb.makeLastKey  ( st->m_docId2 );
       long  n        = tt->getNextNode ( r->m_collnum , startKey );
       
       // there should only be one match, one titlerec per docid!
       for ( ; n >= 0 ; n = tt->getNextNode ( n ) ) {
	       // break if collnum does not match. we exceeded our tree range.
	       if ( tt->getCollnum ( n ) != r->m_collnum ) break;
	       // get the key of this node
	       key_t k = *(key_t *)tt->getKey(n);
	       // if passed limit, break out, no match
	       if ( k > endKey ) break;
	       // if we had a url make sure uh48 matches
	       if ( r->m_url[0] ) {
		       // get it
		       long long uh48 = g_titledb.getUrlHash48(&k);
		       // sanity check
		       if ( st->m_uh48 == 0 ) { char *xx=NULL;*xx=0; }
		       // we must match this exactly
		       if ( uh48 != st->m_uh48 ) continue;
	       }
	       // . if we matched a negative key, then skip
	       // . just break out here and enter the normal logic
	       // . it should load tfndb and find that it is not in tfndb
	       //   because when you add a negative key to titledb in
	       //   Rdb::addList, it adds a negative rec to tfndb immediately
	       // . NO! because we add the negative key to the tree when we
	       //   delete the old titledb rec, then we add the new one!
	       //   when a negative key is added Rdb::addRecord() removes
	       //   the positive key (and vice versa) from the tree.
	       if ( KEYNEG((char *)&k) ) continue;
	       // if just checking for its existence, we are done
	       if ( r->m_justCheckTfndb ) {
		       us->sendReply_ass ( NULL,0,NULL,0,slot);
		       // don't forget to free the state
		       mdelete ( st , sizeof(State22) , "Msg22" );
		       delete ( st );
		       return;
	       }
	       // ok, we got a match, return it
	       char *data     = tt->getData     ( n );
	       long  dataSize = tt->getDataSize ( n );
	       // wierd!
	       if ( dataSize == 0 ) { char *xx=NULL;*xx=0; }
	       // send the whole rec back
	       long need = 12 + 4 + dataSize;
	       // will this copy it? not!
	       char *buf = (char *)mmalloc ( need , "msg22t" );
	       if ( ! buf ) {
		       us->sendErrorReply ( slot , g_errno ); 
		       mdelete ( st , sizeof(State22) , "Msg22" );
		       delete ( st );
		       return; 
	       }
	       // log it
	       if ( g_conf.m_logDebugSpider )
		       logf(LOG_DEBUG,"spider: found %s in titledb tree",
			    r->m_url);
	       // store in the buf for sending
	       char *p = buf;
	       // store key
	       *(key_t *)p = k; p += sizeof(key_t);
	       // then dataSize
	       *(long *)p = dataSize; p += 4;
	       // then the data
	       memcpy ( p , data , dataSize ); p += dataSize;
	       // send off the record
	       us->sendReply_ass (buf, need,buf, need,slot);
	       // don't forget to free the state
	       mdelete ( st , sizeof(State22) , "Msg22" );
	       delete ( st );
	       return;
       }

       // if we did not need to consult tfndb cuz we only have one file
       if ( st->m_tfn2 >= 0 ) {
	       gotUrlListWrapper ( st , NULL , NULL );
	       return;
       }

       // . get the list of url recs for this docid range
       // . this should not block, tfndb SHOULD all be in memory all the time
       // . use 500 million for min recsizes to get all in range
       // . no, using 500MB causes problems for RdbTree::getList, so use
       //   100k. how many recs can there be?
       if ( ! st->m_msg5.getList ( RDB_TFNDB         ,
				   coll              ,
				   &st->m_ulist      ,
				   uk1               , // startKey
				   uk2               , // endKey
				   // use 0x7fffffff preceisely because it
				   // will determine eactly how long the
				   // tree list needs to allocate in Msg5.cpp
				   0x7fffffff        , // minRecSizes
				   true              , // includeTree?
				   false             , // addToCache?
				   0                 , // max cache age
				   0                 , // startFileNum
				   -1                , // numFiles (-1 =all)
				   st                ,
				   gotUrlListWrapper ,
				   r->m_niceness     ,
				   true              ))// error correction?
	       return ;
       // we did not block
       gotUrlListWrapper ( st , NULL , NULL );
}

static void gotTitleList ( void *state , RdbList *list , Msg5 *msg5 ) ;

void gotUrlListWrapper ( void *state , RdbList *list , Msg5 *msg5 ) {
	// shortcuts
	State22   *st = (State22 *)state;
	UdpServer *us = &g_udpServer;

	// bail on error
	if ( g_errno ) { 
		log("db: Had error getting info from tfndb: %s.",
		    mstrerror(g_errno));
		log("db: uk1.n1=%li n0=%lli uk2.n1=%li n0=%lli "
		    "d1=%lli d2=%lli.",
		    ((key_t *)st->m_msg5.m_startKey)->n1 ,
		    ((key_t *)st->m_msg5.m_startKey)->n0 ,
		    ((key_t *)st->m_msg5.m_endKey)->n1   ,
		    ((key_t *)st->m_msg5.m_endKey)->n0   ,
		    st->m_docId1 ,
		    st->m_docId2 );
		us->sendErrorReply ( st->m_slot , g_errno ); 
		mdelete ( st , sizeof(State22) , "Msg22" );
		delete ( st );
		return;
	}

	// shortcuts
	RdbList      *ulist = &st->m_ulist;
	Msg22Request *r     = st->m_r;
	char         *coll  = g_collectiondb.getCollName ( r->m_collnum );

	// point to top just in case
	ulist->resetListPtr();

	// get base, returns NULL and sets g_errno to ENOCOLLREC on error
	RdbBase *tbase = getRdbBase(RDB_TITLEDB,coll);

	// set probable docid
	long long pd = 0LL;
	if ( r->m_url[0] ) {
		pd = g_titledb.getProbableDocId(r->m_url);
		// sanity
		if ( pd != st->m_pd ) { char *xx=NULL;*xx=0; }
	}

	// . these are both meant to be available docids
	// . if ad2 gets exhausted we use ad1
	long long ad1 = st->m_docId1;
	long long ad2 = pd;


	long tfn = -1;
	// sanity check. make sure did not load from tfndb if did not need to
	if ( ! ulist->isExhausted() && st->m_tfn2 >= 0 ) {char *xx=NULL;*xx=0;}
	// if only one titledb file and none in memory use it
	if ( st->m_tfn2 >= 0 ) tfn = st->m_tfn2;

	// we may have multiple tfndb recs but we should NEVER have to read
	// multiple titledb files...
	for ( ; ! ulist->isExhausted() ; ulist->skipCurrentRecord() ) {
		// breathe
		QUICKPOLL ( r->m_niceness );
		// get first rec
		key_t k = ulist->getCurrentKey();
		// . skip negative keys
		// . seems to happen when we have tfndb in the tree...
		if ( KEYNEG((char *)&k) ) continue;

		// if we have a url and no docid, we gotta check uh48!
		if ( r->m_url[0] && g_tfndb.getUrlHash48(&k)!=st->m_uh48){
			// get docid of that guy
			long long dd = g_tfndb.getDocId(&k);
			// if matches avail docid, inc it
			if ( dd == ad1 ) ad1++;
			if ( dd == ad2 ) ad2++;
			// try next tfndb key
			continue;
		}
		// . get file num this rec is stored in
		// . this is updated right after the file num is merged by
		//   scanning all records in tfndb. this is very quick if all
		//   of tfndb is in memory, otherwise, it might take a few
		//   seconds. update call done in RdbMerge::incorporateMerge().
		tfn = g_tfndb.getTfn ( &k );
		// i guess we got a good match!
		break;
	}

	// sanity check. 255 used to mean in spiderdb or in tree
	if ( tfn >= 255 ) { char *xx=NULL;*xx=0; }


	// maybe no available docid if we breached our range
	if ( ad1 >= pd           ) ad1 = 0LL;
	if ( ad2 >  st->m_docId2 ) ad2 = 0LL;
	// get best
	long long ad = ad2;
	// but wrap around if we need to
	if ( ad == 0LL ) ad = ad1;

	// breathe
	QUICKPOLL ( r->m_niceness);

	// . log if different
	// . if our url rec was in there, this could still be different
	//   if there was another url rec in there with the same docid and
	//   a diferent extension, but with a tfn of 255, meaning that it
	//   is just in spiderdb and not in titledb yet. so it hasn't been
	//   assigned a permanent docid...
	// . another way "ad" may be different now is from the old bug which
	//   did not chain the docid properly because it limited the docid
	//   chaining to one titleRec file. so conceivably we can have 
	//   different docs sharing the same docids, but with different 
	//   url hash extensions. for instance, on host #9 we have:
	//   00f3b2ff63aec3a9 docId=261670033643 e=0x58 tfn=117 clean=0 half=0 
	//   00f3b2ff63af66c9 docId=261670033643 e=0x6c tfn=217 clean=0 half=0 
	// . Msg16 will only use the avail docid if the titleRec is not found
	if ( r->m_url[0] && pd != ad ) {
		//log(LOG_INFO,"build: Docid %lli collided. %s Changing "
		//
		// http://www.airliegardens.org/events.asp?dt=2&date=8/5/2011
		//
		// COLLIDES WITH
		//
		// http://www.bbonline.com/i/chicago.html
		//
		// collision alert!
		log("spider: Docid %lli collided. %s Changing "
		    "to %lli.", r->m_docId , r->m_url , ad );
		// debug this for now
		//char *xx=NULL;*xx=0; 
	}

	// remember it
	st->m_availDocId = ad;

	// if tfn is -1 then it was not in titledb
	if ( tfn == -1 ) { 
		// store docid in reply
		char *p = st->m_slot->m_tmpBuf;
		// send back the available docid
		*(long long *)p = ad;
		// send it
		us->sendReply_ass ( p , 8 , p , 8 , st->m_slot );
		// don't forget to free state
		mdelete ( st , sizeof(State22) , "Msg22" );
		delete ( st );
		return;
	}

	// sanity
	if ( tfn < 0 ) { char *xx=NULL;*xx=0; }

	// breathe
	QUICKPOLL ( r->m_niceness );

	// ok, if just "checking tfndb" no need to go further
	if ( r->m_justCheckTfndb ) {
		// send back a good reply (empty means found!)
		us->sendReply_ass ( NULL,0,NULL,0,st->m_slot);
		// don't forget to free the state
		mdelete ( st , sizeof(State22) , "Msg22" );
		delete ( st );
		return;
	}

	// . compute the file scan range
	// . tfn is now equivalent to Rdb's id2, a secondary file id, it
	//   follows the hyphen in "titledb0001-023.dat"
	// . default to just scan the root file AND the tree, cuz we're
	//   assuming restrictToRoot was set to true so we did not get a tfndb
	//   list
	// . even if a file number is given, always check the tree in case
	//   it got re-spidered
	// . shit, but we can still miss it if it gets dumped right after
	//   our thread is spawned, in which case we'd fall back to the old
	//   version. no. because if its in the tree now we get it before
	//   spawning a thread. there is no blocking. TRICKY. so if it is in 
	//   the tree at this point we'll get it, but may end up scanning the
	//   file with the older version of the doc... not too bad.
	long startFileNum = tbase->getFileNumFromId2 ( tfn );

	// if tfn refers to a missing titledb file...
	if ( startFileNum < 0 ) {
		if ( r->m_url[0] ) log("db: titledb missing url %s",r->m_url);
		else        log("db: titledb missing docid %lli", r->m_docId);
		us->sendErrorReply ( st->m_slot,ENOTFOUND );
		mdelete ( st , sizeof(State22) , "Msg22" );
		delete ( st ); 
		return ;
	}

	// save this
	st->m_tfn = tfn;
	*/
	// make the cacheKey ourself, since Msg5 would make the key wrong
	// since it would base it on startFileNum and numFiles
	key_t cacheKey ; cacheKey.n1 = 0; cacheKey.n0 = r->m_docId;
	// make titledb keys
	key_t startKey = g_titledb.makeFirstKey ( st->m_docId1 );
	key_t endKey   = g_titledb.makeLastKey  ( st->m_docId2 );

	// . load the list of title recs from disk now
	// . our file range should be solid
	// . use 500 million for min recsizes to get all in range
	if ( ! st->m_msg5.getList ( RDB_TITLEDB       ,
				    r->m_collnum ,
				    &st->m_tlist      ,
				    startKey          , // startKey
				    endKey            , // endKey
				    500000000         , // minRecSizes
				    true              , // includeTree
				    false,//r->m_addToCache   , // addToCache?
				    0,//r->m_maxCacheAge  , // max cache age
				    0,//startFileNum      ,
				    -1                 , // numFiles
				    st , // state             ,
				    gotTitleList      ,
				    r->m_niceness     ,
				    true              , // do error correct?
				    &cacheKey         ,
				    0                 , // retry num
				    -1                , // maxRetries
				    true              , // compensate for merge
				    -1LL              , // sync point
				    &st->m_msg5b      ) ) return ;

	// we did not block, nice... in cache?
	gotTitleList ( st , NULL , NULL );
}
bool Msg12::confirmLockAcquisition ( ) {

	// ensure not in use. not msg12 replies outstanding.
	if ( m_numRequests != m_numReplies ) { char *xx=NULL;*xx=0; }

	// no longer use this
	char *xx=NULL;*xx=0;

	// we are now removing 
	m_confirming = true;

	// make that the request
	// . point to start of the 12 byte request buffer
	// . m_lockSequence should still be valid
	ConfirmRequest *cq = &m_confirmRequest;
	char *request     = (char *)cq;
	int32_t  requestSize = sizeof(ConfirmRequest);
	// sanity
	if ( requestSize == sizeof(LockRequest)){ char *xx=NULL;*xx=0; }
	// set it
	cq->m_collnum   = m_collnum;
	cq->m_doledbKey = m_doledbKey;
	cq->m_firstIp   = m_firstIp;
	cq->m_lockKeyUh48 = m_lockKeyUh48;
	cq->m_maxSpidersOutPerIp = m_maxSpidersOutPerIp;
	// . use the locking group from when we sent the lock request
	// . get ptr to list of hosts in the group
	//Host *hosts = g_hostdb.getGroup ( m_lockGroupId );
	// the same group (shard) that has the spiderRequest/Reply is
	// the one responsible for locking.
	Host *hosts = g_hostdb.getMyShard();
	// this must select the same shard that is going to spider it!
	// i.e. our shard! because we check our local lock table to see
	// if a doled url is locked before spidering it ourselves.
	//Host *hosts = g_hostdb.getMyShard();
	// shortcut
	UdpServer *us = &g_udpServer;
	// get # of hosts in each mirror group
	int32_t hpg = g_hostdb.getNumHostsPerShard();
	// reset counts
	m_numRequests = 0;
	m_numReplies  = 0;
	// note it
	if ( g_conf.m_logDebugSpider )
		log("spider: confirming lock for uh48=%" PRIu64" firstip=%s",
		    m_lockKeyUh48,iptoa(m_firstIp));
	// loop over hosts in that shard
	for ( int32_t i = 0 ; i < hpg ; i++ ) {
		// get a host
		Host *h = &hosts[i];
		// skip if dead! no need to get a reply from dead guys
		if ( g_hostdb.isDead ( h ) ) continue;
		// send request to him
		if ( ! us->sendRequest ( request      ,
					 // a size of 2 should mean confirm
					 requestSize  ,
					 0x12         , // msgType
					 h->m_ip      ,
					 h->m_port    ,
					 h->m_hostId  ,
					 NULL         , // retSlotPtrPtr
					 this         , // state data
					 gotLockReplyWrapper ,
					 udpserver_sendrequest_infinite_timeout ) ) 
			// udpserver returns false and sets g_errno on error
			return true;
		// count them
		m_numRequests++;
	}
	// block?
	if ( m_numRequests > 0 ) return false;
	// did not block
	return true;
}
void gotTitleList ( void *state , RdbList *list , Msg5 *msg5 ) {

	State22 *st = (State22 *)state;
	// if niceness is 0, use the higher priority udpServer
	UdpServer *us = &g_udpServer;
	// shortcut
	Msg22Request *r = st->m_r;
	// breathe
	QUICKPOLL(r->m_niceness);

	// send error reply on error
	if ( g_errno ) { 
	hadError:
		log("db: Had error getting title record from titledb: %s.",
		    mstrerror(g_errno));
		if ( ! g_errno ) { char *xx=NULL;*xx=0; }
		us->sendErrorReply ( st->m_slot , g_errno ); 
		mdelete ( st , sizeof(State22) , "Msg22" );
		delete ( st ); 
		return ;
	}

	// convenience var
	RdbList *tlist = &st->m_tlist;

	// set probable docid
	long long pd = 0LL;
	if ( r->m_url[0] ) {
		pd = g_titledb.getProbableDocId(r->m_url);
		if ( pd != st->m_pd ) { 
			log("db: crap probable docids do not match! u=%s",
			    r->m_url);
			g_errno = EBADENGINEER;
			goto hadError;
		}
		// sanity
		//if ( pd != st->m_pd ) { char *xx=NULL;*xx=0; }
	}

	// the probable docid is the PREFERRED docid in this case
	if ( r->m_getAvailDocIdOnly ) pd = st->m_r->m_docId;

	// . these are both meant to be available docids
	// . if ad2 gets exhausted we use ad1
	long long ad1 = st->m_docId1;
	long long ad2 = pd;


	bool docIdWasFound = false;

	// scan the titleRecs in the list
	for ( ; ! tlist->isExhausted() ; tlist->skipCurrentRecord ( ) ) {
		// breathe
		QUICKPOLL ( r->m_niceness );
		// get the rec
		char *rec     = tlist->getCurrentRec();
		long  recSize = tlist->getCurrentRecSize();
		// get that key
		key_t *k = (key_t *)rec;
		// skip negative recs, first one should not be negative however
		if ( ( k->n0 & 0x01 ) == 0x00 ) continue;

		// get docid of that titlerec
		long long dd = g_titledb.getDocId(k);

		if ( r->m_getAvailDocIdOnly ) {
			// make sure our available docids are availble!
			if ( dd == ad1 ) ad1++;
			if ( dd == ad2 ) ad2++;
			continue;
		}
		// if we had a url make sure uh48 matches
		else if ( r->m_url[0] ) {
			// get it
			long long uh48 = g_titledb.getUrlHash48(k);
			// sanity check
			if ( st->m_uh48 == 0 ) { char *xx=NULL;*xx=0; }
			// make sure our available docids are availble!
			if ( dd == ad1 ) ad1++;
			if ( dd == ad2 ) ad2++;
			// we must match this exactly
			if ( uh48 != st->m_uh48 ) continue;
		}
		// otherwise, check docid
		else {
			// compare that
			if ( r->m_docId != dd ) continue;
		}

		// flag that we matched m_docId
		docIdWasFound = true;

		// do not set back titlerec if just want avail docid
		//if ( r->m_getAvailDocIdOnly ) continue;

		// ok, if just "checking tfndb" no need to go further
		if ( r->m_justCheckTfndb ) {
			// send back a good reply (empty means found!)
			us->sendReply_ass ( NULL,0,NULL,0,st->m_slot);
			// don't forget to free the state
			mdelete ( st , sizeof(State22) , "Msg22" );
			delete ( st );
			return;
		}

		// use rec as reply
		char *reply = rec;

		// . send this rec back, it's a match
		// . if only one rec in list, steal the list's memory
		if ( recSize != tlist->getAllocSize() ) {
			// otherwise, alloc space for the reply
			reply = (char *)mmalloc (recSize, "Msg22");
			if ( ! reply ) goto hadError;
			memcpy ( reply , rec , recSize );
		}
		// otherwise we send back the whole list!
		else {
			// we stole this from list
			tlist->m_ownData = false;
		}
		// off ya go
		us->sendReply_ass(reply,recSize,reply,recSize,st->m_slot);
		// don't forget to free the state
		mdelete ( st , sizeof(State22) , "Msg22" );
		delete ( st );
		// all done
		return;
	}

	// maybe no available docid if we breached our range
	if ( ad1 >= pd           ) ad1 = 0LL;
	if ( ad2 >  st->m_docId2 ) ad2 = 0LL;
	// get best
	long long ad = ad2;
	// but wrap around if we need to
	if ( ad == 0LL ) ad = ad1;
	// if "docId" was unmatched that should be the preferred available
	// docid then...
	//if(! docIdWasFound && r->m_getAvailDocIdOnly && ad != r->m_docId ) { 
	//	char *xx=NULL;*xx=0; }
	// remember it. this might be zero if none exist!
	st->m_availDocId = ad;
	// note it
	if ( ad == 0LL && (r->m_getAvailDocIdOnly || r->m_url[0]) ) 
		log("msg22: avail docid is 0 for pd=%lli!",pd);

	// . ok, return an available docid
	if ( r->m_url[0] || r->m_justCheckTfndb || r->m_getAvailDocIdOnly ) {
		// store docid in reply
		char *p = st->m_slot->m_tmpBuf;
		// send back the available docid
		*(long long *)p = st->m_availDocId;
		// send it
		us->sendReply_ass ( p , 8 , p , 8 , st->m_slot );
		// don't forget to free state
		mdelete ( st , sizeof(State22) , "Msg22" );
		delete ( st );
		return;
	}

	// not found! and it was a docid based request...
	log("msg22: could not find title rec for docid %llu",r->m_docId);
	g_errno = ENOTFOUND;
	goto hadError;
}
void handleRequest12 ( UdpSlot *udpSlot , int32_t niceness ) {
	// get request
	char *request = udpSlot->m_readBuf;
	int32_t  reqSize = udpSlot->m_readBufSize;
	// shortcut
	UdpServer *us = &g_udpServer;
	// breathe
	QUICKPOLL ( niceness );

	// shortcut
	char *reply = udpSlot->m_tmpBuf;

	//
	// . is it confirming that he got all the locks?
	// . if so, remove the doledb record and dock the doleiptable count
	//   before adding a waiting tree entry to re-pop the doledb record
	//
	if ( reqSize == sizeof(ConfirmRequest) ) {
		char *msg = NULL;
		ConfirmRequest *cq = (ConfirmRequest *)request;

		// confirm the lock
		HashTableX *ht = &g_spiderLoop.m_lockTable;
		int32_t slot = ht->getSlot ( &cq->m_lockKeyUh48 );
		if ( slot < 0 ) { 
			log("spider: got a confirm request for a key not "
			    "in the table! coll must have been deleted "
			    " or reset "
			    "while lock request was outstanding.");
			g_errno = EBADENGINEER;
			
			log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
			us->sendErrorReply ( udpSlot , g_errno );
			return;
			//char *xx=NULL;*xx=0; }
		}
		UrlLock *lock = (UrlLock *)ht->getValueFromSlot ( slot );
		lock->m_confirmed = true;

		// note that
		if ( g_conf.m_logDebugSpider ) // Wait )
			log("spider: got confirm lock request for ip=%s",
			    iptoa(lock->m_firstIp));

		// get it
		SpiderColl *sc = g_spiderCache.getSpiderColl(cq->m_collnum);
		// make it negative
		cq->m_doledbKey.n0 &= 0xfffffffffffffffeLL;
		// and add the negative rec to doledb (deletion operation)
		Rdb *rdb = &g_doledb.m_rdb;
		if ( ! rdb->addRecord ( cq->m_collnum,
					(char *)&cq->m_doledbKey,
					NULL , // data
					0    , //dataSize
					1 )){ // niceness
			// tree is dumping or something, probably ETRYAGAIN
			if ( g_errno != ETRYAGAIN ) {msg = "error adding neg rec to doledb";	log("spider: %s %s",msg,mstrerror(g_errno));
			}
			//char *xx=NULL;*xx=0;
			
			log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
			us->sendErrorReply ( udpSlot , g_errno );
			return;
		}
		// now remove from doleiptable since we removed from doledb
		if ( sc ) sc->removeFromDoledbTable ( cq->m_firstIp );

		// how many spiders outstanding for this coll and IP?
		//int32_t out=g_spiderLoop.getNumSpidersOutPerIp ( cq->m_firstIp);

		// DO NOT add back to waiting tree if max spiders
		// out per ip was 1 OR there was a crawldelay. but better
		// yet, take care of that in the winReq code above.

		// . now add to waiting tree so we add another spiderdb
		//   record for this firstip to doledb
		// . true = callForScan
		// . do not add to waiting tree if we have enough outstanding
		//   spiders for this ip. we will add to waiting tree when
		//   we receive a SpiderReply in addSpiderReply()
		if ( sc && //out < cq->m_maxSpidersOutPerIp &&
		     // this will just return true if we are not the 
		     // responsible host for this firstip
		    // DO NOT populate from this!!! say "false" here...
		     ! sc->addToWaitingTree ( 0 , cq->m_firstIp, false ) &&
		     // must be an error...
		     g_errno ) {
			msg = "FAILED TO ADD TO WAITING TREE";
			log("spider: %s %s",msg,mstrerror(g_errno));
			
			log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
			us->sendErrorReply ( udpSlot , g_errno );
			return;
		}
		// success!!
		reply[0] = 1;
		us->sendReply_ass ( reply , 1 , reply , 1 , udpSlot );
		return;
	}



	// sanity check
	if ( reqSize != sizeof(LockRequest) ) {
		log("spider: bad msg12 request size of %" PRId32,reqSize);
		
		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
		us->sendErrorReply ( udpSlot , EBADREQUEST );
		return;
	}
	// deny it if we are not synced yet! otherwise we core in 
	// getTimeGlobal() below
	if ( ! isClockInSync() ) { 
		// log it so we can debug it
		//log("spider: clock not in sync with host #0. so "
		//    "returning etryagain for lock reply");
		// let admin know why we are not spidering
		
		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
		us->sendErrorReply ( udpSlot , ETRYAGAIN );
		return;
	}

	LockRequest *lr = (LockRequest *)request;
	//uint64_t lockKey = *(int64_t *)request;
	//int32_t lockSequence = *(int32_t *)(request+8);
	// is this a remove operation? assume not
	//bool remove = false;
	// get top bit
	//if ( lockKey & 0x8000000000000000LL ) remove = true;

	// mask it out
	//lockKey &= 0x7fffffffffffffffLL;
	// sanity check, just 6 bytes! (48 bits)
	if ( lr->m_lockKeyUh48 &0xffff000000000000LL ) { char *xx=NULL;*xx=0; }
	// note it
	if ( g_conf.m_logDebugSpider )
		log("spider: got msg12 request uh48=%" PRId64" remove=%" PRId32,
		    lr->m_lockKeyUh48, (int32_t)lr->m_removeLock);
	// get time
	int32_t nowGlobal = getTimeGlobal();
	// shortcut
	HashTableX *ht = &g_spiderLoop.m_lockTable;

	int32_t hostId = g_hostdb.getHostId ( udpSlot->m_ip , udpSlot->m_port );
	// this must be legit - sanity check
	if ( hostId < 0 ) { char *xx=NULL;*xx=0; }

	// remove expired locks from locktable
	removeExpiredLocks ( hostId );

	int64_t lockKey = lr->m_lockKeyUh48;

	// check tree
	int32_t slot = ht->getSlot ( &lockKey ); // lr->m_lockKeyUh48 );
	// put it here
	UrlLock *lock = NULL;
	// if there say no no
	if ( slot >= 0 ) lock = (UrlLock *)ht->getValueFromSlot ( slot );

	// if doing a remove operation and that was our hostid then unlock it
	if ( lr->m_removeLock && 
	     lock && 
	     lock->m_hostId == hostId &&
	     lock->m_lockSequence == lr->m_lockSequence ) {
		// note it for now
		if ( g_conf.m_logDebugSpider )
			log("spider: removing lock for lockkey=%" PRIu64" hid=%" PRId32,
			    lr->m_lockKeyUh48,hostId);
		// unlock it
		ht->removeSlot ( slot );
		// it is gone
		lock = NULL;
	}
	// ok, at this point all remove ops return
	if ( lr->m_removeLock ) {
		reply[0] = 1;
		us->sendReply_ass ( reply , 1 , reply , 1 , udpSlot );
		return;
	}

	/////////
	//
	// add new lock
	//
	/////////


	// if lock > 1 hour old then remove it automatically!!
	if ( lock && nowGlobal - lock->m_timestamp > MAX_LOCK_AGE ) {
		// note it for now
		log("spider: removing lock after %" PRId32" seconds "
		    "for lockKey=%" PRIu64" hid=%" PRId32,
		    (nowGlobal - lock->m_timestamp),
		    lr->m_lockKeyUh48,hostId);
		// unlock it
		ht->removeSlot ( slot );
		// it is gone
		lock = NULL;
	}
	// if lock still there, do not grant another lock
	if ( lock ) {
		// note it for now
		if ( g_conf.m_logDebugSpider )
			log("spider: refusing lock for lockkey=%" PRIu64" hid=%" PRId32,
			    lr->m_lockKeyUh48,hostId);
		reply[0] = 0;
		us->sendReply_ass ( reply , 1 , reply , 1 , udpSlot );
		return;
	}
	// make the new lock
	UrlLock tmp;
	tmp.m_hostId       = hostId;
	tmp.m_lockSequence = lr->m_lockSequence;
	tmp.m_timestamp    = nowGlobal;
	tmp.m_expires      = 0;
	tmp.m_firstIp      = lr->m_firstIp;
	tmp.m_collnum      = lr->m_collnum;

	// when the spider returns we remove its lock on reception of the
	// spiderReply, however, we actually just set the m_expires time
	// to 5 seconds into the future in case there is a current request
	// to get a lock for that url in progress. but, we do need to
	// indicate that the spider has indeed completed by setting
	// m_spiderOutstanding to true. this way, addToWaitingTree() will
	// not count it towards a "max spiders per IP" quota when deciding
	// on if it should add a new entry for this IP.
	tmp.m_spiderOutstanding = true;
	// this is set when all hosts in the group (shard) have granted the
	// lock and the host sends out a confirmLockAcquisition() request.
	// until then we do not know if the lock will be granted by all hosts
	// in the group (shard)
	tmp.m_confirmed    = false;

	// put it into the table
	if ( ! ht->addKey ( &lockKey , &tmp ) ) {
		// return error if that failed!
		
		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
		us->sendErrorReply ( udpSlot , g_errno );
		return;
	}
	// note it for now
	if ( g_conf.m_logDebugSpider )
		log("spider: granting lock for lockKey=%" PRIu64" hid=%" PRId32,
		    lr->m_lockKeyUh48,hostId);
	// grant the lock
	reply[0] = 1;
	us->sendReply_ass ( reply , 1 , reply , 1 , udpSlot );
	return;
}
Example #24
0
int main(int argc, char *argv[])
{
    QCoreApplication a(argc, argv);
    UdpServer server;
    QString ttyPort = "/dev/ttyACM0";
    int baudrate = 115200;
    int udpPort = 27800;

    QStringList args = QCoreApplication::arguments();

    for (int i = 0;i < args.size();i++) {
        // Skip the program argument
        if (i == 0) {
            continue;
        }

        QString str = args.at(i).toLower();

        // Skip path argument
        if (i >= args.size() && args.size() >= 3) {
            break;
        }

        bool dash = str.startsWith("-") && !str.startsWith("--");
        bool found = false;

        if ((dash && str.contains('h')) || str == "--help") {
            showHelp();
            found = true;
            return 0;
        }

        if ((dash && str.contains('t')) || str == "--ttyport") {
            if ((i - 1) < args.size()) {
                i++;
                ttyPort = args.at(i);
                found = true;
            }
        }

        if ((dash && str.contains('b')) || str == "--baudrate") {
            if ((i - 1) < args.size()) {
                i++;
                bool ok;
                baudrate = args.at(i).toInt(&ok);
                found = ok;
            }
        }

        if ((dash && str.contains('u')) || str == "--udpport") {
            if ((i - 1) < args.size()) {
                i++;
                bool ok;
                udpPort = args.at(i).toInt(&ok);
                found = ok;
            }
        }

        if (!found) {
            if (dash) {
                qCritical() << "At least one of the flags is invalid:" << str;
            } else {
                qCritical() << "Invalid option:" << str;
            }

            showHelp();
            return 1;
        }
    }

    qDebug() << "Serial Port:" << ttyPort;
    qDebug() << "Baudrate:" << baudrate;
    qDebug() << "UDP Port:" << udpPort;

    if (server.startServer(ttyPort, baudrate, udpPort)) {
        qDebug() << "The server was successfully started.";
        qDebug() << "UDP Port:" << server.getUdpPort();
        qDebug() << "Serial Port:" << server.getSerialPortPath();
    } else {
        qCritical() << "Errors occured. Exiting...";
        QTimer::singleShot(500, &a, SLOT(quit()));
    }

    return a.exec();
}