// . writes one text line, "<local time in ms> <clock adjustment>\n", to
//   s_tafile, truncating whatever was there before
// . does nothing and returns true if setTimeAdjustmentFilename() was never
//   called or if our clock is not in sync (nothing worth saving)
// . returns false and sets g_errno on error
// . saved by Process::saveBlockingFiles1()
bool saveTimeAdjustment ( ) {
	// forget it if setTimeAdjustmentFilename never called
	if ( ! s_hasFileName ) return true;
	// must be in sync!
	if ( ! g_clockInSync ) return true;
	// format the line to store
	uint64_t local = gettimeofdayInMillisecondsLocal();
	char wbuf[1024];
	snprintf (wbuf,sizeof(wbuf),"%" PRIu64" %" PRId64"\n",local,s_adjustment);
	// write it out
	int fd = open ( s_tafile , O_CREAT|O_WRONLY|O_TRUNC , 0666 );
	if ( fd < 0 ) {
		// grab errno before log() has a chance to overwrite it
		int err = errno;
		log("util: could not open %s for writing",s_tafile);
		g_errno = err;
		return false;
	}
	// how many bytes to write?
	int32_t len = strlen(wbuf);
	// write them all in one shot
	ssize_t nw = write ( fd , wbuf , len );
	if ( nw != (ssize_t)len ) {
		// . a short write does not set errno, so report it as EIO
		//   rather than leaving g_errno stale or zero
		// . capture the code before log()/close() can clobber errno
		int err = ( nw < 0 ) ? errno : EIO;
		log(LOG_WARN, "util: writing %s had error: %s",s_tafile,
		    mstrerror(err));
		close(fd);
		g_errno = err;
		return false;
	}
	close(fd);
	// note it
	log(LOG_DEBUG, "util: saved %s",s_tafile);
	// it was written ok
	return true;
}
// . reads the "<local time in ms> <clock adjustment>" line from s_tafile
// . if the stamp is no more than 2 days old, adopts the stored adjustment
//   and marks the clock as in sync; an older stamp is ignored (returns true)
// . returns true without doing anything if no filename was ever set
// . returns false and sets g_errno on error
bool loadTimeAdjustment ( ) {
	// bail if no filename to read
	if ( ! s_hasFileName ) return true;
	// read it in
	// one line in text
	int fd = open ( s_tafile , O_RDONLY );
	if ( fd < 0 ) {
		// grab errno before log() has a chance to overwrite it
		int err = errno;
		log("util: could not open %s for reading",s_tafile);
		g_errno = err;
		return false;
	}
	char rbuf[1024+1];
	// read in max bytes, leaving room for the terminating \0
	ssize_t bytes_read = read ( fd , rbuf , sizeof(rbuf)-1 );
	if ( bytes_read < 0 ) {
		// capture before log()/close() can clobber errno
		int err = errno;
		log(LOG_WARN, "util: reading %s had error: %s",s_tafile,
		    mstrerror(err));
		close(fd);
		g_errno = err;
		return false;
	}
	close(fd);
	rbuf[(size_t)bytes_read] = '\0';

	// . parse the text line
	// . the stamp is written as an unsigned 64-bit value, so scan it as
	//   one and convert afterwards
	uint64_t stampRaw = 0ULL;
	int64_t  clockAdj = 0LL;
	if(sscanf ( rbuf , "%" SCNu64" %" SCNd64, &stampRaw, &clockAdj ) != 2) {
		log("util: Could not parse content of %s", s_tafile);
		// sscanf does not set errno on a matching failure, so pick
		// a real error code to honor the "sets g_errno" contract
		g_errno = EINVAL;
		return false;
	}
	int64_t stampTime = (int64_t)stampRaw;
	// get stamp age. both times are in milliseconds.
	int64_t local = gettimeofdayInMillisecondsLocal();
	int64_t stampAge = local - stampTime;
	// . if too old forget about it
	// . the limit is 2 days and must be expressed in milliseconds too
	const int64_t maxStampAgeMS = 2LL * 86400LL * 1000LL;
	if ( stampAge > maxStampAgeMS ) return true;
	// update adjustment
	s_adjustment = clockAdj;
	// if stamp in file is within 2 days old, assume its still good
	// this will prevent having to rebuild a sortbydatetable
	// and really slow down loadups
	g_clockInSync = true;
	// note it. age is signed: it is negative if the stamp is in the future
	log(LOG_DEBUG, "util: loaded %s and put clock in sync. age=%" PRId64" adj=%" PRId64,
	    s_tafile,stampAge,clockAdj);
	return true;
}
// . a requester uses this to tell us it has finished downloading through
//   the proxy we handed out
// . the proxy request is just the top part of the Msg13Request
// . stamps the matching LoadBucket with the current time as its download
//   end time, then replies on udpSlot if one was given
void returnProxy ( Msg13Request *preq , UdpSlot *udpSlot ) {

	// load buckets are keyed by the url's ip
	int32_t  urlIp = preq->m_urlIp;

	// walk every bucket stored under this url ip until we hit the one
	// whose id, proxy ip and proxy port all match the request
	bool foundBucket = false;
	for ( int32_t slot = s_loadTable.getSlot ( &urlIp ) ;
	      slot >= 0 ;
	      slot = s_loadTable.getNextSlot(slot,&urlIp) ) {
		LoadBucket *bucket =
			(LoadBucket *)s_loadTable.getValueFromSlot(slot);
		const bool isMatch =
			bucket->m_id        == preq->m_lbId      &&
			bucket->m_proxyIp   == preq->m_proxyIp   &&
			bucket->m_proxyPort == preq->m_proxyPort;
		if ( ! isMatch ) continue;
		// this is the one. the download ended just now.
		bucket->m_downloadEndTimeMS =
			gettimeofdayInMillisecondsLocal();
		foundBucket = true;
		break;
	}

	// ran off the end of the chain without a match?
	if ( ! foundBucket )
		log("sproxy: could not find load bucket id #%" PRId32,preq->m_lbId);

	// only reply when the caller gave us a slot. a caller that is
	// banning a proxy passes NULL because it still has to pick another
	// proxy to send back before replying.
	if ( udpSlot )
		g_udpServer.sendReply(0, 0, 0, 0, udpSlot);
}
Ejemplo n.º 4
0
// . allocates "size" bytes surrounded by UNDERPAD bytes in front and
//   OVERPAD bytes behind, registers the user region with addMem() under
//   "note" and returns a pointer to the user region (raw pointer + UNDERPAD)
// . a zero-byte request returns the dummy pointer 0x7fffffff
// . returns NULL and sets g_errno on failure: a simulated random failure,
//   going over g_conf.m_maxMem, or the system allocator failing
// . tries freeCacheMem() before giving up on either out-of-memory path
void *Mem::gbmalloc ( size_t size , const char *note ) {
	logTrace( g_conf.m_logTraceMem, "size=%zu note='%s'", size, note );

	// don't let electric fence zap us
	if ( size == 0 ) return (void *)0x7fffffff;
	
	if ( allocationShouldFailRandomly() ) {
		g_errno = ENOMEM; 
		log( LOG_WARN, "mem: malloc-fake(%zu,%s): %s",size,note, mstrerror(g_errno));
		return NULL;
	} 

	// what we actually ask the system for. every allocation made below
	// must use this size because we always hand back mem + UNDERPAD.
	const size_t paddedSize = size + UNDERPAD + OVERPAD;

retry:
	size_t max = g_conf.m_maxMem;

	// don't go over max
	if ( g_mem.getUsedMem() + paddedSize >= max ) {
		// try to free temp mem. returns true if it freed some.
		if ( freeCacheMem() ) goto retry;
		g_errno = ENOMEM;
		log( LOG_WARN, "mem: malloc(%zu): Out of memory", size );
		return NULL;
	}

	void *mem;

	mem = (void *)sysmalloc ( paddedSize );

	// counts how often we had to re-allocate a suspiciously low address
	int32_t memLoop = 0;
mallocmemloop:
	if ( ! mem && size > 0 ) {
		g_mem.m_outOfMems++;
		// try to free temp mem. returns true if it freed some.
		if ( freeCacheMem() ) goto retry;
		g_errno = errno;
		// rate limit this log msg to one per second and report how
		// many were suppressed in between
		static int64_t s_lastTime;
		static int32_t s_missed = 0;
		int64_t now = gettimeofdayInMillisecondsLocal();
		int64_t avail = (int64_t)g_conf.m_maxMem - (int64_t)m_used;
		if ( now - s_lastTime >= 1000LL ) {
			log(LOG_WARN, "mem: system malloc(%zu,%s) availShouldBe=%" PRId64": "
			    "%s (%s) (ooms suppressed since last log msg = %" PRId32")",
			    paddedSize,
			    note,
			    avail,
			    mstrerror(g_errno),
			    note,
			    s_missed);
			s_lastTime = now;
			s_missed = 0;
		} else {
			s_missed++;
		}

		return NULL;
	}
	if ( (PTRTYPE)mem < 0x00010000 ) {
		// . allocate the replacement BEFORE freeing the low buffer
		//   so the allocator cannot hand the same address back
		// . it must include the padding too, otherwise the region
		//   starting at mem + UNDERPAD overruns the buffer
		void *remem = sysmalloc(paddedSize);
		log( LOG_WARN, "mem: Caught low memory allocation "
		      "at %08" PTRFMT", "
		      "reallocated to %08" PTRFMT"",
		      (PTRTYPE)mem, (PTRTYPE)remem );
		sysfree(mem);
		mem = remem;
		memLoop++;
		if ( memLoop > 100 ) {
			log( LOG_WARN, "mem: Attempted to reallocate low "
					"memory allocation 100 times, "
					"aborting and returning NOMEM." );
			// do not leak the last attempt
			if ( mem ) sysfree(mem);
			g_errno = ENOMEM;
			return NULL;
		}
		// re-check the new pointer for NULL / low address
		goto mallocmemloop;
	}

	logTrace( g_conf.m_logTraceMem, "mem=%p size=%zu note='%s'", mem, size, note );

	// track only the user-visible region, which starts after the
	// front padding
	addMem ( (char *)mem + UNDERPAD , size , note , 0 );
	return (char *)mem + UNDERPAD;
}
// a host is asking us (host #0) what proxy to use?
static void handleRequest54(UdpSlot *udpSlot, int32_t niceness) {

	char *request     = udpSlot->m_readBuf;
	int32_t  requestSize = udpSlot->m_readBufSize;

	// we now use the top part of the Msg13Request as the ProxyRequest
	Msg13Request *preq = (Msg13Request *)request;

	// sanity check
	if ( requestSize != preq->getProxyRequestSize() ) {
		log("db: Got bad request 0x54 size of %" PRId32" bytes. bad",
		    requestSize );
		g_udpServer.sendErrorReply ( udpSlot , EBADREQUESTSIZE );
		return;
	}

	// is the request telling us it is done downloading through a proxy?
	if ( preq->m_opCode == OP_RETPROXY ) {
		returnProxy ( preq , udpSlot );
		return;
	}

	// if sender is asking for a new proxy and wants us to ban
	// the previous proxy we sent for this urlIp...
	if ( preq->m_banProxyIp ) {
		// don't core if misses sanity. it seems we don't always
		// NULLify these or something.
		// these must match
		if(preq->m_banProxyIp   != preq->m_proxyIp  ||
		   preq->m_banProxyPort != preq->m_proxyPort){
			log("db: proxy: banproxyip != proxyip. mismatch!");
			g_udpServer.sendErrorReply ( udpSlot , EBADENGINEER);
			return;
		}
		// this will "return" the banned proxy
		returnProxy ( preq , NULL );
		// now add it to the banned table
		int64_t uip = preq->m_urlIp;
		int64_t pip = preq->m_banProxyIp;
		int64_t h64 = hash64h ( uip , pip );
		if ( ! s_proxyBannedTable.isInTable ( &h64 ) ) {
			s_proxyBannedTable.addKey ( &h64 );
			// for stats counting. each proxy ip maps to #
			// of unique website IPs that have banned it.
			s_banCountTable.addTerm32((uint32_t)pip);
		}
	}
	

	// shortcut
	int32_t urlIp = preq->m_urlIp;

	// send to a proxy that is up and has the least amount
	// of LoadBuckets with this urlIp, if tied, go to least loaded.

	// clear counts for this url ip for scoring the best proxy to use
	for ( int32_t i = 0 ; i < s_iptab.getNumSlots() ; i++ ) {
		// skip empty slots
		if ( ! s_iptab.m_flags[i] ) continue;
		SpiderProxy *sp = (SpiderProxy *)s_iptab.getValueFromSlot(i);
		sp->m_countForThisIp = 0;
		sp->m_lastTimeUsedForThisIp = 0LL;
	}

	// this table maps a url's current IP to a possibly MULTIPLE slots
	// which tell us what proxy is downloading a page from that IP.
	// so we can try to find a proxy that is not download a url from
	// this IP currently, or hasn't been for the longest time...
	int32_t hslot = s_loadTable.getSlot ( &urlIp );
	// scan all proxies that have this urlip outstanding
	for ( int32_t i = hslot ; i >= 0 ; i = s_loadTable.getNextSlot(i,&urlIp)){
		// get the bucket
		LoadBucket *lb;
		lb = (LoadBucket *)s_loadTable.getValueFromSlot(i);
		// get the spider proxy this load point was for
		uint64_t key = (uint32_t)lb->m_proxyIp;
		key <<= 16;
		key |= (uint16_t)lb->m_proxyPort;
		SpiderProxy *sp = (SpiderProxy *)s_iptab.getValue(&key);
		// must be there unless user remove it from the list
		if ( ! sp ) continue;
		// count it up
		if (  lb->m_downloadEndTimeMS == 0LL ) 
			sp->m_countForThisIp++;
		// set the last time used to the most recently downloaded time
		// that this proxy has downloaded from this ip
		if ( lb->m_downloadEndTimeMS &&
		     lb->m_downloadEndTimeMS > sp->m_lastTimeUsedForThisIp )
			sp->m_lastTimeUsedForThisIp = lb->m_downloadEndTimeMS;
	}

	// first try to get a spider proxy that is not "dead"
	bool skipDead = true;

	int32_t numBannedProxies = 0;
	int32_t aliveProxyCandidates = 0;

 redo:
	// get the min of the counts
	int32_t minCount = 999999;
	for ( int32_t i = 0 ; i < s_iptab.getNumSlots() ; i++ ) {
		// skip empty slots
		if ( ! s_iptab.m_flags[i] ) continue;
		// get the spider proxy
		SpiderProxy *sp = (SpiderProxy *)s_iptab.getValueFromSlot(i);

		// if this proxy was banned by the url's ip... skip it. it is
		// not a candidate...
		if ( skipDead ) {
			int64_t uip = preq->m_urlIp;
			int64_t pip = sp->m_ip;
			int64_t h64 = hash64h ( uip , pip );
			if ( s_proxyBannedTable.isInTable ( &h64 ) ) {
				numBannedProxies++;
				continue;
			}
		}

		// if it failed the last test, skip it
		if ( skipDead && sp->m_lastDownloadError ) continue;

		if ( skipDead ) aliveProxyCandidates++;

		if ( sp->m_countForThisIp >= minCount ) continue;
		minCount = sp->m_countForThisIp;
	}

	// all dead? then get the best dead one
	if ( minCount == 999999 ) {
		skipDead = false;
		goto redo;
	}

	// . we only use one proxy if none are banned by this IP
	// . when that gets banned, we will use the next 2 proxies with
	//   a higher backoff/crawlDelay, etc.
	int32_t threshHold;
	if      ( numBannedProxies <= 0  ) threshHold = 1;

	// if first proxy gets banned, try next 2 proxies until both get ban'd
	else if ( numBannedProxies == 1  ) threshHold = 2;
	else if ( numBannedProxies <  1+2) threshHold = 3 - numBannedProxies;

	// if next two proxies got banned, try next 4 proxies until banned
	else if ( numBannedProxies == 3  ) threshHold = 4;
	else if ( numBannedProxies <  3+4) threshHold = 7 - numBannedProxies;

	// if next 4 proxies got banned, try next 8 proxies until they get band
	else if ( numBannedProxies == 7  ) threshHold = 8;
	else if ( numBannedProxies <  7+8) threshHold = 15 - numBannedProxies;

	else if ( numBannedProxies == 15) threshHold = 16;
	else if ( numBannedProxies <  15+16 ) threshHold = 31-numBannedProxies;

	else if ( numBannedProxies == 31 ) threshHold = 32;
	else if ( numBannedProxies <  31+32)threshHold=63-numBannedProxies;

	else if ( numBannedProxies == 63 ) threshHold = 64;
	else if ( numBannedProxies <  63+64)threshHold=127-numBannedProxies;

	else if ( numBannedProxies == 127 ) threshHold = 128;
	else if ( numBannedProxies <  127+128)threshHold=255-numBannedProxies;

	else if ( numBannedProxies == 255 ) threshHold = 256;
	else if ( numBannedProxies <  255+256)threshHold=512-numBannedProxies;

	else if ( numBannedProxies == 511 ) threshHold = 512;
	else if ( numBannedProxies <  511+512)threshHold=1024-numBannedProxies;

	else threshHold = 1024;
	
	
	if ( threshHold <= 0 ) {
		log("proxy: spiderproxy error in threshold of %" PRId32" "
		    "for banned=%" PRId32,threshHold,numBannedProxies);
		threshHold = 1;
	}

	// reset minCount so we can take the min over those we check here
	minCount = -1;
	int64_t oldest = 0x7fffffffffffffffLL;
	SpiderProxy *winnersp = NULL;
	int32_t count = 0;
	// start at a random slot based on url's IP so we don't
	// overload the first proxy
	int32_t start = ((uint32_t)urlIp) % s_iptab.getNumSlots();
	int32_t slotCount = s_iptab.getNumSlots();
	// . now find the best proxy wih the minCount
	for ( int32_t i = start ; ; i++ ) {
		// scan all slots in hash table, then stop
		if ( slotCount-- <= 0 ) break;
		// wrap around to zero if we hit the end
		if ( i == s_iptab.getNumSlots() ) i = 0;
		// skip empty slots
		if ( ! s_iptab.m_flags[i] ) continue;
		// get the spider proxy
		SpiderProxy *sp = (SpiderProxy *)s_iptab.getValueFromSlot(i);
		// if it failed the last test, skip it... not here...
		if ( skipDead && sp->m_lastDownloadError ) continue;

		// if this proxy was banned by the url's ip... skip it. it is
		// not a candidate...
		if ( skipDead ) {
			int64_t uip = preq->m_urlIp;
			int64_t pip = sp->m_ip;
			int64_t h64 = hash64h ( uip , pip );
			if ( s_proxyBannedTable.isInTable ( &h64 ) ) continue;
		}

		// if some proxies are "alive" then only pick from
		// the first half of the proxies that are alive (i.e. still
		// work). that way, when one of those goes dead we will inc
		// the backoff (crawldelay) and a new proxy that we haven't
		// used for this url's IP will take it's place. and such
		// new proxies will only have the new backoff count used
		// through them. that way, we don't get ALL of our proxies
		// banned at about the same time since we do somewhat uniform
		// load balancing over them.
		if ( skipDead && count >= threshHold)//aliveProxyCandidates/2 )
			continue;

		// count the alive/non-banned candidates
		count++;

		// if all hosts were "dead" because they all had 
		// m_lastDownloadError set then minCount will be 999999
		// and nobody should continue from this statement:
		if ( sp->m_countForThisIp > minCount && minCount>=0 ) continue;
		// then go by last download time for this ip
		if ( sp->m_countForThisIp == minCount && minCount>=0 &&
		     sp->m_lastTimeUsedForThisIp >= oldest ) 
			continue;

		// pick the spider proxy used longest ago
		oldest   = sp->m_lastTimeUsedForThisIp;
		minCount = sp->m_countForThisIp;
		// got a new winner
		winnersp = sp;
	}

	// we must have a winner
	if ( ! winnersp ) { g_process.shutdownAbort(true); }

	int64_t nowms = gettimeofdayInMillisecondsLocal();

	// add a new load bucket then!
	LoadBucket bb;
	bb.m_urlIp = urlIp;
	// the time it started
	bb.m_downloadStartTimeMS = nowms;
	// download has not ended yet
	bb.m_downloadEndTimeMS = 0LL;
	// the host using the proxy
	bb.m_hostId = udpSlot->getHostId();
	// key is this for m_prTable
	bb.m_proxyIp   = winnersp->m_ip;
	bb.m_proxyPort = winnersp->m_port;
	// a new id. we use this to update the downloadEndTime when done
	static int32_t s_lbid = 0;
	// add it now
	bb.m_id = s_lbid++;
	s_loadTable.addKey ( &urlIp , &bb );
	// winner count update
	winnersp->m_timesUsed++;

	// sanity
	if ( (int32_t)sizeof(ProxyReply) > TMPBUFSIZE ){g_process.shutdownAbort(true);}

	// and give proxy ip/port back to the requester so they can
	// use that to download their url
	ProxyReply *prep = (ProxyReply *)udpSlot->m_tmpBuf;
	prep->m_proxyIp = winnersp->m_ip;
	prep->m_proxyPort = winnersp->m_port;

	// this is just '\0' if none
	strcpy(prep->m_usernamePwd,winnersp->m_usernamePwd);

	// do not count the proxy we are returning as "more"
	prep->m_hasMoreProxiesToTry = ( aliveProxyCandidates > 1 );
	// and the loadbucket id, so requester can tell us it is done
	// downloading through the proxy and we can update the LoadBucket
	// for this transaction (m_lbId)
	prep->m_lbId = bb.m_id;
	// requester wants to know how many proxies have been banned by the
	// urlIp so it can increase a self-imposed crawl-delay to be more
	// sensitive to the spider policy.
	prep->m_numBannedProxies = numBannedProxies;

	//char *p = udpSlot->m_tmpBuf;
	//*(int32_t  *)p = winnersp->m_ip  ; p += 4;
	//*(int16_t *)p = winnersp->m_port; p += 2;
	// and the loadbucket id
	//*(int32_t *)p = bb.m_id; p += 4;

	// with dup keys we end up with long chains of crap and this
	// takes forever. so just flush the whole thing every 2 minutes AND
	// when 20000+ entries are in there
	static time_t s_lastTime = 0;
	time_t now = nowms / 1000;
	if ( s_lastTime == 0 ) s_lastTime = now;
	time_t elapsed = now - s_lastTime;
	if ( elapsed > 120 && s_loadTable.getNumSlots() > 10000 ) {
		log("sproxy: flushing %i entries from proxy loadtable that "
		    "have accumulated since %i seconds ago",
		    (int)s_loadTable.m_numSlotsUsed,(int)elapsed);
		s_loadTable.clear();
		// only do this one per minute
		s_lastTime = now;
	}


	int32_t sanityCount = 0;//s_loadTable.getNumSlots();
	// top:
	// now remove old entries from the load table. entries that
	// have completed and have a download end time more than 10 mins ago.
	for ( int32_t i = s_loadTable.getNumSlots() - 1 ; i >= 0 ; i-- ) {
		// skip if empty
		if ( ! s_loadTable.m_flags[i] ) continue;
		// get the bucket
		LoadBucket *pp =(LoadBucket *)s_loadTable.getValueFromSlot(i);
		// skip if still active
		if ( pp->m_downloadEndTimeMS == 0LL ) continue;
		// delta t
		int64_t took = nowms - pp->m_downloadEndTimeMS;
		// < 10 mins? now it's < 15 seconds to prevent clogging.
		if ( took < LOADPOINT_EXPIRE_MS ) continue;

		// 100 at a time so we don't slam cpu
		if ( sanityCount++ > 100 ) break;

		// ok, its too old, nuke it to save memory
		s_loadTable.removeSlot(i);
		// the keys might have buried us but we really should not
		// mis out on analyzing any keys if we just keep looping here
		// should we? TODO: figure it out. if we miss a few it's not
		// a big deal.
		//i--;
		//goto top;
	}

	// send the proxy ip/port/LBid back to user
	g_udpServer.sendReply(udpSlot->m_tmpBuf, sizeof(ProxyReply), udpSlot->m_tmpBuf, sizeof(ProxyReply), udpSlot);
}
Ejemplo n.º 6
0
// . returns false if blocked, true otherwise
// . sets errno on error
// . make a web page displaying the config of this host
// . call g_httpServer.sendDynamicPage() to send it
bool sendPageHosts ( TcpSocket *s , HttpRequest *r ) {
	// don't allow pages bigger than 128k in cache
	char  buf [ 64*1024 ];
	//char *p    = buf;
	//char *pend = buf + 64*1024;
	SafeBuf sb(buf, 64*1024);


	// XML OR JSON
	 char format = r->getReplyFormat();
	// if ( format == FORMAT_XML || format == FORMAT_JSON )
	// 	return sendPageHostsInXmlOrJson( s , r );


	// check for a sort request
	int32_t sort  = r->getLong ( "sort", -1 );
	// sort by hostid with dead on top by default
	if ( sort == -1 ) sort = 16;
	const char *coll = r->getString ( "c" );
	//char *pwd  = r->getString ( "pwd" );
	// check for setnote command
	int32_t setnote = r->getLong("setnote", 0);
	int32_t setsparenote = r->getLong("setsparenote", 0);
	// check for replace host command
	int32_t replaceHost = r->getLong("replacehost", 0);
	// check for sync host command
	int32_t syncHost = r->getLong("synchost", 0);
	// set note...
	if ( setnote == 1 ) {
		// get the host id to change
		int32_t host = r->getLong("host", -1);
		if ( host == -1 ) goto skipReplaceHost;
		// get the note to set
		int32_t  noteLen;
		const char *note = r->getString("note", &noteLen, "", 0);
		// set the note
		g_hostdb.setNote(host, note, noteLen);
	}
	// set spare note...
	if ( setsparenote == 1 ) {
		// get the host id to change
		int32_t spare = r->getLong("spare", -1);
		if ( spare == -1 ) goto skipReplaceHost;
		// get the note to set
		int32_t  noteLen;
		const char *note = r->getString("note", &noteLen, "", 0);
		// set the note
		g_hostdb.setSpareNote(spare, note, noteLen);
	}
	// replace host...
	if ( replaceHost == 1 ) {
		// get the host ids to swap
		int32_t rhost = r->getLong("rhost", -1);
		int32_t rspare = r->getLong("rspare", -1);
		if ( rhost == -1 || rspare == -1 )
			goto skipReplaceHost;
		// replace
		g_hostdb.replaceHost(rhost, rspare);
	}
	// sync host...
	if ( syncHost == 1 ) {
		// get the host id to sync
		int32_t syncHost = r->getLong("shost", -1);
		if ( syncHost == -1 ) goto skipReplaceHost;
		// call sync
		g_hostdb.syncHost(syncHost, false);
	}
	if ( syncHost == 2 ) {
		// get the host id to sync
		int32_t syncHost = r->getLong("shost", -1);
		if ( syncHost == -1 ) goto skipReplaceHost;
		// call sync
		g_hostdb.syncHost(syncHost, true);
	}

skipReplaceHost:

	int32_t refreshRate = r->getLong("rr", 0);
	if(refreshRate > 0 && format == FORMAT_HTML ) 
		sb.safePrintf("<META HTTP-EQUIV=\"refresh\" "
			      "content=\"%" PRId32"\"\\>",
			      refreshRate);

	// print standard header
	// 	char *pp    = sb.getBuf();
	// 	char *ppend = sb.getBufEnd();
	// 	if ( pp ) {
	if ( format == FORMAT_HTML ) g_pages.printAdminTop ( &sb , s , r );
	//	sb.incrementLength ( pp - sb.getBuf() );
	//	}
	const char *colspan = "30";
	//char *shotcol = "";
	char shotcol[1024];
	shotcol[0] = '\0';
	const char *cs = coll;
	if ( ! cs ) cs = "";

	if ( g_conf.m_useShotgun && format == FORMAT_HTML ) {
		colspan = "31";
		//shotcol = "<td><b>ip2</b></td>";
		sprintf ( shotcol, "<td><a href=\"/admin/hosts?c=%s"
			 	   "&sort=2\">"
			  "<b>ping2</b></td></a>",
			  cs);
	}

	// print host table
	if ( format == FORMAT_HTML )
		sb.safePrintf ( 
			       "<table %s>"
			       "<tr><td colspan=%s><center>"
			       //"<font size=+1>"
			       "<b>Hosts "
			       "(<a href=\"/admin/hosts?c=%s&sort=%" PRId32"&resetstats=1\">"
			       "reset)</a></b>"
			       //"</font>"
			       "</td></tr>" 
			       "<tr bgcolor=#%s>"
			       "<td><a href=\"/admin/hosts?c=%s&sort=0\">"

			       "<b>hostId</b></a></td>"
			       "<td><b>host ip</b></td>"
			       "<td><b>shard</b></td>"
			       "<td><b>mirror</b></td>" // mirror # within the shard

			       // i don't remember the last time i used this, so let's
			       // just comment it out to save space
			       //"<td><b>group mask</td>"

			       //"<td><b>ip1</td>"
			       //"<td><b>ip2</td>"
			       //"<td><b>udp port</td>"

			       // this is now more or less obsolete
			       //"<td><b>priority udp port</td>"

			       //"<td><b>dns client port</td>"
			       "<td><b>http port</b></td>"

			       // this is now obsolete since ide channel is. it was used
			       // so that only the guy with the token could merge,
			       // and it made sure that only one merge per ide channel
			       // and per group was going on at any one time for performance
			       // reasons.
			       //"<td><b>token group</td>"

			       //"<td><b>best switch id</td>"
			       //"<td><b>actual switch id</td>"
			       //"<td><b>switch id</td>"

			       // this is now fairly obsolete
			       //"<td><b>ide channel</td>"

			       //"<td><b>HD temps (C)</b></td>"
			       "<td><b>GB version</b></td>"

			       //"<td><b>resends sent</td>"
			       //"<td><b>errors recvd</td>"
			       "<td><b>try agains recvd</b></td>"

			       "<td><a href=\"/admin/hosts?c=%s&sort=3\">"
			       "<b>dgrams resent</b></a></td>"

			       /*

				 MDW: take out for adding new stuff

			       "<td><a href=\"/admin/hosts?c=%s&sort=4\">"
			       "<b>errors recvd</a></td>"
			       "<td><a href=\"/admin/hosts?c=%s&sort=5\">"
			       "<b>ETRY AGAINS recvd</a></td>"

			       "<td><a href=\"/admin/hosts?c=%s&sort=6\">"
			       "<b>dgrams to</a></td>"
			       "<td><a href=\"/admin/hosts?c=%s&sort=7\">"
			       "<b>dgrams from</a></td>"
			       */

			       // "<td><a href=\"/admin/hosts?c=%s&sort=18\">"
			       // "<b>corrupts</a></td>"
			       // "<td><a href=\"/admin/hosts?c=%s&sort=19\">"
			       // "<b># ooms</a></td>"
			       // "<td><a href=\"/admin/hosts?c=%s&sort=20\">"
			       // "<b>socks closed</a></td>"


			       //"<td><a href=\"/admin/hosts?c=%s&sort=8\">"
			       //"<b>loadavg</a></td>"


			       "<td><a href=\"/admin/hosts?c=%s&sort=13\">"
			       "<b>avg split time</b></a></td>"

			       "<td><b>splits done</b></a></td>"

			       "<td><a href=\"/admin/hosts?c=%s&sort=12\">"
			       "<b>status</b></a></td>"

			       "<td><a href=\"/admin/hosts?c=%s&sort=15\">"
			       "<b>slow reads</b></a></td>"

			       "<td><b>docs indexed</a></td>"

			       "<td><a href=\"/admin/hosts?c=%s&sort=9\">"
			       "<b>mem used</a></td>"

			       "<td><a href=\"/admin/hosts?c=%s&sort=10\">"
			       "<b>cpu used</b></a></td>"

			       "<td><a href=\"/admin/hosts?c=%s&sort=17\">"
			       "<b>disk used</b></a></td>"

			       "<td><a href=\"/admin/hosts?c=%s&sort=14\">"
			       "<b>max ping1</b></a></td>"

			       "<td><a href=\"/admin/hosts?c=%s&sort=11\">"
			       "<b>ping1 age</b></a></td>"

			       //"<td><b>ip1</td>"
			       "<td><a href=\"/admin/hosts?c=%s&sort=1\">"
			       "<b>ping1</b></a></td>"

			       "%s"// "<td><b>ip2</td>"
			       //"<td><b>inSync</td>",
			       //"<td>avg roundtrip</td>"
			       //"<td>std. dev.</td></tr>"
			       "<td><b>note</b></td>",
			       TABLE_STYLE ,
			       colspan    ,

			       cs, sort,
			       DARK_BLUE  ,

			       cs,
			       cs,
			       cs,
			       cs,
			       cs,
			       cs,
			       cs,
			       cs,
			       cs,
			       cs,
			       cs,
			       shotcol    );

	// loop through each host we know and print it's stats
	int32_t nh = g_hostdb.getNumHosts();
	// should we reset resends, errorsRecvd and ETRYAGAINS recvd?
	if ( r->getLong("resetstats",0) ) {
		for ( int32_t i = 0 ; i < nh ; i++ ) {
			// get the ith host (hostId)
			Host *h = g_hostdb.getHost ( i );
			h->m_pingInfo.m_totalResends   = 0;
			h->m_errorReplies = 0;
			h->m_pingInfo.m_etryagains   = 0;
			h->m_dgramsTo     = 0;
			h->m_dgramsFrom   = 0;
			h->m_splitTimes = 0;
			h->m_splitsDone = 0;
			h->m_pingInfo.m_slowDiskReads =0;
			
		}
	}

	// sort hosts if needed
	int32_t hostSort [ MAX_HOSTS ];
	for ( int32_t i = 0 ; i < nh ; i++ )
		hostSort [ i ] = i;
	switch ( sort ) {
	case 1: gbsort ( hostSort, nh, sizeof(int32_t), pingSort1      ); break;
	case 2: gbsort ( hostSort, nh, sizeof(int32_t), pingSort2      ); break;
	case 3: gbsort ( hostSort, nh, sizeof(int32_t), resendsSort    ); break;
	case 4: gbsort ( hostSort, nh, sizeof(int32_t), errorsSort     ); break;
	case 5: gbsort ( hostSort, nh, sizeof(int32_t), tryagainSort   ); break;
	case 6: gbsort ( hostSort, nh, sizeof(int32_t), dgramsToSort   ); break;
	case 7: gbsort ( hostSort, nh, sizeof(int32_t), dgramsFromSort ); break;
	//case 8: gbsort ( hostSort, nh, sizeof(int32_t), loadAvgSort    ); break;
	case 9: gbsort ( hostSort, nh, sizeof(int32_t), memUsedSort    ); break;
	case 10:gbsort ( hostSort, nh, sizeof(int32_t), cpuUsageSort   ); break;
	case 11:gbsort ( hostSort, nh, sizeof(int32_t), pingAgeSort    ); break;
	case 12:gbsort ( hostSort, nh, sizeof(int32_t), flagSort       ); break;
	case 13:gbsort ( hostSort, nh, sizeof(int32_t), splitTimeSort  ); break;
	case 14:gbsort ( hostSort, nh, sizeof(int32_t), pingMaxSort    ); break;
	case 15:gbsort ( hostSort, nh, sizeof(int32_t), slowDiskSort    ); break;
	case 16:gbsort ( hostSort, nh, sizeof(int32_t), defaultSort    ); break;
	case 17:gbsort ( hostSort, nh, sizeof(int32_t), diskUsageSort   ); break;

	}

	// we are the only one that uses these flags, so set them now
	/*
	static char s_properSet = 0;
	if ( ! s_properSet ) {
		s_properSet = 1;
		g_hostdb.setOnProperSwitchFlags();
	}
	*/

	if ( format == FORMAT_XML ) {
		sb.safePrintf("<response>\n");
		sb.safePrintf("\t<statusCode>0</statusCode>\n");
		sb.safePrintf("\t<statusMsg>Success</statusMsg>\n");
	}

	if ( format == FORMAT_JSON ) {
		sb.safePrintf("{\"response\":{\n");
		sb.safePrintf("\t\"statusCode\":0,\n");
		sb.safePrintf("\t\"statusMsg\":\"Success\",\n");
	}

	int64_t nowmsLocal = gettimeofdayInMillisecondsLocal();

	// compute majority gb version so we can highlight bad out of sync
	// gb versions in red below
	int32_t majorityHash32 = 0;
	int32_t lastCount = 0;
	// get majority gb version
	for ( int32_t si = 0 ; si < nh ; si++ ) {
		int32_t i = hostSort[si];
		// get the ith host (hostId)
		Host *h = g_hostdb.getHost ( i );
		char *vbuf = h->m_pingInfo.m_gbVersionStr;//gbVersionStrBuf;
		int32_t vhash32 = hash32n ( vbuf );
		if ( vhash32 == majorityHash32 ) lastCount++;
		else lastCount--;
		if ( lastCount < 0 ) majorityHash32 = vhash32;
	}


	// print it
	//int32_t ng = g_hostdb.getNumGroups();
	for ( int32_t si = 0 ; si < nh ; si++ ) {
		int32_t i = hostSort[si];
		// get the ith host (hostId)
		Host *h = g_hostdb.getHost ( i );
		// get avg/stdDev msg roundtrip times in ms for ith host
		//int32_t avg , stdDev;
		//g_hostdb.getTimes ( i , &avg , &stdDev );
                char ptr[256];
                int32_t pingAge = generatePingMsg(h, nowmsLocal, ptr);
		char pms[64];
		if ( h->m_pingMax < 0 ) sprintf(pms,"???");
		else                    sprintf(pms,"%" PRId32"ms",h->m_pingMax);
		// the sync status ascii-ized
		char syncStatus = h->m_syncStatus;
		const char *ptr2;
		if      (syncStatus==0) 
			ptr2 ="<b>N</b>";
		else if (syncStatus==1) 
			ptr2 ="Y";
		else 
			ptr2 ="?";
		char ipbuf1[64];
		char ipbuf2[64];
		strcpy(ipbuf1,iptoa(h->m_ip));
		strcpy(ipbuf2,iptoa(h->m_ipShotgun));

		/*
		char  hdbuf[128];
		char *hp = hdbuf;
		for ( int32_t k = 0 ; k < 4 ; k++ ) {
			int32_t temp = h->m_hdtemps[k];
			if ( temp > 50 && format == FORMAT_HTML )
				hp += sprintf(hp,"<font color=red><b>%" PRId32
					      "</b></font>",
					      temp);
			else
				hp += sprintf(hp,"%" PRId32,temp);
			if ( k < 3 ) *hp++ = '/';
			*hp = '\0';
		}
		*/
		char *vbuf = h->m_pingInfo.m_gbVersionStr;//m_gbVersionStrBuf;
		// get hash
		int32_t vhash32 = hash32n ( vbuf );
		const char *vbuf1 = "";
		const char *vbuf2 = "";
		if ( vhash32 != majorityHash32 ) {
			vbuf1 = "<font color=red><b>";
			vbuf2 = "</font></b>";
		}

		//int32_t switchGroup = 0;
		//if ( g_hostdb.m_indexSplits > 1 )
		//	switchGroup = h->m_group%g_hostdb.m_indexSplits;

		// host can have 2 ip addresses, get the one most
		// similar to that of the requester
		int32_t eip = g_hostdb.getBestIp ( h , s->m_ip );
		char ipbuf3[64];
		strcpy(ipbuf3,iptoa(eip));

		const char *fontTagFront = "";
		const char *fontTagBack  = "";
		if ( h->m_pingInfo.m_percentMemUsed >= 98.0 && 
		     format == FORMAT_HTML ) {
			fontTagFront = "<font color=red>";
			fontTagBack  = "</font>";
		}

		float cpu = h->m_pingInfo.m_cpuUsage;
		if ( cpu > 100.0 ) cpu = 100.0;
		if ( cpu < 0.0   ) cpu = -1.0;

		char diskUsageMsg[64];
		sprintf(diskUsageMsg,"%.1f%%",h->m_pingInfo.m_diskUsage);
		if ( h->m_pingInfo.m_diskUsage < 0.0 )
			sprintf(diskUsageMsg,"???");
		if ( h->m_pingInfo.m_diskUsage>=98.0 && format == FORMAT_HTML )
			sprintf(diskUsageMsg,"<font color=red><b>%.1f%%"
				"</b></font>",h->m_pingInfo.m_diskUsage);


		// split time, don't divide by zero!
		int32_t splitTime = 0;
		if ( h->m_splitsDone ) 
			splitTime = h->m_splitTimes / h->m_splitsDone;

		//char flagString[32];
		char tmpfb[64];
		SafeBuf fb(tmpfb,64);
		//char *fs = flagString;
		//*fs = '\0';

		// does its hosts.conf file disagree with ours?
		if ( h->m_pingInfo.m_hostsConfCRC &&
		     format == FORMAT_HTML &&
		     h->m_pingInfo.m_hostsConfCRC != g_hostdb.getCRC() )
			fb.safePrintf("<font color=red><b title=\"Hosts.conf "
				      "in disagreement with ours.\">H"
				      "</b></font>");
		if ( h->m_pingInfo.m_hostsConfCRC &&
		     format != FORMAT_HTML &&
		     h->m_pingInfo.m_hostsConfCRC != g_hostdb.getCRC() )
			fb.safePrintf("Hosts.conf in disagreement with ours");

		int32_t flags = h->m_pingInfo.m_flags;


		if ( format == FORMAT_HTML ) {
			// use these new ones for now
			int n = h->m_pingInfo.m_numCorruptDiskReads;
			if ( n )
				fb.safePrintf("<font color=red><b>"
					      "C"
					      "<sup>%" PRId32"</sup>"
					      "</b></font>"
					      , n );
			n = h->m_pingInfo.m_numOutOfMems;
			if ( n )
				fb.safePrintf("<font color=red><b>"
					      "O"
					      "<sup>%" PRId32"</sup>"
					      "</b></font>"
					      , n );
			n = h->m_pingInfo.m_socketsClosedFromHittingLimit;
			if ( n )
				fb.safePrintf("<font color=red><b>"
					      "K"
					      "<sup>%" PRId32"</sup>"
					      "</b></font>"
					      , n );
			if ( flags & PFLAG_OUTOFSYNC )
				fb.safePrintf("<font color=red><b>"
					      "N"
					      "</b></font>"
					      );
		}

		// recovery mode? reocvered from coring?
		if ((flags & PFLAG_RECOVERYMODE)&& format == FORMAT_HTML ) {
			fb.safePrintf("<b title=\"Recovered from core"
				      "\">x</b>");
			// this is only 8-bits at the moment so it's capped
			// at 255. this level is 1 the first time we core
			// and are restarted.
			if ( h->m_pingInfo.m_recoveryLevel > 1 )
			fb.safePrintf("<sup>%" PRId32"</sup>",
				      (int32_t)
				      h->m_pingInfo.m_recoveryLevel);
		}

		if ((flags & PFLAG_RECOVERYMODE)&& format != FORMAT_HTML )
			fb.safePrintf("Recovered from core");

		// rebalancing?
		if ( (flags & PFLAG_REBALANCING)&& format == FORMAT_HTML )
			fb.safePrintf("<b title=\"Currently "
				      "rebalancing\">R</b>");
		if ( (flags & PFLAG_REBALANCING)&& format != FORMAT_HTML )
			fb.safePrintf("Currently rebalancing");

		// has recs that should be in another shard? indicates
		// we need to rebalance or there is a bad hosts.conf
		if ((flags & PFLAG_FOREIGNRECS) && format == FORMAT_HTML )
			fb.safePrintf("<font color=red><b title=\"Foreign "
				      "data "
				      "detected. Needs rebalance.\">F"
				      "</b></font>");
		if ((flags & PFLAG_FOREIGNRECS) && format != FORMAT_HTML )
			fb.safePrintf("Foreign data detected. "
				      "Needs rebalance.");

		// if it has spiders going on say "S" with # as the superscript
		if ((flags & PFLAG_HASSPIDERS) && format == FORMAT_HTML )
			fb.safePrintf ( "<span title=\"Spidering\">S"
					"<sup>%" PRId32"</sup>"
					"</span>"
					,h->m_pingInfo.m_currentSpiders
					);

		if ( format == FORMAT_HTML && 
		     h->m_pingInfo.m_udpSlotsInUseIncoming ) {
			const char *f1 = "";
			const char *f2 = "";
			// MAXUDPSLOTS in Spider.cpp is 300 right now
			if ( h->m_pingInfo.m_udpSlotsInUseIncoming >= 300 ) {
				f1 = "<b>";
				f2 = "</b>";
			}
			if ( h->m_pingInfo.m_udpSlotsInUseIncoming >= 400 ) {
				f1 = "<b><font color=red>";
				f2 = "</font></b>";
			}
			fb.safePrintf("<span title=\"udpSlotsInUse\">"
				      "%s"
				      "U"
				      "<sup>%" PRId32"</sup>"
				      "%s"
				      "</span>"
				      ,f1
				      ,h->m_pingInfo.m_udpSlotsInUseIncoming
				      ,f2
				      );
		}

		if ( format == FORMAT_HTML && h->m_pingInfo.m_tcpSocketsInUse){
			const char *f1 = "";
			const char *f2 = "";
			if ( h->m_pingInfo.m_tcpSocketsInUse >= 100 ) {
				f1 = "<b>";
				f2 = "</b>";
			}
			if ( h->m_pingInfo.m_tcpSocketsInUse >= 200 ) {
				f1 = "<b><font color=red>";
				f2 = "</font></b>";
			}
			fb.safePrintf("<span title=\"tcpSocketsInUse\">"
				      "%s"
				      "T"
				      "<sup>%" PRId32"</sup>"
				      "%s"
				      "</span>"
				      ,f1
				      ,h->m_pingInfo.m_tcpSocketsInUse
				      ,f2
				      );
		}

		if ((flags & PFLAG_HASSPIDERS) && format != FORMAT_HTML )
			fb.safePrintf ( "Spidering");

		// say "M" if merging
		if ( (flags & PFLAG_MERGING) && format == FORMAT_HTML )
			fb.safePrintf ( "<span title=\"Merging\">M</span>");
		if ( (flags & PFLAG_MERGING) && format != FORMAT_HTML )
			fb.safePrintf ( "Merging");

		// say "D" if dumping
		if (   (flags & PFLAG_DUMPING) && format == FORMAT_HTML )
			fb.safePrintf ( "<span title=\"Dumping\">D</span>");
		if (   (flags & PFLAG_DUMPING) && format != FORMAT_HTML )
			fb.safePrintf ( "Dumping");


		// say "y" if doing the daily merge
		if (  !(flags & PFLAG_MERGEMODE0) )
			fb.safePrintf ( "y");


		if ( format == FORMAT_HTML && !h->m_spiderEnabled) {
			fb.safePrintf("<span title=\"Spider Disabled\" style=\"text-decoration:line-through;\">S</span>");
		}
		if ( format == FORMAT_HTML && !h->m_queryEnabled) {
			fb.safePrintf("<span title=\"Query Disabled\" style=\"text-decoration:line-through;\">Q</span>");
		}


		// clear it if it is us, this is invalid
		if ( ! h->m_gotPingReply ) {
			fb.reset();
			fb.safePrintf("??");
		}
		if ( fb.length() == 0 && format == FORMAT_HTML )
			fb.safePrintf("&nbsp;");

		fb.nullTerm();

		const char *bg = LIGHT_BLUE;
		if ( h->m_ping >= g_conf.m_deadHostTimeout ) 
			bg = "ffa6a6";


		//
		// BEGIN XML OUTPUT
		//
		if ( format == FORMAT_XML ) {
			
			sb.safePrintf("\t<host>\n"
				      "\t\t<name><![CDATA["
				      );
			sb.cdataEncode (h->m_hostname);
			sb.safePrintf("]]></name>\n");
			sb.safePrintf("\t\t<shard>%" PRId32"</shard>\n",
				      (int32_t)h->m_shardNum);
			sb.safePrintf("\t\t<mirror>%" PRId32"</mirror>\n",
				      h->m_stripe);

			sb.safePrintf("\t\t<ip1>%s</ip1>\n",
				      iptoa(h->m_ip));
			sb.safePrintf("\t\t<ip2>%s</ip2>\n",
				      iptoa(h->m_ipShotgun));

			sb.safePrintf("\t\t<httpPort>%" PRId32"</httpPort>\n",
				      (int32_t)h->m_httpPort);
			sb.safePrintf("\t\t<udpPort>%" PRId32"</udpPort>\n",
				      (int32_t)h->m_port);
			sb.safePrintf("\t\t<dnsPort>%" PRId32"</dnsPort>\n",
				      (int32_t)h->m_dnsClientPort);

			//sb.safePrintf("\t\t<hdTemp>%s</hdTemp>\n",hdbuf);
			sb.safePrintf("\t\t<gbVersion>%s</gbVersion>\n",vbuf);

			sb.safePrintf("\t\t<resends>%" PRId32"</resends>\n",
				      h->m_pingInfo.m_totalResends);

			/*
			  MDW: take out for new stuff
			sb.safePrintf("\t\t<errorReplies>%" PRId32"</errorReplies>\n",
				      h->m_errorReplies);
			*/

			sb.safePrintf("\t\t<errorTryAgains>%" PRId32
				      "</errorTryAgains>\n",
				      h->m_pingInfo.m_etryagains);

			sb.safePrintf("\t\t<udpSlotsInUse>%" PRId32
				      "</udpSlotsInUse>\n",
				      h->m_pingInfo.m_udpSlotsInUseIncoming);

			sb.safePrintf("\t\t<tcpSocketsInUse>%" PRId32
				      "</tcpSocketsInUse>\n",
				      h->m_pingInfo.m_tcpSocketsInUse);

			/*
			sb.safePrintf("\t\t<dgramsTo>%" PRId64"</dgramsTo>\n",
				      h->m_dgramsTo);
			sb.safePrintf("\t\t<dgramsFrom>%" PRId64"</dgramsFrom>\n",
				      h->m_dgramsFrom);
			*/

			sb.safePrintf("\t\t<numCorruptDiskReads>%" PRId32
				      "</numCorruptDiskReads>\n"
				      ,h->m_pingInfo.m_numCorruptDiskReads);
			sb.safePrintf("\t\t<numOutOfMems>%" PRId32
				      "</numOutOfMems>\n"
				      ,h->m_pingInfo.m_numOutOfMems);
			sb.safePrintf("\t\t<numClosedSockets>%" PRId32
				      "</numClosedSockets>\n"
				      ,h->m_pingInfo.
				      m_socketsClosedFromHittingLimit);
			sb.safePrintf("\t\t<numOutstandingSpiders>%" PRId32
				      "</numOutstandingSpiders>\n"
				      ,h->m_pingInfo.m_currentSpiders );


			sb.safePrintf("\t\t<splitTime>%" PRId32"</splitTime>\n",
				      splitTime);
			sb.safePrintf("\t\t<splitsDone>%" PRId32"</splitsDone>\n",
				      h->m_splitsDone);
			
			sb.safePrintf("\t\t<status><![CDATA[%s]]></status>\n",
				      fb.getBufStart());

			sb.safePrintf("\t\t<slowDiskReads>%" PRId32
				      "</slowDiskReads>\n",
				      h->m_pingInfo.m_slowDiskReads);

			sb.safePrintf("\t\t<docsIndexed>%" PRId32
				      "</docsIndexed>\n",
				      h->m_pingInfo.m_totalDocsIndexed);

			sb.safePrintf("\t\t<percentMemUsed>%.1f%%"
				      "</percentMemUsed>",
				      h->m_pingInfo.m_percentMemUsed); // float

			sb.safePrintf("\t\t<cpuUsage>%.1f%%"
				      "</cpuUsage>",
				      cpu );

			sb.safePrintf("\t\t<percentDiskUsed><![CDATA[%s]]>"
				      "</percentDiskUsed>",
				      diskUsageMsg);

			sb.safePrintf("\t\t<maxPing1>%s</maxPing1>\n",
				      pms );

			sb.safePrintf("\t\t<maxPingAge1>%" PRId32"ms</maxPingAge1>\n",
				      pingAge );

			sb.safePrintf("\t\t<ping1>%s</ping1>\n",
				      ptr );

			sb.safePrintf("\t\t<note>%s</note>\n",
				      h->m_note );

			sb.safePrintf("\t\t<spider>%" PRId32"</spider>\n",
						  (int32_t)h->m_spiderEnabled );


			sb.safePrintf("\t\t<query>%" PRId32"</query>\n",
						  (int32_t)h->m_queryEnabled );

			sb.safePrintf("\t</host>\n");

			continue;
		}
		//
		// END XML OUTPUT
		//


		//
		// BEGIN JSON OUTPUT
		//
		if ( format == FORMAT_JSON ) {
			
			sb.safePrintf("\t\"host\":{\n");
			sb.safePrintf("\t\t\"name\":\"%s\",\n",h->m_hostname);
			sb.safePrintf("\t\t\"shard\":%" PRId32",\n",
				      (int32_t)h->m_shardNum);
			sb.safePrintf("\t\t\"mirror\":%" PRId32",\n", h->m_stripe);

			sb.safePrintf("\t\t\"ip1\":\"%s\",\n",iptoa(h->m_ip));
			sb.safePrintf("\t\t\"ip2\":\"%s\",\n",
				      iptoa(h->m_ipShotgun));

			sb.safePrintf("\t\t\"httpPort\":%" PRId32",\n",
				      (int32_t)h->m_httpPort);
			sb.safePrintf("\t\t\"udpPort\":%" PRId32",\n",
				      (int32_t)h->m_port);
			sb.safePrintf("\t\t\"dnsPort\":%" PRId32",\n",
				      (int32_t)h->m_dnsClientPort);

			//sb.safePrintf("\t\t\"hdTemp\":\"%s\",\n",hdbuf);
			sb.safePrintf("\t\t\"gbVersion\":\"%s\",\n",vbuf);

			sb.safePrintf("\t\t\"resends\":%" PRId32",\n",
				      h->m_pingInfo.m_totalResends);

			/*
			sb.safePrintf("\t\t\"errorReplies\":%" PRId32",\n",
				      h->m_errorReplies);
			*/
			sb.safePrintf("\t\t\"errorTryAgains\":%" PRId32",\n",
				      h->m_pingInfo.m_etryagains);
			sb.safePrintf("\t\t\"udpSlotsInUse\":%" PRId32",\n",
				      h->m_pingInfo.m_udpSlotsInUseIncoming);
			sb.safePrintf("\t\t\"tcpSocketsInUse\":%" PRId32",\n",
				      h->m_pingInfo.m_tcpSocketsInUse);

			/*
			sb.safePrintf("\t\t\"dgramsTo\":%" PRId64",\n",
				      h->m_dgramsTo);
			sb.safePrintf("\t\t\"dgramsFrom\":%" PRId64",\n",
				      h->m_dgramsFrom);
			*/


			sb.safePrintf("\t\t\"numCorruptDiskReads\":%" PRId32",\n"
				      ,h->m_pingInfo.m_numCorruptDiskReads);
			sb.safePrintf("\t\t\"numOutOfMems\":%" PRId32",\n"
				      ,h->m_pingInfo.m_numOutOfMems);
			sb.safePrintf("\t\t\"numClosedSockets\":%" PRId32",\n"
				      ,h->m_pingInfo.
				      m_socketsClosedFromHittingLimit);
			sb.safePrintf("\t\t\"numOutstandingSpiders\":%" PRId32
				      ",\n"
				      ,h->m_pingInfo.m_currentSpiders );


			sb.safePrintf("\t\t\"splitTime\":%" PRId32",\n",
				      splitTime);
			sb.safePrintf("\t\t\"splitsDone\":%" PRId32",\n",
				      h->m_splitsDone);
			
			sb.safePrintf("\t\t\"status\":\"%s\",\n",
				      fb.getBufStart());

			sb.safePrintf("\t\t\"slowDiskReads\":%" PRId32",\n",
				      h->m_pingInfo.m_slowDiskReads);

			sb.safePrintf("\t\t\"docsIndexed\":%" PRId32",\n",
				      h->m_pingInfo.m_totalDocsIndexed);

			sb.safePrintf("\t\t\"percentMemUsed\":\"%.1f%%\",\n",
				      h->m_pingInfo.m_percentMemUsed); // float

			sb.safePrintf("\t\t\"cpuUsage\":\"%.1f%%\",\n",cpu);

			sb.safePrintf("\t\t\"percentDiskUsed\":\"%s\",\n",
				      diskUsageMsg);

			sb.safePrintf("\t\t\"maxPing1\":\"%s\",\n",pms);

			sb.safePrintf("\t\t\"maxPingAge1\":\"%" PRId32"ms\",\n",
				      pingAge );

			sb.safePrintf("\t\t\"ping1\":\"%s\",\n",
				      ptr );

			sb.safePrintf("\t\t\"note\":\"%s\"\n",
				      h->m_note );

			sb.safePrintf("\t\t\"spider\":\"%" PRId32"\"\n",
						  (int32_t)h->m_spiderEnabled );

			sb.safePrintf("\t\t\"query\":\"%" PRId32"\"\n",
						  (int32_t)h->m_queryEnabled );


            
			sb.safePrintf("\t},\n");

			continue;
		}
		//
		// END JSON OUTPUT
		//


		sb.safePrintf (
			  "<tr bgcolor=#%s>"
			  "<td><a href=\"http://%s:%hi/admin/hosts?"
			  ""
			  "c=%s"
			  "&sort=%" PRId32"\">%" PRId32"</a></td>"

			  "<td>%s</td>" // hostname

			  "<td>%" PRId32"</td>" // group
			  "<td>%" PRId32"</td>" // stripe
			  //"<td>0x%08" PRIx32"</td>" // group mask

			  //"<td>%s</td>" // ip1
			  //"<td>%s</td>" // ip2
			  //"<td>%hi</td>" // port
			  //"<td>%hi</td>" // client port
			  "<td>%hi</td>" // http port
			  //"<td>%" PRId32"</td>" // token group num
			  //"<td>%" PRId32"</td>" // switch group
			  //"<td>%s</td>" // tmpN

			  // hd temps
			  // no, this is gb version now
			  "<td><nobr>%s%s%s</nobr></td>"

			  // resends
			  "<td>%" PRId32"</td>"

			  // error replies
			  //"<td>%" PRId32"</td>"

			  // etryagains
			  "<td>%" PRId32"</td>"

			  // # dgrams sent to
			  //"<td>%" PRId64"</td>"
			  // # dgrams recvd from
			  //"<td>%" PRId64"</td>"

			  // loadavg
			  //"<td>%.2f</td>"

			  // split time
			  "<td>%" PRId32"</td>"
			  // splits done
			  "<td>%" PRId32"</td>"

			  // flags
			  "<td>%s</td>"

			  // slow disk reads
			  "<td>%" PRId32"</td>"

			  // docs indexed
			  "<td>%" PRId32"</td>"

			  // percent mem used
			  "<td>%s%.1f%%%s</td>"
			  // cpu usage
			  "<td>%.1f%%</td>"
			  // disk usage
			  "<td>%s</td>"

			  // ping max
			  "<td>%s</td>"

			  // ping age
			  "<td>%" PRId32"ms</td>"

			  // ping
			  "<td>%s</td>"
			  //"<td>%s</td>"
			  //"<td>%" PRId32"ms</td>"
			  "<td nowrap=1>%s</td>"
			  "</tr>" , 
			  bg,//LIGHT_BLUE ,
			  ipbuf3, h->m_httpPort, 
			  cs, sort,
			  i , 
			  h->m_hostname,
			  (int32_t)h->m_shardNum,//group,
			  h->m_stripe,
			  // group mask is not looked at a lot and is
			  // really only for indexdb and a few other rdbs
			  //g_hostdb.makeGroupId(i,ng) ,
			  //ipbuf1,
			  //ipbuf2,
			  //h->m_port , 
			  //h->m_dnsClientPort ,
			  h->m_httpPort ,
			  //h->m_tokenGroupNum,
			  //switchGroup ,
			  //tmpN,
			  vbuf1,
			  vbuf,//hdbuf,
			  vbuf2,

			  h->m_pingInfo.m_totalResends,


			  // h->m_errorReplies,
			  h->m_pingInfo.m_etryagains,
			  // h->m_dgramsTo,
			  // h->m_dgramsFrom,

			  //h->m_loadAvg, // double
			  splitTime,
			  h->m_splitsDone,

			  fb.getBufStart(),//flagString,

			  h->m_pingInfo.m_slowDiskReads,
			  h->m_pingInfo.m_totalDocsIndexed,

			  fontTagFront,
			  h->m_pingInfo.m_percentMemUsed, // float
			  fontTagBack,
			  cpu, // float
			  diskUsageMsg,

			  // ping max
			  pms,
			  // ping age
			  pingAge,

			  //avg , 
			  //stdDev,
			  //ping,
			  ptr ,
			  //ptr2 ,
			  h->m_note );
	}

	if ( format == FORMAT_XML ) {
		sb.safePrintf("</response>\n");
		return g_httpServer.sendDynamicPage ( s , 
						      sb.getBufStart(),
						      sb.length() ,
						      0, 
						      false, 
						      "text/xml");
	}

	if ( format == FORMAT_JSON ) {
		// remove last \n, from json host{}
		sb.m_length -= 2;
		sb.safePrintf("\n}\n}");
		return g_httpServer.sendDynamicPage ( s , 
						      sb.getBufStart(),
						      sb.length() ,
						      0, 
						      false, 
						      "application/json");
	}


	// end the table now
	sb.safePrintf ( "</table><br>\n" );

	

	if( g_hostdb.m_numSpareHosts ) {
		// print spare hosts table
		sb.safePrintf ( 
					   "<table %s>"
					   "<tr class=hdrow><td colspan=10><center>"
					   //"<font size=+1>"
					   "<b>Spares</b>"
					   //"</font>"
					   "</td></tr>" 
					   "<tr bgcolor=#%s>"
					   "<td><b>spareId</td>"
					   "<td><b>host name</td>"
					   "<td><b>ip1</td>"
					   "<td><b>ip2</td>"
					   //"<td><b>udp port</td>"
					   //"<td><b>priority udp port</td>"
					   //"<td><b>dns client port</td>"
					   "<td><b>http port</td>"
					   //"<td><b>switch id</td>"

					   // this is now fairly obsolete
					   //"<td><b>ide channel</td>"

					   "<td><b>note</td>",
					   TABLE_STYLE,
					   DARK_BLUE  );

		for ( int32_t i = 0; i < g_hostdb.m_numSpareHosts; i++ ) {
			// get the ith host (hostId)
			Host *h = g_hostdb.getSpare ( i );

			char ipbuf1[64];
			char ipbuf2[64];
			strcpy(ipbuf1,iptoa(h->m_ip));
			strcpy(ipbuf2,iptoa(h->m_ipShotgun));

			// print it
			sb.safePrintf (
						   "<tr bgcolor=#%s>"
						   "<td>%" PRId32"</td>"
						   "<td>%s</td>"
						   "<td>%s</td>"
						   "<td>%s</td>"
						   //"<td>%hi</td>"
						   //"<td>%hi</td>" // priority udp port
						   //"<td>%hi</td>"
						   "<td>%hi</td>"
						   //"<td>%i</td>" // switch id
						   "<td>%s</td>"
						   "</tr>" , 
						   LIGHT_BLUE,
						   i , 
						   h->m_hostname,
						   ipbuf1,
						   ipbuf2,
						   //h->m_port , 
						   //h->m_port2 , 
						   //h->m_dnsClientPort ,
						   h->m_httpPort ,
						   //h->m_switchId,
						   h->m_note );
		}
		sb.safePrintf ( "</table><br>" );
	}



	/*
	// print proxy hosts table
	sb.safePrintf ( 
		  "<table %s>"
		  "<tr class=hdrow><td colspan=12><center>"
		  //"<font size=+1>"
		  "<b>Proxies</b>"
		  //"</font>"
		  "</td></tr>" 
		  "<tr bgcolor=#%s>"
		  "<td><b>proxyId</b></td>"
		  "<td><b>type</b></td>"
		  "<td><b>host name</b></td>"
		  "<td><b>ip1</b></td>"
		  "<td><b>ip2</b></td>"
		  //"<td><b>udp port</td>"

		  //"<td><b>priority udp port</td>"

		  //"<td><b>dns client port</td>"
		  "<td><b>http port</b></td>"
		  //"<td><b>switch id</td>"
                  "<td><b>max ping1</b></td>"
                  "<td><b>ping1 age</b></td>"
                  "<td><b>ping1</b></td>"
		  //"<td><b>ping2</b></td>"
		  // this is now fairly obsolete
		  //"<td><b>ide channel</td>"

		  "<td><b>note</td>",
		  TABLE_STYLE,
		  DARK_BLUE 
			);
	for ( int32_t i = 0; i < g_hostdb.m_numProxyHosts; i++ ) {
		// get the ith host (hostId)
		Host *h = g_hostdb.getProxy ( i );

                char ptr[256];
                int32_t pingAge = generatePingMsg(h, nowmsLocal, ptr);

		char ipbuf1[64];
		char ipbuf2[64];
		strcpy(ipbuf1,iptoa(h->m_ip));
		strcpy(ipbuf2,iptoa(h->m_ipShotgun));

		// host can have 2 ip addresses, get the one most
		// similar to that of the requester
		int32_t eip = g_hostdb.getBestIp ( h , s->m_ip );
		char ipbuf3[64];
		strcpy(ipbuf3,iptoa(eip));


		char pms[64];
		if ( h->m_pingMax < 0 ) sprintf(pms,"???");
		else                    sprintf(pms,"%" PRId32"ms",h->m_pingMax);
		// the sync status ascii-ized

		char *type = "proxy";
		if ( h->m_type == HT_QCPROXY ) type = "qcproxy";
		if ( h->m_type == HT_SCPROXY ) type = "scproxy";

		// print it
		sb.safePrintf (
			  "<tr bgcolor=#%s>"

			  "<td><a href=\"http://%s:%hi/admin/hosts?"
			  ""
			  "c=%s\">"
			  "%" PRId32"</a></td>"

			  "<td>%s</td>"
			  "<td>%s</td>"
			  "<td>%s</td>"
			  "<td>%s</td>"
			  //"<td>%hi</td>"
			  //"<td>%hi</td>" // priority udp port
			  //"<td>%hi</td>"
			  "<td>%hi</td>"
			  //"<td>%i</td>" // switch id
			  "<td>%s</td>" // ping max
			  "<td>%" PRId32"ms</td>" // ping age
			  "<td>%s</td>" // ping
			  //"<td>%" PRId32"</td>" // ide channel
			  "<td>%s </td>"
			  "</tr>" , 

			  LIGHT_BLUE,
			  ipbuf3,
			  h->m_httpPort,
			  cs,
			  i , 

			  type,
			  h->m_hostname,
			  ipbuf1,
			  ipbuf2,
			  //h->m_port , 
			  //h->m_port2 , 
			  //h->m_dnsClientPort ,
			  h->m_httpPort ,
			  //h->m_switchId,
			  pms,
                          pingAge,
                          ptr,
			  //h->m_ideChannel ,
			  h->m_note );
	}
	sb.safePrintf ( "</table><br><br>" );
	*/

	sb.safePrintf(
		      "<style>"
		      ".poo { background-color:#%s;}\n"
		      "</style>\n" ,
		      LIGHT_BLUE );


	// print help table
	sb.safePrintf ( 
		  "<table %s>"
		  "<tr class=hdrow><td colspan=10><center>"
		  //"<font size=+1>"
		  "<b>Key</b>"
		  //"</font>"
		  "</td></tr>" 

		  "<tr class=poo>"
		  "<td>host ip</td>"
		  "<td>The primary IP address of the host."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>shard</td>"
		  "<td>"
		  "The index is split into shards. Which shard does this "
		  "host serve?"
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>mirror</td>"
		  "<td>"
		  "A shard can be mirrored multiple times for "
		  "data redundancy."
		  "</td>"
		  "</tr>\n"

		  /*
		  "<tr class=poo>"
		  "<td>ip2</td>"
		  "<td>The secondary IP address of the host."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>udp port</td>"
		  "<td>The UDP port the host uses to send and recieve "
		  "datagrams."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>dns client port</td>"
		  "<td>The UDP port used to send and receive dns traffic with."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>http port</td>"
		  "<td>The port you can connect a browser to."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>best switch id</td>"
		  "<td>The host prefers to be on this switch because it "
		  "needs to send a lot of data to other hosts on this swtich. "
		  "Therefore, ideally, the best switch id should match the "
		  "actual switch id for optimal performance."
		  "</td>"
		  "</tr>\n"
		  */

		  /*
		  "<tr class=poo>"
		  "<td>switch id</td>"
		  "<td>Hosts that share the same switch id are "
		  "physically on the same switch."
		  "</td>"
		  "</tr>\n"
		  */

		  "<tr class=poo>"
		  "<td>dgrams resent</td>"
		  "<td>How many datagrams have had to be resent to a host "
		  "because it was not ACKed quick enough or because it was "
		  "fully ACKed but the entire request was resent in case "
		  "the host was reset."
		  "</td>"
		  "</tr>\n"

		  /*
		  "<tr class=poo>"
		  "<td>errors recvd</td>"
		  "<td>How many errors were received from a host in response "
		  "to a request to retrieve or insert data."
		  "</td>"
		  "</tr>\n"
		  */

		  "<tr class=poo>"
		  "<td>try agains recvd</td>"
		  "<td>How many ETRYAGAIN errors "
		  "were received in response to a "
		  "request to add data. Usually because the host's memory "
		  "is full and it is dumping its data to disk. This number "
		  "can be high if the host if failing to dump the data "
		  "to disk because of some malfunction, and it can therefore "
		  "bottleneck the entire cluster."
		  "</td>"
		  "</tr>\n"

		  /*
		  "<tr class=poo>"
		  "<td>dgrams to</td>"
		  "<td>How many datagrams were sent to the host from the "
		  "selected host since startup. Includes ACK datagrams. This "
		  "can actually be higher than the number of dgrams read "
		  "when the selected host is the same as the host in the "
		  "table because of resends. Gigablast will resend datagrams "
		  "that are not promptly ACKknowledged."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>dgrams from</td>"
		  "<td>How many datagrams were received from the host by the "
		  "selected host since startup. Includes ACK datagrams."
		  "</td>"
		  "</tr>\n"
		  */

		  "<tr class=poo>"
		  "<td>avg split time</td>"
		  "<td>Average time this host took to compute the docids "
		  "for a query. Useful for guaging the slowness of a host "
		  "compare to other hosts."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>splits done</td>"
		  "<td>Number of queries this host completed. Used in "
		  "computation of the <i>avg split time</i>."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>status</td>"
		  "<td>Status flags for the host. See key below."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>slow reads</td>"
		  "<td>Number of slow disk reads the host has had. "
		  "When this is big compared to other hosts it is a good "
		  "indicator its drives are relatively slow."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>docs indexed</td>"
		  "<td>Number of documents this host has indexed over all "
		  "collections. All hosts should have close to the same "
		  "number in a well-sharded situation."
		  "</td>"
		  "</tr>\n"

		  //"<tr class=poo>"
		  //"<td>loadavg</td>"
		  //"<td>1-minute sliding-window load average from "
		  //"/proc/loadavg."
		  //"</td>"
		  //"</tr>\n"

		  "<tr class=poo>"
		  "<td>mem used</td>"
		  "<td>Percentage of memory currently used."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>cpu used</td>"
		  "<td>Percentage of cpu resources in use by the gb process."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>disk used</td>"
		  "<td>Percentage of disk in use. When this gets close to "
		  "100%% you need to do something."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>max ping1</td>"
		  "<td>The worst ping latency from host to host."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>ping1 age</td>"
		  "<td>How long ago the last ping request was sent to "
		  "this host. Let's us know how fresh the ping time is."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>ping1</td>"
		  "<td>Ping time to this host on the primary network."
		  "</td>"
		  "</tr>\n"

		  /*
		  "<tr class=poo>"
		  "<td>ping2</td>"
		  "<td>Ping time to this host on the seconday/shotgun "
		  "network. This column is not visible if the shotgun "
		  "network is not enabled in the master controls."
		  "</td>"
		  "</tr>\n"
		  */

		  "<tr class=poo>"
		  "<td>M (status flag)</td>"
		  "<td>Indicates host is merging files on disk."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>D (status flag)</td>"
		  "<td>Indicates host is dumping data to disk."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>S (status flag)</td>"
		  "<td>Indicates host has outstanding spiders."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>y (status flag)</td>"
		  "<td>Indicates host is performing the daily merge."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>R (status flag)</td>"
		  "<td>Indicates host is performing a rebalance operation."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>F (status flag)</td>"
		  "<td>Indicates host has foreign records and requires "
		  "a rebalance operation."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>x (status flag)</td>"
		  "<td>Indicates host has abruptly exited due to a fatal "
		  "error (cored) and "
		  "restarted itself. The exponent is how many times it has "
		  "done this. If no exponent, it only did it once."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>C (status flag)</td>"
		  "<td>Indicates # of corrupted disk reads."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>K (status flag)</td>"
		  "<td>Indicates # of sockets closed from hitting limit."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td><nobr>O (status flag)</nobr></td>"
		  "<td>Indicates # of times we ran out of memory."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td><nobr>N (status flag)</nobr></td>"
		  "<td>Indicates host's clock is NOT in sync with host #0. "
		  "Gigablast should automatically sync on startup, "
		  "so this would be a problem "
		  "if it does not go away. Hosts need to have their clocks "
		  "in sync before they can add data to their index."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td><nobr>U (status flag)</nobr></td>"
		  "<td>Indicates the number of active UDP transactions "
		  "which are incoming requests. These will pile up if a "
		  "host can't handle them fast enough."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td><nobr>T (status flag)</nobr></td>"
		  "<td>Indicates the number of active TCP transactions "
		  "which are either outgoing or incoming requests."
		  "</td>"
		  "</tr>\n"

		  ,
		  TABLE_STYLE
			);

	sb.safePrintf ( "</table><br></form><br>" );

	//p = g_pages.printAdminBottom ( p , pend );

	// calculate buffer length
	//int32_t bufLen = p - buf;
	// . send this page
	// . encapsulates in html header and tail
	// . make a Mime
	return g_httpServer.sendDynamicPage ( s , (char*) sb.getBufStart() ,
						  sb.length() );
}
Ejemplo n.º 7
0
// . allocate "size" bytes of tracked memory, labeled with "note" for
//   the memory accounting table
// . returns NULL and sets g_errno on error
// . a request of 0 bytes returns the non-NULL sentinel 0x7fffffff, which
//   must not be dereferenced
// . a negative size is treated as a programming error: g_errno is set to
//   EBADENGINEER and the process is deliberately crashed
// . the system allocation is size + UNDERPAD + OVERPAD bytes; the pointer
//   returned to the caller is UNDERPAD bytes past the start of it
// . when g_conf.m_testMem is set, roughly 2% of calls fail on purpose with
//   ENOMEM so that out-of-memory handling can be exercised
void *Mem::gbmalloc ( int size , const char *note ) {
	logTrace( g_conf.m_logTraceMem, "size=%d note='%s'", size, note );

	// don't let electric fence zap us
	if ( size == 0 ) return (void *)0x7fffffff;
	
	// random oom testing
	if ( g_conf.m_testMem && (rand() % 100) < 2 ) { 
		g_errno = ENOMEM; 
		log( LOG_WARN, "mem: malloc-fake(%i,%s): %s",size,note, mstrerror(g_errno));
		return NULL;
	} 

retry:
	// re-read the limit on every pass through here
	int64_t max = g_conf.m_maxMem;

	// don't go over max
	if ( m_used + size + UNDERPAD + OVERPAD >= max ) {
		// try to free temp mem. returns true if it freed some.
		if ( freeCacheMem() ) goto retry;
		g_errno = ENOMEM;
		log( LOG_WARN, "mem: malloc(%i): Out of memory", size );
		return NULL;
	}

	if ( size < 0 ) {
		g_errno = EBADENGINEER;
		log( LOG_ERROR, "mem: malloc(%i): Bad value.", size );
		// deliberate crash so the bad caller shows up in the core
		char *xx = NULL; *xx = 0;
		return NULL;
	}

	void *mem;

	// flag that we are inside the system allocator
	g_inMemFunction = true;

	mem = (void *)sysmalloc ( size + UNDERPAD + OVERPAD );

	g_inMemFunction = false;

	// how many times we had to replace a low-address allocation
	int32_t memLoop = 0;
mallocmemloop:
	if ( ! mem && size > 0 ) {
		// grab the allocator's errno right away, before
		// freeCacheMem() or the logging below can overwrite it
		int sysErr = errno;
		g_mem.m_outOfMems++;
		// try to free temp mem. returns true if it freed some.
		if ( freeCacheMem() ) goto retry;
		g_errno = sysErr;
		static int64_t s_lastTime;
		static int32_t s_missed = 0;
		int64_t now = gettimeofdayInMillisecondsLocal();
		int64_t avail = (int64_t)g_conf.m_maxMem - 
			(int64_t)m_used;
		// log at most once per second, just count the rest
		if ( now - s_lastTime >= 1000LL ) {
			log(LOG_WARN, "mem: system malloc(%i,%s) availShouldBe=%" PRId64": "
			    "%s (%s) (ooms suppressed since last log msg = %" PRId32")",
			    size+UNDERPAD+OVERPAD,
			    note,
			    avail,
			    mstrerror(g_errno),
			    note,
			    s_missed);
			s_lastTime = now;
			s_missed = 0;
		} else {
			s_missed++;
		}
		// to debug oom issues:
		//char *xx=NULL;*xx=0;

		// an email alert here would be a sign of "memory
		// fragmentation", but sending these is disabled for now:
		// this flag starts out true so the branch below never runs.
		// it was problematic, e.g. it said 160MB is avail and
		// can't alloc 20MB...
		static bool s_sentEmail = true;
		// assume only 80% is really available because of 
		// inefficient mallocing
		avail = (int64_t)((float)avail * 0.80);
		// only alert if the request still fits in that discounted
		// amount, since there is always some minor fragmentation
		if ( ! s_sentEmail && avail > size ) {
			s_sentEmail = true;
			char msgbuf[1024];
			Host *h = g_hostdb.m_myHost;
			snprintf(msgbuf, 1024,
				 "Possible memory fragmentation "
				 "on host #%" PRId32" %s",
				 h->m_hostId,h->m_note);
			log(LOG_WARN, "query: %s",msgbuf);
			g_pingServer.sendEmail(NULL, msgbuf,true,true);
		}
		return NULL;
	}
	// reject suspiciously low addresses and ask for another block
	if ( (PTRTYPE)mem < 0x00010000 ) {
		// . the replacement must include the padding too, because we
		//   still return mem + UNDERPAD and register "size" bytes
		//   below
		// . it is allocated before the low block is freed, presumably
		//   so the allocator cannot hand the same address right back
		void *remem = sysmalloc ( size + UNDERPAD + OVERPAD );
		log ( LOG_WARN, "mem: Caught low memory allocation "
		      "at %08" PTRFMT", "
		      "reallocated to %08" PTRFMT"",
		      (PTRTYPE)mem, (PTRTYPE)remem );
		sysfree(mem);
		mem = remem;
		memLoop++;
		if ( memLoop > 100 ) {
			log ( LOG_WARN, "mem: Attempted to reallocate low "
					"memory allocation 100 times, "
					"aborting and returning NOMEM." );
			g_errno = ENOMEM;
			return NULL;
		}
		// re-check the replacement: it may be NULL or low as well
		goto mallocmemloop;
	}

	logTrace( g_conf.m_logTraceMem, "mem=%p size=%d note='%s'", mem, size, note );

	// record the caller-visible region, which starts after the underpad
	addMem ( (char *)mem + UNDERPAD , size , note , 0 );
	return (char *)mem + UNDERPAD;
}
void DailyMerge::dailyMergeLoop ( ) {
	// disable for now!
	//return;
	// if in repair mode, do not do daily merge
	if ( g_repairMode ) return;
	// or if in read only mode
	if ( g_conf.m_readOnlyMode ) return;
	// skip if proxy, a proxy can be hostid 0!
	if ( g_proxy.isProxy() ) return;
	// wait for clock to be synced with host #0
	if ( ! isClockInSync() ) return;
	// get local time
	int64_t nowLocalMS = gettimeofdayInMillisecondsLocal();
	// get our hostid
	int32_t hid = g_hostdb.m_myHost->m_hostId;
	// if process only recently started (1 min ago or less)
	// then do not immediately do this...
	if (hid==0 && nowLocalMS - g_process.m_processStartTime < 1*60*1000)
		return;
	// wait until the right time (this is in UTC)
	time_t nowSynced = getTimeSynced();

	// get time since midnight
	struct tm *tt ;
	// how many MINUTES into the day are we? (in UTC)
	tt = gmtime ( &nowSynced );
	int32_t elapsedMins = tt->tm_hour * 60 + tt->tm_min ;

	// what collnum to merge?
	collnum_t i ;

	// . if we are not 0, just use host #0's collnum
	// . an error here will screw up the whole daily merge process
	if ( hid != 0 && m_mergeMode == 0 ) {
		// get host #0
		Host *h = &g_hostdb.m_hosts[0];
		// must have got a ping reply from him
		if ( ! h->m_gotPingReply ) return;
		// hostid #0 must NOT be in mode 0
		if ( h->m_pingInfo.m_flags & PFLAG_MERGEMODE0 ) return;
		// get the collnum that host #0 is currently daily merging
		i = g_hostdb.m_hosts[0].m_pingInfo.m_dailyMergeCollnum;
		// this means host #0 is not daily merging a collnum now
		if ( i < 0 ) return;
		// if it is valid, the CollectionRec MUST be there
		CollectionRec *cr = g_collectiondb.getRec ( i );
		if ( ! cr ) { 
			log("daily: host #0 bad collnum %"INT32"",(int32_t)i);return;}
		// if valid, use it
		m_cr = cr;
		// we set m_cr, go to next mode
		m_mergeMode = 1;
		// set the start time here, but don't commit to m_cr just yet
		m_savedStartTime = nowSynced;
	}

	// . only host #0 should do this loop!!!
	// . loop through each collection to check the time
	for (i=0; hid==0&&m_mergeMode==0 && i<g_collectiondb.m_numRecs; i++) {
		// get collection rec for collnum #i
		CollectionRec *cr = g_collectiondb.getRec ( i );
		// skip if empty, it was deleted at some point
		if ( ! cr ) continue;
		// skip if daily merge trigger is < 0 (do not do dailies)
		if ( cr->m_dailyMergeTrigger < 0 ) continue;
		// . skip if not time yet
		// . !!!!!THIS IS IN MINUTES!!!!!!!!
		if ( (int32_t)elapsedMins < (int32_t)cr->m_dailyMergeTrigger ) 
			continue;
		// do not start more than 15 mins after the trigger time,
		// if we miss that cuz we are down, then too bad
		if ( (int32_t)elapsedMins > (int32_t)cr->m_dailyMergeTrigger + 15 )
			continue;
 		// . how long has it been (in seconds)
		// . !!!!!THIS IS IN SECONDS!!!!!!!!
		int32_t diff = nowSynced - cr->m_dailyMergeStarted;
		// crazy?
		if ( diff < 0 ) continue;
		// if less than 24 hours ago, we already did it
		if ( diff < 24*3600 ) continue;
		// . we must now match the day of week
		// . use <= 0 to do it every day
		// . 0 = sunday ... 6 = saturday
		// . comma separated list is ok ("0,1, 6")
		// . leave blank or at least no numbers to do every day
		char *s = cr->m_dailyMergeDOWList;
		char dowCounts[8];
		memset(dowCounts,0,8);
		for ( ; *s ; s++ ) {
			if ( ! is_digit(*s) ) continue;
			int32_t num = atoi(s);
			if ( num < 0 ) continue;
			if ( num > 6 ) continue;
			dowCounts[num]++;
		}
		// get our dow
		int32_t todayDOW = tt->tm_wday + 1;
		// make sure 1 to 7
		if ( todayDOW < 0 || todayDOW > 6 ) { 
			log("merge: bad today dow of %i for coll %s",
			    (int)todayDOW,cr->m_coll);
			return;
		}
		//if ( todayDOW > 6 ) { char *xx=NULL;*xx=0; }
		// skip if not a dayofweek to merge on
		if ( dowCounts [ todayDOW ] == 0 ) continue;

		// set the start time here, but don't commit to m_cr just yet
		m_savedStartTime = nowSynced;
		// . wait for everyone to be in mode #0 in case they just
		//   finished another daily merge. only host #0 does this loop.
		// . PROBLEM: if host #0 crashes before everyone can get into 
		//   mode 1+ and then host #0 is brought back up, then 
		//   obviously, we will not be able to meet this condition,
		//   therefore only check to see if this condition is 
		//   satisfied our "second time around" (so we must complete
		//   one daily merge before checking this again). that is why
		//   i added "m_didDaily". -- MDW
		for ( int32_t i = 0 ; m_didDaily && i<g_hostdb.m_numHosts ; i++){
			// skip ourselves, obviously we are in merge mode 2
			if ( &g_hostdb.m_hosts[i] == g_hostdb.m_myHost )
				continue;
			// that's good if he is in mode 0
			if ( g_hostdb.m_hosts[i].m_pingInfo.m_flags & 
			     PFLAG_MERGEMODE0 )
				continue;
			// oops, someone is not mode 0
			return;
		}
		// got one, save it
		m_cr = cr;
		// if we were hostid 0, go into merge mode 1 now
		m_mergeMode = 1;
		// bust out of loop
		break;
	}

	// can we advance to merge mode 1?
	if ( m_mergeMode == 1 ) {
		// no candidates, go back to mode 0 now, we are done
		if ( ! m_cr ) {
			log("daily: Could not get coll rec.");
			m_mergeMode = 0; return; 
		}
		// ok, we got a collection that needs it so turn off spiders
		m_mergeMode = 2;
		// turn spiders off to keep query latency down
		m_spideringEnabled = g_conf.m_spideringEnabled;
		//m_injectionEnabled = g_conf.m_injectionEnabled;
		g_conf.m_spideringEnabled = false;
		//g_conf.m_injectionEnabled = false;
		// log it
		log("daily: Starting daily merge for %s.",m_cr->m_coll);
		log("daily: Waiting for other hosts to enter merge mode.");
	}

	// wait for everyone to make it to mode 1+ before going on
	if ( m_mergeMode == 2 ) {
		// check the ping packet flags
		for ( int32_t i = 0 ; i < g_hostdb.m_numHosts ; i++ ) {
			// get the host
			Host *h = &g_hostdb.m_hosts[i];
			// skip ourselves, obviously we are in merge mode 2
			if ( h == g_hostdb.m_myHost ) 
				continue;
			// skip dead hosts
			if ( g_hostdb.isDead(h) )
				continue;
			// return if a host still in merge mode 0. wait for it.
			if ( h->m_pingInfo.m_flags & PFLAG_MERGEMODE0 )
				return;
		}
		// ok, everyone is out of mode 0 now
		m_mergeMode = 3;
		// log it
		log("daily: Waiting for all hosts to have 0 "
		    "spiders out.");
	}

	// wait for ALL spiders in network to clear
	if ( m_mergeMode == 3 ) {
		// return if we got spiders out!
		if ( g_spiderLoop.m_numSpidersOut > 0 )
			return;
		// check the ping packet flags
		for ( int32_t i = 0 ; i < g_hostdb.m_numHosts ; i++ ) {
			// skip ourselves, obviously we are in merge mode 2
			if ( &g_hostdb.m_hosts[i] == g_hostdb.m_myHost )
				continue;
			// if host still has spiders out, we can't go to mode 4
			if ( g_hostdb.m_hosts[i].m_pingInfo.m_flags & 
			     PFLAG_HASSPIDERS ) 
				return;
		}
		// ok, nobody has spiders now
		m_mergeMode = 4;
		// log it
		log("daily: Dumping trees.");
	}

	// start the dumps
	if ( m_mergeMode == 4 ) {
		// . set when we did it last, save that to disk to avoid thrash
		// . TODO: BUT do not allow it to be set in the spider 
		//   controls!
		// . THIS IS IN SECONDS!!!!!!!
		// . use the time we started, otherwise the merge time keeps
		//   getting pushed back.
		m_cr->m_dailyMergeStarted = m_savedStartTime; // nowSynced;
		// tell it to save, otherwise this might not get saved
		m_cr->m_needsSave = true;
		// initiate dumps
		g_indexdb.getRdb  ()->dumpTree(1); // niceness = 1
		//g_datedb.getRdb   ()->dumpTree(1); // niceness = 1
		g_spiderdb.getRdb ()->dumpTree(1); // niceness = 1
		g_linkdb.getRdb   ()->dumpTree(1); // niceness = 1
		// if neither has recs in tree, go to next mode
		if(g_indexdb .getRdb()->getNumUsedNodes()>0) return;
		//if(g_datedb  .getRdb()->getNumUsedNodes()>0) return;
		if(g_spiderdb.getRdb()->getNumUsedNodes()>0) return;
		if(g_linkdb  .getRdb()->getNumUsedNodes()>0) return;
		// ok, all trees are clear and dumped
		m_mergeMode = 5;
		// log it
		log("daily: Merging indexdb and datedb files.");
	}

	// start the merge
	if ( m_mergeMode == 5 ) {
		// kick off the merges if not already going
		//g_indexdb.getRdb()->attemptMerge(1,true,false);
		//g_datedb .getRdb()->attemptMerge(1,true,false);
		// if has more than one file, bail on it
		RdbBase *base;

		base = g_indexdb .getRdb()->getBase(m_cr->m_collnum);
		// . niceness,forced?,doLog?,minFilesToMerge
		// . only does a merge if there are 2 or more "big" indexdb 
		//   files present. Merges so that there are LESS THAN 2 files.
		//   just another way of describing a tight merge.
		base->attemptMerge (1,true,false,2);
		if ( base->getNumFiles() >= 2 ) return;

		//base = g_datedb  .getRdb()->getBase(m_cr->m_collnum);
		//base->attemptMerge (1,true,false,2);
		//if ( base->getNumFiles() >= 2 ) return;

		base = g_spiderdb.getRdb()->getBase(m_cr->m_collnum);
		base->attemptMerge (1,true,false,2);
		if ( base->getNumFiles() >= 2 ) return;

		base = g_linkdb  .getRdb()->getBase(m_cr->m_collnum);
		base->attemptMerge (1,true,false,2);
		if ( base->getNumFiles() >= 2 ) return;

		// . minimize titledb merging at spider time, too
		// . will perform a merge IFF there are 200 or more titledb 
		//   files present, otherwise, it will not. will do the merge
		//   such that LESS THAN 200 titledb files will be present
		//   AFTER the merge is completed.
		// . do NOT force merge ALL files on this one, we just want
		//   to make sure there are not 200+ titledb files
		base = g_titledb .getRdb()->getBase(m_cr->m_collnum);
		// we seem to dump about 70 per day at a decent spider rate
		// so merge enough so that we don't have to merge while 
		// spidering
		base->attemptMerge (1,false,false,230-70);
		if ( base->getNumFiles() >= 230-70 ) return;

		// set m_cr to NULL up here, so that the last guy to
		// complete the daily merge, does not "cycle back" and
		// try to re-daily merge the same collection!
		m_cr = NULL;
		// ok, merges are done
		m_mergeMode = 6;
		// log it
		log("daily: Waiting for all hosts to finish merging.");
	}

	// wait for all to finish before re-enabling spiders
	if ( m_mergeMode == 6 ) {
		// check the ping packet flags
		for ( int32_t i = 0 ; i < g_hostdb.m_numHosts ; i++ ) {
			// skip ourselves, obviously we are ok
			if ( &g_hostdb.m_hosts[i] == g_hostdb.m_myHost )
				continue;
			// if host in mode 6 or 0, that's good
			if ( g_hostdb.m_hosts[i].m_pingInfo.m_flags & 
			     PFLAG_MERGEMODE0OR6)
				continue;
			// otherwise, wait for it to be in 6 or 0
			return;
		}
		// ok, nobody has spiders now, everyone is 6 or 0
		m_mergeMode = 0;
		// no coll rec now
		m_cr = NULL;
		// spiders back on
		g_conf.m_spideringEnabled = m_spideringEnabled;
		//g_conf.m_injectionEnabled = m_injectionEnabled;
		// log it
		log("daily: Daily merge completed.");
		// now the next time we do a daily we must make sure all hosts
		// are in merge mode #0 before we start
		m_didDaily  = true;
	}		
}
Ejemplo n.º 9
0
// . close the real fd behind the virtual fd with the oldest timestamp
// . virtual fds that are being written or unlinked are never picked
// . returns false if there was nothing to close or ::close() failed
bool File::closeLeastUsed () {

	long long nowMS = gettimeofdayInMillisecondsLocal();

	// virtual fd holding the oldest timestamp seen so far, -1 = none
	int       victim   = -1;
	long long oldestMS = 0;

	for ( int v = 0 ; v < MAX_NUM_VFDS ; v++ ) {
		// no real fd behind this virtual fd
		if ( s_fds [ v ] < 0 ) continue;
		// . never take an fd that is being written. if we closed it
		//   under a threaded write the number could get re-used for
		//   another file and the write would garble that one
		// . never take an fd that is being unlinked either, it is
		//   already on its way to being closed in close1_r()
		if ( s_writing [ v ] || s_unlinking [ v ] ) continue;
		// hack: leave alone anything stamped this millisecond or the
		// one before, so queued up reads get a chance to finish
		// before their descriptor is taken away
		if ( s_timestamps [ v ] == nowMS     ) continue;
		if ( s_timestamps [ v ] == nowMS - 1 ) continue;
		// first candidate always wins, after that only older ones
		if ( victim != -1 && ! ( s_timestamps [ v ] < oldestMS ) )
			continue;
		oldestMS = s_timestamps [ v ];
		victim   = v;
	}

	// nothing we are allowed to close
	if ( victim == -1 ) 
		return log("File: closeLeastUsed: failed. All %li descriptors "
			   "are unavailable to be closed and re-used to read "
			   "from another file.",(long)s_maxNumOpenFiles);

	// debug msg
	log(LOG_DEBUG,"disk: Closing vfd #%i of %li. delta=%lli",
	    victim,(long)s_fds[victim],nowMS-s_timestamps[victim]);

	// make the descriptor blocking and non-async before closing it
	int fd    = s_fds[victim];
	int flags = fcntl ( fd , F_GETFL ) & ~( O_NONBLOCK | O_ASYNC );
	while ( fcntl ( fd, F_SETFL, flags ) < 0 ) {
		// interrupted (seen under valgrind), just try again
		if ( errno == EINTR ) continue;
		// any other failure is only logged, we still close below
		log("disk: fcntl(%i): %s",fd,mstrerror(errno));
		errno = 0;
		break;
	}

	// really close it, repeating while the call gets interrupted
	int status;
	do {
		if ( fd == 0 ) log("disk: closing3 fd of 0");
		status = ::close ( fd );
	} while ( status == -1 && errno == EINTR );

	// -1 means the virtual fd can be reopened later, File::close() was
	// not called on it. we are only conserving real descriptors.
	s_fds [ victim ] = -1;

	if ( status == -1 ) 
		return log("disk: close(%i) : %s", fd , strerror(errno));

	// only a successful real close frees up a slot
	if ( status == 0 ) s_numOpenFiles--;

	return true;
}	
Ejemplo n.º 10
0
// . get the fd of this file
// . if it was closed by us we reopen it
// . may re-open a virtual fd whose real fd was closed
// . if we hit our max # of real fds allowed we'll have to close 
//   the least used of those so we can open this one
// . return -2 if never been opened
// . return -1 on other errors. g_errno is set when the open itself fails.
// . otherwise, return the file descriptor
int File::getfd () {
	// if m_vfd is -1 it's never been opened
	if ( m_vfd < 0 ) {
		g_errno = EBADENGINEER;
		log(LOG_LOGIC,"disk: getfd: Must call open() first.");
		// deliberate crash so the bad caller shows up in a core
		char *xx=NULL; *xx=0; 
		return -2;
	}
	// . sanity check
	// . no caller should call open/getfd after unlink was queued for thred
	//if ( m_gone ) { char *xx = NULL; *xx = 0; }
	// get the real fd from the virtual fd
	int fd = s_fds [ m_vfd ];
	// return it if it's already opened
	if ( fd >=  0 ) { 
		// debug msg
		log(LOG_DEBUG,"disk: Opened vfd #%li of %li.",
		    (long)m_vfd,(long)s_fds[m_vfd]);
		// but update the timestamp to reduce chance it closes on us
		//s_timestamps [ m_vfd ] = getTime();
		s_timestamps [ m_vfd ] = gettimeofdayInMillisecondsLocal();
		return fd;
	}
	// if fd is -2 it's marked as available
	if ( fd != -1 ) {
		g_errno = EBADENGINEER;
		log (LOG_LOGIC, "disk: getfd: fd is available?!?!" );
		return -2;
	}
	// . a real fd of -1 means it's been closed and we gotta reopen it
	// . we have to close someone if we don't have enough room
	while ( s_numOpenFiles >= s_maxNumOpenFiles ) 
		if ( ! closeLeastUsed() ) return -1;
	// what was the filename/mode of this timed-out fd?
	//char *filename    = s_filenames   [ m_vfd ];
	// time the calls to open just in case they are hurting us
	long long t1 = -1LL;
	// errno of the failed open, saved right away because the logging
	// and timing calls that follow may overwrite errno
	int openErrno = 0;
	// . re-open the sleeping file descriptor
	// . if a rename thread was queued or spawned, try old guy first
	//if ( m_oldFilename[0] ) {
	//	t1 = gettimeofdayInMilliseconds();
	//	fd = ::open ( m_oldFilename , m_flags , m_permissions );
	//}
	// then try to open the new name
	if ( fd == -1 ) {
		t1 = gettimeofdayInMilliseconds();
 retry7:
		fd = ::open ( m_filename , m_flags , m_permissions );
		// valgrind
		if ( fd == -1 && errno == EINTR ) goto retry7;
		// 0 is normally stdin. why am i seeing it get assigned???
		if ( fd == 0 ) 
			log("disk: Got fd of 0 when opening %s.",m_filename);
		// open again without closing fd 0, so the second open can
		// not hand out 0 again
		if ( fd == 0 )
			fd = ::open ( m_filename , m_flags , m_permissions );
		if ( fd == 0 ) 
			log("disk: Got fd of 0 when opening2 %s.",m_filename);
		// remember why the open failed
		if ( fd == -1 ) openErrno = errno;
		// . now inc that count in case there was someone reading on
		//   that fd right before it was closed and we got it
		// . ::close() call can now happen in a thread, so we
		//   need to inc this guy here now, too
		// . so when that read returns it will know to re-do
		// . this should really be named s_openCounts!!
		if ( fd >= 0 ) s_closeCounts [ fd ]++;
		// . we now record this
		// . that way if our fd gets closed in closeLeastUsed() or
		//   in close1_r() due to a rename/unlink then we know it!
		// . this fixes a race condition of closeCounts in Threads.cpp
		//   where we did not know that the fd had been stolen from
		//   us and assigned to another file because our close1_r()
		//   had called ::close() on our fd and our closeCount algo
		//   failed us. see the top of this file for more description
		//   into this bug fix.
		// . only when the open worked. with fd == -1 this used to
		//   read s_closeCounts[-1], which is out of bounds.
		if ( fd >= 0 ) m_closeCount = s_closeCounts[fd];
	}
	if ( t1 >= 0 ) {
		long long dt = gettimeofdayInMilliseconds() - t1 ;
		if ( dt > 1 ) log(LOG_INFO,
				  "disk: call to open(%s) blocked for "
				  "%lli ms.",m_filename,dt);
	}
	// copy the saved errno to g_errno
	if ( fd == -1 ) {
		g_errno = openErrno;
		log("disk: error open(%s) : %s",m_filename,strerror(g_errno));
		return -1;
	}
	// we're another open file
	s_numOpenFiles++;
	// set this file descriptor, the other stuff remains the same
	s_fds [ m_vfd ] = fd;
	// 0 is normally stdin. why am i seeing it get assigned???
	if ( fd == 0 ) 
		log("disk: Found fd of 0 when opening %s.",m_filename);
	// reset
	s_writing   [ m_vfd ] = 0;
	s_unlinking [ m_vfd ] = 0;
	// update the time stamp
	s_timestamps [ m_vfd ] = gettimeofdayInMillisecondsLocal();
	return fd;
}
Ejemplo n.º 11
0
// close the least used of all the file descriptors.
// we don't touch files opened for writing, however.
bool File::closeLeastUsed () {

	int64_t min  ;
	int    mini = -1;
	int64_t now = gettimeofdayInMillisecondsLocal();


	int32_t notopen = 0;
	int32_t writing = 0;
	int32_t unlinking = 0;
	int32_t young = 0;

	// get the least used of all the actively opened file descriptors.
	// we can't get files that were opened for writing!!!
	int i;
	for ( i = 0 ; i < MAX_NUM_FDS ; i++ ) {
		//if ( s_fds   [ i ] < 0        ) continue;
		if ( ! s_open[i] ) { notopen++; continue; }
		// fds opened for writing are not candidates, because if
		// we close on a threaded write, that fd may be used to
		// re-open another file which gets garbled!
		if ( s_writing [ i ] ) { writing++; continue; }
		// do not close guys being unlinked they are in the middle
		// of being closed ALREADY in close1_r(). There should only be 
		// like one unlink thread allowed to be active at a time so we 
		// don't have to worry about it hogging all the fds.
		if ( s_unlinking [ i ] ) { unlinking++; continue; }
		// when we got like 1000 reads queued up, it uses a *lot* of
		// memory and we can end up never being able to complete a
		// read because the descriptors are always getting closed on us
		// so do a hack fix and do not close descriptors that are
		// about .5 seconds old on avg.
		if ( s_timestamps [ i ] == now ) { young++; continue; }
		if ( s_timestamps [ i ] == now - 1 ) { young++; continue; }
		if ( mini == -1 || s_timestamps [ i ] < min ) {
			min  = s_timestamps [ i ];
			mini = i;
		}
	}

	/*
	// use the new linked list of active file descriptors
	// . file at tail is the most active
	File *f = s_activeHead;

	// if nothing to do return true
	//if ( ! f ) return true;

	int32_t mini2 = -1;

	// close the head if not writing
	for ( ; f ; f = f->m_nextActive ) {
		mini2 = f->m_vfd;
		// how can this be?
		if ( s_fds [ mini2 ] < 0 ) { char *xx=NULL;*xx=0; }
		if ( s_writing [ mini2 ] ) continue;
		if ( s_unlinking [ mini2 ] ) continue;
		// when we got like 1000 reads queued up, it uses a *lot* of
		// memory and we can end up never being able to complete a
		// read because the descriptors are always getting closed on us
		// so do a hack fix and do not close descriptors that are
		// about .5 seconds old on avg.
		if ( s_timestamps [ mini2 ] >= now - 1000 ) continue;
		break;
	}

	// debug why it doesn't work right
	if ( mini != mini2 ) {
		int fd1 = -1;
		int fd2 = -1;
		if ( mini >= 0 ) fd1 = s_fds[mini];
		if ( mini2 >= 0 ) fd2 = s_fds[mini2];
		int32_t age = now - s_timestamps[mini] ;
		log("File: linkedlistfd=%i != rightfd=%i agems=%i",fd1,fd2,
		    (int)age);
	}
	*/

	// if nothing to free then return false
	if ( mini == -1 ) 
		return log("File: closeLeastUsed: failed. All %"INT32" "
			   "descriptors "
			   "are unavailable to be closed and re-used to read "
			   "from another file. notopen=%i writing=%i "
			   "unlinking=%i young=%i"
			   ,(int32_t)s_maxNumOpenFiles
			   ,notopen
			   ,writing
			   ,unlinking
			   ,young );


	int fd = mini;

	// always block on close
	//int fd    = s_fds[mini];
	int flags = fcntl ( fd , F_GETFL ) ;
	// turn off these 2 flags on fd to make sure
	flags &= ~( O_NONBLOCK | O_ASYNC );
 retry27:
	// return false on error
	if ( fcntl ( fd, F_SETFL, flags ) < 0 ) {
		// valgrind
		if ( errno == EINTR ) goto retry27;
		//char *xx = NULL; *xx = 1;
		log("disk: fcntl(%i): %s",fd,mstrerror(errno));
		// return false;
		errno = 0;
	}

	// . tally up another close for this fd, if any
	// . so if an open happens shortly here after, and 
	//   gets this fd, then any read that was started 
	//   before that open will know it!
	//s_closeCounts [ fd ]++;
	// otherwise we gotta really close it
 again:
	if ( fd == 0 ) log("disk: closing3 fd of 0");
	int status = ::close ( fd );
	if ( status == -1 && errno == EINTR ) goto again;

	// -1 means can be reopened because File::close() wasn't called.
	// we're just conserving file descriptors
	//s_fds [ mini ] = -1;

	// if the real close was successful then decrement the # of open files
	if ( status == 0 ) {
		// it's not open
		s_open     [ fd ] = 0;
		// if someone is trying to read on this let them know
		s_closeCounts [ fd ]++;

		s_numOpenFiles--;

		File *f = s_filePtrs [ fd ];
		// don't let him use the stolen fd
		f->m_fd = -1 ;

		// debug msg
		if ( g_conf.m_logDebugDisk ) {
			File *f = s_filePtrs [ fd ];
			char *fname = "";
			if ( f ) fname = f->getFilename();
			logf(LOG_DEBUG,"disk: force closed fd %i for"
			     " %s. age=%"INT64" #openfiles=%i this=0x%"PTRFMT,
			     fd,fname,now-s_timestamps[mini],
			     (int)s_numOpenFiles,
			     (PTRTYPE)this);
		}

		// no longer the owner
		s_filePtrs [ fd ] = NULL;

		// excise from linked list of active files
		//rmFileFromLinkedList ( f );
		// getfd() may not execute in time to ince the closeCount
		// so do it here. test by setting the max open files to like
		// 10 or so and spidering heavily.
		//s_closeCounts [ fd ]++;
	}


	if ( status == -1 ) 
		return log("disk: close(%i) : %s", fd , strerror(errno));

	if ( g_conf.m_logDebugDisk ) sanityCheck();

	return true;
}	
Ejemplo n.º 12
0
// . get the fd of this file
// . if it was closed by us we reopen it
// . may re-open a virtual fd whose real fd was closed
// . if we hit our max # of real fds allowed we'll have to close 
//   the least used of those so we can open this one
// . return -2 if never been opened
// . return -1 on other errors
// . otherwise, return the file descriptor
int File::getfd () {
	// if m_vfd is -1 it's never been opened
	if ( ! m_calledOpen ) { // m_vfd < 0 ) {
		g_errno = EBADENGINEER;
		log(LOG_LOGIC,"disk: getfd: Must call open() first.");
		char *xx=NULL; *xx=0; 
		return -2;
	}

	// if someone closed our fd, why didn't our m_fd get set to -1 ??!?!?!!
	if ( m_fd >= 0 && m_closeCount != s_closeCounts[m_fd] ) {
		log(LOG_DEBUG,"disk: invalidating existing fd %i "
		    "for %s this=0x%"PTRFMT" ccSaved=%i ccNow=%i", 
		    (int)m_fd,getFilename(),(PTRTYPE)this,
		    (int)m_closeCount,
		    (int)s_closeCounts[m_fd]);
		m_fd = -1;
	}

	// . sanity check
	// . no caller should call open/getfd after unlink was queued for thred
	//if ( m_gone ) { char *xx = NULL; *xx = 0; }
	// get the real fd from the virtual fd
	//int fd = s_fds [ m_vfd ];
	// return true if it's already opened
	if ( m_fd >=  0 ) { 
		// debug msg
		if ( g_conf.m_logDebugDisk )
			log(LOG_DEBUG,"disk: returning existing fd %i for %s "
			    "this=0x%"PTRFMT" ccSaved=%i ccNow=%i", 
			    (int)m_fd,getFilename(),(PTRTYPE)this,
			    (int)m_closeCount,
			    (int)s_closeCounts[m_fd]);
		if ( m_fd >= MAX_NUM_FDS ) { char *xx=NULL;*xx=0; }
		// but update the timestamp to reduce chance it closes on us
		//s_timestamps [ m_vfd ] = getTime();
		s_timestamps [ m_fd ] = gettimeofdayInMillisecondsLocal();
		return m_fd;
	}
	// if fd is -2 it's marked as available
	// if ( fd != -1 ) {
	// 	g_errno = EBADENGINEER;
	// 	log (LOG_LOGIC, "disk: getfd: fd is available?!?!" );
	// 	return -2;
	// }
	// . a real fd of -1 means it's been closed and we gotta reopen it
	// . we have to close someone if we don't have enough room
	while ( s_numOpenFiles >= s_maxNumOpenFiles )  {
		if ( g_conf.m_logDebugDisk ) sanityCheck();
		if ( ! closeLeastUsed() ) return -1;
		if ( g_conf.m_logDebugDisk ) sanityCheck();
	}
	// what was the filename/mode of this timed-out fd?
	//char *filename    = s_filenames   [ m_vfd ];
	// time the calls to open just in case they are hurting us
	int64_t t1 = -1LL;
	// . re-open the sleeping file descriptor
	// . if a rename thread was queued or spawned, try old guy first
	//if ( m_oldFilename[0] ) {
	//	t1 = gettimeofdayInMilliseconds();
	//	fd = ::open ( m_oldFilename , m_flags , m_permissions );
	//}
	int fd = -1;
	// then try to open the new name
	if ( fd == -1 ) {
		t1 = gettimeofdayInMilliseconds();
 retry7:
		fd = ::open ( getFilename() , m_flags,getFileCreationFlags());
		// valgrind
		if ( fd == -1 && errno == EINTR ) goto retry7;
		// 0 means stdout, right? why am i seeing it get assigned???
		if ( fd == 0 ) 
			log("disk: Got fd of 0 when opening %s.",
			    getFilename());
		if ( fd == 0 )
		       fd=::open(getFilename(),m_flags,getFileCreationFlags());
		if ( fd == 0 ) 
			log("disk: Got fd of 0 when opening2 %s.",
			    getFilename());
		if ( fd >= MAX_NUM_FDS )
			log("disk: got fd of %i out of bounds 1 of %i",
			    (int)fd,(int)MAX_NUM_FDS);

		// if we got someone else's fd that called close1_r() in a
		// thread but did not have time to call close2() to fix
		// up these member vars, then do it here. close2() will 
		// see that s_filePtrs[fd] does not equal the file ptr any more
		// and it will not update s_numOpenFiles in that case.
		if ( fd >= 0 && s_open [ fd ] ) {
			File *f = s_filePtrs [ fd ];
			if ( g_conf.m_logDebugDisk )
				log("disk: swiping fd %i from %s before "
				    "his close thread returned "
				    "this=0x%"PTRFMT,
				    fd,
				    f->getFilename(),
				    (PTRTYPE)f);
			// he only incs/decs his counters if he owns it so in
			// close2() so dec this global counter here
			s_numOpenFiles--;
			s_open[fd] = 0;
			s_filePtrs[fd] = NULL;
			if ( g_conf.m_logDebugDisk ) sanityCheck();
		}

		// sanity. how can we get an fd already opened?
		// because it was closed in a thread in close1_r()
		if ( fd >= 0 && s_open[fd] ) { char *xx=NULL;*xx=0; }
		// . now inc that count in case there was someone reading on
		//   that fd right before it was closed and we got it
		// . ::close() call can now happen in a thread, so we
		//   need to inc this guy here now, too
		// . so when that read returns it will know to re-do
		// . this should really be named s_openCounts!!
		if ( fd >= 0 ) s_closeCounts [ fd ]++;
		// . we now record this
		// . that way if our fd gets closed in closeLeastUsed() or
		//   in close1_r() due to a rename/unlink then we know it!
		// . this fixes a race condition of closeCounts in Threads.cpp
		//   where we did not know that the fd had been stolen from
		//   us and assigned to another file because our close1_r()
		//   had called ::close() on our fd and our closeCount algo
		//   failed us. see the top of this file for more description
		//   into this bug fix.
		m_closeCount = s_closeCounts[fd];
	}
	if ( t1 >= 0 ) {
		int64_t dt = gettimeofdayInMilliseconds() - t1 ;
		if ( dt > 1 ) log(LOG_INFO,
				  "disk: call to open(%s) blocked for "
				  "%"INT64" ms.",getFilename(),dt);
	}
	// copy errno to g_errno
	if ( fd <= -1 ) {
		g_errno = errno;
		log("disk: error open(%s) : %s fd %i",
		    getFilename(),strerror(g_errno),(int)fd);
		return -1;
	}

	if ( g_conf.m_logDebugDisk ) sanityCheck();

	// we're another open file
	s_numOpenFiles++;

	// debug log
	if ( g_conf.m_logDebugDisk )
		log("disk: opened1 fd %i for %s #openfiles=%i this=0x%"PTRFMT,
		    (int)fd,getFilename(),(int)s_numOpenFiles,(PTRTYPE)this);

	// set this file descriptor, the other stuff remains the same
	//s_fds [ m_vfd ] = fd;
	m_fd = fd;
	// 0 means stdout, right? why am i seeing it get assigned???
	if ( fd == 0 ) 
		log("disk: Found fd of 0 when opening %s.",getFilename());
	// reset
	s_writing   [ fd ] = 0;
	s_unlinking [ fd ] = 0;
	// update the time stamp
	s_timestamps [ fd ] = gettimeofdayInMillisecondsLocal();
	s_open       [ fd ] = true;
	s_filePtrs   [ fd ] = this;

	if ( g_conf.m_logDebugDisk ) sanityCheck();
	// add file to linked list of active files
	//addFileToLinkedList ( this );
	return fd;
}
// . returns false if blocked, true otherwise
// . sets errno on error
// . make a web page displaying the config of this host
// . call g_httpServer.sendDynamicPage() to send it
//
// . request parameters read from "r":
//   . "sort"         which column to order the host table by (see the
//                    switch below); defaults to 16
//   . "c"            collection name echoed into every link on the page
//   . "setnote"      if 1, set the note of host "host" to "note"
//   . "setsparenote" if 1, set the note of spare "spare" to "note"
//   . "replacehost"  if 1, replace host "rhost" with spare "rspare"
//   . "synchost"     if 1 or 2, call g_hostdb.syncHost() on host "shost"
//                    with false or true respectively as second argument
//   . "rr"           if > 0, emit a meta refresh tag with that value
//   . "reset"        if non-zero, zero the per-host resend/error/dgram
//                    counters before printing
// . a command whose id parameter is missing (-1) skips all of the
//   remaining commands; the page itself is still rendered
// . the return value is whatever g_httpServer.sendDynamicPage() returns
bool sendPageHosts ( TcpSocket *s , HttpRequest *r ) {
	// the page is assembled in a SafeBuf that starts out on this 64k
	// stack buffer
	char  buf [ 64*1024 ];
	SafeBuf sb(buf, 64*1024);
	// check for a sort request
	long sort  = r->getLong ( "sort", -1 );
	// sort by hostid with dead on top by default
	if ( sort == -1 ) sort = 16;
	// . no default string is passed here, so guard against a NULL result
	//   because coll is handed to "%s" many times below
	// . NOTE(review): coll comes from the request and is printed
	//   unescaped into href attributes; consider encoding it
	const char *coll = r->getString ( "c" );
	if ( ! coll ) coll = "";
	// check for setnote command
	long setnote = r->getLong("setnote", 0);
	long setsparenote = r->getLong("setsparenote", 0);
	// check for replace host command
	long replaceHost = r->getLong("replacehost", 0);
	// check for sync host command
	long syncHost = r->getLong("synchost", 0);
	// set note...
	if ( setnote == 1 ) {
		// get the host id to change
		long host = r->getLong("host", -1);
		if ( host == -1 ) goto skipReplaceHost;
		// get the note to set
		long  noteLen;
		char *note = r->getString("note", &noteLen, "", 0);
		// set the note
		g_hostdb.setNote(host, note, noteLen);
	}
	// set spare note...
	if ( setsparenote == 1 ) {
		// get the host id to change
		long spare = r->getLong("spare", -1);
		if ( spare == -1 ) goto skipReplaceHost;
		// get the note to set
		long  noteLen;
		char *note = r->getString("note", &noteLen, "", 0);
		// set the note
		g_hostdb.setSpareNote(spare, note, noteLen);
	}
	// replace host...
	if ( replaceHost == 1 ) {
		// get the host ids to swap
		long rhost = r->getLong("rhost", -1);
		long rspare = r->getLong("rspare", -1);
		if ( rhost == -1 || rspare == -1 )
			goto skipReplaceHost;
		// replace
		g_hostdb.replaceHost(rhost, rspare);
	}
	// sync host...
	if ( syncHost == 1 ) {
		// get the host id to sync
		long hostToSync = r->getLong("shost", -1);
		if ( hostToSync == -1 ) goto skipReplaceHost;
		// call sync
		g_hostdb.syncHost(hostToSync, false);
	}
	if ( syncHost == 2 ) {
		// get the host id to sync
		long hostToSync = r->getLong("shost", -1);
		if ( hostToSync == -1 ) goto skipReplaceHost;
		// call sync
		g_hostdb.syncHost(hostToSync, true);
	}

skipReplaceHost:

	// optional auto-refresh of the page
	long refreshRate = r->getLong("rr", 0);
	if(refreshRate > 0) 
		sb.safePrintf("<META HTTP-EQUIV=\"refresh\" "
			      "content=\"%li\">", 
			      refreshRate);

	// print standard header
	g_pages.printAdminTop ( &sb , s , r );

	// the host table gets one more column when shotgun is enabled
	const char *colspan = "30";
	char shotcol[1024];
	shotcol[0] = '\0';
	if ( g_conf.m_useShotgun ) {
		colspan = "31";
		// coll is request-supplied, so bound the write
		int n = snprintf ( shotcol, sizeof(shotcol),
				   "<td><a href=\"/master/hosts?c=%s"
				   "&sort=2\">"
				   "<b>ping2</b></a></td>",
				   coll);
		// if it did not fit, fall back to a header cell without
		// the sort link so the column count stays correct
		if ( n < 0 || n >= (int)sizeof(shotcol) )
			snprintf ( shotcol, sizeof(shotcol),
				   "<td><b>ping2</b></td>" );
	}

	// print host table title row and column header row
	sb.safePrintf ( 
		  "<table %s>"
		  "<tr><td colspan=%s><center>"
		  "<b>Hosts "
		  "(<a href=\"/master/hosts?c=%s&sort=%li&reset=1\">"
		  "reset)</b>"
		  "</td></tr>" 
		  "<tr bgcolor=#%s>"
		  "<td><a href=\"/master/hosts?c=%s&sort=0\">"

		  "<b>hostId</b></td>"
		  "<td><b>host ip</b></td>"
		  "<td><b>shard</b></td>" // mirror group
		  "<td><b>stripe</b></td>"

		  "<td><b>http port</td>"

		  "<td><b>HD temps (C)</b></td>"

		  "<td><a href=\"/master/hosts?c=%s&sort=3\">"
		  "<b>dgrams resent</a></td>"
		  "<td><a href=\"/master/hosts?c=%s&sort=4\">"
		  "<b>errors recvd</a></td>"
		  "<td><a href=\"/master/hosts?c=%s&sort=5\">"
		  "<b>ETRY AGAINS recvd</a></td>"

		  "<td><a href=\"/master/hosts?c=%s&sort=6\">"
		  "<b>dgrams to</a></td>"
		  "<td><a href=\"/master/hosts?c=%s&sort=7\">"
		  "<b>dgrams from</a></td>"

		  "<td><a href=\"/master/hosts?c=%s&sort=13\">"
		  "<b>avg split time</a></td>"

		  "<td><b>splits done</a></td>"

		  "<td><a href=\"/master/hosts?c=%s&sort=12\">"
		  "<b>status</a></td>"

		  "<td><a href=\"/master/hosts?c=%s&sort=15\">"
		  "<b>slow reads</a></td>"

		  "<td><b>docs indexed</a></td>"

		  "<td><a href=\"/master/hosts?c=%s&sort=9\">"
		  "<b>mem used</a></td>"

		  "<td><a href=\"/master/hosts?c=%s&sort=10\">"
		  "<b>cpu</a></td>"

		  "<td><a href=\"/master/hosts?c=%s&sort=14\">"
		  "<b>max ping1</a></td>"

		  "<td><a href=\"/master/hosts?c=%s&sort=11\">"
		  "<b>ping1 age</a></td>"

		  "<td><a href=\"/master/hosts?c=%s&sort=1\">"
		  "<b>ping1</a></td>"

		  "%s" // optional ping2 header cell (shotcol)
		  "<td><b>note</td>",
		  TABLE_STYLE ,
		  colspan    ,

		  coll, sort,
		  DARK_BLUE  ,

		  // one coll per sortable column link above (14 of them)
		  coll,
		  coll,
		  coll,
		  coll,
		  coll,
		  coll,
		  coll,
		  coll,
		  coll,
		  coll,
		  coll,
		  coll,
		  coll,
		  coll,
		  shotcol    );

	// loop through each host we know and print it's stats
	long nh = g_hostdb.getNumHosts();
	// should we reset resends, errorsRecvd and ETRYAGAINS recvd?
	if ( r->getLong("reset",0) ) {
		for ( long i = 0 ; i < nh ; i++ ) {
			// get the ith host (hostId)
			Host *h = g_hostdb.getHost ( i );
			h->m_totalResends   = 0;
			h->m_errorReplies = 0;
			h->m_etryagains   = 0;
			h->m_dgramsTo     = 0;
			h->m_dgramsFrom   = 0;
		}
	}

	// . sort hosts if needed
	// . hostSort maps display position -> hostId
	// . sort values without a case (e.g. 0 and 8) keep hostId order
	long hostSort [ MAX_HOSTS ];
	for ( long i = 0 ; i < nh ; i++ )
		hostSort [ i ] = i;
	switch ( sort ) {
	case 1: gbsort ( hostSort, nh, sizeof(long), pingSort1      ); break;
	case 2: gbsort ( hostSort, nh, sizeof(long), pingSort2      ); break;
	case 3: gbsort ( hostSort, nh, sizeof(long), resendsSort    ); break;
	case 4: gbsort ( hostSort, nh, sizeof(long), errorsSort     ); break;
	case 5: gbsort ( hostSort, nh, sizeof(long), tryagainSort   ); break;
	case 6: gbsort ( hostSort, nh, sizeof(long), dgramsToSort   ); break;
	case 7: gbsort ( hostSort, nh, sizeof(long), dgramsFromSort ); break;
	case 9: gbsort ( hostSort, nh, sizeof(long), memUsedSort    ); break;
	case 10:gbsort ( hostSort, nh, sizeof(long), cpuUsageSort   ); break;
	case 11:gbsort ( hostSort, nh, sizeof(long), pingAgeSort    ); break;
	case 12:gbsort ( hostSort, nh, sizeof(long), flagSort       ); break;
	case 13:gbsort ( hostSort, nh, sizeof(long), splitTimeSort  ); break;
	case 14:gbsort ( hostSort, nh, sizeof(long), pingMaxSort    ); break;
	case 15:gbsort ( hostSort, nh, sizeof(long), slowDiskSort    ); break;
	case 16:gbsort ( hostSort, nh, sizeof(long), defaultSort    ); break;
	}

	// one timestamp shared by every generatePingMsg() call below
	long long nowmsLocal = gettimeofdayInMillisecondsLocal();

	// print one row per host, in sorted order
	for ( long si = 0 ; si < nh ; si++ ) {
		long i = hostSort[si];
		// get the ith host (hostId)
		Host *h = g_hostdb.getHost ( i );
		// ping column text and the age of that ping
		char ptr[256];
		long pingAge = generatePingMsg(h, nowmsLocal, ptr);
		// max ping, or ??? if negative
		char pms[64];
		if ( h->m_pingMax < 0 ) sprintf(pms,"???");
		else                    sprintf(pms,"%lims",h->m_pingMax);

		// . four hd temps separated by '/', those over 50 in red
		// . a red two-digit entry is 32 bytes, so four of them plus
		//   separators do not fit in 128 bytes; 256 holds the worst
		//   case even for full-width longs
		char  hdbuf[256];
		char *hp    = hdbuf;
		char *hpend = hdbuf + sizeof(hdbuf);
		hdbuf[0] = '\0';
		for ( long k = 0 ; k < 4 ; k++ ) {
			long temp = h->m_hdtemps[k];
			long room = hpend - hp;
			int  n;
			if ( temp > 50 )
				n = snprintf(hp,(size_t)room,
					     "<font color=red><b>%li"
					     "</b></font>",
					     temp);
			else
				n = snprintf(hp,(size_t)room,"%li",temp);
			// defensive: need room for the text, a '/' and the
			// terminating \0, otherwise stop cleanly
			if ( n < 0 || n + 2 > room ) { *hp = '\0'; break; }
			hp += n;
			if ( k < 3 ) *hp++ = '/';
			*hp = '\0';
		}

		// host can have 2 ip addresses, get the one most
		// similar to that of the requester
		long eip = g_hostdb.getBestIp ( h , s->m_ip );
		char ipbuf3[64];
		strcpy(ipbuf3,iptoa(eip));

		// highlight hosts that are nearly out of memory
		const char *fontTagFront = "";
		const char *fontTagBack  = "";
		if ( h->m_percentMemUsed >= 98.0 ) {
			fontTagFront = "<font color=red>";
			fontTagBack  = "</font>";
		}

		// cap at 100, and show any negative value as -1
		float cpu = h->m_cpuUsage;
		if ( cpu > 100.0 ) cpu = 100.0;
		if ( cpu < 0.0   ) cpu = -1.0;

		// split time, don't divide by zero!
		long splitTime = 0;
		if ( h->m_splitsDone ) 
			splitTime = h->m_splitTimes / h->m_splitsDone;

		// build the status flags cell
		char tmpfb[64];
		SafeBuf fb(tmpfb,64);

		// does its hosts.conf file disagree with ours?
		if ( h->m_hostsConfCRC &&
		     h->m_hostsConfCRC != g_hostdb.getCRC() )
			fb.safePrintf("<font color=red><b title=\"Hosts.conf "
				      "in disagreement with ours.\">H"
				      "</b></font>");
		// recovery mode? recovered from coring?
		if ( h->m_flags & PFLAG_RECOVERYMODE )
			fb.safePrintf("<b title=\"Recovered from core"
				      "\">x</b>");
		// rebalancing?
		if ( h->m_flags & PFLAG_REBALANCING )
			fb.safePrintf("<b title=\"Currently "
				      "rebalancing\">R</b>");
		// has recs that should be in another shard? indicates
		// we need to rebalance or there is a bad hosts.conf
		if ( h->m_flags & PFLAG_FOREIGNRECS )
			fb.safePrintf("<font color=red><b title=\"Foreign data "
				      "detected. Needs rebalance.\">F"
				      "</b></font>");
		// if it has spiders going on say "S"
		if ( h->m_flags & PFLAG_HASSPIDERS )
			fb.safePrintf ( "<span title=\"Spidering\">S</span>");
		// say "M" if merging
		if (   h->m_flags & PFLAG_MERGING )
			fb.safePrintf ( "<span title=\"Merging\">M</span>");
		// say "D" if dumping
		if (   h->m_flags & PFLAG_DUMPING )
			fb.safePrintf ( "<span title=\"Dumping\">D</span>");
		// say "y" if doing the daily merge
		if (  !(h->m_flags & PFLAG_MERGEMODE0) )
			fb.safePrintf ( "y");
		// without a ping reply the flags above are not valid
		if ( ! h->m_gotPingReply ) {
			fb.reset();
			fb.safePrintf("??");
		}
		// never leave the cell empty
		if ( fb.length() == 0 )
			fb.safePrintf("&nbsp;");

		// rows for hosts past the dead timeout get a red background
		const char *bg = LIGHT_BLUE;
		if ( h->m_ping >= g_conf.m_deadHostTimeout ) 
			bg = "ffa6a6";

		// print it
		sb.safePrintf (
			  "<tr bgcolor=#%s>"
			  "<td><a href=\"http://%s:%hi/master/hosts?"
			  ""
			  "c=%s"
			  "&sort=%li\">%li</a></td>"

			  "<td>%s</td>" // hostname

			  "<td>%li</td>" // group
			  "<td>%li</td>" // stripe
			  "<td>%hi</td>" // http port

			  // hd temps
			  "<td>%s</td>"

			  // resends
			  "<td>%li</td>"
			  // error replies
			  "<td>%li</td>"
			  // etryagains
			  "<td>%li</td>"

			  // # dgrams sent to
			  "<td>%lli</td>"
			  // # dgrams recvd from
			  "<td>%lli</td>"

			  // split time
			  "<td>%li</td>"
			  // splits done
			  "<td>%li</td>"

			  // flags
			  "<td>%s</td>"

			  // slow disk reads
			  "<td>%li</td>"

			  // docs indexed
			  "<td>%li</td>"

			  // percent mem used
			  "<td>%s%.1f%%%s</td>"
			  // cpu usage
			  "<td>%.1f%%</td>"

			  // ping max
			  "<td>%s</td>"

			  // ping age
			  "<td>%lims</td>"

			  // ping
			  "<td>%s</td>"
			  // note
			  "<td nowrap=1>%s</td>"
			  "</tr>" , 
			  bg,
			  ipbuf3, h->m_httpPort, 
			  coll, sort,
			  i , 
			  h->m_hostname,
			  h->m_shardNum,
			  h->m_stripe,
			  h->m_httpPort ,
			  hdbuf,
			  h->m_totalResends,
			  h->m_errorReplies,
			  h->m_etryagains,

			  h->m_dgramsTo,
			  h->m_dgramsFrom,

			  splitTime,
			  h->m_splitsDone,

			  fb.getBufStart(),

			  h->m_slowDiskReads,
			  h->m_docsIndexed,

			  fontTagFront,
			  h->m_percentMemUsed, // float
			  fontTagBack,
			  cpu, // float

			  // ping max
			  pms,
			  // ping age
			  pingAge,

			  // ping
			  ptr ,
			  h->m_note );
	}
	// end the table now
	sb.safePrintf ( "</table><br>\n" );

	
	// print spare hosts table
	sb.safePrintf ( 
		  "<table %s>"
		  "<tr class=hdrow><td colspan=10><center>"
		  "<b>Spares</b>"
		  "</td></tr>" 
		  "<tr bgcolor=#%s>"
		  "<td><b>spareId</td>"
		  "<td><b>host name</td>"
		  "<td><b>ip1</td>"
		  "<td><b>ip2</td>"
		  "<td><b>http port</td>"
		  "<td><b>note</td>",
		  TABLE_STYLE,
		  DARK_BLUE  );

	for ( long i = 0; i < g_hostdb.m_numSpareHosts; i++ ) {
		// get the ith spare
		Host *h = g_hostdb.getSpare ( i );

		// copy each ip string out before the next iptoa() call
		char ipbuf1[64];
		char ipbuf2[64];
		strcpy(ipbuf1,iptoa(h->m_ip));
		strcpy(ipbuf2,iptoa(h->m_ipShotgun));

		// print it
		sb.safePrintf (
			  "<tr bgcolor=#%s>"
			  "<td>%li</td>"
			  "<td>%s</td>"
			  "<td>%s</td>"
			  "<td>%s</td>"
			  "<td>%hi</td>"
			  "<td>%s</td>"
			  "</tr>" , 
			  LIGHT_BLUE,
			  i , 
			  h->m_hostname,
			  ipbuf1,
			  ipbuf2,
			  h->m_httpPort ,
			  h->m_note );
	}
	sb.safePrintf ( "</table><br>" );

	// print proxy hosts table
	sb.safePrintf ( 
		  "<table %s>"
		  "<tr class=hdrow><td colspan=12><center>"
		  "<b>Proxies</b>"
		  "</td></tr>" 
		  "<tr bgcolor=#%s>"
		  "<td><b>proxyId</b></td>"
		  "<td><b>type</b></td>"
		  "<td><b>host name</b></td>"
		  "<td><b>ip1</b></td>"
		  "<td><b>ip2</b></td>"
		  "<td><b>http port</b></td>"
		  "<td><b>max ping1</b></td>"
		  "<td><b>ping1 age</b></td>"
		  "<td><b>ping1</b></td>"
		  "<td><b>note</td>",
		  TABLE_STYLE,
		  DARK_BLUE 
			);
	for ( long i = 0; i < g_hostdb.m_numProxyHosts; i++ ) {
		// get the ith proxy
		Host *h = g_hostdb.getProxy ( i );

		// ping column text and the age of that ping
		char ptr[256];
		long pingAge = generatePingMsg(h, nowmsLocal, ptr);

		// copy each ip string out before the next iptoa() call
		char ipbuf1[64];
		char ipbuf2[64];
		strcpy(ipbuf1,iptoa(h->m_ip));
		strcpy(ipbuf2,iptoa(h->m_ipShotgun));

		// host can have 2 ip addresses, get the one most
		// similar to that of the requester
		long eip = g_hostdb.getBestIp ( h , s->m_ip );
		char ipbuf3[64];
		strcpy(ipbuf3,iptoa(eip));

		// max ping, or ??? if negative
		char pms[64];
		if ( h->m_pingMax < 0 ) sprintf(pms,"???");
		else                    sprintf(pms,"%lims",h->m_pingMax);

		// label the proxy flavor
		const char *type = "proxy";
		if ( h->m_type == HT_QCPROXY ) type = "qcproxy";
		if ( h->m_type == HT_SCPROXY ) type = "scproxy";

		// print it
		sb.safePrintf (
			  "<tr bgcolor=#%s>"

			  "<td><a href=\"http://%s:%hi/master/hosts?"
			  ""
			  "c=%s\">"
			  "%li</a></td>"

			  "<td>%s</td>" // type
			  "<td>%s</td>" // host name
			  "<td>%s</td>" // ip1
			  "<td>%s</td>" // ip2
			  "<td>%hi</td>" // http port
			  "<td>%s</td>" // ping max
			  "<td>%ldms</td>" // ping age
			  "<td>%s</td>" // ping
			  "<td>%s </td>" // note
			  "</tr>" , 

			  LIGHT_BLUE,
			  ipbuf3,
			  h->m_httpPort,
			  coll,
			  i , 

			  type,
			  h->m_hostname,
			  ipbuf1,
			  ipbuf2,
			  h->m_httpPort ,
			  pms,
			  pingAge,
			  ptr,
			  h->m_note );
	}
	sb.safePrintf ( "</table><br><br>" );

	// row style used by the help table below
	sb.safePrintf(
		      "<style>"
		      ".poo { background-color:#%s;}\n"
		      "</style>\n" ,
		      LIGHT_BLUE );


	// print help table
	sb.safePrintf ( 
		  "<table %s>"
		  "<tr class=hdrow><td colspan=10><center>"
		  "<b>Key</b>"
		  "</td></tr>" 

		  "<tr class=poo>"
		  "<td>shard</td>"
		  "<td>"
		  "The index is split into shards. Which shard does this "
		  "host serve?"
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>stripe</td>"
		  "<td>"
		  "Hosts with the same stripe serve the same shard "
		  "of data."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>ip1</td>"
		  "<td>The primary IP address of the host."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>ip2</td>"
		  "<td>The secondary IP address of the host."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>http port</td>"
		  "<td>The port you can connect a browser to."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>dgrams resent</td>"
		  "<td>How many datagrams have had to be resent to a host "
		  "because it was not ACKed quick enough or because it was "
		  "fully ACKed but the entire request was resent in case "
		  "the host was reset."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>errors recvd</td>"
		  "<td>How many errors were received from a host in response "
		  "to a request to retrieve or insert data."
		  "</td>"
		  "</tr>\n"


		  "<tr class=poo>"
		  "<td>ETRYAGAINS recvd</td>"
		  "<td>How many ETRYAGAIN were received in response to a "
		  "request to add data. Usually because the host's memory "
		  "is full and it is dumping its data to disk. This number "
		  "can be high if the host if failing to dump the data "
		  "to disk because of some malfunction, and it can therefore "
		  "bottleneck the entire cluster."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>dgrams to</td>"
		  "<td>How many datagrams were sent to the host from the "
		  "selected host since startup. Includes ACK datagrams. This "
		  "can actually be higher than the number of dgrams read "
		  "when the selected host is the same as the host in the "
		  "table because of resends. Gigablast will resend datagrams "
		  "that are not promptly ACKknowledged."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>dgrams from</td>"
		  "<td>How many datagrams were received from the host by the "
		  "selected host since startup. Includes ACK datagrams."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>mem used</td>"
		  "<td>percentage of memory currently used."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>cpu usage</td>"
		  "<td>percentage of cpu resources in use by the gb process."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>ping1 age</td>"
		  "<td>How long ago the last ping request was sent to "
		  "this host. Let's us know how fresh the ping time is."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>ping1</td>"
		  "<td>Ping time to this host on the primary network."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>ping2</td>"
		  "<td>Ping time to this host on the seconday/shotgun "
		  "network. This column is not visible if the shotgun "
		  "network is not enabled in the master controls."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>M (status flag)</td>"
		  "<td>Indicates host is merging files on disk."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>D (status flag)</td>"
		  "<td>Indicates host is dumping data to disk."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>S (status flag)</td>"
		  "<td>Indicates host has outstanding spiders."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>y (status flag)</td>"
		  "<td>Indicates host is performing the daily merge."
		  "</td>"
		  "</tr>\n"


		  ,
		  TABLE_STYLE
			);

	sb.safePrintf ( "</table><br></form><br>" );

	// . send this page
	// . encapsulates in html header and tail
	// . make a Mime
	return g_httpServer.sendDynamicPage ( s , (char*) sb.getBufStart() ,
						  sb.length() );
}