bool getUrl( char *path , void (* callback) (void *state, TcpSocket *sock) ) {
	SafeBuf sb;
	sb.safePrintf ( "http://%s:%li%s"
			, iptoa(g_hostdb.m_myHost->m_ip)
			, (long)g_hostdb.m_myHost->m_port
			, path
			);
	Url u;
	u.set ( sb.getBufStart() );
	if ( ! g_httpServer.getDoc ( u.getUrl() ,
				     0 , // ip
				     0 , // offset
				     -1 , // size
				     0 , // ifmodsince
				     NULL ,
				     callback ,
				     60*1000, // timeout
				     0, // proxyip
				     0, // proxyport
				     -1, // maxtextdoclen
				     -1, // maxotherdoclen
				     NULL ) ) // useragent
		return false;
	// error?
	log("qa: getUrl error: %s",mstrerror(g_errno));
	return true;
}	
Esempio n. 2
0
// Dialog procedure for the ICQ user-info page.
//  - WM_INITDIALOG: translates the dialog and returns TRUE.
//  - WM_NOTIFY with idFrom == 0 and code == PSN_INFOCHANGED: reloads the
//    UIN / IP / RealIP / Port fields from the contact's database settings
//    and clears the version, MirVer and ping fields.
//  - WM_COMMAND with IDCANCEL: forwards the message to the parent window.
// Every path other than WM_INITDIALOG returns FALSE.
static INT_PTR CALLBACK icqUserInfoDlgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam)
{
    if (msg == WM_INITDIALOG)
    {
        TranslateDialogDefault(hWnd);
        return TRUE;
    }

    if (msg == WM_COMMAND)
    {
        if (LOWORD(wParam) == IDCANCEL)
            SendMessage(GetParent(hWnd), msg, wParam, lParam);
        return FALSE;
    }

    if (msg != WM_NOTIFY)
        return FALSE;

    LPNMHDR notifyHdr = (LPNMHDR)lParam;
    if (notifyHdr->idFrom != 0 || notifyHdr->code != PSN_INFOCHANGED)
        return FALSE;

    // the contact handle travels in the notification's lParam
    MCONTACT hContact = (MCONTACT)((LPPSHNOTIFY)lParam)->lParam;
    char text[64];

    // UIN
    _itoa(db_get_dw(hContact, protoName, "UIN", 0), text, 10);
    setTextValue(hWnd, IDC_INFO_UIN, text);

    // external IP; a stored value of 0 is shown as "no value"
    unsigned long addr = db_get_dw(hContact, protoName, "IP", 0);
    setTextValue(hWnd, IDC_INFO_IP, addr ? iptoa(addr) : NULL);

    // internal IP, same convention
    addr = db_get_dw(hContact, protoName, "RealIP", 0);
    setTextValue(hWnd, IDC_INFO_REALIP, addr ? iptoa(addr) : NULL);

    // port, same convention
    unsigned long portNum = db_get_w(hContact, protoName, "Port", 0);
    _itoa(portNum, text, 10);
    setTextValue(hWnd, IDC_INFO_PORT, portNum ? text : NULL);

    // these fields are always cleared
    setTextValue(hWnd, IDC_INFO_VERSION, NULL);
    setTextValue(hWnd, IDC_INFO_MIRVER, NULL);
    setTextValue(hWnd, IDC_INFO_PING, NULL);

    return FALSE;
}
Esempio n. 3
0
uint8_t REDFLY::socketConnect(uint8_t proto, uint8_t *ip, uint16_t port, uint16_t lport)
{
  uint8_t ret=INVALID_SOCKET, len;

  //ip
  iptoa(ip, (char*)buffer); 
  //port
  strcat_P((char*)buffer, PSTR(","));
  len = strlen((char*)buffer);
  uitoa(port, (char*)&buffer[len]);
  //local port
  strcat_P((char*)buffer, PSTR(","));
  len = strlen((char*)buffer);
  uitoa(lport, (char*)&buffer[len]);

  if(proto == PROTO_MCAST) //Multicast
  {
    proto = SOCKET_MCAST;
    if(cmd(buffer, 8, PSTR(CMD_MCAST), (char*)buffer) == 0) //xxx.xxx.xxx.xxx,aaaaa,bbbbb
    {
      ret = buffer[2]; //OKx
    }
  }
  else if(proto == PROTO_TCP) //TCP
  {
    proto = SOCKET_TCP;
    if(cmd(buffer, 8, PSTR(CMD_TCP), (char*)buffer) == 0) //xxx.xxx.xxx.xxx,aaaaa,bbbbb
    {
      ret = buffer[2]; //OKx
    }
  }
  else //UDP
  {
    proto = SOCKET_UDP;
    if(cmd(buffer, 8, PSTR(CMD_UDP), (char*)buffer) == 0) //xxx.xxx.xxx.xxx,aaaaa,bbbbb
    {
      ret = buffer[2]; //OKx
    }
  }

  if(ret != INVALID_SOCKET) //handle okay -> save socket handle and type
  {
    for(uint8_t i=0; i<MAX_SOCKETS; i++)
    {
      if(socket_state[i].handle == INVALID_SOCKET)
      {
        socket_state[i].handle = ret;
        socket_state[i].state  = proto;
        break;
      }
    }
  }

  return ret;
}
// . handles an injection (or scrape) request arriving on socket "s"
// . looks up the collection named by the "c" cgi parm, allocates a Msg7
//   state and either starts a query scrape (when "qts" is non-empty) or a
//   regular inject
// . returns false if blocked, true otherwise
// . sets g_errno on error
bool sendPageInject ( TcpSocket *s , HttpRequest *r ) {
	// get the collection
	long  collLen = 0;
	char *coll  = r->getString ( "c" , &collLen  , NULL /*default*/);
	// get collection rec
	CollectionRec *cr = g_collectiondb.getRec ( coll );
	// bitch if no collection rec found
	if ( ! cr ) {
		g_errno = ENOCOLLREC;
		// "c" defaults to NULL above; never hand a NULL to %s
		log("build: Injection from %s failed. "
		    "Collection \"%s\" does not exist.",
		    iptoa(s->m_ip),coll ? coll : "(null)");
		return g_httpServer.sendErrorReply(s,500,
					      "collection does not exist");
	}

	// make a new state
	Msg7 *msg7;
	try { msg7= new (Msg7); }
	catch ( ... ) { 
		g_errno = ENOMEM;
		// sizeof() yields a size_t, so cast it to match the %i
		log("PageInject: new(%i): %s", 
		    (int)sizeof(Msg7),mstrerror(g_errno));
		return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));}
	mnew ( msg7, sizeof(Msg7) , "PageInject" );

	msg7->m_socket = s;

	msg7->m_isScrape = false;

	// a scrape request?
	char *qts = r->getString("qts",NULL);
	// treat an empty query like a missing one
	if ( qts && ! qts[0] ) qts = NULL;
	if ( qts ) {
		// qts is html encoded? NO! fix that below then...
		//char *uf="http://www.google.com/search?num=50&"
		//	"q=%s&scoring=d&filter=0";
		// NOTE(review): strncpy() does not NUL-terminate when coll is
		// MAX_COLL_LEN chars or longer -- confirm m_coll has room for
		// the terminator
		strncpy(msg7->m_coll,coll,MAX_COLL_LEN);
		msg7->m_isScrape = true;
		msg7->m_qbuf.safeStrcpy(qts);
		msg7->m_linkDedupTable.set(4,0,512,NULL,0,false,0,"ldtab");
		msg7->m_useAhrefs = r->getLong("useahrefs",0);
		// default to yes, injectlinks.. no default to no
		msg7->m_injectLinks = r->getLong("injectlinks",0);
		if ( ! msg7->scrapeQuery ( ) ) return false;
		return sendReply ( msg7 );
	}

	if ( ! msg7->inject ( s , r , msg7 , sendReplyWrapper ) )
		return false;

	// it did not block, i gues we are done
	return sendReply ( msg7 );
}
// . creates a UDP socket, sets SO_REUSEADDR (plus a 500ms receive timeout
//   for TEST_READ) and binds it to "port" on any local address
// . "name" is filled in with the bound local address
// . for TEST_SEND, m_to is pointed at m_testIp[num]:port
// . returns the socket descriptor, or -1 on any failure (the socket is
//   closed before returning in that case)
int PageNetTest::openSock( long num, long type, struct sockaddr_in *name, 
			   long port ) {
	// set up our socket
	int sock  = socket ( AF_INET, SOCK_DGRAM , 0 );
	if ( sock < 0 ) {
		log( "net: nettest: socket-%s",strerror(errno) );
		// 0 is a valid descriptor, so failure must be -1 like the
		// other error paths below
		return -1;
	}

	// reset it all just to be safe
	bzero((char *)name, sizeof(*name));
	name->sin_family      = AF_INET;
	name->sin_addr.s_addr = 0; /*INADDR_ANY;*/
	name->sin_port        = htons(port);
	// we want to re-use port it if we need to restart
	int options = 1;
	if ( setsockopt(sock, SOL_SOCKET, SO_REUSEADDR ,
			&options,sizeof(options)) < 0 ) {
		log( "net: nettest: setsockopt-%s", strerror(errno) );
		// do not leak the descriptor
		close ( sock );
		return -1;
	}
	if( type == TEST_READ ) {
		// readers give up after half a second
		struct timeval timeo;
		timeo.tv_sec  = 0;
		timeo.tv_usec = 500000;
		if ( setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO,
				&timeo,sizeof(timeo)) < 0 ) {
			log( "net: nettest: setsockopt-%s", strerror(errno) );
			close ( sock );
			return -1;
		}
	}
	// bind this name to the socket
	if ( bind ( sock, (struct sockaddr *)name, sizeof(*name)) < 0) {
		// log first: close() may overwrite errno
		log( "net: nettest: bind on port %ld: %s", port, 
		     strerror(errno) );
		close ( sock );
		return -1;
	}

	if( type == TEST_SEND ) {
		m_to.sin_family      = AF_INET;
		m_to.sin_addr.s_addr = m_testIp[num];
		m_to.sin_port        = htons ( port );//2000 ) ; // m_port );
		bzero ( &(m_to.sin_zero) , 8 );
	}

	log( LOG_DEBUG, "net: nettest: open socket for %s on port %ld to %s", 
	     (type == TEST_SEND)?"sending":"receiving", port, 
	     iptoa(m_testIp[num]) );
	return sock;
}
// . looks "ip" up in the m_connectIps buffer
// . an empty m_connectIps never matches
// . the actual matching is delegated to isInWhiteSpaceList() using the
//   iptoa() text form of "ip"
bool Conf::isMasterIp ( uint32_t ip ) {
	// nothing configured, nobody qualifies
	if ( m_connectIps.length() <= 0 ) return false;
	// text form of the ip we are looking for
	char *ipStr = iptoa(ip);
	// the configured list
	char *ipList = m_connectIps.getBufStart();
	return isInWhiteSpaceList ( ipStr , ipList );
}
Esempio n. 7
0
/*	start a listening TCP server socket
	Inputs:
		<iface> the network interface that the server binds
		<port> the listening port
	Returns:
		the listening socket on success, otherwise
		-1 if the socket could not be opened,
		-2 if binding to <iface>:<port> failed,
		-3 if listen() failed
	NOTE(review): on -2/-3 the opened socket is not released here;
	confirm whether callers are expected to tolerate that.
*/
int
tcp_startup_server(unsigned int iface, int port)
{
    /* create a socket */
    const int listener = tcp_open_socket (1);

    if (listener < 0)
	    return -1;
    /* bind the socket on to a specified interface */
    if (bind_interface (listener, iface, port) == -1)
	    return -2;
    /* accept incoming connections, backlog of 5 */
    if (listen (listener, 5) < 0)
	    return -3;
    dbgprintf ("startup a TCP server %s at port %d fd %d",iptoa (iface),port, listener);
    return listener;
}
Esempio n. 8
0
// Prints the address family of every address attached to interface "d".
// Entries that carry no sockaddr are skipped.
void ifprint(pcap_if_t* d)
{	pcap_addr_t *a;
	for(a=d->addresses; a; a = a->next)
	{	// check the sockaddr before reading sa_family from it
		if(!a->addr)
			continue;
		printf("Address family:#%d\n", a->addr->sa_family);
		switch(a->addr->sa_family)
		{	case AF_INET:
				printf("Address family is AF_INET\n");
				// the printf that consumed this conversion is disabled,
				// so the result is currently discarded
	//			printf("\tAddress: %s",
				iptoa(((struct sockaddr_in*)a->addr)->sin_addr.s_addr);
				break;
			default:
				printf("Address family unknow\n");		
				break;
		}
	}
}
// returns false if blocked, true otherwise, like on quick connect error
bool getUrl( char *path , long checkCRC = 0 , char *post = NULL ) {

	SafeBuf sb;
	sb.safePrintf ( "http://%s:%li%s"
			, iptoa(g_hostdb.m_myHost->m_ip)
			, (long)g_hostdb.m_myHost->m_httpPort
			, path
			);

	s_checkCRC = checkCRC;

	bool doPost = true;
	if ( strncmp ( path , "/search" , 7 ) == 0 )
		doPost = false;

	//Url u;
	s_url.set ( sb.getBufStart() );
	log("qa: getting %s",sb.getBufStart());
	if ( ! g_httpServer.getDoc ( s_url.getUrl() ,
				     0 , // ip
				     0 , // offset
				     -1 , // size
				     0 , // ifmodsince
				     NULL ,
				     gotReplyWrapper,
				     999999*1000, // timeout ms
				     0, // proxyip
				     0, // proxyport
				     -1, // maxtextdoclen
				     -1, // maxotherdoclen
				     NULL , // useragent
				     "HTTP/1.0" , // protocol
				     doPost , // doPost
				     NULL , // cookie
				     NULL , // additionalHeader
				     NULL , // fullRequest
				     post ) )
		return false;
	// error?
	processReply ( NULL , 0 );
	//log("qa: getUrl error: %s",mstrerror(g_errno));
	return true;
}	
Esempio n. 10
0
/*
 * Answers a "/whois <name>" request held in buffer.
 * The name is looked up with search_name(); the reply (connection time, IP
 * address and port, or a "not logged on" notice) is formatted back into
 * buffer and sent on socket with do_write().
 * If the scratch allocation fails, nothing is sent.
 */
void handle_whois(puser users, int socket, char * buffer) {
	char timebuf[20];
	int k;
	char * name = malloc(MAXLEN);

	if (name == NULL)
		return;
	/* zero-fill so name is a valid empty string when sscanf matches nothing */
	memset(name, 0, MAXLEN);
	/* NOTE(review): %s is unbounded; this relies on buffer holding fewer
	 * than MAXLEN characters -- confirm against the caller */
	sscanf(buffer, "/whois %s", name);
	k = search_name(users, name);

	if (k != -1) {
		time_t since = users[k].timez;
		strftime(timebuf, 20, "%Y/%m/%d@%H:%M:%S", localtime(&since));
		sprintf(buffer,
				"[Server] : %s is connected since %s with IP address %s and port number %i\n",
				name, timebuf, iptoa(users[k].ip), users[k].port);
	} else {
		sprintf(buffer, "[Server] : %s appears not to be a logged on user\n",
				name);
	}

	do_write(socket, buffer);
	/* the scratch name is no longer needed */
	free(name);
}
void gotReply ( void *state , TcpSocket *s ) {
	// send another
	Msg28 *THIS = (Msg28 *)state;
	// count em
	THIS->m_numReplies++;
	// do not free send buffer
	s->m_sendBuf = NULL;
	// debug
	Host *h = g_hostdb.getTcpHost ( s->m_ip , s->m_port );
	//if (THIS->m_sendToProxy)
	//	h = g_hostdb.getProxyFromTcpPort ( s->m_ip , s->m_port );
	log(LOG_INIT,"admin: got reply from hostid #%"INT32".",h->m_hostId);
	//slot->m_readBufSize,h->m_hostId);
	// log errors
	if ( g_errno ) {
		if ( h ) log("admin: Error broadcasting config request to "
			     "hostid #%"INT32" (%s:%"INT32"): %s.",
			     h->m_hostId,iptoa(h->m_ip),(int32_t)s->m_port,
			     mstrerror(g_errno));
		else     log("admin: Error broadcasting config request: "
			     "%s.",mstrerror(g_errno));
		g_errno = 0;
	}
	// try to send more
	if ( ! THIS->doSendLoop ( ) ) return;
	// do we have all the replies?
	//if ( THIS->m_numReplies < THIS->m_numRequests ) return;
	// do not finish until we got them all
	if ( THIS->m_hostId < 0 && THIS->m_numReplies < THIS->m_sendTotal ) 
		return;
	if ( THIS->m_hostId >= 0 && THIS->m_hostId2 >= 0 &&
	     THIS->m_numReplies < THIS->m_sendTotal ) 
		return;
	// all done, free the buf here
	if ( THIS->m_freeBuf ) 
		mfree ( THIS->m_buf , THIS->m_bufSize , "Msg28" );
	THIS->m_buf = NULL;
	// all done if did not block
	THIS->m_callback ( THIS->m_state );
}
Esempio n. 12
0
// . decides whether the requester behind "sock"/"hr" may administer "cr"
// . always false for a NULL cr, for the "main" and "dmoz" collections, and
//   when g_conf.m_useCollectionPasswords is off
// . true when the collection has neither passwords nor ips configured
// . otherwise true when the socket ip is in m_collectionIps, or when the
//   password from "pwd" / "password" / the "pwd" cookie is in
//   m_collectionPasswords
bool Conf::isCollAdmin2 ( TcpSocket *sock , 
			  HttpRequest *hr ,
			  CollectionRec *cr ) {

	// need a collection to check against
	if ( ! cr ) return false;

	// never for main or dmoz! must be root!
	if ( strcmp(cr->m_coll,"main")==0 ||
	     strcmp(cr->m_coll,"dmoz")==0 )
		return false;

	// feature switched off
	if ( ! g_conf.m_useCollectionPasswords) return false;

	// nothing configured at all? then allow them through
	if ( ! ( cr->m_collectionPasswords.length() > 0 ||
		 cr->m_collectionIps      .length() > 0 ) )
		return true;

	// a good ip?
	char *ipStr  = iptoa(sock->m_ip);
	char *ipList = cr->m_collectionIps.getBufStart();
	if ( isInWhiteSpaceList ( ipStr , ipList ) ) return true;

	// otherwise they need a password: cgi "pwd", then cgi "password",
	// then the "pwd" cookie
	char *pwd = hr->getString("pwd");
	if ( ! pwd ) pwd = hr->getString("password");
	if ( ! pwd ) pwd = hr->getStringFromCookie("pwd");
	if ( ! pwd ) return false;

	// if they got the password, let them in
	char *pwdList = cr->m_collectionPasswords.getBufStart();
	if ( isInWhiteSpaceList ( pwd , pwdList ) ) return true;

	return false;
}
Esempio n. 13
0
// Sends the flash-resident (PGM) string "stream" on "socket".
// The command prefix "socket,size,ip,port," is assembled in the shared
// buffer; ip and port are written only when given and the socket's state is
// SOCKET_UDP, otherwise "0" is written in their place.
// Returns the result of cmd().
uint8_t REDFLY::socketSendPGM(uint8_t socket, PGM_P stream, uint8_t *ip, uint16_t port)
{
  char *args = (char*)buffer;
  uint16_t size = strlen_P(stream);
  uint8_t pos;

  //"x" (socket handle)
  uitoa(socket, args);

  //",xxxx" (payload size)
  strcat_P(args, PSTR(","));
  pos = strlen(args);
  uitoa(size, (char*)&buffer[pos]);

  //",xxx.xxx.xxx.xxx" (destination ip, UDP only)
  if(!ip || (socketState(socket) != SOCKET_UDP))
  {
    strcat_P(args, PSTR(",0"));
  }
  else
  {
    strcat_P(args, PSTR(","));
    pos = strlen(args);
    iptoa(ip, (char*)&buffer[pos]);
  }

  //",xxxxx" (destination port, UDP only)
  if(!port || (socketState(socket) != SOCKET_UDP))
  {
    strcat_P(args, PSTR(",0"));
  }
  else
  {
    strcat_P(args, PSTR(","));
    pos = strlen(args);
    uitoa(port, (char*)&buffer[pos]);
  }

  //"," (the data follows)
  strcat_P(args, PSTR(","));

  return cmd(PSTR(CMD_SEND), (char*)buffer, stream); //x,xxxx,xxx.xxx.xxx.xxx,xxxxx,
}
// . requests the nettest results page from the next host, indexed by
//   m_numResultsSent, and advances that counter
// . returns true once every host has been queried
// . returns false when getDoc() returns false (gotResultsWrapper is the
//   callback passed to it)
// . if getDoc() returned true with g_errno set, the error is cleared and
//   gotResults(NULL) is returned
// . the collection's proxy settings are used when the collection record
//   exists; otherwise no proxy is used
bool PageNetTest::collectResults() {

	// may be NULL if the collection is gone
	CollectionRec *cr = g_collectiondb.getRec ( m_coll );

	if( m_numResultsSent >= g_hostdb.getNumHosts() ) return true;
		
	char temp[64];
	long ip = g_hostdb.getHost( m_numResultsSent )->m_ip;
	long port = g_hostdb.getHost( m_numResultsSent )->m_httpPort;
	// bounded so an unexpected value can never overrun temp
	snprintf(temp, sizeof(temp), "http://%s:%li/get?rnettest=1", 
		 iptoa(ip), port);
	log( LOG_DEBUG, "net: nettest: queried results from: %s", temp );
	
	m_numResultsSent++;

	if ( ! g_httpServer.getDoc ( temp ,// &u                , 
				     0 , // ip
				     0                 , //offset
				     -1                , //size
				     0                 , //modifiedSince
				     this              , //state
				     gotResultsWrapper , //callback
				     30*1000           , //timeout
				     // do not dereference a missing rec
				     cr ? cr->m_proxyIp   : 0 , //proxyIp
				     cr ? cr->m_proxyPort : 0 , //proxyPort
				     200               , //maxTextLen
				     200               ) ) return false;
	if ( g_errno ) {
		g_errno = 0;
		return gotResults ( NULL );
	}

	return true;
}
Esempio n. 15
0
// . fills in m_confirmRequest from this Msg12's members and sends it as a
//   0x12 udp request to every live host in our shard, with
//   gotLockReplyWrapper as the reply callback
// . returns false if at least one request was sent (blocked), true otherwise
//   (including when sendRequest() fails)
// . NOTE: the unconditional NULL write near the top ("no longer use this")
//   stops execution before any of that happens, so the remainder of the
//   body is effectively retired code
bool Msg12::confirmLockAcquisition ( ) {

	// ensure not in use. not msg12 replies outstanding.
	// (the NULL write is a deliberate crash on a failed sanity check)
	if ( m_numRequests != m_numReplies ) { char *xx=NULL;*xx=0; }

	// no longer use this
	// (deliberate, unconditional crash: nothing below is reached)
	char *xx=NULL;*xx=0;

	// we are now removing 
	m_confirming = true;

	// make that the request
	// . point to start of the 12 byte request buffer
	// . m_lockSequence should still be valid
	ConfirmRequest *cq = &m_confirmRequest;
	char *request     = (char *)cq;
	int32_t  requestSize = sizeof(ConfirmRequest);
	// sanity
	// (a ConfirmRequest must not be the same size as a LockRequest;
	//  presumably the receiver tells them apart by size -- TODO confirm)
	if ( requestSize == sizeof(LockRequest)){ char *xx=NULL;*xx=0; }
	// set it
	cq->m_collnum   = m_collnum;
	cq->m_doledbKey = m_doledbKey;
	cq->m_firstIp   = m_firstIp;
	cq->m_lockKeyUh48 = m_lockKeyUh48;
	cq->m_maxSpidersOutPerIp = m_maxSpidersOutPerIp;
	// . use the locking group from when we sent the lock request
	// . get ptr to list of hosts in the group
	//Host *hosts = g_hostdb.getGroup ( m_lockGroupId );
	// the same group (shard) that has the spiderRequest/Reply is
	// the one responsible for locking.
	Host *hosts = g_hostdb.getMyShard();
	// this must select the same shard that is going to spider it!
	// i.e. our shard! because we check our local lock table to see
	// if a doled url is locked before spidering it ourselves.
	//Host *hosts = g_hostdb.getMyShard();
	// shortcut
	UdpServer *us = &g_udpServer;
	// get # of hosts in each mirror group
	int32_t hpg = g_hostdb.getNumHostsPerShard();
	// reset counts
	m_numRequests = 0;
	m_numReplies  = 0;
	// note it
	if ( g_conf.m_logDebugSpider )
		log("spider: confirming lock for uh48=%" PRIu64" firstip=%s",
		    m_lockKeyUh48,iptoa(m_firstIp));
	// loop over hosts in that shard
	for ( int32_t i = 0 ; i < hpg ; i++ ) {
		// get a host
		Host *h = &hosts[i];
		// skip if dead! no need to get a reply from dead guys
		if ( g_hostdb.isDead ( h ) ) continue;
		// send request to him
		if ( ! us->sendRequest ( request      ,
					 // a size of 2 should mean confirm
					 requestSize  ,
					 0x12         , // msgType
					 h->m_ip      ,
					 h->m_port    ,
					 h->m_hostId  ,
					 NULL         , // retSlotPtrPtr
					 this         , // state data
					 gotLockReplyWrapper ,
					 udpserver_sendrequest_infinite_timeout ) ) 
			// udpserver returns false and sets g_errno on error
			// (requests already sent to earlier hosts stay
			//  outstanding when we bail out here)
			return true;
		// count them
		m_numRequests++;
	}
	// block?
	if ( m_numRequests > 0 ) return false;
	// did not block
	return true;
}
Esempio n. 16
0
// . callback that sends the list held in the State00 ("state") back as the
//   reply on the State00's udp slot
// . on g_errno the State00 is destroyed and an error reply is sent instead
// . for RDB_LINKDB the list is first compacted in place, dropping records
//   whose linker ip equals the previously kept record's ip (the final record
//   is always kept)
// . the "listb" and "msg5xx" parameters are not used; the list and msg5 are
//   taken from the State00
// . slot should be auto-nuked upon transmission or error
// . TODO: ensure if this sendReply() fails does it really nuke the slot?
void gotListWrapper ( void *state , RdbList *listb , Msg5 *msg5xx ) {
	logTrace( g_conf.m_logTraceMsg0, "BEGIN" );
	
	// get the state
	State00 *st0 = (State00 *)state;
	// extract the udp slot and list and msg5
	// (slot and us are copied into locals here, so they remain usable
	//  after st0 is deleted in the error path below)
	UdpSlot   *slot =  st0->m_slot;
	RdbList   *list = &st0->m_list;
	Msg5      *msg5 = &st0->m_msg5;
	UdpServer *us   =  st0->m_us;

	// timing debug
	if ( g_conf.m_logTimingNet || g_conf.m_logDebugNet ) {
		//log("Msg0:hndled request %" PRIu64,gettimeofdayInMilliseconds());
		int32_t size = -1;
		// "list" is the address of a member, so this test always passes
		if ( list ) size     = list->getListSize();
		log(LOG_TIMING|LOG_DEBUG,
		    "net: msg0: Handled request for data. "
		    "Now sending data termId=%" PRIu64" size=%" PRId32
		    " transId=%" PRId32" ip=%s port=%i took=%" PRId64" "
		    "(niceness=%" PRId32").",
		    g_posdb.getTermId(msg5->m_startKey),
		    size,slot->m_transId,
		    iptoa(slot->m_ip),slot->m_port,
		    gettimeofdayInMilliseconds() - st0->m_startTime ,
		    st0->m_niceness );
	}

	// on error nuke the list and it's data
	if ( g_errno ) {
		// destroying st0 also destroys the list and msg5 it contains
		mdelete ( st0 , sizeof(State00) , "Msg0" );
		delete (st0);
		// TODO: free "slot" if this send fails
		
		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
		us->sendErrorReply ( slot , g_errno );
		return;
	}

	QUICKPOLL(st0->m_niceness);
	// point to the serialized list in "list"
	char *data      = list->getList();
	int32_t  dataSize  = list->getListSize();
	char *alloc     = list->getAlloc();
	int32_t  allocSize = list->getAllocSize();
	// tell list not to free the data since it is a reply so UdpServer
	// will free it when it destroys the slot
	list->setOwnData ( false );
	// keep track of stats
	Rdb *rdb = getRdbFromId ( st0->m_rdbId );
	if ( rdb ) rdb->sentReplyGet ( dataSize );
	// TODO: can we free any memory here???

	// keep track of how long it takes to complete the send
	st0->m_startTime = gettimeofdayInMilliseconds();
	// debug point
	// (allow 20 bytes of slack over what was requested before complaining)
	int32_t oldSize = msg5->m_minRecSizes;
	int32_t newSize = msg5->m_minRecSizes + 20;
	// watch for wrap around
	if ( newSize < oldSize ) newSize = 0x7fffffff;
	if ( dataSize > newSize && list->getFixedDataSize() == 0 &&
	     // do not annoy me with these linkdb msgs
	     dataSize > newSize+100 ) 
		log(LOG_LOGIC,"net: msg0: Sending more data than what was "
		    "requested. Ineffcient. Bad engineer. dataSize=%" PRId32" "
		    "minRecSizes=%" PRId32".",dataSize,oldSize);
		    
	//
	// for linkdb lists, remove all the keys that have the same IP32
	// and store a count of what we removed somewhere
	//
	if ( st0->m_rdbId == RDB_LINKDB ) {
		// store compressed list on itself
		// ("dst" is the write cursor; it never gets ahead of the
		//  record being read because records are only dropped)
		char *dst = list->m_list;
		// keep stats
		int32_t totalOrigLinks = 0;
		int32_t ipDups = 0;
		int32_t lastIp32 = 0;
		char *listEnd = list->getListEnd();
		// compress the list
		for ( ; ! list->isExhausted() ; list->skipCurrentRecord() ) {
			// breathe
			QUICKPOLL ( st0->m_niceness );
			// count it
			totalOrigLinks++;
			// get rec
			char *rec = list->getCurrentRec();
			int32_t ip32 = g_linkdb.getLinkerIp_uk((key224_t *)rec );
			// same as one before?
			if ( ip32 == lastIp32 && 
			     // are we the last rec? include that for
			     // advancing the m_nextKey in Linkdb more 
			     // efficiently.
			     rec + LDBKS < listEnd ) {
				ipDups++;
				continue;
			}
			// store it
			gbmemcpy (dst , rec , LDBKS );
			dst += LDBKS;
			// update it
			lastIp32 = ip32;
		}
		// . if we removed one key, store the stats
		// . caller should recognize reply is not a multiple of
		//   the linkdb key size LDBKS and no its there!
		// (the stats are not actually written: the stores below are
		//  commented out, so this branch currently does nothing)
		if ( ipDups ) {
			//*(int32_t *)dst = totalOrigLinks;
			//dst += 4;
			//*(int32_t *)dst = ipDups;
			//dst += 4;
		}
		// update list parms
		list->m_listSize = dst - list->m_list;
		list->m_listEnd  = list->m_list + list->m_listSize;
		// re-read what we are about to send; alloc/allocSize are
		// left as captured above
		data      = list->getList();
		dataSize  = list->getListSize();
	}


	//log("sending replySize=%" PRId32" min=%" PRId32,dataSize,msg5->m_minRecSizes);
	// . TODO: dataSize may not equal list->getListMaxSize() so
	//         Mem class may show an imblanace
	// . now g_udpServer is responsible for freeing data/dataSize
	// . the "true" means to call doneSending_ass() from the signal handler
	//   if need be
	st0->m_us->sendReply_ass( data, dataSize, alloc, allocSize, slot, st0, doneSending_ass, -1, -1, true );

	logTrace( g_conf.m_logTraceMsg0, "END" );
}	
Esempio n. 17
0
uint8_t REDFLY::begin(uint8_t dhcp, uint8_t *ip, uint8_t *dns, uint8_t *gateway, uint8_t *netmask)
{
  uint8_t len;

  //reset sockets and IP addr
  socketReset();
  memset(ipaddr, 0, sizeof(ipaddr));
  memset(buffer, 0, sizeof(buffer));

  //dhcp or auto ip
  if(dhcp == 2) //Auto-IP
  {
    if(cmd(buffer, sizeof(buffer), PSTR(CMD_IPCONF IPCONF_AUTOIP)) == 0) //OKMACaddrIPaddrSUBNETGateway
    {
      memcpy(&ipaddr[0], &buffer[8], 4);
      return 0;
    }
    return 2;
  }
  else if(dhcp) //DHCP
  {
    if(cmd(buffer, sizeof(buffer), PSTR(CMD_IPCONF IPCONF_DHCP)) == 0) //OKMACaddrIPaddrSUBNETGateway
    {
      memcpy(&ipaddr[0], &buffer[8], 4);
      return 0;
    }
    return 1;
  }

  //static ip settings
  if(dns)
  {
    iptoa(dns, (char*)&buffer[0]);
    cmd(PSTR(CMD_DNSSERVER), (char*)buffer); //set DNS server (FW >= 4.3.0 required)
  }
  if(ip)
  {
    iptoa(ip, (char*)buffer);
    memcpy(ipaddr, ip, 4);
  }
  else
  {
    strcat_P((char*)buffer, PSTR("192.168.0.1"));
    ipaddr[0] = 192;
    ipaddr[1] = 168;
    ipaddr[2] = 0;
    ipaddr[3] = 1;
  }
  if(netmask)
  {
    strcat_P((char*)buffer, PSTR(","));
    len = strlen((char*)buffer);
    iptoa(netmask, (char*)&buffer[len]);
  }
  else
  {
    strcat_P((char*)buffer, PSTR(",255.255.255.0"));
  }
  if(gateway)
  {
    strcat_P((char*)buffer, PSTR(","));
    len = strlen((char*)buffer);
    iptoa(gateway, (char*)&buffer[len]);
  }

  return cmd(PSTR(CMD_IPCONF "0,"), (char*)buffer); //xxx.xxx.xxx.xxx,yyy.yyy.yyy.yyy,zzz.zzz.zzz.zzz
}
// . returns false if blocked, true otherwise
// . sets errno on error
// . make a web page displaying the config of this host
// . call g_httpServer.sendDynamicPage() to send it
bool sendPageHosts ( TcpSocket *s , HttpRequest *r ) {
	// don't allow pages bigger than 128k in cache
	char  buf [ 64*1024 ];
	//char *p    = buf;
	//char *pend = buf + 64*1024;
	SafeBuf sb(buf, 64*1024);


	// XML OR JSON
	 char format = r->getReplyFormat();
	// if ( format == FORMAT_XML || format == FORMAT_JSON )
	// 	return sendPageHostsInXmlOrJson( s , r );


	// check for a sort request
	int32_t sort  = r->getLong ( "sort", -1 );
	// sort by hostid with dead on top by default
	if ( sort == -1 ) sort = 16;
	const char *coll = r->getString ( "c" );
	//char *pwd  = r->getString ( "pwd" );
	// check for setnote command
	int32_t setnote = r->getLong("setnote", 0);
	int32_t setsparenote = r->getLong("setsparenote", 0);
	// check for replace host command
	int32_t replaceHost = r->getLong("replacehost", 0);
	// check for sync host command
	int32_t syncHost = r->getLong("synchost", 0);
	// set note...
	if ( setnote == 1 ) {
		// get the host id to change
		int32_t host = r->getLong("host", -1);
		if ( host == -1 ) goto skipReplaceHost;
		// get the note to set
		int32_t  noteLen;
		const char *note = r->getString("note", &noteLen, "", 0);
		// set the note
		g_hostdb.setNote(host, note, noteLen);
	}
	// set spare note...
	if ( setsparenote == 1 ) {
		// get the host id to change
		int32_t spare = r->getLong("spare", -1);
		if ( spare == -1 ) goto skipReplaceHost;
		// get the note to set
		int32_t  noteLen;
		const char *note = r->getString("note", &noteLen, "", 0);
		// set the note
		g_hostdb.setSpareNote(spare, note, noteLen);
	}
	// replace host...
	if ( replaceHost == 1 ) {
		// get the host ids to swap
		int32_t rhost = r->getLong("rhost", -1);
		int32_t rspare = r->getLong("rspare", -1);
		if ( rhost == -1 || rspare == -1 )
			goto skipReplaceHost;
		// replace
		g_hostdb.replaceHost(rhost, rspare);
	}
	// sync host...
	if ( syncHost == 1 ) {
		// get the host id to sync
		int32_t syncHost = r->getLong("shost", -1);
		if ( syncHost == -1 ) goto skipReplaceHost;
		// call sync
		g_hostdb.syncHost(syncHost, false);
	}
	if ( syncHost == 2 ) {
		// get the host id to sync
		int32_t syncHost = r->getLong("shost", -1);
		if ( syncHost == -1 ) goto skipReplaceHost;
		// call sync
		g_hostdb.syncHost(syncHost, true);
	}

skipReplaceHost:

	int32_t refreshRate = r->getLong("rr", 0);
	if(refreshRate > 0 && format == FORMAT_HTML ) 
		sb.safePrintf("<META HTTP-EQUIV=\"refresh\" "
			      "content=\"%" PRId32"\"\\>",
			      refreshRate);

	// print standard header
	// 	char *pp    = sb.getBuf();
	// 	char *ppend = sb.getBufEnd();
	// 	if ( pp ) {
	if ( format == FORMAT_HTML ) g_pages.printAdminTop ( &sb , s , r );
	//	sb.incrementLength ( pp - sb.getBuf() );
	//	}
	const char *colspan = "30";
	//char *shotcol = "";
	char shotcol[1024];
	shotcol[0] = '\0';
	const char *cs = coll;
	if ( ! cs ) cs = "";

	if ( g_conf.m_useShotgun && format == FORMAT_HTML ) {
		colspan = "31";
		//shotcol = "<td><b>ip2</b></td>";
		sprintf ( shotcol, "<td><a href=\"/admin/hosts?c=%s"
			 	   "&sort=2\">"
			  "<b>ping2</b></td></a>",
			  cs);
	}

	// print host table
	if ( format == FORMAT_HTML )
		sb.safePrintf ( 
			       "<table %s>"
			       "<tr><td colspan=%s><center>"
			       //"<font size=+1>"
			       "<b>Hosts "
			       "(<a href=\"/admin/hosts?c=%s&sort=%" PRId32"&resetstats=1\">"
			       "reset)</a></b>"
			       //"</font>"
			       "</td></tr>" 
			       "<tr bgcolor=#%s>"
			       "<td><a href=\"/admin/hosts?c=%s&sort=0\">"

			       "<b>hostId</b></a></td>"
			       "<td><b>host ip</b></td>"
			       "<td><b>shard</b></td>"
			       "<td><b>mirror</b></td>" // mirror # within the shard

			       // i don't remember the last time i used this, so let's
			       // just comment it out to save space
			       //"<td><b>group mask</td>"

			       //"<td><b>ip1</td>"
			       //"<td><b>ip2</td>"
			       //"<td><b>udp port</td>"

			       // this is now more or less obsolete
			       //"<td><b>priority udp port</td>"

			       //"<td><b>dns client port</td>"
			       "<td><b>http port</b></td>"

			       // this is now obsolete since ide channel is. it was used
			       // so that only the guy with the token could merge,
			       // and it made sure that only one merge per ide channel
			       // and per group was going on at any one time for performance
			       // reasons.
			       //"<td><b>token group</td>"

			       //"<td><b>best switch id</td>"
			       //"<td><b>actual switch id</td>"
			       //"<td><b>switch id</td>"

			       // this is now fairly obsolete
			       //"<td><b>ide channel</td>"

			       //"<td><b>HD temps (C)</b></td>"
			       "<td><b>GB version</b></td>"

			       //"<td><b>resends sent</td>"
			       //"<td><b>errors recvd</td>"
			       "<td><b>try agains recvd</b></td>"

			       "<td><a href=\"/admin/hosts?c=%s&sort=3\">"
			       "<b>dgrams resent</b></a></td>"

			       /*

				 MDW: take out for adding new stuff

			       "<td><a href=\"/admin/hosts?c=%s&sort=4\">"
			       "<b>errors recvd</a></td>"
			       "<td><a href=\"/admin/hosts?c=%s&sort=5\">"
			       "<b>ETRY AGAINS recvd</a></td>"

			       "<td><a href=\"/admin/hosts?c=%s&sort=6\">"
			       "<b>dgrams to</a></td>"
			       "<td><a href=\"/admin/hosts?c=%s&sort=7\">"
			       "<b>dgrams from</a></td>"
			       */

			       // "<td><a href=\"/admin/hosts?c=%s&sort=18\">"
			       // "<b>corrupts</a></td>"
			       // "<td><a href=\"/admin/hosts?c=%s&sort=19\">"
			       // "<b># ooms</a></td>"
			       // "<td><a href=\"/admin/hosts?c=%s&sort=20\">"
			       // "<b>socks closed</a></td>"


			       //"<td><a href=\"/admin/hosts?c=%s&sort=8\">"
			       //"<b>loadavg</a></td>"


			       "<td><a href=\"/admin/hosts?c=%s&sort=13\">"
			       "<b>avg split time</b></a></td>"

			       "<td><b>splits done</b></a></td>"

			       "<td><a href=\"/admin/hosts?c=%s&sort=12\">"
			       "<b>status</b></a></td>"

			       "<td><a href=\"/admin/hosts?c=%s&sort=15\">"
			       "<b>slow reads</b></a></td>"

			       "<td><b>docs indexed</a></td>"

			       "<td><a href=\"/admin/hosts?c=%s&sort=9\">"
			       "<b>mem used</a></td>"

			       "<td><a href=\"/admin/hosts?c=%s&sort=10\">"
			       "<b>cpu used</b></a></td>"

			       "<td><a href=\"/admin/hosts?c=%s&sort=17\">"
			       "<b>disk used</b></a></td>"

			       "<td><a href=\"/admin/hosts?c=%s&sort=14\">"
			       "<b>max ping1</b></a></td>"

			       "<td><a href=\"/admin/hosts?c=%s&sort=11\">"
			       "<b>ping1 age</b></a></td>"

			       //"<td><b>ip1</td>"
			       "<td><a href=\"/admin/hosts?c=%s&sort=1\">"
			       "<b>ping1</b></a></td>"

			       "%s"// "<td><b>ip2</td>"
			       //"<td><b>inSync</td>",
			       //"<td>avg roundtrip</td>"
			       //"<td>std. dev.</td></tr>"
			       "<td><b>note</b></td>",
			       TABLE_STYLE ,
			       colspan    ,

			       cs, sort,
			       DARK_BLUE  ,

			       cs,
			       cs,
			       cs,
			       cs,
			       cs,
			       cs,
			       cs,
			       cs,
			       cs,
			       cs,
			       cs,
			       shotcol    );

	// loop through each host we know and print it's stats
	int32_t nh = g_hostdb.getNumHosts();
	// should we reset resends, errorsRecvd and ETRYAGAINS recvd?
	if ( r->getLong("resetstats",0) ) {
		for ( int32_t i = 0 ; i < nh ; i++ ) {
			// get the ith host (hostId)
			Host *h = g_hostdb.getHost ( i );
			h->m_pingInfo.m_totalResends   = 0;
			h->m_errorReplies = 0;
			h->m_pingInfo.m_etryagains   = 0;
			h->m_dgramsTo     = 0;
			h->m_dgramsFrom   = 0;
			h->m_splitTimes = 0;
			h->m_splitsDone = 0;
			h->m_pingInfo.m_slowDiskReads =0;
			
		}
	}

	// sort hosts if needed
	int32_t hostSort [ MAX_HOSTS ];
	for ( int32_t i = 0 ; i < nh ; i++ )
		hostSort [ i ] = i;
	switch ( sort ) {
	case 1: gbsort ( hostSort, nh, sizeof(int32_t), pingSort1      ); break;
	case 2: gbsort ( hostSort, nh, sizeof(int32_t), pingSort2      ); break;
	case 3: gbsort ( hostSort, nh, sizeof(int32_t), resendsSort    ); break;
	case 4: gbsort ( hostSort, nh, sizeof(int32_t), errorsSort     ); break;
	case 5: gbsort ( hostSort, nh, sizeof(int32_t), tryagainSort   ); break;
	case 6: gbsort ( hostSort, nh, sizeof(int32_t), dgramsToSort   ); break;
	case 7: gbsort ( hostSort, nh, sizeof(int32_t), dgramsFromSort ); break;
	//case 8: gbsort ( hostSort, nh, sizeof(int32_t), loadAvgSort    ); break;
	case 9: gbsort ( hostSort, nh, sizeof(int32_t), memUsedSort    ); break;
	case 10:gbsort ( hostSort, nh, sizeof(int32_t), cpuUsageSort   ); break;
	case 11:gbsort ( hostSort, nh, sizeof(int32_t), pingAgeSort    ); break;
	case 12:gbsort ( hostSort, nh, sizeof(int32_t), flagSort       ); break;
	case 13:gbsort ( hostSort, nh, sizeof(int32_t), splitTimeSort  ); break;
	case 14:gbsort ( hostSort, nh, sizeof(int32_t), pingMaxSort    ); break;
	case 15:gbsort ( hostSort, nh, sizeof(int32_t), slowDiskSort    ); break;
	case 16:gbsort ( hostSort, nh, sizeof(int32_t), defaultSort    ); break;
	case 17:gbsort ( hostSort, nh, sizeof(int32_t), diskUsageSort   ); break;

	}

	// we are the only one that uses these flags, so set them now
	/*
	static char s_properSet = 0;
	if ( ! s_properSet ) {
		s_properSet = 1;
		g_hostdb.setOnProperSwitchFlags();
	}
	*/

	if ( format == FORMAT_XML ) {
		sb.safePrintf("<response>\n");
		sb.safePrintf("\t<statusCode>0</statusCode>\n");
		sb.safePrintf("\t<statusMsg>Success</statusMsg>\n");
	}

	if ( format == FORMAT_JSON ) {
		sb.safePrintf("{\"response\":{\n");
		sb.safePrintf("\t\"statusCode\":0,\n");
		sb.safePrintf("\t\"statusMsg\":\"Success\",\n");
	}

	int64_t nowmsLocal = gettimeofdayInMillisecondsLocal();

	// compute majority gb version so we can highlight bad out of sync
	// gb versions in red below
	int32_t majorityHash32 = 0;
	int32_t lastCount = 0;
	// get majority gb version
	for ( int32_t si = 0 ; si < nh ; si++ ) {
		int32_t i = hostSort[si];
		// get the ith host (hostId)
		Host *h = g_hostdb.getHost ( i );
		char *vbuf = h->m_pingInfo.m_gbVersionStr;//gbVersionStrBuf;
		int32_t vhash32 = hash32n ( vbuf );
		if ( vhash32 == majorityHash32 ) lastCount++;
		else lastCount--;
		if ( lastCount < 0 ) majorityHash32 = vhash32;
	}


	// print it
	//int32_t ng = g_hostdb.getNumGroups();
	for ( int32_t si = 0 ; si < nh ; si++ ) {
		int32_t i = hostSort[si];
		// get the ith host (hostId)
		Host *h = g_hostdb.getHost ( i );
		// get avg/stdDev msg roundtrip times in ms for ith host
		//int32_t avg , stdDev;
		//g_hostdb.getTimes ( i , &avg , &stdDev );
                char ptr[256];
                int32_t pingAge = generatePingMsg(h, nowmsLocal, ptr);
		char pms[64];
		if ( h->m_pingMax < 0 ) sprintf(pms,"???");
		else                    sprintf(pms,"%" PRId32"ms",h->m_pingMax);
		// the sync status ascii-ized
		char syncStatus = h->m_syncStatus;
		const char *ptr2;
		if      (syncStatus==0) 
			ptr2 ="<b>N</b>";
		else if (syncStatus==1) 
			ptr2 ="Y";
		else 
			ptr2 ="?";
		char ipbuf1[64];
		char ipbuf2[64];
		strcpy(ipbuf1,iptoa(h->m_ip));
		strcpy(ipbuf2,iptoa(h->m_ipShotgun));

		/*
		char  hdbuf[128];
		char *hp = hdbuf;
		for ( int32_t k = 0 ; k < 4 ; k++ ) {
			int32_t temp = h->m_hdtemps[k];
			if ( temp > 50 && format == FORMAT_HTML )
				hp += sprintf(hp,"<font color=red><b>%" PRId32
					      "</b></font>",
					      temp);
			else
				hp += sprintf(hp,"%" PRId32,temp);
			if ( k < 3 ) *hp++ = '/';
			*hp = '\0';
		}
		*/
		char *vbuf = h->m_pingInfo.m_gbVersionStr;//m_gbVersionStrBuf;
		// get hash
		int32_t vhash32 = hash32n ( vbuf );
		const char *vbuf1 = "";
		const char *vbuf2 = "";
		if ( vhash32 != majorityHash32 ) {
			vbuf1 = "<font color=red><b>";
			vbuf2 = "</font></b>";
		}

		//int32_t switchGroup = 0;
		//if ( g_hostdb.m_indexSplits > 1 )
		//	switchGroup = h->m_group%g_hostdb.m_indexSplits;

		// host can have 2 ip addresses, get the one most
		// similar to that of the requester
		int32_t eip = g_hostdb.getBestIp ( h , s->m_ip );
		char ipbuf3[64];
		strcpy(ipbuf3,iptoa(eip));

		const char *fontTagFront = "";
		const char *fontTagBack  = "";
		if ( h->m_pingInfo.m_percentMemUsed >= 98.0 && 
		     format == FORMAT_HTML ) {
			fontTagFront = "<font color=red>";
			fontTagBack  = "</font>";
		}

		float cpu = h->m_pingInfo.m_cpuUsage;
		if ( cpu > 100.0 ) cpu = 100.0;
		if ( cpu < 0.0   ) cpu = -1.0;

		char diskUsageMsg[64];
		sprintf(diskUsageMsg,"%.1f%%",h->m_pingInfo.m_diskUsage);
		if ( h->m_pingInfo.m_diskUsage < 0.0 )
			sprintf(diskUsageMsg,"???");
		if ( h->m_pingInfo.m_diskUsage>=98.0 && format == FORMAT_HTML )
			sprintf(diskUsageMsg,"<font color=red><b>%.1f%%"
				"</b></font>",h->m_pingInfo.m_diskUsage);


		// split time, don't divide by zero!
		int32_t splitTime = 0;
		if ( h->m_splitsDone ) 
			splitTime = h->m_splitTimes / h->m_splitsDone;

		//char flagString[32];
		char tmpfb[64];
		SafeBuf fb(tmpfb,64);
		//char *fs = flagString;
		//*fs = '\0';

		// does its hosts.conf file disagree with ours?
		if ( h->m_pingInfo.m_hostsConfCRC &&
		     format == FORMAT_HTML &&
		     h->m_pingInfo.m_hostsConfCRC != g_hostdb.getCRC() )
			fb.safePrintf("<font color=red><b title=\"Hosts.conf "
				      "in disagreement with ours.\">H"
				      "</b></font>");
		if ( h->m_pingInfo.m_hostsConfCRC &&
		     format != FORMAT_HTML &&
		     h->m_pingInfo.m_hostsConfCRC != g_hostdb.getCRC() )
			fb.safePrintf("Hosts.conf in disagreement with ours");

		int32_t flags = h->m_pingInfo.m_flags;


		if ( format == FORMAT_HTML ) {
			// use these new ones for now
			int n = h->m_pingInfo.m_numCorruptDiskReads;
			if ( n )
				fb.safePrintf("<font color=red><b>"
					      "C"
					      "<sup>%" PRId32"</sup>"
					      "</b></font>"
					      , n );
			n = h->m_pingInfo.m_numOutOfMems;
			if ( n )
				fb.safePrintf("<font color=red><b>"
					      "O"
					      "<sup>%" PRId32"</sup>"
					      "</b></font>"
					      , n );
			n = h->m_pingInfo.m_socketsClosedFromHittingLimit;
			if ( n )
				fb.safePrintf("<font color=red><b>"
					      "K"
					      "<sup>%" PRId32"</sup>"
					      "</b></font>"
					      , n );
			if ( flags & PFLAG_OUTOFSYNC )
				fb.safePrintf("<font color=red><b>"
					      "N"
					      "</b></font>"
					      );
		}

		// recovery mode? reocvered from coring?
		if ((flags & PFLAG_RECOVERYMODE)&& format == FORMAT_HTML ) {
			fb.safePrintf("<b title=\"Recovered from core"
				      "\">x</b>");
			// this is only 8-bits at the moment so it's capped
			// at 255. this level is 1 the first time we core
			// and are restarted.
			if ( h->m_pingInfo.m_recoveryLevel > 1 )
			fb.safePrintf("<sup>%" PRId32"</sup>",
				      (int32_t)
				      h->m_pingInfo.m_recoveryLevel);
		}

		if ((flags & PFLAG_RECOVERYMODE)&& format != FORMAT_HTML )
			fb.safePrintf("Recovered from core");

		// rebalancing?
		if ( (flags & PFLAG_REBALANCING)&& format == FORMAT_HTML )
			fb.safePrintf("<b title=\"Currently "
				      "rebalancing\">R</b>");
		if ( (flags & PFLAG_REBALANCING)&& format != FORMAT_HTML )
			fb.safePrintf("Currently rebalancing");

		// has recs that should be in another shard? indicates
		// we need to rebalance or there is a bad hosts.conf
		if ((flags & PFLAG_FOREIGNRECS) && format == FORMAT_HTML )
			fb.safePrintf("<font color=red><b title=\"Foreign "
				      "data "
				      "detected. Needs rebalance.\">F"
				      "</b></font>");
		if ((flags & PFLAG_FOREIGNRECS) && format != FORMAT_HTML )
			fb.safePrintf("Foreign data detected. "
				      "Needs rebalance.");

		// if it has spiders going on say "S" with # as the superscript
		if ((flags & PFLAG_HASSPIDERS) && format == FORMAT_HTML )
			fb.safePrintf ( "<span title=\"Spidering\">S"
					"<sup>%" PRId32"</sup>"
					"</span>"
					,h->m_pingInfo.m_currentSpiders
					);

		if ( format == FORMAT_HTML && 
		     h->m_pingInfo.m_udpSlotsInUseIncoming ) {
			const char *f1 = "";
			const char *f2 = "";
			// MAXUDPSLOTS in Spider.cpp is 300 right now
			if ( h->m_pingInfo.m_udpSlotsInUseIncoming >= 300 ) {
				f1 = "<b>";
				f2 = "</b>";
			}
			if ( h->m_pingInfo.m_udpSlotsInUseIncoming >= 400 ) {
				f1 = "<b><font color=red>";
				f2 = "</font></b>";
			}
			fb.safePrintf("<span title=\"udpSlotsInUse\">"
				      "%s"
				      "U"
				      "<sup>%" PRId32"</sup>"
				      "%s"
				      "</span>"
				      ,f1
				      ,h->m_pingInfo.m_udpSlotsInUseIncoming
				      ,f2
				      );
		}

		if ( format == FORMAT_HTML && h->m_pingInfo.m_tcpSocketsInUse){
			const char *f1 = "";
			const char *f2 = "";
			if ( h->m_pingInfo.m_tcpSocketsInUse >= 100 ) {
				f1 = "<b>";
				f2 = "</b>";
			}
			if ( h->m_pingInfo.m_tcpSocketsInUse >= 200 ) {
				f1 = "<b><font color=red>";
				f2 = "</font></b>";
			}
			fb.safePrintf("<span title=\"tcpSocketsInUse\">"
				      "%s"
				      "T"
				      "<sup>%" PRId32"</sup>"
				      "%s"
				      "</span>"
				      ,f1
				      ,h->m_pingInfo.m_tcpSocketsInUse
				      ,f2
				      );
		}

		if ((flags & PFLAG_HASSPIDERS) && format != FORMAT_HTML )
			fb.safePrintf ( "Spidering");

		// say "M" if merging
		if ( (flags & PFLAG_MERGING) && format == FORMAT_HTML )
			fb.safePrintf ( "<span title=\"Merging\">M</span>");
		if ( (flags & PFLAG_MERGING) && format != FORMAT_HTML )
			fb.safePrintf ( "Merging");

		// say "D" if dumping
		if (   (flags & PFLAG_DUMPING) && format == FORMAT_HTML )
			fb.safePrintf ( "<span title=\"Dumping\">D</span>");
		if (   (flags & PFLAG_DUMPING) && format != FORMAT_HTML )
			fb.safePrintf ( "Dumping");


		// say "y" if doing the daily merge
		if (  !(flags & PFLAG_MERGEMODE0) )
			fb.safePrintf ( "y");


		if ( format == FORMAT_HTML && !h->m_spiderEnabled) {
			fb.safePrintf("<span title=\"Spider Disabled\" style=\"text-decoration:line-through;\">S</span>");
		}
		if ( format == FORMAT_HTML && !h->m_queryEnabled) {
			fb.safePrintf("<span title=\"Query Disabled\" style=\"text-decoration:line-through;\">Q</span>");
		}


		// clear it if it is us, this is invalid
		if ( ! h->m_gotPingReply ) {
			fb.reset();
			fb.safePrintf("??");
		}
		if ( fb.length() == 0 && format == FORMAT_HTML )
			fb.safePrintf("&nbsp;");

		fb.nullTerm();

		const char *bg = LIGHT_BLUE;
		if ( h->m_ping >= g_conf.m_deadHostTimeout ) 
			bg = "ffa6a6";


		//
		// BEGIN XML OUTPUT
		//
		if ( format == FORMAT_XML ) {
			
			sb.safePrintf("\t<host>\n"
				      "\t\t<name><![CDATA["
				      );
			sb.cdataEncode (h->m_hostname);
			sb.safePrintf("]]></name>\n");
			sb.safePrintf("\t\t<shard>%" PRId32"</shard>\n",
				      (int32_t)h->m_shardNum);
			sb.safePrintf("\t\t<mirror>%" PRId32"</mirror>\n",
				      h->m_stripe);

			sb.safePrintf("\t\t<ip1>%s</ip1>\n",
				      iptoa(h->m_ip));
			sb.safePrintf("\t\t<ip2>%s</ip2>\n",
				      iptoa(h->m_ipShotgun));

			sb.safePrintf("\t\t<httpPort>%" PRId32"</httpPort>\n",
				      (int32_t)h->m_httpPort);
			sb.safePrintf("\t\t<udpPort>%" PRId32"</udpPort>\n",
				      (int32_t)h->m_port);
			sb.safePrintf("\t\t<dnsPort>%" PRId32"</dnsPort>\n",
				      (int32_t)h->m_dnsClientPort);

			//sb.safePrintf("\t\t<hdTemp>%s</hdTemp>\n",hdbuf);
			sb.safePrintf("\t\t<gbVersion>%s</gbVersion>\n",vbuf);

			sb.safePrintf("\t\t<resends>%" PRId32"</resends>\n",
				      h->m_pingInfo.m_totalResends);

			/*
			  MDW: take out for new stuff
			sb.safePrintf("\t\t<errorReplies>%" PRId32"</errorReplies>\n",
				      h->m_errorReplies);
			*/

			sb.safePrintf("\t\t<errorTryAgains>%" PRId32
				      "</errorTryAgains>\n",
				      h->m_pingInfo.m_etryagains);

			sb.safePrintf("\t\t<udpSlotsInUse>%" PRId32
				      "</udpSlotsInUse>\n",
				      h->m_pingInfo.m_udpSlotsInUseIncoming);

			sb.safePrintf("\t\t<tcpSocketsInUse>%" PRId32
				      "</tcpSocketsInUse>\n",
				      h->m_pingInfo.m_tcpSocketsInUse);

			/*
			sb.safePrintf("\t\t<dgramsTo>%" PRId64"</dgramsTo>\n",
				      h->m_dgramsTo);
			sb.safePrintf("\t\t<dgramsFrom>%" PRId64"</dgramsFrom>\n",
				      h->m_dgramsFrom);
			*/

			sb.safePrintf("\t\t<numCorruptDiskReads>%" PRId32
				      "</numCorruptDiskReads>\n"
				      ,h->m_pingInfo.m_numCorruptDiskReads);
			sb.safePrintf("\t\t<numOutOfMems>%" PRId32
				      "</numOutOfMems>\n"
				      ,h->m_pingInfo.m_numOutOfMems);
			sb.safePrintf("\t\t<numClosedSockets>%" PRId32
				      "</numClosedSockets>\n"
				      ,h->m_pingInfo.
				      m_socketsClosedFromHittingLimit);
			sb.safePrintf("\t\t<numOutstandingSpiders>%" PRId32
				      "</numOutstandingSpiders>\n"
				      ,h->m_pingInfo.m_currentSpiders );


			sb.safePrintf("\t\t<splitTime>%" PRId32"</splitTime>\n",
				      splitTime);
			sb.safePrintf("\t\t<splitsDone>%" PRId32"</splitsDone>\n",
				      h->m_splitsDone);
			
			sb.safePrintf("\t\t<status><![CDATA[%s]]></status>\n",
				      fb.getBufStart());

			sb.safePrintf("\t\t<slowDiskReads>%" PRId32
				      "</slowDiskReads>\n",
				      h->m_pingInfo.m_slowDiskReads);

			sb.safePrintf("\t\t<docsIndexed>%" PRId32
				      "</docsIndexed>\n",
				      h->m_pingInfo.m_totalDocsIndexed);

			sb.safePrintf("\t\t<percentMemUsed>%.1f%%"
				      "</percentMemUsed>",
				      h->m_pingInfo.m_percentMemUsed); // float

			sb.safePrintf("\t\t<cpuUsage>%.1f%%"
				      "</cpuUsage>",
				      cpu );

			sb.safePrintf("\t\t<percentDiskUsed><![CDATA[%s]]>"
				      "</percentDiskUsed>",
				      diskUsageMsg);

			sb.safePrintf("\t\t<maxPing1>%s</maxPing1>\n",
				      pms );

			sb.safePrintf("\t\t<maxPingAge1>%" PRId32"ms</maxPingAge1>\n",
				      pingAge );

			sb.safePrintf("\t\t<ping1>%s</ping1>\n",
				      ptr );

			sb.safePrintf("\t\t<note>%s</note>\n",
				      h->m_note );

			sb.safePrintf("\t\t<spider>%" PRId32"</spider>\n",
						  (int32_t)h->m_spiderEnabled );


			sb.safePrintf("\t\t<query>%" PRId32"</query>\n",
						  (int32_t)h->m_queryEnabled );

			sb.safePrintf("\t</host>\n");

			continue;
		}
		//
		// END XML OUTPUT
		//


		//
		// BEGIN JSON OUTPUT
		//
		if ( format == FORMAT_JSON ) {
			
			sb.safePrintf("\t\"host\":{\n");
			sb.safePrintf("\t\t\"name\":\"%s\",\n",h->m_hostname);
			sb.safePrintf("\t\t\"shard\":%" PRId32",\n",
				      (int32_t)h->m_shardNum);
			sb.safePrintf("\t\t\"mirror\":%" PRId32",\n", h->m_stripe);

			sb.safePrintf("\t\t\"ip1\":\"%s\",\n",iptoa(h->m_ip));
			sb.safePrintf("\t\t\"ip2\":\"%s\",\n",
				      iptoa(h->m_ipShotgun));

			sb.safePrintf("\t\t\"httpPort\":%" PRId32",\n",
				      (int32_t)h->m_httpPort);
			sb.safePrintf("\t\t\"udpPort\":%" PRId32",\n",
				      (int32_t)h->m_port);
			sb.safePrintf("\t\t\"dnsPort\":%" PRId32",\n",
				      (int32_t)h->m_dnsClientPort);

			//sb.safePrintf("\t\t\"hdTemp\":\"%s\",\n",hdbuf);
			sb.safePrintf("\t\t\"gbVersion\":\"%s\",\n",vbuf);

			sb.safePrintf("\t\t\"resends\":%" PRId32",\n",
				      h->m_pingInfo.m_totalResends);

			/*
			sb.safePrintf("\t\t\"errorReplies\":%" PRId32",\n",
				      h->m_errorReplies);
			*/
			sb.safePrintf("\t\t\"errorTryAgains\":%" PRId32",\n",
				      h->m_pingInfo.m_etryagains);
			sb.safePrintf("\t\t\"udpSlotsInUse\":%" PRId32",\n",
				      h->m_pingInfo.m_udpSlotsInUseIncoming);
			sb.safePrintf("\t\t\"tcpSocketsInUse\":%" PRId32",\n",
				      h->m_pingInfo.m_tcpSocketsInUse);

			/*
			sb.safePrintf("\t\t\"dgramsTo\":%" PRId64",\n",
				      h->m_dgramsTo);
			sb.safePrintf("\t\t\"dgramsFrom\":%" PRId64",\n",
				      h->m_dgramsFrom);
			*/


			sb.safePrintf("\t\t\"numCorruptDiskReads\":%" PRId32",\n"
				      ,h->m_pingInfo.m_numCorruptDiskReads);
			sb.safePrintf("\t\t\"numOutOfMems\":%" PRId32",\n"
				      ,h->m_pingInfo.m_numOutOfMems);
			sb.safePrintf("\t\t\"numClosedSockets\":%" PRId32",\n"
				      ,h->m_pingInfo.
				      m_socketsClosedFromHittingLimit);
			sb.safePrintf("\t\t\"numOutstandingSpiders\":%" PRId32
				      ",\n"
				      ,h->m_pingInfo.m_currentSpiders );


			sb.safePrintf("\t\t\"splitTime\":%" PRId32",\n",
				      splitTime);
			sb.safePrintf("\t\t\"splitsDone\":%" PRId32",\n",
				      h->m_splitsDone);
			
			sb.safePrintf("\t\t\"status\":\"%s\",\n",
				      fb.getBufStart());

			sb.safePrintf("\t\t\"slowDiskReads\":%" PRId32",\n",
				      h->m_pingInfo.m_slowDiskReads);

			sb.safePrintf("\t\t\"docsIndexed\":%" PRId32",\n",
				      h->m_pingInfo.m_totalDocsIndexed);

			sb.safePrintf("\t\t\"percentMemUsed\":\"%.1f%%\",\n",
				      h->m_pingInfo.m_percentMemUsed); // float

			sb.safePrintf("\t\t\"cpuUsage\":\"%.1f%%\",\n",cpu);

			sb.safePrintf("\t\t\"percentDiskUsed\":\"%s\",\n",
				      diskUsageMsg);

			sb.safePrintf("\t\t\"maxPing1\":\"%s\",\n",pms);

			sb.safePrintf("\t\t\"maxPingAge1\":\"%" PRId32"ms\",\n",
				      pingAge );

			sb.safePrintf("\t\t\"ping1\":\"%s\",\n",
				      ptr );

			sb.safePrintf("\t\t\"note\":\"%s\"\n",
				      h->m_note );

			sb.safePrintf("\t\t\"spider\":\"%" PRId32"\"\n",
						  (int32_t)h->m_spiderEnabled );

			sb.safePrintf("\t\t\"query\":\"%" PRId32"\"\n",
						  (int32_t)h->m_queryEnabled );


            
			sb.safePrintf("\t},\n");

			continue;
		}
		//
		// END JSON OUTPUT
		//


		sb.safePrintf (
			  "<tr bgcolor=#%s>"
			  "<td><a href=\"http://%s:%hi/admin/hosts?"
			  ""
			  "c=%s"
			  "&sort=%" PRId32"\">%" PRId32"</a></td>"

			  "<td>%s</td>" // hostname

			  "<td>%" PRId32"</td>" // group
			  "<td>%" PRId32"</td>" // stripe
			  //"<td>0x%08" PRIx32"</td>" // group mask

			  //"<td>%s</td>" // ip1
			  //"<td>%s</td>" // ip2
			  //"<td>%hi</td>" // port
			  //"<td>%hi</td>" // client port
			  "<td>%hi</td>" // http port
			  //"<td>%" PRId32"</td>" // token group num
			  //"<td>%" PRId32"</td>" // switch group
			  //"<td>%s</td>" // tmpN

			  // hd temps
			  // no, this is gb version now
			  "<td><nobr>%s%s%s</nobr></td>"

			  // resends
			  "<td>%" PRId32"</td>"

			  // error replies
			  //"<td>%" PRId32"</td>"

			  // etryagains
			  "<td>%" PRId32"</td>"

			  // # dgrams sent to
			  //"<td>%" PRId64"</td>"
			  // # dgrams recvd from
			  //"<td>%" PRId64"</td>"

			  // loadavg
			  //"<td>%.2f</td>"

			  // split time
			  "<td>%" PRId32"</td>"
			  // splits done
			  "<td>%" PRId32"</td>"

			  // flags
			  "<td>%s</td>"

			  // slow disk reads
			  "<td>%" PRId32"</td>"

			  // docs indexed
			  "<td>%" PRId32"</td>"

			  // percent mem used
			  "<td>%s%.1f%%%s</td>"
			  // cpu usage
			  "<td>%.1f%%</td>"
			  // disk usage
			  "<td>%s</td>"

			  // ping max
			  "<td>%s</td>"

			  // ping age
			  "<td>%" PRId32"ms</td>"

			  // ping
			  "<td>%s</td>"
			  //"<td>%s</td>"
			  //"<td>%" PRId32"ms</td>"
			  "<td nowrap=1>%s</td>"
			  "</tr>" , 
			  bg,//LIGHT_BLUE ,
			  ipbuf3, h->m_httpPort, 
			  cs, sort,
			  i , 
			  h->m_hostname,
			  (int32_t)h->m_shardNum,//group,
			  h->m_stripe,
			  // group mask is not looked at a lot and is
			  // really only for indexdb and a few other rdbs
			  //g_hostdb.makeGroupId(i,ng) ,
			  //ipbuf1,
			  //ipbuf2,
			  //h->m_port , 
			  //h->m_dnsClientPort ,
			  h->m_httpPort ,
			  //h->m_tokenGroupNum,
			  //switchGroup ,
			  //tmpN,
			  vbuf1,
			  vbuf,//hdbuf,
			  vbuf2,

			  h->m_pingInfo.m_totalResends,


			  // h->m_errorReplies,
			  h->m_pingInfo.m_etryagains,
			  // h->m_dgramsTo,
			  // h->m_dgramsFrom,

			  //h->m_loadAvg, // double
			  splitTime,
			  h->m_splitsDone,

			  fb.getBufStart(),//flagString,

			  h->m_pingInfo.m_slowDiskReads,
			  h->m_pingInfo.m_totalDocsIndexed,

			  fontTagFront,
			  h->m_pingInfo.m_percentMemUsed, // float
			  fontTagBack,
			  cpu, // float
			  diskUsageMsg,

			  // ping max
			  pms,
			  // ping age
			  pingAge,

			  //avg , 
			  //stdDev,
			  //ping,
			  ptr ,
			  //ptr2 ,
			  h->m_note );
	}

	if ( format == FORMAT_XML ) {
		sb.safePrintf("</response>\n");
		return g_httpServer.sendDynamicPage ( s , 
						      sb.getBufStart(),
						      sb.length() ,
						      0, 
						      false, 
						      "text/xml");
	}

	if ( format == FORMAT_JSON ) {
		// remove last \n, from json host{}
		sb.m_length -= 2;
		sb.safePrintf("\n}\n}");
		return g_httpServer.sendDynamicPage ( s , 
						      sb.getBufStart(),
						      sb.length() ,
						      0, 
						      false, 
						      "application/json");
	}


	// end the table now
	sb.safePrintf ( "</table><br>\n" );

	

	if( g_hostdb.m_numSpareHosts ) {
		// print spare hosts table
		sb.safePrintf ( 
					   "<table %s>"
					   "<tr class=hdrow><td colspan=10><center>"
					   //"<font size=+1>"
					   "<b>Spares</b>"
					   //"</font>"
					   "</td></tr>" 
					   "<tr bgcolor=#%s>"
					   "<td><b>spareId</td>"
					   "<td><b>host name</td>"
					   "<td><b>ip1</td>"
					   "<td><b>ip2</td>"
					   //"<td><b>udp port</td>"
					   //"<td><b>priority udp port</td>"
					   //"<td><b>dns client port</td>"
					   "<td><b>http port</td>"
					   //"<td><b>switch id</td>"

					   // this is now fairly obsolete
					   //"<td><b>ide channel</td>"

					   "<td><b>note</td>",
					   TABLE_STYLE,
					   DARK_BLUE  );

		for ( int32_t i = 0; i < g_hostdb.m_numSpareHosts; i++ ) {
			// get the ith host (hostId)
			Host *h = g_hostdb.getSpare ( i );

			char ipbuf1[64];
			char ipbuf2[64];
			strcpy(ipbuf1,iptoa(h->m_ip));
			strcpy(ipbuf2,iptoa(h->m_ipShotgun));

			// print it
			sb.safePrintf (
						   "<tr bgcolor=#%s>"
						   "<td>%" PRId32"</td>"
						   "<td>%s</td>"
						   "<td>%s</td>"
						   "<td>%s</td>"
						   //"<td>%hi</td>"
						   //"<td>%hi</td>" // priority udp port
						   //"<td>%hi</td>"
						   "<td>%hi</td>"
						   //"<td>%i</td>" // switch id
						   "<td>%s</td>"
						   "</tr>" , 
						   LIGHT_BLUE,
						   i , 
						   h->m_hostname,
						   ipbuf1,
						   ipbuf2,
						   //h->m_port , 
						   //h->m_port2 , 
						   //h->m_dnsClientPort ,
						   h->m_httpPort ,
						   //h->m_switchId,
						   h->m_note );
		}
		sb.safePrintf ( "</table><br>" );
	}



	/*
	// print proxy hosts table
	sb.safePrintf ( 
		  "<table %s>"
		  "<tr class=hdrow><td colspan=12><center>"
		  //"<font size=+1>"
		  "<b>Proxies</b>"
		  //"</font>"
		  "</td></tr>" 
		  "<tr bgcolor=#%s>"
		  "<td><b>proxyId</b></td>"
		  "<td><b>type</b></td>"
		  "<td><b>host name</b></td>"
		  "<td><b>ip1</b></td>"
		  "<td><b>ip2</b></td>"
		  //"<td><b>udp port</td>"

		  //"<td><b>priority udp port</td>"

		  //"<td><b>dns client port</td>"
		  "<td><b>http port</b></td>"
		  //"<td><b>switch id</td>"
                  "<td><b>max ping1</b></td>"
                  "<td><b>ping1 age</b></td>"
                  "<td><b>ping1</b></td>"
		  //"<td><b>ping2</b></td>"
		  // this is now fairly obsolete
		  //"<td><b>ide channel</td>"

		  "<td><b>note</td>",
		  TABLE_STYLE,
		  DARK_BLUE 
			);
	for ( int32_t i = 0; i < g_hostdb.m_numProxyHosts; i++ ) {
		// get the ith host (hostId)
		Host *h = g_hostdb.getProxy ( i );

                char ptr[256];
                int32_t pingAge = generatePingMsg(h, nowmsLocal, ptr);

		char ipbuf1[64];
		char ipbuf2[64];
		strcpy(ipbuf1,iptoa(h->m_ip));
		strcpy(ipbuf2,iptoa(h->m_ipShotgun));

		// host can have 2 ip addresses, get the one most
		// similar to that of the requester
		int32_t eip = g_hostdb.getBestIp ( h , s->m_ip );
		char ipbuf3[64];
		strcpy(ipbuf3,iptoa(eip));


		char pms[64];
		if ( h->m_pingMax < 0 ) sprintf(pms,"???");
		else                    sprintf(pms,"%" PRId32"ms",h->m_pingMax);
		// the sync status ascii-ized

		char *type = "proxy";
		if ( h->m_type == HT_QCPROXY ) type = "qcproxy";
		if ( h->m_type == HT_SCPROXY ) type = "scproxy";

		// print it
		sb.safePrintf (
			  "<tr bgcolor=#%s>"

			  "<td><a href=\"http://%s:%hi/admin/hosts?"
			  ""
			  "c=%s\">"
			  "%" PRId32"</a></td>"

			  "<td>%s</td>"
			  "<td>%s</td>"
			  "<td>%s</td>"
			  "<td>%s</td>"
			  //"<td>%hi</td>"
			  //"<td>%hi</td>" // priority udp port
			  //"<td>%hi</td>"
			  "<td>%hi</td>"
			  //"<td>%i</td>" // switch id
			  "<td>%s</td>" // ping max
			  "<td>%" PRId32"ms</td>" // ping age
			  "<td>%s</td>" // ping
			  //"<td>%" PRId32"</td>" // ide channel
			  "<td>%s </td>"
			  "</tr>" , 

			  LIGHT_BLUE,
			  ipbuf3,
			  h->m_httpPort,
			  cs,
			  i , 

			  type,
			  h->m_hostname,
			  ipbuf1,
			  ipbuf2,
			  //h->m_port , 
			  //h->m_port2 , 
			  //h->m_dnsClientPort ,
			  h->m_httpPort ,
			  //h->m_switchId,
			  pms,
                          pingAge,
                          ptr,
			  //h->m_ideChannel ,
			  h->m_note );
	}
	sb.safePrintf ( "</table><br><br>" );
	*/

	sb.safePrintf(
		      "<style>"
		      ".poo { background-color:#%s;}\n"
		      "</style>\n" ,
		      LIGHT_BLUE );


	// print help table
	sb.safePrintf ( 
		  "<table %s>"
		  "<tr class=hdrow><td colspan=10><center>"
		  //"<font size=+1>"
		  "<b>Key</b>"
		  //"</font>"
		  "</td></tr>" 

		  "<tr class=poo>"
		  "<td>host ip</td>"
		  "<td>The primary IP address of the host."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>shard</td>"
		  "<td>"
		  "The index is split into shards. Which shard does this "
		  "host serve?"
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>mirror</td>"
		  "<td>"
		  "A shard can be mirrored multiple times for "
		  "data redundancy."
		  "</td>"
		  "</tr>\n"

		  /*
		  "<tr class=poo>"
		  "<td>ip2</td>"
		  "<td>The secondary IP address of the host."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>udp port</td>"
		  "<td>The UDP port the host uses to send and recieve "
		  "datagrams."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>dns client port</td>"
		  "<td>The UDP port used to send and receive dns traffic with."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>http port</td>"
		  "<td>The port you can connect a browser to."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>best switch id</td>"
		  "<td>The host prefers to be on this switch because it "
		  "needs to send a lot of data to other hosts on this swtich. "
		  "Therefore, ideally, the best switch id should match the "
		  "actual switch id for optimal performance."
		  "</td>"
		  "</tr>\n"
		  */

		  /*
		  "<tr class=poo>"
		  "<td>switch id</td>"
		  "<td>Hosts that share the same switch id are "
		  "physically on the same switch."
		  "</td>"
		  "</tr>\n"
		  */

		  "<tr class=poo>"
		  "<td>dgrams resent</td>"
		  "<td>How many datagrams have had to be resent to a host "
		  "because it was not ACKed quick enough or because it was "
		  "fully ACKed but the entire request was resent in case "
		  "the host was reset."
		  "</td>"
		  "</tr>\n"

		  /*
		  "<tr class=poo>"
		  "<td>errors recvd</td>"
		  "<td>How many errors were received from a host in response "
		  "to a request to retrieve or insert data."
		  "</td>"
		  "</tr>\n"
		  */

		  "<tr class=poo>"
		  "<td>try agains recvd</td>"
		  "<td>How many ETRYAGAIN errors "
		  "were received in response to a "
		  "request to add data. Usually because the host's memory "
		  "is full and it is dumping its data to disk. This number "
		  "can be high if the host if failing to dump the data "
		  "to disk because of some malfunction, and it can therefore "
		  "bottleneck the entire cluster."
		  "</td>"
		  "</tr>\n"

		  /*
		  "<tr class=poo>"
		  "<td>dgrams to</td>"
		  "<td>How many datagrams were sent to the host from the "
		  "selected host since startup. Includes ACK datagrams. This "
		  "can actually be higher than the number of dgrams read "
		  "when the selected host is the same as the host in the "
		  "table because of resends. Gigablast will resend datagrams "
		  "that are not promptly ACKknowledged."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>dgrams from</td>"
		  "<td>How many datagrams were received from the host by the "
		  "selected host since startup. Includes ACK datagrams."
		  "</td>"
		  "</tr>\n"
		  */

		  "<tr class=poo>"
		  "<td>avg split time</td>"
		  "<td>Average time this host took to compute the docids "
		  "for a query. Useful for guaging the slowness of a host "
		  "compare to other hosts."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>splits done</td>"
		  "<td>Number of queries this host completed. Used in "
		  "computation of the <i>avg split time</i>."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>status</td>"
		  "<td>Status flags for the host. See key below."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>slow reads</td>"
		  "<td>Number of slow disk reads the host has had. "
		  "When this is big compared to other hosts it is a good "
		  "indicator its drives are relatively slow."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>docs indexed</td>"
		  "<td>Number of documents this host has indexed over all "
		  "collections. All hosts should have close to the same "
		  "number in a well-sharded situation."
		  "</td>"
		  "</tr>\n"

		  //"<tr class=poo>"
		  //"<td>loadavg</td>"
		  //"<td>1-minute sliding-window load average from "
		  //"/proc/loadavg."
		  //"</td>"
		  //"</tr>\n"

		  "<tr class=poo>"
		  "<td>mem used</td>"
		  "<td>Percentage of memory currently used."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>cpu used</td>"
		  "<td>Percentage of cpu resources in use by the gb process."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>disk used</td>"
		  "<td>Percentage of disk in use. When this gets close to "
		  "100%% you need to do something."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>max ping1</td>"
		  "<td>The worst ping latency from host to host."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>ping1 age</td>"
		  "<td>How long ago the last ping request was sent to "
		  "this host. Let's us know how fresh the ping time is."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>ping1</td>"
		  "<td>Ping time to this host on the primary network."
		  "</td>"
		  "</tr>\n"

		  /*
		  "<tr class=poo>"
		  "<td>ping2</td>"
		  "<td>Ping time to this host on the seconday/shotgun "
		  "network. This column is not visible if the shotgun "
		  "network is not enabled in the master controls."
		  "</td>"
		  "</tr>\n"
		  */

		  "<tr class=poo>"
		  "<td>M (status flag)</td>"
		  "<td>Indicates host is merging files on disk."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>D (status flag)</td>"
		  "<td>Indicates host is dumping data to disk."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>S (status flag)</td>"
		  "<td>Indicates host has outstanding spiders."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>y (status flag)</td>"
		  "<td>Indicates host is performing the daily merge."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>R (status flag)</td>"
		  "<td>Indicates host is performing a rebalance operation."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>F (status flag)</td>"
		  "<td>Indicates host has foreign records and requires "
		  "a rebalance operation."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>x (status flag)</td>"
		  "<td>Indicates host has abruptly exited due to a fatal "
		  "error (cored) and "
		  "restarted itself. The exponent is how many times it has "
		  "done this. If no exponent, it only did it once."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>C (status flag)</td>"
		  "<td>Indicates # of corrupted disk reads."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td>K (status flag)</td>"
		  "<td>Indicates # of sockets closed from hitting limit."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td><nobr>O (status flag)</nobr></td>"
		  "<td>Indicates # of times we ran out of memory."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td><nobr>N (status flag)</nobr></td>"
		  "<td>Indicates host's clock is NOT in sync with host #0. "
		  "Gigablast should automatically sync on startup, "
		  "so this would be a problem "
		  "if it does not go away. Hosts need to have their clocks "
		  "in sync before they can add data to their index."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td><nobr>U (status flag)</nobr></td>"
		  "<td>Indicates the number of active UDP transactions "
		  "which are incoming requests. These will pile up if a "
		  "host can't handle them fast enough."
		  "</td>"
		  "</tr>\n"

		  "<tr class=poo>"
		  "<td><nobr>T (status flag)</nobr></td>"
		  "<td>Indicates the number of active TCP transactions "
		  "which are either outgoing or incoming requests."
		  "</td>"
		  "</tr>\n"

		  ,
		  TABLE_STYLE
			);

	sb.safePrintf ( "</table><br></form><br>" );

	//p = g_pages.printAdminBottom ( p , pend );

	// calculate buffer length
	//int32_t bufLen = p - buf;
	// . send this page
	// . encapsulates in html header and tail
	// . make a Mime
	return g_httpServer.sendDynamicPage ( s , (char*) sb.getBufStart() ,
						  sb.length() );
}
Esempio n. 19
0
// . callback run once the list read for a request has completed
// . "state" is the State00 of the request; it holds the udp slot, the list,
//   the Msg5 and the UdpServer that we reply on
// . the "listb" and "msg5xx" arguments are not used in here, we use the list
//   and msg5 that are embedded in the State00 instead
// . if g_errno is set we free the state and send back an error reply
// . otherwise the list's buffer is handed to the udp server as the reply and
//   doneSending_ass() is passed along as the callback with st0 as its state
// . slot should be auto-nuked upon transmission or error
// . TODO: ensure if this sendReply() fails does it really nuke the slot?
void gotListWrapper ( void *state , RdbList *listb , Msg5 *msg5xx ) {
	// get the state
	State00 *st0 = (State00 *)state;
	// extract the udp slot and list and msg5. "slot" and "us" are copied
	// out now because the error path below uses them after deleting st0.
	UdpSlot   *slot =  st0->m_slot;
	RdbList   *list = &st0->m_list;
	Msg5      *msg5 = &st0->m_msg5;
	UdpServer *us   =  st0->m_us;
	// sanity check -- ensure they match
	//if ( niceness != st0->m_niceness )
	//	log("Msg0: niceness mismatch");
	// debug msg
	//if ( niceness != 0 ) 
	//	log("HEY! niceness is not 0");
	// timing debug: log what we are about to send and how long it took
	if ( g_conf.m_logTimingNet || g_conf.m_logDebugNet ) {
		//log("Msg0:hndled request %"UINT64"",gettimeofdayInMilliseconds());
		int32_t size = -1;
		// "list" is the address of a member of st0, so this is always
		// true and "size" is always the list size
		if ( list ) size     = list->getListSize();
		log(LOG_TIMING|LOG_DEBUG,
		    "net: msg0: Handled request for data. "
		    "Now sending data termId=%"UINT64" size=%"INT32""
		    " transId=%"INT32" ip=%s port=%i took=%"INT64" "
		    "(niceness=%"INT32").",
		    g_posdb.getTermId(msg5->m_startKey),
		    size,slot->m_transId,
		    iptoa(slot->m_ip),slot->m_port,
		    gettimeofdayInMilliseconds() - st0->m_startTime ,
		    st0->m_niceness );
	}
	// debug
	//if ( ! msg5->m_includeTree )
	//	log("hotit\n");
	// on error nuke the state (which contains the list) and reply with
	// the error. st0 must not be touched after the delete.
	if ( g_errno ) {
		mdelete ( st0 , sizeof(State00) , "Msg0" );
		delete (st0);
		// TODO: free "slot" if this send fails
		us->sendErrorReply ( slot , g_errno );
		return;
	}

	QUICKPOLL(st0->m_niceness);
	// point to the serialized list in "list"
	char *data      = list->getList();
	int32_t  dataSize  = list->getListSize();
	char *alloc     = list->getAlloc();
	int32_t  allocSize = list->getAllocSize();
	// tell list not to free the data since it is a reply so UdpServer
	// will free it when it destroys the slot
	list->setOwnData ( false );
	// keep track of stats
	Rdb *rdb = getRdbFromId ( st0->m_rdbId );
	if ( rdb ) rdb->sentReplyGet ( dataSize );
	// TODO: can we free any memory here???

	// keep track of how long it takes to complete the send
	st0->m_startTime = gettimeofdayInMilliseconds();
	// debug point: allow 20 bytes of slack over what was requested
	int32_t oldSize = msg5->m_minRecSizes;
	int32_t newSize = msg5->m_minRecSizes + 20;
	// watch for wrap around
	if ( newSize < oldSize ) newSize = 0x7fffffff;
	// only complain for lists with a fixed data size of 0 that are more
	// than 100 bytes over the padded request size
	if ( dataSize > newSize && list->getFixedDataSize() == 0 &&
	     // do not annoy me with these linkdb msgs
	     dataSize > newSize+100 ) 
		log(LOG_LOGIC,"net: msg0: Sending more data than what was "
		    "requested. Ineffcient. Bad engineer. dataSize=%"INT32" "
		    "minRecSizes=%"INT32".",dataSize,oldSize);
	// NOTE: the sectiondb compression/caching code below is disabled
	/*
	// always compress these lists
	if ( st0->m_rdbId == RDB_SECTIONDB ) { // && 1 == 3) {

		// get sh48, the sitehash
		key128_t *startKey = (key128_t *)msg5->m_startKey ;
		int64_t sh48 = g_datedb.getTermId(startKey);

		// debug
		//log("msg0: got sectiondblist from disk listsize=%"INT32"",
		//    list->getListSize());

		if ( dataSize > 50000 )
			log("msg0: sending back list rdb=%"INT32" "
			    "listsize=%"INT32" sh48=0x%"XINT64"",
			    (int32_t)st0->m_rdbId,
			    dataSize,
			    sh48);

		// save it
		int32_t origDataSize = dataSize;
		// store compressed list on itself
		char *dst = list->m_list;
		// warn if niceness is 0!
		if ( st0->m_niceness == 0 )
			log("msg0: compressing sectiondb list at niceness 0!");
		// compress the list
		uint32_t lastVoteHash32 = 0LL;
		SectionVote *lastVote = NULL;
		for ( ; ! list->isExhausted() ; list->skipCurrentRecord() ) {
			// breathe
			QUICKPOLL ( st0->m_niceness );
			// get rec
			char *rec = list->getCurrentRec();
			// for ehre
			key128_t *key = (key128_t *)rec;
			// the score is the bit which is was set in 
			// Section::m_flags for that docid
			int32_t secType = g_indexdb.getScore ( (char *)key );
			// 0 means it probably used to count # of voters
			// from this site, so i don't think xmldoc uses
			// that any more
			if ( secType == SV_SITE_VOTER ) continue;
			// treat key like a datedb key and get the taghash
			uint32_t h32 = g_datedb.getDate ( key );
			// get data/vote from the current record in the 
			// sectiondb list
			SectionVote *sv=(SectionVote *)list->getCurrentData ();
			// get the average score for this doc
			float avg = sv->m_score ;
			if ( sv->m_numSampled > 0.0 ) avg /= sv->m_numSampled;
			// if same as last guy, add to it
			if ( lastVoteHash32 == h32 && lastVote ) {
				// turn possible multi-vote into single docid
				// into a single vote, with the score averaged.
				lastVote->m_score += avg;
				lastVote->m_numSampled++;
				continue;
			}
			// otherwise, add in a new guy!
			*(key128_t *)dst = *key;
			dst += sizeof(key128_t);
			// the new vote
			SectionVote *dsv = (SectionVote *)dst;
			dsv->m_score = avg;
			dsv->m_numSampled = 1;
			// set this
			lastVote = dsv;
			lastVoteHash32 = h32;
			// skip over
			dst += sizeof(SectionVote);
		}
		// update the list size now for sending back
		dataSize = dst - data;
		// if the list was over the requested minrecsizes we need
		// to set a flag so that the caller will do a re-call.
		// so making the entire odd, will be the flag.
	        if ( origDataSize > msg5->m_minRecSizes && 
		     dataSize < origDataSize ) {
			*dst++ = '\0';
			dataSize++;
		}

		// debug
		//log("msg0: compressed sectiondblist from disk "
		//    "newlistsize=%"INT32"", dataSize);
		
		// use this timestamp
		int32_t now = getTimeLocal();//Global();
		// finally, cache this sucker
		s_sectiondbCache.addRecord ( msg5->m_coll,
					     (char *)startKey,//(char *)&sh48
					     data, 
					     dataSize ,
					     now );
		// ignore errors
		g_errno = 0;
	}
	*/
		    
	//
	// for linkdb lists, drop every key whose linker IP32 equals that of
	// the previously kept key. the list is compacted in place: "dst"
	// trails the read cursor, so the copy never overwrites unread keys.
	//
	if ( st0->m_rdbId == RDB_LINKDB ) {
		// store compressed list on itself
		char *dst = list->m_list;
		// keep stats (currently only counted, see below)
		int32_t totalOrigLinks = 0;
		int32_t ipDups = 0;
		int32_t lastIp32 = 0;
		char *listEnd = list->getListEnd();
		// compress the list
		for ( ; ! list->isExhausted() ; list->skipCurrentRecord() ) {
			// breathe
			QUICKPOLL ( st0->m_niceness );
			// count it
			totalOrigLinks++;
			// get rec
			char *rec = list->getCurrentRec();
			int32_t ip32 = g_linkdb.getLinkerIp_uk((key224_t *)rec );
			// same as one before?
			if ( ip32 == lastIp32 && 
			     // are we the last rec? always keep that one for
			     // advancing the m_nextKey in Linkdb more 
			     // efficiently.
			     rec + LDBKS < listEnd ) {
				ipDups++;
				continue;
			}
			// store it
			gbmemcpy (dst , rec , LDBKS );
			dst += LDBKS;
			// update it
			lastIp32 = ip32;
		}
		// . if we removed one key, store the stats
		// . caller should recognize reply is not a multiple of
		//   the linkdb key size LDBKS and know it's there!
		// . storing the stats is disabled for now, so this does
		//   nothing
		if ( ipDups ) {
			//*(int32_t *)dst = totalOrigLinks;
			//dst += 4;
			//*(int32_t *)dst = ipDups;
			//dst += 4;
		}
		// update list parms to reflect the compacted size
		list->m_listSize = dst - list->m_list;
		list->m_listEnd  = list->m_list + list->m_listSize;
		data      = list->getList();
		dataSize  = list->getListSize();
	}


	//log("sending replySize=%"INT32" min=%"INT32"",dataSize,msg5->m_minRecSizes);
	// . TODO: dataSize may not equal list->getListMaxSize() so
	//         Mem class may show an imbalance
	// . now g_udpServer is responsible for freeing data/dataSize
	// . the "true" means to call doneSending_ass() from the signal handler
	//   if need be
	st0->m_us->sendReply_ass  ( data            ,
				    dataSize        ,
				    alloc           , // alloc
				    allocSize       , // alloc size
				    slot            ,
				    60              ,
				    st0             ,
				    doneSending_ass ,
				    -1              ,
				    -1              ,
				    true            );
}	
// . copies our data to the host being synced, g_hostdb.m_syncHost, by
//   running ssh/rcp shell commands through my_system_r()
// . copies the map file and every part file of every file of every base of
//   every rdb (except syncdb), then the files listed in g_files[], then
//   writes "0" to the remote insync.dat and copies the *-saved.dat files
// . "amThread" is not used in here
// . if a command fails or does not fit in the command buffer we log that,
//   clear m_doingSync on the sync host and set g_hostdb.m_syncHost to NULL
void Syncdb::syncStart_r ( bool amThread ) {

	// turn this off
	g_process.m_suspendAutoSave = true;

	char cmd[1024];
	// so the error path never logs an uninitialized buffer
	cmd[0] = '\0';
	// . get synchost best ip
	// . copy the string into our own buffer right away since we use it
	//   for the whole sync, which can take hours, and we should not rely
	//   on whatever storage iptoa() returned staying unchanged that long
	char ips[64];
	snprintf ( ips , sizeof(ips) , "%s" ,
		   iptoa ( g_hostdb.getAliveIp ( g_hostdb.m_syncHost ) ) );
	// his dir
	char *dir = g_hostdb.m_syncHost->m_dir;
	// ours
	char *mydir = g_hostdb.m_myHost->m_dir;
	// generic
	long err;
	// what snprintf() returned, used to detect a truncated command
	int n;

	// loop over every rdb and every data and map file in each rdb
	for ( long i = 0 ; i < RDB_END ; i++ ) {
		// skip SYNCDB
		if ( i == RDB_SYNCDB ) continue;
		// get that rdb
		Rdb *rdb = getRdbFromId ( i );
		// skip if none
		if ( ! rdb ) continue;

		// loop over each collection's base
		for ( long j = 0 ; j < rdb->getNumBases() ; j++ ) {
			// get that base
			RdbBase *base = rdb->getBase(j);//m_bases[j];
			if ( ! base ) continue;
			// get coll
			char *coll = base->m_coll;
			// and num
			long collnum = base->m_collnum;

			// . make the dir
			// . NOTE(review): this command is only logged, its
			//   execution was already commented out. confirm
			//   the remote coll dir exists before the rcp below.
			snprintf ( cmd , sizeof(cmd) ,
				   "ssh %s 'mkdir %scoll.%s.%li'",
				   ips,dir,coll,collnum);
			log ( LOG_INFO, "sync: %s", cmd );
			//int err = my_system_r ( cmd, 3600*24 );
			//if ( err != 0 ) goto hadError;

			// copy the files
			for ( long k = 0 ; k < base->m_numFiles ; k++ ) {
				// . we are in a thread so we can sleep
				// . NOTE(review): this only sleeps once, it
				//   does not wait for the dump to finish
				if ( base->isDumping() ) sleep ( 1 );

				// get map
				RdbMap *map = base->m_maps[k];
				// . copy the map file
				// . the old format string ended with a stray
				//   single quote which left the shell with
				//   an unterminated quote
				n = snprintf ( cmd , sizeof(cmd) ,
					       "rcp %s %s:%scoll.%s.%li/",
					       map->getFilename(),
					       ips,dir,coll,collnum);
				if ( n < 0 || n >= (int)sizeof(cmd) )
					goto tooLong;
				log ( LOG_INFO, "sync: %s", cmd );
				if ( ( err = my_system_r ( cmd, 3600*24 ) ) )
					goto hadError;

				// get the file
				BigFile *f = base->m_files[k];

				// loop over each little part file
				for ( long m = 0 ; m < f->m_numParts ; m++ ) {
					// get part file
					File *p = f->m_files[m];
					// copy that (stray quote removed
					// here too)
					n = snprintf ( cmd , sizeof(cmd) ,
						  "rcp %s %s:%scoll.%s.%li/",
						  p->m_filename,
						  ips,dir,coll,collnum);
					if ( n < 0 || n >= (int)sizeof(cmd) )
						goto tooLong;
					// execute
					log ( LOG_INFO, "sync: %s", cmd );
					if ( ( err = my_system_r(cmd,3600*24)))
						goto hadError;
				}
			}
		}
	}

	// make the dirs
	n = snprintf ( cmd , sizeof(cmd) ,
		       "ssh %s '"
		       "mkdir %s/dict/ ;"
		       "mkdir %s/dict/en/ ;"
		       "mkdir %s/ucdata/ ;"
		       "mkdir %s/.antiword/ ;"
		       "'" ,
		       ips,
		       dir,
		       dir,
		       dir,
		       dir
		       );
	if ( n < 0 || n >= (int)sizeof(cmd) ) goto tooLong;
	// execute
	log ( LOG_INFO, "sync: %s", cmd );
	if ( ( err = my_system_r ( cmd, 3600*24 ) ) ) goto hadError;


	// loop over the files in Process.cpp
	for ( long i = 0 ; i < 99999 ; i++ ) {
		// null means end
		if ( ! g_files[i] ) break;
		n = snprintf ( cmd , sizeof(cmd) , "rcp %s%s %s:%s",
			       mydir,g_files[i],ips,dir);
		if ( n < 0 || n >= (int)sizeof(cmd) ) goto tooLong;
		// execute
		log ( LOG_INFO, "sync: %s", cmd );
		if ( ( err = my_system_r ( cmd, 3600*24 ) ) ) goto hadError;
	}

	// . new guy is NOT in sync
	// . the closing single quote was missing from this command
	n = snprintf ( cmd , sizeof(cmd) ,
		       "ssh %s 'echo 0 > %sinsync.dat'", ips,dir);
	if ( n < 0 || n >= (int)sizeof(cmd) ) goto tooLong;
	// execute
	log ( LOG_INFO, "sync: %s", cmd );
	if ( ( err = my_system_r ( cmd, 3600*24 ) ) ) goto hadError;

	// . saved files
	// . these go into the remote dir. the destination used to be
	//   "insync.dat", which would have overwritten the flag we just
	//   wrote above and is not a directory that several files can be
	//   copied into.
	n = snprintf ( cmd , sizeof(cmd) , "rcp %s*-saved.dat %s:%s",
		       mydir,ips,dir);
	if ( n < 0 || n >= (int)sizeof(cmd) ) goto tooLong;
	// execute
	log ( LOG_INFO, "sync: %s", cmd );
	if ( ( err = my_system_r ( cmd, 3600*24 ) ) ) goto hadError;
	
	// completed!
	return;

 tooLong:
	// never run a command that was cut off by the buffer size
	log ( "sync: Command \"%s\" was truncated to fit in %li bytes. "
	      "Aborting sync.", cmd , (long)sizeof(cmd) );
	goto abortSync;

 hadError:
	log ( "sync: Call to system(\"%s\") had error %s.",cmd,strerror(err));

 abortSync:
	g_hostdb.m_syncHost->m_doingSync = 0;
	g_hostdb.m_syncHost              = NULL;
	return;
}
// . prints two html tables describing the active slots of "server" into "p"
// . the first is a summary of how many slots there are of each msg type,
//   split up by niceness 0 and non-zero
// . the second has one row per slot, sorted by age with the oldest first
// . "title" is used as the table caption
// . "coll" is put in the links we generate and defaults to "main" if NULL
// . "pwd" is not used
// . "fromIp" is passed to g_hostdb.getBestIp() to pick which ip of a host
//   we link to
// . if "isDns" is true each row shows the hostname and a ban link instead
//   of the msgType/desc/hostId columns
void printUdpTable ( SafeBuf *p, char *title, UdpServer *server ,
		     char *coll, char *pwd , long fromIp ,
		     bool isDns ) {
	if ( ! coll ) coll = "main";
	//if ( ! pwd  ) pwd  = "";

	// time now
	long long now = gettimeofdayInMilliseconds();
	// store in buffer for sorting
	long     times[50000];//MAX_UDP_SLOTS];
	UdpSlot *slots[50000];//MAX_UDP_SLOTS];
	long nn = 0;
	for ( UdpSlot *s = server->getActiveHead() ; s ; s = s->m_next2 ) {
		if ( nn >= 50000 ) {
			log("admin: Too many udp sockets.");
			break;
		}
		// store it
		times[nn] = now - s->m_startTime;
		slots[nn] = s;
		nn++;
	}
	// bubble sort so that the largest age comes first
	bool didSwap;
	do {
		// assume no swap will happen
		didSwap = false;
		for ( long i = 1 ; i < nn ; i++ ) {
			if ( times[i-1] >= times[i] ) continue;
			long     tmpTime = times[i-1];
			UdpSlot *tmpSlot = slots[i-1];
			times[i-1] = times[i];
			slots[i-1] = slots[i];
			times[i  ] = tmpTime;
			slots[i  ] = tmpSlot;
			didSwap = true;
		}
	} while ( didSwap );

	// count how many of each msg we have. index is the msg type,
	// msgCount0 is for niceness 0 and msgCount1 for any other niceness.
	long msgCount0[96];
	long msgCount1[96];
	for ( long i = 0; i < 96; i++ ) {
		msgCount0[i] = 0;
		msgCount1[i] = 0;
	}
	for ( long i = 0; i < nn; i++ ) {
		UdpSlot *s = slots[i];
		// msg types that do not fit in the tables are not counted
		if ( s->m_msgType >= 96 ) continue;
		if ( s->m_niceness == 0 )
			msgCount0[s->m_msgType]++;
		else
			msgCount1[s->m_msgType]++;
	}
	// print the counts
	p->safePrintf ( "<table %s>"
			"<tr class=hdrow><td colspan=19>"
			"<center>"
			"<b>%s Summary</b> (%li transactions)"
			"</td></tr>"
			"<tr bgcolor=#%s>"
			"<td><b>niceness</td>"
			"<td><b>msg type</td>"
			"<td><b>total</td>"
			"</tr>",
			TABLE_STYLE,
			title , server->getNumUsedSlots() ,
			DARK_BLUE );
	for ( long i = 0; i < 96; i++ ) {
		if ( msgCount0[i] <= 0 ) continue;
		p->safePrintf("<tr bgcolor=#%s>"
			      "<td>0</td><td>0x%lx</td><td>%li</td></tr>",
			      LIGHT_BLUE,(unsigned long)i, msgCount0[i]);
	}
	for ( long i = 0; i < 96; i++ ) {
		if ( msgCount1[i] <= 0 ) continue;
		p->safePrintf("<tr bgcolor=#%s>"
			      "<td>1</td><td>0x%lx</td><td>%li</td></tr>",
			      LIGHT_BLUE,(unsigned long)i, msgCount1[i]);
	}
	p->safePrintf ( "</table><br>" );

	// the column headers that differ between the dns and non-dns tables
	char *dd = "";
	if ( ! isDns ) 
		dd =    "<td><b>msgType</td>"
			"<td><b>desc</td>"
			"<td><b>hostId</td>";
	else {
		dd = //"<td><b>dns ip</b></td>"
		     "<td><b>hostname</b></td>";
	}

	p->safePrintf ( "<table %s>"
			"<tr class=hdrow><td colspan=19>"
			"<center>"
			//"<font size=+1>"
			"<b>%s</b> (%li transactions)"
			//"</font>"
			"</td></tr>"
			"<tr bgcolor=#%s>"
			"<td><b>age</td>"
			"<td><b>last read</td>"
			"<td><b>last send</td>"
			"<td><b>timeout</td>"
			"<td><b>ip</td>"
			//"<td><b>port</td>"
			//"<td><b>desc</td>"
			//"<td><b>hostId</td>"
			//"<td><b>nice</td>";
			"%s"
			"<td><b>nice</td>"
			"<td><b>transId</td>"
			"<td><b>called</td>"
			"<td><b>dgrams read</td>"
			"<td><b>dgrams to read</td>"
			"<td><b>acks sent</td>"
			"<td><b>dgrams sent</td>"
			"<td><b>dgrams to send</td>"
			"<td><b>acks read</td>"
			"<td><b>resends</td>"
			"</tr>\n" , 
			TABLE_STYLE,
			title , server->getNumUsedSlots() , 
			DARK_BLUE ,
			dd );


	// now fill in the columns
	for ( long i = 0 ; i < nn ; i++ ) {
		// get from sorted list
		UdpSlot *s = slots[i];
		// times
		long long elapsed0 = (now - s->m_startTime    ) ;
		long long elapsed1 = (now - s->m_lastReadTime ) ;
		long long elapsed2 = (now - s->m_lastSendTime ) ;
		char e0[32],e1[32], e2[32];
		snprintf ( e0 , sizeof(e0) , "%llims" , elapsed0 );
		snprintf ( e1 , sizeof(e1) , "%llims" , elapsed1 );
		snprintf ( e2 , sizeof(e2) , "%llims" , elapsed2 );
		// a time of 0 means it was never set
		if ( s->m_startTime    == 0LL ) strcpy ( e0 , "--" );
		if ( s->m_lastReadTime == 0LL ) strcpy ( e1 , "--" );
		if ( s->m_lastSendTime == 0LL ) strcpy ( e2 , "--" );
		// bgcolor is lighter for incoming requests
		char *bg = LIGHT_BLUE;//"c0c0f0";
		// is it incoming
		if ( ! s->m_callback ) bg = LIGHTER_BLUE;//"e8e8ff";
		Host *h = g_hostdb.getHost ( s->m_ip , s->m_port );
		char           *eip     = "??";
		unsigned short  eport   =  0 ;
		char           *ehostId = "-1";
		// . our own copies of the ip and hostId strings
		// . iptoa() is called again below, before "eip" gets
		//   printed, so we must not keep the pointer it returned.
		//   presumably it points to storage that the next call
		//   overwrites, which made the host link show s->m_ip
		//   instead of the best ip we picked.
		char tmpIp    [64];
		char tmpHostId[64];
		if ( h ) {
			// host can have 2 ip addresses, get the one most
			// similar to that of the requester
			snprintf ( tmpIp , sizeof(tmpIp) , "%s" ,
				   iptoa(g_hostdb.getBestIp ( h , fromIp )) );
			eip     = tmpIp;
			eport   = h->m_externalHttpPort ;
			if ( h->m_isProxy )
				snprintf(tmpHostId,sizeof(tmpHostId),
					 "proxy%li",h->m_hostId);
			else
				snprintf(tmpHostId,sizeof(tmpHostId),
					 "%li",h->m_hostId);
			ehostId = tmpHostId;
		}
		// if no corresponding host, it could be a request from an
		// external cluster, so just show the ip
		else {
			snprintf ( tmpHostId , sizeof(tmpHostId) ,
				   "%s" , iptoa(s->m_ip) );
			ehostId = tmpHostId;
			eip     = tmpHostId;
		}
		// set description of the msg
		long msgType        = s->m_msgType;
		char *desc          = "";
		char *rbuf          = s->m_readBuf;
		char *sbuf          = s->m_sendBuf;
		long  rbufSize      = s->m_readBufSize;
		long  sbufSize      = s->m_sendBufSize;
		// we initiated the transaction if the slot has a callback
		bool  weInit        = s->m_callback;
		char  calledHandler = s->m_calledHandler;
		if ( weInit ) calledHandler = s->m_calledCallback;
		// "buf" is the request: what we sent if we initiated it,
		// otherwise what we read
		char *buf     = NULL;
		long  bufSize = 0;
		char tt [ 64 ];
		if ( msgType == 0x00 || msgType == 0x01 ) {
			if ( weInit ) { buf = sbuf; bufSize = sbufSize; }
			else          { buf = rbuf; bufSize = rbufSize; }
		}
		// . if callback was called this slot's sendbuf can be bogus
		// . i put this here to try to avoid a core dump
		if ( msgType == 0x13 &&   weInit && ! s->m_calledCallback ) {
			buf = sbuf; bufSize = sbufSize; }
		if ( msgType == 0x13 && ! weInit ) {
			buf = rbuf; bufSize = rbufSize; }
		if ( buf ) {
			// the rdbId is the first byte of a 0x01 request and
			// at offset 24 otherwise
			//else               rdbId = buf[8+sizeof(key_t)*2+16];
			long rdbIdOff = 24;
			if ( msgType == 0x01 ) rdbIdOff = 0;
			// do not read beyond the end of a short request
			long rdbId = -1;
			if ( bufSize > rdbIdOff ) rdbId = buf[rdbIdOff];
			Rdb *rdb = NULL;
			if ( rdbId >= 0 && ! isDns ) 
				rdb = getRdbFromId ((uint8_t)rdbId );
			char *cmd;
			if ( msgType == 0x01 ) cmd = "add to";
			else                   cmd = "get from";
			tt[0] = ' '; tt[1]='\0';
			if ( rdb ) snprintf ( tt , sizeof(tt) , "%s %s" ,
					      cmd,rdb->m_dbname );
			desc = tt;
		}
		if ( msgType == 0x10 ) desc = "add links";
		if ( msgType == 0x0c ) desc = "getting ip";
		if ( msgType == 0x0d ) desc = "get outlink ips/qualities";
		if ( msgType == 0x11 ) desc = "ping";
		if ( msgType == 0x12 ) desc = "get lock";
		if ( msgType == 0x06 ) desc = "spider lock";
		if ( msgType == 0x04 ) desc = "meta add";
		if ( msgType == 0x13 ) {
			// assume robots.txt unless the request is big
			// enough to hold the flag and says otherwise
			char isRobotsTxt = 1;
			if ( buf && bufSize >= 
			     (long)sizeof(Msg13Request)-(long)MAX_URL_LEN ) {
				Msg13Request *r = (Msg13Request *)buf;
				isRobotsTxt = r->m_isRobotsTxt;
			}
			if ( isRobotsTxt ) desc = "get robots.txt";
			else               desc = "get web page";
		}
		if ( msgType == 0x09 ) desc = "add site";
		if ( msgType == 0x08 ) desc = "get site";
		if ( msgType == 0x8b ) desc = "get catid";
		if ( msgType == 0x34 ) desc = "get load";
		if ( msgType == 0x02 ) desc = "get lists";
		if ( msgType == 0x22 ) desc = "get titlerec";
		if ( msgType == 0x36 ) desc = "get termFreq";
		if ( msgType == 0x20 ) desc = "get summary";
		if ( msgType == 0x2c ) desc = "get address";
		if ( msgType == 0x24 ) desc = "get gigabits";
		if ( msgType == 0x39 ) desc = "get docids";
		if ( msgType == 0x17 ) desc = "cache access";
		if ( msgType == 0x23 ) desc = "get linktext";
		if ( msgType == 0x07 ) desc = "inject";
		if ( msgType == 0x35 ) desc = "merge token";
		if ( msgType == 0x3b ) desc = "get docid score";
		if ( msgType == 0x50 ) desc = "get root quality";
		if ( msgType == 0x25 ) desc = "get link info";
		if ( msgType == 0xfd ) desc = "proxy forward";
		
		p->safePrintf ( "<tr bgcolor=#%s>"
				"<td>%s</td>"  // age
				"<td>%s</td>"  // last read
				"<td>%s</td>"  // last send
				"<td>%li</td>",  // timeout
				bg ,
				e0 ,
				e1 ,
				e2 ,
				s->m_timeout );

		// now use the ip for dns and hosts
		p->safePrintf("<td>%s:%lu</td>",
			      iptoa(s->m_ip),(unsigned long)s->m_port);

		// niceness is printed in red if it was converted
		char *cf1 = "";
		char *cf2 = "";
		if ( s->m_convertedNiceness ) {
			cf1 = "<font color=red>";
			cf2 = "</font>";
		}

		if ( isDns ) {
			//p->safePrintf("<td>%s</td>",iptoa(s->m_ip));
			char *hostname = (char *)s->m_tmpVar;
			p->safePrintf("<td><nobr>%s"
				      ,hostname);
			// get the domain from the hostname
			long dlen;
			char *dbuf = ::getDomFast ( hostname,&dlen,false);
			p->safePrintf(
			      " <a href=\"/admin/tagdb?"
			      "user=admin&"
			      "tagtype0=manualban&"
			      "tagdata0=1&"
			      "u=%s&c=%s\">"
			      "[<font color=red><b>BAN %s</b></font>]"
			      "</nobr></a> " ,
			      dbuf , coll , dbuf );
			p->safePrintf("</td>"
				      "<td>%s%li%s</td>",
				      cf1,
				      (long)s->m_niceness,
				      cf2);
		}

		if ( ! isDns ) {
			// clickable hostId
			char *toFrom = "to";
			if ( ! s->m_callback ) toFrom = "from";
			//"<td><a href=http://%s:%hu/cgi/15.cgi>%li</a></td>"
			p->safePrintf (	"<td>0x%hhx</td>"  // msgtype
					"<td><nobr>%s</nobr></td>"  // desc
					"<td><nobr>%s <a href=http://%s:%hu/"
					"master/sockets?"
					"c=%s>%s</a></nobr></td>"
					"<td>%s%li%s</td>" , // niceness
					s->m_msgType ,
					desc,
					// begin clickable hostId
					toFrom,
					eip     ,
					eport   ,
					coll ,
					ehostId ,
					cf1,
					(long)s->m_niceness,
					cf2
					// end clickable hostId
					);
		}

		p->safePrintf ( "<td>%lu</td>" // transId
				"<td>%i</td>" // called handler
				"<td>%li</td>" // dgrams read
				"<td>%li</td>" // dgrams to read
				"<td>%li</td>" // acks sent
				"<td>%li</td>" // dgrams sent
				"<td>%li</td>" // dgrams to send
				"<td>%li</td>" // acks read
				"<td>%hhu</td>" // resend count
				"</tr>\n" ,
				s->m_transId,
				calledHandler,
				s->getNumDgramsRead() ,
				s->m_dgramsToRead ,
				s->getNumAcksSent() ,
				s->getNumDgramsSent() ,
				s->m_dgramsToSend ,
				s->getNumAcksRead() ,
				s->m_resendCount );
	}
	// end the table
	p->safePrintf ("</table><br>\n" );
}
// . handler for an incoming Msg20 request sitting in "slot"
// . every failure path logs and answers with g_udpServer.sendErrorReply()
// . a hit in either summary cache is answered with sendCachedReply()
// . otherwise we allocate a Msg20State and ask its XmlDoc for the reply;
//   if that blocks, gotReplyWrapperxd() is the callback set on the XmlDoc,
//   if it does not block we call gotReplyWrapperxd() ourselves
// . "netnice" is not used in here
static void handleRequest20(UdpSlot *slot, int32_t netnice) {
	// . bail if g_errno is already set when we get called
	// . before, we were not sending a reply back here and we continued
	//   to process the request, even though it was empty. the slot
	//   had a NULL m_readBuf because it could not alloc mem for the read
	//   buf i'm assuming.
	if ( g_errno ) {
		log(LOG_WARN, "net: Msg20 handler got error: %s.",mstrerror(g_errno));
		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
		g_udpServer.sendErrorReply ( slot , g_errno );
		return;
	}

	// the read buffer must hold at least a whole Msg20Request
	if ( slot->m_readBufSize < (int32_t)sizeof(Msg20Request) ) {
		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply. Bad request size", __FILE__, __func__, __LINE__);
		g_udpServer.sendErrorReply ( slot , EBADREQUESTSIZE );
		return;
	}

	// the request lives in the slot's read buffer
	Msg20Request *request = (Msg20Request *)slot->m_readBuf;

	// . deserialize() turns the string offsets into ptrs in the request
	// . this is "destructive" on the request
	// . what it returns has to match the size of what we read
	int32_t parsedSize = request->deserialize();
	if ( parsedSize != slot->m_readBufSize ) {
		g_process.shutdownAbort(true);
	}

	// a negative collnum means we did not get a collection
	if ( request->m_collnum < 0 ) {
		log(LOG_WARN, "query: Got empty collection in msg20 handler. FIX! "
		    "from ip=%s port=%i",iptoa(slot->getIp()),(int)slot->getPort());

		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
		g_udpServer.sendErrorReply ( slot , ENOTFOUND );
		return;
	}

	// try the stable summary cache first, then the unstable one
	int64_t cacheKey = request->makeCacheKey();
	const void *cachedData;
	size_t cachedLen;
	const bool cacheHit =
		g_stable_summary_cache.lookup(cacheKey, &cachedData, &cachedLen) ||
		g_unstable_summary_cache.lookup(cacheKey, &cachedData, &cachedLen);
	if ( cacheHit ) {
		log(LOG_DEBUG, "query: Summary cache hit");
		sendCachedReply(request,cachedData,cachedLen,slot);
		return;
	}
	log(LOG_DEBUG, "query: Summary cache miss");

	// if it's not stored locally that's an error
	if ( request->m_docId >= 0 && ! Titledb::isLocal ( request->m_docId ) ) {
		log(LOG_WARN, "query: Got msg20 request for non-local docId %" PRId64, request->m_docId);
		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
		g_udpServer.sendErrorReply ( slot , ENOTLOCAL );
		return;
	}

	// with a docid of 0 we need a url in the request
	if ( request->m_docId == 0 && ! request->ptr_ubuf ) {
		log( LOG_WARN, "query: Got msg20 request for docid of 0 and no url for "
		    "collnum=%" PRId32" query %s",(int32_t)request->m_collnum,request->ptr_qbuf);

		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
		g_udpServer.sendErrorReply ( slot , ENOTFOUND );
		return;
	}

	// stored in the XmlDoc as its set time below
	int64_t startTime = gettimeofdayInMilliseconds();

	// alloc the state that holds the XmlDoc for this request
	Msg20State *st;
	try {
		st = new Msg20State(slot,request);
	} catch(...) {
		g_errno = ENOMEM;
		log("query: msg20 new(%" PRId32"): %s", (int32_t)sizeof(XmlDoc),
		    mstrerror(g_errno));
		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply. error=%s", __FILE__, __func__, __LINE__, mstrerror( g_errno ));
		g_udpServer.sendErrorReply ( slot, g_errno );
		return;
	}
	mnew(st, sizeof(*st), "xd20");

	// give the XmlDoc the request and the callback to use if it blocks
	st->m_xmldoc.setMsg20Request(request);
	st->m_xmldoc.setCallback(st, gotReplyWrapperxd);

	// set the times
	st->m_xmldoc.m_setTime = startTime;
	st->m_xmldoc.m_cpuSummaryStartTime = 0;

	// . now ask for the msg20 reply!
	// . TODO: move the parse state cache into just a cache of the
	//   XmlDoc itself, and put that cache logic into XmlDoc.cpp so
	//   it can be used more generally.
	Msg20Reply *reply = st->m_xmldoc.getMsg20Reply ( );

	// -1 means it blocked, anything else means we are done here
	if ( reply != (void *)-1 ) {
		gotReplyWrapperxd (st);
	}
}
// . called from Parms.cpp, which prints the proxy related controls and
//   then this table below them
// . lets the admin see the stats of each spider proxy stored in s_iptab
// . appends html to "sb" and always returns true
bool printSpiderProxyTable ( SafeBuf *sb ) {

	// only host #0 will have the stats ... so print a link to its page.
	// we do not return here, so the local table is still printed below.
	if ( g_hostdb.m_myHost->m_hostId != 0 ) {
		Host *h = g_hostdb.getHost(0);
		sb->safePrintf("<br>"
			       "<b>See table on <a href=http://%s:%" PRId32"/"
			       "admin/proxies>"
			       "host #0</a></b>"
			       "<br>"
			       , iptoa(h->m_ip)
			       , (int32_t)(h->getInternalHttpPort())
			       );
	}

	// print the table title and the column header row. the title cell
	// spans all 11 columns printed for each proxy below.
	sb->safePrintf ( 
		       "<table %s>"

		       "<tr><td colspan=11><center>"
		       "<b>Spider Proxies "
		       "</b>"
		       "</center></td></tr>" 

		       "<tr bgcolor=#%s>"
		       "<td>"
		       "<b>proxy IP</b></td>"
		       "<td><b>proxy port</b></td>"

		       "<td><b>times used</b></td>"

		       "<td><b># website IPs banning</b></td>"

		       "<td><b>load points</b></td>"

		       "<td><b>currently out</b></td>"

		       // time of last successful download. print "none"
		       // if never successfully used
		       "<td><b>test url last successful download</b></td>"
		       // we fetch a test url every minute or so through
		       // each proxy to ensure it is up. typically this should
		       // be your website so you do not make someone angry.
		       "<td><b>test url last download attempt</b></td>"
		       // print "FAILED" in red if it failed to download
		       "<td><b>test url download took</b></td>"

		       "<td><b>last bytes downloaded</b></td>"

		       "<td><b>last test url error</b></td>"

		       "</tr>"
		       
		       , TABLE_STYLE
		       , DARK_BLUE 
			);

	// "now" is in seconds; the SpiderProxy timestamps are divided by
	// 1000 below before being compared to it
	int32_t now = getTimeLocal();

	// one row per occupied slot of the proxy table
	for ( int32_t i = 0 ; i < s_iptab.getNumSlots() ; i++ ) {
		// skip empty slots
		if ( ! s_iptab.m_flags[i] ) continue;

		SpiderProxy *sp = (SpiderProxy *)s_iptab.getValueFromSlot(i);

		const char *bg = LIGHT_BLUE;
		// mark with light red bg if last test url attempt failed
		if ( sp->m_lastDownloadTookMS == -1 &&
		     sp->m_lastDownloadTestAttemptMS>0 )
			bg = "ffa6a6";

		// or a perm denied error (as opposed to a timeout above)
		if ( sp->m_lastDownloadError )
			bg = "ffa6a6";

		// proxy ip and port
		sb->safePrintf (
			       "<tr bgcolor=#%s>"
			       "<td>%s</td>" // proxy ip
			       "<td>%" PRIu32"</td>" // port
			       , bg
			       , iptoa(sp->m_ip)
			       , (uint32_t)(uint16_t)sp->m_port
			       );

		// times used
		sb->safePrintf("<td>%" PRId64"</td>",sp->m_timesUsed);

		// # website IPs banning this proxy; negative scores print as 0
		int32_t banCount = s_banCountTable.getScore32(sp->m_ip);
		if ( banCount < 0 ) banCount = 0;
		sb->safePrintf("<td>%" PRId32"</td>",banCount);

		// start from 0 so we never print an indeterminate value even
		// if getNumLoadPoints() leaves the out-parameter alone
		int32_t currentLoad = 0;

		// get # times it appears in loadtable
		int32_t np = getNumLoadPoints ( sp , &currentLoad );
		sb->safePrintf("<td>%" PRId32"</td>",np);

		// currently outstanding downloads on this proxy
		sb->safePrintf("<td>%" PRId32"</td>",currentLoad);

		// last SUCCESSFUL download time ago. when it completed.
		int32_t ago = now - sp->m_lastSuccessfulTestMS/1000;
		sb->safePrintf("<td>");
		// like 1 minute ago etc.
		if ( sp->m_lastSuccessfulTestMS <= 0 )
			sb->safePrintf("none");
		else
			printTimeAgo(sb, ago, now, true);
		sb->safePrintf("</td>");

		// last download attempt time ago
		ago = now - sp->m_lastDownloadTestAttemptMS/1000;
		sb->safePrintf("<td>");
		// like 1 minute ago etc.
		if ( sp->m_lastDownloadTestAttemptMS<= 0 )
			sb->safePrintf("none");
		else
			printTimeAgo(sb, ago, now, true);
		sb->safePrintf("</td>");

		// how long to download the test url? -1 with no attempt
		// recorded prints "unknown", -1 after an attempt is a failure
		if ( sp->m_lastDownloadTookMS != -1 )
			sb->safePrintf("<td>%" PRId32"ms</td>",
				       (int32_t)sp->m_lastDownloadTookMS);
		else if ( sp->m_lastDownloadTestAttemptMS<= 0 )
			sb->safePrintf("<td>unknown</td>");
		else
			sb->safePrintf("<td>"
				       "<font color=red>FAILED</font>"
				       "</td>");

		// last bytes downloaded
		sb->safePrintf("<td>%" PRId32"</td>",sp->m_lastBytesDownloaded);

		// last test url error, in red, or "none"
		if ( sp->m_lastDownloadError )
			sb->safePrintf("<td><font color=red>%s</font></td>",
				       mstrerror(sp->m_lastDownloadError));
		else
			sb->safePrintf("<td>none</td>");

		sb->safePrintf("</tr>\n");
	}

	sb->safePrintf("</table><br>");
	return true;
}
 // . parse an incoming http request into this object
 // . returns false on error; g_errno is set to EBADREQUEST when the
 //   request itself is malformed
 // . "origReq" is copied into m_reqBuf first and all destructive parsing
 //   is done on that copy
 // . "sock" may be NULL, in which case m_userIP and m_isSSL stay 0 and
 //   m_isLocal stays false
 bool HttpRequest::set ( char *origReq , long origReqLen , TcpSocket *sock ) {
	 // reset number of cgi field terms
	 reset();

	 if ( ! m_reqBuf.reserve ( origReqLen + 1 ) ) {
		 log("http: failed to copy request: %s",mstrerror(g_errno));
		 return false;
	 }

	 // copy it to avoid mangling it
	 m_reqBuf.safeMemcpy ( origReq , origReqLen );
	 // NULL term
	 m_reqBuf.pushChar('\0');

	 m_reqBufValid = true;

	 // and point to that
	 char *req    = m_reqBuf.getBufStart();
	 long  reqLen = m_reqBuf.length() - 1;

	 // save this
	 m_userIP = 0; if ( sock ) m_userIP = sock->m_ip;
	 m_isSSL  = 0; if ( sock ) m_isSSL = (bool)sock->m_ssl;

	 // sanity: our copy must be NULL terminated. deliberately crash
	 // if it is not.
	 if ( req[reqLen] != '\0' ) { char *xx = NULL; *xx = 0; }
	 
	 // scan index, assigned by the redirect and filename scans below
	 long i;
	 // ensure it's big enough to be a valid request
	 if ( reqLen < 5 ) { 
		 log("http: got reqlen<5 = %s",req);
		 g_errno = EBADREQUEST; 
		 return false; 
	 }
	 // get the type: 0 for GET or ZET, 1 for HEAD, 2 for POST
	 if      ( strncmp ( req , "GET "  , 4 ) == 0 ) m_requestType = 0;
	 // this means a compressed reply was requested. used by query
	 // compression proxies.
	 else if ( strncmp ( req , "ZET "  , 4 ) == 0 ) m_requestType = 0;
	 else if ( strncmp ( req , "HEAD " , 5 ) == 0 ) m_requestType = 1;
	 else if ( strncmp ( req , "POST " , 5 ) == 0 ) m_requestType = 2;
	 else { 
		 log("http: got bad request cmd: %s",req);
		 g_errno = EBADREQUEST; 
		 return false; 
	 }
	 // . NULL terminate the request one char early (destructive, but
	 //   only to our private copy)
	 // . this removes the last \n in the trailing \r\n 
	 // . skipped for POST requests because it would break them
	 if ( m_requestType != 2 ) { req [ reqLen - 1 ] = '\0'; reqLen--; }

	 // POST requests can be absolutely huge if you are injecting a 100MB
	 // file, so limit our strstrs to the end of the mime by temporarily
	 // NULL terminating at the blank line. "dc" saves the overwritten
	 // char so it can be restored further down.
	 char *d = NULL;
	 char  dc = '\0';
	 // check for body if it was a POST request
	 if ( m_requestType == 2 ) {
		 d = strstr ( req , "\r\n\r\n" );
		 if ( d ) { dc = *d; *d = '\0'; }
		 else log("http: Got POST request without \\r\\n\\r\\n.");
	 }

	 // . point to the file path 
	 // . skip over the "GET "
	 long filenameStart = 4 ;
	 // skip over extra char if it's a "HEAD " or "POST " request
	 if ( m_requestType == 1 || m_requestType == 2 ) filenameStart++;

	 // are we a redirect? if so copy up to 126 chars of the target,
	 // stopping at the first \r \n \t or space
	 i = filenameStart;
	 m_redirLen = 0;
	 if ( strncmp ( &req[i] , "/?redir=" , 8 ) == 0 ) {
		 for ( long k = i+8; k<reqLen && m_redirLen<126 ; k++) {
			 if ( req[k] == '\r' ) break;
			 if ( req[k] == '\n' ) break;
			 if ( req[k] == '\t' ) break;
			 if ( req[k] ==  ' ' ) break;
			 m_redir[m_redirLen++] = req[k];
		 }
	 }
	 m_redir[m_redirLen] = '\0';

	 // find a \n space \r or ? that delimits the filename
	 for ( i = filenameStart ; i < reqLen ; i++ ) {
		 if ( is_wspace_a ( req [ i ] ) ) break;
		 if ( req [ i ] == '?' ) break;
	 }

	 // now calc the filename length
	 m_filenameLen = i - filenameStart;
	 // return false and set g_errno if it's 0
	 if ( m_filenameLen <= 0  ) { 
		 log("http: got filenameLen<=0: %s",req);
		 g_errno = EBADREQUEST; 
		 return false; 
	 }
	 // . complain if too big
	 // . leave room for strcatting "index.html" below
	 if ( m_filenameLen >= MAX_HTTP_FILENAME_LEN - 10 ) { 
		 log("http: got filenameLen>=max");
		 g_errno = EBADREQUEST; 
		 return false; 
	 }
	 // . decode the filename into m_filename and reassign its length
	 // . decode %2F to / , etc...
	 m_filenameLen = urlDecode(m_filename,req+filenameStart,m_filenameLen);
	 // NULL terminate m_filename
	 m_filename [ m_filenameLen ] = '\0';
	 // does it have a file extension AFTER the last / in the filename?
	 bool hasExtension = false;
	 for ( long j = m_filenameLen-1 ; j >= 0 ; j-- ) {
		 if ( m_filename[j] == '.' ) { hasExtension = true; break; }
		 if ( m_filename[j] == '/' ) break;
	 }
	 // if it has no file extension and ends in / append index.html
	 if ( ! hasExtension && m_filename [ m_filenameLen - 1 ] == '/' ) {
		 strcat ( m_filename , "index.html" );
		 m_filenameLen = gbstrlen ( m_filename );
	 }
	 // set file offset/size defaults
	 m_fileOffset = 0;
	 // -1 means ALL the file from m_fileOffset onwards
	 m_fileSize   = -1;  
	 // . TODO: parse a Range: header in the mime denoting a partial get
	 // reset our hostname
	 m_hostLen = 0;
	 // assume request is NOT from local network
	 m_isLocal = false;
	 // get the virtual hostname they want to use
	 char *s = strstr ( req ,"Host:" );
	 // try alternate formats
	 if ( ! s ) s = strstr ( req , "host:" ); 
	 // must be on its own line, otherwise it's not valid
	 if ( s && s > req && *(s-1) !='\n' ) s = NULL;
	 // parse out the host if we got one
	 if ( s ) {
		 // skip field name, host:
		 s += 5;
		 // skip s to beginning of the host name after "host:"
		 while ( *s==' ' || *s=='\t' ) s++;
		 // find end of the host name
		 char *end = s;
		 while ( *end && !is_wspace_a(*end) ) end++;
		 // . now *end should be \0, \n, \r, ' ', ...
		 // . get host len
		 m_hostLen = end - s;
		 // truncate if too big
		 if ( m_hostLen >= 255 ) m_hostLen = 254;
		 // copy into hostname
		 memcpy ( m_host , s , m_hostLen );
	 }
	 // NULL terminate it
	 m_host [ m_hostLen ] = '\0';

	 // get Referer: field
	 s = strstr ( req ,"Referer:" );
	 // find another
	 if ( ! s ) s = strstr ( req ,"referer:" );
	 // must be on its own line, otherwise it's not valid
	 if ( s && s > req && *(s-1) !='\n' ) s = NULL;
	 // assume no referer
	 m_refLen = 0;
	 // parse out the referer if we got one
	 if ( s ) {
		 // skip field name, referer:
		 s += 8;
		 // skip s to beginning of the value after ':'
		 while ( *s==' ' || *s=='\t' ) s++;
		 // find end of the referer
		 char *end = s;
		 while ( *end && !is_wspace_a(*end) ) end++;
		 // . now *end should be \0, \n, \r, ' ', ...
		 // . get len
		 m_refLen = end - s;
		 // truncate if too big
		 if ( m_refLen >= 255 ) m_refLen = 254;
		 // copy into m_ref
		 memcpy ( m_ref , s , m_refLen );
	 }
	 // NULL terminate it
	 m_ref [ m_refLen ] = '\0';

	 // get User-Agent: field
	 s = strstr ( req ,"User-Agent:" );
	 // find another
	 if ( ! s ) s = strstr ( req ,"user-agent:" );
	 // must be on its own line, otherwise it's not valid
	 if ( s && s > req && *(s-1) !='\n' ) s = NULL;
	 // assume empty
	 long len = 0;
	 // parse out the user agent if we got one
	 if ( s ) {
		 // skip field name, user-agent:
		 s += 11;
		 // skip s to beginning of the value after ':'
		 while ( *s==' ' || *s=='\t' ) s++;
		 // find end of the agent name. unlike host and referer this
		 // runs to the end of the line, so it may contain spaces.
		 char *end = s;
		 while ( *end && *end!='\n' && *end!='\r' ) end++;
		 // . now *end should be \0, \n or \r
		 // . get agent len
		 len = end - s;
		 // truncate if too big
		 if ( len > 127 ) len = 127;
		 // copy into m_userAgent
		 memcpy ( m_userAgent , s , len );
	 }
	 // NULL terminate it
	 m_userAgent [ len ] = '\0';

	 m_isMSIE = false;
	 if ( strstr ( m_userAgent , "MSIE" ) )
		 m_isMSIE = true;

	 // get Cookie: field
	 s = strstr ( req, "Cookie:" );
	 // find another
	 if ( !s ) s = strstr ( req, "cookie:" );
	 // must be on its own line, otherwise it's not valid
	 if ( s && s > req && *(s-1) != '\n' ) s = NULL;
	 // m_cookiePtr points at the field name itself, not at the value
	 m_cookiePtr = s;
	 // parse out the cookie if we got one
	 if ( s ) {
		 // skip field name, Cookie:
		 s += 7;
		 // skip s to beginning of cookie after ':'
		 while ( *s == ' ' || *s == '\t' ) s++;
		 // find end of the cookie
		 char *end = s;
		 while ( *end && *end != '\n' && *end != '\r' ) end++;
		 // save length, measured from the field name
		 m_cookieLen = end - m_cookiePtr;
	 }
	 // NULL terminate it in place
	 if ( m_cookiePtr ) m_cookiePtr[m_cookieLen] = '\0';

	 // mark it as cgi if it has a ?
	 bool isCgi = ( req [ i ] == '?' ) ;
	 // add the ?* stuff as cgi parms
	 if ( isCgi ) {
		 // skip over the '?'
		 i++;
		 // find a space that delimits end of cgi
		 long j;
		 for ( j = i; j < reqLen; j++) if (is_wspace_a(req[j])) break;
		 // now add it
		 if ( ! addCgi ( &req[i] , j-i ) ) return false;
		 // update i
		 i = j;
	 }

	 // . set path ptrs
	 // . the whole /cgi/14.cgi?coll=xxx&..... thang
	 m_path = req + filenameStart;
	 m_plen = i - filenameStart;
	 // we're local if the peer ip is 192.168.x.y or 10.x.y.z
	 if ( sock && strncmp(iptoa(sock->m_ip),"192.168.",8) == 0) 
		 m_isLocal = true;
	 if ( sock && strncmp(iptoa(sock->m_ip),"10.",3) == 0) 
		 m_isLocal = true;

	 // 127.0.0.1
	 if ( sock && sock->m_ip == 16777343 )
		 m_isLocal = true;

	 // now put d's char back so the POST body is reachable again
	 if ( d ) *d = dc;

	 // add the POST body, if any, as cgi parms
	 if ( d ) {
		 char *post    = d + 4;
		 long  postLen = reqLen-(d+4-req) ;
		 // post sometimes has a \r or\n after it
		 while ( postLen > 0 && post[postLen-1]=='\r' ) postLen--;
		 // add it to m_cgiBuf, filter and everything
		 if ( ! addCgi ( post , postLen ) ) return false;
	 }
	 
	 /////
	 // Handle Extra parms...
	 // . each line of g_conf.m_extraParms is "<substring> <parms>"
	 // . if <substring> occurs in the cgi buffer then <parms> is
	 //   appended to m_cgiBuf2

	 char *ep = g_conf.m_extraParms;
	 char *epend = g_conf.m_extraParms + g_conf.m_extraParmsLen;

	 char *qstr = m_cgiBuf;
	 long qlen = m_cgiBufLen;

	 // . stop at an embedded \0 as well: none of the scans below can
	 //   step over one, so continuing would loop forever
	 // . every scan tests ep < epend BEFORE dereferencing ep
	 while (ep < epend && *ep){
		 char buf[AUTOBAN_TEXT_SIZE];
		 // most chars we can store while leaving room for the \0
		 const long maxLen = (long)sizeof(buf) - 1;
		 long bufLen = 0;
		 // get next substring. overlong text is truncated rather
		 // than written past the end of buf.
		 while (ep < epend && *ep && *ep != ' ' && *ep != '\n'){
			 if ( bufLen < maxLen ) buf[bufLen++] = *ep;
			 ep++;
		 }
		 // skip whitespace
		 while (ep < epend && *ep && *ep == ' '){
			 ep++;
		 }
		 // null terminate 
		 buf[bufLen] = '\0';

		
		 // No match
		 if (!bufLen ||
		     !strnstr(qstr, qlen, buf)){
			 // skip to end of line
			 while (ep < epend && *ep && *ep != '\n') ep++;
			 // skip newline
			 while (ep < epend && *ep && *ep == '\n') ep++;
			 // try next substr
			 continue;
		 }
		 // found a match...
		 // get parm string, truncating it the same way
		 bufLen = 0;
		 while (ep < epend && *ep && *ep != '\n'){
			 if ( bufLen < maxLen ) buf[bufLen++] = *ep;
			 ep++;
		 }
		 buf[bufLen] = '\0';
		 
		 // skip newline
		 while (ep < epend && *ep && *ep == '\n') ep++;

		 logf(LOG_DEBUG, "query: appending \"%s\" to query", buf);
		
		 // grow m_cgiBuf2 by reallocating and copying; the old \0
		 // terminator is overwritten by the appended text
		 long newSize = m_cgiBuf2Size + bufLen+1;
		 char *newBuf = (char*)mmalloc(newSize, "extraParms");
		 if (!newBuf){
			 return log("query: unable to allocate %ld bytes "
				    "for extraParms", newSize);
		 }
		 char *p = newBuf;
		 if (m_cgiBuf2Size) {
			 memcpy(newBuf, m_cgiBuf2, m_cgiBuf2Size);
			 p += m_cgiBuf2Size-1;
			 mfree(m_cgiBuf2, m_cgiBuf2Size, "extraParms");
			 m_cgiBuf2 = NULL;
			 m_cgiBuf2Size = 0;
		 }
		 memcpy(p, buf, bufLen);
		 m_cgiBuf2 = newBuf;
		 m_cgiBuf2Size = newSize;
		 p += bufLen;
		 *p = '\0';
	 }

	 // Put '\0' back into the HttpRequest buffer...
	 if (m_cgiBuf){
		 // do not mangle the "ucontent"!
		 long cgiBufLen = m_cgiBufLen;
		 cgiBufLen -= m_ucontentLen;
		 char *buf = m_cgiBuf;
		 for (long i = 0; i < cgiBufLen ; i++) 
			 if (buf[i] == '&') buf[i] = '\0';
		 // don't decode the ucontent= field!
		 long decodeLen = m_cgiBufLen;
		 // so subtract that
		 if ( m_ucontent ) decodeLen -= m_ucontentLen;
		 // decode everything, in place
		 long len = urlDecode ( m_cgiBuf , m_cgiBuf , decodeLen );
		 m_cgiBufLen = len;
		 // ensure that is null i guess
		 if ( ! m_ucontent ) m_cgiBuf[len] = '\0';
	 }
	
	 if (m_cgiBuf2){
		 char *buf = m_cgiBuf2;
		 for (long i = 0; i < m_cgiBuf2Size-1 ; i++) 
			 if (buf[i] == '&') buf[i] = '\0';
		 long len = urlDecode ( m_cgiBuf2 , m_cgiBuf2 , m_cgiBuf2Size);
		 // zero out whatever the in-place decode left behind
		 memset(m_cgiBuf2+len, '\0', m_cgiBuf2Size-len);
	 }
	 // . parse the fields after the ? in a cgi filename
	 // . or fields in the content if it's a POST
	 // . m_cgiBuf must be and is NULL terminated for this
	 parseFields ( m_cgiBuf , m_cgiBufLen );
	 // Add extra parms to the request.  
	 if (m_cgiBuf2Size){
		 parseFields(m_cgiBuf2, m_cgiBuf2Size);
	 }

	 // urldecode the cookie buf too!!
	 if ( m_cookiePtr ) {
		 char *p = m_cookiePtr;
		 for (long i = 0; i < m_cookieLen ; i++) {
			 // cookies are separated with ';' in the request only
			 if (p[i] == ';') p[i] = '\0';
			 // a hack for the metacookie=....
			 // which uses &'s to separate its subcookies
			 // this is a hack for msie's limit of 50 cookies
			 if ( p[i] == '&' ) p[i] = '\0';
			 // set m_metaCookie to start of meta cookie
			 // NOTE(review): the strncmp tests "p", the start of
			 // the Cookie: header, rather than p+i, so it looks
			 // like this can never match. confirm the intent
			 // before changing it: a pointer taken here would
			 // also be shifted by the in-place urlDecode below.
			 if ( p[i] == 'm' && p[i+1] == 'e' &&
			      strncmp(p,"metacookie",10) == 0 )
				 m_metaCookie = p;
		 }
		 long len = urlDecode ( m_cookiePtr , 
					m_cookiePtr,
					m_cookieLen );
		 // zero the tail so we do not parse stale bytes after the
		 // decoded text if the last parm has no value
		 memset(m_cookiePtr+len, '\0', m_cookieLen-len);
		 m_cookieLen = len;
	 }

	 return true;
 }
Esempio n. 25
0
// . msg12 handler for spider url-lock requests arriving over udp
// . the request is told apart purely by its size: a ConfirmRequest
//   confirms a lock that was already granted, anything else must be a
//   LockRequest that either releases or tries to acquire a lock
// . every path that does not hit a sanity-check crash sends exactly one
//   reply or error reply on "udpSlot" before returning
void handleRequest12 ( UdpSlot *udpSlot , int32_t niceness ) {
	// the raw request bytes and their size
	char    *request = udpSlot->m_readBuf;
	int32_t  reqSize = udpSlot->m_readBufSize;
	// shortcut
	UdpServer *udp = &g_udpServer;
	// breathe
	QUICKPOLL ( niceness );

	// the one-byte reply is built in the slot's own scratch buffer
	char *reply = udpSlot->m_tmpBuf;

	//
	// . is the sender confirming that it got all the locks?
	// . if so, remove the doledb record and dock the doleiptable count
	//   before adding a waiting tree entry to re-pop the doledb record
	//
	if ( reqSize == sizeof(ConfirmRequest) ) {
		ConfirmRequest *confirm = (ConfirmRequest *)request;

		// look the lock up; it has to exist to be confirmed
		HashTableX *lockTable = &g_spiderLoop.m_lockTable;
		int32_t confirmSlot =
			lockTable->getSlot ( &confirm->m_lockKeyUh48 );
		if ( confirmSlot < 0 ) {
			log("spider: got a confirm request for a key not "
			    "in the table! coll must have been deleted "
			    " or reset "
			    "while lock request was outstanding.");
			g_errno = EBADENGINEER;

			log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
			udp->sendErrorReply ( udpSlot , g_errno );
			return;
		}

		// flag it as confirmed
		UrlLock *confirmed =
			(UrlLock *)lockTable->getValueFromSlot ( confirmSlot );
		confirmed->m_confirmed = true;

		// note that
		if ( g_conf.m_logDebugSpider )
			log("spider: got confirm lock request for ip=%s",
			    iptoa(confirmed->m_firstIp));

		// may be NULL, every use below checks for that
		SpiderColl *sc = g_spiderCache.getSpiderColl(confirm->m_collnum);

		// clear the low bit to make the doledb key negative, then
		// add that negative rec to doledb (a deletion operation)
		confirm->m_doledbKey.n0 &= 0xfffffffffffffffeLL;
		Rdb *doledb = &g_doledb.m_rdb;
		bool deleted = doledb->addRecord ( confirm->m_collnum ,
						   (char *)&confirm->m_doledbKey ,
						   NULL , // data
						   0    , // dataSize
						   1    );// niceness
		if ( ! deleted ) {
			// tree is dumping or something, probably ETRYAGAIN,
			// which is not worth its own log line
			if ( g_errno != ETRYAGAIN )
				log("spider: %s %s",
				    "error adding neg rec to doledb",
				    mstrerror(g_errno));

			log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
			udp->sendErrorReply ( udpSlot , g_errno );
			return;
		}

		if ( sc ) {
			// now remove from doleiptable since we removed from
			// doledb
			sc->removeFromDoledbTable ( confirm->m_firstIp );

			// . now add to waiting tree so we add another
			//   spiderdb record for this firstip to doledb
			// . this will just return true if we are not the
			//   responsible host for this firstip
			// . DO NOT populate from this!!! say "false" here...
			bool queued = sc->addToWaitingTree ( 0 ,
							     confirm->m_firstIp ,
							     false );
			// a false return only counts as failure if g_errno
			// is set
			if ( ! queued && g_errno ) {
				log("spider: %s %s",
				    "FAILED TO ADD TO WAITING TREE",
				    mstrerror(g_errno));

				log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
				udp->sendErrorReply ( udpSlot , g_errno );
				return;
			}
		}

		// success!!
		reply[0] = 1;
		udp->sendReply_ass ( reply , 1 , reply , 1 , udpSlot );
		return;
	}

	// anything else has to be exactly one LockRequest
	if ( reqSize != sizeof(LockRequest) ) {
		log("spider: bad msg12 request size of %" PRId32,reqSize);

		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
		udp->sendErrorReply ( udpSlot , EBADREQUEST );
		return;
	}

	// deny it if we are not synced yet! otherwise we core in
	// getTimeGlobal() below
	if ( ! isClockInSync() ) {
		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
		udp->sendErrorReply ( udpSlot , ETRYAGAIN );
		return;
	}

	LockRequest *lockReq = (LockRequest *)request;

	// sanity check, the key is just 6 bytes! (48 bits)
	if ( lockReq->m_lockKeyUh48 &0xffff000000000000LL ) { char *xx=NULL;*xx=0; }

	// note it
	if ( g_conf.m_logDebugSpider )
		log("spider: got msg12 request uh48=%" PRId64" remove=%" PRId32,
		    lockReq->m_lockKeyUh48, (int32_t)lockReq->m_removeLock);

	// get time
	int32_t nowGlobal = getTimeGlobal();
	// shortcut
	HashTableX *lockTable = &g_spiderLoop.m_lockTable;

	// which host sent this? it must be a known one - sanity check
	int32_t hostId = g_hostdb.getHostId ( udpSlot->m_ip , udpSlot->m_port );
	if ( hostId < 0 ) { char *xx=NULL;*xx=0; }

	// remove expired locks from locktable
	removeExpiredLocks ( hostId );

	// local copy of the key, used for the table lookup and insert
	int64_t lockKey = lockReq->m_lockKeyUh48;

	// see if somebody already holds this lock
	int32_t  slot = lockTable->getSlot ( &lockKey );
	UrlLock *held = NULL;
	if ( slot >= 0 ) held = (UrlLock *)lockTable->getValueFromSlot ( slot );

	// . remove operations always reply with a 1
	// . the lock is only actually dropped if it belongs to the
	//   requesting host and carries the same lock sequence
	if ( lockReq->m_removeLock ) {
		bool isOurs = held &&
			held->m_hostId       == hostId &&
			held->m_lockSequence == lockReq->m_lockSequence;
		if ( isOurs ) {
			// note it for now
			if ( g_conf.m_logDebugSpider )
				log("spider: removing lock for lockkey=%" PRIu64" hid=%" PRId32,
				    lockReq->m_lockKeyUh48,hostId);
			// unlock it
			lockTable->removeSlot ( slot );
		}
		reply[0] = 1;
		udp->sendReply_ass ( reply , 1 , reply , 1 , udpSlot );
		return;
	}

	/////////
	//
	// add new lock
	//
	/////////

	// a lock older than MAX_LOCK_AGE is removed automatically!!
	if ( held && nowGlobal - held->m_timestamp > MAX_LOCK_AGE ) {
		// note it for now
		log("spider: removing lock after %" PRId32" seconds "
		    "for lockKey=%" PRIu64" hid=%" PRId32,
		    (nowGlobal - held->m_timestamp),
		    lockReq->m_lockKeyUh48,hostId);
		// unlock it
		lockTable->removeSlot ( slot );
		// it is gone
		held = NULL;
	}

	// if lock still there, do not grant another lock: reply with a 0
	if ( held ) {
		// note it for now
		if ( g_conf.m_logDebugSpider )
			log("spider: refusing lock for lockkey=%" PRIu64" hid=%" PRId32,
			    lockReq->m_lockKeyUh48,hostId);
		reply[0] = 0;
		udp->sendReply_ass ( reply , 1 , reply , 1 , udpSlot );
		return;
	}

	// make the new lock
	UrlLock fresh;
	fresh.m_hostId       = hostId;
	fresh.m_lockSequence = lockReq->m_lockSequence;
	fresh.m_timestamp    = nowGlobal;
	fresh.m_expires      = 0;
	fresh.m_firstIp      = lockReq->m_firstIp;
	fresh.m_collnum      = lockReq->m_collnum;
	// . when the spider returns we do not remove its lock right away
	//   on reception of the spiderReply; we set m_expires a few seconds
	//   into the future in case a request to lock that url is in flight
	// . m_spiderOutstanding records whether the spider is still out, so
	//   that addToWaitingTree() can leave completed spiders out of its
	//   "max spiders per IP" quota
	fresh.m_spiderOutstanding = true;
	// . this is set once all hosts in the group (shard) have granted
	//   the lock and the requesting host sends its confirm request
	// . until then we do not know if the lock will be granted by all
	//   hosts in the group (shard)
	fresh.m_confirmed    = false;

	// put it into the table
	if ( ! lockTable->addKey ( &lockKey , &fresh ) ) {
		// return error if that failed!
		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
		udp->sendErrorReply ( udpSlot , g_errno );
		return;
	}

	// note it for now
	if ( g_conf.m_logDebugSpider )
		log("spider: granting lock for lockKey=%" PRIu64" hid=%" PRId32,
		    lockReq->m_lockKeyUh48,hostId);

	// grant the lock
	reply[0] = 1;
	udp->sendReply_ass ( reply , 1 , reply , 1 , udpSlot );
	return;
}
Esempio n. 26
0
// . set m_numDns from the configured dns ip array and make sure at least
//   one resolver is available, falling back to 8.8.8.8 on port 53
// . also turns m_askRootNameservers off
// . m_numRns, m_rnsIps[] and m_rnsPorts[] are NOT written here: the code
//   that filled them sat after an unconditional return and never ran
void Conf::setRootIps ( ) {

	// . count the configured resolvers, stopping at the first zero ip
	// . every slot visited gets port 53, including the empty slot that
	//   ends the scan
	int32_t i;
	for ( i = 0 ; i < 16 ; i++ ) {
		m_dnsPorts[i] = 53;
		if ( ! g_conf.m_dnsIps[i] ) break;
	}
	m_numDns = i;

	// . none configured? fall back to google public dns
	// . the old per-host-type overrides (HT_SCPROXY / HT_PROXY) picked
	//   this very same address, so they are gone
	if ( m_numDns == 0 ) {
		// a local array rather than a pointer to a string literal,
		// so it can be handed to atoip() whichever constness it wants
		char ipStr[] = "8.8.8.8";
		m_dnsIps[0] = atoip( ipStr , gbstrlen(ipStr) );
		m_dnsPorts[0] = 53;
		m_numDns = 1;
	}

	// default this to off on startup for now until it works better
	m_askRootNameservers = false;

	// . the root nameserver table is intentionally not populated while
	//   m_askRootNameservers is off
	// . the addresses the disabled code used, kept for reference:
	//   192.228.79.201  192.33.4.12  128.8.10.90  192.5.5.241
	//   192.58.128.30   193.0.14.129 199.7.83.42  202.12.27.33
	//   198.41.0.4
}
Esempio n. 27
0
// Scans the received buffer for "<key>=<value>" settings and applies them.
//   rx     - buffer to parse; values are url-decoded in place
//   rx_len - number of bytes left in rx
// A "restartwebradio=" key triggers cpu_reset(). Every other key is looked up
// in settingsmenu[]; on a match the value is normalised according to the
// entry's format, stored via ini_setentry() and passed to the entry's
// optional set() handler. Bytes that match no key are skipped one at a time.
// The menu window is redrawn once the whole buffer has been consumed.
void http_settings(char *rx, unsigned int rx_len)
{
  unsigned int idx, matched, n;
  int value;
  char text[16];
  IP_Addr ip;
  unsigned int rgb;

  while(rx_len != 0)
  {
    // restart request
    n = strlen("restartwebradio=");
    if(strncmpi(rx, "restartwebradio=", n) == 0)
    {
      rx     += n;
      rx_len -= n;
      cpu_reset();
    }

    // try every settings entry that has an ini key
    matched = 0;
    for(idx=0; idx<SETTINGSITEMS; idx++)
    {
      if(settingsmenu[idx].ini[0] == 0)
      {
        continue; //entry without ini key
      }

      n = sprintf(text, "%s=", settingsmenu[idx].ini);
      if(strncmpi(rx, text, n) != 0)
      {
        continue; //different key
      }

      // skip "<key>=" and decode the value in place
      rx     += n;
      rx_len -= n;
      n       = url_decode(rx, rx, rx_len); //result is used below to step over the value
      value   = 0;

      switch(settingsmenu[idx].format)
      {
        case F_NR:  //number clamped to p1...p2 (p3=step size)
          value = atoi(rx);
          if(value < settingsmenu[idx].p1)
          {
            value = settingsmenu[idx].p1;
          }
          else if(value > settingsmenu[idx].p2)
          {
            value = settingsmenu[idx].p2;
          }
          itoa(value, text, 10);
          ini_setentry(SETTINGS_FILE, settingsmenu[idx].ini, text);
          if(settingsmenu[idx].set)
          {
            settingsmenu[idx].set((void*)(int)value);
          }
          break;

        case F_OR:  //either p1 or p2, anything else becomes p1
          value = atoi(rx);
          if(!((value == settingsmenu[idx].p1) ||
               (value == settingsmenu[idx].p2)))
          {
            value = settingsmenu[idx].p1;
          }
          itoa(value, text, 10);
          ini_setentry(SETTINGS_FILE, settingsmenu[idx].ini, text);
          if(settingsmenu[idx].set)
          {
            settingsmenu[idx].set((void*)(int)value);
          }
          break;

        case F_STR: //string, p1=max len (0 = no limit)
          if((settingsmenu[idx].p1 != 0) &&
             (strlen(rx) > (unsigned)settingsmenu[idx].p1))
          {
            rx[settingsmenu[idx].p1] = 0; //cut off the overlong value
          }
          ini_setentry(SETTINGS_FILE, settingsmenu[idx].ini, rx);
          if(settingsmenu[idx].set)
          {
            settingsmenu[idx].set(rx);
          }
          break;

        case F_MAC: //stored only, no set() handler is called
          ini_setentry(SETTINGS_FILE, settingsmenu[idx].ini, mactoa(atomac(rx)));
          break;

        case F_IP:
          ip = atoip(rx);
          ini_setentry(SETTINGS_FILE, settingsmenu[idx].ini, iptoa(ip));
          if(settingsmenu[idx].set)
          {
            settingsmenu[idx].set((void*)(IP_Addr)atoip(rx));
          }
          break;

        case F_RGB: //stored as "RRR,GGG,BBB"
          rgb = atorgb(rx);
          sprintf(text, "%03i,%03i,%03i", GET_RED(rgb), GET_GREEN(rgb), GET_BLUE(rgb));
          ini_setentry(SETTINGS_FILE, settingsmenu[idx].ini, text);
          if(settingsmenu[idx].set)
          {
            settingsmenu[idx].set((void*)(unsigned int)rgb);
          }
          break;
      }

      // step over the value that was just handled
      rx     += n;
      rx_len -= n;
      matched = 1;
      break;
    }

    // nothing recognised at this position -> advance one byte
    if(matched == 0)
    {
      rx++;
      rx_len--;
    }
  }

  menu_drawwnd(1);
}
Esempio n. 28
0
// . adds the latency of one query (now - startTime, in milliseconds) to the
//   running totals of the current batch
// . when the batch reaches exactly g_conf.m_numQueryTimes queries the
//   average latency, success rate and queries/sec are computed and logged;
//   if the latency or success rate crosses its configured threshold a
//   warning is logged and an email is sent (at most one per 300 seconds)
// . after a full batch (or if the counter overshot the batch size) the
//   per-batch counters are folded into the totals and zeroed
void Stats::logAvgQueryTime(long long startTime) {
	// time of the last alert email, to prevent machinegunning text msgs
	static long s_lastSendTime = 0;

	long long nowMs = gettimeofdayInMilliseconds();
	// no cap is applied to a single insanely slow query
	m_queryTimes += nowMs - startTime;
	m_numQueries++;

	// if we overshot the batch size skip the report and just reset below
	bool overshot = ( m_numQueries > g_conf.m_numQueryTimes );

	if ( ! overshot ) {
		// batch not full yet, keep accumulating
		if ( m_numQueries != g_conf.m_numQueryTimes ) return;

		// average latency in seconds
		m_avgQueryTime  = (float)m_queryTimes /
			((float)m_numQueries * 1000.0);
		m_successRate = (float)m_numSuccess / 
			(float)(m_numSuccess + m_numFails);
		// (number of queries) / seconds it took to get this many
		m_avgQueriesPerSec = ((float)m_numQueries * 1000.0) / 
			(float)(nowMs - m_lastQueryLogTime);
		m_lastQueryLogTime = nowMs;

		bool healthy =
			! ( m_avgQueryTime > g_conf.m_avgQueryTimeThreshold ||
			    m_successRate  < g_conf.m_querySuccessThreshold );

		if ( healthy ) {
			log(LOG_INFO, "query: Average latency is %f seconds, "
			    "succeeding at a rate of %f, serving %f queries/sec.",
			    m_avgQueryTime, m_successRate, m_avgQueriesPerSec);
		}
		else {
			char msgbuf[1024];
			// the alert names the ip of host #0
			Host *h = g_hostdb.getHost ( 0 );
			snprintf(msgbuf, sizeof(msgbuf),
				 "Average latency: %f sec. "
				 "success rate: %f.  "
				 "queries/sec: %f.  "
				 "host: %s.",
				 m_avgQueryTime, m_successRate,
				 m_avgQueriesPerSec,
				 iptoa(h->m_ip));
			log(LOG_WARN, "query: %s",msgbuf);
			// rate limit the emails
			long nowSecs = getTimeLocal();
			if ( nowSecs - s_lastSendTime > 300 ) {
				s_lastSendTime = nowSecs;
				g_pingServer.sendEmail(NULL, msgbuf);
			}
		}
	}

	// fold this batch into the lifetime totals
	m_totalNumQueries += m_numSuccess + m_numFails;
	m_totalNumSuccess += m_numSuccess;
	m_totalNumFails   += m_numFails;

	// and start a fresh batch
	m_numQueries = 0;
	m_queryTimes = 0;
	m_numSuccess = 0;
	m_numFails = 0;
}
// . builds and sends the cached copy of the document described by the
//   State2 passed in "state", over st->m_socket
// . returns false if blocked, true otherwise
// . re-entrant: it registers itself as the XmlDoc callback, so whenever one
//   of the xd->get*() calls blocks (returns -1) we return false and get
//   called again later with the same state
// . every path that does not block sends a reply (sendErrorReply() or
//   g_httpServer.sendDynamicPage()); on the success paths "st" is deleted
//   here after the page was handed to the http server
// . output depends on st->m_format: plain html (optionally with a
//   disclaimer header and query term highlighting), or the content wrapped
//   in an xml/json <response>
bool processLoop ( void *state ) {
	// get it
	State2 *st = (State2 *)state;
	// get the tcp socket from the state
	TcpSocket *s = st->m_socket;
	// get it
	XmlDoc *xd = &st->m_xd;

	if ( ! xd->m_loaded ) {
		// callback
		xd->setCallback ( state , processLoop );
		// . and tell it to load from the old title rec
		// . this sets xd->m_oldTitleRec/m_oldTitleRecSize
		// . this sets xd->ptr_* and all other member vars from
		//   the old title rec if found in titledb.
		if ( ! xd->loadFromOldTitleRec ( ) ) return false;
	}

	if ( g_errno ) return sendErrorReply ( st , g_errno );
	// now force it to load old title rec
	SafeBuf *tr = xd->getTitleRecBuf();
	// blocked? return false if so. it will call processLoop() when it rets
	if ( tr == (void *)-1 ) return false;
	// we did not block. check for error? this will free "st" too.
	if ( ! tr ) return sendErrorReply ( st , g_errno );
	// if title rec was empty, that is a problem
	if ( xd->m_titleRecBuf.length() == 0 ) 
		return sendErrorReply ( st , ENOTFOUND);

	// is the doc flagged noarchive?
	char *na = xd->getIsNoArchive();
	// wait if blocked
	if ( na == (void *)-1 ) return false;
	// error?
	if ( ! na ) return sendErrorReply ( st , g_errno );
	// forbidden? admins are allowed through though...
	if ( ! st->m_isAdmin && *na )
		return sendErrorReply ( st , ENOCACHE );

	SafeBuf *sb = &st->m_sb;


	// &page=4 will print rainbow sections
	if ( ! st->m_printed && st->m_r.getLong("page",0) ) {
		// do not repeat this call
		st->m_printed = true;
		// this will call us again since we called
		// xd->setCallback() above to us
		if ( ! xd->printDocForProCog ( sb , &st->m_r ) )
			return false;
	}

	char *contentType = "text/html";
	char format = st->m_format;
	if ( format == FORMAT_XML ) contentType = "text/xml";
	if ( format == FORMAT_JSON ) contentType = "application/json";

	// if we printed a special page (like rainbow sections) then return now
	if ( st->m_printed ) {
		bool status = g_httpServer.sendDynamicPage (s,
							    sb->getBufStart(),
							    sb->getLength(),
							    -1,false,
							    contentType,
							    -1, NULL, "utf8" );
		// nuke state2
		mdelete ( st , sizeof(State2) , "PageGet1" );
		delete (st);
		return status;
	}

	// NOTE: a banned/filtered check via xd->getIsFiltered() used to live
	// here but was disabled because it made XmlDoc set sections, etc.

	// get the utf8 content
	char **utf8 = xd->getUtf8Content();
	// wait if blocked???
	if ( utf8 == (void *)-1 ) return false;
	// strange
	if ( xd->size_utf8Content<=0) {
		log("pageget: utf8 content <= 0");
		return sendErrorReply(st,EBADENGINEER );
	}
	// alloc error?
	if ( ! utf8 ) return sendErrorReply ( st , g_errno );

	// get this host
	Host *h = g_hostdb.getHost ( g_hostdb.m_hostId );
	if ( ! h ) {
		log("pageget: hostid %li is bad",(long)g_hostdb.m_hostId);
		return sendErrorReply(st,EBADENGINEER );
	}


	char *content    = xd->ptr_utf8Content;
	// size includes the terminating \0
	long  contentLen = xd->size_utf8Content - 1;

	// shortcut
	char strip = st->m_strip;

	// for undoing the header
	long startLen1 = sb->length();

	// we are always utf8
	if ( strip != 2 )
		sb->safePrintf( "<meta http-equiv=\"Content-Type\" "
			     "content=\"text/html;charset=utf8\">\n");

	// base href, prefer the redirect url if we have one
	// NOTE(review): "base" is printed into the html without escaping
	char *base = xd->ptr_firstUrl;
	if ( xd->ptr_redirUrl ) base = xd->ptr_redirUrl;
	if ( strip != 2 ) {
		sb->safePrintf ( "<BASE HREF=\"%s\">" , base );
	}

	// default colors in case css files missing
	if ( strip != 2 ) {
		sb->safePrintf( "\n<style type=\"text/css\">\n"
			  "body{background-color:white;color:black;}\n"
			  "</style>\n");
	}

	// xml and json replies do not get any of the html header above
	if ( format == FORMAT_XML ) sb->reset();
	if ( format == FORMAT_JSON ) sb->reset();

	// for undoing the stuff below
	long startLen2 = sb->length();

	// query should be NULL terminated
	char *q    = st->m_q;
	long  qlen = st->m_qlen;

	char styleTitle[128] =  "font-size:14px;font-weight:600;"
				"color:#000000;";
	char styleText[128]  =  "font-size:14px;font-weight:400;"
				"color:#000000;";
	char styleLink[128] =  "font-size:14px;font-weight:400;"
				"color:#0000ff;";
	char styleTell[128] =  "font-size:14px;font-weight:600;"
				"color:#cc0000;";

	// get the url of the title rec
	Url *f = xd->getFirstUrl();

	bool printDisclaimer = st->m_printDisclaimer;

	if ( xd->m_contentType == CT_JSON )
		printDisclaimer = false;

	if ( format == FORMAT_XML ) printDisclaimer = false;
	if ( format == FORMAT_JSON ) printDisclaimer = false;

	// the spider date in GMT, as text. stays empty if not needed.
	char tbuf[100];
	tbuf[0] = 0;
	time_t lastSpiderDate = xd->m_spideredTime;

	if ( printDisclaimer ||
	     format == FORMAT_XML ||
	     format == FORMAT_JSON ) {
		struct tm *timeStruct = gmtime ( &lastSpiderDate );
		// gmtime() returns NULL if the time can not be represented,
		// leave tbuf empty in that case rather than crash
		if ( timeStruct )
			strftime ( tbuf, 100,"%b %d, %Y UTC", timeStruct);
	}

	// We should always be displaying this disclaimer.
	// - May eventually want to display this at a different location
	//   on the page, or on the click 'n' scroll browser page itself
	//   when this page is not being viewed solo.
	if ( printDisclaimer ) {

		sb->safePrintf(
			  "<table border=\"1\" bgcolor=\"#"
			  BGCOLOR
			  "\" cellpadding=\"10\" "
			  "cellspacing=\"0\" width=\"100%%\" color=\"#ffffff\">"
			  "<tr"
			  "><td>"
			  "<span style=\"%s\">"
			  "This is Gigablast's cached page of </span>"
			  "<a href=\"%s\" style=\"%s\">%s</a>"
			  "" , styleTitle, f->getUrl(), styleLink,
			  f->getUrl() );
		// then the rest
		sb->safePrintf(
			"<span style=\"%s\">. "
			"Gigablast is not responsible for the content of "
			"this page.</span>", styleTitle );

		sb->safePrintf ( "<br/><span style=\"%s\">"
			  "Cached: </span>"
			  "<span style=\"%s\">",
			  styleTitle, styleText );

		// then the spider date in GMT
		sb->safeStrcpy(tbuf);

		// Moved over from PageResults.cpp
		// NOTE(review): "q" is printed into the href as-is, it is
		// neither url nor html encoded here - confirm callers
		// sanitize it
		sb->safePrintf( "</span> - <a href=\""
			      "/get?"
			      "q=%s&amp;c=%s&amp;rtq=%li&amp;"
			      "d=%lli&amp;strip=1\""
			      " style=\"%s\">"
			      "[stripped]</a>", 
			      q , st->m_coll , 
			      (long)st->m_rtq,
			      st->m_docId, styleLink ); 

		// a link to the web archive
		if ( f->getUrlLen() > 5 ) {
			sb->safePrintf( " - <a href=\"http:"
					 "//web.archive.org/web/*/%s\""
					 " style=\"%s\">"
					 "[older copies]</a>" ,
					 f->getUrl(), styleLink );
		}

		if (st->m_noArchive){
			sb->safePrintf( " - <span style=\"%s\"><b>"
				     "[NOARCHIVE]</b></span>",
				     styleTell );
		}
		if (st->m_isBanned){
			sb->safePrintf(" - <span style=\"%s\"><b>"
				     "[BANNED]</b></span>",
				     styleTell );
		}

		// only print this if we got a query
		if ( qlen > 0 ) {
			sb->safePrintf("<br/><br/><span style=\"%s\"> "
				   "These search terms have been "
				   "highlighted:  ",
				   styleText );
		}
		
	}

	// . make the url that we're outputting for (like in PageResults.cpp)
	// . "thisUrl" is the baseUrl for click & scroll
	char thisUrl[MAX_URL_LEN];
	char *thisUrlEnd = thisUrl + MAX_URL_LEN;
	char *x = thisUrl;
	// use the ip and http port of this host
	unsigned long  ip   = h->m_ip;
	unsigned short port = h->m_httpPort;
	// . we no longer put the port in here if it is the default
	// . but still need http:// since we use <base href=>
	if (port == 80) sprintf(x,"http://%s/get?q=",iptoa(ip));
	else            sprintf(x,"http://%s:%hu/get?q=",iptoa(ip),port);
	x += gbstrlen ( x );
	// . the query url encoded
	// . keep some room for the "&d=<docid>" we append below. handing
	//   urlEncode() the entire rest of the buffer let a long query fill
	//   it up so that the append overflowed thisUrl[]
	const long tailRoom = 32;
	long elen = urlEncode ( x , thisUrlEnd - x - tailRoom , q , qlen );
	x += elen;
	// separate cgi vars with a &
	snprintf ( x, thisUrlEnd - x, "&d=%lli",st->m_docId );
	x += gbstrlen(x);		
	// set our query for highlighting
	Query qq;
	qq.set2 ( q, st->m_langId , true );

	// print the query terms into our highlight buffer
	Highlight hi;
	// make words so we can set the scores to ignore fielded terms
	Words qw;
	qw.set ( q            ,  // content being highlighted, utf8
		 qlen         ,  // content being highlighted, utf8
		 TITLEREC_CURRENT_VERSION,
		 true         ,  // computeIds
		 false        ); // hasHtmlEntities?
	// now set m.m_matches[] to those words in qw that match a query word
	// or phrase in qq.
	Matches m;
	m.setQuery ( &qq );
	m.addMatches ( &qw );
	long hilen = 0;

	// and highlight the matches in the query itself, it is printed
	// as the tail of the disclaimer
	if ( printDisclaimer ) {
		hilen = hi.set ( sb ,
				 &qw     , // words to highlight
				 &m      , // matches relative to qw
				 false   , // doStemming
				 false   , // click and scroll
				 (char *)thisUrl );// base url for ClcknScrll
		// close the disclaimer
		sb->safeStrcpy("</span></table></table>\n");
	}


	bool includeHeader = st->m_includeHeader;

	// do not show header for json object display
	if ( xd->m_contentType == CT_JSON )
		includeHeader = false;

	if ( format == FORMAT_XML ) includeHeader = false;
	if ( format == FORMAT_JSON ) includeHeader = false;

	// undo the header writes if we should
	if ( ! includeHeader ) {
		// including base href is off by default when not including
		// the header, so the caller must explicitly turn it back on
		if ( st->m_includeBaseHref ) sb->m_length=startLen2;
		else                         sb->m_length=startLen1;
	}


	if ( format == FORMAT_XML ) {
		sb->safePrintf("<response>\n");
		sb->safePrintf("<statusCode>0</statusCode>\n");
		sb->safePrintf("<statusMsg>Success</statusMsg>\n");
		sb->safePrintf("<url><![CDATA[");
		sb->cdataEncode(xd->m_firstUrl.m_url);
		sb->safePrintf("]]></url>\n");
		sb->safePrintf("<docId>%llu</docId>\n",xd->m_docId);
		// time_t is not guaranteed to be an unsigned long
		sb->safePrintf("\t<cachedTimeUTC>%lu</cachedTimeUTC>\n",
			      (unsigned long)lastSpiderDate);
		sb->safePrintf("\t<cachedTimeStr>%s</cachedTimeStr>\n",tbuf);
	}

	if ( format == FORMAT_JSON ) {
		sb->safePrintf("{\"response\":{\n");
		sb->safePrintf("\t\"statusCode\":0,\n");
		sb->safePrintf("\t\"statusMsg\":\"Success\",\n");
		sb->safePrintf("\t\"url\":\"");
		sb->jsonEncode(xd->m_firstUrl.m_url);
		sb->safePrintf("\",\n");
		sb->safePrintf("\t\"docId\":%llu,\n",xd->m_docId);
		sb->safePrintf("\t\"cachedTimeUTC\":%lu,\n",
			       (unsigned long)lastSpiderDate);
		sb->safePrintf("\t\"cachedTimeStr\":\"%s\",\n",tbuf);
	}

	// . identify start of <title> tag we wrote out
	// . only look at the bytes written into sb so far
	// . NOTE(review): the document content is appended to sb further
	//   below, so this only sees what was printed above - confirm that
	//   is intended
	char *sbstart = sb->getBufStart();
	char *sbend   = sbstart + sb->length();
	char *titleStart = NULL;
	char *titleEnd   = NULL;
	// need at least 6 bytes for "<title"
	for ( char *t = sbstart ; sbend - t > 5 ; t++ ) {
		// title tag?
		if ( t[0]!='<' ) continue;
		if ( to_lower_a(t[1])!='t' ) continue;
		if ( to_lower_a(t[2])!='i' ) continue;
		if ( to_lower_a(t[3])!='t' ) continue;
		if ( to_lower_a(t[4])!='l' ) continue;
		if ( to_lower_a(t[5])!='e' ) continue;
		// point to it
		char *ts = t + 5;
		// scan at most 500 bytes to keep things fast, and never
		// past the end of what is in the buffer
		long room = sbend - ts;
		if ( room > 500 ) room = 500;
		char *winEnd = ts + room;
		// find the end of the opening tag
		for ( ; ts < winEnd && *ts && *ts != '>' ; ts++ );
		// tag not closed in our window? then no title
		if ( ts >= winEnd || *ts != '>' ) break;
		ts++;
		// find the closing tag
		char *te = ts;
		bool closed = false;
		for ( ; te < winEnd && *te ; te++ ) {
			// not enough bytes left to hold "</title"
			if ( sbend - te <= 6 ) break;
			if ( te[0]=='<' &&
			     to_lower_a(te[1])=='/' &&
			     to_lower_a(te[2])=='t' &&
			     to_lower_a(te[3])=='i' &&
			     to_lower_a(te[4])=='t' &&
			     to_lower_a(te[5])=='l' &&
			     to_lower_a(te[6])=='e' ) {
				closed = true;
				break;
			}
		}
		// only accept a title whose closing tag we actually found
		if ( closed ) {
			titleStart = ts;
			titleEnd   = te;
		}
		// only the first title tag is considered
		break;
	}

	// . print title at top!
	// . consider moving
	if ( titleStart ) {

		// NOTE(review): "eb" comes from the request and is printed
		// into an href below without encoding
		char *ebuf = st->m_r.getString("eb");
		if ( ! ebuf ) ebuf = "";

		sb->safePrintf(
			       "<table border=1 "
			       "cellpadding=10 "
			       "cellspacing=0 "
			       "width=100%% "
			       "color=#ffffff>" );

		long printLinks = st->m_r.getLong("links",0);

		if ( ! printDisclaimer && printLinks )
			sb->safePrintf(
				       // first put cached and live link
				       "<tr>"
				       "<td bgcolor=lightyellow>"
				       // print cached link
				       "&nbsp; "
				       "<b>"
				       "<a "
				       "style=\"font-size:18px;font-weight:600;"
				       "color:#000000;\" "
				       "href=\""
				       "/get?"
				       "c=%s&d=%lli&qh=0&cnsp=1&eb=%s\">"
				       "cached link</a>"
				       " &nbsp; "
				       "<a "
				       "style=\"font-size:18px;font-weight:600;"
				       "color:#000000;\" "
				       "href=%s>live link</a>"
				       "</b>"
				       "</td>"
				       "</tr>\n"
				       ,st->m_coll
				       ,st->m_docId 
				       ,ebuf
				       ,thisUrl
				       );

		if ( printLinks ) {
			sb->safePrintf(
				       "<tr><td bgcolor=pink>"
				       "<span style=\"font-size:18px;"
				       "font-weight:600;"
				       "color:#000000;\">"
				       "&nbsp; "
				       "<b>PAGE TITLE:</b> "
				       );
			long tlen = titleEnd - titleStart;
			sb->safeMemcpy ( titleStart , tlen );
			sb->safePrintf ( "</span></td></tr>" );
		}

		sb->safePrintf( "</table><br>\n" );

	}

	// is the content preformatted?
	bool pre = false;
	char ctype = (char)xd->m_contentType;
	if ( ctype == CT_TEXT ) pre = true ; // text/plain
	if ( ctype == CT_DOC  ) pre = true ; // filtered msword
	if ( ctype == CT_PS   ) pre = true ; // filtered postscript

	if ( format == FORMAT_XML ) pre = false;
	if ( format == FORMAT_JSON ) pre = false;

	// if it is content-type text, add a <pre>
	if ( pre ) {
		sb->safePrintf("<pre>");
	}

	if ( st->m_strip == 1 )
		contentLen = stripHtml( content, contentLen, 
					(long)xd->m_version, st->m_strip );
	// it returns -1 and sets g_errno on error, like OOM
	if ( contentLen == -1 ) {
		return sendErrorReply ( st , g_errno );
	}

	Xml xml;
	Words ww;

	// if no highlighting, skip it
	bool queryHighlighting = st->m_queryHighlighting;
	if ( st->m_strip == 2 ) queryHighlighting = false;

	// do not do term highlighting if json
	if ( xd->m_contentType == CT_JSON )
		queryHighlighting = false;

	// for xml/json the content first goes into a temp buffer so it can
	// be encoded into the reply below
	SafeBuf tmp;
	SafeBuf *xb = sb;
	if ( format == FORMAT_XML ) xb = &tmp;
	if ( format == FORMAT_JSON ) xb = &tmp;
	

	if ( ! queryHighlighting ) {
		xb->safeMemcpy ( content , contentLen );
	}
	else {
		// get the content as xhtml (should be NULL terminated)
		if ( ! xml.set ( content , contentLen , false ,
				 0 , false , TITLEREC_CURRENT_VERSION ,
				 false , 0 , CT_HTML ) ) { // niceness is 0
			return sendErrorReply ( st , g_errno );
		}			
		if ( ! ww.set ( &xml , true , 0 ) ) { // niceness is 0
			return sendErrorReply ( st , g_errno );
		}

		// matches of the query in the document words
		Matches docMatches;
		docMatches.setQuery ( &qq );
		docMatches.addMatches ( &ww );
		hilen = hi.set ( xb ,
				 &ww , &docMatches ,
				 false /*doStemming?*/ ,  
				 st->m_clickAndScroll , 
				 thisUrl /*base url for click & scroll*/);
		log(LOG_DEBUG, "query: Done highlighting cached page content");
	}


	if ( format == FORMAT_XML ) {
		sb->safePrintf("\t<content><![CDATA[");
		sb->cdataEncode ( xb->getBufStart() );
		sb->safePrintf("]]></content>\n");
		sb->safePrintf("</response>\n");
	}

	if ( format == FORMAT_JSON ) {
		// no newline after the opening quote: a raw control
		// character inside a json string is invalid json
		sb->safePrintf("\t\"content\":\"");
		sb->jsonEncode ( xb->getBufStart() );
		sb->safePrintf("\"\n}\n}\n");
	}


	// if it is content-type text, add a </pre>
	if ( pre ) {
		sb->safeMemcpy ( "</pre>" , 6 );
	}

	long ct = xd->m_contentType;

	// now filter the entire buffer to escape out the xml tags
	// so it is displayed nice
	SafeBuf newbuf;

	if ( ct == CT_XML ) {
		// encode the xml tags into &lt;tagname&gt; sequences
		if ( !newbuf.htmlEncodeXmlTags ( sb->getBufStart() ,
						 sb->getLength(),
						 0)){// niceness=0
			return sendErrorReply ( st , g_errno );
		}
		// the encoded copy replaces what we had in sb
		sb->stealBuf ( &newbuf );
	}

	// now encapsulate it in html head/tail and send it off
	contentType = "text/html";
	if ( strip == 2 ) contentType = "text/xml";
	// xml is usually buggy and throws the browser off, so CT_XML docs
	// are still sent as text/html

	if ( xd->m_contentType == CT_JSON )
		contentType = "application/json";

	if ( format == FORMAT_XML ) contentType = "text/xml";
	if ( format == FORMAT_JSON ) contentType = "application/json";

	// safebuf, sb, is a member of "st" so this should copy the buffer
	// when it constructs the http reply, and we gotta call delete(st)
	// AFTER this so sb is still valid.
	bool status = g_httpServer.sendDynamicPage (s,
						    sb->getBufStart(),
						    sb->getLength(),
						    -1,false,
						    contentType,
						     -1, NULL, "utf8" );

	// nuke state2
	mdelete ( st , sizeof(State2) , "PageGet1" );
	delete (st);

	// and convey the status
	return status;
}
// . appends an html table titled "title" to "p" with one row for every
//   non-NULL socket in server->m_tcpSockets[0..m_lastFilled], capped at
//   MAX_TCP_SOCKS rows
// . rows are ordered by descending age (time since s->m_startTime)
// . each row shows fd, age, idle time, ip, port, state and byte counters
void printTcpTable ( SafeBuf* p, char *title, TcpServer *server ) {
	// table title and column headers
	p->safePrintf ( "<table %s>"
		       "<tr class=hdrow><td colspan=19>"
		       "<center>"
		       "<b>%s</b>"
		       "</td></tr>"
		       "<tr bgcolor=#%s>"
		       "<td><b>#</td>"
		       "<td><b>fd</td>"
		       "<td><b>age</td>"
		       "<td><b>idle</td>"
		       "<td><b>ip</td>"
		       "<td><b>port</td>"
		       "<td><b>state</td>"
		       "<td><b>bytes read</td>"
		       "<td><b>bytes to read</td>"
		       "<td><b>bytes sent</td>"
		       "<td><b>bytes to send</td>"
		       "</tr>\n"
			, TABLE_STYLE
			, title 
			, DARK_BLUE
			);
	// current time in milliseconds
	long long now = gettimeofdayInMilliseconds();

	// collect the sockets in use together with their age for sorting
	long       ages  [MAX_TCP_SOCKS];
	TcpSocket *sorted[MAX_TCP_SOCKS];
	long count = 0;
	for ( long slot = 0 ; slot <= server->m_lastFilled ; slot++ ) {
		// no more room?
		if ( count >= MAX_TCP_SOCKS ) break;
		TcpSocket *sock = server->m_tcpSockets[slot];
		// skip empty slots
		if ( ! sock ) continue;
		ages  [count] = now - sock->m_startTime;
		sorted[count] = sock;
		count++;
	}

	// bubble sort, oldest first. repeat passes until nothing moved.
	bool swapped;
	do {
		swapped = false;
		for ( long k = 1 ; k < count ; k++ ) {
			// already in order?
			if ( ages[k-1] >= ages[k] ) continue;
			long       age  = ages  [k-1];
			TcpSocket *sock = sorted[k-1];
			ages  [k-1] = ages  [k];
			sorted[k-1] = sorted[k];
			ages  [k]   = age;
			sorted[k]   = sock;
			swapped = true;
		}
	} while ( swapped );

	// now print one row per socket
	for ( long i = 0 ; i < count ; i++ ) {
		TcpSocket *s = sorted[i];
		// state as text, "ERROR" if we do not know the state
		const char *stateStr;
		switch ( s->m_sockState ) {
		case ST_AVAILABLE:    stateStr = "available";    break;
		case ST_CONNECTING:   stateStr = "connecting";   break;
		case ST_READING:      stateStr = "reading";      break;
		case ST_SSL_ACCEPT:   stateStr = "ssl accept";   break;
		case ST_SSL_SHUTDOWN: stateStr = "ssl shutdown"; break;
		case ST_WRITING:      stateStr = "sending";      break;
		case ST_NEEDS_CLOSE:  stateStr = "needs close";  break;
		case ST_CLOSE_CALLED: stateStr = "close called"; break;
		default:              stateStr = "ERROR";        break;
		}
		// bgcolor is lighter for incoming requests
		const char *bg = s->m_isIncoming ? "e8e8ff" : "c0c0f0";
		// ms since the socket started and since its last action
		long sinceStart  = now - s->m_startTime      ;
		long sinceAction = now - s->m_lastActionTime ;
		p->safePrintf ("<tr bgcolor=#%s>"
			       "<td>%li</td>" // i
			       "<td>%i</td>" // fd
			       "<td>%lims</td>"  // age
			       "<td>%lims</td>"  // idle
			       "<td>%s</td>"  // ip
			       "<td>%hu</td>" // port
			       "<td>%s</td>"  // state
			       "<td>%li</td>" // bytes read
			       "<td>%li</td>" // bytes to read
			       "<td>%li</td>" // bytes sent
			       "<td>%li</td>" // bytes to send
			       "</tr>\n" ,
			       bg ,
			       i,
			       s->m_sd ,
			       sinceStart,
			       sinceAction,
			       iptoa(s->m_ip) ,
			       s->m_port ,
			       stateStr ,
			       s->m_readOffset ,
			       s->m_totalToRead ,
			       s->m_sendOffset  ,
			       s->m_totalToSend );
	}
	// end the table
	p->safePrintf ("</table><br>\n" );
}