bool getUrl( char *path , void (* callback) (void *state, TcpSocket *sock) ) { SafeBuf sb; sb.safePrintf ( "http://%s:%li%s" , iptoa(g_hostdb.m_myHost->m_ip) , (long)g_hostdb.m_myHost->m_port , path ); Url u; u.set ( sb.getBufStart() ); if ( ! g_httpServer.getDoc ( u.getUrl() , 0 , // ip 0 , // offset -1 , // size 0 , // ifmodsince NULL , callback , 60*1000, // timeout 0, // proxyip 0, // proxyport -1, // maxtextdoclen -1, // maxotherdoclen NULL ) ) // useragent return false; // error? log("qa: getUrl error: %s",mstrerror(g_errno)); return true; }
// Dialog procedure for the ICQ user-info page.
//  - WM_INITDIALOG: translates the dialog.
//  - WM_NOTIFY / PSN_INFOCHANGED: refreshes the UIN, IP, real IP and port
//    fields from the contact's database settings and clears the version,
//    MirVer and ping fields.
//  - WM_COMMAND / IDCANCEL: forwards the message to the parent window.
// Returns TRUE only for WM_INITDIALOG, FALSE for everything else.
static INT_PTR CALLBACK icqUserInfoDlgProc(HWND hWnd, UINT msg, WPARAM wParam, LPARAM lParam)
{
	LPNMHDR hdr;

	switch (msg) {
	case WM_INITDIALOG:
		TranslateDialogDefault(hWnd);
		return TRUE;

	case WM_NOTIFY:
		hdr = (LPNMHDR)lParam;
		if (hdr->idFrom == 0 && hdr->code == PSN_INFOCHANGED) {
			char buffer[64];
			unsigned long ip, port;
			MCONTACT hContact = (MCONTACT)((LPPSHNOTIFY)lParam)->lParam;

			// The UIN is read as an unsigned 32-bit value; format it as
			// unsigned so values above INT_MAX are not shown as negative.
			_ultoa(db_get_dw(hContact, protoName, "UIN", 0), buffer, 10);
			setTextValue(hWnd, IDC_INFO_UIN, buffer);

			// A stored value of 0 means "unknown": pass NULL instead of text.
			ip = db_get_dw(hContact, protoName, "IP", 0);
			setTextValue(hWnd, IDC_INFO_IP, ip ? iptoa(ip) : NULL);

			ip = db_get_dw(hContact, protoName, "RealIP", 0);
			setTextValue(hWnd, IDC_INFO_REALIP, ip ? iptoa(ip) : NULL);

			port = db_get_w(hContact, protoName, "Port", 0);
			_ultoa(port, buffer, 10);
			setTextValue(hWnd, IDC_INFO_PORT, port ? buffer : NULL);

			// These fields have no data source here; clear them.
			setTextValue(hWnd, IDC_INFO_VERSION, NULL);
			setTextValue(hWnd, IDC_INFO_MIRVER, NULL);
			setTextValue(hWnd, IDC_INFO_PING, NULL);
		}
		break;

	case WM_COMMAND:
		if (LOWORD(wParam) == IDCANCEL)
			SendMessage(GetParent(hWnd), msg, wParam, lParam);
		break;
	}

	return FALSE;
}
uint8_t REDFLY::socketConnect(uint8_t proto, uint8_t *ip, uint16_t port, uint16_t lport) { uint8_t ret=INVALID_SOCKET, len; //ip iptoa(ip, (char*)buffer); //port strcat_P((char*)buffer, PSTR(",")); len = strlen((char*)buffer); uitoa(port, (char*)&buffer[len]); //local port strcat_P((char*)buffer, PSTR(",")); len = strlen((char*)buffer); uitoa(lport, (char*)&buffer[len]); if(proto == PROTO_MCAST) //Multicast { proto = SOCKET_MCAST; if(cmd(buffer, 8, PSTR(CMD_MCAST), (char*)buffer) == 0) //xxx.xxx.xxx.xxx,aaaaa,bbbbb { ret = buffer[2]; //OKx } } else if(proto == PROTO_TCP) //TCP { proto = SOCKET_TCP; if(cmd(buffer, 8, PSTR(CMD_TCP), (char*)buffer) == 0) //xxx.xxx.xxx.xxx,aaaaa,bbbbb { ret = buffer[2]; //OKx } } else //UDP { proto = SOCKET_UDP; if(cmd(buffer, 8, PSTR(CMD_UDP), (char*)buffer) == 0) //xxx.xxx.xxx.xxx,aaaaa,bbbbb { ret = buffer[2]; //OKx } } if(ret != INVALID_SOCKET) //handle okay -> save socket handle and type { for(uint8_t i=0; i<MAX_SOCKETS; i++) { if(socket_state[i].handle == INVALID_SOCKET) { socket_state[i].handle = ret; socket_state[i].state = proto; break; } } } return ret; }
// . returns false if blocked, true otherwise // . sets g_errno on error bool sendPageInject ( TcpSocket *s , HttpRequest *r ) { // get the collection long collLen = 0; char *coll = r->getString ( "c" , &collLen , NULL /*default*/); // get collection rec CollectionRec *cr = g_collectiondb.getRec ( coll ); // bitch if no collection rec found if ( ! cr ) { g_errno = ENOCOLLREC; log("build: Injection from %s failed. " "Collection \"%s\" does not exist.", iptoa(s->m_ip),coll); return g_httpServer.sendErrorReply(s,500, "collection does not exist"); } // make a new state Msg7 *msg7; try { msg7= new (Msg7); } catch ( ... ) { g_errno = ENOMEM; log("PageInject: new(%i): %s", sizeof(Msg7),mstrerror(g_errno)); return g_httpServer.sendErrorReply(s,500,mstrerror(g_errno));} mnew ( msg7, sizeof(Msg7) , "PageInject" ); msg7->m_socket = s; msg7->m_isScrape = false; // a scrape request? char *qts = r->getString("qts",NULL); if ( qts && ! qts[0] ) qts = NULL; if ( qts ) { // qts is html encoded? NO! fix that below then... //char *uf="http://www.google.com/search?num=50&" // "q=%s&scoring=d&filter=0"; strncpy(msg7->m_coll,coll,MAX_COLL_LEN); msg7->m_isScrape = true; msg7->m_qbuf.safeStrcpy(qts); msg7->m_linkDedupTable.set(4,0,512,NULL,0,false,0,"ldtab"); msg7->m_useAhrefs = r->getLong("useahrefs",0); // default to yes, injectlinks.. no default to no msg7->m_injectLinks = r->getLong("injectlinks",0); if ( ! msg7->scrapeQuery ( ) ) return false; return sendReply ( msg7 ); } if ( ! msg7->inject ( s , r , msg7 , sendReplyWrapper ) ) return false; // it did not block, i gues we are done return sendReply ( msg7 ); }
int PageNetTest::openSock( long num, long type, struct sockaddr_in *name, long port ) { // set up our socket int sock = socket ( AF_INET, SOCK_DGRAM , 0 ); if ( sock < 0 ) { log( "net: nettest: socket-%s",strerror(errno) ); return false; } // reset it all just to be safe bzero((char *)name, sizeof(*name)); name->sin_family = AF_INET; name->sin_addr.s_addr = 0; /*INADDR_ANY;*/ name->sin_port = htons(port); // we want to re-use port it if we need to restart int options = 1; if ( setsockopt(sock, SOL_SOCKET, SO_REUSEADDR , &options,sizeof(options)) < 0 ) { log( "net: nettest: setsockopt-%s", strerror(errno) ); return -1; } if( type == TEST_READ ) { struct timeval timeo; timeo.tv_sec = 0; timeo.tv_usec = 500000; if ( setsockopt(sock, SOL_SOCKET, SO_RCVTIMEO, &timeo,sizeof(timeo)) < 0 ) { log( "net: nettest: setsockopt-%s", strerror(errno) ); return -1; } } // bind this name to the socket if ( bind ( sock, (struct sockaddr *)name, sizeof(*name)) < 0) { close ( sock ); log( "net: nettest: bind on port %lu: %s", port, strerror(errno) ); return -1; } if( type == TEST_SEND ) { m_to.sin_family = AF_INET; m_to.sin_addr.s_addr = m_testIp[num]; m_to.sin_port = htons ( port );//2000 ) ; // m_port ); bzero ( &(m_to.sin_zero) , 8 ); } log( LOG_DEBUG, "net: nettest: open socket for %s on port %ld to %s", (type == TEST_SEND)?"sending":"receiving", port, iptoa(m_testIp[num]) ); return sock; }
// . check this ip in the list of admin ips bool Conf::isMasterIp ( uint32_t ip ) { //if ( m_numMasterIps == 0 ) return false; //if ( m_numConnectIps == 0 ) return false; if ( m_connectIps.length() <= 0 ) return false; // for ( int32_t i = 0 ; i < m_numConnectIps ; i++ ) // if ( m_connectIps[i] == (int32_t)ip ) // return true; //if ( ip == atoip("10.5.0.2",8) ) return true; char *p = iptoa(ip); char *buf = m_connectIps.getBufStart(); return isInWhiteSpaceList ( p , buf ); }
/*
  start a listening TCP server socket

  Inputs:
	<iface>   the network interface that the server binds
	<port>    the listening port

  Returns:
	the listening socket on success, otherwise a negative value:
	-1 the socket could not be created
	-2 binding to the interface/port failed
	-3 listen() failed

  NOTE(review): the socket is not closed when bind or listen fails
*/
int tcp_startup_server(unsigned int iface, int port)
{
	/* create a socket */
	int fd = tcp_open_socket (1);

	if (fd < 0) {
		return -1;
	}

	/* bind the socket on to a specified interface */
	if (bind_interface (fd, iface, port) == -1) {
		return -2;
	}

	/* start accepting connections, with a backlog of 5 */
	if (listen (fd, 5) < 0) {
		return -3;
	}

	dbgprintf ("startup a TCP server %s at port %d fd %d",iptoa (iface),port, fd);

	return fd;
}
/* Prints the address family of every address attached to device "d".
   Entries whose addr pointer is NULL are skipped. */
void ifprint(pcap_if_t* d)
{
	pcap_addr_t *a;

	for(a=d->addresses; a; a = a->next)
	{
		/* check the pointer before reading the family through it */
		if(!a->addr)
			continue;

		printf("Address family:#%d\n", a->addr->sa_family);

		switch(a->addr->sa_family)
		{
			case AF_INET:
				printf("Address family is AF_INET\n");
				// printf("\tAddress: %s", iptoa(((struct sockaddr_in*)a->addr)->sin_addr.s_addr));
				break;

			default:
				printf("Address family unknow\n");
				break;
		}
	}
}
// returns false if blocked, true otherwise, like on quick connect error bool getUrl( char *path , long checkCRC = 0 , char *post = NULL ) { SafeBuf sb; sb.safePrintf ( "http://%s:%li%s" , iptoa(g_hostdb.m_myHost->m_ip) , (long)g_hostdb.m_myHost->m_httpPort , path ); s_checkCRC = checkCRC; bool doPost = true; if ( strncmp ( path , "/search" , 7 ) == 0 ) doPost = false; //Url u; s_url.set ( sb.getBufStart() ); log("qa: getting %s",sb.getBufStart()); if ( ! g_httpServer.getDoc ( s_url.getUrl() , 0 , // ip 0 , // offset -1 , // size 0 , // ifmodsince NULL , gotReplyWrapper, 999999*1000, // timeout ms 0, // proxyip 0, // proxyport -1, // maxtextdoclen -1, // maxotherdoclen NULL , // useragent "HTTP/1.0" , // protocol doPost , // doPost NULL , // cookie NULL , // additionalHeader NULL , // fullRequest post ) ) return false; // error? processReply ( NULL , 0 ); //log("qa: getUrl error: %s",mstrerror(g_errno)); return true; }
/*
 * Handles a "/whois <name>" request.
 *
 * users:  table searched with search_name()
 * socket: descriptor the answer is written to with do_write()
 * buffer: holds the incoming command and is reused for the reply text
 *
 * If the name is found the reply gives the connection time, IP address and
 * port; otherwise it says the user is not logged on.
 */
void handle_whois(puser users, int socket, char * buffer) {
	char timebuf[20];
	int k;
	char * name;

	/* The name is parsed out of buffer, so a scratch area at least as
	   long as buffer can never be overrun by the unbounded %s below. */
	size_t cap = strlen(buffer) + 1;
	if (cap < (size_t) MAXLEN)
		cap = (size_t) MAXLEN;

	/* Zero-filled: name stays a valid empty string if sscanf matches
	   nothing. */
	name = (char *) calloc(cap, 1);
	if (name == NULL) {
		sprintf(buffer, "[Server] : out of memory\n");
		do_write(socket, buffer);
		return;
	}

	sscanf(buffer, "/whois %s", name);
	k = search_name(users, name);

	if (k != -1) {
		time_t since = users[k].timez;
		strftime(timebuf, 20, "%Y/%m/%d@%H:%M:%S", localtime(&since));
		sprintf(buffer, "[Server] : %s is connected since %s with IP address %s and port number %i\n", name, timebuf, iptoa(users[k].ip), users[k].port);
	}
	else {
		sprintf(buffer, "[Server] : %s appears not to be a logged on user\n", name);
	}

	/* name was only needed to build the reply */
	free(name);

	do_write(socket, buffer);
}
void gotReply ( void *state , TcpSocket *s ) { // send another Msg28 *THIS = (Msg28 *)state; // count em THIS->m_numReplies++; // do not free send buffer s->m_sendBuf = NULL; // debug Host *h = g_hostdb.getTcpHost ( s->m_ip , s->m_port ); //if (THIS->m_sendToProxy) // h = g_hostdb.getProxyFromTcpPort ( s->m_ip , s->m_port ); log(LOG_INIT,"admin: got reply from hostid #%"INT32".",h->m_hostId); //slot->m_readBufSize,h->m_hostId); // log errors if ( g_errno ) { if ( h ) log("admin: Error broadcasting config request to " "hostid #%"INT32" (%s:%"INT32"): %s.", h->m_hostId,iptoa(h->m_ip),(int32_t)s->m_port, mstrerror(g_errno)); else log("admin: Error broadcasting config request: " "%s.",mstrerror(g_errno)); g_errno = 0; } // try to send more if ( ! THIS->doSendLoop ( ) ) return; // do we have all the replies? //if ( THIS->m_numReplies < THIS->m_numRequests ) return; // do not finish until we got them all if ( THIS->m_hostId < 0 && THIS->m_numReplies < THIS->m_sendTotal ) return; if ( THIS->m_hostId >= 0 && THIS->m_hostId2 >= 0 && THIS->m_numReplies < THIS->m_sendTotal ) return; // all done, free the buf here if ( THIS->m_freeBuf ) mfree ( THIS->m_buf , THIS->m_bufSize , "Msg28" ); THIS->m_buf = NULL; // all done if did not block THIS->m_callback ( THIS->m_state ); }
bool Conf::isCollAdmin2 ( TcpSocket *sock , HttpRequest *hr , CollectionRec *cr ) { if ( ! cr ) return false; //int32_t page = g_pages.getDynamicPageNumber(hr); // never for main or dmoz! must be root! if ( strcmp(cr->m_coll,"main")==0 ) return false; if ( strcmp(cr->m_coll,"dmoz")==0 ) return false; if ( ! g_conf.m_useCollectionPasswords) return false; // empty password field? then allow them through if ( cr->m_collectionPasswords.length() <= 0 && cr->m_collectionIps .length() <= 0 ) return true; // a good ip? char *p = iptoa(sock->m_ip); char *buf = cr->m_collectionIps.getBufStart(); if ( isInWhiteSpaceList ( p , buf ) ) return true; // if they got the password, let them in p = hr->getString("pwd"); if ( ! p ) p = hr->getString("password"); if ( ! p ) p = hr->getStringFromCookie("pwd"); if ( ! p ) return false; buf = cr->m_collectionPasswords.getBufStart(); if ( isInWhiteSpaceList ( p , buf ) ) return true; // the very act of just knowing the collname of a guest account // is good enough to update it //if ( strncmp ( cr->m_coll , "guest_" , 6 ) == 0 ) // return true; return false; }
uint8_t REDFLY::socketSendPGM(uint8_t socket, PGM_P stream, uint8_t *ip, uint16_t port) { uint8_t len; uint16_t size = strlen_P(stream); //socket uitoa(socket, (char*)buffer); //size strcat_P((char*)buffer, PSTR(",")); len = strlen((char*)buffer); uitoa(size, (char*)&buffer[len]); //ip if(ip && (socketState(socket) == SOCKET_UDP)) { strcat_P((char*)buffer, PSTR(",")); len = strlen((char*)buffer); iptoa(ip, (char*)&buffer[len]); } else { strcat_P((char*)buffer, PSTR(",0")); } //port if(port && (socketState(socket) == SOCKET_UDP)) { strcat_P((char*)buffer, PSTR(",")); len = strlen((char*)buffer); uitoa(port, (char*)&buffer[len]); } else { strcat_P((char*)buffer, PSTR(",0")); } //data strcat_P((char*)buffer, PSTR(",")); return cmd(PSTR(CMD_SEND), (char*)buffer, stream); //x,xxxx,xxx.xxx.xxx.xxx,xxxxx, }
bool PageNetTest::collectResults() { CollectionRec *cr = g_collectiondb.getRec ( m_coll ); if( m_numResultsSent >= g_hostdb.getNumHosts() ) return true; char temp[64]; long ip = g_hostdb.getHost( m_numResultsSent )->m_ip; long port = g_hostdb.getHost( m_numResultsSent )->m_httpPort; //long len = 0; sprintf(temp, "http://%s:%li/get?rnettest=1", iptoa(ip), port); log( LOG_DEBUG, "net: nettest: queried results from: %s", temp ); //Url u; //u.set( temp, len ); m_numResultsSent++; if ( ! g_httpServer.getDoc ( temp ,// &u , 0 , // ip 0 , //offset -1 , //size 0 , //modifiedSince this , //state gotResultsWrapper , //callback 30*1000 , //timeout cr->m_proxyIp , //proxyIp cr->m_proxyPort , //proxyPort 200 , //maxTextLen 200 ) ) return false; if ( g_errno ) { g_errno = 0; return gotResults ( NULL ); } return true; }
bool Msg12::confirmLockAcquisition ( ) { // ensure not in use. not msg12 replies outstanding. if ( m_numRequests != m_numReplies ) { char *xx=NULL;*xx=0; } // no longer use this char *xx=NULL;*xx=0; // we are now removing m_confirming = true; // make that the request // . point to start of the 12 byte request buffer // . m_lockSequence should still be valid ConfirmRequest *cq = &m_confirmRequest; char *request = (char *)cq; int32_t requestSize = sizeof(ConfirmRequest); // sanity if ( requestSize == sizeof(LockRequest)){ char *xx=NULL;*xx=0; } // set it cq->m_collnum = m_collnum; cq->m_doledbKey = m_doledbKey; cq->m_firstIp = m_firstIp; cq->m_lockKeyUh48 = m_lockKeyUh48; cq->m_maxSpidersOutPerIp = m_maxSpidersOutPerIp; // . use the locking group from when we sent the lock request // . get ptr to list of hosts in the group //Host *hosts = g_hostdb.getGroup ( m_lockGroupId ); // the same group (shard) that has the spiderRequest/Reply is // the one responsible for locking. Host *hosts = g_hostdb.getMyShard(); // this must select the same shard that is going to spider it! // i.e. our shard! because we check our local lock table to see // if a doled url is locked before spidering it ourselves. //Host *hosts = g_hostdb.getMyShard(); // shortcut UdpServer *us = &g_udpServer; // get # of hosts in each mirror group int32_t hpg = g_hostdb.getNumHostsPerShard(); // reset counts m_numRequests = 0; m_numReplies = 0; // note it if ( g_conf.m_logDebugSpider ) log("spider: confirming lock for uh48=%" PRIu64" firstip=%s", m_lockKeyUh48,iptoa(m_firstIp)); // loop over hosts in that shard for ( int32_t i = 0 ; i < hpg ; i++ ) { // get a host Host *h = &hosts[i]; // skip if dead! no need to get a reply from dead guys if ( g_hostdb.isDead ( h ) ) continue; // send request to him if ( ! 
us->sendRequest ( request , // a size of 2 should mean confirm requestSize , 0x12 , // msgType h->m_ip , h->m_port , h->m_hostId , NULL , // retSlotPtrPtr this , // state data gotLockReplyWrapper , udpserver_sendrequest_infinite_timeout ) ) // udpserver returns false and sets g_errno on error return true; // count them m_numRequests++; } // block? if ( m_numRequests > 0 ) return false; // did not block return true; }
// . callback taking the State00 of a list read request as "state"; sends
//   the list held in that state back on the state's udp slot
// . the "listb" and "msg5xx" arguments are not used; the list and msg5 are
//   taken from the state instead
// . on g_errno the state is freed and an error reply is sent instead
// . for RDB_LINKDB the list is compacted in place before sending: a record
//   whose linker ip equals that of the last kept record is dropped, unless
//   it is the final record of the list
// . slot should be auto-nuked upon transmission or error
// . TODO: ensure if this sendReply() fails does it really nuke the slot?
void gotListWrapper ( void *state , RdbList *listb , Msg5 *msg5xx ) {
	logTrace( g_conf.m_logTraceMsg0, "BEGIN" );

	// get the state
	State00 *st0 = (State00 *)state;
	// extract the udp slot and list and msg5
	// (slot and us are copied into locals, so the error path below can
	//  still use them after st0 has been deleted)
	UdpSlot   *slot =  st0->m_slot;
	RdbList   *list = &st0->m_list;
	Msg5      *msg5 = &st0->m_msg5;
	UdpServer *us   =  st0->m_us;
	// timing debug
	if ( g_conf.m_logTimingNet || g_conf.m_logDebugNet ) {
		//log("Msg0:hndled request %" PRIu64,gettimeofdayInMilliseconds());
		int32_t size = -1;
		// NOTE(review): list is the address of a member of st0, so
		// this check looks like it is always true
		if ( list ) size = list->getListSize();
		log(LOG_TIMING|LOG_DEBUG,
		    "net: msg0: Handled request for data. "
		    "Now sending data termId=%" PRIu64" size=%" PRId32
		    " transId=%" PRId32" ip=%s port=%i took=%" PRId64" "
		    "(niceness=%" PRId32").",
		    g_posdb.getTermId(msg5->m_startKey),
		    size,slot->m_transId,
		    iptoa(slot->m_ip),slot->m_port,
		    gettimeofdayInMilliseconds() - st0->m_startTime ,
		    st0->m_niceness );
	}

	// on error nuke the list and it's data
	if ( g_errno ) {
		mdelete ( st0 , sizeof(State00) , "Msg0" );
		delete (st0);
		// st0 is gone now; only the slot and us locals are used below
		// TODO: free "slot" if this send fails

		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__);
		us->sendErrorReply ( slot , g_errno );
		return;
	}

	QUICKPOLL(st0->m_niceness);
	// point to the serialized list in "list"
	char    *data      = list->getList();
	int32_t  dataSize  = list->getListSize();
	char    *alloc     = list->getAlloc();
	int32_t  allocSize = list->getAllocSize();
	// tell list not to free the data since it is a reply so UdpServer
	// will free it when it destroys the slot
	list->setOwnData ( false );
	// keep track of stats
	Rdb *rdb = getRdbFromId ( st0->m_rdbId );
	if ( rdb ) rdb->sentReplyGet ( dataSize );
	// TODO: can we free any memory here???

	// keep track of how long it takes to complete the send
	// (m_startTime is reused: from here on it marks the start of the send)
	st0->m_startTime = gettimeofdayInMilliseconds();
	// debug point
	int32_t oldSize = msg5->m_minRecSizes;
	int32_t newSize = msg5->m_minRecSizes + 20;
	// watch for wrap around
	// NOTE(review): this relies on the signed addition above wrapping
	// around, which the language does not guarantee
	if ( newSize < oldSize ) newSize = 0x7fffffff;
	// complain only for lists with a fixed data size of 0 that exceed the
	// requested size by more than the slack
	if ( dataSize > newSize && list->getFixedDataSize() == 0 &&
	     // do not annoy me with these linkdb msgs
	     dataSize > newSize+100 )
		log(LOG_LOGIC,"net: msg0: Sending more data than what was "
		    "requested. Ineffcient. Bad engineer. dataSize=%" PRId32" "
		    "minRecSizes=%" PRId32".",dataSize,oldSize);

	//
	// for linkdb lists, remove all the keys that have the same IP32
	// and store a count of what we removed somewhere
	//
	if ( st0->m_rdbId == RDB_LINKDB ) {
		// store compressed list on itself
		// (dst never gets ahead of the record being read, since at
		//  most LDBKS bytes are written per record visited)
		char *dst = list->m_list;
		// keep stats
		int32_t totalOrigLinks = 0;
		int32_t ipDups = 0;
		// ip of the last record we kept, 0 until the first one is kept
		int32_t lastIp32 = 0;
		char *listEnd = list->getListEnd();
		// compress the list
		for ( ; ! list->isExhausted() ; list->skipCurrentRecord() ) {
			// breathe
			QUICKPOLL ( st0->m_niceness );
			// count it
			totalOrigLinks++;
			// get rec
			char *rec = list->getCurrentRec();
			int32_t ip32 = g_linkdb.getLinkerIp_uk((key224_t *)rec );
			// same as one before?
			if ( ip32 == lastIp32 &&
			     // are we the last rec? include that for
			     // advancing the m_nextKey in Linkdb more
			     // efficiently.
			     rec + LDBKS < listEnd ) {
				ipDups++;
				continue;
			}
			// store it
			gbmemcpy (dst , rec , LDBKS );
			dst += LDBKS;
			// update it
			lastIp32 = ip32;
		}
		// . if we removed one key, store the stats
		// . caller should recognize reply is not a multiple of
		//   the linkdb key size LDBKS and know it's there!
		// . the stores are commented out, so nothing is appended and
		//   totalOrigLinks and ipDups end up unused
		if ( ipDups ) {
			//*(int32_t *)dst = totalOrigLinks;
			//dst += 4;
			//*(int32_t *)dst = ipDups;
			//dst += 4;
		}
		// update list parms
		list->m_listSize = dst - list->m_list;
		list->m_listEnd  = list->m_list + list->m_listSize;
		// re-read what we send, the list just got shorter
		data      = list->getList();
		dataSize  = list->getListSize();
	}

	//log("sending replySize=%" PRId32" min=%" PRId32,dataSize,msg5->m_minRecSizes);
	// . TODO: dataSize may not equal list->getListMaxSize() so
	//         Mem class may show an imbalance
	// . now g_udpServer is responsible for freeing data/dataSize
	// . the "true" means to call doneSending_ass() from the signal handler
	//   if need be
	// . st0 is passed along with doneSending_ass as the callback
	st0->m_us->sendReply_ass( data, dataSize, alloc, allocSize, slot, st0, doneSending_ass, -1, -1, true );

	logTrace( g_conf.m_logTraceMsg0, "END" );
}
uint8_t REDFLY::begin(uint8_t dhcp, uint8_t *ip, uint8_t *dns, uint8_t *gateway, uint8_t *netmask) { uint8_t len; //reset sockets and IP addr socketReset(); memset(ipaddr, 0, sizeof(ipaddr)); memset(buffer, 0, sizeof(buffer)); //dhcp or auto ip if(dhcp == 2) //Auto-IP { if(cmd(buffer, sizeof(buffer), PSTR(CMD_IPCONF IPCONF_AUTOIP)) == 0) //OKMACaddrIPaddrSUBNETGateway { memcpy(&ipaddr[0], &buffer[8], 4); return 0; } return 2; } else if(dhcp) //DHCP { if(cmd(buffer, sizeof(buffer), PSTR(CMD_IPCONF IPCONF_DHCP)) == 0) //OKMACaddrIPaddrSUBNETGateway { memcpy(&ipaddr[0], &buffer[8], 4); return 0; } return 1; } //static ip settings if(dns) { iptoa(dns, (char*)&buffer[0]); cmd(PSTR(CMD_DNSSERVER), (char*)buffer); //set DNS server (FW >= 4.3.0 required) } if(ip) { iptoa(ip, (char*)buffer); memcpy(ipaddr, ip, 4); } else { strcat_P((char*)buffer, PSTR("192.168.0.1")); ipaddr[0] = 192; ipaddr[1] = 168; ipaddr[2] = 0; ipaddr[3] = 1; } if(netmask) { strcat_P((char*)buffer, PSTR(",")); len = strlen((char*)buffer); iptoa(netmask, (char*)&buffer[len]); } else { strcat_P((char*)buffer, PSTR(",255.255.255.0")); } if(gateway) { strcat_P((char*)buffer, PSTR(",")); len = strlen((char*)buffer); iptoa(gateway, (char*)&buffer[len]); } return cmd(PSTR(CMD_IPCONF "0,"), (char*)buffer); //xxx.xxx.xxx.xxx,yyy.yyy.yyy.yyy,zzz.zzz.zzz.zzz }
// . returns false if blocked, true otherwise // . sets errno on error // . make a web page displaying the config of this host // . call g_httpServer.sendDynamicPage() to send it bool sendPageHosts ( TcpSocket *s , HttpRequest *r ) { // don't allow pages bigger than 128k in cache char buf [ 64*1024 ]; //char *p = buf; //char *pend = buf + 64*1024; SafeBuf sb(buf, 64*1024); // XML OR JSON char format = r->getReplyFormat(); // if ( format == FORMAT_XML || format == FORMAT_JSON ) // return sendPageHostsInXmlOrJson( s , r ); // check for a sort request int32_t sort = r->getLong ( "sort", -1 ); // sort by hostid with dead on top by default if ( sort == -1 ) sort = 16; const char *coll = r->getString ( "c" ); //char *pwd = r->getString ( "pwd" ); // check for setnote command int32_t setnote = r->getLong("setnote", 0); int32_t setsparenote = r->getLong("setsparenote", 0); // check for replace host command int32_t replaceHost = r->getLong("replacehost", 0); // check for sync host command int32_t syncHost = r->getLong("synchost", 0); // set note... if ( setnote == 1 ) { // get the host id to change int32_t host = r->getLong("host", -1); if ( host == -1 ) goto skipReplaceHost; // get the note to set int32_t noteLen; const char *note = r->getString("note", ¬eLen, "", 0); // set the note g_hostdb.setNote(host, note, noteLen); } // set spare note... if ( setsparenote == 1 ) { // get the host id to change int32_t spare = r->getLong("spare", -1); if ( spare == -1 ) goto skipReplaceHost; // get the note to set int32_t noteLen; const char *note = r->getString("note", ¬eLen, "", 0); // set the note g_hostdb.setSpareNote(spare, note, noteLen); } // replace host... if ( replaceHost == 1 ) { // get the host ids to swap int32_t rhost = r->getLong("rhost", -1); int32_t rspare = r->getLong("rspare", -1); if ( rhost == -1 || rspare == -1 ) goto skipReplaceHost; // replace g_hostdb.replaceHost(rhost, rspare); } // sync host... 
if ( syncHost == 1 ) { // get the host id to sync int32_t syncHost = r->getLong("shost", -1); if ( syncHost == -1 ) goto skipReplaceHost; // call sync g_hostdb.syncHost(syncHost, false); } if ( syncHost == 2 ) { // get the host id to sync int32_t syncHost = r->getLong("shost", -1); if ( syncHost == -1 ) goto skipReplaceHost; // call sync g_hostdb.syncHost(syncHost, true); } skipReplaceHost: int32_t refreshRate = r->getLong("rr", 0); if(refreshRate > 0 && format == FORMAT_HTML ) sb.safePrintf("<META HTTP-EQUIV=\"refresh\" " "content=\"%" PRId32"\"\\>", refreshRate); // print standard header // char *pp = sb.getBuf(); // char *ppend = sb.getBufEnd(); // if ( pp ) { if ( format == FORMAT_HTML ) g_pages.printAdminTop ( &sb , s , r ); // sb.incrementLength ( pp - sb.getBuf() ); // } const char *colspan = "30"; //char *shotcol = ""; char shotcol[1024]; shotcol[0] = '\0'; const char *cs = coll; if ( ! cs ) cs = ""; if ( g_conf.m_useShotgun && format == FORMAT_HTML ) { colspan = "31"; //shotcol = "<td><b>ip2</b></td>"; sprintf ( shotcol, "<td><a href=\"/admin/hosts?c=%s" "&sort=2\">" "<b>ping2</b></td></a>", cs); } // print host table if ( format == FORMAT_HTML ) sb.safePrintf ( "<table %s>" "<tr><td colspan=%s><center>" //"<font size=+1>" "<b>Hosts " "(<a href=\"/admin/hosts?c=%s&sort=%" PRId32"&resetstats=1\">" "reset)</a></b>" //"</font>" "</td></tr>" "<tr bgcolor=#%s>" "<td><a href=\"/admin/hosts?c=%s&sort=0\">" "<b>hostId</b></a></td>" "<td><b>host ip</b></td>" "<td><b>shard</b></td>" "<td><b>mirror</b></td>" // mirror # within the shard // i don't remember the last time i used this, so let's // just comment it out to save space //"<td><b>group mask</td>" //"<td><b>ip1</td>" //"<td><b>ip2</td>" //"<td><b>udp port</td>" // this is now more or less obsolete //"<td><b>priority udp port</td>" //"<td><b>dns client port</td>" "<td><b>http port</b></td>" // this is now obsolete since ide channel is. 
it was used // so that only the guy with the token could merge, // and it made sure that only one merge per ide channel // and per group was going on at any one time for performance // reasons. //"<td><b>token group</td>" //"<td><b>best switch id</td>" //"<td><b>actual switch id</td>" //"<td><b>switch id</td>" // this is now fairly obsolete //"<td><b>ide channel</td>" //"<td><b>HD temps (C)</b></td>" "<td><b>GB version</b></td>" //"<td><b>resends sent</td>" //"<td><b>errors recvd</td>" "<td><b>try agains recvd</b></td>" "<td><a href=\"/admin/hosts?c=%s&sort=3\">" "<b>dgrams resent</b></a></td>" /* MDW: take out for adding new stuff "<td><a href=\"/admin/hosts?c=%s&sort=4\">" "<b>errors recvd</a></td>" "<td><a href=\"/admin/hosts?c=%s&sort=5\">" "<b>ETRY AGAINS recvd</a></td>" "<td><a href=\"/admin/hosts?c=%s&sort=6\">" "<b>dgrams to</a></td>" "<td><a href=\"/admin/hosts?c=%s&sort=7\">" "<b>dgrams from</a></td>" */ // "<td><a href=\"/admin/hosts?c=%s&sort=18\">" // "<b>corrupts</a></td>" // "<td><a href=\"/admin/hosts?c=%s&sort=19\">" // "<b># ooms</a></td>" // "<td><a href=\"/admin/hosts?c=%s&sort=20\">" // "<b>socks closed</a></td>" //"<td><a href=\"/admin/hosts?c=%s&sort=8\">" //"<b>loadavg</a></td>" "<td><a href=\"/admin/hosts?c=%s&sort=13\">" "<b>avg split time</b></a></td>" "<td><b>splits done</b></a></td>" "<td><a href=\"/admin/hosts?c=%s&sort=12\">" "<b>status</b></a></td>" "<td><a href=\"/admin/hosts?c=%s&sort=15\">" "<b>slow reads</b></a></td>" "<td><b>docs indexed</a></td>" "<td><a href=\"/admin/hosts?c=%s&sort=9\">" "<b>mem used</a></td>" "<td><a href=\"/admin/hosts?c=%s&sort=10\">" "<b>cpu used</b></a></td>" "<td><a href=\"/admin/hosts?c=%s&sort=17\">" "<b>disk used</b></a></td>" "<td><a href=\"/admin/hosts?c=%s&sort=14\">" "<b>max ping1</b></a></td>" "<td><a href=\"/admin/hosts?c=%s&sort=11\">" "<b>ping1 age</b></a></td>" //"<td><b>ip1</td>" "<td><a href=\"/admin/hosts?c=%s&sort=1\">" "<b>ping1</b></a></td>" "%s"// "<td><b>ip2</td>" 
//"<td><b>inSync</td>", //"<td>avg roundtrip</td>" //"<td>std. dev.</td></tr>" "<td><b>note</b></td>", TABLE_STYLE , colspan , cs, sort, DARK_BLUE , cs, cs, cs, cs, cs, cs, cs, cs, cs, cs, cs, shotcol ); // loop through each host we know and print it's stats int32_t nh = g_hostdb.getNumHosts(); // should we reset resends, errorsRecvd and ETRYAGAINS recvd? if ( r->getLong("resetstats",0) ) { for ( int32_t i = 0 ; i < nh ; i++ ) { // get the ith host (hostId) Host *h = g_hostdb.getHost ( i ); h->m_pingInfo.m_totalResends = 0; h->m_errorReplies = 0; h->m_pingInfo.m_etryagains = 0; h->m_dgramsTo = 0; h->m_dgramsFrom = 0; h->m_splitTimes = 0; h->m_splitsDone = 0; h->m_pingInfo.m_slowDiskReads =0; } } // sort hosts if needed int32_t hostSort [ MAX_HOSTS ]; for ( int32_t i = 0 ; i < nh ; i++ ) hostSort [ i ] = i; switch ( sort ) { case 1: gbsort ( hostSort, nh, sizeof(int32_t), pingSort1 ); break; case 2: gbsort ( hostSort, nh, sizeof(int32_t), pingSort2 ); break; case 3: gbsort ( hostSort, nh, sizeof(int32_t), resendsSort ); break; case 4: gbsort ( hostSort, nh, sizeof(int32_t), errorsSort ); break; case 5: gbsort ( hostSort, nh, sizeof(int32_t), tryagainSort ); break; case 6: gbsort ( hostSort, nh, sizeof(int32_t), dgramsToSort ); break; case 7: gbsort ( hostSort, nh, sizeof(int32_t), dgramsFromSort ); break; //case 8: gbsort ( hostSort, nh, sizeof(int32_t), loadAvgSort ); break; case 9: gbsort ( hostSort, nh, sizeof(int32_t), memUsedSort ); break; case 10:gbsort ( hostSort, nh, sizeof(int32_t), cpuUsageSort ); break; case 11:gbsort ( hostSort, nh, sizeof(int32_t), pingAgeSort ); break; case 12:gbsort ( hostSort, nh, sizeof(int32_t), flagSort ); break; case 13:gbsort ( hostSort, nh, sizeof(int32_t), splitTimeSort ); break; case 14:gbsort ( hostSort, nh, sizeof(int32_t), pingMaxSort ); break; case 15:gbsort ( hostSort, nh, sizeof(int32_t), slowDiskSort ); break; case 16:gbsort ( hostSort, nh, sizeof(int32_t), defaultSort ); break; case 17:gbsort ( hostSort, nh, 
sizeof(int32_t), diskUsageSort ); break; } // we are the only one that uses these flags, so set them now /* static char s_properSet = 0; if ( ! s_properSet ) { s_properSet = 1; g_hostdb.setOnProperSwitchFlags(); } */ if ( format == FORMAT_XML ) { sb.safePrintf("<response>\n"); sb.safePrintf("\t<statusCode>0</statusCode>\n"); sb.safePrintf("\t<statusMsg>Success</statusMsg>\n"); } if ( format == FORMAT_JSON ) { sb.safePrintf("{\"response\":{\n"); sb.safePrintf("\t\"statusCode\":0,\n"); sb.safePrintf("\t\"statusMsg\":\"Success\",\n"); } int64_t nowmsLocal = gettimeofdayInMillisecondsLocal(); // compute majority gb version so we can highlight bad out of sync // gb versions in red below int32_t majorityHash32 = 0; int32_t lastCount = 0; // get majority gb version for ( int32_t si = 0 ; si < nh ; si++ ) { int32_t i = hostSort[si]; // get the ith host (hostId) Host *h = g_hostdb.getHost ( i ); char *vbuf = h->m_pingInfo.m_gbVersionStr;//gbVersionStrBuf; int32_t vhash32 = hash32n ( vbuf ); if ( vhash32 == majorityHash32 ) lastCount++; else lastCount--; if ( lastCount < 0 ) majorityHash32 = vhash32; } // print it //int32_t ng = g_hostdb.getNumGroups(); for ( int32_t si = 0 ; si < nh ; si++ ) { int32_t i = hostSort[si]; // get the ith host (hostId) Host *h = g_hostdb.getHost ( i ); // get avg/stdDev msg roundtrip times in ms for ith host //int32_t avg , stdDev; //g_hostdb.getTimes ( i , &avg , &stdDev ); char ptr[256]; int32_t pingAge = generatePingMsg(h, nowmsLocal, ptr); char pms[64]; if ( h->m_pingMax < 0 ) sprintf(pms,"???"); else sprintf(pms,"%" PRId32"ms",h->m_pingMax); // the sync status ascii-ized char syncStatus = h->m_syncStatus; const char *ptr2; if (syncStatus==0) ptr2 ="<b>N</b>"; else if (syncStatus==1) ptr2 ="Y"; else ptr2 ="?"; char ipbuf1[64]; char ipbuf2[64]; strcpy(ipbuf1,iptoa(h->m_ip)); strcpy(ipbuf2,iptoa(h->m_ipShotgun)); /* char hdbuf[128]; char *hp = hdbuf; for ( int32_t k = 0 ; k < 4 ; k++ ) { int32_t temp = h->m_hdtemps[k]; if ( temp > 50 && format 
== FORMAT_HTML ) hp += sprintf(hp,"<font color=red><b>%" PRId32 "</b></font>", temp); else hp += sprintf(hp,"%" PRId32,temp); if ( k < 3 ) *hp++ = '/'; *hp = '\0'; } */ char *vbuf = h->m_pingInfo.m_gbVersionStr;//m_gbVersionStrBuf; // get hash int32_t vhash32 = hash32n ( vbuf ); const char *vbuf1 = ""; const char *vbuf2 = ""; if ( vhash32 != majorityHash32 ) { vbuf1 = "<font color=red><b>"; vbuf2 = "</font></b>"; } //int32_t switchGroup = 0; //if ( g_hostdb.m_indexSplits > 1 ) // switchGroup = h->m_group%g_hostdb.m_indexSplits; // host can have 2 ip addresses, get the one most // similar to that of the requester int32_t eip = g_hostdb.getBestIp ( h , s->m_ip ); char ipbuf3[64]; strcpy(ipbuf3,iptoa(eip)); const char *fontTagFront = ""; const char *fontTagBack = ""; if ( h->m_pingInfo.m_percentMemUsed >= 98.0 && format == FORMAT_HTML ) { fontTagFront = "<font color=red>"; fontTagBack = "</font>"; } float cpu = h->m_pingInfo.m_cpuUsage; if ( cpu > 100.0 ) cpu = 100.0; if ( cpu < 0.0 ) cpu = -1.0; char diskUsageMsg[64]; sprintf(diskUsageMsg,"%.1f%%",h->m_pingInfo.m_diskUsage); if ( h->m_pingInfo.m_diskUsage < 0.0 ) sprintf(diskUsageMsg,"???"); if ( h->m_pingInfo.m_diskUsage>=98.0 && format == FORMAT_HTML ) sprintf(diskUsageMsg,"<font color=red><b>%.1f%%" "</b></font>",h->m_pingInfo.m_diskUsage); // split time, don't divide by zero! int32_t splitTime = 0; if ( h->m_splitsDone ) splitTime = h->m_splitTimes / h->m_splitsDone; //char flagString[32]; char tmpfb[64]; SafeBuf fb(tmpfb,64); //char *fs = flagString; //*fs = '\0'; // does its hosts.conf file disagree with ours? 
if ( h->m_pingInfo.m_hostsConfCRC && format == FORMAT_HTML && h->m_pingInfo.m_hostsConfCRC != g_hostdb.getCRC() ) fb.safePrintf("<font color=red><b title=\"Hosts.conf " "in disagreement with ours.\">H" "</b></font>"); if ( h->m_pingInfo.m_hostsConfCRC && format != FORMAT_HTML && h->m_pingInfo.m_hostsConfCRC != g_hostdb.getCRC() ) fb.safePrintf("Hosts.conf in disagreement with ours"); int32_t flags = h->m_pingInfo.m_flags; if ( format == FORMAT_HTML ) { // use these new ones for now int n = h->m_pingInfo.m_numCorruptDiskReads; if ( n ) fb.safePrintf("<font color=red><b>" "C" "<sup>%" PRId32"</sup>" "</b></font>" , n ); n = h->m_pingInfo.m_numOutOfMems; if ( n ) fb.safePrintf("<font color=red><b>" "O" "<sup>%" PRId32"</sup>" "</b></font>" , n ); n = h->m_pingInfo.m_socketsClosedFromHittingLimit; if ( n ) fb.safePrintf("<font color=red><b>" "K" "<sup>%" PRId32"</sup>" "</b></font>" , n ); if ( flags & PFLAG_OUTOFSYNC ) fb.safePrintf("<font color=red><b>" "N" "</b></font>" ); } // recovery mode? reocvered from coring? if ((flags & PFLAG_RECOVERYMODE)&& format == FORMAT_HTML ) { fb.safePrintf("<b title=\"Recovered from core" "\">x</b>"); // this is only 8-bits at the moment so it's capped // at 255. this level is 1 the first time we core // and are restarted. if ( h->m_pingInfo.m_recoveryLevel > 1 ) fb.safePrintf("<sup>%" PRId32"</sup>", (int32_t) h->m_pingInfo.m_recoveryLevel); } if ((flags & PFLAG_RECOVERYMODE)&& format != FORMAT_HTML ) fb.safePrintf("Recovered from core"); // rebalancing? if ( (flags & PFLAG_REBALANCING)&& format == FORMAT_HTML ) fb.safePrintf("<b title=\"Currently " "rebalancing\">R</b>"); if ( (flags & PFLAG_REBALANCING)&& format != FORMAT_HTML ) fb.safePrintf("Currently rebalancing"); // has recs that should be in another shard? indicates // we need to rebalance or there is a bad hosts.conf if ((flags & PFLAG_FOREIGNRECS) && format == FORMAT_HTML ) fb.safePrintf("<font color=red><b title=\"Foreign " "data " "detected. 
Needs rebalance.\">F" "</b></font>"); if ((flags & PFLAG_FOREIGNRECS) && format != FORMAT_HTML ) fb.safePrintf("Foreign data detected. " "Needs rebalance."); // if it has spiders going on say "S" with # as the superscript if ((flags & PFLAG_HASSPIDERS) && format == FORMAT_HTML ) fb.safePrintf ( "<span title=\"Spidering\">S" "<sup>%" PRId32"</sup>" "</span>" ,h->m_pingInfo.m_currentSpiders ); if ( format == FORMAT_HTML && h->m_pingInfo.m_udpSlotsInUseIncoming ) { const char *f1 = ""; const char *f2 = ""; // MAXUDPSLOTS in Spider.cpp is 300 right now if ( h->m_pingInfo.m_udpSlotsInUseIncoming >= 300 ) { f1 = "<b>"; f2 = "</b>"; } if ( h->m_pingInfo.m_udpSlotsInUseIncoming >= 400 ) { f1 = "<b><font color=red>"; f2 = "</font></b>"; } fb.safePrintf("<span title=\"udpSlotsInUse\">" "%s" "U" "<sup>%" PRId32"</sup>" "%s" "</span>" ,f1 ,h->m_pingInfo.m_udpSlotsInUseIncoming ,f2 ); } if ( format == FORMAT_HTML && h->m_pingInfo.m_tcpSocketsInUse){ const char *f1 = ""; const char *f2 = ""; if ( h->m_pingInfo.m_tcpSocketsInUse >= 100 ) { f1 = "<b>"; f2 = "</b>"; } if ( h->m_pingInfo.m_tcpSocketsInUse >= 200 ) { f1 = "<b><font color=red>"; f2 = "</font></b>"; } fb.safePrintf("<span title=\"tcpSocketsInUse\">" "%s" "T" "<sup>%" PRId32"</sup>" "%s" "</span>" ,f1 ,h->m_pingInfo.m_tcpSocketsInUse ,f2 ); } if ((flags & PFLAG_HASSPIDERS) && format != FORMAT_HTML ) fb.safePrintf ( "Spidering"); // say "M" if merging if ( (flags & PFLAG_MERGING) && format == FORMAT_HTML ) fb.safePrintf ( "<span title=\"Merging\">M</span>"); if ( (flags & PFLAG_MERGING) && format != FORMAT_HTML ) fb.safePrintf ( "Merging"); // say "D" if dumping if ( (flags & PFLAG_DUMPING) && format == FORMAT_HTML ) fb.safePrintf ( "<span title=\"Dumping\">D</span>"); if ( (flags & PFLAG_DUMPING) && format != FORMAT_HTML ) fb.safePrintf ( "Dumping"); // say "y" if doing the daily merge if ( !(flags & PFLAG_MERGEMODE0) ) fb.safePrintf ( "y"); if ( format == FORMAT_HTML && !h->m_spiderEnabled) { fb.safePrintf("<span 
title=\"Spider Disabled\" style=\"text-decoration:line-through;\">S</span>"); } if ( format == FORMAT_HTML && !h->m_queryEnabled) { fb.safePrintf("<span title=\"Query Disabled\" style=\"text-decoration:line-through;\">Q</span>"); } // clear it if it is us, this is invalid if ( ! h->m_gotPingReply ) { fb.reset(); fb.safePrintf("??"); } if ( fb.length() == 0 && format == FORMAT_HTML ) fb.safePrintf(" "); fb.nullTerm(); const char *bg = LIGHT_BLUE; if ( h->m_ping >= g_conf.m_deadHostTimeout ) bg = "ffa6a6"; // // BEGIN XML OUTPUT // if ( format == FORMAT_XML ) { sb.safePrintf("\t<host>\n" "\t\t<name><![CDATA[" ); sb.cdataEncode (h->m_hostname); sb.safePrintf("]]></name>\n"); sb.safePrintf("\t\t<shard>%" PRId32"</shard>\n", (int32_t)h->m_shardNum); sb.safePrintf("\t\t<mirror>%" PRId32"</mirror>\n", h->m_stripe); sb.safePrintf("\t\t<ip1>%s</ip1>\n", iptoa(h->m_ip)); sb.safePrintf("\t\t<ip2>%s</ip2>\n", iptoa(h->m_ipShotgun)); sb.safePrintf("\t\t<httpPort>%" PRId32"</httpPort>\n", (int32_t)h->m_httpPort); sb.safePrintf("\t\t<udpPort>%" PRId32"</udpPort>\n", (int32_t)h->m_port); sb.safePrintf("\t\t<dnsPort>%" PRId32"</dnsPort>\n", (int32_t)h->m_dnsClientPort); //sb.safePrintf("\t\t<hdTemp>%s</hdTemp>\n",hdbuf); sb.safePrintf("\t\t<gbVersion>%s</gbVersion>\n",vbuf); sb.safePrintf("\t\t<resends>%" PRId32"</resends>\n", h->m_pingInfo.m_totalResends); /* MDW: take out for new stuff sb.safePrintf("\t\t<errorReplies>%" PRId32"</errorReplies>\n", h->m_errorReplies); */ sb.safePrintf("\t\t<errorTryAgains>%" PRId32 "</errorTryAgains>\n", h->m_pingInfo.m_etryagains); sb.safePrintf("\t\t<udpSlotsInUse>%" PRId32 "</udpSlotsInUse>\n", h->m_pingInfo.m_udpSlotsInUseIncoming); sb.safePrintf("\t\t<tcpSocketsInUse>%" PRId32 "</tcpSocketsInUse>\n", h->m_pingInfo.m_tcpSocketsInUse); /* sb.safePrintf("\t\t<dgramsTo>%" PRId64"</dgramsTo>\n", h->m_dgramsTo); sb.safePrintf("\t\t<dgramsFrom>%" PRId64"</dgramsFrom>\n", h->m_dgramsFrom); */ sb.safePrintf("\t\t<numCorruptDiskReads>%" PRId32 
"</numCorruptDiskReads>\n" ,h->m_pingInfo.m_numCorruptDiskReads); sb.safePrintf("\t\t<numOutOfMems>%" PRId32 "</numOutOfMems>\n" ,h->m_pingInfo.m_numOutOfMems); sb.safePrintf("\t\t<numClosedSockets>%" PRId32 "</numClosedSockets>\n" ,h->m_pingInfo. m_socketsClosedFromHittingLimit); sb.safePrintf("\t\t<numOutstandingSpiders>%" PRId32 "</numOutstandingSpiders>\n" ,h->m_pingInfo.m_currentSpiders ); sb.safePrintf("\t\t<splitTime>%" PRId32"</splitTime>\n", splitTime); sb.safePrintf("\t\t<splitsDone>%" PRId32"</splitsDone>\n", h->m_splitsDone); sb.safePrintf("\t\t<status><![CDATA[%s]]></status>\n", fb.getBufStart()); sb.safePrintf("\t\t<slowDiskReads>%" PRId32 "</slowDiskReads>\n", h->m_pingInfo.m_slowDiskReads); sb.safePrintf("\t\t<docsIndexed>%" PRId32 "</docsIndexed>\n", h->m_pingInfo.m_totalDocsIndexed); sb.safePrintf("\t\t<percentMemUsed>%.1f%%" "</percentMemUsed>", h->m_pingInfo.m_percentMemUsed); // float sb.safePrintf("\t\t<cpuUsage>%.1f%%" "</cpuUsage>", cpu ); sb.safePrintf("\t\t<percentDiskUsed><![CDATA[%s]]>" "</percentDiskUsed>", diskUsageMsg); sb.safePrintf("\t\t<maxPing1>%s</maxPing1>\n", pms ); sb.safePrintf("\t\t<maxPingAge1>%" PRId32"ms</maxPingAge1>\n", pingAge ); sb.safePrintf("\t\t<ping1>%s</ping1>\n", ptr ); sb.safePrintf("\t\t<note>%s</note>\n", h->m_note ); sb.safePrintf("\t\t<spider>%" PRId32"</spider>\n", (int32_t)h->m_spiderEnabled ); sb.safePrintf("\t\t<query>%" PRId32"</query>\n", (int32_t)h->m_queryEnabled ); sb.safePrintf("\t</host>\n"); continue; } // // END XML OUTPUT // // // BEGIN JSON OUTPUT // if ( format == FORMAT_JSON ) { sb.safePrintf("\t\"host\":{\n"); sb.safePrintf("\t\t\"name\":\"%s\",\n",h->m_hostname); sb.safePrintf("\t\t\"shard\":%" PRId32",\n", (int32_t)h->m_shardNum); sb.safePrintf("\t\t\"mirror\":%" PRId32",\n", h->m_stripe); sb.safePrintf("\t\t\"ip1\":\"%s\",\n",iptoa(h->m_ip)); sb.safePrintf("\t\t\"ip2\":\"%s\",\n", iptoa(h->m_ipShotgun)); sb.safePrintf("\t\t\"httpPort\":%" PRId32",\n", (int32_t)h->m_httpPort); 
sb.safePrintf("\t\t\"udpPort\":%" PRId32",\n", (int32_t)h->m_port); sb.safePrintf("\t\t\"dnsPort\":%" PRId32",\n", (int32_t)h->m_dnsClientPort); //sb.safePrintf("\t\t\"hdTemp\":\"%s\",\n",hdbuf); sb.safePrintf("\t\t\"gbVersion\":\"%s\",\n",vbuf); sb.safePrintf("\t\t\"resends\":%" PRId32",\n", h->m_pingInfo.m_totalResends); /* sb.safePrintf("\t\t\"errorReplies\":%" PRId32",\n", h->m_errorReplies); */ sb.safePrintf("\t\t\"errorTryAgains\":%" PRId32",\n", h->m_pingInfo.m_etryagains); sb.safePrintf("\t\t\"udpSlotsInUse\":%" PRId32",\n", h->m_pingInfo.m_udpSlotsInUseIncoming); sb.safePrintf("\t\t\"tcpSocketsInUse\":%" PRId32",\n", h->m_pingInfo.m_tcpSocketsInUse); /* sb.safePrintf("\t\t\"dgramsTo\":%" PRId64",\n", h->m_dgramsTo); sb.safePrintf("\t\t\"dgramsFrom\":%" PRId64",\n", h->m_dgramsFrom); */ sb.safePrintf("\t\t\"numCorruptDiskReads\":%" PRId32",\n" ,h->m_pingInfo.m_numCorruptDiskReads); sb.safePrintf("\t\t\"numOutOfMems\":%" PRId32",\n" ,h->m_pingInfo.m_numOutOfMems); sb.safePrintf("\t\t\"numClosedSockets\":%" PRId32",\n" ,h->m_pingInfo. 
m_socketsClosedFromHittingLimit); sb.safePrintf("\t\t\"numOutstandingSpiders\":%" PRId32 ",\n" ,h->m_pingInfo.m_currentSpiders ); sb.safePrintf("\t\t\"splitTime\":%" PRId32",\n", splitTime); sb.safePrintf("\t\t\"splitsDone\":%" PRId32",\n", h->m_splitsDone); sb.safePrintf("\t\t\"status\":\"%s\",\n", fb.getBufStart()); sb.safePrintf("\t\t\"slowDiskReads\":%" PRId32",\n", h->m_pingInfo.m_slowDiskReads); sb.safePrintf("\t\t\"docsIndexed\":%" PRId32",\n", h->m_pingInfo.m_totalDocsIndexed); sb.safePrintf("\t\t\"percentMemUsed\":\"%.1f%%\",\n", h->m_pingInfo.m_percentMemUsed); // float sb.safePrintf("\t\t\"cpuUsage\":\"%.1f%%\",\n",cpu); sb.safePrintf("\t\t\"percentDiskUsed\":\"%s\",\n", diskUsageMsg); sb.safePrintf("\t\t\"maxPing1\":\"%s\",\n",pms); sb.safePrintf("\t\t\"maxPingAge1\":\"%" PRId32"ms\",\n", pingAge ); sb.safePrintf("\t\t\"ping1\":\"%s\",\n", ptr ); sb.safePrintf("\t\t\"note\":\"%s\"\n", h->m_note ); sb.safePrintf("\t\t\"spider\":\"%" PRId32"\"\n", (int32_t)h->m_spiderEnabled ); sb.safePrintf("\t\t\"query\":\"%" PRId32"\"\n", (int32_t)h->m_queryEnabled ); sb.safePrintf("\t},\n"); continue; } // // END JSON OUTPUT // sb.safePrintf ( "<tr bgcolor=#%s>" "<td><a href=\"http://%s:%hi/admin/hosts?" 
"" "c=%s" "&sort=%" PRId32"\">%" PRId32"</a></td>" "<td>%s</td>" // hostname "<td>%" PRId32"</td>" // group "<td>%" PRId32"</td>" // stripe //"<td>0x%08" PRIx32"</td>" // group mask //"<td>%s</td>" // ip1 //"<td>%s</td>" // ip2 //"<td>%hi</td>" // port //"<td>%hi</td>" // client port "<td>%hi</td>" // http port //"<td>%" PRId32"</td>" // token group num //"<td>%" PRId32"</td>" // switch group //"<td>%s</td>" // tmpN // hd temps // no, this is gb version now "<td><nobr>%s%s%s</nobr></td>" // resends "<td>%" PRId32"</td>" // error replies //"<td>%" PRId32"</td>" // etryagains "<td>%" PRId32"</td>" // # dgrams sent to //"<td>%" PRId64"</td>" // # dgrams recvd from //"<td>%" PRId64"</td>" // loadavg //"<td>%.2f</td>" // split time "<td>%" PRId32"</td>" // splits done "<td>%" PRId32"</td>" // flags "<td>%s</td>" // slow disk reads "<td>%" PRId32"</td>" // docs indexed "<td>%" PRId32"</td>" // percent mem used "<td>%s%.1f%%%s</td>" // cpu usage "<td>%.1f%%</td>" // disk usage "<td>%s</td>" // ping max "<td>%s</td>" // ping age "<td>%" PRId32"ms</td>" // ping "<td>%s</td>" //"<td>%s</td>" //"<td>%" PRId32"ms</td>" "<td nowrap=1>%s</td>" "</tr>" , bg,//LIGHT_BLUE , ipbuf3, h->m_httpPort, cs, sort, i , h->m_hostname, (int32_t)h->m_shardNum,//group, h->m_stripe, // group mask is not looked at a lot and is // really only for indexdb and a few other rdbs //g_hostdb.makeGroupId(i,ng) , //ipbuf1, //ipbuf2, //h->m_port , //h->m_dnsClientPort , h->m_httpPort , //h->m_tokenGroupNum, //switchGroup , //tmpN, vbuf1, vbuf,//hdbuf, vbuf2, h->m_pingInfo.m_totalResends, // h->m_errorReplies, h->m_pingInfo.m_etryagains, // h->m_dgramsTo, // h->m_dgramsFrom, //h->m_loadAvg, // double splitTime, h->m_splitsDone, fb.getBufStart(),//flagString, h->m_pingInfo.m_slowDiskReads, h->m_pingInfo.m_totalDocsIndexed, fontTagFront, h->m_pingInfo.m_percentMemUsed, // float fontTagBack, cpu, // float diskUsageMsg, // ping max pms, // ping age pingAge, //avg , //stdDev, //ping, ptr , //ptr2 , h->m_note ); 
} if ( format == FORMAT_XML ) { sb.safePrintf("</response>\n"); return g_httpServer.sendDynamicPage ( s , sb.getBufStart(), sb.length() , 0, false, "text/xml"); } if ( format == FORMAT_JSON ) { // remove last \n, from json host{} sb.m_length -= 2; sb.safePrintf("\n}\n}"); return g_httpServer.sendDynamicPage ( s , sb.getBufStart(), sb.length() , 0, false, "application/json"); } // end the table now sb.safePrintf ( "</table><br>\n" ); if( g_hostdb.m_numSpareHosts ) { // print spare hosts table sb.safePrintf ( "<table %s>" "<tr class=hdrow><td colspan=10><center>" //"<font size=+1>" "<b>Spares</b>" //"</font>" "</td></tr>" "<tr bgcolor=#%s>" "<td><b>spareId</td>" "<td><b>host name</td>" "<td><b>ip1</td>" "<td><b>ip2</td>" //"<td><b>udp port</td>" //"<td><b>priority udp port</td>" //"<td><b>dns client port</td>" "<td><b>http port</td>" //"<td><b>switch id</td>" // this is now fairly obsolete //"<td><b>ide channel</td>" "<td><b>note</td>", TABLE_STYLE, DARK_BLUE ); for ( int32_t i = 0; i < g_hostdb.m_numSpareHosts; i++ ) { // get the ith host (hostId) Host *h = g_hostdb.getSpare ( i ); char ipbuf1[64]; char ipbuf2[64]; strcpy(ipbuf1,iptoa(h->m_ip)); strcpy(ipbuf2,iptoa(h->m_ipShotgun)); // print it sb.safePrintf ( "<tr bgcolor=#%s>" "<td>%" PRId32"</td>" "<td>%s</td>" "<td>%s</td>" "<td>%s</td>" //"<td>%hi</td>" //"<td>%hi</td>" // priority udp port //"<td>%hi</td>" "<td>%hi</td>" //"<td>%i</td>" // switch id "<td>%s</td>" "</tr>" , LIGHT_BLUE, i , h->m_hostname, ipbuf1, ipbuf2, //h->m_port , //h->m_port2 , //h->m_dnsClientPort , h->m_httpPort , //h->m_switchId, h->m_note ); } sb.safePrintf ( "</table><br>" ); } /* // print proxy hosts table sb.safePrintf ( "<table %s>" "<tr class=hdrow><td colspan=12><center>" //"<font size=+1>" "<b>Proxies</b>" //"</font>" "</td></tr>" "<tr bgcolor=#%s>" "<td><b>proxyId</b></td>" "<td><b>type</b></td>" "<td><b>host name</b></td>" "<td><b>ip1</b></td>" "<td><b>ip2</b></td>" //"<td><b>udp port</td>" //"<td><b>priority udp port</td>" 
//"<td><b>dns client port</td>" "<td><b>http port</b></td>" //"<td><b>switch id</td>" "<td><b>max ping1</b></td>" "<td><b>ping1 age</b></td>" "<td><b>ping1</b></td>" //"<td><b>ping2</b></td>" // this is now fairly obsolete //"<td><b>ide channel</td>" "<td><b>note</td>", TABLE_STYLE, DARK_BLUE ); for ( int32_t i = 0; i < g_hostdb.m_numProxyHosts; i++ ) { // get the ith host (hostId) Host *h = g_hostdb.getProxy ( i ); char ptr[256]; int32_t pingAge = generatePingMsg(h, nowmsLocal, ptr); char ipbuf1[64]; char ipbuf2[64]; strcpy(ipbuf1,iptoa(h->m_ip)); strcpy(ipbuf2,iptoa(h->m_ipShotgun)); // host can have 2 ip addresses, get the one most // similar to that of the requester int32_t eip = g_hostdb.getBestIp ( h , s->m_ip ); char ipbuf3[64]; strcpy(ipbuf3,iptoa(eip)); char pms[64]; if ( h->m_pingMax < 0 ) sprintf(pms,"???"); else sprintf(pms,"%" PRId32"ms",h->m_pingMax); // the sync status ascii-ized char *type = "proxy"; if ( h->m_type == HT_QCPROXY ) type = "qcproxy"; if ( h->m_type == HT_SCPROXY ) type = "scproxy"; // print it sb.safePrintf ( "<tr bgcolor=#%s>" "<td><a href=\"http://%s:%hi/admin/hosts?" 
"" "c=%s\">" "%" PRId32"</a></td>" "<td>%s</td>" "<td>%s</td>" "<td>%s</td>" "<td>%s</td>" //"<td>%hi</td>" //"<td>%hi</td>" // priority udp port //"<td>%hi</td>" "<td>%hi</td>" //"<td>%i</td>" // switch id "<td>%s</td>" // ping max "<td>%" PRId32"ms</td>" // ping age "<td>%s</td>" // ping //"<td>%" PRId32"</td>" // ide channel "<td>%s </td>" "</tr>" , LIGHT_BLUE, ipbuf3, h->m_httpPort, cs, i , type, h->m_hostname, ipbuf1, ipbuf2, //h->m_port , //h->m_port2 , //h->m_dnsClientPort , h->m_httpPort , //h->m_switchId, pms, pingAge, ptr, //h->m_ideChannel , h->m_note ); } sb.safePrintf ( "</table><br><br>" ); */ sb.safePrintf( "<style>" ".poo { background-color:#%s;}\n" "</style>\n" , LIGHT_BLUE ); // print help table sb.safePrintf ( "<table %s>" "<tr class=hdrow><td colspan=10><center>" //"<font size=+1>" "<b>Key</b>" //"</font>" "</td></tr>" "<tr class=poo>" "<td>host ip</td>" "<td>The primary IP address of the host." "</td>" "</tr>\n" "<tr class=poo>" "<td>shard</td>" "<td>" "The index is split into shards. Which shard does this " "host serve?" "</td>" "</tr>\n" "<tr class=poo>" "<td>mirror</td>" "<td>" "A shard can be mirrored multiple times for " "data redundancy." "</td>" "</tr>\n" /* "<tr class=poo>" "<td>ip2</td>" "<td>The secondary IP address of the host." "</td>" "</tr>\n" "<tr class=poo>" "<td>udp port</td>" "<td>The UDP port the host uses to send and recieve " "datagrams." "</td>" "</tr>\n" "<tr class=poo>" "<td>dns client port</td>" "<td>The UDP port used to send and receive dns traffic with." "</td>" "</tr>\n" "<tr class=poo>" "<td>http port</td>" "<td>The port you can connect a browser to." "</td>" "</tr>\n" "<tr class=poo>" "<td>best switch id</td>" "<td>The host prefers to be on this switch because it " "needs to send a lot of data to other hosts on this swtich. " "Therefore, ideally, the best switch id should match the " "actual switch id for optimal performance." 
"</td>" "</tr>\n" */ /* "<tr class=poo>" "<td>switch id</td>" "<td>Hosts that share the same switch id are " "physically on the same switch." "</td>" "</tr>\n" */ "<tr class=poo>" "<td>dgrams resent</td>" "<td>How many datagrams have had to be resent to a host " "because it was not ACKed quick enough or because it was " "fully ACKed but the entire request was resent in case " "the host was reset." "</td>" "</tr>\n" /* "<tr class=poo>" "<td>errors recvd</td>" "<td>How many errors were received from a host in response " "to a request to retrieve or insert data." "</td>" "</tr>\n" */ "<tr class=poo>" "<td>try agains recvd</td>" "<td>How many ETRYAGAIN errors " "were received in response to a " "request to add data. Usually because the host's memory " "is full and it is dumping its data to disk. This number " "can be high if the host if failing to dump the data " "to disk because of some malfunction, and it can therefore " "bottleneck the entire cluster." "</td>" "</tr>\n" /* "<tr class=poo>" "<td>dgrams to</td>" "<td>How many datagrams were sent to the host from the " "selected host since startup. Includes ACK datagrams. This " "can actually be higher than the number of dgrams read " "when the selected host is the same as the host in the " "table because of resends. Gigablast will resend datagrams " "that are not promptly ACKknowledged." "</td>" "</tr>\n" "<tr class=poo>" "<td>dgrams from</td>" "<td>How many datagrams were received from the host by the " "selected host since startup. Includes ACK datagrams." "</td>" "</tr>\n" */ "<tr class=poo>" "<td>avg split time</td>" "<td>Average time this host took to compute the docids " "for a query. Useful for guaging the slowness of a host " "compare to other hosts." "</td>" "</tr>\n" "<tr class=poo>" "<td>splits done</td>" "<td>Number of queries this host completed. Used in " "computation of the <i>avg split time</i>." "</td>" "</tr>\n" "<tr class=poo>" "<td>status</td>" "<td>Status flags for the host. See key below." 
"</td>" "</tr>\n" "<tr class=poo>" "<td>slow reads</td>" "<td>Number of slow disk reads the host has had. " "When this is big compared to other hosts it is a good " "indicator its drives are relatively slow." "</td>" "</tr>\n" "<tr class=poo>" "<td>docs indexed</td>" "<td>Number of documents this host has indexed over all " "collections. All hosts should have close to the same " "number in a well-sharded situation." "</td>" "</tr>\n" //"<tr class=poo>" //"<td>loadavg</td>" //"<td>1-minute sliding-window load average from " //"/proc/loadavg." //"</td>" //"</tr>\n" "<tr class=poo>" "<td>mem used</td>" "<td>Percentage of memory currently used." "</td>" "</tr>\n" "<tr class=poo>" "<td>cpu used</td>" "<td>Percentage of cpu resources in use by the gb process." "</td>" "</tr>\n" "<tr class=poo>" "<td>disk used</td>" "<td>Percentage of disk in use. When this gets close to " "100%% you need to do something." "</td>" "</tr>\n" "<tr class=poo>" "<td>max ping1</td>" "<td>The worst ping latency from host to host." "</td>" "</tr>\n" "<tr class=poo>" "<td>ping1 age</td>" "<td>How long ago the last ping request was sent to " "this host. Let's us know how fresh the ping time is." "</td>" "</tr>\n" "<tr class=poo>" "<td>ping1</td>" "<td>Ping time to this host on the primary network." "</td>" "</tr>\n" /* "<tr class=poo>" "<td>ping2</td>" "<td>Ping time to this host on the seconday/shotgun " "network. This column is not visible if the shotgun " "network is not enabled in the master controls." "</td>" "</tr>\n" */ "<tr class=poo>" "<td>M (status flag)</td>" "<td>Indicates host is merging files on disk." "</td>" "</tr>\n" "<tr class=poo>" "<td>D (status flag)</td>" "<td>Indicates host is dumping data to disk." "</td>" "</tr>\n" "<tr class=poo>" "<td>S (status flag)</td>" "<td>Indicates host has outstanding spiders." "</td>" "</tr>\n" "<tr class=poo>" "<td>y (status flag)</td>" "<td>Indicates host is performing the daily merge." 
"</td>" "</tr>\n" "<tr class=poo>" "<td>R (status flag)</td>" "<td>Indicates host is performing a rebalance operation." "</td>" "</tr>\n" "<tr class=poo>" "<td>F (status flag)</td>" "<td>Indicates host has foreign records and requires " "a rebalance operation." "</td>" "</tr>\n" "<tr class=poo>" "<td>x (status flag)</td>" "<td>Indicates host has abruptly exited due to a fatal " "error (cored) and " "restarted itself. The exponent is how many times it has " "done this. If no exponent, it only did it once." "</td>" "</tr>\n" "<tr class=poo>" "<td>C (status flag)</td>" "<td>Indicates # of corrupted disk reads." "</td>" "</tr>\n" "<tr class=poo>" "<td>K (status flag)</td>" "<td>Indicates # of sockets closed from hitting limit." "</td>" "</tr>\n" "<tr class=poo>" "<td><nobr>O (status flag)</nobr></td>" "<td>Indicates # of times we ran out of memory." "</td>" "</tr>\n" "<tr class=poo>" "<td><nobr>N (status flag)</nobr></td>" "<td>Indicates host's clock is NOT in sync with host #0. " "Gigablast should automatically sync on startup, " "so this would be a problem " "if it does not go away. Hosts need to have their clocks " "in sync before they can add data to their index." "</td>" "</tr>\n" "<tr class=poo>" "<td><nobr>U (status flag)</nobr></td>" "<td>Indicates the number of active UDP transactions " "which are incoming requests. These will pile up if a " "host can't handle them fast enough." "</td>" "</tr>\n" "<tr class=poo>" "<td><nobr>T (status flag)</nobr></td>" "<td>Indicates the number of active TCP transactions " "which are either outgoing or incoming requests." "</td>" "</tr>\n" , TABLE_STYLE ); sb.safePrintf ( "</table><br></form><br>" ); //p = g_pages.printAdminBottom ( p , pend ); // calculate buffer length //int32_t bufLen = p - buf; // . send this page // . encapsulates in html header and tail // . make a Mime return g_httpServer.sendDynamicPage ( s , (char*) sb.getBufStart() , sb.length() ); }
// . slot should be auto-nuked upon transmission or error // . TODO: ensure if this sendReply() fails does it really nuke the slot? void gotListWrapper ( void *state , RdbList *listb , Msg5 *msg5xx ) { // get the state State00 *st0 = (State00 *)state; // extract the udp slot and list and msg5 UdpSlot *slot = st0->m_slot; RdbList *list = &st0->m_list; Msg5 *msg5 = &st0->m_msg5; UdpServer *us = st0->m_us; // sanity check -- ensure they match //if ( niceness != st0->m_niceness ) // log("Msg0: niceness mismatch"); // debug msg //if ( niceness != 0 ) // log("HEY! niceness is not 0"); // timing debug if ( g_conf.m_logTimingNet || g_conf.m_logDebugNet ) { //log("Msg0:hndled request %"UINT64"",gettimeofdayInMilliseconds()); int32_t size = -1; if ( list ) size = list->getListSize(); log(LOG_TIMING|LOG_DEBUG, "net: msg0: Handled request for data. " "Now sending data termId=%"UINT64" size=%"INT32"" " transId=%"INT32" ip=%s port=%i took=%"INT64" " "(niceness=%"INT32").", g_posdb.getTermId(msg5->m_startKey), size,slot->m_transId, iptoa(slot->m_ip),slot->m_port, gettimeofdayInMilliseconds() - st0->m_startTime , st0->m_niceness ); } // debug //if ( ! msg5->m_includeTree ) // log("hotit\n"); // on error nuke the list and it's data if ( g_errno ) { mdelete ( st0 , sizeof(State00) , "Msg0" ); delete (st0); // TODO: free "slot" if this send fails us->sendErrorReply ( slot , g_errno ); return; } QUICKPOLL(st0->m_niceness); // point to the serialized list in "list" char *data = list->getList(); int32_t dataSize = list->getListSize(); char *alloc = list->getAlloc(); int32_t allocSize = list->getAllocSize(); // tell list not to free the data since it is a reply so UdpServer // will free it when it destroys the slot list->setOwnData ( false ); // keep track of stats Rdb *rdb = getRdbFromId ( st0->m_rdbId ); if ( rdb ) rdb->sentReplyGet ( dataSize ); // TODO: can we free any memory here??? 
// keep track of how long it takes to complete the send st0->m_startTime = gettimeofdayInMilliseconds(); // debug point int32_t oldSize = msg5->m_minRecSizes; int32_t newSize = msg5->m_minRecSizes + 20; // watch for wrap around if ( newSize < oldSize ) newSize = 0x7fffffff; if ( dataSize > newSize && list->getFixedDataSize() == 0 && // do not annoy me with these linkdb msgs dataSize > newSize+100 ) log(LOG_LOGIC,"net: msg0: Sending more data than what was " "requested. Ineffcient. Bad engineer. dataSize=%"INT32" " "minRecSizes=%"INT32".",dataSize,oldSize); /* // always compress these lists if ( st0->m_rdbId == RDB_SECTIONDB ) { // && 1 == 3) { // get sh48, the sitehash key128_t *startKey = (key128_t *)msg5->m_startKey ; int64_t sh48 = g_datedb.getTermId(startKey); // debug //log("msg0: got sectiondblist from disk listsize=%"INT32"", // list->getListSize()); if ( dataSize > 50000 ) log("msg0: sending back list rdb=%"INT32" " "listsize=%"INT32" sh48=0x%"XINT64"", (int32_t)st0->m_rdbId, dataSize, sh48); // save it int32_t origDataSize = dataSize; // store compressed list on itself char *dst = list->m_list; // warn if niceness is 0! if ( st0->m_niceness == 0 ) log("msg0: compressing sectiondb list at niceness 0!"); // compress the list uint32_t lastVoteHash32 = 0LL; SectionVote *lastVote = NULL; for ( ; ! 
list->isExhausted() ; list->skipCurrentRecord() ) { // breathe QUICKPOLL ( st0->m_niceness ); // get rec char *rec = list->getCurrentRec(); // for ehre key128_t *key = (key128_t *)rec; // the score is the bit which is was set in // Section::m_flags for that docid int32_t secType = g_indexdb.getScore ( (char *)key ); // 0 means it probably used to count # of voters // from this site, so i don't think xmldoc uses // that any more if ( secType == SV_SITE_VOTER ) continue; // treat key like a datedb key and get the taghash uint32_t h32 = g_datedb.getDate ( key ); // get data/vote from the current record in the // sectiondb list SectionVote *sv=(SectionVote *)list->getCurrentData (); // get the average score for this doc float avg = sv->m_score ; if ( sv->m_numSampled > 0.0 ) avg /= sv->m_numSampled; // if same as last guy, add to it if ( lastVoteHash32 == h32 && lastVote ) { // turn possible multi-vote into single docid // into a single vote, with the score averaged. lastVote->m_score += avg; lastVote->m_numSampled++; continue; } // otherwise, add in a new guy! *(key128_t *)dst = *key; dst += sizeof(key128_t); // the new vote SectionVote *dsv = (SectionVote *)dst; dsv->m_score = avg; dsv->m_numSampled = 1; // set this lastVote = dsv; lastVoteHash32 = h32; // skip over dst += sizeof(SectionVote); } // update the list size now for sending back dataSize = dst - data; // if the list was over the requested minrecsizes we need // to set a flag so that the caller will do a re-call. // so making the entire odd, will be the flag. 
if ( origDataSize > msg5->m_minRecSizes && dataSize < origDataSize ) { *dst++ = '\0'; dataSize++; } // debug //log("msg0: compressed sectiondblist from disk " // "newlistsize=%"INT32"", dataSize); // use this timestamp int32_t now = getTimeLocal();//Global(); // finally, cache this sucker s_sectiondbCache.addRecord ( msg5->m_coll, (char *)startKey,//(char *)&sh48 data, dataSize , now ); // ignore errors g_errno = 0; } */ // // for linkdb lists, remove all the keys that have the same IP32 // and store a count of what we removed somewhere // if ( st0->m_rdbId == RDB_LINKDB ) { // store compressed list on itself char *dst = list->m_list; // keep stats int32_t totalOrigLinks = 0; int32_t ipDups = 0; int32_t lastIp32 = 0; char *listEnd = list->getListEnd(); // compress the list for ( ; ! list->isExhausted() ; list->skipCurrentRecord() ) { // breathe QUICKPOLL ( st0->m_niceness ); // count it totalOrigLinks++; // get rec char *rec = list->getCurrentRec(); int32_t ip32 = g_linkdb.getLinkerIp_uk((key224_t *)rec ); // same as one before? if ( ip32 == lastIp32 && // are we the last rec? include that for // advancing the m_nextKey in Linkdb more // efficiently. rec + LDBKS < listEnd ) { ipDups++; continue; } // store it gbmemcpy (dst , rec , LDBKS ); dst += LDBKS; // update it lastIp32 = ip32; } // . if we removed one key, store the stats // . caller should recognize reply is not a multiple of // the linkdb key size LDBKS and no its there! if ( ipDups ) { //*(int32_t *)dst = totalOrigLinks; //dst += 4; //*(int32_t *)dst = ipDups; //dst += 4; } // update list parms list->m_listSize = dst - list->m_list; list->m_listEnd = list->m_list + list->m_listSize; data = list->getList(); dataSize = list->getListSize(); } //log("sending replySize=%"INT32" min=%"INT32"",dataSize,msg5->m_minRecSizes); // . TODO: dataSize may not equal list->getListMaxSize() so // Mem class may show an imblanace // . now g_udpServer is responsible for freeing data/dataSize // . 
the "true" means to call doneSending_ass() from the signal handler // if need be st0->m_us->sendReply_ass ( data , dataSize , alloc , // alloc allocSize , // alloc size slot , 60 , st0 , doneSending_ass , -1 , -1 , true ); }
void Syncdb::syncStart_r ( bool amThread ) { // turn this off g_process.m_suspendAutoSave = true; char cmd[1024]; // get synchost best ip char *ips = iptoa ( g_hostdb.getAliveIp ( g_hostdb.m_syncHost ) ); // his dir char *dir = g_hostdb.m_syncHost->m_dir; // use Host *me = g_hostdb.m_myHost; // ours char *mydir = me->m_dir; // generic long err; // loop over every rdb and every data and map file in each rdb for ( long i = 0 ; i < RDB_END ; i++ ) { // skip SYNCDB if ( i == RDB_SYNCDB ) continue; // get that rdb Rdb *rdb = getRdbFromId ( i ); // skip if none if ( ! rdb ) continue; // get coll for ( long j = 0 ; j < rdb->getNumBases() ; j++ ) { // get that base RdbBase *base = rdb->getBase(j);//m_bases[j]; if ( ! base ) continue; // get coll char *coll = base->m_coll; // and num long collnum = base->m_collnum; // make the dir sprintf ( cmd , "ssh %s 'mkdir %scoll.%s.%li'", ips,dir,coll,collnum); // excecute log ( LOG_INFO, "sync: %s", cmd ); //int err = my_system_r ( cmd, 3600*24 ); //if ( err != 0 ) goto hadError; // copy the files for ( long k = 0 ; k < base->m_numFiles ; k++ ) { // sleep while dumping. we are in a thread. 
if ( base->isDumping() ) sleep ( 1 ); // get map RdbMap *map = base->m_maps[k]; // copy the map file sprintf ( cmd , "rcp %s %s:%scoll.%s.%li/'", map->getFilename(),ips,dir,coll,collnum); log ( LOG_INFO, "sync: %s", cmd ); if ( ( err = my_system_r ( cmd, 3600*24 ) ) ) goto hadError; // get the file BigFile *f = base->m_files[k]; // loop over each little part file for ( long m = 0 ; m < f->m_numParts ; m++ ) { // get part file File *p = f->m_files[m]; // copy that sprintf ( cmd , "rcp %s %s:%scoll.%s.%li/'", p->m_filename,ips,dir,coll,collnum); // excecute log ( LOG_INFO, "sync: %s", cmd ); if ( ( err = my_system_r ( cmd, 3600*24 ) ) ) goto hadError; } } } } // make the dirs sprintf ( cmd , "ssh %s '" "mkdir %s/dict/ ;" "mkdir %s/dict/en/ ;" "mkdir %s/ucdata/ ;" "mkdir %s/.antiword/ ;" "'" , ips, dir, dir, dir, dir ); // excecute log ( LOG_INFO, "sync: %s", cmd ); if ( ( err = my_system_r ( cmd, 3600*24 ) ) ) goto hadError; // loop over the files in Process.cpp for ( long i = 0 ; i < 99999 ; i++ ) { // null means end if ( ! g_files[i] ) break; sprintf ( cmd , "rcp %s%s %s:%s", mydir,g_files[i],ips,dir); // excecute log ( LOG_INFO, "sync: %s", cmd ); if ( ( err = my_system_r ( cmd, 3600*24 ) ) ) goto hadError; } // new guy is NOT in sync sprintf ( cmd , "ssh %s 'echo 0 > %sinsync.dat", ips,dir); // excecute log ( LOG_INFO, "sync: %s", cmd ); if ( ( err = my_system_r ( cmd, 3600*24 ) ) ) goto hadError; // saved files sprintf ( cmd , "rcp %s*-saved.dat %s:%sinsync.dat", mydir,ips,dir); // excecute log ( LOG_INFO, "sync: %s", cmd ); if ( ( err = my_system_r ( cmd, 3600*24 ) ) ) goto hadError; // completed! return; hadError: log ( "sync: Call to system(\"%s\") had error %s.",cmd,strerror(err)); g_hostdb.m_syncHost->m_doingSync = 0; g_hostdb.m_syncHost = NULL; return; }
void printUdpTable ( SafeBuf *p, char *title, UdpServer *server , char *coll, char *pwd , long fromIp , bool isDns ) { if ( ! coll ) coll = "main"; //if ( ! pwd ) pwd = ""; // time now long long now = gettimeofdayInMilliseconds(); // get # of used nodes //long n = server->getTopUsedSlot(); // store in buffer for sorting long times[50000];//MAX_UDP_SLOTS]; UdpSlot *slots[50000];//MAX_UDP_SLOTS]; long nn = 0; for ( UdpSlot *s = server->getActiveHead() ; s ; s = s->m_next2 ) { if ( nn >= 50000 ) { log("admin: Too many udp sockets."); break; } // if empty skip it //if ( server->isEmpty ( i ) ) continue; // get the UdpSlot //UdpSlot *s = server->getUdpSlotNum(i); // if data is NULL that's an error //if ( ! s ) continue; // store it times[nn] = now - s->m_startTime; slots[nn] = s; nn++; } // bubble sort keepSorting: // assume no swap will happen bool didSwap = false; for ( long i = 1 ; i < nn ; i++ ) { if ( times[i-1] >= times[i] ) continue; long tmpTime = times[i-1]; UdpSlot *tmpSlot = slots[i-1]; times[i-1] = times[i]; slots[i-1] = slots[i]; times[i ] = tmpTime; slots[i ] = tmpSlot; didSwap = true; } if ( didSwap ) goto keepSorting; // count how many of each msg we have long msgCount0[96]; long msgCount1[96]; for ( long i = 0; i < 96; i++ ) { msgCount0[i] = 0; msgCount1[i] = 0; } for ( long i = 0; i < nn; i++ ) { UdpSlot *s = slots[i]; if ( s->m_msgType >= 96 ) continue; if ( s->m_niceness == 0 ) msgCount0[s->m_msgType]++; else msgCount1[s->m_msgType]++; } // print the counts p->safePrintf ( "<table %s>" "<tr class=hdrow><td colspan=19>" "<center>" "<b>%s Summary</b> (%li transactions)" "</td></tr>" "<tr bgcolor=#%s>" "<td><b>niceness</td>" "<td><b>msg type</td>" "<td><b>total</td>" "</tr>", TABLE_STYLE, title , server->getNumUsedSlots() , DARK_BLUE ); for ( long i = 0; i < 96; i++ ) { if ( msgCount0[i] <= 0 ) continue; p->safePrintf("<tr bgcolor=#%s>" "<td>0</td><td>0x%lx</td><td>%li</td></tr>", LIGHT_BLUE,i, msgCount0[i]); } for ( long i = 0; i < 96; i++ ) { if ( 
msgCount1[i] <= 0 ) continue; p->safePrintf("<tr bgcolor=#%s>" "<td>1</td><td>0x%lx</td><td>%li</td></tr>", LIGHT_BLUE,i, msgCount1[i]); } p->safePrintf ( "</table><br>" ); char *dd = ""; if ( ! isDns ) dd = "<td><b>msgType</td>" "<td><b>desc</td>" "<td><b>hostId</td>"; else { dd = //"<td><b>dns ip</b></td>" "<td><b>hostname</b></td>"; } p->safePrintf ( "<table %s>" "<tr class=hdrow><td colspan=19>" "<center>" //"<font size=+1>" "<b>%s</b> (%li transactions)" //"</font>" "</td></tr>" "<tr bgcolor=#%s>" "<td><b>age</td>" "<td><b>last read</td>" "<td><b>last send</td>" "<td><b>timeout</td>" "<td><b>ip</td>" //"<td><b>port</td>" //"<td><b>desc</td>" //"<td><b>hostId</td>" //"<td><b>nice</td>"; "%s" "<td><b>nice</td>" "<td><b>transId</td>" "<td><b>called</td>" "<td><b>dgrams read</td>" "<td><b>dgrams to read</td>" "<td><b>acks sent</td>" "<td><b>dgrams sent</td>" "<td><b>dgrams to send</td>" "<td><b>acks read</td>" "<td><b>resends</td>" "</tr>\n" , TABLE_STYLE, title , server->getNumUsedSlots() , DARK_BLUE , dd ); // now fill in the columns for ( long i = 0 ; i < nn ; i++ ) { // get from sorted list UdpSlot *s = slots[i]; // set socket state //char *st = "ERROR"; //if ( ! s->isDoneReading() ) st = "reading"; //if ( ! s->isDoneSending() ) st = "reading"; // times long long elapsed0 = (now - s->m_startTime ) ; long long elapsed1 = (now - s->m_lastReadTime ) ; long long elapsed2 = (now - s->m_lastSendTime ) ; char e0[32],e1[32], e2[32]; sprintf ( e0 , "%llims" , elapsed0 ); sprintf ( e1 , "%llims" , elapsed1 ); sprintf ( e2 , "%llims" , elapsed2 ); if ( s->m_startTime == 0LL ) strcpy ( e0 , "--" ); if ( s->m_lastReadTime == 0LL ) strcpy ( e1 , "--" ); if ( s->m_lastSendTime == 0LL ) strcpy ( e2 , "--" ); // bgcolor is lighter for incoming requests char *bg = LIGHT_BLUE;//"c0c0f0"; // is it incoming if ( ! 
s->m_callback ) bg = LIGHTER_BLUE;//"e8e8ff"; Host *h = g_hostdb.getHost ( s->m_ip , s->m_port ); char *eip = "??"; unsigned short eport = 0 ; //long ehostId = -1 ; char *ehostId = "-1"; //char tmpIp [64]; // print the ip char tmpHostId[64]; if ( h ) { // host can have 2 ip addresses, get the one most // similar to that of the requester eip = iptoa(g_hostdb.getBestIp ( h , fromIp )); //eip = iptoa(h->m_externalIp) ; //eip = iptoa(h->m_ip) ; eport = h->m_externalHttpPort ; //ehostId = h->m_hostId ; if ( h->m_isProxy ) sprintf(tmpHostId,"proxy%li",h->m_hostId); else sprintf(tmpHostId,"%li",h->m_hostId); ehostId = tmpHostId; } // if no corresponding host, it could be a request from an external // cluster, so just show the ip else { sprintf ( tmpHostId , "%s" , iptoa(s->m_ip) ); ehostId = tmpHostId; eip = tmpHostId; } // set description of the msg long msgType = s->m_msgType; char *desc = ""; char *rbuf = s->m_readBuf; char *sbuf = s->m_sendBuf; long rbufSize = s->m_readBufSize; long sbufSize = s->m_sendBufSize; bool weInit = s->m_callback; char calledHandler = s->m_calledHandler; if ( weInit ) calledHandler = s->m_calledCallback; char *buf = NULL; long bufSize = 0; char tt [ 64 ]; if ( msgType == 0x00 && weInit ) buf = sbuf; if ( msgType == 0x00 && ! weInit ) buf = rbuf; if ( msgType == 0x01 && weInit ) buf = sbuf; if ( msgType == 0x01 && ! weInit ) buf = rbuf; // . if callback was called this slot's sendbuf can be bogus // . i put this here to try to avoid a core dump if ( msgType == 0x13 && weInit && ! s->m_calledCallback ) { buf = sbuf; bufSize = sbufSize; } if ( msgType == 0x13 && ! weInit ) { buf = rbuf; bufSize = rbufSize; } if ( buf ) { long rdbId = -1; if (msgType == 0x01) rdbId = buf[0]; //else rdbId = buf[8+sizeof(key_t)*2+16]; else rdbId = buf[24]; Rdb *rdb = NULL; if ( rdbId >= 0 && ! 
isDns ) rdb = getRdbFromId ((uint8_t)rdbId ); char *cmd; if ( msgType == 0x01 ) cmd = "add to"; else cmd = "get from"; tt[0] = ' '; tt[1]='\0'; if ( rdb ) sprintf ( tt , "%s %s" , cmd,rdb->m_dbname ); desc = tt; } if ( msgType == 0x10 ) desc = "add links"; if ( msgType == 0x0c ) desc = "getting ip"; if ( msgType == 0x0d ) desc = "get outlink ips/qualities"; if ( msgType == 0x11 ) desc = "ping"; if ( msgType == 0x12 ) desc = "get lock"; if ( msgType == 0x06 ) desc = "spider lock"; if ( msgType == 0x04 ) desc = "meta add"; if ( msgType == 0x13 ) { char isRobotsTxt = 1; if ( buf && bufSize >= (long)sizeof(Msg13Request)-(long)MAX_URL_LEN ) { Msg13Request *r = (Msg13Request *)buf; isRobotsTxt = r->m_isRobotsTxt; } if ( isRobotsTxt ) desc = "get robots.txt"; else desc = "get web page"; } if ( msgType == 0x09 ) desc = "add site"; if ( msgType == 0x08 ) desc = "get site"; if ( msgType == 0x8b ) desc = "get catid"; if ( msgType == 0x34 ) desc = "get load"; if ( msgType == 0x02 ) desc = "get lists"; if ( msgType == 0x22 ) desc = "get titlerec"; if ( msgType == 0x36 ) desc = "get termFreq"; if ( msgType == 0x20 ) desc = "get summary"; if ( msgType == 0x2c ) desc = "get address"; if ( msgType == 0x24 ) desc = "get gigabits"; if ( msgType == 0x39 ) desc = "get docids"; if ( msgType == 0x17 ) desc = "cache access"; if ( msgType == 0x23 ) desc = "get linktext"; if ( msgType == 0x07 ) desc = "inject"; if ( msgType == 0x35 ) desc = "merge token"; if ( msgType == 0x3b ) desc = "get docid score"; if ( msgType == 0x50 ) desc = "get root quality"; if ( msgType == 0x25 ) desc = "get link info"; if ( msgType == 0xfd ) desc = "proxy forward"; p->safePrintf ( "<tr bgcolor=#%s>" "<td>%s</td>" // age "<td>%s</td>" // last read "<td>%s</td>" // last send "<td>%li</td>", // timeout bg , e0 , e1 , e2 , s->m_timeout ); // now use the ip for dns and hosts p->safePrintf("<td>%s:%lu</td>", iptoa(s->m_ip),(long)s->m_port); char *cf1 = ""; char *cf2 = ""; if ( s->m_convertedNiceness ) { cf1 = "<font 
color=red>"; cf2 = "</font>"; } if ( isDns ) { //p->safePrintf("<td>%s</td>",iptoa(s->m_ip)); char *hostname = (char *)s->m_tmpVar; p->safePrintf("<td><nobr>%s" ,hostname); // get the domain from the hostname long dlen; char *dbuf = ::getDomFast ( hostname,&dlen,false); p->safePrintf( " <a href=\"/admin/tagdb?" "user=admin&" "tagtype0=manualban&" "tagdata0=1&" "u=%s&c=%s\">" "[<font color=red><b>BAN %s</b></font>]" "</nobr></a> " , dbuf , coll , dbuf ); p->safePrintf("</td>" "<td>%s%li%s</td>", cf1, (long)s->m_niceness, cf2); } if ( ! isDns ) { //"<td>%s</td>" // ip //"<td>%hu</td>" // port // clickable hostId char *toFrom = "to"; if ( ! s->m_callback ) toFrom = "from"; //"<td><a href=http://%s:%hu/cgi/15.cgi>%li</a></td>" p->safePrintf ( "<td>0x%hhx</td>" // msgtype "<td><nobr>%s</nobr></td>" // desc "<td><nobr>%s <a href=http://%s:%hu/" "master/sockets?" "c=%s>%s</a></nobr></td>" "<td>%s%li%s</td>" , // niceness s->m_msgType , desc, //iptoa(s->m_ip) , //s->m_port , // begin clickable hostId toFrom, eip , eport , coll , ehostId , cf1, (long)s->m_niceness, cf2 // end clickable hostId ); } p->safePrintf ( "<td>%lu</td>" // transId "<td>%i</td>" // called handler "<td>%li</td>" // dgrams read "<td>%li</td>" // dgrams to read "<td>%li</td>" // acks sent "<td>%li</td>" // dgrams sent "<td>%li</td>" // dgrams to send "<td>%li</td>" // acks read "<td>%hhu</td>" // resend count "</tr>\n" , s->m_transId, calledHandler, s->getNumDgramsRead() , s->m_dgramsToRead , s->getNumAcksSent() , s->getNumDgramsSent() , s->m_dgramsToSend , s->getNumAcksRead() , s->m_resendCount ); } // end the table p->safePrintf ("</table><br>\n" ); }
// . this is called // . destroys the UdpSlot if false is returned static void handleRequest20(UdpSlot *slot, int32_t netnice) { // . check g_errno // . before, we were not sending a reply back here and we continued // to process the request, even though it was empty. the slot // had a NULL m_readBuf because it could not alloc mem for the read // buf i'm assuming. and the slot was saved in a line below here... // state20->m_msg22.m_parent = slot; if ( g_errno ) { log(LOG_WARN, "net: Msg20 handler got error: %s.",mstrerror(g_errno)); log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__); g_udpServer.sendErrorReply ( slot , g_errno ); return; } // ensure request is big enough if ( slot->m_readBufSize < (int32_t)sizeof(Msg20Request) ) { log(LOG_ERROR,"%s:%s:%d: call sendErrorReply. Bad request size", __FILE__, __func__, __LINE__); g_udpServer.sendErrorReply ( slot , EBADREQUESTSIZE ); return; } // parse the request Msg20Request *req = (Msg20Request *)slot->m_readBuf; // . turn the string offsets into ptrs in the request // . this is "destructive" on "request" int32_t nb = req->deserialize(); // sanity check if ( nb != slot->m_readBufSize ) { g_process.shutdownAbort(true); } // sanity check, the size include the \0 if ( req->m_collnum < 0 ) { log(LOG_WARN, "query: Got empty collection in msg20 handler. FIX! 
" "from ip=%s port=%i",iptoa(slot->getIp()),(int)slot->getPort()); log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__); g_udpServer.sendErrorReply ( slot , ENOTFOUND ); return; } int64_t cache_key = req->makeCacheKey(); const void *cached_summary; size_t cached_summary_len; if(g_stable_summary_cache.lookup(cache_key, &cached_summary, &cached_summary_len) || g_unstable_summary_cache.lookup(cache_key, &cached_summary, &cached_summary_len)) { log(LOG_DEBUG, "query: Summary cache hit"); sendCachedReply(req,cached_summary,cached_summary_len,slot); return; } else log(LOG_DEBUG, "query: Summary cache miss"); // if it's not stored locally that's an error if ( req->m_docId >= 0 && ! Titledb::isLocal ( req->m_docId ) ) { log(LOG_WARN, "query: Got msg20 request for non-local docId %" PRId64, req->m_docId); log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__); g_udpServer.sendErrorReply ( slot , ENOTLOCAL ); return; } // sanity if ( req->m_docId == 0 && ! req->ptr_ubuf ) { //g_process.shutdownAbort(true); } log( LOG_WARN, "query: Got msg20 request for docid of 0 and no url for " "collnum=%" PRId32" query %s",(int32_t)req->m_collnum,req->ptr_qbuf); log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.", __FILE__, __func__, __LINE__); g_udpServer.sendErrorReply ( slot , ENOTFOUND ); return; } int64_t startTime = gettimeofdayInMilliseconds(); // alloc a new state to get the titlerec Msg20State *state; try { state = new Msg20State(slot,req); } catch(...) { g_errno = ENOMEM; log("query: msg20 new(%" PRId32"): %s", (int32_t)sizeof(XmlDoc), mstrerror(g_errno)); log(LOG_ERROR,"%s:%s:%d: call sendErrorReply. error=%s", __FILE__, __func__, __LINE__, mstrerror( g_errno )); g_udpServer.sendErrorReply ( slot, g_errno ); return; } mnew(state, sizeof(*state), "xd20"); // ok, let's use the new XmlDoc.cpp class now! 
state->m_xmldoc.setMsg20Request(req); // set the callback state->m_xmldoc.setCallback(state, gotReplyWrapperxd); // set set time state->m_xmldoc.m_setTime = startTime; state->m_xmldoc.m_cpuSummaryStartTime = 0; // . now as for the msg20 reply! // . TODO: move the parse state cache into just a cache of the // XmlDoc itself, and put that cache logic into XmlDoc.cpp so // it can be used more generally. Msg20Reply *reply = state->m_xmldoc.getMsg20Reply ( ); // this is just blocked if ( reply == (void *)-1 ) return; // got it? gotReplyWrapperxd (state); }
// . we call this from Parms.cpp which prints out the proxy related controls // and this table below them... // . allows user to see the stats of each spider proxy bool printSpiderProxyTable ( SafeBuf *sb ) { // only host #0 will have the stats ... so print that link if ( g_hostdb.m_myHost->m_hostId != 0 ) { Host *h = g_hostdb.getHost(0); sb->safePrintf("<br>" "<b>See table on <a href=http://%s:%" PRId32"/" "admin/proxies>" "host #0</a></b>" "<br>" , iptoa(h->m_ip) , (int32_t)(h->getInternalHttpPort()) ); //return true; } // print host table sb->safePrintf ( "<table %s>" "<tr><td colspan=10><center>" "<b>Spider Proxies " "</b>" "</center></td></tr>" "<tr bgcolor=#%s>" "<td>" "<b>proxy IP</b></td>" "<td><b>proxy port</b></td>" "<td><b>times used</b></td>" "<td><b># website IPs banning</b></td>" "<td><b>load points</b></td>" "<td><b>currently out</b></td>" // time of last successful download. print "none" // if never successfully used "<td><b>test url last successful download</b></td>" // we fetch a test url every minute or so through // each proxy to ensure it is up. typically this should // be your website so you do not make someone angry. "<td><b>test url last download attempt</b></td>" // print "FAILED" in red if it failed to download "<td><b>test url download took</b></td>" "<td><b>last bytes downloaded</b></td>" "<td><b>last test url error</b></td>" "</tr>" , TABLE_STYLE , DARK_BLUE ); int32_t now = getTimeLocal(); // print it for ( int32_t i = 0 ; i < s_iptab.getNumSlots() ; i++ ) { // skip empty slots if ( ! 
s_iptab.m_flags[i] ) continue; SpiderProxy *sp = (SpiderProxy *)s_iptab.getValueFromSlot(i); const char *bg = LIGHT_BLUE; // mark with light red bg if last test url attempt failed if ( sp->m_lastDownloadTookMS == -1 && sp->m_lastDownloadTestAttemptMS>0 ) bg = "ffa6a6"; // or a perm denied error (as opposed to a timeout above) if ( sp->m_lastDownloadError ) bg = "ffa6a6"; // print it sb->safePrintf ( "<tr bgcolor=#%s>" "<td>%s</td>" // proxy ip "<td>%" PRIu32"</td>" // port , bg , iptoa(sp->m_ip) , (uint32_t)(uint16_t)sp->m_port ); sb->safePrintf("<td>%" PRId64"</td>",sp->m_timesUsed); int32_t banCount = s_banCountTable.getScore32(sp->m_ip); if ( banCount < 0 ) banCount = 0; sb->safePrintf("<td>%" PRId32"</td>",banCount); int32_t currentLoad; // get # times it appears in loadtable int32_t np = getNumLoadPoints ( sp , ¤tLoad ); sb->safePrintf("<td>%" PRId32"</td>",np); // currently outstanding downloads on this proxy sb->safePrintf("<td>%" PRId32"</td>",currentLoad); // last SUCCESSFUL download time ago. when it completed. int32_t ago = now - sp->m_lastSuccessfulTestMS/1000; sb->safePrintf("<td>"); // like 1 minute ago etc. if ( sp->m_lastSuccessfulTestMS <= 0 ) sb->safePrintf("none"); else printTimeAgo(sb, ago, now, true); sb->safePrintf("</td>"); // last download time ago ago = now - sp->m_lastDownloadTestAttemptMS/1000; sb->safePrintf("<td>"); // like 1 minute ago etc. if ( sp->m_lastDownloadTestAttemptMS<= 0 ) sb->safePrintf("none"); else printTimeAgo(sb, ago, now, true); sb->safePrintf("</td>"); // how long to download the test url? 
if ( sp->m_lastDownloadTookMS != -1 ) sb->safePrintf("<td>%" PRId32"ms</td>", (int32_t)sp->m_lastDownloadTookMS); else if ( sp->m_lastDownloadTestAttemptMS<= 0 ) sb->safePrintf("<td>unknown</td>"); else sb->safePrintf("<td>" "<font color=red>FAILED</font>" "</td>"); sb->safePrintf("<td>%" PRId32"</td>",sp->m_lastBytesDownloaded); if ( sp->m_lastDownloadError ) sb->safePrintf("<td><font color=red>%s</font></td>", mstrerror(sp->m_lastDownloadError)); else sb->safePrintf("<td>none</td>"); sb->safePrintf("</tr>\n"); } sb->safePrintf("</table><br>"); return true; }
// . parse an incoming request // . return false and set g_errno on error // . CAUTION: we destroy "req" by replacing it's last char with a \0 // . last char must be \n or \r for it to be a proper request anyway bool HttpRequest::set ( char *origReq , long origReqLen , TcpSocket *sock ) { // reset number of cgi field terms reset(); if ( ! m_reqBuf.reserve ( origReqLen + 1 ) ) { log("http: failed to copy request: %s",mstrerror(g_errno)); return false; } // copy it to avoid mangling it m_reqBuf.safeMemcpy ( origReq , origReqLen ); // NULL term m_reqBuf.pushChar('\0'); m_reqBufValid = true; // and point to that char *req = m_reqBuf.getBufStart(); long reqLen = m_reqBuf.length() - 1; // save this m_userIP = 0; if ( sock ) m_userIP = sock->m_ip; m_isSSL = 0; if ( sock ) m_isSSL = (bool)sock->m_ssl; // TcpServer should always give us a NULL terminated request if ( req[reqLen] != '\0' ) { char *xx = NULL; *xx = 0; } // how long is the first line, the primary request long i; // for ( i = 0 ; i<reqLen && i<MAX_REQ_LEN && // req[i]!='\n' && req[i]!='\r'; i++); // . now fill up m_buf, used to log the request // . make sure the url was encoded correctly // . we don't want assholes encoding every char so we can't see what // url they are submitting to be spidered/indexed // . also, don't de-code encoded ' ' '+' '?' '=' '&' because that would // change the meaning of the url // . and finally, non-ascii chars that don't display correctly // . this should NULL terminate m_buf, too // . turn this off for now, just try to log a different way // m_bufLen = urlNormCode ( m_buf , MAX_REQ_LEN - 1 , req , i ); // ensure it's big enough to be a valid request if ( reqLen < 5 ) { log("http: got reqlen<5 = %s",req); g_errno = EBADREQUEST; return false; } // or if first line too long //if ( i >= 1024 ) { g_errno = EBADREQUEST; return false; } // get the type, must be GET or HEAD if ( strncmp ( req , "GET " , 4 ) == 0 ) m_requestType = 0; // these means a compressed reply was requested. 
use by query // compression proxies. else if ( strncmp ( req , "ZET " , 4 ) == 0 ) m_requestType = 0; else if ( strncmp ( req , "HEAD " , 5 ) == 0 ) m_requestType = 1; else if ( strncmp ( req , "POST " , 5 ) == 0 ) m_requestType = 2; else { log("http: got bad request cmd: %s",req); g_errno = EBADREQUEST; return false; } // . NULL terminate the request (a destructive operation!) // . this removes the last \n in the trailing \r\n // . shit, but it f***s up POST requests if ( m_requestType != 2 ) { req [ reqLen - 1 ] = '\0'; reqLen--; } // POST requests can be absolutely huge if you are injecting a 100MB // file, so limit our strstrs to the end of the mime char *d = NULL; char dc; // check for body if it was a POST request if ( m_requestType == 2 ) { d = strstr ( req , "\r\n\r\n" ); if ( d ) { dc = *d; *d = '\0'; } else log("http: Got POST request without \\r\\n\\r\\n."); } // . point to the file path // . skip over the "GET " long filenameStart = 4 ; // skip over extra char if it's a "HEAD " request if ( m_requestType == 1 || m_requestType == 2 ) filenameStart++; // are we a redirect? i = filenameStart; m_redirLen = 0; if ( strncmp ( &req[i] , "/?redir=" , 8 ) == 0 ) { for ( long k = i+8; k<reqLen && m_redirLen<126 ; k++) { if ( req[k] == '\r' ) break; if ( req[k] == '\n' ) break; if ( req[k] == '\t' ) break; if ( req[k] == ' ' ) break; m_redir[m_redirLen++] = req[k]; } } m_redir[m_redirLen] = '\0'; // find a \n space \r or ? that delimits the filename for ( i = filenameStart ; i < reqLen ; i++ ) { if ( is_wspace_a ( req [ i ] ) ) break; if ( req [ i ] == '?' ) break; } // now calc the filename length m_filenameLen = i - filenameStart; // return false and set g_errno if it's 0 if ( m_filenameLen <= 0 ) { log("http: got filenameLen<=0: %s",req); g_errno = EBADREQUEST; return false; } // . bitch if too big // . 
leave room for strcatting "index.html" below if ( m_filenameLen >= MAX_HTTP_FILENAME_LEN - 10 ) { log("http: got filenameLen>=max"); g_errno = EBADREQUEST; return false; } // . decode the filename into m_filename and reassign it's length // . decode %2F to / , etc... m_filenameLen = urlDecode(m_filename,req+filenameStart,m_filenameLen); // NULL terminate m_filename m_filename [ m_filenameLen ] = '\0'; // does it have a file extension AFTER the last / in the filename? bool hasExtension = false; for ( long j = m_filenameLen-1 ; j >= 0 ; j-- ) { if ( m_filename[j] == '.' ) { hasExtension = true; break; } if ( m_filename[j] == '/' ) break; } // if it has no file extension append a /index.html if ( ! hasExtension && m_filename [ m_filenameLen - 1 ] == '/' ) { strcat ( m_filename , "index.html" ); m_filenameLen = gbstrlen ( m_filename ); } // set file offset/size defaults m_fileOffset = 0; // -1 means ALL the file from m_fileOffset onwards m_fileSize = -1; // "e" points to where the range actually starts, if any //char *e; // . TODO: speed up by doing one strstr for Range: and maybe range: // . do they have a Range: 0-100\n in the mime denoting a partial get? //char *s = strstr ( req ,"Range:bytes=" ); //e = s + 12; // try alternate formats //if ( ! s ) { s = strstr ( req ,"Range: bytes=" ); e = s + 13; } //if ( ! s ) { s = strstr ( req ,"Range: " ); e = s + 7; } // parse out the range if we got one //if ( s ) { // long x = 0; // sscanf ( e ,"%li-%li" , &m_fileOffset , &x ); // // get all file if range's 2nd number is non-existant // if ( x == 0 ) m_fileSize = -1; // else m_fileSize = x - m_fileOffset; // // ensure legitimacy // if ( m_fileOffset < 0 ) m_fileOffset = 0; //} // reset our hostname m_hostLen = 0; // assume request is NOT from local network //m_isAdmin = false; m_isLocal = false; // get the virtual hostname they want to use char *s = strstr ( req ,"Host:" ); // try alternate formats if ( ! 
s ) s = strstr ( req , "host:" ); // must be on its own line, otherwise it's not valid if ( s && s > req && *(s-1) !='\n' ) s = NULL; // parse out the host if we got one if ( s ) { // skip field name, host: s += 5; // skip e to beginning of the host name after "host:" while ( *s==' ' || *s=='\t' ) s++; // find end of the host name char *end = s; while ( *end && !is_wspace_a(*end) ) end++; // . now *end should be \0, \n, \r, ' ', ... // . get host len m_hostLen = end - s; // truncate if too big if ( m_hostLen >= 255 ) m_hostLen = 254; // copy into hostname memcpy ( m_host , s , m_hostLen ); } // NULL terminate it m_host [ m_hostLen ] = '\0'; // get Referer: field s = strstr ( req ,"Referer:" ); // find another if ( ! s ) s = strstr ( req ,"referer:" ); // must be on its own line, otherwise it's not valid if ( s && s > req && *(s-1) !='\n' ) s = NULL; // assume no referer m_refLen = 0; // parse out the referer if we got one if ( s ) { // skip field name, referer: s += 8; // skip e to beginning of the host name after ':' while ( *s==' ' || *s=='\t' ) s++; // find end of the host name char *end = s; while ( *end && !is_wspace_a(*end) ) end++; // . now *end should be \0, \n, \r, ' ', ... // . get len m_refLen = end - s; // truncate if too big if ( m_refLen >= 255 ) m_refLen = 254; // copy into m_ref memcpy ( m_ref , s , m_refLen ); } // NULL terminate it m_ref [ m_refLen ] = '\0'; // get User-Agent: field s = strstr ( req ,"User-Agent:" ); // find another if ( ! s ) s = strstr ( req ,"user-agent:" ); // must be on its own line, otherwise it's not valid if ( s && s > req && *(s-1) !='\n' ) s = NULL; // assume empty long len = 0; // parse out the referer if we got one if ( s ) { // skip field name, referer: s += 11; // skip e to beginning of the host name after ':' while ( *s==' ' || *s=='\t' ) s++; // find end of the agent name char *end = s; while ( *end && *end!='\n' && *end!='\r' ) end++; // . now *end should be \0, \n, \r, ' ', ... // . 
get agent len len = end - s; // truncate if too big if ( len > 127 ) len = 127; // copy into m_userAgent memcpy ( m_userAgent , s , len ); } // NULL terminate it m_userAgent [ len ] = '\0'; m_isMSIE = false; if ( strstr ( m_userAgent , "MSIE" ) ) m_isMSIE = true; // get Cookie: field s = strstr ( req, "Cookie:" ); // find another if ( !s ) s = strstr ( req, "cookie:" ); // must be on its own line, otherwise it's not valid if ( s && s > req && *(s-1) != '\n' ) s = NULL; // assume empty // m_cookieBufLen = 0; m_cookiePtr = s; // parse out the cookie if we got one if ( s ) { // skip field name, Cookie: s += 7; // skip s to beginning of cookie after ':' while ( *s == ' ' || *s == '\t' ) s++; // find end of the cookie char *end = s; while ( *end && *end != '\n' && *end != '\r' ) end++; // save length m_cookieLen = end - m_cookiePtr; // get cookie len //m_cookieBufLen = end - s; // trunc if too big //if (m_cookieBufLen > 1023) m_cookieBufLen = 1023; // copy into m_cookieBuf //memcpy(m_cookieBuf, s, m_cookieBufLen); } // NULL terminate it if ( m_cookiePtr ) m_cookiePtr[m_cookieLen] = '\0'; //m_cookieBuf[m_cookieBufLen] = '\0'; // convert every '&' in cookie to a \0 for parsing the fields // for ( long j = 0 ; j < m_cookieBufLen ; j++ ) // if ( m_cookieBuf[j] == '&' ) m_cookieBuf[j] = '\0'; // mark it as cgi if it has a ? bool isCgi = ( req [ i ] == '?' ) ; // reset m_filename length to exclude the ?* stuff if ( isCgi ) { // skip over the '?' i++; // find a space the delmits end of cgi long j; for ( j = i; j < reqLen; j++) if (is_wspace_a(req[j])) break; // now add it if ( ! addCgi ( &req[i] , j-i ) ) return false; // update i i = j; } // . set path ptrs // . the whole /cgi/14.cgi?coll=xxx&..... 
thang m_path = req + filenameStart; m_plen = i - filenameStart; // we're local if hostname is 192.168.[0|1].y //if ( strncmp(iptoa(sock->m_ip),"192.168.1.",10) == 0) { // m_isAdmin = true; m_isLocal = true; } //if ( strncmp(iptoa(sock->m_ip),"192.168.0.",10) == 0) { // m_isAdmin = true; m_isLocal = true; } //if(strncmp(iptoa(sock->m_ip),"192.168.1.",10) == 0) m_isLocal = true; //if(strncmp(iptoa(sock->m_ip),"192.168.0.",10) == 0) m_isLocal = true; if ( sock && strncmp(iptoa(sock->m_ip),"192.168.",8) == 0) m_isLocal = true; if ( sock && strncmp(iptoa(sock->m_ip),"10.",3) == 0) m_isLocal = true; // steve cook's comcast at home: // if ( sock && strncmp(iptoa(sock->m_ip),"68.35.100.143",13) == 0) // m_isLocal = true; // procog's ip // if ( sock && strncmp(iptoa(sock->m_ip),"216.168.36.21",13) == 0) // m_isLocal = true; // roadrunner ip // if ( sock && strncmp(iptoa(sock->m_ip),"66.162.42.131",13) == 0) // m_isLocal = true; // cnsp ip //if ( sock && strncmp(iptoa(sock->m_ip),"67.130.216.27",13) == 0) // m_isLocal = true; // emily parker //if ( sock && strncmp(iptoa(sock->m_ip),"69.92.68.202",12) == 0) //m_isLocal = true; // 127.0.0.1 if ( sock && sock->m_ip == 16777343 ) m_isLocal = true; // steve cook's webserver //if ( sock && strncmp(iptoa(sock->m_ip),"216.168.36.21",13) == 0) // m_isLocal = true; // . also if we're coming from lenny at my house consider it local // . this is a security risk, however... TODO: FIX!!! //if ( sock->m_ip == atoip ("68.35.105.199" , 13 ) ) m_isAdmin = true; // . TODO: now add any cgi data from a POST..... // . look after the mime //char *d = NULL; // check for body if it was a POST request //if ( m_requestType == 2 ) d = strstr ( req , "\r\n\r\n" ); // now put d's char back, just in case... does it really matter? 
if ( d ) *d = dc; // return true now if no cgi stuff to parse if ( d ) { char *post = d + 4; long postLen = reqLen-(d+4-req) ; // post sometimes has a \r or\n after it while ( postLen > 0 && post[postLen-1]=='\r' ) postLen--; // add it to m_cgiBuf, filter and everything if ( ! addCgi ( post , postLen ) ) return false; } // sometimes i don't want to be admin //if ( getLong ( "admin" , 1 ) == 0 ) m_isAdmin = false; // success ///// // Handle Extra parms... char *ep = g_conf.m_extraParms; char *epend = g_conf.m_extraParms + g_conf.m_extraParmsLen; char *qstr = m_cgiBuf; long qlen = m_cgiBufLen; while (ep < epend){ char buf[AUTOBAN_TEXT_SIZE]; long bufLen = 0; // get next substring while (*ep && ep < epend && *ep != ' ' && *ep != '\n'){ buf[bufLen++] = *ep++; } // skip whitespace while (*ep && ep < epend && *ep == ' '){ ep++; } // null terminate buf[bufLen] = '\0'; // No match if (!bufLen || !strnstr(qstr, qlen, buf)){ // skip to end of line while (*ep && ep < epend && *ep != '\n') ep++; // skip newline while (*ep && ep < epend && *ep == '\n') ep++; // try next substr continue; } // found a match... // get parm string bufLen = 0; while (*ep && ep < epend && *ep != '\n'){ buf[bufLen++] = *ep++; } buf[bufLen] = '\0'; // skip newline while (*ep && ep < epend && *ep == '\n') ep++; logf(LOG_DEBUG, "query: appending \"%s\" to query", buf); long newSize = m_cgiBuf2Size + bufLen+1; char *newBuf = (char*)mmalloc(newSize, "extraParms"); if (!newBuf){ return log("query: unable to allocate %ld bytes " "for extraParms", newSize); } char *p = newBuf; if (m_cgiBuf2Size) { memcpy(newBuf, m_cgiBuf2, m_cgiBuf2Size); p += m_cgiBuf2Size-1; mfree(m_cgiBuf2, m_cgiBuf2Size, "extraParms"); m_cgiBuf2 = NULL; m_cgiBuf2Size = 0; } memcpy(p, buf, bufLen); m_cgiBuf2 = newBuf; m_cgiBuf2Size = newSize; p += bufLen; *p = '\0'; } // Put '\0' back into the HttpRequest buffer... if (m_cgiBuf){ // do not mangle the "ucontent"! 
long cgiBufLen = m_cgiBufLen; cgiBufLen -= m_ucontentLen; char *buf = m_cgiBuf; for (long i = 0; i < cgiBufLen ; i++) if (buf[i] == '&') buf[i] = '\0'; // don't decode the ucontent= field! long decodeLen = m_cgiBufLen; // so subtract that if ( m_ucontent ) decodeLen -= m_ucontentLen; // decode everything long len = urlDecode ( m_cgiBuf , m_cgiBuf , decodeLen ); // we're parsing crap after the null if the last parm // has no value //memset(m_cgiBuf+len, '\0', m_cgiBufLen-len); m_cgiBufLen = len; // ensure that is null i guess if ( ! m_ucontent ) m_cgiBuf[len] = '\0'; } if (m_cgiBuf2){ char *buf = m_cgiBuf2; for (long i = 0; i < m_cgiBuf2Size-1 ; i++) if (buf[i] == '&') buf[i] = '\0'; long len = urlDecode ( m_cgiBuf2 , m_cgiBuf2 , m_cgiBuf2Size); memset(m_cgiBuf2+len, '\0', m_cgiBuf2Size-len); } // . parse the fields after the ? in a cgi filename // . or fields in the content if it's a POST // . m_cgiBuf must be and is NULL terminated for this parseFields ( m_cgiBuf , m_cgiBufLen ); // Add extra parms to the request. if (m_cgiBuf2Size){ parseFields(m_cgiBuf2, m_cgiBuf2Size); } // urldecode the cookie buf too!! if ( m_cookiePtr ) { char *p = m_cookiePtr; for (long i = 0; i < m_cookieLen ; i++) { //if (p[i] == '&') p[i] = '\0'; // cookies are separated with ';' in the request only if (p[i] == ';') p[i] = '\0'; // a hack for the metacookie=.... // which uses &'s to separate its subcookies // this is a hack for msie's limit of 50 cookies if ( p[i] == '&' ) p[i] = '\0'; // set m_metaCookie to start of meta cookie if ( p[i] == 'm' && p[i+1] == 'e' && strncmp(p,"metacookie",10) == 0 ) m_metaCookie = p; } long len = urlDecode ( m_cookiePtr , m_cookiePtr, m_cookieLen ); // we're parsing crap after the null if the last parm // has no value memset(m_cookiePtr+len, '\0', m_cookieLen-len); m_cookieLen = len; } return true; }
// . handle a msg12 request that arrived in "udpSlot"
// . the request size selects the operation:
//   - sizeof(ConfirmRequest): the sender confirms it got its locks. mark
//     the lock confirmed, add a negative doledb rec and re-add the first
//     ip to the waiting tree
//   - sizeof(LockRequest): grant, refuse or remove a url lock
// . a one byte reply is sent: 1 for ok/granted, 0 for lock refused
// . on failure an error reply is sent instead
void handleRequest12 ( UdpSlot *udpSlot , int32_t niceness ) {
	// the raw request
	char    *reqBuf = udpSlot->m_readBuf;
	int32_t  reqLen = udpSlot->m_readBufSize;
	// shortcut
	UdpServer *udp = &g_udpServer;
	// breathe
	QUICKPOLL ( niceness );
	// the reply is built in the slot's own buffer
	char *replyBuf = udpSlot->m_tmpBuf;

	//
	// . is it confirming that he got all the locks?
	// . if so, remove the doledb record and dock the doleiptable count
	//   before adding a waiting tree entry to re-pop the doledb record
	//
	if ( reqLen == sizeof(ConfirmRequest) ) {
		ConfirmRequest *confirm = (ConfirmRequest *)reqBuf;
		// look up the lock being confirmed
		HashTableX *lockTable = &g_spiderLoop.m_lockTable;
		int32_t confirmSlot =
			lockTable->getSlot ( &confirm->m_lockKeyUh48 );
		if ( confirmSlot < 0 ) {
			log("spider: got a confirm request for a key not "
			    "in the table! coll must have been deleted "
			    " or reset "
			    "while lock request was outstanding.");
			g_errno = EBADENGINEER;
			log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.",
			    __FILE__, __func__, __LINE__);
			udp->sendErrorReply ( udpSlot , g_errno );
			return;
		}
		// confirm the lock
		UrlLock *confirmed =
			(UrlLock *)lockTable->getValueFromSlot ( confirmSlot );
		confirmed->m_confirmed = true;
		// note that
		if ( g_conf.m_logDebugSpider )
			log("spider: got confirm lock request for ip=%s",
			    iptoa(confirmed->m_firstIp));
		// get it. may be NULL, checked below.
		SpiderColl *coll =
			g_spiderCache.getSpiderColl(confirm->m_collnum);
		// clearing the low bit makes the key negative
		confirm->m_doledbKey.n0 &= 0xfffffffffffffffeLL;
		// and add the negative rec to doledb (deletion operation)
		Rdb *doledb = &g_doledb.m_rdb;
		bool added = doledb->addRecord ( confirm->m_collnum,
						 (char *)&confirm->m_doledbKey,
						 NULL , // data
						 0    , // dataSize
						 1    );// niceness
		if ( ! added ) {
			// tree is dumping or something, probably ETRYAGAIN,
			// which is not worth its own log line
			if ( g_errno != ETRYAGAIN ) {
				const char *why =
					"error adding neg rec to doledb";
				log("spider: %s %s",why,mstrerror(g_errno));
			}
			log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.",
			    __FILE__, __func__, __LINE__);
			udp->sendErrorReply ( udpSlot , g_errno );
			return;
		}
		if ( coll ) {
			// now remove from doleiptable since we removed
			// from doledb
			coll->removeFromDoledbTable ( confirm->m_firstIp );
			// . now add to waiting tree so we add another
			//   spiderdb record for this firstip to doledb
			// . the last arg is "false" so we do NOT populate
			//   from this call
			// . a false return only counts as a failure when
			//   g_errno is set as well
			if ( ! coll->addToWaitingTree ( 0 ,
							confirm->m_firstIp ,
							false ) &&
			     g_errno ) {
				const char *why =
					"FAILED TO ADD TO WAITING TREE";
				log("spider: %s %s",why,mstrerror(g_errno));
				log(LOG_ERROR,
				    "%s:%s:%d: call sendErrorReply.",
				    __FILE__, __func__, __LINE__);
				udp->sendErrorReply ( udpSlot , g_errno );
				return;
			}
		}
		// success!!
		replyBuf[0] = 1;
		udp->sendReply_ass ( replyBuf , 1 , replyBuf , 1 , udpSlot );
		return;
	}

	// sanity check. anything else must be a LockRequest.
	if ( reqLen != sizeof(LockRequest) ) {
		log("spider: bad msg12 request size of %" PRId32,reqLen);
		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.",
		    __FILE__, __func__, __LINE__);
		udp->sendErrorReply ( udpSlot , EBADREQUEST );
		return;
	}

	// deny it if we are not synced yet! otherwise we core in
	// getTimeGlobal() below
	if ( ! isClockInSync() ) {
		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.",
		    __FILE__, __func__, __LINE__);
		udp->sendErrorReply ( udpSlot , ETRYAGAIN );
		return;
	}

	LockRequest *lockReq = (LockRequest *)reqBuf;

	// sanity check, just 6 bytes! (48 bits). core on purpose if not.
	if ( lockReq->m_lockKeyUh48 &0xffff000000000000LL ) {
		char *xx=NULL;*xx=0; }

	// note it
	if ( g_conf.m_logDebugSpider )
		log("spider: got msg12 request uh48=%" PRId64" remove=%" PRId32,
		    lockReq->m_lockKeyUh48, (int32_t)lockReq->m_removeLock);

	// get time
	int32_t now = getTimeGlobal();
	// shortcut
	HashTableX *lockTable = &g_spiderLoop.m_lockTable;
	// which host is asking?
	int32_t senderId =
		g_hostdb.getHostId ( udpSlot->m_ip , udpSlot->m_port );
	// this must be legit - sanity check
	if ( senderId < 0 ) { char *xx=NULL;*xx=0; }

	// remove expired locks from locktable
	removeExpiredLocks ( senderId );

	// see if somebody already holds a lock on this key
	int64_t  key      = lockReq->m_lockKeyUh48;
	int32_t  heldSlot = lockTable->getSlot ( &key );
	UrlLock *held     = NULL;
	if ( heldSlot >= 0 )
		held = (UrlLock *)lockTable->getValueFromSlot ( heldSlot );

	// all remove ops return from inside this block
	if ( lockReq->m_removeLock ) {
		// only unlock if the lock is ours and the sequence matches
		if ( held &&
		     held->m_hostId == senderId &&
		     held->m_lockSequence == lockReq->m_lockSequence ) {
			// note it for now
			if ( g_conf.m_logDebugSpider )
				log("spider: removing lock for lockkey=%" PRIu64" hid=%" PRId32,
				    lockReq->m_lockKeyUh48,senderId);
			// unlock it
			lockTable->removeSlot ( heldSlot );
			// it is gone
			held = NULL;
		}
		replyBuf[0] = 1;
		udp->sendReply_ass ( replyBuf , 1 , replyBuf , 1 , udpSlot );
		return;
	}

	/////////
	//
	// add new lock
	//
	/////////

	// a lock older than MAX_LOCK_AGE is removed automatically
	if ( held && now - held->m_timestamp > MAX_LOCK_AGE ) {
		// note it for now
		log("spider: removing lock after %" PRId32" seconds "
		    "for lockKey=%" PRIu64" hid=%" PRId32,
		    (now - held->m_timestamp),
		    lockReq->m_lockKeyUh48,senderId);
		// unlock it
		lockTable->removeSlot ( heldSlot );
		// it is gone
		held = NULL;
	}

	// if lock still there, do not grant another lock
	if ( held ) {
		// note it for now
		if ( g_conf.m_logDebugSpider )
			log("spider: refusing lock for lockkey=%" PRIu64" hid=%" PRId32,
			    lockReq->m_lockKeyUh48,senderId);
		replyBuf[0] = 0;
		udp->sendReply_ass ( replyBuf , 1 , replyBuf , 1 , udpSlot );
		return;
	}

	// make the new lock
	UrlLock fresh;
	fresh.m_hostId       = senderId;
	fresh.m_lockSequence = lockReq->m_lockSequence;
	fresh.m_timestamp    = now;
	fresh.m_expires      = 0;
	fresh.m_firstIp      = lockReq->m_firstIp;
	fresh.m_collnum      = lockReq->m_collnum;
	// . a new lock starts out with its spider marked outstanding
	// . presumably the flag is cleared once the spider reply comes
	//   back, while the lock itself lingers until m_expires, so that
	//   finished spiders stop counting towards the per-ip quota.
	//   TODO confirm against the SpiderReply handling code
	fresh.m_spiderOutstanding = true;
	// . this is set when all hosts in the group (shard) have granted
	//   the lock and the host sends a confirm request (handled above)
	// . until then we do not know if every host will grant it
	fresh.m_confirmed = false;

	// put it into the table
	if ( ! lockTable->addKey ( &key , &fresh ) ) {
		// return error if that failed!
		log(LOG_ERROR,"%s:%s:%d: call sendErrorReply.",
		    __FILE__, __func__, __LINE__);
		udp->sendErrorReply ( udpSlot , g_errno );
		return;
	}

	// note it for now
	if ( g_conf.m_logDebugSpider )
		log("spider: granting lock for lockKey=%" PRIu64" hid=%" PRId32,
		    lockReq->m_lockKeyUh48,senderId);

	// grant the lock
	replyBuf[0] = 1;
	udp->sendReply_ass ( replyBuf , 1 , replyBuf , 1 , udpSlot );
}
void Conf::setRootIps ( ) { //m_numDns = 16; //for ( int32_t i = 0; i < m_numDns; i++ ) // m_dnsPorts[i] = 53; //m_numDns = 0; // set m_numDns based on Conf::m_dnsIps[] array int32_t i; for ( i = 0; i < 16 ; i++ ) { m_dnsPorts[i] = 53; if ( ! g_conf.m_dnsIps[i] ) break; } m_numDns = i; // hardcode google for now... //m_dnsIps[0] = atoip("8.8.8.8",7); //m_dnsIps[1] = atoip("8.8.4.4",7); //m_numDns = 2; Host *h = g_hostdb.getMyHost(); //char *ipStr = "10.5.0.3"; //char *ipStr = "10.5.56.78"; // gk268 now on roadrunner //char *ipStr = "10.5.56.77"; // gk267 now cnsp-routed bind9 server // now sp1 for speed (quad processor) //char *ipStr = "10.5.66.11"; // fail back to google public dns char *ipStr = "8.8.8.8"; // try google first dibs. NO! they are unresponsive after a while //char *ipStr = "8.8.4.4"; // for some reason scproxy2 local bind9 not responding to us!!! fix! //if ( h->m_type & HT_SCPROXY ) ipStr = "127.0.0.1"; //if ( h->m_type & HT_PROXY ) ipStr = "127.0.0.1"; if ( h->m_type & HT_SCPROXY ) ipStr = "8.8.8.8"; if ( h->m_type & HT_PROXY ) ipStr = "8.8.8.8"; // if we are a proxy, notably a spider compression proxy... //if ( g_proxy.isProxy() ) ipStr = "127.0.0.1"; if ( m_numDns == 0 ) { m_dnsIps[0] = atoip( ipStr , gbstrlen(ipStr) ); m_dnsPorts[0] = 53; m_numDns = 1; } // default this to off on startup for now until it works better m_askRootNameservers = false; // and return as well return; char *rootIps[] = { "192.228.79.201", "192.33.4.12", "128.8.10.90", //"192.203.230.10", ping timedout "192.5.5.241", //"192.112.36.4", ping timedout //"128.63.2.53", ping timedout //"192.36.148.17", "192.58.128.30", "193.0.14.129", //"198.32.64.12", "199.7.83.42", // new guy "202.12.27.33", "198.41.0.4" }; int32_t n = sizeof(rootIps)/sizeof(char *); if ( n > MAX_RNSIPS ) { log("admin: Too many root nameserver ips. 
Truncating."); n = MAX_RNSIPS; } m_numRns = n; for ( int32_t i = 0 ; i < n ; i++ ) { m_rnsIps [i] = atoip(rootIps[i],gbstrlen(rootIps[i])); m_rnsPorts[i] = 53; log(LOG_INIT,"dns: Using root nameserver #%"INT32" %s.", i,iptoa(m_rnsIps[i])); } }
void http_settings(char *rx, unsigned int rx_len) { unsigned int item, found, len; int i; char buf[16]; MAC_Addr mac; IP_Addr ip; unsigned int rgb; for(; rx_len!=0;) { len = strlen("restartwebradio="); if(strncmpi(rx, "restartwebradio=", len) == 0) { rx += len; rx_len -= len; cpu_reset(); } for(item=0, found=0; item<SETTINGSITEMS; item++) { if(settingsmenu[item].ini[0] == 0) { continue; } len = sprintf(buf, "%s=", settingsmenu[item].ini); if(strncmpi(rx, buf, len) == 0) { rx += len; rx_len -= len; len = url_decode(rx, rx, rx_len); i = 0; switch(settingsmenu[item].format) { case F_NR: //p1-p2, p3=step size i = atoi(rx); if(i < settingsmenu[item].p1){ i = settingsmenu[item].p1; } else if(i > settingsmenu[item].p2){ i = settingsmenu[item].p2; } itoa(i, buf, 10); ini_setentry(SETTINGS_FILE, settingsmenu[item].ini, buf); if(settingsmenu[item].set){ settingsmenu[item].set((void*)(int)i); } break; case F_OR: //p1 or p2 i = atoi(rx); if((i != settingsmenu[item].p1) && (i != settingsmenu[item].p2)){ i = settingsmenu[item].p1; } itoa(i, buf, 10); ini_setentry(SETTINGS_FILE, settingsmenu[item].ini, buf); if(settingsmenu[item].set){ settingsmenu[item].set((void*)(int)i); } break; case F_STR: //p1=max len if((settingsmenu[item].p1 != 0) && (strlen(rx) > (unsigned)settingsmenu[item].p1)) { rx[settingsmenu[item].p1] = 0; } ini_setentry(SETTINGS_FILE, settingsmenu[item].ini, rx); if(settingsmenu[item].set){ settingsmenu[item].set(rx); } break; case F_MAC: mac = atomac(rx); ini_setentry(SETTINGS_FILE, settingsmenu[item].ini, mactoa(mac)); //if(settingsmenu[item].set){ settingsmenu[item].set((void*)(MAC_Addr)mac); } break; case F_IP: ip = atoip(rx); ini_setentry(SETTINGS_FILE, settingsmenu[item].ini, iptoa(ip)); if(settingsmenu[item].set){ settingsmenu[item].set((void*)(IP_Addr)atoip(rx)); } break; case F_RGB: rgb = atorgb(rx); sprintf(buf, "%03i,%03i,%03i", GET_RED(rgb), GET_GREEN(rgb), GET_BLUE(rgb)); ini_setentry(SETTINGS_FILE, settingsmenu[item].ini, buf); 
if(settingsmenu[item].set){ settingsmenu[item].set((void*)(unsigned int)rgb); } break; } rx += len; rx_len -= len; found = 1; break; } } if(found == 0) { rx++; rx_len--; } } menu_drawwnd(1); return; }
void Stats::logAvgQueryTime(long long startTime) { long long now = gettimeofdayInMilliseconds(); long long took = now - startTime; static long s_lastSendTime = 0; // if just one query took an insanely long time, // do not sound the alarm. this is in seconds, // so multiply by 1000. //long long maxTook = // (long long)(g_conf.m_maxQueryTime*1000.0) ; //if ( took > maxTook ) took = maxTook; m_queryTimes += took; m_numQueries++; if ( m_numQueries > g_conf.m_numQueryTimes ) goto reset; if (m_numQueries != g_conf.m_numQueryTimes) return; // otherwise, store this info m_avgQueryTime = (float)m_queryTimes / ((float)m_numQueries * 1000.0); m_successRate = (float)m_numSuccess / (float)(m_numSuccess + m_numFails); //(number of queries) / seconds that it took to get this many queries m_avgQueriesPerSec = ((float)m_numQueries * 1000.0) / (float)(now - m_lastQueryLogTime); m_lastQueryLogTime = now; if(m_avgQueryTime > g_conf.m_avgQueryTimeThreshold || m_successRate < g_conf.m_querySuccessThreshold) { char msgbuf[1024]; Host *h = g_hostdb.getHost ( 0 ); snprintf(msgbuf, 1024, "Average latency: %f sec. " "success rate: %f. " "queries/sec: %f. " "host: %s.", m_avgQueryTime, m_successRate, m_avgQueriesPerSec, iptoa(h->m_ip)); log(LOG_WARN, "query: %s",msgbuf); // prevent machinegunning text msgs long now = getTimeLocal(); if ( now - s_lastSendTime > 300 ) { s_lastSendTime = now; g_pingServer.sendEmail(NULL, msgbuf); } } else { log(LOG_INFO, "query: Average latency is %f seconds, " "succeeding at a rate of %f, serving %f queries/sec.", m_avgQueryTime, m_successRate, m_avgQueriesPerSec); } reset: m_totalNumQueries += m_numSuccess + m_numFails; m_totalNumSuccess += m_numSuccess; m_totalNumFails += m_numFails; m_numQueries = 0; m_queryTimes = 0; m_numSuccess = 0; m_numFails = 0; }
// returns false if blocked, true otherwise bool processLoop ( void *state ) { // get it State2 *st = (State2 *)state; // get the tcp socket from the state TcpSocket *s = st->m_socket; // get it XmlDoc *xd = &st->m_xd; if ( ! xd->m_loaded ) { // setting just the docid. niceness is 0. //xd->set3 ( st->m_docId , st->m_coll , 0 ); // callback xd->setCallback ( state , processLoop ); // . and tell it to load from the old title rec // . this sets xd->m_oldTitleRec/m_oldTitleRecSize // . this sets xd->ptr_* and all other member vars from // the old title rec if found in titledb. if ( ! xd->loadFromOldTitleRec ( ) ) return false; } if ( g_errno ) return sendErrorReply ( st , g_errno ); // now force it to load old title rec //char **tr = xd->getTitleRec(); SafeBuf *tr = xd->getTitleRecBuf(); // blocked? return false if so. it will call processLoop() when it rets if ( tr == (void *)-1 ) return false; // we did not block. check for error? this will free "st" too. if ( ! tr ) return sendErrorReply ( st , g_errno ); // if title rec was empty, that is a problem if ( xd->m_titleRecBuf.length() == 0 ) return sendErrorReply ( st , ENOTFOUND); // set callback char *na = xd->getIsNoArchive(); // wait if blocked if ( na == (void *)-1 ) return false; // error? if ( ! na ) return sendErrorReply ( st , g_errno ); // forbidden? allow turkeys through though... if ( ! st->m_isAdmin && *na ) return sendErrorReply ( st , ENOCACHE ); SafeBuf *sb = &st->m_sb; // &page=4 will print rainbow sections if ( ! st->m_printed && st->m_r.getLong("page",0) ) { // do not repeat this call st->m_printed = true; // this will call us again since we called // xd->setCallback() above to us if ( ! 
xd->printDocForProCog ( sb , &st->m_r ) ) return false; } char *contentType = "text/html"; char format = st->m_format; if ( format == FORMAT_XML ) contentType = "text/xml"; if ( format == FORMAT_JSON ) contentType = "application/json"; // if we printed a special page (like rainbow sections) then return now if ( st->m_printed ) { bool status = g_httpServer.sendDynamicPage (s, //buf,bufLen, sb->getBufStart(), sb->getLength(), -1,false, //"text/html", contentType, -1, NULL, "utf8" ); // nuke state2 mdelete ( st , sizeof(State2) , "PageGet1" ); delete (st); return status; } /* // this was calling XmlDoc and setting sections, etc. to // get the SpiderReply junk... no no no // is it banned or filtered? this ignores the TagRec in the titleRec // and uses msg8a to get it fresh instead char *vi = xd->getIsFiltered();//Visible( ); // wait if blocked if ( vi == (void *)-1 ) return false; // error? if ( ! vi ) return sendErrorReply ( st , g_errno ); // banned? if ( ! st->m_isAdmin && ! *vi ) return sendErrorReply (st,EDOCBANNED); */ // get the utf8 content char **utf8 = xd->getUtf8Content(); //long len = xd->size_utf8Content - 1; // wait if blocked??? if ( utf8 == (void *)-1 ) return false; // strange if ( xd->size_utf8Content<=0) { log("pageget: utf8 content <= 0"); return sendErrorReply(st,EBADENGINEER ); } // alloc error? if ( ! utf8 ) return sendErrorReply ( st , g_errno ); // get this host Host *h = g_hostdb.getHost ( g_hostdb.m_hostId ); if ( ! h ) { log("pageget: hostid %li is bad",g_hostdb.m_hostId); return sendErrorReply(st,EBADENGINEER ); } char *content = xd->ptr_utf8Content; long contentLen = xd->size_utf8Content - 1; // shortcut char strip = st->m_strip; // alloc buffer now //char *buf = NULL; //long bufMaxSize = 0; //bufMaxSize = len + ( 32 * 1024 ) ; //bufMaxSize = contentLen + ( 32 * 1024 ) ; //buf = (char *)mmalloc ( bufMaxSize , "PageGet2" ); //char *p = buf; //char *bufEnd = buf + bufMaxSize; //if ( ! 
buf ) { // return sendErrorReply ( st , g_errno ); //} // for undoing the header //char *start1 = p; long startLen1 = sb->length(); // we are always utfu if ( strip != 2 ) sb->safePrintf( "<meta http-equiv=\"Content-Type\" " "content=\"text/html;charset=utf8\">\n"); // base href //Url *base = &xd->m_firstUrl; //if ( xd->ptr_redirUrl.m_url[0] ) // base = &xd->m_redirUrl; char *base = xd->ptr_firstUrl; if ( xd->ptr_redirUrl ) base = xd->ptr_redirUrl; //Url *redir = *xd->getRedirUrl(); if ( strip != 2 ) { sb->safePrintf ( "<BASE HREF=\"%s\">" , base ); //p += gbstrlen ( p ); } // default colors in case css files missing if ( strip != 2 ) { sb->safePrintf( "\n<style type=\"text/css\">\n" "body{background-color:white;color:black;}\n" "</style>\n"); //p += gbstrlen ( p ); } //char format = st->m_format; if ( format == FORMAT_XML ) sb->reset(); if ( format == FORMAT_JSON ) sb->reset(); // for undoing the stuff below long startLen2 = sb->length();//p; // query should be NULL terminated char *q = st->m_q; long qlen = st->m_qlen; char styleTitle[128] = "font-size:14px;font-weight:600;" "color:#000000;"; char styleText[128] = "font-size:14px;font-weight:400;" "color:#000000;"; char styleLink[128] = "font-size:14px;font-weight:400;" "color:#0000ff;"; char styleTell[128] = "font-size:14px;font-weight:600;" "color:#cc0000;"; // get the url of the title rec Url *f = xd->getFirstUrl(); bool printDisclaimer = st->m_printDisclaimer; if ( xd->m_contentType == CT_JSON ) printDisclaimer = false; if ( format == FORMAT_XML ) printDisclaimer = false; if ( format == FORMAT_JSON ) printDisclaimer = false; char tbuf[100]; tbuf[0] = 0; time_t lastSpiderDate = xd->m_spideredTime; if ( printDisclaimer || format == FORMAT_XML || format == FORMAT_JSON ) { struct tm *timeStruct = gmtime ( &lastSpiderDate ); strftime ( tbuf, 100,"%b %d, %Y UTC", timeStruct); } // We should always be displaying this disclaimer. 
// - May eventually want to display this at a different location // on the page, or on the click 'n' scroll browser page itself // when this page is not being viewed solo. // CNS: if ( ! st->m_clickNScroll ) { if ( printDisclaimer ) { sb->safePrintf(//sprintf ( p , //"<BASE HREF=\"%s\">" //"<table border=1 width=100%%>" //"<tr><td>" "<table border=\"1\" bgcolor=\"#" BGCOLOR "\" cellpadding=\"10\" " //"id=\"gbcnsdisctable\" class=\"gbcnsdisctable_v\"" "cellspacing=\"0\" width=\"100%%\" color=\"#ffffff\">" "<tr" //" id=\"gbcnsdisctr\" class=\"gbcnsdisctr_v\"" "><td>" //"<font face=times,sans-serif color=black size=-1>" "<span style=\"%s\">" "This is Gigablast's cached page of </span>" "<a href=\"%s\" style=\"%s\">%s</a>" "" , styleTitle, f->getUrl(), styleLink, f->getUrl() ); //p += gbstrlen ( p ); // then the rest //sprintf(p , sb->safePrintf( "<span style=\"%s\">. " "Gigablast is not responsible for the content of " "this page.</span>", styleTitle ); //p += gbstrlen ( p ); sb->safePrintf ( "<br/><span style=\"%s\">" "Cached: </span>" "<span style=\"%s\">", styleTitle, styleText ); //p += gbstrlen ( p ); // then the spider date in GMT // time_t lastSpiderDate = xd->m_spideredTime; // struct tm *timeStruct = gmtime ( &lastSpiderDate ); // char tbuf[100]; // strftime ( tbuf, 100,"%b %d, %Y UTC", timeStruct); //p += gbstrlen ( p ); sb->safeStrcpy(tbuf); // Moved over from PageResults.cpp sb->safePrintf( "</span> - <a href=\"" "/get?" 
"q=%s&c=%s&rtq=%li&" "d=%lli&strip=1\"" " style=\"%s\">" "[stripped]</a>", q , st->m_coll , (long)st->m_rtq, st->m_docId, styleLink ); // a link to alexa if ( f->getUrlLen() > 5 ) { sb->safePrintf( " - <a href=\"http:" "//web.archive.org/web/*/%s\"" " style=\"%s\">" "[older copies]</a>" , f->getUrl(), styleLink ); } if (st->m_noArchive){ sb->safePrintf( " - <span style=\"%s\"><b>" "[NOARCHIVE]</b></span>", styleTell ); } if (st->m_isBanned){ sb->safePrintf(" - <span style=\"%s\"><b>" "[BANNED]</b></span>", styleTell ); } // only print this if we got a query if ( qlen > 0 ) { sb->safePrintf("<br/><br/><span style=\"%s\"> " "These search terms have been " "highlighted: ", styleText ); //p += gbstrlen ( p ); } } // how much space left in p? //long avail = bufEnd - p; // . make the url that we're outputting for (like in PageResults.cpp) // . "thisUrl" is the baseUrl for click & scroll char thisUrl[MAX_URL_LEN]; char *thisUrlEnd = thisUrl + MAX_URL_LEN; char *x = thisUrl; // . use the external ip of our gateway // . construct the NAT mapped port // . you should have used iptables to map port to the correct // internal ip:port //unsigned long ip =g_conf.m_mainExternalIp ; // h->m_externalIp; //unsigned short port=g_conf.m_mainExternalPort;//h->m_externalHttpPort // local check //if ( st->m_isLocal ) { unsigned long ip = h->m_ip; unsigned short port = h->m_httpPort; //} //sprintf ( x , "http://%s:%li/get?q=" , iptoa ( ip ) , port ); // . we no longer put the port in here // . 
but still need http:// since we use <base href=> if (port == 80) sprintf(x,"http://%s/get?q=",iptoa(ip)); else sprintf(x,"http://%s:%hu/get?q=",iptoa(ip),port); x += gbstrlen ( x ); // the query url encoded long elen = urlEncode ( x , thisUrlEnd - x , q , qlen ); x += elen; // separate cgi vars with a & //sprintf ( x, "&seq=%li&rtq=%lid=%lli", // (long)st->m_seq,(long)st->m_rtq,st->m_msg22.getDocId()); sprintf ( x, "&d=%lli",st->m_docId ); x += gbstrlen(x); // set our query for highlighting Query qq; qq.set2 ( q, st->m_langId , true ); // print the query terms into our highlight buffer Highlight hi; // make words so we can set the scores to ignore fielded terms Words qw; qw.set ( q , // content being highlighted, utf8 qlen , // content being highlighted, utf8 TITLEREC_CURRENT_VERSION, true , // computeIds false ); // hasHtmlEntities? // . assign scores of 0 to query words that should be ignored // . TRICKY: loop over words in qq.m_qwords, but they should be 1-1 // with words in qw. // . sanity check //if ( qw.getNumWords() != qq.m_numWords ) { char *xx = NULL; *xx = 0;} // declare up here Matches m; // do the loop //Scores ss; //ss.set ( &qw , NULL ); //for ( long i = 0 ; i < qq.m_numWords ; i++ ) // if ( ! m.matchWord ( &qq.m_qwords[i],i ) ) ss.m_scores[i] = 0; // now set m.m_matches[] to those words in qw that match a query word // or phrase in qq. m.setQuery ( &qq ); //m.addMatches ( &qw , &ss , true ); m.addMatches ( &qw ); long hilen = 0; // CNS: if ( ! 
st->m_clickNScroll ) { // and highlight the matches if ( printDisclaimer ) { hilen = hi.set ( //p , //avail , sb , &qw , // words to highlight &m , // matches relative to qw false , // doSteming false , // st->m_clickAndScroll , (char *)thisUrl );// base url for ClcknScrll //p += hilen; // now an hr //memcpy ( p , "</span></table></table>\n" , 24 ); p += 24; sb->safeStrcpy("</span></table></table>\n"); } bool includeHeader = st->m_includeHeader; // do not show header for json object display if ( xd->m_contentType == CT_JSON ) includeHeader = false; if ( format == FORMAT_XML ) includeHeader = false; if ( format == FORMAT_JSON ) includeHeader = false; //mfree(uq, uqCapacity, "PageGet"); // undo the header writes if we should if ( ! includeHeader ) { // including base href is off by default when not including // the header, so the caller must explicitly turn it back on if ( st->m_includeBaseHref ) sb->m_length=startLen2;//p=start2; else sb->m_length=startLen1;//p=start1; } //sb->safeStrcpy(tbuf); if ( format == FORMAT_XML ) { sb->safePrintf("<response>\n"); sb->safePrintf("<statusCode>0</statusCode>\n"); sb->safePrintf("<statusMsg>Success</statusMsg>\n"); sb->safePrintf("<url><![CDATA["); sb->cdataEncode(xd->m_firstUrl.m_url); sb->safePrintf("]]></url>\n"); sb->safePrintf("<docId>%llu</docId>\n",xd->m_docId); sb->safePrintf("\t<cachedTimeUTC>%lu</cachedTimeUTC>\n", lastSpiderDate); sb->safePrintf("\t<cachedTimeStr>%s</cachedTimeStr>\n",tbuf); } if ( format == FORMAT_JSON ) { sb->safePrintf("{\"response\":{\n"); sb->safePrintf("\t\"statusCode\":0,\n"); sb->safePrintf("\t\"statusMsg\":\"Success\",\n"); sb->safePrintf("\t\"url\":\""); sb->jsonEncode(xd->m_firstUrl.m_url); sb->safePrintf("\",\n"); sb->safePrintf("\t\"docId\":%llu,\n",xd->m_docId); sb->safePrintf("\t\"cachedTimeUTC\":%lu,\n",lastSpiderDate); sb->safePrintf("\t\"cachedTimeStr\":\"%s\",\n",tbuf); } // identify start of <title> tag we wrote out char *sbstart = sb->getBufStart(); char *sbend = sb->getBufEnd(); 
char *titleStart = NULL; char *titleEnd = NULL; for ( char *t = sbstart ; t < sbend ; t++ ) { // title tag? if ( t[0]!='<' ) continue; if ( to_lower_a(t[1])!='t' ) continue; if ( to_lower_a(t[2])!='i' ) continue; if ( to_lower_a(t[3])!='t' ) continue; if ( to_lower_a(t[4])!='l' ) continue; if ( to_lower_a(t[5])!='e' ) continue; // point to it char *x = t + 5; // max - to keep things fast char *max = x + 500; for ( ; *x && *x != '>' && x < max ; x++ ); x++; // find end char *e = x; for ( ; *e && e < max ; e++ ) { if ( e[0]=='<' && to_lower_a(e[1])=='/' && to_lower_a(e[2])=='t' && to_lower_a(e[3])=='i' && to_lower_a(e[4])=='t' && to_lower_a(e[5])=='l' && to_lower_a(e[6])=='e' ) break; } if ( e < max ) { titleStart = x; titleEnd = e; } break; } // . print title at top! // . consider moving if ( titleStart ) { char *ebuf = st->m_r.getString("eb"); if ( ! ebuf ) ebuf = ""; //p += sprintf ( p , sb->safePrintf( "<table border=1 " "cellpadding=10 " "cellspacing=0 " "width=100%% " "color=#ffffff>" ); long printLinks = st->m_r.getLong("links",0); if ( ! printDisclaimer && printLinks ) sb->safePrintf(//p += sprintf ( p , // first put cached and live link "<tr>" "<td bgcolor=lightyellow>" // print cached link //"<center>" " " "<b>" "<a " "style=\"font-size:18px;font-weight:600;" "color:#000000;\" " "href=\"" "/get?" "c=%s&d=%lli&qh=0&cnsp=1&eb=%s\">" "cached link</a>" " " "<a " "style=\"font-size:18px;font-weight:600;" "color:#000000;\" " "href=%s>live link</a>" "</b>" //"</center>" "</td>" "</tr>\n" ,st->m_coll ,st->m_docId ,ebuf ,thisUrl // st->ptr_ubuf ); if ( printLinks ) { sb->safePrintf(//p += sprintf ( p , "<tr><td bgcolor=pink>" "<span style=\"font-size:18px;" "font-weight:600;" "color:#000000;\">" " " "<b>PAGE TITLE:</b> " ); long tlen = titleEnd - titleStart; sb->safeMemcpy ( titleStart , tlen ); sb->safePrintf ( "</span></td></tr>" ); } sb->safePrintf( "</table><br>\n" ); } // is the content preformatted? 
bool pre = false; char ctype = (char)xd->m_contentType; if ( ctype == CT_TEXT ) pre = true ; // text/plain if ( ctype == CT_DOC ) pre = true ; // filtered msword if ( ctype == CT_PS ) pre = true ; // filtered postscript if ( format == FORMAT_XML ) pre = false; if ( format == FORMAT_JSON ) pre = false; // if it is content-type text, add a <pre> if ( pre ) {//p + 5 < bufEnd && pre ) { sb->safePrintf("<pre>"); //p += 5; } if ( st->m_strip == 1 ) contentLen = stripHtml( content, contentLen, (long)xd->m_version, st->m_strip ); // it returns -1 and sets g_errno on error, line OOM if ( contentLen == -1 ) { //if ( buf ) mfree ( buf , bufMaxSize , "PageGet2" ); return sendErrorReply ( st , g_errno ); } Xml xml; Words ww; // if no highlighting, skip it bool queryHighlighting = st->m_queryHighlighting; if ( st->m_strip == 2 ) queryHighlighting = false; // do not do term highlighting if json if ( xd->m_contentType == CT_JSON ) queryHighlighting = false; SafeBuf tmp; SafeBuf *xb = sb; if ( format == FORMAT_XML ) xb = &tmp; if ( format == FORMAT_JSON ) xb = &tmp; if ( ! queryHighlighting ) { xb->safeMemcpy ( content , contentLen ); //p += contentLen ; } else { // get the content as xhtml (should be NULL terminated) //Words *ww = xd->getWords(); if ( ! xml.set ( content , contentLen , false , 0 , false , TITLEREC_CURRENT_VERSION , false , 0 , CT_HTML ) ) { // niceness is 0 //if ( buf ) mfree ( buf , bufMaxSize , "PageGet2" ); return sendErrorReply ( st , g_errno ); } if ( ! ww.set ( &xml , true , 0 ) ) { // niceness is 0 //if ( buf ) mfree ( buf , bufMaxSize , "PageGet2" ); return sendErrorReply ( st , g_errno ); } // sanity check //if ( ! xd->m_wordsValid ) { char *xx=NULL;*xx=0; } // how much space left in p? 
//avail = bufEnd - p; Matches m; m.setQuery ( &qq ); m.addMatches ( &ww ); hilen = hi.set ( xb , // p , avail , &ww , &m , false /*doStemming?*/ , st->m_clickAndScroll , thisUrl /*base url for click & scroll*/); //p += hilen; log(LOG_DEBUG, "query: Done highlighting cached page content"); } if ( format == FORMAT_XML ) { sb->safePrintf("\t<content><![CDATA["); sb->cdataEncode ( xb->getBufStart() ); sb->safePrintf("]]></content>\n"); sb->safePrintf("</response>\n"); } if ( format == FORMAT_JSON ) { sb->safePrintf("\t\"content\":\"\n"); sb->jsonEncode ( xb->getBufStart() ); sb->safePrintf("\"\n}\n}\n"); } // if it is content-type text, add a </pre> if ( pre ) { // p + 6 < bufEnd && pre ) { sb->safeMemcpy ( "</pre>" , 6 ); //p += 6; } // calculate bufLen //long bufLen = p - buf; long ct = xd->m_contentType; // now filter the entire buffer to escape out the xml tags // so it is displayed nice SafeBuf newbuf; if ( ct == CT_XML ) { // encode the xml tags into <tagname> sequences if ( !newbuf.htmlEncodeXmlTags ( sb->getBufStart() , sb->getLength(), 0)){// niceness=0 //if ( buf ) mfree ( buf , bufMaxSize , "PageGet2" ); return sendErrorReply ( st , g_errno ); } // free out buffer that we alloc'd before returning since this // should have copied it into another buffer //if ( buf ) mfree ( buf , bufMaxSize , "PageGet2" ); // reassign //buf = newbuf.getBufStart(); //bufLen = newbuf.length(); sb->stealBuf ( &newbuf ); } // now encapsulate it in html head/tail and send it off // sendErr: contentType = "text/html"; if ( strip == 2 ) contentType = "text/xml"; // xml is usually buggy and this throws browser off //if ( ctype == CT_XML ) contentType = "text/xml"; if ( xd->m_contentType == CT_JSON ) contentType = "application/json"; if ( format == FORMAT_XML ) contentType = "text/xml"; if ( format == FORMAT_JSON ) contentType = "application/json"; // safebuf, sb, is a member of "st" so this should copy the buffer // when it constructs the http reply, and we gotta call delete(st) // 
AFTER this so sb is still valid. bool status = g_httpServer.sendDynamicPage (s, //buf,bufLen, sb->getBufStart(), sb->getLength(), -1,false, contentType, -1, NULL, "utf8" ); // nuke state2 mdelete ( st , sizeof(State2) , "PageGet1" ); delete (st); // free out buffer that we alloc'd before returning since this // should have copied it into another buffer //if ( ct == CT_XML ) newbuf.purge(); //else if ( buf ) mfree ( buf , bufMaxSize , "PageGet2" ); // and convey the status return status; }
void printTcpTable ( SafeBuf* p, char *title, TcpServer *server ) { // table headers for urls current being spiderd p->safePrintf ( "<table %s>" "<tr class=hdrow><td colspan=19>" "<center>" //"<font size=+1>" "<b>%s</b>" //"</font>" "</td></tr>" "<tr bgcolor=#%s>" "<td><b>#</td>" "<td><b>fd</td>" "<td><b>age</td>" "<td><b>idle</td>" //"<td><b>timeout</td>" "<td><b>ip</td>" "<td><b>port</td>" "<td><b>state</td>" "<td><b>bytes read</td>" "<td><b>bytes to read</td>" "<td><b>bytes sent</td>" "<td><b>bytes to send</td>" "</tr>\n" , TABLE_STYLE , title , DARK_BLUE ); // current time in milliseconds long long now = gettimeofdayInMilliseconds(); // store in buffer for sorting long times[MAX_TCP_SOCKS]; TcpSocket *socks[MAX_TCP_SOCKS]; long nn = 0; for ( long i = 0 ; i<=server->m_lastFilled && nn<MAX_TCP_SOCKS; i++ ) { // get the ith socket TcpSocket *s = server->m_tcpSockets[i]; // continue if empty if ( ! s ) continue; // store it times[nn] = now - s->m_startTime; socks[nn] = s; nn++; } // bubble sort keepSorting: // assume no swap will happen bool didSwap = false; for ( long i = 1 ; i < nn ; i++ ) { if ( times[i-1] >= times[i] ) continue; long tmpTime = times[i-1]; TcpSocket *tmpSock = socks[i-1]; times[i-1] = times[i]; socks[i-1] = socks[i]; times[i ] = tmpTime; socks[i ] = tmpSock; didSwap = true; } if ( didSwap ) goto keepSorting; // now fill in the columns for ( long i = 0 ; i < nn ; i++ ) { // get the ith socket TcpSocket *s = socks[i]; // set socket state char *st = "ERROR"; switch ( s->m_sockState ) { case ST_AVAILABLE: st="available"; break; //case ST_CLOSED: st="closed"; break; case ST_CONNECTING: st="connecting"; break; case ST_READING: st="reading"; break; case ST_SSL_ACCEPT: st="ssl accept"; break; case ST_SSL_SHUTDOWN: st="ssl shutdown"; break; case ST_WRITING: st="sending"; break; case ST_NEEDS_CLOSE: st="needs close"; break; case ST_CLOSE_CALLED: st="close called"; break; } // bgcolor is lighter for incoming requests char *bg = "c0c0f0"; if ( 
s->m_isIncoming ) bg = "e8e8ff"; // times long elapsed1 = now - s->m_startTime ; long elapsed2 = now - s->m_lastActionTime ; p->safePrintf ("<tr bgcolor=#%s>" "<td>%li</td>" // i "<td>%i</td>" // fd "<td>%lims</td>" // elapsed seconds since start "<td>%lims</td>" // last action //"<td>%li</td>" // timeout "<td>%s</td>" // ip "<td>%hu</td>" // port "<td>%s</td>" // state "<td>%li</td>" // bytes read "<td>%li</td>" // bytes to read "<td>%li</td>" // bytes sent "<td>%li</td>" // bytes to send "</tr>\n" , bg , i, s->m_sd , elapsed1, elapsed2, //s->m_timeout , iptoa(s->m_ip) , s->m_port , st , s->m_readOffset , s->m_totalToRead , s->m_sendOffset , s->m_totalToSend ); } // end the table p->safePrintf ("</table><br>\n" ); }