void Statsdb::addDocsIndexed ( ) { if ( ! isClockInSync() ) return; // only once per five seconds long now = getTimeLocal(); static long s_lastTime = 0; if ( now - s_lastTime < 5 ) return; s_lastTime = now; long long total = 0LL; static long long s_lastTotal = 0LL; // every 5 seconds update docs indexed count for ( long i = 0 ; i < g_hostdb.m_numHosts ; i++ ) { Host *h = &g_hostdb.m_hosts[i]; // must have something if ( h->m_docsIndexed <= 0 ) return; // add it up total += h->m_docsIndexed; } // divide by # of groups total /= g_hostdb.getNumGroups(); // skip if no change if ( total == s_lastTotal ) return; s_lastTotal = total; // add it if changed though long long nowms = gettimeofdayInMillisecondsGlobal(); addStat ( MAX_NICENESS,"docs_indexed", nowms, nowms, (float)total ); }
void Statsdb::addDocsIndexed ( ) { if ( ! isClockInSync() ) return; if ( g_hostdb.hasDeadHost() ) return; // only host #0 needs this if ( g_hostdb.m_hostId != 0 ) return; // only once per five seconds int32_t now = getTimeLocal(); static int32_t s_lastTime = 0; if ( now - s_lastTime < 5 ) return; int32_t interval = now - s_lastTime; s_lastTime = now; int64_t total = 0LL; static int64_t s_lastTotal = 0LL; // every 5 seconds update docs indexed count for ( int32_t i = 0 ; i < g_hostdb.m_numHosts ; i++ ) { Host *h = &g_hostdb.m_hosts[i]; // must have something if ( h->m_pingInfo.m_totalDocsIndexed <= 0 ) continue; // add it up total += h->m_pingInfo.m_totalDocsIndexed; } // divide by # of groups total /= g_hostdb.getNumHostsPerShard(); // skip if no change if ( total == s_lastTotal ) return; int32_t docsIndexedInInterval = total - s_lastTotal; float docsPerSecond = docsIndexedInInterval / (float)interval; log("build: total docs indexed: %f. docs per second %f %i %i", (float)total, docsPerSecond, docsIndexedInInterval, interval); // add it if changed though int64_t nowms = gettimeofdayInMillisecondsGlobal(); addStat ( MAX_NICENESS,"docs_indexed", nowms, nowms, (float)total ); // Prevent a datapoint which adds all of the docs indexed to date. if( s_lastTotal != 0 ) { addStat ( MAX_NICENESS,"docs_per_second", nowms, nowms, docsPerSecond ); } s_lastTotal = total; }
// . we call this from Parms.cpp which prints out the proxy related controls // and this table below them... // . allows user to see the stats of each spider proxy bool printSpiderProxyTable ( SafeBuf *sb ) { // only host #0 will have the stats ... so print that link if ( g_hostdb.m_myHost->m_hostId != 0 ) { Host *h = g_hostdb.getHost(0); sb->safePrintf("<br>" "<b>See table on <a href=http://%s:%" PRId32"/" "admin/proxies>" "host #0</a></b>" "<br>" , iptoa(h->m_ip) , (int32_t)(h->getInternalHttpPort()) ); //return true; } // print host table sb->safePrintf ( "<table %s>" "<tr><td colspan=10><center>" "<b>Spider Proxies " "</b>" "</center></td></tr>" "<tr bgcolor=#%s>" "<td>" "<b>proxy IP</b></td>" "<td><b>proxy port</b></td>" "<td><b>times used</b></td>" "<td><b># website IPs banning</b></td>" "<td><b>load points</b></td>" "<td><b>currently out</b></td>" // time of last successful download. print "none" // if never successfully used "<td><b>test url last successful download</b></td>" // we fetch a test url every minute or so through // each proxy to ensure it is up. typically this should // be your website so you do not make someone angry. "<td><b>test url last download attempt</b></td>" // print "FAILED" in red if it failed to download "<td><b>test url download took</b></td>" "<td><b>last bytes downloaded</b></td>" "<td><b>last test url error</b></td>" "</tr>" , TABLE_STYLE , DARK_BLUE ); int32_t now = getTimeLocal(); // print it for ( int32_t i = 0 ; i < s_iptab.getNumSlots() ; i++ ) { // skip empty slots if ( ! 
s_iptab.m_flags[i] ) continue; SpiderProxy *sp = (SpiderProxy *)s_iptab.getValueFromSlot(i); const char *bg = LIGHT_BLUE; // mark with light red bg if last test url attempt failed if ( sp->m_lastDownloadTookMS == -1 && sp->m_lastDownloadTestAttemptMS>0 ) bg = "ffa6a6"; // or a perm denied error (as opposed to a timeout above) if ( sp->m_lastDownloadError ) bg = "ffa6a6"; // print it sb->safePrintf ( "<tr bgcolor=#%s>" "<td>%s</td>" // proxy ip "<td>%" PRIu32"</td>" // port , bg , iptoa(sp->m_ip) , (uint32_t)(uint16_t)sp->m_port ); sb->safePrintf("<td>%" PRId64"</td>",sp->m_timesUsed); int32_t banCount = s_banCountTable.getScore32(sp->m_ip); if ( banCount < 0 ) banCount = 0; sb->safePrintf("<td>%" PRId32"</td>",banCount); int32_t currentLoad; // get # times it appears in loadtable int32_t np = getNumLoadPoints ( sp , ¤tLoad ); sb->safePrintf("<td>%" PRId32"</td>",np); // currently outstanding downloads on this proxy sb->safePrintf("<td>%" PRId32"</td>",currentLoad); // last SUCCESSFUL download time ago. when it completed. int32_t ago = now - sp->m_lastSuccessfulTestMS/1000; sb->safePrintf("<td>"); // like 1 minute ago etc. if ( sp->m_lastSuccessfulTestMS <= 0 ) sb->safePrintf("none"); else printTimeAgo(sb, ago, now, true); sb->safePrintf("</td>"); // last download time ago ago = now - sp->m_lastDownloadTestAttemptMS/1000; sb->safePrintf("<td>"); // like 1 minute ago etc. if ( sp->m_lastDownloadTestAttemptMS<= 0 ) sb->safePrintf("none"); else printTimeAgo(sb, ago, now, true); sb->safePrintf("</td>"); // how long to download the test url? 
if ( sp->m_lastDownloadTookMS != -1 ) sb->safePrintf("<td>%" PRId32"ms</td>", (int32_t)sp->m_lastDownloadTookMS); else if ( sp->m_lastDownloadTestAttemptMS<= 0 ) sb->safePrintf("<td>unknown</td>"); else sb->safePrintf("<td>" "<font color=red>FAILED</font>" "</td>"); sb->safePrintf("<td>%" PRId32"</td>",sp->m_lastBytesDownloaded); if ( sp->m_lastDownloadError ) sb->safePrintf("<td><font color=red>%s</font></td>", mstrerror(sp->m_lastDownloadError)); else sb->safePrintf("<td>none</td>"); sb->safePrintf("</tr>\n"); } sb->safePrintf("</table><br>"); return true; }
bool Log::init ( char *filename ) { // set the main process id //s_pid = getpidtid(); setPid(); // init these m_numErrors = 0; m_bufPtr = 0; m_fd = -1; m_disabled = false; #ifdef DEBUG g_dbufSize = 4096; g_dbuf = (char*)mmalloc(g_dbufSize,"Log: DebugBuffer"); if (!g_dbuf) fprintf(stderr, "Unable to init debug buffer"); #endif // m_hostname = g_conf.m_hostname; // m_port = port; // is there a filename to log our errors to? m_filename = filename; if ( ! m_filename ) return true; // skip this for now //return true; // // RENAME log000 to log000-2013_11_04-18:19:32 // if ( g_conf.m_runAsDaemon ) { File f; char tmp[16]; sprintf(tmp,"log%03li",g_hostdb.m_hostId); f.set ( g_hostdb.m_dir , tmp ); // make new filename like log000-2013_11_04-18:19:32 time_t now = getTimeLocal(); tm *tm1 = gmtime((const time_t *)&now); char tmp2[64]; strftime(tmp2,64,"%Y_%m_%d-%T",tm1); SafeBuf newName; if ( ! newName.safePrintf ( "%slog%03li-%s", g_hostdb.m_dir, g_hostdb.m_hostId, tmp2 ) ) { fprintf(stderr,"log rename failed\n"); return false; } // rename log000 to log000-2013_11_04-18:19:32 if ( f.doesExist() ) { //fprintf(stdout,"renaming file\n"); f.rename ( newName.getBufStart() ); } } // open it for appending. // create with -rw-rw-r-- permissions if it's not there. m_fd = open ( m_filename , O_APPEND | O_CREAT | O_RDWR , S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP | S_IROTH ); if ( m_fd >= 0 ) return true; // bitch to stderr and return false on error fprintf(stderr,"could not open log file %s for appending\n", m_filename); return false; }
// returns true if all done, false if waiting for more replies bool Msg12::gotLockReply ( UdpSlot *slot ) { // no longer use this char *xx=NULL;*xx=0; // got reply m_numReplies++; // don't let udpserver free the request, it's our m_request[] slot->m_sendBufAlloc = NULL; // check for a hammer reply char *reply = slot->m_readBuf; int32_t replySize = slot->m_readBufSize; // if error, treat as a not grant if ( g_errno ) { bool logIt = true; // note it if ( g_conf.m_logDebugSpider ) log("spider: got msg12 reply error = %s", mstrerror(g_errno)); // if we got an ETRYAGAIN when trying to confirm our lock // that means doledb was saving/dumping to disk and we // could not remove the record from doledb and add an // entry to the waiting tree, so we need to keep trying if ( g_errno == ETRYAGAIN && m_confirming ) { // c ount it again m_numRequests++; // use what we were using char *request = (char *)&m_confirmRequest; int32_t requestSize = sizeof(ConfirmRequest); Host *h = g_hostdb.getHost(slot->m_hostId); // send request to him UdpServer *us = &g_udpServer; if ( ! us->sendRequest ( request , requestSize , 0x12 , // msgType h->m_ip , h->m_port , h->m_hostId , NULL , // retSlotPtrPt this , // state data gotLockReplyWrapper , udpserver_sendrequest_infinite_timeout ) ) return false; // error? // don't spam the log! static int32_t s_last = 0; int32_t now = getTimeLocal(); if ( now - s_last >= 1 ) { s_last = now; log("spider: error re-sending confirm " "request: %s", mstrerror(g_errno)); } } // only log every 10 seconds for ETRYAGAIN if ( g_errno == ETRYAGAIN ) { static time_t s_lastTime = 0; time_t now = getTimeLocal(); logIt = false; if ( now - s_lastTime >= 3 ) { logIt = true; s_lastTime = now; } } if ( logIt ) log ( "sploop: host had error getting lock url=%s" ": %s" , m_url,mstrerror(g_errno) ); } // grant or not if ( replySize == 1 && ! 
g_errno && *reply == 1 ) m_grants++; // wait for all to get back if ( m_numReplies < m_numRequests ) return false; // all done if we were removing if ( m_removing ) { // note it if ( g_conf.m_logDebugSpider ) logf(LOG_DEBUG,"spider: done removing all locks " "(replies=%" PRId32") for %s", m_numReplies,m_url);//m_sreq->m_url); // we are done m_gettingLocks = false; return true; } // all done if we were confirming if ( m_confirming ) { // note it if ( g_conf.m_logDebugSpider ) logf(LOG_DEBUG,"spider: done confirming all locks " "for %s uh48=%" PRId64,m_url,m_origUh48);//m_sreq->m_url); // we are done m_gettingLocks = false; // . keep processing // . if the collection was nuked from under us the spiderUrl2 // will return true and set g_errno if ( ! m_callback ) return g_spiderLoop.spiderUrl2(); // if we had a callback let our parent call it return true; } // if got ALL locks, spider it if ( m_grants == m_numReplies ) { // note it if ( g_conf.m_logDebugSpider ) logf(LOG_DEBUG,"spider: got lock for docid=lockkey=%" PRIu64, m_lockKeyUh48); // flag this m_hasLock = true; // we are done //m_gettingLocks = false; /////// // // now tell our group (shard) to remove from doledb // and re-add to waiting tree. the evalIpLoop() function // should skip this probable docid because it is in the // LOCK TABLE! // // This logic should allow us to spider multiple urls // from the same IP at the same time. // /////// // returns false if would block if ( ! confirmLockAcquisition ( ) ) return false; // . we did it without blocking, maybe cuz we are a single node // . ok, they are all back, resume loop // . if the collection was nuked from under us the spiderUrl2 // will return true and set g_errno if ( ! m_callback ) g_spiderLoop.spiderUrl2 ( ); // all done return true; } // note it if ( g_conf.m_logDebugSpider ) logf(LOG_DEBUG,"spider: missed lock for %s lockkey=%" PRIu64" " "(grants=%" PRId32")", m_url,m_lockKeyUh48,m_grants); // . 
if it was locked by another then add to our lock cache so we do // not try to lock it again // . if grants is not 0 then one host granted us the lock, but not // all hosts, so we should probably keep trying on it until it is // locked up by one host if ( m_grants == 0 ) { int32_t now = getTimeGlobal(); g_spiderLoop.m_lockCache.addLong(0,m_lockKeyUh48,now,NULL); } // reset again m_numRequests = 0; m_numReplies = 0; // no need to remove them if none were granted because another // host in our group might have it 100% locked. if ( m_grants == 0 ) { // no longer in locks operation mode m_gettingLocks = false; // ok, they are all back, resume loop //if ( ! m_callback ) g_spiderLoop.spiderUrl2 ( ); // all done return true; } // note that if ( g_conf.m_logDebugSpider ) logf(LOG_DEBUG,"spider: sending request to all in shard to " "remove lock uh48=%" PRIu64". grants=%" PRId32, m_lockKeyUh48,(int32_t)m_grants); // remove all locks we tried to get, BUT only if from our hostid! // no no! that doesn't quite work right... we might be the ones // locking it! i.e. another one of our spiders has it locked... if ( ! removeAllLocks ( ) ) return false; // true; // if did not block, how'd that happen? log("sploop: did not block in removeAllLocks: %s",mstrerror(g_errno)); return true; }
void Stats::logAvgQueryTime(long long startTime) { long long now = gettimeofdayInMilliseconds(); long long took = now - startTime; static long s_lastSendTime = 0; // if just one query took an insanely long time, // do not sound the alarm. this is in seconds, // so multiply by 1000. //long long maxTook = // (long long)(g_conf.m_maxQueryTime*1000.0) ; //if ( took > maxTook ) took = maxTook; m_queryTimes += took; m_numQueries++; if ( m_numQueries > g_conf.m_numQueryTimes ) goto reset; if (m_numQueries != g_conf.m_numQueryTimes) return; // otherwise, store this info m_avgQueryTime = (float)m_queryTimes / ((float)m_numQueries * 1000.0); m_successRate = (float)m_numSuccess / (float)(m_numSuccess + m_numFails); //(number of queries) / seconds that it took to get this many queries m_avgQueriesPerSec = ((float)m_numQueries * 1000.0) / (float)(now - m_lastQueryLogTime); m_lastQueryLogTime = now; if(m_avgQueryTime > g_conf.m_avgQueryTimeThreshold || m_successRate < g_conf.m_querySuccessThreshold) { char msgbuf[1024]; Host *h = g_hostdb.getHost ( 0 ); snprintf(msgbuf, 1024, "Average latency: %f sec. " "success rate: %f. " "queries/sec: %f. " "host: %s.", m_avgQueryTime, m_successRate, m_avgQueriesPerSec, iptoa(h->m_ip)); log(LOG_WARN, "query: %s",msgbuf); // prevent machinegunning text msgs long now = getTimeLocal(); if ( now - s_lastSendTime > 300 ) { s_lastSendTime = now; g_pingServer.sendEmail(NULL, msgbuf); } } else { log(LOG_INFO, "query: Average latency is %f seconds, " "succeeding at a rate of %f, serving %f queries/sec.", m_avgQueryTime, m_successRate, m_avgQueriesPerSec); } reset: m_totalNumQueries += m_numSuccess + m_numFails; m_totalNumSuccess += m_numSuccess; m_totalNumFails += m_numFails; m_numQueries = 0; m_queryTimes = 0; m_numSuccess = 0; m_numFails = 0; }
// a cacheTime of -1 means browser should not cache at all void HttpMime::makeMime ( long totalContentLen , long cacheTime , time_t lastModified , long offset , long bytesToSend , char *ext , bool POSTReply , char *contentType , char *charset , long httpStatus , char *cookie ) { // assume UTF-8 //if ( ! charset ) charset = "utf-8"; // . make the content type line // . uses a static buffer if ( ! contentType ) contentType = (char *)getContentTypeFromExtension ( ext ); // do not cache plug ins if ( contentType && strcmp(contentType,"application/x-xpinstall")==0) cacheTime = -2; // assume UTF-8, but only if content type is text // . No No No!!! // . This prevents charset specification in html files // . -partap //if ( ! charset && contentType && strncmp(contentType,"text",4)==0) // charset = "utf-8"; // this is used for bz2 and gz files (mp3?) const char *contentEncoding = getContentEncodingFromExtension ( ext ); // the string char enc[128]; if ( contentEncoding ) sprintf ( enc , "Content-Encoding: %s\r\n", contentEncoding ); else enc[0] = '\0'; // get the time now //time_t now = getTimeGlobal(); time_t now; if ( isClockInSync() ) now = getTimeGlobal(); else now = getTimeLocal(); // get the greenwhich mean time (GMT) char ns[128]; struct tm *timeStruct = gmtime ( &now ); // Wed, 20 Mar 2002 16:47:30 GMT strftime ( ns , 126 , "%a, %d %b %Y %T GMT" , timeStruct ); // if lastModified is 0 use now if ( lastModified == 0 ) lastModified = now; // convert lastModified greenwhich mean time (GMT) char lms[128]; timeStruct = gmtime ( &lastModified ); // Wed, 20 Mar 2002 16:47:30 GMT strftime ( lms , 126 , "%a, %d %b %Y %T GMT" , timeStruct ); // . the pragma no cache string (used just for proxy servers?) // . also use cache-control: for the browser itself (HTTP1.1, though) // . pns = "Pragma: no-cache\nCache-Control: no-cache\nExpires: -1\n"; char tmp[128]; char *pns ; // with cache-control on, when you hit the back button, it reloads // the page, this is bad for most things... 
so we only avoid the // cache for index.html and PageAddUrl.cpp (the main and addurl page) if ( cacheTime == -2 ) pns = "Cache-Control: no-cache\r\n" "Pragma: no-cache\r\n" "Expires: -1\r\n"; // so when we click on a control link, it responds correctly. // like turning spiders on. else if ( cacheTime == -1 ) pns = "Pragma: no-cache\r\n" "Expires: -1\r\n"; // don't specify cache times if it's 0 (let browser regulate it) else if ( cacheTime == 0 ) pns = ""; // otherwise, expire tag: "Expires: Wed, 23 Dec 2001 10:23:01 GMT" else { time_t expDate = now + cacheTime; timeStruct = gmtime ( &expDate ); strftime ( tmp , 100 , "Expires: %a, %d %b %Y %T GMT\r\n", timeStruct ); pns = tmp; } // . set httpStatus // . a reply to a POST (not a GET or HEAD) should be 201 char *p = m_buf; char *smsg = ""; if ( POSTReply ) { if ( httpStatus == -1 ) httpStatus = 200; if ( httpStatus == 200 ) smsg = " OK"; if ( ! charset ) charset = "utf-8"; //sprintf ( m_buf , p += sprintf ( p, "HTTP/1.0 %li%s\r\n" "Date: %s\r\n" //"P3P: CP=\"CAO PSA OUR\"\r\n" "Server: Gigablast/1.0\r\n" "Content-Length: %li\r\n" //"Expires: Wed, 23 Dec 2003 10:23:01 GMT\r\n" //"Expires: -1\r\n" "Connection: Close\r\n" "%s" "Content-Type: %s\r\n\r\n", //"Connection: Keep-Alive\r\n" //"%s" //"Location: f**k\r\n" //"Location: http://192.168.0.4:8000/cgi/3.cgi\r\n" //"Last-Modified: %s\r\n\r\n" , httpStatus , smsg , ns , totalContentLen , enc , contentType ); //pns , //ns ); //lms ); } // . is it partial content? // . if bytesToSend is < 0 it means "totalContentLen" else if ( offset > 0 || bytesToSend != -1 ) { if ( httpStatus == -1 ) httpStatus = 206; if ( ! 
charset ) charset = "utf-8"; //sprintf ( m_buf , p += sprintf( p, "HTTP/1.0 %li Partial content\r\n" "%s" "Content-Length: %li\r\n" "Content-Range: %li-%li(%li)\r\n"// added "bytes" "Connection: Close\r\n" //"P3P: CP=\"CAO PSA OUR\"\r\n" "Server: Gigablast/1.0\r\n" "%s" "Date: %s\r\n" "Last-Modified: %s\r\n" "Content-Type: %s\r\n", httpStatus , enc ,bytesToSend , offset , offset + bytesToSend , totalContentLen , pns , ns , lms , contentType ); // otherwise, do a normal mime } else { char encoding[256]; if (charset) sprintf(encoding, "; charset=%s", charset); else encoding[0] = '\0'; if ( httpStatus == -1 ) httpStatus = 200; if ( httpStatus == 200 ) smsg = " OK"; //sprintf ( m_buf , p += sprintf( p, "HTTP/1.0 %li%s\r\n" // make it at least 4 spaces so we can change // the length of the content should we insert // a login bar in Proxy::storeLoginBar() "Content-Length: %04li\r\n" "%s" "Content-Type: %s", httpStatus , smsg , totalContentLen , enc , contentType ); if ( charset ) p += sprintf ( p , "; charset=%s", charset ); p += sprintf ( p , "\r\n"); p += sprintf ( p , //"Connection: Keep-Alive\r\n" "Connection: Close\r\n" //"P3P: CP=\"CAO PSA OUR\"\r\n" "Server: Gigablast/1.0\r\n" "%s" "Date: %s\r\n" "Last-Modified: %s\r\n" , pns , ns , lms ); } // write the cookie if we have one if (cookie) { // now it is a list of Set-Cookie: x=y\r\n lines //p += sprintf ( p, "Set-Cookie: %s\r\n", cookie); if ( strncmp(cookie,"Set-Cookie",10 ) ) p += sprintf(p,"Set-Cookie: "); p += sprintf ( p, "%s", cookie); if ( p[-1] != '\n' && p[-2] != '\r' ) { *p++ = '\r'; *p++ = '\n'; } } // write another line to end the mime p += sprintf(p, "\r\n"); // set the mime's length //m_bufLen = gbstrlen ( m_buf ); m_bufLen = p - m_buf; }
// Thin wrapper: hands back the value of getTimeLocal() as a time_t.
time_t getTime () {
	const time_t localNow = getTimeLocal();
	return localNow;
}