// . returns true if all done!
// . returns false if still doing stuff
bool Test::injectLoop ( ) {

	long  dlen;
	char *dom;
	long  fakeIp;

 loop:
	// advance to next url
	for ( ; m_urlPtr < m_urlEnd && ! *m_urlPtr ; m_urlPtr++ );

	// all done?
	if ( m_urlPtr >= m_urlEnd ) {
		// flush em out
		if ( ! flushMsg4Buffers ( this , injectedWrapper ) )
			return false;
		// note it
		m_isAdding = false;
		// all done
		return true;
	}

	// error means all done
	if ( m_errno ) { m_isAdding = false; return true; }

	// point to it
	char *u = m_urlPtr;

	// advance to point to the next url for the next loop!
	for ( ; m_urlPtr < m_urlEnd && *m_urlPtr ; m_urlPtr++ );

	// hash it
	long long h = hash64b ( u );

	// dedup it lest we freeze up and stopIt() never gets called because
	// m_urlsAdded is never decremented all the way to zero in Spider.cpp
	if ( m_dt.isInTable ( &h ) ) goto loop;

	// add it. return true with g_errno set on error
	if ( ! m_dt.addKey ( &h ) ) goto hadError;

	// make the SpiderRequest from it
	m_sreq.reset();

	// url
	strcpy ( m_sreq.m_url , u );

	// get domain of url
	dom = getDomFast ( m_sreq.m_url , &dlen );

	// make a fake ip
	fakeIp = 0x123456;
	// use domain if we got that
	if ( dom && dlen ) fakeIp = hash32 ( dom , dlen );

	// first ip is fake
	m_sreq.m_firstIp = fakeIp; // 0x123456;
	// these too
	m_sreq.m_domHash32  = fakeIp;
	m_sreq.m_hostHash32 = fakeIp;
	m_sreq.m_siteHash32 = fakeIp;
	m_sreq.m_probDocId  = g_titledb.getProbableDocId( m_sreq.m_url );
	// this crap is fake
	m_sreq.m_isInjecting = 1;
	// use test-spider subdir for storing pages and spider times?
	if ( g_conf.m_testSpiderEnabled ) m_sreq.m_useTestSpiderDir = 1;
	// use this later
	m_sreq.m_hasContent = 0;
	// injected requests use this as the spider time i guess
	// so we can sort them by this
	m_sreq.m_addedTime = ++s_count;
	// no, because to compute XmlDoc::m_min/maxPubDate we need this to
	// be valid for our test run.. no no we will fix it to be
	// basically 2 days before spider time in the code...
	//m_sreq.m_addedTime = spiderTime;
	m_sreq.m_fakeFirstIp = 1;

	// make the key (parentDocId=0)
	m_sreq.setKey ( fakeIp, 0LL , false );

	// test it
	if ( g_spiderdb.getFirstIp(&m_sreq.m_key) != fakeIp ) {
		char *xx=NULL;*xx=0; }
	// sanity check. check for http(s)://
	if ( m_sreq.m_url[0] != 'h' ) { char *xx=NULL;*xx=0; }

	// reset this
	g_errno = 0;

	// count it
	m_urlsAdded++;

	// note it
	//log("crazyout: %s",m_sreq.m_url );
	logf(LOG_DEBUG,"spider: injecting test url %s",m_sreq.m_url);

	// the receiving end will realize that we are injecting into the test
	// collection and use the "/test/" subdir to load the file
	// "ips.txt" to do our ip lookups, and search for any downloads in
	// that subdirectory as well.
	if ( ! m_msg4.addMetaList ( (char *)&m_sreq      ,
				    m_sreq.getRecSize() ,
				    m_coll              ,
				    NULL                ,
				    injectedWrapper     ,
				    MAX_NICENESS        ,
				    RDB_SPIDERDB        ) )
		// return false if blocked
		return false;

	// error?
	if ( g_errno ) {
		// jump down here from above on error
	hadError:
		// save it
		m_errno = g_errno;
		// flag it
		m_isAdding = false;
		// note it
		log("test: inject had error: %s",mstrerror(g_errno));
		// stop, we are all done!
		return true;
	}

	// add the next spider request
	goto loop;
}
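// ---------------------------------------------------------------------
// editor's sketch (not in the original source): the dedup above is just
// "hash the url, check the table, then add". a minimal standalone
// version, assuming m_dt is a HashTableX keyed on 8-byte hashes:
//
//	bool addUrlOnce ( HashTableX *dt , char *url ) {
//		// 64-bit hash of the NULL-terminated url
//		long long h = hash64b ( url );
//		// already queued? skip it so m_urlsAdded can reach zero
//		if ( dt->isInTable ( &h ) ) return true;
//		// remember it. returns false with g_errno set on error
//		return dt->addKey ( &h );
//	}
// ---------------------------------------------------------------------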
bool sendPageCloneColl ( TcpSocket *s , HttpRequest *r ) {

	char  format = r->getReplyFormat();
	char *coll   = r->getString("c");

	if ( format == FORMAT_XML || format == FORMAT_JSON ) {
		if ( ! coll ) {
			g_errno = EBADENGINEER;
			char *msg = "no c parm provided";
			return g_httpServer.sendErrorReply(s,g_errno,msg,NULL);
		}
		return g_httpServer.sendSuccessReply(s,format);
	}

	char buf [ 64*1024 ];
	SafeBuf p(buf, 64*1024);

	// print standard header
	g_pages.printAdminTop ( &p , s , r );

	char *msg = NULL;
	if ( g_errno ) msg = mstrerror(g_errno);
	if ( msg ) {
		p.safePrintf (
			      "<center>\n"
			      "<font color=red>"
			      "<b>Error cloning collection: %s. "
			      "See log file for details.</b>"
			      "</font>"
			      "</center><br>\n",msg);
	}

	// print the clone box
	p.safePrintf (
		      "<center>\n<table %s>\n"
		      "<tr class=hdrow><td colspan=2>"
		      "<center><b>Clone Collection</b></center>"
		      "</td></tr>\n",
		      TABLE_STYLE);

	p.safePrintf (
		      "<tr bgcolor=#%s>"
		      "<td><b>clone settings from this collection</b>"
		      "<br><font size=1>Copy settings FROM this "
		      "pre-existing collection into the currently "
		      "selected collection."
		      "</font></td>\n"
		      "<td><input type=text name=clonecoll size=30>"
		      "</td>"
		      "</tr>"
		      , LIGHT_BLUE );

	p.safePrintf ( "</table></center><br>\n");

	// wrap up the form started by printAdminTop
	g_pages.printAdminBottom ( &p );

	long bufLen = p.length();

	return g_httpServer.sendDynamicPage (s,p.getBufStart(),bufLen);
}
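// editor's note (sketch): the char buf[64*1024] / SafeBuf(buf,...) pair
// above is the codebase's stack-backed buffer idiom: as i read SafeBuf,
// it writes into the stack array and only falls back to the heap if the
// page outgrows 64k, so small admin pages never allocate. e.g.:
//
//	char buf [ 64*1024 ];
//	SafeBuf p(buf, 64*1024);	// no heap allocation yet
//	p.safePrintf("<b>hello</b>");	// appends into the stack buf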
bool PageNetTest::gotResults( TcpSocket *s ) {
	char *buf;
	long  bufLen, bufMaxLen;
	HttpMime mime;

	if ( g_errno ) {
		log( "net: nettest: g_errno: %s", mstrerror(g_errno) );
		g_errno = 0;
		return false;
	}

	if ( !s ) return false;

	buf       = s->m_readBuf;
	bufLen    = s->m_readOffset;
	bufMaxLen = s->m_readBufSize;

	char temp[64];
	long len = sprintf(temp, "http://%s:%i/get?rnettest=1",
			   iptoa(s->m_ip), s->m_port);
	Url u;
	u.set( temp, len );

	if ( !mime.set ( buf, bufLen, &u ) ) {
		log( "net: nettest: MIME.set() failed." );
		return false;
	}
	if ( mime.getHttpStatus() != 200 ) {
		log( "net: nettest: MIME.getHttpStatus() failed." );
		return false;
	}

	long state  = 0;
	long hostId = 0;
	long testId = 0;

	if( !bufLen ) log( LOG_INFO, "net: nettest: we got an empty doc." );

	buf    += mime.getMimeLen();
	bufLen -= mime.getMimeLen();

	// the reply body is a whitespace-separated list of numbers:
	// hostId, testId, then alternating send/recv rates
	for( long i = 0; i < bufLen; i++ ){
		if( buf[i] == ' '  ) continue;
		if( buf[i] == '\r' ) continue;
		if( buf[i] == '\n' ) continue;
		if( buf[i] <  '0'  ) continue;

		if( state == 0 ) {
			hostId = atoi(&buf[i]);
			log( LOG_DEBUG, "net: nettest: host id is %ld",
			     hostId);
			state = 1;
		}
		else if( state == 1 ) {
			testId = atoi(&buf[i]);
			log( LOG_DEBUG, "net: nettest: test id is %ld",
			     testId);
			state = 2;
		}
		else if( state == 2 ){
			if( ((testId < hostId) || !hostId) && (testId) ) {
				if( !m_hostRates[0][hostId] )
					m_hostRates[0][hostId] = atoi(&buf[i]);
				else
					m_hostRates[2][hostId] = atoi(&buf[i]);
			}
			else {
				if( !m_hostRates[2][hostId] )
					m_hostRates[2][hostId] = atoi(&buf[i]);
				else
					m_hostRates[0][hostId] = atoi(&buf[i]);
			}
			state = 3;
			log( LOG_DEBUG, "net: nettest: send rate is %d",
			     atoi(&buf[i]));
		}
		else if( state == 3 ) {
			if( ((testId < hostId) || !hostId) && (testId) ) {
				if( !m_hostRates[1][hostId] )
					m_hostRates[1][hostId] = atoi(&buf[i]);
				else
					m_hostRates[3][hostId] = atoi(&buf[i]);
			}
			else {
				if( !m_hostRates[3][hostId] )
					m_hostRates[3][hostId] = atoi(&buf[i]);
				else
					m_hostRates[1][hostId] = atoi(&buf[i]);
			}
			state = 0;
			log( LOG_DEBUG, "net: nettest: rcv rate is %d",
			     atoi(&buf[i]));
		}

		// skip the rest of this number
		while( buf[i+1] >= '0' ) i++;
	}

	if( m_numResultsSent < g_hostdb.getNumHosts() )
		return collectResults();

	if( ++m_numResultsRecv < m_numResultsSent ) return false;

	return true;
}
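// editor's note (sketch): the loop above is a 4-state machine over a
// reply body that, judging from the parse, looks something like:
//
//	0		<- hostId    (state 0)
//	1		<- testId    (state 1)
//	104857600	<- send rate (state 2)
//	98765432	<- recv rate (state 3, then wrap to state 0)
//
// each iteration skips whitespace, atoi()'s one number, then advances
// past that number's remaining digits via the while(buf[i+1] >= '0')
// tail before parsing the next one.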
//
// . ENTRY POINT FOR IMPORTING TITLEDB RECS FROM ANOTHER CLUSTER
// . when user clicks 'begin' in import page we come here..
// . so when that parm changes in Parms.cpp we sense that and call
//   beginImport(CollectionRec *cr)
// . or on startup we call resumeImports to check each coll for
//   an import in progress.
// . search for files named titledb*.dat
// . if none found just return
// . when msg7 inject completes it calls this
// . call this from sleep wrapper in Process.cpp
// . returns false if would block (outstanding injects), true otherwise
// . sets g_errno on error
//
bool ImportState::importLoop ( ) {

	CollectionRec *cr = g_collectiondb.getRec ( m_collnum );

	if ( ! cr || g_hostdb.m_hostId != 0 ) {
		// if coll was deleted!
		log("import: collnum %li deleted while importing into it",
		    (long)m_collnum);
		//if ( m_numOut > m_numIn ) return true;
		// delete the entire import state i guess
		// what happens if we have a msg7 reply come back in?
		// it should see the collrec is NULL and just fail.
		mdelete ( this, sizeof(ImportState) , "impstate");
		delete (this);
		return true;
	}

 INJECTLOOP:

	// stop if waiting on outstanding injects
	long long out = m_numOut - m_numIn;
	if ( out >= cr->m_numImportInjects ) {
		g_errno = 0;
		return false;
	}

	if ( ! cr->m_importEnabled ) {
		// wait for all to return
		if ( out > 0 ) return false;
		// then delete it
		log("import: collnum %li import loop disabled",
		    (long)m_collnum);
		mdelete ( this, sizeof(ImportState) , "impstate");
		delete (this);
		return true;
	}

	// scan each titledb file scanning titledb0001.dat first,
	// titledb0003.dat second etc.
	//long long offset = -1;
	// . when offset is too big for current m_bigFile file then
	//   we go to the next and set offset to 0.
	// . sets m_bf and m_fileOffset
	if ( ! setCurrentTitleFileAndOffset ( ) ) { //cr , -1 );
		log("import: import: no files to read");
		//goto INJECTLOOP;
		return true;
	}

	// this is -1 if none remain!
	if ( m_fileOffset == -1 ) {
		log("import: import fileoffset is -1. done.");
		return true;
	}

	long long saved = m_fileOffset;

	//Msg7 *msg7;
	//GigablastRequest *gr;
	//SafeBuf *sbuf = NULL;

	long need = 12;
	long dataSize = -1;
	//XmlDoc xd;
	key_t tkey;
	bool status;
	SafeBuf tmp;
	SafeBuf *sbuf = &tmp;
	long long docId;
	long shardNum;
	long key;
	Multicast *mcast;
	char *req;
	long reqSize;

	if ( m_fileOffset >= m_bfFileSize ) {
		log("inject: import: done processing file %li %s",
		    m_bfFileId,m_bf.getFilename());
		goto nextFile;
	}

	// read in title rec key and data size
	status = m_bf.read ( &tkey, sizeof(key_t) , m_fileOffset );

	//if ( n != 12 ) goto nextFile;
	if ( g_errno ) {
		log("inject: import: reading file error: %s. advancing "
		    "to next file",mstrerror(g_errno));
		goto nextFile;
	}

	m_fileOffset += 12;

	// if negative key, skip
	if ( (tkey.n0 & 0x01) == 0 ) {
		goto INJECTLOOP;
	}

	// if non-negative then read in size
	status = m_bf.read ( &dataSize , 4 , m_fileOffset );
	if ( g_errno ) {
		log("main: failed to read in title rec "
		    "file. %s. Skipping file %s",
		    mstrerror(g_errno),m_bf.getFilename());
		goto nextFile;
	}

	m_fileOffset += 4;
	need += 4;
	need += dataSize;
	need += 4; // collnum, first 4 bytes

	if ( dataSize < 0 || dataSize > 500000000 ) {
		log("main: could not scan in titledb rec of "
		    "corrupt dataSize of %li. BAILING ENTIRE "
		    "SCAN of file %s",dataSize,m_bf.getFilename());
		goto nextFile;
	}

	//gr = &msg7->m_gr;

	//XmlDoc *xd = getAvailXmlDoc();
	//msg7 = getAvailMsg7();
	mcast = getAvailMulticast();

	// if none, must have to wait for some to come back to us
	if ( ! mcast ) {
		// restore file offset
		//m_fileOffset = saved;
		// no, must have been an oom or something
		log("import: import no mcast available");
		return true; //false;
	}

	// this is for holding a compressed titlerec
	//sbuf = &mcast->m_sbuf; //&gr->m_sbuf;

	// point to start of buf
	sbuf->reset();
	// ensure we have enough room
	sbuf->reserve ( need );
	// collnum first 4 bytes
	sbuf->pushLong( (long)m_collnum );
	// store title key
	sbuf->safeMemcpy ( &tkey , sizeof(key_t) );
	// then datasize if any. neg rec will have -1 datasize
	if ( dataSize >= 0 )
		sbuf->pushLong ( dataSize );

	// then read data rec itself into it, compressed titlerec part
	if ( dataSize > 0 ) {
		// read in the titlerec after the key/datasize
		status = m_bf.read ( sbuf->getBuf() ,
				     dataSize       ,
				     m_fileOffset   );
		if ( g_errno ) { // n != dataSize ) {
			log("main: failed to read in title rec "
			    "file. %s. Skipping file %s",
			    mstrerror(g_errno),m_bf.getFilename());
			// essentially free up this msg7 now
			//msg7->m_inUse = false;
			//msg7->reset();
			goto nextFile;
		}
		// advance
		m_fileOffset += dataSize;
		// it's good, count it
		sbuf->m_length += dataSize;
	}

	// set xmldoc from the title rec
	//xd->set ( sbuf.getBufStart() );
	//xd->m_masterState = NULL;
	//xd->m_masterCallback ( titledbInjectLoop );

	// we use this so we know where the doc we are injecting
	// was in the foreign titledb file. so we can update our bookmark
	// code.
	mcast->m_hackFileOff = saved; //m_fileOffset;
	mcast->m_hackFileId  = m_bfFileId;

	//
	// inject a title rec buf this time, we are doing an import
	// FROM A TITLEDB FILE!!!
	//
	//gr->m_titleRecBuf = &sbuf;

	// break it down into gw
	// xd.set2 ( sbuf.getBufStart() ,
	// 	  sbuf.length() , // max size
	// 	  cr->m_coll, // use our coll
	// 	  NULL , // pbuf for page parser
	// 	  1 , // niceness
	// 	  NULL ); //sreq );

	// // note it
	// log("import: importing %s",xd.m_firstUrl.getUrl());

	// now we can set gr for the injection
	// TODO: inject the whole "sbuf" so we get sitenuminlinks etc
	// all exactly the same...
	// gr->m_url = xd.getFirstUrl()->getUrl();
	// gr->m_queryToScrape = NULL;
	// gr->m_contentDelim = 0;
	// gr->m_contentTypeStr = g_contentTypeStrings [xd.m_contentType];
	// gr->m_contentFile = NULL;
	// gr->m_content = xd.ptr_utf8Content;
	// gr->m_diffbotReply = NULL;
	// gr->m_injectLinks = false;
	// gr->m_spiderLinks = true;
	// gr->m_shortReply = false;
	// gr->m_newOnly = false;
	// gr->m_deleteUrl = false;
	// gr->m_recycle = true; // recycle content? or sitelinks?
	// gr->m_dedup = false;
	// gr->m_hasMime = false;
	// gr->m_doConsistencyTesting = false;
	// gr->m_getSections = false;
	// gr->m_gotSections = false;
	// gr->m_charset = xd.m_charset;
	// gr->m_hopCount = xd.m_hopCount;

	//
	// point to next doc in the titledb file
	//
	//m_fileOffset += need;

	// get docid from key
	docId = g_titledb.getDocIdFromKey ( &tkey );

	// get shard that holds the titlerec for it
	shardNum = g_hostdb.getShardNumFromDocId ( docId );

	// for selecting which host in the shard receives it
	key = (long)docId;

	m_numOut++;

	// then index it. master callback will be called
	//if ( ! xd->index() ) return false;

	// TODO: make this forward the request to an appropriate host!!
	// . gr->m_sbuf is set to the titlerec so this should handle that
	//   and use XmlDoc::set4() or whatever
	// if ( msg7->injectTitleRec ( msg7 , // state
	// 			    gotMsg7ReplyWrapper , // callback
	// 			    cr )) {
	// 	// it didn't block somehow...
	// 	msg7->m_inUse = false;
	// 	msg7->gotMsg7Reply();
	// }

	req = sbuf->getBufStart();
	reqSize = sbuf->length();

	if ( reqSize != need ) { char *xx=NULL;*xx=0; }

	// do not free it, let multicast free it after sending it
	sbuf->detachBuf();

	if ( ! mcast->send ( req ,
			     reqSize ,
			     0x07 ,
			     true , // ownmsg?
			     shardNum,
			     false, // send to whole shard?
			     key , // for selecting host in shard
			     mcast , // state
			     NULL , // state2
			     gotMulticastReplyWrapper ,
			     999999 ) ) { // total timeout in seconds
		log("import: import mcast had error: %s",
		    mstrerror(g_errno));
		m_numIn++;
	}

	goto INJECTLOOP;

 nextFile:
	// invalidate this flag
	//m_offIsValid = false;

	// . and call this function. we add one to m_bfFileId so we
	//   do not re-get the file we just injected.
	// . sets m_bf and m_fileOffset
	// . returns false if nothing to read
	if ( ! setCurrentTitleFileAndOffset ( ) ) { //cr , m_bfFileId+1 );
		log("import: import: no files left to read");
		//goto INJECTLOOP;
		return true;
	}

	// if it returns NULL we are done!
	log("main: titledb injection loop completed. waiting for "
	    "outstanding injects to return.");

	if ( m_numOut > m_numIn )
		return false;

	log("main: all injects have returned. DONE.");

	// dummy return
	return true;
}
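// editor's sketch (not in the original source): the multicast request
// assembled above, assuming the 12-byte titledb key_t, is laid out as:
//
//	+---------+-----------+----------+---------------------+
//	| collnum | title key | dataSize | compressed titlerec |
//	| 4 bytes | 12 bytes  | 4 bytes  | dataSize bytes      |
//	+---------+-----------+----------+---------------------+
//
// negative titledb keys (low bit clear) are skipped before this point,
// so the dataSize field is always present here; "need" is the exact sum
// of these pieces, which is why importLoop() treats reqSize != need as
// fatal.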
// . returns false if blocked, true otherwise
// . sets g_errno on error
// . we are called by Parms::sendPageGeneric() to handle this request
//   which was called by Pages.cpp's sendDynamicReply() when it calls
//   pg->function() which is called by HttpServer::sendReply(s,r) when it
//   gets an http request
// . so "hr" is on the stack in HttpServer::requestHandler() which calls
//   HttpServer::sendReply() so we gotta copy it here
bool sendPageInject ( TcpSocket *sock , HttpRequest *hr ) {

	if ( ! g_conf.m_injectionEnabled ) {
		g_errno = EBADENGINEER;
		log("inject: injection disabled");
		return g_httpServer.sendErrorReply(sock,500,
						   "injection is "
						   "disabled by "
						   "the administrator in "
						   "the master "
						   "controls");
	}

	// get the collection
	// make a new state
	Msg7 *msg7;
	try { msg7 = new (Msg7); }
	catch ( ... ) {
		g_errno = ENOMEM;
		log("PageInject: new(%i): %s",
		    (int)sizeof(Msg7),mstrerror(g_errno));
		return g_httpServer.sendErrorReply(sock,500,
						   mstrerror(g_errno));
	}
	mnew ( msg7, sizeof(Msg7) , "PageInject" );

	msg7->m_socket = sock;

	char format = hr->getReplyFormat();

	// no url parm?
	if ( format != FORMAT_HTML && ! hr->getString("c",NULL) ) {
		g_errno = ENOCOLLREC;
		char *msg = mstrerror(g_errno);
		return g_httpServer.sendErrorReply(sock,g_errno,msg,NULL);
	}

	// set this. also sets gr->m_hr
	GigablastRequest *gr = &msg7->m_gr;

	// this will fill in GigablastRequest so all the parms we need
	// are set
	g_parms.setGigablastRequest ( sock , hr , gr );

	// if content is "" make it NULL so XmlDoc will download it
	// if user really wants empty content they can put a space in there
	// TODO: update help then...
	if ( gr->m_content && ! gr->m_content[0] )
		gr->m_content = NULL;

	if ( gr->m_contentFile && ! gr->m_contentFile[0] )
		gr->m_contentFile = NULL;

	if ( gr->m_contentDelim && ! gr->m_contentDelim[0] )
		gr->m_contentDelim = NULL;

	// set this to false
	gr->m_gotSections = false;

	// if we had a delimiter but no content, zero it out...
	char *content = gr->m_content;
	if ( ! content ) content = gr->m_contentFile;
	if ( ! content ) gr->m_contentDelim = NULL;

	// get collection rec
	CollectionRec *cr = g_collectiondb.getRec ( gr->m_coll );
	// bitch if no collection rec found
	if ( ! cr ) {
		g_errno = ENOCOLLREC;
		//log("build: Injection from %s failed. "
		//    "Collection \"%s\" does not exist.",
		//    iptoa(s->m_ip),coll);
		// g_errno should be set so it will return an error response
		return sendReply ( msg7 );
	}

	// a scrape request?
	if ( gr->m_queryToScrape && gr->m_queryToScrape[0] ) {
		//char *uf="http://www.google.com/search?num=50&"
		//	 "q=%s&scoring=d&filter=0";
		msg7->m_linkDedupTable.set(4,0,512,NULL,0,false,0,"ldtab");
		if ( ! msg7->scrapeQuery ( ) ) return false;
		return sendReply ( msg7 );
	}

	// if no url do not inject
	if ( ! gr->m_url || gr->m_url[0] == '\0' )
		return sendReply ( msg7 );

	// call sendReply() when inject completes
	if ( ! msg7->inject ( msg7 , sendReplyWrapper ) )
		return false;

	// it did not block, i guess we are done
	return sendReply ( msg7 );
}
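// editor's note (sketch): the new/mnew pairing above is the codebase's
// tracked-allocation idiom: mnew() registers the block with the memory
// accounting layer, and the matching teardown is mdelete() then delete,
// as ImportState::importLoop() does for its own state. roughly:
//
//	Msg7 *m;
//	try { m = new (Msg7); }
//	catch ( ... ) { g_errno = ENOMEM; return false; }
//	mnew ( m , sizeof(Msg7) , "PageInject" );	// register
//	// ... use it ...
//	mdelete ( m , sizeof(Msg7) , "PageInject" );	// unregister
//	delete (m);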
// . THIS Msg0 class must be alloc'd, i.e. not on the stack, etc.
// . if list is stored locally this tries to get it locally
// . otherwise tries to get the list from the network
// . returns false if blocked, true otherwise
// . sets g_errno on error
// . NOTE: i was having problems with queries being cached too long, you
//   see the cache here is a NETWORK cache, so when the machine that owns
//   the list updates it on disk it can't flush our cache... so use a
//   small maxCacheAge of like 30 seconds or so...
bool Msg0::getList ( long long  hostId      , // host to ask (-1 if none)
		     long       ip          , // info on hostId
		     short      port        ,
		     long       maxCacheAge , // max cached age in seconds
		     bool       addToCache  , // add net recv'd list to cache?
		     char       rdbId       , // specifies the rdb
		     char      *coll        ,
		     RdbList   *list        ,
		     //key_t    startKey    ,
		     //key_t    endKey      ,
		     char      *startKey    ,
		     char      *endKey      ,
		     long       minRecSizes , // use -1 for no max
		     void      *state       ,
		     void     (* callback)(void *state), //,RdbList *list),
		     long       niceness    ,
		     bool       doErrorCorrection ,
		     bool       includeTree ,
		     bool       doMerge     ,
		     long       firstHostId ,
		     long       startFileNum ,
		     long       numFiles    ,
		     long       timeout     ,
		     long long  syncPoint   ,
		     long       preferLocalReads ,
		     Msg5      *msg5        ,
		     Msg5      *msg5b       ,
		     bool       isRealMerge ,
		     //#ifdef SPLIT_INDEXDB
		     bool       allowPageCache ,
		     bool       forceLocalIndexdb ,
		     bool       noSplit     , // doIndexdbSplit ,
		     long       forceParitySplit ) {
		     //#else
		     //bool     allowPageCache ) {
		     //#endif

	// this is obsolete! mostly, but we need it for PageIndexdb.cpp to
	// show a "termlist" for a given query term in its entirety so you
	// don't have to check each machine in the network. if this is true
	// it means to query each split and merge the results together into
	// a single unified termlist. only applies to indexdb/datedb.
	//if ( doIndexdbSplit ) { char *xx = NULL; *xx = 0; }
	// note this because if caller is wrong it hurts performance major!!
	//if ( doIndexdbSplit )
	//	logf(LOG_DEBUG,"net: doing msg0 with indexdb split true");

	// warning
	if ( ! coll ) log(LOG_LOGIC,"net: NULL collection. msg0.");

	//if ( doIndexdbSplit ) { char *xx=NULL;*xx=0; }

	// reset the list they passed us
	list->reset();

	// get keySize of rdb
	m_ks = getKeySizeFromRdbId ( rdbId );

	// if startKey > endKey, don't read anything
	//if ( startKey > endKey ) return true;
	if ( KEYCMP(startKey,endKey,m_ks)>0 ) { char *xx=NULL;*xx=0; }//rettrue

	// . reset hostid if it is dead
	// . this is causing UOR queries to take forever when we have a dead
	if ( hostId >= 0 && g_hostdb.isDead ( hostId ) ) hostId = -1;

	// no longer accept negative minrecsize
	if ( minRecSizes < 0 ) {
		g_errno = EBADENGINEER;
		log(LOG_LOGIC,
		    "net: msg0: Negative minRecSizes no longer supported.");
		char *xx=NULL;*xx=0;
		return true;
	}

	// debug msg
	//if ( niceness != 0 ) log("HEY start");

	// ensure startKey last bit clear, endKey last bit set
	//if ( (startKey.n0 & 0x01) == 0x01 )
	//	log("Msg0::getList: warning startKey lastbit set");
	//if ( (endKey.n0 & 0x01) == 0x00 )
	//	log("Msg0::getList: warning endKey lastbit clear");

	// remember these
	m_state      = state;
	m_callback   = callback;
	m_list       = list;
	m_hostId     = hostId;
	m_niceness   = niceness;
	//m_ip       = ip;
	//m_port     = port;
	m_addToCache = addToCache;
	// . these define our request 100%
	//m_startKey = startKey;
	//m_endKey   = endKey;
	KEYSET(m_startKey,startKey,m_ks);
	KEYSET(m_endKey  ,endKey  ,m_ks);
	m_minRecSizes    = minRecSizes;
	m_rdbId          = rdbId;
	m_coll           = coll;
	m_isRealMerge    = isRealMerge;
	m_allowPageCache = allowPageCache;

	// . group to ask is based on the first key
	// . we only do 1 group per call right now
	// . groupMask must turn on higher bits first (count downwards kinda)
	// . titledb and spiderdb use special masks to get groupId

	// did they force it? core until i figure out what this is
	if ( forceParitySplit >= 0 )
		m_groupId = g_hostdb.getGroupId ( forceParitySplit );
	else
		m_groupId = getGroupId ( m_rdbId , startKey , ! noSplit );
	// how is this used?
	if ( forceLocalIndexdb ) m_groupId = g_hostdb.m_groupId;

	// . store these parameters
	// . get a handle to the rdb in case we can satisfy locally
	// . returns NULL and sets g_errno on error
	QUICKPOLL((m_niceness));
	Rdb *rdb = getRdbFromId ( m_rdbId );
	if ( ! rdb ) return true;
	// we need the fixedDataSize
	m_fixedDataSize = rdb->getFixedDataSize();
	m_useHalfKeys   = rdb->useHalfKeys();
	// . debug msg
	// . Msg2 does this when checking for a cached compound list.
	//   compound lists do not actually exist, they are merges of
	//   smaller UOR'd lists.
	if ( maxCacheAge != 0 && ! addToCache &&
	     (numFiles > 0 || includeTree) )
		log(LOG_LOGIC,"net: msg0: "
		    "Weird. check but don't add... rdbid=%li.",
		    (long)m_rdbId);

	// set this here since we may not call msg5 if list not local
	//m_list->setFixedDataSize ( m_fixedDataSize );

	// . now that we do load balancing we don't want to do a disk lookup
	//   even if local if we are merging or dumping
	// . UNLESS g_conf.m_preferLocalReads is true
	if ( preferLocalReads == -1 )
		preferLocalReads = g_conf.m_preferLocalReads;

	// . always prefer local for full split clusterdb
	// . and keep the tfndb/titledb lookups in the same stripe
	// . so basically we can't do biased caches if fully split
	//if ( g_conf.m_fullSplit ) preferLocalReads = true;
	preferLocalReads = true;

	// is it stored locally?
	bool isLocal = ( m_hostId == -1 &&
			 g_hostdb.m_groupId == m_groupId );
	// only do local lookups if this is true
	if ( ! preferLocalReads ) isLocal = false;

	/*
	m_numSplit = 1;
	if ( g_hostdb.m_indexSplits > 1 &&
	     ( rdbId == RDB_POSDB || rdbId==RDB_DATEDB)&&
	     ! forceLocalIndexdb && doIndexdbSplit ) {
		isLocal  = false;
		//m_numSplit = INDEXDB_SPLIT;
		m_numSplit = g_hostdb.m_indexSplits;
		char *xx=NULL;*xx=0;
	}
	*/

	/*
	long long singleDocIdQuery = 0LL;
	if ( rdbId == RDB_POSDB ) {
		long long d1 = g_posdb.getDocId(m_startKey);
		long long d2 = g_posdb.getDocId(m_endKey);
		if ( d1+1 == d2 ) singleDocIdQuery = d1;
	}

	// . try the LOCAL termlist cache
	// . so when msg2 is evaluating a gbdocid:| query and it has to
	//   use msg0 to go across the network to get the same damn termlist
	//   over and over again for the same docid, this will help alot.
	// . ideally it'd be nice if the seo pipe in xmldoc.cpp can try to
	//   send the same gbdocid:xxxx docids to the same hosts. maybe hash
	//   based on docid into the list of hosts and if that host is busy
	//   just chain until we find someone not busy.
	if ( singleDocIdQuery &&
	     getListFromTermListCache ( coll,
					m_startKey,
					m_endKey,
					maxCacheAge,
					list ) )
		// found!
		return true;
	*/

	// but always local if only one host
	if ( g_hostdb.getNumHosts() == 1 ) isLocal = true;

	// force a msg0 if doing a docid restrictive query like
	// gbdocid:xxxx|<query> so we call cacheTermLists()
	//if ( singleDocIdQuery ) isLocal = false;

	// . if the group is local then do it locally
	// . Msg5::getList() returns false if blocked, true otherwise
	// . Msg5::getList() sets g_errno on error
	// . don't do this if m_hostId was specified
	if ( isLocal ) { // && !g_conf.m_interfaceMachine ) {
		if ( msg5 ) {
			m_msg5 = msg5;
			m_deleteMsg5 = false;
		}
		else {
			try { m_msg5 = new ( Msg5 ); }
			catch ( ... ) {
				g_errno = ENOMEM;
				log("net: Local alloc for disk read failed "
				    "while trying to read data for %s. "
				    "Trying remote request.",
				    getDbnameFromId(m_rdbId));
				goto skip;
			}
			mnew ( m_msg5 , sizeof(Msg5) , "Msg0" );
			m_deleteMsg5 = true;
		}

		QUICKPOLL(m_niceness);

		// same for msg5b
		if ( msg5b ) {
			m_msg5b = msg5b;
			m_deleteMsg5b = false;
		}
		else if ( m_rdbId == RDB_TITLEDB ) {
			try { m_msg5b = new ( Msg5 ); }
			catch ( ... ) {
				g_errno = ENOMEM;
				log("net: Local alloc for disk read failed "
				    "while trying to read data for %s. "
				    "Trying remote request. 2.",
				    getDbnameFromId(m_rdbId));
				goto skip;
			}
			mnew ( m_msg5b , sizeof(Msg5) , "Msg0b" );
			m_deleteMsg5b = true;
		}

		QUICKPOLL(m_niceness);

		if ( ! m_msg5->getList ( rdbId,
					 coll ,
					 m_list ,
					 m_startKey ,
					 m_endKey ,
					 m_minRecSizes ,
					 includeTree   , // include Tree?
					 addToCache    , // addToCache?
					 maxCacheAge   ,
					 startFileNum  ,
					 numFiles      ,
					 this ,
					 gotListWrapper2 ,
					 niceness ,
					 doErrorCorrection ,
					 NULL , // cacheKeyPtr
					 0    , // retryNum
					 -1   , // maxRetries
					 true , // compensateForMerge
					 syncPoint ,
					 m_msg5b ,
					 m_isRealMerge ,
					 m_allowPageCache ) )
			return false;
		// nuke it
		reset();
		return true;
	}

 skip:
	// debug msg
	if ( g_conf.m_logDebugQuery )
		log(LOG_DEBUG,"net: msg0: Sending request for data to "
		    "group=%li listPtr=%li minRecSizes=%li termId=%llu "
		    //"startKey.n1=%lx,n0=%llx (niceness=%li)",
		    "startKey.n1=%llx,n0=%llx (niceness=%li)",
		    g_hostdb.makeHostId ( m_groupId ) ,(long)m_list,
		    m_minRecSizes, g_posdb.getTermId(m_startKey) ,
		    //m_startKey.n1,m_startKey.n0 , (long)m_niceness);
		    KEY1(m_startKey,m_ks),KEY0(m_startKey),
		    (long)m_niceness);

	char *replyBuf        = NULL;
	long  replyBufMaxSize = 0;
	bool  freeReply       = true;

	// adjust niceness for net transmission
	bool realtime = false;
	//if ( minRecSizes + 32 < TMPBUFSIZE ) realtime = true;

	// if we're niceness 0 we need to pre-allocate for reply since it
	// might be received within the asynchronous signal handler which
	// cannot call mmalloc()
	if ( realtime ) { // niceness <= 0 || netnice == 0 ) {
		// . we should not get back more than minRecSizes bytes since
		//   we are now performing merges
		// . it should not slow things down too much since the
		//   hashing is 10 times slower than merging anyhow...
		// . CAUTION: if rdb is not fixed-datasize then this will
		//   not work for us! it can exceed m_minRecSizes.
		replyBufMaxSize = m_minRecSizes ;
		// . get a little extra to fix the error where we ask for 64
		//   but get 72
		// . where is that coming from?
		// . when getting titleRecs we often exceed the minRecSizes
		// . ?Msg8? was having trouble. was short 32 bytes sometimes.
		replyBufMaxSize += 36;
		// why add ten percent?
		//replyBufMaxSize *= 110 ;
		//replyBufMaxSize /= 100 ;
		// make a buffer to hold the reply
		/*
		if ( m_numSplit > 1 ) {
			m_replyBufSize = replyBufMaxSize * m_numSplit;
			replyBuf = (char *) mmalloc(m_replyBufSize, "Msg0");
			m_replyBuf = replyBuf;
			freeReply  = false;
		}
		else
		*/
		replyBuf = (char *) mmalloc(replyBufMaxSize , "Msg0");
		// g_errno is set and we return true if it failed
		if ( ! replyBuf ) {
			log("net: Failed to pre-allocate %li bytes to hold "
			    "data read remotely from %s: %s.",
			    replyBufMaxSize,getDbnameFromId(m_rdbId),
			    mstrerror(g_errno));
			return true;
		}
	}

	// . make a request with the info above (note: not in network order)
	// . IMPORTANT!!!!! if you change this change
	//   Multicast.cpp::sleepWrapper1 too!!!!!!!!!!!!
	//   no, not anymore, we commented out that request peeking code
	char *p = m_request;
	*(long long *) p = syncPoint        ; p += 8;
	//*(key_t *)   p = m_startKey       ; p += sizeof(key_t);
	//*(key_t *)   p = m_endKey         ; p += sizeof(key_t);
	*(long *)      p = m_minRecSizes    ; p += 4;
	*(long *)      p = startFileNum     ; p += 4;
	*(long *)      p = numFiles         ; p += 4;
	*(long *)      p = maxCacheAge      ; p += 4;
	*p = m_rdbId                        ; p++;
	*p = addToCache                     ; p++;
	*p = doErrorCorrection              ; p++;
	*p = includeTree                    ; p++;
	*p = (char)niceness                 ; p++;
	*p = (char)m_allowPageCache         ; p++;
	KEYSET(p,m_startKey,m_ks)           ; p += m_ks;
	KEYSET(p,m_endKey  ,m_ks)           ; p += m_ks;
	// NULL terminated collection name
	strcpy ( p , coll ); p += gbstrlen ( coll ); *p++ = '\0';
	m_requestSize = p - m_request;

	// ask an individual host for this list if hostId is NOT -1
	if ( m_hostId != -1 ) {
		// get Host
		Host *h = g_hostdb.getHost ( m_hostId );
		if ( ! h ) {
			g_errno = EBADHOSTID;
			log(LOG_LOGIC,"net: msg0: Bad hostId of %lli.",
			    m_hostId);
			return true;
		}
		// if niceness is 0, use the higher priority udpServer
		UdpServer *us;
		unsigned short port;
		QUICKPOLL(m_niceness);
		//if ( niceness <= 0 || netnice == 0 ) {
		//if ( realtime ) {
		//	us = &g_udpServer2; port = h->m_port2; }
		//else {
		us = &g_udpServer ; port = h->m_port ;
		// . returns false on error and sets g_errno, true otherwise
		// . calls callback when reply is received (or error)
		// . we return true if it returns false
		if ( ! us->sendRequest ( m_request ,
					 m_requestSize ,
					 0x00 , // msgType
					 h->m_ip ,
					 port ,
					 m_hostId ,
					 NULL , // the slotPtr
					 this ,
					 gotSingleReplyWrapper ,
					 timeout ,
					 -1 , // backoff
					 -1 , // maxwait
					 replyBuf ,
					 replyBufMaxSize ,
					 m_niceness ) ) // cback niceness
			return true;
		// return false cuz it blocked
		return false;
	}

	// timing debug
	if ( g_conf.m_logTimingNet )
		m_startTime = gettimeofdayInMilliseconds();
	else
		m_startTime = 0;

	//if ( m_rdbId == RDB_INDEXDB )
	//	log("Msg0:: getting remote indexlist. "
	//	    "termId=%llu, "
	//	    "groupNum=%lu",
	//	    g_indexdb.getTermId(m_startKey) ,
	//	    g_hostdb.makeHostId ( m_groupId ) );

	/*
	// make the cache key so we can see what remote host cached it,
	// if any
	char cacheKey[MAX_KEY_BYTES];
	//key_t cacheKey = makeCacheKey ( startKey ,
	makeCacheKey ( startKey     ,
		       endKey       ,
		       includeTree  ,
		       minRecSizes  ,
		       startFileNum ,
		       numFiles     ,
		       cacheKey     ,
		       m_ks         );
	*/

	// . get the top long of the key
	// . i guess this will work for 128 bit keys... hmmmmm
	long keyTop = hash32 ( (char *)startKey , m_ks );

	/*
	// allocate space
	if ( m_numSplit > 1 ) {
		long  need = m_numSplit * sizeof(Multicast) ;
		char *buf  = (char *)mmalloc ( need,"msg0mcast" );
		if ( ! buf ) return true;
		m_mcasts = (Multicast *)buf;
		for ( long i = 0; i < m_numSplit ; i++ )
			m_mcasts[i].constructor();
	}
	*/

	// . otherwise, multicast to a host in group "groupId"
	// . returns false and sets g_errno on error
	// . calls callback on completion
	// . select first host to send to in group based on upper 32 bits
	//   of termId (m_startKey.n1)
	//#ifdef SPLIT_INDEXDB
	// . need to send out to all the indexdb split hosts
	m_numRequests = 0;
	m_numReplies  = 0;
	//for ( long i = 0; i < m_numSplit; i++ ) {

	QUICKPOLL(m_niceness);

	long  gr;
	char *buf;
	/*
	if ( m_numSplit > 1 ) {
		gr  = g_indexdb.getSplitGroupId ( baseGroupId, i );
		buf = &replyBuf[i*replyBufMaxSize];
	}
	else {
	*/
	gr  = m_groupId;
	buf = replyBuf;
	//}

	// get the multicast
	Multicast *m = &m_mcast;
	//if ( m_numSplit > 1 ) m = &m_mcasts[i];

	if ( ! m->send ( m_request ,
			 //#else
			 // if ( ! m_mcast.send ( m_request ,
			 //#endif
			 m_requestSize,
			 0x00 , // msgType 0x00
			 false , // does multicast own request?
			 //#ifdef SPLIT_INDEXDB
			 gr , // group + offset
			 //#else
			 // m_groupId , // group to send to (groupKey)
			 //#endif
			 false , // send to whole group?
			 //m_startKey.n1, // key is passed on startKey
			 keyTop , // key is passed on startKey
			 this , // state data
			 NULL , // state data
			 gotMulticastReplyWrapper0 ,
			 timeout , // timeout in seconds (was 30)
			 niceness ,
			 realtime ,
			 firstHostId ,
			 //#ifdef SPLIT_INDEXDB
			 // &replyBuf[i*replyBufMaxSize] ,
			 //#else
			 // replyBuf ,
			 //#endif
			 buf ,
			 replyBufMaxSize ,
			 freeReply , // free reply buf?
			 true , // do disk load balancing?
			 maxCacheAge ,
			 //(key_t *)cacheKey ,
			 // multicast uses it for determining the best
			 // host to send the request to when doing
			 // disk load balancing. if the host has our
			 // data cached, then it will probably get to
			 // handle the request. for now let's just assume
			 // this is a 96-bit key. TODO: fix...
			 0 , // *(key_t *)cacheKey ,
			 rdbId ,
			 minRecSizes ) ) {
		log("net: Failed to send request for data from %s in group "
		    "#%li over network: %s.",
		    getDbnameFromId(m_rdbId),m_groupId,
		    mstrerror(g_errno));
		// no, multicast will free this when it is destroyed
		//if (replyBuf) mfree ( replyBuf, replyBufMaxSize, "Msg22" );
		// but speed it up
		//#ifdef SPLIT_INDEXDB
		m_errno = g_errno;
		m->reset();
		if ( m_numRequests > 0 )
			return false;
		//#else
		//	m_mcast.reset();
		//#endif
		return true;
	}

	//#ifdef SPLIT_INDEXDB
	m_numRequests++;
	//#endif

	// we blocked
	return false;
}
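// editor's sketch (not in the original source): the serialized Msg0
// request built above is, byte for byte (host order, not network order):
//
//	offset	size	field
//	0	8	syncPoint
//	8	4	m_minRecSizes
//	12	4	startFileNum
//	16	4	numFiles
//	20	4	maxCacheAge
//	24	1	m_rdbId
//	25	1	addToCache
//	26	1	doErrorCorrection
//	27	1	includeTree
//	28	1	niceness
//	29	1	m_allowPageCache
//	30	m_ks	startKey
//	30+m_ks	m_ks	endKey
//	...	varies	NUL-terminated collection name
//
// so the handler on the receiving end presumably decodes the fields in
// exactly this order, with m_ks determined by the rdbId.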
// . but now that we may get a list remotely to fix data corruption,
//   this may indeed block
bool Msg3::doneScanning ( ) {
	QUICKPOLL(m_niceness);

	// . did we have any error on any scan?
	// . if so, repeat ALL of the scans
	g_errno = m_errno;

	// 2 retries is the default
	long max = 2;

	// see if explicitly provided by the caller
	if ( m_maxRetries >= 0 ) max = m_maxRetries;

	// now use -1 (no max) as the default no matter what
	max = -1;

	// ENOMEM is particularly contagious, so watch out with it...
	if ( g_errno == ENOMEM && m_maxRetries == -1 ) max = 0;
	// msg0 sets maxRetries to 2, don't let max stay set to -1
	if ( g_errno == ENOMEM && m_maxRetries != -1 ) max = m_maxRetries;

	// when thread cannot alloc enough read buf it keeps the read buf
	// set to NULL and BigFile.cpp sets g_errno to EBUFTOOSMALL
	if ( g_errno == EBUFTOOSMALL && m_maxRetries == -1 ) max = 0;
	// msg0 sets maxRetries to 2, don't let max stay set to -1
	if ( g_errno == EBUFTOOSMALL && m_maxRetries != -1 )
		max = m_maxRetries;

	// . if no thread slots available, that hogs up serious memory.
	//   the size of Msg3 is 82k, so having just 5000 of them is 430MB.
	// . i just made Msg3 alloc mem when it needs more than about 2k
	//   so this problem is greatly reduced, therefore let's keep
	//   retrying... forever if no thread slots in thread queue since
	//   we become the thread queue in a way.
	if ( g_errno == ENOTHREADSLOTS ) max = -1;

	// this is set above if the map has the same consecutive key
	// repeated and the read is enormous
	if ( g_errno == ECORRUPTDATA ) max = 0;

	// usually bad disk failures, don't retry those forever
	//if ( g_errno == EIO ) max = 3;
	// no, now our hitachis return these even when they're good so
	// we have to keep retrying forever
	if ( g_errno == EIO ) max = -1;

	// count these so we do not take drives offline just because
	// kernel ring buffer complains...
	if ( g_errno == EIO ) g_numIOErrors++;

	// bail early on high priority reads for these errors
	if ( g_errno == EDISKSTUCK && m_niceness == 0 ) max = 0;
	if ( g_errno == EIO        && m_niceness == 0 ) max = 0;

	// how does this happen? we should never bail out on a low priority
	// disk read... we just wait for it to complete...
	if ( g_errno == EDISKSTUCK && m_niceness != 0 ) {
		char *xx=NULL;*xx=0; }

	// on EIO, give up and call it corrupt after a while. some hitachis
	// have I/O errors on little spots, like gk88, maybe we can fix him
	if ( g_errno == EIO && m_retryNum >= 5 ) {
		m_errno = ECORRUPTDATA;
		m_hadCorruption = true;
		// do not do any retries any more
		max = 0;
	}

	// convert m_errno to ECORRUPTDATA if it is EBUFTOOSMALL and the
	// max of the bytesToRead are over 500MB.
	// if bytesToRead was ludicrous, then assume that the data file
	// was corrupted, the map was regenerated and it patched
	// over the corrupted bits which were 500MB or more in size.
	// we cannot practically allocate that much, so let's just
	// give back an empty buffer. treat it like corruption...
	// the way it patches is to store the same key over all the
	// corrupted pages, which can get pretty big. so if you read a
	// range with that key you will be hurting!!
	// this may be the same scenario as when the rdbmap has consecutive
	// same keys. see above where we set m_errno to ECORRUPTDATA...
	if ( g_errno == EBUFTOOSMALL ) {
		long biggest = 0;
		for ( long i = 0 ; i < m_numFileNums ; i++ ) {
			if ( m_scans[i].m_bytesToRead < biggest ) continue;
			biggest = m_scans[i].m_bytesToRead;
		}
		if ( biggest > 500000000 ) {
			log("db: Max read size was %li > 500000000. Assuming "
			    "corrupt data in data file.",biggest);
			m_errno = ECORRUPTDATA;
			m_hadCorruption = true;
			// do not do any retries on this, the read was >500MB
			max = 0;
		}
	}

	// get base, returns NULL and sets g_errno to ENOCOLLREC on error
	RdbBase *base;
	if ( ! (base=getRdbBase(m_rdbId,m_coll)) ) return true;

	// this really slows things down because it blocks the cpu so
	// leave it out for now
#ifdef _SANITY_CHECK_
	// check for corruption here, do not do it again in Msg5 if we pass
	if ( ! g_errno ) { // && g_conf.m_doErrorCorrection ) {
		long i;
		for ( i = 0 ; i < m_numFileNums ; i++ )
			if ( ! m_lists[i].checkList_r ( false, false ) )
				break;
		if ( i < m_numFileNums ) {
			g_errno = ECORRUPTDATA;
			m_errno = ECORRUPTDATA;
			max     = g_conf.m_corruptRetries; // try 100 times
			log("db: Encountered corrupt list in file %s.",
			    base->getFile(m_fileNums[i])->getFilename());
		}
		else
			m_listsChecked = true;
	}
#endif

	// . if we had a ETRYAGAIN error, then try again now
	// . it usually means the whole file or a part of it was deleted
	//   before we could finish reading it, so we should re-read all now
	// . RdbMerge deletes BigFiles after it merges them and also chops
	//   off file heads
	// . now that we have threads i'd imagine we'd get EBADFD or
	//   something
	// . i've also seen "illegal seek" as well
	if ( m_errno && (m_retryNum < max || max < 0) &&
	     // this will complete in due time, we can't call a sleep
	     // wrapper on it because the read is really still pending...
	     m_errno != EDISKSTUCK ) {
		// print the error
		static time_t s_time = 0;
		time_t now = getTime();
		if ( now - s_time > 5 || g_errno != ENOTHREADSLOTS ) {
			log("net: Had error reading %s: %s. Retrying. "
			    "(retry #%li)",
			    base->m_dbname,mstrerror(g_errno) , m_retryNum );
			s_time = now;
		}
		// send email alert if in an infinite loop, but don't send
		// more than once every 2 hours
		static long s_lastSendTime = 0;
		if ( m_retryNum == 100 &&
		     getTime() - s_lastSendTime > 3600*2 ) {
			// remove this for now it is going off all the time
			//g_pingServer.sendEmail(NULL,//g_hostdb.getMyHost(),
			//		       "100 read retries",true);
			s_lastSendTime = getTime();
		}
		// clear g_errno cuz we should for call to readList()
		g_errno = 0;
		// free the list buffer since if we have 1000 Msg3s retrying
		// it will totally use all of our memory
		for ( long i = 0 ; i < m_numChunks ; i++ )
			m_lists[i].destructor();
		// count retries
		m_retryNum++;
		// backoff scheme, wait 100ms more each time
		long wait ;
		if ( m_retryNum == 1 ) wait = 10;
		else                   wait = 200 * m_retryNum;
		// . don't wait more than 10 secs between tries
		// . i've seen gf0 and gf16 get mega saturated
		if ( wait > 10000 ) wait = 10000;
		// register the sleep callback to retry the read
		if ( g_loop.registerSleepCallback ( wait , // ms
						    this ,
						    doneSleepingWrapper3,
						    m_niceness))
			return false;
		// otherwise, registration failed
		log("net: Failed to register sleep callback for retry. "
		    "Abandoning read. This is bad.");
		// return, g_errno should be set
		g_errno = EBUFTOOSMALL;
		m_errno = EBUFTOOSMALL;
		return true;
	}

	// if we got an error and should not retry any more then give up
	if ( g_errno ) {
		log("net: Had error reading %s: %s. Giving up after %li "
		    "retries.",
		    base->m_dbname,mstrerror(g_errno) , m_retryNum );
		return true;
	}

	// note it if the retry finally worked
	if ( m_retryNum > 0 )
		log(LOG_INFO,"disk: Read succeeded after retrying %li times.",
		    (long)m_retryNum);

	// count total bytes for logging
	long count = 0;

	// . constrain all lists to make merging easier
	// . if we have only one list, then that's nice cuz the constrain
	//   will allow us to send it right away w/ zero copying
	// . if we have only 1 list, it won't be merged into a final list,
	//   that is, we'll just set m_list = &m_lists[i]
	for ( long i = 0 ; i < m_numFileNums ; i++ ) {
		QUICKPOLL(m_niceness);
		// count total bytes for logging
		count += m_lists[i].getListSize();
		// . hint offset is relative to the offset of first key we
		//   read
		// . if that key was only 6 bytes RdbScan shifted the list
		//   buf down 6 bytes to make the first key 12 bytes... a
		//   requirement for all RdbLists
		// . don't inc it, though, if it was 0, pointing to the
		//   start of the list because our shift won't affect that
		if ( m_scans[i].m_shifted == 6 && m_hintOffsets[i] > 0 )
			m_hintOffsets[i] += 6;
		// posdb double compression
		if ( m_scans[i].m_shifted == 12 && m_hintOffsets[i] > 0 )
			m_hintOffsets[i] += 12;
		// . don't constrain on minRecSizes here because it may
		//   make our endKey smaller, which will cause problems
		//   when Msg5 merges these lists.
		// . If all lists have different endKeys RdbList's merge
		//   chooses the min and will merge in recs beyond that
		//   causing a bad list BECAUSE we don't check to make
		//   sure that recs we are adding are below the endKey
		// . if we only read from one file then constrain based
		//   on minRecSizes so we can send the list back w/o merging
		//   OR if just merging with RdbTree's list
		long mrs ;
		// . constrain to m_minRecSizesOrig, not m_minRecSizes cuz
		//   that could be adjusted by compensateForNegativeRecs()
		// . but, really, they should be the same if we only read
		//   from the root file
		if ( m_numFileNums == 1 ) mrs = m_minRecSizesOrig;
		else                      mrs = -1;
		// . this returns false and sets g_errno on error
		// . like if data is corrupt
		BigFile *ff = base->getFile(m_fileNums[i]);
		if ( ! m_lists[i].constrain ( m_startKey       ,
					      m_constrainKey   , // m_endKey
					      mrs              , // m_minRecSizes
					      m_hintOffsets[i] ,
					      //m_hintKeys [i] ,
					      &m_hintKeys [i*m_ks] ,
					      ff->getFilename()    ,
					      m_niceness ) ) {
			log("net: Had error while constraining list read "
			    "from %s: %s. This is likely caused by corrupted "
			    "data on disk.",
			    ff->getFilename(), mstrerror(g_errno));
		}
	}

	// print the time
	if ( g_conf.m_logTimingDb ) {
		long long now  = gettimeofdayInMilliseconds();
		long long took = now - m_startTime;
		log(LOG_TIMING,
		    "net: Took %lli ms to read %li lists of %li bytes total"
		    " from %s (niceness=%li).",
		    took,m_numFileNums,count,base->m_dbname,m_niceness);
	}
	return true;
}
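// editor's note (worked example): the retry delay above is linear with
// a 10ms first step and a 10s cap:
//
//	retry #1 -> 10ms, #2 -> 400ms, #3 -> 600ms, ... #50+ -> 10000ms
//
// so a read that keeps failing with a retryable error (EIO, ETRYAGAIN,
// ENOTHREADSLOTS with max = -1) settles into about one attempt every
// ten seconds.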
// return false if blocked, true otherwise
bool Msg39::addedLists ( ) {

	if ( m_posdbTable.m_t1 ) {
		// . measure time to add the lists in bright green
		// . use darker green if rat is false (default OR)
		long color;
		//char *label;
		color = 0x0000ff00 ;
		//label = "termlist_intersect";
		g_stats.addStat_r ( 0 ,
				    m_posdbTable.m_t1 ,
				    m_posdbTable.m_t2 ,
				    color );
	}

	// accumulate total hits count over each docid split
	m_numTotalHits += m_posdbTable.m_docIdVoteBuf.length() / 6;

	// before wrapping up, complete our docid split loops!
	// so do not send the reply back yet... send reply back from
	// the docid loop function... doDocIdSplitLoop()
	if ( m_numDocIdSplits >= 2 ) return true;

	// . save some memory, free m_topDocIdPtrs2, m_topScores2,
	//   m_topExplicits2
	// . the m_topTree should have been filled from the call to
	//   IndexTable2::fillTopDocIds() and it no longer has ptrs to the
	//   docIds, but has the docIds themselves
	//m_posdbTable.freeMem();

	// error?
	if ( m_posdbTable.m_errno ) {
		// we do not need to store the intersection i guess...??
		m_posdbTable.freeMem();
		g_errno = m_posdbTable.m_errno;
		log("query: posdbtable had error = %s",mstrerror(g_errno));
		sendReply ( m_slot , this , NULL , 0 , 0 , true );
		return true;
	}

	// should we put cluster recs in the tree?
	//m_gotClusterRecs=( g_conf.m_fullSplit && m_r->m_doSiteClustering );
	m_gotClusterRecs = ( m_r->m_doSiteClustering );

	// . before we send the top docids back, lookup their site hashes
	//   in clusterdb so we can do filtering at this point.
	//   BUT only do this if we are in a "full split" config, because
	//   that way we can guarantee all clusterdb recs are local (on this
	//   host) and should be in the page cache. the page cache should do
	//   ultra quick lookups and no memcpy()'s for this operation. it
	//   should be <<1ms to lookup thousands of docids.
	// . when doing innerLoopSiteClustering we always use top tree now
	//   because our number of "top docids" can be somewhat
	//   unpredictably large due to having a ton of results with the
	//   same "domain hash" (see the "vcount" in IndexTable2.cpp)
	// . do NOT do if we are just "getting weights", phr and aff weights
	if ( m_gotClusterRecs )
		// . set the clusterdb recs in the top tree
		return setClusterRecs ( ) ;

	// if we did not call setClusterRecs, go on to estimate the hits
	estimateHits();

	return true;
}
// . send an add command to all machines in the appropriate group
// . returns false if blocked, true otherwise
// . sets g_errno on error
// . groupId is -1 if we choose it automatically
// . if waitForReply is false we return true right away, but we can only
//   launch MAX_MSG1S requests without waiting for replies, and
//   when the reply does come back we do NOT call the callback
bool Msg1::addList ( RdbList  *list         ,
		     char      rdbId        ,
		     collnum_t collnum      , // char *coll ,
		     void     *state        ,
		     void    (* callback)(void *state) ,
		     bool      forceLocal   ,
		     int32_t   niceness     ,
		     bool      injecting    ,
		     bool      waitForReply ,
		     bool     *inTransit    ) {
	// warning
	if ( collnum < 0 ) log(LOG_LOGIC,"net: bad collection. msg1.cpp.");

	// if list has no records in it return true
	if ( ! list || list->isEmpty() ) return true;

	// sanity check
	if ( list->m_ks !=  8 &&
	     list->m_ks != 12 &&
	     list->m_ks != 16 &&
	     list->m_ks != 24 ) {
		g_process.shutdownAbort(true);
	}

	// start at the beginning
	list->resetListPtr();

	// if caller does not want reply try to accommodate him
	if ( ! waitForReply && list != &m_ourList ) {
		Msg1 *Y = getMsg1();
		if ( ! Y ) {
			waitForReply = true;
			log(LOG_DEBUG,"net: msg1: "
			    "No floating request slots "
			    "available for adding data. "
			    "Blocking on reply.");
			goto skip;
		}
		// steal the list, we don't want caller to free it
		gbmemcpy ( &Y->m_ourList , list , sizeof(RdbList) );

		QUICKPOLL(niceness);

		// if list is small enough use our buf
		if ( ! list->m_ownData &&
		     list->m_listSize <= MSG1_BUF_SIZE ) {
			gbmemcpy ( Y->m_buf ,
				   list->m_list ,
				   list->m_listSize );
			Y->m_ourList.m_list    = Y->m_buf;
			Y->m_ourList.m_listEnd = Y->m_buf + list->m_listSize;
			Y->m_ourList.m_alloc   = NULL;
			Y->m_ourList.m_ownData = false;
		}
		// otherwise, we cannot copy it and i don't want to mdup
		// it...
		else if ( ! list->m_ownData ) {
			log(LOG_LOGIC,"net: msg1: List must own data. Bad "
			    "engineer.");
			g_process.shutdownAbort(true);
		}
		// lastly, if it was a clean steal, don't let list free it
		else
			list->m_ownData = false;

		// reset m_listPtr and m_listPtrHi so we pass the
		// isExhausted() check in sendSomeOfList() below
		Y->m_ourList.resetListPtr();

		// sanity test
		if ( Y->m_ourList.isExhausted() ) {
			log(LOG_LOGIC,"net: msg1: List is exhausted. "
			    "Bad engineer.");
			g_process.shutdownAbort(true);
		}

		// now re-call
		bool inTransit;
		bool status = Y->addList ( &Y->m_ourList ,
					   rdbId         ,
					   collnum       ,
					   Y             , // state
					   returnMsg1    , // callback
					   forceLocal    ,
					   niceness      ,
					   injecting     ,
					   waitForReply  ,
					   &inTransit    ) ;
		// if we really blocked return false
		if ( ! status ) return false;
		// otherwise, it may have returned true because waitForReply
		// is false, but the request may still be in transit
		if ( inTransit ) return true;
		// debug msg
		//log("did not block, listSize=%" PRId32,
		//    m->m_ourList.m_listSize);
		// we did it without blocking, but it is still in transit
		// unless there was an error
		if ( g_errno )
			log("net: Adding data to %s had error: %s.",
			    getDbnameFromId(rdbId),
			    mstrerror(g_errno));
		// otherwise, if not in transit and no g_errno then it must
		// have really completed without blocking. in which case
		// we are done with "Y"
		returnMsg1 ( (void *)Y );
		return true;
	}

 skip:
	// remember these vars
	m_list         = list;
	m_rdbId        = rdbId;
	m_collnum      = collnum;
	m_state        = state;
	m_callback     = callback;
	m_forceLocal   = forceLocal;
	m_niceness     = niceness;
	m_injecting    = injecting;
	m_waitForReply = waitForReply;

	QUICKPOLL(niceness);

	// reset m_listPtr to point to first record again
	list->resetListPtr();

	// is the request in transit? assume not (assume did not block)
	if ( inTransit ) *inTransit = false;

	// . not all records in the list may belong to the same group
	// . records should be sorted by key so we don't need to sort them
	// . if this did not block, return true
	if ( sendSomeOfList ( ) ) return true;

	// it is in transit
	if ( inTransit ) *inTransit = true;

	// if we should waitForReply return false
	if ( m_waitForReply ) return false;

	// tell caller we did not block on the reply, even though we did
	return true;
}
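// editor's note (sketch): as i read the "floating" Msg1 path above, it
// transfers ownership of the record buffer without copying whenever the
// list owns its data:
//
//	gbmemcpy ( &Y->m_ourList , list , sizeof(RdbList) ); // shallow copy
//	list->m_ownData = false;  // caller's list no longer frees the data
//
// after that the caller can let its RdbList go out of scope safely; Y's
// copy (whose m_ownData stays true) is responsible for freeing the
// buffer when Y is recycled via returnMsg1().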
// . returns false if blocked, true otherwise
// . sets g_errno on error
bool Msg39::getLists () {

	if ( m_debug ) m_startTime = gettimeofdayInMilliseconds();

	// . ask Indexdb for the IndexLists we need for these termIds
	// . each rec in an IndexList is a termId/score/docId tuple

	//
	// restrict to docid range?
	//
	// . get the docid start and end
	// . do docid partitioning so we can send to all hosts
	//   in the network, not just one stripe
	long long docIdStart = 0;
	long long docIdEnd   = MAX_DOCID;

	// . restrict to this docid?
	// . will really make gbdocid:| searches much faster!
	long long dr = m_tmpq.m_docIdRestriction;
	if ( dr ) {
		docIdStart = dr;
		docIdEnd   = dr + 1;
	}

	// . override
	// . this is set from Msg39::doDocIdSplitLoop() to compute
	//   search results in stages, so that we do not load massive
	//   termlists into memory and go OOM (out of memory)
	if ( m_r->m_minDocId != -1 ) docIdStart = m_r->m_minDocId;
	if ( m_r->m_maxDocId != -1 ) docIdEnd   = m_r->m_maxDocId+1;

	// if we have twins, then make sure the twins read different
	// pieces of the same docid range to make things 2x faster
	bool useTwins = false;
	if ( g_hostdb.getNumStripes() == 2 ) useTwins = true;
	if ( useTwins ) {
		long long delta2 = ( docIdEnd - docIdStart ) / 2;
		if ( m_r->m_stripe == 0 ) docIdEnd   = docIdStart + delta2;
		else                      docIdStart = docIdStart + delta2;
	}
	// TODO: add triplet support later for this to split the
	// read 3 ways. 4 ways for quads, etc.
	if ( g_hostdb.getNumStripes() >= 3 ) { char *xx=NULL;*xx=0; }

	// do not go over MAX_DOCID because it gets masked and
	// ends up being 0!!! and we get empty lists
	if ( docIdEnd > MAX_DOCID ) docIdEnd = MAX_DOCID;

	// remember so Msg2.cpp can use them to restrict the termlists
	// from "whiteList" as well
	m_docIdStart = docIdStart;
	m_docIdEnd   = docIdEnd;

	//
	// set startkey/endkey for each term/termlist
	//
	for ( long i = 0 ; i < m_tmpq.getNumTerms() ; i++ ) {
		// breathe
		QUICKPOLL ( m_r->m_niceness );
		// shortcuts
		QueryTerm *qterm = &m_tmpq.m_qterms[i];
		char *sk = qterm->m_startKey;
		char *ek = qterm->m_endKey;
		// get the term id
		long long tid = m_tmpq.getTermId(i);
		// if only 1 stripe
		//if ( g_hostdb.getNumStripes() == 1 ) {
		//	docIdStart = 0;
		//	docIdEnd   = MAX_DOCID;
		//}
		// store now in qterm
		g_posdb.makeStartKey ( sk , tid , docIdStart );
		g_posdb.makeEndKey   ( ek , tid , docIdEnd   );
		qterm->m_ks = sizeof(POSDBKEY); //key144_t);
	}

	// debug msg
	if ( m_debug || g_conf.m_logDebugQuery ) {
		for ( long i = 0 ; i < m_tmpq.getNumTerms() ; i++ ) {
			// get the term in utf8
			//char bb[256];
			QueryTerm *qt = &m_tmpq.m_qterms[i];
			//utf16ToUtf8(bb, 256, qt->m_term, qt->m_termLen);
			char *tpc = qt->m_term + qt->m_termLen;
			char  tmp = *tpc;
			*tpc = '\0';
			char sign = qt->m_termSign;
			if ( sign == 0 ) sign = '0';
			QueryWord *qw = qt->m_qword;
			long wikiPhrId = qw->m_wikiPhraseId;
			if ( m_tmpq.isPhrase(i) ) wikiPhrId = 0;
			char leftwikibigram  = 0;
			char rightwikibigram = 0;
			if ( qt->m_leftPhraseTerm &&
			     qt->m_leftPhraseTerm->m_isWikiHalfStopBigram )
				leftwikibigram = 1;
			if ( qt->m_rightPhraseTerm &&
			     qt->m_rightPhraseTerm->m_isWikiHalfStopBigram )
				rightwikibigram = 1;
			/*
			char c = m_tmpq.getTermSign(i);
			char tt[512];
			long ttlen = m_tmpq.getTermLen(i);
			if ( ttlen > 254 ) ttlen = 254;
			if ( ttlen < 0   ) ttlen = 0;
			// old:painful: convert each term from unicode to
			// ascii
			memcpy ( tt , m_tmpq.getTerm(i) , ttlen );
			*/
			long isSynonym = 0;
			QueryTerm *st = qt->m_synonymOf;
			if ( st ) isSynonym = true;
			SafeBuf sb;
			// now we can display it
			//tt[ttlen]='\0';
			//if ( c == '\0' ) c = ' ';
			sb.safePrintf(
				"query: msg39: [%lu] query term #%li \"%s\" "
				"phr=%li termId=%llu rawTermId=%llu "
				//"estimatedTermFreq=%lli (+/- ~16000) "
				"tfweight=%.02f "
				"sign=%c "
				"numPlusses=%hhu "
				"required=%li "
				"fieldcode=%li "
				"ebit=0x%0llx "
				"impBits=0x%0llx "
				"wikiphrid=%li "
				"leftwikibigram=%li "
				"rightwikibigram=%li "
				//"range.startTermNum=%hhi "
				//"range.endTermNum=%hhi "
				//"minRecSizes=%li "
				"readSizeInBytes=%li "
				//"ebit=0x%llx "
				//"impBits=0x%llx "
				"hc=%li "
				"component=%li "
				"otermLen=%li "
				"isSynonym=%li "
				"querylangid=%li " ,
				(long)this ,
				i ,
				qt->m_term, //bb ,
				(long)m_tmpq.isPhrase (i) ,
				m_tmpq.getTermId (i) ,
				m_tmpq.getRawTermId (i) ,
				((float *)m_r->ptr_termFreqWeights)[i] ,
				sign , //c ,
				0 ,
				(long)qt->m_isRequired,
				(long)qt->m_fieldCode,
				(long long)qt->m_explicitBit  ,
				(long long)qt->m_implicitBits ,
				wikiPhrId,
				(long)leftwikibigram,
				(long)rightwikibigram,
				((long *)m_r->ptr_readSizes)[i] ,
				//(long long)m_tmpq.m_qterms[i].m_explicitBit,
				//(long long)m_tmpq.m_qterms[i].m_implicitBits,
				(long)m_tmpq.m_qterms[i].m_hardCount ,
				(long)m_tmpq.m_componentCodes[i],
				(long)m_tmpq.getTermLen(i) ,
				isSynonym,
				(long)m_tmpq.m_langId ); // ,tt
			// put it back
			*tpc = tmp;
			if ( st ) {
				long stnum = st - m_tmpq.m_qterms;
				sb.safePrintf("synofterm#=%li",stnum);
				//sb.safeMemcpy(st->m_term,st->m_termLen);
				sb.pushChar(' ');
				sb.safePrintf("synwid0=%lli ",
					      qt->m_synWids0);
				sb.safePrintf("synwid1=%lli ",
					      qt->m_synWids1);
				sb.safePrintf("synalnumwords=%li ",
					      qt->m_numAlnumWordsInSynonym);
				// like for synonym "nj" it's base,
				// "new jersey" has 2 alnum words!
				sb.safePrintf("synbasealnumwords=%li ",
					      qt->m_numAlnumWordsInBase);
			}
			logf(LOG_DEBUG,"%s",sb.getBufStart());
		}

		m_tmpq.printBooleanTree();
	}

	// timestamp log
	if ( m_debug )
		log(LOG_DEBUG,"query: msg39: [%lu] Getting %li index lists ",
		    (long)this,m_tmpq.getNumTerms());

	// . now get the index lists themselves
	// . return if it blocked
	// . not doing a merge (last parm) means that the lists we receive
	//   will be an appending of a bunch of lists so keys won't be in
	//   order
	// . merging is unnecessary for us here because we hash the keys
	//   anyway
	// . and merging takes up valuable cpu time
	// . caution: the index lists returned from Msg2 are now compressed
	// . now i'm merging because it's 10 times faster than hashing
	//   anyway and the reply buf should now always be <= minRecSizes so
	//   we can pre-allocate one better, and, 3) this should fix the
	//   yahoo.com reindex bug
	char rdbId = RDB_POSDB;

	// . TODO: MDW: fix
	// . partap says there is a bug in this??? we can't cache UOR'ed
	//   lists?
	bool checkCache = false;

	// split is us????
	//long split = g_hostdb.m_myHost->m_group;
	long split = g_hostdb.m_myHost->m_shardNum;

	// call msg2
	if ( ! m_msg2.getLists ( rdbId ,
				 m_r->ptr_coll ,
				 m_r->m_maxAge ,
				 m_r->m_addToCache ,
				 //m_tmpq.m_qterms ,
				 &m_tmpq,
				 m_r->ptr_whiteList,
				 // we need to restrict docid range for
				 // whitelist as well! this is from
				 // doDocIdSplitLoop()
				 m_docIdStart,
				 m_docIdEnd,
				 // how much of each termlist to read in
				 // bytes
				 (long *)m_r->ptr_readSizes ,
				 //m_tmpq.getNumTerms() , // numLists
				 m_lists ,
				 this ,
				 gotListsWrapper ,
				 m_r ,
				 m_r->m_niceness ,
				 true , // do merge?
				 m_debug ,
				 NULL ,  // best hostids
				 m_r->m_restrictPosdbForQuery ,
				 split ,
				 checkCache ) ) {
		m_blocked = true;
		return false;
	}

	// error?
	if ( g_errno ) {
		log("msg39: Had error getting termlists2: %s.",
		    mstrerror(g_errno));
		// don't bail out here because we are in docIdSplitLoop()
		//sendReply (m_slot,this,NULL,0,0,true);
		return true;
	}

	return gotLists ( true );
}
// . now come here when we got the necessary index lists // . returns false if blocked, true otherwise // . sets g_errno on error bool Msg39::gotLists ( bool updateReadInfo ) { // bail on error if ( g_errno ) { log("msg39: Had error getting termlists: %s.", mstrerror(g_errno)); if ( ! g_errno ) { char *xx=NULL;*xx=0; } //sendReply (m_slot,this,NULL,0,0,true); return true; } // timestamp log if ( m_debug ) { log(LOG_DEBUG,"query: msg39: [%lu] Got %li lists in %lli ms" , (long)this,m_tmpq.getNumTerms(), gettimeofdayInMilliseconds() - m_startTime); m_startTime = gettimeofdayInMilliseconds(); } // breathe QUICKPOLL ( m_r->m_niceness ); // . set the IndexTable so it can set it's score weights from the // termFreqs of each termId in the query // . this now takes into account the special termIds used for sorting // by date (0xdadadada and 0xdadadad2 & TERMID_MASK) // . it should weight them so much so that the summation of scores // from other query terms cannot make up for a lower date score // . this will actually calculate the top // . this might also change m_tmpq.m_termSigns // . this won't do anything if it was already called m_posdbTable.init ( &m_tmpq , m_debug , this , &m_tt , m_r->ptr_coll , &m_msg2 , // m_lists , //m_tmpq.m_numTerms , // m_numLists m_r ); // breathe QUICKPOLL ( m_r->m_niceness ); // . we have to do this here now too // . but if we are getting weights, we don't need m_tt! // . actually we were using it before for rat=0/bool queries but // i got rid of NO_RAT_SLOTS if ( ! m_allocedTree && ! m_posdbTable.allocTopTree() ) { if ( ! g_errno ) { char *xx=NULL;*xx=0; } //sendReply ( m_slot , this , NULL , 0 , 0 , true); return true; } // we have to allocate this with each call because each call can // be a different docid range from doDocIdSplitLoop. if ( ! m_posdbTable.allocWhiteListTable() ) { log("msg39: Had error allocating white list table: %s.", mstrerror(g_errno)); if ( ! g_errno ) { char *xx=NULL;*xx=0; } //sendReply (m_slot,this,NULL,0,0,true); return true; } // do not re do it if doing docid range splitting m_allocedTree = true; // . now we must call this separately here, not in allocTopTree() // . we have to re-set the QueryTermInfos with each docid range split // since it will set the list ptrs from the msg2 lists if ( m_r->m_useNewAlgo && ! m_posdbTable.setQueryTermInfo () ) { return true; } // timestamp log if ( m_debug ) { log(LOG_DEBUG,"query: msg39: [%lu] Preparing to intersect " "took %lli ms", (long)this, gettimeofdayInMilliseconds() - m_startTime ); m_startTime = gettimeofdayInMilliseconds(); } // time it long long start = gettimeofdayInMilliseconds(); long long diff; // . don't bother making a thread if lists are small // . look at STAGE? in IndexReadInfo.cpp to see how we read in stages // . it's always saying msg39 handler is hogging cpu...could this be it //if ( m_msg2.getTotalRead() < 2000*8 ) goto skipThread; // debug //goto skipThread; // . NOW! let's do this in a thread so we can continue to service // incoming requests // . don't launch more than 1 thread at a time for this // . set callback when thread done // breathe QUICKPOLL ( m_r->m_niceness ); // . create the thread // . only one of these type of threads should be launched at a time if ( g_threads.call ( INTERSECT_THREAD , // threadType m_r->m_niceness , this , // top 4 bytes must be cback threadDoneWrapper , addListsWrapper ) ) { m_blocked = true; return false; } // if it failed //log(LOG_INFO,"query: Intersect thread creation failed. Doing " // "blocking. 
Hurts performance."); // check tree if ( m_tt.m_nodes == NULL ) { log(LOG_LOGIC,"query: msg39: Badness."); char *xx = NULL; *xx = 0; } // sometimes we skip the thread //skipThread: // . addLists() should never have a problem // . g_errno should be set by prepareToAddLists() above if there is // going to be a problem //if ( m_r->m_useNewAlgo ) m_posdbTable.intersectLists10_r ( ); //else // m_posdbTable.intersectLists9_r ( ); // time it diff = gettimeofdayInMilliseconds() - start; if ( diff > 10 ) log("query: Took %lli ms for intersection",diff); // returns false if blocked, true otherwise return addedLists (); }
// . returns false if blocked, true if done // . to avoid running out of memory, generate the search results for // multiple smaller docid-ranges, one range at a time. bool Msg39::doDocIdSplitLoop ( ) { long long delta = MAX_DOCID / (long long)m_numDocIdSplits; for ( ; m_ddd < m_dddEnd ; ) { // the starting docid... long long d0 = m_ddd; // advance to point to the exclusive endpoint m_ddd += delta; // ensure this is exclusive of ddd since it will be // inclusive in the following iteration. long long d1 = m_ddd; // fix rounding errors if ( d1 + 20LL > MAX_DOCID ) { d1 = MAX_DOCID; m_ddd = MAX_DOCID; } // fix it m_r->m_minDocId = d0; m_r->m_maxDocId = d1; // -1; // exclude d1 // allow posdbtable re-initialization each time to set // the msg2 termlist ptrs anew, otherwise we core in // call to PosdbTable::init() below //m_posdbTable.m_initialized = false; // reset ourselves, partially, anyway, not tmpq etc. reset2(); // debug log log("msg39: docid split phase %lli-%lli",d0,d1); // wtf? if ( d0 >= d1 ) break; // use this //m_debug = true; //log("call1"); // . get the lists // . i think this always should block! // . it will also intersect the termlists to get the search // results and accumulate the winners into the "tree" if ( ! getLists() ) return false; //log("call2 g_errno=%li",(long)g_errno); // if there was an error, stop! if ( g_errno ) break; } // return error reply if we had an error if ( g_errno ) { log("msg39: Had error3: %s.", mstrerror(g_errno)); sendReply (m_slot,this,NULL,0,0 , true); return true; } if ( m_debug ) log("msg39: done with all docid range splits"); // all done. this will send reply back //estimateHits(); //addedLists(); // should we put cluster recs in the tree? //m_gotClusterRecs = ( g_conf.m_fullSplit && m_r->m_doSiteClustering ); m_gotClusterRecs = ( m_r->m_doSiteClustering ); // . before we send the top docids back, lookup their site hashes // in clusterdb so we can do filtering at this point. // BUT only do this if we are in a "full split" config, because that // way we can guarantee all clusterdb recs are local (on this host) // and should be in the page cache. the page cache should do ultra // quick lookups and no memcpy()'s for this operation. it should // be <<1ms to lookup thousands of docids. // . when doing innerLoopSiteClustering we always use top tree now // because our number of "top docids" can be somewhat unpredictably // large due to having a ton of results with the same "domain hash" // (see the "vcount" in IndexTable2.cpp) // . do NOT do if we are just "getting weights", phr and aff weights if ( m_gotClusterRecs ) { // . set the clusterdb recs in the top tree // . this calls estimateHits() in its reply wrapper when done return setClusterRecs ( ) ; } // if we did not call setClusterRecs, go on to estimate the hits estimateHits(); // no block, we are done return true; }
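// . doDocIdSplitLoop() above carves [0, MAX_DOCID] into m_numDocIdSplits
//   equal ranges and clamps the last endpoint to absorb the rounding
//   error from the integer division (the "+ 20LL" fudge)
// . the range arithmetic in isolation, assuming maxDocId fits in a
//   long long; printDocIdRanges is a hypothetical name
#include <cstdio>
static void printDocIdRanges ( long long maxDocId , long numSplits ) {
	long long delta = maxDocId / (long long)numSplits;
	long long d0 = 0;
	while ( d0 < maxDocId ) {
		long long d1 = d0 + delta;
		// fix rounding errors on the final range
		if ( d1 + 20LL > maxDocId ) d1 = maxDocId;
		printf ( "split %lld-%lld\n" , d0 , d1 );
		d0 = d1;
	}
}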
void Msg39::getDocIds2 ( Msg39Request *req ) { // flag it as in use m_inUse = true; // store it, might be redundant if called from getDocIds() above m_r = req; // a handy thing m_debug = false; if ( m_r->m_debug ) m_debug = true; if ( g_conf.m_logDebugQuery ) m_debug = true; if ( g_conf.m_logTimingQuery ) m_debug = true; // ensure it's size is ok if ( m_r->size_coll <= 0 ) { g_errno = ENOCOLLREC; log(LOG_LOGIC,"query: msg39: getDocIds: %s." , mstrerror(g_errno) ); sendReply ( m_slot , this , NULL , 0 , 0 , true ); return ; } CollectionRec *cr = g_collectiondb.getRec ( m_r->ptr_coll ); if ( ! cr ) { g_errno = ENOCOLLREC; log(LOG_LOGIC,"query: msg39: getDocIds: %s." , mstrerror(g_errno) ); sendReply ( m_slot , this , NULL , 0 , 0 , true ); return ; } // . set our m_q class // . m_boolFlag is either 1 or 0 in this case, the caller did the // auto-detect (boolFlag of 2) before calling us // . this now calls Query::addCompoundTerms() for us if ( ! m_tmpq.set2 ( m_r->ptr_query , m_r->m_language , m_r->m_queryExpansion , m_r->m_useQueryStopWords ) ) { log(LOG_LOGIC,"query: msg39: setQuery: %s." , mstrerror(g_errno) ); sendReply ( m_slot , this , NULL , 0 , 0 , true ); return ; } QUICKPOLL ( m_r->m_niceness ); // set m_errno if ( m_tmpq.m_truncated ) m_errno = EQUERYTRUNCATED; // ensure matches with the msg3a sending us this request if ( m_tmpq.getNumTerms() != m_r->m_nqt ) { g_errno = EBADENGINEER; log("query: Query parsing inconsistency for q=%s. " "langid=%li. Check langids and m_queryExpansion parms " "which are the only parms that could be different in " "Query::set2()." ,m_tmpq.m_orig ,(long)m_r->m_language ); sendReply ( m_slot , this , NULL , 0 , 0 , true ); return ; } // debug if ( m_debug ) logf(LOG_DEBUG,"query: msg39: [%lu] Got request " "for q=%s", (long) this,m_tmpq.m_orig); // reset this m_tt.reset(); QUICKPOLL ( m_r->m_niceness ); // assume not doing special docid splitting m_numDocIdSplits = 1; // . do not do splits if caller is already specifying a docid range // like for gbdocid: queries i guess. // . make sure m_msg2 is non-NULL, because if it is NULL we are // evaluating a query for a single docid for seo tools if ( m_r->m_minDocId == -1 ) { // && m_msg2 ) { long nt = m_tmpq.getNumTerms(); m_numDocIdSplits = nt / 2; if ( m_numDocIdSplits == 0 ) m_numDocIdSplits = 1; } //if ( ! g_conf.m_doDocIdRangeSplitting ) // m_numDocIdSplits = 1; // limit to 10 if ( m_numDocIdSplits > 10 ) m_numDocIdSplits = 10; // . if caller already specified a docid range, then be loyal to that! // . or if we do not have enough query terms to warrant splitting if ( m_numDocIdSplits == 1 ) { getLists(); return; } // . set up docid range cursor // . do twin splitting if ( m_r->m_stripe == 1 ) { m_ddd = MAX_DOCID / 2LL; m_dddEnd = MAX_DOCID + 1LL; } else if ( m_r->m_stripe == 0 ) { m_ddd = 0; m_dddEnd = MAX_DOCID / 2LL; } // support triplets, etc. later else { char *xx=NULL;*xx=0; } // do not do twin splitting if only one host per group if ( g_hostdb.getNumStripes() == 1 ) { m_ddd = 0; m_dddEnd = MAX_DOCID; } // . otherwise, to prevent oom, split up docids into ranges // and get winners of each range. if ( ! doDocIdSplitLoop() ) return; // error? if ( g_errno ) { log(LOG_LOGIC,"query: msg39: doDocIdSplitLoop: %s." , mstrerror(g_errno) ); sendReply ( m_slot , this , NULL , 0 , 0 , true ); return ; } // it might not have blocked! if all lists in tree and used no thread // it will come here after sending the reply and destroying "this" return; }
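// . getDocIds2() above gives each "stripe" (twin) half the docid space
//   so the two hosts holding copies of a shard each score half the
//   docids, unless the shard has only one stripe
// . the assignment on its own, mirroring the branches above;
//   getStripeRange is a hypothetical name
static void getStripeRange ( int stripe , int numStripes ,
			     long long maxDocId ,
			     long long *start , long long *end ) {
	if ( numStripes == 1 ) { *start = 0; *end = maxDocId; return; }
	if ( stripe == 0 ) { *start = 0             ; *end = maxDocId / 2LL; }
	else               { *start = maxDocId / 2LL; *end = maxDocId + 1LL; }
}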
void sendReply ( void *state ) { StateStatsdb *st = (StateStatsdb *)state; if ( g_errno ) { g_httpServer.sendErrorReply(st->m_socket, 500,mstrerror(g_errno)); return; } TcpSocket *s = st->m_socket; SafeBuf buf( 1024*32 ); SafeBuf tmpBuf( 1024 ); // // take these out until we need them! // /* // print the top of the page tmpBuf.safePrintf( //"<style type=\"text/css\">" //"@import url(/styles/statsdb.css);</style>\n" "<script type=\"text/javascript\" " "src=\"/scripts/statsdb.js\"></script>\n" "<!-- DHTML Calendar -->" "<style type=\"text/css\">" "@import url(/jsc/calendar-win2k-1.css);" "</style>\n" "<script type=\"text/javascript\" " "src=\"/jsc/calendar.js\"></script>\n" "<script type=\"text/javascript\" " "src=\"/jsc/lang/calendar-en.js\"></script>\n" "<script type=\"text/javascript\" " "src=\"/jsc/calendar-setup.js\"></script>\n" ); */ // make the query string char qs[1024]; sprintf(qs,"&date_period=%li&date_units=%li&samples=%li", st->m_datePeriod, st->m_dateUnits, st->m_samples); // print standard header g_pages.printAdminTop ( &buf , st->m_socket , &st->m_request , qs ); buf.cat ( tmpBuf ); //g_pages.printAdminTop2 ( &buf , st->m_socket , &st->m_request, NULL , // tmpBuf.getBufStart(), tmpBuf.length() ); // write the controls section of the page writeControls( &buf, st ); // Debug print of CGI parameters and errors char startTimeStr[30]; char endTimeStr[30]; strncpy( startTimeStr, ctime( &st->m_startDate ), 30 ); strncpy( endTimeStr, ctime( &st->m_endDate ), 30 ); buf.safePrintf("<center>\n"); if ( ! g_conf.m_useStatsdb ) buf.safePrintf("<font color=red><b>Statsdb disabled. " "Turn on in the master controls.</b>" "</font>\n" ); buf.safePrintf("<table cellpadding=10 border=0>\n"); buf.safePrintf("<tr><td>" "<center>" "<img src=\"/stats%li.gif\" height=%li width=%li " "border=\"0px\">" "</center>" //"class=\"statsdb_image\">" "</td></tr>\n", st->m_hostId, g_statsdb.getImgHeight(), g_statsdb.getImgWidth()); // the map key buf.safePrintf("<tr><td>"); buf.cat ( st->m_sb2 ); buf.safePrintf("</td></tr>\n"); buf.safePrintf( "</table>\n" ); buf.safePrintf("</center>"); // print the bottom of the page g_pages.printAdminBottom2( &buf ); g_errno = 0; mdelete ( st, sizeof(StateStatsdb), "PageStatsdb" ); delete st; g_httpServer.sendDynamicPage ( s, buf.getBufStart(), buf.length() ); }
// close the least used of all the file descriptors. // we don't touch files opened for writing, however. bool File::closeLeastUsed () { int64_t min ; int mini = -1; int64_t now = gettimeofdayInMillisecondsLocal(); int32_t notopen = 0; int32_t writing = 0; int32_t unlinking = 0; int32_t young = 0; // get the least used of all the actively opened file descriptors. // we can't get files that were opened for writing!!! int i; for ( i = 0 ; i < MAX_NUM_FDS ; i++ ) { //if ( s_fds [ i ] < 0 ) continue; if ( ! s_open[i] ) { notopen++; continue; } // fds opened for writing are not candidates, because if // we close on a threaded write, that fd may be used to // re-open another file which gets garbled! if ( s_writing [ i ] ) { writing++; continue; } // do not close guys being unlinked they are in the middle // of being closed ALREADY in close1_r(). There should only be // like one unlink thread allowed to be active at a time so we // don't have to worry about it hogging all the fds. if ( s_unlinking [ i ] ) { unlinking++; continue; } // when we got like 1000 reads queued up, it uses a *lot* of // memory and we can end up never being able to complete a // read because the descriptors are always getting closed on us // so do a hack fix and do not close descriptors that are // about .5 seconds old on avg. if ( s_timestamps [ i ] == now ) { young++; continue; } if ( s_timestamps [ i ] == now - 1 ) { young++; continue; } if ( mini == -1 || s_timestamps [ i ] < min ) { min = s_timestamps [ i ]; mini = i; } } /* // use the new linked list of active file descriptors // . file at tail is the most active File *f = s_activeHead; // if nothing to do return true //if ( ! f ) return true; int32_t mini2 = -1; // close the head if not writing for ( ; f ; f = f->m_nextActive ) { mini2 = f->m_vfd; // how can this be? if ( s_fds [ mini2 ] < 0 ) { char *xx=NULL;*xx=0; } if ( s_writing [ mini2 ] ) continue; if ( s_unlinking [ mini2 ] ) continue; // when we got like 1000 reads queued up, it uses a *lot* of // memory and we can end up never being able to complete a // read because the descriptors are always getting closed on us // so do a hack fix and do not close descriptors that are // about .5 seconds old on avg. if ( s_timestamps [ mini2 ] >= now - 1000 ) continue; break; } // debug why it doesn't work right if ( mini != mini2 ) { int fd1 = -1; int fd2 = -1; if ( mini >= 0 ) fd1 = s_fds[mini]; if ( mini2 >= 0 ) fd2 = s_fds[mini2]; int32_t age = now - s_timestamps[mini] ; log("File: linkedlistfd=%i != rightfd=%i agems=%i",fd1,fd2, (int)age); } */ // if nothing to free then return false if ( mini == -1 ) return log("File: closeLeastUsed: failed. All %"INT32" " "descriptors " "are unavailable to be closed and re-used to read " "from another file. notopen=%i writing=%i " "unlinking=%i young=%i" ,(int32_t)s_maxNumOpenFiles ,notopen ,writing ,unlinking ,young ); int fd = mini; // always block on close //int fd = s_fds[mini]; int flags = fcntl ( fd , F_GETFL ) ; // turn off these 2 flags on fd to make sure flags &= ~( O_NONBLOCK | O_ASYNC ); retry27: // return false on error if ( fcntl ( fd, F_SETFL, flags ) < 0 ) { // valgrind if ( errno == EINTR ) goto retry27; //char *xx = NULL; *xx = 1; log("disk: fcntl(%i): %s",fd,mstrerror(errno)); // return false; errno = 0; } // . tally up another close for this fd, if any // . so if an open happens shortly here after, and // gets this fd, then any read that was started // before that open will know it! 
//s_closeCounts [ fd ]++;
	// otherwise we gotta really close it
 again:
	if ( fd == 0 ) log("disk: closing3 fd of 0");
	int status = ::close ( fd );
	if ( status == -1 && errno == EINTR ) goto again;
	// -1 means can be reopened because File::close() wasn't called.
	// we're just conserving file descriptors
	//s_fds [ mini ] = -1;
	// if the real close was successful then decrement the # of open files
	if ( status == 0 ) {
		// it's not open
		s_open [ fd ] = 0;
		// if someone is trying to read on this let them know
		s_closeCounts [ fd ]++;
		s_numOpenFiles--;
		File *f = s_filePtrs [ fd ];
		// don't let him use the stolen fd
		f->m_fd = -1 ;
		// debug msg
		if ( g_conf.m_logDebugDisk ) {
			File *f = s_filePtrs [ fd ];
			char *fname = "";
			if ( f ) fname = f->getFilename();
			logf(LOG_DEBUG,"disk: force closed fd %i for"
			     " %s. age=%"INT64" #openfiles=%i this=0x%"PTRFMT,
			     fd,fname,now-s_timestamps[mini],
			     (int)s_numOpenFiles,
			     (PTRTYPE)this);
		}
		// no longer the owner
		s_filePtrs [ fd ] = NULL;
		// excise from linked list of active files
		//rmFileFromLinkedList ( f );
		// getfd() may not execute in time to inc the closeCount
		// so do it here. test by setting the max open files to like
		// 10 or so and spidering heavily.
		//s_closeCounts [ fd ]++;
	}
	if ( status == -1 )
		return log("disk: close(%i) : %s", fd , strerror(errno));
	if ( g_conf.m_logDebugDisk ) sanityCheck();
	return true;
}
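// . closeLeastUsed() above picks its victim with a linear scan over the
//   per-fd timestamp array, skipping fds that are closed, mid-write,
//   mid-unlink, or touched within the last couple of milliseconds
// . the selection logic in isolation; the array parameters are stand-ins
//   for the s_open/s_writing/s_unlinking/s_timestamps statics
static int pickLeastUsedFd ( const bool *open , const bool *writing ,
			     const bool *unlinking ,
			     const long long *stamp ,
			     int n , long long nowMs ) {
	long long min = 0;
	int mini = -1;
	for ( int i = 0 ; i < n ; i++ ) {
		if ( ! open[i]    ) continue; // nothing to close
		if ( writing[i]   ) continue; // fd busy in a write thread
		if ( unlinking[i] ) continue; // already being closed
		if ( stamp[i] >= nowMs - 1 ) continue; // too young to evict
		if ( mini == -1 || stamp[i] < min ) {
			min  = stamp[i];
			mini = i;
		}
	}
	return mini; // -1 if nothing is evictable
}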
// . return false if blocked, true otherwise // . sets g_errno on error bool Msg1::sendData ( uint32_t shardNum, char *listData , int32_t listSize) { // debug msg //log("sendData: mcast=%" PRIu32" listSize=%" PRId32, // (int32_t)&m_mcast,(int32_t)listSize); // bail if this is an interface machine, don't write to the main if ( g_conf.m_interfaceMachine ) return true; // return true if no data if ( listSize == 0 ) return true; // how many hosts in this group //int32_t numHosts = g_hostdb.getNumHostsPerShard(); // . NOTE: for now i'm removing this until I handle ETRYAGAIN errors // properly... by waiting and retrying... // . if this is local data just for us just do an addList to OUR rdb /* if ( groupId == g_hostdb.m_groupId && numHosts == 1 ) { // this sets g_errno on error Msg0 msg0; Rdb *rdb = msg0.getRdb ( (char) m_rdbId ); if ( ! rdb ) return true; // make a list from this data RdbList list; list.set (listData,listSize,listSize,rdb->getFixedDataSize(), false) ; // ownData? // this returns false and sets g_errno on error rdb->addList ( &list ); // . if we got a ETRYAGAIN cuz the buffer we add to was full // then we should sleep and try again! // . return false cuz this blocks for a period of time // before trying again if ( g_errno == ETRYAGAIN ) { // try adding again in 1 second registerSleepCallback ( 1000, slot, tryAgainWrapper1 ); // return now return false; } // . always return true cuz we did not block // . g_errno may be set return true; } */ // if the data is being added to our group, don't send ourselves // a msg1, if we can add it right now // MDW: crap this is getting ETRYAGAIN and it isn't being tried again // i guess and Spider.cpp fails to add to doledb but the doleiptable // maintains a positive count, thereby hanging the spiders. let's // just always go through multicast so it will auto-retry ETRYAGAIN /* bool sendToSelf = true; if ( shardNum == getMyShardNum() && ! g_conf.m_interfaceMachine ) { // get the rdb to which it belongs, use Msg0::getRdb() Rdb *rdb = getRdbFromId ( (char) m_rdbId ); if ( ! rdb ) goto skip; // key size int32_t ks = getKeySizeFromRdbId ( m_rdbId ); // reset g_errno g_errno = 0; // . make a list from this data // . skip over the first 4 bytes which is the rdbId // . TODO: embed the rdbId in the msgtype or something... RdbList list; // set the list list.set ( listData , listSize , listData , listSize , rdb->getFixedDataSize() , false , // ownData? rdb->useHalfKeys() , ks ); // note that //log("msg1: local addlist niceness=%" PRId32,m_niceness); // this returns false and sets g_errno on error rdb->addList ( m_coll , &list , m_niceness ); // if titledb, add tfndb recs to map the title recs //if ( ! g_errno && rdb == g_titledb.getRdb() && m_injecting ) // // this returns false and sets g_errno on error // updateTfndb ( m_coll , &list , true , m_niceness); // if no error, no need to use a Msg1 UdpSlot for ourselves if ( ! g_errno ) sendToSelf = false; else { log("rdb: msg1 coll=%s rdb=%s had error: %s", m_coll,rdb->m_dbname,mstrerror(g_errno)); // this is messing up generate catdb's huge rdblist add // why did we put it in there??? from msg9b.cpp //return true; } QUICKPOLL(m_niceness); // if we're the only one in the group, bail, we're done if ( ! sendToSelf && g_hostdb.getNumHostsPerShard() == 1 ) return true; } skip: */ // . make an add record request to multicast to a bunch of machines // . 
this will alloc new space, returns NULL on failure //char *request = makeRequest ( listData, listSize, groupId , //m_rdbId , &requestLen ); //int32_t collLen = strlen ( m_coll ); // . returns NULL and sets g_errno on error // . calculate total size of the record // . 1 byte for rdbId, 1 byte for flags, // then collection NULL terminated, then list int32_t requestLen = 1 + 1 + sizeof(collnum_t) + listSize ; // make the request char *request = (char *) mmalloc ( requestLen ,"Msg1" ); if ( ! request ) return true; char *p = request; // store the rdbId at top of request *p++ = m_rdbId; // then the flags *p = 0; if ( m_injecting ) *p |= 0x80; p++; // then collection name //gbmemcpy ( p , m_coll , collLen ); //p += collLen; //*p++ = '\0'; *(collnum_t *)p = m_collnum; p += sizeof(collnum_t); // sanity check //if ( collLen <= 0 ) { // log(LOG_LOGIC,"net: No collection specified for list add."); // //g_process.shutdownAbort(true); // g_errno = ENOCOLLREC; // return true; //} //if ( m_deleteRecs ) request[1] |= 0x80; //if ( m_overwriteRecs ) request[1] |= 0x40; // store the list after coll gbmemcpy ( p , listData , listSize ); QUICKPOLL(m_niceness); // for small packets //int32_t niceness = 2; //if ( requestLen < TMPBUFSIZE - 32 ) niceness = 0; //log("msg1: sending mcast niceness=%" PRId32,m_niceness); // . multicast to all hosts in group "groupId" // . multicast::send() returns false and sets g_errno on error // . we return false if we block, true otherwise // . will loop indefinitely if a host in this group is down key_t k; k.setMin(); if ( m_mcast.send ( request , // sets mcast->m_msg to this requestLen , // sets mcast->m_msgLen to this msg_type_1 , true , // does multicast own msg? shardNum , // group to send to (groupKey) true , // send to whole group? 0 , // key is useless for us this , // state data NULL , // state data gotReplyWrapper1 , multicast_msg1_senddata_timeout , // timeout m_niceness , // niceness -1 , // first host to try NULL , // replyBuf = NULL , 0 , // replyBufMaxSize = 0 , true , // freeReplyBuf = true , false , // doDiskLoadBalancing = false , -1 , // no max cache age limit //(key_t)0 , // cache key k , // cache key RDB_NONE , // bogus rdbId -1 , // unknown minRecSizes read size true )) // sendToSelf )) return false; QUICKPOLL(m_niceness); // g_errno should be set log("net: Had error when sending request to add data to %s in shard " "#%" PRIu32": %s.", getDbnameFromId(m_rdbId),shardNum,mstrerror(g_errno)); return true; }
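// . sendData() above serializes an add-list request as a flat buffer:
//   one rdbId byte, one flags byte (0x80 = injecting), a collnum_t,
//   then the raw list bytes
// . a sketch of the same packing with plain types; collnum_t is assumed
//   to be a 16-bit integer here, and packAddRequest is a hypothetical
//   name
#include <cstring>
#include <cstdlib>
typedef short collnum_sketch_t;
static char *packAddRequest ( char rdbId , bool injecting ,
			      collnum_sketch_t collnum ,
			      const char *listData , int listSize ,
			      int *reqLenOut ) {
	int reqLen = 1 + 1 + (int)sizeof(collnum_sketch_t) + listSize;
	char *req = (char *)malloc ( reqLen );
	if ( ! req ) return NULL;
	char *p = req;
	*p++ = rdbId;                       // which rdb gets the list
	*p   = 0;                           // flags byte
	if ( injecting ) *p |= (char)0x80;
	p++;
	memcpy ( p , &collnum , sizeof(collnum_sketch_t) );
	p += sizeof(collnum_sketch_t);
	memcpy ( p , listData , listSize ); // list rides after the header
	*reqLenOut = reqLen;
	return req;                         // caller frees
}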
// . reply to a request for an RdbList // . MUST call g_udpServer::sendReply or sendErrorReply() so slot can // be destroyed void handleRequest0 ( UdpSlot *slot , long netnice ) { // if niceness is 0, use the higher priority udpServer UdpServer *us = &g_udpServer; //if ( netnice == 0 ) us = &g_udpServer2; // get the request char *request = slot->m_readBuf; long requestSize = slot->m_readBufSize; // collection is now stored in the request, so i commented this out //if ( requestSize != MSG0_REQ_SIZE ) { // log("net: Received bad data request size of %li bytes. " // "Should be %li.", requestSize ,(long)MSG0_REQ_SIZE); // us->sendErrorReply ( slot , EBADREQUESTSIZE ); // return; //} // parse the request char *p = request; long long syncPoint = *(long long *)p ; p += 8; //key_t startKey = *(key_t *)p ; p += sizeof(key_t); //key_t endKey = *(key_t *)p ; p += sizeof(key_t); long minRecSizes = *(long *)p ; p += 4; long startFileNum = *(long *)p ; p += 4; long numFiles = *(long *)p ; p += 4; long maxCacheAge = *(long *)p ; p += 4; char rdbId = *p++; char addToCache = *p++; char doErrorCorrection = *p++; char includeTree = *p++; // this was messing up our niceness conversion logic long niceness = slot->m_niceness;//(long)(*p++); // still need to skip it though! p++; bool allowPageCache = (bool)(*p++); char ks = getKeySizeFromRdbId ( rdbId ); char *startKey = p; p+=ks; char *endKey = p; p+=ks; // then null terminated collection char *coll = p; // error set from XmlDoc::cacheTermLists()? if ( g_errno ) { us->sendErrorReply ( slot , EBADRDBID ); return;} // is this being called from callWaitingHandlers() //bool isRecall = (netnice == 99); // . get the rdb we need to get the RdbList from // . returns NULL and sets g_errno on error //Msg0 msg0; //Rdb *rdb = msg0.getRdb ( rdbId ); Rdb *rdb = getRdbFromId ( rdbId ); if ( ! rdb ) { us->sendErrorReply ( slot , EBADRDBID ); return;} // keep track of stats rdb->readRequestGet ( requestSize ); /* // keep track of stats if ( ! isRecall ) rdb->readRequestGet ( requestSize ); long long singleDocId2 = 0LL; if ( rdbId == RDB_POSDB && maxCacheAge ) { long long d1 = g_posdb.getDocId(startKey); long long d2 = g_posdb.getDocId(endKey); if ( d1+1 == d2 ) singleDocId2 = d1; } // have we parsed this docid and cached its termlists? bool shouldBeCached2 = false; if ( singleDocId2 && isDocIdInTermListCache ( singleDocId2 , coll ) ) shouldBeCached2 = true; // if in the termlist cache, send it back right away char *trec; long trecSize; if ( singleDocId2 && getRecFromTermListCache(coll, startKey, endKey, maxCacheAge, &trec, &trecSize) ) { // if in cache send it back! us->sendReply_ass(trec,trecSize,trec,trecSize,slot); return; } // if should be cached but was not found then it's probably a // synonym form not in the doc content. make an empty list then. if ( shouldBeCached2 ) { // send back an empty termlist us->sendReply_ass(NULL,0,NULL,0,slot); return; } // MUST be in termlist cache! if not in there it is a probably // a synonym term termlist of a word in the doc. if ( isRecall ) { // send back an empty termlist us->sendReply_ass(NULL,0,NULL,0,slot); return; } // init waiting table? static bool s_waitInit = false; if ( ! s_waitInit ) { // do not repeat s_waitInit = true; // niceness = 0 if ( ! g_waitingTable.set(8,4,2048,NULL,0,true,0,"m5wtbl")){ log("msg5: failed to init waiting table"); // error kills us! us->sendErrorReply ( slot , EBADRDBID ); return; } } // wait in waiting table? 
if ( singleDocId2 && g_waitingTable.isInTable ( &singleDocId2 ) ) { g_waitingTable.addKey ( &singleDocId2 , &slot ); return; } // if it's for a special gbdocid: query then cache ALL termlists // for this docid into g_termListCache right now if ( singleDocId2 ) { // have all further incoming requests for this docid // wait in the waiting table g_waitingTable.addKey ( &singleDocId2 , &slot ); // load the title rec and store its posdb termlists in cache XmlDoc *xd; try { xd = new ( XmlDoc ); } catch ( ... ) { g_errno = ENOMEM; us->sendErrorReply ( slot , g_errno ); return; } mnew ( xd, sizeof(XmlDoc),"msg0xd"); // always use niceness 1 now even though we use niceness 0 // to make the cache hits fast //niceness = 1; // . load the old title rec first and just recycle all // . typically there might be a few hundred related docids // each with 50,000 matching queries on average to evaluate // with the gbdocid:xxxx| restriction? if ( ! xd->set3 ( singleDocId2 , coll , niceness ) ) { us->sendErrorReply ( slot , g_errno ); return;} // init the new xmldoc xd->m_callback1 = callWaitingHandlers; xd->m_state = xd; // . if this blocks then return // . should call loadOldTitleRec() and get JUST the posdb recs // by setting m_useTitledb, etc. to false. then it should // make posdb termlists with the compression using // RdbList::addRecord() and add those lists to // g_termListCache if ( ! xd->cacheTermLists ( ) ) return; // otherwise, it completed right away! callWaitingHandlers ( xd ); return; } */ /* // init special sectiondb cache? if ( rdbId == RDB_SECTIONDB && ! s_initCache ) { // try to init cache if ( ! s_sectiondbCache.init ( 20000000 , // 20MB max mem -1 , // fixed data size false , // support lists? 20000 , // 20k max recs false , // use half keys? "secdbche", // dbname false, // load from disk? sizeof(key128_t), //cachekeysize 0 , // data key size 20000 )) // numPtrs max log("msg0: failed to init sectiondb cache: %s", mstrerror(g_errno)); else s_initCache = true; } // check the sectiondb cache if ( rdbId == RDB_SECTIONDB ) { //long long sh48 = g_datedb.getTermId((key128_t *)startKey); // use the start key now!!! char *data; long dataSize; if (s_sectiondbCache.getRecord ( coll, startKey,//&sh48, &data, &dataSize, true, // docopy? 600, // maxage (10 mins) true, // inc counts? NULL, // cachedtime true // promoteRec? )){ // debug //log("msg0: got sectiondblist in cache datasize=%li", // dataSize); // send that back g_udpServer.sendReply_ass ( data , dataSize , data , dataSize , slot , 60 , NULL , doneSending_ass , -1 , -1 , true ); return; } } */ // . do a local get // . create a msg5 to get the list State00 *st0 ; try { st0 = new (State00); } catch ( ... ) { g_errno = ENOMEM; log("Msg0: new(%i): %s", sizeof(State00),mstrerror(g_errno)); us->sendErrorReply ( slot , g_errno ); return; } mnew ( st0 , sizeof(State00) , "State00" ); // timing debug if ( g_conf.m_logTimingNet ) st0->m_startTime = gettimeofdayInMilliseconds(); // save slot in state st0->m_slot = slot; // save udp server to send back reply on st0->m_us = us; // init this one st0->m_niceness = niceness; st0->m_rdbId = rdbId; QUICKPOLL(niceness); // debug msg if ( maxCacheAge != 0 && ! addToCache ) log(LOG_LOGIC,"net: msg0: check but don't add... rdbid=%li.", (long)rdbId); // . if this request came over on the high priority udp server // make sure the priority gets passed along // . return if this blocks // . we'll call sendReply later if ( ! 
st0->m_msg5.getList ( rdbId ,
			      coll ,
			      &st0->m_list ,
			      startKey ,
			      endKey ,
			      minRecSizes ,
			      includeTree , // include tree?
			      addToCache , // addToCache?
			      maxCacheAge ,
			      startFileNum ,
			      numFiles ,
			      st0 ,
			      gotListWrapper ,
			      niceness ,
			      doErrorCorrection ,
			      NULL , // cacheKeyPtr
			      0 , // retryNum
			      2 , // maxRetries
			      true , // compensateForMerge
			      syncPoint ,
			      &st0->m_msg5b ,
			      false,
			      allowPageCache ) ) return;
	// call wrapper ourselves
	gotListWrapper ( st0 , NULL , NULL );
}
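// . handleRequest0() above decodes its request by walking a cursor over
//   a fixed layout: an 8-byte sync point, four 4-byte longs, six 1-byte
//   fields (one of them a skipped legacy niceness byte), two ks-byte
//   keys, then a NUL-terminated collection name
// . the same walk as a sketch; like the handler it assumes 4-byte longs
//   (a 32-bit build), and Msg0ReqSketch is a hypothetical struct
struct Msg0ReqSketch {
	long long syncPoint;
	long minRecSizes, startFileNum, numFiles, maxCacheAge;
	char rdbId, addToCache, doErrorCorrection, includeTree;
	char allowPageCache;
	const char *startKey, *endKey, *coll;
};
static void parseMsg0Request ( const char *p , int ks , Msg0ReqSketch *r ) {
	r->syncPoint    = *(const long long *)p; p += 8;
	r->minRecSizes  = *(const long *)p;      p += 4;
	r->startFileNum = *(const long *)p;      p += 4;
	r->numFiles     = *(const long *)p;      p += 4;
	r->maxCacheAge  = *(const long *)p;      p += 4;
	r->rdbId             = *p++;
	r->addToCache        = *p++;
	r->doErrorCorrection = *p++;
	r->includeTree       = *p++;
	p++;                      // legacy niceness byte, skipped
	r->allowPageCache    = *p++;
	r->startKey = p; p += ks; // key size depends on the rdb
	r->endKey   = p; p += ks;
	r->coll     = p;          // NUL-terminated
}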
// g_errno may be set when this is called
void addedList ( UdpSlot *slot , Rdb *rdb ) {
	// no memory means to try again
	if ( g_errno == ENOMEM ) g_errno = ETRYAGAIN;
	// doing a full rebuild will add collections
	if ( g_errno == ENOCOLLREC && g_repairMode > 0 )
		//g_repair.m_fullRebuild )
		g_errno = ETRYAGAIN;
	// it seems like someone can delete a collection and there can
	// be adds in transit to doledb and it logs
	// "doledb bad collnum of 30110"
	// so just absorb those
	if ( g_errno == ENOCOLLREC ) {
		log("msg1: missing collrec to add to %s. just dropping.",
		    rdb->m_dbname);
		g_errno = 0;
	}
	// . if we got a ETRYAGAIN cuz the buffer we add to was full
	//   then we should sleep and try again!
	// . return false cuz this blocks for a period of time
	//   before trying again
	// . but now to free the udp slot when we are doing an urgent merge
	//   let's send an error back!
	//if ( g_errno == ETRYAGAIN ) {
	// debug msg
	//log("REGISTERING SLEEP CALLBACK");
	// try adding again in 1 second
	//	g_loop.registerSleepCallback ( 1000, slot, tryAgainWrapper );
	// return now
	//	return;
	//}
	// random test
	//if ( (rand() % 10) == 1 ) g_errno = ETRYAGAIN;
	//int32_t niceness = slot->getNiceness() ;
	// select udp server based on niceness
	UdpServer *us = &g_udpServer ;
	//if ( niceness == 0 ) us = &g_udpServer2;
	//else us = &g_udpServer ;
	// chalk it up
	rdb->sentReplyAdd ( 0 );
	// are we done?
	if ( ! g_errno ) {
		// . send an empty (non-error) reply as verification
		// . slot should be auto-nuked on transmission/timeout of reply
		// . udpServer should free the readBuf
		us->sendReply_ass ( NULL , 0 , NULL , 0 , slot ) ;
		return;
	}
	// on other errors just send the err code back
	log(LOG_ERROR,"%s:%s:%d: call sendErrorReply. error=%s",
	    __FILE__, __func__, __LINE__, mstrerror(g_errno));
	us->sendErrorReply ( slot , g_errno );
}
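// . addedList() above folds a few failure cases into ETRYAGAIN or
//   swallows them before replying
// . the mapping on its own, with hypothetical stand-in codes since the
//   real g_errno values live elsewhere in the tree
static int translateAddError ( int err , bool repairMode ) {
	enum { OK_S = 0, ETRYAGAIN_S = 1, ENOMEM_S = 2, ENOCOLLREC_S = 3 };
	if ( err == ENOMEM_S ) return ETRYAGAIN_S; // oom: retry later
	if ( err == ENOCOLLREC_S && repairMode ) return ETRYAGAIN_S;
	if ( err == ENOCOLLREC_S ) return OK_S;    // coll gone: drop the add
	return err;                                // report as-is
}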
// . return false if blocked, true otherwise // . set g_errno on error // . read list of keys in [startKey,endKey] range // . read at least "minRecSizes" bytes of keys in that range // . the "m_endKey" of resulting, merged list may have a smaller endKey // than the argument, "endKey" due to limitation by "minRecSizes" // . resulting list will contain ALL keys between ITS [m_startKey,m_endKey] // . final merged list "should" try to have a size of at least "minRecSizes" // but due to negative/postive rec elimination may be less // . the endKey of the lists we read may be <= "endKey" provided // . we try to shrink the endKey if minRecSizes is >= 0 in order to // avoid excessive reading // . by shrinking the endKey we cannot take into account the size of deleted // records, so therefore we may fall short of "minRecSizes" in actuality, // in fact, the returned list may even be empty with a shrunken endKey // . we merge all lists read from disk into the provided "list" // . caller should call Msg3.getList(long i) and Msg3:getNumLists() to retrieve // . this makes the query engine faster since we don't need to merge the docIds // and can just send them across the network separately and they will be // hashed into IndexTable's table w/o having to do time-wasting merging. // . caller can specify array of filenums to read from so incremental syncing // in Sync class can just read from titledb*.dat files that were formed // since the last sync point. bool Msg3::readList ( char rdbId , char *coll , //key_t startKey , //key_t endKey , char *startKeyArg , char *endKeyArg , long minRecSizes , // max size of scan long startFileNum , // first file to scan long numFiles , // rel. to startFileNum void *state , // for callback void (* callback ) ( void *state ) , long niceness , long retryNum , long maxRetries , bool compensateForMerge , long long syncPoint , bool justGetEndKey , bool allowPageCache , bool hitDisk ) { // clear, this MUST be done so if we return true g_errno is correct g_errno = 0; // assume lists are not checked for corruption m_listsChecked = false; // warn if ( minRecSizes < -1 ) { log(LOG_LOGIC,"db: Msg3 got minRecSizes of %li, changing " "to -1.",minRecSizes); minRecSizes = -1; } // reset m_alloc and data in all lists in case we are a re-call reset(); // warning if ( ! coll ) log(LOG_LOGIC,"net: NULL collection. msg3."); // remember the callback m_rdbId = rdbId; m_coll = coll; m_callback = callback; m_state = state; m_niceness = niceness; m_numScansCompleted = 0; m_retryNum = retryNum; m_maxRetries = maxRetries; m_compensateForMerge = compensateForMerge; m_allowPageCache = allowPageCache; m_hitDisk = hitDisk; m_hadCorruption = false; // get keySize of rdb m_ks = getKeySizeFromRdbId ( m_rdbId ); // reset the group error m_errno = 0; // . reset all our lists // . these are reset in call the RdbScan::setRead() below //for ( long i = 0 ; i < MAX_RDB_FILES ; i++ ) m_lists[i].reset(); // . ensure startKey last bit clear, endKey last bit set // . no! this warning is now only in Msg5 // . if RdbMerge is merging some files, not involving the root // file, then we can expect to get a lot of unmatched negative recs. // . as a consequence, our endKeys may often be negative. This means // it may not annihilate with the positive key, but we should only // miss like this at the boundaries of the lists we fetch. // . 
so in that case RdbList::merge will stop merging once the // minRecSizes limit is reached even if it means ending on a negative // rec key //if ( (startKey.n0 & 0x01) == 0x01 ) if ( !KEYNEG(startKeyArg) ) log(LOG_REMIND,"net: msg3: StartKey lastbit set."); if ( KEYNEG(endKeyArg) ) log(LOG_REMIND,"net: msg3: EndKey lastbit clear."); // declare vars here becaues of 'goto skip' below long mergeFileNum = -1 ; long max ; // get base, returns NULL and sets g_errno to ENOCOLLREC on error RdbBase *base; if (!(base=getRdbBase(m_rdbId,m_coll))) return true; // if caller specified exactly /* m_syncPoint = syncPoint; if ( syncPoint != -1 && syncPoint != 0 ) { // . store them all // . what if we merged one of these files (or are merging)??? // . then sync class should not discard syncpoints until no // longer syncing and we'll know about it // . this should compensate for merges by including any files // that are merging a file in m_fileNums m_numFileNums = g_sync.getFileNums ( m_rdbId , m_coll , m_syncPoint , m_fileNums , MAX_RDB_FILES ); log("NOOOOOO. we do not alloc if we go to skip!!"); char *xx = NULL; *xx = 0; // bring back the comment below... i removed it because i added // "long chunk" et al below and didn't want to move them. //if ( m_numFileNums > 0 ) goto skip; log("net: Trying to read data in %s from files generated after" " a sync point %llu in \"sync\" file, but none found.", base->m_dbname,m_syncPoint); return true; } // should we read all? if ( m_syncPoint == 0 ) { numFiles = -1; startFileNum = 0; } */ // store the file numbers in the array, these are the files we read m_numFileNums = 0; // save startFileNum here, just for recall m_startFileNum = startFileNum; m_numFiles = numFiles; // . if we have a merge going on, we may have to change startFileNum // . if some files get unlinked because merge completes then our // reads will detect the error and loop back here // . we launch are reads right after this without giving up the cpu // and we use file descriptors, so any changes to Rdb::m_files[] // should not hurt us // . WARNING: just make sure you don't lose control of cpu until after // you call RdbScan::set() // . we use hasMergeFile() instead of isMerging() because he may not // be merging cuz he got suspended or he restarted and // hasn't called attemptMerge() yet, but he may still contain it if ( g_conf.m_logDebugQuery ) log(LOG_DEBUG, "net: msg3: " "c=%li hmf=%li sfn=%li msfn=%li nf=%li db=%s.", (long)compensateForMerge,(long)base->hasMergeFile(), (long)startFileNum,(long)base->m_mergeStartFileNum-1, (long)numFiles,base->m_dbname); long pre = -10; if ( compensateForMerge && base->hasMergeFile() && startFileNum >= base->m_mergeStartFileNum - 1 && (startFileNum > 0 || numFiles != -1) ) { // now also include the file being merged into, but only // if we are reading from a file being merged... 
if ( startFileNum < base->m_mergeStartFileNum + base->m_numFilesToMerge - 1 ) //m_fileNums [ m_numFileNums++ ] = // base->m_mergeStartFileNum - 1; pre = base->m_mergeStartFileNum - 1; // debug msg if ( g_conf.m_logDebugQuery ) log(LOG_DEBUG, "net: msg3: startFileNum from %li to %li (mfn=%li)", startFileNum,startFileNum+1,mergeFileNum); // if merge file was inserted before us, inc our file number startFileNum++; } // adjust num files if we need to, as well if ( compensateForMerge && base->hasMergeFile() && startFileNum < base->m_mergeStartFileNum - 1 && numFiles != -1 && startFileNum + numFiles - 1 >= base->m_mergeStartFileNum - 1 ) { // debug msg if ( g_conf.m_logDebugQuery ) log(LOG_DEBUG,"net: msg3: numFiles up one."); // if merge file was inserted before us, inc our file number numFiles++; } // . how many rdb files does this base have? // . IMPORTANT: this can change since files are unstable because they // might have all got merged into one! // . so do this check to make sure we're safe... especially if // there was an error before and we called readList() on ourselves max = base->getNumFiles(); // -1 means we should scan ALL the files in the base if ( numFiles == -1 ) numFiles = max; // limit it by startFileNum, however if ( numFiles > max - startFileNum ) numFiles = max - startFileNum; // set g_errno and return true if it is < 0 if ( numFiles < 0 ) { log(LOG_LOGIC, "net: msg3: readList: numFiles = %li < 0 (max=%li)(sf=%li)", numFiles , max , startFileNum ); g_errno = EBADENGINEER; // force core dump //char *p = NULL; //*p = 0; return true; } // . allocate buffer space // . m_scans, m_startpg, m_endpg, m_hintKeys, m_hintOffsets, // m_fileNums, m_lists, m_tfns long chunk = sizeof(RdbScan) + // m_scans 4 + // m_startpg 4 + // m_endpg //sizeof(key_t) + // m_hintKeys m_ks + // m_hintKeys 4 + // m_hintOffsets 4 + // m_fileNums sizeof(RdbList) + // m_lists 4 ; // m_tfns long nn = numFiles; if ( pre != -10 ) nn++; m_numChunks = nn; long need = nn * (chunk); m_alloc = m_buf; if ( need > (long)MSG3_BUF_SIZE ) { m_allocSize = need; m_alloc = (char *)mcalloc ( need , "Msg3" ); if ( ! m_alloc ) { log("disk: Could not allocate %li bytes read " "structures to read %s.",need,base->m_dbname); return true; } } char *p = m_alloc; m_scans = (RdbScan *)p; p += nn * sizeof(RdbScan); m_startpg = (long *)p; p += nn * 4; m_endpg = (long *)p; p += nn * 4; //m_hintKeys = (key_t *)p; p += nn * sizeof(key_t); m_hintKeys = (char *)p; p += nn * m_ks; m_hintOffsets = (long *)p; p += nn * 4; m_fileNums = (long *)p; p += nn * 4; m_lists = (RdbList *)p; p += nn * sizeof(RdbList); m_tfns = (long *)p; p += nn * 4; // sanity check if ( p - m_alloc != need ) { log(LOG_LOGIC,"disk: Bad malloc in Msg3.cpp."); char *xx = NULL; *xx = 0; } // call constructors for ( long i = 0 ; i < nn ; i++ ) m_lists[i].constructor(); // make fix from up top if ( pre != -10 ) m_fileNums [ m_numFileNums++ ] = pre; // store them all for ( long i = startFileNum ; i < startFileNum + numFiles ; i++ ) m_fileNums [ m_numFileNums++ ] = i; // we skip down to here when a syncPoint was used to set the // m_fileNums/m_numFileNums array of files to read from // JAB: warning abatement // skip: // . remove file nums that are being unlinked after a merge now // . 
keep it here (below skip: label) so sync point reads can use it long n = 0; for ( long i = 0 ; i < m_numFileNums ; i++ ) { // skip those that are being unlinked after the merge if ( base->m_isUnlinking && m_fileNums[i] >= base->m_mergeStartFileNum && m_fileNums[i] < base->m_mergeStartFileNum + base->m_numFilesToMerge ) continue; // otherwise, keep it m_fileNums[n++] = m_fileNums[i]; } m_numFileNums = n; // . if root file is being merged, he's file #0, & root file is file #1 // . this is a hack so caller gets what he wants //if ( startFileNum == 0 && base->getFileId(0) == 0 && numFiles == 1 ) // numFiles = 2; // remember the file range we should scan m_numScansStarted = 0; m_numScansCompleted = 0; //m_startKey = startKey; //m_endKey = endKey; //m_constrainKey = endKey; // set in case justGetEndKey is true KEYSET(m_startKey,startKeyArg,m_ks); KEYSET(m_endKey,endKeyArg,m_ks); KEYSET(m_constrainKey,endKeyArg,m_ks);//set incase justGetEndKey istrue m_minRecSizes = minRecSizes; m_compensateForMerge = compensateForMerge; // bail if 0 files to scan -- no! need to set startKey/endKey if ( numFiles == 0 ) return true; // don't read anything if endKey < startKey //if ( m_startKey > m_endKey ) return true; if ( KEYCMP(m_startKey,m_endKey,m_ks)>0 ) return true; // keep the original in tact in case g_errno == ETRYAGAIN //m_endKeyOrig = endKey; KEYSET(m_endKeyOrig,endKeyArg,m_ks); m_minRecSizesOrig = minRecSizes; // start reading at this key m_fileStartKey = startKeyArg; // start the timer, keep it fast for clusterdb though if ( g_conf.m_logTimingDb ) m_startTime = gettimeofdayInMilliseconds(); // translate base to an id, for the sake of m_msg0 //char baseId = m_msg0->getRdbId ( base ); // map ptrs RdbMap **maps = base->getMaps(); // . we now boost m_minRecSizes to account for negative recs // . but not if only reading one list, cuz it won't get merged and // it will be too big to send back if ( m_numFileNums > 1 ) compensateForNegativeRecs ( base ); // . often endKey is too big for an efficient read of minRecSizes bytes // because we end up reading too much from all the files // . this will set m_startpg[i], m_endpg[i] for each RdbScan/RdbFile // to ensure we read "minRecSizes" worth of records, not much more // . returns the new endKey for all ranges // . now this just overwrites m_endKey //m_endKey = setPageRanges ( base , setPageRanges ( base , m_fileNums , m_numFileNums , m_fileStartKey , // start reading @ key m_endKey , // stop reading @ key m_minRecSizes ); // . NEVER let m_endKey be a negative key, because it will // always be unmatched, since delbit is cleared // . adjusting it here ensures our generated hints are valid // . we will use this key to call constrain() with //m_constrainKey = m_endKey; //if ( ( m_constrainKey.n0 & 0x01) == 0x00 ) // m_constrainKey -= (unsigned long)1; KEYSET(m_constrainKey,m_endKey,m_ks); if ( KEYNEG(m_constrainKey) ) KEYSUB(m_constrainKey,1,m_ks); // if m_endKey splits some keys that should be together, we need to // decrease it so such a split doesn't happen. //if ( m_endKey != m_endKeyOrig && m_rdbId==RDB_TFNDB && numFiles > 0){ /* if ( KEYCMP(m_endKey,m_endKeyOrig,m_ks)!=0 && m_rdbId==RDB_TFNDB && numFiles > 0 ) { // . drop the docid down one and max out the tfn... // . we may lose some recs when we call constrain, but at least // we are guaranteed not to split a sequence with the same // docid but different tfns... thus the disk merge will // then work correctly. 
before we were splitting these // sequence between successive disk reads and they were not // getting annihilated together in the call to indexMerge_r() long long d = g_tfndb.getDocId ( (key_t *)&m_endKey ); if ( d > 0 ) d = d - 1LL; //m_constrainKey = g_tfndb.makeMaxKey(d); *(key_t *)m_constrainKey = g_tfndb.makeMaxKey(d); // set the half bit on //m_constrainKey.n0 |= 0x02; *m_constrainKey |= 0x02; // note it //logf(LOG_DEBUG,"oldukey.n1=%lx n0=%llx new.n1=%lx n0=%llx", // m_endKey.n1,m_endKey.n0, // m_constrainKey.n1,m_constrainKey.n0); } */ // Msg5 likes to get the endkey for getting the list from the tree if ( justGetEndKey ) return true; // sanity check if ( m_numFileNums > nn ) { log(LOG_LOGIC,"disk: Failed sanity check in Msg3."); char *xx = NULL; *xx = 0; } // debug msg //log("msg3 getting list (msg5=%lu)",m_state); // . MDW removed this -- go ahead an end on a delete key // . RdbMerge might not pick it up this round, but oh well // . so we can have both positive and negative co-existing in same file // make sure the last bit is set so we don't end on a delete key //m_endKey.n0 |= 0x01LL; // . now start reading/scanning the files // . our m_scans array starts at 0 for ( long i = 0 ; i < m_numFileNums ; i++ ) { // get the page range //long p1 = m_startpg [ i ]; //long p2 = m_endpg [ i ]; //#ifdef _SANITYCHECK_ long fn = m_fileNums[i]; // this can happen somehow! if ( fn < 0 ) { log(LOG_LOGIC,"net: msg3: fn=%li. Bad engineer.",fn); continue; } // sanity check if ( i > 0 && m_fileNums[i-1] >= fn ) { log(LOG_LOGIC, "net: msg3: files must be read in order " "from oldest to newest so RdbList::indexMerge_r " "works properly. Otherwise, corruption will " "result. "); char *xx = NULL; *xx = 0; return true; } // . sanity check? // . no, we must get again since we turn on endKey's last bit long p1 , p2; maps[fn]->getPageRange ( m_fileStartKey , m_endKey , &p1 , &p2 , NULL ); //if ( p1 != p1c || p2 != p2c ) { // fprintf(stderr,"Msg3::bad page range\n"); // sleep(50000); //} // sanity check, each endpg's key should be > endKey //if ( p2 < maps[fn]->getNumPages() && // maps[fn]->getKey ( p2 ) <= m_endKey ) { // fprintf(stderr,"Msg3::bad page range 2\n"); // sleep(50000); //} //#endif //long p1 , p2; //maps[fn]->getPageRange (startKey,endKey,minRecSizes,&p1,&p2); // now get some read info long long offset = maps[fn]->getAbsoluteOffset ( p1 ); long bytesToRead = maps[fn]->getRecSizes ( p1, p2, false); // max out the endkey for this list // debug msg //#ifdef _DEBUG_ //if ( minRecSizes == 2000000 ) //log("Msg3:: reading %li bytes from file #%li",bytesToRead,i); //#endif // inc our m_numScans m_numScansStarted++; // . keep stats on our disk accesses // . count disk seeks (assuming no fragmentation) // . count disk bytes read if ( bytesToRead > 0 ) { base->m_rdb->didSeek ( ); base->m_rdb->didRead ( bytesToRead ); } // . the startKey may be different for each RdbScan class // . RdbLists must have all keys within their [startKey,endKey] // . therefore set startKey individually from first page in map // . this endKey must be >= m_endKey // . this startKey must be < m_startKey //key_t startKey = maps[fn]->getKey ( p1 ); //key_t endKey = maps[fn]->getKey ( p2 ); char startKey2 [ MAX_KEY_BYTES ]; char endKey2 [ MAX_KEY_BYTES ]; maps[fn]->getKey ( p1 , startKey2 ); maps[fn]->getKey ( p2 , endKey2 ); //char *startKey = maps[fn]->getKeyPtr ( p1 ); //char *endKey = maps[fn]->getKeyPtr ( p2 ); // store in here m_startpg [ i ] = p1; m_endpg [ i ] = p2; // . we read UP TO that endKey, so reduce by 1 // . 
but iff p2 is NOT the last page in the map/file // . maps[fn]->getKey(lastPage) will return the LAST KEY // and maps[fn]->getOffset(lastPage) the length of the file //if ( maps[fn]->getNumPages()!=p2) endKey -=(unsigned long)1; if ( maps[fn]->getNumPages() != p2 ) KEYSUB(endKey2,1,m_ks); // otherwise, if we're reading all pages, then force the // endKey to virtual inifinite //else endKey.setMax(); else KEYMAX(endKey2,m_ks); // . set up the hints // . these are only used if we are only reading from 1 file // . these are used to call constrain() so we can constrain // the end of the list w/o looping through all the recs // in the list long h2 = p2 ; // decrease by one page if we're on the last page if ( h2 > p1 && maps[fn]->getNumPages() == h2 ) h2--; // . decrease hint page until key is <= endKey on that page // AND offset is NOT -1 because the old way would give // us hints passed the endkey // . also decrease so we can constrain on minRecSizes in // case we're the only list being read // . use >= m_minRecSizes instead of >, otherwise we may // never be able to set "size" in RdbList::constrain() // because "p" could equal "maxPtr" right away while ( h2 > p1 && //( maps[fn]->getKey (h2) > m_constrainKey || (KEYCMP(maps[fn]->getKeyPtr(h2),m_constrainKey,m_ks)>0|| maps[fn]->getOffset(h2) == -1 || maps[fn]->getAbsoluteOffset(h2) - offset >= m_minRecSizes ) ) h2--; // now set the hint m_hintOffsets [ i ] = maps[fn]->getAbsoluteOffset ( h2 ) - maps[fn]->getAbsoluteOffset ( p1 ) ; //m_hintKeys [ i ] = maps[fn]->getKey ( h2 ); KEYSET(&m_hintKeys[i*m_ks],maps[fn]->getKeyPtr(h2),m_ks); // reset g_errno before calling setRead() g_errno = 0; // . this fix is now in RdbList::checklist_r() // . we can now have dup keys, so, we may read in // a rec with key "lastMinKey" even though we don't read // in the first key on the end page, so don't subtract 1... //if ( endKey != m_endKeyOrig ) // endKey += (unsigned long) 1; // timing debug if ( g_conf.m_logTimingDb ) log(LOG_TIMING, "net: msg: reading %li bytes from %s file #%li " "(niceness=%li)", bytesToRead,base->m_dbname,i,m_niceness); // set the tfn if ( m_rdbId == RDB_TITLEDB ) m_tfns[i] = base->getFileId2(m_fileNums[i]); // log huge reads, those hurt us if ( bytesToRead > 150000000 ) { logf(LOG_INFO,"disk: Reading %li bytes at offset %lli " "from %s.", bytesToRead,offset,base->m_dbname); } // if any keys in the map are the same report corruption char tmpKey [16]; char lastTmpKey[16]; long ccount = 0; if ( bytesToRead > 10000000 && bytesToRead / 2 > m_minRecSizes && base->m_fixedDataSize >= 0 ) { for ( long pn = p1 ; pn <= p2 ; pn++ ) { maps[fn]->getKey ( pn , tmpKey ); if ( KEYCMP(tmpKey,lastTmpKey,m_ks) == 0 ) ccount++; memcpy(lastTmpKey,tmpKey,m_ks); } } if ( ccount > 10 ) { logf(LOG_INFO,"disk: Reading %li bytes from %s file #" "%li when min " "required is %li. Map is corrupt and has %li " "identical consecutive page keys because the " "map was \"repaired\" because out of order keys " "in the index.", (long)bytesToRead, base->m_dbname,fn, (long)m_minRecSizes, (long)ccount); m_numScansCompleted++; m_errno = ECORRUPTDATA; m_hadCorruption = true; //m_maxRetries = 0; break; } // . do the scan/read of file #i // . this returns false if blocked, true otherwise // . this will set g_errno on error bool done = m_scans[i].setRead (base->getFile(m_fileNums[i]), base->m_fixedDataSize , offset , bytesToRead , startKey2 , endKey2 , m_ks , &m_lists[i] , this , doneScanningWrapper , base->useHalfKeys() , m_rdbId, m_niceness , m_allowPageCache , m_hitDisk ) ; // . 
damn, usually the above will indirectly launch a thread // to do the reading, but it sets g_errno to EINTR, // "interrupted system call"! // . i guess the thread does the read w/o blocking and then // queues the signal on g_loop's queue before it exits // . try ignoring, and keep going if ( g_errno == EINTR ) { log("net: Interrupted system call while reading file. " "Ignoring."); g_errno = 0; } // debug msg //fprintf(stderr,"Msg3:: reading %li bytes from file #%li," // "done=%li,offset=%lli,g_errno=%s," // "startKey=n1=%lu,n0=%llu, " // "endKey=n1=%lu,n0=%llu\n", // bytesToRead,i,(long)done,offset,mstrerror(g_errno), // m_startKey,m_endKey); //if ( bytesToRead == 0 ) // fprintf(stderr,"shit\n"); // if it did not block then it completed, so count it if ( done ) m_numScansCompleted++; // break on an error, and remember g_errno in case we block if ( g_errno && g_errno != ENOTHREADSLOTS ) { long tt = LOG_WARN; if ( g_errno == EFILECLOSED ) tt = LOG_INFO; log(tt,"disk: Reading %s had error: %s.", base->m_dbname, mstrerror(g_errno)); m_errno = g_errno; break; } } // debug test //if ( rand() % 100 <= 10 ) m_errno = EIO; // if we blocked, return false if ( m_numScansCompleted < m_numScansStarted ) return false; // . if all scans completed without blocking then wrap it up & ret true // . doneScanning may now block if it finds data corruption and must // get the list remotely return doneScanning(); }
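// . readList() above makes a single allocation of nn * chunk bytes and
//   carves it into eight parallel arrays (scans, page ranges, hint
//   keys/offsets, file numbers, lists, tfns), using the embedded
//   MSG3_BUF_SIZE buffer when it is big enough
// . the carving pattern in isolation, with byte blobs standing in for
//   RdbScan/RdbList and carveMsg3Arrays as a hypothetical name
#include <cstdlib>
struct Msg3ArraysSketch {
	char *scans;             // nn * sizeof(RdbScan) in the real code
	long *startpg , *endpg;
	char *hintKeys;          // nn * keySize bytes
	long *hintOffsets , *fileNums;
	char *lists;             // nn * sizeof(RdbList) in the real code
	long *tfns;
};
static char *carveMsg3Arrays ( long nn , long scanSize , long listSize ,
			       long ks , Msg3ArraysSketch *a ) {
	long chunk = scanSize + 4 + 4 + ks + 4 + 4 + listSize + 4;
	char *buf = (char *)calloc ( nn , chunk );
	if ( ! buf ) return NULL;       // caller logs and bails
	char *p = buf;
	a->scans       = p;             p += nn * scanSize;
	a->startpg     = (long *)p;     p += nn * 4;
	a->endpg       = (long *)p;     p += nn * 4;
	a->hintKeys    = p;             p += nn * ks;
	a->hintOffsets = (long *)p;     p += nn * 4;
	a->fileNums    = (long *)p;     p += nn * 4;
	a->lists       = p;             p += nn * listSize;
	a->tfns        = (long *)p;     // last nn * 4 bytes
	return buf;                     // one free() releases everything
}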
// . returns false if blocked, true otherwise // . sets g_errno on error // . dumps the RdbTree, m_tree, into m_file // . also sets and writes the RdbMap for m_file // . we methodically get RdbLists from the RdbTree // . dumped recs are ordered by key if "orderedDump" was true in call to set() // otherwise, lists are ordered by node # // . we write each list of recs to the file until the whole tree has been done // . we delete all records in list from the tree after we've written the list // . if a cache was provided we incorporate the list into the cache before // deleting it from the tree to keep the cache in sync. NO we do NOT! // . called again by writeBuf() when it's done writing the whole list bool RdbDump::dumpTree ( bool recall ) { // set up some vars //int32_t nextNode; //key_t maxEndKey; //maxEndKey.setMax(); char maxEndKey[MAX_KEY_BYTES]; KEYMAX(maxEndKey,m_ks); // if dumping statsdb, we can only dump records 30 seconds old or // more because Statsdb.cpp can "back modify" such records in the tree // because it may have a query that took 10 seconds come in then it // needs to add a partial stat to the last 10 stats for those 10 secs. // we use Global time at this juncture if ( m_rdb->m_rdbId == RDB_STATSDB ) { int32_t nowSecs = getTimeGlobal(); StatKey *sk = (StatKey *)maxEndKey; sk->m_zero = 0x01; sk->m_labelHash = 0xffffffff; // leave last 60 seconds in there just to be safe sk->m_time1 = nowSecs - 60; } // this list will hold the list of nodes/recs from m_tree m_list = &m_ourList; // convert coll to collnum //collnum_t collnum = g_collectiondb.getCollnum ( m_coll ); // a collnum of -1 is for collectionless rdbs //if ( collnum < 0 ) { // //if ( g_catdb->getRdb() == m_rdb ) // if ( ! m_rdb->m_isCollectionLess ) { // char *xx=NULL;*xx=0; //return true; // } // g_errno = 0; // collnum = 0; //} // getMemOccupiedForList2() can take some time, so breathe int32_t niceness = 1; loop: // if the lastKey was the max end key last time then we're done if ( m_rolledOver ) return true; // this is set to -1 when we're done with our unordered dump if ( m_nextNode == -1 ) return true; // . NOTE: list's buffer space should be re-used!! (TODO) // . "lastNode" is set to the last node # in the list bool status = true; //if ( ! m_orderedDump ) { // status = ((RdbTree *)m_tree)->getListUnordered ( m_nextNode , // m_maxBufSize , // m_list , // &nextNode ); // // this is -1 when no more nodes are left // m_nextNode = nextNode; //} // "lastKey" is set to the last key in the list //else { { // can we remove neg recs? // class RdbBase *base = m_rdb->getBase(m_collnum); // bool removeNegRecs = false; // if ( base->m_numFiles <= 0 ) removeNegRecs = true; if ( recall ) goto skip; // debug msg //log("RdbDump:: getting list"); m_t1 = gettimeofdayInMilliseconds(); if(m_tree) status = m_tree->getList ( m_collnum , m_nextKey , maxEndKey , m_maxBufSize , // max recSizes m_list , &m_numPosRecs , &m_numNegRecs , m_useHalfKeys , niceness ); else if(m_buckets) status = m_buckets->getList ( m_collnum, m_nextKey , maxEndKey , m_maxBufSize , // max recSizes m_list , &m_numPosRecs , &m_numNegRecs , m_useHalfKeys ); // don't dump out any neg recs if it is our first time dumping // to a file for this rdb/coll. TODO: implement this later. //if ( removeNegRecs ) // m_list.removeNegRecs(); // if(!m_list->checkList_r ( false , // removeNegRecs? // false , // sleep on problem? 
// m_rdb->m_rdbId )) { // log("db: list to dump is not sane!"); // char *xx=NULL;*xx=0; // } skip: int64_t t2; //key_t lastKey; char *lastKey; // if error getting list (out of memory?) if ( ! status ) goto hadError; // debug msg t2 = gettimeofdayInMilliseconds(); log(LOG_INFO,"db: Get list took %"INT64" ms. " "%"INT32" positive. %"INT32" negative.", t2 - m_t1 , m_numPosRecs , m_numNegRecs ); // keep a total count for reporting when done m_totalPosDumped += m_numPosRecs; m_totalNegDumped += m_numNegRecs; // . check the list we got from the tree for problems // . ensures keys are ordered from lowest to highest as well //#ifdef GBSANITYCHECK if ( g_conf.m_verifyWrites ) { char *s = "none"; if ( m_rdb ) s = getDbnameFromId(m_rdb->m_rdbId); log("dump: verifying list before dumping (rdb=%s)",s); m_list->checkList_r ( false , // removeNegRecs? false , // sleep on problem? m_rdb->m_rdbId ); } // if list is empty, we're done! if ( status && m_list->isEmpty() ) { // consider that a rollover? if ( m_rdb->m_rdbId == RDB_STATSDB ) m_rolledOver = true; return true; } // get the last key of the list lastKey = m_list->getLastKey(); // advance m_nextKey //m_nextKey = lastKey ; //m_nextKey += (uint32_t)1; //if ( m_nextKey < lastKey ) m_rolledOver = true; KEYSET(m_nextKey,lastKey,m_ks); KEYADD(m_nextKey,1,m_ks); if (KEYCMP(m_nextKey,lastKey,m_ks)<0) m_rolledOver = true; // debug msg //log(0,"RdbDump:lastKey.n1=%"UINT32",n0=%"UINT64"",lastKey.n1,lastKey.n0); //log(0,"RdbDump:next.n1=%"UINT32",n0=%"UINT64"",m_nextKey.n1,m_nextKey.n0); } // . return true on error, g_errno should have been set // . this is probably out of memory error if ( ! status ) { hadError: log("db: Had error getting data for dump: %s. Retrying.", mstrerror(g_errno)); // debug msg //log("RdbDump::getList: sleeping and retrying"); // retry for the remaining two types of errors if (!g_loop.registerSleepCallback(1000,this,tryAgainWrapper2)){ log( "db: Retry failed. Could not register callback."); return true; } // wait for sleep return false; } // if list is empty, we're done! if ( m_list->isEmpty() ) return true; // . set m_firstKeyInQueue and m_lastKeyInQueue // . this doesn't work if you're doing an unordered dump, but we should // not allow adds when closing m_lastKeyInQueue = m_list->getLastKey(); //m_firstKeyInQueue = m_list->getCurrentKey(); m_list->getCurrentKey(m_firstKeyInQueue); // . write this list to disk // . returns false if blocked, true otherwise // . sets g_errno on error // . if this blocks it should call us (dumpTree() back) if ( ! dumpList ( m_list , m_niceness , false ) ) return false; // close up shop on a write/dumpList error if ( g_errno ) return true; // . if dumpList() did not block then keep on truckin' // . otherwise, wait for callback of dumpTree() goto loop; }
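// . dumpTree() above advances m_nextKey to lastKey+1 after each list and
//   flags m_rolledOver when the add wraps, i.e. when the new key compares
//   below the old one, meaning the key space is exhausted
// . the increment/rollover test on raw key bytes, assuming the least
//   significant byte sits first as the KEYADD/KEYCMP macros treat keys;
//   keyAddOne is a hypothetical name
static bool keyAddOne ( unsigned char *k , int nbytes ) {
	// add 1 with carry propagation, least significant byte first
	for ( int i = 0 ; i < nbytes ; i++ ) {
		if ( ++k[i] != 0 ) return false; // no carry out: done
	}
	return true; // carried off the top: key rolled over to zero
}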
// . m_key bitmap in statsdb:
//   tttttttt tttttttt tttttttt tttttttt  t = time in milliseconds, t1
//   tttttttt tttttttt tttttttt tttttttt
//   hhhhhhhh hhhhhhhh hhhhhhhh hhhhhhhh  h = hash32 of m_title
// . returns false if could not add stat, true otherwise
// . do not set g_errno if we return false just to keep things simple
// . we only add the stat to our local statsdb rdb, but because
//   we might be dumping statsdb to disk or something it is possible
//   we get an ETRYAGAIN error, so we try to accumulate stats in a
//   local buffer in that case
// . "label" is something like "queryLatency" or whatever
// . [t1,t2] are the time endpoints for the operation being measured
// . "value" is usually "numBytes", or a quantity indicator of whatever
//   was processed.
// . oldVal and newVal reflect a state change, like maybe changing the
//   value of a parm. typically for such things t1 equals t2
bool Statsdb::addStat ( int32_t niceness ,
			const char *label ,
			int64_t t1Arg ,
			int64_t t2Arg ,
			float value , // y-value really, "numBytes"
			int32_t parmHash ,
			float oldVal ,
			float newVal ,
			int32_t userId32 ) {
	if ( ! g_conf.m_useStatsdb ) return true;
	// so Process.cpp can turn it off when dumping core
	if ( m_disabled ) return true;
	// not thread safe!
	//if ( g_threads.amThread() ) {
	//	log("statsdb: called from thread");
	//	g_process.shutdownAbort(true);
	//}
	// . for now we can only add stats if we are synced with host #0 clock
	// . this is kinda a hack and it would be nice to not miss stats!
	if ( ! isClockInSync() ) return true;
	RdbTree *tree = &m_rdb.m_tree;
	// do not add stats to our tree if it is loading
	if ( tree->m_isLoading ) return true;
	// convert into host #0 synced time
	t1Arg = localToGlobalTimeMilliseconds ( t1Arg );
	t2Arg = localToGlobalTimeMilliseconds ( t2Arg );
	// sanity check
	if ( ! label ) { g_process.shutdownAbort(true); }
	int32_t labelHash;
	if ( parmHash ) labelHash = parmHash;
	else            labelHash = hash32n ( label );
	// fix it for parm changes, and docs_indexed stat, etc.
	if ( t1Arg == t2Arg ) t2Arg++;
	// how many SECONDS did the op take? (convert from ms to secs)
	float dtms   = (t2Arg - t1Arg);
	float dtSecs = dtms / 1000.0;
	// we have already flushed stats 30+ seconds old, so if this op took
	// 30 seconds, discard it!
	if ( dtSecs >= 30 ) {
		//log("statsdb: stat is %" PRId32" secs > 30 secs old, "
		//    "discarding.", (int32_t)dtSecs);
		return true;
	}
	int64_t nextup;
	// loop over all "second" buckets
	for ( int64_t tx = t1Arg ; tx < t2Arg ; tx = nextup ) {
		// get next second-aligned point in milliseconds
		nextup = ((tx +1000)/ 1000) * 1000;
		// truncate if we need to
		if ( nextup > t2Arg ) nextup = t2Arg;
		// . how much of the stat is in this time interval?
		// . like if operation took 3 seconds, we might cover
		//   50% of the first 1-second interval. so we use this
		//   as a weight for the stats we keep for that particular
		//   second. then we can plot a point for each second
		//   in time which is an average of all the queries that
		//   were in progress at that second.
		float fractionTime = ((float)(nextup - tx)) / dtms;
		// . get the time point bucket in which this stat belongs
		// . every "second" in time has a bucket
		uint32_t t1 = tx / 1000;
		StatKey sk;
		memset(&sk,0,sizeof(sk));
		sk.m_zero      = 0x01; // make it a positive key
		sk.m_time1     = t1;
		sk.m_labelHash = labelHash;
		// so we can show just the stats for a particular user...
		if ( userId32 ) {
			sk.m_zero = userId32;
			// make it positive
			sk.m_zero |= 0x01;
		}

		// if we already have added a bucket for this "second" then
		// get it from the tree so we can add to its accumulated stats.
		int32_t node1 = tree->getNode ( 0 , (char *)&sk );
		int32_t node2;

		StatData *sd;

		// get that stat, see if we are accumulating it already
		if ( node1 >= 0 )
			sd = (StatData *)tree->getData ( node1 );

		// make a new one if not there
		else {
			StatData tmp;
			// init it
			memset(&tmp,0,sizeof(tmp));
			tmp.m_totalOps      = 0.0;
			tmp.m_totalQuantity = 0.0;
			tmp.m_totalTime     = 0.0;

			// save this
			int32_t saved = g_errno;

			// need to add using rdb so it can gbmemcpy the data
			if ( ! m_rdb.addRecord ( (collnum_t)0 ,
						 (char *)&sk,
						 (char *)&tmp,
						 sizeof(StatData),
						 niceness ) ) {
				if ( g_errno != ETRYAGAIN )
					log("statsdb: add rec failed: %s",
					    mstrerror(g_errno));
				// caller does not care about g_errno
				g_errno = saved;
				return false;
			}

			// caller does not care about g_errno
			g_errno = saved;

			// get the node in the tree
			//sd = (StatData *)tree->getData ( node1 );

			// must be there!
			node2 = tree->getNode ( 0 , (char *)&sk );

			// must be there!
			if ( node2 < 0 ) { g_process.shutdownAbort(true); }

			// point to it
			sd = (StatData *)tree->getData ( node2 );
		}

		// use the milliseconds elapsed as the value if none given
		//if ( value == 0 && ! parmHash )
		//	value = t2Arg - t1Arg;

		// if we got it for this time, accumulate it
		sd->m_totalOps      += 1      * fractionTime;
		sd->m_totalQuantity += value  * fractionTime;
		sd->m_totalTime     += dtSecs * fractionTime;

		if ( ! parmHash ) continue;

		sd->m_totalOps      = 0;
		sd->m_totalQuantity = oldVal;
		sd->m_newVal        = newVal;
		// no fractions for this!
		break;
	}

	//logf(LOG_DEBUG,"statsdb: sp=0x%" PRIx32,(int32_t)sp);

	return true;
}
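// The bucket-splitting loop above, distilled into a self-contained sketch:
// an operation spanning [t1,t2] (milliseconds) contributes to every
// 1-second bucket it overlaps, weighted by the fraction of its duration
// inside that bucket, so the weights across buckets always sum to 1.
// Function and callback names are hypothetical.
static void forEachSecondBucket ( int64_t t1 , int64_t t2 ,
				  void (*accum)(uint32_t sec, float frac) ) {
	// assumes t2 > t1, which addStat() enforces by bumping t2Arg
	float dtms = (float)(t2 - t1);
	int64_t nextup;
	for ( int64_t tx = t1 ; tx < t2 ; tx = nextup ) {
		// next second-aligned point in milliseconds
		nextup = ((tx + 1000) / 1000) * 1000;
		// clamp to the end of the operation
		if ( nextup > t2 ) nextup = t2;
		// bucket id is the unix second, weight is the overlap
		accum ( (uint32_t)(tx / 1000) ,
			(float)(nextup - tx) / dtms );
	}
}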
// . return false if blocked, true otherwise
// . sets g_errno on error
// . this one is also called by RdbMerge to dump lists
bool RdbDump::dumpList ( RdbList *list , int32_t niceness , bool recall ) {
	// if we had a write error and are being recalled...
	if ( recall ) {
		m_offset -= m_bytesToWrite;
		goto recallskip;
	}

	// assume we don't hack the list
	m_hacked = false;
	m_hacked12 = false;

	// save ptr to list... why?
	m_list = list;

	// nothing to do if list is empty
	if ( m_list->isEmpty() ) return true;

	// we're now in dump mode again
	m_isDumping = true;

	//#ifdef GBSANITYCHECK
	// don't check list if we're dumping an unordered list from tree!
	if ( g_conf.m_verifyWrites && m_orderedDump ) {
		m_list->checkList_r ( false /*removedNegRecs?*/ );
		// print list stats
		// log("dump: sk=%s ",KEYSTR(m_list->m_startKey,m_ks));
		// log("dump: ek=%s ",KEYSTR(m_list->m_endKey,m_ks));
	}
	//#endif

	// before calling RdbMap::addList(), always reset list ptr
	// since we no longer call this in RdbMap::addList() so we don't
	// mess up the possible HACK below
	m_list->resetListPtr();

	// . SANITY CHECK
	// . ensure first key is > last key added to the map
	if ( m_offset > 0 && m_map ) {
		//key_t k = m_list->getCurrentKey();
		char k[MAX_KEY_BYTES];
		m_list->getCurrentKey(k);
		//key_t lastKey = m_map->getLastKey (); // m_lastKey
		char lastKey[MAX_KEY_BYTES];
		m_map->getLastKey(lastKey);
		//char *lastKey = m_map->getLastKey();
		//if ( k <= lastKey ) {
		if ( KEYCMP(k,lastKey,m_ks)<=0 ) {
			log(LOG_LOGIC,"db: Dumping list key out of order. "
			    //"lastKey.n1=%"XINT32" n0=%"XINT64" "
			    //"k.n1=%"XINT32" n0=%"XINT64"",
			    //lastKey.n1,lastKey.n0,k.n1,k.n0);
			    "lastKey=%s k=%s",
			    KEYSTR(lastKey,m_ks),
			    KEYSTR(k,m_ks));
			g_errno = EBADENGINEER;
			//return true;
			char *xx = NULL; *xx = 0;
		}
	}

	if ( g_conf.m_verifyWrites ) {
		char rdbId = 0;
		if ( m_rdb ) rdbId = m_rdb->m_rdbId;
		m_list->checkList_r(false,false,rdbId);//RDB_POSDB);
		m_list->resetListPtr();
	}

	// HACK! POSDB
	if ( m_ks == 18 && m_orderedDump && m_offset > 0 ) {
		char k[MAX_KEY_BYTES];
		m_list->getCurrentKey(k);
		// . same top 12 bytes as last key we added?
		// . if so, we should only add 6 bytes from this key, not 18,
		//   so on disk it is compressed consistently
		if ( memcmp ( (k             ) + (m_ks-12) ,
			      (m_prevLastKey ) + (m_ks-12) , 12 ) == 0 ) {
			char tmp[MAX_KEY_BYTES];
			char *p = m_list->getList();
			// swap high 12 bytes with low 6 bytes for first key
			gbmemcpy ( tmp   , p             , m_ks-12 );
			gbmemcpy ( p     , p + (m_ks-12) , 12      );
			gbmemcpy ( p + 12, tmp           , m_ks-12 );
			// big hack here
			m_list->m_list      = p + 12;
			m_list->m_listPtr   = p + 12;
			m_list->m_listPtrLo = p ;
			m_list->m_listPtrHi = p + 6;
			m_list->m_listSize -= 12 ;
			// turn on both bits to indicate double compression
			*(p+12) |= 0x06;
			m_hacked12 = true;
		}
	}

	// . HACK
	// . if we're doing an ordered dump then hack the list's first 12 byte
	//   key to make it a 6 byte iff the last key we dumped last time
	//   shares the same top 6 bytes as the first key of this list
	// . this way we maintain compression consistency on the disk
	//   so IndexTable.cpp can expect all 6 byte keys for the same termid
	//   and RdbList::checkList_r() can expect the half bits to always be
	//   on when they can be on
	// . IMPORTANT: calling m_list->resetListPtr() will mess this HACK up!!
	if ( m_useHalfKeys && m_orderedDump && m_offset > 0 && ! m_hacked12 ) {
		//key_t k = m_list->getCurrentKey();
		char k[MAX_KEY_BYTES];
		m_list->getCurrentKey(k);
		// . same top 6 bytes as last key we added?
		// . if so, we should only add 6 bytes from this key, not 12,
		//   so on disk it is compressed consistently
		//if ( memcmp ( ((char *)&k             ) + 6 ,
		//	      ((char *)&m_prevLastKey ) + 6 , 6 ) == 0 ) {
		if ( memcmp ( (k             ) + (m_ks-6) ,
			      (m_prevLastKey ) + (m_ks-6) , 6 ) == 0 ) {
			m_hacked = true;
			//char tmp[6];
			char tmp[MAX_KEY_BYTES];
			char *p = m_list->getList();
			//gbmemcpy ( tmp   , p     , 6 );
			//gbmemcpy ( p     , p + 6 , 6 );
			//gbmemcpy ( p + 6 , tmp   , 6 );
			gbmemcpy ( tmp   , p            , m_ks-6 );
			gbmemcpy ( p     , p + (m_ks-6) , 6      );
			gbmemcpy ( p + 6 , tmp          , m_ks-6 );
			// big hack here
			m_list->m_list      = p + 6;
			m_list->m_listPtr   = p + 6;
			// make this work for POSDB, too
			m_list->m_listPtrLo = p + 6 + 6;
			m_list->m_listPtrHi = p ;
			m_list->m_listSize -= 6 ;
			// hack on the half bit, too
			*(p+6) |= 0x02;
		}
	}

	// update old last key
	//m_prevLastKey = m_list->getLastKey();
	m_list->getLastKey(m_prevLastKey);

	// now write it to disk
	m_buf          = m_list->getList    ();
	m_bytesToWrite = m_list->getListSize();

	//#ifdef GBSANITYCHECK
	//if (m_list->getListSize()!=m_list->getListEnd() - m_list->getList()){
	//	log("RdbDump::dumpList: major problem here!");
	//	sleep(50000);
	//}
	//#endif

 recallskip:
	// make sure we have enough mem to add to map after a successful
	// dump up here, otherwise, if we write it and fail to add to map
	// the map is not in sync if we core thereafter
	if ( m_addToMap && m_map && ! m_map->prealloc ( m_list ) ) {
		log("db: Failed to prealloc list into map: %s.",
		    mstrerror(g_errno));
		// g_errno should be set to something if that failed
		if ( ! g_errno ) { char *xx = NULL; *xx = 0; }
		return true;
	}

	// grab the old offset
	int64_t offset = m_offset;
	// might as well update the offset now, even before write is done
	m_offset += m_bytesToWrite ;
	// write thread is out
	m_writing = true;
	//m_bytesWritten = 0;

	// sanity check
	//log("dump: writing %"INT32" bytes at offset %"INT64"",m_bytesToWrite,offset);

	// . if we're called by RdbMerge directly use m_callback/m_state
	// . otherwise, use doneWritingWrapper() which will call dumpTree()
	// . BigFile::write() returns 0 if blocked,-1 on error,>0 on completion
	// . it also sets g_errno on error
	bool isDone = m_file->write ( m_buf          ,
				      m_bytesToWrite ,
				      offset         ,
				      &m_fstate      ,
				      this           ,
				      doneWritingWrapper ,
				      niceness       );
	// debug msg
	//log("RdbDump dumped %"INT32" bytes, done=%"INT32"\n",
	//    m_bytesToWrite,isDone);
	// return false if it blocked
	if ( ! isDone ) return false;
	// done writing
	m_writing = false;
	// return true on error
	if ( g_errno ) return true;
	// . delete list from tree, incorporate list into cache, add to map
	// . returns false if blocked, true otherwise, sets g_errno on error
	// . will only block in calling updateTfndb()
	return doneDumpingList ( true );
}
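// The two HACKs above implement on-disk prefix compression. A 12-byte key
// whose top 6 bytes match the previous key is written as just its low 6
// bytes with bit 0x02 set; an 18-byte posdb key whose top 12 bytes match
// is written as its low 6 bytes with bits 0x06 set. A sketch of how a
// reader would size each record under those assumptions (hypothetical
// helper; the real decoding lives in RdbList):
static int32_t keyBytesOnDiskSketch ( unsigned char firstByte , int32_t ks ) {
	if ( ks == 18 && (firstByte & 0x04) ) return 6;      // double compressed
	if ( firstByte & 0x02 )               return ks - 6; // half key
	return ks;                                           // full key
}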
bool sendReply ( void *state ) { // get the state properly Msg7 *msg7= (Msg7 *) state; GigablastRequest *gr = &msg7->m_gr; // extract info from state TcpSocket *sock = gr->m_socket; XmlDoc *xd = &msg7->m_xd; // log it //if ( msg7->m_url[0] ) xd->logIt(); // msg7 has the docid for what we injected, iff g_errno is not set //long long docId = msg7->m_msg7.m_docId; //long hostId = msg7->m_msg7.m_hostId; long long docId = xd->m_docId; long hostId = 0;//msg7->m_msg7.m_hostId; // set g_errno to index code if ( xd->m_indexCodeValid && xd->m_indexCode && ! g_errno ) g_errno = xd->m_indexCode; char format = gr->m_hr.getReplyFormat(); // no url parm? if ( ! g_errno && ! gr->m_url && format != FORMAT_HTML ) g_errno = EMISSINGINPUT; if ( g_errno && g_errno != EDOCUNCHANGED ) { long save = g_errno; mdelete ( msg7, sizeof(Msg7) , "PageInject" ); delete (msg7); g_errno = save; char *msg = mstrerror(g_errno); return g_httpServer.sendErrorReply(sock,save,msg,NULL); } char abuf[320]; SafeBuf am(abuf,320,0,false); am.setLabel("injbuf"); char *ct = NULL; // a success reply, include docid and url i guess if ( format == FORMAT_XML ) { am.safePrintf("<response>\n"); am.safePrintf("\t<statusCode>%li</statusCode>\n", (long)g_errno); am.safePrintf("\t<statusMsg><![CDATA["); am.cdataEncode(mstrerror(g_errno)); am.safePrintf("]]></statusMsg>\n"); am.safePrintf("\t<docId>%lli</docId>\n",xd->m_docId); if ( gr->m_getSections ) { SafeBuf *secBuf = xd->getInlineSectionVotingBuf(); am.safePrintf("\t<htmlSrc><![CDATA["); if ( secBuf->length() ) am.cdataEncode(secBuf->getBufStart()); am.safePrintf("]]></htmlSrc>\n"); } am.safePrintf("</response>\n"); ct = "text/xml"; } if ( format == FORMAT_JSON ) { am.safePrintf("{\"response\":{\n"); am.safePrintf("\t\"statusCode\":%li,\n",(long)g_errno); am.safePrintf("\t\"statusMsg\":\""); am.jsonEncode(mstrerror(g_errno)); am.safePrintf("\",\n"); am.safePrintf("\t\"docId\":%lli,\n",xd->m_docId); if ( gr->m_getSections ) { SafeBuf *secBuf = xd->getInlineSectionVotingBuf(); am.safePrintf("\t\"htmlSrc\":\""); if ( secBuf->length() ) am.jsonEncode(secBuf->getBufStart()); am.safePrintf("\",\n"); } // subtract ",\n" am.m_length -= 2; am.safePrintf("\n}\n}\n"); ct = "application/json"; } if ( format == FORMAT_XML || format == FORMAT_JSON ) { mdelete ( msg7, sizeof(Msg7) , "PageInject" ); delete (msg7); return g_httpServer.sendDynamicPage(sock, am.getBufStart(), am.length(), 0, false, ct ); } // // debug // /* // now get the meta list, in the process it will print out a // bunch of junk into msg7->m_pbuf if ( xd->m_docId ) { char *metalist = xd->getMetaList ( 1,1,1,1,1,1 ); if ( ! metalist || metalist==(void *)-1){char *xx=NULL;*xx=0;} // print it out SafeBuf *pbuf = &msg7->m_sbuf; xd->printDoc( pbuf ); bool status = g_httpServer.sendDynamicPage( msg7->m_socket , pbuf->getBufStart(), pbuf->length() , -1, //cachtime false ,//postreply? NULL, //ctype -1 , //httpstatus NULL,//cookie "utf-8"); // delete the state now mdelete ( st , sizeof(Msg7) , "PageInject" ); delete (st); // return the status return status; } */ // // end debug // char *url = gr->m_url; // . if we're talking w/ a robot he doesn't care about this crap // . send him back the error code (0 means success) if ( url && gr->m_shortReply ) { char buf[1024*32]; char *p = buf; // return docid and hostid if ( ! 
g_errno ) p += sprintf ( p ,
			 "0,docId=%lli,hostId=%li," ,
			 docId , hostId );
		// print error number here
		else  p += sprintf ( p , "%li,0,0,", (long)g_errno );
		// print error msg out, too or "Success"
		p += sprintf ( p , "%s", mstrerror(g_errno));
		mdelete ( msg7, sizeof(Msg7) , "PageInject" );
		delete (msg7);
		return g_httpServer.sendDynamicPage ( sock,buf, gbstrlen(buf) ,
						      -1/*cachetime*/);
	}

	SafeBuf sb;

	// print admin bar
	g_pages.printAdminTop ( &sb, sock , &gr->m_hr );

	// print a response msg if rendering the page after a submission
	if ( g_errno )
		sb.safePrintf ( "<center>Error injecting url: <b>%s[%i]</b>"
				"</center>",
				mstrerror(g_errno) , g_errno);
	else if ( (gr->m_url&&gr->m_url[0]) ||
		  (gr->m_queryToScrape&&gr->m_queryToScrape[0]) )
		sb.safePrintf ( "<center><b>Successfully injected %s</b>"
				"</center><br>"
				, xd->m_firstUrl.m_url
				);

	// print the table of injection parms
	g_parms.printParmTable ( &sb , sock , &gr->m_hr );

	// clear g_errno, if any, so our reply send goes through
	g_errno = 0;

	// calculate buffer length
	//long bufLen = p - buf;

	// nuke state
	mdelete ( msg7, sizeof(Msg7) , "PageInject" );
	delete (msg7);

	// . send this page
	// . encapsulates in html header and tail
	// . make a Mime
	// . i thought we need -2 for cacheTime, but i guess not
	return g_httpServer.sendDynamicPage (sock,
					     sb.getBufStart(),
					     sb.length(),
					     -1/*cachetime*/);
}
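// For reference, the reply shapes produced above (all values
// hypothetical). The JSON branch emits:
//
//   {"response":{
//   "statusCode":0,
//   "statusMsg":"Success",
//   "docId":123456
//   }}
//
// and the "short reply" branch for robots emits a single line like
//
//   0,docId=123456,hostId=0,Success
//
// or, on error, "<errno>,0,0,<error message>". mstrerror(0) is "Success",
// per the comment in the code above.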
// . delete list from tree, incorporate list into cache, add to map
// . returns false if blocked, true otherwise, sets g_errno on error
bool RdbDump::doneDumpingList ( bool addToMap ) {
	// we can get suspended when gigablast is shutting down, in which
	// case the map may have been deleted. only RdbMerge suspends its
	// m_dump class, not Rdb::m_dump. return false so caller never
	// gets called back. we can not resume from this suspension!
	//if ( m_isSuspended ) return false;
	// . if error was EFILECLOSED (file got closed before we wrote to it)
	//   then try again. file can close because fd pool needed more fds
	// . we cannot do this retry in BigFile.cpp because the BigFile
	//   may have been deleted/unlinked from a merge, but we could move
	//   this check to Msg3... and do it for writes, too...
	// . seem to be getting EBADFD errors now, too (what code is it?)
	//   i don't remember, just do it on *all* errors for now!
	//if ( g_errno == EFILECLOSED || g_errno == EBADFD ) {
	if ( g_errno && ! m_isSuspended ) {
		log(LOG_INFO,"db: Had error dumping data: %s. Retrying.",
		    mstrerror(g_errno));
		// . deal with the EBADF bug, it will loop forever on this
		// . i still don't know how the fd gets closed and s_fds[vfd]
		//   is not set to -1?!?!?!
		if ( g_errno == EBADF ) {
			// note it
			log(LOG_LOGIC,"db: setting fd for vfd to -1.");
			// mark our fd as not there...
			//int32_t i=(m_offset-m_bytesToWrite) / MAX_PART_SIZE;
			// sets s_fds[vfd] to -1
			// MDW: no, can't do this now
			// if ( m_file->m_files[i] )
			//	releaseVfd ( m_file->m_files[i]->m_vfd );
		}
		//log("RdbDump::doneDumpingList: retrying.");
		return dumpList ( m_list , m_niceness , true );
	}
	// bail on error
	if ( g_errno ) {
		log("db: Had error dumping data: %s.",
		    mstrerror(g_errno));
		//log("RdbDump::doneDumpingList: %s",mstrerror(g_errno));
		return true;
	}
	// . don't delete the list if we were dumping an unordered list
	// . we only dump unordered lists when we do a save
	// . it saves time not having to delete the list and it also allows
	//   us to do saves without deleting our data! good!
	if ( ! m_orderedDump ) return true; //--turn this off until save works

	// save for verify routine
	m_addToMap = addToMap;

	// should we verify what we wrote? useful for preventing disk
	// corruption from those pesky Western Digitals and Maxtors?
	if ( g_conf.m_verifyWrites ) {
		// a debug message, if log disk debug messages is enabled
		log(LOG_DEBUG,"disk: Verifying %"INT32" bytes written.",
		    m_bytesToWrite);
		// make a read buf
		if ( m_verifyBuf && m_verifyBufSize < m_bytesToWrite ) {
			mfree ( m_verifyBuf , m_verifyBufSize , "RdbDump3" );
			m_verifyBuf = NULL;
			m_verifyBufSize = 0;
		}
		if ( ! m_verifyBuf ) {
			m_verifyBuf = (char *)mmalloc ( m_bytesToWrite ,
							"RdbDump3" );
			m_verifyBufSize = m_bytesToWrite;
		}
		// out of mem? if so, skip the write verify
		if ( ! m_verifyBuf ) return doneReadingForVerify();
		// read what we wrote
		bool isDone = m_file->read ( m_verifyBuf ,
					     m_bytesToWrite ,
					     m_offset - m_bytesToWrite ,
					     &m_fstate ,
					     this ,
					     doneReadingForVerifyWrapper ,
					     m_niceness );
		// debug msg
		//log("RdbDump dumped %"INT32" bytes, done=%"INT32"\n",
		//    m_bytesToWrite,isDone);
		// return false if it blocked
		if ( ! isDone ) return false;
	}
	return doneReadingForVerify();
}
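// doneReadingForVerify() is not shown in this listing; a minimal sketch
// of the check it presumably performs (an assumption, not the actual
// implementation): compare the bytes read back from disk against the
// buffer we just dumped.
static bool verifyWriteSketch ( const char *written ,
				const char *readBack ,
				int32_t n ) {
	// any mismatch means the disk did not store what we wrote
	return memcmp ( written , readBack , n ) == 0;
}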
bool sendPageAddDelColl ( TcpSocket *s , HttpRequest *r , bool add ) {
	// get collection name
	//long nclen;
	//char *nc = r->getString ( "nc" , &nclen );
	//long cpclen;
	//char *cpc = r->getString ( "cpc" , &cpclen );
	g_errno = 0;
	//bool cast = r->getLong("cast",0);
	char *msg = NULL;
	// if any host in network is dead, do not do this
	//if ( g_hostdb.hasDeadHost() ) msg = "A host in the network is dead.";
	// . are we adding a collection?
	// . return if error adding, might already exist!
	// . g_errno should be set
	// . WE DO NOT NEED THIS ANYMORE. Pages.cpp now broadcasts
	//   addcoll as CommandAddColl() parm.
	/*
	if ( nclen > 0 && add && ! cast ) {
		// do not allow "main" that is used for the "" collection
		// for backwards compatibility
		//if ( strcmp ( nc , "main" ) != 0 )
		g_collectiondb.addRec (nc,cpc,cpclen,true,(collnum_t)-1,
				       false , // isdump?
				       true ) ;// save it?
		//else
		//	log("admin: \"main\" collection is forbidden.");
	}
	if ( ! add && ! cast )
		g_collectiondb.deleteRecs ( r ) ;
	*/
	char format = r->getReplyFormat();
	if ( format == FORMAT_XML || format == FORMAT_JSON ) {
		// no addcoll given?
		long page = g_pages.getDynamicPageNumber ( r );
		char *addcoll = r->getString("addcoll",NULL);
		char *delcoll = r->getString("delcoll",NULL);
		if ( ! addcoll ) addcoll = r->getString("addColl",NULL);
		if ( ! delcoll ) delcoll = r->getString("delColl",NULL);
		if ( page == PAGE_ADDCOLL && ! addcoll ) {
			g_errno = EBADENGINEER;
			char *msg = "no addcoll parm provided";
			return g_httpServer.sendErrorReply(s,g_errno,msg,NULL);
		}
		if ( page == PAGE_DELCOLL && ! delcoll ) {
			g_errno = EBADENGINEER;
			char *msg = "no delcoll parm provided";
			return g_httpServer.sendErrorReply(s,g_errno,msg,NULL);
		}
		return g_httpServer.sendSuccessReply(s,format);
	}
	// error?
	char *action = r->getString("action",NULL);
	char *addColl = r->getString("addcoll",NULL);

	char buf [ 64*1024 ];
	SafeBuf p(buf, 64*1024);

	//
	// CLOUD SEARCH ENGINE SUPPORT - GIGABOT ERRORS
	//
	SafeBuf gtmp;
	char *gmsg = NULL;
	// is it too big?
	if ( action && addColl && gbstrlen(addColl) > MAX_COLL_LEN ) {
		gtmp.safePrintf("search engine name is too long");
		gmsg = gtmp.getBufStart();
	}
	// from Collectiondb.cpp::addNewColl() ensure coll name is legit
	char *x = addColl;
	for ( ; x && *x ; x++ ) {
		if ( is_alnum_a(*x) ) continue;
		if ( *x == '-' ) continue;
		if ( *x == '_' ) continue; // underscore now allowed
		break;
	}
	if ( x && *x ) {
		g_errno = EBADENGINEER;
		gtmp.safePrintf("<font color=red>Error. \"%s\" is a "
				"malformed name because it "
				"contains the '%c' character.</font><br><br>",
				addColl,*x);
		gmsg = gtmp.getBufStart();
	}
	//
	// END GIGABOT ERRORS
	//

	//
	// CLOUD SEARCH ENGINE SUPPORT
	//
	// if added the coll successfully, do not print same page, jump to
	// printing the basic settings page so they can add sites to it.
	// crap, this GET request, "r", is missing the "c" parm sometimes.
	// we need to use the "addcoll" parm anyway. maybe print a meta
	// redirect then?
	char guide = r->getLong("guide",0);
	// do not redirect if gmsg is set, there was a problem with the name
	if ( action && ! msg && format == FORMAT_HTML && guide && ! gmsg ) {
		//return g_parms.sendPageGeneric ( s, r, PAGE_BASIC_SETTINGS );
		// just redirect to it
		if ( addColl )
			p.safePrintf("<meta http-equiv=Refresh "
				     "content=\"0; URL=/admin/settings"
				     "?guide=1&c=%s\">", addColl);
		return g_httpServer.sendDynamicPage (s,
						     p.getBufStart(),
						     p.length());
	}

	// print standard header
	g_pages.printAdminTop ( &p , s , r , NULL,
				"onload=document."
				"getElementById('acbox').focus();");

	// gigabot error?
//if ( gmsg ) // p.safePrintf("Gigabot says: %s<br><br>",gmsg); //long page = g_pages.getDynamicPageNumber ( r ); //char *coll = r->getString ( "c" ); //char *pwd = r->getString ( "pwd" ); //char *username = g_users.getUsername( r ); //long user = g_pages.getUserType ( s , r ); //if ( ! coll ) coll = ""; //if ( ! nc ) nc = ""; //if ( ! pwd ) pwd = ""; if ( g_errno ) msg = mstrerror(g_errno); if ( msg && ! guide ) { char *cc = "deleting"; if ( add ) cc = "adding"; p.safePrintf ( "<center>\n" "<font color=red>" "<b>Error %s collection: %s. " "See log file for details.</b>" "</font>" "</center><br>\n",cc,msg); } // // CLOUD SEARCH ENGINE SUPPORT // if ( add && guide ) printGigabotAdvice ( &p , PAGE_ADDCOLL , r , gmsg ); // print the add collection box if ( add /*&& (! nc[0] || g_errno ) */ ) { char *t1 = "Add Collection"; if ( guide ) t1 = "Add Search Engine"; p.safePrintf ( "<center>\n<table %s>\n" "<tr class=hdrow><td colspan=2>" "<center><b>%s</b></center>" "</td></tr>\n" ,TABLE_STYLE ,t1 ); char *t2 = "collection"; if ( guide ) t2 = "search engine"; char *str = addColl; if ( ! addColl ) str = ""; p.safePrintf ( "<tr bgcolor=#%s>" "<td><b>name of new %s to add</td>\n" "<td><input type=text name=addcoll size=30 " "id=acbox " "value=\"%s\">" "</td></tr>\n" , LIGHT_BLUE , t2 , str ); // don't show the clone box if we are under gigabot the guide if ( ! guide ) p.safePrintf( "<tr bgcolor=#%s>" "<td><b>clone settings from this " "collection</b>" "<br><font size=1>Copy settings from " "this pre-existing collection. Leave " "blank to " "accept default values.</font></td>\n" "<td><input type=text name=clonecoll " "size=30>" "</td>" "</tr>" , LIGHT_BLUE ); // now list collections from which to copy the config //p.safePrintf ( // "<tr><td><b>copy configuration from this " // "collection</b><br><font size=1>Leave blank to " // "accept default values.</font></td>\n" // "<td><input type=text name=cpc value=\"%s\" size=30>" // "</td></tr>\n",coll); p.safePrintf ( "</table></center><br>\n"); // wrap up the form started by printAdminTop g_pages.printAdminBottom ( &p ); long bufLen = p.length(); return g_httpServer.sendDynamicPage (s,p.getBufStart(),bufLen); } // if we added a collection, print its page //if ( add && nc[0] && ! g_errno ) // return g_parms.sendPageGeneric2 ( s , r , PAGE_SEARCH , // nc , pwd ); if ( g_collectiondb.m_numRecsUsed <= 0 ) goto skip; // print all collections out in a checklist so you can check the // ones you want to delete, the values will be the id of that collectn p.safePrintf ( "<center>\n<table %s>\n" "<tr class=hdrow><td><center><b>Delete Collections" "</b></center></td></tr>\n" "<tr bgcolor=#%s><td>" "<center><b>Select the collections you wish to delete. " //"<font color=red>This feature is currently under " //"development.</font>" "</b></center></td></tr>\n" "<tr bgcolor=#%s><td>" // table within a table "<center><table width=20%%>\n", TABLE_STYLE, LIGHT_BLUE, DARK_BLUE ); for ( long i = 0 ; i < g_collectiondb.m_numRecs ; i++ ) { CollectionRec *cr = g_collectiondb.m_recs[i]; if ( ! cr ) continue; p.safePrintf ( "<tr bgcolor=#%s><td>" "<input type=checkbox name=delcoll value=\"%s\"> " "%s</td></tr>\n", DARK_BLUE, cr->m_coll,cr->m_coll); } p.safePrintf( "</table></center></td></tr></table><br>\n" ); skip: // wrap up the form started by printAdminTop g_pages.printAdminBottom ( &p ); long bufLen = p.length(); return g_httpServer.sendDynamicPage (s,p.getBufStart(),bufLen); }
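// The collection-name check in sendPageAddDelColl() above, factored into
// a predicate (a sketch; is_alnum_a is the codebase's ASCII alnum test):
// a name may contain only alphanumerics, '-' and '_'.
static bool isValidCollNameSketch ( const char *s ) {
	if ( ! s || ! *s ) return false;
	for ( ; *s ; s++ ) {
		if ( is_alnum_a(*s) ) continue;
		if ( *s == '-' ) continue;
		if ( *s == '_' ) continue; // underscore now allowed
		return false;
	}
	return true;
}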
bool AutoBan::printTable( TcpSocket *s , HttpRequest *r ) { SafeBuf sb(512 * 512,"autobbuf"); //read in all of the possible cgi parms off the bat: //long user = g_pages.getUserType( s , r ); //char *username = g_users.getUsername(r); //char *pwd = r->getString ("pwd"); char *coll = r->getString ("c"); long banIpsLen; char *banIps = r->getString ("banIps" , &banIpsLen , NULL); long allowIpsLen; char *allowIps = r->getString ("allowIps" , &allowIpsLen , NULL); long clearLen; char *clear = r->getString ("clear" , &clearLen , NULL); bool changed = false; long validCodesLen; char *validCodes = r->getString ("validCodes", &validCodesLen, NULL); long showAllIps = r->getLong("showAllIps", 0); long showLongView = r->getLong("longview", 0); // do it all from parm now //long banRegexLen; //char *banRegex = r->getString("banRegex", &banRegexLen, NULL); // char *ss = sb.getBuf(); // char *ssend = sb.getBufEnd(); g_pages.printAdminTop ( &sb, s , r ); //sb.incrementLength(sss - ss); // MDW: moved to here long now = getTime(); long days; long hours; long minutes; long secs; long msecs; if(r->getLong("resetcodes", 0)) { setCodesFromConf(); } sb.safePrintf("\n<br><br><table %s>\n",TABLE_STYLE); getCalendarFromMs((now - m_codeResetTime) * 1000, &days, &hours, &minutes, &secs, &msecs); sb.safePrintf("<tr><td colspan=18 bgcolor=#%s>" "<center><b>Code Usage " "(<a href=\"/admin/" "autoban?c=%s&resetcodes=1\">reset</a> " "%li days %li hours %li " "minutes %li sec ago)" "</b></center></td></tr>", DARK_BLUE, coll, days, hours, minutes, secs); sb.safePrintf("<tr bgcolor=#%s>" "<td><center><b>Code</b></center></td>" "<td><center><b>IP</b></center></td>" "<td><center><b>Query Count</b></center></td>" "<td><center><b>Bytes Read</b></center></td>" "<td><center><b>Bytes Sent</b></center></td>" "<td><center><b>Outstanding Count</b></center></td>" "<td><center><b>Most Ever Outstanding</b></center></td>" "<td><center><b>Max Outstanding</b></center></td>" "</tr>", LIGHT_BLUE); for(long i = 0; i < m_ht.getNumSlots(); i++) { if ( m_ht.getKey ( i ) == 0 ) continue; CodeVal *cv = m_ht.getValuePointerFromSlot ( i ); if ( ! 
cv ) continue; sb.safePrintf("<tr>"); sb.safePrintf("<td>"); sb.copyToken(cv->m_code);//m_codeVals[i].m_code); sb.safePrintf("</td>"); sb.safePrintf("<td><center>%s</center> </td>", iptoa(cv->m_ip)); sb.safePrintf("<td><center>%lli</center></td>", cv->m_count); sb.safePrintf("<td><center>%lli</center></td>", cv->m_bytesRead); sb.safePrintf("<td><center>%lli</center></td>", cv->m_bytesSent); sb.safePrintf("<td><center>%li</center></td>", cv->m_outstanding); sb.safePrintf("<td><center>%li</center></td>", cv->m_maxEver); if ( cv->m_maxOutstanding != 50 ) sb.safePrintf("<td><center><b>%li</b></center></td>", cv->m_maxOutstanding); else sb.safePrintf("<td><center>%li</center></td>", cv->m_maxOutstanding); sb.safePrintf("</tr>"); } sb.safePrintf ("</table><br><br>\n" ); if(clear && clearLen < 64) { long ip = atoip(clear, clearLen); if(ip) { removeIp(ip); char *beginning; char ipbuf[64];//gotta NULL terminate for strstr memcpy(ipbuf, clear, clearLen); ipbuf[clearLen] = '\0'; beginning = findToken(g_conf.m_banIps, ipbuf, clearLen); if(beginning) { char *to = beginning; char *from = beginning + clearLen; while(*to) *to++ = *from++; } beginning = findToken(g_conf.m_allowIps, ipbuf, clearLen); if(beginning) { char *to = beginning; char *from = beginning + clearLen; while(*to) *to++ = *from++; } changed = true; } } long allowLen; char *allow = r->getString ( "allow" , &allowLen , NULL ); if(allow && allowLen < 64) { long ip = atoip(allow, allowLen); if(ip) { char *beginning; char ipbuf[64];//gotta NULL terminate for strstr memcpy(ipbuf, allow, allowLen); ipbuf[allowLen] = '\0'; beginning = findToken(g_conf.m_allowIps, ipbuf, allowLen); if(!beginning) { //its not present, so add it. char *p = g_conf.m_allowIps; while(*p) p++; if(p - g_conf.m_allowIps + allowLen + 2 < AUTOBAN_TEXT_SIZE) { *p++ = '\n'; memcpy(p, ipbuf,allowLen); *(p + allowLen) = '\0'; } else { sb.safePrintf("<font color=red>" "Not enough stack space " "to fit allowIps. " "Increase " "AUTOBAN_TEXT_SIZE in " "Conf.h. " "Had %i need %li." "</font>", AUTOBAN_TEXT_SIZE, p - g_conf.m_allowIps + allowLen + 2); goto dontRemove1; } } beginning = findToken(g_conf.m_banIps, ipbuf, allowLen); if(beginning) { //remove it from banned if present. char *to = beginning; char *from = beginning + allowLen; while(*to) *to++ = *from++; } changed = true; } } dontRemove1: long denyLen; char *deny = r->getString ( "deny" , &denyLen , NULL ); if(deny && denyLen < 64) { long ip = atoip(deny, denyLen); if(ip) { char *beginning; char ipbuf[64];//gotta NULL terminate for strstr memcpy(ipbuf, deny, denyLen); ipbuf[denyLen] = '\0'; beginning = findToken(g_conf.m_banIps, ipbuf, denyLen); if(!beginning) { //its not present, so add it. char *p =g_conf.m_banIps; while(*p) p++; if(p - g_conf.m_banIps + denyLen + 2 < AUTOBAN_TEXT_SIZE) { *p++ = '\n'; memcpy(p, ipbuf,denyLen); *(p + denyLen) = '\0'; } else { sb.safePrintf("<font color=red>Not " "enough stack space " "to fit bannedIPs. " "Increase " "AUTOBAN_TEXT_SIZE in " "Conf.h. " "Had %i need %li." "</font>", AUTOBAN_TEXT_SIZE, p - g_conf.m_banIps + denyLen + 2); goto dontRemove2; } } beginning = findToken(g_conf.m_allowIps, ipbuf, denyLen); if(beginning) { //remove it from allowed list if present. 
				char *to = beginning;
				char *from = beginning + denyLen;
				while(*to) *to++ = *from++;
			}
			changed = true;
		}
	}
 dontRemove2:
	if(!g_conf.m_doAutoBan) {
		sb.safePrintf("<center><font color=red><b>Autoban is disabled, "
			      "turn it on in Master Controls.</b></font>"
			      "</center><br>");
	}
	if(validCodes) {
		if(validCodesLen >= AUTOBAN_TEXT_SIZE) {
			sb.safePrintf("<font color=red>Not enough stack space "
				      "to fit codes. "
				      "Increase AUTOBAN_TEXT_SIZE in Conf.h. "
				      "Had %i need %li.</font>",
				      AUTOBAN_TEXT_SIZE,
				      validCodesLen);
			validCodes = NULL;
			validCodesLen = 0;
		}
		else {
			memcpy(g_conf.m_validCodes, validCodes, validCodesLen);
			g_conf.m_validCodes[validCodesLen] = '\0';
			trimWhite(g_conf.m_validCodes);
			setCodesFromConf();
		}
	}
	//first remove all of the ips in the conf, then add the passed in
	// ones to the conf parm;
	if (banIps) {
		//ack, the browser puts in crlf when this comes back, so
		//we will have a longer string here than the one we sent
		//out. trim back all extraneous whitespace before we do
		//bounds checking.
		trimWhite(banIps);
		banIpsLen = gbstrlen(banIps);
		if(banIpsLen >= AUTOBAN_TEXT_SIZE) {
			sb.safePrintf("<font color=red>Not enough stack space "
				      "to fit bannedIps. "
				      "Increase AUTOBAN_TEXT_SIZE in Conf.h. "
				      "Had %i need %li.</font>",
				      AUTOBAN_TEXT_SIZE,
				      banIpsLen);
			banIpsLen = AUTOBAN_TEXT_SIZE - 1;
		}
		for(long i = 0; i < m_tableSize; i++) {
			if(m_detectKeys[i] == 0) continue;
			//check the 'set from conf' bit, and clear those.
			if(m_detectVals[i].m_flags & FROMCONF) {
				removeIp(m_detectKeys[i]);
			}
		}
		memcpy(g_conf.m_banIps, banIps, banIpsLen);
		g_conf.m_banIps[banIpsLen] = '\0';
		changed = true;
	}
	if (allowIps) {
		trimWhite(allowIps);
		allowIpsLen = gbstrlen(allowIps);
		if(allowIpsLen >= AUTOBAN_TEXT_SIZE) {
			sb.safePrintf("<font color=red>Not enough stack space "
				      "to fit allowIps. "
				      "Increase AUTOBAN_TEXT_SIZE in Conf.h. "
				      "Had %i need %li.</font>",
				      AUTOBAN_TEXT_SIZE,
				      allowIpsLen);
			allowIpsLen = AUTOBAN_TEXT_SIZE - 1;
		}
		for(long i = 0; i < m_tableSize; i++) {
			if(m_detectKeys[i] == 0) continue;
			//check the 'set from conf' bit, and clear those.
if(m_detectVals[i].m_flags & FROMCONF) { removeIp(m_detectKeys[i]); } } memcpy(g_conf.m_allowIps, allowIps, allowIpsLen); g_conf.m_allowIps[allowIpsLen] = '\0'; changed = true; } if(changed) { trimWhite(g_conf.m_allowIps); trimWhite(g_conf.m_banIps); setFromConf(); } sb.safePrintf("\n<table %s>\n",TABLE_STYLE); sb.safePrintf("<tr><td colspan=2 bgcolor=#%s>" "<center><b>Add IPs</b></center></td></tr>", DARK_BLUE); // ss = sb.getBuf(); // ssend = sb.getBufEnd(); g_parms.printParms (&sb, s, r); // sb.incrementLength(sss - ss); sb.safePrintf ("<tr><td>" "<center>" "<input type=submit value=\"Update\" " "method=\"POST\" border=0>" "</center></td></tr>"); sb.safePrintf ("</table><br><br>\n" ); if(!showLongView) { sb.safePrintf("<b><a href=\"autoban" "?c=%s" "&showAllIps=%li" "&longview=1\">Show watched ips table...</a></b>", coll, showAllIps); return g_httpServer.sendDynamicPage ( s , sb.getBufStart() , sb.length() , -1 , false); } ///////////////////////////////////////////////////////////////////// sb.safePrintf("\n<table %s>\n",TABLE_STYLE); sb.safePrintf("<tr><td colspan=3 bgcolor=#%s>" "<center><b>Watched Ips</b></center></td></tr>", DARK_BLUE); sb.safePrintf("<tr bgcolor=#%s>" "<td><center><b>IP</b></center></td>" "<td><center><b>Description</b></center></td>" // "<td><center><b>Time Added</b></center></td>" "<td><center><b>Allow/Deny/Clear</b></center></td>" "</tr>", LIGHT_BLUE); long *sortedIndices = (long*)mmalloc(m_tableSize * sizeof(long), "AutoBanH"); if(!sortedIndices) { return g_httpServer.sendErrorReply(s,500,mstrerror(ENOMEM)); } long numEntries = 0; for(long i = 0; i < m_tableSize; i++) { if(m_detectKeys[i] == 0) continue; sortedIndices[numEntries++] = i; } SorterTable = m_detectKeys; gbsort(sortedIndices, numEntries, sizeof(long), ip_cmp); //lets put each class of watched ip in its own safebuf then cat //them together at the end. SafeBuf allowed; SafeBuf banned; SafeBuf feedLeachers; SafeBuf cowBots; SafeBuf *e; for(long j = 0; j < numEntries; j++) { long i = sortedIndices[j]; if(m_detectKeys[i] == 0) continue; //if(!(m_detectVals[i].m_flags & FROMCONF)) continue; bool allow = m_detectVals[i].m_flags & ALLOW && m_detectVals[i].m_flags & FROMCONF; bool deny = m_detectVals[i].m_flags & DENY && m_detectVals[i].m_flags & FROMCONF; bool explicitban = deny && m_detectVals[i].m_flags & FROMCONF; unsigned short dayCount = m_detectVals[i].m_dayCount; unsigned char minuteCount = m_detectVals[i].m_minuteCount; bool day = dayCount >= g_conf.m_numFreeQueriesPerDay; bool minute = minuteCount >= g_conf.m_numFreeQueriesPerMinute; char *description; char *color; if(allow) { color = GREEN; description = "Allowed"; e = &allowed; } else if(explicitban) { color = RED; description = "Banned"; e = &banned; } else if(minute) { color = RED; description = "Cow Bot"; e = &cowBots; } else if(day) { color = RED; description = "Feed Leacher"; e = &feedLeachers; } else { //this can happen when someone was banned due to //exceeding the quota, then the quota was lowered. 
m_detectVals[i].m_flags &= ~DENY; //log("autoban: ohshit-banning %s",iptoa(s->m_ip)); continue; } e->safePrintf("<tr>"); e->safePrintf("<td bgcolor=#%s><center>%s</center></td><td>" "<center>%s</center></td>" // "<td><center>" // "%li days %li hrs %li min ago" // "</center></td>" "<td><center><a href=\"/admin/" "autoban?c=%s&allow=%s&showAllIps=%li\">" "allow/</a>" "<a href=\"/admin/" "autoban?c=%s&deny=%s&showAllIps=%li\">" "deny/</a>" "<a href=\"/admin/" "autoban?c=%s&clear=%s&showAllIps=%li\">" "clear</a></center>" "</td>",color, iptoa(m_detectKeys[i]), description, // days,hours,minutes, coll, iptoa(m_detectKeys[i]), showAllIps, coll, iptoa(m_detectKeys[i]), showAllIps, coll, iptoa(m_detectKeys[i]), showAllIps); e->safePrintf("</tr>"); } sb.cat(allowed); sb.cat(banned); sb.cat(feedLeachers); sb.cat(cowBots); sb.safePrintf ("</table><br><br>\n" ); // MDW moved from here sb.safePrintf("\n<br><br><table %s>\n",TABLE_STYLE); sb.safePrintf("<tr><td colspan=5 bgcolor=#%s>" "<center><b>Control Panel</b></center></td></tr>", DARK_BLUE); sb.safePrintf("<tr>" "<td bgcolor=#%s><center><b>Show Ips by Number of Queries" "</b></center></td>", LIGHT_BLUE); sb.safePrintf("<td><center><font color=red><b><a href=\"/admin/" "autoban?c=%s&showAllIps=0\">" "0 Queries</a></b>" "</font></center></td>", coll); sb.safePrintf("<td><center><font color=red><b><a href=\"/admin/" "autoban?c=%s&showAllIps=1\">" "1 Query</a></b>" "</font></center></td>", coll); sb.safePrintf("<td><center><font color=red><b><a href=\"/admin/" "autoban?c=%s&showAllIps=10\">" "10 Queries</a></b>" "</font></center></td>", coll); sb.safePrintf("<td><center><font color=red><b><a href=\"/admin/" "autoban?c=%s&showAllIps=100\">" "100 Queries</a></b>" "</font></center></td></tr>", coll); sb.safePrintf ("</table><br><br>\n"); if(!showAllIps) { char* ss = (char*) sb.getBufStart(); long sslen = sb.length(); mfree(sortedIndices, m_tableSize * sizeof(long),"AutoBanH"); return g_httpServer.sendDynamicPage ( s , ss , sslen , -1 , false); } sb.safePrintf("\n<br><br><table %s>\n",TABLE_STYLE); sb.safePrintf("<tr><td colspan=6 bgcolor=#%s>" "<center><b>Queries Today</b></center></td></tr>", DARK_BLUE); sb.safePrintf("<tr bgcolor=#%s>" "<td><center><b>IP</b></center></td>" "<td><center><b>Minute count</b></center></td>" "<td><center><b>Day count</b></center></td>" "<td><center><b>Time Until Reset</b></center></td>" "<td><center><b>Times Banned</b></center></td>" "<td><center><b>Allow/Deny</b></center></td>" "</tr>", LIGHT_BLUE); char minBuf[128]; char dayBuf[128]; unsigned long lastIpGroup = 0; for(long j = 0; j < numEntries; j++) { long i = sortedIndices[j]; long dayCount = m_detectVals[i].m_dayCount; unsigned char minuteCount = m_detectVals[i].m_minuteCount; if(!(m_detectVals[i].m_flags & FROMCONF)) { if(m_detectVals[i].m_minuteExpires < now) minuteCount = 0; if(!(m_detectVals[i].m_flags & DENY) && m_detectVals[i].m_dayExpires < now) dayCount = 0; } //a hack: if( dayCount < showAllIps) continue; char *color = YELLOW; if(m_detectVals[i].m_flags & ALLOW) { color = GREEN; snprintf(minBuf, 128, "--"); snprintf(dayBuf, 128, "%li", dayCount); } else if(m_detectVals[i].m_flags & DENY) { color = RED; snprintf(minBuf, 128, "--"); snprintf(dayBuf, 128, "%li", dayCount); } else { snprintf(minBuf, 128, "%li", (long)minuteCount); snprintf(dayBuf, 128, "%li", (long)dayCount); } unsigned long thisIpGroup = (unsigned long)m_detectKeys[i] & 0x00ffffff; sb.safePrintf("<tr><center>"); if(m_detectVals[i].m_flags & FROMCONF) { sb.safePrintf("<td 
bgcolor=#%s><center>%s%s%s</center></td>" "<td><center>%s</center> </td>" "<td><center>%s</center></td>" "<td><center><font color=red>" "<b>NEVER</b>" "</font></center></td>" "<td><center>--</center></td>", color, (thisIpGroup == lastIpGroup)?"<b>":"", iptoa(m_detectKeys[i]), (thisIpGroup == lastIpGroup)?"</b>":"", minBuf, dayBuf); } else { //they haven't done a query since being unbanned, //unban them now so we don't get negative resets displayed. /* no, don't unban the bots!!! MDW yippy project if(m_detectVals[i].m_dayExpires < now) { m_detectVals[i].m_flags &= ~DENY; //log("autoban: dayexpire-unbanning %s", // iptoa(ip)); m_detectVals[i].m_dayExpires = now + ONE_DAY; m_detectVals[i].m_minuteExpires = now + 60; m_detectVals[i].m_dayCount = 0; m_detectVals[i].m_minuteCount = 0; sb.safePrintf("</center></tr>"); continue; } */ getCalendarFromMs((m_detectVals[i].m_dayExpires - now)* 1000, &days, &hours, &minutes, &secs, &msecs); sb.safePrintf("<td bgcolor=#%s><center>%s%s%s</center></td>" "<td><center>%s</center> </td>" "<td><center>%s</center></td>" "<td><center><font color=red>" "<b>%li days %li hrs %li min %li sec</b>" "</font></center></td>" "<td><center>%i</center></td>", color, (thisIpGroup == lastIpGroup)?"<b>":"", iptoa(m_detectKeys[i]), (thisIpGroup == lastIpGroup)?"</b>":"", minBuf, dayBuf, days, hours, minutes, secs, m_detectVals[i].m_timesBanned); } sb.safePrintf("<td><center>" "<a href=\"/admin/" "autoban?c=%s&allow=%s&showAllIps=%li\">" "allow/</a>" "<a href=\"/admin/" "autoban?c=%s&deny=%s&showAllIps=%li\">" "deny</a></center>" "</td>", coll, iptoa(m_detectKeys[i]), showAllIps, coll, iptoa(m_detectKeys[i]), showAllIps); sb.safePrintf("</center></tr>"); lastIpGroup = thisIpGroup; } sb.safePrintf ("</table><br><br>\n" ); char* ss = (char*) sb.getBufStart(); long sslen = sb.length(); mfree(sortedIndices, m_tableSize * sizeof(long),"AutoBanH"); return g_httpServer.sendDynamicPage ( s , ss , sslen , -1 , false); }
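// AutoBan::printTable() above repeats one idiom several times: find a
// NUL-terminated ip token inside a newline-separated conf string and
// close the gap by shifting the tail left. Factored out it would look
// like this (a sketch; findToken is the existing lookup used above):
static bool removeTokenSketch ( char *list , char *tok , long tokLen ) {
	char *beginning = findToken ( list , tok , tokLen );
	if ( ! beginning ) return false;
	char *to   = beginning;
	char *from = beginning + tokLen;
	// shift the rest of the list left over the removed token,
	// copying the terminating NUL along with it
	while ( *to ) *to++ = *from++;
	return true;
}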
/////////// // // main > Basic > Status // /////////// bool sendPageBasicStatus ( TcpSocket *socket , HttpRequest *hr ) { char buf [ 128000 ]; SafeBuf sb(buf,128000); sb.reset(); char format = hr->getReplyFormat(); // true = usedefault coll? CollectionRec *cr = g_collectiondb.getRec ( hr , true ); if ( ! cr ) { g_httpServer.sendErrorReply(socket,500,"invalid collection"); return true; } if ( format == FORMAT_JSON || format == FORMAT_XML) { // this is in PageCrawlBot.cpp printCrawlDetails2 ( &sb , cr , format ); char *ct = "text/xml"; if ( format == FORMAT_JSON ) ct = "application/json"; return g_httpServer.sendDynamicPage (socket, sb.getBufStart(), sb.length(), 0, // cachetime false,//POSTReply , ct); } // print standard header if ( format == FORMAT_HTML ) // this prints the <form tag as well g_pages.printAdminTop ( &sb , socket , hr ); // table to split between widget and stats in left and right panes if ( format == FORMAT_HTML ) { sb.safePrintf("<TABLE id=pane>" "<TR><TD valign=top>"); } int32_t savedLen1, savedLen2; // // widget // // put the widget in here, just sort results by spidered date // // the scripts do "infinite" scrolling both up and down. // but if you are at the top then new results will load above // you and we try to maintain your current visual state even though // the scrollbar position will change. // if ( format == FORMAT_HTML ) { // save position so we can output the widget code // so user can embed it into their own web page savedLen1 = sb.length(); printScrollingWidget ( &sb , cr ); savedLen2 = sb.length(); } // the right table pane is the crawl stats if ( format == FORMAT_HTML ) { sb.safePrintf("</TD><TD valign=top>"); } // // show stats // if ( format == FORMAT_HTML ) { char *seedStr = cr->m_diffbotSeeds.getBufStart(); if ( ! seedStr ) seedStr = ""; SafeBuf tmp; int32_t crawlStatus = -1; getSpiderStatusMsg ( cr , &tmp , &crawlStatus ); CrawlInfo *ci = &cr->m_localCrawlInfo; int32_t sentAlert = (int32_t)ci->m_sentCrawlDoneAlert; if ( sentAlert ) sentAlert = 1; //sb.safePrintf( // "<form method=get action=/crawlbot>" // "%s" // , sb.getBufStart() // hidden input token/name/.. // ); char *hurts = "No"; if ( cr->m_globalCrawlInfo.m_hasUrlsReadyToSpider ) hurts = "Yes"; sb.safePrintf(//"<TABLE border=0>" //"<TR><TD valign=top>" "<table id=stats border=0 cellpadding=5>" "<tr>" "<td><b>Crawl Status Code:</td>" "<td>%" PRId32"</td>" "</tr>" "<tr>" "<td><b>Crawl Status Msg:</td>" "<td>%s</td>" "</tr>" //"<tr>" //"<td><b>Rounds Completed:</td>" //"<td>%" PRId32"</td>" //"</tr>" "<tr>" "<td><b>Has Urls Ready to Spider:</td>" "<td>%s</td>" "</tr>" // this will have to be in crawlinfo too! 
//"<tr>" //"<td><b>pages indexed</b>" //"<td>%" PRId64"</td>" //"</tr>" "<tr>" "<td><b><nobr>URLs Harvested</b> " "(may include dups)</nobr></td>" "<td>%" PRId64"</td>" "</tr>" //"<tr>" //"<td><b>URLs Examined</b></td>" //"<td>%" PRId64"</td>" //"</tr>" "<tr>" "<td><b>Page Crawl Attempts</b></td>" "<td>%" PRId64"</td>" "</tr>" "<tr>" "<td><b>Page Crawl Successes</b></td>" "<td>%" PRId64"</td>" "</tr>" , crawlStatus , tmp.getBufStart() //, cr->m_spiderRoundNum //, cr->m_globalCrawlInfo.m_hasUrlsReadyToSpider , hurts , cr->m_globalCrawlInfo.m_urlsHarvested //, cr->m_globalCrawlInfo.m_urlsConsidered , cr->m_globalCrawlInfo.m_pageDownloadAttempts , cr->m_globalCrawlInfo.m_pageDownloadSuccesses ); // // begin status code breakdown // for ( int32_t i = 0 ; i < 65536 ; i++ ) { if ( g_stats.m_allErrorsNew[i] == 0 && g_stats.m_allErrorsOld[i] == 0 ) continue; sb.safePrintf ( "<tr>" "<td><b> <a href=/search?c=%s&q=" "gbstatusmsg%%3A" "%%22" , cr->m_coll ); sb.urlEncode(mstrerror(i)); sb.safePrintf ("%%22>" "%s" "</a>" "</b></td>" "<td>%" PRId64"</td>" "</tr>\n" , mstrerror(i), g_stats.m_allErrorsNew[i] + g_stats.m_allErrorsOld[i] ); } // // end status code breakdown // char tmp3[64]; struct tm *timeStruct; time_t tt = (time_t)cr->m_diffbotCrawlStartTime; timeStruct = localtime(&tt); // Jan 01 1970 at 10:30:00 strftime ( tmp3,64 , "%b %d %Y at %H:%M:%S",timeStruct); sb.safePrintf("<tr><td><b>Collection Created</b></td>" "<td>%s (local time)</td></tr>",tmp3); // print link to embed the code in their own site SafeBuf embed; embed.htmlEncode(sb.getBufStart()+savedLen1, savedLen2-savedLen1, false); // encodePoundSign #? // convert all ''s to "'s for php's echo ''; cmd embed.replaceChar('\'','\"'); sb.safePrintf("<tr>" "<td valign=top>" "<a onclick=\"" "var dd=document.getElementById('hcode');" "if ( dd.style.display=='none' ) " "dd.style.display=''; " "else " "dd.style.display='none';" "\" style=color:blue;>" "<u>" "show Widget HTML code" "</u>" "</a>" "</td><td>" "<div id=hcode style=display:none;" "max-width:800px;>" "%s" "</div>" "</td></tr>" , embed.getBufStart() ); sb.safePrintf("<tr>" "<td valign=top>" "<a onclick=\"" "var dd=document.getElementById('pcode');" "if ( dd.style.display=='none' ) " "dd.style.display=''; " "else " "dd.style.display='none';" "\" style=color:blue;>" "<u>" "show Widget PHP code" "</u>" "</a>" "</td>" "<td>" "<div id=pcode style=display:none;" "max-width:800px;>" "<i>" "echo '" "%s" "';" "</i>" "</div>" "</td></tr>" , embed.getBufStart() ); sb.safePrintf("</table>\n\n"); } // end the right table pane if ( format == FORMAT_HTML ) { sb.safePrintf("</TD></TR></TABLE>"); } //if ( format != FORMAT_JSON ) // // wrap up the form, print a submit button // g_pages.printAdminBottom ( &sb ); return g_httpServer.sendDynamicPage (socket, sb.getBufStart(), sb.length(), 0); // cachetime }
bool Msge0::sendMsg8a ( long i ) { // handle errors if ( g_errno && ! m_errno ) m_errno = g_errno; g_errno = 0; Msg8a *m = &m_msg8as[i]; //TagRec *m = &m_tagRecs[i]; // save state into Msg8a m->m_state2 = this; m->m_state3 = (void *)i; // how big are all the tags we got for this url long need = sizeof(TagRec); // sanity check if ( need > SLAB_SIZE ) { char *xx=NULL;*xx=0; } // how much space left in the latest buffer if ( m_slabPtr + need > m_slabEnd ) { // inc the buffer number m_slabNum++; // allocate a new 8k buffer m_slab[m_slabNum] = (char *)mmalloc (SLAB_SIZE,"msgeslab"); // failed? if ( ! m_slab[m_slabNum] ) { // do not free if null above m_slabNum--; // count as reply m_numReplies++; // make it available again m_used[i] = false; // record error if ( ! m_errno ) m_errno = g_errno; // error out log("msge0: slab alloc: %s",mstrerror(g_errno)); return true; } // uh oh? if ( ! m_slab[m_slabNum] && m_errno == 0 ) m_errno = g_errno; // set it (will be NULL if malloc failed) m_slabPtr = m_slab[m_slabNum]; m_slabEnd = m_slabPtr + SLAB_SIZE; } // we are processing the nth url long n = m_ns[i]; // now use it m_tagRecPtrs[n] = (TagRec *)m_slabPtr; // constructor m_tagRecPtrs[n]->constructor(); // advance it m_slabPtr += sizeof(TagRec); // skip for debug //return doneSending(i); // . this now employs the tagdb filters table for lookups // . that is really a hack until we find a way to identify subsites // on a domain automatically, like blogspot.com/users/harry/ is a // subsite. if ( ! m->getTagRec ( &m_urls[i] , NULL, // sites[i] , m_collnum , // if domain is banned, we will miss that here! true , // skip domain lookup? m_niceness , m , // state gotTagRecWrapper , m_tagRecPtrs[n]) ) return false; return doneSending ( i ); }
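// The TagRec slabs above are a simple bump allocator: sizeof(TagRec)
// slots are carved out of SLAB_SIZE blocks, and a fresh block is
// malloced when the current one is full. Distilled into a sketch
// (hypothetical names; the bookkeeping that lets Msge0 free every slab
// later is omitted):
static char *slabAllocSketch ( char **slabPtr , char **slabEnd ,
			       int32_t need , int32_t slabSize ) {
	if ( *slabPtr + need > *slabEnd ) {
		char *fresh = (char *)malloc ( slabSize );
		if ( ! fresh ) return NULL; // caller records the error
		*slabPtr = fresh;
		*slabEnd = fresh + slabSize;
	}
	char *ret = *slabPtr;
	*slabPtr += need;
	return ret;
}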
void Scraper::gotPhrase ( ) {
	// error getting random phrase? just log it
	if ( g_errno ) log("scraper: got error getting random phrase: %s",
			   mstrerror(g_errno));

	CollectionRec *cr = g_collectiondb.getRec ( m_coll );

 loop:
	// what type of query should we do?
	m_qtype = rand() % 3;

	// make sure web, news, blog is enabled
	if ( m_qtype == 0 && ! cr->m_scrapingEnabledWeb   ) goto loop;
	if ( m_qtype == 1 && ! cr->m_scrapingEnabledNews  ) goto loop;
	if ( m_qtype == 2 && ! cr->m_scrapingEnabledBlogs ) goto loop;

	// scraping is off when repairing obviously
	if ( g_repairMode ) return;

	// get it
	char *s = g_wiki.m_randPhrase;

	// convert _'s to spaces
	for ( char *p = s ; *p ; p++ )
		if ( *p == '_' ) *p = ' ';

	// . url encode the random phrase
	// . truncate it to 200 bytes to keep things sane
	// . Wiki::doneReadingWiki() keeps it below 128 i think anyway
	char qe[400];
	urlEncode(qe, 200, s , gbstrlen(s) );
	char *end = qe + 390;

	// half the time append a random word from dictionary so that we
	// discover those tail-end sites better
	if ( m_qtype == 0 && (rand() % 2) ) {
		// point into it for appending
		char *p = qe + gbstrlen(qe);
		// add a space, url encoded
		*p++ = '+';
		// append a random word to it from dictionary
		char *rw = g_speller.getRandomWord();
		// append that in
		urlEncode( p , end - p - 1 , rw , gbstrlen(rw) );
	}

	// make a query to scrape
	char buf[2048];

	char *uf ;
	if      ( m_qtype == 0 )
		uf="http://www.google.com/search?num=50&q=%s&scoring=d"
			"&filter=0";
	// google news query? sort by date.
	else if ( m_qtype == 1 )
		uf="http://news.google.com/news?num=50&q=%s&sort=n"
			"&filter=0";
	// google blog query?
	else if ( m_qtype == 2 )
		uf="http://www.google.com/blogsearch?num=50&q=%s&scoring=d"
			"&filter=0";
	// sanity check
	else { char *xx=NULL;*xx=0; }

	// make the url we will download
	sprintf ( buf , uf , qe );

	SpiderRequest sreq;
	// set the SpiderRequest to the url we built, not the format string
	strcpy(sreq.m_url, buf);
	// . tell it to only add the hosts of each outlink for now!
	// . that will be passed on to when XmlDoc calls Links::set() i guess
	// . xd will not reschedule the scraped url into spiderdb either
	sreq.m_isScraping = 1;
	sreq.m_fakeFirstIp = 1;
	long firstIp = hash32n(buf);
	if ( firstIp == 0 || firstIp == -1 ) firstIp = 1;
	sreq.m_firstIp = firstIp;
	// parent docid is 0
	sreq.setKey(firstIp,0LL,false);

	// forceDel = false, niceness = 0
	m_xd.set4 ( &sreq , NULL , m_coll , NULL , 0 );

	//m_xd.m_isScraping = true;

	// download without throttling
	//m_xd.m_throttleDownload = false;

	// disregard this
	m_xd.m_useRobotsTxt = false;

	// call this when index completes
	m_xd.setCallback ( NULL , indexedDocWrapper );

	// assume it blocked
	m_numSent++;

	// scraper is special
	m_xd.m_usePosdb     = false;
	m_xd.m_useDatedb    = false;
	m_xd.m_useClusterdb = false;
	m_xd.m_useLinkdb    = false;
	m_xd.m_useSpiderdb  = true; // only this one i guess
	m_xd.m_useTitledb   = false;
	m_xd.m_useTagdb     = false;
	m_xd.m_usePlacedb   = false;
	//m_xd.m_useTimedb    = false;
	//m_xd.m_useSectiondb = false;
	//m_xd.m_useRevdb     = false;

	// . return false if this blocks
	// . will add the spider recs to spiderdb of the outlinks
	// . will add "ingoogle", etc. tags for each outlink
	if ( ! m_xd.indexDoc ( ) ) return ;

	// we didn't block
	indexedDoc ( );
}
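// Scraper::gotPhrase() above fakes a "first ip" by hashing the scrape
// url, steering clear of 0 and -1, which presumably act as sentinel
// values elsewhere in the spider code. As a helper (a sketch,
// hypothetical name):
static long fakeFirstIpSketch ( const char *url ) {
	long ip = hash32n ( url );
	if ( ip == 0 || ip == -1 ) ip = 1; // avoid reserved values
	return ip;
}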
// . serializes this Msg20Reply into a buffer and sends it back on the
//   udp slot stored in xd
// . always returns true; sends an error reply instead if g_errno is set
bool Msg20Reply::sendReply ( XmlDoc *xd ) {
	// get it
	UdpSlot *slot = (UdpSlot *)xd->m_slot;

	if ( g_errno ) {
		// extract titleRec ptr
		log("query: Had error generating msg20 reply for d=%"INT64": "
		    "%s",xd->m_docId, mstrerror(g_errno));
		// don't forget to delete this list
	haderror:
		mdelete ( xd, sizeof(XmlDoc) , "Msg20" );
		delete ( xd );
		g_udpServer.sendErrorReply ( slot , g_errno ) ;
		return true;
	}

	// now create a buffer to store title/summary/url/docLen and send back
	int32_t  need = getStoredSize();
	char *buf = (char *)mmalloc ( need , "Msg20Reply" );
	if ( ! buf ) goto haderror;

	// should never have an error!
	int32_t used = serialize ( buf , need );

	// sanity
	if ( used != need ) { char *xx=NULL;*xx=0; }

	// sanity check, no, might have been banned/filtered above around
	// line 956 and just called sendReply directly
	//if ( st->m_memUsed == 0 ) { char *xx=NULL;*xx=0; }

	// use blue for our color
	int32_t color = 0x0000ff;
	// but use dark blue for niceness > 0
	if ( xd->m_niceness > 0 ) color = 0x0000b0;

	//Msg20Reply *tt = (Msg20Reply *)buf;

	// sanity check
	if ( ! xd->m_utf8ContentValid ) { char *xx=NULL;*xx=0; }

	// for records
	int32_t clen = 0;
	if ( xd->m_utf8ContentValid ) clen = xd->size_utf8Content - 1;

	// show it in performance graph
	if ( xd->m_startTimeValid )
		g_stats.addStat_r ( clen ,
				    xd->m_startTime ,
				    gettimeofdayInMilliseconds() ,
				    color );

	// . del the list at this point, we've copied all the data into reply
	// . this will free a non-null State20::m_ps (ParseState) for us
	mdelete ( xd , sizeof(XmlDoc) , "xd20" );
	delete ( xd );

	g_udpServer.sendReply_ass ( buf , need , buf , need , slot );

	return true;
}
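// The tail of this listing relies on a strict contract between
// getStoredSize() and serialize(): the former must predict exactly how
// many bytes the latter writes, or the sanity check above aborts. The
// idiom, in miniature (hypothetical reply object):
//
//	int32_t need = reply->getStoredSize(); // predicted size
//	char *buf    = (char *)mmalloc ( need , "Msg20Reply" );
//	int32_t used = reply->serialize ( buf , need );
//	if ( used != need ) { char *xx=NULL;*xx=0; } // drifted: abort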